From a991ab9e33e3d1195f7b1301977dc18aa4af3b12 Mon Sep 17 00:00:00 2001
From: Justin Sing <32938975+singjc@users.noreply.github.com>
Date: Wed, 7 May 2025 10:07:52 -0400
Subject: [PATCH 01/75] Update model_interface.rs

Add Clone derive macro to Parameters struct
---
 crates/redeem-properties/src/models/model_interface.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index d8c08af..3aad160 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -625,6 +625,7 @@ pub trait ModelInterface: Send + Sync {
 }
 
 /// Parameters for the `predict` method of a `ModelInterface` implementation.
+#[derive(Clone)]
 pub struct Parameters {
     /// The instrument data was acquired on. Refer to list of supported instruments in const yaml file.
     pub instrument: String,

From 8f338557787bd20edc56e9cd28932a13954acf27 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 7 May 2025 11:27:25 -0400
Subject: [PATCH 02/75] chore: Add Clone trait implementation for
 ModelInterface

---
 .../redeem-properties/src/models/ccs_model.rs |  1 +
 .../src/models/model_interface.rs             | 22 ++++++++++++++++++-
 .../redeem-properties/src/models/ms2_model.rs |  1 +
 .../redeem-properties/src/models/rt_model.rs  |  1 +
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 62a35ed..36d5e5f 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -17,6 +17,7 @@ pub enum CCSModelArch {
 pub const CCSMODEL_ARCHS: &[&str] = &["ccs_cnn_lstm"];
 
 // A wrapper struct for CCS models
+#[derive(Clone)]
 pub struct CCSModelWrapper {
     model: Box<dyn ModelInterface>,
 }

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index d8c08af..d5aca40 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -176,10 +176,30 @@ pub fn create_var_map(
 }
 
+pub trait ModelClone {
+    fn clone_box(&self) -> Box<dyn ModelInterface>;
+}
+
+impl<T> ModelClone for T
+where
+    T: 'static + ModelInterface + Clone,
+{
+    fn clone_box(&self) -> Box<dyn ModelInterface> {
+        Box::new(self.clone())
+    }
+}
+
+impl Clone for Box<dyn ModelInterface> {
+    fn clone(&self) -> Box<dyn ModelInterface> {
+        self.clone_box()
+    }
+}
+
+
 /// Represents an abstract deep learning model interface.
 ///
 /// This trait defines the methods and properties that a deep learning model must implement to be used for property prediction tasks.
-pub trait ModelInterface: Send + Sync {
+pub trait ModelInterface: Send + Sync + ModelClone {
     /// Get the property type of the model.
     fn property_type(&self) -> PropertyType;

diff --git a/crates/redeem-properties/src/models/ms2_model.rs b/crates/redeem-properties/src/models/ms2_model.rs
index fc590cd..cf1979b 100644
--- a/crates/redeem-properties/src/models/ms2_model.rs
+++ b/crates/redeem-properties/src/models/ms2_model.rs
@@ -17,6 +17,7 @@ pub enum MS2ModelArch {
 pub const MS2MODEL_ARCHS: &[&str] = &["ms2_bert"];
 
 // A wrapper struct for MS2 models
+#[derive(Clone)]
 pub struct MS2ModelWrapper {
     model: Box<dyn ModelInterface>,
 }

diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs
index 65dea9d..20086c9 100644
--- a/crates/redeem-properties/src/models/rt_model.rs
+++ b/crates/redeem-properties/src/models/rt_model.rs
@@ -19,6 +19,7 @@ pub enum RTModelArch {
 pub const RTMODEL_ARCHS: &[&str] = &["rt_cnn_lstm"];
 
 // A wrapper struct for RT models
+#[derive(Clone)]
 pub struct RTModelWrapper {
     model: Box<dyn ModelInterface>,
 }

From 5860df81cc65c0fa996173e976a47d6fe9c03709 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 7 May 2025 11:38:20 -0400
Subject: [PATCH 03/75] refactor: Update model structs to use 'static lifetime
 for VarBuilder

---
 .../src/models/ccs_cnn_lstm_model.rs            | 14 +++++++-------
 .../redeem-properties/src/models/ms2_bert_model.rs | 14 +++++++-------
 .../src/models/rt_cnn_lstm_model.rs             | 12 ++++++------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs
index 156ada6..bde8a86 100644
--- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs
+++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs
@@ -32,8 +32,8 @@ const NCE_FACTOR: f64 = 0.01;
 // Main Model Struct
 #[derive(Clone)]
 /// Represents an AlphaPeptDeep MS2BERT model.
-pub struct CCSCNNLSTMModel<'a> {
-    var_store: VarBuilder<'a>,
+pub struct CCSCNNLSTMModel {
+    var_store: VarBuilder<'static>,
     varmap: VarMap,
     constants: ModelConstants,
     mod_to_feature: HashMap<String, Vec<f32>>,
@@ -52,11 +52,11 @@ pub struct CCSCNNLSTMModel<'a> {
 }
 
 // Automatically implement Send and Sync if all fields are Send and Sync
-unsafe impl<'a> Send for CCSCNNLSTMModel<'a> {}
-unsafe impl<'a> Sync for CCSCNNLSTMModel<'a> {}
+unsafe impl Send for CCSCNNLSTMModel {}
+unsafe impl Sync for CCSCNNLSTMModel {}
 
 // Code Model Implementation
-impl<'a> ModelInterface for CCSCNNLSTMModel<'a> {
+impl ModelInterface for CCSCNNLSTMModel {
     fn property_type(&self) -> PropertyType {
         PropertyType::CCS
     }
@@ -217,13 +217,13 @@ impl<'a> ModelInterface for CCSCNNLSTMModel<'a> {
 }
 
 // // Forward Module Trait Implementation
-// impl <'a> Module for CCSCNNLSTMModel<'a> {
+// impl Module for CCSCNNLSTMModel {
 //     fn forward(&self, input: &Tensor) -> Result<Tensor> {
 //         ModelInterface::forward(self, input)
 //     }
 // }
 
-impl<'a> fmt::Debug for CCSCNNLSTMModel<'a> {
+impl fmt::Debug for CCSCNNLSTMModel {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         writeln!(f, "ModelCCS_LSTM(")?;
         writeln!(f, "  (dropout): Dropout(p={}, inplace={})", 0.1, false)?;

diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs
index 204374e..9f3a1d3 100644
--- a/crates/redeem-properties/src/models/ms2_bert_model.rs
+++ b/crates/redeem-properties/src/models/ms2_bert_model.rs
@@ -36,8 +36,8 @@ const NCE_FACTOR: f64 = 0.01;
 // Main Model Struct
 #[derive(Clone)]
 /// Represents an AlphaPeptDeep MS2BERT model.
-pub struct MS2BertModel<'a> {
-    var_store: VarBuilder<'a>,
+pub struct MS2BertModel {
+    var_store: VarBuilder<'static>,
     varmap: VarMap,
     constants: ModelConstants,
     mod_to_feature: HashMap<String, Vec<f32>>,
@@ -60,11 +60,11 @@ pub struct MS2BertModel<'a> {
 }
 
 // Automatically implement Send and Sync if all fields are Send and Sync
-unsafe impl<'a> Send for MS2BertModel<'a> {}
-unsafe impl<'a> Sync for MS2BertModel<'a> {}
+unsafe impl Send for MS2BertModel {}
+unsafe impl Sync for MS2BertModel {}
 
 // Code Model Implementation
-impl<'a> ModelInterface for MS2BertModel<'a> {
+impl ModelInterface for MS2BertModel {
     fn property_type(&self) -> PropertyType {
         PropertyType::MS2
     }
@@ -342,13 +342,13 @@ impl<'a> ModelInterface for MS2BertModel<'a> {
 }
 
 // // Module Trait Implementation
-// impl<'a> Module for MS2BertModel<'a> {
+// impl Module for MS2BertModel {
 //     fn forward(&self, input: &Tensor) -> Result<Tensor> {
 //         ModelInterface::forward(self, input)
 //     }
 // }
 
-impl<'a> fmt::Debug for MS2BertModel<'a> {
+impl fmt::Debug for MS2BertModel {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         writeln!(f, "MS2BertModel(")?;
         writeln!(f, "  (dropout): Dropout(p={})", 0.1)?;

diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
index 49ca8ee..3d5abc9 100644
--- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
@@ -25,8 +25,8 @@ use crate::utils::logging::Progress;
 
 #[derive(Clone)]
 /// Represents an AlphaPeptDeep CNN-LSTM Retention Time model.
-pub struct RTCNNLSTMModel<'a> {
-    var_store: VarBuilder<'a>,
+pub struct RTCNNLSTMModel {
+    var_store: VarBuilder<'static>,
     varmap: VarMap,
     constants: ModelConstants,
     device: Device,
@@ -38,12 +38,12 @@ pub struct RTCNNLSTMModel<'a> {
 }
 
 // Automatically implement Send and Sync if all fields are Send and Sync
-unsafe impl<'a> Send for RTCNNLSTMModel<'a> {}
-unsafe impl<'a> Sync for RTCNNLSTMModel<'a> {}
+unsafe impl Send for RTCNNLSTMModel {}
+unsafe impl Sync for RTCNNLSTMModel {}
 
 
 // Core Model Implementation
-impl<'a> ModelInterface for RTCNNLSTMModel<'a> {
+impl ModelInterface for RTCNNLSTMModel {
     fn property_type(&self) -> PropertyType {
         PropertyType::RT
     }
@@ -268,7 +268,7 @@ impl<'a> ModelInterface for RTCNNLSTMModel<'a> {
 
 
 // Module Trait Implementation
-// impl<'a> Module for RTCNNLSTMModel<'a> {
+// impl Module for RTCNNLSTMModel {
 //     fn forward(&self, input: &Tensor) -> Result<Tensor> {
 //         ModelInterface::forward(self, input)
 //     }
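Aside: the `'static` rewrite above is what makes the boxed, clonable wrappers in the next patch possible. A minimal sketch of the underlying rule — illustrative only, in plain Rust with invented names (`Predict`, `Borrowing`, `Owned`) rather than the real candle types:

```rust
// A struct that borrows for 'a cannot live behind `Box<dyn Trait>`,
// which requires 'static by default; an owned ('static) field can.
trait Predict {
    fn predict(&self) -> f32;
}

// Borrowing version: boxing this as `Box<dyn Predict>` would force 'a: 'static.
#[allow(dead_code)]
struct Borrowing<'a> {
    weights: &'a [f32],
}

// Owned version: freely boxable and clonable, like VarBuilder<'static> + VarMap.
#[derive(Clone)]
struct Owned {
    weights: Vec<f32>,
}

impl Predict for Owned {
    fn predict(&self) -> f32 {
        self.weights.iter().sum()
    }
}

fn main() {
    let model: Box<dyn Predict> = Box::new(Owned { weights: vec![0.5, 1.5] });
    assert_eq!(model.predict(), 2.0);
}
```

Holding an owned `VarMap` next to a `VarBuilder<'static>` is what lets each model struct satisfy the `'static` bound that `Box<dyn ModelInterface>` demands.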
From 8be72ea96e433604eebba2da1c4a6bbc7aba619f Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 7 May 2025 11:43:35 -0400
Subject: [PATCH 04/75] refactor: Update model structs to use 'static lifetime
 for VarBuilder

---
 crates/redeem-properties/src/models/ccs_model.rs |  9 ++++++++-
 crates/redeem-properties/src/models/ms2_model.rs |  9 ++++++++-
 crates/redeem-properties/src/models/rt_model.rs  | 10 +++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 36d5e5f..6adbdbb 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -17,11 +17,18 @@ pub enum CCSModelArch {
 pub const CCSMODEL_ARCHS: &[&str] = &["ccs_cnn_lstm"];
 
 // A wrapper struct for CCS models
-#[derive(Clone)]
 pub struct CCSModelWrapper {
     model: Box<dyn ModelInterface>,
 }
 
+impl Clone for CCSModelWrapper {
+    fn clone(&self) -> Self {
+        CCSModelWrapper {
+            model: self.model.clone(),
+        }
+    }
+}
+
 impl CCSModelWrapper {
     pub fn new<P: AsRef<Path>>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result<Self> {
         let model: Box<dyn ModelInterface> = match arch {

diff --git a/crates/redeem-properties/src/models/ms2_model.rs b/crates/redeem-properties/src/models/ms2_model.rs
index cf1979b..f4ed7e1 100644
--- a/crates/redeem-properties/src/models/ms2_model.rs
+++ b/crates/redeem-properties/src/models/ms2_model.rs
@@ -17,11 +17,18 @@ pub enum MS2ModelArch {
 pub const MS2MODEL_ARCHS: &[&str] = &["ms2_bert"];
 
 // A wrapper struct for MS2 models
-#[derive(Clone)]
 pub struct MS2ModelWrapper {
     model: Box<dyn ModelInterface>,
 }
 
+impl Clone for MS2ModelWrapper {
+    fn clone(&self) -> Self {
+        MS2ModelWrapper {
+            model: self.model.clone(),
+        }
+    }
+}
+
 impl MS2ModelWrapper {
     pub fn new<P: AsRef<Path>>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result<Self> {
         let model: Box<dyn ModelInterface> = match arch {

diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs
index 20086c9..d6cc501 100644
--- a/crates/redeem-properties/src/models/rt_model.rs
+++ b/crates/redeem-properties/src/models/rt_model.rs
@@ -19,11 +19,19 @@ pub enum RTModelArch {
 pub const RTMODEL_ARCHS: &[&str] = &["rt_cnn_lstm"];
 
 // A wrapper struct for RT models
-#[derive(Clone)]
 pub struct RTModelWrapper {
     model: Box<dyn ModelInterface>,
 }
 
+impl Clone for RTModelWrapper {
+    fn clone(&self) -> Self {
+        RTModelWrapper {
+            model: self.model.clone(), // uses clone_box() behind the scenes
+        }
+    }
+}
+
+
 impl RTModelWrapper {
     pub fn new<P: AsRef<Path>>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result<Self> {
         let model: Box<dyn ModelInterface> = match arch {

From ac5afe96d35a6fbce10c99e4ba06fb7477d944d2 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 7 May 2025 11:46:28 -0400
Subject: [PATCH 05/75] refactor: Update ModelClone trait to include Send and
 Sync bounds

---
 crates/redeem-properties/src/models/model_interface.rs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 4d63f35..78fe0a3 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -177,25 +177,26 @@ pub fn create_var_map(
 
 pub trait ModelClone {
-    fn clone_box(&self) -> Box<dyn ModelInterface>;
+    fn clone_box(&self) -> Box<dyn ModelInterface + Send + Sync>;
 }
 
+
 impl<T> ModelClone for T
 where
-    T: 'static + ModelInterface + Clone,
+    T: 'static + ModelInterface + Clone + Send + Sync,
 {
-    fn clone_box(&self) -> Box<dyn ModelInterface> {
+    fn clone_box(&self) -> Box<dyn ModelInterface + Send + Sync> {
         Box::new(self.clone())
     }
 }
 
-impl Clone for Box<dyn ModelInterface> {
-    fn clone(&self) -> Box<dyn ModelInterface> {
+
+impl Clone for Box<dyn ModelInterface + Send + Sync> {
+    fn clone(&self) -> Self {
         self.clone_box()
     }
 }
-
 /// Represents an abstract deep learning model interface.
 ///
 /// This trait defines the methods and properties that a deep learning model must implement to be used for property prediction tasks.
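Taken together, patches 02–05 implement the classic `clone_box` idiom: `Clone` itself is not object-safe, so boxed trait objects get a helper trait with a blanket impl. A self-contained sketch — simplified, with hypothetical `Model`/`RtModel` names standing in for the real `ModelInterface` implementors — showing the idiom and what the `Send + Sync` bounds from patch 05 buy:

```rust
// Helper trait: object-safe stand-in for Clone on boxed trait objects.
trait ModelClone {
    fn clone_box(&self) -> Box<dyn Model + Send + Sync>;
}

trait Model: ModelClone + Send + Sync {
    fn name(&self) -> &'static str;
}

// Blanket impl: any concrete, clonable, thread-safe model gets clone_box for free.
impl<T> ModelClone for T
where
    T: 'static + Model + Clone + Send + Sync,
{
    fn clone_box(&self) -> Box<dyn Model + Send + Sync> {
        Box::new(self.clone())
    }
}

impl Clone for Box<dyn Model + Send + Sync> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}

#[derive(Clone)]
struct RtModel;

impl Model for RtModel {
    fn name(&self) -> &'static str {
        "rt"
    }
}

fn main() {
    let a: Box<dyn Model + Send + Sync> = Box::new(RtModel);
    let b = a.clone(); // dispatches through clone_box()
    // The Send + Sync bounds let the cloned box cross a thread boundary.
    std::thread::spawn(move || assert_eq!(b.name(), "rt"))
        .join()
        .unwrap();
}
```

This is also why the next patch can drop the `Arc<Mutex<...>>` wrappers: once the wrappers are plain `Clone`, each consumer can own its own copy instead of sharing one behind a lock.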
From f41eeb5bca12c7af17544793aef906e4577cfbd8 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 7 May 2025 11:51:55 -0400
Subject: [PATCH 06/75] refactor: Update DLModels struct to remove unnecessary
 Arc and Mutex wrappers for model fields

---
 crates/redeem-properties/src/models/model_interface.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 78fe0a3..1149a23 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -689,13 +689,13 @@ pub struct DLModels {
     pub params: Option<Parameters>,
 
     /// Optional retention time prediction model.
-    pub rt_model: Option<Arc<Mutex<RTModelWrapper>>>,
+    pub rt_model: Option<RTModelWrapper>,
 
     /// Optional collision cross-section prediction model.
-    pub ccs_model: Option<Arc<Mutex<CCSModelWrapper>>>,
+    pub ccs_model: Option<CCSModelWrapper>,
 
     /// Optional MS2 intensity prediction model.
-    pub ms2_model: Option<Arc<Mutex<MS2ModelWrapper>>>,
+    pub ms2_model: Option<MS2ModelWrapper>,
 }
 
 impl DLModels {
@@ -710,7 +710,7 @@ impl DLModels {
     /// ```
     /// let mut models = DLModels::new();
     ///
-    /// models.rt_model = Some(Arc::new(Mutex::new(RTModelWrapper::new())));
+    /// models.rt_model = Some(RTModelWrapper::new());
     ///
     /// ```
     pub fn new() -> Self {
@@ -734,7 +734,7 @@ impl DLModels {
     /// let mut models = DLModels::new();
     /// assert!(!models.is_not_empty());
     ///
-    /// models.rt_model = Some(Arc::new(Mutex::new(RTModelWrapper::new())));
+    /// models.rt_model = Some(RTModelWrapper::new());
     /// assert!(models.is_not_empty());
     /// ```
     pub fn is_not_empty(&self) -> bool {

From 6eecf07301b0831aee506b928c41ea5dfa61449d Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 8 May 2025 10:41:04 -0400
Subject: [PATCH 07/75] refactor: Update peptide modification handling to
 support mass shifts and UniMod annotations

---
 .../src/utils/peptdeep_utils.rs | 206 +++++++++++++++---
 1 file changed, 174 insertions(+), 32 deletions(-)

diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs
index e3fbc73..2b2d6f1 100644
--- a/crates/redeem-properties/src/utils/peptdeep_utils.rs
+++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs
@@ -184,48 +184,76 @@ pub fn load_mod_to_feature(constants: &ModelConstants) -> Result<HashMap<String, Vec<f32>>>
 pub struct ModificationMap {
     pub name: String,
     pub amino_acid: Option<char>, // Optional if not applicable
+    pub unimod_id: Option<usize>
 }
 
+/// Loads a unified modification map where the key is either:
+/// - ("57.0215", Some('C')) for mass-based lookup
+/// - ("UniMod:4", Some('C')) for UniMod ID–based lookup
 pub fn load_modifications() -> Result<HashMap<(String, Option<char>), ModificationMap>> {
     let path: PathBuf = ensure_mod_tsv_exists().context("Failed to ensure TSV exists")?;
     let mut rdr = ReaderBuilder::new()
         .delimiter(b'\t')
-        .from_path(path).context("Failed to read TSV file")?;
+        .from_path(&path)
+        .context("Failed to read modification TSV file")?;
 
     let mut modifications = HashMap::new();
 
     for result in rdr.records() {
         let record = result.context("Failed to read record")?;
         let mod_name = record.get(0).unwrap_or("").to_string();
         let unimod_mass: f64 = record.get(1).unwrap_or("0").parse().unwrap_or(0.0);
+        let unimod_id: Option<usize> = record.get(7).and_then(|s| s.parse().ok());
 
         let mass_key = format!("{:.4}", unimod_mass);
+        let unimod_key = unimod_id.map(|id| format!("UniMod:{}", id));
 
         let amino_acid = mod_name.split('@').nth(1).and_then(|aa| aa.chars().next());
 
         let modification = ModificationMap {
             name: mod_name,
             amino_acid,
+            unimod_id,
         };
 
-        // Insert into HashMap
-        modifications.insert((mass_key, amino_acid), modification);
+        // Insert mass-based key
+        modifications.insert((mass_key.clone(), amino_acid), modification.clone());
+
+        // Insert unimod-id based key if available
+        if let Some(key) = unimod_key {
+            modifications.insert((key, amino_acid), modification.clone());
+        }
     }
 
     Ok(modifications)
 }
 
+
+
+/// Removes mass shifts and UniMod annotations from a modified peptide sequence.
+///
+/// Supports both bracketed mass shifts (e.g., `[+57.0215]`) and UniMod-style
+/// annotations (e.g., `(UniMod:4)`).
+///
+/// # Example
+/// ```
+/// use redeem_properties::utils::peptdeep_utils::remove_mass_shift;
+///
+/// let peptide = "MGC[+57.0215]AAR";
+/// assert_eq!(remove_mass_shift(peptide), "MGCAAR");
+/// let peptide = "MGC(UniMod:4)AAR";
+/// assert_eq!(remove_mass_shift(peptide), "MGCAAR");
+/// ```
 pub fn remove_mass_shift(peptide: &str) -> String {
-    let re = Regex::new(r"\[.*?\]").unwrap();
+    // Regex to remove either [mass shift] or (UniMod:x) patterns
+    let re = Regex::new(r"(\[.*?\]|\(UniMod:\d+\))").unwrap();
     re.replace_all(peptide, "").to_string()
 }
 
@@ -283,37 +311,151 @@ pub fn get_modification_indices(peptide: &str) -> String {
     indices.join(";")
 }
 
-pub fn get_modification_string(
-    peptide: &str,
-    modification_map: &HashMap<(String, Option<char>), ModificationMap>,
-) -> String {
-    let naked_peptide = remove_mass_shift(peptide);
-    let extracted_masses_and_indices = extract_masses_and_indices(&peptide.to_string());
-    let mut found_modifications = Vec::new();
+/// Extracts mass shift annotations (e.g., [+57.0215]) from a peptide string and returns them
+/// as a vector of (mass_string, position) where position is the index of the annotated amino acid.
+///
+/// # Example
+/// ```
+/// use redeem_properties::utils::peptdeep_utils::extract_mass_annotations;
+/// let result = extract_mass_annotations("AC[+57.0215]DE");
+/// assert_eq!(result, vec![("57.0215".to_string(), 2)]);
+/// ```
+pub fn extract_mass_annotations(peptide: &str) -> Vec<(String, usize)> {
+    let re_mass = Regex::new(r"\[([+-]?\d*\.?\d+)\]").unwrap();
+    let mut results = Vec::new();
+    let mut offset = 0;
+    let mut idx = 0;
+
+    while idx < peptide.len() {
+        if let Some(mat) = re_mass.find_at(peptide, idx) {
+            if mat.start() == idx {
+                let cap = re_mass.captures(&peptide[idx..mat.end()]).unwrap();
+                let mass_str = format!("{:.4}", cap[1].parse::<f64>().unwrap_or(0.0));
+                let pos = idx - offset;
+                results.push((mass_str, pos));
+                offset += mat.end() - mat.start();
+                idx = mat.end();
+                continue;
+            }
+        }
+        idx += peptide[idx..].chars().next().unwrap().len_utf8();
+    }
+
+    results
+}
 
-    // Map modifications based on extracted masses and indices
-    for (mass, index) in extracted_masses_and_indices {
-        // Subtract 1 from index to get 0-based index, ensure it's within bounds
-        let index = index.saturating_sub(1);
-        let amino_acid = naked_peptide.chars().nth(index).unwrap_or('\0');
-        if let Some(modification) = modification_map
-            .get(&(format!("{:.4}", mass), Some(amino_acid)))
-        {
-            found_modifications.push(modification.name.clone());
-        } else if let Some(modification) =
-            modification_map.get(&(format!("{:.4}", mass), None))
-        {
-            found_modifications.push(modification.name.clone());
+/// Extracts UniMod annotations (e.g., (UniMod:4)) from a peptide string and returns them
+/// as a vector of (unimod_id_string, position) where position is the index of the annotated amino acid.
+///
+/// # Example
+/// ```
+/// use redeem_properties::utils::peptdeep_utils::extract_unimod_annotations;
+/// let result = extract_unimod_annotations("AC(UniMod:4)DE");
+/// assert_eq!(result, vec![("UniMod:4".to_string(), 2)]);
+/// ```
+pub fn extract_unimod_annotations(peptide: &str) -> Vec<(String, usize)> {
+    let re_unimod = Regex::new(r"\(UniMod:(\d+)\)").unwrap();
+    let mut results = Vec::new();
+    let mut offset = 0;
+    let mut idx = 0;
+
+    while idx < peptide.len() {
+        if let Some(mat) = re_unimod.find_at(peptide, idx) {
+            if mat.start() == idx {
+                let cap = re_unimod.captures(&peptide[idx..mat.end()]).unwrap();
+                let unimod_str = format!("UniMod:{}", &cap[1]);
+                let pos = idx - offset;
+                results.push((unimod_str, pos));
+                offset += mat.end() - mat.start();
+                idx = mat.end();
+                continue;
+            }
+        }
+        idx += peptide[idx..].chars().next().unwrap().len_utf8();
+    }
 
-    found_modifications.join(";")
+    results
 }
 
+/// Attempts to look up a modification name from a map using the provided key and amino acid.
+/// Falls back to a key with `None` if the exact amino acid is not matched.
+///
+/// # Example
+/// ```
+/// use redeem_properties::utils::peptdeep_utils::{ModificationMap, lookup_modification};
+/// let mut map = std::collections::HashMap::new();
+/// map.insert(("57.0215".to_string(), Some('C')), ModificationMap { name: "Carbamidomethyl@C".to_string(), amino_acid: Some('C'), unimod_id: Some(4) });
+///
+/// let result = lookup_modification("57.0215".to_string(), 'C', &map);
+/// assert_eq!(result, Some("Carbamidomethyl@C".to_string()));
+/// ```
+pub fn lookup_modification(
+    key: String,
+    aa: char,
+    map: &HashMap<(String, Option<char>), ModificationMap>,
+) -> Option<String> {
+    map.get(&(key.clone(), Some(aa)))
+        .or_else(|| map.get(&(key, None)))
+        .map(|m| m.name.clone())
+}
+
+
+
+/// Generates a standardized modification string (e.g., "Carbamidomethyl@C")
+/// for a peptide sequence based on mass shifts (e.g., `[+57.0215]`) or
+/// UniMod annotations (e.g., `(UniMod:4)`), using a preloaded modification map.
+///
+/// The function supports both mass-shift format and UniMod notation,
+/// matching entries from the `modification_map` using mass or UniMod ID along
+/// with the local amino acid context.
+///
+/// # Arguments
+/// * `peptide` - A modified peptide sequence string (e.g., `"MGC[+57.0215]AAR"` or `"MGC(UniMod:4)AAR"`).
+/// * `modification_map` - A HashMap mapping (key, amino_acid) to `ModificationMap`.
+///   - For `[+mass]`, key is formatted as a mass string (e.g., `"57.0215"`).
+///   - For `(UniMod:ID)`, key is the UniMod ID string (e.g., `"UniMod:4"`).
+///
+/// # Returns
+/// A `String` containing semicolon-separated modification names (e.g., `"Carbamidomethyl@C"`).
+///
+/// # Example
+/// ```
+/// use std::collections::HashMap;
+/// use redeem_properties::utils::peptdeep_utils::{load_modifications, get_modification_string};
+///
+/// let mod_map = load_modifications().unwrap();
+/// let peptide1 = "MGC[+57.0215]AAR";
+/// let result1 = get_modification_string(peptide1, &mod_map);
+/// assert_eq!(result1, "Carbamidomethyl@C");
+///
+/// let peptide2 = "MGC(UniMod:4)AAR";
+/// let result2 = get_modification_string(peptide2, &mod_map);
+/// assert_eq!(result2, "Carbamidomethyl@C");
+/// ```
+pub fn get_modification_string(
+    peptide: &str,
+    modification_map: &HashMap<(String, Option<char>), ModificationMap>,
+) -> String {
+    let naked_peptide = remove_mass_shift(peptide);
+    let mut found_mods = Vec::new();
+
+    for (key, pos) in extract_mass_annotations(peptide)
+        .into_iter()
+        .chain(extract_unimod_annotations(peptide))
+    {
+        let aa = naked_peptide.chars().nth(pos.saturating_sub(1)).unwrap_or('\0');
+        if let Some(name) = lookup_modification(key, aa, modification_map) {
+            found_mods.push(name);
+        }
+    }
+
+    found_mods.join(";")
+}
+
 // TODO: Derive from PeptDep constants yaml
 const IM_GAS_MASS: f64 = 28.0;

From 1c70ac628940efe2879b70206c3ffb8655e349f3 Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 8 May 2025 15:09:36 -0400
Subject: [PATCH 08/75] refactor: peptide encoding

---
 .../src/building_blocks/featurize.rs | 175 +++++++-
 .../src/models/model_interface.rs    | 419 +++++++++++-------
 .../src/models/rt_cnn_lstm_model.rs  |  75 +++-
 3 files changed, 516 insertions(+), 153 deletions(-)

diff --git a/crates/redeem-properties/src/building_blocks/featurize.rs b/crates/redeem-properties/src/building_blocks/featurize.rs
index 1161d84..b9beb72 100644
--- a/crates/redeem-properties/src/building_blocks/featurize.rs
+++ b/crates/redeem-properties/src/building_blocks/featurize.rs
@@ -1,13 +1,27 @@
 use anyhow::{Result, anyhow};
 use std::{collections::HashMap, ops::Deref};
 use ndarray::Array2;
-use candle_core::{Device, Tensor};
+use candle_core::{DType, Device, Tensor};
 
 use crate::building_blocks::building_blocks::AA_EMBEDDING_SIZE;
 
 /// Convert peptide sequences into AA ID array.
 ///
 /// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L88
+///
+/// Example:
+/// ```rust
+/// use redeem_properties::building_blocks::featurize::get_aa_indices;
+/// use anyhow::Result;
+/// use ndarray::Array2;
+///
+/// let seq = "AGHCEWQMKYR";
+/// let result = get_aa_indices(seq).unwrap();
+/// println!("aa_indices: {:?}", result);
+/// let expect_out = Array2::from_shape_vec((1, 13), vec![0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]).unwrap();
+/// assert_eq!(result.shape(), &[1, 13]);
+/// assert_eq!(result, expect_out);
+/// ```
 pub fn get_aa_indices(seq: &str) -> Result<Array2<i64>> {
     let valid_aa = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // amino acids as defined in alphabase: https://github.com/MannLabs/alphabase/blob/main/alphabase/constants/const_files/amino_acid.tsv
     let filtered_seq: String = seq.chars().filter(|c| valid_aa.contains(*c)).collect();
@@ -110,4 +124,163 @@ pub fn get_mod_features(mods: &str, mod_sites: &str, seq_len: usize, mod_feature_size: usize,
 
     Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), &device)
         .map_err(|e| anyhow!("Failed to create tensor: {}", e))
+}
+
+
+const VALID_AA: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+/// Precomputes amino acid index map from characters A-Z
+fn aa_index_map() -> HashMap<char, i64> {
+    VALID_AA
+        .chars()
+        .enumerate()
+        .map(|(i, c)| (c, i as i64 + 1))
+        .collect()
+}
+
+/// Efficiently converts an amino acid sequence to a padded tensor of indices
+pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result<Tensor> {
+    let map = aa_index_map();
+    let filtered: Vec<i64> = seq
+        .chars()
+        .filter_map(|c| map.get(&c).copied())
+        .collect();
+    let mut indices = vec![0i64]; // padding start
+    indices.extend(filtered);
+    indices.push(0); // padding end
+
+    Ok(Tensor::from_slice(&indices, (1, indices.len()), device)?.to_dtype(DType::F32)?.unsqueeze(2)?)
+}
+
+
+/// Optimized version of get_mod_features that avoids repeated parsing
+pub fn get_mod_features_from_parsed(
+    mod_names: &[&str],
+    mod_sites: &[usize],
+    seq_len: usize,
+    mod_feature_size: usize,
+    mod_to_feature: &HashMap<String, Vec<f32>>,
+    device: &Device,
+) -> Result<Tensor> {
+    let mut mod_x = vec![0.0f32; seq_len * mod_feature_size];
+
+    for (mod_name, &site) in mod_names.iter().zip(mod_sites.iter()) {
+        if site >= seq_len {
+            log::warn!("Skipping mod {} at invalid site {} (seq_len {})", mod_name, site, seq_len);
+            continue;
+        }
+        if let Some(feat) = mod_to_feature.get(*mod_name) {
+            for (i, &val) in feat.iter().enumerate() {
+                mod_x[site * mod_feature_size + i] += val;
+            }
+        } else {
+            log::warn!("Unknown modification feature: {}", mod_name);
+        }
+    }
+
+    Ok(Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), device)
+        .map_err(|e| anyhow!("Failed to create tensor: {}", e))?)
+}
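One behavior of `get_mod_features_from_parsed` worth noting: because the inner loop uses `+=`, two modifications reported at the same site accumulate element-wise rather than overwrite each other. A tiny standalone check of that accumulation rule, with hypothetical two-dimensional feature vectors:

```rust
// Two mods on site 1 of a 3-residue buffer: their features sum into one row.
fn main() {
    let seq_len = 3;
    let feat = 2;
    let mut mod_x = vec![0.0f32; seq_len * feat];
    for (site, vec) in [(1usize, [0.5f32, 0.0]), (1, [0.25, 1.0])] {
        for (i, v) in vec.iter().enumerate() {
            mod_x[site * feat + i] += v;
        }
    }
    assert_eq!(&mod_x, &[0.0, 0.0, 0.75, 1.0, 0.0, 0.0]);
}
```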
+
+
+#[cfg(test)]
+mod tests {
+
+    use crate::utils::peptdeep_utils::load_mod_to_feature;
+    use crate::utils::peptdeep_utils::parse_model_constants;
+    use crate::utils::peptdeep_utils::ModelConstants;
+
+    use super::*;
+    use candle_core::Device;
+    use candle_core::Tensor;
+    use ndarray::Array2;
+    use std::collections::HashMap;
+    use std::path::PathBuf;
+
+    #[test]
+    fn test_get_aa_indices() {
+        let seq = "AGHCEWQMKYR";
+        let result = get_aa_indices(seq).unwrap();
+        // expected result is [[0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]]
+        let expect_out = Array2::from_shape_vec((1, 13), vec![0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]).unwrap();
+        println!("{:?} - aa_indices: {:?}", seq, result);
+        assert_eq!(result.shape(), &[1, 13]);
+        assert_eq!(result, expect_out);
+    }
+
+    #[test]
+    fn test_aa_indices_tensor() {
+        let device = Device::Cpu;
+        let seq = "AGHCEWQMKYR";
+        let result = aa_indices_tensor(seq, &device).unwrap();
+        // expected result is [[0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]]
+        let expect_out = Tensor::from_vec(vec!{0.0f32, 1.0f32, 7.0f32, 8.0f32, 3.0f32, 5.0f32, 23.0f32, 17.0f32, 13.0f32, 11.0f32, 25.0f32, 18.0f32, 0.0f32}, (1, 13), &device).unwrap();
+        println!("{:?} - aa_indices_tensor: {:?}", seq, result.to_vec3::<f32>().unwrap());
+        println!("result shape: {:?}", result.shape());
+        assert_eq!(result.shape().dims(), &[1, 13, 1]);
+        // assert_eq!(result.to_vec3::<f32>().unwrap(), expect_out.to_vec3::<f32>().unwrap());
+    }
+
+    #[test]
+    fn test_get_mod_features() {
+        let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";
+        let mod_sites = "0;4;8";
+        let seq_len = 11 + 2;
+        let mod_feature_size = 109;
+
+        let constants_path =
+            PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
+        let constants: ModelConstants =
+            parse_model_constants(constants_path.to_str().unwrap()).unwrap();
+        let mod_to_feature: HashMap<String, Vec<f32>> = load_mod_to_feature(&constants).unwrap();
+
+        let device = Device::Cpu;
+        let tensor = get_mod_features(
+            mods,
+            mod_sites,
+            seq_len,
+            mod_feature_size,
+            mod_to_feature,
+            device,
+        ).unwrap();
+        println!("tensor shape: {:?}", tensor.shape());
+        assert_eq!(tensor.shape().dims(), &[1, seq_len, mod_feature_size]);
+    }
+
+    #[test]
+    fn test_get_mod_features_from_parsed() {
+        let mods_str = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";
+        let sites_str = "0;4;8";
+
+        // Manually parse and split
+        let mod_names: Vec<&str> = mods_str.split(';').filter(|s| !s.is_empty()).collect();
+        let mod_sites: Vec<usize> = sites_str
+            .split(';')
+            .filter(|s| !s.is_empty())
+            .map(|s| s.parse::<usize>().unwrap())
+            .collect();
+        let seq_len = 11 + 2;
+        let mod_feature_size = 109;
+
+        let constants_path =
+            PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
+        let constants: ModelConstants =
+            parse_model_constants(constants_path.to_str().unwrap()).unwrap();
+        let mod_to_feature: HashMap<String, Vec<f32>> = load_mod_to_feature(&constants).unwrap();
+
+        let device = Device::Cpu;
+        let tensor = get_mod_features_from_parsed(
+            &mod_names,
+            &mod_sites,
+            seq_len,
+            mod_feature_size,
+            &mod_to_feature,
+            &device,
+        ).unwrap();
+
+        println!("tensor shape: {:?}", tensor.shape());
+
+        assert_eq!(tensor.shape().dims(), &[1, seq_len, mod_feature_size]);
+    }
+}
\ No newline at end of file

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 1149a23..73070b5 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -1,5 +1,5 @@
 use crate::{
-    building_blocks::featurize::{self, get_aa_indices, get_mod_features},
+    building_blocks::featurize::{self, aa_indices_tensor, get_aa_indices, get_mod_features, get_mod_features_from_parsed},
     models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper},
     utils::{
         data_handling::PeptideData,
@@ -19,6 +19,7 @@ use std::ops::{Index, IndexMut};
 use std::path::Path;
 use std::sync::{Arc, Mutex};
 use std::{collections::HashMap, path::PathBuf};
+use itertools::izip;
 
 // Constants
 const CHARGE_FACTOR: f64 = 0.1;
@@ -274,98 +275,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
         }
     }
 
-    /// Encode a batch of peptide sequences (plus modifications) into a tensor.
-    ///
-    /// # Arguments
-    /// * `peptide_sequences` - A vector of peptide sequences.
-    /// * `mods` - A vector of strings representing the modifications for each peptide.
-    /// * `mod_sites` - A vector of strings representing the modification site indices for each peptide.
-    /// * `charge` - An optional vector of charge states for each peptide.
-    /// * `nce` - An optional vector of nominal collision energies for each peptide.
-    /// * `instruments` - An optional vector of instrument names for each peptide.
-    ///
-    /// # Returns
-    /// A tensor containing the encoded peptide sequences.
-    fn encode_peptides(
-        &self,
-        peptide_sequences: &[String],
-        mods: &[String],
-        mod_sites: &[String],
-        charges: Option<Vec<i32>>,
-        nces: Option<Vec<i32>>,
-        instruments: Option<Vec<String>>,
-    ) -> Result<Tensor> {
-        if peptide_sequences.len() != mods.len() || peptide_sequences.len() != mod_sites.len() {
-            return Err(anyhow::anyhow!(
-                "Mismatch in input lengths: peptide_sequences, mods, and mod_sites must have the same length."
-            ));
-        }
-
-        // Encode peptides in parallel using Rayon
-        let encoded_tensors: Vec<Tensor> = peptide_sequences
-            .par_iter() // Use Rayon's parallel iterator
-            .enumerate()
-            .map(|(i, peptide)| {
-                self.encode_peptide(
-                    peptide,
-                    &mods[i],
-                    &mod_sites[i],
-                    charges.as_ref().map(|c| c[i]),
-                    nces.as_ref().map(|n| n[i]),
-                    instruments.as_ref().map(|ins| ins[i].as_str()),
-                )
-            })
-            .collect::<Result<Vec<Tensor>>>()?; // Collect results and propagate errors if any
-
-        // Determine the maximum sequence length
-        let max_seq_len = encoded_tensors
-            .par_iter()
-            .map(|t| t.shape().dims3().unwrap().1) // Get sequence length (dimension 1)
-            .max()
-            .unwrap_or(0);
-
-        // Pad tensors to the max_seq_len
-        let padded_tensors: Result<Vec<Tensor>> = encoded_tensors
-            .into_par_iter() // Use Rayon's parallel iterator
-            .map(|t| {
-                let (_, seq_len, feature_size) = t.shape().dims3()?; // Extract feature dimension
-                if seq_len < max_seq_len {
-                    let pad_size = max_seq_len - seq_len;
-                    // Create a padding tensor with the correct shape and type
-                    let pad = Tensor::zeros(
-                        &[1, pad_size, feature_size], // Use the correct feature dimension
-                        t.dtype(),
-                        t.device(),
-                    )?;
-                    // Concatenate padding along sequence length
-                    Tensor::cat(&[&t, &pad], 1)
-                } else {
-                    Ok(t)
-                }
-            })
-            .collect::<Result<Vec<_>, _>>()
-            .map_err(Into::into);
-
-        let padded_tensors = padded_tensors?;
-
-        // Concatenate all padded tensors along the batch dimension
-        let batch_tensor = Tensor::cat(&padded_tensors, 0)?;
-
-        Ok(batch_tensor)
-    }
-
     /// Encode peptide sequence (plus modifications) into a tensor.
-    ///
-    /// # Arguments
-    /// * `peptide_sequence` - The peptide sequence.
-    /// * `mods` - A string representing the modifications for the peptide.
-    /// * `mod_sites` - A string representing the modification site indices for the peptide.
-    /// * `charge` - An optional charge state for the peptide.
-    /// * `nce` - An optional nominal collision energy for the peptide.
-    /// * `instrument` - An optional instrument name for the peptide.
-    ///
-    /// # Returns
-    /// A tensor containing the encoded peptide sequence.
     fn encode_peptide(
         &self,
         peptide_sequence: &str,
         mods: &str,
         mod_sites: &str,
         charge: Option<i32>,
         nce: Option<i32>,
         instrument: Option<&str>,
     ) -> Result<Tensor> {
-        log::trace!(
-            "[ModelInterface::encode_peptide] Encoding peptide: {:?}, mods: {:?}, mod_sites: {:?}, charge: {:?}, nce: {:?}, instrument: {:?}",
-            peptide_sequence,
-            mods,
-            mod_sites,
-            charge,
-            nce,
-            instrument
-        );
-        let aa_indices = get_aa_indices(peptide_sequence)?;
-        log::trace!(
-            "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}",
-            aa_indices.shape(),
-            aa_indices.iter().min(),
-            aa_indices.iter().max()
-        );
-
-        // Convert ndarray to Tensor (F32)
-        let aa_indices_tensor = Tensor::from_slice(
-            &aa_indices.as_slice().unwrap(),
-            (aa_indices.shape()[0], aa_indices.shape()[1]),
-            &self.get_device(),
-        )?
-        .to_dtype(DType::F32)?;
-
-        let (batch_size, seq_len) = aa_indices_tensor.shape().dims2()?;
-        let aa_indices_tensor = aa_indices_tensor.unsqueeze(2)?; // Shape: batch_size x seq_len x 1
-
-        log::trace!(
-            "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}",
-            aa_indices_tensor.shape(),
-            aa_indices_tensor.min_all(),
-            aa_indices_tensor.max_all()
-        );
-
-        // Get modification features
-        let mod_x = get_mod_features(
-            mods,
-            mod_sites,
+        let device = self.get_device();
+        let mod_feature_size = self.get_mod_element_count();
+        let mod_to_feature = self.get_mod_to_feature().clone();
+
+        let aa_tensor = aa_indices_tensor(peptide_sequence, &device)?;
+        let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?;
+
+        let mod_names: Vec<&str> = mods.split(';').filter(|s| !s.is_empty()).collect();
+        let mod_indices: Vec<usize> = mod_sites
+            .split(';')
+            .filter(|s| !s.is_empty())
+            .map(|s| s.parse::<usize>().unwrap())
+            .collect();
+
+        let mod_tensor = get_mod_features_from_parsed(
+            &mod_names,
+            &mod_indices,
             seq_len,
-            self.get_mod_element_count(),
-            self.get_mod_to_feature().clone(),
-            self.get_device().clone(),
+            mod_feature_size,
+            &mod_to_feature,
+            &device,
         )?;
 
-        let mut features = vec![aa_indices_tensor, mod_x];
+        let mut features = vec![aa_tensor, mod_tensor];
 
-        // Conditionally add charge
         if let Some(c) = charge {
             let charge_tensor = Tensor::from_slice(
                 &vec![c as f64 * CHARGE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
-                &self.get_device(),
-            )?
-            .to_dtype(DType::F32)?;
+                &device,
+            )?.to_dtype(DType::F32)?;
             features.push(charge_tensor);
         }
 
-        // Conditionally add NCE
         if let Some(n) = nce {
             let nce_tensor = Tensor::from_slice(
                 &vec![n as f64 * NCE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
-                &self.get_device(),
-            )?
-            .to_dtype(DType::F32)?;
+                &device,
+            )?.to_dtype(DType::F32)?;
             features.push(nce_tensor);
         }
 
-        // Conditionally add instrument
         if let Some(instr) = instrument {
-            let instrument_tensor = Tensor::from_slice(
-                &vec![parse_instrument_index(instr) as u32; seq_len],
+            let instr_idx = parse_instrument_index(instr) as u32;
+            let instr_tensor = Tensor::from_slice(
+                &vec![instr_idx; seq_len],
                 &[batch_size, seq_len, 1],
-                &self.get_device(),
-            )?
-            .to_dtype(DType::F32)?;
-            features.push(instrument_tensor);
+                &device,
+            )?.to_dtype(DType::F32)?;
+            features.push(instr_tensor);
         }
 
-        // Concatenate features
         Ok(Tensor::cat(&features, 2)?)
     }
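For orientation, the per-residue width of the tensor returned by the rewritten `encode_peptide` is one amino-acid index column, plus `mod_feature_size` modification columns, plus one column for each optional scalar that is supplied. A back-of-envelope sketch of that arithmetic (assuming the 109-dimensional modification features used in this repo's tests; `encoded_width` is an invented helper, not part of the patch):

```rust
// Feature width per residue after Tensor::cat(&features, 2).
fn encoded_width(mod_feature_size: usize, charge: bool, nce: bool, instrument: bool) -> usize {
    1 // amino-acid index column
        + mod_feature_size
        + charge as usize
        + nce as usize
        + instrument as usize
}

fn main() {
    assert_eq!(encoded_width(109, false, false, false), 110); // RT/CCS-style input
    assert_eq!(encoded_width(109, true, true, true), 113); // MS2-style input
}
```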
+    /// Encode a batch of peptide sequences into a tensor
+    fn encode_peptides(
+        &self,
+        peptide_sequences: &[String],
+        mods: &[String],
+        mod_sites: &[String],
+        charges: Option<Vec<i32>>,
+        nces: Option<Vec<i32>>,
+        instruments: Option<Vec<String>>,
+    ) -> Result<Tensor> {
+        let len = peptide_sequences.len();
+
+        let tensors: Vec<_> = (0..len)
+            .into_par_iter()
+            .map(|i| {
+                self.encode_peptide(
+                    &peptide_sequences[i],
+                    &mods[i],
+                    &mod_sites[i],
+                    charges.as_ref().map(|v| v[i]),
+                    nces.as_ref().map(|v| v[i]),
+                    instruments.as_ref().map(|v| v[i].as_str()),
+                )
+            })
+            .collect::<Result<Vec<_>, _>>()?; // Propagate errors
+
+        let max_len = tensors
+            .iter()
+            .map(|t| t.shape().dims3().unwrap().1)
+            .max()
+            .unwrap_or(0);
+
+        let padded = tensors
+            .into_par_iter()
+            .map(|t| {
+                let (_, seq_len, feat_dim) = t.shape().dims3()?;
+                if seq_len < max_len {
+                    let pad = Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?;
+                    Tensor::cat(&[&t, &pad], 1)
+                } else {
+                    Ok(t)
+                }
+            })
+            .map(|res| res.map_err(anyhow::Error::from))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(Tensor::cat(&padded, 0)?)
+    }
+
+
+    // /// Encode a batch of peptide sequences (plus modifications) into a tensor.
+    // ///
+    // /// # Arguments
+    // /// * `peptide_sequences` - A vector of peptide sequences.
+    // /// * `mods` - A vector of strings representing the modifications for each peptide.
+    // /// * `mod_sites` - A vector of strings representing the modification site indices for each peptide.
+    // /// * `charge` - An optional vector of charge states for each peptide.
+    // /// * `nce` - An optional vector of nominal collision energies for each peptide.
+    // /// * `instruments` - An optional vector of instrument names for each peptide.
+    // ///
+    // /// # Returns
+    // /// A tensor containing the encoded peptide sequences.
+    // fn encode_peptides(
+    //     &self,
+    //     peptide_sequences: &[String],
+    //     mods: &[String],
+    //     mod_sites: &[String],
+    //     charges: Option<Vec<i32>>,
+    //     nces: Option<Vec<i32>>,
+    //     instruments: Option<Vec<String>>,
+    // ) -> Result<Tensor> {
+    //     if peptide_sequences.len() != mods.len() || peptide_sequences.len() != mod_sites.len() {
+    //         return Err(anyhow::anyhow!(
+    //             "Mismatch in input lengths: peptide_sequences, mods, and mod_sites must have the same length."
+    //         ));
+    //     }
+
+    //     // Encode peptides in parallel using Rayon
+    //     let encoded_tensors: Vec<Tensor> = peptide_sequences
+    //         .par_iter() // Use Rayon's parallel iterator
+    //         .enumerate()
+    //         .map(|(i, peptide)| {
+    //             self.encode_peptide(
+    //                 peptide,
+    //                 &mods[i],
+    //                 &mod_sites[i],
+    //                 charges.as_ref().map(|c| c[i]),
+    //                 nces.as_ref().map(|n| n[i]),
+    //                 instruments.as_ref().map(|ins| ins[i].as_str()),
+    //             )
+    //         })
+    //         .collect::<Result<Vec<Tensor>>>()?; // Collect results and propagate errors if any
+
+    //     // Determine the maximum sequence length
+    //     let max_seq_len = encoded_tensors
+    //         .par_iter()
+    //         .map(|t| t.shape().dims3().unwrap().1) // Get sequence length (dimension 1)
+    //         .max()
+    //         .unwrap_or(0);
+
+    //     // Pad tensors to the max_seq_len
+    //     let padded_tensors: Result<Vec<Tensor>> = encoded_tensors
+    //         .into_par_iter() // Use Rayon's parallel iterator
+    //         .map(|t| {
+    //             let (_, seq_len, feature_size) = t.shape().dims3()?; // Extract feature dimension
+    //             if seq_len < max_seq_len {
+    //                 let pad_size = max_seq_len - seq_len;
+    //                 // Create a padding tensor with the correct shape and type
+    //                 let pad = Tensor::zeros(
+    //                     &[1, pad_size, feature_size], // Use the correct feature dimension
+    //                     t.dtype(),
+    //                     t.device(),
+    //                 )?;
+    //                 // Concatenate padding along sequence length
+    //                 Tensor::cat(&[&t, &pad], 1)
+    //             } else {
+    //                 Ok(t)
+    //             }
+    //         })
+    //         .collect::<Result<Vec<_>, _>>()
+    //         .map_err(Into::into);
+
+    //     let padded_tensors = padded_tensors?;
+
+    //     // Concatenate all padded tensors along the batch dimension
+    //     let batch_tensor = Tensor::cat(&padded_tensors, 0)?;
+
+    //     Ok(batch_tensor)
+    // }
+
+    // /// Encode peptide sequence (plus modifications) into a tensor.
+    // ///
+    // /// # Arguments
+    // /// * `peptide_sequence` - The peptide sequence.
+    // /// * `mods` - A string representing the modifications for the peptide.
+    // /// * `mod_sites` - A string representing the modification site indices for the peptide.
+    // /// * `charge` - An optional charge state for the peptide.
+    // /// * `nce` - An optional nominal collision energy for the peptide.
+    // /// * `instrument` - An optional instrument name for the peptide.
+    // ///
+    // /// # Returns
+    // /// A tensor containing the encoded peptide sequence.
+    // fn encode_peptide(
+    //     &self,
+    //     peptide_sequence: &str,
+    //     mods: &str,
+    //     mod_sites: &str,
+    //     charge: Option<i32>,
+    //     nce: Option<i32>,
+    //     instrument: Option<&str>,
+    // ) -> Result<Tensor> {
+    //     log::trace!(
+    //         "[ModelInterface::encode_peptide] Encoding peptide: {:?}, mods: {:?}, mod_sites: {:?}, charge: {:?}, nce: {:?}, instrument: {:?}",
+    //         peptide_sequence,
+    //         mods,
+    //         mod_sites,
+    //         charge,
+    //         nce,
+    //         instrument
+    //     );
+    //     let aa_indices = get_aa_indices(peptide_sequence)?;
+    //     log::trace!(
+    //         "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}",
+    //         aa_indices.shape(),
+    //         aa_indices.iter().min(),
+    //         aa_indices.iter().max()
+    //     );
+
+    //     // Convert ndarray to Tensor (F32)
+    //     let aa_indices_tensor = Tensor::from_slice(
+    //         &aa_indices.as_slice().unwrap(),
+    //         (aa_indices.shape()[0], aa_indices.shape()[1]),
+    //         &self.get_device(),
+    //     )?
+    //     .to_dtype(DType::F32)?;
+
+    //     let (batch_size, seq_len) = aa_indices_tensor.shape().dims2()?;
+    //     let aa_indices_tensor = aa_indices_tensor.unsqueeze(2)?; // Shape: batch_size x seq_len x 1
+
+    //     log::trace!(
+    //         "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}",
+    //         aa_indices_tensor.shape(),
+    //         aa_indices_tensor.min_all(),
+    //         aa_indices_tensor.max_all()
+    //     );
+
+    //     // Get modification features
+    //     let mod_x = get_mod_features(
+    //         mods,
+    //         mod_sites,
+    //         seq_len,
+    //         self.get_mod_element_count(),
+    //         self.get_mod_to_feature().clone(),
+    //         self.get_device().clone(),
+    //     )?;
+
+    //     let mut features = vec![aa_indices_tensor, mod_x];
+
+    //     // Conditionally add charge
+    //     if let Some(c) = charge {
+    //         let charge_tensor = Tensor::from_slice(
+    //             &vec![c as f64 * CHARGE_FACTOR; seq_len],
+    //             &[batch_size, seq_len, 1],
+    //             &self.get_device(),
+    //         )?
+    //         .to_dtype(DType::F32)?;
+    //         features.push(charge_tensor);
+    //     }
+
+    //     // Conditionally add NCE
+    //     if let Some(n) = nce {
+    //         let nce_tensor = Tensor::from_slice(
+    //             &vec![n as f64 * NCE_FACTOR; seq_len],
+    //             &[batch_size, seq_len, 1],
+    //             &self.get_device(),
+    //         )?
+    //         .to_dtype(DType::F32)?;
+    //         features.push(nce_tensor);
+    //     }
+
+    //     // Conditionally add instrument
+    //     if let Some(instr) = instrument {
+    //         let instrument_tensor = Tensor::from_slice(
+    //             &vec![parse_instrument_index(instr) as u32; seq_len],
+    //             &[batch_size, seq_len, 1],
+    //             &self.get_device(),
+    //         )?
+    //         .to_dtype(DType::F32)?;
+    //         features.push(instrument_tensor);
+    //     }
+
+    //     // Concatenate features
+    //     Ok(Tensor::cat(&features, 2)?)
+    // }
+
     /// Fine-tune the model on a batch of training data.
     ///
     /// # Arguments

diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
index 3d5abc9..51bfa11 100644
--- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
@@ -308,13 +308,86 @@ mod tests {
         assert_eq!(constants.nce_factor, Some(0.01));
     }
 
+    #[test]
+    fn test_encode_peptides() {
+        let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth");
+        let constants_path =
+            PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
+        let device = Device::Cpu;
+        let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device).unwrap();
+
+        let peptide_sequences = "AGHCEWQMKYR";
+        let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";
+        let mod_sites = "0;4;8";
+        // let charge = Some(2);
+        // let nce = Some(20);
+        // let instrument = Some("QE");
+
+        let result =
+            model.encode_peptide(&peptide_sequences, mods, mod_sites, None, None, None);
+
+        println!("{:?}", result);
+
+        // assert!(result.is_ok());
+        // let encoded_peptides = result.unwrap();
+        // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1));
+    }
+
+    #[test]
+    fn test_encode_peptides_batch() {
+
+        let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth");
+        let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
+        let device = Device::Cpu;
+
+        let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device.clone()).unwrap();
+
+        // Batched input
+        let peptide_sequences = vec![
+            "ACDEFGHIK".to_string(),
+            "AGHCEWQMKYR".to_string(),
+        ];
+        let mods = vec![
+            "Carbamidomethyl@C".to_string(),
+            "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(),
+        ];
+        let mod_sites = vec![
+            "1".to_string(),
+            "0;4;8".to_string(),
+        ];
+
+        println!("Peptides: {:?}", peptide_sequences);
+        println!("Mods: {:?}", mods);
+        println!("Mod sites: {:?}", mod_sites);
+
+
+        let result = model.encode_peptides(
+            &peptide_sequences,
+            &mods,
+            &mod_sites,
+            None,
+            None,
+            None,
+        );
+
+        assert!(result.is_ok());
+        let tensor = result.unwrap();
+        println!("Batched encoded tensor shape: {:?}", tensor.shape());
+
+        let (batch, seq_len, feat_dim) = tensor.shape().dims3().unwrap();
+        assert_eq!(batch, 2); // two peptides
+        assert!(seq_len >= 11); // padded to max length
+        assert!(feat_dim > 1); // includes aa + mod features
+    }
+
+
     #[test]
     fn test_prediction() {
         let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth");
         let constants_path =
             PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
         let device = /* Assuming Device is defined */ Device::new_cuda(0).unwrap_or(/* assuming Device::Cpu is defined */ Device::Cpu); // Replace with actual Device code.
-        let result = /* Assuming RTCNNLSTMModel is defined */ RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device); // Replace with actual RTCNNLSTMModel code
+        let result = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device);
         let mut model = result.unwrap();
 
         // Test prediction with a few peptides after fine-tuning

From 1086bd6e16b82cd476160740c150715edb096d95 Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 8 May 2025 15:11:31 -0400
Subject: [PATCH 09/75] chore: Update dependencies in redeem-properties crate

---
 crates/redeem-properties/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/redeem-properties/Cargo.toml b/crates/redeem-properties/Cargo.toml
index 304a370..e39b7c5 100644
--- a/crates/redeem-properties/Cargo.toml
+++ b/crates/redeem-properties/Cargo.toml
@@ -17,6 +17,7 @@ serde_yaml = "0.9"
 ndarray = "0.15"
 #ndarray = "0.16.1"
 reqwest = { version = "0.11", features = ["blocking"] }
+itertools = "0.14.0"
 zip = "2.2.2"
 csv = "1.1"
 regex = "1.6"

From c13dabd933ba271894ca63977f3b55bbf93ecd0b Mon Sep 17 00:00:00 2001
From: singjc
Date: Fri, 9 May 2025 00:40:02 -0400
Subject: [PATCH 10/75] refactor: bilstm

---
 .../src/building_blocks/bilstm.rs | 242 ++++++------------
 1 file changed, 77 insertions(+), 165 deletions(-)

diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs
index 0e04749..b6dd1aa 100644
--- a/crates/redeem-properties/src/building_blocks/bilstm.rs
+++ b/crates/redeem-properties/src/building_blocks/bilstm.rs
@@ -1,6 +1,6 @@
-use candle_core::{DType, Device, Result, Tensor};
+use candle_core::{IndexOp, Result, Tensor};
 use candle_nn::{rnn, Module, VarBuilder, RNN};
-// use crate::utils::logging::print_tensor;
+
 
 #[derive(Debug, Clone)]
 pub struct BidirectionalLSTM {
@@ -16,72 +16,26 @@ pub struct BidirectionalLSTM {
 }
 
 impl BidirectionalLSTM {
-
     pub fn new(
         input_size: usize,
         hidden_size: usize,
         num_layers: usize,
         vb: &VarBuilder,
     ) -> Result<Self> {
-
         let h0 = vb.get((num_layers * 2, 1, hidden_size), "rnn_h0")?;
         let c0 = vb.get((num_layers * 2, 1, hidden_size), "rnn_c0")?;
 
-        let lstm_config = rnn::LSTMConfig {
-            layer_idx: 0,
-            direction: rnn::Direction::Forward,
-            ..Default::default()
-        };
-
-        let lstm_config_rev = rnn::LSTMConfig {
-            layer_idx: 0,
-            direction: rnn::Direction::Backward,
-            ..Default::default()
-        };
-
-        let forward_lstm1 = rnn::lstm(
-            input_size,
-            hidden_size,
-            lstm_config.clone(),
-            vb.pp("rnn").clone()
-        )?;
-        let backward_lstm1 = rnn::lstm(
-            input_size,
-            hidden_size,
-            lstm_config_rev.clone(),
-            vb.pp("rnn").clone()
-        )?;
-
-        let lstm_config2 = rnn::LSTMConfig {
-            layer_idx: 1,
-            direction: rnn::Direction::Forward,
-            ..Default::default()
-        };
-
-        let lstm_config2_rev = rnn::LSTMConfig {
-            layer_idx: 1,
-            direction: rnn::Direction::Backward,
-            ..Default::default()
-        };
-
-        let forward_lstm2 = rnn::lstm(
-            2 * hidden_size,
-            hidden_size,
-            lstm_config2.clone(),
-            vb.pp("rnn").clone()
-        )?;
-        let backward_lstm2 = rnn::lstm(
-            2 * hidden_size,
-            hidden_size,
-            lstm_config2_rev.clone(),
-            vb.pp("rnn").clone()
-        )?;
+        let lstm1_fw = rnn::lstm(input_size, hidden_size, rnn::LSTMConfig::default(), vb.pp("rnn"))?;
+        let lstm1_bw = rnn::lstm(input_size, hidden_size, rnn::LSTMConfig { direction: rnn::Direction::Backward, ..Default::default() }, vb.pp("rnn"))?;
+
+        let lstm2_fw = rnn::lstm(2 * hidden_size, hidden_size, rnn::LSTMConfig { layer_idx: 1, ..Default::default() }, vb.pp("rnn"))?;
+        let lstm2_bw = rnn::lstm(2 * hidden_size, hidden_size, rnn::LSTMConfig { layer_idx: 1, direction: rnn::Direction::Backward, ..Default::default() }, vb.pp("rnn"))?;
 
         Ok(Self {
-            forward_lstm1,
-            backward_lstm1,
-            forward_lstm2,
-            backward_lstm2,
+            forward_lstm1: lstm1_fw,
+            backward_lstm1: lstm1_bw,
+            forward_lstm2: lstm2_fw,
+            backward_lstm2: lstm2_bw,
             h0,
             c0,
            input_size,
        })
    }
 
-    fn apply_bidirectional_layer(&self, input: &Tensor, lstm_forward: &rnn::LSTM, lstm_backward: &rnn::LSTM, h0: &Tensor, c0: &Tensor, layer_idx: &i32) -> Result<(Tensor, (Tensor, Tensor))> {
-        let (batch_size, seq_len, input_size) = input.dims3()?;
-
-        // Print first and last 5 values of the original input
-        let input_vec = input.to_vec3::<f32>()?;
-
-        // Forward pass
-        let h0_forward = h0.narrow(0, 0, 1)?.reshape((batch_size, h0.dim(2)?))?;
-        let c0_forward = c0.narrow(0, 0, 1)?.reshape((batch_size, c0.dim(2)?))?;
-
-        let state_forward = rnn::LSTMState{ h: h0_forward.clone(), c: c0_forward.clone() };
-
-        let output_forward_states: Vec<rnn::LSTMState> = lstm_forward.seq_init(&input, &state_forward)?;
-        let output_forward = Tensor::stack(&output_forward_states.iter().map(|state| state.h().clone()).collect::<Vec<_>>(), 1)?;
-        let last_forward_state = output_forward_states.last().unwrap().h().clone();
-
-        // Backward pass
-        let h0_backward = h0.narrow(0, 1, 1)?.reshape((batch_size, h0.dim(2)?))?;
-        let c0_backward = c0.narrow(0, 1, 1)?.reshape((batch_size, c0.dim(2)?))?;
-
-        let state_backward = rnn::LSTMState{ h: h0_backward.clone(), c: c0_backward.clone() };
-
-        // Correctly reverse the input sequence
-        let mut reversed_input = vec![vec![vec![0.0; input_size]; seq_len]; batch_size];
-        for b in 0..batch_size {
-            for t in 0..seq_len {
-                for i in 0..input_size {
-                    reversed_input[b][seq_len - t - 1][i] = input_vec[b][t][i];
-                }
-            }
-        }
-        let input_reversed = Tensor::new(reversed_input, input.device())?
-            .to_dtype(DType::F32)?
-            .reshape((batch_size, seq_len, input_size))?;
-
-        // Print first and last 5 values of the reversed input
-        let reversed_input_vec = input_reversed.to_vec3::<f32>()?;
-
-        let output_backward_states = lstm_backward.seq_init(&input_reversed, &state_backward)?;
-        let output_backward = Tensor::stack(&output_backward_states.iter().map(|state| state.h().clone()).collect::<Vec<_>>(), 1)?;
-
-        // Use the last state of the backward LSTM (which corresponds to the first element of the original sequence)
-        let last_backward_state = output_backward_states.last().unwrap().h().clone();
-
-        // Combine the forward and backward hidden states for hn
-        let hn = Tensor::cat(&[last_forward_state.unsqueeze(0)?, last_backward_state.unsqueeze(0)?], 0)?; // Shape: [2, 1, 128]
-        let hn_concat = Tensor::cat(&[last_forward_state, last_backward_state], 1)?; // Shape: [1, 256]
-
-        // Combine the forward and backwards cell states for cn
-        let cn = Tensor::cat(&[output_forward_states.last().unwrap().c().clone(), output_backward_states.last().unwrap().c().clone()], 0)?; // Shape: [2, 1, 128]
-
-        // The output_backward is already in the correct order for the original sequence
-        let output = Tensor::cat(&[output_forward, output_backward], 2)?; // Shape: [1, 13, 256]
+    fn apply_bidirectional_layer(
+        &self,
+        input: &Tensor,
+        lstm_forward: &rnn::LSTM,
+        lstm_backward: &rnn::LSTM,
+        h0: &Tensor,
+        c0: &Tensor,
+    ) -> Result<(Tensor, (Tensor, Tensor))> {
+        let (_batch_size, seq_len, _input_size) = input.dims3()?;
+
+        // Initial states for forward
+        let h0_forward = h0.i(0)?;
+        let c0_forward = c0.i(0)?;
+        let state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward };
+
+        let start_time = std::time::Instant::now();
+        let out_fw_states = lstm_forward.seq_init(input, &state_fw)?;
+        let out_fw = Tensor::stack(
+            &out_fw_states.iter().map(|s| s.h()).collect::<Vec<_>>(),
+            1,
+        )?;
+        let last_fw_h = out_fw_states.last().unwrap().h().clone();
+        let last_fw_c = out_fw_states.last().unwrap().c().clone();
+        println!("BidirectionLSTM::apply_bidirectional_layer - Forward LSTM time: {:?}", start_time.elapsed());
+
+        // Reverse sequence
+        let start_time = std::time::Instant::now();
+        let input_reversed = Tensor::cat(
+            &(0..seq_len)
+                .rev()
+                .map(|t| input.i((.., t..=t, ..)))
+                .collect::<Result<Vec<_>>>()?,
+            1,
+        )?;
+        println!("BidirectionLSTM::apply_bidirectional_layer - Reverse sequence time: {:?}", start_time.elapsed());
+
+        // Initial states for backward
+        let h0_backward = h0.i(1)?;
+        let c0_backward = c0.i(1)?;
+        let state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward };
+
+        let start_time = std::time::Instant::now();
+        let out_bw_states = lstm_backward.seq_init(&input_reversed, &state_bw)?;
+        let out_bw = Tensor::stack(
+            &out_bw_states.iter().map(|s| s.h()).collect::<Vec<_>>(),
+            1,
+        )?;
+        let last_bw_h = out_bw_states.last().unwrap().h().clone();
+        let last_bw_c = out_bw_states.last().unwrap().c().clone();
+        println!("BidirectionLSTM::apply_bidirectional_layer - Backward LSTM time: {:?}", start_time.elapsed());
+
+        // Combine hidden and cell states
+        let hn = Tensor::stack(&[last_fw_h.clone(), last_bw_h.clone()], 0)?;
+        let cn = Tensor::stack(&[last_fw_c, last_bw_c], 0)?;
+        let output = Tensor::cat(&[out_fw, out_bw], 2)?;
 
         Ok((output, (hn, cn)))
     }
 
-    // New method that returns output and states
-    pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> {
-        let (batch_size, seq_len, input_size) = xs.dims3()?;
-
-        let h0 = &self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?;
-        let c0 = &self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?;
+    /// Forward with hidden states returned
+    pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> {
+        let (batch_size, _, _) = xs.dims3()?;
+        let h0 = self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?;
+        let c0 = self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?;
 
         let h0_1 = h0.narrow(0, 0, 2)?;
-        let h0_2 = h0.narrow(0, 2, 2)?;
         let c0_1 = c0.narrow(0, 0, 2)?;
+        let h0_2 = h0.narrow(0, 2, 2)?;
         let c0_2 = c0.narrow(0, 2, 2)?;
 
-        let (layer1_output, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1, &1)?;
-        let (layer2_output, (hn2, cn2)) = self.apply_bidirectional_layer(&layer1_output, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2, &2)?;
+        let start_time = std::time::Instant::now();
+        let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?;
+        println!("BidirectionLSTM::forward_with_state - Layer 1 time: {:?}", start_time.elapsed());
+        let start_time = std::time::Instant::now();
+        let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?;
+        println!("BidirectionLSTM::forward_with_state - Layer 2 time: {:?}", start_time.elapsed());
 
-        let final_hn = Tensor::cat(&[hn1, hn2], 0)?;
-        let final_cn = Tensor::cat(&[cn1, cn2], 0)?;
-
-        Ok((layer2_output, (final_hn, final_cn)))
-    }
-
-
-    /// Print the weights of the BiLSTM
-    pub fn print_weights(&self, vb: &VarBuilder) -> Result<()> {
-        fn print_first_few(tensor: &Tensor, name: &str) -> Result<()> {
-            let flattened = tensor.flatten_all()?;
-            let num_elements = flattened.dim(0)?;
-            let num_to_print = 5.min(num_elements);
-            println!("{} shape: {:?}", name, tensor.shape());
-            println!("{} (first few values): {:?}", name, flattened.narrow(0, 0, num_to_print)?.to_vec1::<f32>()?);
-            Ok(())
-        }
-
-        fn print_lstm_weights(vb: &VarBuilder, layer: usize, direction: &str) -> Result<()> {
-            let prefix = format!("rt_encoder.hidden_nn.rnn.weight_");
-            let ih_name = format!("{}ih_l{}{}", prefix, layer, direction);
-            let hh_name = format!("{}hh_l{}{}", prefix, layer, direction);
-
-            // println!("LSTM layer {} {} weights:", layer, direction);
-            if layer == 1 {
-                print_first_few(&vb.get((512, 256), &ih_name)?, &format!("  {}", ih_name))?;
-            } else {
-                print_first_few(&vb.get((512, 140), &ih_name)?, &format!("  {}", ih_name))?;
-            }
-
-            print_first_few(&vb.get((512, 128), &hh_name)?, &format!("  {}", hh_name))?;
-
-            Ok(())
-        }
-
-        // Print forward LSTM weights
-        print_lstm_weights(vb, 0, "")?;
-        print_lstm_weights(vb, 1, "")?;
-
-        // Print backward LSTM weights
-        print_lstm_weights(vb, 0, "_reverse")?;
-        print_lstm_weights(vb, 1, "_reverse")?;
-
-        Ok(())
+        let hn = Tensor::cat(&[hn1, hn2], 0)?;
+        let cn = Tensor::cat(&[cn1, cn2], 0)?;
+        Ok((out2, (hn, cn)))
     }
 
     pub fn input_size(&self) -> usize {
@@ -222,21 +139,16 @@ impl BidirectionalLSTM {
     pub fn num_layers(&self) -> usize {
         self.num_layers
     }
-
 }
 
-
 impl Module for BidirectionalLSTM {
-
-    /// Forward pass of the BiLSTM
     fn forward(&self, xs: &Tensor) -> Result<Tensor> {
-        // This method now only returns the output tensor
         let (output, _) = self.forward_with_state(xs)?;
         Ok(output)
    }
-
}
 
+
 #[cfg(test)]
 mod test {
     use super::*;
anyhow::{Result, anyhow}; -use std::{collections::HashMap, ops::Deref}; -use ndarray::Array2; +use std::collections::HashMap; use candle_core::{DType, Device, Tensor}; +use rayon::prelude::*; +use std::sync::atomic::{AtomicU32, Ordering}; use crate::building_blocks::building_blocks::AA_EMBEDDING_SIZE; -/// Convert peptide sequences into AA ID array. -/// -/// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L88 -/// -/// Example: -/// ```rust -/// use redeem_properties::building_blocks::featurize::get_aa_indices; -/// use anyhow::Result; -/// use ndarray::Array2; -/// -/// let seq = "AGHCEWQMKYR"; -/// let result = get_aa_indices(seq).unwrap(); -/// println!("aa_indices: {:?}", result); -/// let expect_out = Array2::from_shape_vec((1, 13), vec![0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]).unwrap(); -/// assert_eq!(result.shape(), &[1, 13]); -/// assert_eq!(result, expect_out); -/// ``` -pub fn get_aa_indices(seq: &str) -> Result> { - let valid_aa = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // amino acids as defined in alphabase: https://github.com/MannLabs/alphabase/blob/main/alphabase/constants/const_files/amino_acid.tsv - let filtered_seq: String = seq.chars().filter(|c| valid_aa.contains(*c)).collect(); - - // TODO: Maybe this should be done higher up in the pipeline, and this should panic here instead. - // But for now this is done to deal with cases like: -MQPLSKL - if seq.len() != filtered_seq.len() { - log::trace!("Invalid amino acid characters found in sequence: {:?}, stripping them out to {:?}", seq, filtered_seq); - } - - let seq_len = filtered_seq.len(); - let mut result = Array2::::zeros((1, seq_len + 2)); - - for (j, c) in filtered_seq.chars().enumerate() { - let aa_index = (c as i64) - ('A' as i64) + 1; - result[[0, j + 1]] = aa_index; - } - - Ok(result) -} - -/// Convert peptide sequences into ASCII code array. -/// -/// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L115 -pub fn get_ascii_indices(peptide_sequences: &[String], device: Device) -> Result { - // println!("Peptide sequences to encode: {:?}", peptide_sequences); - let max_len = peptide_sequences.iter().map(|s| s.len()).max().unwrap_or(0) + 2; // +2 for padding - let batch_size = peptide_sequences.len(); - - let mut aa_indices = vec![0u32; batch_size * max_len]; - - for (i, peptide) in peptide_sequences.iter().enumerate() { - for (j, c) in peptide.chars().enumerate() { - aa_indices[i * max_len + j + 1] = c as u32; // +1 to skip the first padding - } - } - let aa_indices_tensor = - Tensor::from_slice(&aa_indices, (batch_size, max_len), &device)?; - Ok(aa_indices_tensor) -} - -/// One-hot encode amino acid indices and concatenate additional tensors. 
-pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result<Tensor> {
-    let (batch_size, seq_len) = aa_indices.shape().dims2()?;
-    let num_classes = AA_EMBEDDING_SIZE;
-
-    let mut one_hot_data = vec![0.0f32; batch_size * seq_len * num_classes];
-
-    // Iterate over the 2D tensor directly
-    for batch_idx in 0..batch_size {
-        for seq_idx in 0..seq_len {
-            let index = aa_indices.get(batch_idx)?.get(seq_idx)?.to_scalar::<f32>()?;
-            let class_idx = index.round() as usize; // Round to nearest integer and convert to usize
-            if class_idx < num_classes {
-                one_hot_data[batch_idx * seq_len * num_classes + seq_idx * num_classes + class_idx] = 1.0;
-            }
-        }
-    }
-
-    // Convert the one_hot_data vector directly to a tensor
-    let one_hot_tensor = Tensor::from_slice(&one_hot_data, (batch_size, seq_len, num_classes), aa_indices.device())
-        .map_err(|e| anyhow!("{}", e))?;
-
-    // Concatenate additional tensors if provided
-    let mut output_tensor = one_hot_tensor;
-
-    for other in cat_others {
-        output_tensor = Tensor::cat(&[output_tensor, other.deref().clone()], 2)?;
-    }
-
-    Ok(output_tensor)
-}
-
-
-/// Get the modification features for a given set of modifications and modification sites.
-///
-/// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L47
-pub fn get_mod_features(mods: &str, mod_sites: &str, seq_len: usize, mod_feature_size: usize, mod_to_feature: HashMap<String, Vec<f32>>, device: Device) -> Result<Tensor> {
-    let mod_names: Vec<&str> = mods.split(';').filter(|&s| !s.is_empty()).collect();
-    let mod_sites: Vec<usize> = mod_sites
-        .split(';')
-        .filter(|&s| !s.is_empty())
-        .map(|s| s.parse::<usize>().unwrap())
-        .collect();
-
-    // let mod_feature_size = self.constants.mod_elements.len();
-
-    let mut mod_x = vec![0.0f32; seq_len * mod_feature_size];
-
-    for (mod_name, &site) in mod_names.iter().zip(mod_sites.iter()) {
-        if let Some(feat) = mod_to_feature.get(*mod_name) {
-            for (i, &value) in feat.iter().enumerate() {
-                if site < seq_len {
-                    mod_x[site * mod_feature_size + i] += value;
-                }
-            }
-            // println!("Site: {}, feat: {:?}", site, feat);
-        }
-    }
-
-    Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), &device)
-        .map_err(|e| anyhow!("Failed to create tensor: {}", e))
-}
-
 
 const VALID_AA: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 
@@ -138,7 +18,10 @@ fn aa_index_map() -> HashMap {
         .collect()
 }
 
-/// Efficiently converts an amino acid sequence to a padded tensor of indices
+
+/// Convert peptide sequences into AA ID array.
+///
+/// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L88
 pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result<Tensor> {
     let map = aa_index_map();
     let filtered: Vec = seq
@@ -153,7 +36,48 @@ pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result<Tensor> {
 }
 
-/// Optimized version of get_mod_features that avoids repeated parsing
+/// One-hot encode amino acid indices and concatenate additional tensors.
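+///
+/// The output shape is `[batch, seq_len, AA_EMBEDDING_SIZE + extra]`, where `extra` is the
+/// summed trailing feature dimension of `cat_others`. A hypothetical call (variable names
+/// illustrative only, not part of this crate):
+///
+/// ```ignore
+/// // 27 one-hot AA channels concatenated with an 8-dim mod embedding -> 35 features
+/// let x = aa_one_hot(&aa_indices, &[&mod_x])?;
+/// ```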
+pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result<Tensor> {
+    let (batch_size, seq_len) = aa_indices.shape().dims2()?;
+    let num_classes = AA_EMBEDDING_SIZE;
+
+    // Extract all indices as f32s once
+    let indices = aa_indices.to_vec2::<f32>()?;
+
+    // Preallocate output buffer
+    let mut one_hot_data = vec![0.0f32; batch_size * seq_len * num_classes];
+
+    // Use parallel iterator for speed
+    one_hot_data
+        .par_chunks_mut(seq_len * num_classes)
+        .zip(indices.par_iter())
+        .for_each(|(chunk, row)| {
+            for (seq_idx, &fidx) in row.iter().enumerate() {
+                let class_idx = fidx.round() as usize;
+                if class_idx < num_classes {
+                    chunk[seq_idx * num_classes + class_idx] = 1.0;
+                }
+            }
+        });
+
+    let one_hot_tensor = Tensor::from_slice(&one_hot_data, (batch_size, seq_len, num_classes), aa_indices.device())
+        .map_err(|e| anyhow!("Failed to create one-hot tensor: {}", e))?;
+
+    // Concatenate with additional tensors
+    if cat_others.is_empty() {
+        Ok(one_hot_tensor)
+    } else {
+        let mut features = vec![one_hot_tensor];
+        features.extend(cat_others.iter().cloned().cloned());
+        Ok(Tensor::cat(&features, 2)?)
+    }
+}
+
+
+
+/// Get the modification features for a given set of modifications and modification sites.
+///
+/// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L47
 pub fn get_mod_features_from_parsed(
     mod_names: &[&str],
     mod_sites: &[usize],
@@ -162,24 +86,41 @@ pub fn get_mod_features_from_parsed(
     mod_to_feature: &HashMap<String, Vec<f32>>,
     device: &Device,
 ) -> Result<Tensor> {
-    let mut mod_x = vec![0.0f32; seq_len * mod_feature_size];
+    // Initialize buffer with atomic wrappers
+    let atomic_buffer: Vec<AtomicU32> = (0..seq_len * mod_feature_size)
+        .map(|_| AtomicU32::new(0))
+        .collect();
 
-    for (mod_name, &site) in mod_names.iter().zip(mod_sites.iter()) {
-        if site >= seq_len {
-            log::warn!("Skipping mod {} at invalid site {} (seq_len {})", mod_name, site, seq_len);
-            continue;
-        }
-        if let Some(feat) = mod_to_feature.get(*mod_name) {
-            for (i, &val) in feat.iter().enumerate() {
-                mod_x[site * mod_feature_size + i] += val;
+    mod_names
+        .par_iter()
+        .zip(mod_sites.par_iter())
+        .for_each(|(&mod_name, &site)| {
+            if site >= seq_len {
+                log::warn!(
+                    "Skipping mod {} at invalid site {} (seq_len {})",
+                    mod_name, site, seq_len
+                );
+                return;
             }
-        } else {
-            log::warn!("Unknown modification feature: {}", mod_name);
-        }
-    }
+            if let Some(feat) = mod_to_feature.get(mod_name) {
+                for (i, &val) in feat.iter().enumerate() {
+                    let idx = site * mod_feature_size + i;
+                    // Accumulate with a compare-and-swap loop so the addition happens in
+                    // f32 space; an integer fetch_add on raw float bit patterns would
+                    // corrupt the sum whenever two modifications touch the same index.
+                    let slot = &atomic_buffer[idx];
+                    let mut current = slot.load(Ordering::Relaxed);
+                    loop {
+                        let new_bits = (f32::from_bits(current) + val).to_bits();
+                        match slot.compare_exchange_weak(current, new_bits, Ordering::Relaxed, Ordering::Relaxed) {
+                            Ok(_) => break,
+                            Err(actual) => current = actual,
+                        }
+                    }
+                }
+            } else {
+                log::warn!("Unknown modification feature: {}", mod_name);
+            }
+        });
 
-    Ok(Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), device)
-        .map_err(|e| anyhow!("Failed to create tensor: {}", e))?)
+    // Convert atomic buffer back to f32
+    let mod_x: Vec<f32> = atomic_buffer
+        .into_iter()
+        .map(|a| f32::from_bits(a.load(Ordering::Relaxed)))
+        .collect();
+
+    Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), device)
+        .map_err(|e| anyhow!("Failed to create tensor: {}", e))
 }
 
@@ -193,21 +134,9 @@ mod tests {
     use super::*;
     use candle_core::Device;
     use candle_core::Tensor;
-    use ndarray::Array2;
     use std::collections::HashMap;
     use std::path::PathBuf;
 
-    #[test]
-    fn test_get_aa_indices() {
-        let seq = "AGHCEWQMKYR";
-        let result = get_aa_indices(seq).unwrap();
-        // expected result is [[0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]]
-        let expect_out = Array2::from_shape_vec((1, 13), vec![0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]).unwrap();
-        println!("{:?} - aa_indices: {:?}", seq, result);
-        assert_eq!(result.shape(), &[1, 13]);
-        assert_eq!(result, expect_out);
-    }
-
     #[test]
     fn test_aa_indices_tensor(){
         let device = Device::Cpu;
@@ -221,32 +150,6 @@ mod tests {
         // assert_eq!(result.to_vec3::<f32>().unwrap(), expect_out.to_vec3::<f32>().unwrap());
     }
 
-    #[test]
-    fn test_get_mod_features() {
-        let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";
-        let mod_sites = "0;4;8";
-        let seq_len = 11 + 2;
-        let mod_feature_size = 109;
-
-        let constants_path =
-            PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
-        let constants: ModelConstants =
-            parse_model_constants(constants_path.to_str().unwrap()).unwrap();
-        let mod_to_feature: HashMap<String, Vec<f32>> = load_mod_to_feature(&constants).unwrap();
-
-        let device = Device::Cpu;
-        let tensor = get_mod_features(
-            mods,
-            mod_sites,
-            seq_len,
-            mod_feature_size,
-            mod_to_feature,
-            device,
-        ).unwrap();
-        println!("tensor shape: {:?}", tensor.shape());
-        assert_eq!(tensor.shape().dims(), &[1, seq_len, mod_feature_size]);
-    }
-
     #[test]
     fn test_get_mod_features_from_parsed() {
         let mods_str = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";

From f3a50131b035ca8e38f66fc2cbc4709176c345e4 Mon Sep 17 00:00:00 2001
From: singjc
Date: Fri, 9 May 2025 00:46:52 -0400
Subject: [PATCH 12/75] refactor: Update RTCNNLSTMModel forward method to
 improve performance and readability

---
 .../src/models/rt_cnn_lstm_model.rs           | 40 ++++++++-----------
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
index 51bfa11..89a6cec 100644
--- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
@@ -1,25 +1,20 @@
-use anyhow::{anyhow, Result};
-use candle_core::{DType, Device, IndexOp, Tensor, Var, D};
-use candle_nn::{ops, Dropout, Module, Optimizer, VarBuilder, VarMap};
-use ndarray::Array2;
-use serde::Deserialize;
+use anyhow::Result;
+use candle_core::{DType, Device, IndexOp, Tensor};
+use candle_nn::{Dropout, Module, VarBuilder, VarMap};
 use std::collections::HashMap;
 use std::path::Path;
-use log::info;
 
-// use crate::models::rt_model::RTModel;
-use crate::building_blocks::bilstm::BidirectionalLSTM;
+
+
 use crate::building_blocks::building_blocks::{
-    DecoderLinear, Encoder26aaModCnnLstmAttnSum, AA_EMBEDDING_SIZE, MOD_FEATURE_SIZE,
+    DecoderLinear, Encoder26aaModCnnLstmAttnSum, MOD_FEATURE_SIZE,
 };
-use crate::building_blocks::featurize::{aa_one_hot, get_aa_indices, get_mod_features};
-use crate::models::model_interface::{ModelInterface, PropertyType, PredictionResult, load_tensors_from_model, create_var_map};
-use crate::utils::data_handling::PeptideData;
+use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map}; use crate::utils::peptdeep_utils::{ - extract_masses_and_indices, get_modification_indices, load_mod_to_feature, load_modifications, - parse_model_constants, remove_mass_shift, ModelConstants, ModificationMap, + load_mod_to_feature, + parse_model_constants, ModelConstants, }; -use crate::utils::logging::Progress; + // Main Model Struct @@ -115,17 +110,16 @@ impl ModelInterface for RTCNNLSTMModel { fn forward(&self, xs: &Tensor) -> Result { - let (batch_size, seq_len, _) = xs.shape().dims3()?; - - let start_mod_x = 1; + let (_batch_size, _seq_len, _) = xs.shape().dims3()?; + let aa_indices_out = xs.i((.., .., 0))?; - let mod_x_out = xs.i((.., .., start_mod_x..start_mod_x + MOD_FEATURE_SIZE))?; - + let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; let x = self.dropout.forward(&x, self.is_training)?; let x = self.rt_decoder.forward(&x)?; + let result = x.squeeze(1)?; - Ok(x.squeeze(1)?) + Ok(result) } /// Set model to evaluation mode for inference @@ -279,11 +273,8 @@ impl ModelInterface for RTCNNLSTMModel { mod tests { use crate::models::model_interface::ModelInterface; use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; - use crate::utils::peptdeep_utils::load_modifications; use candle_core::Device; use std::path::PathBuf; - use std::time::Instant; - // use itertools::izip; use super::*; @@ -392,6 +383,7 @@ mod tests { // Test prediction with a few peptides after fine-tuning let test_peptides = vec![ + ("AGHCEWQMKYR", "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", "0;4;8", 0.2945), ("QPYAVSELAGHQTSAESWGTGR", "", "", 0.4328955), ("GMSVSDLADKLSTDDLNSLIAHAHR", "Oxidation@M", "1", 0.6536107), ( From d1aea7907561a3b468a864db0e14bedcfe5b9d9f Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 00:53:45 -0400 Subject: [PATCH 13/75] refactor: Update redeem-properties crate models to remove unused imports and improve code organization --- .../src/models/ccs_cnn_lstm_model.rs | 27 +- .../redeem-properties/src/models/ccs_model.rs | 2 +- .../src/models/model_interface.rs | 231 ++---------------- .../src/models/ms2_bert_model.rs | 109 +++++---- 4 files changed, 102 insertions(+), 267 deletions(-) diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs index bde8a86..4a06304 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs @@ -1,27 +1,20 @@ -use anyhow::{anyhow, Result}; -use candle_core::{DType, Device, IndexOp, Tensor, Var, D}; +use anyhow::Result; +use candle_core::{DType, Device, IndexOp, Tensor}; use candle_nn::{ - ops, Dropout, Module, Optimizer, VarBuilder, VarMap, + Dropout, Module, VarBuilder, VarMap, }; -use log::info; -use ndarray::Array2; -use serde::Deserialize; + use std::collections::HashMap; -use std::process::Output; -use std::{char, fmt, vec}; +use std::{fmt, vec}; use std::path::Path; use crate::building_blocks::building_blocks::{ - DecoderLinear, Encoder26aaModChargeCnnLstmAttnSum, AA_EMBEDDING_SIZE, MOD_FEATURE_SIZE, + DecoderLinear, Encoder26aaModChargeCnnLstmAttnSum, MOD_FEATURE_SIZE, }; -use crate::building_blocks::featurize::{aa_one_hot, get_aa_indices, get_mod_features}; -use crate::utils::logging::Progress; -use crate::utils::data_handling::PeptideData; -use crate::utils::peptdeep_utils::{extract_masses_and_indices, 
get_modification_indices, remove_mass_shift};
 use crate::{
-    models::model_interface::{ModelInterface, PropertyType, PredictionResult,load_tensors_from_model, create_var_map},
+    models::model_interface::{ModelInterface, PropertyType,load_tensors_from_model, create_var_map},
     utils::peptdeep_utils::{
-        load_mod_to_feature, parse_instrument_index, parse_model_constants, ModelConstants,
+        load_mod_to_feature, parse_model_constants, ModelConstants,
     },
 };
@@ -144,7 +137,7 @@ impl ModelInterface for CCSCNNLSTMModel {
     fn forward(&self, xs: &Tensor) -> Result<Tensor> {
-        let (batch_size, seq_len, _) = xs.shape().dims3()?;
+        let (_batch_size, _seq_len, _) = xs.shape().dims3()?;
 
         // Separate input into aa_indices, mod_x, charge
         let start_mod_x = 1;
@@ -288,8 +281,6 @@ mod tests {
     use super::*;
     use crate::models::model_interface::ModelInterface;
     use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
-    use crate::utils::peptdeep_utils::load_modifications;
-    use crate::utils::data_handling::PeptideData;
     use candle_core::Device;
     use std::path::PathBuf;
 
diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 6adbdbb..4921bb4 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -1,5 +1,5 @@
 use std::path::Path;
-use candle_core::{Device, Tensor};
+use candle_core::Device;
 use anyhow::{Result, anyhow};
 use crate::models::model_interface::{ModelInterface,PredictionResult};
 use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 73070b5..b0e7655 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -1,25 +1,24 @@
 use crate::{
-    building_blocks::featurize::{self, aa_indices_tensor, get_aa_indices, get_mod_features, get_mod_features_from_parsed},
+    building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed},
     models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper},
     utils::{
         data_handling::PeptideData,
         logging::Progress,
         peptdeep_utils::{
             get_modification_indices, get_modification_string, parse_instrument_index,
-            remove_mass_shift, ModificationMap,
+            remove_mass_shift,
         }
     },
 };
 use anyhow::{Context, Result};
 use candle_core::{DType, Device, Tensor, Var};
-use candle_nn::{Module, Optimizer, VarMap};
+use candle_nn::{Optimizer, VarMap};
 use log::info;
 use rayon::prelude::*;
-use std::ops::{Index, IndexMut};
+use std::ops::Index;
 use std::path::Path;
-use std::sync::{Arc, Mutex};
 use std::{collections::HashMap, path::PathBuf};
-use itertools::izip;
 
 // Constants
 const CHARGE_FACTOR: f64 = 0.1;
@@ -287,9 +286,9 @@
     ) -> Result<Tensor> {
         let device = self.get_device();
         let mod_feature_size = self.get_mod_element_count();
-        let mod_to_feature = self.get_mod_to_feature().clone();
+        let mod_to_feature = self.get_mod_to_feature();
 
-        let aa_tensor = aa_indices_tensor(peptide_sequence, &device)?;
+        let aa_tensor = aa_indices_tensor(peptide_sequence, device)?;
         let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?;
 
         let mod_names: Vec<&str> = mods.split(';').filter(|s| !s.is_empty()).collect();
@@ -304,8 +303,8 @@
             &mod_indices,
             seq_len,
             mod_feature_size,
-            &mod_to_feature,
-            &device,
+            mod_to_feature,
+            device,
         )?;
 
         let mut features =
vec![aa_tensor, mod_tensor];
@@ -314,7 +313,7 @@
             let charge_tensor = Tensor::from_slice(
                 &vec![c as f64 * CHARGE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
-                &device,
+                device,
             )?.to_dtype(DType::F32)?;
             features.push(charge_tensor);
         }
@@ -323,7 +322,7 @@
             let nce_tensor = Tensor::from_slice(
                 &vec![n as f64 * NCE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
-                &device,
+                device,
             )?.to_dtype(DType::F32)?;
             features.push(nce_tensor);
         }
@@ -333,12 +332,16 @@
             let instr_tensor = Tensor::from_slice(
                 &vec![instr_idx; seq_len],
                 &[batch_size, seq_len, 1],
-                &device,
+                device,
             )?.to_dtype(DType::F32)?;
             features.push(instr_tensor);
         }
 
-        Ok(Tensor::cat(&features, 2)?)
+        if features.len() == 1 {
+            Ok(features.remove(0))
+        } else {
+            Ok(Tensor::cat(&features, 2)?)
+        }
     }
 
     /// Encode a batch of peptide sequences into a tensor
@@ -352,7 +355,7 @@
         instruments: Option<Vec<String>>,
     ) -> Result<Tensor> {
         let len = peptide_sequences.len();
-
+        
         let tensors: Vec<_> = (0..len)
             .into_par_iter()
             .map(|i| {
@@ -365,14 +368,18 @@
                     instruments.as_ref().map(|v| v[i].as_str()),
                 )
             })
-            .collect::<Result<Vec<_>, _>>()?; // Propagate errors
-
+            .collect::<Result<Vec<Tensor>>>()?;
+
+        if tensors.is_empty() {
+            return Err(anyhow::anyhow!("Encoding batch of peptides failed, the resulting tensor batch is empty."));
+        }
+
         let max_len = tensors
             .iter()
             .map(|t| t.shape().dims3().unwrap().1)
             .max()
             .unwrap_or(0);
-
+        
         let padded = tensors
             .into_par_iter()
             .map(|t| {
@@ -386,195 +393,9 @@
             })
             .map(|res| res.map_err(anyhow::Error::from))
             .collect::<Result<Vec<_>, _>>()?;
-
+        
         Ok(Tensor::cat(&padded, 0)?)
     }
-
-
-    // /// Encode a batch of peptide sequences (plus modifications) into a tensor.
-    // ///
-    // /// # Arguments
-    // /// * `peptide_sequences` - A vector of peptide sequences.
-    // /// * `mods` - A vector of strings representing the modifications for each peptide.
-    // /// * `mod_sites` - A vector of strings representing the modification site indices for each peptide.
-    // /// * `charge` - An optional vector of charge states for each peptide.
-    // /// * `nce` - An optional vector of nominal collision energies for each peptide.
-    // /// * `instruments` - An optional vector of instrument names for each peptide.
-    // ///
-    // /// # Returns
-    // /// A tensor containing the encoded peptide sequences.
-    // fn encode_peptides(
-    //     &self,
-    //     peptide_sequences: &[String],
-    //     mods: &[String],
-    //     mod_sites: &[String],
-    //     charges: Option<Vec<i32>>,
-    //     nces: Option<Vec<i32>>,
-    //     instruments: Option<Vec<String>>,
-    // ) -> Result<Tensor> {
-    //     if peptide_sequences.len() != mods.len() || peptide_sequences.len() != mod_sites.len() {
-    //         return Err(anyhow::anyhow!(
-    //             "Mismatch in input lengths: peptide_sequences, mods, and mod_sites must have the same length."
- // )); - // } - - // // Encode peptides in parallel using Rayon - // let encoded_tensors: Vec = peptide_sequences - // .par_iter() // Use Rayon's parallel iterator - // .enumerate() - // .map(|(i, peptide)| { - // self.encode_peptide( - // peptide, - // &mods[i], - // &mod_sites[i], - // charges.as_ref().map(|c| c[i]), - // nces.as_ref().map(|n| n[i]), - // instruments.as_ref().map(|ins| ins[i].as_str()), - // ) - // }) - // .collect::>>()?; // Collect results and propagate errors if any - - // // Determine the maximum sequence length - // let max_seq_len = encoded_tensors - // .par_iter() - // .map(|t| t.shape().dims3().unwrap().1) // Get sequence length (dimension 1) - // .max() - // .unwrap_or(0); - - // // Pad tensors to the max_seq_len - // let padded_tensors: Result> = encoded_tensors - // .into_par_iter() // Use Rayon's parallel iterator - // .map(|t| { - // let (_, seq_len, feature_size) = t.shape().dims3()?; // Extract feature dimension - // if seq_len < max_seq_len { - // let pad_size = max_seq_len - seq_len; - // // Create a padding tensor with the correct shape and type - // let pad = Tensor::zeros( - // &[1, pad_size, feature_size], // Use the correct feature dimension - // t.dtype(), - // t.device(), - // )?; - // // Concatenate padding along sequence length - // Tensor::cat(&[&t, &pad], 1) - // } else { - // Ok(t) - // } - // }) - // .collect::, _>>() - // .map_err(Into::into); - - // let padded_tensors = padded_tensors?; - - // // Concatenate all padded tensors along the batch dimension - // let batch_tensor = Tensor::cat(&padded_tensors, 0)?; - - // Ok(batch_tensor) - // } - - // /// Encode peptide sequence (plus modifications) into a tensor. - // /// - // /// # Arguments - // /// * `peptide_sequence` - The peptide sequence. - // /// * `mods` - A string representing the modifications for the peptide. - // /// * `mod_sites` - A string representing the modification site indices for the peptide. - // /// * `charge` - An optional charge state for the peptide. - // /// * `nce` - An optional nominal collision energy for the peptide. - // /// * `instrument` - An optional instrument name for the peptide. - // /// - // /// # Returns - // /// A tensor containing the encoded peptide sequence. - // fn encode_peptide( - // &self, - // peptide_sequence: &str, - // mods: &str, - // mod_sites: &str, - // charge: Option, - // nce: Option, - // instrument: Option<&str>, - // ) -> Result { - // log::trace!( - // "[ModelInterface::encode_peptide] Encoding peptide: {:?}, mods: {:?}, mod_sites: {:?}, charge: {:?}, nce: {:?}, instrument: {:?}", - // peptide_sequence, - // mods, - // mod_sites, - // charge, - // nce, - // instrument - // ); - // let aa_indices = get_aa_indices(peptide_sequence)?; - // log::trace!( - // "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}", - // aa_indices.shape(), - // aa_indices.iter().min(), - // aa_indices.iter().max() - // ); - - // // Convert ndarray to Tensor (F32) - // let aa_indices_tensor = Tensor::from_slice( - // &aa_indices.as_slice().unwrap(), - // (aa_indices.shape()[0], aa_indices.shape()[1]), - // &self.get_device(), - // )? 
- // .to_dtype(DType::F32)?; - - // let (batch_size, seq_len) = aa_indices_tensor.shape().dims2()?; - // let aa_indices_tensor = aa_indices_tensor.unsqueeze(2)?; // Shape: batch_size x seq_len x 1 - - // log::trace!( - // "[ModelInterface::encode_peptide] aa_indices_tensor shape: {:?}, min: {:?}, max: {:?}", - // aa_indices_tensor.shape(), - // aa_indices_tensor.min_all(), - // aa_indices_tensor.max_all() - // ); - - // // Get modification features - // let mod_x = get_mod_features( - // mods, - // mod_sites, - // seq_len, - // self.get_mod_element_count(), - // self.get_mod_to_feature().clone(), - // self.get_device().clone(), - // )?; - - // let mut features = vec![aa_indices_tensor, mod_x]; - - // // Conditionally add charge - // if let Some(c) = charge { - // let charge_tensor = Tensor::from_slice( - // &vec![c as f64 * CHARGE_FACTOR; seq_len], - // &[batch_size, seq_len, 1], - // &self.get_device(), - // )? - // .to_dtype(DType::F32)?; - // features.push(charge_tensor); - // } - - // // Conditionally add NCE - // if let Some(n) = nce { - // let nce_tensor = Tensor::from_slice( - // &vec![n as f64 * NCE_FACTOR; seq_len], - // &[batch_size, seq_len, 1], - // &self.get_device(), - // )? - // .to_dtype(DType::F32)?; - // features.push(nce_tensor); - // } - - // // Conditionally add instrument - // if let Some(instr) = instrument { - // let instrument_tensor = Tensor::from_slice( - // &vec![parse_instrument_index(instr) as u32; seq_len], - // &[batch_size, seq_len, 1], - // &self.get_device(), - // )? - // .to_dtype(DType::F32)?; - // features.push(instrument_tensor); - // } - - // // Concatenate features - // Ok(Tensor::cat(&features, 2)?) - // } /// Fine-tune the model on a batch of training data. /// diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 9f3a1d3..811a50c 100644 --- a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -1,32 +1,19 @@ -use anyhow::{anyhow, Result}; -use candle_core::{DType, Device, IndexOp, Tensor, Var, D}; -use candle_nn::{ - ops, Conv1d, Conv1dConfig, Dropout, Linear, Module, Optimizer, PReLU, VarBuilder, VarMap, -}; -use log::info; -use ndarray::Array2; -use serde::Deserialize; +use anyhow::Result; +use candle_core::{DType, Device, IndexOp, Tensor}; +use candle_nn::{Dropout, Module, VarBuilder, VarMap}; use std::collections::HashMap; use std::fmt; use std::path::Path; use crate::{ - building_blocks::{ - building_blocks::{ - DecoderLinear, HiddenHfaceTransformer, Input26aaModPositionalEncoding, MetaEmbedding, - ModLossNN, AA_EMBEDDING_SIZE, MOD_FEATURE_SIZE, - }, - featurize::{aa_one_hot, get_aa_indices, get_mod_features}, + building_blocks::building_blocks::{ + DecoderLinear, HiddenHfaceTransformer, Input26aaModPositionalEncoding, MetaEmbedding, + ModLossNN, MOD_FEATURE_SIZE, }, - models::model_interface::{load_tensors_from_model, create_var_map, ModelInterface, PropertyType}, - utils::{ - data_handling::PeptideData, - logging::Progress, - peptdeep_utils::{ - get_modification_indices, get_modification_string, load_mod_to_feature, - parse_model_constants, remove_mass_shift, ModelConstants, - }, + models::model_interface::{ + create_var_map, load_tensors_from_model, ModelInterface, PropertyType, }, + utils::peptdeep_utils::{load_mod_to_feature, parse_model_constants, ModelConstants}, }; // Constants @@ -183,7 +170,7 @@ impl ModelInterface for MS2BertModel { } fn forward(&self, xs: &Tensor) -> Result { - let 
(batch_size, seq_len, _) = xs.shape().dims3()?; + let (_batch_size, seq_len, _) = xs.shape().dims3()?; // Separate the input tensor into the different parts @@ -206,18 +193,42 @@ impl ModelInterface for MS2BertModel { let nce_out = nce_out.squeeze(2)?; // Squeeze to remove dimensions of size 1 if needed let instrument_out = instrument_out.squeeze(2)?.squeeze(1)?; // Squeeze to remove dimensions of size 1 if needed - log::trace!("[MS2BertModel::forward] aa_indices_out shape: {:?}, device: {:?}", aa_indices_out.shape(), aa_indices_out.device()); - log::trace!("[MS2BertModel::forward] mod_x_out shape: {:?}, device: {:?}", mod_x_out.shape(), mod_x_out.device()); - log::trace!("[MS2BertModel::forward] charge_out shape: {:?}, device: {:?}", charge_out.shape(), charge_out.device()); - log::trace!("[MS2BertModel::forward] nce_out shape: {:?}, device: {:?}", nce_out.shape(), nce_out.device()); - log::trace!("[MS2BertModel::forward] instrument_out shape: {:?}, device: {:?}", instrument_out.shape(), instrument_out.device()); + log::trace!( + "[MS2BertModel::forward] aa_indices_out shape: {:?}, device: {:?}", + aa_indices_out.shape(), + aa_indices_out.device() + ); + log::trace!( + "[MS2BertModel::forward] mod_x_out shape: {:?}, device: {:?}", + mod_x_out.shape(), + mod_x_out.device() + ); + log::trace!( + "[MS2BertModel::forward] charge_out shape: {:?}, device: {:?}", + charge_out.shape(), + charge_out.device() + ); + log::trace!( + "[MS2BertModel::forward] nce_out shape: {:?}, device: {:?}", + nce_out.shape(), + nce_out.device() + ); + log::trace!( + "[MS2BertModel::forward] instrument_out shape: {:?}, device: {:?}", + instrument_out.shape(), + instrument_out.device() + ); // Forward pass through input_nn with dropout let in_x = self .dropout .forward(&self.input_nn.forward(&aa_indices_out, &mod_x_out)?, true)?; - log::trace!("[MS2BertModel::forward] in_x shape (post dropout-input_nn): {:?}, device: {:?}", in_x.shape(), in_x.device()); + log::trace!( + "[MS2BertModel::forward] in_x shape (post dropout-input_nn): {:?}, device: {:?}", + in_x.shape(), + in_x.device() + ); // Prepare metadata for meta_nn let meta_x = self @@ -225,17 +236,27 @@ impl ModelInterface for MS2BertModel { .forward(&charge_out, &nce_out, &instrument_out)? .unsqueeze(1)? 
.repeat(vec![1, seq_len as usize, 1])?; - log::trace!("[MS2BertModel::forward] meta_x (post meta_nn) shape: {:?}, device: {:?}", meta_x.shape(), meta_x.device()); + log::trace!( + "[MS2BertModel::forward] meta_x (post meta_nn) shape: {:?}, device: {:?}", + meta_x.shape(), + meta_x.device() + ); // Concatenate in_x and meta_x along dimension 2 let combined_input = Tensor::cat(&[in_x.clone(), meta_x], 2)?; - log::trace!("[MS2BertModel::forward] combined_input shape: {:?}, device: {:?}", combined_input.shape(), combined_input.device()); + log::trace!( + "[MS2BertModel::forward] combined_input shape: {:?}, device: {:?}", + combined_input.shape(), + combined_input.device() + ); // Forward pass through hidden_nn - let hidden_x = self - .hidden_nn - .forward(&combined_input.clone(), None)?; - log::trace!("[MS2BertModel::forward] hidden_x shape: {:?}, device: {:?}", hidden_x.shape(), hidden_x.device()); + let hidden_x = self.hidden_nn.forward(&combined_input.clone(), None)?; + log::trace!( + "[MS2BertModel::forward] hidden_x shape: {:?}, device: {:?}", + hidden_x.shape(), + hidden_x.device() + ); // // Handle attentions if needed (similar to PyTorch) // if self.output_attentions { @@ -247,11 +268,19 @@ impl ModelInterface for MS2BertModel { // Apply dropout and combine with input let x_tmp = (hidden_x + combined_input * 0.2)?; let hidden_output = self.dropout.forward(&x_tmp, true)?; - log::trace!("[MS2BertModel::forward] hidden_output shape: {:?}, device: {:?}", hidden_output.shape(), hidden_output.device()); + log::trace!( + "[MS2BertModel::forward] hidden_output shape: {:?}, device: {:?}", + hidden_output.shape(), + hidden_output.device() + ); // Forward pass through output_nn let mut out_x = self.output_nn.forward(&hidden_output)?; - log::trace!("[MS2BertModel::forward] out_x shape: {:?}, device: {:?}", out_x.shape(), out_x.device()); + log::trace!( + "[MS2BertModel::forward] out_x shape: {:?}, device: {:?}", + out_x.shape(), + out_x.device() + ); // Handle modloss if applicable (similar logic as PyTorch) if self.num_modloss_types > 0 { @@ -338,7 +367,6 @@ impl ModelInterface for MS2BertModel { fn print_weights(&self) { todo!() } - } // // Module Trait Implementation @@ -403,12 +431,7 @@ mod tests { use super::*; use crate::models::model_interface::ModelInterface; use crate::models::ms2_bert_model::MS2BertModel; - use crate::utils::peptdeep_utils::load_modifications; use candle_core::Device; - use csv::Reader; - use rayon::vec; - use std::collections::HashMap; - use std::fs::File; use std::path::PathBuf; #[test] From e67695109723d8daa5bf01b729f2ad57fb34c5a7 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 00:59:57 -0400 Subject: [PATCH 14/75] add: TransformerEncoder and SeqTransformer block --- .../src/building_blocks/building_blocks.rs | 315 ++++++++++++++++-- .../src/building_blocks/nn.rs | 196 ++++++++++- 2 files changed, 488 insertions(+), 23 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index 2ec0f85..d1afa48 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -4,12 +4,15 @@ use candle_nn as nn; use candle_transformers as transformers; use core::num; use std::fmt; +use std::time::Instant; use crate::building_blocks::bilstm::BidirectionalLSTM; use crate::building_blocks::featurize::aa_one_hot; use crate::building_blocks::nn::{BertEncoderModule, ModuleList}; use 
crate::building_blocks::sequential::{seq, Sequential}; +use super::nn::TransformerEncoder; + /// constants used by PeptDeep Models pub const MOD_FEATURE_SIZE: usize = 109; // TODO: derive from constants yaml pub const AA_EMBEDDING_SIZE: usize = 27; // TODO: derive from constants yaml @@ -302,7 +305,7 @@ pub struct MetaEmbedding { } impl MetaEmbedding { - fn new(out_features: usize, device: &Device) -> Result { + fn _new(out_features: usize, device: &Device) -> Result { let nn = nn::linear( MAX_INSTRUMENT_NUM + 1, out_features - 1, @@ -432,13 +435,13 @@ pub struct HiddenHfaceTransformer { } impl HiddenHfaceTransformer { - fn new( - hidden_dim: usize, - hidden_expand: usize, - nheads: usize, - nlayers: usize, - dropout: f64, - output_attentions: bool + fn _new( + _hidden_dim: usize, + _hidden_expand: usize, + _nheads: usize, + _nlayers: usize, + _dropout: f64, + _output_attentions: bool ) -> Result { unimplemented!() } @@ -450,7 +453,7 @@ impl HiddenHfaceTransformer { nheads: usize, nlayers: usize, dropout: f64, - output_attentions: bool + _output_attentions: bool ) -> Result { let config = transformers::models::bert::Config { hidden_size: hidden_dim, @@ -583,8 +586,48 @@ struct SeqCNN { } impl SeqCNN { - fn new() -> Self { - unimplemented!(); + pub fn new(embedding_hidden: usize, device: &Device) -> Result { + let varmap = nn::VarMap::new(); + let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); + + let cnn_short = nn::conv1d( + embedding_hidden, + embedding_hidden, + 3, + nn::Conv1dConfig { + padding: 1, + ..Default::default() + }, + varbuilder.pp("cnn_short"), + )?; + + let cnn_medium = nn::conv1d( + embedding_hidden, + embedding_hidden, + 5, + nn::Conv1dConfig { + padding: 2, + ..Default::default() + }, + varbuilder.pp("cnn_medium"), + )?; + + let cnn_long = nn::conv1d( + embedding_hidden, + embedding_hidden, + 7, + nn::Conv1dConfig { + padding: 3, + ..Default::default() + }, + varbuilder.pp("cnn_long"), + )?; + + Ok(Self { + cnn_short, + cnn_medium, + cnn_long, + }) } pub fn from_varstore( @@ -654,7 +697,7 @@ struct SeqLSTM { } impl SeqLSTM { - fn new() -> Self { + fn _new() -> Self { unimplemented!(); } @@ -675,6 +718,90 @@ impl Module for SeqLSTM { } } +/// Transformer block applied on sequence input using a custom Transformer encoder implementation. +/// This replaces the LSTM with a Transformer encoder for sequence modeling. +#[derive(Debug, Clone)] +pub struct SeqTransformer { + encoder: TransformerEncoder, + training: bool, +} + +impl SeqTransformer { + /// Construct a new transformer encoder block for sequence modeling. + /// + /// # Arguments + /// * `input_dim` - The input embedding dimension (e.g., CNN output). + /// * `model_dim` - The internal model dimension of the transformer. + /// * `ff_dim` - The feedforward hidden layer dimension. + /// * `num_heads` - Number of attention heads. + /// * `num_layers` - Number of transformer encoder layers. + /// * `max_len` - Maximum input sequence length. + /// * `dropout_prob` - Dropout probability. + /// * `device` - The device to place the tensors on. 
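+    ///
+    /// A hypothetical construction sketch (dimension values are illustrative, not crate
+    /// defaults):
+    ///
+    /// ```ignore
+    /// let block = SeqTransformer::new(140, 128, 512, 8, 2, 200, 0.1, &Device::Cpu)?;
+    /// let y = block.forward(&x)?; // [batch, seq_len, model_dim]
+    /// ```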
+ pub fn new( + input_dim: usize, + model_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + device: &Device, + ) -> Result { + let varmap = nn::VarMap::new(); + let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); + let encoder = TransformerEncoder::new( + &varbuilder, + input_dim, + model_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?; + Ok(Self { encoder, training: true }) + } + + /// Load a transformer encoder from a varstore (used when loading from pre-trained weights). + pub fn from_varstore( + varstore: nn::VarBuilder, + input_dim: usize, + model_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + device: &Device, + ) -> Result { + let encoder = TransformerEncoder::new( + &varstore, + input_dim, + model_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?; + Ok(Self { encoder, training: true }) + } + + pub fn set_training(&mut self, training: bool) { + self.training = training; + } +} + +impl Module for SeqTransformer { + fn forward(&self, x: &Tensor) -> Result { + self.encoder.forward_with_mask(x, None, self.training) + } +} + + /// apply linear transformation and tensor rescaling with softmax #[derive(Debug, Clone)] struct SeqAttentionSum { @@ -682,6 +809,16 @@ struct SeqAttentionSum { } impl SeqAttentionSum { + pub fn new(hidden_dim: usize, device: &Device) -> Result { + let varmap = nn::VarMap::new(); + let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); + let attention = nn::Linear::new( + varbuilder.get((1, hidden_dim), "attention.weight")?, + None, + ); + Ok(Self { attention }) + } + pub fn from_varstore(varstore: nn::VarBuilder, hidden_dim: usize, name: &str) -> Result { let attention = nn::Linear::new(varstore.get((1, hidden_dim), name).unwrap(), None); Ok(Self { attention }) @@ -719,7 +856,7 @@ pub struct Encoder26aaModCnnLstmAttnSum { } impl Encoder26aaModCnnLstmAttnSum { - fn new() -> Self { + fn _new() -> Self { unimplemented!(); } @@ -764,14 +901,25 @@ impl Encoder26aaModCnnLstmAttnSum { pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor) -> Result { + let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; + println!("Encoder26aaModCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let additional_tensors: Vec<&Tensor> = vec![&mod_x]; + println!("Encoder26aaModCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - + println!("Encoder26aaModCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; + println!("Encoder26aaModCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = self.input_lstm.forward(&x)?; + println!("Encoder26aaModCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; + println!("Encoder26aaModCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); Ok(x) } } @@ -786,7 +934,7 @@ pub struct Encoder26aaModChargeCnnLstmAttnSum { } impl Encoder26aaModChargeCnnLstmAttnSum { - fn 
new() -> Self { + fn _new() -> Self { unimplemented!(); } @@ -831,29 +979,156 @@ impl Encoder26aaModChargeCnnLstmAttnSum { pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor, charges: &Tensor) -> Result { + let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let charges_repeated = charges.unsqueeze(1)?.repeat(&[1, mod_x.dim(1)?, 1])?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - charges_repeated forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let additional_tensors: Vec<&Tensor> = vec![&mod_x, &charges_repeated]; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); let x = self.input_lstm.forward(&x)?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); + let start_time = Instant::now(); + let x = self.attn_sum.forward(&x)?; + println!("Encoder26aaModChargeCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + Ok(x) + } +} + + +/// Encode AAs (26 AA letters) and modifications using CNN + Transformer + AttentionSum. +#[derive(Debug, Clone)] +pub struct Encoder26aaModCnnTransformerAttnSum { + mod_nn: ModEmbeddingFixFirstK, + input_cnn: SeqCNN, + input_transformer: SeqTransformer, + attn_sum: SeqAttentionSum, +} + +impl Encoder26aaModCnnTransformerAttnSum { + pub fn from_varstore( + varstore: &nn::VarBuilder, + mod_hidden_dim: usize, + hidden_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + names_mod_nn: Vec<&str>, + names_input_cnn_weight: Vec<&str>, + names_input_cnn_bias: Vec<&str>, + transformer_pp: &str, + names_attn_sum: Vec<&str>, + device: &Device, + ) -> Result { + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + Ok(Self { + mod_nn: ModEmbeddingFixFirstK::from_varstore( + &varstore, + MOD_FEATURE_SIZE, + mod_hidden_dim, + names_mod_nn[0], + )?, + input_cnn: SeqCNN::from_varstore( + varstore.clone(), + input_dim, + names_input_cnn_weight, + names_input_cnn_bias, + )?, + input_transformer: SeqTransformer::from_varstore( + varstore.pp(transformer_pp).clone(), + input_dim * 4, + hidden_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?, + attn_sum: SeqAttentionSum::from_varstore( + varstore.clone(), + hidden_dim, + names_attn_sum[0], + )?, + }) + } + + /// Construct a CNN+Transformer+Attention encoder from scratch (no pretrained weights). 
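+    ///
+    /// The forward pass below runs mod-embedding -> one-hot concat -> CNN -> Transformer ->
+    /// attention-sum; a hypothetical call (values illustrative):
+    /// `Encoder26aaModCnnTransformerAttnSum::new(&device, 8, 128, 512, 8, 2, 200, 0.1)?`.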
+ pub fn new( + device: &Device, + mod_hidden_dim: usize, + hidden_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + ) -> Result { + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + Ok(Self { + mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, device)?, + input_cnn: SeqCNN::new(input_dim, device)?, + input_transformer: SeqTransformer::new( + input_dim * 4, + hidden_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?, + attn_sum: SeqAttentionSum::new(hidden_dim, device)?, + }) + } + + pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor) -> Result { + let start_time = Instant::now(); + let mod_x = self.mod_nn.forward(mod_x)?; + println!("Encoder26aaModCnnTransformerAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + + let additional_tensors: Vec<&Tensor> = vec![&mod_x]; + let start_time = Instant::now(); + let x = aa_one_hot(aa_indices, &additional_tensors) + .map_err(|e| candle_core::Error::Msg(e.to_string()))?; + println!("Encoder26aaModCnnTransformerAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + + let start_time = Instant::now(); + let x = self.input_cnn.forward(&x)?; + println!("Encoder26aaModCnnTransformerAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + + let start_time = Instant::now(); + let x = self.input_transformer.forward(&x)?; + println!("Encoder26aaModCnnTransformerAttnSum::forward - input_transformer forward time: {:.3?}", start_time.elapsed()); + + let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; + println!("Encoder26aaModCnnTransformerAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + Ok(x) } } + + #[cfg(test)] mod tests { - use crate::models::model_interface::ModelInterface; - use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; - use crate::utils::peptdeep_utils::load_modifications; use candle_core::Device; use candle_nn::VarBuilder; use std::path::PathBuf; - use std::time::Instant; - // use itertools::izip; use super::*; diff --git a/crates/redeem-properties/src/building_blocks/nn.rs b/crates/redeem-properties/src/building_blocks/nn.rs index 753bedc..1373032 100644 --- a/crates/redeem-properties/src/building_blocks/nn.rs +++ b/crates/redeem-properties/src/building_blocks/nn.rs @@ -1,7 +1,8 @@ -use candle_core::{Result, Tensor}; -use candle_nn::Module; -use std::ops::{Deref, DerefMut}; +use candle_core::{Device, IndexOp, Result, Tensor}; +use candle_nn::{Dropout, LayerNorm, Linear, Module, VarBuilder}; use candle_transformers::models::bert::{BertEncoder, Config}; +use candle_nn::ops::softmax; +use std::ops::{Deref, DerefMut}; use std::sync::Arc; #[derive(Clone)] @@ -68,3 +69,192 @@ impl Module for BertEncoderModule { self.encoder.forward(hidden_states, &attention_mask) } } + + +/// A minimal Transformer encoder layer with multi-head self-attention, feedforward block, +/// dropout, and optional sinusoidal positional encoding and padding mask support. 
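+///
+/// Each layer applies post-norm residual blocks, `norm(x + dropout(attn(x)))` followed by
+/// `norm(x + dropout(ff(x)))`, and sinusoidal positions are added to the input before the
+/// first layer. A hypothetical forward pass (shapes illustrative):
+///
+/// ```ignore
+/// let enc = TransformerEncoder::new(&vb, 140, 128, 512, 8, 2, 200, 0.1, &device)?;
+/// let y = enc.forward_with_mask(&x, None, false)?; // [batch, seq_len, model_dim]
+/// ```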
+#[derive(Debug, Clone)]
+pub struct TransformerEncoder {
+    layers: Vec<TransformerEncoderLayer>,
+    pos_encoding: Tensor,
+    dropout: Dropout,
+}
+
+impl TransformerEncoder {
+    pub fn new(
+        varbuilder: &VarBuilder,
+        input_dim: usize,
+        model_dim: usize,
+        ff_dim: usize,
+        num_heads: usize,
+        num_layers: usize,
+        max_len: usize,
+        dropout_prob: f32,
+        device: &Device,
+    ) -> Result<Self> {
+        let mut layers = Vec::new();
+        for i in 0..num_layers {
+            let layer = TransformerEncoderLayer::new(
+                &varbuilder.pp(&format!("layer_{}", i)),
+                input_dim,
+                model_dim,
+                ff_dim,
+                num_heads,
+                dropout_prob,
+            )?;
+            layers.push(layer);
+        }
+        let pos_encoding = create_sinusoidal_encoding(max_len, model_dim, device)?;
+        let dropout = Dropout::new(dropout_prob);
+        Ok(Self { layers, pos_encoding, dropout })
+    }
+
+    pub fn forward_with_mask(&self, x: &Tensor, padding_mask: Option<&Tensor>, training: bool) -> Result<Tensor> {
+        let (b, t, _) = x.dims3()?;
+        let pe = self.pos_encoding.i((..t, ..))?.unsqueeze(0)?.broadcast_as((b, t, self.pos_encoding.dim(1)?))?;
+        let mut out = x + pe;
+        out = self.dropout.forward(&out?, training);
+        for layer in &self.layers {
+            out = layer.forward(&out?, padding_mask, training);
+        }
+        Ok(out?)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct TransformerEncoderLayer {
+    self_attn: MultiHeadAttention,
+    ff: FeedForward,
+    norm1: LayerNorm,
+    norm2: LayerNorm,
+    dropout1: Dropout,
+    dropout2: Dropout,
+}
+
+impl TransformerEncoderLayer {
+    pub fn new(
+        varbuilder: &VarBuilder,
+        input_dim: usize,
+        model_dim: usize,
+        ff_dim: usize,
+        num_heads: usize,
+        dropout_prob: f32,
+    ) -> Result<Self> {
+        Ok(Self {
+            self_attn: MultiHeadAttention::new(varbuilder, input_dim, model_dim, num_heads)?,
+            ff: FeedForward::new(varbuilder, model_dim, ff_dim)?,
+            norm1: {
+                let weight = varbuilder.get((model_dim,), "norm1.weight")?;
+                let bias = varbuilder.get((model_dim,), "norm1.bias")?;
+                LayerNorm::new(weight, bias, 1e-5)
+            },
+            norm2: {
+                let weight = varbuilder.get((model_dim,), "norm2.weight")?;
+                let bias = varbuilder.get((model_dim,), "norm2.bias")?;
+                LayerNorm::new(weight, bias, 1e-5)
+            },
+            dropout1: Dropout::new(dropout_prob),
+            dropout2: Dropout::new(dropout_prob),
+        })
+    }
+
+    pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>, training: bool) -> Result<Tensor> {
+        let attn = self.self_attn.forward(x, mask)?;
+        let x = self.norm1.forward(&(x + self.dropout1.forward(&attn, training)?)?)?;
+        let ff = self.ff.forward(&x)?;
+        self.norm2.forward(&(x + self.dropout2.forward(&ff, training)?)?)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct MultiHeadAttention {
+    proj_q: Linear,
+    proj_k: Linear,
+    proj_v: Linear,
+    proj_out: Linear,
+    num_heads: usize,
+    head_dim: usize,
+}
+
+impl MultiHeadAttention {
+    pub fn new(
+        varbuilder: &VarBuilder,
+        input_dim: usize,
+        model_dim: usize,
+        num_heads: usize,
+    ) -> Result<Self> {
+        let head_dim = model_dim / num_heads;
+        Ok(Self {
+            proj_q: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_q")?,
+            proj_k: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_k")?,
+            proj_v: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_v")?,
+            proj_out: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_out")?,
+            num_heads,
+            head_dim,
+        })
+    }
+
+    pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>) -> Result<Tensor> {
+        let (b, t, _) = x.dims3()?;
+        let q = self.proj_q.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
+        let k = self.proj_k.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
+        let v = self.proj_v.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
+
+        let mut scores = q.matmul(&k.transpose(2, 3)?)? / (self.head_dim as f64).sqrt();
+        if let Some(mask) = mask {
+            let mask = mask.unsqueeze(1)?;
+            let scale = Tensor::new(1e9f32, x.device())?;
+            // broadcast_mul is needed here: the scale is a scalar tensor, and plain `mul`
+            // requires identical shapes.
+            scores = scores?.broadcast_add(&mask.neg()?.broadcast_mul(&scale)?);
+        }
+
+        let scores = scores?;
+        let attn = candle_nn::ops::softmax(&scores, scores.dims().len() - 1)?;
+        let context = attn.matmul(&v)?.transpose(1, 2)?.reshape((b, t, self.num_heads * self.head_dim))?;
+        self.proj_out.forward(&context)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct FeedForward {
+    lin1: Linear,
+    lin2: Linear,
+}
+
+impl FeedForward {
+    pub fn new(varbuilder: &VarBuilder, model_dim: usize, ff_dim: usize) -> Result<Self> {
+        Ok(Self {
+            lin1: linear_from_varbuilder(varbuilder, model_dim, ff_dim, "lin1")?,
+            lin2: linear_from_varbuilder(varbuilder, ff_dim, model_dim, "lin2")?,
+        })
+    }
+
+    pub fn forward(&self, x: &Tensor) -> Result<Tensor> {
+        let x = self.lin1.forward(x)?.relu()?;
+        self.lin2.forward(&x)
+    }
+}
+
+
+fn linear_from_varbuilder(
+    vb: &VarBuilder,
+    in_dim: usize,
+    out_dim: usize,
+    prefix: &str,
+) -> Result<Linear> {
+    let weight = vb.get((out_dim, in_dim), &format!("{}.weight", prefix))?;
+    let bias = vb.get((out_dim,), &format!("{}.bias", prefix)).ok();
+    Ok(Linear::new(weight, bias))
+}
+
+
+/// Generate sinusoidal positional encoding like in "Attention is All You Need".
+pub fn create_sinusoidal_encoding(seq_len: usize, model_dim: usize, device: &Device) -> Result<Tensor> {
+    let mut pe = vec![0f32; seq_len * model_dim];
+    for pos in 0..seq_len {
+        for i in 0..model_dim {
+            let angle = pos as f32 / (10000f32).powf(2.
* (i / 2) as f32 / model_dim as f32); + pe[pos * model_dim + i] = if i % 2 == 0 { angle.sin() } else { angle.cos() }; + } + } + Tensor::from_vec(pe, (seq_len, model_dim), device) +} From e44dddda448d72a4c7ff6bf8cbd300f7ef887f12 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 01:01:32 -0400 Subject: [PATCH 15/75] refactor: Update RTCNNLSTMModel forward method to improve performance and readability --- crates/redeem-properties/src/models/rt_cnn_lstm_model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs index 89a6cec..e7ae329 100644 --- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs @@ -271,7 +271,7 @@ impl ModelInterface for RTCNNLSTMModel { #[cfg(test)] mod tests { - use crate::models::model_interface::ModelInterface; + use crate::models::model_interface::{ModelInterface, PredictionResult}; use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; use candle_core::Device; use std::path::PathBuf; From 999ccf51ad6501550c1c44ae4a50d28e234050c7 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 13:27:12 -0400 Subject: [PATCH 16/75] refactor: Add RT-CNN Transformer model and update redeem-properties crate models --- .../src/building_blocks/building_blocks.rs | 87 ++--- .../src/building_blocks/nn.rs | 110 ++++-- .../src/models/ccs_cnn_lstm_model.rs | 5 + crates/redeem-properties/src/models/mod.rs | 1 + .../src/models/model_interface.rs | 348 ++++++++++++++---- .../src/models/ms2_bert_model.rs | 5 + .../src/models/rt_cnn_lstm_model.rs | 5 + .../src/models/rt_cnn_transformer_model.rs | 333 +++++++++++++++++ .../redeem-properties/src/models/rt_model.rs | 4 +- .../src/utils/peptdeep_utils.rs | 30 ++ crates/redeem-properties/src/utils/utils.rs | 58 +++ 11 files changed, 846 insertions(+), 140 deletions(-) create mode 100644 crates/redeem-properties/src/models/rt_cnn_transformer_model.rs diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index d1afa48..2c3fc28 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -2,6 +2,7 @@ use anyhow::{Context, Result as AnyHowResult}; use candle_core::{DType, Device, Module, Result, Tensor, D}; use candle_nn as nn; use candle_transformers as transformers; +use serde::de; use core::num; use std::fmt; use std::time::Instant; @@ -26,16 +27,18 @@ pub struct DecoderLinear { impl DecoderLinear { pub fn new(in_features: usize, out_features: usize, vb: &nn::VarBuilder) -> Result { - let weight = Tensor::zeros((in_features, 64), DType::F32, vb.device())?; - let bias = Tensor::zeros(64, DType::F32, vb.device())?; + // First linear layer: in_features -> 64 + let weight1 = Tensor::zeros((64, in_features), DType::F32, vb.device())?; + let bias1 = Tensor::zeros(64, DType::F32, vb.device())?; + let linear1 = nn::Linear::new(weight1, Some(bias1)); - let linear1 = nn::Linear::new(weight, Some(bias)); + // Activation let prelu = nn::PReLU::new(Tensor::zeros(64, DType::F32, vb.device())?, false); - let weight = Tensor::zeros((64, out_features), DType::F32, vb.device())?; - let bias = Tensor::zeros(64, DType::F32, vb.device())?; - - let linear2 = nn::Linear::new(weight, Some(bias)); + // Second linear layer: 64 -> out_features + let weight2 = Tensor::zeros((out_features, 64), 
DType::F32, vb.device())?; + let bias2 = Tensor::zeros(out_features, DType::F32, vb.device())?; + let linear2 = nn::Linear::new(weight2, Some(bias2)); let mut nn = seq(); nn = nn.add(linear1); @@ -73,10 +76,21 @@ impl DecoderLinear { impl Module for DecoderLinear { fn forward(&self, x: &Tensor) -> Result { - self.nn.forward(x) + log::trace!("[DecoderLinear] input shape: {:?}", x.shape()); + match self.nn.forward(x) { + Ok(output) => { + log::trace!("[DecoderLinear] output shape: {:?}", output.shape()); + Ok(output) + } + Err(e) => { + log::error!("[DecoderLinear] forward pass failed: {:?}", e); + Err(e) + } + } } } + impl fmt::Debug for DecoderLinear { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("DecoderLinear") @@ -198,10 +212,9 @@ struct ModEmbeddingFixFirstK { } impl ModEmbeddingFixFirstK { - fn new(mod_feature_size: usize, out_features: usize, device: &Device) -> Result { + fn new(mod_feature_size: usize, out_features: usize, varbuilder: &nn::VarBuilder) -> Result { let k = 6; - let vb = nn::VarBuilder::zeros(DType::F32, device); - let nn = nn::linear(mod_feature_size - k, out_features - k, vb.pp("linear"))?; + let nn = nn::linear(mod_feature_size - k, out_features - k, varbuilder.pp("linear"))?; Ok(Self { k, nn }) } @@ -243,17 +256,8 @@ pub struct Input26aaModPositionalEncoding { } impl Input26aaModPositionalEncoding { - fn new(out_features: usize, max_len: usize, device: &Device) -> Result { - let mod_hidden = 8; - let mod_nn = ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden, device)?; - let aa_emb = AAEmbedding::new(out_features - mod_hidden, device)?; - let pos_encoder = PositionalEncoding::new(out_features, max_len, device)?; - - Ok(Self { - mod_nn, - aa_emb, - pos_encoder, - }) + fn new(_out_features: usize, _max_len: usize, _device: &Device) -> Result { + todo!("new untrained instance of Input26aaModPositionalEncoding not implemented"); } pub fn from_varstore( @@ -586,10 +590,7 @@ struct SeqCNN { } impl SeqCNN { - pub fn new(embedding_hidden: usize, device: &Device) -> Result { - let varmap = nn::VarMap::new(); - let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); - + pub fn new(embedding_hidden: usize, varbuilder: &nn::VarBuilder) -> Result { let cnn_short = nn::conv1d( embedding_hidden, embedding_hidden, @@ -730,6 +731,7 @@ impl SeqTransformer { /// Construct a new transformer encoder block for sequence modeling. /// /// # Arguments + /// * `varbuilder` - The variable builder for creating the model parameters. /// * `input_dim` - The input embedding dimension (e.g., CNN output). /// * `model_dim` - The internal model dimension of the transformer. /// * `ff_dim` - The feedforward hidden layer dimension. @@ -739,26 +741,25 @@ impl SeqTransformer { /// * `dropout_prob` - Dropout probability. /// * `device` - The device to place the tensors on. 
pub fn new( + varbuilder: &nn::VarBuilder, input_dim: usize, model_dim: usize, ff_dim: usize, num_heads: usize, num_layers: usize, max_len: usize, - dropout_prob: f32, + dropout: f32, device: &Device, ) -> Result { - let varmap = nn::VarMap::new(); - let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); let encoder = TransformerEncoder::new( - &varbuilder, + varbuilder, input_dim, model_dim, ff_dim, num_heads, num_layers, max_len, - dropout_prob, + dropout, device, )?; Ok(Self { encoder, training: true }) @@ -809,9 +810,7 @@ struct SeqAttentionSum { } impl SeqAttentionSum { - pub fn new(hidden_dim: usize, device: &Device) -> Result { - let varmap = nn::VarMap::new(); - let varbuilder = nn::VarBuilder::from_varmap(&varmap, DType::F32, device); + pub fn new(hidden_dim: usize, varbuilder: &nn::VarBuilder) -> Result { let attention = nn::Linear::new( varbuilder.get((1, hidden_dim), "attention.weight")?, None, @@ -1068,7 +1067,7 @@ impl Encoder26aaModCnnTransformerAttnSum { /// Construct a CNN+Transformer+Attention encoder from scratch (no pretrained weights). pub fn new( - device: &Device, + varbuilder: &nn::VarBuilder, mod_hidden_dim: usize, hidden_dim: usize, ff_dim: usize, @@ -1076,12 +1075,14 @@ impl Encoder26aaModCnnTransformerAttnSum { num_layers: usize, max_len: usize, dropout_prob: f32, + device: &Device, ) -> Result { let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; Ok(Self { - mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, device)?, - input_cnn: SeqCNN::new(input_dim, device)?, + mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?, + input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?, input_transformer: SeqTransformer::new( + &varbuilder.pp("input_transformer"), input_dim * 4, hidden_dim, ff_dim, @@ -1091,32 +1092,32 @@ impl Encoder26aaModCnnTransformerAttnSum { dropout_prob, device, )?, - attn_sum: SeqAttentionSum::new(hidden_dim, device)?, + attn_sum: SeqAttentionSum::new(hidden_dim, &varbuilder.pp("attn_sum"))?, }) } pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor) -> Result { let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - println!("Encoder26aaModCnnTransformerAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - mod_x forward time: {:.3?}", start_time.elapsed()); let additional_tensors: Vec<&Tensor> = vec![&mod_x]; let start_time = Instant::now(); let x = aa_one_hot(aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - println!("Encoder26aaModCnnTransformerAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - aa_one_hot forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; - println!("Encoder26aaModCnnTransformerAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_cnn forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.input_transformer.forward(&x)?; - println!("Encoder26aaModCnnTransformerAttnSum::forward - input_transformer forward time: {:.3?}", start_time.elapsed()); + log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_transformer forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = 
self.attn_sum.forward(&x)?; - println!("Encoder26aaModCnnTransformerAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - attn_sum forward time: {:.3?}", start_time.elapsed()); Ok(x) } diff --git a/crates/redeem-properties/src/building_blocks/nn.rs b/crates/redeem-properties/src/building_blocks/nn.rs index 1373032..cbab40e 100644 --- a/crates/redeem-properties/src/building_blocks/nn.rs +++ b/crates/redeem-properties/src/building_blocks/nn.rs @@ -96,7 +96,6 @@ impl TransformerEncoder { for i in 0..num_layers { let layer = TransformerEncoderLayer::new( &varbuilder.pp(&format!("layer_{}", i)), - input_dim, model_dim, ff_dim, num_heads, @@ -110,14 +109,26 @@ impl TransformerEncoder { } pub fn forward_with_mask(&self, x: &Tensor, padding_mask: Option<&Tensor>, training: bool) -> Result { + log::trace!("[TransformerEncoder] input x shape: {:?}", x.shape()); + let (b, t, _) = x.dims3()?; - let pe = self.pos_encoding.i((..t, ..))?.unsqueeze(0)?.broadcast_as((b, t, self.pos_encoding.dim(1)?))?; - let mut out = x + pe; - out = self.dropout.forward(&out?, training); - for layer in &self.layers { - out = layer.forward(&out?, padding_mask, training); + let pe = self.pos_encoding.i((..t, ..))? + .unsqueeze(0)? + .broadcast_as((b, t, self.pos_encoding.dim(1)?))?; + + log::trace!("[TransformerEncoder] positional encoding shape: {:?}", pe.shape()); + + let mut out = x.broadcast_add(&pe)?; + out = self.dropout.forward(&out, training)?; + + log::trace!("[TransformerEncoder] after dropout shape: {:?}", out.shape()); + + for (i, layer) in self.layers.iter().enumerate() { + log::trace!("[TransformerEncoder] applying layer {}", i); + out = layer.forward(&out, padding_mask, training)?; + log::trace!("[TransformerEncoder] output shape after layer {}: {:?}", i, out.shape()); } - Ok(out?) + Ok(out) } } @@ -134,38 +145,41 @@ pub struct TransformerEncoderLayer { impl TransformerEncoderLayer { pub fn new( varbuilder: &VarBuilder, - input_dim: usize, model_dim: usize, ff_dim: usize, num_heads: usize, dropout_prob: f32, ) -> Result { Ok(Self { - self_attn: MultiHeadAttention::new(varbuilder, input_dim, model_dim, num_heads)?, + self_attn: MultiHeadAttention::new(varbuilder, model_dim, model_dim, num_heads)?, ff: FeedForward::new(varbuilder, model_dim, ff_dim)?, norm1: { let weight = varbuilder.get((model_dim,), "norm1.weight")?; let bias = varbuilder.get((model_dim,), "norm1.bias")?; LayerNorm::new(weight, bias, 1e-5) - }, + }, norm2: { let weight = varbuilder.get((model_dim,), "norm2.weight")?; let bias = varbuilder.get((model_dim,), "norm2.bias")?; LayerNorm::new(weight, bias, 1e-5) - }, + }, dropout1: Dropout::new(dropout_prob), dropout2: Dropout::new(dropout_prob), }) } pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>, training: bool) -> Result { + log::trace!("[TransformerEncoderLayer] input x shape: {:?}", x.shape()); let attn = self.self_attn.forward(x, mask)?; - let x = self.norm1.forward(&(x + self.dropout1.forward(&attn, training)?)?)?; + let x = self.norm1.forward(&x.broadcast_add(&self.dropout1.forward(&attn, training)?)?)?; let ff = self.ff.forward(&x)?; - self.norm2.forward(&(x + self.dropout2.forward(&ff, training)?)?) 
+        let result = self.norm2.forward(&x.broadcast_add(&self.dropout2.forward(&ff, training)?)?)?;
+        log::trace!("[TransformerEncoderLayer] output shape: {:?}", result.shape());
+        Ok(result)
     }
 }
 
+
 #[derive(Debug, Clone)]
 pub struct MultiHeadAttention {
     proj_q: Linear,
@@ -188,7 +202,7 @@ impl MultiHeadAttention {
             proj_q: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_q")?,
             proj_k: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_k")?,
             proj_v: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_v")?,
-            proj_out: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_out")?,
+            proj_out: linear_from_varbuilder(varbuilder, model_dim, model_dim, "proj_out")?,
             num_heads,
             head_dim,
         })
@@ -196,20 +210,69 @@ impl MultiHeadAttention {
 
     pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>) -> Result<Tensor> {
         let (b, t, _) = x.dims3()?;
-        let q = self.proj_q.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
-        let k = self.proj_k.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
-        let v = self.proj_v.forward(x)?.reshape((b, t, self.num_heads, self.head_dim))?.transpose(1, 2)?;
+        log::trace!("[MultiHeadAttention] Input shape: b={}, t={}, head_dim={} (num_heads={})", b, t, self.head_dim, self.num_heads);
+
+        let q = self.proj_q.forward(x)?
+            .reshape((b, t, self.num_heads, self.head_dim))?
+            .transpose(1, 2)?
+            .contiguous()?;
+
+        let k = self.proj_k.forward(x)?
+            .reshape((b, t, self.num_heads, self.head_dim))?
+            .transpose(1, 2)?
+            .contiguous()?;
+
+        let v = self.proj_v.forward(x)?
+            .reshape((b, t, self.num_heads, self.head_dim))?
+            .transpose(1, 2)?
+            .contiguous()?;
+
+        log::trace!("[MultiHeadAttention] Q/K/V shape after projection and transpose: {:?}", q.shape());
+
+        let k_t = k.transpose(2, 3)?.contiguous()?;
+        let mut scores = match q.matmul(&k_t) {
+            Ok(s) => (s / (self.head_dim as f64).sqrt())?,
+            Err(e) => {
+                log::error!("[MultiHeadAttention] Failed during matmul for scores: {}", e);
+                return Err(e.into());
+            }
+        };
+
+        log::trace!("[MultiHeadAttention] Attention score shape: {:?}", scores.shape());
 
-        let mut scores = q.matmul(&k.transpose(2, 3)?)? / (self.head_dim as f64).sqrt();
         if let Some(mask) = mask {
+            log::trace!("[MultiHeadAttention] Applying mask");
             let mask = mask.unsqueeze(1)?;
             let scale = Tensor::new(1e9f32, x.device())?;
-            scores = scores?.broadcast_add(&mask.neg()?.mul(&scale)?);
+            scores = match scores.broadcast_add(&mask.neg()?.mul(&scale)?)
{ + Ok(s) => s, + Err(e) => { + log::error!("[MultiHeadAttention] Failed during masking: {}", e); + return Err(e.into()); + } + }; } - let scores = scores?; - let attn = candle_nn::ops::softmax(&scores, scores.dims().len() - 1)?; - let context = attn.matmul(&v)?.transpose(1, 2)?.reshape((b, t, self.num_heads * self.head_dim))?; + let attn = match candle_nn::ops::softmax(&scores, scores.dims().len() - 1) { + Ok(a) => a, + Err(e) => { + log::error!("[MultiHeadAttention] Failed during softmax: {}", e); + return Err(e.into()); + } + }; + + let context = match attn.matmul(&v) { + Ok(ctx) => ctx.transpose(1, 2)?.reshape((b, t, self.num_heads * self.head_dim))?, + Err(e) => { + log::error!("[MultiHeadAttention] Failed during attention context computation: {}", e); + return Err(e.into()); + } + }; + + log::trace!("[MultiHeadAttention] Final context shape: {:?}", context.shape()); self.proj_out.forward(&context) } } @@ -234,7 +297,6 @@ impl FeedForward { } } - fn linear_from_varbuilder( vb: &VarBuilder, in_dim: usize, @@ -246,7 +308,6 @@ fn linear_from_varbuilder( Ok(Linear::new(weight, bias)) } - /// Generate sinusoidal positional encoding like in "Attention is All You Need". pub fn create_sinusoidal_encoding(seq_len: usize, model_dim: usize, device: &Device) -> Result { let mut pe = vec![0f32; seq_len * model_dim]; @@ -258,3 +319,4 @@ pub fn create_sinusoidal_encoding(seq_len: usize, model_dim: usize, device: &Dev } Tensor::from_vec(pe, (seq_len, model_dim), device) } + diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs index 4a06304..463c126 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs @@ -58,6 +58,11 @@ impl ModelInterface for CCSCNNLSTMModel { "ccs_cnn_lstm" } + fn new_untrained(_device: Device) -> Result + { + unimplemented!("Untrained model creation is not implemented for this architecture."); + } + /// Create a new CCSCNNLSTMModel instance model from the given model and constants files. fn new>( model_path: P, diff --git a/crates/redeem-properties/src/models/mod.rs b/crates/redeem-properties/src/models/mod.rs index 9315e42..502cb15 100644 --- a/crates/redeem-properties/src/models/mod.rs +++ b/crates/redeem-properties/src/models/mod.rs @@ -5,3 +5,4 @@ pub mod ccs_cnn_lstm_model; pub mod ccs_model; pub mod ms2_model; pub mod model_interface; +pub mod rt_cnn_transformer_model; diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index b0e7655..8e614ef 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -1,5 +1,5 @@ use crate::{ - building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed}, + building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed}, models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper}, utils::{ data_handling::PeptideData, @@ -7,7 +7,7 @@ use crate::{ peptdeep_utils::{ get_modification_indices, get_modification_string, parse_instrument_index, remove_mass_shift, - } + }, }, }; use anyhow::{Context, Result}; @@ -19,31 +19,33 @@ use std::ops::Index; use std::path::Path; use std::{collections::HashMap, path::PathBuf}; - // Constants const CHARGE_FACTOR: f64 = 0.1; const NCE_FACTOR: f64 = 0.01; - /// Load tensors from a model file. 
-/// 
+///
 /// Supported model formats include:
 /// - PyTorch (.pt, .pth, .pkl)
 /// - SafeTensors (.safetensors)
-/// 
+///
 /// # Arguments
 /// * `model_path` - Path to the model file.
 /// * `device` - Device to load the tensors on.
-/// 
+///
 /// # Returns
 /// A vector of tuples containing the tensor names and their corresponding tensors.
-pub fn load_tensors_from_model<P: AsRef<Path>>(model_path: P, device: &Device) -> Result<Vec<(String, Tensor)>> {
+pub fn load_tensors_from_model<P: AsRef<Path>>(
+    model_path: P,
+    device: &Device,
+) -> Result<Vec<(String, Tensor)>> {
     let path: &Path = model_path.as_ref();
-    let extension = path.extension()
+    let extension = path
+        .extension()
         .and_then(|ext| ext.to_str())
         .unwrap_or("")
         .to_lowercase();
-    
+
     match extension.as_str() {
         "pt" | "pth" | "pkl" => {
             log::trace!("Loading tensors from PyTorch model: {:?}", path);
@@ -65,7 +67,6 @@ pub fn load_tensors_from_model<P: AsRef<Path>>(model_path: P, device: &Device) -
     }
 }
 
-
 /// Represents the type of property to predict.
 #[derive(Clone)]
 pub enum PropertyType {
@@ -85,7 +86,7 @@ impl PropertyType {
 }
 
 /// Represents a single prediction value or a matrix of prediction values.
-/// 
+///
 /// This enum is used to store the output of a model prediction, which can be a single value or a matrix of values. For example, retention time (RT) and collision cross-section (CCS) predictions are single values, while MS2 intensity predictions are matrices.
 #[derive(Clone)]
 pub enum PredictionValue {
@@ -97,11 +98,10 @@ impl PredictionValue {
     // Returns a reference to the element at position (i, j) if it exists
     pub fn get(&self, i: usize, j: usize) -> Option<&f32> {
         match self {
-            PredictionValue::Single(_) => None, 
+            PredictionValue::Single(_) => None,
             PredictionValue::Matrix(vec) => vec.get(i).and_then(|row| row.get(j)),
         }
     }
-
 }
 
 impl Index<usize> for PredictionValue {
@@ -126,9 +126,8 @@ impl Index<(usize, usize)> for PredictionValue {
     }
 }
 
-
 /// Represents the output of a model prediction.
-/// 
+///
 /// This enum is used to store the output of a model prediction, which can be a vector of retention times (RT), collision cross-sections (CCS), or a vector of matrices of MS2 intensities.
 #[derive(Debug, Clone)]
 pub enum PredictionResult {
@@ -150,13 +149,13 @@ impl PredictionResult {
         match self {
             PredictionResult::RTResult(vec) => PredictionValue::Single(vec[index].clone()),
             PredictionResult::IMResult(vec) => PredictionValue::Single(vec[index].clone()),
-            PredictionResult::MS2Result(vec) => PredictionValue::Matrix(vec[index].clone()), 
+            PredictionResult::MS2Result(vec) => PredictionValue::Matrix(vec[index].clone()),
         }
     }
 }
 
 /// Populates a mutable `VarMap` instance with tensors.
-/// 
+///
 /// # Arguments
 /// * `var_map` - A mutable reference to a `VarMap` instance.
 /// * `tensor_data` - A vector of tuples containing the tensor names and their corresponding tensors.
@@ -175,12 +174,10 @@ pub fn create_var_map(
     Ok(())
 }
 
-
 pub trait ModelClone {
     fn clone_box(&self) -> Box<dyn ModelInterface>;
 }
 
-
 impl<T> ModelClone for T
 where
     T: 'static + ModelInterface + Clone + Send + Sync,
@@ -190,7 +187,6 @@ where
     }
 }
 
-
 impl Clone for Box<dyn ModelInterface> {
     fn clone(&self) -> Self {
         self.clone_box()
     }
 }
 
 /// Represents an abstract deep learning model interface.
-/// 
+///
 /// This trait defines the methods and properties that a deep learning model must implement to be used for property prediction tasks.
 pub trait ModelInterface: Send + Sync + ModelClone {
-
     /// Get the property type of the model.
     fn property_type(&self) -> PropertyType;
 
     /// Get the model architecture name.
 fn model_arch(&self) -> &'static str;
 
-    /// Create a new instance of the model.
+    /// Create a new model instance from scratch (no pretrained weights).
+    /// This is typically used when training a new model from scratch.
+    fn new_untrained(device: Device) -> Result<Self>
+    where
+        Self: Sized;
+
+    /// Create a new instance of the model from a pretrained model (.pth or .safetensors) and a constants file.
     fn new<P: AsRef<Path>>(
         model_path: P,
         constants_path: P,
@@ -314,7 +315,8 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                 &vec![c as f64 * CHARGE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
                 device,
-            )?.to_dtype(DType::F32)?;
+            )?
+            .to_dtype(DType::F32)?;
             features.push(charge_tensor);
         }
 
@@ -323,17 +325,16 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                 &vec![n as f64 * NCE_FACTOR; seq_len],
                 &[batch_size, seq_len, 1],
                 device,
-            )?.to_dtype(DType::F32)?;
+            )?
+            .to_dtype(DType::F32)?;
             features.push(nce_tensor);
         }
 
         if let Some(instr) = instrument {
             let instr_idx = parse_instrument_index(instr) as u32;
-            let instr_tensor = Tensor::from_slice(
-                &vec![instr_idx; seq_len],
-                &[batch_size, seq_len, 1],
-                device,
-            )?.to_dtype(DType::F32)?;
+            let instr_tensor =
+                Tensor::from_slice(&vec![instr_idx; seq_len], &[batch_size, seq_len, 1], device)?
+                    .to_dtype(DType::F32)?;
             features.push(instr_tensor);
         }
 
@@ -371,7 +372,9 @@ pub trait ModelInterface: Send + Sync + ModelClone {
             .collect::<Result<Vec<_>>>()?;
 
         if tensors.is_empty() {
-            return Err(anyhow::anyhow!("Encoding batch of peptides failed, the resulting tesnor batch is empty."));
+            return Err(anyhow::anyhow!(
+                "Encoding batch of peptides failed, the resulting tensor batch is empty."
+            ));
         }
 
         let max_len = tensors
@@ -380,25 +383,168 @@ pub trait ModelInterface: Send + Sync + ModelClone {
             .max()
             .unwrap_or(0);
 
-        let padded = tensors
+        let padded = tensors
             .into_par_iter()
             .map(|t| {
                 let (_, seq_len, feat_dim) = t.shape().dims3()?;
                 if seq_len < max_len {
-                    let pad = Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?;
+                    let pad =
+                        Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?;
                     Tensor::cat(&[&t, &pad], 1)
                 } else {
                     Ok(t)
                 }
             })
-            .map(|res| res.map_err(anyhow::Error::from))
+            .map(|res| res.map_err(anyhow::Error::from))
            .collect::<Result<Vec<_>, _>>()?;
 
         Ok(Tensor::cat(&padded, 0)?)
     }
 
+    /// Train the model from scratch using a batch of training data.
+    ///
+    /// This method is similar to `fine_tune`, but assumes that the model was created from `new_untrained`
+    /// and has no pre-existing learned weights.
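    // A minimal usage sketch of this from-scratch path (hypothetical data file and
    // hyperparameters; `RTCNNTFModel` is added later in this patch and the CLI
    // loader `load_peptide_data` in a later patch of this series):
    //
    //     let mut model = RTCNNTFModel::new_untrained(Device::Cpu)?;
    //     let train_set: Vec<PeptideData> = load_peptide_data("train.csv")?;
    //     let mods = load_modifications()?;
    //     model.train(&train_set, None, mods, 64, 1e-3, 10)?;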
+ fn train( + &mut self, + training_data: &Vec, + validation_data: Option<&Vec>, + modifications: HashMap< + (String, Option), + crate::utils::peptdeep_utils::ModificationMap, + >, + batch_size: usize, + learning_rate: f64, + epochs: usize, + ) -> Result<()> { + let num_batches = (training_data.len() + batch_size - 1) / batch_size; + + info!( + "Training {} model from scratch on {} peptide features ({} batches) for {} epochs", + self.get_model_arch(), + training_data.len(), + num_batches, + epochs + ); + + let params = candle_nn::ParamsAdamW { + lr: learning_rate, + ..Default::default() + }; + let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; + + for epoch in 0..epochs { + let progress = Progress::new(num_batches, &format!("[training] Epoch {}: ", epoch)); + let mut total_loss = 0.0; + + training_data + .chunks(batch_size) + .enumerate() + .try_for_each(|(batch_idx, batch_data)| { + let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); + let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); + + let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); + let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; + + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); + let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; + + let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); + let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; + + let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; + + let batch_targets = match self.property_type() { + PropertyType::RT => PredictionResult::RTResult( + batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), + ), + PropertyType::CCS => PredictionResult::IMResult( + batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), + ), + PropertyType::MS2 => { + return Err(anyhow::anyhow!("Training from scratch is not yet implemented for MS2")); + } + }; + + let target_batch = match batch_targets { + PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { + Tensor::new(values.clone(), &self.get_device())? 
+ } + PredictionResult::MS2Result(_) => unreachable!(), + }; + + let predicted = self.forward(&input_batch)?; + let loss = candle_nn::loss::mse(&predicted, &target_batch)?; + opt.backward_step(&loss)?; + + total_loss += loss.to_vec0::().unwrap_or(999.0); + progress.update_description(&format!("[training] Epoch {}: Loss: {:.4}", epoch, loss.to_vec0::()?)); + progress.inc(); + + Ok(()) + })?; + + // Optional validation evaluation + if let Some(val_data) = validation_data { + let peptides: Vec = val_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = val_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); + let mod_sites: Vec = val_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); + + let charges = val_data.iter().filter_map(|p| p.charge).collect::>(); + let charges = if charges.len() == val_data.len() { Some(charges) } else { None }; + + let nces = val_data.iter().filter_map(|p| p.nce).collect::>(); + let nces = if nces.len() == val_data.len() { Some(nces) } else { None }; + + let instruments = val_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); + let instruments = if instruments.len() == val_data.len() { Some(instruments) } else { None }; + + let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; + + let val_targets = match self.property_type() { + PropertyType::RT => PredictionResult::RTResult( + val_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), + ), + PropertyType::CCS => PredictionResult::IMResult( + val_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), + ), + PropertyType::MS2 => { + return Err(anyhow::anyhow!("Validation not supported for MS2 yet")); + } + }; + + let target_val = match val_targets { + PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { + Tensor::new(values.clone(), &self.get_device())? + } + PredictionResult::MS2Result(_) => unreachable!(), + }; + + let predicted = self.forward(&input_val)?; + let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; + let val_loss_val = val_loss.to_vec0::()?; + + info!("[validation] Epoch {}: Validation Loss: {:.4}", epoch, val_loss_val); + + let avg_loss = total_loss / num_batches as f32; + progress.update_description(&format!("[training] Epoch {}: Avg. Loss: {:.4} | Val. Loss: {:.4}", epoch, avg_loss, val_loss_val)); + progress.finish(); + } + else + { + let avg_loss = total_loss / num_batches as f32; + progress.update_description(&format!("[training] Epoch {}: Avg. Loss: {:.4}", epoch, avg_loss)); + progress.finish(); + } + } + + Ok(()) + } + /// Fine-tune the model on a batch of training data. - /// + /// /// # Arguments /// * `training_data` - A vector of `PeptideData` instances representing the training data. /// * `modifications` - A map of modifications and their corresponding feature vectors. 
@@ -426,86 +572,146 @@ pub trait ModelInterface: Send + Sync + ModelClone { full_batches } }; - + info!( "Fine-tuning {} model on {} peptide features ({} batches) for {} epochs", - self.get_model_arch(), training_data.len(), num_batches, epochs + self.get_model_arch(), + training_data.len(), + num_batches, + epochs ); - + let params = candle_nn::ParamsAdamW { lr: learning_rate, ..Default::default() }; let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; - + for epoch in 0..epochs { let progress = Progress::new(num_batches, &format!("[fine-tuning] Epoch {}: ", epoch)); let mut total_loss = 0.0; - + for batch_idx in 0..num_batches { let start = batch_idx * batch_size; let end = (start + batch_size).min(training_data.len()); let batch_data = &training_data[start..end]; - - let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); - let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - + + let peptides: Vec = batch_data + .iter() + .map(|p| remove_mass_shift(&p.sequence)) + .collect(); + let mods: Vec = batch_data + .iter() + .map(|p| get_modification_string(&p.sequence, &modifications)) + .collect(); + let mod_sites: Vec = batch_data + .iter() + .map(|p| get_modification_indices(&p.sequence)) + .collect(); + + let charges = batch_data + .iter() + .filter_map(|p| p.charge) + .collect::>(); + let charges = if charges.len() == batch_data.len() { + Some(charges) + } else { + None + }; + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - - let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; - - log::trace!("[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", input_batch.shape(), input_batch.device()); - + let nces = if nces.len() == batch_data.len() { + Some(nces) + } else { + None + }; + + let instruments = batch_data + .iter() + .filter_map(|p| p.instrument.clone()) + .collect::>(); + let instruments = if instruments.len() == batch_data.len() { + Some(instruments) + } else { + None + }; + + let input_batch = + self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; + + log::trace!( + "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", + input_batch.shape(), + input_batch.device() + ); + let batch_targets = match self.property_type() { PropertyType::RT => PredictionResult::RTResult( - batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), + batch_data + .iter() + .map(|p| p.retention_time.unwrap_or_default()) + .collect(), ), PropertyType::CCS => PredictionResult::IMResult( - batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), + batch_data + .iter() + .map(|p| p.ion_mobility.unwrap_or_default()) + .collect(), ), PropertyType::MS2 => PredictionResult::MS2Result( - batch_data.iter().map(|p| p.ms2_intensities.clone().unwrap_or_default()).collect(), + batch_data + .iter() + .map(|p| 
p.ms2_intensities.clone().unwrap_or_default()) + .collect(), ), }; - + let target_batch = match batch_targets { - PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { + PredictionResult::RTResult(ref values) + | PredictionResult::IMResult(ref values) => { Tensor::new(values.clone(), &self.get_device())? } PredictionResult::MS2Result(ref spectra) => { let max_len = spectra.iter().map(|s| s.len()).max().unwrap_or(1); - let feature_dim = spectra.get(0).and_then(|s| s.get(0)).map(|v| v.len()).unwrap_or(1); + let feature_dim = spectra + .get(0) + .and_then(|s| s.get(0)) + .map(|v| v.len()) + .unwrap_or(1); let mut padded_spectra = spectra.clone(); for s in &mut padded_spectra { s.resize(max_len, vec![0.0; feature_dim]); } - Tensor::new(padded_spectra.concat(), &self.get_device())?.reshape((batch_data.len(), max_len, feature_dim))? + Tensor::new(padded_spectra.concat(), &self.get_device())?.reshape(( + batch_data.len(), + max_len, + feature_dim, + ))? } }; - + let predicted = self.forward(&input_batch)?; let loss = candle_nn::loss::mse(&predicted, &target_batch)?; opt.backward_step(&loss)?; - + total_loss += loss.to_vec0::().unwrap_or(990.0); - progress.update_description(&format!("[fine-tuning] Epoch {}: Loss: {}", epoch, loss.to_vec0::()?)); + progress.update_description(&format!( + "[fine-tuning] Epoch {}: Loss: {}", + epoch, + loss.to_vec0::()? + )); progress.inc(); } - + let avg_loss = total_loss / num_batches as f32; - progress.update_description(&format!("[fine-tuning] Epoch {}: Avg. Batch Loss: {}", epoch, avg_loss)); + progress.update_description(&format!( + "[fine-tuning] Epoch {}: Avg. Batch Loss: {}", + epoch, avg_loss + )); progress.finish(); } - + Ok(()) } diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 811a50c..42b4d57 100644 --- a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -60,6 +60,11 @@ impl ModelInterface for MS2BertModel { "ms2_bert" } + fn new_untrained(_device: Device) -> Result + { + unimplemented!("Untrained model creation is not implemented for this architecture."); + } + /// Create a new MS2BERT model from the given model and constants files. fn new>( model_path: P, diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs index e7ae329..e73f6f4 100644 --- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs @@ -47,6 +47,11 @@ impl ModelInterface for RTCNNLSTMModel { "rt_cnn_lstm" } + fn new_untrained(_device: Device) -> Result + { + unimplemented!("Untrained model creation is not implemented for this architecture."); + } + /// Create a new RTCNNLSTMModel from the given model and constants files. 
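    // For reference, the tests added later in this patch construct the LSTM variant
    // with essentially this call (AlphaPeptDeep rt.pth checkpoint and its
    // model_const.yaml as the two paths):
    //
    //     let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device)?;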
     fn new<P: AsRef<Path>>(
         model_path: P,
diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
new file mode 100644
index 0000000..789c127
--- /dev/null
+++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
@@ -0,0 +1,333 @@
+use anyhow::Result;
+use candle_core::{DType, Device, IndexOp, Tensor};
+use candle_nn::{Dropout, Module, VarBuilder, VarMap};
+use std::collections::HashMap;
+use std::path::Path;
+
+use crate::building_blocks::building_blocks::{
+    DecoderLinear, Encoder26aaModCnnTransformerAttnSum, MOD_FEATURE_SIZE,
+};
+use crate::building_blocks::nn;
+use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map};
+use crate::utils::peptdeep_utils::{
+    load_mod_to_feature,
+    parse_model_constants, ModelConstants,
+};
+
+// Main Model Struct
+
+#[derive(Clone)]
+/// Represents a CNN-TF Retention Time model.
+pub struct RTCNNTFModel {
+    var_store: VarBuilder<'static>,
+    varmap: VarMap,
+    constants: ModelConstants,
+    device: Device,
+    mod_to_feature: HashMap<String, Vec<f32>>,
+    dropout: Dropout,
+    rt_encoder: Encoder26aaModCnnTransformerAttnSum,
+    rt_decoder: DecoderLinear,
+    is_training: bool,
+}
+
+// Automatically implement Send and Sync if all fields are Send and Sync
+unsafe impl Send for RTCNNTFModel {}
+unsafe impl Sync for RTCNNTFModel {}
+
+// Core Model Implementation
+
+impl ModelInterface for RTCNNTFModel {
+    fn property_type(&self) -> PropertyType {
+        PropertyType::RT
+    }
+
+    fn model_arch(&self) -> &'static str {
+        "rt_cnn_tf"
+    }
+
+    fn new_untrained(device: Device) -> Result<Self> {
+        let mut varmap = VarMap::new();
+        let varbuilder = VarBuilder::from_varmap(&varmap, DType::F32, &device);
+
+        let rt_encoder = Encoder26aaModCnnTransformerAttnSum::new(
+            &varbuilder,
+            8,    // mod_hidden_dim
+            140,  // hidden_dim
+            256,  // ff_dim
+            4,    // num_heads
+            2,    // num_layers
+            100,  // max_len
+            0.1,  // dropout_prob
+            &device,
+        )?;
+
+        let rt_decoder = DecoderLinear::new(140, 1, &varbuilder)?;
+        let constants = ModelConstants::default();
+        let mod_to_feature = load_mod_to_feature(&constants)?;
+
+        Ok(Self {
+            var_store: VarBuilder::from_varmap(&varmap, DType::F32, &device),
+            varmap,
+            constants,
+            device,
+            mod_to_feature,
+            dropout: Dropout::new(0.1),
+            rt_encoder,
+            rt_decoder,
+            is_training: true,
+        })
+    }
+
+    /// Create a new RTCNNTFModel from the given model and constants files.
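    // A usage sketch for the pretrained path below, assuming a checkpoint whose
    // tensor names match the `rt_encoder.*` / `rt_decoder.*` keys (the paths shown
    // are the AlphaPeptDeep test fixtures used elsewhere in this crate):
    //
    //     let model = RTCNNTFModel::new(
    //         "data/models/alphapeptdeep/generic/rt.pth",
    //         "data/models/alphapeptdeep/generic/rt.pth.model_const.yaml",
    //         0, 8, 4, true, Device::Cpu,
    //     )?;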
+ fn new>( + model_path: P, + constants_path: P, + _fixed_sequence_len: usize, + _num_frag_types: usize, + _num_modloss_types: usize, + _mask_modloss: bool, + device: Device, + ) -> Result { + let tensor_data = load_tensors_from_model(model_path.as_ref(), &device)?; + let mut varmap = candle_nn::VarMap::new(); + create_var_map(&mut varmap, tensor_data, &device)?; + let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device); + + let constants: ModelConstants = + parse_model_constants(constants_path.as_ref().to_str().unwrap())?; + + let mod_to_feature = load_mod_to_feature(&constants)?; + let dropout = Dropout::new(0.1); + + let rt_encoder = Encoder26aaModCnnTransformerAttnSum::from_varstore( + &var_store, + 8, // mod_hidden_dim + 140, // hidden_dim + 256, // ff_dim + 4, // num_heads + 2, // num_layers + 100, // max_len (set appropriately for your sequence length) + 0.1, // dropout_prob + vec!["rt_encoder.mod_nn.nn.weight"], + vec![ + "rt_encoder.input_cnn.cnn_short.weight", + "rt_encoder.input_cnn.cnn_medium.weight", + "rt_encoder.input_cnn.cnn_long.weight", + ], + vec![ + "rt_encoder.input_cnn.cnn_short.bias", + "rt_encoder.input_cnn.cnn_medium.bias", + "rt_encoder.input_cnn.cnn_long.bias", + ], + "rt_encoder.input_transformer", + vec!["rt_encoder.attn_sum.attn.0.weight"], + &device, + )?; + + + let rt_decoder = DecoderLinear::from_varstore( + &var_store, + 140, + 1, + vec!["rt_decoder.nn.0.weight", "rt_decoder.nn.1.weight", "rt_decoder.nn.2.weight"], + vec!["rt_decoder.nn.0.bias", "rt_decoder.nn.2.bias"] + )?; + + Ok(Self { + var_store, + varmap, + constants, + device, + mod_to_feature, + dropout, + rt_encoder, + rt_decoder, + is_training: true, + }) + } + + fn forward(&self, xs: &Tensor) -> Result { + let aa_indices_out = xs.i((.., .., 0))?; + let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; + log::trace!("[RTCNNTFModel] aa_indices_out: {:?}, mod_x_out: {:?}", aa_indices_out, mod_x_out); + let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; + log::trace!("[RTCNNTFModel] x.shape after rt_encoder: {:?}", x.shape()); + let x = self.dropout.forward(&x, self.is_training)?; + log::trace!("[RTCNNTFModel] x.shape after dropout: {:?}", x.shape()); + let x = self.rt_decoder.forward(&x)?; + log::trace!("[RTCNNTFModel] x.shape after rt_decoder: {:?}", x.shape()); + Ok(x.squeeze(1)?) + } + + /// Set model to evaluation mode for inference + /// This disables dropout and other training-specific layers. + fn set_evaluation_mode(&mut self) { + // println!("Setting evaluation mode"); + self.is_training = false; + } + + /// Set model to training mode for training + /// This enables dropout and other training-specific layers. + fn set_training_mode(&mut self) { + self.is_training = true; + } + + fn get_property_type(&self) -> String { + self.property_type().clone().as_str().to_string() + } + + fn get_model_arch(&self) -> String { + self.model_arch().to_string() + } + + fn get_device(&self) -> &Device { + &self.device + } + + fn get_mod_element_count(&self) -> usize { + self.constants.mod_elements.len() + } + + fn get_mod_to_feature(&self) -> &HashMap> { + &self.mod_to_feature + } + + fn get_min_pred_intensity(&self) -> f32 { + unimplemented!("Method not implemented for architecture: {}", self.model_arch()) + } + + fn get_mut_varmap(&mut self) -> &mut VarMap { + &mut self.varmap + } + + /// Print a summary of the model's constants. 
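    // Sketch of a full prediction pass through this model (peptide, modification,
    // and site strings follow the format used by the tests at the bottom of this
    // file; the output is assumed to be one RT value per peptide):
    //
    //     let xs = model.encode_peptides(&peptides, &mods, &mod_sites, None, None, None)?;
    //     let rt = model.forward(&xs)?;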
+ fn print_summary(&self) { + println!("RTModel Summary:"); + println!("AA Embedding Size: {}", self.constants.aa_embedding_size.unwrap()); + println!("Charge Factor: {:?}", self.constants.charge_factor); + println!("Instruments: {:?}", self.constants.instruments); + println!("Max Instrument Num: {}", self.constants.max_instrument_num); + println!("Mod Elements: {:?}", self.constants.mod_elements); + println!("NCE Factor: {:?}", self.constants.nce_factor); + } + + /// Print the model's weights. + fn print_weights(&self) { + todo!("Implement print_weights for RTCNNTFModel"); + } + + +} + +// Module Trait Implementation + +// impl Module for RTCNNLSTMModel { +// fn forward(&self, input: &Tensor) -> Result { +// ModelInterface::forward(self, input) +// } +// } + + +#[cfg(test)] +mod tests { + use crate::models::model_interface::ModelInterface; + use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; + use candle_core::Device; + use std::path::PathBuf; + + use super::*; + + #[test] + fn test_parse_model_constants() { + let path = "data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"; + let result = parse_model_constants(path); + assert!(result.is_ok()); + let constants = result.unwrap(); + assert_eq!(constants.aa_embedding_size.unwrap(), 27); + assert_eq!(constants.charge_factor, Some(0.1)); + assert_eq!(constants.instruments.len(), 4); + assert_eq!(constants.max_instrument_num, 8); + assert_eq!(constants.mod_elements.len(), 109); + assert_eq!(constants.nce_factor, Some(0.01)); + } + + #[test] + fn test_encode_peptides() { + let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); + let constants_path = + PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); + let device = Device::Cpu; + let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device).unwrap(); + + let peptide_sequences = "AGHCEWQMKYR"; + let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; + let mod_sites = "0;4;8"; + // let charge = Some(2); + // let nce = Some(20); + // let instrument = Some("QE"); + + let result = + model.encode_peptide(&peptide_sequences, mods, mod_sites, None, None, None); + + println!("{:?}", result); + + // assert!(result.is_ok()); + // let encoded_peptides = result.unwrap(); + // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1)); + } + + #[test] + fn test_encode_peptides_batch() { + + let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); + let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); + let device = Device::Cpu; + + let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device.clone()).unwrap(); + + // Batched input + let peptide_sequences = vec![ + "ACDEFGHIK".to_string(), + "AGHCEWQMKYR".to_string(), + ]; + let mods = vec![ + "Carbamidomethyl@C".to_string(), + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), + ]; + let mod_sites = vec![ + "1".to_string(), + "0;4;8".to_string(), + ]; + + println!("Peptides: {:?}", peptide_sequences); + println!("Mods: {:?}", mods); + println!("Mod sites: {:?}", mod_sites); + + + let result = model.encode_peptides( + &peptide_sequences, + &mods, + &mod_sites, + None, + None, + None, + ); + + assert!(result.is_ok()); + let tensor = result.unwrap(); + println!("Batched encoded tensor shape: {:?}", tensor.shape()); + + let (batch, seq_len, feat_dim) = tensor.shape().dims3().unwrap(); + assert_eq!(batch, 2); // two peptides + assert!(seq_len >= 11); // 
padded to max length + assert!(feat_dim > 1); // includes aa + mod features + } + + + + +} diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs index d6cc501..dd9bcab 100644 --- a/crates/redeem-properties/src/models/rt_model.rs +++ b/crates/redeem-properties/src/models/rt_model.rs @@ -12,11 +12,11 @@ use crate::utils::peptdeep_utils::ModificationMap; // Enum for different types of retention time models pub enum RTModelArch { RTCNNLSTM, - // Add other architectures here as needed + RTCNNTF } // Constants for different types of retention time models -pub const RTMODEL_ARCHS: &[&str] = &["rt_cnn_lstm"]; +pub const RTMODEL_ARCHS: &[&str] = &["rt_cnn_lstm", "rt_cnn_tf"]; // A wrapper struct for RT models pub struct RTModelWrapper { diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs index 2b2d6f1..8b274b5 100644 --- a/crates/redeem-properties/src/utils/peptdeep_utils.rs +++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs @@ -113,6 +113,36 @@ struct ModFeature { // Add other fields if needed } +impl Default for ModelConstants { + fn default() -> Self { + Self { + aa_embedding_size: Some(27), + charge_factor: Some(0.1), + instruments: vec![ + "QE".into(), + "Lumos".into(), + "timsTOF".into(), + "SciexTOF".into(), + ], + max_instrument_num: 8, + mod_elements: vec![ + "C", "H", "N", "O", "P", "S", "B", "F", "I", "K", "U", "V", "W", "X", "Y", "Ac", + "Ag", "Al", "Am", "Ar", "As", "At", "Au", "Ba", "Be", "Bi", "Bk", "Br", "Ca", "Cd", + "Ce", "Cf", "Cl", "Cm", "Co", "Cr", "Cs", "Cu", "Dy", "Er", "Es", "Eu", "Fe", "Fm", + "Fr", "Ga", "Gd", "Ge", "He", "Hf", "Hg", "Ho", "In", "Ir", "Kr", "La", "Li", "Lr", + "Lu", "Md", "Mg", "Mn", "Mo", "Na", "Nb", "Nd", "Ne", "Ni", "No", "Np", "Os", "Pa", + "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu", "Ra", "Rb", "Re", "Rh", "Rn", "Ru", "Sb", + "Sc", "Se", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm", + "Xe", "Yb", "Zn", "Zr", "2H", "13C", "15N", "18O", "?" + ] + .into_iter() + .map(String::from) + .collect(), + nce_factor: Some(0.01), + } + } +} + /// Parse the model constants from a YAML file. 
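// The Default impl above mirrors the AlphaPeptDeep generic constants, so an
// untrained model can be constructed without a YAML file; a small sketch:
//
//     let constants = ModelConstants::default();
//     assert_eq!(constants.charge_factor, Some(0.1));
//     assert_eq!(constants.max_instrument_num, 8);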
pub fn parse_model_constants(path: &str) -> Result { let f = std::fs::File::open(path).map_err(|e| Error::msg(e.to_string()))?; diff --git a/crates/redeem-properties/src/utils/utils.rs b/crates/redeem-properties/src/utils/utils.rs index e1c105e..34572a6 100644 --- a/crates/redeem-properties/src/utils/utils.rs +++ b/crates/redeem-properties/src/utils/utils.rs @@ -1,6 +1,64 @@ use candle_core::Device; use candle_core::utils::{cuda_is_available, metal_is_available}; use anyhow::{Result, anyhow}; +use std::f64::consts::PI; + +pub trait LRScheduler { + /// Update the learning rate based on the current step + fn step(&mut self); + + /// Get the current learning rate + fn get_last_lr(&self) -> f64; +} + +pub struct CosineWithWarmup { + initial_lr: f64, + current_step: usize, + num_warmup_steps: usize, + num_training_steps: usize, + num_cycles: f64, +} + +impl CosineWithWarmup { + pub fn new( + initial_lr: f64, + num_warmup_steps: usize, + num_training_steps: usize, + num_cycles: f64, + ) -> Self { + Self { + initial_lr, + current_step: 0, + num_warmup_steps: num_warmup_steps, + num_training_steps, + num_cycles, + } + } + + fn get_lr(&self) -> f64 { + if self.current_step < self.num_warmup_steps { + // Linear warmup + return self.initial_lr * (self.current_step as f64) / (self.num_warmup_steps as f64); + } + + let progress = (self.current_step - self.num_warmup_steps) as f64 + / (self.num_training_steps - self.num_warmup_steps) as f64; + + // Cosine decay + let cosine_decay = 0.5 * (1.0 + (PI * self.num_cycles * 2.0 * progress).cos()); + self.initial_lr * cosine_decay.max(1e-10) + } +} + +impl LRScheduler for CosineWithWarmup { + fn step(&mut self) { + self.current_step += 1; + } + + fn get_last_lr(&self) -> f64 { + self.get_lr() + } +} /// Converts a device string to a Candle Device. 
/// From 50c4a070b8f61a2189a70d1943cad6e69d0010fa Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 13:29:20 -0400 Subject: [PATCH 17/75] feat: Add new modules for training and loading data in redeem-cli --- Cargo.toml | 2 +- crates/redeem-cli/Cargo.toml | 27 ++++ crates/redeem-cli/src/lib.rs | 1 + crates/redeem-cli/src/main.rs | 138 ++++++++++++++++++ crates/redeem-cli/src/properties/load_data.rs | 69 +++++++++ crates/redeem-cli/src/properties/mod.rs | 2 + .../redeem-cli/src/properties/train/input.rs | 88 +++++++++++ crates/redeem-cli/src/properties/train/mod.rs | 2 + .../src/properties/train/trainer.rs | 60 ++++++++ 9 files changed, 388 insertions(+), 1 deletion(-) create mode 100644 crates/redeem-cli/Cargo.toml create mode 100644 crates/redeem-cli/src/lib.rs create mode 100644 crates/redeem-cli/src/main.rs create mode 100644 crates/redeem-cli/src/properties/load_data.rs create mode 100644 crates/redeem-cli/src/properties/mod.rs create mode 100644 crates/redeem-cli/src/properties/train/input.rs create mode 100644 crates/redeem-cli/src/properties/train/mod.rs create mode 100644 crates/redeem-cli/src/properties/train/trainer.rs diff --git a/Cargo.toml b/Cargo.toml index c196489..8539475 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [ "crates/redeem-classifiers", +members = [ "crates/redeem-classifiers", "crates/redeem-cli", "crates/redeem-properties" ] diff --git a/crates/redeem-cli/Cargo.toml b/crates/redeem-cli/Cargo.toml new file mode 100644 index 0000000..f75a161 --- /dev/null +++ b/crates/redeem-cli/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "redeem-cli" +version = "0.1.0" +edition = "2024" + +[[bin]] +name = "redeem" +path = "src/main.rs" + +[dependencies] +redeem-properties = { path = "../redeem-properties" } +env_logger = "0.11.8" +log = "0.4" +clap = { version="4.0", features = ["cargo", "unicode"] } +anyhow = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +csv = "1.1" + +[dependencies.candle-core] +version = "0.8.4" +default-features = false +features = [] + +[features] +default = [] +cuda = ["candle-core/cuda"] \ No newline at end of file diff --git a/crates/redeem-cli/src/lib.rs b/crates/redeem-cli/src/lib.rs new file mode 100644 index 0000000..76ebb50 --- /dev/null +++ b/crates/redeem-cli/src/lib.rs @@ -0,0 +1 @@ +pub mod properties; \ No newline at end of file diff --git a/crates/redeem-cli/src/main.rs b/crates/redeem-cli/src/main.rs new file mode 100644 index 0000000..a6f8874 --- /dev/null +++ b/crates/redeem-cli/src/main.rs @@ -0,0 +1,138 @@ +use clap::{Arg, Command, ArgMatches, ValueHint}; +use log::LevelFilter; +use std::path::PathBuf; +use anyhow::Result; + +use redeem_cli::properties::train::input::{self, PropertyTrainConfig}; +use redeem_cli::properties::train::trainer; + +fn main() -> Result<()> { + env_logger::Builder::default() + .filter_level(LevelFilter::Error) + .parse_env(env_logger::Env::default().filter_or("REDEEM_LOG", "error,redeem=info")) + .init(); + + let matches = Command::new("redeem") + .version(clap::crate_version!()) + .author("Justin Sing ") + .about("\u{1F9EA} ReDeeM CLI - Modular Deep Learning Tools for Proteomics") + .subcommand_required(true) + .arg_required_else_help(true) + .subcommand( + Command::new("properties") + .about("Train or run peptide property prediction models") + .subcommand( + Command::new("train") + .about("Train a new property prediction model from scratch") + .arg( + Arg::new("config") + .help("Path to training configuration file") + .required(true) 
+ .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("train_data") + .short('d') + .long("train_data") + .value_parser(clap::builder::NonEmptyStringValueParser::new()) + .help( + "Path to training data. Overrides the training data file \ + specified in the configuration file.", + ) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("validation_data") + .short('v') + .long("validation_data") + .value_parser(clap::builder::NonEmptyStringValueParser::new()) + .help( + "Path to validation data. Overrides the validation data file \ + specified in the configuration file.", + ) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("output_file") + .short('o') + .long("output_file") + .value_parser(clap::builder::NonEmptyStringValueParser::new()) + .help( + "File path that the safetensors trained model will be written to. \ + Overrides the directory specified in the configuration file.", + ) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("model_arch") + .short('m') + .long("model_arch") + .help( + "Model architecture to train. \ + Overrides the model architecture specified in the configuration file.", + ) + .value_parser([ + "rt_cnn_lstm", + "rt_cnn_tf", + "ms2_bert", + "ccs_cnn_lstm", + ]) + .required(false) + ) + .help_template( + "{usage-heading} {usage}\n\n\ + {about-with-newline}\n\ + Written by {author-with-newline}Version {version}\n\n\ + {all-args}{after-help}", + ), + ), + ) + .subcommand( + Command::new("classifiers") + .about("Run classification tools such as rescoring") + .subcommand( + Command::new("rescore") + .about("Run rescoring tool with specified configuration") + .arg( + Arg::new("config") + .help("Path to classifier configuration file") + .required(true) + .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ), + ), + ) + .get_matches(); + + match matches.subcommand() { + Some(("properties", sub_m)) => handle_properties(sub_m), + Some(("classifiers", sub_m)) => handle_classifiers(sub_m), + _ => unreachable!("Subcommand is required by CLI configuration"), + } +} + +fn handle_properties(matches: &ArgMatches) -> Result<()> { + match matches.subcommand() { + Some(("train", train_matches)) => { + let config_path: &PathBuf = train_matches.get_one("config").unwrap(); + println!("[ReDeeM::Properties] Training from config: {:?}", config_path); + let params: PropertyTrainConfig = input::PropertyTrainConfig::from_arguments(config_path, train_matches)?; + let _ = trainer::run_training(¶ms); + Ok(()) + } + _ => unreachable!(), + } +} + +fn handle_classifiers(matches: &ArgMatches) -> Result<()> { + match matches.subcommand() { + Some(("rescore", rescore_matches)) => { + let config_path: &PathBuf = rescore_matches.get_one("config").unwrap(); + println!("[ReDeeM::Classifiers] Rescoring using config: {:?}", config_path); + // Call your classifier logic using config_path + Ok(()) + } + _ => unreachable!(), + } +} diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs new file mode 100644 index 0000000..7be1cfb --- /dev/null +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -0,0 +1,69 @@ +use std::fs::File; +use std::path::Path; +use std::io::BufReader; +use anyhow::{Result, Context}; +use csv::ReaderBuilder; +use redeem_properties::utils::data_handling::PeptideData; + +/// Load peptide training data from a CSV or TSV file. +/// +/// Automatically determines the delimiter and supports RT models. 
+/// Currently expects columns: "sequence", "retention time" (others optional). +/// +/// # Arguments +/// * `path` - Path to the input CSV/TSV file +/// +/// # Returns +/// Vector of parsed `PeptideData` records +pub fn load_peptide_data>(path: P) -> Result> { + let file = File::open(&path).with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?; + let reader = BufReader::new(file); + + let is_tsv = path.as_ref().extension().map(|e| e == "tsv").unwrap_or(false); + let delimiter = if is_tsv { b'\t' } else { b',' }; + + let mut rdr = ReaderBuilder::new() + .delimiter(delimiter) + .has_headers(true) + .from_reader(reader); + + let headers = rdr.headers()?.clone(); + + let mut peptides = Vec::new(); + for result in rdr.records() { + let record = result?; + + let sequence = record + .get(headers.iter().position(|h| h == "sequence").unwrap_or(2)) + .unwrap_or("") + .to_string(); + + let retention_time = record + .get(headers.iter().position(|h| h == "retention time").unwrap_or(3)) + .and_then(|s| s.parse::().ok()); + + let charge = record + .get(headers.iter().position(|h| h == "charge").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()); + + let nce = record + .get(headers.iter().position(|h| h == "nce").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()); + + let instrument = record + .get(headers.iter().position(|h| h == "instrument").unwrap_or(usize::MAX)) + .map(|s| s.to_string()); + + peptides.push(PeptideData::new( + &sequence, + charge, + nce, + instrument.as_deref(), + retention_time, + None, + None, + )); + } + + Ok(peptides) +} diff --git a/crates/redeem-cli/src/properties/mod.rs b/crates/redeem-cli/src/properties/mod.rs new file mode 100644 index 0000000..eb69af4 --- /dev/null +++ b/crates/redeem-cli/src/properties/mod.rs @@ -0,0 +1,2 @@ +pub mod train; +pub mod load_data; \ No newline at end of file diff --git a/crates/redeem-cli/src/properties/train/input.rs b/crates/redeem-cli/src/properties/train/input.rs new file mode 100644 index 0000000..0a9fe59 --- /dev/null +++ b/crates/redeem-cli/src/properties/train/input.rs @@ -0,0 +1,88 @@ +use serde::Deserialize; +use std::fs; +use std::path::PathBuf; +use clap::ArgMatches; +use anyhow::{Context, Result}; + +#[derive(Debug, Deserialize, Clone)] +pub struct PropertyTrainConfig { + pub train_data: String, + pub validation_data: Option, + pub output_file: String, + pub model_arch: String, + pub device: String, + pub batch_size: usize, + pub learning_rate: f32, + pub epochs: usize, + pub instrument: String, + pub nce: i32, +} + +impl Default for PropertyTrainConfig { + fn default() -> Self { + PropertyTrainConfig { + train_data: String::new(), + validation_data: None, + output_file: String::from("rt_cnn_tf.safetensors"), + model_arch: String::from("rt_cnn_tf"), + device: String::from("cpu"), + batch_size: 64, + learning_rate: 1e-3, + epochs: 10, + instrument: String::from("QE"), + nce: 20, + } + } +} + +impl PropertyTrainConfig { + pub fn from_arguments(config_path: &PathBuf, matches: &ArgMatches) -> Result { + let config_json = fs::read_to_string(config_path) + .with_context(|| format!("Failed to read config file: {:?}", config_path))?; + + let mut config: PropertyTrainConfig = serde_json::from_str(&config_json) + .unwrap_or_else(|_| PropertyTrainConfig::default()); + + // Apply CLI overrides + if let Some(train_data) = matches.get_one::("train_data") { + validate_tsv_or_csv_file(train_data)?; + config.train_data = train_data.clone().to_string(); + } else { + validate_tsv_or_csv_file(&config.train_data)?; + } + + 
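        // For reference, a config file consumed by this function might look like the
        // following (illustrative values; keys mirror the PropertyTrainConfig fields):
        //
        //     {
        //       "train_data": "train.csv",
        //       "validation_data": "val.csv",
        //       "output_file": "rt_cnn_tf.safetensors",
        //       "model_arch": "rt_cnn_tf",
        //       "device": "cpu",
        //       "batch_size": 64,
        //       "learning_rate": 0.001,
        //       "epochs": 10,
        //       "instrument": "QE",
        //       "nce": 20
        //     }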
+        if let Some(validation_data) = matches.get_one::<String>("validation_data") {
+            validate_tsv_or_csv_file(validation_data)?;
+            config.validation_data = Some(validation_data.clone());
+        } else if let Some(val_data) = &config.validation_data {
+            validate_tsv_or_csv_file(val_data)?;
+        }
+
+        if let Some(output_file) = matches.get_one::<String>("output_file") {
+            config.output_file = output_file.clone();
+        }
+
+        if let Some(model_arch) = matches.get_one::<String>("model_arch") {
+            config.model_arch = model_arch.clone();
+        }
+
+        Ok(config)
+    }
+}
+
+
+pub fn validate_tsv_or_csv_file(path: &str) -> Result<()> {
+    let pb = PathBuf::from(path);
+
+    let ext = pb.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase());
+    match ext.as_deref() {
+        Some("tsv") | Some("csv") => {}
+        _ => anyhow::bail!("File must have a .tsv or .csv extension: {}", path),
+    }
+
+    if !pb.exists() {
+        anyhow::bail!("File does not exist: {}", path);
+    }
+
+    Ok(())
+}
diff --git a/crates/redeem-cli/src/properties/train/mod.rs b/crates/redeem-cli/src/properties/train/mod.rs
new file mode 100644
index 0000000..d60a05a
--- /dev/null
+++ b/crates/redeem-cli/src/properties/train/mod.rs
@@ -0,0 +1,2 @@
+pub mod input;
+pub mod trainer;
\ No newline at end of file
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
new file mode 100644
index 0000000..ebd5f8b
--- /dev/null
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -0,0 +1,60 @@
+use anyhow::{Context, Result};
+use input::PropertyTrainConfig;
+use load_data::load_peptide_data;
+use redeem_properties::models::model_interface::ModelInterface;
+use redeem_properties::models::{rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel};
+use redeem_properties::utils::data_handling::PeptideData;
+use redeem_properties::utils::peptdeep_utils::load_modifications;
+use redeem_properties::utils::utils::get_device;
+use std::path::PathBuf;
+use candle_core::Device;
+
+use crate::properties::load_data;
+
+use super::input;
+
+pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
+
+    // Load training data
+    let train_peptides: Vec<PeptideData> = load_peptide_data(&config.train_data)?;
+    println!("Loaded {} training peptides", train_peptides.len());
+
+    // Load validation data if specified
+    let val_peptides = if let Some(ref val_path) = config.validation_data {
+        Some(load_peptide_data(val_path).context("Failed to load validation data")?)
+    } else {
+        None
+    };
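Since `load_peptide_data` resolves columns by header name, a minimal tab-separated training file accepted by this loader could look like the following (values illustrative; the "charge", "nce", and "instrument" columns are optional):

    sequence        retention time  charge  nce  instrument
    AGHCEWQMKYR     12.34           2       20   QE
    LESLIEK         45.67           3       25   QE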
+
+    if let Some(ref val_data) = val_peptides {
+        println!("Loaded {} validation peptides", val_data.len());
+    } else {
+        println!("No validation data provided.");
+    }
+
+    // Dispatch model training based on architecture
+    let model_arch = config.model_arch.as_str();
+    let device = get_device(&config.device)?;
+
+    let mut model: Box<dyn ModelInterface> = match model_arch {
+        "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new_untrained(device.clone())?),
+        "rt_cnn_tf" => Box::new(RTCNNTFModel::new_untrained(device.clone())?),
+        _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", model_arch)),
+    };
+
+    let modifications = load_modifications().context("Failed to load modifications")?;
+
+    model.train(
+        &train_peptides,
+        val_peptides.as_ref(),
+        modifications,
+        config.batch_size,
+        config.learning_rate as f64,
+        config.epochs,
+    )?;
+
+    model.save(&config.output_file)?;
+    println!("Model saved to: {}", config.output_file);
+
+    Ok(())
+}
From b5decf0e7db7752e6a233fabea134059b29126aa Mon Sep 17 00:00:00 2001
From: singjc
Date: Fri, 9 May 2025 14:41:35 -0400
Subject: [PATCH 18/75] refactor: Add early stopping to property training

---
 .../redeem-cli/src/properties/train/input.rs  |   2 +
 .../src/properties/train/trainer.rs           |  11 +-
 .../src/models/model_interface.rs             | 115 +++++++++++-------
 3 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/crates/redeem-cli/src/properties/train/input.rs b/crates/redeem-cli/src/properties/train/input.rs
index 0a9fe59..fb5d3f3 100644
--- a/crates/redeem-cli/src/properties/train/input.rs
+++ b/crates/redeem-cli/src/properties/train/input.rs
@@ -14,6 +14,7 @@ pub struct PropertyTrainConfig {
     pub batch_size: usize,
     pub learning_rate: f32,
     pub epochs: usize,
+    pub early_stopping_patience: usize,
     pub instrument: String,
     pub nce: i32,
 }
@@ -29,6 +30,7 @@ impl Default for PropertyTrainConfig {
             batch_size: 64,
             learning_rate: 1e-3,
             epochs: 10,
+            early_stopping_patience: 5,
             instrument: String::from("QE"),
             nce: 20,
         }
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index ebd5f8b..2d23994 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -17,7 +17,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
 
     // Load training data
     let train_peptides: Vec<PeptideData> = load_peptide_data(&config.train_data)?;
-    println!("Loaded {} training peptides", train_peptides.len());
+    log::info!("Loaded {} training peptides", train_peptides.len());
 
     // Load validation data if specified
     let val_peptides = if let Some(ref val_path) = config.validation_data {
@@ -27,9 +27,9 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
     };
 
     if let Some(ref val_data) = val_peptides {
-        println!("Loaded {} validation peptides", val_data.len());
+        log::info!("Loaded {} validation peptides", val_data.len());
     } else {
-        println!("No validation data provided.");
+        log::warn!("No validation data provided.");
     }
 
     // Dispatch model training based on architecture
@@ -44,6 +44,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
 
     let modifications = load_modifications().context("Failed to load modifications")?;
 
+    let start_time = std::time::Instant::now();
     model.train(
         &train_peptides,
         val_peptides.as_ref(),
@@ -51,10 +52,12 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         config.batch_size,
         config.learning_rate as f64,
         config.epochs,
+        config.early_stopping_patience,
     )?;
+    log::info!("Training completed in
{:?}", start_time.elapsed()); model.save(&config.output_file)?; - println!("Model saved to: {}", config.output_file); + log::info!("Model saved to: {}", config.output_file); Ok(()) } diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 8e614ef..1817681 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -416,6 +416,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { batch_size: usize, learning_rate: f64, epochs: usize, + early_stopping_patience: usize, ) -> Result<()> { let num_batches = (training_data.len() + batch_size - 1) / batch_size; @@ -433,6 +434,9 @@ pub trait ModelInterface: Send + Sync + ModelClone { }; let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; + let mut best_val_loss = f32::INFINITY; + let mut epochs_without_improvement = 0; + for epoch in 0..epochs { let progress = Progress::new(num_batches, &format!("[training] Epoch {}: ", epoch)); let mut total_loss = 0.0; @@ -488,54 +492,77 @@ pub trait ModelInterface: Send + Sync + ModelClone { // Optional validation evaluation if let Some(val_data) = validation_data { - let peptides: Vec = val_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let mods: Vec = val_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); - let mod_sites: Vec = val_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - let charges = val_data.iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == val_data.len() { Some(charges) } else { None }; - - let nces = val_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == val_data.len() { Some(nces) } else { None }; - - let instruments = val_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == val_data.len() { Some(instruments) } else { None }; - - let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; + let val_batches = (val_data.len() + batch_size - 1) / batch_size; + use rayon::prelude::*; + + let total_val_loss: f32 = val_data + .par_chunks(batch_size) + .map(|batch_data| { + let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); + let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); + + let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); + let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; + + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); + let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; + + let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); + let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; + + let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments); + let input_val = match input_val { + Ok(x) => x, + Err(e) => return Err(e), + }; + + let val_targets = match self.property_type() { + PropertyType::RT => PredictionResult::RTResult( + batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), + ), + PropertyType::CCS => PredictionResult::IMResult( + batch_data.iter().map(|p| 
p.ion_mobility.unwrap_or_default()).collect(), + ), + PropertyType::MS2 => { + return Err(anyhow::anyhow!("Validation not supported for MS2 yet")); + } + }; + + let target_val = match val_targets { + PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { + Tensor::new(values.clone(), &self.get_device())? + } + PredictionResult::MS2Result(_) => unreachable!(), + }; + + let predicted = self.forward(&input_val)?; + let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; + Ok(val_loss.to_vec0::()?) + }) + .collect::>>()? + .into_iter() + .sum(); + + let avg_val_loss = total_val_loss / val_batches as f32; + let avg_loss = total_loss / num_batches as f32; - let val_targets = match self.property_type() { - PropertyType::RT => PredictionResult::RTResult( - val_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), - ), - PropertyType::CCS => PredictionResult::IMResult( - val_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), - ), - PropertyType::MS2 => { - return Err(anyhow::anyhow!("Validation not supported for MS2 yet")); - } - }; + progress.update_description(&format!("Epoch {}: Avg. Train Loss: {:.4} | Avg. Val. Loss: {:.4}", epoch, avg_loss, avg_val_loss)); + progress.finish(); - let target_val = match val_targets { - PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { - Tensor::new(values.clone(), &self.get_device())? + if avg_val_loss < best_val_loss { + best_val_loss = avg_val_loss; + epochs_without_improvement = 0; + } else { + epochs_without_improvement += 1; + if epochs_without_improvement >= early_stopping_patience { + info!("Early stopping triggered after {} epochs without validation loss improvement.", early_stopping_patience); + break; } - PredictionResult::MS2Result(_) => unreachable!(), - }; - - let predicted = self.forward(&input_val)?; - let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; - let val_loss_val = val_loss.to_vec0::()?; - - info!("[validation] Epoch {}: Validation Loss: {:.4}", epoch, val_loss_val); - - let avg_loss = total_loss / num_batches as f32; - progress.update_description(&format!("[training] Epoch {}: Avg. Loss: {:.4} | Val. Loss: {:.4}", epoch, avg_loss, val_loss_val)); - progress.finish(); - } - else - { + } + } else { let avg_loss = total_loss / num_batches as f32; - progress.update_description(&format!("[training] Epoch {}: Avg. Loss: {:.4}", epoch, avg_loss)); + progress.update_description(&format!("Epoch {}: Avg. 
Train Loss: {:.4}", epoch, avg_loss)); progress.finish(); } } From 37081bafe7582cdc0c6109652d9f9b78b78500c8 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 14:51:19 -0400 Subject: [PATCH 19/75] feat: Add Dockerfile for CUDA-based application containerization --- Dockerfile | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..016d191 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,49 @@ +# Use the official NVIDIA CUDA base image with CUDA 12.2 +FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 + +# Install system dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + libssl-dev \ + pkg-config \ + clang \ + libstdc++-12-dev \ + cmake \ + git \ + && \ + update-ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +# Install Rust using rustup +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +# Set environment variables for CUDA +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=${CUDA_HOME}/bin:${PATH} +ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} + +# Set the CUDA compute capability for the build process +# Tesla V100 has compute capability 7.0 +ENV CUDA_COMPUTE_CAP=70 + +# Set the working directory +WORKDIR /app + +# Copy the source code into the container +COPY . . + +# Update specific dependencies (if needed) +RUN cargo update -p redeem-classifiers + +# Build the application with CUDA support +RUN cargo build --release --bin redeem --features cuda + +# Copy the binary into the PATH +RUN cp target/release/redeem /app/redeem + +# Set the PATH environment variable +ENV PATH="/app:${PATH}" \ No newline at end of file From 1ceb7e69b485af720840570577c313859593c52a Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 14:54:56 -0400 Subject: [PATCH 20/75] refactor: Remove unnecessary cargo update command in Dockerfile --- Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 016d191..4679edb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,9 +36,6 @@ WORKDIR /app # Copy the source code into the container COPY . . 
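For orientation, the image produced by this Dockerfile is typically built and exercised as follows; the tag name is illustrative, and `--gpus all` requires the NVIDIA Container Toolkit on the host:

    docker build -t redeem-cuda .
    docker run --rm --gpus all redeem-cuda redeem --help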
-# Update specific dependencies (if needed) -RUN cargo update -p redeem-classifiers - # Build the application with CUDA support RUN cargo build --release --bin redeem --features cuda From 480d6c3b253ae9db0172d66222f99f78353d5977 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 9 May 2025 15:00:57 -0400 Subject: [PATCH 21/75] refactor: Update dependencies and descriptions in Cargo.toml files --- crates/redeem-classifiers/Cargo.toml | 2 +- crates/redeem-cli/Cargo.toml | 2 ++ crates/redeem-properties/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/redeem-classifiers/Cargo.toml b/crates/redeem-classifiers/Cargo.toml index f21af27..3660bf5 100644 --- a/crates/redeem-classifiers/Cargo.toml +++ b/crates/redeem-classifiers/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" rust-version = "1.76" description = "A repository of deep-learning models for mass spectrometry data" readme = "README.md" -license = "MIT" + [dependencies] anyhow = "1.0" diff --git a/crates/redeem-cli/Cargo.toml b/crates/redeem-cli/Cargo.toml index f75a161..8e2fbd7 100644 --- a/crates/redeem-cli/Cargo.toml +++ b/crates/redeem-cli/Cargo.toml @@ -2,6 +2,8 @@ name = "redeem-cli" version = "0.1.0" edition = "2024" +authors = ["Justin Sing "] +description = "A repository of deep-learning models for mass spectrometry data" [[bin]] name = "redeem" diff --git a/crates/redeem-properties/Cargo.toml b/crates/redeem-properties/Cargo.toml index e39b7c5..56ef10b 100644 --- a/crates/redeem-properties/Cargo.toml +++ b/crates/redeem-properties/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" rust-version = "1.76" description = "A repository of deep-learning models for mass spectrometry data" readme = "README.md" -license = "MIT" + [dependencies] anyhow = "1.0" From 4b7f92c580bbe485c848a25e3139cd837f09cd90 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 21:44:03 -0400 Subject: [PATCH 22/75] refactor: Update redeem-properties crate models and add new modules for training and loading data in redeem-cli --- .../examples/alphapeptdeep_ccs_cnn_lstm.rs | 2 +- .../examples/alphapeptdeep_ms2_bert.rs | 2 +- .../examples/alphapeptdeep_rt_cnn_lstm.rs | 2 +- .../src/building_blocks/bilstm.rs | 10 +- .../src/building_blocks/building_blocks.rs | 80 ++++-- .../src/building_blocks/nn.rs | 93 ++++--- .../src/models/ccs_cnn_lstm_model.rs | 14 +- .../redeem-properties/src/models/ccs_model.rs | 2 +- .../src/models/model_interface.rs | 244 +++++++++++++----- .../src/models/ms2_bert_model.rs | 14 +- .../redeem-properties/src/models/ms2_model.rs | 2 +- .../src/models/rt_cnn_lstm_model.rs | 25 +- .../src/models/rt_cnn_transformer_model.rs | 48 +++- .../redeem-properties/src/models/rt_model.rs | 17 +- .../src/utils/data_handling.rs | 2 +- crates/redeem-properties/src/utils/logging.rs | 2 +- .../src/utils/peptdeep_utils.rs | 1 + crates/redeem-properties/src/utils/utils.rs | 25 +- 18 files changed, 421 insertions(+), 164 deletions(-) diff --git a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs index 874a82b..63973da 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs @@ -91,7 +91,7 @@ fn main() -> Result<()> { println!("Device: {:?}", device); - let mut model = CCSCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device) + let mut model = CCSCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) .context("Failed to create 
CCSCNNLSTMModel")?; // Define training data diff --git a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs index 9e177de..b3ee4b1 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs @@ -139,7 +139,7 @@ fn main() -> Result<()> { println!("Device: {:?}", device); - let mut model = MS2BertModel::new(&model_path, &constants_path, 0, 8, 4, true, device) + let mut model = MS2BertModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) .context("Failed to create MS2BertModel")?; // Open the CSV file diff --git a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs index aeeb6f3..7408473 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs @@ -113,7 +113,7 @@ fn main() -> Result<()> { println!("Device: {:?}", device); - let mut model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device) + let mut model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) .context("Failed to create RTCNNLSTMModel")?; // Define training data diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index b6dd1aa..68d5204 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -67,7 +67,7 @@ impl BidirectionalLSTM { )?; let last_fw_h = out_fw_states.last().unwrap().h().clone(); let last_fw_c = out_fw_states.last().unwrap().c().clone(); - println!("BidirectionLSTM::apply_bidirectional_layer - Forward LSTM time: {:?}", start_time.elapsed()); + log::trace!("BidirectionLSTM::apply_bidirectional_layer - Forward LSTM time: {:?}", start_time.elapsed()); // Reverse sequence let start_time = std::time::Instant::now(); @@ -78,7 +78,7 @@ impl BidirectionalLSTM { .collect::>>()?, 1, )?; - println!("BidirectionLSTM::apply_bidirectional_layer - Reverse sequence time: {:?}", start_time.elapsed()); + log::trace!("BidirectionLSTM::apply_bidirectional_layer - Reverse sequence time: {:?}", start_time.elapsed()); // Initial states for backward let h0_backward = h0.i(1)?; @@ -93,7 +93,7 @@ impl BidirectionalLSTM { )?; let last_bw_h = out_bw_states.last().unwrap().h().clone(); let last_bw_c = out_bw_states.last().unwrap().c().clone(); - println!("BidirectionLSTM::apply_bidirectional_layer - Backward LSTM time: {:?}", start_time.elapsed()); + log::trace!("BidirectionLSTM::apply_bidirectional_layer - Backward LSTM time: {:?}", start_time.elapsed()); // Combine hidden and cell states let hn = Tensor::stack(&[last_fw_h.clone(), last_bw_h.clone()], 0)?; @@ -118,10 +118,10 @@ impl BidirectionalLSTM { let start_time = std::time::Instant::now(); let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?; - println!("BidirectionLSTM::forward_with_state - Layer 1 time: {:?}", start_time.elapsed()); + log::trace!("BidirectionLSTM::forward_with_state - Layer 1 time: {:?}", start_time.elapsed()); let start_time = std::time::Instant::now(); let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?; - println!("BidirectionLSTM::forward_with_state - Layer 2 time: {:?}", start_time.elapsed()); + 
log::trace!("BidirectionLSTM::forward_with_state - Layer 2 time: {:?}", start_time.elapsed()); let hn = Tensor::cat(&[hn1, hn2], 0)?; let cn = Tensor::cat(&[cn1, cn2], 0)?; diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index 2c3fc28..edd7250 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -11,6 +11,7 @@ use crate::building_blocks::bilstm::BidirectionalLSTM; use crate::building_blocks::featurize::aa_one_hot; use crate::building_blocks::nn::{BertEncoderModule, ModuleList}; use crate::building_blocks::sequential::{seq, Sequential}; +use crate::utils::utils::get_tensor_stats; use super::nn::TransformerEncoder; @@ -27,26 +28,22 @@ pub struct DecoderLinear { impl DecoderLinear { pub fn new(in_features: usize, out_features: usize, vb: &nn::VarBuilder) -> Result { - // First linear layer: in_features -> 64 - let weight1 = Tensor::zeros((64, in_features), DType::F32, vb.device())?; - let bias1 = Tensor::zeros(64, DType::F32, vb.device())?; - let linear1 = nn::Linear::new(weight1, Some(bias1)); - - // Activation + log::trace!("[DecoderLinear::new] Initializing linear1"); + let linear1 = nn::linear(in_features, 64, vb.pp("nn.0"))?; + log::trace!("[DecoderLinear::new] Initializing prelu"); let prelu = nn::PReLU::new(Tensor::zeros(64, DType::F32, vb.device())?, false); - - // Second linear layer: 64 -> out_features - let weight2 = Tensor::zeros((out_features, 64), DType::F32, vb.device())?; - let bias2 = Tensor::zeros(out_features, DType::F32, vb.device())?; - let linear2 = nn::Linear::new(weight2, Some(bias2)); - + log::trace!("[DecoderLinear::new] Initializing linear2"); + let linear2 = nn::linear(64, out_features, vb.pp("nn.2"))?; + log::trace!("[DecoderLinear::new] Initializing sequential"); let mut nn = seq(); nn = nn.add(linear1); nn = nn.add(prelu); nn = nn.add(linear2); - + Ok(Self { nn }) } + + pub fn from_varstore( varstore: &nn::VarBuilder, @@ -80,6 +77,12 @@ impl Module for DecoderLinear { match self.nn.forward(x) { Ok(output) => { log::trace!("[DecoderLinear] output shape: {:?}", output.shape()); + log::trace!( + "[DecoderLinear] output stats - min: {:.4}, max: {:.4}, mean: {:.4}", + output.min_all()?.to_vec0::()?, + output.max_all()?.to_vec0::()?, + output.mean_all()?.to_vec0::()?, + ); Ok(output) } Err(e) => { @@ -798,6 +801,15 @@ impl SeqTransformer { impl Module for SeqTransformer { fn forward(&self, x: &Tensor) -> Result { + // Add check to ensure input feature dim matches expected model dim + let (_b, _t, d) = x.dims3()?; + let model_dim = self.encoder.model_dim; + if d != model_dim { + return Err(candle_core::Error::Msg(format!( + "SeqTransformer received input with dim {} but expected {}", + d, model_dim + ))); + } self.encoder.forward_with_mask(x, None, self.training) } } @@ -902,23 +914,31 @@ impl Encoder26aaModCnnLstmAttnSum { let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - println!("Encoder26aaModCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let additional_tensors: Vec<&Tensor> = vec![&mod_x]; - println!("Encoder26aaModCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - additional_tensors 
forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - println!("Encoder26aaModCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; - println!("Encoder26aaModCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnLstmAttnSum] CNN output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.input_lstm.forward(&x)?; - println!("Encoder26aaModCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnLstmAttnSum] LSTM output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; - println!("Encoder26aaModCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnLstmAttnSum] AttentionSum output stats - min: {min}, max: {max}, mean: {mean}"); Ok(x) } } @@ -980,27 +1000,27 @@ impl Encoder26aaModChargeCnnLstmAttnSum { let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let charges_repeated = charges.unsqueeze(1)?.repeat(&[1, mod_x.dim(1)?, 1])?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - charges_repeated forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - charges_repeated forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let additional_tensors: Vec<&Tensor> = vec![&mod_x, &charges_repeated]; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); + 
log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.input_lstm.forward(&x)?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; - println!("Encoder26aaModChargeCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); Ok(x) } } @@ -1106,18 +1126,26 @@ impl Encoder26aaModCnnTransformerAttnSum { let x = aa_one_hot(aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_cnn forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnTransformerAttnSum] input_cnn output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.input_transformer.forward(&x)?; log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_transformer forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnTransformerAttnSum] input_transformer output stats - min: {min}, max: {max}, mean: {mean}"); let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - attn_sum forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModCnnTransformerAttnSum] attn_sum output stats - min: {min}, max: {max}, mean: {mean}"); Ok(x) } diff --git a/crates/redeem-properties/src/building_blocks/nn.rs b/crates/redeem-properties/src/building_blocks/nn.rs index cbab40e..a8343c7 100644 --- a/crates/redeem-properties/src/building_blocks/nn.rs +++ b/crates/redeem-properties/src/building_blocks/nn.rs @@ -1,10 +1,14 @@ use candle_core::{Device, IndexOp, Result, Tensor}; -use candle_nn::{Dropout, LayerNorm, Linear, Module, VarBuilder}; +use candle_nn::init::{FanInOut, NonLinearity, NormalOrUniform}; +use candle_nn::{Dropout, Init, LayerNorm, Linear, Module, VarBuilder}; use candle_transformers::models::bert::{BertEncoder, Config}; use candle_nn::ops::softmax; +use std::env::var; use std::ops::{Deref, DerefMut}; use std::sync::Arc; +use crate::utils::utils::get_tensor_stats; + #[derive(Clone)] pub struct ModuleList { modules: Vec>, @@ -78,6 +82,7 @@ pub struct TransformerEncoder { layers: Vec, pos_encoding: Tensor, dropout: Dropout, + pub model_dim: usize, } impl TransformerEncoder { @@ -105,28 +110,38 @@ impl TransformerEncoder { } let pos_encoding = create_sinusoidal_encoding(max_len, model_dim, device)?; let dropout = Dropout::new(dropout_prob); - Ok(Self { layers, pos_encoding, dropout }) + Ok(Self { layers, pos_encoding, dropout, model_dim }) } pub fn forward_with_mask(&self, x: &Tensor, 
padding_mask: Option<&Tensor>, training: bool) -> Result { log::trace!("[TransformerEncoder] input x shape: {:?}", x.shape()); - + let (mean, min, max) = get_tensor_stats(x)?; + log::debug!("[TransformerEncoder] input stats: mean={}, min={}, max={}", mean, min, max); let (b, t, _) = x.dims3()?; let pe = self.pos_encoding.i((..t, ..))? .unsqueeze(0)? .broadcast_as((b, t, self.pos_encoding.dim(1)?))?; log::trace!("[TransformerEncoder] positional encoding shape: {:?}", pe.shape()); + let (mean, min, max) = get_tensor_stats(&pe)?; + log::debug!("[TransformerEncoder] positional encoding stats: mean={}, min={}, max={}", mean, min, max); let mut out = x.broadcast_add(&pe)?; + let (mean, min, max) = get_tensor_stats(&out)?; + log::debug!("[TransformerEncoder] after positional encoding stats: mean={}, min={}, max={}", mean, min, max); + out = self.dropout.forward(&out, training)?; log::trace!("[TransformerEncoder] after dropout shape: {:?}", out.shape()); + let (mean, min, max) = get_tensor_stats(&out)?; + log::debug!("[TransformerEncoder] after dropout stats: mean={}, min={}, max={}", mean, min, max); for (i, layer) in self.layers.iter().enumerate() { log::trace!("[TransformerEncoder] applying layer {}", i); out = layer.forward(&out, padding_mask, training)?; log::trace!("[TransformerEncoder] output shape after layer {}: {:?}", i, out.shape()); + let (mean, min, max) = get_tensor_stats(&out)?; + log::debug!("[TransformerEncoder] output stats after layer {}: mean={}, min={}, max={}", i, mean, min, max); } Ok(out) } @@ -153,16 +168,14 @@ impl TransformerEncoderLayer { Ok(Self { self_attn: MultiHeadAttention::new(varbuilder, model_dim, model_dim, num_heads)?, ff: FeedForward::new(varbuilder, model_dim, ff_dim)?, - norm1: { - let weight = varbuilder.get((model_dim,), "norm1.weight")?; - let bias = varbuilder.get((model_dim,), "norm1.bias")?; - LayerNorm::new(weight, bias, 1e-5) - }, - norm2: { - let weight = varbuilder.get((model_dim,), "norm2.weight")?; - let bias = varbuilder.get((model_dim,), "norm2.bias")?; - LayerNorm::new(weight, bias, 1e-5) - }, + norm1: candle_nn::layer_norm( + model_dim, + candle_nn::LayerNormConfig::default(), + varbuilder.pp("norm1"))?, + norm2: candle_nn::layer_norm( + model_dim, + candle_nn::LayerNormConfig::default(), + varbuilder.pp("norm2"))?, dropout1: Dropout::new(dropout_prob), dropout2: Dropout::new(dropout_prob), }) @@ -171,10 +184,24 @@ impl TransformerEncoderLayer { pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>, training: bool) -> Result { log::trace!("[TransformerEncoderLayer] input x shape: {:?}", x.shape()); let attn = self.self_attn.forward(x, mask)?; - let x = self.norm1.forward(&x.broadcast_add(&self.dropout1.forward(&attn, training)?)?)?; + let (mean, min, max) = get_tensor_stats(&attn)?; + log::debug!("[TransformerEncoderLayer] attention stats: mean={}, min={}, max={}", mean, min, max); + let tmp = self.dropout1.forward(&attn, training)?; + let (mean, min, max) = get_tensor_stats(&tmp)?; + log::debug!("[TransformerEncoderLayer] attention after dropout stats: mean={}, min={}, max={}", mean, min, max); + let tmp2 = x.broadcast_add(&tmp)?; + let (mean, min, max) = get_tensor_stats(&tmp2)?; + log::debug!("[TransformerEncoderLayer] after residual connection stats: mean={}, min={}, max={}", mean, min, max); + let x = self.norm1.forward(&tmp2)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[TransformerEncoderLayer] after norm1 stats: mean={}, min={}, max={}", mean, min, max); let ff = self.ff.forward(&x)?; + let (mean, min, max) = 
get_tensor_stats(&ff)?; + log::debug!("[TransformerEncoderLayer] feedforward stats: mean={}, min={}, max={}", mean, min, max); let result = self.norm2.forward(&x.broadcast_add(&self.dropout2.forward(&ff, training)?)?)?; log::trace!("[TransformerEncoderLayer] output shape: {:?}", result.shape()); + let (mean, min, max) = get_tensor_stats(&result)?; + log::debug!("[TransformerEncoderLayer] output stats: mean={}, min={}, max={}", mean, min, max); Ok(result) } } @@ -199,10 +226,10 @@ impl MultiHeadAttention { ) -> Result { let head_dim = model_dim / num_heads; Ok(Self { - proj_q: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_q")?, - proj_k: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_k")?, - proj_v: linear_from_varbuilder(varbuilder, input_dim, model_dim, "proj_v")?, - proj_out: linear_from_varbuilder(varbuilder, model_dim, model_dim, "proj_out")?, + proj_q: candle_nn::linear(input_dim, model_dim, varbuilder.pp("proj_q"))?, + proj_k: candle_nn::linear(input_dim, model_dim, varbuilder.pp("proj_k"))?, + proj_v: candle_nn::linear(input_dim, model_dim, varbuilder.pp("proj_v"))?, + proj_out: candle_nn::linear(model_dim, model_dim, varbuilder.pp("proj_out"))?, num_heads, head_dim, }) @@ -216,16 +243,25 @@ impl MultiHeadAttention { .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? .contiguous()?; + log::trace!("[MultiHeadAttention] Q shape after projection and transpose: {:?}", q.shape()); + let (mean, min, max) = get_tensor_stats(&q)?; + log::debug!("[MultiHeadAttention] Q stats: mean={}, min={}, max={}", mean, min, max); let k = self.proj_k.forward(x)? .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? .contiguous()?; + log::trace!("[MultiHeadAttention] K shape after projection and transpose: {:?}", k.shape()); + let (mean, min, max) = get_tensor_stats(&k)?; + log::debug!("[MultiHeadAttention] K stats: mean={}, min={}, max={}", mean, min, max); let v = self.proj_v.forward(x)? .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? 
.contiguous()?; + log::trace!("[MultiHeadAttention] V shape after projection and transpose: {:?}", v.shape()); + let (mean, min, max) = get_tensor_stats(&v)?; + log::debug!("[MultiHeadAttention] V stats: mean={}, min={}, max={}", mean, min, max); log::trace!("[MultiHeadAttention] Q/K/V shape after projection and transpose: {:?}", q.shape()); @@ -242,6 +278,8 @@ impl MultiHeadAttention { }; log::trace!("[MultiHeadAttention] Attention score shape: {:?}", scores.shape()); + let (mean, min, max) = get_tensor_stats(&scores)?; + log::debug!("[MultiHeadAttention] Attention score stats: mean={}, min={}, max={}", mean, min, max); if let Some(mask) = mask { log::trace!("[MultiHeadAttention] Applying mask"); @@ -263,6 +301,8 @@ impl MultiHeadAttention { return Err(e.into()); } }; + let (attn_mean, attn_min, attn_max) = get_tensor_stats(&attn)?; + log::debug!("[MultiHeadAttention] Attention stats: mean={}, min={}, max={}", attn_mean, attn_min, attn_max); let context = match attn.matmul(&v) { Ok(ctx) => ctx.transpose(1, 2)?.reshape((b, t, self.num_heads * self.head_dim))?, @@ -273,6 +313,8 @@ impl MultiHeadAttention { }; log::trace!("[MultiHeadAttention] Final context shape: {:?}", context.shape()); + let (mean, min, max) = get_tensor_stats(&context)?; + log::debug!("[MultiHeadAttention] Context stats: mean={}, min={}, max={}", mean, min, max); self.proj_out.forward(&context) } } @@ -286,8 +328,8 @@ pub struct FeedForward { impl FeedForward { pub fn new(varbuilder: &VarBuilder, model_dim: usize, ff_dim: usize) -> Result { Ok(Self { - lin1: linear_from_varbuilder(varbuilder, model_dim, ff_dim, "lin1")?, - lin2: linear_from_varbuilder(varbuilder, ff_dim, model_dim, "lin2")?, + lin1: candle_nn::linear(model_dim, ff_dim, varbuilder.pp("lin1"))?, + lin2: candle_nn::linear(ff_dim, model_dim, varbuilder.pp("lin2"))?, }) } @@ -297,17 +339,6 @@ impl FeedForward { } } -fn linear_from_varbuilder( - vb: &VarBuilder, - in_dim: usize, - out_dim: usize, - prefix: &str, -) -> Result { - let weight = vb.get((out_dim, in_dim), &format!("{}.weight", prefix))?; - let bias = vb.get((out_dim,), &format!("{}.bias", prefix)).ok(); - Ok(Linear::new(weight, bias)) -} - /// Generate sinusoidal positional encoding like in "Attention is All You Need". pub fn create_sinusoidal_encoding(seq_len: usize, model_dim: usize, device: &Device) -> Result { let mut pe = vec![0f32; seq_len * model_dim]; diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs index 463c126..fa8489a 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs @@ -66,7 +66,7 @@ impl ModelInterface for CCSCNNLSTMModel { /// Create a new CCSCNNLSTMModel instance model from the given model and constants files. fn new>( model_path: P, - constants_path: P, + constants_path: Option
<P>,
         fixed_sequence_len: usize,
         num_frag_types: usize,
         num_modloss_types: usize,
@@ -80,8 +80,10 @@
 
         let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device);
 
-        let constants: ModelConstants =
-            parse_model_constants(constants_path.as_ref().to_str().unwrap())?;
+        let constants = match constants_path {
+            Some(path) => parse_model_constants(path.as_ref().to_str().unwrap())?,
+            None => ModelConstants::default(),
+        };
 
         // Load the mod_to_feature mapping
         let mod_to_feature = load_mod_to_feature(&constants)?;
@@ -295,7 +297,7 @@ mod tests {
         let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml");
         let device = Device::Cpu;
 
-        let model = CCSCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap();
+        let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap();
 
         println!("{:?}", model);
     }
@@ -306,7 +308,7 @@
         let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml");
         let device = Device::Cpu;
 
-        let model = CCSCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap();
+        let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap();
 
         let peptide_sequences = "AGHCEWQMKYR";
         let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M";
@@ -331,7 +333,7 @@
         let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml");
         let device = Device::Cpu;
 
-        let model = CCSCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap();
+        let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap();
 
         let peptide_sequences = vec!["AGHCEWQMKYR".to_string(), "AGHCEWQMKYR".to_string()];
         let mods = vec!["Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string()];
diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 4921bb4..c6b719a 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -32,7 +32,7 @@ impl Clone for CCSModelWrapper {
 impl CCSModelWrapper {
     pub fn new<P: AsRef<Path>>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result<Self> {
         let model: Box<dyn ModelInterface> = match arch {
-            "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device)?),
+            "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?),
             // Add other cases here as you implement more models
             _ => return Err(anyhow!("Unsupported CCS model architecture: {}", arch)),
         };
diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 1817681..120dd48 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -15,7 +15,7 @@ use candle_core::{DType, Device, Tensor, Var};
 use candle_nn::{Optimizer, VarMap};
 use log::info;
 use rayon::prelude::*;
-use std::ops::Index;
+use std::ops::{Deref, Index};
 use std::path::Path;
 use std::{collections::HashMap, path::PathBuf};
 
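With `constants_path` now an `Option`, a pretrained model can be loaded without a constants YAML and fall back to `ModelConstants::default()`; a minimal sketch against the CCS model updated above (the path is illustrative):

    // Assumes: use std::path::PathBuf; use candle_core::Device;
    // and the CCSCNNLSTMModel type from redeem-properties.
    let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth");
    let model = CCSCNNLSTMModel::new(&model_path, None, 0, 8, 4, true, Device::Cpu)?;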
@@ -210,9 +210,14 @@
     /// Create a new instance of the model (given a pretrained model (.pth or .safetensors) and a constants file).
+    ///
+    /// # Arguments
+    /// * `model_path` - Path to the model file (.pth or .safetensors).
+    /// * `constants_path` - Optional path to the model constants file (.yaml). If none, will use the default constants.
+    ///
     fn new<P: AsRef<Path>>(
         model_path: P,
-        constants_path: P,
+        constants_path: Option<P>,
         fixed_sequence_len: usize,
         num_frag_types: usize,
         num_modloss_types: usize,
@@ -225,7 +230,7 @@
     /// Forward pass through the model.
     fn forward(&self, input: &Tensor) -> Result<Tensor>;
 
-    /// Predict the retention times for a peptide sequence.
+    /// Predict the property for a batch of peptide sequences.
     ///
     /// # Arguments
     /// * `peptide_sequences` - A vector of peptide sequences.
@@ -248,7 +253,8 @@
     ) -> Result<PredictionResult> {
         // Encode the batch of peptides
         let input_tensor =
-            self.encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument)?;
+            self.encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument)?
+                .to_device(self.get_device())?;
 
         // Forward pass through the model
         let output = self.forward(&input_tensor)?;
@@ -403,8 +409,27 @@
     /// Train the model from scratch using a batch of training data.
     ///
-    /// This method is similar to `fine_tune`, but assumes that the model was created from `new_untrained`
-    /// and has no pre-existing learned weights.
+    /// This method initializes model weights from scratch and trains over the given peptide feature data for a specified
+    /// number of epochs. Optionally performs validation and tracks both training and validation loss statistics.
+    /// Early stopping is applied if the validation loss does not improve for a consecutive number of epochs.
+    ///
+    /// # Arguments
+    /// * `training_data` - Vector of peptide records used for training.
+    /// * `validation_data` - Optional vector of peptide records used for validation at the end of each epoch.
+    /// * `modifications` - A map of known modifications to encode modified peptides.
+    /// * `batch_size` - Batch size used for training.
+    /// * `validation_batch_size` - Batch size used during validation.
+    /// * `learning_rate` - Learning rate for the AdamW optimizer.
+    /// * `epochs` - Maximum number of training epochs.
+    /// * `early_stopping_patience` - Number of epochs to wait before stopping if validation loss does not improve.
+    ///
+    /// # Returns
+    /// A `Vec` of tuples where each tuple contains:
+    /// * `epoch` - Epoch number.
+    /// * `avg_train_loss` - Average training loss for the epoch.
+    /// * `avg_val_loss` - Optional average validation loss for the epoch.
+    /// * `train_std` - Standard deviation of training loss across batches.
+    /// * `val_std` - Optional standard deviation of validation loss across batches.
fn train( &mut self, training_data: &Vec, @@ -414,12 +439,13 @@ pub trait ModelInterface: Send + Sync + ModelClone { crate::utils::peptdeep_utils::ModificationMap, >, batch_size: usize, + validation_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize, - ) -> Result<()> { + ) -> Result, f32, Option)>> { let num_batches = (training_data.len() + batch_size - 1) / batch_size; - + info!( "Training {} model from scratch on {} peptide features ({} batches) for {} epochs", self.get_model_arch(), @@ -427,39 +453,40 @@ pub trait ModelInterface: Send + Sync + ModelClone { num_batches, epochs ); - + let params = candle_nn::ParamsAdamW { lr: learning_rate, ..Default::default() }; let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; - + let mut best_val_loss = f32::INFINITY; let mut epochs_without_improvement = 0; - + let mut epoch_losses = vec![]; + for epoch in 0..epochs { let progress = Progress::new(num_batches, &format!("[training] Epoch {}: ", epoch)); - let mut total_loss = 0.0; - + let mut batch_losses = vec![]; + training_data .chunks(batch_size) .enumerate() - .try_for_each(|(batch_idx, batch_data)| { + .try_for_each(|(_batch_idx, batch_data)| { let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - + let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - + let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; - + + let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + let batch_targets = match self.property_type() { PropertyType::RT => PredictionResult::RTResult( batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), @@ -471,52 +498,52 @@ pub trait ModelInterface: Send + Sync + ModelClone { return Err(anyhow::anyhow!("Training from scratch is not yet implemented for MS2")); } }; - + let target_batch = match batch_targets { PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { Tensor::new(values.clone(), &self.get_device())? 
} PredictionResult::MS2Result(_) => unreachable!(), - }; - + }.to_device(self.get_device())?; + let predicted = self.forward(&input_batch)?; let loss = candle_nn::loss::mse(&predicted, &target_batch)?; opt.backward_step(&loss)?; - - total_loss += loss.to_vec0::().unwrap_or(999.0); - progress.update_description(&format!("[training] Epoch {}: Loss: {:.4}", epoch, loss.to_vec0::()?)); + + let loss_val = loss.to_vec0::().unwrap_or(999.0); + batch_losses.push(loss_val); + + progress.update_description(&format!("[training] Epoch {}: Loss: {:.4}", epoch, loss_val)); progress.inc(); - + Ok(()) })?; - - // Optional validation evaluation + + let avg_loss = batch_losses.iter().copied().sum::() / batch_losses.len() as f32; + let std_loss = (batch_losses.iter().map(|l| (l - avg_loss).powi(2)).sum::() / batch_losses.len() as f32).sqrt(); + if let Some(val_data) = validation_data { - let val_batches = (val_data.len() + batch_size - 1) / batch_size; + let val_batches = (val_data.len() + validation_batch_size - 1) / validation_batch_size; use rayon::prelude::*; - - let total_val_loss: f32 = val_data - .par_chunks(batch_size) + + let val_losses: Vec = val_data + .par_chunks(validation_batch_size) .map(|batch_data| { let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - + let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - + let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments); - let input_val = match input_val { - Ok(x) => x, - Err(e) => return Err(e), - }; - + + let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + let val_targets = match self.property_type() { PropertyType::RT => PredictionResult::RTResult( batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), @@ -528,47 +555,57 @@ pub trait ModelInterface: Send + Sync + ModelClone { return Err(anyhow::anyhow!("Validation not supported for MS2 yet")); } }; - + let target_val = match val_targets { PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => { Tensor::new(values.clone(), &self.get_device())? } PredictionResult::MS2Result(_) => unreachable!(), - }; - + }.to_device(self.get_device())?; + let predicted = self.forward(&input_val)?; let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; Ok(val_loss.to_vec0::()?) }) - .collect::>>()? - .into_iter() - .sum(); - - let avg_val_loss = total_val_loss / val_batches as f32; - let avg_loss = total_loss / num_batches as f32; - - progress.update_description(&format!("Epoch {}: Avg. Train Loss: {:.4} | Avg. Val. 
Loss: {:.4}", epoch, avg_loss, avg_val_loss)); + .collect::>>()?; + + let avg_val_loss = val_losses.iter().sum::() / val_losses.len() as f32; + let std_val_loss = (val_losses.iter().map(|l| (l - avg_val_loss).powi(2)).sum::() / val_losses.len() as f32).sqrt(); + + epoch_losses.push((epoch, avg_loss, Some(avg_val_loss), std_loss, Some(std_val_loss))); + + progress.update_description(&format!( + "Epoch {}: Avg. Train Loss: {:.4} (±{:.4}) | Avg. Val. Loss: {:.4} (±{:.4})", + epoch, avg_loss, std_loss, avg_val_loss, std_val_loss + )); progress.finish(); - + if avg_val_loss < best_val_loss { best_val_loss = avg_val_loss; epochs_without_improvement = 0; + + let checkpoint_path = format!("redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), epoch); + self.get_mut_varmap().save(&checkpoint_path)?; } else { epochs_without_improvement += 1; if epochs_without_improvement >= early_stopping_patience { info!("Early stopping triggered after {} epochs without validation loss improvement.", early_stopping_patience); - break; + return Ok(epoch_losses); } } } else { - let avg_loss = total_loss / num_batches as f32; - progress.update_description(&format!("Epoch {}: Avg. Train Loss: {:.4}", epoch, avg_loss)); + epoch_losses.push((epoch, avg_loss, None, std_loss, None)); + progress.update_description(&format!("Epoch {}: Avg. Train Loss: {:.4} (±{:.4})", epoch, avg_loss, std_loss)); progress.finish(); + + let checkpoint_path = format!("redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), epoch); + self.get_mut_varmap().save(&checkpoint_path)?; } } - - Ok(()) + + Ok(epoch_losses) } + /// Fine-tune the model on a batch of training data. /// @@ -664,7 +701,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { }; let input_batch = - self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?; + self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; log::trace!( "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", @@ -715,7 +752,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { feature_dim, ))? 
} - }; + }.to_device(self.get_device())?; let predicted = self.forward(&input_batch)?; let loss = candle_nn::loss::mse(&predicted, &target_batch)?; @@ -742,6 +779,89 @@ pub trait ModelInterface: Send + Sync + ModelClone { Ok(()) } + fn inference( + &self, + inference_data: &Vec, + batch_size: usize, + modifications: HashMap< + (String, Option), + crate::utils::peptdeep_utils::ModificationMap, + >, + rt_norm_params: Option<(f32, f32)>, + ) -> Result> { + let num_batches = (inference_data.len() + batch_size - 1) / batch_size; + info!( + "Performing inference on {} peptide features ({} batches)", + inference_data.len(), + num_batches + ); + + let progress = Progress::new(inference_data.len(), "[inference] Batch:"); + let mut result: Vec> = vec![None; inference_data.len()]; + + inference_data + .par_chunks(batch_size) + .enumerate() + .map(|(batch_idx, batch_data)| { + let start_idx = batch_idx * batch_size; + + let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); + let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); + + let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); + let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; + + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); + let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; + + let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); + let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; + + let input_tensor = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + let output = self.forward(&input_tensor)?; + + match self.property_type() { + PropertyType::RT | PropertyType::CCS => { + let predictions = output.to_vec1()?; + let updated: Vec<(usize, PeptideData)> = predictions + .into_iter() + .enumerate() + .map(|(i, pred)| { + let mut peptide = batch_data[i].clone(); + match self.property_type() { + PropertyType::RT => { + peptide.retention_time = if let Some((mean, std)) = rt_norm_params { + Some(pred * std + mean) + } else { + Some(pred) + }; + } + PropertyType::CCS => peptide.ion_mobility = Some(pred), + _ => {} + }; + (start_idx + i, peptide) + }) + .collect(); + Ok(updated) + } + PropertyType::MS2 => Err(anyhow::anyhow!("Inference not supported for MS2 models in batch mode")), + } + }) + .collect::>>>()? + .into_iter() + .flatten() + .for_each(|(idx, peptide)| { + result[idx] = Some(peptide); + progress.inc(); + }); + + progress.finish(); + Ok(result.into_iter().flatten().collect()) + } + + + /// Set model to evaluation mode for inference /// This disables dropout and other training-specific layers. fn set_evaluation_mode(&mut self); diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 42b4d57..32a634b 100644 --- a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -68,7 +68,7 @@ impl ModelInterface for MS2BertModel { /// Create a new MS2BERT model from the given model and constants files. fn new>( model_path: P, - constants_path: P, + constants_path: Option
<P>
, fixed_sequence_len: usize, num_frag_types: usize, num_modloss_types: usize, @@ -82,8 +82,10 @@ impl ModelInterface for MS2BertModel { let var_store = VarBuilder::from_varmap(&varmap, DType::F32, &device); - let constants: ModelConstants = - parse_model_constants(constants_path.as_ref().to_str().unwrap())?; + let constants = match constants_path { + Some(path) => parse_model_constants(path.as_ref().to_str().unwrap())?, + None => ModelConstants::default(), + }; // Load the mod_to_feature mapping let mod_to_feature = load_mod_to_feature(&constants)?; @@ -459,7 +461,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap(); + let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); println!("{:?}", model); } @@ -470,7 +472,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap(); + let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); let peptide_sequences = "AGHCEWQMKYR"; let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; @@ -495,7 +497,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, constants_path, 0, 8, 4, true, device).unwrap(); + let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); let peptide_sequences = vec!["AGHCEWQMKYR".to_string(), "AGHCEWQMKYR".to_string()]; let mods = vec![ diff --git a/crates/redeem-properties/src/models/ms2_model.rs b/crates/redeem-properties/src/models/ms2_model.rs index f4ed7e1..ea3c489 100644 --- a/crates/redeem-properties/src/models/ms2_model.rs +++ b/crates/redeem-properties/src/models/ms2_model.rs @@ -32,7 +32,7 @@ impl Clone for MS2ModelWrapper { impl MS2ModelWrapper { pub fn new>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { let model: Box = match arch { - "ms2_bert" => Box::new(MS2BertModel::new(model_path, constants_path, 0, 8, 4, true, device)?), + "ms2_bert" => Box::new(MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), // Add other cases here as you implement more models _ => return Err(anyhow!("Unsupported MS2 model architecture: {}", arch)), }; diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs index e73f6f4..1cb99c7 100644 --- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs @@ -14,6 +14,7 @@ use crate::utils::peptdeep_utils::{ load_mod_to_feature, parse_model_constants, ModelConstants, }; +use crate::utils::utils::get_tensor_stats; // Main Model Struct @@ -55,7 +56,7 @@ impl ModelInterface for RTCNNLSTMModel { /// Create a new RTCNNLSTMModel from the given model and constants files. fn new>( model_path: P, - constants_path: P, + constants_path: Option
<P>
, _fixed_sequence_len: usize, _num_frag_types: usize, _num_modloss_types: usize, @@ -69,8 +70,10 @@ impl ModelInterface for RTCNNLSTMModel { create_var_map(&mut varmap, tensor_data, &device)?; let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device); - let constants: ModelConstants = - parse_model_constants(constants_path.as_ref().to_str().unwrap())?; + let constants = match constants_path { + Some(path) => parse_model_constants(path.as_ref().to_str().unwrap())?, + None => ModelConstants::default(), + }; // Load the mod_to_feature mapping let mod_to_feature = load_mod_to_feature(&constants)?; @@ -118,10 +121,20 @@ impl ModelInterface for RTCNNLSTMModel { let (_batch_size, _seq_len, _) = xs.shape().dims3()?; let aa_indices_out = xs.i((.., .., 0))?; + let (mean, min, max) = get_tensor_stats(&aa_indices_out)?; + log::debug!("[RTCNNLSTMModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}"); let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; + let (mean, min, max) = get_tensor_stats(&mod_x_out)?; + log::debug!("[RTCNNLSTMModel] mod_x_out stats - min: {min}, max: {max}, mean: {mean}"); let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNLSTMModel] x stats - min: {min}, max: {max}, mean: {mean}"); let x = self.dropout.forward(&x, self.is_training)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNLSTMModel] x after dropout stats - min: {min}, max: {max}, mean: {mean}"); let x = self.rt_decoder.forward(&x)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNLSTMModel] x after decoder stats - min: {min}, max: {max}, mean: {mean}"); let result = x.squeeze(1)?; Ok(result) @@ -310,7 +323,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device).unwrap(); + let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); let peptide_sequences = "AGHCEWQMKYR"; let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; @@ -336,7 +349,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device.clone()).unwrap(); + let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device.clone()).unwrap(); // Batched input let peptide_sequences = vec![ @@ -383,7 +396,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = /* Assuming Device is defined */ Device::new_cuda(0).unwrap_or(/* assuming Device::Cpu is defined */ Device::Cpu); // Replace with actual Device code. 
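        // Since constants_path is now Option<P>, a caller without the YAML
        // constants file can pass None and the model falls back to
        // ModelConstants::default(). A sketch only (P is inferred from
        // model_path):
        //   let result = RTCNNLSTMModel::new(&model_path, None, 0, 8, 4, true, device);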
- let result = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device); + let result = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device); let mut model = result.unwrap(); // Test prediction with a few peptides after fine-tuning diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs index 789c127..6008137 100644 --- a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs @@ -15,6 +15,7 @@ use crate::utils::peptdeep_utils::{ load_mod_to_feature, parse_model_constants, ModelConstants, }; +use crate::utils::utils::get_tensor_stats; // Main Model Struct @@ -52,9 +53,9 @@ impl ModelInterface for RTCNNTFModel { let mut varmap = VarMap::new(); let varbuilder = VarBuilder::from_varmap(&varmap, DType::F32, &device); - + log::trace!("[RTCNNTFModel] Initializing rt_encoder"); let rt_encoder = Encoder26aaModCnnTransformerAttnSum::new( - &varbuilder, + &varbuilder.pp("rt_encoder"), 8, // mod_hidden_dim 140, // hidden_dim 256, // ff_dim @@ -65,12 +66,13 @@ impl ModelInterface for RTCNNTFModel { &device )?; - let rt_decoder = DecoderLinear::new(140, 1, &varbuilder)?; + log::trace!("[RTCNNTFModel] Initializing rt_decoder"); + let rt_decoder = DecoderLinear::new(140, 1, &varbuilder.pp("rt_decoder"))?; let constants = ModelConstants::default(); let mod_to_feature = load_mod_to_feature(&constants)?; Ok(Self { - var_store: VarBuilder::from_varmap(&varmap, DType::F32, &device), + var_store: varbuilder, varmap, constants, device, @@ -85,7 +87,7 @@ impl ModelInterface for RTCNNTFModel { /// Create a new RTCNNTFModel from the given model and constants files. fn new>( model_path: P, - constants_path: P, + constants_path: Option
<P>
, _fixed_sequence_len: usize, _num_frag_types: usize, _num_modloss_types: usize, @@ -97,8 +99,10 @@ impl ModelInterface for RTCNNTFModel { create_var_map(&mut varmap, tensor_data, &device)?; let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device); - let constants: ModelConstants = - parse_model_constants(constants_path.as_ref().to_str().unwrap())?; + let constants = match constants_path { + Some(path) => parse_model_constants(path.as_ref().to_str().unwrap())?, + None => ModelConstants::default(), + }; let mod_to_feature = load_mod_to_feature(&constants)?; let dropout = Dropout::new(0.1); @@ -146,20 +150,42 @@ impl ModelInterface for RTCNNTFModel { dropout, rt_encoder, rt_decoder, - is_training: true, + is_training: false, }) } fn forward(&self, xs: &Tensor) -> Result { let aa_indices_out = xs.i((.., .., 0))?; - let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; + let (mean, min, max) = get_tensor_stats(&aa_indices_out)?; + log::debug!("[RTCNNTFModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}"); + let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; + + if mod_x_out.shape().elem_count() == 0 { + log::error!("[RTCNNTFModel] mod_x_out is empty! shape: {:?}", mod_x_out.shape()); + } else { + match get_tensor_stats(&mod_x_out) { + Ok((mean, min, max)) => { + log::debug!("[RTCNNTFModel] mod_x_out stats - min: {min}, max: {max}, mean: {mean}"); + } + Err(e) => { + log::error!("[RTCNNTFModel] Failed to compute stats for mod_x_out: {:?}", e); + } + } + } + log::trace!("[RTCNNTFModel] aa_indices_out: {:?}, mod_x_out: {:?}", aa_indices_out, mod_x_out); let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; log::trace!("[RTCNNTFModel] x.shape after rt_encoder: {:?}", x.shape()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNTFModel] rt_encoder output stats - min: {min}, max: {max}, mean: {mean}"); let x = self.dropout.forward(&x, self.is_training)?; log::trace!("[RTCNNTFModel] x.shape after dropout: {:?}", x.shape()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNTFModel] dropout output stats - min: {min}, max: {max}, mean: {mean}"); let x = self.rt_decoder.forward(&x)?; log::trace!("[RTCNNTFModel] x.shape after rt_decoder: {:?}", x.shape()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[RTCNNTFModel] rt_decoder output stats - min: {min}, max: {max}, mean: {mean}"); Ok(x.squeeze(1)?) 
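        // Shape sketch, inferred from the slicing above rather than from any
        // documented contract: xs is [batch, seq_len, 1 + MOD_FEATURE_SIZE],
        // where column 0 holds amino-acid indices and the remaining columns
        // the modification features; the attention-sum encoder pools over
        // seq_len, the 140 -> 1 decoder leaves [batch, 1], and squeeze(1)
        // returns a [batch] retention-time vector.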
} @@ -261,7 +287,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device).unwrap(); + let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); let peptide_sequences = "AGHCEWQMKYR"; let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; @@ -287,7 +313,7 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, &constants_path, 0, 8, 4, true, device.clone()).unwrap(); + let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device.clone()).unwrap(); // Batched input let peptide_sequences = vec![ diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs index dd9bcab..f4a9643 100644 --- a/crates/redeem-properties/src/models/rt_model.rs +++ b/crates/redeem-properties/src/models/rt_model.rs @@ -1,10 +1,13 @@ // rt_model.rs use std::path::Path; +use std::ops::Deref; use candle_core::{Device, Tensor}; use anyhow::{Result, anyhow}; +use candle_nn::VarMap; use crate::models::model_interface::{ModelInterface,PredictionResult}; use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; +use crate::models::rt_cnn_transformer_model::RTCNNTFModel; use crate::utils::data_handling::PeptideData; use std::collections::HashMap; use crate::utils::peptdeep_utils::ModificationMap; @@ -33,10 +36,10 @@ impl Clone for RTModelWrapper { impl RTModelWrapper { - pub fn new>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { + pub fn new>(model_path: P, constants_path: Option
<P>
, arch: &str, device: Device) -> Result { let model: Box = match arch { "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device)?), - // Add other cases here as you implement more models + "rt_cnn_tf" => Box::new(RTCNNTFModel::new(model_path, constants_path, 0, 8, 4, true, device)?), _ => return Err(anyhow!("Unsupported RT model architecture: {}", arch)), }; @@ -47,10 +50,18 @@ impl RTModelWrapper { self.model.predict(peptide_sequence, mods, mod_sites, None, None, None) } + pub fn train(&mut self, training_data: &Vec, val_data: Option<&Vec>, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result, f32, Option)>> { + self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience) + } + pub fn fine_tune(&mut self, training_data: &Vec, modifications: HashMap<(String, Option), ModificationMap>, batch_size:usize, learning_rate: f64, epochs: usize) -> Result<()> { self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs) } + pub fn inference(&mut self, inference_data: &Vec, batch_size: usize, modifications: HashMap<(String, Option), ModificationMap>, rt_norm_params: Option<(f32, f32)>,) -> Result> { + self.model.inference(inference_data, batch_size, modifications, rt_norm_params) + } + pub fn set_evaluation_mode(&mut self) { self.model.set_evaluation_mode() } @@ -73,7 +84,7 @@ impl RTModelWrapper { } // Public API Function to load a new RT model -pub fn load_retention_time_model>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { +pub fn load_retention_time_model>(model_path: P, constants_path: Option
<P>
, arch: &str, device: Device) -> Result { RTModelWrapper::new(model_path, constants_path, arch, device) } diff --git a/crates/redeem-properties/src/utils/data_handling.rs b/crates/redeem-properties/src/utils/data_handling.rs index 84f26d0..11520ce 100644 --- a/crates/redeem-properties/src/utils/data_handling.rs +++ b/crates/redeem-properties/src/utils/data_handling.rs @@ -1,5 +1,5 @@ - +#[derive(Clone)] pub struct PeptideData { pub sequence: String, pub charge: Option, diff --git a/crates/redeem-properties/src/utils/logging.rs b/crates/redeem-properties/src/utils/logging.rs index 29c3f0f..9b6d322 100644 --- a/crates/redeem-properties/src/utils/logging.rs +++ b/crates/redeem-properties/src/utils/logging.rs @@ -86,7 +86,7 @@ impl Progress { let new_count = self.count.fetch_add(1, Ordering::AcqRel) + 1; if new_count > self.total { - println!("⚠️ WARNING: Extra update detected! Skipping..."); + log::trace!("⚠️ WARNING: Progress logger received and extra update! This is likely because the logger was initialized with an incorrect total counter, and the process is iterating beyond that counter."); return; // Prevent overflow } diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs index 8b274b5..f477527 100644 --- a/crates/redeem-properties/src/utils/peptdeep_utils.rs +++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs @@ -123,6 +123,7 @@ impl Default for ModelConstants { "Lumos".into(), "timsTOF".into(), "SciexTOF".into(), + "ThermoTOF".into(), ], max_instrument_num: 8, mod_elements: vec![ diff --git a/crates/redeem-properties/src/utils/utils.rs b/crates/redeem-properties/src/utils/utils.rs index 34572a6..5b21291 100644 --- a/crates/redeem-properties/src/utils/utils.rs +++ b/crates/redeem-properties/src/utils/utils.rs @@ -1,4 +1,4 @@ -use candle_core::Device; +use candle_core::{Device, Tensor}; use candle_core::utils::{cuda_is_available, metal_is_available}; use anyhow::{Result, anyhow}; use std::f64::consts::PI; @@ -150,6 +150,29 @@ pub fn device(cpu: bool) -> Result { } +pub fn get_tensor_stats(x: &Tensor) -> Result<(f32, f32, f32), candle_core::Error> { + // let flat: Vec = match x.rank() { + // 0 => vec![x.to_scalar::()?], + // 1 => x.to_vec1::()?, + // 2 => x.to_vec2::()?.into_iter().flatten().collect(), + // 3 => x.to_vec3::()?.into_iter().flatten().flatten().collect(), + // _ => return Err(candle_core::Error::Msg(format!("Unsupported tensor rank: {}", x.rank()))), + // }; + let flat = x.flatten_all()?.to_vec1::()?; + + if flat.is_empty() { + return Err(candle_core::Error::Msg("Tensor has no elements to compute stats.".to_string())); + } + + let mean = flat.iter().copied().sum::() / flat.len() as f32; + let min = flat.iter().copied().fold(f32::INFINITY, f32::min); + let max = flat.iter().copied().fold(f32::NEG_INFINITY, f32::max); + + Ok((mean, min, max)) +} + + + #[cfg(test)] mod tests { use super::*; From bf62774fe0e032a087742ed23094fe744f26fa0d Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 21:44:30 -0400 Subject: [PATCH 23/75] feat: Add inference functionality to redeem-cli --- crates/redeem-cli/src/main.rs | 72 ++++++++++++++++--- .../src/properties/inference/inference.rs | 52 ++++++++++++++ .../src/properties/inference/input.rs | 69 ++++++++++++++++++ .../src/properties/inference/mod.rs | 3 + .../src/properties/inference/output.rs | 46 ++++++++++++ 5 files changed, 233 insertions(+), 9 deletions(-) create mode 100644 crates/redeem-cli/src/properties/inference/inference.rs create mode 100644 
crates/redeem-cli/src/properties/inference/input.rs create mode 100644 crates/redeem-cli/src/properties/inference/mod.rs create mode 100644 crates/redeem-cli/src/properties/inference/output.rs diff --git a/crates/redeem-cli/src/main.rs b/crates/redeem-cli/src/main.rs index a6f8874..d9ea4dd 100644 --- a/crates/redeem-cli/src/main.rs +++ b/crates/redeem-cli/src/main.rs @@ -3,8 +3,10 @@ use log::LevelFilter; use std::path::PathBuf; use anyhow::Result; -use redeem_cli::properties::train::input::{self, PropertyTrainConfig}; +use redeem_cli::properties::train::input::PropertyTrainConfig; use redeem_cli::properties::train::trainer; +use redeem_cli::properties::inference::input::PropertyInferenceConfig; +use redeem_cli::properties::inference::inference; fn main() -> Result<()> { env_logger::Builder::default() @@ -79,13 +81,52 @@ fn main() -> Result<()> { "ccs_cnn_lstm", ]) .required(false) - ) - .help_template( - "{usage-heading} {usage}\n\n\ - {about-with-newline}\n\ - Written by {author-with-newline}Version {version}\n\n\ - {all-args}{after-help}", + ) + .arg( + Arg::new("checkpoint_file") + .short('c') + .long("checkpoint_file") + .value_parser(clap::builder::NonEmptyStringValueParser::new()) + .help( + "File path of the checkpoint safetensors file to load. \ + Overrides the checkpoint_file specified in the configuration file.", + ) + .value_hint(ValueHint::FilePath), ), + ) + .subcommand(Command::new("inference") + .about("Perform inference on new data using a trained model") + .arg( + Arg::new("config") + .help("Path to training configuration file") + .required(true) + .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("model_path") + .short('m') + .long("model") + .help("Path to the trained model file (*.safetensors)") + .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("inference_data") + .short('d') + .long("inference_data") + .help("Path to the input data file") + .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ) + .arg( + Arg::new("output_file") + .short('o') + .long("output_file") + .help("Path to the output file for predictions (*.tsv or *.csv)") + .value_parser(clap::value_parser!(PathBuf)) + .value_hint(ValueHint::FilePath), + ) ), ) .subcommand( @@ -103,6 +144,12 @@ fn main() -> Result<()> { ), ), ) + .help_template( + "{usage-heading} {usage}\n\n\ + {about-with-newline}\n\ + Written by {author-with-newline}Version {version}\n\n\ + {all-args}{after-help}", + ) .get_matches(); match matches.subcommand() { @@ -116,10 +163,17 @@ fn handle_properties(matches: &ArgMatches) -> Result<()> { match matches.subcommand() { Some(("train", train_matches)) => { let config_path: &PathBuf = train_matches.get_one("config").unwrap(); - println!("[ReDeeM::Properties] Training from config: {:?}", config_path); - let params: PropertyTrainConfig = input::PropertyTrainConfig::from_arguments(config_path, train_matches)?; + log::info!("[ReDeeM::Properties] Training from config: {:?}", config_path); + let params: PropertyTrainConfig = PropertyTrainConfig::from_arguments(config_path, train_matches)?; let _ = trainer::run_training(¶ms); Ok(()) + }, + Some(("inference", inference_matches)) => { + let config_path: &PathBuf = inference_matches.get_one("config").unwrap(); + log::info!("[ReDeeM::Properties] Inference using config: {:?}", config_path); + let params: PropertyInferenceConfig = PropertyInferenceConfig::from_arguments(config_path, inference_matches)?; + let _ = 
inference:: run_inference(¶ms); + Ok(()) } _ => unreachable!(), } diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs new file mode 100644 index 0000000..876af7e --- /dev/null +++ b/crates/redeem-cli/src/properties/inference/inference.rs @@ -0,0 +1,52 @@ +use anyhow::{Context, Result}; +use redeem_properties::utils::data_handling::PeptideData; +use redeem_properties::utils::peptdeep_utils::load_modifications; +use redeem_properties::utils::utils::get_device; +use redeem_properties::models::rt_model::load_retention_time_model; + +use crate::properties::load_data::load_peptide_data; +use crate::properties::util::write_bytes_to_file; +use crate::properties::inference::input::PropertyInferenceConfig; +use crate::properties::inference::output::write_peptide_data; + +pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { + + // Load inference data + let (inference_data, norm_factor) = load_peptide_data(&config.inference_data, Some(config.nce), Some(config.instrument.clone()), true)?; + log::info!("Loaded {} peptides", inference_data.len()); + + // Dispatch model training based on architecture + let model_arch = config.model_arch.as_str(); + let device = get_device(&config.device)?; + + let mut model = load_retention_time_model( + &config.model_path, + None, + &config.model_arch, + device.clone(), + )?; + + let modifications = load_modifications().context("Failed to load modifications")?; + + let start_time = std::time::Instant::now(); + model.set_evaluation_mode(); + let inference_results: Vec = model.inference( + &inference_data, + config.batch_size, + modifications, + norm_factor, + )?; + log::info!("Inference completed in {:?}", start_time.elapsed()); + + + log::info!("Predictions saved to: {}", config.output_file); + write_peptide_data(&inference_results, &config.output_file)?; + + let path = "redeem_inference_config.json"; + let json = serde_json::to_string_pretty(&config)?; + println!("{}", json); + let bytes = serde_json::to_vec_pretty(&config)?; + write_bytes_to_file(path, &bytes)?; + + Ok(()) +} diff --git a/crates/redeem-cli/src/properties/inference/input.rs b/crates/redeem-cli/src/properties/inference/input.rs new file mode 100644 index 0000000..0e6119e --- /dev/null +++ b/crates/redeem-cli/src/properties/inference/input.rs @@ -0,0 +1,69 @@ +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::PathBuf; +use clap::ArgMatches; +use anyhow::{Context, Result}; + +use crate::properties::util::validate_tsv_or_csv_file; + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct PropertyInferenceConfig { + pub model_path: String, + pub inference_data: String, + pub output_file: String, + pub model_arch: String, + pub device: String, + pub batch_size: usize, + pub instrument: String, + pub nce: i32, +} + +impl Default for PropertyInferenceConfig { + fn default() -> Self { + PropertyInferenceConfig { + model_path: String::new(), + inference_data: String::new(), + output_file: String::from("redeem_inference.csv"), + model_arch: String::from("rt_cnn_tf"), + device: String::from("cpu"), + batch_size: 64, + instrument: String::from("QE"), + nce: 20, + } + } +} + +impl PropertyInferenceConfig { + pub fn from_arguments(config_path: &PathBuf, matches: &ArgMatches) -> Result { + let config_json = fs::read_to_string(config_path) + .with_context(|| format!("Failed to read config file: {:?}", config_path))?; + + let mut config: PropertyInferenceConfig = serde_json::from_str(&config_json) + 
.unwrap_or_else(|_| PropertyInferenceConfig::default()); + + // Apply CLI overrides + if let Some(model_path) = matches.get_one::("model_path") { + config.model_path = model_path.clone(); + } else { + config.model_path = config.model_path.clone(); + } + + if let Some(inference_data) = matches.get_one::("inference_data") { + validate_tsv_or_csv_file(inference_data)?; + config.inference_data = inference_data.clone().to_string(); + } else { + validate_tsv_or_csv_file(&config.inference_data)?; + } + + if let Some(output_file) = matches.get_one::("output_file") { + config.output_file = output_file.clone(); + } + + if let Some(model_arch) = matches.get_one::("model_arch") { + config.model_arch = model_arch.clone(); + } + + Ok(config) + } +} + diff --git a/crates/redeem-cli/src/properties/inference/mod.rs b/crates/redeem-cli/src/properties/inference/mod.rs new file mode 100644 index 0000000..54c03a5 --- /dev/null +++ b/crates/redeem-cli/src/properties/inference/mod.rs @@ -0,0 +1,3 @@ +pub mod inference; +pub mod input; +pub mod output; \ No newline at end of file diff --git a/crates/redeem-cli/src/properties/inference/output.rs b/crates/redeem-cli/src/properties/inference/output.rs new file mode 100644 index 0000000..2e91c97 --- /dev/null +++ b/crates/redeem-cli/src/properties/inference/output.rs @@ -0,0 +1,46 @@ +use std::fs::File; +use std::io::{BufWriter, Write}; +use anyhow::{Result, Context}; +use std::path::Path; +use redeem_properties::utils::data_handling::PeptideData; + +/// Write a vector of PeptideData to a CSV or TSV file based on file extension. +pub fn write_peptide_data>(data: &[PeptideData], output_path: P) -> Result<()> { + let path = output_path.as_ref(); + let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("csv"); + let delimiter = match extension { + "tsv" => '\t', + _ => ',', + }; + + let file = File::create(path).with_context(|| format!("Failed to create output file: {:?}", path))?; + let mut writer = csv::WriterBuilder::new() + .delimiter(delimiter as u8) + .from_writer(BufWriter::new(file)); + + // Write headers + writer.write_record(&["sequence", "charge", "nce", "instrument", "retention_time", "ion_mobility", "ms2_intensities"])?; + + for entry in data { + let ms2_str = entry.ms2_intensities.as_ref() + .map(|intensities| { + intensities.iter() + .map(|v| v.iter().map(|f| f.to_string()).collect::>().join(",")) + .collect::>().join("|") + }) + .unwrap_or_default(); + + writer.write_record(&[ + &entry.sequence, + &entry.charge.map_or(String::new(), |c| c.to_string()), + &entry.nce.map_or(String::new(), |n| n.to_string()), + &entry.instrument.clone().unwrap_or_default(), + &entry.retention_time.map_or(String::new(), |r| format!("{:.4}", r)), + &entry.ion_mobility.map_or(String::new(), |im| format!("{:.4}", im)), + &ms2_str, + ])?; + } + + writer.flush()?; + Ok(()) +} \ No newline at end of file From eab57a03e1134b1109e361ce4facadf5e1bdf793 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 21:44:59 -0400 Subject: [PATCH 24/75] refactor: Add new modules for training and loading data in redeem-cli --- crates/redeem-cli/Cargo.toml | 4 + crates/redeem-cli/src/properties/load_data.rs | 68 +++++--- crates/redeem-cli/src/properties/mod.rs | 4 +- .../redeem-cli/src/properties/train/input.rs | 31 ++-- crates/redeem-cli/src/properties/train/mod.rs | 16 +- .../redeem-cli/src/properties/train/plot.rs | 79 ++++++++++ .../src/properties/train/trainer.rs | 145 ++++++++++++++++-- crates/redeem-cli/src/properties/util.rs | 26 ++++ 8 files changed, 319 
insertions(+), 54 deletions(-) create mode 100644 crates/redeem-cli/src/properties/train/plot.rs create mode 100644 crates/redeem-cli/src/properties/util.rs diff --git a/crates/redeem-cli/Cargo.toml b/crates/redeem-cli/Cargo.toml index 8e2fbd7..f130987 100644 --- a/crates/redeem-cli/Cargo.toml +++ b/crates/redeem-cli/Cargo.toml @@ -18,6 +18,10 @@ anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" csv = "1.1" +report-builder = "0.1.0" +maud = "0.27.0" +plotly = "0.12.1" +rand = "0.8" [dependencies.candle-core] version = "0.8.4" diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs index 7be1cfb..0c423ef 100644 --- a/crates/redeem-cli/src/properties/load_data.rs +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -5,18 +5,17 @@ use anyhow::{Result, Context}; use csv::ReaderBuilder; use redeem_properties::utils::data_handling::PeptideData; -/// Load peptide training data from a CSV or TSV file. +/// Load peptide training data from a CSV or TSV file and optionally normalize RT. /// -/// Automatically determines the delimiter and supports RT models. -/// Currently expects columns: "sequence", "retention time" (others optional). -/// -/// # Arguments -/// * `path` - Path to the input CSV/TSV file -/// -/// # Returns -/// Vector of parsed `PeptideData` records -pub fn load_peptide_data>(path: P) -> Result> { - let file = File::open(&path).with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?; +/// Returns both the peptide vector and optionally (mean, std) of retention times. +pub fn load_peptide_data>( + path: P, + nce: Option, + instrument: Option, + normalize_rt: bool, +) -> Result<(Vec, Option<(f32, f32)>)> { + let file = File::open(&path) + .with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?; let reader = BufReader::new(file); let is_tsv = path.as_ref().extension().map(|e| e == "tsv").unwrap_or(false); @@ -28,8 +27,9 @@ pub fn load_peptide_data>(path: P) -> Result> { .from_reader(reader); let headers = rdr.headers()?.clone(); - let mut peptides = Vec::new(); + let mut rt_values = Vec::new(); + for result in rdr.records() { let record = result?; @@ -46,24 +46,50 @@ pub fn load_peptide_data>(path: P) -> Result> { .get(headers.iter().position(|h| h == "charge").unwrap_or(usize::MAX)) .and_then(|s| s.parse::().ok()); - let nce = record - .get(headers.iter().position(|h| h == "nce").unwrap_or(usize::MAX)) - .and_then(|s| s.parse::().ok()); + let in_nce = nce.or_else(|| { + record + .get(headers.iter().position(|h| h == "nce").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()) + }); - let instrument = record - .get(headers.iter().position(|h| h == "instrument").unwrap_or(usize::MAX)) - .map(|s| s.to_string()); + let in_instrument = instrument.clone().or_else(|| { + record + .get(headers.iter().position(|h| h == "instrument").unwrap_or(usize::MAX)) + .map(|s| s.to_string()) + }); + + if let Some(rt) = retention_time { + rt_values.push(rt); + } peptides.push(PeptideData::new( &sequence, charge, - nce, - instrument.as_deref(), + in_nce, + in_instrument.as_deref(), retention_time, None, None, )); } - Ok(peptides) + if normalize_rt && !rt_values.is_empty() { + let mean = rt_values.iter().copied().sum::() / rt_values.len() as f32; + let std = (rt_values + .iter() + .map(|v| (v - mean).powi(2)) + .sum::() + / rt_values.len() as f32) + .sqrt(); + + for peptide in &mut peptides { + if let Some(rt) = peptide.retention_time.as_mut() { + *rt = (*rt - mean) / std; + } + } + + 
Ok((peptides, Some((mean, std)))) + } else { + Ok((peptides, None)) + } } diff --git a/crates/redeem-cli/src/properties/mod.rs b/crates/redeem-cli/src/properties/mod.rs index eb69af4..b53ed37 100644 --- a/crates/redeem-cli/src/properties/mod.rs +++ b/crates/redeem-cli/src/properties/mod.rs @@ -1,2 +1,4 @@ pub mod train; -pub mod load_data; \ No newline at end of file +pub mod inference; +pub mod load_data; +pub mod util; diff --git a/crates/redeem-cli/src/properties/train/input.rs b/crates/redeem-cli/src/properties/train/input.rs index fb5d3f3..58d3fd9 100644 --- a/crates/redeem-cli/src/properties/train/input.rs +++ b/crates/redeem-cli/src/properties/train/input.rs @@ -1,20 +1,25 @@ -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::fs; use std::path::PathBuf; use clap::ArgMatches; use anyhow::{Context, Result}; -#[derive(Debug, Deserialize, Clone)] +use crate::properties::util::validate_tsv_or_csv_file; + +#[derive(Debug, Deserialize, Serialize, Clone)] pub struct PropertyTrainConfig { + pub version: String, pub train_data: String, pub validation_data: Option, pub output_file: String, pub model_arch: String, pub device: String, pub batch_size: usize, + pub validation_batch_size: Option, pub learning_rate: f32, pub epochs: usize, pub early_stopping_patience: usize, + pub checkpoint_file: Option, pub instrument: String, pub nce: i32, } @@ -22,15 +27,18 @@ pub struct PropertyTrainConfig { impl Default for PropertyTrainConfig { fn default() -> Self { PropertyTrainConfig { + version: clap::crate_version!().to_string(), train_data: String::new(), validation_data: None, output_file: String::from("rt_cnn_tf.safetensors"), model_arch: String::from("rt_cnn_tf"), device: String::from("cpu"), batch_size: 64, + validation_batch_size: None, learning_rate: 1e-3, epochs: 10, early_stopping_patience: 5, + checkpoint_file: None, instrument: String::from("QE"), nce: 20, } @@ -68,23 +76,12 @@ impl PropertyTrainConfig { config.model_arch = model_arch.clone(); } + if let Some(checkpoint_file) = matches.get_one::("checkpoint_file") { + config.checkpoint_file = Some(checkpoint_file.clone()); + } + Ok(config) } } -pub fn validate_tsv_or_csv_file(path: &str) -> Result<()> { - let pb = PathBuf::from(path); - - let ext = pb.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()); - match ext.as_deref() { - Some("tsv") | Some("csv") => {} - _ => anyhow::bail!("File must have a .tsv or .csv extension: {}", path), - } - - if !pb.exists() { - anyhow::bail!("File does not exist: {}", path); - } - - Ok(()) -} diff --git a/crates/redeem-cli/src/properties/train/mod.rs b/crates/redeem-cli/src/properties/train/mod.rs index d60a05a..1b1dd9e 100644 --- a/crates/redeem-cli/src/properties/train/mod.rs +++ b/crates/redeem-cli/src/properties/train/mod.rs @@ -1,2 +1,16 @@ pub mod input; -pub mod trainer; \ No newline at end of file +pub mod trainer; +pub mod plot; + +use rand::seq::SliceRandom; +use rand::thread_rng; +use redeem_properties::utils::data_handling::PeptideData; + +pub fn sample_peptides(peptides: &[PeptideData], n: usize) -> Vec { + let mut rng = thread_rng(); + let sample_size = n.min(peptides.len()); + peptides + .choose_multiple(&mut rng, sample_size) + .cloned() + .collect() +} diff --git a/crates/redeem-cli/src/properties/train/plot.rs b/crates/redeem-cli/src/properties/train/plot.rs new file mode 100644 index 0000000..f5b7b80 --- /dev/null +++ b/crates/redeem-cli/src/properties/train/plot.rs @@ -0,0 +1,79 @@ +use plotly::{Layout, Plot, Scatter}; +use plotly::common::{Fill, Mode, 
Title}; + +pub fn plot_losses( + epoch_losses: &[(usize, f32, Option, f32, Option)] +) -> Plot { + let epochs: Vec<_> = epoch_losses.iter().map(|(e, _, _, _, _)| *e as f64).collect(); + + let train_mean: Vec<_> = epoch_losses.iter().map(|(_, m, _, _, _)| *m as f64).collect(); + let train_std: Vec<_> = epoch_losses.iter().map(|(_, _, _, std, _)| *std as f64).collect(); + let train_upper: Vec<_> = train_mean.iter().zip(&train_std).map(|(m, s)| m + s).collect(); + let train_lower: Vec<_> = train_mean.iter().zip(&train_std).map(|(m, s)| m - s).collect(); + + let val_mean: Vec<_> = epoch_losses.iter().map(|(_, _, val, _, _)| val.unwrap_or(f32::NAN) as f64).collect(); + let val_std: Vec<_> = epoch_losses.iter().map(|(_, _, _, _, val_std)| val_std.unwrap_or(0.0) as f64).collect(); + let val_upper: Vec<_> = val_mean.iter().zip(&val_std).map(|(m, s)| m + s).collect(); + let val_lower: Vec<_> = val_mean.iter().zip(&val_std).map(|(m, s)| m - s).collect(); + + let mut plot = Plot::new(); + + // Training loss line + plot.add_trace( + Scatter::new(epochs.clone(), train_mean.clone()) + .name("Train Loss") + .mode(Mode::Lines) + .line(plotly::common::Line::new().color("rgba(31, 119, 180, 1.0)")), + ); + + // Training loss band + let mut train_band_y = train_upper.clone(); + let mut train_band_x = epochs.clone(); + let mut lower_reversed: Vec<_> = train_lower.iter().cloned().rev().collect(); + let mut x_reversed: Vec<_> = epochs.iter().cloned().rev().collect(); + train_band_y.extend(lower_reversed); + train_band_x.extend(x_reversed); + + plot.add_trace( + Scatter::new(train_band_x, train_band_y) + .name("Train ± σ") + .mode(Mode::Lines) + .fill(Fill::ToSelf) + .line(plotly::common::Line::new().width(0.0)) + .fill_color("rgba(31, 119, 180, 0.2)") + ); + + // Validation loss line + plot.add_trace( + Scatter::new(epochs.clone(), val_mean.clone()) + .name("Val Loss") + .mode(Mode::Lines) + .line(plotly::common::Line::new().color("rgba(255, 127, 14, 1.0)")), + ); + + // Validation loss band + let mut val_band_y = val_upper.clone(); + let mut val_band_x = epochs.clone(); + let mut val_lower_rev: Vec<_> = val_lower.iter().cloned().rev().collect(); + let mut val_x_rev: Vec<_> = epochs.iter().cloned().rev().collect(); + val_band_y.extend(val_lower_rev); + val_band_x.extend(val_x_rev); + + plot.add_trace( + Scatter::new(val_band_x, val_band_y) + .name("Val ± σ") + .mode(Mode::Lines) + .fill(Fill::ToSelf) + .line(plotly::common::Line::new().width(0.0)) + .fill_color("rgba(255, 127, 14, 0.2)") + ); + + plot.set_layout( + Layout::new() + .title("Training and Validation Loss Over Epochs") + .x_axis(plotly::layout::Axis::new().title("Epoch")) + .y_axis(plotly::layout::Axis::new().title("Loss")) + ); + + plot +} diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index 2d23994..f16a4bf 100644 --- a/crates/redeem-cli/src/properties/train/trainer.rs +++ b/crates/redeem-cli/src/properties/train/trainer.rs @@ -1,30 +1,40 @@ use anyhow::{Context, Result}; -use input::PropertyTrainConfig; -use load_data::load_peptide_data; use redeem_properties::models::model_interface::ModelInterface; +use redeem_properties::models::rt_model::load_retention_time_model; use redeem_properties::models::{rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel}; use redeem_properties::utils::data_handling::PeptideData; use redeem_properties::utils::peptdeep_utils::load_modifications; use redeem_properties::utils::utils::get_device; -use std::path::PathBuf; -use 
candle_core::Device; +use report_builder::{ + plots::{plot_boxplot, plot_pp, plot_scatter, plot_score_histogram}, + Report, ReportSection, +}; +use maud::{html, PreEscaped}; +use input::PropertyTrainConfig; +use load_data::load_peptide_data; use crate::properties::load_data; +use crate::properties::train::plot::plot_losses; +use crate::properties::train::sample_peptides; +use crate::properties::util::write_bytes_to_file; use super::input; pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { // Load training data - let train_peptides: Vec = load_peptide_data(&config.train_data)?; + let (train_peptides, norm_factor) = load_peptide_data(&config.train_data, Some(config.nce), Some(config.instrument.clone()), true)?; log::info!("Loaded {} training peptides", train_peptides.len()); // Load validation data if specified - let val_peptides = if let Some(ref val_path) = config.validation_data { - Some(load_peptide_data(val_path).context("Failed to load validation data")?) + let (val_peptides, _val_norm_factor) = if let Some(ref val_path) = config.validation_data { + let (peptides, norm) = load_peptide_data(val_path, Some(config.nce), Some(config.instrument.clone()), true) + .context("Failed to load validation data")?; + (Some(peptides), Some(norm)) } else { - None + (None, None) }; + if let Some(ref val_data) = val_peptides { log::info!("Loaded {} validation peptides", val_data.len()); @@ -35,29 +45,136 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { // Dispatch model training based on architecture let model_arch = config.model_arch.as_str(); let device = get_device(&config.device)?; + log::trace!("Loading model architecture: {} on device: {:?}", model_arch, device); - let mut model: Box = match model_arch { - "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new_untrained(device.clone())?), - "rt_cnn_tf" => Box::new(RTCNNTFModel::new_untrained(device.clone())?), - _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", model_arch)), + let mut model: Box = match &config.checkpoint_file { + Some(checkpoint_path) => { + log::info!("Loading model from checkpoint: {}", checkpoint_path); + match config.model_arch.as_str() { + "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(checkpoint_path, None, 0, 8, 4, true, device.clone())?), + "rt_cnn_tf" => Box::new(RTCNNTFModel::new(checkpoint_path, None, 0, 8, 4, true, device.clone())?), + _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", config.model_arch)), + } + } + None => { + match config.model_arch.as_str() { + "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new_untrained(device.clone())?), + "rt_cnn_tf" => Box::new(RTCNNTFModel::new_untrained(device.clone())?), + _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", config.model_arch)), + } + } }; + + + log::trace!("Model loaded successfully"); + + log::trace!("Loading modifications map"); let modifications = load_modifications().context("Failed to load modifications")?; let start_time = std::time::Instant::now(); - model.train( + log::trace!("Training started"); + let epoch_losses = model.train( &train_peptides, val_peptides.as_ref(), - modifications, + modifications.clone(), config.batch_size, + config.validation_batch_size.unwrap_or(config.batch_size), config.learning_rate as f64, config.epochs, config.early_stopping_patience, )?; log::info!("Training completed in {:?}", start_time.elapsed()); + // Generate report + let mut report = Report::new( + "ReDeeM", + &config.version, + 
Some("https://github.com/singjc/redeem/blob/master/img/redeem_logo.png?raw=true"), + "ReDeeM Trainer Report", + ); + + /* Section 1: Overview */ + { + let mut overview_section = ReportSection::new("Overview"); + + overview_section.add_content(html! { + "This report summarizes the training process of the ReDeeM model." + }); + + let losses_plot = plot_losses(&epoch_losses); + overview_section.add_plot(losses_plot); + + // Lets perform inference on 1000 random samples from the validation set + let val_peptides: Vec = sample_peptides(&val_peptides.as_ref().unwrap(), 1000); + let inference_results: Vec = model.inference( + &val_peptides, + config.batch_size, + modifications, + norm_factor, + )?; + let (true_rt, pred_rt): (Vec, Vec) = val_peptides + .iter() + .zip(&inference_results) + .filter_map(|(true_pep, pred_pep)| { + match (true_pep.retention_time, pred_pep.retention_time) { + (Some(t), Some(p)) => { + let t_denorm = t as f64 * norm_factor.unwrap().1 as f64 + norm_factor.unwrap().0 as f64; // de-normalized true RT + Some((t_denorm, p as f64)) // assume predicted is already de-normalized + }, + _ => None, + } + }) + .unzip(); + + + let scatter_plot = plot_scatter( + &vec![true_rt.clone()], + &vec![pred_rt.clone()], + vec!["RT Prediction".to_string()], + "Predicted vs True RT", + "Target RT", + "Predicted RT" + ).unwrap(); + overview_section.add_plot(scatter_plot); + report.add_section(overview_section); + } + + /* Section 2: Configuration */ + { + let mut config_section = ReportSection::new("Configuration"); + config_section.add_content(html! { + style { + ".code-container { + background-color: #f5f5f5; + padding: 10px; + border-radius: 5px; + overflow-x: auto; + font-family: monospace; + white-space: pre-wrap; + }" + } + div class="code-container" { + pre { + code { (PreEscaped(serde_json::to_string_pretty(&config)?)) } + } + } + }); + report.add_section(config_section); + } + + // Save the report to HTML file + let path = "redeem_trainer_report.html"; + report.save_to_file(&path.to_string())?; + model.save(&config.output_file)?; log::info!("Model saved to: {}", config.output_file); + let path = "redeem_trainer_config.json"; + let json = serde_json::to_string_pretty(&config)?; + println!("{}", json); + let bytes = serde_json::to_vec_pretty(&config)?; + write_bytes_to_file(path, &bytes)?; + Ok(()) } diff --git a/crates/redeem-cli/src/properties/util.rs b/crates/redeem-cli/src/properties/util.rs new file mode 100644 index 0000000..27ef965 --- /dev/null +++ b/crates/redeem-cli/src/properties/util.rs @@ -0,0 +1,26 @@ +use anyhow::Result; +use std::{fs::File, io::Write, path::{Path, PathBuf}}; + + +pub fn validate_tsv_or_csv_file(path: &str) -> Result<()> { + let pb = PathBuf::from(path); + + let ext = pb.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()); + match ext.as_deref() { + Some("tsv") | Some("csv") => {} + _ => anyhow::bail!("File must have a .tsv or .csv extension: {}", path), + } + + if !pb.exists() { + anyhow::bail!("File does not exist: {}", path); + } + + Ok(()) +} + +pub fn write_bytes_to_file(path: &str, bytes: &[u8]) -> std::io::Result<()> { + let path = Path::new(path); + let mut file = File::create(path)?; + file.write_all(bytes)?; + Ok(()) +} \ No newline at end of file From 0b68ac33ae40be098b897baa799d145b85cf88f1 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 21:45:15 -0400 Subject: [PATCH 25/75] refactor: Update Dockerfile to optimize build process and clean up artifacts --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) 
diff --git a/Dockerfile b/Dockerfile index 4679edb..c723776 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,7 +34,8 @@ ENV CUDA_COMPUTE_CAP=70 WORKDIR /app # Copy the source code into the container -COPY . . +COPY Cargo.toml Cargo.lock ./ +COPY crates ./crates # Build the application with CUDA support RUN cargo build --release --bin redeem --features cuda @@ -42,5 +43,8 @@ RUN cargo build --release --bin redeem --features cuda # Copy the binary into the PATH RUN cp target/release/redeem /app/redeem +# clean up build artifacts +RUN cargo clean + # Set the PATH environment variable ENV PATH="/app:${PATH}" \ No newline at end of file From e4bfaf9193f89f7b42920c1f31e09ddf6d45b757 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 22:02:28 -0400 Subject: [PATCH 26/75] add: Encoder26aaModChargeCnnTransformerAttnSum implementation --- .../src/building_blocks/building_blocks.rs | 126 ++++++++++++++++-- 1 file changed, 118 insertions(+), 8 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index edd7250..c1e164d 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -998,19 +998,13 @@ impl Encoder26aaModChargeCnnLstmAttnSum { pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor, charges: &Tensor) -> Result { - let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let charges_repeated = charges.unsqueeze(1)?.repeat(&[1, mod_x.dim(1)?, 1])?; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - charges_repeated forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); let additional_tensors: Vec<&Tensor> = vec![&mod_x, &charges_repeated]; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; @@ -1152,6 +1146,122 @@ impl Encoder26aaModCnnTransformerAttnSum { } +/// Encode AAs (26 AA letters), modifications and Charge state using CNN + Transformer + AttentionSum. 
+#[derive(Debug, Clone)] +pub struct Encoder26aaModChargeCnnTransformerAttnSum { + mod_nn: ModEmbeddingFixFirstK, + input_cnn: SeqCNN, + input_transformer: SeqTransformer, + attn_sum: SeqAttentionSum, +} + +impl Encoder26aaModChargeCnnTransformerAttnSum { + pub fn from_varstore( + varstore: &nn::VarBuilder, + mod_hidden_dim: usize, + hidden_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + names_mod_nn: Vec<&str>, + names_input_cnn_weight: Vec<&str>, + names_input_cnn_bias: Vec<&str>, + transformer_pp: &str, + names_attn_sum: Vec<&str>, + device: &Device, + ) -> Result { + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + Ok(Self { + mod_nn: ModEmbeddingFixFirstK::from_varstore( + &varstore, + MOD_FEATURE_SIZE, + mod_hidden_dim, + names_mod_nn[0], + )?, + input_cnn: SeqCNN::from_varstore( + varstore.clone(), + input_dim, + names_input_cnn_weight, + names_input_cnn_bias, + )?, + input_transformer: SeqTransformer::from_varstore( + varstore.pp(transformer_pp).clone(), + input_dim * 4, + hidden_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?, + attn_sum: SeqAttentionSum::from_varstore( + varstore.clone(), + hidden_dim, + names_attn_sum[0], + )?, + }) + } + + /// Construct a CNN+Transformer+Attention encoder from scratch (no pretrained weights). + pub fn new( + varbuilder: &nn::VarBuilder, + mod_hidden_dim: usize, + hidden_dim: usize, + ff_dim: usize, + num_heads: usize, + num_layers: usize, + max_len: usize, + dropout_prob: f32, + device: &Device, + ) -> Result { + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + Ok(Self { + mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?, + input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?, + input_transformer: SeqTransformer::new( + &varbuilder.pp("input_transformer"), + input_dim * 4, + hidden_dim, + ff_dim, + num_heads, + num_layers, + max_len, + dropout_prob, + device, + )?, + attn_sum: SeqAttentionSum::new(hidden_dim, &varbuilder.pp("attn_sum"))?, + }) + } + + pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor, charges: &Tensor) -> Result { + let mod_x = self.mod_nn.forward(mod_x)?; + let charges_repeated = charges.unsqueeze(1)?.repeat(&[1, mod_x.dim(1)?, 1])?; + + let additional_tensors: Vec<&Tensor> = vec![&mod_x, &charges_repeated]; + let x = aa_one_hot(aa_indices, &additional_tensors) + .map_err(|e| candle_core::Error::Msg(e.to_string()))?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.input_cnn.forward(&x)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] input_cnn output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.input_transformer.forward(&x)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] input_transformer output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.attn_sum.forward(&x)?; + + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] attn_sum output stats - min: {min}, max: {max}, mean: {mean}"); + + Ok(x) + } +} #[cfg(test)] mod tests { From 146dedd4b1b70e27d82bd54d3c9d5a021a55e45a Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 22:15:49 -0400 Subject: [PATCH 27/75] refactor: Add CCSCNNTFModel implementation --- 
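A minimal construction sketch for the new architecture string; both file
paths are placeholders, and the wrapper API is the one shown in ccs_model.rs
below:

    use candle_core::Device;
    use redeem_properties::models::ccs_model::CCSModelWrapper;

    fn load_ccs_cnn_tf() -> anyhow::Result<CCSModelWrapper> {
        // "ccs_cnn_tf" dispatches to CCSCNNTFModel::new inside the wrapper.
        CCSModelWrapper::new(
            "ccs_cnn_tf.safetensors",  // placeholder checkpoint path
            "ccs.model_const.yaml",    // placeholder constants YAML
            "ccs_cnn_tf",
            Device::Cpu,
        )
    }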
.../src/models/ccs_cnn_tf_model.rs | 245 ++++++++++++++++++ .../redeem-properties/src/models/ccs_model.rs | 11 +- crates/redeem-properties/src/models/mod.rs | 7 +- 3 files changed, 257 insertions(+), 6 deletions(-) create mode 100644 crates/redeem-properties/src/models/ccs_cnn_tf_model.rs diff --git a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs new file mode 100644 index 0000000..e821546 --- /dev/null +++ b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs @@ -0,0 +1,245 @@ +use anyhow::Result; +use candle_core::{DType, Device, IndexOp, Tensor}; +use candle_nn::{Dropout, Module, VarBuilder, VarMap}; +use std::collections::HashMap; +use std::path::Path; + +use crate::building_blocks::building_blocks::{ + DecoderLinear, Encoder26aaModChargeCnnTransformerAttnSum, MOD_FEATURE_SIZE, +}; +use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map}; +use crate::utils::peptdeep_utils::{ + load_mod_to_feature, + parse_model_constants, ModelConstants, +}; +use crate::utils::utils::get_tensor_stats; + +// Constants +const CHARGE_FACTOR: f64 = 0.1; +const NCE_FACTOR: f64 = 0.01; + +// Main Model Struct + +#[derive(Clone)] +/// Represents an CNN-TF Collision Cross Section model. +pub struct CCSCNNTFModel { + var_store: VarBuilder<'static>, + varmap: VarMap, + constants: ModelConstants, + device: Device, + mod_to_feature: HashMap>, + dropout: Dropout, + ccs_encoder: Encoder26aaModChargeCnnTransformerAttnSum, + ccs_decoder: DecoderLinear, + is_training: bool, +} + +// Automatically implement Send and Sync if all fields are Send and Sync +unsafe impl Send for CCSCNNTFModel {} +unsafe impl Sync for CCSCNNTFModel {} + +// Core Model Implementation + +impl ModelInterface for CCSCNNTFModel { + fn property_type(&self) -> PropertyType { + PropertyType::CCS + } + + fn model_arch(&self) -> &'static str { + "ccs_cnn_tf" + } + + fn new_untrained(device: Device) -> Result { + let mut varmap = VarMap::new(); + let varbuilder = VarBuilder::from_varmap(&varmap, DType::F32, &device); + + log::trace!("[CCSCNNTFModel] Initializing ccs_encoder"); + let ccs_encoder = Encoder26aaModChargeCnnTransformerAttnSum::new( + &varbuilder.pp("ccs_encoder"), + 8, // mod_hidden_dim + 140, // hidden_dim + 256, // ff_dim + 4, // num_heads + 2, // num_layers + 100, // max_len + 0.1, // dropout_prob + &device + )?; + + log::trace!("[CCSCNNTFModel] Initializing ccs_decoder"); + let ccs_decoder = DecoderLinear::new(141, 1, &varbuilder.pp("ccs_decoder"))?; + let constants = ModelConstants::default(); + let mod_to_feature = load_mod_to_feature(&constants)?; + + Ok(Self { + var_store: varbuilder, + varmap, + constants, + device, + mod_to_feature, + dropout: Dropout::new(0.1), + ccs_encoder, + ccs_decoder, + is_training: true, + }) + } + + /// Create a new CCSCNNTFModel from the given model and constants files. + fn new>( + model_path: P, + constants_path: Option
<P>
, + _fixed_sequence_len: usize, + _num_frag_types: usize, + _num_modloss_types: usize, + _mask_modloss: bool, + device: Device, + ) -> Result { + let tensor_data = load_tensors_from_model(model_path.as_ref(), &device)?; + let mut varmap = candle_nn::VarMap::new(); + create_var_map(&mut varmap, tensor_data, &device)?; + let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device); + + let constants = match constants_path { + Some(path) => parse_model_constants(path.as_ref().to_str().unwrap())?, + None => ModelConstants::default(), + }; + + let mod_to_feature = load_mod_to_feature(&constants)?; + let dropout = Dropout::new(0.1); + + let ccs_encoder = Encoder26aaModChargeCnnTransformerAttnSum::from_varstore( + &var_store, + 8, // mod_hidden_dim + 140, // hidden_dim + 256, // ff_dim + 4, // num_heads + 2, // num_layers + 100, // max_len (set appropriately for your sequence length) + 0.1, // dropout_prob + vec!["ccs_encoder.mod_nn.nn.weight"], + vec![ + "ccs_encoder.input_cnn.cnn_short.weight", + "ccs_encoder.input_cnn.cnn_medium.weight", + "ccs_encoder.input_cnn.cnn_long.weight", + ], + vec![ + "ccs_encoder.input_cnn.cnn_short.bias", + "ccs_encoder.input_cnn.cnn_medium.bias", + "ccs_encoder.input_cnn.cnn_long.bias", + ], + "ccs_encoder.input_transformer", + vec!["ccs_encoder.attn_sum.attn.0.weight"], + &device, + )?; + + + let ccs_decoder = DecoderLinear::from_varstore( + &var_store, + 141, + 1, + vec!["ccs_decoder.nn.0.weight", "ccs_decoder.nn.1.weight", "ccs_decoder.nn.2.weight"], + vec!["ccs_decoder.nn.0.bias", "ccs_decoder.nn.2.bias"] + )?; + + Ok(Self { + var_store, + varmap, + constants, + device, + mod_to_feature, + dropout, + ccs_encoder, + ccs_decoder, + is_training: false, + }) + } + + fn forward(&self, xs: &Tensor) -> Result { + let (_batch_size, _seq_len, _) = xs.shape().dims3()?; + + // Separate input into aa_indices, mod_x, charge + let start_mod_x = 1; + let start_charge = start_mod_x + MOD_FEATURE_SIZE; + + let aa_indices_out = xs.i((.., .., 0))?; + let mod_x_out = xs.i((.., .., start_mod_x..start_mod_x + MOD_FEATURE_SIZE))?; + let charge_out = xs.i((.., 0..1, start_charge..start_charge + 1))?; + let charge_out = charge_out.squeeze(2)?; + + let x = self.ccs_encoder.forward(&aa_indices_out, &mod_x_out, &charge_out)?; + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[CCSCNNTFModel] ccs_encoder output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.dropout.forward(&x, self.is_training)?; + log::trace!("[CCSCNNTFModel] x.shape after dropout: {:?}", x.shape()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[CCSCNNTFModel] dropout output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.ccs_decoder.forward(&x)?; + log::trace!("[CCSCNNTFModel] x.shape after ccs_decoder: {:?}", x.shape()); + let (mean, min, max) = get_tensor_stats(&x)?; + log::debug!("[CCSCNNTFModel] ccs_decoder output stats - min: {min}, max: {max}, mean: {mean}"); + Ok(x.squeeze(1)?) + } + + /// Set model to evaluation mode for inference + /// This disables dropout and other training-specific layers. + fn set_evaluation_mode(&mut self) { + // println!("Setting evaluation mode"); + self.is_training = false; + } + + /// Set model to training mode for training + /// This enables dropout and other training-specific layers. 
+ fn set_training_mode(&mut self) { + self.is_training = true; + } + + fn get_property_type(&self) -> String { + self.property_type().clone().as_str().to_string() + } + + fn get_model_arch(&self) -> String { + self.model_arch().to_string() + } + + fn get_device(&self) -> &Device { + &self.device + } + + fn get_mod_element_count(&self) -> usize { + self.constants.mod_elements.len() + } + + fn get_mod_to_feature(&self) -> &HashMap> { + &self.mod_to_feature + } + + fn get_min_pred_intensity(&self) -> f32 { + unimplemented!("Method not implemented for architecture: {}", self.model_arch()) + } + + fn get_mut_varmap(&mut self) -> &mut VarMap { + &mut self.varmap + } + + /// Print a summary of the model's constants. + fn print_summary(&self) { + println!("CCSModel Summary:"); + println!("AA Embedding Size: {}", self.constants.aa_embedding_size.unwrap()); + println!("Charge Factor: {:?}", self.constants.charge_factor); + println!("Instruments: {:?}", self.constants.instruments); + println!("Max Instrument Num: {}", self.constants.max_instrument_num); + println!("Mod Elements: {:?}", self.constants.mod_elements); + println!("NCE Factor: {:?}", self.constants.nce_factor); + } + + /// Print the model's weights. + fn print_weights(&self) { + todo!("Implement print_weights for CCSCNNTFModel"); + } + + +} + + diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs index c6b719a..7befcf3 100644 --- a/crates/redeem-properties/src/models/ccs_model.rs +++ b/crates/redeem-properties/src/models/ccs_model.rs @@ -3,6 +3,7 @@ use candle_core::Device; use anyhow::{Result, anyhow}; use crate::models::model_interface::{ModelInterface,PredictionResult}; use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel; +use crate::models::ccs_cnn_tf_model::CCSCNNTFModel; use crate::utils::data_handling::PeptideData; use std::collections::HashMap; use crate::utils::peptdeep_utils::ModificationMap; @@ -10,11 +11,11 @@ use crate::utils::peptdeep_utils::ModificationMap; // Enum for different types of CCS models pub enum CCSModelArch { CCSCNNLSTM, - // Add other architectures here as needed + CCSCNNTF, } // Constants for different types of CCS models -pub const CCSMODEL_ARCHS: &[&str] = &["ccs_cnn_lstm"]; +pub const CCSMODEL_ARCHS: &[&str] = &["ccs_cnn_lstm", "ccs_cnn_tf"]; // A wrapper struct for CCS models pub struct CCSModelWrapper { @@ -33,7 +34,7 @@ impl CCSModelWrapper { pub fn new>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { let model: Box = match arch { "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), - // Add other cases here as you implement more models + "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), _ => return Err(anyhow!("Unsupported CCS model architecture: {}", arch)), }; @@ -44,6 +45,10 @@ impl CCSModelWrapper { self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), None, None) } + pub fn train(&mut self, training_data: &Vec, val_data: Option<&Vec>, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result, f32, Option)>> { + self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience) + } + pub fn fine_tune(&mut self, training_data: &Vec, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, 
learning_rate: f64, epochs: usize) -> Result<()> { self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs) } diff --git a/crates/redeem-properties/src/models/mod.rs b/crates/redeem-properties/src/models/mod.rs index 502cb15..b7bd4d6 100644 --- a/crates/redeem-properties/src/models/mod.rs +++ b/crates/redeem-properties/src/models/mod.rs @@ -1,8 +1,9 @@ pub mod rt_model; pub mod rt_cnn_lstm_model; -pub mod ms2_bert_model; -pub mod ccs_cnn_lstm_model; +pub mod rt_cnn_transformer_model; pub mod ccs_model; +pub mod ccs_cnn_lstm_model; +pub mod ccs_cnn_tf_model; +pub mod ms2_bert_model; pub mod ms2_model; pub mod model_interface; -pub mod rt_cnn_transformer_model; From ddc39f15435a882f78961fbf4517e13329e09320 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 10 May 2025 22:15:57 -0400 Subject: [PATCH 28/75] refactor: Update RTCNNTFModel implementation and remove unused code --- .../src/models/rt_cnn_transformer_model.rs | 110 ------------------ 1 file changed, 110 deletions(-) diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs index 6008137..09f1b07 100644 --- a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs @@ -4,12 +4,9 @@ use candle_nn::{Dropout, Module, VarBuilder, VarMap}; use std::collections::HashMap; use std::path::Path; - - use crate::building_blocks::building_blocks::{ DecoderLinear, Encoder26aaModCnnTransformerAttnSum, MOD_FEATURE_SIZE, }; -use crate::building_blocks::nn; use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map}; use crate::utils::peptdeep_utils::{ load_mod_to_feature, @@ -249,111 +246,4 @@ impl ModelInterface for RTCNNTFModel { } -// Module Trait Implementation - -// impl Module for RTCNNLSTMModel { -// fn forward(&self, input: &Tensor) -> Result { -// ModelInterface::forward(self, input) -// } -// } - - -#[cfg(test)] -mod tests { - use crate::models::model_interface::ModelInterface; - use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; - use candle_core::Device; - use std::path::PathBuf; - - use super::*; - - #[test] - fn test_parse_model_constants() { - let path = "data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"; - let result = parse_model_constants(path); - assert!(result.is_ok()); - let constants = result.unwrap(); - assert_eq!(constants.aa_embedding_size.unwrap(), 27); - assert_eq!(constants.charge_factor, Some(0.1)); - assert_eq!(constants.instruments.len(), 4); - assert_eq!(constants.max_instrument_num, 8); - assert_eq!(constants.mod_elements.len(), 109); - assert_eq!(constants.nce_factor, Some(0.01)); - } - - #[test] - fn test_encode_peptides() { - let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); - let constants_path = - PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); - let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); - - let peptide_sequences = "AGHCEWQMKYR"; - let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; - let mod_sites = "0;4;8"; - // let charge = Some(2); - // let nce = Some(20); - // let instrument = Some("QE"); - - let result = - model.encode_peptide(&peptide_sequences, mods, mod_sites, None, None, None); - - println!("{:?}", result); - - // assert!(result.is_ok()); - // let encoded_peptides = result.unwrap(); - // 
assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1));
-    }
-    #[test]
-    fn test_encode_peptides_batch() {
-
-        let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth");
-        let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
-        let device = Device::Cpu;
-
-        let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device.clone()).unwrap();
-
-        // Batched input
-        let peptide_sequences = vec![
-            "ACDEFGHIK".to_string(),
-            "AGHCEWQMKYR".to_string(),
-        ];
-        let mods = vec![
-            "Carbamidomethyl@C".to_string(),
-            "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(),
-        ];
-        let mod_sites = vec![
-            "1".to_string(),
-            "0;4;8".to_string(),
-        ];
-
-        println!("Peptides: {:?}", peptide_sequences);
-        println!("Mods: {:?}", mods);
-        println!("Mod sites: {:?}", mod_sites);
-
-
-        let result = model.encode_peptides(
-            &peptide_sequences,
-            &mods,
-            &mod_sites,
-            None,
-            None,
-            None,
-        );
-
-        assert!(result.is_ok());
-        let tensor = result.unwrap();
-        println!("Batched encoded tensor shape: {:?}", tensor.shape());
-
-        let (batch, seq_len, feat_dim) = tensor.shape().dims3().unwrap();
-        assert_eq!(batch, 2); // two peptides
-        assert!(seq_len >= 11); // padded to max length
-        assert!(feat_dim > 1); // includes aa + mod features
-    }
-
-
-
-
-}

From b0e0f22887858303faaa74a819a335edea34865f Mon Sep 17 00:00:00 2001
From: singjc
Date: Sat, 10 May 2025 22:21:23 -0400
Subject: [PATCH 29/75] update: readme

---
 README.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8afa499..d27e5f2 100644
--- a/README.md
+++ b/README.md
@@ -30,15 +30,17 @@ The ReDeeM project consists of two primary crates:
 1. **redeem-properties**:
    - This crate focuses on deep learning models for peptide property prediction. It implements models for predicting retention time (RT), ion mobility (IM), and MS2 fragment intensities using the Candle library.
-   - The models can be fine-tuned on new data and can be saved in the safetensor format for later use.
+   - The models can be trained or fine-tuned on new data and saved in the safetensors format for later use.
    - Current Models

    Model | Name | Architecture | Implemented
    --- | --- | --- | ---
-   AlphaPept RT Model | `redeem_properties::RTCNNLSTMModel` | CNN-LSTM | :heavy_check_mark:
-   AlphaPept MS2 Model | `redeem_properties::MS2BertModel` | Bert | :heavy_check_mark:
-   AlphaPept IM Model | `redeem_properties::CCSCNNLSTMModel` | CNN-LSTM | :heavy_check_mark:
+   AlphaPept RT Model | `rt_cnn_lstm` | CNN-LSTM | :heavy_check_mark:
+   AlphaPept MS2 Model | `ms2_bert` | Bert | :heavy_check_mark:
+   AlphaPept CCS Model | `ccs_cnn_lstm` | CNN-LSTM | :heavy_check_mark:
+   RT Model | `rt_cnn_tf` | CNN-Transformer | :heavy_check_mark:
+   CCS Model | `ccs_cnn_tf` | CNN-Transformer | :heavy_check_mark:
 2. **redeem-classifiers**:
    - This crate is aimed at developing semi-supervised scoring classifier models. The goal is to create models for separating target peptides from decoys.
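The architecture names in the table above are the strings the wrapper constructors dispatch on. A minimal sketch of loading a CCS model through the wrapper (the file paths are placeholders; the signature follows `CCSModelWrapper::new` from `ccs_model.rs`):

```rust
use anyhow::Result;
use candle_core::Device;
use redeem_properties::models::ccs_model::CCSModelWrapper;

fn main() -> Result<()> {
    // The arch string must be one of CCSMODEL_ARCHS ("ccs_cnn_lstm" or "ccs_cnn_tf").
    let _model = CCSModelWrapper::new(
        "data/models/ccs.safetensors",      // placeholder model path
        "data/models/ccs.model_const.yaml", // placeholder constants path
        "ccs_cnn_tf",
        Device::Cpu,
    )?;
    Ok(())
}
```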
From 87797f37122714a935e770b02fada093f9542488 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 11 May 2025 00:14:37 -0400 Subject: [PATCH 30/75] refactor: Improve regex pattern for extracting modification indices in peptdeep_utils.rs --- .../redeem-properties/src/utils/peptdeep_utils.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs index f477527..3a6702e 100644 --- a/crates/redeem-properties/src/utils/peptdeep_utils.rs +++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs @@ -328,8 +328,20 @@ pub fn extract_masses_and_indices(peptide: &str) -> Vec<(f64, usize)> { } +/// Extracts modification indices from a peptide string. +/// The indices are 0-based and represent the positions of the modifications. +/// +/// # Example +/// ``` +/// use redeem_properties::utils::peptdeep_utils::get_modification_indices; +/// let result = get_modification_indices("AC[+57.0215]DE"); +/// assert_eq!(result, "1"); +/// +/// let result = get_modification_indices("AC(UniMod:4)DE"); +/// assert_eq!(result, "1"); +/// ``` pub fn get_modification_indices(peptide: &str) -> String { - let re = Regex::new(r"\[.*?\]").unwrap(); + let re = Regex::new(r"(\[.*?\]|\(UniMod:\d+\)|\([a-zA-Z]+\))").unwrap(); let mut indices = Vec::new(); let mut offset = 1; // Offset by 1 for 0-based index From 410074796dce184b32fa526c1e621ce5260fe3b3 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 11 May 2025 00:14:51 -0400 Subject: [PATCH 31/75] refactor: Update redeem-properties crate models for CCS prediction --- .../src/properties/train/trainer.rs | 126 +++++++++++++----- .../examples/alphapeptdeep_ccs_cnn_lstm.rs | 2 +- .../src/models/model_interface.rs | 22 +-- 3 files changed, 102 insertions(+), 48 deletions(-) diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index f16a4bf..45ed0fe 100644 --- a/crates/redeem-cli/src/properties/train/trainer.rs +++ b/crates/redeem-cli/src/properties/train/trainer.rs @@ -1,40 +1,51 @@ use anyhow::{Context, Result}; +use maud::{PreEscaped, html}; use redeem_properties::models::model_interface::ModelInterface; use redeem_properties::models::rt_model::load_retention_time_model; -use redeem_properties::models::{rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel}; +use redeem_properties::models::{ + ccs_cnn_lstm_model::CCSCNNLSTMModel, ccs_cnn_tf_model::CCSCNNTFModel, + rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel, +}; use redeem_properties::utils::data_handling::PeptideData; use redeem_properties::utils::peptdeep_utils::load_modifications; use redeem_properties::utils::utils::get_device; use report_builder::{ - plots::{plot_boxplot, plot_pp, plot_scatter, plot_score_histogram}, Report, ReportSection, + plots::plot_scatter, }; -use maud::{html, PreEscaped}; -use input::PropertyTrainConfig; -use load_data::load_peptide_data; use crate::properties::load_data; use crate::properties::train::plot::plot_losses; use crate::properties::train::sample_peptides; use crate::properties::util::write_bytes_to_file; +use input::PropertyTrainConfig; +use load_data::load_peptide_data; use super::input; pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { - // Load training data - let (train_peptides, norm_factor) = load_peptide_data(&config.train_data, Some(config.nce), Some(config.instrument.clone()), true)?; + let (train_peptides, norm_factor) = 
load_peptide_data( + &config.train_data, + Some(config.nce), + Some(config.instrument.clone()), + true, + )?; log::info!("Loaded {} training peptides", train_peptides.len()); // Load validation data if specified let (val_peptides, _val_norm_factor) = if let Some(ref val_path) = config.validation_data { - let (peptides, norm) = load_peptide_data(val_path, Some(config.nce), Some(config.instrument.clone()), true) - .context("Failed to load validation data")?; + let (peptides, norm) = load_peptide_data( + val_path, + Some(config.nce), + Some(config.instrument.clone()), + true, + ) + .context("Failed to load validation data")?; (Some(peptides), Some(norm)) } else { (None, None) }; - if let Some(ref val_data) = val_peptides { log::info!("Loaded {} validation peptides", val_data.len()); @@ -45,29 +56,75 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { // Dispatch model training based on architecture let model_arch = config.model_arch.as_str(); let device = get_device(&config.device)?; - log::trace!("Loading model architecture: {} on device: {:?}", model_arch, device); + log::trace!( + "Loading model architecture: {} on device: {:?}", + model_arch, + device + ); let mut model: Box = match &config.checkpoint_file { Some(checkpoint_path) => { log::info!("Loading model from checkpoint: {}", checkpoint_path); match config.model_arch.as_str() { - "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(checkpoint_path, None, 0, 8, 4, true, device.clone())?), - "rt_cnn_tf" => Box::new(RTCNNTFModel::new(checkpoint_path, None, 0, 8, 4, true, device.clone())?), - _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", config.model_arch)), + "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new( + checkpoint_path, + None, + 0, + 8, + 4, + true, + device.clone(), + )?), + "rt_cnn_tf" => Box::new(RTCNNTFModel::new( + checkpoint_path, + None, + 0, + 8, + 4, + true, + device.clone(), + )?), + "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new( + checkpoint_path, + None, + 0, + 8, + 4, + true, + device.clone(), + )?), + "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new( + checkpoint_path, + None, + 0, + 8, + 4, + true, + device.clone(), + )?), + _ => { + return Err(anyhow::anyhow!( + "Unsupported model architecture: {}", + config.model_arch + )); + } } } - None => { - match config.model_arch.as_str() { - "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new_untrained(device.clone())?), - "rt_cnn_tf" => Box::new(RTCNNTFModel::new_untrained(device.clone())?), - _ => return Err(anyhow::anyhow!("Unsupported model architecture: {}", config.model_arch)), + None => match config.model_arch.as_str() { + "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new_untrained(device.clone())?), + "rt_cnn_tf" => Box::new(RTCNNTFModel::new_untrained(device.clone())?), + "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new_untrained(device.clone())?), + "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new_untrained(device.clone())?), + _ => { + return Err(anyhow::anyhow!( + "Unsupported model architecture: {}", + config.model_arch + )); } - } + }, }; - - + log::trace!("Model loaded successfully"); - log::trace!("Loading modifications map"); let modifications = load_modifications().context("Failed to load modifications")?; @@ -107,37 +164,34 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { // Lets perform inference on 1000 random samples from the validation set let val_peptides: Vec = sample_peptides(&val_peptides.as_ref().unwrap(), 1000); - let inference_results: Vec = model.inference( - &val_peptides, - config.batch_size, - 
modifications, - norm_factor, - )?; + let inference_results: Vec = + model.inference(&val_peptides, config.batch_size, modifications, norm_factor)?; let (true_rt, pred_rt): (Vec, Vec) = val_peptides .iter() .zip(&inference_results) .filter_map(|(true_pep, pred_pep)| { match (true_pep.retention_time, pred_pep.retention_time) { (Some(t), Some(p)) => { - let t_denorm = t as f64 * norm_factor.unwrap().1 as f64 + norm_factor.unwrap().0 as f64; // de-normalized true RT - Some((t_denorm, p as f64)) // assume predicted is already de-normalized - }, + let t_denorm = t as f64 * norm_factor.unwrap().1 as f64 + + norm_factor.unwrap().0 as f64; // de-normalized true RT + Some((t_denorm, p as f64)) // assume predicted is already de-normalized + } _ => None, } }) .unzip(); - let scatter_plot = plot_scatter( &vec![true_rt.clone()], &vec![pred_rt.clone()], vec!["RT Prediction".to_string()], "Predicted vs True RT", "Target RT", - "Predicted RT" - ).unwrap(); + "Predicted RT", + ) + .unwrap(); overview_section.add_plot(scatter_plot); - report.add_section(overview_section); + report.add_section(overview_section); } /* Section 2: Configuration */ diff --git a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs index 63973da..5b7f52b 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs @@ -45,7 +45,7 @@ fn run_prediction(model: &mut CCSCNNLSTMModel, prediction_context: &PredictionCo None, ) { Ok(predictions) => { - if let PredictionResult::IMResult(ccs_preds) = predictions { + if let PredictionResult::CCSResult(ccs_preds) = predictions { let total_error: f32 = ccs_preds .iter() .zip(prediction_context.observed_ccs.iter()) diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 120dd48..9cb4f84 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -132,7 +132,7 @@ impl Index<(usize, usize)> for PredictionValue { #[derive(Debug, Clone)] pub enum PredictionResult { RTResult(Vec), - IMResult(Vec), + CCSResult(Vec), MS2Result(Vec>>), } @@ -140,7 +140,7 @@ impl PredictionResult { pub fn len(&self) -> usize { match self { PredictionResult::RTResult(vec) => vec.len(), - PredictionResult::IMResult(vec) => vec.len(), + PredictionResult::CCSResult(vec) => vec.len(), PredictionResult::MS2Result(vec) => vec.len(), } } @@ -148,7 +148,7 @@ impl PredictionResult { pub fn get_prediction_entry(&self, index: usize) -> PredictionValue { match self { PredictionResult::RTResult(vec) => PredictionValue::Single(vec[index].clone()), - PredictionResult::IMResult(vec) => PredictionValue::Single(vec[index].clone()), + PredictionResult::CCSResult(vec) => PredictionValue::Single(vec[index].clone()), PredictionResult::MS2Result(vec) => PredictionValue::Matrix(vec[index].clone()), } } @@ -266,7 +266,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { } PropertyType::CCS => { let predictions: Vec = output.to_vec1()?; - Ok(PredictionResult::IMResult(predictions)) + Ok(PredictionResult::CCSResult(predictions)) } PropertyType::MS2 => { let out = self.process_predictions(&output, self.get_min_pred_intensity())?; @@ -447,7 +447,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { let num_batches = (training_data.len() + batch_size - 1) / batch_size; info!( - "Training {} model from scratch on {} peptide features ({} 
batches) for {} epochs",
+            "Training {} model on {} peptide features ({} batches) for {} epochs",
             self.get_model_arch(),
             training_data.len(),
             num_batches,
@@ -491,7 +491,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                     PropertyType::RT => PredictionResult::RTResult(
                         batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(),
                     ),
-                    PropertyType::CCS => PredictionResult::IMResult(
+                    PropertyType::CCS => PredictionResult::CCSResult(
                         batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(),
                     ),
                     PropertyType::MS2 => {
@@ -500,7 +500,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                 };

                 let target_batch = match batch_targets {
-                    PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => {
+                    PredictionResult::RTResult(ref values) | PredictionResult::CCSResult(ref values) => {
                         Tensor::new(values.clone(), &self.get_device())?
                     }
                     PredictionResult::MS2Result(_) => unreachable!(),
@@ -548,7 +548,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                     PropertyType::RT => PredictionResult::RTResult(
                         batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(),
                     ),
-                    PropertyType::CCS => PredictionResult::IMResult(
+                    PropertyType::CCS => PredictionResult::CCSResult(
                         batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(),
                     ),
                     PropertyType::MS2 => {
@@ -557,7 +557,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                 };

                 let target_val = match val_targets {
-                    PredictionResult::RTResult(ref values) | PredictionResult::IMResult(ref values) => {
+                    PredictionResult::RTResult(ref values) | PredictionResult::CCSResult(ref values) => {
                         Tensor::new(values.clone(), &self.get_device())?
                     }
                     PredictionResult::MS2Result(_) => unreachable!(),
@@ -716,7 +716,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                         .map(|p| p.retention_time.unwrap_or_default())
                         .collect(),
                 ),
-                PropertyType::CCS => PredictionResult::IMResult(
+                PropertyType::CCS => PredictionResult::CCSResult(
                     batch_data
                         .iter()
                         .map(|p| p.ion_mobility.unwrap_or_default())
@@ -732,7 +732,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                 let target_batch = match batch_targets {
                     PredictionResult::RTResult(ref values)
-                    | PredictionResult::IMResult(ref values) => {
+                    | PredictionResult::CCSResult(ref values) => {
                         Tensor::new(values.clone(), &self.get_device())?
} PredictionResult::MS2Result(ref spectra) => { From a6d944fbd0713c88384c3e7f643d2bc922d303d6 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 11 May 2025 00:14:59 -0400 Subject: [PATCH 32/75] refactor: Add new fields to load_peptide_data function in redeem-cli --- crates/redeem-cli/src/properties/load_data.rs | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs index 0c423ef..6e85e22 100644 --- a/crates/redeem-cli/src/properties/load_data.rs +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -34,27 +34,39 @@ pub fn load_peptide_data>( let record = result?; let sequence = record - .get(headers.iter().position(|h| h == "sequence").unwrap_or(2)) + .get(headers.iter().position(|h| h.to_lowercase() == "sequence").unwrap_or(2)) .unwrap_or("") .to_string(); let retention_time = record - .get(headers.iter().position(|h| h == "retention time").unwrap_or(3)) + .get(headers.iter().position(|h| h.to_lowercase() == "retention time").unwrap_or(3)) .and_then(|s| s.parse::().ok()); let charge = record - .get(headers.iter().position(|h| h == "charge").unwrap_or(usize::MAX)) + .get(headers.iter().position(|h| h.to_lowercase() == "charge").unwrap_or(usize::MAX)) .and_then(|s| s.parse::().ok()); + let precursor_mass = record + .get(headers.iter().position(|h| h.to_lowercase() == "precursor_mass").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()); + + let ion_mobility = record + .get(headers.iter().position(|h| h.to_lowercase() == "ion_mobility").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()); + + let ccs = record + .get(headers.iter().position(|h| h.to_lowercase() == "ccs").unwrap_or(usize::MAX)) + .and_then(|s| s.parse::().ok()); + let in_nce = nce.or_else(|| { record - .get(headers.iter().position(|h| h == "nce").unwrap_or(usize::MAX)) + .get(headers.iter().position(|h| h.to_lowercase() == "nce").unwrap_or(usize::MAX)) .and_then(|s| s.parse::().ok()) }); let in_instrument = instrument.clone().or_else(|| { record - .get(headers.iter().position(|h| h == "instrument").unwrap_or(usize::MAX)) + .get(headers.iter().position(|h| h.to_lowercase() == "instrument").unwrap_or(usize::MAX)) .map(|s| s.to_string()) }); @@ -62,15 +74,17 @@ pub fn load_peptide_data>( rt_values.push(rt); } - peptides.push(PeptideData::new( - &sequence, + peptides.push(PeptideData { + sequence, charge, - in_nce, - in_instrument.as_deref(), + precursor_mass, + nce: in_nce, + instrument: in_instrument, retention_time, - None, - None, - )); + ion_mobility, + ccs, + ms2_intensities: None + }); } if normalize_rt && !rt_values.is_empty() { @@ -93,3 +107,4 @@ pub fn load_peptide_data>( Ok((peptides, None)) } } + From b22f28d33f1c430d458c688a68b4db28ddf253a1 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 11 May 2025 02:40:40 -0400 Subject: [PATCH 33/75] refactor: Add stats module to redeem-properties crate, and add lr scheduler --- .../src/properties/train/trainer.rs | 59 ++- .../redeem-properties/src/models/ccs_model.rs | 3 +- .../src/models/model_interface.rs | 413 +++++++++++------- .../redeem-properties/src/models/rt_model.rs | 3 +- crates/redeem-properties/src/utils/mod.rs | 3 +- crates/redeem-properties/src/utils/stats.rs | 231 ++++++++++ crates/redeem-properties/src/utils/utils.rs | 49 ++- 7 files changed, 580 insertions(+), 181 deletions(-) create mode 100644 crates/redeem-properties/src/utils/stats.rs diff --git a/crates/redeem-cli/src/properties/train/trainer.rs 
b/crates/redeem-cli/src/properties/train/trainer.rs
index 45ed0fe..2b2695b 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -131,7 +131,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
     let start_time = std::time::Instant::now();
     log::trace!("Training started");

-    let epoch_losses = model.train(
+    let train_step_metrics = model.train(
         &train_peptides,
         val_peptides.as_ref(),
         modifications.clone(),
@@ -142,13 +142,15 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         config.early_stopping_patience,
     )?;
     log::info!("Training completed in {:?}", start_time.elapsed());
+    model.save(&config.output_file)?;
+    log::info!("Model saved to: {}", config.output_file);

     // Generate report
     let mut report = Report::new(
         "ReDeeM",
         &config.version,
         Some("https://github.com/singjc/redeem/blob/master/img/redeem_logo.png?raw=true"),
-        "ReDeeM Trainer Report",
+        format!("ReDeeM ({}) Trainer Report", config.model_arch).as_str(),
     );

     /* Section 1: Overview */
     {
         let mut overview_section = ReportSection::new("Overview");

         overview_section.add_content(html! {
-            "This report summarizes the training process of the ReDeeM model."
+            "This report summarizes the training process of the ReDeeM model. It includes epoch-level summaries and step-wise dynamics such as learning rate scheduling and accuracy tracking over time. These plots provide insight into model convergence behavior and training stability."
         });

+        let epoch_losses = train_step_metrics.summarize_loss_for_plotting();
         let losses_plot = plot_losses(&epoch_losses);
         overview_section.add_plot(losses_plot);

-        // Lets perform inference on 1000 random samples from the validation set
+        // Step-wise learning rate plot
+        let lr_plot = plot_training_metric(
+            &train_step_metrics,
+            "lr",
+            "Learning Rate Over Steps",
+            "Step",
+            "Learning Rate",
+        );
+        overview_section.add_plot(lr_plot);
+
+        // Step-wise loss plot
+        let step_loss_plot = plot_training_metric(
+            &train_step_metrics,
+            "loss",
+            "Loss Over Steps",
+            "Step",
+            "Loss",
+        );
+        overview_section.add_plot(step_loss_plot);
+
+        // Step-wise accuracy plot
+        let acc_plot = plot_training_metric(
+            &train_step_metrics,
+            "accuracy",
+            "Accuracy Over Steps",
+            "Step",
+            "Accuracy",
+        );
+        overview_section.add_plot(acc_plot);
+
+        // Inference scatter plot
         let val_peptides: Vec<PeptideData> = sample_peptides(&val_peptides.as_ref().unwrap(), 1000);
         let inference_results: Vec<PeptideData> =
             model.inference(&val_peptides, config.batch_size, modifications, norm_factor)?;
@@ -173,8 +206,8 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
                 match (true_pep.retention_time, pred_pep.retention_time) {
                     (Some(t), Some(p)) => {
                         let t_denorm = t as f64 * norm_factor.unwrap().1 as f64
-                            + norm_factor.unwrap().0 as f64; // de-normalized true RT
-                        Some((t_denorm, p as f64)) // assume predicted is already de-normalized
+                            + norm_factor.unwrap().0 as f64;
+                        Some((t_denorm, p as f64))
                     }
                     _ => None,
                 }
@@ -184,16 +217,18 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         let scatter_plot = plot_scatter(
             &vec![true_rt.clone()],
             &vec![pred_rt.clone()],
-            vec!["RT Prediction".to_string()],
-            "Predicted vs True RT",
-            "Target RT",
-            "Predicted RT",
+            vec!["Prediction".to_string()],
+            "Predicted vs True (Random 1000 Validation Peptides)",
+            "Target",
+            "Predicted",
         )
         .unwrap();

         overview_section.add_plot(scatter_plot);
+
        report.add_section(overview_section);
    }

+
    /* Section 2: Configuration */
    {
        let mut config_section = ReportSection::new("Configuration");
diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 7befcf3..43b5c87 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -5,6 +5,7 @@ use crate::models::model_interface::{ModelInterface,PredictionResult};
 use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
 use crate::models::ccs_cnn_tf_model::CCSCNNTFModel;
 use crate::utils::data_handling::PeptideData;
+use crate::utils::stats::TrainingStepMetrics;
 use std::collections::HashMap;
 use crate::utils::peptdeep_utils::ModificationMap;
@@ -45,7 +46,7 @@ impl CCSModelWrapper {
         self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), None, None)
     }

-    pub fn train(&mut self, training_data: &Vec<PeptideData>, val_data: Option<&Vec<PeptideData>>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result<Vec<(usize, f32, Option<f32>, f32, Option<f32>)>> {
+    pub fn train(&mut self, training_data: &Vec<PeptideData>, val_data: Option<&Vec<PeptideData>>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result<TrainingStepMetrics> {
         self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience)
     }

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 9cb4f84..9d3f1ef 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -2,12 +2,10 @@ use crate::{
     building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed},
     models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper},
     utils::{
-        data_handling::PeptideData,
-        logging::Progress,
-        peptdeep_utils::{
+        data_handling::PeptideData, logging::Progress, peptdeep_utils::{
             get_modification_indices, get_modification_string, parse_instrument_index,
             remove_mass_shift,
-        },
+        }, stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, utils::{CosineWithWarmup, LRScheduler}
     },
 };
 use anyhow::{Context, Result};
@@ -210,11 +208,11 @@ pub trait ModelInterface: Send + Sync + ModelClone {
         Self: Sized;

     /// Create a new instance of the model, given a pretrained model (.pth or .safetensors) and a constants file.
-    ///
+    ///
     /// # Arguments
     /// * `model_path` - Path to the model file (.pth or .safetensors).
     /// * `constants_path` - Optional path to the model constants file (.yaml). If none, will use the default constants.
-    ///
+    ///
     fn new<P: AsRef<Path>>(
         model_path: P,
         constants_path: Option
<P>
, @@ -252,9 +250,9 @@ pub trait ModelInterface: Send + Sync + ModelClone { instrument: Option>, ) -> Result { // Encode the batch of peptides - let input_tensor = - self.encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument)? - .to_device(self.get_device())?; + let input_tensor = self + .encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument)? + .to_device(self.get_device())?; // Forward pass through the model let output = self.forward(&input_tensor)?; @@ -443,9 +441,11 @@ pub trait ModelInterface: Send + Sync + ModelClone { learning_rate: f64, epochs: usize, early_stopping_patience: usize, - ) -> Result, f32, Option)>> { + ) -> Result { let num_batches = (training_data.len() + batch_size - 1) / batch_size; - + let total_steps = num_batches * epochs; + let warmup_steps = total_steps / 10; // 10% of total steps + info!( "Training {} model from on {} peptide features ({} batches) for {} epochs", self.get_model_arch(), @@ -453,138 +453,158 @@ pub trait ModelInterface: Send + Sync + ModelClone { num_batches, epochs ); - + + let mut step_metrics = TrainingStepMetrics { + epochs: vec![], + steps: vec![], + learning_rates: vec![], + losses: vec![], + phases: vec![], + precisions: vec![], + recalls: vec![], + accuracies: vec![], + }; + + let mut step_idx = 0; + let mut val_step_idx = 0; + let params = candle_nn::ParamsAdamW { lr: learning_rate, ..Default::default() }; let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; - + let mut lr_scheduler = CosineWithWarmup::new( + learning_rate, + warmup_steps, + total_steps, + 0.5 // one full cosine cycle + ); + let mut best_val_loss = f32::INFINITY; let mut epochs_without_improvement = 0; let mut epoch_losses = vec![]; - + for epoch in 0..epochs { let progress = Progress::new(num_batches, &format!("[training] Epoch {}: ", epoch)); let mut batch_losses = vec![]; - - training_data - .chunks(batch_size) - .enumerate() - .try_for_each(|(_batch_idx, batch_data)| { - let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); - let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - - let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - - let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; - - let batch_targets = match self.property_type() { - PropertyType::RT => PredictionResult::RTResult( - batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), - ), - PropertyType::CCS => PredictionResult::CCSResult( - batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), - ), - PropertyType::MS2 => { - return Err(anyhow::anyhow!("Training from scratch is not yet implemented for MS2")); - } - }; - - let target_batch = match batch_targets { - PredictionResult::RTResult(ref values) | PredictionResult::CCSResult(ref values) => { - Tensor::new(values.clone(), &self.get_device())? 
- } - PredictionResult::MS2Result(_) => unreachable!(), - }.to_device(self.get_device())?; - + + training_data.chunks(batch_size).enumerate().try_for_each( + |(_batch_idx, batch_data)| -> anyhow::Result<()> { + let (input_batch, target_batch) = + self.prepare_batch_inputs(batch_data, &modifications)?; + let predicted = self.forward(&input_batch)?; let loss = candle_nn::loss::mse(&predicted, &target_batch)?; opt.backward_step(&loss)?; - + + // Update learning rate after optimizer step + opt.set_learning_rate(lr_scheduler.get_last_lr()); + lr_scheduler.step(); + let loss_val = loss.to_vec0::().unwrap_or(999.0); batch_losses.push(loss_val); - - progress.update_description(&format!("[training] Epoch {}: Loss: {:.4}", epoch, loss_val)); + + let predictions = predicted.to_vec1::()?; + let targets = target_batch.to_vec1::()?; + + let acc = match self.property_type() { + PropertyType::RT => Some(Metrics::accuracy(&predictions, &targets, 0.5)), // is predicted RT within 0.5 min of target RT? + PropertyType::CCS => { + let tol: Vec = targets.iter().map(|t| t * 0.02).collect(); + Some(Metrics::accuracy_dynamic(&predictions, &targets, &tol)) + }, // is predicted CCS within 2% of target CCS? + _ => None, + }; + + step_metrics.epochs.push(epoch); + step_metrics.steps.push(step_idx); + step_metrics.learning_rates.push(lr_scheduler.get_last_lr() as f64); + step_metrics.losses.push(loss_val); + step_metrics.phases.push(TrainingPhase::Train); + step_metrics.accuracies.push(acc); + step_metrics.precisions.push(None); + step_metrics.recalls.push(None); + step_idx += 1; + + progress.update_description(&format!( + "[training] Epoch {}: Loss: {:.4}", + epoch, loss_val + )); progress.inc(); - + Ok(()) - })?; - - let avg_loss = batch_losses.iter().copied().sum::() / batch_losses.len() as f32; - let std_loss = (batch_losses.iter().map(|l| (l - avg_loss).powi(2)).sum::() / batch_losses.len() as f32).sqrt(); - + }, + )?; + + let (avg_loss, std_loss) = compute_loss_stats(&batch_losses); + if let Some(val_data) = validation_data { - let val_batches = (val_data.len() + validation_batch_size - 1) / validation_batch_size; - use rayon::prelude::*; - - let val_losses: Vec = val_data + let val_batches = + (val_data.len() + validation_batch_size - 1) / validation_batch_size; + + let val_results: Vec<(f32, usize, f64, Option)> = val_data .par_chunks(validation_batch_size) - .map(|batch_data| { - let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); - let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - - let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - - let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_val = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; - - let val_targets = match self.property_type() { - PropertyType::RT => PredictionResult::RTResult( - batch_data.iter().map(|p| p.retention_time.unwrap_or_default()).collect(), - ), - PropertyType::CCS => 
PredictionResult::CCSResult( - batch_data.iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), - ), - PropertyType::MS2 => { - return Err(anyhow::anyhow!("Validation not supported for MS2 yet")); - } - }; - - let target_val = match val_targets { - PredictionResult::RTResult(ref values) | PredictionResult::CCSResult(ref values) => { - Tensor::new(values.clone(), &self.get_device())? - } - PredictionResult::MS2Result(_) => unreachable!(), - }.to_device(self.get_device())?; - + .enumerate() + .map(|(idx, batch_data)| { + let (input_val, target_val) = self.prepare_batch_inputs(batch_data, &modifications)?; let predicted = self.forward(&input_val)?; let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; - Ok(val_loss.to_vec0::()?) + let loss_val = val_loss.to_vec0::()?; + + let predictions = predicted.to_vec1::()?; + let targets = target_val.to_vec1::()?; + + let acc = match self.property_type() { + PropertyType::RT => Some(Metrics::accuracy(&predictions, &targets, 0.5)), + PropertyType::CCS => { + let tol: Vec = targets.iter().map(|t| t * 0.02).collect(); + Some(Metrics::accuracy_dynamic(&predictions, &targets, &tol)) + }, + _ => None, + }; + + Ok((loss_val, idx, lr_scheduler.get_last_lr(), acc)) }) - .collect::>>()?; - - let avg_val_loss = val_losses.iter().sum::() / val_losses.len() as f32; - let std_val_loss = (val_losses.iter().map(|l| (l - avg_val_loss).powi(2)).sum::() / val_losses.len() as f32).sqrt(); - - epoch_losses.push((epoch, avg_loss, Some(avg_val_loss), std_loss, Some(std_val_loss))); - + .collect::>()?; + + for (val_loss, idx, lr, acc) in &val_results { + step_metrics.epochs.push(epoch); + step_metrics.steps.push(val_step_idx + idx); + step_metrics.learning_rates.push(*lr); + step_metrics.losses.push(*val_loss); + step_metrics.phases.push(TrainingPhase::Validation); + step_metrics.accuracies.push(*acc); + step_metrics.precisions.push(None); + step_metrics.recalls.push(None); + } + val_step_idx += val_results.len(); + + let val_losses: Vec = val_results.iter().map(|(loss, _, _, _)| *loss).collect(); + let (avg_val_loss, std_val_loss): (f32, f32) = compute_loss_stats(&val_losses); + + epoch_losses.push(( + epoch, + avg_loss, + Some(avg_val_loss), + std_loss, + Some(std_val_loss), + )); + progress.update_description(&format!( "Epoch {}: Avg. Train Loss: {:.4} (±{:.4}) | Avg. Val. 
Loss: {:.4} (±{:.4})",
                    epoch, avg_loss, std_loss, avg_val_loss, std_val_loss
                ));
                progress.finish();
-
+
                if avg_val_loss < best_val_loss {
                    best_val_loss = avg_val_loss;
                    epochs_without_improvement = 0;
-
-                    let checkpoint_path = format!("redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), epoch);
+
+                    let checkpoint_path = format!(
+                        "redeem_{}_best_val_ckpt_model_epoch_{}.safetensors",
+                        self.get_model_arch(),
+                        epoch
+                    );
                    self.get_mut_varmap().save(&checkpoint_path)?;
                } else {
                    epochs_without_improvement += 1;
@@ -592,20 +612,52 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                        info!("Early stopping triggered after {} epochs without validation loss improvement.", early_stopping_patience);
                        return Ok(step_metrics);
                    }
+                    let checkpoint_path = format!(
+                        "redeem_{}_ckpt_model_epoch_{}.safetensors",
+                        self.get_model_arch(),
+                        epoch - 1
+                    );
+                    // Delete the previous epoch's checkpoint if it exists
+                    if PathBuf::from(&checkpoint_path).exists() {
+                        std::fs::remove_file(&checkpoint_path)?;
+                    }
+                    // Save the current checkpoint
+                    let checkpoint_path = format!(
+                        "redeem_{}_ckpt_model_epoch_{}.safetensors",
+                        self.get_model_arch(),
+                        epoch
+                    );
+                    self.get_mut_varmap().save(&checkpoint_path)?;
                }
            } else {
                epoch_losses.push((epoch, avg_loss, None, std_loss, None));
-                progress.update_description(&format!("Epoch {}: Avg. Train Loss: {:.4} (±{:.4})", epoch, avg_loss, std_loss));
+                progress.update_description(&format!(
+                    "Epoch {}: Avg. Train Loss: {:.4} (±{:.4})",
+                    epoch, avg_loss, std_loss
+                ));
                progress.finish();
-
-                let checkpoint_path = format!("redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), epoch);
+
+                let checkpoint_path = format!(
+                    "redeem_{}_ckpt_model_epoch_{}.safetensors",
+                    self.get_model_arch(),
+                    epoch - 1
+                );
+                // Delete the previous epoch's checkpoint if it exists
+                if PathBuf::from(&checkpoint_path).exists() {
+                    std::fs::remove_file(&checkpoint_path)?;
+                }
+                // Save the current checkpoint
+                let checkpoint_path = format!(
+                    "redeem_{}_ckpt_model_epoch_{}.safetensors",
+                    self.get_model_arch(),
+                    epoch
+                );
+                self.get_mut_varmap().save(&checkpoint_path)?;
            }
        }
-
-        Ok(epoch_losses)
+
+        Ok(step_metrics)
    }

    /// Fine-tune the model on a batch of training data.
    ///
@@ -700,8 +752,9 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                    None
                };

-                let input_batch =
-                    self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?;
+                let input_batch = self
+                    .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?
+                    .to_device(self.get_device())?;

                log::trace!(
                    "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}",
@@ -752,7 +805,8 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                            feature_dim,
                        ))?
} - }.to_device(self.get_device())?; + } + .to_device(self.get_device())?; let predicted = self.forward(&input_batch)?; let loss = candle_nn::loss::mse(&predicted, &target_batch)?; @@ -795,32 +849,61 @@ pub trait ModelInterface: Send + Sync + ModelClone { inference_data.len(), num_batches ); - + let progress = Progress::new(inference_data.len(), "[inference] Batch:"); let mut result: Vec> = vec![None; inference_data.len()]; - + inference_data .par_chunks(batch_size) .enumerate() .map(|(batch_idx, batch_data)| { let start_idx = batch_idx * batch_size; - - let peptides: Vec = batch_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let mods: Vec = batch_data.iter().map(|p| get_modification_string(&p.sequence, &modifications)).collect(); - let mod_sites: Vec = batch_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - let charges = batch_data.iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; - + + let peptides: Vec = batch_data + .iter() + .map(|p| remove_mass_shift(&p.sequence)) + .collect(); + let mods: Vec = batch_data + .iter() + .map(|p| get_modification_string(&p.sequence, &modifications)) + .collect(); + let mod_sites: Vec = batch_data + .iter() + .map(|p| get_modification_indices(&p.sequence)) + .collect(); + + let charges = batch_data + .iter() + .filter_map(|p| p.charge) + .collect::>(); + let charges = if charges.len() == batch_data.len() { + Some(charges) + } else { + None + }; + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - - let instruments = batch_data.iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; - - let input_tensor = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + let nces = if nces.len() == batch_data.len() { + Some(nces) + } else { + None + }; + + let instruments = batch_data + .iter() + .filter_map(|p| p.instrument.clone()) + .collect::>(); + let instruments = if instruments.len() == batch_data.len() { + Some(instruments) + } else { + None + }; + + let input_tensor = self + .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)? + .to_device(self.get_device())?; let output = self.forward(&input_tensor)?; - + match self.property_type() { PropertyType::RT | PropertyType::CCS => { let predictions = output.to_vec1()?; @@ -831,21 +914,24 @@ pub trait ModelInterface: Send + Sync + ModelClone { let mut peptide = batch_data[i].clone(); match self.property_type() { PropertyType::RT => { - peptide.retention_time = if let Some((mean, std)) = rt_norm_params { - Some(pred * std + mean) - } else { - Some(pred) - }; + peptide.retention_time = + if let Some((mean, std)) = rt_norm_params { + Some(pred * std + mean) + } else { + Some(pred) + }; } PropertyType::CCS => peptide.ion_mobility = Some(pred), _ => {} - }; + }; (start_idx + i, peptide) }) .collect(); Ok(updated) } - PropertyType::MS2 => Err(anyhow::anyhow!("Inference not supported for MS2 models in batch mode")), + PropertyType::MS2 => Err(anyhow::anyhow!( + "Inference not supported for MS2 models in batch mode" + )), } }) .collect::>>>()? 
@@ -855,12 +941,41 @@ pub trait ModelInterface: Send + Sync + ModelClone { result[idx] = Some(peptide); progress.inc(); }); - + progress.finish(); Ok(result.into_iter().flatten().collect()) } - - + + /// Extract encoded input and target tensor for a batch of peptides. + fn prepare_batch_inputs( + &self, + batch_data: &[PeptideData], + modifications: &HashMap<(String, Option), crate::utils::peptdeep_utils::ModificationMap>, + ) -> Result<(Tensor, Tensor)> { + let peptides: Vec = batch_data.par_iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = batch_data.par_iter().map(|p| get_modification_string(&p.sequence, modifications)).collect(); + let mod_sites: Vec = batch_data.par_iter().map(|p| get_modification_indices(&p.sequence)).collect(); + + let charges = batch_data.par_iter().filter_map(|p| p.charge).collect::>(); + let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; + + let nces = batch_data.par_iter().filter_map(|p| p.nce).collect::>(); + let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; + + let instruments = batch_data.par_iter().filter_map(|p| p.instrument.clone()).collect::>(); + let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; + + let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + + let target_values: Vec = match self.property_type() { + PropertyType::RT => batch_data.par_iter().map(|p| p.retention_time.unwrap_or_default()).collect(), + PropertyType::CCS => batch_data.par_iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), + PropertyType::MS2 => return Err(anyhow::anyhow!("MS2 training is not yet implemented")), + }; + + let target_tensor = Tensor::new(target_values, &self.get_device())?; + Ok((input_batch, target_tensor)) + } /// Set model to evaluation mode for inference /// This disables dropout and other training-specific layers. @@ -936,6 +1051,8 @@ pub trait ModelInterface: Send + Sync + ModelClone { } } + + /// Parameters for the `predict` method of a `ModelInterface` implementation. 
#[derive(Clone)]
pub struct Parameters {
diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs
index f4a9643..3b9672c 100644
--- a/crates/redeem-properties/src/models/rt_model.rs
+++ b/crates/redeem-properties/src/models/rt_model.rs
@@ -9,6 +9,7 @@ use crate::models::model_interface::{ModelInterface,PredictionResult};
 use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel;
 use crate::models::rt_cnn_transformer_model::RTCNNTFModel;
 use crate::utils::data_handling::PeptideData;
+use crate::utils::stats::TrainingStepMetrics;
 use std::collections::HashMap;
 use crate::utils::peptdeep_utils::ModificationMap;
@@ -50,7 +51,7 @@ impl RTModelWrapper {
         self.model.predict(peptide_sequence, mods, mod_sites, None, None, None)
     }

-    pub fn train(&mut self, training_data: &Vec<PeptideData>, val_data: Option<&Vec<PeptideData>>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result<Vec<(usize, f32, Option<f32>, f32, Option<f32>)>> {
+    pub fn train(&mut self, training_data: &Vec<PeptideData>, val_data: Option<&Vec<PeptideData>>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result<TrainingStepMetrics> {
         self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience)
     }

diff --git a/crates/redeem-properties/src/utils/mod.rs b/crates/redeem-properties/src/utils/mod.rs
index 10069ff..139fa1b 100644
--- a/crates/redeem-properties/src/utils/mod.rs
+++ b/crates/redeem-properties/src/utils/mod.rs
@@ -1,4 +1,5 @@
 pub mod peptdeep_utils;
 pub mod logging;
 pub mod utils;
-pub mod data_handling;
\ No newline at end of file
+pub mod data_handling;
+pub mod stats;
\ No newline at end of file

diff --git a/crates/redeem-properties/src/utils/stats.rs b/crates/redeem-properties/src/utils/stats.rs
new file mode 100644
index 0000000..da2318a
--- /dev/null
+++ b/crates/redeem-properties/src/utils/stats.rs
@@ -0,0 +1,231 @@
+/// Represents a single phase of training: either Training or Validation.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum TrainingPhase {
+    Train,
+    Validation,
+}
+
+/// Stores step-wise metrics for all training/validation iterations in a Struct of Arrays layout.
+#[derive(Debug, Clone)]
+pub struct TrainingStepMetrics {
+    pub epochs: Vec<usize>,
+    pub steps: Vec<usize>,
+    pub learning_rates: Vec<f64>,
+    pub losses: Vec<f32>,
+    pub phases: Vec<TrainingPhase>,
+    pub precisions: Vec<Option<f32>>,
+    pub recalls: Vec<Option<f32>>,
+    pub accuracies: Vec<Option<f32>>,
+}
+
+impl TrainingStepMetrics {
+    /// Computes the average and standard deviation of loss values grouped by epoch and training phase.
+    ///
+    /// # Returns
+    /// A `HashMap` where each key is a tuple `(epoch, TrainingPhase)` and each value is a tuple `(avg_loss, std_loss)`.
+    /// This can be used for reporting or plotting epoch-level training and validation loss trends.
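+    ///
+    /// A minimal usage sketch (assuming `metrics` is a populated `TrainingStepMetrics`):
+    /// ```ignore
+    /// let summary = metrics.summarize_by_epoch_phase();
+    /// if let Some((avg, std)) = summary.get(&(0, TrainingPhase::Train)) {
+    ///     println!("epoch 0 train loss: {avg:.4} ± {std:.4}");
+    /// }
+    /// ```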
+    pub fn summarize_by_epoch_phase(
+        &self,
+    ) -> std::collections::HashMap<(usize, TrainingPhase), (f32, f32)> {
+        use std::collections::HashMap;
+
+        let mut grouped: HashMap<(usize, TrainingPhase), Vec<f32>> = HashMap::new();
+
+        for i in 0..self.epochs.len() {
+            let key = (self.epochs[i], self.phases[i].clone());
+            grouped.entry(key).or_default().push(self.losses[i]);
+        }
+
+        let mut summary = HashMap::new();
+        for (key, values) in grouped {
+            let avg = values.iter().copied().sum::<f32>() / values.len() as f32;
+            let std = (values.iter().map(|v| (v - avg).powi(2)).sum::<f32>() / values.len() as f32)
+                .sqrt();
+            summary.insert(key, (avg, std)); // insert avg/std loss for this epoch + phase
+        }
+
+        summary
+    }
+
+    /// Summarizes average and std loss per epoch for training and validation phases.
+    ///
+    /// Returns a vector of tuples:
+    /// (epoch, avg_train_loss, avg_val_loss, std_train_loss, std_val_loss)
+    pub fn summarize_loss_for_plotting(&self) -> Vec<(usize, f32, Option<f32>, f32, Option<f32>)> {
+        use std::collections::HashMap;
+
+        let mut train_map: HashMap<usize, Vec<f32>> = HashMap::new();
+        let mut val_map: HashMap<usize, Vec<f32>> = HashMap::new();
+
+        for i in 0..self.epochs.len() {
+            match self.phases[i] {
+                TrainingPhase::Train => train_map.entry(self.epochs[i]).or_default().push(self.losses[i]),
+                TrainingPhase::Validation => val_map.entry(self.epochs[i]).or_default().push(self.losses[i]),
+            }
+        }
+
+        let mut epochs: Vec<_> = train_map.keys().chain(val_map.keys()).copied().collect();
+        epochs.sort_unstable();
+        epochs.dedup();
+
+        epochs
+            .into_iter()
+            .map(|epoch| {
+                let (avg_train, std_train) = train_map.get(&epoch)
+                    .map(|v| compute_loss_stats(v))
+                    .unwrap_or((f32::NAN, f32::NAN));
+                let (avg_val, std_val) = val_map.get(&epoch)
+                    .map(|v| compute_loss_stats(v))
+                    .map_or((None, None), |(avg, std)| (Some(avg), Some(std)));
+
+                (epoch, avg_train, avg_val, std_train, std_val)
+            })
+            .collect()
+    }
+
+    /// Computes the average and standard deviation of precision, recall, and accuracy values grouped by epoch and training phase.
+    ///
+    /// # Returns
+    /// A `HashMap` where each key is a tuple `(epoch, TrainingPhase)` and each value is a tuple of:
+    /// `(avg_precision, std_precision, avg_recall, std_recall, avg_accuracy, std_accuracy)`.
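+    ///
+    /// A minimal usage sketch (assuming `metrics` is a populated `TrainingStepMetrics`):
+    /// ```ignore
+    /// let per_epoch = metrics.summarize_metrics_by_epoch_phase();
+    /// if let Some((_, _, _, _, Some(acc_avg), Some(acc_std))) =
+    ///     per_epoch.get(&(0, TrainingPhase::Validation))
+    /// {
+    ///     println!("epoch 0 val accuracy: {acc_avg:.3} ± {acc_std:.3}");
+    /// }
+    /// ```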
+    pub fn summarize_metrics_by_epoch_phase(
+        &self,
+    ) -> std::collections::HashMap<
+        (usize, TrainingPhase),
+        (
+            Option<f32>,
+            Option<f32>,
+            Option<f32>,
+            Option<f32>,
+            Option<f32>,
+            Option<f32>,
+        ),
+    > {
+        use std::collections::{HashMap, HashSet};
+
+        let mut prec_map: HashMap<(usize, TrainingPhase), Vec<f32>> = HashMap::new();
+        let mut rec_map: HashMap<(usize, TrainingPhase), Vec<f32>> = HashMap::new();
+        let mut acc_map: HashMap<(usize, TrainingPhase), Vec<f32>> = HashMap::new();
+
+        for i in 0..self.epochs.len() {
+            let key = (self.epochs[i], self.phases[i].clone());
+            if let Some(p) = self.precisions[i] {
+                prec_map.entry(key.clone()).or_default().push(p);
+            }
+            if let Some(r) = self.recalls[i] {
+                rec_map.entry(key.clone()).or_default().push(r);
+            }
+            if let Some(a) = self.accuracies[i] {
+                acc_map.entry(key.clone()).or_default().push(a);
+            }
+        }
+
+        let mut result = HashMap::new();
+        let keys: HashSet<_> = self
+            .epochs
+            .iter()
+            .zip(&self.phases)
+            .map(|(e, p)| (*e, p.clone()))
+            .collect();
+
+        let summarize = |vals: &Vec<f32>| {
+            let avg = vals.iter().copied().sum::<f32>() / vals.len() as f32;
+            let std =
+                (vals.iter().map(|v| (v - avg).powi(2)).sum::<f32>() / vals.len() as f32).sqrt();
+            (avg, std)
+        };
+
+        for key in keys {
+            let (prec_avg, prec_std) = prec_map
+                .get(&key)
+                .map(summarize)
+                .map_or((None, None), |(a, s)| (Some(a), Some(s)));
+            let (rec_avg, rec_std) = rec_map
+                .get(&key)
+                .map(summarize)
+                .map_or((None, None), |(a, s)| (Some(a), Some(s)));
+            let (acc_avg, acc_std) = acc_map
+                .get(&key)
+                .map(summarize)
+                .map_or((None, None), |(a, s)| (Some(a), Some(s)));
+
+            result.insert(
+                key,
+                (prec_avg, prec_std, rec_avg, rec_std, acc_avg, acc_std),
+            );
+        }
+
+        result
+    }
+}
+
+
+/// Utility functions for evaluating prediction metrics.
+pub struct Metrics;
+
+impl Metrics {
+    /// Computes accuracy as the proportion of predictions within a tolerance of the target.
+    pub fn accuracy(pred: &[f32], target: &[f32], tolerance: f32) -> f32 {
+        let correct = pred.iter().zip(target).filter(|(p, t)| (*p - *t).abs() <= tolerance).count();
+        correct as f32 / pred.len() as f32
+    }
+
+    /// Computes accuracy as the proportion of predictions within a dynamic (per-element) tolerance of the target.
+    pub fn accuracy_dynamic(pred: &[f32], target: &[f32], tolerance: &[f32]) -> f32 {
+        pred.iter()
+            .zip(target)
+            .zip(tolerance)
+            .filter(|((p, t), tol)| (*p - *t).abs() <= **tol)
+            .count() as f32 / pred.len() as f32
+    }
+
+    /// Computes precision as TP / (TP + FP), based on a binary threshold.
+    pub fn precision(pred: &[f32], target: &[f32], threshold: f32) -> Option<f32> {
+        let mut tp = 0;
+        let mut fp = 0;
+        for (&p, &t) in pred.iter().zip(target) {
+            if p > threshold {
+                if t > threshold {
+                    tp += 1;
+                } else {
+                    fp += 1;
+                }
+            }
+        }
+        if tp + fp > 0 {
+            Some(tp as f32 / (tp + fp) as f32)
+        } else {
+            None
+        }
+    }
+
+    /// Computes recall as TP / (TP + FN), based on a binary threshold.
+    pub fn recall(pred: &[f32], target: &[f32], threshold: f32) -> Option<f32> {
+        let mut tp = 0;
+        let mut fn_ = 0;
+        for (&p, &t) in pred.iter().zip(target) {
+            if t > threshold {
+                if p > threshold {
+                    tp += 1;
+                } else {
+                    fn_ += 1;
+                }
+            }
+        }
+        if tp + fn_ > 0 {
+            Some(tp as f32 / (tp + fn_) as f32)
+        } else {
+            None
+        }
+    }
+}
+
+
+/// Compute average and std deviation from a slice of loss values.
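+///
+/// # Example
+/// A quick check of the population statistics this returns (the divisor is `n`, not `n - 1`):
+/// ```
+/// use redeem_properties::utils::stats::compute_loss_stats;
+/// let (avg, std) = compute_loss_stats(&[1.0, 2.0, 3.0]);
+/// assert!((avg - 2.0).abs() < 1e-6);
+/// assert!((std - (2.0f32 / 3.0).sqrt()).abs() < 1e-6);
+/// ```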
+/// Compute average and std deviation from a slice of loss values.
+pub fn compute_loss_stats(losses: &[f32]) -> (f32, f32) {
+    let avg = losses.iter().copied().sum::<f32>() / losses.len() as f32;
+    let std = (losses.iter().map(|l| (l - avg).powi(2)).sum::<f32>() / losses.len() as f32).sqrt();
+    (avg, std)
+}
\ No newline at end of file

diff --git a/crates/redeem-properties/src/utils/utils.rs b/crates/redeem-properties/src/utils/utils.rs
index 5b21291..98824a6 100644
--- a/crates/redeem-properties/src/utils/utils.rs
+++ b/crates/redeem-properties/src/utils/utils.rs
@@ -3,14 +3,27 @@ use candle_core::utils::{cuda_is_available, metal_is_available};
 use anyhow::{Result, anyhow};
 use std::f64::consts::PI;
 
+// Learning rate scheduler trait
+/// Trait representing a learning rate scheduler that can be updated each step
+/// and queried for the current learning rate.
 pub trait LRScheduler {
-    /// Update the learning rate based on the current step
     fn step(&mut self);
-
-    /// Get the current learning rate
     fn get_last_lr(&self) -> f64;
 }
 
+// Cosine decay with warmup
+/// Cosine learning rate scheduler with linear warmup phase.
+///
+/// This scheduler increases the learning rate linearly from 0 to `initial_lr`
+/// over `num_warmup_steps`, then decays it using cosine annealing over the
+/// remaining training steps, optionally over multiple cycles.
+///
+/// # Fields
+/// * `initial_lr` - The peak learning rate after warmup.
+/// * `current_step` - Internal counter of the current step.
+/// * `num_warmup_steps` - Number of steps to warm up the learning rate.
+/// * `num_training_steps` - Total number of training steps.
+/// * `num_cycles` - Number of cosine cycles in the annealing phase.
 pub struct CosineWithWarmup {
     initial_lr: f64,
     current_step: usize,
@@ -20,32 +33,33 @@ pub struct CosineWithWarmup {
 }
 
 impl CosineWithWarmup {
-    pub fn new(
-        initial_lr: f64,
-        num_warmup_steps: usize,
-        num_training_steps: usize,
-        num_cycles: f64,
-    ) -> Self {
+    /// Create a new `CosineWithWarmup` scheduler.
+    ///
+    /// # Arguments
+    /// * `initial_lr` - Maximum learning rate after warmup.
+    /// * `num_warmup_steps` - Number of steps to linearly increase the learning rate.
+    /// * `num_training_steps` - Total number of training steps.
+    /// * `num_cycles` - Number of cosine cycles during decay.
+    pub fn new(initial_lr: f64, num_warmup_steps: usize, num_training_steps: usize, num_cycles: f64) -> Self {
         Self {
             initial_lr,
             current_step: 0,
-            num_warmup_steps: num_warmup_steps,
+            num_warmup_steps,
             num_training_steps,
             num_cycles,
         }
     }
 
+    /// Computes the learning rate for the current step based on warmup and cosine decay.
     fn get_lr(&self) -> f64 {
         if self.current_step < self.num_warmup_steps {
-            // Linear warmup
             return self.initial_lr * (self.current_step as f64) / (self.num_warmup_steps as f64);
         }
 
-        let progress = (self.current_step - self.num_warmup_steps) as f64
-            / (self.num_training_steps - self.num_warmup_steps) as f64;
-
-        // Cosine decay
-        let cosine_decay = 0.5 * (1.0 + (PI * self.num_cycles * 2.0 * progress).cos());
+        let progress = (self.current_step - self.num_warmup_steps) as f64
+            / (self.num_training_steps - self.num_warmup_steps).max(1) as f64;
+
+        let cosine_decay = 0.5 * (1.0 + (std::f64::consts::PI * self.num_cycles * 2.0 * progress).cos());
         self.initial_lr * cosine_decay.max(1e-10)
     }
 }
@@ -54,12 +68,13 @@ impl LRScheduler for CosineWithWarmup {
 impl LRScheduler for CosineWithWarmup {
     fn step(&mut self) {
         self.current_step += 1;
     }
-
+    
     fn get_last_lr(&self) -> f64 {
         self.get_lr()
     }
 }
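Note (illustrative aside, not part of this patch): to see the shape of the schedule, step the scheduler and sample the learning rate. With `num_cycles = 0.5` the decay is a single half-cosine from the peak down toward zero:

    // Warm up to 1e-3 over 10 steps, then decay over the remaining 90 of 100 steps.
    let mut sched = CosineWithWarmup::new(1e-3, 10, 100, 0.5);
    for step in 0..100 {
        sched.step();
        if step % 25 == 0 {
            // step 0 -> 1e-4 (linear warmup), step 50 -> mid-decay, step 75 -> approaching 0
            println!("step {step:>3}: lr = {:.6}", sched.get_last_lr());
        }
    }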
 
+
 /// Converts a device string to a Candle Device.
 ///
 /// # Supported Device Strings

From 5891b4e3c0f4954c1c76bf4d1b3d97941b295cf9 Mon Sep 17 00:00:00 2001
From: singjc
Date: Sun, 11 May 2025 02:40:52 -0400
Subject: [PATCH 34/75] refactor: Add precursor mass field to PeptideData
 struct in redeem-properties crate

---
 crates/redeem-properties/src/utils/data_handling.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/crates/redeem-properties/src/utils/data_handling.rs b/crates/redeem-properties/src/utils/data_handling.rs
index 11520ce..6b09c27 100644
--- a/crates/redeem-properties/src/utils/data_handling.rs
+++ b/crates/redeem-properties/src/utils/data_handling.rs
@@ -3,22 +3,26 @@
 pub struct PeptideData {
     pub sequence: String,
     pub charge: Option<i32>,
+    pub precursor_mass: Option<f32>,
     pub nce: Option<f32>,
     pub instrument: Option<String>,
     pub retention_time: Option<f32>,
     pub ion_mobility: Option<f32>,
+    pub ccs: Option<f32>,
     pub ms2_intensities: Option<Vec<Vec<f32>>>,
 }
 
 impl PeptideData {
-    pub fn new(sequence: &str, charge: Option<i32>, nce: Option<f32>, instrument: Option<&str>, retention_time: Option<f32>, ion_mobility: Option<f32>, ms2_intensities: Option<Vec<Vec<f32>>>) -> Self {
+    pub fn new(sequence: &str, charge: Option<i32>, precursor_mass: Option<f32>, nce: Option<f32>, instrument: Option<&str>, retention_time: Option<f32>, ion_mobility: Option<f32>, ccs: Option<f32>, ms2_intensities: Option<Vec<Vec<f32>>>) -> Self {
         Self {
             sequence: sequence.to_string(),
             charge,
+            precursor_mass,
             nce,
             instrument: instrument.map(|s| s.to_string()),
             retention_time,
             ion_mobility,
+            ccs,
             ms2_intensities
         }
     }

From 874e441aaf1c0ed46e85ff7599d7e94498f320b9 Mon Sep 17 00:00:00 2001
From: singjc
Date: Sun, 11 May 2025 02:41:00 -0400
Subject: [PATCH 35/75] refactor: Add plot_training_metric function to
 redeem-cli crate

---
 .../redeem-cli/src/properties/train/plot.rs   | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/crates/redeem-cli/src/properties/train/plot.rs b/crates/redeem-cli/src/properties/train/plot.rs
index f5b7b80..0f27576 100644
--- a/crates/redeem-cli/src/properties/train/plot.rs
+++ b/crates/redeem-cli/src/properties/train/plot.rs
@@ -1,5 +1,6 @@
 use plotly::{Layout, Plot, Scatter};
 use plotly::common::{Fill, Mode, Title};
+use crate::training::{TrainingStepMetrics, TrainingPhase};
 
 pub fn plot_losses(
     epoch_losses: &[(usize, f32, Option<f32>, f32, Option<f32>)]
@@ -77,3 +78,68 @@
 
     plot
 }
+
+
+
+/// Plot a single training metric (e.g. loss, learning rate, accuracy) over steps.
+pub fn plot_training_metric( + metrics: &TrainingStepMetrics, + metric_name: &str, + title: &str, + x_title: &str, + y_title: &str, +) -> Plot { + let mut plot = Plot::new(); + + let mut train_x = vec![]; + let mut train_y = vec![]; + let mut val_x = vec![]; + let mut val_y = vec![]; + + for i in 0..metrics.steps.len() { + let x = metrics.steps[i] as f64; + let y_opt = match metric_name { + "loss" => Some(metrics.losses[i] as f64), + "lr" => Some(metrics.learning_rates[i]), + "accuracy" => metrics.accuracies[i].map(|a| a as f64), + _ => None, + }; + + if let Some(y) = y_opt { + match metrics.phases[i] { + TrainingPhase::Train => { + train_x.push(x); + train_y.push(y); + } + TrainingPhase::Validation => { + val_x.push(x); + val_y.push(y); + } + } + } + } + + if !train_x.is_empty() { + plot.add_trace( + Scatter::new(train_x.clone(), train_y.clone()) + .mode(Mode::Lines) + .name("Train"), + ); + } + if !val_x.is_empty() { + plot.add_trace( + Scatter::new(val_x.clone(), val_y.clone()) + .mode(Mode::Lines) + .name("Validation"), + ); + } + + plot.set_layout( + Layout::new() + .title(Title::new().text(title)) + .x_axis(plotly::layout::Axis::new().title(x_title)) + .y_axis(plotly::layout::Axis::new().title(y_title)) + ); + + plot +} From 487f6b7c60c7585a198f19e81900b7387146edf4 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 11 May 2025 02:43:35 -0400 Subject: [PATCH 36/75] refactor: Update early stopping logic in ModelInterface implementation --- crates/redeem-properties/src/models/model_interface.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 9d3f1ef..9ea9273 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -610,7 +610,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { epochs_without_improvement += 1; if epochs_without_improvement >= early_stopping_patience { info!("Early stopping triggered after {} epochs without validation loss improvement.", early_stopping_patience); - return Ok(epoch_losses); + return Ok(step_metrics); } let checkpoint_path = format!( "redeem_{}_ckpt_model_epoch_{}.safetensors", From 30ad1025640be3e6ac99194d1849004070527477 Mon Sep 17 00:00:00 2001 From: singjc Date: Mon, 12 May 2025 00:08:31 -0400 Subject: [PATCH 37/75] refactor: Update plot_losses function in redeem-cli crate --- crates/redeem-cli/src/properties/train/plot.rs | 5 +++-- crates/redeem-cli/src/properties/train/trainer.rs | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/redeem-cli/src/properties/train/plot.rs b/crates/redeem-cli/src/properties/train/plot.rs index 0f27576..dab78ff 100644 --- a/crates/redeem-cli/src/properties/train/plot.rs +++ b/crates/redeem-cli/src/properties/train/plot.rs @@ -1,6 +1,7 @@ use plotly::{Layout, Plot, Scatter}; use plotly::common::{Fill, Mode, Title}; -use crate::training::{TrainingStepMetrics, TrainingPhase}; +use redeem_properties::utils::stats::{TrainingStepMetrics, TrainingPhase}; + pub fn plot_losses( epoch_losses: &[(usize, f32, Option, f32, Option)] @@ -136,7 +137,7 @@ pub fn plot_training_metric( plot.set_layout( Layout::new() - .title(Title::new().text(title)) + .title(title) .x_axis(plotly::layout::Axis::new().title(x_title)) .y_axis(plotly::layout::Axis::new().title(y_title)) ); diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index 
2b2695b..cbd523c 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -15,7 +15,7 @@ use report_builder::{
 };
 
 use crate::properties::load_data;
-use crate::properties::train::plot::plot_losses;
+use crate::properties::train::plot::{plot_losses, plot_training_metric};
 use crate::properties::train::sample_peptides;
 use crate::properties::util::write_bytes_to_file;
 use input::PropertyTrainConfig;
@@ -150,7 +150,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         "ReDeeM",
         &config.version,
         Some("https://github.com/singjc/redeem/blob/master/img/redeem_logo.png?raw=true"),
-        format!("ReDeeM (:?) Trainer Report", config.model_arch).as_str(),
+        &format!("ReDeeM {:?} Trainer Report", config.model_arch),
     );
 
     /* Section 1: Overview */

From f1c74a5eb0d9ad4751cabbd6edfbaedb1d6d5d44 Mon Sep 17 00:00:00 2001
From: singjc
Date: Mon, 12 May 2025 22:14:26 -0400
Subject: [PATCH 38/75] add: RT Norm struct to set type of normalization

---
 .../src/properties/inference/inference.rs     | 54 ++++++++++++++++---
 crates/redeem-cli/src/properties/load_data.rs | 46 +++++++++-------
 .../src/properties/train/trainer.rs           | 16 +++---
 .../src/utils/data_handling.rs                | 19 +++++++
 4 files changed, 101 insertions(+), 34 deletions(-)

diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs
index 876af7e..9f642c8 100644
--- a/crates/redeem-cli/src/properties/inference/inference.rs
+++ b/crates/redeem-cli/src/properties/inference/inference.rs
@@ -1,5 +1,10 @@
 use anyhow::{Context, Result};
+use redeem_properties::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
+use redeem_properties::models::ccs_cnn_tf_model::CCSCNNTFModel;
+use redeem_properties::models::ccs_model::load_collision_cross_section_model;
+use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel;
+use redeem_properties::models::model_interface::ModelInterface;
-use redeem_properties::utils::data_handling::PeptideData;
+use redeem_properties::utils::data_handling::{PeptideData, RTNormalization};
 use redeem_properties::utils::peptdeep_utils::load_modifications;
 use redeem_properties::utils::utils::get_device;
 use redeem_properties::models::rt_model::load_retention_time_model;
@@ -12,19 +17,52 @@
 
 pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> {
     // Load inference data
-    let (inference_data, norm_factor) = load_peptide_data(&config.inference_data, Some(config.nce), Some(config.instrument.clone()), true)?;
+    let (inference_data, norm_factor) = load_peptide_data(&config.inference_data, Some(config.nce), Some(config.instrument.clone()), Some("min_max".to_string()))?;
 
     log::info!("Loaded {} peptides", inference_data.len());
 
     // Dispatch model training based on architecture
     let model_arch = config.model_arch.as_str();
     let device = get_device(&config.device)?;
 
-    let mut model = load_retention_time_model(
-        &config.model_path,
-        None,
-        &config.model_arch,
-        device.clone(),
-    )?;
+    let mut model: Box<dyn ModelInterface> = match model_arch {
+        "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(
+            &config.model_path,
+            None,
+            0,
+            8,
+            4,
+            true,
+            device.clone(),
+        )?),
+        "rt_cnn_tf" => Box::new(RTCNNLSTMModel::new(
+            &config.model_path,
+            None,
+            0,
+            8,
+            4,
+            true,
+            device.clone(),
+        )?),
+        "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new(
+            &config.model_path,
+            None,
+            0,
+            8,
+            4,
+            true,
+            device.clone(),
+        )?),
+        "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new(
+            &config.model_path,
+            None,
+            0,
+            8,
+            4,
+            true,
+            device.clone(),
+        )?),
+        _ => return Err(anyhow::anyhow!("Unsupported RT model architecture: {}", model_arch)),
+    };
 
     let modifications = load_modifications().context("Failed to load modifications")?;
 
diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs
index 6e85e22..0646320 100644
--- a/crates/redeem-cli/src/properties/load_data.rs
+++ b/crates/redeem-cli/src/properties/load_data.rs
@@ -3,7 +3,8 @@ use std::path::Path;
 use std::io::BufReader;
 use anyhow::{Result, Context};
 use csv::ReaderBuilder;
-use redeem_properties::utils::data_handling::PeptideData;
+use redeem_properties::utils::data_handling::{PeptideData, RTNormalization};
+
 
 /// Load peptide training data from a CSV or TSV file and optionally normalize RT.
 ///
@@ -12,8 +13,8 @@ pub fn load_peptide_data<P: AsRef<Path>>(
     path: P,
     nce: Option<f32>,
     instrument: Option<String>,
-    normalize_rt: bool,
-) -> Result<(Vec<PeptideData>, Option<(f32, f32)>)> {
+    normalize_rt: Option<String>,
+) -> Result<(Vec<PeptideData>, RTNormalization)> {
     let file = File::open(&path)
         .with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?;
     let reader = BufReader::new(file);
@@ -83,28 +84,33 @@ pub fn load_peptide_data<P: AsRef<Path>>(
             retention_time,
             ion_mobility,
             ccs,
-            ms2_intensities: None
+            ms2_intensities: None,
         });
     }
 
-    if normalize_rt && !rt_values.is_empty() {
-        let mean = rt_values.iter().copied().sum::<f32>() / rt_values.len() as f32;
-        let std = (rt_values
-            .iter()
-            .map(|v| (v - mean).powi(2))
-            .sum::<f32>()
-            / rt_values.len() as f32)
-            .sqrt();
-
-        for peptide in &mut peptides {
-            if let Some(rt) = peptide.retention_time.as_mut() {
-                *rt = (*rt - mean) / std;
+    match RTNormalization::from_str(normalize_rt) {
+        RTNormalization::ZScore(_, _) if !rt_values.is_empty() => {
+            let mean = rt_values.iter().copied().sum::<f32>() / rt_values.len() as f32;
+            let std = (rt_values.iter().map(|v| (v - mean).powi(2)).sum::<f32>() / rt_values.len() as f32).sqrt();
+            for peptide in &mut peptides {
+                if let Some(rt) = peptide.retention_time.as_mut() {
+                    *rt = (*rt - mean) / std;
+                }
             }
+            Ok((peptides, RTNormalization::ZScore(mean, std)))
         }
-
-        Ok((peptides, Some((mean, std))))
-    } else {
-        Ok((peptides, None))
+        RTNormalization::MinMax(_, _) if !rt_values.is_empty() => {
+            let min = *rt_values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
+            let max = *rt_values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
+            let range = max - min;
+            for peptide in &mut peptides {
+                if let Some(rt) = peptide.retention_time.as_mut() {
+                    *rt = (*rt - min) / range;
+                }
+            }
+            Ok((peptides, RTNormalization::MinMax(min, max)))
+        }
+        _ => Ok((peptides, RTNormalization::None))
     }
 }
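Note (illustrative sketch, not part of this patch; the file name and instrument value are hypothetical): the new signature returns the fitted normalization alongside the peptides, so callers can invert it later:

    // Inside a function returning anyhow::Result<()>.
    let (peptides, norm) = load_peptide_data(
        "train_peptides.tsv",
        Some(30.0),                  // NCE
        Some("QE".to_string()),      // instrument
        Some("min_max".to_string()), // RT normalization mode
    )?;
    log::info!("Loaded {} peptides with {:?} RT normalization", peptides.len(), norm);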
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index cbd523c..bee5988 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -6,7 +6,7 @@ use redeem_properties::models::{
     ccs_cnn_lstm_model::CCSCNNLSTMModel, ccs_cnn_tf_model::CCSCNNTFModel,
     rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel,
 };
-use redeem_properties::utils::data_handling::PeptideData;
+use redeem_properties::utils::data_handling::{PeptideData, RTNormalization};
 use redeem_properties::utils::peptdeep_utils::load_modifications;
 use redeem_properties::utils::utils::get_device;
 use report_builder::{
@@ -29,7 +29,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         &config.train_data,
         Some(config.nce),
         Some(config.instrument.clone()),
-        true,
+        Some(config.rt_normalization.clone().unwrap()),
     )?;
     log::info!("Loaded {} training peptides", train_peptides.len());
@@ -39,7 +39,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
             val_path,
             Some(config.nce),
             Some(config.instrument.clone()),
-            true,
+            Some(config.rt_normalization.clone().unwrap()),
         )
         .context("Failed to load validation data")?;
         (Some(peptides), Some(norm))
@@ -196,7 +196,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
     overview_section.add_plot(acc_plot);
 
     // Inference scatter plot
-    let val_peptides: Vec<PeptideData> = sample_peptides(&val_peptides.as_ref().unwrap(), 1000);
+    let val_peptides: Vec<PeptideData> = sample_peptides(&val_peptides.as_ref().unwrap(), 5000);
     let inference_results: Vec<PeptideData> = model.inference(&val_peptides, config.batch_size, modifications, norm_factor)?;
 
     let (true_rt, pred_rt): (Vec<f64>, Vec<f64>) = val_peptides
         .iter()
         .zip(inference_results.iter())
         .filter_map(|(true_pep, pred_pep)| {
             match (true_pep.retention_time, pred_pep.retention_time) {
                 (Some(t), Some(p)) => {
-                    let t_denorm = t as f64 * norm_factor.unwrap().1 as f64
-                        + norm_factor.unwrap().0 as f64;
+                    let t_denorm = match norm_factor {
+                        RTNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64,
+                        RTNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64,
+                        RTNormalization::None => t as f64,
+                    };
                     Some((t_denorm, p as f64))
                 }
                 _ => None,
             }
         })
         .unzip();
+
 
     let scatter_plot = plot_scatter(
         &vec![true_rt.clone()],
diff --git a/crates/redeem-properties/src/utils/data_handling.rs b/crates/redeem-properties/src/utils/data_handling.rs
index 6b09c27..2722122 100644
--- a/crates/redeem-properties/src/utils/data_handling.rs
+++ b/crates/redeem-properties/src/utils/data_handling.rs
@@ -1,4 +1,23 @@
+
+/// Type of RT normalization used
+#[derive(Debug, Clone, Copy)]
+pub enum RTNormalization {
+    ZScore(f32, f32), // mean, std
+    MinMax(f32, f32), // min, max
+    None,
+}
+
+impl RTNormalization {
+    pub fn from_str(norm: Option<String>) -> Self {
+        match norm.as_deref() {
+            Some("z_score") => RTNormalization::ZScore(0.0, 0.0),
+            Some("min_max") => RTNormalization::MinMax(0.0, 0.0),
+            _ => RTNormalization::None,
+        }
+    }
+}
+
 #[derive(Clone)]
 pub struct PeptideData {
     pub sequence: String,
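Note (illustrative sketch, not part of this patch): the parameters captured in the enum make denormalization explicit. A z-score round trip, with the inversion arms written against the field layouts documented in the enum comments:

    let norm = RTNormalization::ZScore(42.0, 7.5); // mean, std fitted at load time
    let rt_scaled = (50.0f32 - 42.0) / 7.5;        // forward transform
    let rt_denorm = match norm {
        RTNormalization::ZScore(mean, std) => rt_scaled * std + mean,
        RTNormalization::MinMax(min, max) => rt_scaled * (max - min) + min, // fields are (min, max)
        RTNormalization::None => rt_scaled,
    };
    assert!((rt_denorm - 50.0).abs() < 1e-4);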
From 92c7134de21b9a545777c6a8e6d47883913a2741 Mon Sep 17 00:00:00 2001
From: singjc
Date: Mon, 12 May 2025 22:14:34 -0400
Subject: [PATCH 39/75] refactor: Update config loading logic in redeem-cli
 crate

---
 .../src/properties/inference/input.rs         | 40 +++++++++---
 .../redeem-cli/src/properties/train/input.rs  | 64 +++++++++++++------
 2 files changed, 77 insertions(+), 27 deletions(-)

diff --git a/crates/redeem-cli/src/properties/inference/input.rs b/crates/redeem-cli/src/properties/inference/input.rs
index 0e6119e..fcdacd5 100644
--- a/crates/redeem-cli/src/properties/inference/input.rs
+++ b/crates/redeem-cli/src/properties/inference/input.rs
@@ -38,27 +38,51 @@ impl PropertyInferenceConfig {
         let config_json = fs::read_to_string(config_path)
             .with_context(|| format!("Failed to read config file: {:?}", config_path))?;
 
-        let mut config: PropertyInferenceConfig = serde_json::from_str(&config_json)
-            .unwrap_or_else(|_| PropertyInferenceConfig::default());
+        let partial: serde_json::Value = serde_json::from_str(&config_json)?;
+        let mut config = PropertyInferenceConfig::default();
+
+        macro_rules! load_or_default {
+            ($field:ident) => {
+                if let Some(val) = partial.get(stringify!($field)) {
+                    if let Ok(parsed) = serde_json::from_value(val.clone()) {
+                        config.$field = parsed;
+                    } else {
+                        log::warn!(
+                            "Config: invalid value for '{}', using default: {:?}",
+                            stringify!($field), config.$field
+                        );
+                    }
+                } else {
+                    log::warn!(
+                        "Config: missing field '{}', using default: {:?}",
+                        stringify!($field), config.$field
+                    );
+                }
+            };
+        }
+
+        load_or_default!(model_path);
+        load_or_default!(inference_data);
+        load_or_default!(output_file);
+        load_or_default!(model_arch);
+        load_or_default!(device);
+        load_or_default!(batch_size);
+        load_or_default!(instrument);
+        load_or_default!(nce);
 
         // Apply CLI overrides
         if let Some(model_path) = matches.get_one::<String>("model_path") {
             config.model_path = model_path.clone();
-        } else {
-            config.model_path = config.model_path.clone();
         }
-
         if let Some(inference_data) = matches.get_one::<String>("inference_data") {
             validate_tsv_or_csv_file(inference_data)?;
-            config.inference_data = inference_data.clone().to_string();
+            config.inference_data = inference_data.clone();
         } else {
             validate_tsv_or_csv_file(&config.inference_data)?;
         }
-
         if let Some(output_file) = matches.get_one::<String>("output_file") {
             config.output_file = output_file.clone();
         }
-
         if let Some(model_arch) = matches.get_one::<String>("model_arch") {
             config.model_arch = model_arch.clone();
         }
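Note (illustrative sketch, not part of this patch): `load_or_default!` degrades field by field instead of discarding the whole config when one key is bad. The equivalent behavior, unrolled for two hypothetical fields:

    let partial: serde_json::Value =
        serde_json::from_str(r#"{ "device": "cuda" }"#).unwrap();
    // Present and valid: overrides the default.
    let device: String = partial
        .get("device")
        .and_then(|v| serde_json::from_value(v.clone()).ok())
        .unwrap_or_else(|| "cpu".to_string());
    // Absent: keeps its default (the macro would also log a warning here).
    let batch_size: usize = partial
        .get("batch_size")
        .and_then(|v| serde_json::from_value(v.clone()).ok())
        .unwrap_or(64);
    assert_eq!((device.as_str(), batch_size), ("cuda", 64));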
diff --git a/crates/redeem-cli/src/properties/train/input.rs b/crates/redeem-cli/src/properties/train/input.rs
index 58d3fd9..c8523b9 100644
--- a/crates/redeem-cli/src/properties/train/input.rs
+++ b/crates/redeem-cli/src/properties/train/input.rs
@@ -6,12 +6,14 @@ use anyhow::{Context, Result};
 
 use crate::properties::util::validate_tsv_or_csv_file;
 
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct PropertyTrainConfig {
     pub version: String,
     pub train_data: String,
     pub validation_data: Option<String>,
     pub output_file: String,
+    pub rt_normalization: Option<String>,
     pub model_arch: String,
     pub device: String,
     pub batch_size: usize,
@@ -31,6 +33,7 @@ impl Default for PropertyTrainConfig {
             train_data: String::new(),
             validation_data: None,
             output_file: String::from("rt_cnn_tf.safetensors"),
+            rt_normalization: Some(String::from("min_max")),
             model_arch: String::from("rt_cnn_tf"),
             device: String::from("cpu"),
             batch_size: 64,
@@ -46,42 +49,65 @@ impl Default for PropertyTrainConfig {
 }
 
 impl PropertyTrainConfig {
-    pub fn from_arguments(config_path: &PathBuf, matches: &ArgMatches) -> Result<Self> {
+    pub fn from_arguments(config_path: &PathBuf, matches: &ArgMatches) -> anyhow::Result<Self> {
         let config_json = fs::read_to_string(config_path)
-            .with_context(|| format!("Failed to read config file: {:?}", config_path))?;
+            .map_err(|e| anyhow::anyhow!("Failed to read config file: {}", e))?;
+
+        let partial: serde_json::Value = serde_json::from_str(&config_json)?;
+        let mut config = PropertyTrainConfig::default();
+
+        macro_rules! load_or_default {
+            ($field:ident) => {
+                if let Some(val) = partial.get(stringify!($field)) {
+                    if let Ok(parsed) = serde_json::from_value(val.clone()) {
+                        config.$field = parsed;
+                    } else {
+                        log::warn!(
+                            "Config: invalid value for '{}', using default: {:?}",
+                            stringify!($field), config.$field
+                        );
+                    }
+                } else {
+                    log::warn!(
+                        "Config: missing field '{}', using default: {:?}",
+                        stringify!($field), config.$field
+                    );
+                }
+            };
+        }
 
-        let mut config: PropertyTrainConfig = serde_json::from_str(&config_json)
-            .unwrap_or_else(|_| PropertyTrainConfig::default());
+        load_or_default!(train_data);
+        load_or_default!(validation_data);
+        load_or_default!(output_file);
+        load_or_default!(rt_normalization);
+        load_or_default!(model_arch);
+        load_or_default!(device);
+        load_or_default!(batch_size);
+        load_or_default!(validation_batch_size);
+        load_or_default!(learning_rate);
+        load_or_default!(epochs);
+        load_or_default!(early_stopping_patience);
+        load_or_default!(checkpoint_file);
+        load_or_default!(instrument);
+        load_or_default!(nce);
 
         // Apply CLI overrides
         if let Some(train_data) = matches.get_one::<String>("train_data") {
-            validate_tsv_or_csv_file(train_data)?;
-            config.train_data = train_data.clone().to_string();
-        } else {
-            validate_tsv_or_csv_file(&config.train_data)?;
+            config.train_data = train_data.clone();
        }
        if let Some(validation_data) = matches.get_one::<String>("validation_data") {
-            validate_tsv_or_csv_file(validation_data)?;
-            config.validation_data = Some(validation_data.clone().to_string());
-        } else if let Some(val_data) = &config.validation_data {
-            validate_tsv_or_csv_file(val_data)?;
+            config.validation_data = Some(validation_data.clone());
        }
        if let Some(output_file) = matches.get_one::<String>("output_file") {
            config.output_file = output_file.clone();
        }
        if let Some(model_arch) = matches.get_one::<String>("model_arch") {
            config.model_arch = model_arch.clone();
        }
        if let Some(checkpoint_file) = matches.get_one::<String>("checkpoint_file") {
            config.checkpoint_file = Some(checkpoint_file.clone());
        }

        Ok(config)
    }
-}
-
-
+}
\ No newline at end of file

From f95f0874dc23f6ad59c81d6508371695aaace0c6 Mon Sep 17 00:00:00 2001
From: singjc
Date: Mon, 12 May 2025 22:14:50 -0400
Subject: [PATCH 40/75] refactor: Update RT-CNN-LSTM and RT-CNN-Transformer
 models in redeem-properties crate

---
 .../src/models/rt_cnn_lstm_model.rs           | 12 +++----
 .../src/models/rt_cnn_transformer_model.rs    | 36 +++++--------------
 .../redeem-properties/src/models/rt_model.rs  |  4 +--
 3 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
index 1cb99c7..19e3f21 100644
--- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs
@@ -124,17 +124,13 @@ impl ModelInterface for RTCNNLSTMModel {
     let (mean, min, max) = get_tensor_stats(&aa_indices_out)?;
     log::debug!("[RTCNNLSTMModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}");
     let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?;
-        let (mean, min, max) = get_tensor_stats(&mod_x_out)?;
-
log::debug!("[RTCNNLSTMModel] x after dropout stats - min: {min}, max: {max}, mean: {mean}"); + let x = self.rt_decoder.forward(&x)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[RTCNNLSTMModel] x after decoder stats - min: {min}, max: {max}, mean: {mean}"); + let result = x.squeeze(1)?; Ok(result) diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs index 09f1b07..0231fe7 100644 --- a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs @@ -54,7 +54,7 @@ impl ModelInterface for RTCNNTFModel { let rt_encoder = Encoder26aaModCnnTransformerAttnSum::new( &varbuilder.pp("rt_encoder"), 8, // mod_hidden_dim - 140, // hidden_dim + 128, // hidden_dim 256, // ff_dim 4, // num_heads 2, // num_layers @@ -64,7 +64,7 @@ impl ModelInterface for RTCNNTFModel { )?; log::trace!("[RTCNNTFModel] Initializing rt_decoder"); - let rt_decoder = DecoderLinear::new(140, 1, &varbuilder.pp("rt_decoder"))?; + let rt_decoder = DecoderLinear::new(128, 1, &varbuilder.pp("rt_decoder"))?; let constants = ModelConstants::default(); let mod_to_feature = load_mod_to_feature(&constants)?; @@ -107,7 +107,7 @@ impl ModelInterface for RTCNNTFModel { let rt_encoder = Encoder26aaModCnnTransformerAttnSum::from_varstore( &var_store, 8, // mod_hidden_dim - 140, // hidden_dim + 128, // hidden_dim 256, // ff_dim 4, // num_heads 2, // num_layers @@ -132,7 +132,7 @@ impl ModelInterface for RTCNNTFModel { let rt_decoder = DecoderLinear::from_varstore( &var_store, - 140, + 128, 1, vec!["rt_decoder.nn.0.weight", "rt_decoder.nn.1.weight", "rt_decoder.nn.2.weight"], vec!["rt_decoder.nn.0.bias", "rt_decoder.nn.2.bias"] @@ -156,33 +156,13 @@ impl ModelInterface for RTCNNTFModel { let (mean, min, max) = get_tensor_stats(&aa_indices_out)?; log::debug!("[RTCNNTFModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}"); let mod_x_out = xs.i((.., .., 1..1 + MOD_FEATURE_SIZE))?; - - if mod_x_out.shape().elem_count() == 0 { - log::error!("[RTCNNTFModel] mod_x_out is empty! shape: {:?}", mod_x_out.shape()); - } else { - match get_tensor_stats(&mod_x_out) { - Ok((mean, min, max)) => { - log::debug!("[RTCNNTFModel] mod_x_out stats - min: {min}, max: {max}, mean: {mean}"); - } - Err(e) => { - log::error!("[RTCNNTFModel] Failed to compute stats for mod_x_out: {:?}", e); - } - } - } - - log::trace!("[RTCNNTFModel] aa_indices_out: {:?}, mod_x_out: {:?}", aa_indices_out, mod_x_out); + let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; - log::trace!("[RTCNNTFModel] x.shape after rt_encoder: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[RTCNNTFModel] rt_encoder output stats - min: {min}, max: {max}, mean: {mean}"); + let x = self.dropout.forward(&x, self.is_training)?; - log::trace!("[RTCNNTFModel] x.shape after dropout: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[RTCNNTFModel] dropout output stats - min: {min}, max: {max}, mean: {mean}"); + let x = self.rt_decoder.forward(&x)?; - log::trace!("[RTCNNTFModel] x.shape after rt_decoder: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[RTCNNTFModel] rt_decoder output stats - min: {min}, max: {max}, mean: {mean}"); + Ok(x.squeeze(1)?) 
} diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs index 3b9672c..adab38c 100644 --- a/crates/redeem-properties/src/models/rt_model.rs +++ b/crates/redeem-properties/src/models/rt_model.rs @@ -8,7 +8,7 @@ use candle_nn::VarMap; use crate::models::model_interface::{ModelInterface,PredictionResult}; use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel; use crate::models::rt_cnn_transformer_model::RTCNNTFModel; -use crate::utils::data_handling::PeptideData; +use crate::utils::data_handling::{PeptideData, RTNormalization}; use crate::utils::stats::TrainingStepMetrics; use std::collections::HashMap; use crate::utils::peptdeep_utils::ModificationMap; @@ -59,7 +59,7 @@ impl RTModelWrapper { self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs) } - pub fn inference(&mut self, inference_data: &Vec, batch_size: usize, modifications: HashMap<(String, Option), ModificationMap>, rt_norm_params: Option<(f32, f32)>,) -> Result> { + pub fn inference(&mut self, inference_data: &Vec, batch_size: usize, modifications: HashMap<(String, Option), ModificationMap>, rt_norm_params: RTNormalization,) -> Result> { self.model.inference(inference_data, batch_size, modifications, rt_norm_params) } From 4dd8900bfae10a6865387acab4b9b36dee42fa89 Mon Sep 17 00:00:00 2001 From: singjc Date: Mon, 12 May 2025 22:15:04 -0400 Subject: [PATCH 41/75] refactor: Update hidden_dim and decoder size in CCSCNNTFModel --- .../src/models/ccs_cnn_tf_model.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs index e821546..ab41d5c 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs @@ -57,7 +57,7 @@ impl ModelInterface for CCSCNNTFModel { let ccs_encoder = Encoder26aaModChargeCnnTransformerAttnSum::new( &varbuilder.pp("ccs_encoder"), 8, // mod_hidden_dim - 140, // hidden_dim + 128, // hidden_dim 256, // ff_dim 4, // num_heads 2, // num_layers @@ -67,7 +67,7 @@ impl ModelInterface for CCSCNNTFModel { )?; log::trace!("[CCSCNNTFModel] Initializing ccs_decoder"); - let ccs_decoder = DecoderLinear::new(141, 1, &varbuilder.pp("ccs_decoder"))?; + let ccs_decoder = DecoderLinear::new(129, 1, &varbuilder.pp("ccs_decoder"))?; let constants = ModelConstants::default(); let mod_to_feature = load_mod_to_feature(&constants)?; @@ -110,7 +110,7 @@ impl ModelInterface for CCSCNNTFModel { let ccs_encoder = Encoder26aaModChargeCnnTransformerAttnSum::from_varstore( &var_store, 8, // mod_hidden_dim - 140, // hidden_dim + 128, // hidden_dim 256, // ff_dim 4, // num_heads 2, // num_layers @@ -135,7 +135,7 @@ impl ModelInterface for CCSCNNTFModel { let ccs_decoder = DecoderLinear::from_varstore( &var_store, - 141, + 129, 1, vec!["ccs_decoder.nn.0.weight", "ccs_decoder.nn.1.weight", "ccs_decoder.nn.2.weight"], vec!["ccs_decoder.nn.0.bias", "ccs_decoder.nn.2.bias"] @@ -162,23 +162,23 @@ impl ModelInterface for CCSCNNTFModel { let start_charge = start_mod_x + MOD_FEATURE_SIZE; let aa_indices_out = xs.i((.., .., 0))?; + let (mean, min, max) = get_tensor_stats(&aa_indices_out)?; + log::debug!("[CCSCNNTFModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}"); + let mod_x_out = xs.i((.., .., start_mod_x..start_mod_x + MOD_FEATURE_SIZE))?; let charge_out = xs.i((.., 0..1, start_charge..start_charge + 1))?; let charge_out = 
charge_out.squeeze(2)?; let x = self.ccs_encoder.forward(&aa_indices_out, &mod_x_out, &charge_out)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[CCSCNNTFModel] ccs_encoder output stats - min: {min}, max: {max}, mean: {mean}"); + let x = self.dropout.forward(&x, self.is_training)?; - log::trace!("[CCSCNNTFModel] x.shape after dropout: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[CCSCNNTFModel] dropout output stats - min: {min}, max: {max}, mean: {mean}"); + + + let x = Tensor::cat(&[x, charge_out], 1)?; let x = self.ccs_decoder.forward(&x)?; - log::trace!("[CCSCNNTFModel] x.shape after ccs_decoder: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[CCSCNNTFModel] ccs_decoder output stats - min: {min}, max: {max}, mean: {mean}"); + Ok(x.squeeze(1)?) } From 47ab97611100c75b517bd5161baaa636982cf29c Mon Sep 17 00:00:00 2001 From: singjc Date: Mon, 12 May 2025 22:15:26 -0400 Subject: [PATCH 42/75] refactor: clean up trace comments --- .../src/building_blocks/bilstm.rs | 11 +- .../src/building_blocks/building_blocks.rs | 213 ++++++------------ .../src/building_blocks/featurize.rs | 46 +++- .../src/building_blocks/nn.rs | 56 +---- .../src/models/model_interface.rs | 63 ++++-- 5 files changed, 148 insertions(+), 241 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index 68d5204..43d4e1a 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -59,7 +59,6 @@ impl BidirectionalLSTM { let c0_forward = c0.i(0)?; let state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward }; - let start_time = std::time::Instant::now(); let out_fw_states = lstm_forward.seq_init(input, &state_fw)?; let out_fw = Tensor::stack( &out_fw_states.iter().map(|s| s.h()).collect::>(), @@ -67,10 +66,8 @@ impl BidirectionalLSTM { )?; let last_fw_h = out_fw_states.last().unwrap().h().clone(); let last_fw_c = out_fw_states.last().unwrap().c().clone(); - log::trace!("BidirectionLSTM::apply_bidirectional_layer - Forward LSTM time: {:?}", start_time.elapsed()); // Reverse sequence - let start_time = std::time::Instant::now(); let input_reversed = Tensor::cat( &(0..seq_len) .rev() @@ -78,14 +75,12 @@ impl BidirectionalLSTM { .collect::>>()?, 1, )?; - log::trace!("BidirectionLSTM::apply_bidirectional_layer - Reverse sequence time: {:?}", start_time.elapsed()); // Initial states for backward let h0_backward = h0.i(1)?; let c0_backward = c0.i(1)?; let state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward }; - let start_time = std::time::Instant::now(); let out_bw_states = lstm_backward.seq_init(&input_reversed, &state_bw)?; let out_bw = Tensor::stack( &out_bw_states.iter().map(|s| s.h()).collect::>(), @@ -93,7 +88,6 @@ impl BidirectionalLSTM { )?; let last_bw_h = out_bw_states.last().unwrap().h().clone(); let last_bw_c = out_bw_states.last().unwrap().c().clone(); - log::trace!("BidirectionLSTM::apply_bidirectional_layer - Backward LSTM time: {:?}", start_time.elapsed()); // Combine hidden and cell states let hn = Tensor::stack(&[last_fw_h.clone(), last_bw_h.clone()], 0)?; @@ -116,12 +110,9 @@ impl BidirectionalLSTM { let h0_2 = h0.narrow(0, 2, 2)?; let c0_2 = c0.narrow(0, 2, 2)?; - let start_time = std::time::Instant::now(); let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?; - 
log::trace!("BidirectionLSTM::forward_with_state - Layer 1 time: {:?}", start_time.elapsed()); - let start_time = std::time::Instant::now(); + let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?; - log::trace!("BidirectionLSTM::forward_with_state - Layer 2 time: {:?}", start_time.elapsed()); let hn = Tensor::cat(&[hn1, hn2], 0)?; let cn = Tensor::cat(&[cn1, cn2], 0)?; diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index c1e164d..132d067 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -1,6 +1,6 @@ use anyhow::{Context, Result as AnyHowResult}; use candle_core::{DType, Device, Module, Result, Tensor, D}; -use candle_nn as nn; +use candle_nn::{self as nn, linear}; use candle_transformers as transformers; use serde::de; use core::num; @@ -28,13 +28,10 @@ pub struct DecoderLinear { impl DecoderLinear { pub fn new(in_features: usize, out_features: usize, vb: &nn::VarBuilder) -> Result { - log::trace!("[DecoderLinear::new] Initializing linear1"); let linear1 = nn::linear(in_features, 64, vb.pp("nn.0"))?; - log::trace!("[DecoderLinear::new] Initializing prelu"); let prelu = nn::PReLU::new(Tensor::zeros(64, DType::F32, vb.device())?, false); - log::trace!("[DecoderLinear::new] Initializing linear2"); let linear2 = nn::linear(64, out_features, vb.pp("nn.2"))?; - log::trace!("[DecoderLinear::new] Initializing sequential"); + let mut nn = seq(); nn = nn.add(linear1); nn = nn.add(prelu); @@ -73,16 +70,8 @@ impl DecoderLinear { impl Module for DecoderLinear { fn forward(&self, x: &Tensor) -> Result { - log::trace!("[DecoderLinear] input shape: {:?}", x.shape()); match self.nn.forward(x) { Ok(output) => { - log::trace!("[DecoderLinear] output shape: {:?}", output.shape()); - log::trace!( - "[DecoderLinear] output stats - min: {:.4}, max: {:.4}, mean: {:.4}", - output.min_all()?.to_vec0::()?, - output.max_all()?.to_vec0::()?, - output.mean_all()?.to_vec0::()?, - ); Ok(output) } Err(e) => { @@ -120,7 +109,6 @@ impl AAEmbedding { fn from_varstore(varstore: &nn::VarBuilder, hidden_size: usize, name: &str) -> Result { let weight = varstore.get((AA_EMBEDDING_SIZE, hidden_size), name)?; - log::trace!("[AAEmbedding::from_varstore] weight shape (AA_EMBEDDING_SIZE, hidden_size): {:?}, device: {:?}", weight.shape(), weight.device()); let embeddings = nn::Embedding::new(weight, hidden_size); Ok(Self { embeddings }) } @@ -128,13 +116,7 @@ impl AAEmbedding { impl Module for AAEmbedding { fn forward(&self, x: &Tensor) -> Result { - log::trace!("[AAEmbedding::forward] x shape: {:?}, device: {:?}, min: {:?}, max: {:?}", - x.shape(), x.device(), x.min_all(), x.max_all()); - let x = x.to_dtype(DType::I64)?; - log::trace!("[AAEmbedding::forward] x (converted to i64) shape: {:?}, device: {:?}, min: {:?}, max: {:?}", - x.shape(), x.device(), x.min_all(), x.max_all()); - self.embeddings.forward(&x) } } @@ -217,7 +199,7 @@ struct ModEmbeddingFixFirstK { impl ModEmbeddingFixFirstK { fn new(mod_feature_size: usize, out_features: usize, varbuilder: &nn::VarBuilder) -> Result { let k = 6; - let nn = nn::linear(mod_feature_size - k, out_features - k, varbuilder.pp("linear"))?; + let nn = nn::linear_no_bias(mod_feature_size - k, out_features - k, varbuilder.pp("nn"))?; Ok(Self { k, nn }) } @@ -288,19 +270,10 @@ impl Input26aaModPositionalEncoding { } pub fn forward(&self, 
aa_indices: &Tensor, mod_x: &Tensor) -> Result { - log::trace!("[Input26aaModPositionalEncoding::forward] aa_indices shape: {:?}, device: {:?}, min: {:?}, max: {:?}", - aa_indices.shape(), aa_indices.device(), aa_indices.min_all(),aa_indices.max_all()); - - log::trace!("[Input26aaModPositionalEncoding::forward] mod_x shape: {:?}, device: {:?}", mod_x.shape(), mod_x.device()); - let mod_x = self.mod_nn.forward(mod_x)?; - log::trace!("[Input26aaModPositionalEncoding::forward] mod_x (after mod_nn) shape: {:?}, device: {:?}", mod_x.shape(), mod_x.device()); let x = self.aa_emb.forward(aa_indices)?; - log::trace!("[Input26aaModPositionalEncoding::forward] x (after aa_emb) shape: {:?}, device: {:?}", x.shape(), x.device()); - // Concatenate x and mod_x along the last dimension let concatenated = Tensor::cat(&[&x, &mod_x], 2)?; - log::trace!("[Input26aaModPositionalEncoding::forward] concatenated shape: {:?}, device: {:?}", concatenated.shape(), concatenated.device()); self.pos_encoder.forward(&concatenated) } } @@ -332,34 +305,6 @@ impl MetaEmbedding { Ok(Self { nn }) } - // fn one_hot(&self, indices: &Tensor, num_classes: usize) -> AnyHowResult { - // let batch_size = indices.dim(0)?; - - // let mut one_hot_data = vec![0.0f32; batch_size * num_classes]; - - // for i in 0..batch_size { - // let index = indices.get(i)?.to_scalar::()?; - // let class_idx = index as usize; - - // if class_idx < num_classes { - // one_hot_data[i * num_classes + class_idx] = 1.0; - // } else { - // return Err(anyhow::anyhow!( - // "Index {} out of bounds for one-hot encoding", - // class_idx - // )); - // } - // } - - // log::trace!("one hot encoded data of shape: {:?} on device: {:?}", (batch_size, num_classes), indices.device()); - - // log::trace!("one hot encoded data: {:?}", Tensor::from_slice(&one_hot_data, (batch_size, num_classes), indices.device()) - // .context("Failed to create tensor from one-hot data")); - - // Tensor::from_slice(&one_hot_data, (batch_size, num_classes), indices.device()) - // .context("Failed to create tensor from one-hot data") - // } - fn one_hot(&self, indices: &Tensor, num_classes: usize) -> Result { let batch_size = indices.dim(0)?; let device = indices.device(); @@ -385,8 +330,6 @@ impl MetaEmbedding { // Create a tensor from the one-hot data let one_hot = Tensor::from_slice(&one_hot_data, (batch_size, num_classes), device)?; - log::trace!("[MetaEmbedding::one_hot] one hot encoded data shape: {:?}, device: {:?}", one_hot.shape(), one_hot.device()); - Ok(one_hot) } @@ -396,39 +339,23 @@ impl MetaEmbedding { nces: &Tensor, instrument_indices: &Tensor, ) -> Result { - // Log input tensors - log::trace!("[MetaEmbedding::forward] charges shape: {:?}, device: {:?}", charges.shape(), charges.device()); - log::trace!("[MetaEmbedding::forward] nces shape: {:?}, device: {:?}", nces.shape(), nces.device()); - log::trace!("[MetaEmbedding::forward] instrument_indices shape: {:?}, device: {:?}", instrument_indices.shape(), instrument_indices.device()); - log::trace!("[MetaEmbedding::forward] charges: {:?}", charges.to_vec2::()?); - - // // Ensure instrument_indices is a 1D tensor - // let instrument_indices = instrument_indices.squeeze(1)?; // Remove the second dimension - // log::trace!("[MetaEmbedding::forward] instrument_indices (after squeeze) shape: {:?}, device: {:?}", instrument_indices.shape(), instrument_indices.device()); // One-hot encode the instrument indices let inst_x = self.one_hot(&instrument_indices.to_dtype(DType::I64)?, MAX_INSTRUMENT_NUM)?; - 
log::trace!("[MetaEmbedding::forward] inst_x shape: {:?}, device: {:?}", inst_x.shape(), inst_x.device()); - // Ensure all tensors are on the same device let charges = &charges.to_device(inst_x.device())?; let nces = &nces.to_device(inst_x.device())?; - log::trace!("[MetaEmbedding::forward] charges (after to_device) shape: {:?}, device: {:?}", charges.shape(), charges.device()); - log::trace!("[MetaEmbedding::forward] nces (after to_device) shape: {:?}, device: {:?}", nces.shape(), nces.device()); // Concatenate the one-hot encoded instrument indices with NCEs let combined_input = Tensor::cat(&[&inst_x, nces], 1)?; - log::trace!("[MetaEmbedding::forward] combined_input shape: {:?}, device: {:?}", combined_input.shape(), combined_input.device()); // Pass through the linear layer let meta_x = self.nn.forward(&combined_input)?; - log::trace!("[MetaEmbedding::forward] meta_x shape: {:?}, device: {:?}", meta_x.shape(), meta_x.device()); // Concatenate the output with charges let meta_x = Tensor::cat(&[&meta_x, charges], 1)?; - log::trace!("[MetaEmbedding::forward] final meta_x shape: {:?}, device: {:?}", meta_x.shape(), meta_x.device()); Ok(meta_x) } @@ -685,11 +612,20 @@ impl Module for SeqCNN { fn forward(&self, x: &Tensor) -> Result { let x = x.transpose(1, 2)?; - let short = self.cnn_short.forward(&x)?; + let short = match self.cnn_short.forward(&x) { + Ok(output) => output, + Err(e) => { + log::error!("[SeqCNN::forward] cnn_short.forward failed: {:?}", e); + return Err(e); + } + }; + let medium = self.cnn_medium.forward(&x)?; + let long = self.cnn_long.forward(&x)?; let output = Tensor::cat(&[x, short, medium, long], 1)?; + Ok(output.transpose(1, 2)?) } } @@ -801,16 +737,7 @@ impl SeqTransformer { impl Module for SeqTransformer { fn forward(&self, x: &Tensor) -> Result { - // Add check to ensure input feature dim matches expected model dim - let (_b, _t, d) = x.dims3()?; - let model_dim = self.encoder.model_dim; - if d != model_dim { - return Err(candle_core::Error::Msg(format!( - "SeqTransformer received input with dim {} but expected {}", - d, model_dim - ))); - } - self.encoder.forward_with_mask(x, None, self.training) + Ok(self.encoder.forward_with_mask(x, None, self.training)?) 
} } @@ -823,10 +750,10 @@ struct SeqAttentionSum { impl SeqAttentionSum { pub fn new(hidden_dim: usize, varbuilder: &nn::VarBuilder) -> Result { - let attention = nn::Linear::new( - varbuilder.get((1, hidden_dim), "attention.weight")?, - None, - ); + let attention = nn::linear_no_bias( + hidden_dim, + 1, + varbuilder.pp("attn.0"))?; Ok(Self { attention }) } @@ -838,7 +765,14 @@ impl SeqAttentionSum { impl Module for SeqAttentionSum { fn forward(&self, x: &Tensor) -> Result { - let attention_weights = self.attention.forward(x)?; + let attention_weights = match self.attention.forward(x) { + Ok(weights) => weights, + Err(e) => { + log::error!("Attention forward pass failed: {}", e); + return Err(e); + } + }; + // Apply softmax to normalize weights // TODO: This is done in the model itself in the PyTorch implementation @@ -911,34 +845,23 @@ impl Encoder26aaModCnnLstmAttnSum { } pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor) -> Result { - - let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - mod_x forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let additional_tensors: Vec<&Tensor> = vec![&mod_x]; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - additional_tensors forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + log::trace!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnLstmAttnSum] CNN output stats - min: {min}, max: {max}, mean: {mean}"); - let start_time = Instant::now(); + let x = self.input_lstm.forward(&x)?; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnLstmAttnSum] LSTM output stats - min: {min}, max: {max}, mean: {mean}"); - let start_time = Instant::now(); + let x = self.attn_sum.forward(&x)?; - log::trace!("Encoder26aaModCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnLstmAttnSum] AttentionSum output stats - min: {min}, max: {max}, mean: {mean}"); + Ok(x) } } @@ -1006,15 +929,15 @@ impl Encoder26aaModChargeCnnLstmAttnSum { let x = aa_one_hot(&aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - let start_time = Instant::now(); + let (mean, min, max) = get_tensor_stats(&x)?; + log::trace!("[Encoder26aaModChargeCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + let x = self.input_cnn.forward(&x)?; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_cnn forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let x = self.input_lstm.forward(&x)?; - 
log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - input_lstm forward time: {:.3?}", start_time.elapsed()); - let start_time = Instant::now(); + let x = self.attn_sum.forward(&x)?; - log::trace!("Encoder26aaModChargeCnnLstmAttnSum::forward - attn_sum forward time: {:.3?}", start_time.elapsed()); + Ok(x) } } @@ -1025,6 +948,7 @@ impl Encoder26aaModChargeCnnLstmAttnSum { pub struct Encoder26aaModCnnTransformerAttnSum { mod_nn: ModEmbeddingFixFirstK, input_cnn: SeqCNN, + proj_cnn_to_transformer: candle_nn::Linear, input_transformer: SeqTransformer, attn_sum: SeqAttentionSum, } @@ -1060,6 +984,10 @@ impl Encoder26aaModCnnTransformerAttnSum { names_input_cnn_weight, names_input_cnn_bias, )?, + proj_cnn_to_transformer: candle_nn::Linear::new( + varstore.get((input_dim * 4, hidden_dim), "proj_cnn_to_transformer.weight")?, + Some(varstore.get(hidden_dim, "proj_cnn_to_transformer.bias")?), + ), input_transformer: SeqTransformer::from_varstore( varstore.pp(transformer_pp).clone(), input_dim * 4, @@ -1095,6 +1023,7 @@ impl Encoder26aaModCnnTransformerAttnSum { Ok(Self { mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?, input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?, + proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim*4, hidden_dim, varbuilder.pp("proj_cnn_to_transformer"))?, input_transformer: SeqTransformer::new( &varbuilder.pp("input_transformer"), input_dim * 4, @@ -1111,35 +1040,28 @@ impl Encoder26aaModCnnTransformerAttnSum { } pub fn forward(&self, aa_indices: &Tensor, mod_x: &Tensor) -> Result { - let start_time = Instant::now(); let mod_x = self.mod_nn.forward(mod_x)?; - log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - mod_x forward time: {:.3?}", start_time.elapsed()); let additional_tensors: Vec<&Tensor> = vec![&mod_x]; - let start_time = Instant::now(); + let x = aa_one_hot(aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; - log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - aa_one_hot forward time: {:.3?}", start_time.elapsed()); + let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + log::trace!("[Encoder26aaModCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + + if !mean.is_finite() || !min.is_finite() || !max.is_finite() { + log::error!("ERROR [Encoder26aaModCnnTransformerAttnSum] aa_one_hot produced non-finite tensor stats: mean={mean}, min={min}, max={max}"); + candle_core::bail!("ERRORNon-finite values found in peptide encoding output."); + } - let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; - log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_cnn forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnTransformerAttnSum] input_cnn output stats - min: {min}, max: {max}, mean: {mean}"); - let start_time = Instant::now(); + let x = self.proj_cnn_to_transformer.forward(&x)?; + let x = self.input_transformer.forward(&x)?; - log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - input_transformer forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnTransformerAttnSum] input_transformer output stats - min: {min}, max: {max}, mean: {mean}"); - let start_time = Instant::now(); let x = self.attn_sum.forward(&x)?; - 
log::trace!("[Encoder26aaModCnnTransformerAttnSum::forward] - attn_sum forward time: {:.3?}", start_time.elapsed()); - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModCnnTransformerAttnSum] attn_sum output stats - min: {min}, max: {max}, mean: {mean}"); Ok(x) } @@ -1151,6 +1073,7 @@ impl Encoder26aaModCnnTransformerAttnSum { pub struct Encoder26aaModChargeCnnTransformerAttnSum { mod_nn: ModEmbeddingFixFirstK, input_cnn: SeqCNN, + proj_cnn_to_transformer: candle_nn::Linear, input_transformer: SeqTransformer, attn_sum: SeqAttentionSum, } @@ -1172,7 +1095,7 @@ impl Encoder26aaModChargeCnnTransformerAttnSum { names_attn_sum: Vec<&str>, device: &Device, ) -> Result { - let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim + 1; Ok(Self { mod_nn: ModEmbeddingFixFirstK::from_varstore( &varstore, @@ -1186,6 +1109,10 @@ impl Encoder26aaModChargeCnnTransformerAttnSum { names_input_cnn_weight, names_input_cnn_bias, )?, + proj_cnn_to_transformer: candle_nn::Linear::new( + varstore.get((input_dim * 4, hidden_dim), "proj_cnn_to_transformer.weight")?, + Some(varstore.get(hidden_dim, "proj_cnn_to_transformer.bias")?), + ), input_transformer: SeqTransformer::from_varstore( varstore.pp(transformer_pp).clone(), input_dim * 4, @@ -1217,10 +1144,11 @@ impl Encoder26aaModChargeCnnTransformerAttnSum { dropout_prob: f32, device: &Device, ) -> Result { - let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim; + let input_dim = AA_EMBEDDING_SIZE + mod_hidden_dim + 1; Ok(Self { mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?, input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?, + proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim*4, hidden_dim, varbuilder.pp("proj_cnn_to_transformer"))?, input_transformer: SeqTransformer::new( &varbuilder.pp("input_transformer"), input_dim * 4, @@ -1241,24 +1169,21 @@ impl Encoder26aaModChargeCnnTransformerAttnSum { let charges_repeated = charges.unsqueeze(1)?.repeat(&[1, mod_x.dim(1)?, 1])?; let additional_tensors: Vec<&Tensor> = vec![&mod_x, &charges_repeated]; + let x = aa_one_hot(aa_indices, &additional_tensors) .map_err(|e| candle_core::Error::Msg(e.to_string()))?; + let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + log::trace!("[Encoder26aaModChargeCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); let x = self.input_cnn.forward(&x)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] input_cnn output stats - min: {min}, max: {max}, mean: {mean}"); + + let x = self.proj_cnn_to_transformer.forward(&x)?; let x = self.input_transformer.forward(&x)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] input_transformer output stats - min: {min}, max: {max}, mean: {mean}"); let x = self.attn_sum.forward(&x)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[Encoder26aaModChargeCnnTransformerAttnSum] attn_sum output stats - min: {min}, max: {max}, mean: {mean}"); - Ok(x) } } diff --git a/crates/redeem-properties/src/building_blocks/featurize.rs b/crates/redeem-properties/src/building_blocks/featurize.rs index 2272612..751464e 100644 --- a/crates/redeem-properties/src/building_blocks/featurize.rs +++ b/crates/redeem-properties/src/building_blocks/featurize.rs @@ 
-39,31 +39,53 @@ pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result { /// One-hot encode amino acid indices and concatenate additional tensors. pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result { let (batch_size, seq_len) = aa_indices.shape().dims2()?; + log::trace!("[aa_one_hot] batch_size: {}, seq_len: {}", batch_size, seq_len); let num_classes = AA_EMBEDDING_SIZE; - // Extract all indices as f32s once let indices = aa_indices.to_vec2::()?; - - // Preallocate output buffer let mut one_hot_data = vec![0.0f32; batch_size * seq_len * num_classes]; - // Use parallel iterator for speed one_hot_data .par_chunks_mut(seq_len * num_classes) .zip(indices.par_iter()) - .for_each(|(chunk, row)| { + .enumerate() + .try_for_each(|(batch_idx, (chunk, row))| -> Result<()> { for (seq_idx, &fidx) in row.iter().enumerate() { + if !fidx.is_finite() { + return Err(anyhow!( + "Invalid AA index: found NaN or Inf at batch {}, position {}: {}", + batch_idx, seq_idx, fidx + )); + } + + if fidx < 0.0 { + return Err(anyhow!( + "Invalid AA index: negative value at batch {}, position {}: {}", + batch_idx, seq_idx, fidx + )); + } + let class_idx = fidx.round() as usize; - if class_idx < num_classes { - chunk[seq_idx * num_classes + class_idx] = 1.0; + if class_idx >= num_classes { + return Err(anyhow!( + "AA index out of bounds: got {}, but num_classes = {} (batch {}, position {})", + class_idx, num_classes, batch_idx, seq_idx + )); } + + let index = seq_idx * num_classes + class_idx; + chunk[index] = 1.0; } - }); + Ok(()) + })?; - let one_hot_tensor = Tensor::from_slice(&one_hot_data, (batch_size, seq_len, num_classes), aa_indices.device()) - .map_err(|e| anyhow!("Failed to create one-hot tensor: {}", e))?; + let one_hot_tensor = Tensor::from_slice( + &one_hot_data, + (batch_size, seq_len, num_classes), + aa_indices.device(), + ) + .map_err(|e| anyhow!("Failed to create one-hot tensor: {}", e))?; - // Concatenate with additional tensors if cat_others.is_empty() { Ok(one_hot_tensor) } else { @@ -75,6 +97,8 @@ pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result + + /// Get the modification features for a given set of modifications and modification sites. /// /// Based on https://github.com/MannLabs/alphapeptdeep/blob/450518a39a4cd7d03db391108ec8700b365dd436/peptdeep/model/featurize.py#L47 diff --git a/crates/redeem-properties/src/building_blocks/nn.rs b/crates/redeem-properties/src/building_blocks/nn.rs index a8343c7..c460717 100644 --- a/crates/redeem-properties/src/building_blocks/nn.rs +++ b/crates/redeem-properties/src/building_blocks/nn.rs @@ -114,34 +114,17 @@ impl TransformerEncoder { } pub fn forward_with_mask(&self, x: &Tensor, padding_mask: Option<&Tensor>, training: bool) -> Result { - log::trace!("[TransformerEncoder] input x shape: {:?}", x.shape()); - let (mean, min, max) = get_tensor_stats(x)?; - log::debug!("[TransformerEncoder] input stats: mean={}, min={}, max={}", mean, min, max); let (b, t, _) = x.dims3()?; let pe = self.pos_encoding.i((..t, ..))? .unsqueeze(0)? 
.broadcast_as((b, t, self.pos_encoding.dim(1)?))?; - log::trace!("[TransformerEncoder] positional encoding shape: {:?}", pe.shape()); - let (mean, min, max) = get_tensor_stats(&pe)?; - log::debug!("[TransformerEncoder] positional encoding stats: mean={}, min={}, max={}", mean, min, max); - let mut out = x.broadcast_add(&pe)?; - let (mean, min, max) = get_tensor_stats(&out)?; - log::debug!("[TransformerEncoder] after positional encoding stats: mean={}, min={}, max={}", mean, min, max); out = self.dropout.forward(&out, training)?; - log::trace!("[TransformerEncoder] after dropout shape: {:?}", out.shape()); - let (mean, min, max) = get_tensor_stats(&out)?; - log::debug!("[TransformerEncoder] after dropout stats: mean={}, min={}, max={}", mean, min, max); - - for (i, layer) in self.layers.iter().enumerate() { - log::trace!("[TransformerEncoder] applying layer {}", i); + for (_i, layer) in self.layers.iter().enumerate() { out = layer.forward(&out, padding_mask, training)?; - log::trace!("[TransformerEncoder] output shape after layer {}: {:?}", i, out.shape()); - let (mean, min, max) = get_tensor_stats(&out)?; - log::debug!("[TransformerEncoder] output stats after layer {}: mean={}, min={}, max={}", i, mean, min, max); } Ok(out) } @@ -182,26 +165,12 @@ impl TransformerEncoderLayer { } pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>, training: bool) -> Result { - log::trace!("[TransformerEncoderLayer] input x shape: {:?}", x.shape()); let attn = self.self_attn.forward(x, mask)?; - let (mean, min, max) = get_tensor_stats(&attn)?; - log::debug!("[TransformerEncoderLayer] attention stats: mean={}, min={}, max={}", mean, min, max); let tmp = self.dropout1.forward(&attn, training)?; - let (mean, min, max) = get_tensor_stats(&tmp)?; - log::debug!("[TransformerEncoderLayer] attention after dropout stats: mean={}, min={}, max={}", mean, min, max); let tmp2 = x.broadcast_add(&tmp)?; - let (mean, min, max) = get_tensor_stats(&tmp2)?; - log::debug!("[TransformerEncoderLayer] after residual connection stats: mean={}, min={}, max={}", mean, min, max); let x = self.norm1.forward(&tmp2)?; - let (mean, min, max) = get_tensor_stats(&x)?; - log::debug!("[TransformerEncoderLayer] after norm1 stats: mean={}, min={}, max={}", mean, min, max); let ff = self.ff.forward(&x)?; - let (mean, min, max) = get_tensor_stats(&ff)?; - log::debug!("[TransformerEncoderLayer] feedforward stats: mean={}, min={}, max={}", mean, min, max); let result = self.norm2.forward(&x.broadcast_add(&self.dropout2.forward(&ff, training)?)?)?; - log::trace!("[TransformerEncoderLayer] output shape: {:?}", result.shape()); - let (mean, min, max) = get_tensor_stats(&result)?; - log::debug!("[TransformerEncoderLayer] output stats: mean={}, min={}, max={}", mean, min, max); Ok(result) } } @@ -237,34 +206,21 @@ impl MultiHeadAttention { pub fn forward(&self, x: &Tensor, mask: Option<&Tensor>) -> Result { let (b, t, _) = x.dims3()?; - log::trace!("[MultiHeadAttention] Input shape: b={}, t={}, head_dim={} (num_heads={})", b, t, self.head_dim, self.num_heads); let q = self.proj_q.forward(x)? .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? .contiguous()?; - log::trace!("[MultiHeadAttention] Q shape after projection and transpose: {:?}", q.shape()); - let (mean, min, max) = get_tensor_stats(&q)?; - log::debug!("[MultiHeadAttention] Q stats: mean={}, min={}, max={}", mean, min, max); let k = self.proj_k.forward(x)? .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? 
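            // Q, K and V are each reshaped from (b, t, d_model) to (b, num_heads, t, head_dim);
            // contiguous() packs the transposed layout before the batched matmuls.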
.contiguous()?; - log::trace!("[MultiHeadAttention] K shape after projection and transpose: {:?}", k.shape()); - let (mean, min, max) = get_tensor_stats(&k)?; - log::debug!("[MultiHeadAttention] K stats: mean={}, min={}, max={}", mean, min, max); let v = self.proj_v.forward(x)? .reshape((b, t, self.num_heads, self.head_dim))? .transpose(1, 2)? .contiguous()?; - log::trace!("[MultiHeadAttention] V shape after projection and transpose: {:?}", v.shape()); - let (mean, min, max) = get_tensor_stats(&v)?; - log::debug!("[MultiHeadAttention] V stats: mean={}, min={}, max={}", mean, min, max); - - - log::trace!("[MultiHeadAttention] Q/K/V shape after projection and transpose: {:?}", q.shape()); let k_t = k.transpose(2, 3)?.contiguous()?; let mut scores = q.matmul(&k_t)? / (self.head_dim as f64).sqrt(); @@ -277,12 +233,7 @@ impl MultiHeadAttention { } }; - log::trace!("[MultiHeadAttention] Attention score shape: {:?}", scores.shape()); - let (mean, min, max) = get_tensor_stats(&scores)?; - log::debug!("[MultiHeadAttention] Attention score stats: mean={}, min={}, max={}", mean, min, max); - if let Some(mask) = mask { - log::trace!("[MultiHeadAttention] Applying mask"); let mask = mask.unsqueeze(1)?; let scale = Tensor::new(1e9f32, x.device())?; scores = match scores.broadcast_add(&mask.neg()?.mul(&scale)?) { @@ -301,8 +252,6 @@ impl MultiHeadAttention { return Err(e.into()); } }; - let (attn_mean, attn_min, attn_max) = get_tensor_stats(&attn)?; - log::debug!("[MultiHeadAttention] Attention stats: mean={}, min={}, max={}", attn_mean, attn_min, attn_max); let context = match attn.matmul(&v) { Ok(ctx) => ctx.transpose(1, 2)?.reshape((b, t, self.num_heads * self.head_dim))?, @@ -312,9 +261,6 @@ impl MultiHeadAttention { } }; - log::trace!("[MultiHeadAttention] Final context shape: {:?}", context.shape()); - let (mean, min, max) = get_tensor_stats(&context)?; - log::debug!("[MultiHeadAttention] Context stats: mean={}, min={}, max={}", mean, min, max); self.proj_out.forward(&context) } } diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 9ea9273..ef68e24 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -2,10 +2,10 @@ use crate::{ building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed}, models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper}, utils::{ - data_handling::PeptideData, logging::Progress, peptdeep_utils::{ + data_handling::{PeptideData, RTNormalization}, logging::Progress, peptdeep_utils::{ get_modification_indices, get_modification_string, parse_instrument_index, remove_mass_shift, - }, stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, utils::{CosineWithWarmup, LRScheduler} + }, stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, utils::{get_tensor_stats, CosineWithWarmup, LRScheduler} }, }; use anyhow::{Context, Result}; @@ -13,7 +13,7 @@ use candle_core::{DType, Device, Tensor, Var}; use candle_nn::{Optimizer, VarMap}; use log::info; use rayon::prelude::*; -use std::ops::{Deref, Index}; +use std::{ops::{Deref, Index}, process::Output}; use std::path::Path; use std::{collections::HashMap, path::PathBuf}; @@ -293,7 +293,9 @@ pub trait ModelInterface: Send + Sync + ModelClone { let mod_feature_size = self.get_mod_element_count(); let mod_to_feature = self.get_mod_to_feature(); + log::trace!("[ModelInterface::encode_peptide] 
peptide_sequence: {}", peptide_sequence); let aa_tensor = aa_indices_tensor(peptide_sequence, device)?; + let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?; let mod_names: Vec<&str> = mods.split(';').filter(|s| !s.is_empty()).collect(); @@ -343,9 +345,21 @@ pub trait ModelInterface: Send + Sync + ModelClone { } if features.len() == 1 { - Ok(features.remove(0)) + let output = features.remove(0); + let (mean, min, max) = get_tensor_stats(&output)?; + if !mean.is_finite() || !min.is_finite() || !max.is_finite() { + log::error!("For Peptide = {peptide_sequence} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}"); + anyhow::bail!("Non-finite values found in peptide encoding output."); + } + Ok(output) } else { - Ok(Tensor::cat(&features, 2)?) + let output = Tensor::cat(&features, 2)?; + let (mean, min, max) = get_tensor_stats(&output)?; + if !mean.is_finite() || !min.is_finite() || !max.is_finite() { + log::error!("For Peptide = {peptide_sequence} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}"); + anyhow::bail!("Non-finite values found in peptide encoding output."); + } + Ok(output) } } @@ -532,6 +546,12 @@ pub trait ModelInterface: Send + Sync + ModelClone { )); progress.inc(); + // If the loss is NaN, stop training and throw an error + if loss_val.is_nan() { + log::error!("Loss is NaN, stopping training."); + return Err(anyhow::anyhow!("Loss is NaN, stopping training.")); + } + Ok(()) }, )?; @@ -841,7 +861,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { (String, Option), crate::utils::peptdeep_utils::ModificationMap, >, - rt_norm_params: Option<(f32, f32)>, + rt_norm: RTNormalization, ) -> Result> { let num_batches = (inference_data.len() + batch_size - 1) / batch_size; info!( @@ -849,16 +869,16 @@ pub trait ModelInterface: Send + Sync + ModelClone { inference_data.len(), num_batches ); - + let progress = Progress::new(inference_data.len(), "[inference] Batch:"); let mut result: Vec> = vec![None; inference_data.len()]; - + inference_data .par_chunks(batch_size) .enumerate() .map(|(batch_idx, batch_data)| { let start_idx = batch_idx * batch_size; - + let peptides: Vec = batch_data .iter() .map(|p| remove_mass_shift(&p.sequence)) @@ -871,7 +891,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { .iter() .map(|p| get_modification_indices(&p.sequence)) .collect(); - + let charges = batch_data .iter() .filter_map(|p| p.charge) @@ -881,14 +901,14 @@ pub trait ModelInterface: Send + Sync + ModelClone { } else { None }; - + let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; - + let instruments = batch_data .iter() .filter_map(|p| p.instrument.clone()) @@ -898,12 +918,12 @@ pub trait ModelInterface: Send + Sync + ModelClone { } else { None }; - + let input_tensor = self .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)? 
.to_device(self.get_device())?; let output = self.forward(&input_tensor)?; - + match self.property_type() { PropertyType::RT | PropertyType::CCS => { let predictions = output.to_vec1()?; @@ -914,12 +934,11 @@ pub trait ModelInterface: Send + Sync + ModelClone { let mut peptide = batch_data[i].clone(); match self.property_type() { PropertyType::RT => { - peptide.retention_time = - if let Some((mean, std)) = rt_norm_params { - Some(pred * std + mean) - } else { - Some(pred) - }; + peptide.retention_time = Some(match rt_norm { + RTNormalization::ZScore(mean, std) => pred * std + mean, + RTNormalization::MinMax(min, max) => pred * (max - min) + min, + RTNormalization::None => pred, + }); } PropertyType::CCS => peptide.ion_mobility = Some(pred), _ => {} @@ -941,7 +960,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { result[idx] = Some(peptide); progress.inc(); }); - + progress.finish(); Ok(result.into_iter().flatten().collect()) } @@ -953,7 +972,9 @@ pub trait ModelInterface: Send + Sync + ModelClone { modifications: &HashMap<(String, Option), crate::utils::peptdeep_utils::ModificationMap>, ) -> Result<(Tensor, Tensor)> { let peptides: Vec = batch_data.par_iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + let mods: Vec = batch_data.par_iter().map(|p| get_modification_string(&p.sequence, modifications)).collect(); + let mod_sites: Vec = batch_data.par_iter().map(|p| get_modification_indices(&p.sequence)).collect(); let charges = batch_data.par_iter().filter_map(|p| p.charge).collect::>(); From b6ec2a404fbfc882438259a1ceefc15f65c5a09f Mon Sep 17 00:00:00 2001 From: singjc Date: Mon, 12 May 2025 23:10:17 -0400 Subject: [PATCH 43/75] refactor: Update peptide data loading logic in redeem-cli crate --- .../src/properties/inference/inference.rs | 41 +++++++++++-------- crates/redeem-cli/src/properties/load_data.rs | 39 +++++++++++------- .../src/properties/train/trainer.rs | 2 + 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs index 9f642c8..e8caef3 100644 --- a/crates/redeem-cli/src/properties/inference/inference.rs +++ b/crates/redeem-cli/src/properties/inference/inference.rs @@ -2,22 +2,27 @@ use anyhow::{Context, Result}; use redeem_properties::models::ccs_cnn_lstm_model::CCSCNNLSTMModel; use redeem_properties::models::ccs_cnn_tf_model::CCSCNNTFModel; use redeem_properties::models::ccs_model::load_collision_cross_section_model; -use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel; use redeem_properties::models::model_interface::ModelInterface; +use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel; +use redeem_properties::models::rt_model::load_retention_time_model; use redeem_properties::utils::data_handling::{PeptideData, RTNormalization}; use redeem_properties::utils::peptdeep_utils::load_modifications; use redeem_properties::utils::utils::get_device; -use redeem_properties::models::rt_model::load_retention_time_model; -use crate::properties::load_data::load_peptide_data; -use crate::properties::util::write_bytes_to_file; use crate::properties::inference::input::PropertyInferenceConfig; use crate::properties::inference::output::write_peptide_data; +use crate::properties::load_data::load_peptide_data; +use crate::properties::util::write_bytes_to_file; pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { - // Load inference data - let (inference_data, norm_factor) = load_peptide_data(&config.inference_data, 
Some(config.nce), Some(config.instrument.clone()), Some("min_max".to_string()))?; + let (inference_data, norm_factor) = load_peptide_data( + &config.inference_data, + &config.model_arch, + Some(config.nce), + Some(config.instrument.clone()), + Some("min_max".to_string()), + )?; log::info!("Loaded {} peptides", inference_data.len()); // Dispatch model training based on architecture @@ -44,14 +49,14 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { device.clone(), )?), "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new( - &config.model_path, - None, - 0, - 8, - 4, - true, - device.clone(), - )?), + &config.model_path, + None, + 0, + 8, + 4, + true, + device.clone(), + )?), "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new( &config.model_path, None, @@ -61,7 +66,12 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { true, device.clone(), )?), - _ => return Err(anyhow::anyhow!("Unsupported RT model architecture: {}", model_arch)), + _ => { + return Err(anyhow::anyhow!( + "Unsupported RT model architecture: {}", + model_arch + )); + } }; let modifications = load_modifications().context("Failed to load modifications")?; @@ -76,7 +86,6 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { )?; log::info!("Inference completed in {:?}", start_time.elapsed()); - log::info!("Predictions saved to: {}", config.output_file); write_peptide_data(&inference_results, &config.output_file)?; diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs index 0646320..e29d239 100644 --- a/crates/redeem-cli/src/properties/load_data.rs +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -11,6 +11,7 @@ use redeem_properties::utils::data_handling::{PeptideData, RTNormalization}; /// Returns both the peptide vector and optionally (mean, std) of retention times. 
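/// A minimal usage sketch (illustrative only: the file name and the exact column
/// layout are assumptions, not a documented API):
/// ```ignore
/// let (peptides, norm) = load_peptide_data(
///     "peptides.tsv",              // CSV/TSV with sequence / retention time columns
///     "rt_cnn_lstm",               // the arch string gates which optional columns are read
///     None,                        // NCE: only consulted for "ms2_bert"
///     None,                        // instrument: only consulted for "ms2_bert"
///     Some("min_max".to_string()), // RT normalization strategy
/// )?;
/// ```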
pub fn load_peptide_data<P: AsRef<Path>>(
     path: P,
+    model_arch: &str,
     nce: Option<i32>,
     instrument: Option<String>,
     normalize_rt: Option<String>,
 ) -> Result<(Vec<PeptideData>, RTNormalization)> {
     let file = File::open(&path)
         .with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?;
@@ -43,9 +44,12 @@ pub fn load_peptide_data<P: AsRef<Path>>(
             .get(headers.iter().position(|h| h.to_lowercase() == "retention time").unwrap_or(3))
             .and_then(|s| s.parse::<f32>().ok());
 
-        let charge = record
-            .get(headers.iter().position(|h| h.to_lowercase() == "charge").unwrap_or(usize::MAX))
-            .and_then(|s| s.parse::<i32>().ok());
+        let charge = match model_arch {
+            "rt_cnn_lstm" | "rt_cnn_tf" => None,
+            _ => record
+                .get(headers.iter().position(|h| h.to_lowercase() == "charge").unwrap_or(usize::MAX))
+                .and_then(|s| s.parse::<i32>().ok()),
+        };
 
         let precursor_mass = record
             .get(headers.iter().position(|h| h.to_lowercase() == "precursor_mass").unwrap_or(usize::MAX))
             .and_then(|s| s.parse::<f64>().ok());
@@ -59,17 +63,24 @@ pub fn load_peptide_data<P: AsRef<Path>>(
             .get(headers.iter().position(|h| h.to_lowercase() == "ccs").unwrap_or(usize::MAX))
             .and_then(|s| s.parse::<f32>().ok());
 
-        let in_nce = nce.or_else(|| {
-            record
-                .get(headers.iter().position(|h| h.to_lowercase() == "nce").unwrap_or(usize::MAX))
-                .and_then(|s| s.parse::<i32>().ok())
-        });
-
-        let in_instrument = instrument.clone().or_else(|| {
-            record
-                .get(headers.iter().position(|h| h.to_lowercase() == "instrument").unwrap_or(usize::MAX))
-                .map(|s| s.to_string())
-        });
+        let in_nce = match model_arch {
+            "ms2_bert" => nce.or_else(|| {
+                record
+                    .get(headers.iter().position(|h| h.to_lowercase() == "nce").unwrap_or(usize::MAX))
+                    .and_then(|s| s.parse::<i32>().ok())
+            }),
+            _ => None,
+        };
+
+        let in_instrument = match model_arch {
+            "ms2_bert" => instrument.clone().or_else(|| {
+                record
+                    .get(headers.iter().position(|h| h.to_lowercase() == "instrument").unwrap_or(usize::MAX))
+                    .map(|s| s.to_string())
+            }),
+            _ => None,
+        };
 
         if let Some(rt) = retention_time {
             rt_values.push(rt);
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index bee5988..6324e0c 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -27,6 +27,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
     // Load training data
     let (train_peptides, norm_factor) = load_peptide_data(
         &config.train_data,
+        &config.model_arch,
         Some(config.nce),
         Some(config.instrument.clone()),
         Some(config.rt_normalization.clone().unwrap()),
@@ -37,6 +38,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
     let (val_peptides, _val_norm_factor) = if let Some(ref val_path) = config.validation_data {
         let (peptides, norm) = load_peptide_data(
             val_path,
+            &config.model_arch,
             Some(config.nce),
             Some(config.instrument.clone()),
             Some(config.rt_normalization.clone().unwrap()),

From 32e117c6b88bcb27c6b5b1e1f655b83d1429b1dd Mon Sep 17 00:00:00 2001
From: singjc
Date: Tue, 13 May 2025 00:39:49 -0400
Subject: [PATCH 44/75] fix: modification name and index retrieval

---
 .../src/building_blocks/featurize.rs |  22 ++-
 .../src/models/ccs_cnn_tf_model.rs   |  31 ++++
 .../src/models/model_interface.rs    |  29 ++-
 .../src/utils/peptdeep_utils.rs      | 166 +++++++++++++++---
 4 files changed, 212 insertions(+), 36 deletions(-)

diff --git a/crates/redeem-properties/src/building_blocks/featurize.rs b/crates/redeem-properties/src/building_blocks/featurize.rs
index 751464e..f7628f2 100644
--- a/crates/redeem-properties/src/building_blocks/featurize.rs
+++ b/crates/redeem-properties/src/building_blocks/featurize.rs
@@ -26,8 +26,12 @@ pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result<Tensor> {
     let map = aa_index_map();
     let filtered: Vec<i64> =
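        // filter_map previously dropped residues missing from the index map, silently
        // shortening the sequence and shifting downstream mod_sites; the rewrite below
        // fails fast on the first unknown character instead.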
seq .chars() - .filter_map(|c| map.get(&c).copied()) - .collect(); + .map(|c| { + map.get(&c) + .copied() + .ok_or_else(|| anyhow!("Unknown amino acid character: '{}'", c)) + }) + .collect::>>()?; let mut indices = vec![0i64]; // padding start indices.extend(filtered); indices.push(0); // padding end @@ -43,6 +47,20 @@ pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result let num_classes = AA_EMBEDDING_SIZE; let indices = aa_indices.to_vec2::()?; + + for (i, row) in indices.iter().enumerate() { + for (j, val) in row.iter().enumerate() { + if !val.is_finite() || *val < 0.0 || *val > (AA_EMBEDDING_SIZE as f32) { + log::error!( + "[aa_one_hot] Invalid index at batch {}, position {}: {}", + i, j, val + ); + } + } + } + + + let mut one_hot_data = vec![0.0f32; batch_size * seq_len * num_classes]; one_hot_data diff --git a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs index ab41d5c..e1e0ff2 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs @@ -243,3 +243,34 @@ impl ModelInterface for CCSCNNTFModel { } +#[cfg(test)] +mod tests { + use super::*; + use crate::models::model_interface::ModelInterface; + use crate::models::ccs_cnn_tf_model::CCSCNNTFModel; + use candle_core::Device; + use std::path::PathBuf; + + + #[test] + fn test_encode_peptides() { + let device = Device::Cpu; + let model = Box::new(CCSCNNTFModel::new_untrained(device.clone()).unwrap()); + + let peptide_sequences = "AGHCEWQMKYR"; + let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; + let mod_sites = "0;4;8"; + let charge = Some(2); + let nce = Some(20); + let instrument = Some("QE"); + + let result = + model.encode_peptide(&peptide_sequences, mods, mod_sites, charge, nce, instrument); + + println!("{:?}", result); + + // assert!(result.is_ok()); + // let encoded_peptides = result.unwrap(); + // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1)); + } +} \ No newline at end of file diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index ef68e24..5f41e76 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -293,7 +293,8 @@ pub trait ModelInterface: Send + Sync + ModelClone { let mod_feature_size = self.get_mod_element_count(); let mod_to_feature = self.get_mod_to_feature(); - log::trace!("[ModelInterface::encode_peptide] peptide_sequence: {}", peptide_sequence); + log::trace!("[ModelInterface::encode_peptide] peptide_sequence: {} | mods: {} | mod_sites: {} | charge: {:?} | nce: {:?} | instrument: {:?}", peptide_sequence, mods, mod_sites, charge, nce, instrument); + let aa_tensor = aa_indices_tensor(peptide_sequence, device)?; let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?; @@ -401,20 +402,36 @@ pub trait ModelInterface: Send + Sync + ModelClone { .max() .unwrap_or(0); + // Consistency check for feature dimension + let expected_feat_dim = tensors + .get(0) + .ok_or_else(|| anyhow::anyhow!("Empty input batch"))? + .shape() + .dims3()? 
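            // dims3() yields (batch, seq_len, feat_dim); the .2 below selects feat_dim,
            // which every tensor in the batch must share before padding and concatenation.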
+ .2; + let padded = tensors .into_par_iter() .map(|t| { let (_, seq_len, feat_dim) = t.shape().dims3()?; + + // Check that all tensors have the same feature dimension + if feat_dim != expected_feat_dim { + return Err(anyhow::anyhow!( + "Inconsistent feature dim: expected {}, got {}", + expected_feat_dim, + feat_dim + )); + } + if seq_len < max_len { - let pad = - Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?; - Tensor::cat(&[&t, &pad], 1) + let pad = Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?; + Ok(Tensor::cat(&[&t, &pad], 1)?) } else { Ok(t) } }) - .map(|res| res.map_err(anyhow::Error::from)) - .collect::, _>>()?; + .collect::, anyhow::Error>>()?; Ok(Tensor::cat(&padded, 0)?) } diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs index 3a6702e..dfdfcc5 100644 --- a/crates/redeem-properties/src/utils/peptdeep_utils.rs +++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs @@ -343,12 +343,26 @@ pub fn extract_masses_and_indices(peptide: &str) -> Vec<(f64, usize)> { pub fn get_modification_indices(peptide: &str) -> String { let re = Regex::new(r"(\[.*?\]|\(UniMod:\d+\)|\([a-zA-Z]+\))").unwrap(); let mut indices = Vec::new(); - let mut offset = 1; // Offset by 1 for 0-based index + let mut offset = 0; + let mut aa_index = 0; + let mut i = 0; + + while i < peptide.len() { + let c = peptide[i..].chars().next().unwrap(); + + if c == '[' || c == '(' { + if let Some(mat) = re.find_at(peptide, i) { + if mat.start() == i { + // If the modification is at the beginning (i == 0), it's on the N-term + indices.push(aa_index.to_string()); + i = mat.end(); + continue; + } + } + } - for mat in re.find_iter(peptide) { - let index = mat.start().saturating_sub(offset); - indices.push(index.to_string()); - offset += mat.end() - mat.start(); + aa_index += 1; + i += c.len_utf8(); } indices.join(";") @@ -403,27 +417,60 @@ pub fn extract_unimod_annotations(peptide: &str) -> Vec<(String, usize)> { let re_unimod = Regex::new(r"\(UniMod:(\d+)\)").unwrap(); let mut results = Vec::new(); let mut offset = 0; + let mut aa_index = 0; let mut idx = 0; while idx < peptide.len() { if let Some(mat) = re_unimod.find_at(peptide, idx) { if mat.start() == idx { + // UniMod annotation let cap = re_unimod.captures(&peptide[idx..mat.end()]).unwrap(); let unimod_str = format!("UniMod:{}", &cap[1]); - let pos = idx - offset; - results.push((unimod_str, pos)); + results.push((unimod_str, aa_index)); offset += mat.end() - mat.start(); idx = mat.end(); continue; } } - idx += peptide[idx..].chars().next().unwrap().len_utf8(); + + // Only increment aa_index on actual amino acid + let ch = peptide[idx..].chars().next().unwrap(); + if ch.is_alphabetic() { + aa_index += 1; + } + idx += ch.len_utf8(); } results } +/// Extracts either mass shift or UniMod annotations from a peptide string, +/// returning a vector of (mod_str, position). +/// +/// Dispatches to `extract_mass_annotations` if it finds `[+mass]`, +/// or to `extract_unimod_annotations` if it finds `(UniMod:id)`. 
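+/// Positions are 1-based residue indices, with 0 reserved for N-terminal
+/// modifications; a peptide with neither annotation style yields an empty vector.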
+/// +/// # Example +/// ``` +/// let mass = extract_mod_annotations("AC[+57.0215]DE"); +/// assert_eq!(mass, vec![("57.0215".to_string(), 2)]); +/// +/// let unimod = extract_mod_annotations("AC(UniMod:4)DE"); +/// assert_eq!(unimod, vec![("UniMod:4".to_string(), 2)]); +/// ``` +pub fn extract_mod_annotations(peptide: &str) -> Vec<(String, usize)> { + if peptide.contains("[+") || peptide.contains("[-") { + extract_mass_annotations(peptide) + } else if peptide.contains("(UniMod:") { + extract_unimod_annotations(peptide) + } else { + Vec::new() + } +} + + + /// Attempts to look up a modification name from a map using the provided key and amino acid. /// Falls back to a key with `None` if the exact amino acid is not matched. /// @@ -438,16 +485,17 @@ pub fn extract_unimod_annotations(peptide: &str) -> Vec<(String, usize)> { /// ``` pub fn lookup_modification( key: String, - aa: char, + aa: Option, map: &HashMap<(String, Option), ModificationMap>, ) -> Option { - map.get(&(key.clone(), Some(aa))) + map.get(&(key.clone(), aa)) .or_else(|| map.get(&(key, None))) .map(|m| m.name.clone()) } + /// Generates a standardized modification string (e.g., "Carbamidomethyl@C") /// for a peptide sequence based on mass shifts (e.g., `[+57.0215]`) or /// UniMod annotations (e.g., `(UniMod:4)`), using a preloaded modification map. @@ -484,22 +532,43 @@ pub fn get_modification_string( modification_map: &HashMap<(String, Option), ModificationMap>, ) -> String { let naked_peptide = remove_mass_shift(peptide); - let mut found_mods = Vec::new(); - for (key, pos) in extract_mass_annotations(peptide) + extract_mod_annotations(peptide) .into_iter() - .chain(extract_unimod_annotations(peptide)) - { - let aa = naked_peptide.chars().nth(pos.saturating_sub(1)).unwrap_or('\0'); - if let Some(name) = lookup_modification(key, aa, modification_map) { - found_mods.push(name); - } - } - - found_mods.join(";") + .filter_map(|(key, pos)| { + let aa_opt = if pos == 0 { + naked_peptide.chars().next() + } else { + naked_peptide.chars().nth(pos - 1) + }; + + // Try normal lookup first + let mod_str = lookup_modification(key.clone(), aa_opt, modification_map); + + // If not found and it's a terminal mod, look for Protein_N-term + if mod_str.is_none() && pos == 0 { + // Try all entries with same key and look for *_N-term + let fallback = modification_map + .iter() + .find_map(|((k, _), v)| { + if k == &key && (v.name.contains("Protein_N-term") || v.name.contains("Any_N-term")) { + Some(v.name.clone()) + } else { + None + } + }); + fallback + } else { + mod_str + } + }) + .collect::>() + .join(";") } + + // TODO: Derive from PeptDep constants yaml const IM_GAS_MASS: f64 = 28.0; const CCS_IM_COEF: f64 = 1059.62245; @@ -573,6 +642,44 @@ mod tests { assert!(result.is_ok()); } + #[test] + fn test_extract_unimod_annotations() { + let peptide = "AC(UniMod:4)DE(UniMod:7)FG"; + let result = extract_unimod_annotations(peptide); + println!("Peptide: {}, Result: {:?}", peptide, result); + assert_eq!(result, vec![("UniMod:4".to_string(), 2), ("UniMod:7".to_string(), 4)]); + + let peptide = "AC(UniMod:4)DE(UniMod:7)FG(UniMod:10)"; + let result = extract_unimod_annotations(peptide); + println!("Peptide: {}, Result: {:?}", peptide, result); + assert_eq!( + result, + vec![ + ("UniMod:4".to_string(), 2), + ("UniMod:7".to_string(), 4), + ("UniMod:10".to_string(), 6) + ] + ); + + let peptide = "(UniMod:1)M(UniMod:35)AAAATMAAAAR"; + let result = extract_unimod_annotations(peptide); + println!("Peptide: {}, Result: {:?}", peptide, result); + 
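        // The leading (UniMod:1) precedes any residue, so it is reported at position 0
        // (the N-terminus), while (UniMod:35) sits on the first residue, position 1.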
assert_eq!(result, vec![("UniMod:1".to_string(), 0), ("UniMod:35".to_string(), 1)]); + } + + #[test] + fn test_extract_mod_annotations() { + let peptide = "[+42.0105]M[+15.9949]AAAATMAAAAR"; + let result = extract_mod_annotations(peptide); + println!("Peptide: {}, Result: {:?}", peptide, result); + assert_eq!(result, vec![("42.0105".to_string(), 0), ("15.9949".to_string(), 1)]); + + let peptide = "(UniMod:1)M(UniMod:35)AAAATMAAAAR"; + let result = extract_mod_annotations(peptide); + println!("Peptide: {}, Result: {:?}", peptide, result); + assert_eq!(result, vec![("UniMod:1".to_string(), 0), ("UniMod:35".to_string(), 1)]); + } + #[test] fn test_get_modification_indices() { // Compile the regex once for all tests @@ -581,13 +688,14 @@ mod tests { // Test cases let test_cases = vec![ ("PEPTIDE", ""), - ("PEPT[+15.9949]IDE", "3"), - ("P[+15.9949]EPT[+79.99]IDE", "0;3"), - ("TVQSLEIDLDSM[+15.9949]R", "11"), - ("TVQS[+79.99]LEIDLDSM[+15.9949]R", "3;11"), + ("PEPT[+15.9949]IDE", "4"), + ("P[+15.9949]EPT[+79.99]IDE", "1;4"), + ("TVQSLEIDLDSM[+15.9949]R", "12"), + ("TVQS[+79.99]LEIDLDSM[+15.9949]R", "4;12"), ("[+42.0106]PEPTIDE", "0"), - ("PEPTIDE[+42.0106]", "6"), - ("P[+15.9949]EP[+79.99]T[+15.9949]IDE", "0;2;3"), + ("PEPTIDE[+42.0106]", "7"), + ("P[+15.9949]EP[+79.99]T[+15.9949]IDE", "1;3;4"), + ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "0;1"), ]; for (peptide, expected) in test_cases { @@ -630,9 +738,11 @@ mod tests { ("P[+15.9949]EPT[+79.9663]IDE", "Oxidation@P;Phospho@T"), ("TVQSLEIDLDSM[+15.9949]R", "Oxidation@M"), ("TVQS[+79.9663]LEIDLDSM[+15.9949]R", "Phospho@S;Oxidation@M"), - ("[+42.0106]PEPTIDE", "Acetyl@Protein_N-term"), + ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Any_N-term;Oxidation@M"), + ("[+42.0106]PEPTIDE", "Any_N-term"), ("PEPTIDE[+42.0106]", ""), ("P[+15.9949]EP[+79.9663]T[+15.9949]IDE", "Oxidation@P;Oxidation@T"), + ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Any_N-term;Oxidation@M"), ]; From f0354a8bb654ffbf0a487780cc445a93f07ee8df Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 13 May 2025 10:33:10 -0400 Subject: [PATCH 45/75] refactor: Update PeptideData struct to use u8 for string fields --- .../src/properties/inference/inference.rs | 5 +- .../src/properties/inference/output.rs | 30 +- crates/redeem-cli/src/properties/load_data.rs | 49 +- .../src/properties/train/trainer.rs | 8 +- .../examples/alphapeptdeep_ccs_cnn_lstm.rs | 143 ++---- .../examples/alphapeptdeep_ms2_bert.rs | 327 +++++-------- .../examples/alphapeptdeep_rt_cnn_lstm.rs | 237 +++------- .../src/models/ccs_cnn_lstm_model.rs | 6 +- .../redeem-properties/src/models/ccs_model.rs | 2 +- .../src/models/model_interface.rs | 433 +++++++++--------- .../src/models/ms2_bert_model.rs | 10 +- .../redeem-properties/src/models/ms2_model.rs | 2 +- .../src/models/rt_cnn_lstm_model.rs | 59 +-- .../redeem-properties/src/models/rt_model.rs | 2 +- .../src/utils/data_handling.rs | 118 ++++- 15 files changed, 656 insertions(+), 775 deletions(-) diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs index e8caef3..5fc6a91 100644 --- a/crates/redeem-cli/src/properties/inference/inference.rs +++ b/crates/redeem-cli/src/properties/inference/inference.rs @@ -15,6 +15,8 @@ use crate::properties::load_data::load_peptide_data; use crate::properties::util::write_bytes_to_file; pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { + let modifications = load_modifications().context("Failed to load modifications")?; + // Load inference data let (inference_data, 
norm_factor) = load_peptide_data( &config.inference_data, @@ -22,6 +24,7 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { Some(config.nce), Some(config.instrument.clone()), Some("min_max".to_string()), + &modifications, )?; log::info!("Loaded {} peptides", inference_data.len()); @@ -74,8 +77,6 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> { } }; - let modifications = load_modifications().context("Failed to load modifications")?; - let start_time = std::time::Instant::now(); model.set_evaluation_mode(); let inference_results: Vec = model.inference( diff --git a/crates/redeem-cli/src/properties/inference/output.rs b/crates/redeem-cli/src/properties/inference/output.rs index 2e91c97..0922834 100644 --- a/crates/redeem-cli/src/properties/inference/output.rs +++ b/crates/redeem-cli/src/properties/inference/output.rs @@ -1,11 +1,13 @@ use std::fs::File; -use std::io::{BufWriter, Write}; +use std::io::BufWriter; use anyhow::{Result, Context}; use std::path::Path; use redeem_properties::utils::data_handling::PeptideData; /// Write a vector of PeptideData to a CSV or TSV file based on file extension. pub fn write_peptide_data>(data: &[PeptideData], output_path: P) -> Result<()> { + + let path = output_path.as_ref(); let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("csv"); let delimiter = match extension { @@ -19,7 +21,20 @@ pub fn write_peptide_data>(data: &[PeptideData], output_path: P) .from_writer(BufWriter::new(file)); // Write headers - writer.write_record(&["sequence", "charge", "nce", "instrument", "retention_time", "ion_mobility", "ms2_intensities"])?; + writer.write_record(&[ + "modified_sequence", + "naked_sequence", + "mods", + "mod_sites", + "charge", + "precursor_mass", + "nce", + "instrument", + "retention_time", + "ion_mobility", + "ccs", + "ms2_intensities", + ])?; for entry in data { let ms2_str = entry.ms2_intensities.as_ref() @@ -31,16 +46,21 @@ pub fn write_peptide_data>(data: &[PeptideData], output_path: P) .unwrap_or_default(); writer.write_record(&[ - &entry.sequence, + entry.modified_sequence_str(), + entry.naked_sequence_str(), + entry.mods_str(), + entry.mod_sites_str(), &entry.charge.map_or(String::new(), |c| c.to_string()), + &entry.precursor_mass.map_or(String::new(), |m| format!("{:.4}", m)), &entry.nce.map_or(String::new(), |n| n.to_string()), - &entry.instrument.clone().unwrap_or_default(), + &entry.instrument_str().unwrap_or_default().to_string(), &entry.retention_time.map_or(String::new(), |r| format!("{:.4}", r)), &entry.ion_mobility.map_or(String::new(), |im| format!("{:.4}", im)), + &entry.ccs.map_or(String::new(), |c| format!("{:.4}", c)), &ms2_str, ])?; } writer.flush()?; Ok(()) -} \ No newline at end of file +} diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs index e29d239..eac4717 100644 --- a/crates/redeem-cli/src/properties/load_data.rs +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -1,9 +1,12 @@ +use std::{collections::HashMap, sync::Arc}; use std::fs::File; use std::path::Path; use std::io::BufReader; use anyhow::{Result, Context}; use csv::ReaderBuilder; -use redeem_properties::utils::data_handling::{PeptideData, RTNormalization}; +use redeem_properties::utils::peptdeep_utils::{get_modification_indices, get_modification_string, ModificationMap}; +use redeem_properties::utils::{data_handling::{PeptideData, RTNormalization}, peptdeep_utils::remove_mass_shift}; + /// Load peptide training data from a CSV or TSV file 
and optionally normalize RT. @@ -15,6 +18,7 @@ pub fn load_peptide_data>( nce: Option, instrument: Option, normalize_rt: Option, + modifications: &HashMap<(String, Option), ModificationMap>, ) -> Result<(Vec, RTNormalization)> { let file = File::open(&path) .with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?; @@ -35,10 +39,21 @@ pub fn load_peptide_data>( for result in rdr.records() { let record = result?; - let sequence = record - .get(headers.iter().position(|h| h.to_lowercase() == "sequence").unwrap_or(2)) - .unwrap_or("") - .to_string(); + let sequence_bytes: Arc<[u8]> = Arc::from( + record + .get(headers.iter().position(|h| h.to_lowercase() == "sequence").unwrap_or(2)) + .unwrap_or("") + .as_bytes() + .to_vec() + .into_boxed_slice(), + ); + + let sequence_str = String::from_utf8_lossy(&sequence_bytes); + + let naked_sequence = Arc::from(remove_mass_shift(&sequence_str).as_bytes().to_vec().into_boxed_slice()); + + let mods: Arc<[u8]> = Arc::from(get_modification_string(&sequence_str, modifications).into_bytes().into_boxed_slice()); + let mod_sites: Arc<[u8]> = Arc::from(get_modification_indices(&sequence_str).into_bytes().into_boxed_slice()); let retention_time = record .get(headers.iter().position(|h| h.to_lowercase() == "retention time").unwrap_or(3)) @@ -69,25 +84,31 @@ pub fn load_peptide_data>( .get(headers.iter().position(|h| h.to_lowercase() == "nce").unwrap_or(usize::MAX)) .and_then(|s| s.parse::().ok()) }), - _ => None - + _ => None, }; let in_instrument = match model_arch { - "ms2_bert" => instrument.clone().or_else(|| { - record - .get(headers.iter().position(|h| h.to_lowercase() == "instrument").unwrap_or(usize::MAX)) - .map(|s| s.to_string()) - }), - _ => None + "ms2_bert" => instrument + .as_ref() + .map(|s| Arc::from(s.as_bytes().to_vec().into_boxed_slice())) + .or_else(|| { + record + .get(headers.iter().position(|h| h.to_lowercase() == "instrument").unwrap_or(usize::MAX)) + .map(|s| Arc::from(s.as_bytes().to_vec().into_boxed_slice())) + }), + _ => None, }; + if let Some(rt) = retention_time { rt_values.push(rt); } peptides.push(PeptideData { - sequence, + modified_sequence: sequence_bytes, + naked_sequence, + mods, + mod_sites, charge, precursor_mass, nce: in_nce, diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index 6324e0c..2c6c657 100644 --- a/crates/redeem-cli/src/properties/train/trainer.rs +++ b/crates/redeem-cli/src/properties/train/trainer.rs @@ -24,6 +24,9 @@ use load_data::load_peptide_data; use super::input; pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { + log::trace!("Loading modifications map"); + let modifications = load_modifications().context("Failed to load modifications")?; + // Load training data let (train_peptides, norm_factor) = load_peptide_data( &config.train_data, @@ -31,6 +34,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { Some(config.nce), Some(config.instrument.clone()), Some(config.rt_normalization.clone().unwrap()), + &modifications, )?; log::info!("Loaded {} training peptides", train_peptides.len()); @@ -42,6 +46,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { Some(config.nce), Some(config.instrument.clone()), Some(config.rt_normalization.clone().unwrap()), + &modifications, ) .context("Failed to load validation data")?; (Some(peptides), Some(norm)) @@ -128,9 +133,6 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { log::trace!("Model loaded successfully"); - 
log::trace!("Loading modifications map"); - let modifications = load_modifications().context("Failed to load modifications")?; - let start_time = std::time::Instant::now(); log::trace!("Training started"); let train_step_metrics = model.train( diff --git a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs index 5b7f52b..0236f0a 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs @@ -2,28 +2,31 @@ use anyhow::{Context, Result}; use candle_core::Device; use redeem_properties::{ models::{ - model_interface::{ModelInterface, PredictionResult}, ccs_cnn_lstm_model::CCSCNNLSTMModel, + model_interface::{ModelInterface, PredictionResult}, + }, + utils::{ + data_handling::{PeptideData}, + peptdeep_utils::{ion_mobility_to_ccs_bruker}, }, - utils::{data_handling::PeptideData, peptdeep_utils::{load_modifications, ccs_to_mobility_bruker, ion_mobility_to_ccs_bruker}}, }; -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; -struct PredictionContext { - peptides: Vec, - mods: Vec, - mod_sites: Vec, +struct PredictionContext<'a> { + peptides: Vec<&'a str>, + mods: Vec<&'a str>, + mod_sites: Vec<&'a str>, charges: Vec, observed_ccs: Vec, } -impl PredictionContext { - fn new(test_peptides: &Vec<(&str, &str, &str, i32, f32)>) -> Self { - let peptides: Vec = test_peptides.iter().map(|(pep, _, _, _, _)| pep.to_string()).collect(); - let mods: Vec = test_peptides.iter().map(|(_, mod_, _, _, _)| mod_.to_string()).collect(); - let mod_sites: Vec = test_peptides.iter().map(|(_, _, sites, _, _)| sites.to_string()).collect(); - let charges: Vec = test_peptides.iter().map(|(_, _, _, charge, _)| *charge).collect(); - let observed_ccs: Vec = test_peptides.iter().map(|(_, _, _, _, ccs)| *ccs).collect(); +impl<'a> PredictionContext<'a> { + fn new(test_peptides: &'a [(&'a str, &'a str, &'a str, i32, f32)]) -> Self { + let peptides = test_peptides.iter().map(|(pep, _, _, _, _)| *pep).collect(); + let mods = test_peptides.iter().map(|(_, m, _, _, _)| *m).collect(); + let mod_sites = test_peptides.iter().map(|(_, _, s, _, _)| *s).collect(); + let charges = test_peptides.iter().map(|(_, _, _, c, _)| *c).collect(); + let observed_ccs = test_peptides.iter().map(|(_, _, _, _, ccs)| *ccs).collect(); Self { peptides, @@ -35,90 +38,50 @@ impl PredictionContext { } } -fn run_prediction(model: &mut CCSCNNLSTMModel, prediction_context: &PredictionContext) -> Result<()> { +fn run_prediction(model: &mut CCSCNNLSTMModel, ctx: &PredictionContext) -> Result<()> { match model.predict( - &prediction_context.peptides, - &prediction_context.mods, - &prediction_context.mod_sites, - Some(prediction_context.charges.clone()), + &ctx.peptides, + &ctx.mods, + &ctx.mod_sites, + Some(ctx.charges.clone()), None, None, - ) { - Ok(predictions) => { - if let PredictionResult::CCSResult(ccs_preds) = predictions { - let total_error: f32 = ccs_preds - .iter() - .zip(prediction_context.observed_ccs.iter()) - .map(|(pred, obs)| (pred - obs).abs()) - .sum(); - - print_predictions(&prediction_context.peptides, &ccs_preds, &prediction_context.observed_ccs); - - let mean_absolute_error = total_error / ccs_preds.len() as f32; - println!("Mean Absolute Error: {:.6}", mean_absolute_error); - } else { - println!("Unexpected prediction result type."); + )? 
{ + PredictionResult::CCSResult(preds) => { + let total_error: f32 = preds + .iter() + .zip(ctx.observed_ccs.iter()) + .map(|(pred, obs)| (pred - obs).abs()) + .sum(); + + for (pep, pred, obs) in itertools::izip!(&ctx.peptides, &preds, &ctx.observed_ccs) { + println!("Peptide: {}, Predicted CCS: {:.4}, Observed CCS: {:.4}", pep, pred, obs); } - } - Err(e) => { - println!("Error during batch prediction: {:?}", e); - } - } - Ok(()) -} -fn print_predictions(peptides: &[String], ccs_preds: &[f32], observed_ccs: &[f32]) { // Changed - let mut peptides_iter = peptides.iter(); - let mut ccs_preds_iter = ccs_preds.iter(); // Changed - let mut observed_ccs_iter = observed_ccs.iter(); // Changed - - loop { - match (peptides_iter.next(), ccs_preds_iter.next(), observed_ccs_iter.next()) { - (Some(pep), Some(pred), Some(obs)) => { - println!("Peptide: {}, Predicted CCS: {}, Observed CCS: {}", pep, pred, obs); // Changed - } - _ => break, // Exit the loop if any iterator is exhausted + let mae = total_error / preds.len() as f32; + println!("Mean Absolute Error: {:.6}", mae); } + _ => println!("Unexpected prediction result type."), } + Ok(()) } fn main() -> Result<()> { let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth"); let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); - - // let device use cuda if available otherwise use cpu let device = Device::new_cuda(0).unwrap_or(Device::Cpu); println!("Device: {:?}", device); - let mut model = CCSCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) - .context("Failed to create CCSCNNLSTMModel")?; - - // Define training data - let training_data = vec![ - PeptideData::new("EHVIIQAEFYLNPDQ", Some(2), None, None, None, Some(1.10), None), - PeptideData::new("KTLTGKTITLEVEPS", Some(2), None, None, None, Some(1.04), None), - PeptideData::new("SLLAQNTSWLL", Some(1), None, None, None, Some(1.67), None), - PeptideData::new("SLQEVAM[+15.9949]FL", Some(1), None, None, None, Some(1.53), None), - PeptideData::new("VLADQVWTL", Some(2), None, None, None, Some(0.839), None), - PeptideData::new("LLMEPGAMRFL", Some(2), None, None, None, Some(0.949), None), - PeptideData::new("SGEIKIAYTYSVS", Some(2), None, None, None, Some(0.974), None), - PeptideData::new("HTEIVFARTSPQQKL", Some(2), None, None, None, Some(1.13), None), - PeptideData::new("SM[+15.9949]ADIPLGFGV", Some(1), None, None, None, Some(1.59), None), - PeptideData::new("KLIDHQGLYL", Some(2), None, None, None, Some(0.937), None), - ]; - - // Sequence Monoisotopic Mass (Da) Charge m/z - // SKEEETSIDVAGKP 1488.7308 2 745.3727 - // LPILVPSAKKAIYM 1542.9208 2 772.4677 - // RTPKIQVYSRHPAE 1680.906 3 561.3093 - // EEVQIDILDTAGQE 1558.7362 2 780.3754 - // GAPLVKPLPVNPTDPA 1584.8875 2 793.4511 - // FEDENFILK 1153.5655 2 577.7901 - // YPSLPAQQV 1001.5182 1 1002.5255 - // YLPPATQVV 986.5437 2 494.2792 - // YISPDQLADLYK 1424.7187 2 713.3667 - // PSIVRLLQCDPSSAGQF 1816.9142 2 909.4644 + let mut model = CCSCNNLSTMModel::new( + &model_path, + Some(&constants_path), + 0, + 8, + 4, + true, + device, + )?; let test_peptides = vec![ ("SKEEETSIDVAGKP", "", "", 2, ion_mobility_to_ccs_bruker(0.998, 2, 745.3727)), @@ -133,20 +96,8 @@ fn main() -> Result<()> { ("PSIVRLLQCDPSSAGQF", "", "", 2, ion_mobility_to_ccs_bruker(1.10, 2, 909.4644)), ]; - let prediction_context = PredictionContext::new(&test_peptides); - - run_prediction(&mut model, &prediction_context)?; - - // Fine-tune the model - let modifications = load_modifications().context("Failed to load 
modifications")?; - let learning_rate = 0.001; - let epochs = 5; - model - .fine_tune(&training_data, modifications, 10, learning_rate, epochs) - .context("Failed to fine-tune the model")?; - - // Test prediction again with a few peptides after fine-tuning - run_prediction(&mut model, &prediction_context)?; + let ctx = PredictionContext::new(&test_peptides); + run_prediction(&mut model, &ctx)?; Ok(()) } diff --git a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs index b3ee4b1..cf7b8f7 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs @@ -6,245 +6,142 @@ use redeem_properties::{ model_interface::{ModelInterface, PredictionResult}, ms2_bert_model::MS2BertModel, }, - utils::{data_handling::PeptideData, peptdeep_utils::{get_modification_indices, get_modification_string, load_modifications, remove_mass_shift, ModificationMap}}, + utils::{ + data_handling::{PeptideData, PeptideBatchData}, + peptdeep_utils::{get_modification_indices, get_modification_string, load_modifications, remove_mass_shift, ModificationMap}, + }, }; use std::{ - collections::HashMap, fs::File, path::PathBuf + collections::HashMap, fs::File, path::PathBuf, sync::Arc }; -struct PredictionContext { - peptides: Vec, - naked_peptides: Vec, - mods: Vec, - mod_sites: Vec, - charges: Vec, - nces: Vec, - instruments: Vec, - ms2_intensities: Vec>>, -} - -impl PredictionContext { - fn new(training_data: &Vec, modification_map: &HashMap<(String, Option), ModificationMap>) -> Self { - let peptides: Vec = training_data.iter().map(|p| p.sequence.clone()).collect(); - - let naked_peptides: Vec = training_data.iter().map(|p| remove_mass_shift(&p.sequence)).collect(); - let naked_peptides: Vec = naked_peptides.iter().map(|p| p.trim_start_matches("-").to_string()).collect(); - - - // Get mod_str with get_modification_string - let mod_strs: Vec = training_data.iter().map(|p| get_modification_string(&p.sequence, modification_map)).collect(); - - /// Get modification indices with get_modification_indices - let mod_sites: Vec = training_data.iter().map(|p| get_modification_indices(&p.sequence)).collect(); - - - let charges: Vec = training_data.iter().map(|p| p.charge.unwrap()).collect(); - let nces: Vec = training_data.iter().map(|p| p.nce.unwrap()).collect(); - let instruments: Vec = training_data.iter().map(|p| p.instrument.clone().unwrap()).collect(); - let ms2_intensities: Vec>> = training_data.iter().map(|p| p.ms2_intensities.clone().unwrap()).collect(); - - Self { - peptides, - naked_peptides, - mods: mod_strs, - mod_sites, - charges, - nces, - instruments, - ms2_intensities, - } - } -} - -fn run_prediction(model: &mut MS2BertModel, prediction_context: &PredictionContext) -> Result<()> { // Changed Model - match model.predict( - &prediction_context.naked_peptides, - &prediction_context.mods, - &prediction_context.mod_sites, - Some(prediction_context.charges.clone()), - Some(prediction_context.nces.clone()), - Some(prediction_context.instruments.clone()), - ) { - Ok(predictions) => { - if let PredictionResult::MS2Result(ms2_preds) = predictions { - let total_error: f32 = ms2_preds - .iter() - .zip(prediction_context.ms2_intensities.iter()) - .map(|(outer_pred, outer_obs)| { - outer_pred - .iter() - .zip(outer_obs.iter()) - .map(|(inner_pred, inner_obs)| { - inner_pred - .iter() - .zip(inner_obs.iter()) - .map(|(pred, obs)| (pred - obs).abs()) - .sum::() // Sum the innermost 
differences - }) - .sum::() // Sum the differences from the middle vectors +fn run_prediction(model: &mut MS2BertModel, batch_data: &[PeptideData]) -> Result<()> { + let batch = PeptideBatchData::from(batch_data); + + let peptides = batch.naked_sequence_strs(); + let mods = batch.mods_strs(); + let mod_sites = batch.mod_sites_strs(); + + let charges = if batch.charges.iter().all(|c| c.is_some()) { + Some(batch.charges.iter().map(|c| c.unwrap()).collect()) + } else { + None + }; + let nces = if batch.nces.iter().all(|n| n.is_some()) { + Some(batch.nces.iter().map(|n| n.unwrap()).collect()) + } else { + None + }; + let instruments = if batch.instruments.iter().all(|i| i.is_some()) { + let flat: Vec<&str> = batch.instrument_strs().into_iter().map(|opt| opt.unwrap()).collect(); + Some(flat) + } else { + None + }; + + let predictions = model.predict(&peptides, &mods, &mod_sites, charges, nces, instruments.as_ref())?; + + if let PredictionResult::MS2Result(ms2_preds) = predictions { + let total_error: f32 = ms2_preds + .iter() + .zip(batch.ms2_intensities.iter()) + .map(|(pred, obs)| { + pred.iter() + .zip(obs.as_ref().unwrap()) + .map(|(p_row, o_row)| { + p_row.iter().zip(o_row.iter()).map(|(p, o)| (p - o).abs()).sum::() }) - .sum::(); // Sum the differences from the outer vectors - - - print_predictions(&prediction_context.peptides, &ms2_preds, &prediction_context.ms2_intensities); - - let mean_absolute_error = total_error / ms2_preds.len() as f32; - println!("Mean Absolute Error: {:.6}", mean_absolute_error); - } else { - println!("Unexpected prediction result type."); - } - } - Err(e) => { - println!("Error during batch prediction: {:?}", e); + .sum::() + }) + .sum(); + + for (i, peptide) in batch.naked_sequence.iter().enumerate() { + let pred_sum: f32 = ms2_preds[i].iter().flatten().sum(); + let obs_sum: f32 = batch.ms2_intensities[i] + .as_ref() + .map(|v| v.iter().flatten().sum()) + .unwrap_or(0.0); + println!( + "Peptide: {}\n Predicted Intensity Sum: {:.4}\n Observed Intensity Sum: {:.4}", + std::str::from_utf8(peptide).unwrap_or(""), pred_sum, obs_sum + ); } - } - Ok(()) -} -fn print_predictions( - peptides: &[String], - predicted_ms2_intensities: &Vec>>, - observed_ms2_intensities: &Vec>>, -) { - let mut peptides_iter = peptides.iter(); - let mut predicted_iter = predicted_ms2_intensities.iter(); - let mut observed_iter = observed_ms2_intensities.iter(); - - loop { - match ( - peptides_iter.next(), - predicted_iter.next(), - observed_iter.next(), - ) { - (Some(pep), Some(predicted), Some(observed)) => { - let predicted_sum: f32 = predicted.iter().flat_map(|inner_vec| inner_vec.iter().copied()).sum(); - let observed_sum: f32 = observed.iter().flat_map(|inner_vec| inner_vec.iter().copied()).sum(); - - - println!("Peptide: {}", pep); - println!(" Sum of Predicted Intensities: {:.6}", predicted_sum); - println!(" Sum of Observed Intensities: {:.6}", observed_sum); - } - _ => break, // Exit the loop if any iterator is exhausted - } + let mean_abs_error = total_error / ms2_preds.len() as f32; + println!("Mean Absolute Error: {:.6}", mean_abs_error); } + Ok(()) } fn main() -> Result<()> { let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth"); let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); - - // let device use cuda if available otherwise use cpu let device = Device::new_cuda(0).unwrap_or(Device::Cpu); - println!("Device: {:?}", device); - let mut model = MS2BertModel::new(&model_path, Some(&constants_path), 0, 8, 4, 
true, device) - .context("Failed to create MS2BertModel")?; + let mut model = MS2BertModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device)?; - // Open the CSV file - let file_path = "data/predicted_fragment_intensities.csv"; - let file = File::open(file_path).unwrap(); - - // Create a CSV reader + let file = File::open("data/predicted_fragment_intensities.csv")?; let mut rdr = Reader::from_reader(file); - // Group fragment intensities by peptide sequence - let mut peptide_data_map: HashMap>> = HashMap::new(); - let mut peptide_charges: HashMap = HashMap::new(); - - for result in rdr.records() { - let record = result.unwrap(); - let peptide_sequence = &record[0]; - let precursor_charge: i32 = record[1].parse().unwrap(); - let fragment_type = &record[2]; - let fragment_ordinal: usize = record[3].parse().unwrap(); - let fragment_charge: i32 = record[4].parse().unwrap(); - let experimental_intensity: f32 = record[6].parse().unwrap(); - - // Get naked peptide sequence - let naked_peptide = remove_mass_shift(peptide_sequence); - - // Get length of the peptide sequence - let peptide_len = naked_peptide.len() - 1; - - // Initialize the peptide's intensity matrix if it doesn't exist - peptide_data_map - .entry(peptide_sequence.to_string()) - .or_insert_with(|| vec![vec![0.0; 8]; peptide_len]); // Initialize with enough rows - - // Update the peptide's charge - peptide_charges.insert(peptide_sequence.to_string(), precursor_charge); - - // Determine the column index based on fragment type and charge - let col = match (fragment_type, fragment_charge) { - ("B", 1) => 0, // b_z1 - ("B", 2) => 1, // b_z2 - ("Y", 1) => 2, // y_z1 - ("Y", 2) => 3, // y_z2 - _ => continue, // Skip unsupported fragment types or charges - }; - - // Update the MS2 intensities matrix - let row = peptide_len - 1; // Convert to zero-based index - peptide_data_map - .get_mut(peptide_sequence) - .unwrap() - .resize(row + 1, vec![0.0; 8]); // Ensure the matrix has enough rows - peptide_data_map.get_mut(peptide_sequence).unwrap()[row][col] = experimental_intensity; - } - - // Create PeptideData instances for each peptide - let mut training_data: Vec = Vec::new(); - - for (sequence, ms2_intensities) in peptide_data_map { - let charge = peptide_charges.get(&sequence).copied(); - let peptide_data = PeptideData::new( - &sequence, - charge, - Some(20), // Example NCE - Some("QE"), // Example instrument - None, // Retention time - None, // Ion mobility - Some(ms2_intensities), // MS2 intensities - ); - training_data.push(peptide_data); - } - - println!("Loaded {} peptides from the CSV file.", training_data.len()); - - // Create the prediction context using the training data - let modifications = load_modifications().context("Failed to load modifications")?; - let prediction_context = PredictionContext::new(&training_data, &modifications); - - // Run prediction using the training data as the test data - let result = run_prediction(&mut model, &prediction_context, ); - - match result { - Ok(_) => println!("Ran prediction successfully."), - Err(e) => println!("Failed to run prediction: {:?}", e), + let mut data_map: HashMap>> = HashMap::new(); + let mut charge_map: HashMap = HashMap::new(); + + for rec in rdr.records() { + let rec = rec?; + let seq = &rec[0]; + let charge: i32 = rec[1].parse()?; + let ftype = &rec[2]; + let idx: usize = rec[3].parse()?; + let fz: i32 = rec[4].parse()?; + let intensity: f32 = rec[6].parse()?; + + let naked = remove_mass_shift(seq); + let len = naked.len().saturating_sub(1); + 
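        // b/y fragment series carry one row per backbone bond, hence len = residues - 1.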
charge_map.insert(seq.clone().to_string(), charge); + + let entry = data_map.entry(seq.to_string()).or_insert_with(|| vec![vec![0.0; 8]; len]); + if let Some(row) = entry.get_mut(idx.saturating_sub(1)) { + let col = match (ftype, fz) { + ("B", 1) => 0, + ("B", 2) => 1, + ("Y", 1) => 2, + ("Y", 2) => 3, + _ => continue, + }; + row[col] = intensity; + } } - // Fine-tune the model - - let learning_rate = 0.001; - let epochs = 5; - let result = model - .fine_tune(&training_data, modifications, 3, learning_rate, epochs) - .context("Failed to fine-tune the model"); - - match result { - Ok(_) => println!("Model fine-tuned successfully."), - Err(e) => println!("Failed to fine-tune model: {:?}", e), + let modifications = load_modifications()?; + let mut training_data = Vec::new(); + + for (mod_seq, ms2) in data_map { + let naked = remove_mass_shift(&mod_seq).trim_start_matches('-').to_string(); + let mods = get_modification_string(&mod_seq, &modifications); + let mod_sites = get_modification_indices(&mod_seq); + + training_data.push(PeptideData::new( + &mod_seq, + &naked, + &mods, + &mod_sites, + charge_map.get(&mod_seq).copied(), + None, + Some(20), + Some("QE"), + None, + None, + None, + Some(ms2), + )); } - // Test prediction again with a few peptides after fine-tuning - let result = run_prediction(&mut model, &prediction_context); + println!("Loaded {} peptides.", training_data.len()); + run_prediction(&mut model, &training_data)?; - match result { - Ok(_) => println!("Ran prediction successfully."), - Err(e) => println!("Failed to run prediction: {:?}", e), - } + model.fine_tune(&training_data, modifications, 3, 0.001, 5)?; + run_prediction(&mut model, &training_data)?; Ok(()) } diff --git a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs index 7408473..f206ad7 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs @@ -5,32 +5,27 @@ use redeem_properties::{ model_interface::{ModelInterface, PredictionResult}, rt_cnn_lstm_model::RTCNNLSTMModel, }, - utils::{data_handling::PeptideData, peptdeep_utils::load_modifications}, + utils::{ + data_handling::{PeptideData, PeptideBatchData}, + peptdeep_utils::{load_modifications, remove_mass_shift, get_modification_string, get_modification_indices}, + }, }; use std::path::PathBuf; +use std::sync::Arc; struct PredictionContext { - peptides: Vec, - mods: Vec, - mod_sites: Vec, + peptides: Vec<&'static str>, + mods: Vec<&'static str>, + mod_sites: Vec<&'static str>, observed_rts: Vec, } impl PredictionContext { - fn new(test_peptides: &Vec<(&str, &str, &str, f32)>) -> Self { - let peptides: Vec = test_peptides - .iter() - .map(|(pep, _, _, _)| pep.to_string()) - .collect(); - let mods: Vec = test_peptides - .iter() - .map(|(_, mod_, _, _)| mod_.to_string()) - .collect(); - let mod_sites: Vec = test_peptides - .iter() - .map(|(_, _, sites, _)| sites.to_string()) - .collect(); - let observed_rts: Vec = test_peptides.iter().map(|(_, _, _, rt)| *rt).collect(); + fn new(test_peptides: &[(&'static str, &'static str, &'static str, f32)]) -> Self { + let peptides = test_peptides.iter().map(|(pep, _, _, _)| *pep).collect(); + let mods = test_peptides.iter().map(|(_, m, _, _)| *m).collect(); + let mod_sites = test_peptides.iter().map(|(_, _, site, _)| *site).collect(); + let observed_rts = test_peptides.iter().map(|(_, _, _, rt)| *rt).collect(); Self { peptides, @@ -41,74 +36,43 @@ impl 
PredictionContext { } } -fn run_prediction( - model: &mut RTCNNLSTMModel, - prediction_context: &PredictionContext, -) -> Result<()> { +fn run_prediction(model: &mut RTCNNLSTMModel, context: &PredictionContext) -> Result<()> { match model.predict( - &prediction_context.peptides, - &prediction_context.mods, - &prediction_context.mod_sites, + &context.peptides, + &context.mods, + &context.mod_sites, None, None, None, ) { - Ok(predictions) => { - if let PredictionResult::RTResult(rt_preds) = predictions { + Ok(preds) => { + if let PredictionResult::RTResult(rt_preds) = preds { let total_error: f32 = rt_preds .iter() - .zip(prediction_context.observed_rts.iter()) - .map(|(pred, obs)| (pred - obs).abs()) + .zip(&context.observed_rts) + .map(|(p, o)| (p - o).abs()) .sum(); - print_predictions( - &prediction_context.peptides, - &rt_preds, - &prediction_context.observed_rts, - ); + for ((pep, pred), obs) in context.peptides.iter().zip(rt_preds.iter()).zip(&context.observed_rts) { + println!("Peptide: {}, Predicted RT: {:.6}, Observed RT: {:.6}", pep, pred, obs); + } - let mean_absolute_error = total_error / rt_preds.len() as f32; - println!("Mean Absolute Error: {:.6}", mean_absolute_error); - } else { - println!("Unexpected prediction result type."); - } - } - Err(e) => { - println!("Error during batch prediction: {:?}", e); - } - } - Ok(()) -} - -fn print_predictions(peptides: &[String], rt_preds: &[f32], observed_rts: &[f32]) { - let mut peptides_iter = peptides.iter(); - let mut rt_preds_iter = rt_preds.iter(); - let mut observed_rts_iter = observed_rts.iter(); - - loop { - match ( - peptides_iter.next(), - rt_preds_iter.next(), - observed_rts_iter.next(), - ) { - (Some(pep), Some(pred), Some(obs)) => { println!( - "Peptide: {}, Predicted RT: {}, Observed RT: {}", - pep, pred, obs + "Mean Absolute Error: {:.6}", + total_error / rt_preds.len() as f32 ); } - _ => break, // Exit the loop if any iterator is exhausted } + Err(e) => println!("Prediction error: {e}"), } + Ok(()) } fn main() -> Result<()> { env_logger::init(); - // let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); + let model_path = PathBuf::from("/home/singjc/Documents/github/redeem/rt_fine_tuned.safetensors"); let constants_path = PathBuf::from("/home/singjc/Documents/github/redeem/crates/redeem-properties/data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); - - // let device use cuda if available otherwise use cpu let device = Device::new_cuda(0).unwrap_or(Device::Cpu); println!("Device: {:?}", device); @@ -116,142 +80,57 @@ fn main() -> Result<()> { let mut model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) .context("Failed to create RTCNNLSTMModel")?; - // Define training data + let modifications = load_modifications().context("Failed to load modifications")?; + let training_data: Vec = vec![ - PeptideData::new("AKPLMELIER", None, None, None, Some(0.4231399), None, None), + "AKPLMELIER", + "TEM[+15.9949]VTISDASQR", + "AGKFPSLLTHNENMVAK", + "LSELDDRADALQAGASQFETSAAK", + "FLLQDTVELR", + "SVTEQGAELSNEER", + "EHALLAYTLGVK", + "TVQSLEIDLDSM[+15.9949]R", + "VVSQYSSLLSPMSVNAVM[+15.9949]K", + "TFLALINQVFPAEEDSKK", + ] + .into_iter() + .enumerate() + .map(|(i, seq)| { + let naked = remove_mass_shift(seq); + let mods = get_modification_string(seq, &modifications); + let sites = get_modification_indices(seq); PeptideData::new( - "TEM[+15.9949]VTISDASQR", + seq, + &naked, + &mods, + &sites, None, None, None, - Some(0.2192762), None, - None, - ), - PeptideData::new( - 
"AGKFPSLLTHNENMVAK", - None, - None, - None, - Some(0.3343900), - None, - None, - ), - PeptideData::new( - "LSELDDRADALQAGASQFETSAAK", - None, - None, - None, - Some(0.5286755), - None, - None, - ), - PeptideData::new("FLLQDTVELR", None, None, None, Some(0.6522490), None, None), - PeptideData::new( - "SVTEQGAELSNEER", + Some(i as f32 / 10.0), None, None, None, - Some(0.2388270), - None, - None, - ), - PeptideData::new( - "EHALLAYTLGVK", - None, - None, - None, - Some(0.5360210), - None, - None, - ), - PeptideData::new( - "TVQSLEIDLDSM[+15.9949]R", - None, - None, - None, - Some(0.5787880), - None, - None, - ), - PeptideData::new( - "VVSQYSSLLSPMSVNAVM[+15.9949]K", - None, - None, - None, - Some(0.6726230), - None, - None, - ), - PeptideData::new( - "TFLALINQVFPAEEDSKK", - None, - None, - None, - Some(0.8345350), - None, - None, - ), - ]; + ) + }) + .collect(); - // Test prediction data let test_peptides_data = vec![ ("QPYAVSELAGHQTSAESWGTGR", "", "", 0.4328955), ("GMSVSDLADKLSTDDLNSLIAHAHR", "Oxidation@M", "1", 0.6536107), - ( - "TVQHHVLFTDNMVLICR", - "Oxidation@M;Carbamidomethyl@C", - "11;15", - 0.7811949, - ), + ("TVQHHVLFTDNMVLICR", "Oxidation@M;Carbamidomethyl@C", "11;15", 0.7811949), ("EAELDVNEELDKK", "", "", 0.2934583), ("YTPVQQGPVGVNVTYGGDPIPK", "", "", 0.5863009), - ("YYAIDFTLDEIK", "", "", 0.8048359), - ("VSSLQAEPLPR", "", "", 0.3201348), - ( - "NHAVVCQGCHNAIDPEVQR", - "Carbamidomethyl@C;Carbamidomethyl@C", - "5;8", - 0.1730425, - ), - ("IPNIYAIGDVVAGPMLAHK", "", "", 0.8220097), - ("AELGIPLEEVPPEEINYLTR", "", "", 0.8956433), - ("NESTPPSEELELDKWK", "", "", 0.4471560), - ("SIQEIQELDKDDESLR", "", "", 0.4157068), - ("EMEENFAVEAANYQDTIGR", "Oxidation@M", "1", 0.6388353), - ("MDSFDEDLARPSGLLAQER", "Oxidation@M", "0", 0.5593624), - ("SLLTEADAGHTEFTDEVYQNESR", "", "", 0.5538696), - ("NQDLAPNSAEQASILSLVTK", "", "", 0.7682227), - ("GKVEEVELPVEK", "", "", 0.2943246), - ("IYVASVHQDLSDDDIK", "", "", 0.3847130), - ("IKGDMDISVPK", "", "", 0.2844255), - ("IIPVLLEHGLER", "", "", 0.5619017), - ("AGYTDKVVIGMDVAASEFFR", "", "", 0.8972052), - ("TDYNASVSVPDSSGPER", "", "", 0.3279318), - ("DLKPQNLLINTEGAIK", "", "", 0.6046495), - ("VAEAIAASFGSFADFK", "", "", 0.8935943), - ("AMVSNAQLDNEK", "Oxidation@M", "1", 0.1724159), - ("THINIVVIGHVDSGK", "", "", 0.4865058), - ("LILPHVDIQLK", "", "", 0.6268850), - ("LIAPVAEEEATVPNNK", "", "", 0.4162872), - ("FTASAGIQVVGDDLTVTNPK", "", "", 0.7251064), - ("HEDLKDMLEFPAQELR", "", "", 0.6529368), - ("LLPDFLLER", "", "", 0.7852863), ]; let prediction_context = PredictionContext::new(&test_peptides_data); run_prediction(&mut model, &prediction_context)?; - // Fine-tune the model - let modifications = load_modifications().context("Failed to load modifications")?; - let learning_rate = 0.001; - let epochs = 5; - model - .fine_tune(&training_data, modifications, 10, learning_rate, epochs) - .context("Failed to fine-tune the model")?; + model.fine_tune(&training_data, modifications, 10, 0.001, 5)?; - // Test prediction again with a few peptides after fine-tuning run_prediction(&mut model, &prediction_context)?; model.save("alphapeptdeep_rt_cnn_lstm_finetuned.safetensors")?; diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs index fa8489a..58b08cb 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs @@ -335,9 +335,9 @@ mod tests { let device = Device::Cpu; let model = CCSCNNLSTMModel::new(model_path, 
Some(constants_path), 0, 8, 4, true, device).unwrap();

-        let peptide_sequences = vec!["AGHCEWQMKYR".to_string(), "AGHCEWQMKYR".to_string()];
-        let mods = vec!["Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string()];
-        let mod_sites = vec!["0;4;8".to_string(), "0;4;8".to_string()];
+        let peptide_sequences = vec!["AGHCEWQMKYR", "AGHCEWQMKYR"];
+        let mods = vec!["Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"];
+        let mod_sites = vec!["0;4;8", "0;4;8"];
         let charge = Some(vec![2, 2]);

         let result = model.predict(&peptide_sequences, &mods, &mod_sites, charge, None, None);
diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index 43b5c87..a9f667c 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -42,7 +42,7 @@ impl CCSModelWrapper {
         Ok(Self { model })
     }

-    pub fn predict(&self, peptide_sequence: &[String], mods: &[String], mod_sites: &[String], charge: Vec<i32>) -> Result<PredictionResult> {
+    pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>, charge: Vec<i32>) -> Result<PredictionResult> {
         self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), None, None)
     }

diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index 5f41e76..8d67d51 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -2,7 +2,7 @@ use crate::{
     building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed},
     models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper},
     utils::{
-        data_handling::{PeptideData, RTNormalization}, logging::Progress, peptdeep_utils::{
+        data_handling::{PeptideBatchData, PeptideData, RTNormalization}, logging::Progress, peptdeep_utils::{
            get_modification_indices, get_modification_string, parse_instrument_index,
            remove_mass_shift,
        }, stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, utils::{get_tensor_stats, CosineWithWarmup, LRScheduler}
@@ -242,16 +242,16 @@ pub trait ModelInterface: Send + Sync + ModelClone {
    /// A vector of predicted retention times.
    fn predict(
        &self,
-        peptide_sequences: &[String],
-        mods: &[String],
-        mod_sites: &[String],
+        peptide_sequences: &Vec<&str>,
+        mods: &Vec<&str>,
+        mod_sites: &Vec<&str>,
        charge: Option<Vec<i32>>,
        nce: Option<Vec<i32>>,
-        instrument: Option<Vec<String>>,
+        instrument: Option<&Vec<&str>>,
    ) -> Result<PredictionResult> {
        // Encode the batch of peptides
        let input_tensor = self
-            .encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument)?
+            .encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument.cloned())?
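+            // .cloned() turns the borrowed Option<&Vec<&str>> into the owned
+            // Option<Vec<&str>> that encode_peptides takes; only the &str
+            // references are copied, not the underlying strings.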
            .to_device(self.get_device())?;

        // Forward pass through the model
@@ -294,7 +294,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
        let mod_to_feature = self.get_mod_to_feature();

        log::trace!("[ModelInterface::encode_peptide] peptide_sequence: {} | mods: {} | mod_sites: {} | charge: {:?} | nce: {:?} | instrument: {:?}", peptide_sequence, mods, mod_sites, charge, nce, instrument);
-        
+
        let aa_tensor = aa_indices_tensor(peptide_sequence, device)?;
        let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?;

@@ -367,12 +367,12 @@ pub trait ModelInterface: Send + Sync + ModelClone {
    /// Encode a batch of peptide sequences into a tensor
    fn encode_peptides(
        &self,
-        peptide_sequences: &[String],
-        mods: &[String],
-        mod_sites: &[String],
+        peptide_sequences: &Vec<&str>,
+        mods: &Vec<&str>,
+        mod_sites: &Vec<&str>,
        charges: Option<Vec<i32>>,
        nces: Option<Vec<i32>>,
-        instruments: Option<Vec<String>>,
+        instruments: Option<Vec<&str>>,
    ) -> Result<Tensor> {
        let len = peptide_sequences.len();

@@ -385,7 +385,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                    &mod_sites[i],
                    charges.as_ref().map(|v| v[i]),
                    nces.as_ref().map(|v| v[i]),
-                    instruments.as_ref().map(|v| v[i].as_str()),
+                    instruments.as_ref().map(|v| v[i]),
                )
            })
            .collect::<Result<Vec<_>>>()?;

@@ -715,166 +715,167 @@ pub trait ModelInterface: Send + Sync + ModelClone {
        learning_rate: f64,
        epochs: usize,
    ) -> Result<()> {
-        let num_batches = if training_data.len() < batch_size {
-            1
-        } else {
-            let full_batches = training_data.len() / batch_size;
-            if training_data.len() % batch_size > 0 {
-                full_batches + 1
-            } else {
-                full_batches
-            }
-        };
-
-        info!(
-            "Fine-tuning {} model on {} peptide features ({} batches) for {} epochs",
-            self.get_model_arch(),
-            training_data.len(),
-            num_batches,
-            epochs
-        );
-
-        let params = candle_nn::ParamsAdamW {
-            lr: learning_rate,
-            ..Default::default()
-        };
-        let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?;
-
-        for epoch in 0..epochs {
-            let progress = Progress::new(num_batches, &format!("[fine-tuning] Epoch {}: ", epoch));
-            let mut total_loss = 0.0;
-
-            for batch_idx in 0..num_batches {
-                let start = batch_idx * batch_size;
-                let end = (start + batch_size).min(training_data.len());
-                let batch_data = &training_data[start..end];
-
-                let peptides: Vec<String> = batch_data
-                    .iter()
-                    .map(|p| remove_mass_shift(&p.sequence))
-                    .collect();
-                let mods: Vec<String> = batch_data
-                    .iter()
-                    .map(|p| get_modification_string(&p.sequence, &modifications))
-                    .collect();
-                let mod_sites: Vec<String> = batch_data
-                    .iter()
-                    .map(|p| get_modification_indices(&p.sequence))
-                    .collect();
-
-                let charges = batch_data
-                    .iter()
-                    .filter_map(|p| p.charge)
-                    .collect::<Vec<i32>>();
-                let charges = if charges.len() == batch_data.len() {
-                    Some(charges)
-                } else {
-                    None
-                };
-
-                let nces = batch_data.iter().filter_map(|p| p.nce).collect::<Vec<i32>>();
-                let nces = if nces.len() == batch_data.len() {
-                    Some(nces)
-                } else {
-                    None
-                };
-
-                let instruments = batch_data
-                    .iter()
-                    .filter_map(|p| p.instrument.clone())
-                    .collect::<Vec<String>>();
-                let instruments = if instruments.len() == batch_data.len() {
-                    Some(instruments)
-                } else {
-                    None
-                };
-
-                let input_batch = self
-                    .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?
- .to_device(self.get_device())?; - - log::trace!( - "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", - input_batch.shape(), - input_batch.device() - ); - - let batch_targets = match self.property_type() { - PropertyType::RT => PredictionResult::RTResult( - batch_data - .iter() - .map(|p| p.retention_time.unwrap_or_default()) - .collect(), - ), - PropertyType::CCS => PredictionResult::CCSResult( - batch_data - .iter() - .map(|p| p.ion_mobility.unwrap_or_default()) - .collect(), - ), - PropertyType::MS2 => PredictionResult::MS2Result( - batch_data - .iter() - .map(|p| p.ms2_intensities.clone().unwrap_or_default()) - .collect(), - ), - }; - - let target_batch = match batch_targets { - PredictionResult::RTResult(ref values) - | PredictionResult::CCSResult(ref values) => { - Tensor::new(values.clone(), &self.get_device())? - } - PredictionResult::MS2Result(ref spectra) => { - let max_len = spectra.iter().map(|s| s.len()).max().unwrap_or(1); - let feature_dim = spectra - .get(0) - .and_then(|s| s.get(0)) - .map(|v| v.len()) - .unwrap_or(1); - let mut padded_spectra = spectra.clone(); - for s in &mut padded_spectra { - s.resize(max_len, vec![0.0; feature_dim]); - } - Tensor::new(padded_spectra.concat(), &self.get_device())?.reshape(( - batch_data.len(), - max_len, - feature_dim, - ))? - } - } - .to_device(self.get_device())?; - - let predicted = self.forward(&input_batch)?; - let loss = candle_nn::loss::mse(&predicted, &target_batch)?; - opt.backward_step(&loss)?; - - total_loss += loss.to_vec0::().unwrap_or(990.0); - - progress.update_description(&format!( - "[fine-tuning] Epoch {}: Loss: {}", - epoch, - loss.to_vec0::()? - )); - progress.inc(); - } - - let avg_loss = total_loss / num_batches as f32; - progress.update_description(&format!( - "[fine-tuning] Epoch {}: Avg. 
Batch Loss: {}", - epoch, avg_loss - )); - progress.finish(); - } - - Ok(()) + // let num_batches = if training_data.len() < batch_size { + // 1 + // } else { + // let full_batches = training_data.len() / batch_size; + // if training_data.len() % batch_size > 0 { + // full_batches + 1 + // } else { + // full_batches + // } + // }; + + // info!( + // "Fine-tuning {} model on {} peptide features ({} batches) for {} epochs", + // self.get_model_arch(), + // training_data.len(), + // num_batches, + // epochs + // ); + + // let params = candle_nn::ParamsAdamW { + // lr: learning_rate, + // ..Default::default() + // }; + // let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; + + // for epoch in 0..epochs { + // let progress = Progress::new(num_batches, &format!("[fine-tuning] Epoch {}: ", epoch)); + // let mut total_loss = 0.0; + + // for batch_idx in 0..num_batches { + // let start = batch_idx * batch_size; + // let end = (start + batch_size).min(training_data.len()); + // let batch_data = &training_data[start..end]; + + // let peptides: Vec = batch_data + // .iter() + // .map(|p| remove_mass_shift(&p.sequence)) + // .collect(); + // let mods: Vec = batch_data + // .iter() + // .map(|p| get_modification_string(&p.sequence, &modifications)) + // .collect(); + // let mod_sites: Vec = batch_data + // .iter() + // .map(|p| get_modification_indices(&p.sequence)) + // .collect(); + + // let charges = batch_data + // .iter() + // .filter_map(|p| p.charge) + // .collect::>(); + // let charges = if charges.len() == batch_data.len() { + // Some(charges) + // } else { + // None + // }; + + // let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); + // let nces = if nces.len() == batch_data.len() { + // Some(nces) + // } else { + // None + // }; + + // let instruments = batch_data + // .iter() + // .filter_map(|p| p.instrument.clone()) + // .collect::>(); + // let instruments = if instruments.len() == batch_data.len() { + // Some(instruments) + // } else { + // None + // }; + + // let input_batch = self + // .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)? + // .to_device(self.get_device())?; + + // log::trace!( + // "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", + // input_batch.shape(), + // input_batch.device() + // ); + + // let batch_targets = match self.property_type() { + // PropertyType::RT => PredictionResult::RTResult( + // batch_data + // .iter() + // .map(|p| p.retention_time.unwrap_or_default()) + // .collect(), + // ), + // PropertyType::CCS => PredictionResult::CCSResult( + // batch_data + // .iter() + // .map(|p| p.ion_mobility.unwrap_or_default()) + // .collect(), + // ), + // PropertyType::MS2 => PredictionResult::MS2Result( + // batch_data + // .iter() + // .map(|p| p.ms2_intensities.clone().unwrap_or_default()) + // .collect(), + // ), + // }; + + // let target_batch = match batch_targets { + // PredictionResult::RTResult(ref values) + // | PredictionResult::CCSResult(ref values) => { + // Tensor::new(values.clone(), &self.get_device())? 
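+                // (In this disabled body, RT/CCS targets become a flat tensor
+                //  here, while the MS2 arm below pads each spectrum to the
+                //  batch's max fragment count before reshaping.)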
+ // } + // PredictionResult::MS2Result(ref spectra) => { + // let max_len = spectra.iter().map(|s| s.len()).max().unwrap_or(1); + // let feature_dim = spectra + // .get(0) + // .and_then(|s| s.get(0)) + // .map(|v| v.len()) + // .unwrap_or(1); + // let mut padded_spectra = spectra.clone(); + // for s in &mut padded_spectra { + // s.resize(max_len, vec![0.0; feature_dim]); + // } + // Tensor::new(padded_spectra.concat(), &self.get_device())?.reshape(( + // batch_data.len(), + // max_len, + // feature_dim, + // ))? + // } + // } + // .to_device(self.get_device())?; + + // let predicted = self.forward(&input_batch)?; + // let loss = candle_nn::loss::mse(&predicted, &target_batch)?; + // opt.backward_step(&loss)?; + + // total_loss += loss.to_vec0::().unwrap_or(990.0); + + // progress.update_description(&format!( + // "[fine-tuning] Epoch {}: Loss: {}", + // epoch, + // loss.to_vec0::()? + // )); + // progress.inc(); + // } + + // let avg_loss = total_loss / num_batches as f32; + // progress.update_description(&format!( + // "[fine-tuning] Epoch {}: Avg. Batch Loss: {}", + // epoch, avg_loss + // )); + // progress.finish(); + // } + + // Ok(()) + todo!() } fn inference( &self, inference_data: &Vec, batch_size: usize, - modifications: HashMap< + _modifications: HashMap< (String, Option), crate::utils::peptdeep_utils::ModificationMap, >, @@ -895,49 +896,33 @@ pub trait ModelInterface: Send + Sync + ModelClone { .enumerate() .map(|(batch_idx, batch_data)| { let start_idx = batch_idx * batch_size; + let batch: PeptideBatchData = batch_data.into(); - let peptides: Vec = batch_data - .iter() - .map(|p| remove_mass_shift(&p.sequence)) - .collect(); - let mods: Vec = batch_data - .iter() - .map(|p| get_modification_string(&p.sequence, &modifications)) - .collect(); - let mod_sites: Vec = batch_data - .iter() - .map(|p| get_modification_indices(&p.sequence)) - .collect(); + let naked_sequences = batch.naked_sequence_strs(); + let mods = batch.mods_strs(); + let mod_sites = batch.mod_sites_strs(); - let charges = batch_data - .iter() - .filter_map(|p| p.charge) - .collect::>(); - let charges = if charges.len() == batch_data.len() { - Some(charges) + let charges = if batch.charges.iter().all(|c| c.is_some()) { + Some(batch.charges.iter().map(|c| c.unwrap()).collect::>()) } else { None }; - let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { - Some(nces) + let nces = if batch.nces.iter().all(|n| n.is_some()) { + Some(batch.nces.iter().map(|n| n.unwrap()).collect::>()) } else { None }; - let instruments = batch_data - .iter() - .filter_map(|p| p.instrument.clone()) - .collect::>(); - let instruments = if instruments.len() == batch_data.len() { - Some(instruments) + let instruments = if batch.instruments.iter().all(|i| i.is_some()) { + let flat: Vec<&str> = batch.instrument_strs().into_iter().map(|opt| opt.unwrap()).collect(); + Some(flat) } else { None }; let input_tensor = self - .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)? + .encode_peptides(&naked_sequences, &mods, &mod_sites, charges, nces, instruments)? .to_device(self.get_device())?; let output = self.forward(&input_tensor)?; @@ -981,34 +966,66 @@ pub trait ModelInterface: Send + Sync + ModelClone { progress.finish(); Ok(result.into_iter().flatten().collect()) } + /// Extract encoded input and target tensor for a batch of peptides. 
fn prepare_batch_inputs( &self, batch_data: &[PeptideData], - modifications: &HashMap<(String, Option), crate::utils::peptdeep_utils::ModificationMap>, + _modifications: &HashMap<(String, Option), crate::utils::peptdeep_utils::ModificationMap>, ) -> Result<(Tensor, Tensor)> { - let peptides: Vec = batch_data.par_iter().map(|p| remove_mass_shift(&p.sequence)).collect(); + use rayon::prelude::*; - let mods: Vec = batch_data.par_iter().map(|p| get_modification_string(&p.sequence, modifications)).collect(); + let batch: PeptideBatchData = batch_data.into(); - let mod_sites: Vec = batch_data.par_iter().map(|p| get_modification_indices(&p.sequence)).collect(); + let naked_sequences = batch.naked_sequence_strs(); - let charges = batch_data.par_iter().filter_map(|p| p.charge).collect::>(); - let charges = if charges.len() == batch_data.len() { Some(charges) } else { None }; + let mods = batch.mods_strs(); - let nces = batch_data.par_iter().filter_map(|p| p.nce).collect::>(); - let nces = if nces.len() == batch_data.len() { Some(nces) } else { None }; + let mod_sites = batch.mod_sites_strs(); - let instruments = batch_data.par_iter().filter_map(|p| p.instrument.clone()).collect::>(); - let instruments = if instruments.len() == batch_data.len() { Some(instruments) } else { None }; + let charges = if batch.charges.iter().all(|c| c.is_some()) { + Some(batch.charges.iter().map(|c| c.unwrap()).collect::>()) + } else { + None + }; - let input_batch = self.encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)?.to_device(self.get_device())?; + let nces = if batch.nces.iter().all(|n| n.is_some()) { + Some(batch.nces.iter().map(|n| n.unwrap()).collect::>()) + } else { + None + }; + + let instruments = if batch.instruments.iter().all(|i| i.is_some()) { + let flat: Vec<&str> = batch + .instrument_strs() + .into_iter() + .map(|opt| opt.unwrap()) + .collect(); + Some(flat) + } else { + None + }; + + + let input_batch = self + .encode_peptides(&naked_sequences, &mods, &mod_sites, charges, nces, instruments)? 
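+            // Aside, a sketch: Option's FromIterator short-circuits on the
+            // first None, so each "all present or none" block above can be
+            // collapsed into a single pass:
+            //     let charges: Option<Vec<i32>> = batch.charges.iter().copied().collect();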
+ .to_device(self.get_device())?; let target_values: Vec = match self.property_type() { - PropertyType::RT => batch_data.par_iter().map(|p| p.retention_time.unwrap_or_default()).collect(), - PropertyType::CCS => batch_data.par_iter().map(|p| p.ion_mobility.unwrap_or_default()).collect(), - PropertyType::MS2 => return Err(anyhow::anyhow!("MS2 training is not yet implemented")), + PropertyType::RT => batch + .retention_times + .iter() + .map(|v| v.unwrap_or(0.0)) + .collect(), + PropertyType::CCS => batch + .ion_mobilities + .iter() + .map(|v| v.unwrap_or(0.0)) + .collect(), + PropertyType::MS2 => { + return Err(anyhow::anyhow!("MS2 training is not yet implemented")) + } }; let target_tensor = Tensor::new(target_values, &self.get_device())?; diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 32a634b..86ac41c 100644 --- a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -499,15 +499,15 @@ mod tests { let device = Device::Cpu; let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); - let peptide_sequences = vec!["AGHCEWQMKYR".to_string(), "AGHCEWQMKYR".to_string()]; + let peptide_sequences = vec!["AGHCEWQMKYR", "AGHCEWQMKYR"]; let mods = vec![ - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", ]; - let mod_sites = vec!["0;4;8".to_string(), "0;4;8".to_string()]; + let mod_sites = vec!["0;4;8", "0;4;8"]; let charge = Some(vec![2, 2]); let nce = Some(vec![20, 20]); - let instrument = Some(vec!["QE".to_string(), "QE".to_string()]); + let instrument = Some(vec!["QE", "QE"]); let input_tensor = model .encode_peptides( diff --git a/crates/redeem-properties/src/models/ms2_model.rs b/crates/redeem-properties/src/models/ms2_model.rs index ea3c489..176b7e7 100644 --- a/crates/redeem-properties/src/models/ms2_model.rs +++ b/crates/redeem-properties/src/models/ms2_model.rs @@ -40,7 +40,7 @@ impl MS2ModelWrapper { Ok(Self { model }) } - pub fn predict(&self, peptide_sequence: &[String], mods: &[String], mod_sites: &[String], charge: Vec, nce: Vec, intsrument: Vec) -> Result { + pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>, charge: Vec, nce: Vec, intsrument: &Vec<&str>) -> Result { self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), Some(nce), Some(intsrument)) } diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs index 19e3f21..a0f3c84 100644 --- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs @@ -349,16 +349,16 @@ mod tests { // Batched input let peptide_sequences = vec![ - "ACDEFGHIK".to_string(), - "AGHCEWQMKYR".to_string(), + "ACDEFGHIK", + "AGHCEWQMKYR", ]; let mods = vec![ - "Carbamidomethyl@C".to_string(), - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_string(), + "Carbamidomethyl@C", + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", ]; let mod_sites = vec![ - "1".to_string(), - "0;4;8".to_string(), + "1", + "0;4;8", ]; println!("Peptides: {:?}", peptide_sequences); @@ -388,34 +388,23 @@ mod tests { #[test] fn test_prediction() { + let model_path = 
PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); - let constants_path = - PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); - let device = /* Assuming Device is defined */ Device::new_cuda(0).unwrap_or(/* assuming Device::Cpu is defined */ Device::Cpu); // Replace with actual Device code. - let result = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device); + let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); + let device = Device::new_cuda(0).unwrap_or(Device::Cpu); + let result = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device); let mut model = result.unwrap(); - - // Test prediction with a few peptides after fine-tuning + let test_peptides = vec![ ("AGHCEWQMKYR", "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", "0;4;8", 0.2945), ("QPYAVSELAGHQTSAESWGTGR", "", "", 0.4328955), ("GMSVSDLADKLSTDDLNSLIAHAHR", "Oxidation@M", "1", 0.6536107), - ( - "TVQHHVLFTDNMVLICR", - "Oxidation@M;Carbamidomethyl@C", - "11;15", - 0.7811949, - ), + ("TVQHHVLFTDNMVLICR", "Oxidation@M;Carbamidomethyl@C", "11;15", 0.7811949), ("EAELDVNEELDKK", "", "", 0.2934583), ("YTPVQQGPVGVNVTYGGDPIPK", "", "", 0.5863009), ("YYAIDFTLDEIK", "", "", 0.8048359), ("VSSLQAEPLPR", "", "", 0.3201348), - ( - "NHAVVCQGCHNAIDPEVQR", - "Carbamidomethyl@C;Carbamidomethyl@C", - "5;8", - 0.1730425, - ), + ("NHAVVCQGCHNAIDPEVQR", "Carbamidomethyl@C;Carbamidomethyl@C", "5;8", 0.1730425), ("IPNIYAIGDVVAGPMLAHK", "", "", 0.8220097), ("AELGIPLEEVPPEEINYLTR", "", "", 0.8956433), ("NESTPPSEELELDKWK", "", "", 0.4471560), @@ -440,34 +429,32 @@ mod tests { ("HEDLKDMLEFPAQELR", "", "", 0.6529368), ("LLPDFLLER", "", "", 0.7852863), ]; - - let batch_size = 16; // Set an appropriate batch size - let peptides: Vec = test_peptides.iter().map(|(pep, _, _, _)| pep.to_string()).collect(); - let mods: Vec = test_peptides.iter().map(|(_, mod_, _, _)| mod_.to_string()).collect(); - let mod_sites: Vec = test_peptides.iter().map(|(_, _, sites, _)| sites.to_string()).collect(); + + let peptides: Vec<&str> = test_peptides.iter().map(|(pep, _, _, _)| *pep).collect(); + let mods: Vec<&str> = test_peptides.iter().map(|(_, mod_, _, _)| *mod_).collect(); + let mod_sites: Vec<&str> = test_peptides.iter().map(|(_, _, sites, _)| *sites).collect(); let observed_rts: Vec = test_peptides.iter().map(|(_, _, _, rt)| *rt).collect(); - + match model.predict(&peptides, &mods, &mod_sites, None, None, None) { Ok(predictions) => { - if let /* Assuming PredictionResult and RTResult are defined */ PredictionResult::RTResult(rt_preds) = predictions { // Replace with actual PredictionResult and RTResult code + if let PredictionResult::RTResult(rt_preds) = predictions { let total_error: f32 = rt_preds.iter().zip(observed_rts.iter()) .map(|(pred, obs)| (pred - obs).abs()) .sum(); - - // PRINT PREDICTIONS AND OBSERVED RTs WITHOUT IZIP + let mut peptides_iter = peptides.iter(); let mut rt_preds_iter = rt_preds.iter(); let mut observed_rts_iter = observed_rts.iter(); - + loop { match (peptides_iter.next(), rt_preds_iter.next(), observed_rts_iter.next()) { (Some(pep), Some(pred), Some(obs)) => { println!("Peptide: {}, Predicted RT: {}, Observed RT: {}", pep, pred, obs); } - _ => break, // Exit the loop if any iterator is exhausted + _ => break, } } - + let mean_absolute_error = total_error / rt_preds.len() as f32; println!("Mean Absolute Error: {:.6}", mean_absolute_error); } else { diff --git a/crates/redeem-properties/src/models/rt_model.rs 
b/crates/redeem-properties/src/models/rt_model.rs
index adab38c..c41f056 100644
--- a/crates/redeem-properties/src/models/rt_model.rs
+++ b/crates/redeem-properties/src/models/rt_model.rs
@@ -47,7 +47,7 @@ impl RTModelWrapper {
        Ok(Self { model })
    }

-    pub fn predict(&self, peptide_sequence: &[String], mods: &[String], mod_sites: &[String]) -> Result<PredictionResult> {
+    pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>) -> Result<PredictionResult> {
        self.model.predict(peptide_sequence, mods, mod_sites, None, None, None)
    }

diff --git a/crates/redeem-properties/src/utils/data_handling.rs b/crates/redeem-properties/src/utils/data_handling.rs
index 2722122..035238d 100644
--- a/crates/redeem-properties/src/utils/data_handling.rs
+++ b/crates/redeem-properties/src/utils/data_handling.rs
@@ -1,4 +1,5 @@

+use std::sync::Arc;

 /// Type of RT normalization used
 #[derive(Debug, Clone, Copy)]
@@ -18,13 +19,17 @@ impl RTNormalization {
    }
 }

 #[derive(Clone)]
 pub struct PeptideData {
-    pub sequence: String,
+    pub modified_sequence: Arc<[u8]>, // e.g., "(UniMod:1)M(UniMod:35)AAAATMAAAAR"
+    pub naked_sequence: Arc<[u8]>,    // e.g., "MAAAATMAAAAR"
+    pub mods: Arc<[u8]>,              // e.g., "Any_N-term;Oxidation@M"
+    pub mod_sites: Arc<[u8]>,         // e.g., "0;1"
    pub charge: Option<i32>,
    pub precursor_mass: Option<f32>,
    pub nce: Option<i32>,
-    pub instrument: Option<String>,
+    pub instrument: Option<Arc<[u8]>>,
    pub retention_time: Option<f32>,
    pub ion_mobility: Option<f32>,
    pub ccs: Option<f32>,
@@ -32,17 +37,118 @@ pub struct PeptideData {
 }

 impl PeptideData {
-    pub fn new(sequence: &str, charge: Option<i32>, precursor_mass: Option<f32>, nce: Option<i32>, instrument: Option<&str>, retention_time: Option<f32>, ion_mobility: Option<f32>, ccs: Option<f32>, ms2_intensities: Option<Vec<Vec<f32>>>) -> Self {
+    pub fn new(
+        modified_sequence: &str,
+        naked_sequence: &str,
+        mods: &str,
+        mod_sites: &str,
+        charge: Option<i32>,
+        precursor_mass: Option<f32>,
+        nce: Option<i32>,
+        instrument: Option<&str>,
+        retention_time: Option<f32>,
+        ion_mobility: Option<f32>,
+        ccs: Option<f32>,
+        ms2_intensities: Option<Vec<Vec<f32>>>,
+    ) -> Self {
        Self {
-            sequence: sequence.to_string(),
+            modified_sequence: Arc::from(modified_sequence.as_bytes().to_vec().into_boxed_slice()),
+            naked_sequence: Arc::from(naked_sequence.as_bytes().to_vec().into_boxed_slice()),
+            mods: Arc::from(mods.as_bytes().to_vec().into_boxed_slice()),
+            mod_sites: Arc::from(mod_sites.as_bytes().to_vec().into_boxed_slice()),
            charge,
            precursor_mass,
            nce,
-            instrument: instrument.map(|s| s.to_string()),
+            instrument: instrument.map(|s| Arc::from(s.as_bytes().to_vec().into_boxed_slice())),
            retention_time,
            ion_mobility,
            ccs,
-            ms2_intensities
+            ms2_intensities,
        }
    }
+
+    pub fn modified_sequence_str(&self) -> &str {
+        std::str::from_utf8(&self.modified_sequence).unwrap_or("")
+    }
+
+    pub fn naked_sequence_str(&self) -> &str {
+        std::str::from_utf8(&self.naked_sequence).unwrap_or("")
+    }
+
+    pub fn mods_str(&self) -> &str {
+        std::str::from_utf8(&self.mods).unwrap_or("")
+    }
+
+    pub fn mod_sites_str(&self) -> &str {
+        std::str::from_utf8(&self.mod_sites).unwrap_or("")
+    }
+
+    pub fn instrument_str(&self) -> Option<&str> {
+        self.instrument
+            .as_ref()
+            .map(|v| std::str::from_utf8(v).unwrap_or(""))
+    }
+}
+
+pub struct PeptideBatchData {
+    pub naked_sequence: Vec<Arc<[u8]>>,
+    pub mods: Vec<Arc<[u8]>>,
+    pub mod_sites: Vec<Arc<[u8]>>,
+    pub charges: Vec<Option<i32>>,
+    pub precursor_masses: Vec<Option<f32>>,
+    pub nces: Vec<Option<i32>>,
+    pub instruments: Vec<Option<Arc<[u8]>>>,
+    pub retention_times: Vec<Option<f32>>,
+    pub ion_mobilities: Vec<Option<f32>>,
+    pub ccs: Vec<Option<f32>>,
+    pub ms2_intensities: Vec<Option<Vec<Vec<f32>>>>,
+}
+
+impl From<&[PeptideData]> for PeptideBatchData {
+    fn from(slice: &[PeptideData]) -> Self {
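+        // Struct-of-arrays view of the batch: each Arc::clone below only
+        // bumps a reference count, so building a batch copies pointers rather
+        // than the underlying sequence and modification strings.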
Self { + naked_sequence: slice.iter().map(|p| Arc::clone(&p.naked_sequence)).collect(), + mods: slice.iter().map(|p| Arc::clone(&p.mods)).collect(), + mod_sites: slice.iter().map(|p| Arc::clone(&p.mod_sites)).collect(), + charges: slice.iter().map(|p| p.charge).collect(), + precursor_masses: slice.iter().map(|p| p.precursor_mass).collect(), + nces: slice.iter().map(|p| p.nce).collect(), + instruments: slice.iter().map(|p| p.instrument.clone()).collect(), + retention_times: slice.iter().map(|p| p.retention_time).collect(), + ion_mobilities: slice.iter().map(|p| p.ion_mobility).collect(), + ccs: slice.iter().map(|p| p.ccs).collect(), + ms2_intensities: slice.iter().map(|p| p.ms2_intensities.clone()).collect(), } } } + + +impl PeptideBatchData { + pub fn naked_sequence_strs(&self) -> Vec<&str> { + self.naked_sequence + .iter() + .map(|s| std::str::from_utf8(s).unwrap_or("")) + .collect() + } + + pub fn mods_strs(&self) -> Vec<&str> { + self.mods + .iter() + .map(|s| std::str::from_utf8(s).unwrap_or("")) + .collect() + } + + pub fn mod_sites_strs(&self) -> Vec<&str> { + self.mod_sites + .iter() + .map(|s| std::str::from_utf8(s).unwrap_or("")) + .collect() + } + + pub fn instrument_strs(&self) -> Vec> { + self.instruments + .iter() + .map(|opt| opt.as_ref().map(|s| std::str::from_utf8(s).unwrap_or(""))) + .collect() + } +} From 67de1dd04724763d42b770956d224e1a6db3fb5d Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 13 May 2025 13:04:45 -0400 Subject: [PATCH 46/75] refactor: Update mod_to_feature loading to use Arc for key in RTCNNTFModel and CCSCNNTFModel --- .../examples/alphapeptdeep_ccs_cnn_lstm.rs | 50 +-- .../examples/alphapeptdeep_ms2_bert.rs | 43 ++- .../examples/alphapeptdeep_rt_cnn_lstm.rs | 142 +++++--- .../src/building_blocks/featurize.rs | 111 +++++- .../src/models/ccs_cnn_lstm_model.rs | 148 +++++--- .../src/models/ccs_cnn_tf_model.rs | 28 +- .../redeem-properties/src/models/ccs_model.rs | 108 ++++-- .../src/models/model_interface.rs | 212 ++++++----- .../src/models/ms2_bert_model.rs | 72 ++-- .../redeem-properties/src/models/ms2_model.rs | 83 ++++- .../src/models/rt_cnn_lstm_model.rs | 341 ++++++++++-------- .../src/models/rt_cnn_transformer_model.rs | 11 +- .../redeem-properties/src/models/rt_model.rs | 123 +++++-- .../src/utils/peptdeep_utils.rs | 28 ++ 14 files changed, 981 insertions(+), 519 deletions(-) diff --git a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs index 0236f0a..085cb0b 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ccs_cnn_lstm.rs @@ -6,25 +6,31 @@ use redeem_properties::{ model_interface::{ModelInterface, PredictionResult}, }, utils::{ - data_handling::{PeptideData}, - peptdeep_utils::{ion_mobility_to_ccs_bruker}, + data_handling::PeptideData, + peptdeep_utils::ion_mobility_to_ccs_bruker, }, }; use std::{path::PathBuf, sync::Arc}; -struct PredictionContext<'a> { - peptides: Vec<&'a str>, - mods: Vec<&'a str>, - mod_sites: Vec<&'a str>, +struct PredictionContext { + peptides: Vec>, + mods: Vec>, + mod_sites: Vec>, charges: Vec, observed_ccs: Vec, } -impl<'a> PredictionContext<'a> { - fn new(test_peptides: &'a [(&'a str, &'a str, &'a str, i32, f32)]) -> Self { - let peptides = test_peptides.iter().map(|(pep, _, _, _, _)| *pep).collect(); - let mods = test_peptides.iter().map(|(_, m, _, _, _)| *m).collect(); - let mod_sites = test_peptides.iter().map(|(_, _, s, _, _)| *s).collect(); +impl 
PredictionContext { + fn new(test_peptides: &[(&str, &str, &str, i32, f32)]) -> Self { + let peptides = test_peptides.iter() + .map(|(pep, _, _, _, _)| Arc::from(pep.as_bytes().to_vec().into_boxed_slice())) + .collect(); + let mods = test_peptides.iter() + .map(|(_, m, _, _, _)| Arc::from(m.as_bytes().to_vec().into_boxed_slice())) + .collect(); + let mod_sites = test_peptides.iter() + .map(|(_, _, s, _, _)| Arc::from(s.as_bytes().to_vec().into_boxed_slice())) + .collect(); let charges = test_peptides.iter().map(|(_, _, _, c, _)| *c).collect(); let observed_ccs = test_peptides.iter().map(|(_, _, _, _, ccs)| *ccs).collect(); @@ -48,18 +54,20 @@ fn run_prediction(model: &mut CCSCNNLSTMModel, ctx: &PredictionContext) -> Resul None, )? { PredictionResult::CCSResult(preds) => { - let total_error: f32 = preds - .iter() - .zip(ctx.observed_ccs.iter()) + let total_error: f32 = preds.iter().zip(ctx.observed_ccs.iter()) .map(|(pred, obs)| (pred - obs).abs()) .sum(); - for (pep, pred, obs) in itertools::izip!(&ctx.peptides, &preds, &ctx.observed_ccs) { - println!("Peptide: {}, Predicted CCS: {:.4}, Observed CCS: {:.4}", pep, pred, obs); + for ((pep, pred), obs) in ctx.peptides.iter().zip(preds.clone()).zip(&ctx.observed_ccs) { + println!( + "Peptide: {}, Predicted CCS: {:.4}, Observed CCS: {:.4}", + std::str::from_utf8(pep).unwrap_or(""), + pred, + obs + ); } - let mae = total_error / preds.len() as f32; - println!("Mean Absolute Error: {:.6}", mae); + println!("Mean Absolute Error: {:.6}", total_error / preds.len() as f32); } _ => println!("Unexpected prediction result type."), } @@ -71,8 +79,6 @@ fn main() -> Result<()> { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); let device = Device::new_cuda(0).unwrap_or(Device::Cpu); - println!("Device: {:?}", device); - let mut model = CCSCNNLSTMModel::new( &model_path, Some(&constants_path), @@ -97,7 +103,5 @@ fn main() -> Result<()> { ]; let ctx = PredictionContext::new(&test_peptides); - run_prediction(&mut model, &ctx)?; - - Ok(()) + run_prediction(&mut model, &ctx) } diff --git a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs index cf7b8f7..a1a7921 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_ms2_bert.rs @@ -18,28 +18,35 @@ use std::{ fn run_prediction(model: &mut MS2BertModel, batch_data: &[PeptideData]) -> Result<()> { let batch = PeptideBatchData::from(batch_data); - let peptides = batch.naked_sequence_strs(); - let mods = batch.mods_strs(); - let mod_sites = batch.mod_sites_strs(); - - let charges = if batch.charges.iter().all(|c| c.is_some()) { - Some(batch.charges.iter().map(|c| c.unwrap()).collect()) - } else { - None - }; - let nces = if batch.nces.iter().all(|n| n.is_some()) { - Some(batch.nces.iter().map(|n| n.unwrap()).collect()) - } else { - None - }; let instruments = if batch.instruments.iter().all(|i| i.is_some()) { - let flat: Vec<&str> = batch.instrument_strs().into_iter().map(|opt| opt.unwrap()).collect(); - Some(flat) + Some( + batch + .instruments + .iter() + .map(|opt| opt.as_ref().map(|a| Arc::clone(a))) + .collect::>(), + ) } else { None }; - - let predictions = model.predict(&peptides, &mods, &mod_sites, charges, nces, instruments.as_ref())?; + + + let predictions = model.predict( + &batch.naked_sequence, + &batch.mods, + &batch.mod_sites, + if batch.charges.iter().all(|c| c.is_some()) { + Some(batch.charges.iter().map(|c| 
c.unwrap()).collect()) + } else { + None + }, + if batch.nces.iter().all(|n| n.is_some()) { + Some(batch.nces.iter().map(|n| n.unwrap()).collect()) + } else { + None + }, + instruments, + )?; if let PredictionResult::MS2Result(ms2_preds) = predictions { let total_error: f32 = ms2_preds diff --git a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs index f206ad7..25026e2 100644 --- a/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs +++ b/crates/redeem-properties/examples/alphapeptdeep_rt_cnn_lstm.rs @@ -6,65 +6,68 @@ use redeem_properties::{ rt_cnn_lstm_model::RTCNNLSTMModel, }, utils::{ - data_handling::{PeptideData, PeptideBatchData}, - peptdeep_utils::{load_modifications, remove_mass_shift, get_modification_string, get_modification_indices}, + data_handling::PeptideData, + peptdeep_utils::{ + get_modification_indices, get_modification_string, load_modifications, + remove_mass_shift, + }, }, }; use std::path::PathBuf; -use std::sync::Arc; -struct PredictionContext { - peptides: Vec<&'static str>, - mods: Vec<&'static str>, - mod_sites: Vec<&'static str>, - observed_rts: Vec, -} - -impl PredictionContext { - fn new(test_peptides: &[(&'static str, &'static str, &'static str, f32)]) -> Self { - let peptides = test_peptides.iter().map(|(pep, _, _, _)| *pep).collect(); - let mods = test_peptides.iter().map(|(_, m, _, _)| *m).collect(); - let mod_sites = test_peptides.iter().map(|(_, _, site, _)| *site).collect(); - let observed_rts = test_peptides.iter().map(|(_, _, _, rt)| *rt).collect(); - - Self { - peptides, - mods, - mod_sites, - observed_rts, - } - } -} - -fn run_prediction(model: &mut RTCNNLSTMModel, context: &PredictionContext) -> Result<()> { - match model.predict( - &context.peptides, - &context.mods, - &context.mod_sites, +fn run_prediction(model: &mut RTCNNLSTMModel, batch_data: &[PeptideData]) -> Result<()> { + let batch = redeem_properties::utils::data_handling::PeptideBatchData::from(batch_data); + + let predictions = model.predict( + &batch.naked_sequence, + &batch.mods, + &batch.mod_sites, + if batch.charges.iter().all(|c| c.is_some()) { + Some(batch.charges.iter().map(|c| c.unwrap()).collect()) + } else { + None + }, None, - None, - None, - ) { - Ok(preds) => { - if let PredictionResult::RTResult(rt_preds) = preds { - let total_error: f32 = rt_preds + if batch.instruments.iter().all(|i| i.is_some()) { + Some( + batch + .instruments .iter() - .zip(&context.observed_rts) - .map(|(p, o)| (p - o).abs()) - .sum(); - - for ((pep, pred), obs) in context.peptides.iter().zip(rt_preds.iter()).zip(&context.observed_rts) { - println!("Peptide: {}, Predicted RT: {:.6}, Observed RT: {:.6}", pep, pred, obs); - } - - println!( - "Mean Absolute Error: {:.6}", - total_error / rt_preds.len() as f32 - ); - } + .map(|opt| opt.as_ref().map(|a| a.clone())) + .collect::>(), + ) + } else { + None + }, + )?; + + if let PredictionResult::RTResult(rt_preds) = predictions { + let total_error: f32 = rt_preds + .iter() + .zip(batch.retention_times.iter()) + .map(|(pred, obs)| (pred - obs.unwrap_or_default()).abs()) + .sum(); + + for ((seq, pred), obs) in batch + .naked_sequence + .iter() + .zip(rt_preds.iter()) + .zip(batch.retention_times.iter()) + { + println!( + "Peptide: {}, Predicted RT: {:.6}, Observed RT: {:.6}", + std::str::from_utf8(seq).unwrap_or(""), + pred, + obs.unwrap_or_default() + ); } - Err(e) => println!("Prediction error: {e}"), + + println!( + "Mean Absolute Error: {:.6}", + total_error / 
rt_preds.len() as f32 + ); } + Ok(()) } @@ -74,7 +77,6 @@ fn main() -> Result<()> { let model_path = PathBuf::from("/home/singjc/Documents/github/redeem/rt_fine_tuned.safetensors"); let constants_path = PathBuf::from("/home/singjc/Documents/github/redeem/crates/redeem-properties/data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::new_cuda(0).unwrap_or(Device::Cpu); - println!("Device: {:?}", device); let mut model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device) @@ -117,21 +119,45 @@ fn main() -> Result<()> { }) .collect(); - let test_peptides_data = vec![ + let test_peptides = vec![ ("QPYAVSELAGHQTSAESWGTGR", "", "", 0.4328955), ("GMSVSDLADKLSTDDLNSLIAHAHR", "Oxidation@M", "1", 0.6536107), - ("TVQHHVLFTDNMVLICR", "Oxidation@M;Carbamidomethyl@C", "11;15", 0.7811949), + ( + "TVQHHVLFTDNMVLICR", + "Oxidation@M;Carbamidomethyl@C", + "11;15", + 0.7811949, + ), ("EAELDVNEELDKK", "", "", 0.2934583), ("YTPVQQGPVGVNVTYGGDPIPK", "", "", 0.5863009), ]; - let prediction_context = PredictionContext::new(&test_peptides_data); - - run_prediction(&mut model, &prediction_context)?; + let prediction_data: Vec = test_peptides + .into_iter() + .map(|(seq, mods, sites, rt)| { + let naked = remove_mass_shift(seq); + PeptideData::new( + seq, + &naked, + mods, + sites, + None, + None, + None, + None, + Some(rt), + None, + None, + None, + ) + }) + .collect(); + + run_prediction(&mut model, &prediction_data)?; model.fine_tune(&training_data, modifications, 10, 0.001, 5)?; - run_prediction(&mut model, &prediction_context)?; + run_prediction(&mut model, &prediction_data)?; model.save("alphapeptdeep_rt_cnn_lstm_finetuned.safetensors")?; diff --git a/crates/redeem-properties/src/building_blocks/featurize.rs b/crates/redeem-properties/src/building_blocks/featurize.rs index f7628f2..9317556 100644 --- a/crates/redeem-properties/src/building_blocks/featurize.rs +++ b/crates/redeem-properties/src/building_blocks/featurize.rs @@ -1,5 +1,5 @@ use anyhow::{Result, anyhow}; -use std::collections::HashMap; +use std::{collections::HashMap, sync::Arc}; use candle_core::{DType, Device, Tensor}; use rayon::prelude::*; use std::sync::atomic::{AtomicU32, Ordering}; @@ -39,6 +39,26 @@ pub fn aa_indices_tensor(seq: &str, device: &Device) -> Result { Ok(Tensor::from_slice(&indices, (1, indices.len()), device)?.to_dtype(DType::F32)?.unsqueeze(2)?) } +/// Convert peptide sequences into AA ID array using Arc<[u8]>. +/// This avoids converting the whole sequence to a String or &str unless necessary. +pub fn aa_indices_tensor_from_arc(seq: &Arc<[u8]>, device: &Device) -> Result { + let map = aa_index_map(); + let filtered: Vec = seq + .iter() + .map(|&b| { + let c = b as char; + map.get(&c) + .copied() + .ok_or_else(|| anyhow!("Unknown amino acid character: '{}'", c)) + }) + .collect::>>()?; + + let mut indices = vec![0i64]; // padding start + indices.extend(filtered); + indices.push(0); // padding end + + Ok(Tensor::from_slice(&indices, (1, indices.len()), device)?.to_dtype(DType::F32)?.unsqueeze(2)?) +} /// One-hot encode amino acid indices and concatenate additional tensors. 
pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result<Tensor> {
@@ -58,8 +78,6 @@ pub fn aa_one_hot(aa_indices: &Tensor, cat_others: &[&Tensor]) -> Result<Tensor>
            }
        }
    }
-
-
    let mut one_hot_data = vec![0.0f32; batch_size * seq_len * num_classes];
@@ -166,10 +184,59 @@ pub fn get_mod_features_from_parsed(
 }

+pub fn get_mod_features_from_parsed_arc(
+    mod_names: &[Arc<[u8]>],
+    mod_sites: &[usize],
+    seq_len: usize,
+    mod_feature_size: usize,
+    mod_to_feature: &HashMap<Arc<[u8]>, Vec<f32>>,
+    device: &Device,
+) -> Result<Tensor> {
+    let atomic_buffer: Vec<AtomicU32> = (0..seq_len * mod_feature_size)
+        .map(|_| AtomicU32::new(0))
+        .collect();
+
+    mod_names
+        .par_iter()
+        .zip(mod_sites.par_iter())
+        .for_each(|(mod_name, &site)| {
+            if site >= seq_len {
+                log::warn!(
+                    "Skipping mod {:?} at invalid site {} (seq_len {})",
+                    std::str::from_utf8(mod_name).unwrap_or(""),
+                    site,
+                    seq_len
+                );
+                return;
+            }
+            if let Some(feat) = mod_to_feature.get(mod_name) {
+                for (i, &val) in feat.iter().enumerate() {
+                    let idx = site * mod_feature_size + i;
+                    // Caveat: fetch_add on the raw f32 bit pattern is only
+                    // correct when each (site, feature) slot is written once
+                    // (0 + bits == bits); integer-adding bit patterns is not
+                    // float addition if two mods ever hit the same slot.
+                    let val_bits = val.to_bits();
+                    atomic_buffer[idx].fetch_add(val_bits, Ordering::Relaxed);
+                }
+            } else {
+                log::warn!(
+                    "Unknown modification feature: {:?}",
+                    std::str::from_utf8(mod_name).unwrap_or("")
+                );
+            }
+        });
+
+    let mod_x: Vec<f32> = atomic_buffer
+        .into_iter()
+        .map(|a| f32::from_bits(a.load(Ordering::Relaxed)))
+        .collect();
+
+    Tensor::from_slice(&mod_x, (1, seq_len, mod_feature_size), device)
+        .map_err(|e| anyhow!("Failed to create tensor: {}", e))
+}
+

 #[cfg(test)]
 mod tests {
-    use crate::utils::peptdeep_utils::load_mod_to_feature;
+    use crate::utils::peptdeep_utils::{load_mod_to_feature, load_mod_to_feature_arc};
    use crate::utils::peptdeep_utils::parse_model_constants;
    use crate::utils::peptdeep_utils::ModelConstants;
@@ -183,13 +250,23 @@ mod tests {
    fn test_aa_indices_tensor(){
        let device = Device::Cpu;
        let seq = "AGHCEWQMKYR";
+        let start_time = std::time::Instant::now();
        let result = aa_indices_tensor(seq, &device).unwrap();
+        println!("aa_indices_tensor Time taken: {:?}", start_time.elapsed());
        // expected result is [[0, 1, 7, 8, 3, 5, 23, 17, 13, 11, 25, 18, 0]]
        let expect_out = Tensor::from_vec(vec!{0.0f32, 1.0f32, 7.0f32, 8.0f32, 3.0f32, 5.0f32, 23.0f32, 17.0f32, 13.0f32, 11.0f32, 25.0f32, 18.0f32, 0.0f32}, (1, 13), &device).unwrap();
        println!("{:?} - aa_indices_tensor: {:?}", seq, result.to_vec3::<f32>().unwrap());
        println!("result shape: {:?}", result.shape());
        assert_eq!(result.shape().dims(), &[1, 13, 1]);
        // assert_eq!(result.to_vec3::<f32>().unwrap(), expect_out.to_vec3::<f32>().unwrap());
+
+        let seq_bytes = Arc::from(seq.as_bytes().to_vec().into_boxed_slice());
+        let start_time = std::time::Instant::now();
+        let result = aa_indices_tensor_from_arc(&seq_bytes, &device).unwrap();
+        println!("aa_indices_tensor_from_arc Time taken: {:?}", start_time.elapsed());
+        println!("{:?} - aa_indices_tensor_from_arc: {:?}", seq, result.to_vec3::<f32>().unwrap());
+        assert_eq!(result.shape().dims(), &[1, 13, 1]);
+        // assert_eq!(result.to_vec3::<f32>().unwrap(), expect_out.to_vec3::<f32>().unwrap());
    }

    #[test]
@@ -214,6 +291,7 @@ mod tests {
        let mod_to_feature: HashMap<String, Vec<f32>> = load_mod_to_feature(&constants).unwrap();
        let device = Device::Cpu;

+        let start_time = std::time::Instant::now();
        let tensor = get_mod_features_from_parsed(
            &mod_names,
            &mod_sites,
            seq_len,
            mod_feature_size,
            &mod_to_feature,
            &device,
        ).unwrap();
+        println!("get_mod_features_from_parsed Time taken: {:?}", start_time.elapsed());
        println!("tensor shape: {:?}", tensor.shape());
        assert_eq!(tensor.shape().dims(), &[1,
seq_len, mod_feature_size]); + let mod_to_feature = load_mod_to_feature_arc(&constants).unwrap(); + let mod_names_arc: Vec> = mod_names + .iter() + .map(|&s| Arc::from(s.as_bytes().to_vec().into_boxed_slice())) + .collect(); + let mod_sites_arc: Vec = mod_sites + .iter() + .map(|&s| s) + .collect(); + let start_time = std::time::Instant::now(); + let tensor_arc = get_mod_features_from_parsed_arc( + &mod_names_arc, + &mod_sites_arc, + seq_len, + mod_feature_size, + &mod_to_feature, + &device, + ).unwrap(); + println!("get_mod_features_from_parsed_arc Time taken: {:?}", start_time.elapsed()); + println!("tensor_arc shape: {:?}", tensor_arc.shape()); + assert_eq!(tensor_arc.shape().dims(), &[1, seq_len, mod_feature_size]); + assert_eq!(tensor.shape(), tensor_arc.shape()); + + } } \ No newline at end of file diff --git a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs index 58b08cb..b9048e4 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_lstm_model.rs @@ -1,21 +1,20 @@ use anyhow::Result; use candle_core::{DType, Device, IndexOp, Tensor}; -use candle_nn::{ - Dropout, Module, VarBuilder, VarMap, -}; +use candle_nn::{Dropout, Module, VarBuilder, VarMap}; use std::collections::HashMap; -use std::{fmt, vec}; use std::path::Path; +use std::sync::Arc; +use std::{fmt, vec}; use crate::building_blocks::building_blocks::{ DecoderLinear, Encoder26aaModChargeCnnLstmAttnSum, MOD_FEATURE_SIZE, }; use crate::{ - models::model_interface::{ModelInterface, PropertyType,load_tensors_from_model, create_var_map}, - utils::peptdeep_utils::{ - load_mod_to_feature, parse_model_constants, ModelConstants, + models::model_interface::{ + create_var_map, load_tensors_from_model, ModelInterface, PropertyType, }, + utils::peptdeep_utils::{load_mod_to_feature_arc, parse_model_constants, ModelConstants}, }; // Constants @@ -29,7 +28,7 @@ pub struct CCSCNNLSTMModel { var_store: VarBuilder<'static>, varmap: VarMap, constants: ModelConstants, - mod_to_feature: HashMap>, + mod_to_feature: HashMap, Vec>, fixed_sequence_len: usize, // Total number of fragment types of a fragmentation position to predict num_frag_types: usize, @@ -55,11 +54,10 @@ impl ModelInterface for CCSCNNLSTMModel { } fn model_arch(&self) -> &'static str { - "ccs_cnn_lstm" + "ccs_cnn_lstm" } - fn new_untrained(_device: Device) -> Result - { + fn new_untrained(_device: Device) -> Result { unimplemented!("Untrained model creation is not implemented for this architecture."); } @@ -71,7 +69,7 @@ impl ModelInterface for CCSCNNLSTMModel { num_frag_types: usize, num_modloss_types: usize, mask_modloss: bool, - device: Device + device: Device, ) -> Result { let tensor_data = load_tensors_from_model(model_path.as_ref(), &device)?; @@ -86,7 +84,7 @@ impl ModelInterface for CCSCNNLSTMModel { }; // Load the mod_to_feature mapping - let mod_to_feature = load_mod_to_feature(&constants)?; + let mod_to_feature = load_mod_to_feature_arc(&constants)?; let dropout = Dropout::new(0.1); @@ -95,9 +93,7 @@ impl ModelInterface for CCSCNNLSTMModel { 8, 128, 2, - vec![ - "ccs_encoder.mod_nn.nn.weight" - ], + vec!["ccs_encoder.mod_nn.nn.weight"], vec![ "ccs_encoder.input_cnn.cnn_short.weight", "ccs_encoder.input_cnn.cnn_medium.weight", @@ -106,11 +102,12 @@ impl ModelInterface for CCSCNNLSTMModel { vec![ "ccs_encoder.input_cnn.cnn_short.bias", "ccs_encoder.input_cnn.cnn_medium.bias", - "ccs_encoder.input_cnn.cnn_long.bias" + 
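+                // (key names mirror AlphaPeptDeep's PyTorch state_dict layout)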
"ccs_encoder.input_cnn.cnn_long.bias", ], "ccs_encoder.hidden_nn", - vec!["ccs_encoder.attn_sum.attn.0.weight"] - ).unwrap(); + vec!["ccs_encoder.attn_sum.attn.0.weight"], + ) + .unwrap(); let ccs_decoder = DecoderLinear::from_varstore( &var_store, @@ -138,10 +135,9 @@ impl ModelInterface for CCSCNNLSTMModel { is_training: false, dropout, ccs_encoder, - ccs_decoder + ccs_decoder, }) } - fn forward(&self, xs: &Tensor) -> Result { let (_batch_size, _seq_len, _) = xs.shape().dims3()?; @@ -154,15 +150,17 @@ impl ModelInterface for CCSCNNLSTMModel { let mod_x_out = xs.i((.., .., start_mod_x..start_mod_x + MOD_FEATURE_SIZE))?; let charge_out = xs.i((.., 0..1, start_charge..start_charge + 1))?; let charge_out = charge_out.squeeze(2)?; - - let x = self.ccs_encoder.forward(&aa_indices_out, &mod_x_out, &charge_out)?; + + let x = self + .ccs_encoder + .forward(&aa_indices_out, &mod_x_out, &charge_out)?; let x = self.dropout.forward(&x, true)?; let x = Tensor::cat(&[x, charge_out], 1)?; let x = self.ccs_decoder.forward(&x)?; Ok(x.squeeze(1)?) } - + /// Set model to evaluation mode for inference /// This disables dropout and other training-specific layers. fn set_evaluation_mode(&mut self) { @@ -192,35 +190,35 @@ impl ModelInterface for CCSCNNLSTMModel { self.constants.mod_elements.len() } - fn get_mod_to_feature(&self) -> &HashMap> { + fn get_mod_to_feature(&self) -> &HashMap, Vec> { &self.mod_to_feature } fn get_min_pred_intensity(&self) -> f32 { - unimplemented!("Method not implemented for architecture: {}", self.model_arch()) + unimplemented!( + "Method not implemented for architecture: {}", + self.model_arch() + ) } - fn get_mut_varmap(&mut self) -> &mut VarMap { &mut self.varmap } - + fn print_summary(&self) { todo!() } - + fn print_weights(&self) { todo!() } - } - // // Forward Module Trait Implementation // impl Module for CCSCNNLSTMModel { // fn forward(&self, input: &Tensor) -> Result { // ModelInterface::forward(self, input) -// } +// } // } impl fmt::Debug for CCSCNNLSTMModel { @@ -239,14 +237,26 @@ impl fmt::Debug for CCSCNNLSTMModel { // CNN writeln!(f, " (input_cnn): SeqCNN(")?; - writeln!(f, " (cnn_short): Conv1d(36, 36, kernel_size=(3,), stride=(1,), padding=(1,))")?; - writeln!(f, " (cnn_medium): Conv1d(36, 36, kernel_size=(5,), stride=(1,), padding=(2,))")?; - writeln!(f, " (cnn_long): Conv1d(36, 36, kernel_size=(7,), stride=(1,), padding=(3,))")?; + writeln!( + f, + " (cnn_short): Conv1d(36, 36, kernel_size=(3,), stride=(1,), padding=(1,))" + )?; + writeln!( + f, + " (cnn_medium): Conv1d(36, 36, kernel_size=(5,), stride=(1,), padding=(2,))" + )?; + writeln!( + f, + " (cnn_long): Conv1d(36, 36, kernel_size=(7,), stride=(1,), padding=(3,))" + )?; writeln!(f, " )")?; // Hidden LSTM writeln!(f, " (hidden_nn): SeqLSTM(")?; - writeln!(f, " (rnn): LSTM(144, 128, num_layers=2, batch_first=True, bidirectional=True)")?; + writeln!( + f, + " (rnn): LSTM(144, 128, num_layers=2, batch_first=True, bidirectional=True)" + )?; writeln!(f, " )")?; // Attention Sum @@ -275,21 +285,21 @@ impl fmt::Debug for CCSCNNLSTMModel { " (2): Linear(in_features=64, out_features=1, bias=True)" )?; writeln!(f, " )")?; - + writeln!(f, " )")?; - + write!(f, ")") } } - #[cfg(test)] mod tests { use super::*; - use crate::models::model_interface::ModelInterface; use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel; + use crate::models::model_interface::ModelInterface; use candle_core::Device; use std::path::PathBuf; + use std::sync::Arc; #[test] fn test_load_pretrained_ccs_cnn_lstm_model() { @@ -297,7 +307,8 @@ mod 
tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); let device = Device::Cpu; - let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + let model = + CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); println!("{:?}", model); } @@ -308,17 +319,29 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); let device = Device::Cpu; - let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); - - let peptide_sequences = "AGHCEWQMKYR"; - let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; - let mod_sites = "0;4;8"; + let model = + CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + + let peptide_sequences = Arc::from("AGHCEWQMKYR".as_bytes().to_vec().into_boxed_slice()); + let mods = Arc::from( + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .as_bytes() + .to_vec() + .into_boxed_slice(), + ); + let mod_sites = Arc::from("0;4;8".as_bytes().to_vec().into_boxed_slice()); let charge = Some(2); let nce = Some(20); - let instrument = Some("QE"); + let instrument = Some(Arc::from("QE".as_bytes().to_vec().into_boxed_slice())); - let result = - model.encode_peptide(&peptide_sequences, mods, mod_sites, charge, nce, instrument); + let result = model.encode_peptide( + &peptide_sequences, + &mods, + &mod_sites, + charge, + nce, + instrument.as_ref(), + ); println!("{:?}", result); @@ -328,22 +351,31 @@ mod tests { } #[test] - fn test_predict(){ + fn test_predict() { let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth"); let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); let device = Device::Cpu; - let model = CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); - let peptide_sequences = vec!["AGHCEWQMKYR", "AGHCEWQMKYR"]; - let mods = vec!["Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"]; - let mod_sites = vec!["0;4;8", "0;4;8"]; + let model = + CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + + let seq: Arc<[u8]> = Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()); + let mods: Arc<[u8]> = Arc::from( + b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .to_vec() + .into_boxed_slice(), + ); + let mod_sites: Arc<[u8]> = Arc::from(b"0;4;8".to_vec().into_boxed_slice()); + + let peptide_sequences = vec![seq.clone(), seq]; + let mods = vec![mods.clone(), mods]; + let mod_sites = vec![mod_sites.clone(), mod_sites]; let charge = Some(vec![2, 2]); let result = model.predict(&peptide_sequences, &mods, &mod_sites, charge, None, None); println!("{:?}", result); - } - - -} \ No newline at end of file + assert!(result.is_ok()); + } +} diff --git a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs index e1e0ff2..9499f19 100644 --- a/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs +++ b/crates/redeem-properties/src/models/ccs_cnn_tf_model.rs @@ -3,13 +3,14 @@ use candle_core::{DType, Device, IndexOp, Tensor}; use candle_nn::{Dropout, Module, VarBuilder, VarMap}; use std::collections::HashMap; use std::path::Path; +use std::sync::Arc; use crate::building_blocks::building_blocks::{ DecoderLinear, Encoder26aaModChargeCnnTransformerAttnSum, 
MOD_FEATURE_SIZE, }; use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map}; use crate::utils::peptdeep_utils::{ - load_mod_to_feature, + load_mod_to_feature_arc, parse_model_constants, ModelConstants, }; use crate::utils::utils::get_tensor_stats; @@ -27,7 +28,7 @@ pub struct CCSCNNTFModel { varmap: VarMap, constants: ModelConstants, device: Device, - mod_to_feature: HashMap>, + mod_to_feature: HashMap, Vec>, dropout: Dropout, ccs_encoder: Encoder26aaModChargeCnnTransformerAttnSum, ccs_decoder: DecoderLinear, @@ -69,7 +70,7 @@ impl ModelInterface for CCSCNNTFModel { log::trace!("[CCSCNNTFModel] Initializing ccs_decoder"); let ccs_decoder = DecoderLinear::new(129, 1, &varbuilder.pp("ccs_decoder"))?; let constants = ModelConstants::default(); - let mod_to_feature = load_mod_to_feature(&constants)?; + let mod_to_feature = load_mod_to_feature_arc(&constants)?; Ok(Self { var_store: varbuilder, @@ -104,7 +105,7 @@ impl ModelInterface for CCSCNNTFModel { None => ModelConstants::default(), }; - let mod_to_feature = load_mod_to_feature(&constants)?; + let mod_to_feature = load_mod_to_feature_arc(&constants)?; let dropout = Dropout::new(0.1); let ccs_encoder = Encoder26aaModChargeCnnTransformerAttnSum::from_varstore( @@ -211,7 +212,7 @@ impl ModelInterface for CCSCNNTFModel { self.constants.mod_elements.len() } - fn get_mod_to_feature(&self) -> &HashMap> { + fn get_mod_to_feature(&self) -> &HashMap, Vec> { &self.mod_to_feature } @@ -257,20 +258,17 @@ mod tests { let device = Device::Cpu; let model = Box::new(CCSCNNTFModel::new_untrained(device.clone()).unwrap()); - let peptide_sequences = "AGHCEWQMKYR"; - let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; - let mod_sites = "0;4;8"; + let seq = Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()); + let mods = + Arc::from(b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M".to_vec().into_boxed_slice()); + let mod_sites = Arc::from(b"0;4;8".to_vec().into_boxed_slice()); let charge = Some(2); let nce = Some(20); - let instrument = Some("QE"); + let instrument = Some(Arc::from(b"QE".to_vec().into_boxed_slice())); - let result = - model.encode_peptide(&peptide_sequences, mods, mod_sites, charge, nce, instrument); + let result = model.encode_peptide(&seq, &mods, &mod_sites, charge, nce, instrument.as_ref()); println!("{:?}", result); - - // assert!(result.is_ok()); - // let encoded_peptides = result.unwrap(); - // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1)); + assert!(result.is_ok()); } } \ No newline at end of file diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs index a9f667c..a61b2e3 100644 --- a/crates/redeem-properties/src/models/ccs_model.rs +++ b/crates/redeem-properties/src/models/ccs_model.rs @@ -1,13 +1,14 @@ -use std::path::Path; -use candle_core::Device; -use anyhow::{Result, anyhow}; -use crate::models::model_interface::{ModelInterface,PredictionResult}; use crate::models::ccs_cnn_lstm_model::CCSCNNLSTMModel; use crate::models::ccs_cnn_tf_model::CCSCNNTFModel; +use crate::models::model_interface::{ModelInterface, PredictionResult}; use crate::utils::data_handling::PeptideData; +use crate::utils::peptdeep_utils::ModificationMap; use crate::utils::stats::TrainingStepMetrics; +use anyhow::{anyhow, Result}; +use candle_core::Device; use std::collections::HashMap; -use crate::utils::peptdeep_utils::ModificationMap; +use std::path::Path; +use std::sync::Arc; // Enum for different 
types of CCS models pub enum CCSModelArch { @@ -26,32 +27,92 @@ pub struct CCSModelWrapper { impl Clone for CCSModelWrapper { fn clone(&self) -> Self { CCSModelWrapper { - model: self.model.clone(), + model: self.model.clone(), } } } impl CCSModelWrapper { - pub fn new>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { + pub fn new>( + model_path: P, + constants_path: P, + arch: &str, + device: Device, + ) -> Result { let model: Box = match arch { - "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), - "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), + "ccs_cnn_lstm" => Box::new(CCSCNNLSTMModel::new( + model_path, + Some(constants_path), + 0, + 8, + 4, + true, + device, + )?), + "ccs_cnn_tf" => Box::new(CCSCNNTFModel::new( + model_path, + Some(constants_path), + 0, + 8, + 4, + true, + device, + )?), _ => return Err(anyhow!("Unsupported CCS model architecture: {}", arch)), }; Ok(Self { model }) } - pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>, charge: Vec) -> Result { - self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), None, None) + pub fn predict( + &self, + peptide_sequence: &[Arc<[u8]>], + mods: &[Arc<[u8]>], + mod_sites: &[Arc<[u8]>], + charge: Vec, + ) -> Result { + self.model + .predict(peptide_sequence, mods, mod_sites, Some(charge), None, None) } - pub fn train(&mut self, training_data: &Vec, val_data: Option<&Vec>, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result { - self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience) + pub fn train( + &mut self, + training_data: &Vec, + val_data: Option<&Vec>, + modifications: HashMap<(String, Option), ModificationMap>, + batch_size: usize, + val_batch_size: usize, + learning_rate: f64, + epochs: usize, + early_stopping_patience: usize, + ) -> Result { + self.model.train( + training_data, + val_data, + modifications, + batch_size, + val_batch_size, + learning_rate, + epochs, + early_stopping_patience, + ) } - pub fn fine_tune(&mut self, training_data: &Vec, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, learning_rate: f64, epochs: usize) -> Result<()> { - self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs) + pub fn fine_tune( + &mut self, + training_data: &Vec, + modifications: HashMap<(String, Option), ModificationMap>, + batch_size: usize, + learning_rate: f64, + epochs: usize, + ) -> Result<()> { + self.model.fine_tune( + training_data, + modifications, + batch_size, + learning_rate, + epochs, + ) } pub fn set_evaluation_mode(&mut self) { @@ -76,7 +137,12 @@ impl CCSModelWrapper { } // Public API Function to load a new CCS model -pub fn load_collision_cross_section_model>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { +pub fn load_collision_cross_section_model>( + model_path: P, + constants_path: P, + arch: &str, + device: Device, +) -> Result { CCSModelWrapper::new(model_path, constants_path, arch, device) } @@ -92,7 +158,7 @@ pub fn load_collision_cross_section_model>(model_path: P, constan // fn peptide_ccs_prediction() { // let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth"); // let constants_path = 
PathBuf::from("data/models/alphapeptdeep/generic/ccs.pth.model_const.yaml"); - + // assert!( // model_path.exists(), // "\n╔══════════════════════════════════════════════════════════════════╗\n\ @@ -107,7 +173,7 @@ pub fn load_collision_cross_section_model>(model_path: P, constan // ╚══════════════════════════════════════════════════════════════════╝\n", // model_path // ); - + // assert!( // constants_path.exists(), // "\n╔══════════════════════════════════════════════════════════════════╗\n\ @@ -124,12 +190,12 @@ pub fn load_collision_cross_section_model>(model_path: P, constan // ); // let result = load_collision_cross_section_model(&model_path, &constants_path, "ccs_cnn_lstm", Device::Cpu); - + // assert!(result.is_ok(), "Failed to load model: {:?}", result.err()); // let mut model = result.unwrap(); // // model.print_summary(); - + // // Print the model's weights // // model.print_weights(); @@ -164,4 +230,4 @@ pub fn load_collision_cross_section_model>(model_path: P, constan // }, // } // } -// } \ No newline at end of file +// } diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 8d67d51..2353c44 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -1,11 +1,18 @@ use crate::{ - building_blocks::featurize::{self, aa_indices_tensor, get_mod_features_from_parsed}, + building_blocks::featurize::{ + self, aa_indices_tensor, aa_indices_tensor_from_arc, get_mod_features_from_parsed, + get_mod_features_from_parsed_arc, + }, models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper}, utils::{ - data_handling::{PeptideBatchData, PeptideData, RTNormalization}, logging::Progress, peptdeep_utils::{ + data_handling::{PeptideBatchData, PeptideData, RTNormalization}, + logging::Progress, + peptdeep_utils::{ get_modification_indices, get_modification_string, parse_instrument_index, remove_mass_shift, - }, stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, utils::{get_tensor_stats, CosineWithWarmup, LRScheduler} + }, + stats::{compute_loss_stats, Metrics, TrainingPhase, TrainingStepMetrics}, + utils::{get_tensor_stats, CosineWithWarmup, LRScheduler}, }, }; use anyhow::{Context, Result}; @@ -13,9 +20,13 @@ use candle_core::{DType, Device, Tensor, Var}; use candle_nn::{Optimizer, VarMap}; use log::info; use rayon::prelude::*; -use std::{ops::{Deref, Index}, process::Output}; use std::path::Path; use std::{collections::HashMap, path::PathBuf}; +use std::{ + ops::{Deref, Index}, + process::Output, + sync::Arc, +}; // Constants const CHARGE_FACTOR: f64 = 0.1; @@ -231,30 +242,35 @@ pub trait ModelInterface: Send + Sync + ModelClone { /// Predict the property for a batch of peptide sequences. /// /// # Arguments - /// * `peptide_sequences` - A vector of peptide sequences. - /// * `mods` - A vector of strings representing the modifications for each peptide. - /// * `mod_sites` - A vector of strings representing the modification sites for each peptide. - /// * `charge` - An optional vector of charge states for each peptide. - /// * `nce` - An optional vector of nominal collision energies for each peptide. - /// * `instrument` - An optional vector of instrument names for each peptide. + /// * `peptide_sequences` - A slice of `Arc<[u8]>` containing each peptide sequence. + /// * `mods` - A slice of `Arc<[u8]>` with modifications for each peptide. 
+ /// * `mod_sites` - A slice of `Arc<[u8]>` representing modification sites per peptide. + /// * `charges` - Optional vector of charge states. + /// * `nces` - Optional vector of normalized collision energies. + /// * `instruments` - Optional vector of instrument names as `Arc<[u8]>`. /// /// # Returns - /// A vector of predicted retention times. + /// A `PredictionResult` containing either RT, CCS, or MS2 predictions. fn predict( &self, - peptide_sequences: &Vec<&str>, - mods: &Vec<&str>, - mod_sites: &Vec<&str>, - charge: Option>, - nce: Option>, - instrument: Option<&Vec<&str>>, + peptide_sequences: &[Arc<[u8]>], + mods: &[Arc<[u8]>], + mod_sites: &[Arc<[u8]>], + charges: Option>, + nces: Option>, + instruments: Option>>>, ) -> Result { - // Encode the batch of peptides let input_tensor = self - .encode_peptides(peptide_sequences, mods, mod_sites, charge, nce, instrument.cloned())? + .encode_peptides( + peptide_sequences, + mods, + mod_sites, + charges, + nces, + instruments, + )? .to_device(self.get_device())?; - // Forward pass through the model let output = self.forward(&input_tensor)?; match self.property_type() { @@ -268,7 +284,6 @@ pub trait ModelInterface: Send + Sync + ModelClone { } PropertyType::MS2 => { let out = self.process_predictions(&output, self.get_min_pred_intensity())?; - // Each prediction per peptide is a vector of vectors of f32, i.e. Number of fragment ions by number of ion types ordered as b_z1, b_z2, y_z1, y_z2, b_modloss_z1, b_modloss_z2, y_modloss_z1, y_modloss_z2 let predictions: Vec>> = out.to_vec3()?; Ok(PredictionResult::MS2Result(predictions)) } @@ -282,31 +297,40 @@ pub trait ModelInterface: Send + Sync + ModelClone { /// Encode peptide sequence (plus modifications) into a tensor. fn encode_peptide( &self, - peptide_sequence: &str, - mods: &str, - mod_sites: &str, + peptide_sequence: &Arc<[u8]>, + mods: &Arc<[u8]>, + mod_sites: &Arc<[u8]>, charge: Option, nce: Option, - instrument: Option<&str>, + instrument: Option<&Arc<[u8]>>, ) -> Result { let device = self.get_device(); let mod_feature_size = self.get_mod_element_count(); let mod_to_feature = self.get_mod_to_feature(); - log::trace!("[ModelInterface::encode_peptide] peptide_sequence: {} | mods: {} | mod_sites: {} | charge: {:?} | nce: {:?} | instrument: {:?}", peptide_sequence, mods, mod_sites, charge, nce, instrument); - - let aa_tensor = aa_indices_tensor(peptide_sequence, device)?; + log::trace!( + "[ModelInterface::encode_peptide] peptide_sequence: {:?} | mods: {:?} | mod_sites: {:?} | charge: {:?} | nce: {:?} | instrument: {:?}", + peptide_sequence, mods, mod_sites, charge, nce, instrument + ); + let aa_tensor = aa_indices_tensor_from_arc(peptide_sequence, device)?; let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?; - let mod_names: Vec<&str> = mods.split(';').filter(|s| !s.is_empty()).collect(); - let mod_indices: Vec = mod_sites + let mod_names: Vec> = std::str::from_utf8(mods) + .unwrap_or("") + .split(';') + .filter(|s| !s.is_empty()) + .map(|s| Arc::from(s.as_bytes().to_vec().into_boxed_slice())) + .collect(); + + let mod_indices: Vec = std::str::from_utf8(mod_sites) + .unwrap_or("") .split(';') .filter(|s| !s.is_empty()) .map(|s| s.parse::().unwrap()) .collect(); - let mod_tensor = get_mod_features_from_parsed( + let mod_tensor = get_mod_features_from_parsed_arc( &mod_names, &mod_indices, seq_len, @@ -338,7 +362,8 @@ pub trait ModelInterface: Send + Sync + ModelClone { } if let Some(instr) = instrument { - let instr_idx = parse_instrument_index(instr) as u32; + let instr_str = 
std::str::from_utf8(instr).unwrap_or(""); + let instr_idx = parse_instrument_index(instr_str) as u32; let instr_tensor = Tensor::from_slice(&vec![instr_idx; seq_len], &[batch_size, seq_len, 1], device)? .to_dtype(DType::F32)?; @@ -349,7 +374,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { let output = features.remove(0); let (mean, min, max) = get_tensor_stats(&output)?; if !mean.is_finite() || !min.is_finite() || !max.is_finite() { - log::error!("For Peptide = {peptide_sequence} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}"); + log::error!("For Peptide = {:?} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}", peptide_sequence); anyhow::bail!("Non-finite values found in peptide encoding output."); } Ok(output) @@ -357,7 +382,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { let output = Tensor::cat(&features, 2)?; let (mean, min, max) = get_tensor_stats(&output)?; if !mean.is_finite() || !min.is_finite() || !max.is_finite() { - log::error!("For Peptide = {peptide_sequence} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}"); + log::error!("For Peptide = {:?} encode_peptides produced non-finite tensor stats: mean={mean}, min={min}, max={max}", peptide_sequence); anyhow::bail!("Non-finite values found in peptide encoding output."); } Ok(output) @@ -367,12 +392,12 @@ pub trait ModelInterface: Send + Sync + ModelClone { /// Encode a batch of peptide sequences into a tensor fn encode_peptides( &self, - peptide_sequences: &Vec<&str>, - mods: &Vec<&str>, - mod_sites: &Vec<&str>, + peptide_sequences: &[Arc<[u8]>], + mods: &[Arc<[u8]>], + mod_sites: &[Arc<[u8]>], charges: Option>, nces: Option>, - instruments: Option,>, + instruments: Option>>>, ) -> Result { let len = peptide_sequences.len(); @@ -385,14 +410,14 @@ pub trait ModelInterface: Send + Sync + ModelClone { &mod_sites[i], charges.as_ref().map(|v| v[i]), nces.as_ref().map(|v| v[i]), - instruments.as_ref().map(|v| v[i]), + instruments.as_ref().and_then(|v| v[i].as_ref()), ) }) .collect::>>()?; if tensors.is_empty() { return Err(anyhow::anyhow!( - "Encoding batch of peptides failed, the resulting tesnor batch is empty." + "Encoding batch of peptides failed, the resulting tensor batch is empty." )); } @@ -402,7 +427,6 @@ pub trait ModelInterface: Send + Sync + ModelClone { .max() .unwrap_or(0); - // Consistency check for feature dimension let expected_feat_dim = tensors .get(0) .ok_or_else(|| anyhow::anyhow!("Empty input batch"))? @@ -415,7 +439,6 @@ pub trait ModelInterface: Send + Sync + ModelClone { .map(|t| { let (_, seq_len, feat_dim) = t.shape().dims3()?; - // Check that all tensors have the same feature dimension if feat_dim != expected_feat_dim { return Err(anyhow::anyhow!( "Inconsistent feature dim: expected {}, got {}", @@ -425,7 +448,8 @@ pub trait ModelInterface: Send + Sync + ModelClone { } if seq_len < max_len { - let pad = Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?; + let pad = + Tensor::zeros(&[1, max_len - seq_len, feat_dim], t.dtype(), t.device())?; Ok(Tensor::cat(&[&t, &pad], 1)?) 
} else { Ok(t) @@ -495,7 +519,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { recalls: vec![], accuracies: vec![], }; - + let mut step_idx = 0; let mut val_step_idx = 0; @@ -505,10 +529,10 @@ pub trait ModelInterface: Send + Sync + ModelClone { }; let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; let mut lr_scheduler = CosineWithWarmup::new( - learning_rate, - warmup_steps, - total_steps, - 0.5 // one full cosine cycle + learning_rate, + warmup_steps, + total_steps, + 0.5, // one full cosine cycle ); let mut best_val_loss = f32::INFINITY; @@ -543,13 +567,15 @@ pub trait ModelInterface: Send + Sync + ModelClone { PropertyType::CCS => { let tol: Vec = targets.iter().map(|t| t * 0.02).collect(); Some(Metrics::accuracy_dynamic(&predictions, &targets, &tol)) - }, // is predicted CCS within 2% of target CCS? + } // is predicted CCS within 2% of target CCS? _ => None, }; - + step_metrics.epochs.push(epoch); step_metrics.steps.push(step_idx); - step_metrics.learning_rates.push(lr_scheduler.get_last_lr() as f64); + step_metrics + .learning_rates + .push(lr_scheduler.get_last_lr() as f64); step_metrics.losses.push(loss_val); step_metrics.phases.push(TrainingPhase::Train); step_metrics.accuracies.push(acc); @@ -583,23 +609,26 @@ pub trait ModelInterface: Send + Sync + ModelClone { .par_chunks(validation_batch_size) .enumerate() .map(|(idx, batch_data)| { - let (input_val, target_val) = self.prepare_batch_inputs(batch_data, &modifications)?; + let (input_val, target_val) = + self.prepare_batch_inputs(batch_data, &modifications)?; let predicted = self.forward(&input_val)?; let val_loss = candle_nn::loss::mse(&predicted, &target_val)?; let loss_val = val_loss.to_vec0::()?; - + let predictions = predicted.to_vec1::()?; let targets = target_val.to_vec1::()?; - + let acc = match self.property_type() { - PropertyType::RT => Some(Metrics::accuracy(&predictions, &targets, 0.5)), + PropertyType::RT => { + Some(Metrics::accuracy(&predictions, &targets, 0.5)) + } PropertyType::CCS => { let tol: Vec = targets.iter().map(|t| t * 0.02).collect(); Some(Metrics::accuracy_dynamic(&predictions, &targets, &tol)) - }, + } _ => None, }; - + Ok((loss_val, idx, lr_scheduler.get_last_lr(), acc)) }) .collect::>()?; @@ -616,7 +645,8 @@ pub trait ModelInterface: Send + Sync + ModelClone { } val_step_idx += val_results.len(); - let val_losses: Vec = val_results.iter().map(|(loss, _, _, _)| *loss).collect(); + let val_losses: Vec = + val_results.iter().map(|(loss, _, _, _)| *loss).collect(); let (avg_val_loss, std_val_loss): (f32, f32) = compute_loss_stats(&val_losses); epoch_losses.push(( @@ -652,7 +682,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { let checkpoint_path = format!( "redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), - epoch- 1 + epoch - 1 ); // Check if the prior checkpoint exists, if it does delete it if PathBuf::from(&checkpoint_path).exists() { @@ -677,7 +707,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { let checkpoint_path = format!( "redeem_{}_ckpt_model_epoch_{}.safetensors", self.get_model_arch(), - epoch- 1 + epoch - 1 ); // Check if the prior checkpoint exists, if it does delete it if PathBuf::from(&checkpoint_path).exists() { @@ -871,6 +901,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { todo!() } + /// Perform inference over a batch of peptides. 
fn inference( &self, inference_data: &Vec, @@ -887,45 +918,44 @@ pub trait ModelInterface: Send + Sync + ModelClone { inference_data.len(), num_batches ); - + let progress = Progress::new(inference_data.len(), "[inference] Batch:"); let mut result: Vec> = vec![None; inference_data.len()]; - + inference_data .par_chunks(batch_size) .enumerate() .map(|(batch_idx, batch_data)| { let start_idx = batch_idx * batch_size; let batch: PeptideBatchData = batch_data.into(); - - let naked_sequences = batch.naked_sequence_strs(); - let mods = batch.mods_strs(); - let mod_sites = batch.mod_sites_strs(); - + + let naked_sequences = &batch.naked_sequence; + let mods = &batch.mods; + let mod_sites = &batch.mod_sites; + let charges = if batch.charges.iter().all(|c| c.is_some()) { Some(batch.charges.iter().map(|c| c.unwrap()).collect::>()) } else { None }; - + let nces = if batch.nces.iter().all(|n| n.is_some()) { Some(batch.nces.iter().map(|n| n.unwrap()).collect::>()) } else { None }; - + let instruments = if batch.instruments.iter().all(|i| i.is_some()) { - let flat: Vec<&str> = batch.instrument_strs().into_iter().map(|opt| opt.unwrap()).collect(); - Some(flat) + Some(batch.instruments.clone()) } else { None }; - + let input_tensor = self - .encode_peptides(&naked_sequences, &mods, &mod_sites, charges, nces, instruments)? + .encode_peptides(naked_sequences, mods, mod_sites, charges, nces, instruments)? .to_device(self.get_device())?; let output = self.forward(&input_tensor)?; - + match self.property_type() { PropertyType::RT | PropertyType::CCS => { let predictions = output.to_vec1()?; @@ -938,7 +968,9 @@ pub trait ModelInterface: Send + Sync + ModelClone { PropertyType::RT => { peptide.retention_time = Some(match rt_norm { RTNormalization::ZScore(mean, std) => pred * std + mean, - RTNormalization::MinMax(min, max) => pred * (max - min) + min, + RTNormalization::MinMax(min, max) => { + pred * (max - min) + min + } RTNormalization::None => pred, }); } @@ -962,27 +994,27 @@ pub trait ModelInterface: Send + Sync + ModelClone { result[idx] = Some(peptide); progress.inc(); }); - + progress.finish(); Ok(result.into_iter().flatten().collect()) } - /// Extract encoded input and target tensor for a batch of peptides. fn prepare_batch_inputs( &self, batch_data: &[PeptideData], - _modifications: &HashMap<(String, Option), crate::utils::peptdeep_utils::ModificationMap>, + _modifications: &HashMap< + (String, Option), + crate::utils::peptdeep_utils::ModificationMap, + >, ) -> Result<(Tensor, Tensor)> { use rayon::prelude::*; let batch: PeptideBatchData = batch_data.into(); - let naked_sequences = batch.naked_sequence_strs(); - - let mods = batch.mods_strs(); - - let mod_sites = batch.mod_sites_strs(); + let naked_sequences = &batch.naked_sequence; + let mods = &batch.mods; + let mod_sites = &batch.mod_sites; let charges = if batch.charges.iter().all(|c| c.is_some()) { Some(batch.charges.iter().map(|c| c.unwrap()).collect::>()) @@ -997,19 +1029,13 @@ pub trait ModelInterface: Send + Sync + ModelClone { }; let instruments = if batch.instruments.iter().all(|i| i.is_some()) { - let flat: Vec<&str> = batch - .instrument_strs() - .into_iter() - .map(|opt| opt.unwrap()) - .collect(); - Some(flat) + Some(batch.instruments.clone()) } else { None }; - let input_batch = self - .encode_peptides(&naked_sequences, &mods, &mod_sites, charges, nces, instruments)? + .encode_peptides(naked_sequences, mods, mod_sites, charges, nces, instruments)? 
.to_device(self.get_device())?; let target_values: Vec = match self.property_type() { @@ -1048,7 +1074,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { fn get_mod_element_count(&self) -> usize; - fn get_mod_to_feature(&self) -> &HashMap>; + fn get_mod_to_feature(&self) -> &HashMap, Vec>; fn get_min_pred_intensity(&self) -> f32; @@ -1106,8 +1132,6 @@ pub trait ModelInterface: Send + Sync + ModelClone { } } - - /// Parameters for the `predict` method of a `ModelInterface` implementation. #[derive(Clone)] pub struct Parameters { diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 86ac41c..9bef37c 100644 --- a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -1,9 +1,9 @@ use anyhow::Result; use candle_core::{DType, Device, IndexOp, Tensor}; use candle_nn::{Dropout, Module, VarBuilder, VarMap}; -use std::collections::HashMap; use std::fmt; use std::path::Path; +use std::{collections::HashMap, sync::Arc}; use crate::{ building_blocks::building_blocks::{ @@ -13,7 +13,7 @@ use crate::{ models::model_interface::{ create_var_map, load_tensors_from_model, ModelInterface, PropertyType, }, - utils::peptdeep_utils::{load_mod_to_feature, parse_model_constants, ModelConstants}, + utils::peptdeep_utils::{load_mod_to_feature_arc, parse_model_constants, ModelConstants}, }; // Constants @@ -27,7 +27,7 @@ pub struct MS2BertModel { var_store: VarBuilder<'static>, varmap: VarMap, constants: ModelConstants, - mod_to_feature: HashMap>, + mod_to_feature: HashMap, Vec>, fixed_sequence_len: usize, // Total number of fragment types of a fragmentation position to predict num_frag_types: usize, @@ -60,8 +60,7 @@ impl ModelInterface for MS2BertModel { "ms2_bert" } - fn new_untrained(_device: Device) -> Result - { + fn new_untrained(_device: Device) -> Result { unimplemented!("Untrained model creation is not implemented for this architecture."); } @@ -88,7 +87,7 @@ impl ModelInterface for MS2BertModel { }; // Load the mod_to_feature mapping - let mod_to_feature = load_mod_to_feature(&constants)?; + let mod_to_feature = load_mod_to_feature_arc(&constants)?; let dropout = Dropout::new(0.1); @@ -355,7 +354,7 @@ impl ModelInterface for MS2BertModel { self.constants.mod_elements.len() } - fn get_mod_to_feature(&self) -> &HashMap> { + fn get_mod_to_feature(&self) -> &HashMap, Vec> { &self.mod_to_feature } @@ -461,7 +460,8 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + let model = + MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); println!("{:?}", model); } @@ -472,23 +472,25 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); - - let peptide_sequences = "AGHCEWQMKYR"; - let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; - let mod_sites = "0;4;8"; + let model = + MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + + let seq = Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()); + let mods = Arc::from( + b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .to_vec() + .into_boxed_slice(), + ); + let mod_sites = 
Arc::from(b"0;4;8".to_vec().into_boxed_slice()); let charge = Some(2); let nce = Some(20); - let instrument = Some("QE"); + let instrument = Some(Arc::from(b"QE".to_vec().into_boxed_slice())); let result = - model.encode_peptide(&peptide_sequences, mods, mod_sites, charge, nce, instrument); + model.encode_peptide(&seq, &mods, &mod_sites, charge, nce, instrument.as_ref()); println!("{:?}", result); - - // assert!(result.is_ok()); - // let encoded_peptides = result.unwrap(); - // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1)); + assert!(result.is_ok()); } #[test] @@ -497,17 +499,26 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); let device = Device::Cpu; - let model = MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); - - let peptide_sequences = vec!["AGHCEWQMKYR", "AGHCEWQMKYR"]; - let mods = vec![ - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", - ]; - let mod_sites = vec!["0;4;8", "0;4;8"]; + let model = + MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device).unwrap(); + + let seq: Arc<[u8]> = Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()); + let mods: Arc<[u8]> = Arc::from( + b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .to_vec() + .into_boxed_slice(), + ); + let mod_sites: Arc<[u8]> = Arc::from(b"0;4;8".to_vec().into_boxed_slice()); let charge = Some(vec![2, 2]); let nce = Some(vec![20, 20]); - let instrument = Some(vec!["QE", "QE"]); + let instrument = vec![ + Arc::from(b"QE".to_vec().into_boxed_slice()), + Arc::from(b"QE".to_vec().into_boxed_slice()), + ]; + + let peptide_sequences = vec![seq.clone(), seq]; + let mods = vec![mods.clone(), mods]; + let mod_sites = vec![mod_sites.clone(), mod_sites]; let input_tensor = model .encode_peptides( @@ -516,14 +527,15 @@ mod tests { &mod_sites, charge, nce, - instrument, + Some(instrument.into_iter().map(Some).collect()), ) .unwrap(); + let output = model.forward(&input_tensor).unwrap(); println!("{:?}", output); let prediction: Vec>> = output.to_vec3().unwrap(); - println!("{:?}", prediction); + assert_eq!(prediction.len(), 2); } } diff --git a/crates/redeem-properties/src/models/ms2_model.rs b/crates/redeem-properties/src/models/ms2_model.rs index 176b7e7..0a63d72 100644 --- a/crates/redeem-properties/src/models/ms2_model.rs +++ b/crates/redeem-properties/src/models/ms2_model.rs @@ -1,11 +1,12 @@ -use std::path::Path; -use candle_core::{Device, Tensor}; -use anyhow::{Result, anyhow}; -use crate::models::model_interface::{ModelInterface,PredictionResult}; +use crate::models::model_interface::{ModelInterface, PredictionResult}; use crate::models::ms2_bert_model::MS2BertModel; use crate::utils::data_handling::PeptideData; -use std::collections::HashMap; use crate::utils::peptdeep_utils::ModificationMap; +use anyhow::{anyhow, Result}; +use candle_core::{Device, Tensor}; +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; // Enum for different types of MS2 models pub enum MS2ModelArch { @@ -24,15 +25,28 @@ pub struct MS2ModelWrapper { impl Clone for MS2ModelWrapper { fn clone(&self) -> Self { MS2ModelWrapper { - model: self.model.clone(), + model: self.model.clone(), } } } impl MS2ModelWrapper { - pub fn new>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { + pub fn new>( + model_path: P, + constants_path: P, + arch: &str, + device: Device, + ) -> Result { let 
model: Box = match arch { - "ms2_bert" => Box::new(MS2BertModel::new(model_path, Some(constants_path), 0, 8, 4, true, device)?), + "ms2_bert" => Box::new(MS2BertModel::new( + model_path, + Some(constants_path), + 0, + 8, + 4, + true, + device, + )?), // Add other cases here as you implement more models _ => return Err(anyhow!("Unsupported MS2 model architecture: {}", arch)), }; @@ -40,12 +54,40 @@ impl MS2ModelWrapper { Ok(Self { model }) } - pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>, charge: Vec, nce: Vec, intsrument: &Vec<&str>) -> Result { - self.model.predict(peptide_sequence, mods, mod_sites, Some(charge), Some(nce), Some(intsrument)) + pub fn predict( + &self, + peptide_sequence: &[Arc<[u8]>], + mods: &[Arc<[u8]>], + mod_sites: &[Arc<[u8]>], + charge: Vec, + nce: Vec, + intsrument: Vec>>, + ) -> Result { + self.model.predict( + peptide_sequence, + mods, + mod_sites, + Some(charge), + Some(nce), + Some(intsrument), + ) } - pub fn fine_tune(&mut self, training_data: &Vec, modifications: HashMap<(String, Option), ModificationMap>, batch_size: usize, learning_rate: f64, epochs: usize) -> Result<()> { - self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs) + pub fn fine_tune( + &mut self, + training_data: &Vec, + modifications: HashMap<(String, Option), ModificationMap>, + batch_size: usize, + learning_rate: f64, + epochs: usize, + ) -> Result<()> { + self.model.fine_tune( + training_data, + modifications, + batch_size, + learning_rate, + epochs, + ) } pub fn set_evaluation_mode(&mut self) { @@ -70,7 +112,12 @@ impl MS2ModelWrapper { } // Public API Function to load a new MS2 model -pub fn load_ms2_model>(model_path: P, constants_path: P, arch: &str, device: Device) -> Result { +pub fn load_ms2_model>( + model_path: P, + constants_path: P, + arch: &str, + device: Device, +) -> Result { MS2ModelWrapper::new(model_path, constants_path, arch, device) } @@ -86,7 +133,7 @@ pub fn load_ms2_model>(model_path: P, constants_path: P, arch: &s // fn peptide_ms2_prediction() { // let model_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth"); // let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/ms2.pth.model_const.yaml"); - + // assert!( // model_path.exists(), // "\n╔══════════════════════════════════════════════════════════════════╗\n\ @@ -101,7 +148,7 @@ pub fn load_ms2_model>(model_path: P, constants_path: P, arch: &s // ╚══════════════════════════════════════════════════════════════════╝\n", // model_path // ); - + // assert!( // constants_path.exists(), // "\n╔══════════════════════════════════════════════════════════════════╗\n\ @@ -118,12 +165,12 @@ pub fn load_ms2_model>(model_path: P, constants_path: P, arch: &s // ); // let result = load_ms2_model(&model_path, &constants_path, "ms2_bert", Device::Cpu); - + // assert!(result.is_ok(), "Failed to load model: {:?}", result.err()); // let mut model = result.unwrap(); // // model.print_summary(); - + // // Print the model's weights // // model.print_weights(); @@ -162,4 +209,4 @@ pub fn load_ms2_model>(model_path: P, constants_path: P, arch: &s // }, // } // } -// } \ No newline at end of file +// } diff --git a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs index a0f3c84..80434da 100644 --- a/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs +++ b/crates/redeem-properties/src/models/rt_cnn_lstm_model.rs @@ -3,20 +3,19 @@ use candle_core::{DType, Device, 
IndexOp, Tensor}; use candle_nn::{Dropout, Module, VarBuilder, VarMap}; use std::collections::HashMap; use std::path::Path; - - +use std::sync::Arc; use crate::building_blocks::building_blocks::{ DecoderLinear, Encoder26aaModCnnLstmAttnSum, MOD_FEATURE_SIZE, }; -use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map}; +use crate::models::model_interface::{ + create_var_map, load_tensors_from_model, ModelInterface, PropertyType, +}; use crate::utils::peptdeep_utils::{ - load_mod_to_feature, - parse_model_constants, ModelConstants, + load_mod_to_feature_arc, parse_model_constants, ModelConstants, }; use crate::utils::utils::get_tensor_stats; - // Main Model Struct #[derive(Clone)] @@ -26,7 +25,7 @@ pub struct RTCNNLSTMModel { varmap: VarMap, constants: ModelConstants, device: Device, - mod_to_feature: HashMap>, + mod_to_feature: HashMap, Vec>, dropout: Dropout, rt_encoder: Encoder26aaModCnnLstmAttnSum, rt_decoder: DecoderLinear, @@ -45,11 +44,10 @@ impl ModelInterface for RTCNNLSTMModel { } fn model_arch(&self) -> &'static str { - "rt_cnn_lstm" + "rt_cnn_lstm" } - fn new_untrained(_device: Device) -> Result - { + fn new_untrained(_device: Device) -> Result { unimplemented!("Untrained model creation is not implemented for this architecture."); } @@ -63,9 +61,8 @@ impl ModelInterface for RTCNNLSTMModel { _mask_modloss: bool, device: Device, ) -> Result { - let tensor_data = load_tensors_from_model(model_path.as_ref(), &device)?; - + let mut varmap = candle_nn::VarMap::new(); create_var_map(&mut varmap, tensor_data, &device)?; let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device); @@ -76,7 +73,7 @@ impl ModelInterface for RTCNNLSTMModel { }; // Load the mod_to_feature mapping - let mod_to_feature = load_mod_to_feature(&constants)?; + let mod_to_feature = load_mod_to_feature_arc(&constants)?; // Encoder let dropout = Dropout::new(0.1); @@ -87,21 +84,33 @@ impl ModelInterface for RTCNNLSTMModel { 128, 2, vec!["rt_encoder.mod_nn.nn.weight"], - vec!["rt_encoder.input_cnn.cnn_short.weight", "rt_encoder.input_cnn.cnn_medium.weight", "rt_encoder.input_cnn.cnn_long.weight"], - vec!["rt_encoder.input_cnn.cnn_short.bias", "rt_encoder.input_cnn.cnn_medium.bias", "rt_encoder.input_cnn.cnn_long.bias"], - "rt_encoder.hidden_nn", vec![ - "rt_encoder.attn_sum.attn.0.weight", + "rt_encoder.input_cnn.cnn_short.weight", + "rt_encoder.input_cnn.cnn_medium.weight", + "rt_encoder.input_cnn.cnn_long.weight", + ], + vec![ + "rt_encoder.input_cnn.cnn_short.bias", + "rt_encoder.input_cnn.cnn_medium.bias", + "rt_encoder.input_cnn.cnn_long.bias", ], - ).unwrap(); + "rt_encoder.hidden_nn", + vec!["rt_encoder.attn_sum.attn.0.weight"], + ) + .unwrap(); let rt_decoder = DecoderLinear::from_varstore( &var_store, 256, 1, - vec!["rt_decoder.nn.0.weight", "rt_decoder.nn.1.weight", "rt_decoder.nn.2.weight"], - vec!["rt_decoder.nn.0.bias", "rt_decoder.nn.2.bias"] - ).unwrap(); + vec![ + "rt_decoder.nn.0.weight", + "rt_decoder.nn.1.weight", + "rt_decoder.nn.2.weight", + ], + vec!["rt_decoder.nn.0.bias", "rt_decoder.nn.2.bias"], + ) + .unwrap(); Ok(Self { var_store, @@ -116,21 +125,20 @@ impl ModelInterface for RTCNNLSTMModel { }) } - fn forward(&self, xs: &Tensor) -> Result { let (_batch_size, _seq_len, _) = xs.shape().dims3()?; - + let aa_indices_out = xs.i((.., .., 0))?; let (mean, min, max) = get_tensor_stats(&aa_indices_out)?; log::debug!("[RTCNNLSTMModel] aa_indices_out stats - min: {min}, max: {max}, mean: {mean}"); let mod_x_out = xs.i((.., .., 
1..1 + MOD_FEATURE_SIZE))?; - + let x = self.rt_encoder.forward(&aa_indices_out, &mod_x_out)?; - + let x = self.dropout.forward(&x, self.is_training)?; - + let x = self.rt_decoder.forward(&x)?; - + let result = x.squeeze(1)?; Ok(result) @@ -165,12 +173,15 @@ impl ModelInterface for RTCNNLSTMModel { self.constants.mod_elements.len() } - fn get_mod_to_feature(&self) -> &HashMap> { + fn get_mod_to_feature(&self) -> &HashMap, Vec> { &self.mod_to_feature } fn get_min_pred_intensity(&self) -> f32 { - unimplemented!("Method not implemented for architecture: {}", self.model_arch()) + unimplemented!( + "Method not implemented for architecture: {}", + self.model_arch() + ) } fn get_mut_varmap(&mut self) -> &mut VarMap { @@ -180,7 +191,10 @@ impl ModelInterface for RTCNNLSTMModel { /// Print a summary of the model's constants. fn print_summary(&self) { println!("RTModel Summary:"); - println!("AA Embedding Size: {}", self.constants.aa_embedding_size.unwrap()); + println!( + "AA Embedding Size: {}", + self.constants.aa_embedding_size.unwrap() + ); println!("Charge Factor: {:?}", self.constants.charge_factor); println!("Instruments: {:?}", self.constants.instruments); println!("Max Instrument Num: {}", self.constants.max_instrument_num); @@ -191,7 +205,7 @@ impl ModelInterface for RTCNNLSTMModel { /// Print the model's weights. fn print_weights(&self) { println!("RTModel Weights:"); - + // Helper function to print the first 5 values of a tensor fn print_first_5_values(tensor: &Tensor, name: &str) { let shape = tensor.shape(); @@ -199,7 +213,11 @@ impl ModelInterface for RTCNNLSTMModel { // Extract the first row if let Ok(row) = tensor.i((0, ..)) { match row.to_vec1::() { - Ok(values) => println!("{} (first 5 values of first row): {:?}", name, &values[..5.min(values.len())]), + Ok(values) => println!( + "{} (first 5 values of first row): {:?}", + name, + &values[..5.min(values.len())] + ), Err(e) => eprintln!("Error printing {}: {:?}", name, e), } } else { @@ -207,13 +225,16 @@ impl ModelInterface for RTCNNLSTMModel { } } else { match tensor.to_vec1::() { - Ok(values) => println!("{} (first 5 values): {:?}", name, &values[..5.min(values.len())]), + Ok(values) => println!( + "{} (first 5 values): {:?}", + name, + &values[..5.min(values.len())] + ), Err(e) => eprintln!("Error printing {}: {:?}", name, e), } } } - - + // Print the first 5 values of each weight tensor if let Ok(tensor) = self.var_store.get((2, 103), "rt_encoder.mod_nn.nn.weight") { print_first_5_values(&tensor, "rt_encoder.mod_nn.nn.weight"); @@ -233,31 +254,58 @@ impl ModelInterface for RTCNNLSTMModel { // if let Ok(tensor) = self.var_store.get((4, 1, 128), "rt_encoder.hidden_nn.rnn_c0") { // print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn_c0"); // } - if let Ok(tensor) = self.var_store.get((512, 140), "rt_encoder.hidden_nn.rnn.weight_ih_l0") { + if let Ok(tensor) = self + .var_store + .get((512, 140), "rt_encoder.hidden_nn.rnn.weight_ih_l0") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_ih_l0"); } - if let Ok(tensor) = self.var_store.get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l0") { + if let Ok(tensor) = self + .var_store + .get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l0") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_hh_l0"); } - if let Ok(tensor) = self.var_store.get((512, 140), "rt_encoder.hidden_nn.rnn.weight_ih_l0_reverse") { + if let Ok(tensor) = self + .var_store + .get((512, 140), "rt_encoder.hidden_nn.rnn.weight_ih_l0_reverse") + { print_first_5_values(&tensor, 
"rt_encoder.hidden_nn.rnn.weight_ih_l0_reverse"); } - if let Ok(tensor) = self.var_store.get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l0_reverse") { + if let Ok(tensor) = self + .var_store + .get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l0_reverse") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_hh_l0_reverse"); } - if let Ok(tensor) = self.var_store.get((512, 256), "rt_encoder.hidden_nn.rnn.weight_ih_l1") { + if let Ok(tensor) = self + .var_store + .get((512, 256), "rt_encoder.hidden_nn.rnn.weight_ih_l1") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_ih_l1"); } - if let Ok(tensor) = self.var_store.get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l1") { + if let Ok(tensor) = self + .var_store + .get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l1") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_hh_l1"); } - if let Ok(tensor) = self.var_store.get((512, 256), "rt_encoder.hidden_nn.rnn.weight_ih_l1_reverse") { + if let Ok(tensor) = self + .var_store + .get((512, 256), "rt_encoder.hidden_nn.rnn.weight_ih_l1_reverse") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_ih_l1_reverse"); } - if let Ok(tensor) = self.var_store.get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l1_reverse") { + if let Ok(tensor) = self + .var_store + .get((512, 128), "rt_encoder.hidden_nn.rnn.weight_hh_l1_reverse") + { print_first_5_values(&tensor, "rt_encoder.hidden_nn.rnn.weight_hh_l1_reverse"); } - if let Ok(tensor) = self.var_store.get((1, 256), "rt_encoder.attn_sum.attn.0.weight") { + if let Ok(tensor) = self + .var_store + .get((1, 256), "rt_encoder.attn_sum.attn.0.weight") + { print_first_5_values(&tensor, "rt_encoder.attn_sum.attn.0.weight"); } if let Ok(tensor) = self.var_store.get((256, 256), "rt_decoder.nn.0.weight") { @@ -270,8 +318,6 @@ impl ModelInterface for RTCNNLSTMModel { print_first_5_values(&tensor, "rt_decoder.nn.2.weight"); } } - - } // Module Trait Implementation @@ -282,7 +328,6 @@ impl ModelInterface for RTCNNLSTMModel { // } // } - #[cfg(test)] mod tests { use crate::models::model_interface::{ModelInterface, PredictionResult}; @@ -293,7 +338,7 @@ mod tests { use super::*; #[test] - fn test_tensor_from_pth(){ + fn test_tensor_from_pth() { let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); let tensor_data = candle_core::pickle::read_all(model_path).unwrap(); println!("{:?}", tensor_data); @@ -319,152 +364,150 @@ mod tests { let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); - - let peptide_sequences = "AGHCEWQMKYR"; - let mods = "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M"; - let mod_sites = "0;4;8"; - // let charge = Some(2); - // let nce = Some(20); - // let instrument = Some("QE"); + let model = + RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); + + let seq = Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()); + let mods = Arc::from( + b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .to_vec() + .into_boxed_slice(), + ); + let mod_sites = Arc::from(b"0;4;8".to_vec().into_boxed_slice()); + let charge = Some(2); + let nce = Some(20); + let instrument = Some(Arc::from(b"QE".to_vec().into_boxed_slice())); let result = - model.encode_peptide(&peptide_sequences, mods, mod_sites, None, None, None); + model.encode_peptide(&seq, &mods, &mod_sites, 
charge, nce, instrument.as_ref()); println!("{:?}", result); - - // assert!(result.is_ok()); - // let encoded_peptides = result.unwrap(); - // assert_eq!(encoded_peptides.shape().dims2().unwrap(), (1, 27 + 109 + 1 + 1 + 1)); + assert!(result.is_ok()); } #[test] fn test_encode_peptides_batch() { - let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); - let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); + let constants_path = + PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::Cpu; - let model = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device.clone()).unwrap(); - - // Batched input - let peptide_sequences = vec![ - "ACDEFGHIK", - "AGHCEWQMKYR", + let model = RTCNNLSTMModel::new( + &model_path, + Some(&constants_path), + 0, + 8, + 4, + true, + device.clone(), + ) + .unwrap(); + + let naked_sequence = vec![ + Arc::from(b"ACDEFGHIK".to_vec().into_boxed_slice()), + Arc::from(b"AGHCEWQMKYR".to_vec().into_boxed_slice()), ]; let mods = vec![ - "Carbamidomethyl@C", - "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", + Arc::from(b"Carbamidomethyl@C".to_vec().into_boxed_slice()), + Arc::from( + b"Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M" + .to_vec() + .into_boxed_slice(), + ), ]; let mod_sites = vec![ - "1", - "0;4;8", + Arc::from(b"1".to_vec().into_boxed_slice()), + Arc::from(b"0;4;8".to_vec().into_boxed_slice()), ]; - println!("Peptides: {:?}", peptide_sequences); - println!("Mods: {:?}", mods); - println!("Mod sites: {:?}", mod_sites); - - - let result = model.encode_peptides( - &peptide_sequences, - &mods, - &mod_sites, - None, - None, - None, - ); + let result = model.encode_peptides(&naked_sequence, &mods, &mod_sites, None, None, None); assert!(result.is_ok()); let tensor = result.unwrap(); println!("Batched encoded tensor shape: {:?}", tensor.shape()); let (batch, seq_len, feat_dim) = tensor.shape().dims3().unwrap(); - assert_eq!(batch, 2); // two peptides - assert!(seq_len >= 11); // padded to max length - assert!(feat_dim > 1); // includes aa + mod features + assert_eq!(batch, 2); + assert!(seq_len >= 11); + assert!(feat_dim > 1); } - #[test] fn test_prediction() { - let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth"); - let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); + let constants_path = + PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml"); let device = Device::new_cuda(0).unwrap_or(Device::Cpu); - let result = RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device); - let mut model = result.unwrap(); + let mut model = + RTCNNLSTMModel::new(&model_path, Some(&constants_path), 0, 8, 4, true, device).unwrap(); let test_peptides = vec![ - ("AGHCEWQMKYR", "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", "0;4;8", 0.2945), + ( + "AGHCEWQMKYR", + "Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M", + "0;4;8", + 0.2945, + ), ("QPYAVSELAGHQTSAESWGTGR", "", "", 0.4328955), ("GMSVSDLADKLSTDDLNSLIAHAHR", "Oxidation@M", "1", 0.6536107), - ("TVQHHVLFTDNMVLICR", "Oxidation@M;Carbamidomethyl@C", "11;15", 0.7811949), + ( + "TVQHHVLFTDNMVLICR", + "Oxidation@M;Carbamidomethyl@C", + "11;15", + 0.7811949, + ), ("EAELDVNEELDKK", "", "", 0.2934583), ("YTPVQQGPVGVNVTYGGDPIPK", "", "", 0.5863009), ("YYAIDFTLDEIK", "", "", 0.8048359), ("VSSLQAEPLPR", "", "", 0.3201348), - ("NHAVVCQGCHNAIDPEVQR", 
"Carbamidomethyl@C;Carbamidomethyl@C", "5;8", 0.1730425), + ( + "NHAVVCQGCHNAIDPEVQR", + "Carbamidomethyl@C;Carbamidomethyl@C", + "5;8", + 0.1730425, + ), ("IPNIYAIGDVVAGPMLAHK", "", "", 0.8220097), - ("AELGIPLEEVPPEEINYLTR", "", "", 0.8956433), - ("NESTPPSEELELDKWK", "", "", 0.4471560), - ("SIQEIQELDKDDESLR", "", "", 0.4157068), - ("EMEENFAVEAANYQDTIGR", "Oxidation@M", "1", 0.6388353), - ("MDSFDEDLARPSGLLAQER", "Oxidation@M", "0", 0.5593624), - ("SLLTEADAGHTEFTDEVYQNESR", "", "", 0.5538696), - ("NQDLAPNSAEQASILSLVTK", "", "", 0.7682227), - ("GKVEEVELPVEK", "", "", 0.2943246), - ("IYVASVHQDLSDDDIK", "", "", 0.3847130), - ("IKGDMDISVPK", "", "", 0.2844255), - ("IIPVLLEHGLER", "", "", 0.5619017), - ("AGYTDKVVIGMDVAASEFFR", "", "", 0.8972052), - ("TDYNASVSVPDSSGPER", "", "", 0.3279318), - ("DLKPQNLLINTEGAIK", "", "", 0.6046495), - ("VAEAIAASFGSFADFK", "", "", 0.8935943), - ("AMVSNAQLDNEK", "Oxidation@M", "1", 0.1724159), - ("THINIVVIGHVDSGK", "", "", 0.4865058), - ("LILPHVDIQLK", "", "", 0.6268850), - ("LIAPVAEEEATVPNNK", "", "", 0.4162872), - ("FTASAGIQVVGDDLTVTNPK", "", "", 0.7251064), - ("HEDLKDMLEFPAQELR", "", "", 0.6529368), - ("LLPDFLLER", "", "", 0.7852863), ]; - let peptides: Vec<&str> = test_peptides.iter().map(|(pep, _, _, _)| *pep).collect(); - let mods: Vec<&str> = test_peptides.iter().map(|(_, mod_, _, _)| *mod_).collect(); - let mod_sites: Vec<&str> = test_peptides.iter().map(|(_, _, sites, _)| *sites).collect(); + let peptides: Vec> = test_peptides + .iter() + .map(|(pep, _, _, _)| Arc::from(pep.as_bytes().to_vec().into_boxed_slice())) + .collect(); + let mods: Vec> = test_peptides + .iter() + .map(|(_, mod_, _, _)| Arc::from(mod_.as_bytes().to_vec().into_boxed_slice())) + .collect(); + let mod_sites: Vec> = test_peptides + .iter() + .map(|(_, _, sites, _)| Arc::from(sites.as_bytes().to_vec().into_boxed_slice())) + .collect(); let observed_rts: Vec = test_peptides.iter().map(|(_, _, _, rt)| *rt).collect(); match model.predict(&peptides, &mods, &mod_sites, None, None, None) { - Ok(predictions) => { - if let PredictionResult::RTResult(rt_preds) = predictions { - let total_error: f32 = rt_preds.iter().zip(observed_rts.iter()) - .map(|(pred, obs)| (pred - obs).abs()) - .sum(); - - let mut peptides_iter = peptides.iter(); - let mut rt_preds_iter = rt_preds.iter(); - let mut observed_rts_iter = observed_rts.iter(); - - loop { - match (peptides_iter.next(), rt_preds_iter.next(), observed_rts_iter.next()) { - (Some(pep), Some(pred), Some(obs)) => { - println!("Peptide: {}, Predicted RT: {}, Observed RT: {}", pep, pred, obs); - } - _ => break, - } - } - - let mean_absolute_error = total_error / rt_preds.len() as f32; - println!("Mean Absolute Error: {:.6}", mean_absolute_error); - } else { - println!("Unexpected prediction result type."); + Ok(PredictionResult::RTResult(rt_preds)) => { + let total_error: f32 = rt_preds + .iter() + .zip(observed_rts.iter()) + .map(|(pred, obs)| (pred - obs).abs()) + .sum(); + + for ((pep_bytes, pred), obs) in peptides + .iter() + .zip(rt_preds.iter()) + .zip(observed_rts.iter()) + { + let pep = std::str::from_utf8(pep_bytes).unwrap_or(""); + println!( + "Peptide: {}, Predicted RT: {}, Observed RT: {}", + pep, pred, obs + ); } + + let mean_absolute_error = total_error / rt_preds.len() as f32; + println!("Mean Absolute Error: {:.6}", mean_absolute_error); } - Err(e) => { - println!("Error during batch prediction: {:?}", e); - } + Ok(_) => println!("Unexpected prediction result type."), + Err(e) => println!("Error during batch prediction: {:?}", e), } } - } 
diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
index 0231fe7..3743261 100644
--- a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
@@ -3,13 +3,14 @@ use candle_core::{DType, Device, IndexOp, Tensor};
 use candle_nn::{Dropout, Module, VarBuilder, VarMap};
 use std::collections::HashMap;
 use std::path::Path;
+use std::sync::Arc;

 use crate::building_blocks::building_blocks::{
     DecoderLinear, Encoder26aaModCnnTransformerAttnSum, MOD_FEATURE_SIZE,
 };
 use crate::models::model_interface::{ModelInterface, PropertyType, load_tensors_from_model, create_var_map};
 use crate::utils::peptdeep_utils::{
-    load_mod_to_feature,
+    load_mod_to_feature_arc,
     parse_model_constants,
     ModelConstants,
 };
 use crate::utils::utils::get_tensor_stats;
@@ -24,7 +25,7 @@ pub struct RTCNNTFModel {
     varmap: VarMap,
     constants: ModelConstants,
     device: Device,
-    mod_to_feature: HashMap<String, Vec<f32>>,
+    mod_to_feature: HashMap<Arc<[u8]>, Vec<f32>>,
     dropout: Dropout,
     rt_encoder: Encoder26aaModCnnTransformerAttnSum,
     rt_decoder: DecoderLinear,
@@ -66,7 +67,7 @@ impl ModelInterface for RTCNNTFModel {
         log::trace!("[RTCNNTFModel] Initializing rt_decoder");
         let rt_decoder = DecoderLinear::new(128, 1, &varbuilder.pp("rt_decoder"))?;
         let constants = ModelConstants::default();
-        let mod_to_feature = load_mod_to_feature(&constants)?;
+        let mod_to_feature = load_mod_to_feature_arc(&constants)?;

         Ok(Self {
             var_store: varbuilder,
@@ -101,7 +102,7 @@ impl ModelInterface for RTCNNTFModel {
             None => ModelConstants::default(),
         };

-        let mod_to_feature = load_mod_to_feature(&constants)?;
+        let mod_to_feature = load_mod_to_feature_arc(&constants)?;

         let dropout = Dropout::new(0.1);

         let rt_encoder = Encoder26aaModCnnTransformerAttnSum::from_varstore(
@@ -195,7 +196,7 @@ impl ModelInterface for RTCNNTFModel {
         self.constants.mod_elements.len()
     }

-    fn get_mod_to_feature(&self) -> &HashMap<String, Vec<f32>> {
+    fn get_mod_to_feature(&self) -> &HashMap<Arc<[u8]>, Vec<f32>> {
         &self.mod_to_feature
     }

diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs
index c41f056..0044252 100644
--- a/crates/redeem-properties/src/models/rt_model.rs
+++ b/crates/redeem-properties/src/models/rt_model.rs
@@ -1,22 +1,23 @@
 // rt_model.rs

-use std::path::Path;
-use std::ops::Deref;
-use candle_core::{Device, Tensor};
-use anyhow::{Result, anyhow};
-use candle_nn::VarMap;
-use crate::models::model_interface::{ModelInterface,PredictionResult};
+use crate::models::model_interface::{ModelInterface, PredictionResult};
 use crate::models::rt_cnn_lstm_model::RTCNNLSTMModel;
 use crate::models::rt_cnn_transformer_model::RTCNNTFModel;
 use crate::utils::data_handling::{PeptideData, RTNormalization};
+use crate::utils::peptdeep_utils::ModificationMap;
 use crate::utils::stats::TrainingStepMetrics;
+use anyhow::{anyhow, Result};
+use candle_core::{Device, Tensor};
+use candle_nn::VarMap;
 use std::collections::HashMap;
-use crate::utils::peptdeep_utils::ModificationMap;
+use std::ops::Deref;
+use std::path::Path;
+use std::sync::Arc;

 // Enum for different types of retention time models
 pub enum RTModelArch {
     RTCNNLSTM,
-    RTCNNTF
+    RTCNNTF,
 }

 // Constants for different types of retention time models
@@ -35,32 +36,97 @@ impl Clone for RTModelWrapper {
     }
 }

-
 impl RTModelWrapper {
-    pub fn new<P: AsRef<Path>>(model_path: P, constants_path: Option<P>, arch: &str, device: Device) -> Result<Self> {
+    pub fn new<P: AsRef<Path>>(
+        model_path: P,
+        constants_path: Option<P>,
+        arch: &str,
+        device: Device,
+    ) -> Result<Self> {
         let model: Box<dyn ModelInterface> = match arch {
-            "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(model_path, constants_path, 0, 8, 4, true, device)?),
-            "rt_cnn_tf" => Box::new(RTCNNTFModel::new(model_path, constants_path, 0, 8, 4, true, device)?),
+            "rt_cnn_lstm" => Box::new(RTCNNLSTMModel::new(
+                model_path,
+                constants_path,
+                0,
+                8,
+                4,
+                true,
+                device,
+            )?),
+            "rt_cnn_tf" => Box::new(RTCNNTFModel::new(
+                model_path,
+                constants_path,
+                0,
+                8,
+                4,
+                true,
+                device,
+            )?),
             _ => return Err(anyhow!("Unsupported RT model architecture: {}", arch)),
         };
         Ok(Self { model })
     }

-    pub fn predict(&self, peptide_sequence: &Vec<&str>, mods: &Vec<&str>, mod_sites: &Vec<&str>) -> Result<PredictionResult> {
-        self.model.predict(peptide_sequence, mods, mod_sites, None, None, None)
+    pub fn predict(
+        &self,
+        peptide_sequence: &[Arc<[u8]>],
+        mods: &[Arc<[u8]>],
+        mod_sites: &[Arc<[u8]>],
+    ) -> Result<PredictionResult> {
+        self.model
+            .predict(peptide_sequence, mods, mod_sites, None, None, None)
     }

-    pub fn train(&mut self, training_data: &Vec<PeptideData>, val_data: Option<&Vec<PeptideData>>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size: usize, val_batch_size: usize, learning_rate: f64, epochs: usize, early_stopping_patience: usize) -> Result<TrainingStepMetrics> {
-        self.model.train(training_data, val_data, modifications, batch_size, val_batch_size, learning_rate, epochs, early_stopping_patience)
+    pub fn train(
+        &mut self,
+        training_data: &Vec<PeptideData>,
+        val_data: Option<&Vec<PeptideData>>,
+        modifications: HashMap<(String, Option<char>), ModificationMap>,
+        batch_size: usize,
+        val_batch_size: usize,
+        learning_rate: f64,
+        epochs: usize,
+        early_stopping_patience: usize,
+    ) -> Result<TrainingStepMetrics> {
+        self.model.train(
+            training_data,
+            val_data,
+            modifications,
+            batch_size,
+            val_batch_size,
+            learning_rate,
+            epochs,
+            early_stopping_patience,
+        )
     }

-    pub fn fine_tune(&mut self, training_data: &Vec<PeptideData>, modifications: HashMap<(String, Option<char>), ModificationMap>, batch_size:usize, learning_rate: f64, epochs: usize) -> Result<()> {
-        self.model.fine_tune(training_data, modifications, batch_size, learning_rate, epochs)
+    pub fn fine_tune(
+        &mut self,
+        training_data: &Vec<PeptideData>,
+        modifications: HashMap<(String, Option<char>), ModificationMap>,
+        batch_size: usize,
+        learning_rate: f64,
+        epochs: usize,
+    ) -> Result<()> {
+        self.model.fine_tune(
+            training_data,
+            modifications,
+            batch_size,
+            learning_rate,
+            epochs,
+        )
     }

-    pub fn inference(&mut self, inference_data: &Vec<PeptideData>, batch_size: usize, modifications: HashMap<(String, Option<char>), ModificationMap>, rt_norm_params: RTNormalization,) -> Result<Vec<PeptideData>> {
-        self.model.inference(inference_data, batch_size, modifications, rt_norm_params)
+    pub fn inference(
+        &mut self,
+        inference_data: &Vec<PeptideData>,
+        batch_size: usize,
+        modifications: HashMap<(String, Option<char>), ModificationMap>,
+        rt_norm_params: RTNormalization,
+    ) -> Result<Vec<PeptideData>> {
+        self.model
+            .inference(inference_data, batch_size, modifications, rt_norm_params)
     }

     pub fn set_evaluation_mode(&mut self) {
@@ -85,7 +151,12 @@ impl RTModelWrapper {
 }

 // Public API Function to load a new RT model
-pub fn load_retention_time_model<P: AsRef<Path>>(model_path: P, constants_path: Option<P>, arch: &str, device: Device) -> Result<RTModelWrapper> {
+pub fn load_retention_time_model<P: AsRef<Path>>(
+    model_path: P,
+    constants_path: Option<P>,
+    arch: &str,
+    device: Device,
+) -> Result<RTModelWrapper> {
     RTModelWrapper::new(model_path, constants_path, arch, device)
 }

@@ -101,7 +172,7 @@ pub fn load_retention_time_model<P: AsRef<Path>>(model_path: P, constants_path:
 //     let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth");
 //     // let model_path = PathBuf::from("data/models/alphapeptdeep/generic/rt_resaved_model.pth");
 //     let constants_path = PathBuf::from("data/models/alphapeptdeep/generic/rt.pth.model_const.yaml");
-    
+
 //     assert!(
 //         model_path.exists(),
 //         "\n╔══════════════════════════════════════════════════════════════════╗\n\
@@ -116,7 +187,7 @@ pub fn load_retention_time_model<P: AsRef<Path>>(model_path: P, constants_path:
 //         ╚══════════════════════════════════════════════════════════════════╝\n",
 //         model_path
 //     );
-    
+
 //     assert!(
 //         constants_path.exists(),
 //         "\n╔══════════════════════════════════════════════════════════════════╗\n\
@@ -133,12 +204,12 @@ pub fn load_retention_time_model<P: AsRef<Path>>(model_path: P, constants_path:
 //         ╚══════════════════════════════════════════════════════════════════╝\n",
 //         constants_path
 //     );

 //     let result = load_retention_time_model(&model_path, &constants_path, "rt_cnn_lstm", Device::Cpu);
-    
+
 //     assert!(result.is_ok(), "Failed to load model: {:?}", result.err());

 //     let mut model = result.unwrap();

 //     model.print_summary();
-    
+
 //     // Print the model's weights
 //     model.print_weights();
@@ -171,4 +242,4 @@ pub fn load_retention_time_model<P: AsRef<Path>>(model_path: P, constants_path:
 //         },
 //     }
 // }
-// }
\ No newline at end of file
+// }
diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs
index dfdfcc5..6a4757e 100644
--- a/crates/redeem-properties/src/utils/peptdeep_utils.rs
+++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs
@@ -4,6 +4,7 @@ use std::ops::Index;
 use std::path::PathBuf;
 use std::io;
 use std::fs;
+use std::sync::Arc;
 use log::info;
 use csv::ReaderBuilder;
 use reqwest;
@@ -214,6 +215,33 @@ pub fn load_mod_to_feature(constants: &ModelConstants) -> Result<HashMap<String, Vec<f32>>, Error>
+pub fn load_mod_to_feature_arc(constants: &ModelConstants) -> Result<HashMap<Arc<[u8]>, Vec<f32>>, Error> {
+    let path = ensure_mod_tsv_exists()?;
+    let mut rdr = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .from_path(path)?;
+
+    let mod_elem_to_idx: HashMap<String, usize> = constants
+        .mod_elements
+        .iter()
+        .enumerate()
+        .map(|(i, elem)| (elem.clone(), i))
+        .collect();
+
+    let mod_feature_size = constants.mod_elements.len();
+    let mut mod_to_feature = HashMap::new();
+
+    for result in rdr.deserialize() {
+        let record: ModFeature = result?;
+        let feature_vector = parse_mod_formula(&record.composition, &mod_elem_to_idx, mod_feature_size);
+        mod_to_feature.insert(Arc::from(record.mod_name.as_bytes()), feature_vector);
+    }
+
+    Ok(mod_to_feature)
+}
+
 #[derive(Debug, Clone)]
 pub struct ModificationMap {
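Note: one reason the `Arc<[u8]>` keys above stay ergonomic is that `Arc<T>` implements `Borrow<T>`, so a `HashMap<Arc<[u8]>, _>` can be queried with a plain `&[u8]` and no allocation. A small illustrative sketch, independent of the crate:

    use std::collections::HashMap;
    use std::sync::Arc;

    fn main() {
        let mut mod_to_feature: HashMap<Arc<[u8]>, Vec<f32>> = HashMap::new();
        mod_to_feature.insert(Arc::from(&b"Oxidation@M"[..]), vec![0.0, 1.0]);

        // Arc<[u8]>: Borrow<[u8]> lets get() accept any &[u8] key directly.
        let key: &[u8] = b"Oxidation@M";
        assert_eq!(mod_to_feature.get(key), Some(&vec![0.0, 1.0]));
    }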
From 905c80ac8eb81ef97600700854103f6c99114092 Mon Sep 17 00:00:00 2001
From: singjc
Date: Tue, 13 May 2025 13:04:55 -0400
Subject: [PATCH 47/75] refactor: Improve error handling in redeem-cli crate

---
 crates/redeem-cli/src/main.rs                 | 29 +++++++++++++++----
 .../src/properties/train/trainer.rs           |  2 +-
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/crates/redeem-cli/src/main.rs b/crates/redeem-cli/src/main.rs
index d9ea4dd..6ce3d47 100644
--- a/crates/redeem-cli/src/main.rs
+++ b/crates/redeem-cli/src/main.rs
@@ -164,21 +164,38 @@ fn handle_properties(matches: &ArgMatches) -> Result<()> {
         Some(("train", train_matches)) => {
             let config_path: &PathBuf = train_matches.get_one("config").unwrap();
             log::info!("[ReDeeM::Properties] Training from config: {:?}", config_path);
-            let params: PropertyTrainConfig = PropertyTrainConfig::from_arguments(config_path, train_matches)?;
-            let _ = trainer::run_training(&params);
-            Ok(())
+
+            let params: PropertyTrainConfig =
+                PropertyTrainConfig::from_arguments(config_path, train_matches)?;
+
+            match trainer::run_training(&params) {
+                Ok(_) => Ok(()),
+                Err(e) => {
+                    log::error!("Training failed: {:#}", e);
+                    std::process::exit(1)
+                }
+            }
         },
         Some(("inference", inference_matches)) => {
             let config_path: &PathBuf = inference_matches.get_one("config").unwrap();
             log::info!("[ReDeeM::Properties] Inference using config: {:?}", config_path);
-            let params: PropertyInferenceConfig = PropertyInferenceConfig::from_arguments(config_path, inference_matches)?;
-            let _ = inference::run_inference(&params);
-            Ok(())
+
+            let params: PropertyInferenceConfig =
+                PropertyInferenceConfig::from_arguments(config_path, inference_matches)?;
+
+            match inference::run_inference(&params) {
+                Ok(_) => Ok(()),
+                Err(e) => {
+                    log::error!("Inference failed: {:#}", e);
+                    std::process::exit(1)
+                }
+            }
         }
         _ => unreachable!(),
     }
 }

+
 fn handle_classifiers(matches: &ArgMatches) -> Result<()> {
     match matches.subcommand() {
         Some(("rescore", rescore_matches)) => {
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index 2c6c657..bbbec1c 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -144,7 +144,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         config.learning_rate as f64,
         config.epochs,
         config.early_stopping_patience,
-    )?;
+    ).with_context(|| "Training failed: an error occurred during the model training process")?;
     log::info!("Training completed in {:?}", start_time.elapsed());
     model.save(&config.output_file)?;
     log::info!("Model saved to: {}", config.output_file);

From 9e6d8c3ea1ada8180ce99290ba67d849d9ac7d6a Mon Sep 17 00:00:00 2001
From: singjc
Date: Tue, 13 May 2025 21:06:30 -0400
Subject: [PATCH 48/75] refactor: Optimize contiguous operations in
 building_blocks.rs

---
 .../src/building_blocks/building_blocks.rs    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs
index 132d067..2e758fd 100644
--- a/crates/redeem-properties/src/building_blocks/building_blocks.rs
+++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs
@@ -1056,11 +1056,11 @@ impl Encoder26aaModCnnTransformerAttnSum {
         }

         let x = self.input_cnn.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.proj_cnn_to_transformer.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.input_transformer.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.attn_sum.forward(&x)?;

         Ok(x)
@@ -1177,11 +1177,11 @@ impl Encoder26aaModChargeCnnTransformerAttnSum {
         log::trace!("[Encoder26aaModChargeCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}");

         let x = self.input_cnn.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.proj_cnn_to_transformer.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.input_transformer.forward(&x)?;
-
+        let x = x.contiguous()?;
         let x = self.attn_sum.forward(&x)?;

         Ok(x)
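Note: `.contiguous()?` is the standard candle idiom for materializing a tensor after a layout-changing operation (for example `transpose` or `permute`), because some kernels require standard row-major strides. An illustrative standalone snippet against the `candle_core` API, not taken from this repository:

    use candle_core::{Device, Result, Tensor};

    fn main() -> Result<()> {
        let x = Tensor::zeros((2, 8, 16), candle_core::DType::F32, &Device::Cpu)?;
        // transpose returns a strided view; contiguous() copies it into a
        // standard layout so downstream layers can consume it.
        let x = x.transpose(1, 2)?;
        let x = x.contiguous()?;
        assert_eq!(x.dims(), &[2, 16, 8]);
        Ok(())
    }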
From a55ae3feabb83c8ff59b1cc3f296ef15c03d6a9f Mon Sep 17 00:00:00 2001
From: singjc
Date: Tue, 13 May 2025 23:56:15 -0400
Subject: [PATCH 49/75] refactor: Update rank feature based on new classifier
 scores

---
 .../redeem-classifiers/src/data_handling.rs   | 102 ++++++++++++++--
 crates/redeem-classifiers/src/psm_scorer.rs   |  13 ++-
 2 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/crates/redeem-classifiers/src/data_handling.rs b/crates/redeem-classifiers/src/data_handling.rs
index 25d9067..aea785e 100644
--- a/crates/redeem-classifiers/src/data_handling.rs
+++ b/crates/redeem-classifiers/src/data_handling.rs
@@ -1,3 +1,6 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
 use ndarray::{Array1, Array2, ArrayView2, Axis};
 use rand::rngs::StdRng;
 use rand::seq::SliceRandom;
@@ -5,6 +8,16 @@ use rand::{thread_rng, SeedableRng};

 use crate::stats::tdc;

+#[derive(Debug, Clone)]
+pub struct PsmMetadata {
+    /// Spectrum id
+    pub spec_id: Vec<String>,
+    /// File identifier
+    pub file_id: Vec<usize>,
+    /// Feature names
+    pub feature_names: Vec<String>,
+}
+
 #[derive(Debug, Clone)]
 pub struct Experiment {
     pub x: Array2<f32>,
@@ -13,10 +26,11 @@ pub struct Experiment {
     pub is_top_peak: Array1<bool>,
     pub tg_num_id: Array1<i32>,
     pub classifier_score: Array1<f32>,
+    pub psm_metadata: PsmMetadata,
 }

 impl Experiment {
-    pub fn new(x: Array2<f32>, y: Array1<i32>) -> Self {
+    pub fn new(x: Array2<f32>, y: Array1<i32>, psm_metadata: PsmMetadata) -> Self {
         let n_samples = x.nrows();
         Experiment {
             x,
@@ -25,6 +39,7 @@ impl Experiment {
             is_top_peak: Array1::from_elem(n_samples, false),
             tg_num_id: Array1::from_elem(n_samples, 0),
             classifier_score: Array1::from_elem(n_samples, 0.0),
+            psm_metadata,
         }
     }

@@ -71,6 +86,45 @@ impl Experiment {
         new_labels
     }

+    /// Update the "rank" feature column based on new classifier scores.
+    ///
+    /// This re-ranks all PSMs per spectrum (grouped by file_id and spec_id),
+    /// and sets the rank column in `self.x` accordingly (1 = best).
+    ///
+    /// # Arguments
+    /// * `scores` - The current classifier scores (same length as rows in `x`)
+    /// * `metadata` - PSM metadata with file_id and spec_id for grouping
+    pub fn update_rank_feature(&mut self, scores: &Array1<f32>, metadata: &PsmMetadata) {
+        // 1. Locate the "rank" feature index
+        let Some(rank_feature_idx) = metadata
+            .feature_names
+            .iter()
+            .position(|name| name == "rank")
+        else {
+            log::warn!("No 'rank' feature found in feature_names — skipping rank update.");
+            return;
+        };
+
+        // 2. Group PSMs by (file_id, spec_id)
+        let mut spectrum_groups: HashMap<(usize, &str), Vec<(usize, f32)>> = HashMap::new();
+        for i in 0..self.x.nrows() {
+            spectrum_groups
+                .entry((metadata.file_id[i], metadata.spec_id[i].as_str()))
+                .or_default()
+                .push((i, scores[i]));
+        }
+
+        // 3. For each group, sort by score descending and assign new rank
+        for group in spectrum_groups.values_mut() {
+            group.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+            for (rank, (row_idx, _)) in group.iter().enumerate() {
+                self.x[[*row_idx, rank_feature_idx]] = (rank + 1) as f32;
+            }
+        }
+
+        log::debug!("Updated rank feature for {} spectrum groups.", spectrum_groups.len());
+    }
+
     pub fn get_top_test_peaks(&self) -> Experiment {
         let mask = &self.is_train.mapv(|x| !x) & &self.is_top_peak;
         self.filter(&mask)
@@ -103,16 +157,50 @@ impl Experiment {
         self.x.clone()
     }

+    /// Filter the experiment by applying a boolean mask to all row-aligned fields.
+    ///
+    /// This includes:
+    /// - Feature matrix `x`
+    /// - Labels `y`
+    /// - Training/test flags `is_train`
+    /// - Top peak flags `is_top_peak`
+    /// - Target group identifiers `tg_num_id`
+    /// - Classifier scores `classifier_score`
+    /// - PSM metadata: `spec_id`, `file_id` (feature names are retained as-is)
+    ///
+    /// # Arguments
+    ///
+    /// * `mask` - A boolean mask (`Array1<bool>`) of the same length as the number of PSMs (rows in `x`)
+    ///
+    /// # Returns
+    ///
+    /// A new `Experiment` instance with only rows where `mask[i] == true`
     pub fn filter(&self, mask: &Array1<bool>) -> Experiment {
+        let selected_indices: Vec<usize> = mask
+            .iter()
+            .enumerate()
+            .filter_map(|(i, &m)| if m { Some(i) } else { None })
+            .collect();
+
+        fn filter_vec<T: Clone>(v: &Vec<T>, indices: &[usize]) -> Vec<T> {
+            indices.iter().map(|&i| v[i].clone()).collect()
+        }
+
         Experiment {
-            x: self.x.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
-            y: self.y.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
-            is_train: self.is_train.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
-            is_top_peak: self.is_top_peak.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
-            tg_num_id: self.tg_num_id.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
-            classifier_score: self.classifier_score.select(Axis(0), &mask.iter().enumerate().filter_map(|(i, &m)| if m { Some(i) } else { None }).collect::<Vec<_>>()),
+            x: self.x.select(Axis(0), &selected_indices),
+            y: self.y.select(Axis(0), &selected_indices),
+            is_train: self.is_train.select(Axis(0), &selected_indices),
+            is_top_peak: self.is_top_peak.select(Axis(0), &selected_indices),
+            tg_num_id: self.tg_num_id.select(Axis(0), &selected_indices),
+            classifier_score: self.classifier_score.select(Axis(0), &selected_indices),
+            psm_metadata: PsmMetadata {
+                spec_id: filter_vec(&self.psm_metadata.spec_id, &selected_indices),
+                file_id: filter_vec(&self.psm_metadata.file_id, &selected_indices),
+                feature_names: self.psm_metadata.feature_names.clone(), // not row-aligned
+            },
         }
     }

+
     pub fn split_for_xval(&mut self, fraction: f32, is_test: bool) {
         let mut rng = thread_rng();

diff --git a/crates/redeem-classifiers/src/psm_scorer.rs b/crates/redeem-classifiers/src/psm_scorer.rs
index 82475cb..f4bd9ba 100644
--- a/crates/redeem-classifiers/src/psm_scorer.rs
+++ b/crates/redeem-classifiers/src/psm_scorer.rs
@@ -5,7 +5,7 @@ use rand::seq::SliceRandom;
 use rand::thread_rng;
 use serde::{Deserialize, Serialize};

-use crate::data_handling::Experiment;
+use crate::data_handling::{Experiment, PsmMetadata};
 use crate::models::utils::{ModelParams, ModelType};

 #[cfg(feature = "xgboost")]
@@ -321,9 +321,9 @@ impl SemiSupervisedLearner {
     /// # Returns
     ///
     /// The predictions for the input features
-    pub fn fit(&mut self, x: Array2<f32>, y: Array1<i32>) -> Array1<f32> {
+    pub fn fit(&mut self, x: Array2<f32>, y: Array1<i32>, psm_metadata: PsmMetadata) -> Array1<f32> {

-        let mut experiment = Experiment::new(x.clone(), y.clone());
+        let mut experiment = Experiment::new(x.clone(), y.clone(), psm_metadata.clone());

         experiment.log_input_data_summary();

@@ -373,11 +373,14 @@ impl SemiSupervisedLearner {

             new_labels = experiment.update_labels(&all_predictions, self.train_fdr, best_desc);
             experiment.y = new_labels;
+
+            experiment.update_rank_feature(&all_predictions, &experiment.psm_metadata.clone());
+
         }

         // Final prediction on the entire dataset
         log::info!("Final prediction on the entire dataset");
-        let experiment = Experiment::new(x, y);
+        let experiment = Experiment::new(x, y, psm_metadata);

         // self.model
         //     .fit(&experiment.x, &experiment.y.to_vec(), None, None);
@@ -453,6 +456,7 @@ mod tests {
     }

     #[test]
+    #[cfg(feature = "xgboost")]
     fn test_xgb_semi_supervised_learner() {
         // Load the test data from the TSV files
         let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
@@ -485,6 +489,7 @@ mod tests {
     }

     #[test]
+    #[cfg(feature = "linfa")]
     fn test_svm_semi_supervised_learner() {
         // Load the test data from the TSV files
         let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
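Note: the re-ranking step above hinges on a descending sort of f32 scores; because f32 is only PartialOrd, the patch uses `partial_cmp` with an `Ordering::Equal` fallback so NaN scores cannot panic the comparator. A minimal standalone illustration of that idiom:

    fn main() {
        // (row index, classifier score) pairs for one spectrum group.
        let mut group: Vec<(usize, f32)> = vec![(0, 0.2), (1, 0.9), (2, f32::NAN), (3, 0.5)];
        // Best score first; NaN compares as Equal, and sort_by is stable,
        // so a NaN row keeps its relative position instead of panicking.
        group.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        for (rank, (row_idx, score)) in group.iter().enumerate() {
            println!("row {row_idx}: rank {} (score {score})", rank + 1);
        }
    }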
From 2c7e25e6126b2d8f78fdc426dafd98c6f3f32039 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 10:08:31 -0400
Subject: [PATCH 50/75] refactor: Update examples in classifiers crate

---
 .../examples/gbdt_semi_supervised_learning.rs | 104 +++++++-----
 .../examples/svm_semi_supervised_learning.rs  | 153 ++++++++++-------
 .../examples/xgb_semi_supervised_learning.rs  | 154 ++++++++++--------
 3 files changed, 247 insertions(+), 164 deletions(-)

diff --git a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
index d574709..d53f165 100644
--- a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
+++ b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
@@ -5,57 +5,84 @@ use ndarray::{Array1, Array2};
 use std::error::Error;
 use std::fs::File;
 use std::io::Write;
+use std::io::BufReader;

+use redeem_classifiers::data_handling::PsmMetadata;
 use redeem_classifiers::psm_scorer::SemiSupervisedLearner;
 use redeem_classifiers::models::utils::ModelType;
 use redeem_classifiers::report::{report::{Report, ReportSection}, plots::{plot_score_histogram, plot_pp}};

-fn read_features_tsv(path: &str) -> Result<Array2<f32>, Box<dyn Error>> {
+/// Load a test PSM CSV file into feature matrix, labels, and metadata.
+///
+/// # Arguments
+/// * `path` - Path to the CSV file
+///
+/// # Returns
+/// A tuple of (`x`, `y`, `PsmMetadata`)
+pub fn load_test_psm_csv(path: &str) -> Result<(Array2<f32>, Array1<i32>, PsmMetadata)> {
+    let file = File::open(path)?;
     let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b',')
-        .from_path(path)?;
-
-    let mut data = Vec::new();
+        .has_headers(true)
+        .from_reader(BufReader::new(file));
+
+    let headers = reader
+        .headers()?
+        .iter()
+        .map(|h| h.to_string())
+        .collect::<Vec<String>>();
+
+    // Find indices
+    let file_id_idx = headers.iter().position(|h| h == "file_id").unwrap();
+    let spec_id_idx = headers.iter().position(|h| h == "spec_id").unwrap();
+    let label_idx = headers.iter().position(|h| h == "label").unwrap();
+
+    // Everything else is a feature
+    let feature_indices: Vec<usize> = (0..headers.len())
+        .filter(|&i| i != file_id_idx && i != spec_id_idx && i != label_idx)
+        .collect();
+
+    let feature_names = feature_indices
+        .iter()
+        .map(|&i| headers[i].clone())
+        .collect::<Vec<String>>();
+
+    let mut file_ids = Vec::new();
+    let mut spec_ids = Vec::new();
+    let mut labels = Vec::new();
+    let mut features = Vec::new();

     for result in reader.records() {
         let record = result?;
-        let row: Vec<f32> = record
+
+        file_ids.push(record[file_id_idx].parse::<usize>()?);
+        spec_ids.push(record[spec_id_idx].to_string());
+        labels.push(record[label_idx].parse::<i32>()?);
+
+        let row = feature_indices
             .iter()
-            .map(|field| field.parse::<f32>())
-            .collect::<Result<Vec<f32>, _>>()?;
-        data.push(row);
+            .map(|&i| record[i].parse::<f32>().unwrap_or(f32::NAN))
+            .collect::<Vec<f32>>();
+
+        features.extend(row);
     }

-    let n_samples = data.len();
-    let n_features = data[0].len();
+    let n_rows = labels.len();
+    let n_cols = feature_indices.len();

-    Array2::from_shape_vec(
-        (n_samples, n_features),
-        data.into_iter().flatten().collect(),
-    )
-    .map_err(|e| e.into())
-}
+    let x = Array2::from_shape_vec((n_rows, n_cols), features)?;
+    let y = Array1::from_vec(labels);

-fn read_labels_tsv(path: &str) -> Result<Array1<i32>, Box<dyn Error>> {
-    let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b'\t')
-        .from_path(path)?;
-
-    let labels: Vec<i32> = reader
-        .records()
-        .map(|r| {
-            let record = r?;
-            let value = record.get(0).ok_or_else(|| "Empty row".to_string())?;
-            value.parse::<i32>().map_err(|e| e.into())
-        })
-        .collect::<Result<Vec<i32>, Box<dyn Error>>>()?;
-
-    Ok(Array1::from_vec(labels))
+    let metadata = PsmMetadata {
+        file_id: file_ids,
+        spec_id: spec_ids,
+        feature_names,
+    };
+
+    Ok((x, y, metadata))
 }

+
 fn save_predictions_to_csv(
     predictions: &Array1<f32>,
     file_path: &str,
@@ -71,12 +98,7 @@ fn save_predictions_to_csv(
 fn main() -> Result<()> {
     env_logger::init();

-    // Load the test data from the TSV files
-    let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
-    // Select first 10 columns of data
-    let x = x.slice(ndarray::s![.., ..10]).to_owned();
-
-    let y = read_labels_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_labels_for_testing.csv").unwrap();
+    let (x, y, metadata) = load_test_psm_csv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_with_metadata_for_testing_redeem.csv")?;

     println!("Loaded features shape: {:?}", x.shape());
     println!("Loaded labels shape: {:?}", y.shape());
@@ -97,7 +119,7 @@ fn main() -> Result<()> {
         3,
         Some((0.15, 1.0))
     );
-    let predictions = learner.fit(x, y.clone());
+    let predictions = learner.fit(x, y.clone(), metadata);

     println!("Labels: {:?}", y);

diff --git a/crates/redeem-classifiers/examples/svm_semi_supervised_learning.rs b/crates/redeem-classifiers/examples/svm_semi_supervised_learning.rs
index e9ad6b5..0b52a2c 100644
--- a/crates/redeem-classifiers/examples/svm_semi_supervised_learning.rs
+++ b/crates/redeem-classifiers/examples/svm_semi_supervised_learning.rs
@@ -1,84 +1,98 @@
-use anyhow::{Context, Result};
+use anyhow::{Context, Ok, Result};
 use csv::ReaderBuilder;
 use ndarray::{Array1, Array2};
 use std::error::Error;
 use std::fs::File;
-use std::io::Write;
+use std::io::{BufReader, Write};

+use redeem_classifiers::data_handling::PsmMetadata;
 use redeem_classifiers::psm_scorer::SemiSupervisedLearner;
 use redeem_classifiers::models::utils::ModelType;

-fn read_features_tsv(path: &str) -> Result<Array2<f32>, Box<dyn Error>> {
+/// Load a test PSM CSV file into feature matrix, labels, and metadata.
+///
+/// # Arguments
+/// * `path` - Path to the CSV file
+///
+/// # Returns
+/// A tuple of (`x`, `y`, `PsmMetadata`)
+pub fn load_test_psm_csv(path: &str) -> Result<(Array2<f32>, Array1<i32>, PsmMetadata)> {
+    let file = File::open(path)?;
     let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b',')
-        .from_path(path)?;
-
-    let mut data = Vec::new();
+        .has_headers(true)
+        .from_reader(BufReader::new(file));
+
+    let headers = reader
+        .headers()?
+        .iter()
+        .map(|h| h.to_string())
+        .collect::<Vec<String>>();
+
+    // Find indices
+    let file_id_idx = headers.iter().position(|h| h == "file_id").unwrap();
+    let spec_id_idx = headers.iter().position(|h| h == "spec_id").unwrap();
+    let label_idx = headers.iter().position(|h| h == "label").unwrap();
+
+    // Everything else is a feature
+    let feature_indices: Vec<usize> = (0..headers.len())
+        .filter(|&i| i != file_id_idx && i != spec_id_idx && i != label_idx)
+        .collect();
+
+    let feature_names = feature_indices
+        .iter()
+        .map(|&i| headers[i].clone())
+        .collect::<Vec<String>>();
+
+    let mut file_ids = Vec::new();
+    let mut spec_ids = Vec::new();
+    let mut labels = Vec::new();
+    let mut features = Vec::new();

     for result in reader.records() {
         let record = result?;
-        let row: Vec<f32> = record
-            .iter()
-            .map(|field| field.parse::<f32>())
-            .collect::<Result<Vec<f32>, _>>()?;
-        data.push(row);
-    }

-    let n_samples = data.len();
-    let n_features = data[0].len();
+        file_ids.push(record[file_id_idx].parse::<usize>()?);
+        spec_ids.push(record[spec_id_idx].to_string());
+        labels.push(record[label_idx].parse::<i32>()?);

-    Array2::from_shape_vec(
-        (n_samples, n_features),
-        data.into_iter().flatten().collect(),
-    )
-    .map_err(|e| e.into())
-}
+        let row = feature_indices
+            .iter()
+            .map(|&i| record[i].parse::<f32>().unwrap_or(f32::NAN))
+            .collect::<Vec<f32>>();

-fn read_labels_tsv(path: &str) -> Result<Array1<i32>, Box<dyn Error>> {
-    let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b'\t')
-        .from_path(path)?;
-
-    let labels: Vec<i32> = reader
-        .records()
-        .map(|r| {
-            let record = r?;
-            let value = record.get(0).ok_or_else(|| "Empty row".to_string())?;
-            value.parse::<i32>().map_err(|e| e.into())
-        })
-        .collect::<Result<Vec<i32>, Box<dyn Error>>>()?;
-
-    Ok(Array1::from_vec(labels))
-}
+        features.extend(row);
+    }

-fn save_predictions_to_csv(
-    predictions: &Array1<f32>,
-    file_path: &str,
-) -> Result<(), Box<dyn Error>> {
-    let mut file = File::create(file_path)?;
+    let n_rows = labels.len();
+    let n_cols = feature_indices.len();

-    for &pred in predictions.iter() {
-        writeln!(file, "{}", pred)?;
-    }
+    let x = Array2::from_shape_vec((n_rows, n_cols), features)?;
+    let y = Array1::from_vec(labels);

-    Ok(())
+    let metadata = PsmMetadata {
+        file_id: file_ids,
+        spec_id: spec_ids,
+        feature_names,
+    };
+
+    Ok((x, y, metadata))
 }

-fn main() -> Result<()> {
-    env_logger::init();
-    // Load the test data from the TSV files
-    let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
-    let y = read_labels_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_labels_for_testing.csv").unwrap();
+// fn save_predictions_to_csv(
+//     predictions: &Array1<f32>,
+//     file_path: &str,
+// ) -> Result<(), Box<dyn Error>> {
+//     let mut file = File::create(file_path)?;

-    // Select first 10 columns of data
-    let x = x.slice(ndarray::s![.., ..10]).to_owned();
+//     for &pred in predictions.iter() {
+//         writeln!(file, "{}", pred)?;
+//     }

-    println!("Loaded features shape: {:?}", x.shape());
-    println!("Loaded labels shape: {:?}", y.shape());
+//     Ok(())
+// }

-    // Create and train your SemiSupervisedLearner
+#[cfg(feature = "linfa")]
+fn run_psm_scorer(x: &Array2<f32>, y: &Array1<i32>, metadata: &PsmMetadata) -> Result<Array1<f32>> {
     let params = ModelType::SVM {
         eps: 0.1,
         c: (1.0, 1.0),
@@ -94,7 +108,28 @@ fn main() -> Result<()> {
         500,
         Some((0.15, 1.0))
     );
-    let predictions = learner.fit(x, y.clone());
+    let predictions = learner.fit(x, y.clone(), metadata);
+    Ok(predictions)
+}
+
+#[cfg(not(feature = "linfa"))]
+fn run_psm_scorer(x: &Array2<f32>, y: &Array1<i32>, metadata: &PsmMetadata) -> Result<Array1<f32>> {
+    unimplemented!("SVM is not available in this build. Please enable the linfa feature.");
+}
+
+fn main() -> Result<()> {
+    env_logger::init();
+    // Load the test data from the TSV files
+    let (x, y, metadata) = load_test_psm_csv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_with_metadata_for_testing_redeem.csv")?;
+
+    // Select first 10 columns of data
+    let x = x.slice(ndarray::s![.., ..10]).to_owned();
+
+    println!("Loaded features shape: {:?}", x.shape());
+    println!("Loaded labels shape: {:?}", y.shape());
+
+    // Create and train your SemiSupervisedLearner
+    let predictions = run_psm_scorer(&x, &y, &metadata).context("Failed to run PSM scorer")?;

     println!("Labels: {:?}", y);

diff --git a/crates/redeem-classifiers/examples/xgb_semi_supervised_learning.rs b/crates/redeem-classifiers/examples/xgb_semi_supervised_learning.rs
index 6c4697f..66f80cb 100644
--- a/crates/redeem-classifiers/examples/xgb_semi_supervised_learning.rs
+++ b/crates/redeem-classifiers/examples/xgb_semi_supervised_learning.rs
@@ -1,58 +1,83 @@
 use anyhow::{Context, Result};
 use csv::ReaderBuilder;
-use machine_info::Machine;
 use ndarray::{Array1, Array2};
+
 use std::error::Error;
 use std::fs::File;
-use std::io::Write;
+use std::io::{BufReader, Write};
 use std::process;

+use redeem_classifiers::data_handling::PsmMetadata;
 use redeem_classifiers::psm_scorer::SemiSupervisedLearner;
 use redeem_classifiers::models::utils::ModelType;

-fn read_features_tsv(path: &str) -> Result<Array2<f32>, Box<dyn Error>> {
+/// Load a test PSM CSV file into feature matrix, labels, and metadata.
+///
+/// # Arguments
+/// * `path` - Path to the CSV file
+///
+/// # Returns
+/// A tuple of (`x`, `y`, `PsmMetadata`)
+pub fn load_test_psm_csv(path: &str) -> Result<(Array2<f32>, Array1<i32>, PsmMetadata)> {
+    let file = File::open(path)?;
     let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b',')
-        .from_path(path)?;
-
-    let mut data = Vec::new();
+        .has_headers(true)
+        .from_reader(BufReader::new(file));
+
+    let headers = reader
+        .headers()?
+        .iter()
+        .map(|h| h.to_string())
+        .collect::<Vec<String>>();
+
+    // Find indices
+    let file_id_idx = headers.iter().position(|h| h == "file_id").unwrap();
+    let spec_id_idx = headers.iter().position(|h| h == "spec_id").unwrap();
+    let label_idx = headers.iter().position(|h| h == "label").unwrap();
+
+    // Everything else is a feature
+    let feature_indices: Vec<usize> = (0..headers.len())
+        .filter(|&i| i != file_id_idx && i != spec_id_idx && i != label_idx)
+        .collect();
+
+    let feature_names = feature_indices
+        .iter()
+        .map(|&i| headers[i].clone())
+        .collect::<Vec<String>>();
+
+    let mut file_ids = Vec::new();
+    let mut spec_ids = Vec::new();
+    let mut labels = Vec::new();
+    let mut features = Vec::new();

     for result in reader.records() {
         let record = result?;
-        let row: Vec<f32> = record
+
+        file_ids.push(record[file_id_idx].parse::<usize>()?);
+        spec_ids.push(record[spec_id_idx].to_string());
+        labels.push(record[label_idx].parse::<i32>()?);
+
+        let row = feature_indices
             .iter()
-            .map(|field| field.parse::<f32>())
-            .collect::<Result<Vec<f32>, _>>()?;
-        data.push(row);
+            .map(|&i| record[i].parse::<f32>().unwrap_or(f32::NAN))
+            .collect::<Vec<f32>>();
+
+        features.extend(row);
     }

-    let n_samples = data.len();
-    let n_features = data[0].len();
+    let n_rows = labels.len();
+    let n_cols = feature_indices.len();

-    Array2::from_shape_vec(
-        (n_samples, n_features),
-        data.into_iter().flatten().collect(),
-    )
-    .map_err(|e| e.into())
-}
+    let x = Array2::from_shape_vec((n_rows, n_cols), features)?;
+    let y = Array1::from_vec(labels);

-fn read_labels_tsv(path: &str) -> Result<Array1<i32>, Box<dyn Error>> {
-    let mut reader = ReaderBuilder::new()
-        .has_headers(false)
-        .delimiter(b'\t')
-        .from_path(path)?;
-
-    let labels: Vec<i32> = reader
-        .records()
-        .map(|r| {
-            let record = r?;
-            let value = record.get(0).ok_or_else(|| "Empty row".to_string())?;
-            value.parse::<i32>().map_err(|e| e.into())
-        })
-        .collect::<Result<Vec<i32>, Box<dyn Error>>>()?;
-
-    Ok(Array1::from_vec(labels))
+    let metadata = PsmMetadata {
+        file_id: file_ids,
+        spec_id: spec_ids,
+        feature_names,
+    };
+
+    Ok((x, y, metadata))
 }

 fn save_predictions_to_csv(
@@ -68,48 +93,49 @@ fn save_predictions_to_csv(
     Ok(())
 }

+#[cfg(feature = "xgboost")]
+fn run_psm_scorer(x: &Array2<f32>, y: &Array1<i32>, metadata: &PsmMetadata) -> Result<Array1<f32>> {
+    // Create and train your SemiSupervisedLearner
+
+    use std::fs::metadata;
+    let xgb_params = ModelType::XGBoost {
+        max_depth: 6,
+        num_boost_round: 100,
+        early_stopping_rounds: 10,
+        verbose_eval: false,
+    };
+let mut learner = SemiSupervisedLearner::new(
+    xgb_params,
+    0.01,
+    1.0,
+    5,
+    Some((1.0, 1.0))
+);
+let predictions = learner.fit(x, y.clone(), metadata);
+    Ok(predictions)
+}
+
+#[cfg(not(feature = "xgboost"))]
+fn run_psm_scorer(x: &Array2<f32>, y: &Array1<i32>, metadata: &PsmMetadata) -> Result<Array1<f32>> {
+    unimplemented!("xgboost is not available in this build. Please enable the xgboost feature.");
+}
+
 fn main() -> Result<()> {
     env_logger::init();
-
-    let mut m = Machine::new();
-    m.track_process(process::id() as i32).unwrap();

     // Load the test data from the TSV files
-    let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
-    // Select first 10 columns of data
-    let x = x.slice(ndarray::s![.., ..10]).to_owned();
-
-    let y = read_labels_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_labels_for_testing.csv").unwrap();
+    let (x, y, metadata) = load_test_psm_csv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_with_metadata_for_testing_redeem.csv")?;

     println!("Loaded features shape: {:?}", x.shape());
     println!("Loaded labels shape: {:?}", y.shape());

-    // Create and train your SemiSupervisedLearner
-    let xgb_params = ModelType::XGBoost {
-        max_depth: 6,
-        num_boost_round: 100,
-        early_stopping_rounds: 10,
-        verbose_eval: false,
-    };
-    let mut learner = SemiSupervisedLearner::new(
-        xgb_params,
-        0.01,
-        1.0,
-        5,
-        Some((1.0, 1.0))
-    );
-    let predictions = learner.fit(x, y.clone());
+    let predictions = run_psm_scorer(&x, &y, &metadata).context("Failed to run PSM scorer")?;

     println!("Labels: {:?}", y);

     // Evaluate the predictions
     println!("Predictions: {:?}", predictions);

-    let processes = m.processes_status();
-    let system = m.system_status();
-    let graphics = m.graphics_status();
-    println!("{:?} {:?} {:?}", processes, system, graphics);
-
     // save_predictions_to_csv(&predictions, "/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/predictions.csv").unwrap();

     Ok(())
 }
\ No newline at end of file
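Note: the three examples above all assume a CSV whose header carries `file_id`, `spec_id`, and `label`, with every remaining column treated as a feature. A minimal input that would satisfy `load_test_psm_csv` (contents illustrative only; the real test file is not shown in this series):

    file_id,spec_id,label,rank,score
    0,scan_001,1,1,0.93
    0,scan_001,-1,2,0.41
    1,scan_017,1,1,0.88

Any feature cell that fails to parse as f32 is stored as NaN rather than aborting the load, so downstream models must tolerate missing values.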
From 122d5c1b8225bbb3880d8b27ba28d0fc5149fa85 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 10:08:41 -0400
Subject: [PATCH 51/75] refactor: Update rank feature and log rank changes in
 Experiment class

---
 crates/redeem-classifiers/src/data_handling.rs | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/crates/redeem-classifiers/src/data_handling.rs b/crates/redeem-classifiers/src/data_handling.rs
index aea785e..b28e9a5 100644
--- a/crates/redeem-classifiers/src/data_handling.rs
+++ b/crates/redeem-classifiers/src/data_handling.rs
@@ -91,6 +91,8 @@ impl Experiment {
     /// This re-ranks all PSMs per spectrum (grouped by file_id and spec_id),
     /// and sets the rank column in `self.x` accordingly (1 = best).
     ///
+    /// Also logs the percentage of PSMs whose rank changed.
+    ///
     /// # Arguments
     /// * `scores` - The current classifier scores (same length as rows in `x`)
     /// * `metadata` - PSM metadata with file_id and spec_id for grouping
@@ -114,17 +116,34 @@ impl Experiment {
                 .push((i, scores[i]));
         }

+        let mut changed_ranks = 0;
+
         // 3. For each group, sort by score descending and assign new rank
         for group in spectrum_groups.values_mut() {
             group.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
             for (rank, (row_idx, _)) in group.iter().enumerate() {
-                self.x[[*row_idx, rank_feature_idx]] = (rank + 1) as f32;
+                let old_rank = self.x[[*row_idx, rank_feature_idx]] as usize;
+                let new_rank = rank + 1;
+                if old_rank != new_rank {
+                    changed_ranks += 1;
+                }
+                self.x[[*row_idx, rank_feature_idx]] = new_rank as f32;
             }
         }

-        log::debug!("Updated rank feature for {} spectrum groups.", spectrum_groups.len());
+        let total = self.x.nrows();
+        let pct_changed = (changed_ranks as f64 / total as f64) * 100.0;
+
+        log::debug!(
+            "Updated rank feature for {} spectrum groups. Rank changed for {:.2}% of PSMs ({} of {}).",
+            spectrum_groups.len(),
+            pct_changed,
+            changed_ranks,
+            total
+        );
     }

+
     pub fn get_top_test_peaks(&self) -> Experiment {
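Note: on the `log` interplay the next patch relies on: `log::set_max_level` only raises the global cap that the `log` macros check before dispatching a record; `env_logger` still applies its own RUST_LOG-derived filter per record. A sketch of the interaction:

    fn main() {
        env_logger::init(); // filter comes from RUST_LOG (default: error)
        log::set_max_level(log::LevelFilter::Debug);
        // This record now passes the global cap, but env_logger will still
        // drop it unless RUST_LOG permits debug for this module.
        log::debug!("visible only if RUST_LOG allows debug");
    }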
From 8f7eea0fa611e6070680d0e7a890c23d2949b1c1 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 10:39:22 -0400
Subject: [PATCH 52/75] refactor: Set log level to debug in main function

---
 .../examples/gbdt_semi_supervised_learning.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
index d53f165..e0debf9 100644
--- a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
+++ b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
@@ -98,6 +98,9 @@ fn save_predictions_to_csv(
 fn main() -> Result<()> {
     env_logger::init();

+    // Set log level to debug
+    log::set_max_level(log::LevelFilter::Debug);
+
     let (x, y, metadata) = load_test_psm_csv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_with_metadata_for_testing_redeem.csv")?;

     println!("Loaded features shape: {:?}", x.shape());
     println!("Loaded labels shape: {:?}", y.shape());
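Note: the next patch swaps a download-at-runtime TSV for a compile-time embedded asset parsed lazily. The general pattern it uses pairs `include_bytes!` with `once_cell::sync::Lazy`; an illustrative standalone sketch (the `assets/example.tsv` file name is hypothetical):

    use once_cell::sync::Lazy;
    use std::collections::HashMap;

    // Bytes are baked into the binary at compile time, relative to the crate root.
    static TSV: &[u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/assets/example.tsv"));

    // Parsed once, on first access, from the embedded bytes; no file I/O at runtime.
    static TABLE: Lazy<HashMap<String, String>> = Lazy::new(|| {
        std::str::from_utf8(TSV)
            .expect("asset must be UTF-8")
            .lines()
            .filter_map(|l| l.split_once('\t'))
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    });

    fn main() {
        println!("{} rows embedded", TABLE.len());
    }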
From fb087943f3e6fc1cec9bb6daf6a56891cb7f542b Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 12:41:31 -0400
Subject: [PATCH 53/75] refactor: Update loading of modifications to use byte
 slice instead of file path

---
 .../src/utils/peptdeep_utils.rs | 230 ++++++++----------
 1 file changed, 107 insertions(+), 123 deletions(-)

diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs
index 6a4757e..d802348 100644
--- a/crates/redeem-properties/src/utils/peptdeep_utils.rs
+++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs
@@ -12,9 +12,10 @@ use regex::Regex;
 use std::collections::HashMap;
 use serde::Deserialize;
 use zip::ZipArchive;
+use once_cell::sync::Lazy;
+
+const MODIFICATIONS_TSV_BYTES: &[u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"),"/assets/modification.tsv"));

-const MOD_TSV_URL: &str = "https://raw.githubusercontent.com/MannLabs/alphabase/main/alphabase/constants/const_files/modification.tsv";
-const MOD_TSV_PATH: &str = "data/modification.tsv";
 const PRETRAINED_MODELS_URL: &str = "https://github.com/singjc/redeem/releases/download/v0.1.0-alpha/peptdeep_generic_pretrained_models.zip";
 const PRETRAINED_MODELS_ZIP: &str = "data/peptdeep_generic_pretrained_models.zip";
@@ -36,58 +37,59 @@ const MAX_INSTRUMENT_NUM: usize = 8;

 const UNKNOWN_INSTRUMENT_NUM: usize = MAX_INSTRUMENT_NUM - 1;

-pub fn download_pretrained_models_exist() -> Result<PathBuf, io::Error> {
-    let zip_path = PathBuf::from(PRETRAINED_MODELS_ZIP);
-    let extract_dir = PathBuf::from(PRETRAINED_MODELS_PATH);
+#[derive(Debug, Clone)]
+pub struct ModificationMap {
+    pub name: String,
+    pub amino_acid: Option<char>, // Optional if not applicable
+    pub unimod_id: Option<usize>
+}

-    // Ensure the parent directory exists
-    if let Some(parent) = zip_path.parent() {
-        fs::create_dir_all(parent)?;
-    }
+/// Loads a unified modification map where the key is either:
+/// - ("57.0215", Some('C')) for mass-based lookup
+/// - ("UniMod:4", Some('C')) for UniMod ID–based lookup
+/// Loads the modification map, parsing the embedded modifications.tsv.
+pub fn load_modifications() -> Result<HashMap<(String, Option<char>), ModificationMap>> {
+    let mut rdr = csv::ReaderBuilder::new()
+        .delimiter(b'\t')
+        .from_reader(MODIFICATIONS_TSV_BYTES);

-    // Download the zip file if it doesn't exist
-    if !zip_path.exists() {
-        info!("Downloading pretrained models...");
-        let mut response = reqwest::blocking::get(PRETRAINED_MODELS_URL)
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-        let mut file = File::create(&zip_path)?;
-        io::copy(&mut response, &mut file)?;
-    }
+    let mut modifications = HashMap::new();

-    // Unzip the file if the target directory doesn't exist
-    if !extract_dir.exists() {
-        info!("Unzipping pretrained models...");
-        let file = File::open(&zip_path)?;
-        let mut archive = ZipArchive::new(file)?;
+    for result in rdr.records() {
+        let record = result?;
+        let mod_name = record.get(0).unwrap_or("").to_string();
+        let unimod_mass: f64 = record.get(1).unwrap_or("0").parse().unwrap_or(0.0);
+        let unimod_id: Option<usize> = record.get(7).and_then(|s| s.parse().ok());

-        for i in 0..archive.len() {
-            let mut file = archive.by_index(i)?;
-            let outpath = extract_dir.join(file.mangled_name());
+        let mass_key = format!("{:.4}", unimod_mass);
+        let unimod_key = unimod_id.map(|id| format!("UniMod:{}", id));

-            if file.name().ends_with('/') {
-                // Create directory
-                fs::create_dir_all(&outpath)?;
-            } else {
-                // Write file
-                if let Some(parent) = outpath.parent() {
-                    fs::create_dir_all(parent)?;
-                }
-                let mut outfile = File::create(&outpath)?;
-                io::copy(&mut file, &mut outfile)?;
-            }
+        let amino_acid = mod_name.split('@').nth(1).and_then(|aa| aa.chars().next());
+
+        let modification = ModificationMap {
+            name: mod_name,
+            amino_acid,
+            unimod_id,
+        };
+
+        // Insert mass-based key
+        modifications.insert((mass_key.clone(), amino_acid), modification.clone());
+
+        // Insert unimod-id based key if available
+        if let Some(key) = unimod_key {
+            modifications.insert((key, amino_acid), modification.clone());
         }
     }

-    Ok(extract_dir)
+    Ok(modifications)
 }

-pub fn parse_instrument_index(instrument: &str) -> usize {
-    let upper_instrument = instrument.to_uppercase();
-
-    INSTRUMENT_DICT.iter()
-        .find(|&&(name, _)| name == upper_instrument)
-        .map_or(UNKNOWN_INSTRUMENT_NUM, |&(_, index)| index)
-}
+// Lazy static variable to hold the loaded modification map
+pub static MODIFICATION_MAP: Lazy<HashMap<(String, Option<char>), ModificationMap>> = Lazy::new(|| {
+    load_modifications().expect("Failed to load modifications")
+});
+
+

 #[derive(Clone, Debug, Deserialize)]
@@ -153,24 +155,6 @@ pub fn parse_model_constants(path: &str) -> Result<ModelConstants> {
     Ok(constants)
 }

-fn ensure_mod_tsv_exists() -> Result<PathBuf, io::Error> {
-    let path = PathBuf::from(MOD_TSV_PATH);
-
-    // Ensure the parent directory exists
-    if let Some(parent) = path.parent() {
-        fs::create_dir_all(parent)?;
-    }
-
-    if !path.exists() {
-        info!("Downloading modification.tsv...");
-        let mut response = reqwest::blocking::get(MOD_TSV_URL)
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-        let mut file = File::create(&path)?;
-        response.copy_to(&mut file)
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-    }
-    Ok(path)
-}

 fn parse_mod_formula(formula: &str, mod_elem_to_idx: &HashMap<String, usize>, mod_feature_size: usize) -> Vec<f32> {
     let mut feature = vec![0.0; mod_feature_size];
@@ -191,10 +175,10 @@ fn parse_mod_formula(formula: &str, mod_elem_to_idx: &HashMap<String, usize>, mod_feature_size: usize) -> Vec<f32> {
 }

 pub fn load_mod_to_feature(constants: &ModelConstants) -> Result<HashMap<String, Vec<f32>>, Error> {
-    let path = ensure_mod_tsv_exists()?;
+
     let mut rdr = ReaderBuilder::new()
         .delimiter(b'\t')
-        .from_path(path)?;
+        .from_reader(MODIFICATIONS_TSV_BYTES); // Read from the byte slice

     // Create mod_elem_to_idx mapping
     let mod_elem_to_idx: HashMap<String, usize> = constants.mod_elements.iter()
@@ -218,10 +202,10 @@ pub fn load_mod_to_feature_arc(
     constants: &ModelConstants,
 ) -> Result<HashMap<Arc<[u8]>, Vec<f32>>, Error> {
-    let path = ensure_mod_tsv_exists()?;
+
     let mut rdr = ReaderBuilder::new()
         .delimiter(b'\t')
-        .from_path(path)?;
+        .from_reader(MODIFICATIONS_TSV_BYTES);

     let mod_elem_to_idx: HashMap<String, usize> = constants
         .mod_elements
@@ -243,59 +227,6 @@ pub fn load_mod_to_feature_arc(
 }

-#[derive(Debug, Clone)]
-pub struct ModificationMap {
-    pub name: String,
-    pub amino_acid: Option<char>, // Optional if not applicable
-    pub unimod_id: Option<usize>
-}
-
-
-/// Loads a unified modification map where the key is either:
-/// - ("57.0215", Some('C')) for mass-based lookup
-/// - ("UniMod:4", Some('C')) for UniMod ID–based lookup
-pub fn load_modifications() -> Result<HashMap<(String, Option<char>), ModificationMap>> {
-    let path: PathBuf = ensure_mod_tsv_exists().context("Failed to ensure TSV exists")?;
-
-    let mut rdr = ReaderBuilder::new()
-        .delimiter(b'\t')
-        .from_path(&path)
-        .context("Failed to read modification TSV file")?;
-
-    let mut modifications = HashMap::new();
-
-    for result in rdr.records() {
-        let record = result.context("Failed to read record")?;
-        let mod_name = record.get(0).unwrap_or("").to_string();
-        let unimod_mass: f64 = record.get(1).unwrap_or("0").parse().unwrap_or(0.0);
-        let unimod_id: Option<usize> = record.get(7).and_then(|s| s.parse().ok());
-
-        let mass_key = format!("{:.4}", unimod_mass);
-        let unimod_key = unimod_id.map(|id| format!("UniMod:{}", id));
-
-        let amino_acid = mod_name.split('@').nth(1).and_then(|aa| aa.chars().next());
-
-        let modification = ModificationMap {
-            name: mod_name,
-            amino_acid,
-            unimod_id,
-        };
-
-        // Insert mass-based key
-        modifications.insert((mass_key.clone(), amino_acid), modification.clone());
-
-        // Insert unimod-id based key if available
-        if let Some(key) = unimod_key {
-            modifications.insert((key, amino_acid), modification.clone());
-        }
-    }
-
-    Ok(modifications)
-}
-
-
-
 /// Removes mass shifts and UniMod annotations from a modified peptide sequence.
 ///
 /// Supports both bracketed mass shifts (e.g., `[+57.0215]`) and UniMod-style
@@ -595,6 +526,59 @@ pub fn get_modification_string(
 }

+pub fn download_pretrained_models_exist() -> Result<PathBuf, io::Error> {
+    let zip_path = PathBuf::from(PRETRAINED_MODELS_ZIP);
+    let extract_dir = PathBuf::from(PRETRAINED_MODELS_PATH);
+
+    // Ensure the parent directory exists
+    if let Some(parent) = zip_path.parent() {
+        fs::create_dir_all(parent)?;
+    }
+
+    // Download the zip file if it doesn't exist
+    if !zip_path.exists() {
+        info!("Downloading pretrained models...");
+        let mut response = reqwest::blocking::get(PRETRAINED_MODELS_URL)
+            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
+        let mut file = File::create(&zip_path)?;
+        io::copy(&mut response, &mut file)?;
+    }
+
+    // Unzip the file if the target directory doesn't exist
+    if !extract_dir.exists() {
+        info!("Unzipping pretrained models...");
+        let file = File::open(&zip_path)?;
+        let mut archive = ZipArchive::new(file)?;
+
+        for i in 0..archive.len() {
+            let mut file = archive.by_index(i)?;
+            let outpath = extract_dir.join(file.mangled_name());
+
+            if file.name().ends_with('/') {
+                // Create directory
+                fs::create_dir_all(&outpath)?;
+            } else {
+                // Write file
+                if let Some(parent) = outpath.parent() {
+                    fs::create_dir_all(parent)?;
+                }
+                let mut outfile = File::create(&outpath)?;
+                io::copy(&mut file, &mut outfile)?;
+            }
+        }
+    }
+
+    Ok(extract_dir)
+}
+
+pub fn parse_instrument_index(instrument: &str) -> usize {
+    let upper_instrument = instrument.to_uppercase();
+
+    INSTRUMENT_DICT.iter()
+        .find(|&&(name, _)| name == upper_instrument)
+        .map_or(UNKNOWN_INSTRUMENT_NUM, |&(_, index)| index)
+}

 // TODO: Derive from PeptDep constants yaml
@@ -758,7 +742,7 @@ mod tests {

     #[test]
     fn test_get_modification_string() {
-        let modification_map = load_modifications().unwrap();
+        let modification_map = MODIFICATION_MAP.clone();

         let test_cases = vec![
             ("PEPTIDE", ""),
             ("PEPTC[+57.0215]IDE", "Carbamidomethyl@C"),
             ("P[+15.9949]EPT[+79.9663]IDE", "Oxidation@P;Phospho@T"),
             ("TVQSLEIDLDSM[+15.9949]R", "Oxidation@M"),
             ("TVQS[+79.9663]LEIDLDSM[+15.9949]R", "Phospho@S;Oxidation@M"),
-            ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Any_N-term;Oxidation@M"),
-            ("[+42.0106]PEPTIDE", "Any_N-term"),
+            ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Acetyl@Protein_N-term;Oxidation@M"),
+            ("[+42.0106]PEPTIDE", "Acetyl@Protein_N-term"),
             ("PEPTIDE[+42.0106]", ""),
             ("P[+15.9949]EP[+79.9663]T[+15.9949]IDE", "Oxidation@P;Oxidation@T"),
-            ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Any_N-term;Oxidation@M"),
+            ("(UniMod:1)M(UniMod:35)AAAATMAAAAR", "Acetyl@Protein_N-term;Oxidation@M"),
         ];
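Note: the map introduced above is deliberately keyed two ways, so callers can resolve a modification either from a rounded delta mass or from a UniMod accession. Illustrative lookups, assuming `MODIFICATION_MAP` is re-exported at this path as in the diff; key formats follow the patch's `{:.4}` mass and `UniMod:{id}` conventions:

    use redeem_properties::utils::peptdeep_utils::MODIFICATION_MAP;

    fn main() {
        // Mass-based key: the delta mass formatted to four decimals, plus the residue.
        if let Some(m) = MODIFICATION_MAP.get(&("57.0215".to_string(), Some('C'))) {
            println!("by mass: {}", m.name); // Carbamidomethyl@C
        }
        // UniMod-based key for the same modification (UniMod:4 per the TSV).
        if let Some(m) = MODIFICATION_MAP.get(&("UniMod:4".to_string(), Some('C'))) {
            println!("by accession: {}", m.name);
        }
    }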
redeem_properties::models::ccs_model::load_collision_cross_section_model; use redeem_properties::models::model_interface::ModelInterface; use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel; use redeem_properties::models::rt_model::load_retention_time_model; -use redeem_properties::utils::data_handling::{PeptideData, RTNormalization}; +use redeem_properties::utils::data_handling::{PeptideData, TargetNormalization}; use redeem_properties::utils::peptdeep_utils::load_modifications; use redeem_properties::utils::utils::get_device; diff --git a/crates/redeem-cli/src/properties/load_data.rs b/crates/redeem-cli/src/properties/load_data.rs index eac4717..9b5b746 100644 --- a/crates/redeem-cli/src/properties/load_data.rs +++ b/crates/redeem-cli/src/properties/load_data.rs @@ -5,7 +5,7 @@ use std::io::BufReader; use anyhow::{Result, Context}; use csv::ReaderBuilder; use redeem_properties::utils::peptdeep_utils::{get_modification_indices, get_modification_string, ModificationMap}; -use redeem_properties::utils::{data_handling::{PeptideData, RTNormalization}, peptdeep_utils::remove_mass_shift}; +use redeem_properties::utils::{data_handling::{PeptideData, TargetNormalization}, peptdeep_utils::remove_mass_shift}; @@ -17,9 +17,9 @@ pub fn load_peptide_data>( model_arch: &str, nce: Option, instrument: Option, - normalize_rt: Option, + normalize_target: Option, modifications: &HashMap<(String, Option), ModificationMap>, -) -> Result<(Vec, RTNormalization)> { +) -> Result<(Vec, TargetNormalization)> { let file = File::open(&path) .with_context(|| format!("Failed to open file: {:?}", path.as_ref()))?; let reader = BufReader::new(file); @@ -34,7 +34,13 @@ pub fn load_peptide_data>( let headers = rdr.headers()?.clone(); let mut peptides = Vec::new(); - let mut rt_values = Vec::new(); + let mut target_values = Vec::new(); + + let normalize_field = if model_arch.contains("ccs") { + "ccs" + } else { + "retention time" + }; for result in rdr.records() { let record = result?; @@ -51,7 +57,6 @@ pub fn load_peptide_data>( let sequence_str = String::from_utf8_lossy(&sequence_bytes); let naked_sequence = Arc::from(remove_mass_shift(&sequence_str).as_bytes().to_vec().into_boxed_slice()); - let mods: Arc<[u8]> = Arc::from(get_modification_string(&sequence_str, modifications).into_bytes().into_boxed_slice()); let mod_sites: Arc<[u8]> = Arc::from(get_modification_indices(&sequence_str).into_bytes().into_boxed_slice()); @@ -98,10 +103,12 @@ pub fn load_peptide_data>( }), _ => None, }; - - if let Some(rt) = retention_time { - rt_values.push(rt); + if let Some(val) = match normalize_field { + "ccs" => ccs, + _ => retention_time, + } { + target_values.push(val); } peptides.push(PeptideData { @@ -120,29 +127,38 @@ pub fn load_peptide_data>( }); } - match RTNormalization::from_str(normalize_rt) { - RTNormalization::ZScore(_, _) if !rt_values.is_empty() => { - let mean = rt_values.iter().copied().sum::() / rt_values.len() as f32; - let std = (rt_values.iter().map(|v| (v - mean).powi(2)).sum::() / rt_values.len() as f32).sqrt(); + match TargetNormalization::from_str(normalize_target) { + TargetNormalization::ZScore(_, _) if !target_values.is_empty() => { + let mean = target_values.iter().copied().sum::() / target_values.len() as f32; + let std = (target_values.iter().map(|v| (v - mean).powi(2)).sum::() / target_values.len() as f32).sqrt(); for peptide in &mut peptides { - if let Some(rt) = peptide.retention_time.as_mut() { - *rt = (*rt - mean) / std; + match normalize_field { + "ccs" => if let Some(val) = 
peptide.ccs.as_mut() { + *val = (*val - mean) / std; + }, + _ => if let Some(val) = peptide.retention_time.as_mut() { + *val = (*val - mean) / std; + }, } } - Ok((peptides, RTNormalization::ZScore(mean, std))) + Ok((peptides, TargetNormalization::ZScore(mean, std))) } - RTNormalization::MinMax(_, _) if !rt_values.is_empty() => { - let min = *rt_values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); - let max = *rt_values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + TargetNormalization::MinMax(_, _) if !target_values.is_empty() => { + let min = *target_values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let max = *target_values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); let range = max - min; for peptide in &mut peptides { - if let Some(rt) = peptide.retention_time.as_mut() { - *rt = (*rt - min) / range; + match normalize_field { + "ccs" => if let Some(val) = peptide.ccs.as_mut() { + *val = (*val - min) / range; + }, + _ => if let Some(val) = peptide.retention_time.as_mut() { + *val = (*val - min) / range; + }, } } - Ok((peptides, RTNormalization::MinMax(min, max))) + Ok((peptides, TargetNormalization::MinMax(min, max))) } - _ => Ok((peptides, RTNormalization::None)) + _ => Ok((peptides, TargetNormalization::None)), } } - diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index bbbec1c..701bf8e 100644 --- a/crates/redeem-cli/src/properties/train/trainer.rs +++ b/crates/redeem-cli/src/properties/train/trainer.rs @@ -6,7 +6,7 @@ use redeem_properties::models::{ ccs_cnn_lstm_model::CCSCNNLSTMModel, ccs_cnn_tf_model::CCSCNNTFModel, rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel, }; -use redeem_properties::utils::data_handling::{PeptideData, RTNormalization}; +use redeem_properties::utils::data_handling::{PeptideData, TargetNormalization}; use redeem_properties::utils::peptdeep_utils::load_modifications; use redeem_properties::utils::utils::get_device; use report_builder::{ @@ -210,9 +210,9 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { match (true_pep.retention_time, pred_pep.retention_time) { (Some(t), Some(p)) => { let t_denorm = match norm_factor { - RTNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64, - RTNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64, - RTNormalization::None => t as f64, + TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64, + TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64, + TargetNormalization::None => t as f64, }; Some((t_denorm, p as f64)) } diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs index 2353c44..eec347a 100644 --- a/crates/redeem-properties/src/models/model_interface.rs +++ b/crates/redeem-properties/src/models/model_interface.rs @@ -5,7 +5,7 @@ use crate::{ }, models::{ccs_model::CCSModelWrapper, ms2_model::MS2ModelWrapper, rt_model::RTModelWrapper}, utils::{ - data_handling::{PeptideBatchData, PeptideData, RTNormalization}, + data_handling::{PeptideBatchData, PeptideData, TargetNormalization}, logging::Progress, peptdeep_utils::{ get_modification_indices, get_modification_string, parse_instrument_index, @@ -910,7 +910,7 @@ pub trait ModelInterface: Send + Sync + ModelClone { (String, Option), crate::utils::peptdeep_utils::ModificationMap, >, - rt_norm: RTNormalization, + rt_norm: TargetNormalization, ) -> Result> 
From 14ff5b6c286b0748a514492bee4aa34b077e1310 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 12:42:03 -0400
Subject: [PATCH 55/75] refactor: Add once_cell dependency for redeem-properties crate

---
 crates/redeem-properties/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/redeem-properties/Cargo.toml b/crates/redeem-properties/Cargo.toml
index 56ef10b..e29ba46 100644
--- a/crates/redeem-properties/Cargo.toml
+++ b/crates/redeem-properties/Cargo.toml
@@ -14,6 +14,7 @@ env_logger = "0.8.4"
 log = "0.4.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_yaml = "0.9"
+once_cell = "1.8"
 ndarray = "0.15"
 #ndarray = "0.16.1"
 reqwest = { version = "0.11", features = ["blocking"] }
From 782d9c38a93c4643371ad831eb4b397548b08162 Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 12:45:55 -0400
Subject: [PATCH 56/75] add: modification.tsv asset

---
 .../redeem-properties/assets/modification.tsv | 2797 +++++++++++++++++
 1 file changed, 2797
insertions(+) create mode 100644 crates/redeem-properties/assets/modification.tsv diff --git a/crates/redeem-properties/assets/modification.tsv b/crates/redeem-properties/assets/modification.tsv new file mode 100644 index 0000000..47c455f --- /dev/null +++ b/crates/redeem-properties/assets/modification.tsv @@ -0,0 +1,2797 @@ +mod_name unimod_mass unimod_avge_mass composition unimod_modloss modloss_composition classification unimod_id smiles modloss_importance +Acetyl@T 42.010565 42.0367 H(2)C(2)O(1) 0.0 Post-translational 1 0.0 +Acetyl@Protein_N-term 42.010565 42.0367 H(2)C(2)O(1) 0.0 Post-translational 1 C(=O)C 0.0 +Acetyl@S 42.010565 42.0367 H(2)C(2)O(1) 0.0 Post-translational 1 0.0 +Acetyl@C 42.010565 42.0367 H(2)C(2)O(1) 0.0 Post-translational 1 0.0 +Acetyl@Any_N-term 42.010565 42.0367 H(2)C(2)O(1) 0.0 Multiple 1 C(=O)C 0.0 +Acetyl@K 42.010565 42.0367 H(2)C(2)O(1) 0.0 Multiple 1 CC(=O)NCCCC[C@H](N([Xe])([Xe]))C(=O)[Rn] 0.0 +Acetyl@Y 42.010565 42.0367 H(2)C(2)O(1) 0.0 Chemical derivative 1 0.0 +Acetyl@H 42.010565 42.0367 H(2)C(2)O(1) 0.0 Chemical derivative 1 0.0 +Acetyl@R 42.010565 42.0367 H(2)C(2)O(1) 0.0 Artefact 1 0.0 +Amidated@Any_C-term -0.984016 -0.9848 H(1)N(1)O(-1) 0.0 Artefact 2 N 0.0 +Amidated@Protein_C-term -0.984016 -0.9848 H(1)N(1)O(-1) 0.0 Post-translational 2 N 0.0 +Biotin@Any_N-term 226.077598 226.2954 H(14)C(10)N(2)O(2)S(1) 0.0 Chemical derivative 3 C(=O)CCCCC1SCC2NC(=O)NC21 0.0 +Biotin@K 226.077598 226.2954 H(14)C(10)N(2)O(2)S(1) 0.0 Post-translational 3 0.0 +Carbamidomethyl@Y 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@T 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@S 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@E 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@D 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@H 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@Any_N-term 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 C(=O)NC 0.0 +Carbamidomethyl@K 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Artefact 4 0.0 +Carbamidomethyl@C 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 4 C(C(C(=O)[Rn])N([Xe])([Xe]))SCC(=O)N 0.0 +Carbamidomethyl@U 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 4 0.0 +Carbamidomethyl@M 57.021464 57.0513 H(3)C(2)N(1)O(1) 105.024835 H(7)C(3)N(1)O(1)S(1) Chemical derivative 4 CS(CCC(N([Xe])([Xe]))C([Rn])=O)=CC(N)=O 0.5 +Carbamyl@Y 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Chemical derivative 5 0.0 +Carbamyl@T 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Chemical derivative 5 0.0 +Carbamyl@S 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Chemical derivative 5 0.0 +Carbamyl@M 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Artefact 5 0.0 +Carbamyl@C 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Artefact 5 0.0 +Carbamyl@R 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Artefact 5 0.0 +Carbamyl@Any_N-term 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Multiple 5 C(=O)N 0.0 +Carbamyl@K 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Multiple 5 0.0 +Carbamyl@Protein_N-term 43.005814 43.0247 H(1)C(1)N(1)O(1) 0.0 Post-translational 5 C(=O)N 0.0 +Carboxymethyl@Any_N-term 58.005479 58.0361 H(2)C(2)O(2) 0.0 Artefact 6 0.0 +Carboxymethyl@K 58.005479 58.0361 H(2)C(2)O(2) 0.0 Artefact 6 0.0 +Carboxymethyl@C 58.005479 58.0361 H(2)C(2)O(2) 0.0 Chemical derivative 6 0.0 +Carboxymethyl@W 58.005479 58.0361 H(2)C(2)O(2) 0.0 Chemical derivative 6 0.0 +Carboxymethyl@U 58.005479 58.0361 H(2)C(2)O(2) 0.0 Chemical derivative 6 0.0 +Deamidated@Q 0.984016 0.9848 
H(-1)N(-1)O(1) 0.0 Artefact 7 C(CC(=O)O)[C@@H](C(=O)[Rn])N([Xe])([Xe]) 0.0 +Deamidated@R 0.984016 0.9848 H(-1)N(-1)O(1) 43.005814 H(1)C(1)N(1)O(1) Post-translational 7 0.5 +Deamidated@N 0.984016 0.9848 H(-1)N(-1)O(1) 0.0 Artefact 7 C([C@@H](C(=O)[Rn])N([Xe])([Xe]))C(=O)O 0.0 +Deamidated@F^Protein_N-term 0.984016 0.9848 H(-1)N(-1)O(1) 0.0 Post-translational 7 0.0 +ICAT-G@C 486.251206 486.6253 H(38)C(22)N(4)O(6)S(1) 0.0 Isotopic label 8 0.0 +ICAT-G:2H(8)@C 494.30142 494.6746 H(30)2H(8)C(22)N(4)O(6)S(1) 0.0 Isotopic label 9 0.0 +Met->Hse@M^Any_C-term -29.992806 -30.0922 H(-2)C(-1)O(1)S(-1) 0.0 Chemical derivative 10 N([Xe])([Xe])[C@H](C(=O)[Rn])CCO 0.0 +Met->Hsl@M^Any_C-term -48.003371 -48.1075 H(-4)C(-1)S(-1) 0.0 Chemical derivative 11 0.0 +ICAT-D:2H(8)@C 450.275205 450.6221 H(26)2H(8)C(20)N(4)O(5)S(1) 0.0 Isotopic label 12 0.0 +ICAT-D@C 442.224991 442.5728 H(34)C(20)N(4)O(5)S(1) 0.0 Isotopic label 13 0.0 +NIPCAM@C 99.068414 99.1311 H(9)C(5)N(1)O(1) 0.0 Chemical derivative 17 0.0 +PEO-Iodoacetyl-LC-Biotin@C 414.193691 414.5196 H(30)C(18)N(4)O(5)S(1) 0.0 Chemical derivative 20 0.0 +Phospho@E 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@R 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@K 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@H 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@C 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@D 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 0.0 +Phospho@Y 79.966331 79.9799 H(1)O(3)P(1) 0.0 Post-translational 21 C1=CC(=CC=C1CC(C(=O)[Rn])N([Xe])([Xe]))OP(=O)(O)O 0.0 +Phospho@T 79.966331 79.9799 H(1)O(3)P(1) 97.976896 H(3)O(4)P(1) Post-translational 21 CC(C(C(=O)[Rn])N([Xe])([Xe]))OP(=O)(O)O 10000000.0 +Phospho@S 79.966331 79.9799 H(1)O(3)P(1) 97.976896 H(3)O(4)P(1) Post-translational 21 O=P(O)(O)OC[C@@H](C(=O)[Rn])N([Xe])([Xe]) 100000000.0 +Methamidophos-S@Y 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Methamidophos-S@T 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Methamidophos-S@S 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Methamidophos-S@K 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Methamidophos-S@H 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Methamidophos-S@C 108.975121 109.0873 H(4)C(1)N(1)O(1)P(1)S(1) 0.0 Chemical derivative 2007 0.0 +Dehydrated@D -18.010565 -18.0153 H(-2)O(-1) 0.0 Chemical derivative 23 0.0 +Dehydrated@Y -18.010565 -18.0153 H(-2)O(-1) 0.0 Post-translational 23 0.0 +Dehydrated@T -18.010565 -18.0153 H(-2)O(-1) 0.0 Post-translational 23 0.0 +Dehydrated@S -18.010565 -18.0153 H(-2)O(-1) 0.0 Post-translational 23 0.0 +Dehydrated@N^Protein_C-term -18.010565 -18.0153 H(-2)O(-1) 0.0 Post-translational 23 0.0 +Dehydrated@Q^Protein_C-term -18.010565 -18.0153 H(-2)O(-1) 0.0 Post-translational 23 0.0 +Dehydrated@C^Any_N-term -18.010565 -18.0153 H(-2)O(-1) 0.0 Artefact 23 0.0 +Propionamide@C 71.037114 71.0779 H(5)C(3)N(1)O(1) 0.0 Artefact 24 0.0 +Propionamide@K 71.037114 71.0779 H(5)C(3)N(1)O(1) 0.0 Chemical derivative 24 0.0 +Propionamide@Any_N-term 71.037114 71.0779 H(5)C(3)N(1)O(1) 0.0 Chemical derivative 24 CCC(N)=O 0.0 +Pyridylacetyl@Any_N-term 119.037114 119.1207 H(5)C(7)N(1)O(1) 0.0 Chemical derivative 25 C(=O)Cc1ccccn1 0.0 +Pyridylacetyl@K 119.037114 119.1207 H(5)C(7)N(1)O(1) 0.0 Chemical derivative 25 0.0 +Pyro-carbamidomethyl@C^Any_N-term 
39.994915 40.0208 C(2)O(1) 0.0 Artefact 26 0.0 +Glu->pyro-Glu@E^Any_N-term -18.010565 -18.0153 H(-2)O(-1) 0.0 Artefact 27 O=C([Rn])[C@H]1N([Xe])C(=O)CC1 0.0 +Gln->pyro-Glu@Q^Any_N-term -17.026549 -17.0305 H(-3)N(-1) 0.0 Artefact 28 O=C([Rn])[C@H]1N([Xe])C(=O)CC1 0.0 +SMA@Any_N-term 127.063329 127.1412 H(9)C(6)N(1)O(2) 0.0 Chemical derivative 29 0.0 +SMA@K 127.063329 127.1412 H(9)C(6)N(1)O(2) 0.0 Chemical derivative 29 0.0 +Cation:Na@D 21.981943 21.9818 H(-1)Na(1) 0.0 Artefact 30 0.0 +Cation:Na@Any_C-term 21.981943 21.9818 H(-1)Na(1) 0.0 Artefact 30 O[Na] 0.0 +Cation:Na@E 21.981943 21.9818 H(-1)Na(1) 0.0 Artefact 30 0.0 +Pyridylethyl@C 105.057849 105.1372 H(7)C(7)N(1) 0.0 Chemical derivative 31 C1=CN=CC=C1CCSCC(C(=O)[Rn])N([Xe])([Xe]) 0.0 +Methyl@E 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@D 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@Any_C-term 14.01565 14.0266 H(2)C(1) 0.0 Multiple 34 OC 0.0 +Methyl@Protein_N-term 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 C 0.0 +Methyl@L 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@I 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@R 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@Q 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@Any_N-term 14.01565 14.0266 H(2)C(1) 0.0 Chemical derivative 34 C 0.0 +Methyl@N 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@K 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@H 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@C 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@S 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Methyl@T 14.01565 14.0266 H(2)C(1) 0.0 Post-translational 34 0.0 +Oxidation@T 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@E 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@S 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@Q 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@L 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@I 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@U 15.994915 15.9994 O(1) 0.0 Multiple 35 0.0 +Oxidation@G^Any_C-term 15.994915 15.9994 O(1) 0.0 Pre-translational 35 0.0 +Oxidation@W 15.994915 15.9994 O(1) 0.0 Artefact 35 0.0 +Oxidation@C 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@H 15.994915 15.9994 O(1) 0.0 Artefact 35 0.0 +Oxidation@V 15.994915 15.9994 O(1) 0.0 Chemical derivative 35 0.0 +Oxidation@R 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@M 15.994915 15.9994 O(1) 63.998285 H(4)C(1)O(1)S(1) Artefact 35 O=C([Rn])C(N([Xe])([Xe]))CCS(=O)C 0.5 +Oxidation@Y 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@F 15.994915 15.9994 O(1) 0.0 Artefact 35 0.0 +Oxidation@P 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@N 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@K 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Oxidation@D 15.994915 15.9994 O(1) 0.0 Post-translational 35 0.0 +Dimethyl@Protein_N-term 28.0313 28.0532 H(4)C(2) 0.0 Isotopic label 36 C 0.0 +Dimethyl@P^Protein_N-term 28.0313 28.0532 H(4)C(2) 0.0 Post-translational 36 0.0 +Dimethyl@N 28.0313 28.0532 H(4)C(2) 0.0 Post-translational 36 0.0 +Dimethyl@Any_N-term 28.0313 28.0532 H(4)C(2) 0.0 Isotopic label 36 C 0.0 +Dimethyl@K 28.0313 28.0532 H(4)C(2) 0.0 Multiple 36 CN(C)CCCC[C@H](N([Xe])([Xe]))C(=O)[Rn] 0.0 +Dimethyl@R 28.0313 28.0532 H(4)C(2) 0.0 
Post-translational 36 0.0 +Trimethyl@A^Protein_N-term 42.04695 42.0797 H(6)C(3) 0.0 Post-translational 37 0.0 +Trimethyl@R 42.04695 42.0797 H(6)C(3) 0.0 Chemical derivative 37 0.0 +Trimethyl@K 42.04695 42.0797 H(6)C(3) 59.073499 H(9)C(3)N(1) Post-translational 37 0.5 +Methylthio@C 45.987721 46.0916 H(2)C(1)S(1) 0.0 Multiple 39 CSSC[C@H](N([Xe])([Xe]))C([Rn])=O 0.0 +Methylthio@N 45.987721 46.0916 H(2)C(1)S(1) 0.0 Post-translational 39 0.0 +Methylthio@D 45.987721 46.0916 H(2)C(1)S(1) 0.0 Post-translational 39 0.0 +Methylthio@K 45.987721 46.0916 H(2)C(1)S(1) 0.0 Artefact 39 0.0 +Methylthio@Any_N-term 45.987721 46.0916 H(2)C(1)S(1) 0.0 Artefact 39 0.0 +Sulfo@S 79.956815 80.0632 O(3)S(1) 79.956815 O(3)S(1) Post-translational 40 0.5 +Sulfo@T 79.956815 80.0632 O(3)S(1) 79.956815 O(3)S(1) Post-translational 40 0.5 +Sulfo@Y 79.956815 80.0632 O(3)S(1) 79.956815 O(3)S(1) Post-translational 40 0.5 +Sulfo@C 79.956815 80.0632 O(3)S(1) 0.0 Post-translational 40 0.0 +Hex@C 162.052824 162.1406 H(10)C(6)O(5) 0.0 Other glycosylation 41 0.0 +Hex@W 162.052824 162.1406 H(10)C(6)O(5) 0.0 Other glycosylation 41 0.0 +Hex@T 162.052824 162.1406 H(10)C(6)O(5) 162.052824 H(10)C(6)O(5) O-linked glycosylation 41 0.5 +Hex@S 162.052824 162.1406 H(10)C(6)O(5) 162.052824 H(10)C(6)O(5) O-linked glycosylation 41 0.5 +Hex@Any_N-term 162.052824 162.1406 H(10)C(6)O(5) 54.031694 H(6)O(3) Other glycosylation 41 0.5 +Hex@N 162.052824 162.1406 H(10)C(6)O(5) 162.052824 H(10)C(6)O(5) N-linked glycosylation 41 0.5 +Hex@R 162.052824 162.1406 H(10)C(6)O(5) 54.031694 H(6)O(3) Other glycosylation 41 0.5 +Hex@K 162.052824 162.1406 H(10)C(6)O(5) 54.031694 H(6)O(3) Other glycosylation 41 0.5 +Hex@Y 162.052824 162.1406 H(10)C(6)O(5) 0.0 O-linked glycosylation 41 0.0 +Lipoyl@K 188.032956 188.3103 H(12)C(8)O(1)S(2) 0.0 Post-translational 42 0.0 +HexNAc@C 203.079373 203.1925 H(13)C(8)N(1)O(5) 203.079373 H(13)C(8)N(1)O(5) Other glycosylation 43 0.5 +HexNAc@T 203.079373 203.1925 H(13)C(8)N(1)O(5) 203.079373 H(13)C(8)N(1)O(5) O-linked glycosylation 43 0.5 +HexNAc@S 203.079373 203.1925 H(13)C(8)N(1)O(5) 203.079373 H(13)C(8)N(1)O(5) O-linked glycosylation 43 0.5 +HexNAc@N 203.079373 203.1925 H(13)C(8)N(1)O(5) 203.079373 H(13)C(8)N(1)O(5) N-linked glycosylation 43 0.5 +Farnesyl@C 204.187801 204.3511 H(24)C(15) 0.0 Post-translational 44 0.0 +Myristoyl@C 210.198366 210.3556 H(26)C(14)O(1) 0.0 Post-translational 45 0.0 +Myristoyl@K 210.198366 210.3556 H(26)C(14)O(1) 0.0 Post-translational 45 0.0 +Myristoyl@G^Any_N-term 210.198366 210.3556 H(26)C(14)O(1) 0.0 Post-translational 45 0.0 +PyridoxalPhosphate@K 229.014009 229.1266 H(8)C(8)N(1)O(5)P(1) 0.0 Post-translational 46 0.0 +Palmitoyl@T 238.229666 238.4088 H(30)C(16)O(1) 0.0 Post-translational 47 0.0 +Palmitoyl@S 238.229666 238.4088 H(30)C(16)O(1) 0.0 Post-translational 47 0.0 +Palmitoyl@K 238.229666 238.4088 H(30)C(16)O(1) 0.0 Post-translational 47 0.0 +Palmitoyl@C 238.229666 238.4088 H(30)C(16)O(1) 0.0 Post-translational 47 0.0 +Palmitoyl@Protein_N-term 238.229666 238.4088 H(30)C(16)O(1) 0.0 Post-translational 47 0.0 +GeranylGeranyl@C 272.250401 272.4681 H(32)C(20) 0.0 Post-translational 48 0.0 +Phosphopantetheine@S 340.085794 340.333 H(21)C(11)N(2)O(6)P(1)S(1) 0.0 Post-translational 49 0.0 +FAD@Y 783.141486 783.5339 H(31)C(27)N(9)O(15)P(2) 0.0 Post-translational 50 0.0 +FAD@H 783.141486 783.5339 H(31)C(27)N(9)O(15)P(2) 0.0 Post-translational 50 0.0 +FAD@C 783.141486 783.5339 H(31)C(27)N(9)O(15)P(2) 0.0 Post-translational 50 0.0 +Tripalmitate@C^Protein_N-term 788.725777 789.3049 H(96)C(51)O(5) 0.0 
Post-translational 51 0.0 +Guanidinyl@K 42.021798 42.04 H(2)C(1)N(2) 0.0 Chemical derivative 52 0.0 +Guanidinyl@Any_N-term 42.021798 42.04 H(2)C(1)N(2) 0.0 Chemical derivative 52 0.0 +HNE@K 156.11503 156.2221 H(16)C(9)O(2) 0.0 Post-translational 53 0.0 +HNE@H 156.11503 156.2221 H(16)C(9)O(2) 0.0 Post-translational 53 0.0 +HNE@C 156.11503 156.2221 H(16)C(9)O(2) 0.0 Post-translational 53 0.0 +HNE@A 156.11503 156.2221 H(16)C(9)O(2) 0.0 Post-translational 53 0.0 +HNE@L 156.11503 156.2221 H(16)C(9)O(2) 0.0 Post-translational 53 0.0 +Glucuronyl@T 176.032088 176.1241 H(8)C(6)O(6) 176.032088 H(8)C(6)O(6) O-linked glycosylation 54 0.5 +Glucuronyl@S 176.032088 176.1241 H(8)C(6)O(6) 176.032088 H(8)C(6)O(6) O-linked glycosylation 54 0.5 +Glucuronyl@Protein_N-term 176.032088 176.1241 H(8)C(6)O(6) 0.0 Other glycosylation 54 0.0 +Glutathione@C 305.068156 305.3076 H(15)C(10)N(3)O(6)S(1) 0.0 Post-translational 55 0.0 +Acetyl:2H(3)@Y 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@T 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@S 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@H 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@Any_N-term 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@K 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Acetyl:2H(3)@Protein_N-term 45.029395 45.0552 H(-1)2H(3)C(2)O(1) 0.0 Isotopic label 56 0.0 +Propionyl@Protein_N-term 56.026215 56.0633 H(4)C(3)O(1) 0.0 Multiple 58 C(=O)CC 0.0 +Propionyl@T 56.026215 56.0633 H(4)C(3)O(1) 0.0 Isotopic label 58 0.0 +Propionyl@S 56.026215 56.0633 H(4)C(3)O(1) 0.0 Chemical derivative 58 0.0 +Propionyl@K 56.026215 56.0633 H(4)C(3)O(1) 0.0 Isotopic label 58 CCC(=O)NCCCCC(C(=O)[Rn])N([Xe])([Xe]) 0.0 +Propionyl@Any_N-term 56.026215 56.0633 H(4)C(3)O(1) 0.0 Isotopic label 58 C(=O)CC 0.0 +Propionyl:13C(3)@Any_N-term 59.036279 59.0412 H(4)13C(3)O(1) 0.0 Isotopic label 59 0.0 +Propionyl:13C(3)@K 59.036279 59.0412 H(4)13C(3)O(1) 0.0 Isotopic label 59 0.0 +GIST-Quat@Any_N-term 127.099714 127.1842 H(13)C(7)N(1)O(1) 59.073499 H(9)C(3)N(1) Isotopic label 60 0.5 +GIST-Quat@K 127.099714 127.1842 H(13)C(7)N(1)O(1) 59.073499 H(9)C(3)N(1) Isotopic label 60 0.5 +GIST-Quat:2H(3)@Any_N-term 130.118544 130.2027 H(10)2H(3)C(7)N(1)O(1) 62.09233 H(6)2H(3)C(3)N(1) Isotopic label 61 0.5 +GIST-Quat:2H(3)@K 130.118544 130.2027 H(10)2H(3)C(7)N(1)O(1) 62.09233 H(6)2H(3)C(3)N(1) Isotopic label 61 0.5 +GIST-Quat:2H(6)@Any_N-term 133.137375 133.2212 H(7)2H(6)C(7)N(1)O(1) 65.11116 H(3)2H(6)C(3)N(1) Isotopic label 62 0.5 +GIST-Quat:2H(6)@K 133.137375 133.2212 H(7)2H(6)C(7)N(1)O(1) 65.11116 H(3)2H(6)C(3)N(1) Isotopic label 62 0.5 +GIST-Quat:2H(9)@Any_N-term 136.156205 136.2397 H(4)2H(9)C(7)N(1)O(1) 68.12999 2H(9)C(3)N(1) Isotopic label 63 0.5 +GIST-Quat:2H(9)@K 136.156205 136.2397 H(4)2H(9)C(7)N(1)O(1) 68.12999 2H(9)C(3)N(1) Isotopic label 63 0.5 +Succinyl@Protein_N-term 100.016044 100.0728 H(4)C(4)O(3) 0.0 Post-translational 64 0.0 +Succinyl@Any_N-term 100.016044 100.0728 H(4)C(4)O(3) 0.0 Isotopic label 64 0.0 +Succinyl@K 100.016044 100.0728 H(4)C(4)O(3) 0.0 Isotopic label 64 C(CCN)CC(C(=O)[Rn])N([Xe])C(=O)CCC(=O)O 0.0 +Succinyl:2H(4)@Any_N-term 104.041151 104.0974 2H(4)C(4)O(3) 0.0 Isotopic label 65 0.0 +Succinyl:2H(4)@K 104.041151 104.0974 2H(4)C(4)O(3) 0.0 Isotopic label 65 0.0 +Succinyl:13C(4)@Any_N-term 104.029463 104.0434 H(4)13C(4)O(3) 0.0 Isotopic label 66 0.0 +Succinyl:13C(4)@K 104.029463 104.0434 H(4)13C(4)O(3) 0.0 
Isotopic label 66 0.0 +probiotinhydrazide@P 258.115047 258.3405 H(18)C(10)N(4)O(2)S(1) 0.0 Chemical derivative 357 0.0 +Pro->pyro-Glu@P 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 359 0.0 +His->Asn@H -23.015984 -23.0366 H(-1)C(-2)N(-1)O(1) 0.0 AA substitution 348 0.0 +His->Asp@H -22.031969 -22.0519 H(-2)C(-2)N(-2)O(2) 0.0 AA substitution 349 0.0 +Trp->Hydroxykynurenin@W 19.989829 19.9881 C(-1)O(2) 0.0 Chemical derivative 350 0.0 +Delta:H(4)C(3)@K 40.0313 40.0639 H(4)C(3) 0.0 Other 256 0.0 +Delta:H(4)C(3)@H 40.0313 40.0639 H(4)C(3) 0.0 Other 256 0.0 +Delta:H(4)C(3)@Protein_N-term 40.0313 40.0639 H(4)C(3) 0.0 Other 256 0.0 +Delta:H(4)C(2)@K 28.0313 28.0532 H(4)C(2) 0.0 Other 255 0.0 +Delta:H(4)C(2)@H 28.0313 28.0532 H(4)C(2) 0.0 Other 255 0.0 +Delta:H(4)C(2)@Any_N-term 28.0313 28.0532 H(4)C(2) 0.0 Other 255 0.0 +Cys->Dha@C -33.987721 -34.0809 H(-2)S(-1) 0.0 Chemical derivative 368 0.0 +Arg->GluSA@R -43.053433 -43.0711 H(-5)C(-1)N(-3)O(1) 0.0 Chemical derivative 344 0.0 +Trioxidation@Y 47.984744 47.9982 O(3) 0.0 Chemical derivative 345 0.0 +Trioxidation@W 47.984744 47.9982 O(3) 0.0 Chemical derivative 345 0.0 +Trioxidation@C 47.984744 47.9982 O(3) 0.0 Chemical derivative 345 0.0 +Trioxidation@F 47.984744 47.9982 O(3) 0.0 Artefact 345 0.0 +Iminobiotin@Any_N-term 225.093583 225.3106 H(15)C(10)N(3)O(1)S(1) 0.0 Chemical derivative 89 0.0 +Iminobiotin@K 225.093583 225.3106 H(15)C(10)N(3)O(1)S(1) 0.0 Chemical derivative 89 0.0 +ESP@Any_N-term 338.177647 338.4682 H(26)C(16)N(4)O(2)S(1) 0.0 Isotopic label 90 0.0 +ESP@K 338.177647 338.4682 H(26)C(16)N(4)O(2)S(1) 0.0 Isotopic label 90 0.0 +ESP:2H(10)@Any_N-term 348.240414 348.5299 H(16)2H(10)C(16)N(4)O(2)S(1) 0.0 Isotopic label 91 0.0 +ESP:2H(10)@K 348.240414 348.5299 H(16)2H(10)C(16)N(4)O(2)S(1) 0.0 Isotopic label 91 0.0 +NHS-LC-Biotin@Any_N-term 339.161662 339.453 H(25)C(16)N(3)O(3)S(1) 0.0 Chemical derivative 92 0.0 +NHS-LC-Biotin@K 339.161662 339.453 H(25)C(16)N(3)O(3)S(1) 0.0 Chemical derivative 92 0.0 +EDT-maleimide-PEO-biotin@T 601.206246 601.8021 H(39)C(25)N(5)O(6)S(3) 0.0 Chemical derivative 93 0.0 +EDT-maleimide-PEO-biotin@S 601.206246 601.8021 H(39)C(25)N(5)O(6)S(3) 0.0 Chemical derivative 93 0.0 +IMID@K 68.037448 68.0773 H(4)C(3)N(2) 0.0 Isotopic label 94 0.0 +IMID:2H(4)@K 72.062555 72.1019 2H(4)C(3)N(2) 0.0 Isotopic label 95 0.0 +Lysbiotinhydrazide@K 241.088497 241.31 H(15)C(10)N(3)O(2)S(1) 0.0 Chemical derivative 353 0.0 +Propionamide:2H(3)@C 74.055944 74.0964 H(2)2H(3)C(3)N(1)O(1) 0.0 Isotopic label 97 0.0 +Nitro@Y 44.985078 44.9976 H(-1)N(1)O(2) 0.0 Chemical derivative 354 O=[N+]([O-])c1cc(ccc1O)C[C@@H](C(=O)[Rn])N([Xe])([Xe]) 0.0 +Nitro@W 44.985078 44.9976 H(-1)N(1)O(2) 0.0 Chemical derivative 354 0.0 +Nitro@F 44.985078 44.9976 H(-1)N(1)O(2) 0.0 Artefact 354 0.0 +ICAT-C@C 227.126991 227.2603 H(17)C(10)N(3)O(3) 0.0 Isotopic label 105 0.0 +Delta:H(2)C(2)@Protein_N-term 26.01565 26.0373 H(2)C(2) 0.0 Other 254 0.0 +Delta:H(2)C(2)@K 26.01565 26.0373 H(2)C(2) 0.0 Other 254 0.0 +Delta:H(2)C(2)@H 26.01565 26.0373 H(2)C(2) 0.0 Other 254 0.0 +Delta:H(2)C(2)@Any_N-term 26.01565 26.0373 H(2)C(2) 0.0 Other 254 0.0 +Trp->Kynurenin@W 3.994915 3.9887 C(-1)O(1) 0.0 Chemical derivative 351 0.0 +Lys->Allysine@K -1.031634 -1.0311 H(-3)N(-1)O(1) 0.0 Post-translational 352 0.0 +ICAT-C:13C(9)@C 236.157185 236.1942 H(17)C(1)13C(9)N(3)O(3) 0.0 Isotopic label 106 0.0 +FormylMet@Protein_N-term 159.035399 159.2062 H(9)C(6)N(1)O(2)S(1) 0.0 Pre-translational 107 0.0 +Nethylmaleimide@C 125.047679 125.1253 H(7)C(6)N(1)O(2) 0.0 Chemical derivative 108 0.0 
+OxLysBiotinRed@K 354.172562 354.4676 H(26)C(16)N(4)O(3)S(1) 0.0 Chemical derivative 112 0.0 +IBTP@C 316.138088 316.3759 H(21)C(22)P(1) 0.0 Chemical derivative 119 0.0 +OxLysBiotin@K 352.156911 352.4518 H(24)C(16)N(4)O(3)S(1) 0.0 Chemical derivative 113 0.0 +OxProBiotinRed@P 371.199111 371.4982 H(29)C(16)N(5)O(3)S(1) 0.0 Chemical derivative 114 0.0 +OxProBiotin@P 369.183461 369.4823 H(27)C(16)N(5)O(3)S(1) 0.0 Chemical derivative 115 0.0 +OxArgBiotin@R 310.135113 310.4118 H(22)C(15)N(2)O(3)S(1) 0.0 Chemical derivative 116 0.0 +OxArgBiotinRed@R 312.150763 312.4277 H(24)C(15)N(2)O(3)S(1) 0.0 Chemical derivative 117 0.0 +EDT-iodoacetyl-PEO-biotin@T 490.174218 490.7034 H(34)C(20)N(4)O(4)S(3) 0.0 Chemical derivative 118 0.0 +EDT-iodoacetyl-PEO-biotin@S 490.174218 490.7034 H(34)C(20)N(4)O(4)S(3) 0.0 Chemical derivative 118 0.0 +GG@C 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Other 121 0.0 +GG@T 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Other 121 0.0 +GG@S 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Other 121 0.0 +GG@K 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Other 121 1000000.0 +GG@Protein_N-term 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Post-translational 121 0.0 +Formyl@Protein_N-term 27.994915 28.0101 C(1)O(1) 0.0 Post-translational 122 0.0 +Formyl@T 27.994915 28.0101 C(1)O(1) 0.0 Artefact 122 0.0 +Formyl@K 27.994915 28.0101 C(1)O(1) 0.0 Artefact 122 0.0 +Formyl@Any_N-term 27.994915 28.0101 C(1)O(1) 0.0 Artefact 122 0.0 +Formyl@S 27.994915 28.0101 C(1)O(1) 0.0 Artefact 122 0.0 +ICAT-H@C 345.097915 345.7754 H(20)C(15)N(1)O(6)Cl(1) 0.0 Isotopic label 123 0.0 +ICAT-H:13C(6)@C 351.118044 351.7313 H(20)C(9)13C(6)N(1)O(6)Cl(1) 0.0 Isotopic label 124 0.0 +Cation:K@Any_C-term 37.955882 38.0904 H(-1)K(1) 0.0 Artefact 530 O[K] 0.0 +Cation:K@E 37.955882 38.0904 H(-1)K(1) 0.0 Artefact 530 0.0 +Cation:K@D 37.955882 38.0904 H(-1)K(1) 0.0 Artefact 530 0.0 +Xlink:DTSSP[88]@Protein_N-term 87.998285 88.1283 H(4)C(3)O(1)S(1) 0.0 Chemical derivative 126 0.0 +Xlink:DTSSP[88]@K 87.998285 88.1283 H(4)C(3)O(1)S(1) 0.0 Chemical derivative 126 0.0 +Xlink:EGS[226]@K 226.047738 226.1828 H(10)C(10)O(6) 0.0 Chemical derivative 1897 0.0 +Xlink:EGS[226]@Protein_N-term 226.047738 226.1828 H(10)C(10)O(6) 0.0 Chemical derivative 1897 0.0 +Fluoro@Y 17.990578 17.9905 H(-1)F(1) 0.0 Non-standard residue 127 0.0 +Fluoro@W 17.990578 17.9905 H(-1)F(1) 0.0 Non-standard residue 127 0.0 +Fluoro@F 17.990578 17.9905 H(-1)F(1) 0.0 Non-standard residue 127 0.0 +Fluoro@A 17.990578 17.9905 H(-1)F(1) 0.0 Chemical derivative 127 0.0 +Fluorescein@C 387.074287 387.3417 H(13)C(22)N(1)O(6) 0.0 Chemical derivative 128 0.0 +Iodo@H 125.896648 125.8965 H(-1)I(1) 0.0 Chemical derivative 129 0.0 +Iodo@Y 125.896648 125.8965 H(-1)I(1) 0.0 Chemical derivative 129 0.0 +Diiodo@Y 251.793296 251.7931 H(-2)I(2) 0.0 Chemical derivative 130 0.0 +Diiodo@H 251.793296 251.7931 H(-2)I(2) 0.0 Chemical derivative 130 0.0 +Triiodo@Y 377.689944 377.6896 H(-3)I(3) 0.0 Chemical derivative 131 0.0 +Myristoleyl@G^Protein_N-term 208.182715 208.3398 H(24)C(14)O(1) 0.0 Co-translational 134 0.0 +Pro->Pyrrolidinone@P -30.010565 -30.026 H(-2)C(-1)O(-1) 0.0 Chemical derivative 360 0.0 +Myristoyl+Delta:H(-4)@G^Protein_N-term 206.167065 206.3239 H(22)C(14)O(1) 0.0 Co-translational 135 0.0 +Benzoyl@Any_N-term 104.026215 104.1061 H(4)C(7)O(1) 0.0 Isotopic label 136 0.0 +Benzoyl@K 104.026215 104.1061 H(4)C(7)O(1) 0.0 Isotopic label 136 0.0 +Hex(5)HexNAc(2)@N 1216.422863 1217.088 H(76)C(46)N(2)O(35) 1216.422863 H(76)C(46)N(2)O(35) N-linked glycosylation 137 0.5 +Dansyl@Any_N-term 233.051049 
233.2862 H(11)C(12)N(1)O(2)S(1) 0.0 Chemical derivative 139 0.0 +Dansyl@K 233.051049 233.2862 H(11)C(12)N(1)O(2)S(1) 0.0 Chemical derivative 139 0.0 +a-type-ion@Any_C-term -46.005479 -46.0254 H(-2)C(-1)O(-2) 0.0 Other 140 0.0 +Amidine@Any_N-term 41.026549 41.0519 H(3)C(2)N(1) 0.0 Chemical derivative 141 0.0 +Amidine@K 41.026549 41.0519 H(3)C(2)N(1) 0.0 Chemical derivative 141 0.0 +HexNAc(1)dHex(1)@T 349.137281 349.3337 H(23)C(14)N(1)O(9) 349.137281 H(23)C(14)N(1)O(9) O-linked glycosylation 142 0.5 +HexNAc(1)dHex(1)@S 349.137281 349.3337 H(23)C(14)N(1)O(9) 349.137281 H(23)C(14)N(1)O(9) O-linked glycosylation 142 0.5 +HexNAc(1)dHex(1)@N 349.137281 349.3337 H(23)C(14)N(1)O(9) 349.137281 H(23)C(14)N(1)O(9) N-linked glycosylation 142 0.5 +HexNAc(2)@T 406.158745 406.385 H(26)C(16)N(2)O(10) 406.158745 H(26)C(16)N(2)O(10) O-linked glycosylation 143 0.5 +HexNAc(2)@S 406.158745 406.385 H(26)C(16)N(2)O(10) 406.158745 H(26)C(16)N(2)O(10) O-linked glycosylation 143 0.5 +HexNAc(2)@N 406.158745 406.385 H(26)C(16)N(2)O(10) 406.158745 H(26)C(16)N(2)O(10) N-linked glycosylation 143 0.5 +Hex(3)@T 486.158471 486.4218 H(30)C(18)O(15) 486.158471 H(30)C(18)O(15) O-linked glycosylation 144 0.5 +Hex(3)@S 486.158471 486.4218 H(30)C(18)O(15) 486.158471 H(30)C(18)O(15) O-linked glycosylation 144 0.5 +Hex(3)@N 486.158471 486.4218 H(30)C(18)O(15) 486.158471 H(30)C(18)O(15) N-linked glycosylation 144 0.5 +HexNAc(1)dHex(2)@N 495.19519 495.4749 H(33)C(20)N(1)O(13) 495.19519 H(33)C(20)N(1)O(13) N-linked glycosylation 145 0.5 +Hex(1)HexNAc(1)dHex(1)@T 511.190105 511.4743 H(33)C(20)N(1)O(14) 511.190105 H(33)C(20)N(1)O(14) O-linked glycosylation 146 0.5 +Hex(1)HexNAc(1)dHex(1)@S 511.190105 511.4743 H(33)C(20)N(1)O(14) 511.190105 H(33)C(20)N(1)O(14) O-linked glycosylation 146 0.5 +Hex(1)HexNAc(1)dHex(1)@N 511.190105 511.4743 H(33)C(20)N(1)O(14) 511.190105 H(33)C(20)N(1)O(14) N-linked glycosylation 146 0.5 +HexNAc(2)dHex(1)@N 552.216654 552.5262 H(36)C(22)N(2)O(14) 552.216654 H(36)C(22)N(2)O(14) N-linked glycosylation 147 0.5 +Hex(1)HexNAc(2)@T 568.211569 568.5256 H(36)C(22)N(2)O(15) 568.211569 H(36)C(22)N(2)O(15) O-linked glycosylation 148 0.5 +Hex(1)HexNAc(2)@S 568.211569 568.5256 H(36)C(22)N(2)O(15) 568.211569 H(36)C(22)N(2)O(15) O-linked glycosylation 148 0.5 +Hex(1)HexNAc(2)@N 568.211569 568.5256 H(36)C(22)N(2)O(15) 568.211569 H(36)C(22)N(2)O(15) N-linked glycosylation 148 0.5 +Hex(1)HexNAc(1)NeuAc(1)@T 656.227613 656.5877 H(40)C(25)N(2)O(18) 656.227613 H(40)C(25)N(2)O(18) O-linked glycosylation 149 0.5 +Hex(1)HexNAc(1)NeuAc(1)@S 656.227613 656.5877 H(40)C(25)N(2)O(18) 656.227613 H(40)C(25)N(2)O(18) O-linked glycosylation 149 0.5 +Hex(1)HexNAc(1)NeuAc(1)@N 656.227613 656.5877 H(40)C(25)N(2)O(18) 656.227613 H(40)C(25)N(2)O(18) N-linked glycosylation 149 0.5 +HexNAc(2)dHex(2)@N 698.274563 698.6674 H(46)C(28)N(2)O(18) 698.274563 H(46)C(28)N(2)O(18) N-linked glycosylation 150 0.5 +Hex(1)HexNAc(2)Pent(1)@N 700.253828 700.6403 H(44)C(27)N(2)O(19) 700.253828 H(44)C(27)N(2)O(19) N-linked glycosylation 151 0.5 +Hex(1)HexNAc(2)dHex(1)@T 714.269478 714.6668 H(46)C(28)N(2)O(19) 714.269478 H(46)C(28)N(2)O(19) O-linked glycosylation 152 0.5 +Hex(1)HexNAc(2)dHex(1)@S 714.269478 714.6668 H(46)C(28)N(2)O(19) 714.269478 H(46)C(28)N(2)O(19) O-linked glycosylation 152 0.5 +Hex(1)HexNAc(2)dHex(1)@N 714.269478 714.6668 H(46)C(28)N(2)O(19) 714.269478 H(46)C(28)N(2)O(19) N-linked glycosylation 152 0.5 +Hex(2)HexNAc(2)@T 730.264392 730.6662 H(46)C(28)N(2)O(20) 730.264392 H(46)C(28)N(2)O(20) O-linked glycosylation 153 0.5 +Hex(2)HexNAc(2)@S 
730.264392 730.6662 H(46)C(28)N(2)O(20) 730.264392 H(46)C(28)N(2)O(20) O-linked glycosylation 153 0.5 +Hex(2)HexNAc(2)@N 730.264392 730.6662 H(46)C(28)N(2)O(20) 730.264392 H(46)C(28)N(2)O(20) N-linked glycosylation 153 0.5 +Hex(3)HexNAc(1)Pent(1)@N 821.280102 821.7289 H(51)C(31)N(1)O(24) 821.280102 H(51)C(31)N(1)O(24) N-linked glycosylation 154 0.5 +Hex(1)HexNAc(2)dHex(1)Pent(1)@N 846.311736 846.7815 H(54)C(33)N(2)O(23) 846.311736 H(54)C(33)N(2)O(23) N-linked glycosylation 155 0.5 +Hex(1)HexNAc(2)dHex(2)@T 860.327386 860.808 H(56)C(34)N(2)O(23) 860.327386 H(56)C(34)N(2)O(23) O-linked glycosylation 156 0.5 +Hex(1)HexNAc(2)dHex(2)@S 860.327386 860.808 H(56)C(34)N(2)O(23) 860.327386 H(56)C(34)N(2)O(23) O-linked glycosylation 156 0.5 +Hex(1)HexNAc(2)dHex(2)@N 860.327386 860.808 H(56)C(34)N(2)O(23) 860.327386 H(56)C(34)N(2)O(23) N-linked glycosylation 156 0.5 +Hex(2)HexNAc(2)Pent(1)@N 862.306651 862.7809 H(54)C(33)N(2)O(24) 862.306651 H(54)C(33)N(2)O(24) N-linked glycosylation 157 0.5 +Hex(2)HexNAc(2)dHex(1)@T 876.322301 876.8074 H(56)C(34)N(2)O(24) 876.322301 H(56)C(34)N(2)O(24) O-linked glycosylation 158 0.5 +Hex(2)HexNAc(2)dHex(1)@S 876.322301 876.8074 H(56)C(34)N(2)O(24) 876.322301 H(56)C(34)N(2)O(24) O-linked glycosylation 158 0.5 +Hex(2)HexNAc(2)dHex(1)@N 876.322301 876.8074 H(56)C(34)N(2)O(24) 876.322301 H(56)C(34)N(2)O(24) N-linked glycosylation 158 0.5 +Hex(3)HexNAc(2)@T 892.317216 892.8068 H(56)C(34)N(2)O(25) 892.317216 H(56)C(34)N(2)O(25) O-linked glycosylation 159 0.5 +Hex(3)HexNAc(2)@S 892.317216 892.8068 H(56)C(34)N(2)O(25) 892.317216 H(56)C(34)N(2)O(25) O-linked glycosylation 159 0.5 +Hex(3)HexNAc(2)@N 892.317216 892.8068 H(56)C(34)N(2)O(25) 892.317216 H(56)C(34)N(2)O(25) N-linked glycosylation 159 0.5 +Hex(1)HexNAc(1)NeuAc(2)@T 947.323029 947.8423 H(57)C(36)N(3)O(26) 947.323029 H(57)C(36)N(3)O(26) O-linked glycosylation 160 0.5 +Hex(1)HexNAc(1)NeuAc(2)@S 947.323029 947.8423 H(57)C(36)N(3)O(26) 947.323029 H(57)C(36)N(3)O(26) O-linked glycosylation 160 0.5 +Hex(1)HexNAc(1)NeuAc(2)@N 947.323029 947.8423 H(57)C(36)N(3)O(26) 947.323029 H(57)C(36)N(3)O(26) N-linked glycosylation 160 0.5 +Hex(3)HexNAc(2)Phos(1)@N 972.283547 972.7867 H(57)C(34)N(2)O(28)P(1) 972.283547 H(57)C(34)N(2)O(28)P(1) N-linked glycosylation 161 0.5 +Delta:S(-1)Se(1)@M 47.944449 46.895 S(-1)Se(1) 0.0 Non-standard residue 162 0.0 +Delta:S(-1)Se(1)@C 47.944449 46.895 S(-1)Se(1) 0.0 Non-standard residue 162 0.0 +NBS:13C(6)@W 159.008578 159.1144 H(3)13C(6)N(1)O(2)S(1) 0.0 Chemical derivative 171 0.0 +Methyl:2H(3)13C(1)@K 18.037835 18.0377 H(-1)2H(3)13C(1) 0.0 Isotopic label 329 0.0 +Methyl:2H(3)13C(1)@R 18.037835 18.0377 H(-1)2H(3)13C(1) 0.0 Isotopic label 329 0.0 +Methyl:2H(3)13C(1)@Any_N-term 18.037835 18.0377 H(-1)2H(3)13C(1) 0.0 Isotopic label 329 0.0 +Dimethyl:2H(6)13C(2)@Protein_N-term 36.07567 36.0754 H(-2)2H(6)13C(2) 0.0 Isotopic label 330 [13C]([2H])([2H])([2H]) 0.0 +Dimethyl:2H(6)13C(2)@Any_N-term 36.07567 36.0754 H(-2)2H(6)13C(2) 0.0 Isotopic label 330 [13C]([2H])([2H])([2H]) 0.0 +Dimethyl:2H(6)13C(2)@R 36.07567 36.0754 H(-2)2H(6)13C(2) 0.0 Isotopic label 330 0.0 +Dimethyl:2H(6)13C(2)@K 36.07567 36.0754 H(-2)2H(6)13C(2) 0.0 Isotopic label 330 0.0 +NBS@W 152.988449 153.1585 H(3)C(6)N(1)O(2)S(1) 0.0 Chemical derivative 172 0.0 +Delta:H(-1)N(-1)18O(1)@N 2.988261 2.9845 H(-1)N(-1)18O(1) 0.0 Isotopic label 170 0.0 +QAT@C 171.149738 171.26 H(19)C(9)N(2)O(1) 0.0 Chemical derivative 195 0.0 +BHT@H 218.167065 218.3346 H(22)C(15)O(1) 0.0 Other 176 0.0 +BHT@K 218.167065 218.3346 H(22)C(15)O(1) 0.0 Other 176 0.0 +BHT@C 
218.167065 218.3346 H(22)C(15)O(1) 0.0 Other 176 0.0 +Delta:H(4)C(2)O(-1)S(1)@S 44.008456 44.1188 H(4)C(2)O(-1)S(1) 0.0 Chemical derivative 327 0.0 +DAET@T 87.050655 87.1866 H(9)C(4)N(1)O(-1)S(1) 0.0 Chemical derivative 178 0.0 +DAET@S 87.050655 87.1866 H(9)C(4)N(1)O(-1)S(1) 0.0 Chemical derivative 178 0.0 +Pro->Pyrrolidone@P -27.994915 -28.0101 C(-1)O(-1) 0.0 Chemical derivative 369 0.0 +Label:13C(9)@Y 9.030193 8.9339 C(-9)13C(9) 0.0 Isotopic label 184 0.0 +Label:13C(9)@F 9.030193 8.9339 C(-9)13C(9) 0.0 Isotopic label 184 0.0 +Label:13C(9)+Phospho@Y 88.996524 88.9138 H(1)C(-9)13C(9)O(3)P(1) 0.0 Isotopic label 185 0.0 +Label:13C(6)@I 6.020129 5.9559 C(-6)13C(6) 0.0 Isotopic label 188 0.0 +Label:13C(6)@L 6.020129 5.9559 C(-6)13C(6) 0.0 Isotopic label 188 0.0 +Label:13C(6)@K 6.020129 5.9559 C(-6)13C(6) 0.0 Isotopic label 188 0.0 +Label:13C(6)@R 6.020129 5.9559 C(-6)13C(6) 0.0 Isotopic label 188 0.0 +HPG@R 132.021129 132.1162 H(4)C(8)O(2) 0.0 Chemical derivative 186 0.0 +2HPG@R 282.052824 282.2476 H(10)C(16)O(5) 0.0 Chemical derivative 187 0.0 +QAT:2H(3)@C 174.168569 174.2784 H(16)2H(3)C(9)N(2)O(1) 0.0 Isotopic label 196 0.0 +Label:18O(2)@Any_C-term 4.008491 3.9995 O(-2)18O(2) 0.0 Isotopic label 193 0.0 +AccQTag@Any_N-term 170.048013 170.1674 H(6)C(10)N(2)O(1) 0.0 Chemical derivative 194 0.0 +AccQTag@K 170.048013 170.1674 H(6)C(10)N(2)O(1) 0.0 Chemical derivative 194 0.0 +Dimethyl:2H(4)@Protein_N-term 32.056407 32.0778 2H(4)C(2) 0.0 Isotopic label 199 C([2H])([2H])([1H]) 0.0 +Dimethyl:2H(4)@Any_N-term 32.056407 32.0778 2H(4)C(2) 0.0 Isotopic label 199 C([2H])([2H])([1H]) 0.0 +Dimethyl:2H(4)@K 32.056407 32.0778 2H(4)C(2) 0.0 Isotopic label 199 0.0 +Dimethyl:2H(4)@R 32.056407 32.0778 2H(4)C(2) 0.0 Isotopic label 199 0.0 +EQAT@C 184.157563 184.2786 H(20)C(10)N(2)O(1) 0.0 Chemical derivative 197 0.0 +EQAT:2H(5)@C 189.188947 189.3094 H(15)2H(5)C(10)N(2)O(1) 0.0 Isotopic label 198 0.0 +Ethanedithiol@T 75.980527 76.1838 H(4)C(2)O(-1)S(2) 0.0 Chemical derivative 200 0.0 +Ethanedithiol@S 75.980527 76.1838 H(4)C(2)O(-1)S(2) 0.0 Chemical derivative 200 0.0 +NEIAA:2H(5)@Y 90.084148 90.1353 H(2)2H(5)C(4)N(1)O(1) 0.0 Isotopic label 212 0.0 +NEIAA:2H(5)@C 90.084148 90.1353 H(2)2H(5)C(4)N(1)O(1) 0.0 Isotopic label 212 0.0 +Delta:H(6)C(6)O(1)@K 94.041865 94.1112 H(6)C(6)O(1) 0.0 Other 205 0.0 +Delta:H(4)C(3)O(1)@K 56.026215 56.0633 H(4)C(3)O(1) 0.0 Other 206 0.0 +Delta:H(4)C(3)O(1)@H 56.026215 56.0633 H(4)C(3)O(1) 0.0 Other 206 0.0 +Delta:H(4)C(3)O(1)@C 56.026215 56.0633 H(4)C(3)O(1) 0.0 Other 206 0.0 +Delta:H(4)C(3)O(1)@R 56.026215 56.0633 H(4)C(3)O(1) 0.0 Artefact 206 0.0 +Delta:H(2)C(3)@K 38.01565 38.048 H(2)C(3) 0.0 Other 207 0.0 +Delta:H(4)C(6)@K 76.0313 76.096 H(4)C(6) 0.0 Other 208 0.0 +Delta:H(8)C(6)O(2)@K 112.05243 112.1265 H(8)C(6)O(2) 0.0 Other 209 0.0 +ADP-Ribosyl@D 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 0.0 Other glycosylation 213 0.0 +ADP-Ribosyl@K 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 0.0 Other glycosylation 213 0.0 +ADP-Ribosyl@E 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 0.0 Other glycosylation 213 0.0 +ADP-Ribosyl@T 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 541.06111 H(21)C(15)N(5)O(13)P(2) O-linked glycosylation 213 0.5 +ADP-Ribosyl@S 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 541.06111 H(21)C(15)N(5)O(13)P(2) O-linked glycosylation 213 0.5 +ADP-Ribosyl@C 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 0.0 Other glycosylation 213 0.0 +ADP-Ribosyl@N 541.06111 541.3005 H(21)C(15)N(5)O(13)P(2) 541.06111 H(21)C(15)N(5)O(13)P(2) N-linked glycosylation 213 0.5 +ADP-Ribosyl@R 541.06111 
541.3005 H(21)C(15)N(5)O(13)P(2) 0.0 Other glycosylation 213 0.0 +NEIAA@Y 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Isotopic label 211 0.0 +NEIAA@C 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Isotopic label 211 0.0 +iTRAQ4plex@C 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@T 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@Protein_N-term 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@S 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@H 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@Y 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@Any_N-term 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +iTRAQ4plex@K 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 214 0.0 +Crotonaldehyde@K 70.041865 70.0898 H(6)C(4)O(1) 0.0 Other 253 0.0 +Crotonaldehyde@H 70.041865 70.0898 H(6)C(4)O(1) 0.0 Other 253 0.0 +Crotonaldehyde@C 70.041865 70.0898 H(6)C(4)O(1) 0.0 Other 253 0.0 +Bromo@F 77.910511 78.8961 H(-1)Br(1) 0.0 Post-translational 340 0.0 +Bromo@H 77.910511 78.8961 H(-1)Br(1) 0.0 Post-translational 340 0.0 +Bromo@W 77.910511 78.8961 H(-1)Br(1) 0.0 Post-translational 340 0.0 +Bromo@Y 77.910511 78.8961 H(-1)Br(1) 0.0 Artefact 340 0.0 +Amino@Y 15.010899 15.0146 H(1)N(1) 0.0 Chemical derivative 342 0.0 +Argbiotinhydrazide@R 199.066699 199.27 H(13)C(9)N(1)O(2)S(1) 0.0 Chemical derivative 343 0.0 +Label:18O(1)@Y 2.004246 1.9998 O(-1)18O(1) 0.0 Isotopic label 258 0.0 +Label:18O(1)@T 2.004246 1.9998 O(-1)18O(1) 0.0 Isotopic label 258 0.0 +Label:18O(1)@S 2.004246 1.9998 O(-1)18O(1) 0.0 Isotopic label 258 0.0 +Label:18O(1)@Any_C-term 2.004246 1.9998 O(-1)18O(1) 0.0 Isotopic label 258 0.0 +Label:13C(6)15N(2)@K 8.014199 7.9427 C(-6)13C(6)N(-2)15N(2) 0.0 Isotopic label 259 0.0 +Thiophospho@Y 95.943487 96.0455 H(1)O(2)P(1)S(1) 0.0 Other 260 0.0 +Thiophospho@T 95.943487 96.0455 H(1)O(2)P(1)S(1) 0.0 Other 260 0.0 +Thiophospho@S 95.943487 96.0455 H(1)O(2)P(1)S(1) 0.0 Other 260 0.0 +SPITC@K 214.971084 215.2495 H(5)C(7)N(1)O(3)S(2) 0.0 Chemical derivative 261 0.0 +SPITC@Any_N-term 214.971084 215.2495 H(5)C(7)N(1)O(3)S(2) 0.0 Chemical derivative 261 0.0 +IGBP@C 296.016039 297.1478 H(13)C(12)N(2)O(2)Br(1) 0.0 Isotopic label 243 0.0 +Cytopiloyne@Y 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@S 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@R 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@P 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@Any_N-term 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@K 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne@C 362.136553 362.3738 H(22)C(19)O(7) 0.0 Chemical derivative 270 0.0 +Cytopiloyne+water@Y 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Cytopiloyne+water@T 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Cytopiloyne+water@S 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Cytopiloyne+water@R 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Cytopiloyne+water@Any_N-term 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Cytopiloyne+water@K 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 
+Cytopiloyne+water@C 380.147118 380.3891 H(24)C(19)O(8) 0.0 Chemical derivative 271 0.0 +Label:13C(6)15N(4)@R 10.008269 9.9296 C(-6)13C(6)N(-4)15N(4) 0.0 Isotopic label 267 0.0 +Label:13C(9)15N(1)@F 10.027228 9.9273 C(-9)13C(9)N(-1)15N(1) 0.0 Isotopic label 269 0.0 +Label:2H(3)@L 3.01883 3.0185 H(-3)2H(3) 0.0 Isotopic label 262 0.0 +Label:2H(3)@M 3.01883 3.0185 H(-3)2H(3) 0.0 Isotopic label 262 0.0 +Label:13C(5)15N(1)@M 6.013809 5.9567 C(-5)13C(5)N(-1)15N(1) 0.0 Isotopic label 268 0.0 +Label:13C(5)15N(1)@P 6.013809 5.9567 C(-5)13C(5)N(-1)15N(1) 0.0 Isotopic label 268 0.0 +Label:13C(5)15N(1)@V 6.013809 5.9567 C(-5)13C(5)N(-1)15N(1) 0.0 Isotopic label 268 0.0 +Label:13C(5)15N(1)@E 6.013809 5.9567 C(-5)13C(5)N(-1)15N(1) 0.0 Isotopic label 268 0.0 +PET@T 121.035005 121.2028 H(7)C(7)N(1)O(-1)S(1) 0.0 Chemical derivative 264 0.0 +PET@S 121.035005 121.2028 H(7)C(7)N(1)O(-1)S(1) 0.0 Chemical derivative 264 0.0 +CAF@Any_N-term 135.983029 136.1265 H(4)C(3)O(4)S(1) 0.0 Chemical derivative 272 0.0 +Xlink:BS2G[96]@Protein_N-term 96.021129 96.0841 H(4)C(5)O(2) 0.0 Chemical derivative 1905 0.0 +Xlink:BS2G[96]@K 96.021129 96.0841 H(4)C(5)O(2) 0.0 Chemical derivative 1905 0.0 +Nitrosyl@C 28.990164 28.9982 H(-1)N(1)O(1) 0.0 Post-translational 275 0.0 +Nitrosyl@Y 28.990164 28.9982 H(-1)N(1)O(1) 0.0 Chemical derivative 275 0.0 +Ser/Thr-KDO@T 220.058303 220.1767 H(12)C(8)O(7) 220.058303 H(12)C(8)O(7) O-linked glycosylation 2022 0.5 +Ser/Thr-KDO@S 220.058303 220.1767 H(12)C(8)O(7) 220.058303 H(12)C(8)O(7) O-linked glycosylation 2022 0.5 +AEBS@Y 183.035399 183.2276 H(9)C(8)N(1)O(2)S(1) 0.0 Artefact 276 0.0 +AEBS@S 183.035399 183.2276 H(9)C(8)N(1)O(2)S(1) 0.0 Artefact 276 0.0 +AEBS@Protein_N-term 183.035399 183.2276 H(9)C(8)N(1)O(2)S(1) 0.0 Artefact 276 0.0 +AEBS@K 183.035399 183.2276 H(9)C(8)N(1)O(2)S(1) 0.0 Artefact 276 0.0 +AEBS@H 183.035399 183.2276 H(9)C(8)N(1)O(2)S(1) 0.0 Artefact 276 0.0 +Ethanolyl@K 44.026215 44.0526 H(4)C(2)O(1) 0.0 Chemical derivative 278 0.0 +Ethanolyl@C 44.026215 44.0526 H(4)C(2)O(1) 0.0 Chemical derivative 278 0.0 +Ethanolyl@R 44.026215 44.0526 H(4)C(2)O(1) 0.0 Chemical derivative 278 0.0 +Label:13C(6)15N(2)+Dimethyl@K 36.045499 35.9959 H(4)C(-4)13C(6)N(-2)15N(2) 0.0 Isotopic label 987 0.0 +HMVK@C 86.036779 86.0892 H(6)C(4)O(2) 0.0 Chemical derivative 371 0.0 +Ethyl@Any_C-term 28.0313 28.0532 H(4)C(2) 0.0 Chemical derivative 280 OCC 0.0 +Ethyl@Protein_N-term 28.0313 28.0532 H(4)C(2) 0.0 Chemical derivative 280 0.0 +Ethyl@E 28.0313 28.0532 H(4)C(2) 0.0 Artefact 280 0.0 +Ethyl@Any_N-term 28.0313 28.0532 H(4)C(2) 0.0 Multiple 280 0.0 +Ethyl@K 28.0313 28.0532 H(4)C(2) 0.0 Multiple 280 0.0 +Ethyl@D 28.0313 28.0532 H(4)C(2) 0.0 Chemical derivative 280 0.0 +CoenzymeA@C 765.09956 765.5182 H(34)C(21)N(7)O(16)P(3)S(1) 0.0 Post-translational 281 0.0 +Methyl+Deamidated@Q 14.999666 15.0113 H(1)C(1)N(-1)O(1) 0.0 Post-translational 528 0.0 +Methyl+Deamidated@N 14.999666 15.0113 H(1)C(1)N(-1)O(1) 0.0 Chemical derivative 528 0.0 +Delta:H(5)C(2)@P 29.039125 29.0611 H(5)C(2) 0.0 Post-translational 529 0.0 +Methyl:2H(2)@K 16.028204 16.0389 2H(2)C(1) 0.0 Isotopic label 284 0.0 +Methyl:2H(2)@Any_N-term 16.028204 16.0389 2H(2)C(1) 0.0 Isotopic label 284 0.0 +SulfanilicAcid@E 155.004099 155.1744 H(5)C(6)N(1)O(2)S(1) 0.0 Isotopic label 285 0.0 +SulfanilicAcid@D 155.004099 155.1744 H(5)C(6)N(1)O(2)S(1) 0.0 Isotopic label 285 0.0 +SulfanilicAcid@Any_C-term 155.004099 155.1744 H(5)C(6)N(1)O(2)S(1) 0.0 Isotopic label 285 0.0 +SulfanilicAcid:13C(6)@E 161.024228 161.1303 H(5)13C(6)N(1)O(2)S(1) 0.0 Chemical 
derivative 286 0.0 +SulfanilicAcid:13C(6)@D 161.024228 161.1303 H(5)13C(6)N(1)O(2)S(1) 0.0 Chemical derivative 286 0.0 +SulfanilicAcid:13C(6)@Any_C-term 161.024228 161.1303 H(5)13C(6)N(1)O(2)S(1) 0.0 Chemical derivative 286 0.0 +Biotin-PEO-Amine@D 356.188212 356.4835 H(28)C(16)N(4)O(3)S(1) 0.0 Chemical derivative 289 0.0 +Biotin-PEO-Amine@Protein_C-term 356.188212 356.4835 H(28)C(16)N(4)O(3)S(1) 0.0 Chemical derivative 289 0.0 +Biotin-PEO-Amine@E 356.188212 356.4835 H(28)C(16)N(4)O(3)S(1) 0.0 Chemical derivative 289 0.0 +Trp->Oxolactone@W 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 288 0.0 +Biotin-HPDP@C 428.191582 428.6124 H(32)C(19)N(4)O(3)S(2) 0.0 Chemical derivative 290 0.0 +Delta:Hg(1)@C 201.970617 200.59 Hg(1) 0.0 Chemical derivative 291 0.0 +IodoU-AMP@Y 322.020217 322.1654 H(11)C(9)N(2)O(9)P(1) 0.0 Chemical derivative 292 0.0 +IodoU-AMP@W 322.020217 322.1654 H(11)C(9)N(2)O(9)P(1) 0.0 Chemical derivative 292 0.0 +IodoU-AMP@F 322.020217 322.1654 H(11)C(9)N(2)O(9)P(1) 0.0 Chemical derivative 292 0.0 +CAMthiopropanoyl@Protein_N-term 145.019749 145.1796 H(7)C(5)N(1)O(2)S(1) 0.0 Chemical derivative 293 0.0 +CAMthiopropanoyl@K 145.019749 145.1796 H(7)C(5)N(1)O(2)S(1) 0.0 Chemical derivative 293 0.0 +IED-Biotin@C 326.141261 326.4145 H(22)C(14)N(4)O(3)S(1) 0.0 Chemical derivative 294 0.0 +dHex@N 146.057909 146.1412 H(10)C(6)O(4) 146.057909 H(10)C(6)O(4) N-linked glycosylation 295 0.5 +dHex@T 146.057909 146.1412 H(10)C(6)O(4) 146.057909 H(10)C(6)O(4) O-linked glycosylation 295 0.5 +dHex@S 146.057909 146.1412 H(10)C(6)O(4) 146.057909 H(10)C(6)O(4) O-linked glycosylation 295 0.5 +Methyl:2H(3)@Anywhere 17.03448 17.0451 H(-1)2H(3)C(1) 0.0 Isotopic label 298 0.0 +Methyl:2H(3)@D 17.03448 17.0451 H(-1)2H(3)C(1) 0.0 Isotopic label 298 0.0 +Methyl:2H(3)@E 17.03448 17.0451 H(-1)2H(3)C(1) 0.0 Isotopic label 298 0.0 +Methyl:2H(3)@K 17.03448 17.0451 H(-1)2H(3)C(1) 0.0 Isotopic label 298 0.0 +Methyl:2H(3)@R 17.03448 17.0451 H(-1)2H(3)C(1) 0.0 Isotopic label 298 0.0 +Carboxy@E 43.989829 44.0095 C(1)O(2) 0.0 Post-translational 299 0.0 +Carboxy@D 43.989829 44.0095 C(1)O(2) 0.0 Post-translational 299 0.0 +Carboxy@K 43.989829 44.0095 C(1)O(2) 0.0 Post-translational 299 0.0 +Carboxy@W 43.989829 44.0095 C(1)O(2) 0.0 Chemical derivative 299 0.0 +Carboxy@M^Protein_N-term 43.989829 44.0095 C(1)O(2) 0.0 Post-translational 299 0.0 +Bromobimane@C 190.074228 190.1986 H(10)C(10)N(2)O(2) 0.0 Chemical derivative 301 0.0 +Menadione@K 170.036779 170.1641 H(6)C(11)O(2) 0.0 Chemical derivative 302 0.0 +Menadione@C 170.036779 170.1641 H(6)C(11)O(2) 0.0 Chemical derivative 302 0.0 +DeStreak@C 75.998285 76.1176 H(4)C(2)O(1)S(1) 0.0 Chemical derivative 303 0.0 +dHex(1)Hex(3)HexNAc(4)@T 1444.53387 1445.3331 H(92)C(56)N(4)O(39) 1444.53387 H(92)C(56)N(4)O(39) O-linked glycosylation 305 0.5 +dHex(1)Hex(3)HexNAc(4)@S 1444.53387 1445.3331 H(92)C(56)N(4)O(39) 1444.53387 H(92)C(56)N(4)O(39) O-linked glycosylation 305 0.5 +dHex(1)Hex(3)HexNAc(4)@N 1444.53387 1445.3331 H(92)C(56)N(4)O(39) 1444.53387 H(92)C(56)N(4)O(39) N-linked glycosylation 305 0.5 +dHex(1)Hex(4)HexNAc(4)@T 1606.586693 1607.4737 H(102)C(62)N(4)O(44) 1606.586693 H(102)C(62)N(4)O(44) O-linked glycosylation 307 0.5 +dHex(1)Hex(4)HexNAc(4)@S 1606.586693 1607.4737 H(102)C(62)N(4)O(44) 1606.586693 H(102)C(62)N(4)O(44) O-linked glycosylation 307 0.5 +dHex(1)Hex(4)HexNAc(4)@N 1606.586693 1607.4737 H(102)C(62)N(4)O(44) 1606.586693 H(102)C(62)N(4)O(44) N-linked glycosylation 307 0.5 +Pro+O(2)@H 129.042593 129.114 H(7)C(5)N(1)O(3) 0.0 Post-translational 2035 0.0 
+dHex(1)Hex(5)HexNAc(4)@N 1768.639517 1769.6143 H(112)C(68)N(4)O(49) 1768.639517 H(112)C(68)N(4)O(49) N-linked glycosylation 308 0.5 +Hex(3)HexNAc(4)@T 1298.475961 1299.1919 H(82)C(50)N(4)O(35) 1298.475961 H(82)C(50)N(4)O(35) O-linked glycosylation 309 0.5 +Hex(3)HexNAc(4)@S 1298.475961 1299.1919 H(82)C(50)N(4)O(35) 1298.475961 H(82)C(50)N(4)O(35) O-linked glycosylation 309 0.5 +Hex(3)HexNAc(4)@N 1298.475961 1299.1919 H(82)C(50)N(4)O(35) 1298.475961 H(82)C(50)N(4)O(35) N-linked glycosylation 309 0.5 +Hex(4)HexNAc(4)@T 1460.528784 1461.3325 H(92)C(56)N(4)O(40) 1460.528784 H(92)C(56)N(4)O(40) O-linked glycosylation 310 0.5 +Hex(4)HexNAc(4)@S 1460.528784 1461.3325 H(92)C(56)N(4)O(40) 1460.528784 H(92)C(56)N(4)O(40) O-linked glycosylation 310 0.5 +Hex(4)HexNAc(4)@N 1460.528784 1461.3325 H(92)C(56)N(4)O(40) 1460.528784 H(92)C(56)N(4)O(40) N-linked glycosylation 310 0.5 +Hex(5)HexNAc(4)@T 1622.581608 1623.4731 H(102)C(62)N(4)O(45) 1622.581608 H(102)C(62)N(4)O(45) O-linked glycosylation 311 0.5 +Hex(5)HexNAc(4)@S 1622.581608 1623.4731 H(102)C(62)N(4)O(45) 1622.581608 H(102)C(62)N(4)O(45) O-linked glycosylation 311 0.5 +Hex(5)HexNAc(4)@N 1622.581608 1623.4731 H(102)C(62)N(4)O(45) 1622.581608 H(102)C(62)N(4)O(45) N-linked glycosylation 311 0.5 +Cysteinyl@C 119.004099 119.1423 H(5)C(3)N(1)O(2)S(1) 0.0 Multiple 312 0.0 +Lys-loss@K -128.094963 -128.1723 H(-12)C(-6)N(-2)O(-1) 0.0 Artefact 313 0.0 +Lys-loss@K^Protein_C-term -128.094963 -128.1723 H(-12)C(-6)N(-2)O(-1) 0.0 Post-translational 313 0.0 +Nmethylmaleimide@K 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 314 0.0 +Nmethylmaleimide@C 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 314 0.0 +CyDye-Cy3@C 672.298156 672.8335 H(44)C(37)N(4)O(6)S(1) 0.0 Chemical derivative 494 0.0 +DimethylpyrroleAdduct@K 78.04695 78.1118 H(6)C(6) 0.0 Chemical derivative 316 0.0 +Delta:H(2)C(5)@K 62.01565 62.0694 H(2)C(5) 0.0 Chemical derivative 318 0.0 +Delta:H(2)C(3)O(1)@K 54.010565 54.0474 H(2)C(3)O(1) 0.0 Chemical derivative 319 0.0 +Delta:H(2)C(3)O(1)@R 54.010565 54.0474 H(2)C(3)O(1) 0.0 Chemical derivative 319 0.0 +Nethylmaleimide+water@K 143.058243 143.1406 H(9)C(6)N(1)O(3) 0.0 Chemical derivative 320 0.0 +Nethylmaleimide+water@C 143.058243 143.1406 H(9)C(6)N(1)O(3) 0.0 Chemical derivative 320 0.0 +Methyl+Acetyl:2H(3)@K 59.045045 59.0817 H(1)2H(3)C(3)O(1) 0.0 Isotopic label 768 0.0 +Xlink:B10621@C 713.093079 713.5626 H(30)C(31)N(4)O(6)S(1)I(1) 0.0 Chemical derivative 323 0.0 +Xlink:DTBP[87]@Protein_N-term 87.01427 87.1435 H(5)C(3)N(1)S(1) 0.0 Chemical derivative 324 0.0 +Xlink:DTBP[87]@K 87.01427 87.1435 H(5)C(3)N(1)S(1) 0.0 Chemical derivative 324 0.0 +FP-Biotin@K 572.316129 572.7405 H(49)C(27)N(4)O(5)P(1)S(1) 0.0 Chemical derivative 325 0.0 +FP-Biotin@T 572.316129 572.7405 H(49)C(27)N(4)O(5)P(1)S(1) 0.0 Chemical derivative 325 0.0 +FP-Biotin@Y 572.316129 572.7405 H(49)C(27)N(4)O(5)P(1)S(1) 0.0 Chemical derivative 325 0.0 +FP-Biotin@S 572.316129 572.7405 H(49)C(27)N(4)O(5)P(1)S(1) 0.0 Chemical derivative 325 0.0 +Thiophos-S-S-biotin@Y 525.142894 525.6658 H(34)C(19)N(4)O(5)P(1)S(3) 525.142894 H(34)C(19)N(4)O(5)P(1)S(3) Chemical derivative 332 0.5 +Thiophos-S-S-biotin@T 525.142894 525.6658 H(34)C(19)N(4)O(5)P(1)S(3) 525.142894 H(34)C(19)N(4)O(5)P(1)S(3) Chemical derivative 332 0.5 +Thiophos-S-S-biotin@S 525.142894 525.6658 H(34)C(19)N(4)O(5)P(1)S(3) 525.142894 H(34)C(19)N(4)O(5)P(1)S(3) Chemical derivative 332 0.5 +Can-FP-biotin@T 447.195679 447.5291 H(34)C(19)N(3)O(5)P(1)S(1) 0.0 Chemical derivative 333 0.0 +Can-FP-biotin@Y 447.195679 
447.5291 H(34)C(19)N(3)O(5)P(1)S(1) 0.0 Chemical derivative 333 0.0
+Can-FP-biotin@S 447.195679 447.5291 H(34)C(19)N(3)O(5)P(1)S(1) 0.0 Chemical derivative 333 0.0
+HNE+Delta:H(2)@K 158.13068 158.238 H(18)C(9)O(2) 0.0 Chemical derivative 335 0.0
+HNE+Delta:H(2)@H 158.13068 158.238 H(18)C(9)O(2) 0.0 Chemical derivative 335 0.0
+HNE+Delta:H(2)@C 158.13068 158.238 H(18)C(9)O(2) 0.0 Chemical derivative 335 0.0
+Thrbiotinhydrazide@T 240.104482 240.3252 H(16)C(10)N(4)O(1)S(1) 0.0 Chemical derivative 361 0.0
+Methylamine@T 13.031634 13.0418 H(3)C(1)N(1)O(-1) 0.0 Artefact 337 0.0
+Methylamine@S 13.031634 13.0418 H(3)C(1)N(1)O(-1) 0.0 Artefact 337 0.0
+Diisopropylphosphate@K 164.060231 164.1394 H(13)C(6)O(3)P(1) 0.0 Chemical derivative 362 0.0
+Diisopropylphosphate@Y 164.060231 164.1394 H(13)C(6)O(3)P(1) 0.0 Chemical derivative 362 0.0
+Diisopropylphosphate@T 164.060231 164.1394 H(13)C(6)O(3)P(1) 0.0 Chemical derivative 362 0.0
+Diisopropylphosphate@S 164.060231 164.1394 H(13)C(6)O(3)P(1) 0.0 Chemical derivative 362 0.0
+Diisopropylphosphate@Any_N-term 164.060231 164.1394 H(13)C(6)O(3)P(1) 0.0 Chemical derivative 362 0.0
+Isopropylphospho@Y 122.013281 122.0596 H(7)C(3)O(3)P(1) 0.0 Chemical derivative 363 0.0
+Isopropylphospho@T 122.013281 122.0596 H(7)C(3)O(3)P(1) 0.0 Chemical derivative 363 0.0
+Isopropylphospho@S 122.013281 122.0596 H(7)C(3)O(3)P(1) 0.0 Chemical derivative 363 0.0
+ICPL:13C(6)@Any_N-term 111.041593 111.05 H(3)13C(6)N(1)O(1) 0.0 Isotopic label 364 0.0
+ICPL:13C(6)@Protein_N-term 111.041593 111.05 H(3)13C(6)N(1)O(1) 0.0 Isotopic label 364 0.0
+ICPL:13C(6)@K 111.041593 111.05 H(3)13C(6)N(1)O(1) 0.0 Isotopic label 364 0.0
+CarbamidomethylDTT@C 209.018035 209.2864 H(11)C(6)N(1)O(3)S(2) 0.0 Artefact 893 0.0
+ICPL@Protein_N-term 105.021464 105.0941 H(3)C(6)N(1)O(1) 0.0 Isotopic label 365 0.0
+ICPL@K 105.021464 105.0941 H(3)C(6)N(1)O(1) 0.0 Isotopic label 365 0.0
+ICPL@Any_N-term 105.021464 105.0941 H(3)C(6)N(1)O(1) 0.0 Isotopic label 365 0.0
+Deamidated:18O(1)@Q 2.988261 2.9845 H(-1)N(-1)18O(1) 0.0 Isotopic label 366 0.0
+Deamidated:18O(1)@N 2.988261 2.9845 H(-1)N(-1)18O(1) 0.0 Isotopic label 366 0.0
+Arg->Orn@R -42.021798 -42.04 H(-2)C(-1)N(-2) 0.0 Artefact 372 0.0
+Cation:Cu[I]@Any_C-term 61.921774 62.5381 H(-1)Cu(1) 0.0 Artefact 531 O[Cu] 0.0
+Cation:Cu[I]@E 61.921774 62.5381 H(-1)Cu(1) 0.0 Artefact 531 0.0
+Cation:Cu[I]@D 61.921774 62.5381 H(-1)Cu(1) 0.0 Artefact 531 0.0
+Cation:Cu[I]@H 61.921774 62.5381 H(-1)Cu(1) 0.0 Artefact 531 0.0
+Dehydro@C -1.007825 -1.0079 H(-1) 0.0 Multiple 374 0.0
+Diphthamide@H 142.110613 142.1989 H(14)C(7)N(2)O(1) 0.0 Post-translational 375 0.0
+Hydroxyfarnesyl@C 220.182715 220.3505 H(24)C(15)O(1) 0.0 Post-translational 376 0.0
+Diacylglycerol@C 576.511761 576.9334 H(68)C(37)O(4) 0.0 Post-translational 377 0.0
+Carboxyethyl@H 72.021129 72.0627 H(4)C(3)O(2) 0.0 Chemical derivative 378 0.0
+Carboxyethyl@K 72.021129 72.0627 H(4)C(3)O(2) 0.0 Post-translational 378 0.0
+Hypusine@K 87.068414 87.1204 H(9)C(4)N(1)O(1) 0.0 Post-translational 379 0.0
+Retinylidene@K 266.203451 266.4204 H(26)C(20) 0.0 Post-translational 380 0.0
+Lys->AminoadipicAcid@K 14.96328 14.9683 H(-3)N(-1)O(2) 0.0 Post-translational 381 0.0
+Cys->PyruvicAcid@C^Protein_N-term -33.003705 -33.0961 H(-3)N(-1)O(1)S(-1) 0.0 Post-translational 382 0.0
+Ammonia-loss@C^Any_N-term -17.026549 -17.0305 H(-3)N(-1) 0.0 Artefact 385 0.0
+Ammonia-loss@S^Protein_N-term -17.026549 -17.0305 H(-3)N(-1) 0.0 Post-translational 385 0.0
+Ammonia-loss@T^Protein_N-term -17.026549 -17.0305 H(-3)N(-1) 0.0 Post-translational 385 0.0
+Ammonia-loss@N -17.026549 -17.0305 H(-3)N(-1) 0.0 Chemical derivative 385 0.0
+Phycocyanobilin@C 586.279135 586.678 H(38)C(33)N(4)O(6) 0.0 Post-translational 387 0.0
+Phycoerythrobilin@C 588.294785 588.6939 H(40)C(33)N(4)O(6) 0.0 Post-translational 388 0.0
+Phytochromobilin@C 584.263485 584.6621 H(36)C(33)N(4)O(6) 0.0 Post-translational 389 0.0
+Heme@H 616.177295 616.4873 H(32)C(34)N(4)O(4)Fe(1) 0.0 Post-translational 390 0.0
+Heme@C 616.177295 616.4873 H(32)C(34)N(4)O(4)Fe(1) 0.0 Post-translational 390 0.0
+Molybdopterin@C 521.884073 520.2668 H(11)C(10)N(5)O(8)P(1)S(2)Mo(1) 0.0 Post-translational 391 0.0
+Quinone@W 29.974179 29.9829 H(-2)O(2) 0.0 Post-translational 392 0.0
+Quinone@Y 29.974179 29.9829 H(-2)O(2) 0.0 Post-translational 392 0.0
+Glucosylgalactosyl@K 340.100562 340.2806 H(20)C(12)O(11) 340.100562 H(20)C(12)O(11) Other glycosylation 393 0.5
+GPIanchor@Protein_C-term 123.00853 123.0477 H(6)C(2)N(1)O(3)P(1) 0.0 Post-translational 394 0.0
+PhosphoribosyldephosphoCoA@S 881.146904 881.6335 H(42)C(26)N(7)O(19)P(3)S(1) 0.0 Post-translational 395 0.0
+GlycerylPE@E 197.04531 197.1262 H(12)C(5)N(1)O(5)P(1) 0.0 Post-translational 396 0.0
+Triiodothyronine@Y 469.716159 469.785 H(1)C(6)O(1)I(3) 0.0 Post-translational 397 0.0
+Thyroxine@Y 595.612807 595.6815 C(6)O(1)I(4) 0.0 Post-translational 398 0.0
+Tyr->Dha@Y -94.041865 -94.1112 H(-6)C(-6)O(-1) 0.0 Post-translational 400 0.0
+Didehydro@S -2.01565 -2.0159 H(-2) 0.0 Post-translational 401 0.0
+Didehydro@Y -2.01565 -2.0159 H(-2) 0.0 Post-translational 401 0.0
+Didehydro@T -2.01565 -2.0159 H(-2) 0.0 Chemical derivative 401 0.0
+Didehydro@K^Any_C-term -2.01565 -2.0159 H(-2) 0.0 Artefact 401 0.0
+Cys->Oxoalanine@C -17.992806 -18.0815 H(-2)O(1)S(-1) 0.0 Post-translational 402 0.0
+Ser->LacticAcid@S^Protein_N-term -15.010899 -15.0146 H(-1)N(-1) 0.0 Post-translational 403 0.0
+GluGlu@E 258.085186 258.228 H(14)C(10)N(2)O(6) 0.0 Post-translational 451 0.0
+GluGlu@Protein_C-term 258.085186 258.228 H(14)C(10)N(2)O(6) 0.0 Post-translational 451 0.0
+Phosphoadenosine@S 329.05252 329.2059 H(12)C(10)N(5)O(6)P(1) 347.063085 H(14)C(10)N(5)O(7)P(1) Post-translational 405 0.5
+Phosphoadenosine@H 329.05252 329.2059 H(12)C(10)N(5)O(6)P(1) 0.0 Post-translational 405 0.0
+Phosphoadenosine@T 329.05252 329.2059 H(12)C(10)N(5)O(6)P(1) 347.063085 H(14)C(10)N(5)O(7)P(1) Post-translational 405 0.5
+Phosphoadenosine@Y 329.05252 329.2059 H(12)C(10)N(5)O(6)P(1) 135.054495 H(5)C(5)N(5) Post-translational 405 0.5
+Phosphoadenosine@K 329.05252 329.2059 H(12)C(10)N(5)O(6)P(1) 0.0 Post-translational 405 0.0
+Glu@E 129.042593 129.114 H(7)C(5)N(1)O(3) 0.0 Post-translational 450 0.0
+Glu@Protein_C-term 129.042593 129.114 H(7)C(5)N(1)O(3) 0.0 Chemical derivative 450 0.0
+Hydroxycinnamyl@C 146.036779 146.1427 H(6)C(9)O(2) 0.0 Post-translational 407 0.0
+Glycosyl@P 148.037173 148.114 H(8)C(5)O(5) 0.0 Other glycosylation 408 0.0
+FMNH@H 454.088965 454.3279 H(19)C(17)N(4)O(9)P(1) 0.0 Post-translational 409 0.0
+FMNH@C 454.088965 454.3279 H(19)C(17)N(4)O(9)P(1) 0.0 Post-translational 409 0.0
+Archaeol@C 634.662782 635.1417 H(86)C(43)O(2) 0.0 Post-translational 410 0.0
+Phenylisocyanate@Any_N-term 119.037114 119.1207 H(5)C(7)N(1)O(1) 0.0 Chemical derivative 411 0.0
+Phenylisocyanate:2H(5)@Any_N-term 124.068498 124.1515 2H(5)C(7)N(1)O(1) 0.0 Chemical derivative 412 0.0
+Phosphoguanosine@H 345.047435 345.2053 H(12)C(10)N(5)O(7)P(1) 0.0 Post-translational 413 0.0
+Phosphoguanosine@K 345.047435 345.2053 H(12)C(10)N(5)O(7)P(1) 0.0 Post-translational 413 0.0
+Hydroxymethyl@N 30.010565 30.026 H(2)C(1)O(1) 0.0 Post-translational 414 0.0
+MolybdopterinGD+Delta:S(-1)Se(1)@C 1620.930224 1618.9096 H(47)C(40)N(20)O(26)P(4)S(3)Se(1)Mo(1) 0.0 Post-translational 415 0.0
+Dipyrrolylmethanemethyl@C 418.137616 418.3973 H(22)C(20)N(2)O(8) 0.0 Post-translational 416 0.0
+PhosphoUridine@H 306.025302 306.166 H(11)C(9)N(2)O(8)P(1) 0.0 Post-translational 417 0.0
+PhosphoUridine@Y 306.025302 306.166 H(11)C(9)N(2)O(8)P(1) 0.0 Post-translational 417 0.0
+Glycerophospho@S 154.00311 154.0584 H(7)C(3)O(5)P(1) 0.0 Post-translational 419 0.0
+Carboxy->Thiocarboxy@G^Protein_C-term 15.977156 16.0656 O(-1)S(1) 0.0 Post-translational 420 0.0
+Sulfide@D 31.972071 32.065 S(1) 0.0 Post-translational 421 0.0
+Sulfide@C 31.972071 32.065 S(1) 0.0 Post-translational 421 0.0
+Sulfide@W 31.972071 32.065 S(1) 0.0 Chemical derivative 421 0.0
+PyruvicAcidIminyl@K 70.005479 70.0468 H(2)C(3)O(2) 0.0 Post-translational 422 0.0
+PyruvicAcidIminyl@V^Protein_N-term 70.005479 70.0468 H(2)C(3)O(2) 0.0 Post-translational 422 0.0
+PyruvicAcidIminyl@C^Protein_N-term 70.005479 70.0468 H(2)C(3)O(2) 0.0 Post-translational 422 0.0
+Delta:Se(1)@C 79.91652 78.96 Se(1) 0.0 Post-translational 423 0.0
+MolybdopterinGD@D 1572.985775 1572.0146 H(47)C(40)N(20)O(26)P(4)S(4)Mo(1) 0.0 Post-translational 424 0.0
+MolybdopterinGD@C 1572.985775 1572.0146 H(47)C(40)N(20)O(26)P(4)S(4)Mo(1) 0.0 Post-translational 424 0.0
+MolybdopterinGD@U 1572.985775 1572.0146 H(47)C(40)N(20)O(26)P(4)S(4)Mo(1) 0.0 Post-translational 424 0.0
+Dioxidation@U 31.989829 31.9988 O(2) 0.0 Multiple 425 0.0
+Dioxidation@C 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@W 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Dioxidation@Y 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@F 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Dioxidation@M 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@K 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@R 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@P 31.989829 31.9988 O(2) 0.0 Post-translational 425 0.0
+Dioxidation@E 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Dioxidation@I 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Dioxidation@L 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Dioxidation@V 31.989829 31.9988 O(2) 0.0 Chemical derivative 425 0.0
+Octanoyl@T 126.104465 126.1962 H(14)C(8)O(1) 0.0 Post-translational 426 0.0
+Octanoyl@S 126.104465 126.1962 H(14)C(8)O(1) 0.0 Post-translational 426 0.0
+Octanoyl@C 126.104465 126.1962 H(14)C(8)O(1) 0.0 Post-translational 426 0.0
+PhosphoHexNAc@T 283.045704 283.1724 H(14)C(8)N(1)O(8)P(1) 283.045704 H(14)C(8)N(1)O(8)P(1) O-linked glycosylation 428 0.5
+PhosphoHexNAc@S 283.045704 283.1724 H(14)C(8)N(1)O(8)P(1) 283.045704 H(14)C(8)N(1)O(8)P(1) O-linked glycosylation 428 0.5
+PhosphoHex@T 242.019154 242.1205 H(11)C(6)O(8)P(1) 242.019154 H(11)C(6)O(8)P(1) O-linked glycosylation 429 0.5
+PhosphoHex@S 242.019154 242.1205 H(11)C(6)O(8)P(1) 242.019154 H(11)C(6)O(8)P(1) O-linked glycosylation 429 0.5
+Palmitoleyl@C 236.214016 236.3929 H(28)C(16)O(1) 0.0 Post-translational 431 0.0
+Palmitoleyl@S 236.214016 236.3929 H(28)C(16)O(1) 0.0 Post-translational 431 0.0
+Palmitoleyl@T 236.214016 236.3929 H(28)C(16)O(1) 0.0 Pre-translational 431 0.0
+Cholesterol@Protein_C-term 368.344302 368.6383 H(44)C(27) 0.0 Post-translational 432 0.0
+Didehydroretinylidene@K 264.187801 264.4046 H(24)C(20) 0.0 Post-translational 433 0.0
+CHDH@D 294.183109 294.3859 H(26)C(17)O(4) 0.0 Post-translational 434 0.0
+Methylpyrroline@K 109.052764 109.1259 H(7)C(6)N(1)O(1) 0.0 Post-translational 435 0.0
+Hydroxyheme@E 614.161645 614.4714 H(30)C(34)N(4)O(4)Fe(1) 0.0 Post-translational 436 0.0
+MicrocinC7@Protein_C-term 386.110369 386.3003 H(19)C(13)N(6)O(6)P(1) 0.0 Post-translational 437 0.0
+Cyano@C 24.995249 25.0095 H(-1)C(1)N(1) 0.0 Post-translational 438 0.0
+Diironsubcluster@C 342.786916 342.876 H(-1)C(5)N(2)O(5)S(2)Fe(2) 0.0 Post-translational 439 0.0
+Amidino@C 42.021798 42.04 H(2)C(1)N(2) 0.0 Post-translational 440 0.0
+FMN@S 438.094051 438.3285 H(19)C(17)N(4)O(8)P(1) 0.0 Post-translational 442 0.0
+FMN@T 438.094051 438.3285 H(19)C(17)N(4)O(8)P(1) 0.0 Post-translational 442 0.0
+FMNC@C 456.104615 456.3438 H(21)C(17)N(4)O(9)P(1) 0.0 Post-translational 443 0.0
+CuSMo@C 922.834855 922.067 H(24)C(19)N(8)O(15)P(2)S(3)Mo(1)Cu(1) 0.0 Post-translational 444 0.0
+Hydroxytrimethyl@K 59.04969 59.0871 H(7)C(3)O(1) 0.0 Post-translational 445 0.0
+Deoxy@T -15.994915 -15.9994 O(-1) 0.0 Chemical derivative 447 0.0
+Deoxy@D -15.994915 -15.9994 O(-1) 0.0 Post-translational 447 0.0
+Deoxy@S -15.994915 -15.9994 O(-1) 0.0 Chemical derivative 447 0.0
+Microcin@Protein_C-term 831.197041 831.6871 H(37)C(36)N(3)O(20) 0.0 Post-translational 448 0.0
+Decanoyl@T 154.135765 154.2493 H(18)C(10)O(1) 0.0 Post-translational 449 0.0
+Decanoyl@S 154.135765 154.2493 H(18)C(10)O(1) 0.0 Post-translational 449 0.0
+GluGluGlu@Protein_C-term 387.127779 387.3419 H(21)C(15)N(3)O(9) 0.0 Post-translational 452 0.0
+GluGluGlu@E 387.127779 387.3419 H(21)C(15)N(3)O(9) 0.0 Post-translational 452 0.0
+GluGluGluGlu@Protein_C-term 516.170373 516.4559 H(28)C(20)N(4)O(12) 0.0 Post-translational 453 0.0
+GluGluGluGlu@E 516.170373 516.4559 H(28)C(20)N(4)O(12) 0.0 Post-translational 453 0.0
+HexN@W 161.068808 161.1558 H(11)C(6)N(1)O(4) 0.0 Other glycosylation 454 0.0
+HexN@T 161.068808 161.1558 H(11)C(6)N(1)O(4) 161.068808 H(11)C(6)N(1)O(4) O-linked glycosylation 454 0.5
+HexN@S 161.068808 161.1558 H(11)C(6)N(1)O(4) 161.068808 H(11)C(6)N(1)O(4) O-linked glycosylation 454 0.5
+HexN@N 161.068808 161.1558 H(11)C(6)N(1)O(4) 161.068808 H(11)C(6)N(1)O(4) N-linked glycosylation 454 0.5
+HexN@K 161.068808 161.1558 H(11)C(6)N(1)O(4) 0.0 Synth. pep. protect. gp. 454 0.0
+Xlink:DMP[154]@Protein_N-term 154.110613 154.2096 H(14)C(8)N(2)O(1) 0.0 Chemical derivative 455 0.0
+Xlink:DMP[154]@K 154.110613 154.2096 H(14)C(8)N(2)O(1) 0.0 Chemical derivative 455 0.0
+NDA@Any_N-term 175.042199 175.1855 H(5)C(13)N(1) 0.0 Chemical derivative 457 0.0
+NDA@K 175.042199 175.1855 H(5)C(13)N(1) 0.0 Chemical derivative 457 0.0
+SPITC:13C(6)@Any_N-term 220.991213 221.2054 H(5)C(1)13C(6)N(1)O(3)S(2) 0.0 Chemical derivative 464 0.0
+SPITC:13C(6)@K 220.991213 221.2054 H(5)C(1)13C(6)N(1)O(3)S(2) 0.0 Chemical derivative 464 0.0
+TMAB:2H(9)@Any_N-term 137.16403 137.2476 H(5)2H(9)C(7)N(1)O(1) 68.12999 2H(9)C(3)N(1) Isotopic label 477 0.5
+TMAB:2H(9)@K 137.16403 137.2476 H(5)2H(9)C(7)N(1)O(1) 68.12999 2H(9)C(3)N(1) Isotopic label 477 0.5
+TMAB@Any_N-term 128.107539 128.1922 H(14)C(7)N(1)O(1) 59.073499 H(9)C(3)N(1) Isotopic label 476 0.5
+TMAB@K 128.107539 128.1922 H(14)C(7)N(1)O(1) 59.073499 H(9)C(3)N(1) Isotopic label 476 0.5
+FTC@S 421.073241 421.4259 H(15)C(21)N(3)O(5)S(1) 0.0 Chemical derivative 478 0.0
+FTC@R 421.073241 421.4259 H(15)C(21)N(3)O(5)S(1) 0.0 Chemical derivative 478 0.0
+FTC@P 421.073241 421.4259 H(15)C(21)N(3)O(5)S(1) 0.0 Chemical derivative 478 0.0
+FTC@K 421.073241 421.4259 H(15)C(21)N(3)O(5)S(1) 0.0 Chemical derivative 478 0.0
+FTC@C 421.073241 421.4259 H(15)C(21)N(3)O(5)S(1) 0.0 Chemical derivative 478 0.0
+AEC-MAEC@T 59.019355 59.1334 H(5)C(2)N(1)O(-1)S(1) 0.0 Chemical derivative 472 0.0
+AEC-MAEC@S 59.019355 59.1334 H(5)C(2)N(1)O(-1)S(1) 0.0 Chemical derivative 472 0.0
+BADGE@C 340.167459 340.4129 H(24)C(21)O(4) 0.0 Non-standard residue 493 0.0
+Label:2H(4)@A 4.025107 4.0246 H(-4)2H(4) 0.0 Isotopic label 481 0.0
+Label:2H(4)@Y 4.025107 4.0246 H(-4)2H(4) 0.0 Isotopic label 481 0.0
+Label:2H(4)@F 4.025107 4.0246 H(-4)2H(4) 0.0 Isotopic label 481 0.0
+Label:2H(4)@K 4.025107 4.0246 H(-4)2H(4) 0.0 Isotopic label 481 0.0
+Label:2H(4)@U 4.025107 4.0246 H(-4)2H(4) 0.0 Isotopic label 481 0.0
+Hep@T 192.063388 192.1666 H(12)C(7)O(6) 192.063388 H(12)C(7)O(6) O-linked glycosylation 490 0.5
+Hep@S 192.063388 192.1666 H(12)C(7)O(6) 192.063388 H(12)C(7)O(6) O-linked glycosylation 490 0.5
+Hep@R 192.063388 192.1666 H(12)C(7)O(6) 0.0 N-linked glycosylation 490 0.0
+Hep@Q 192.063388 192.1666 H(12)C(7)O(6) 0.0 Other glycosylation 490 0.0
+Hep@N 192.063388 192.1666 H(12)C(7)O(6) 192.063388 H(12)C(7)O(6) N-linked glycosylation 490 0.5
+Hep@K 192.063388 192.1666 H(12)C(7)O(6) 0.0 Other glycosylation 490 0.0
+CyDye-Cy5@C 684.298156 684.8442 H(44)C(38)N(4)O(6)S(1) 0.0 Chemical derivative 495 0.0
+DHP@C 118.065674 118.1558 H(8)C(8)N(1) 0.0 Chemical derivative 488 0.0
+BHTOH@H 234.16198 234.334 H(22)C(15)O(2) 0.0 Other 498 0.0
+BHTOH@C 234.16198 234.334 H(22)C(15)O(2) 0.0 Other 498 0.0
+BHTOH@K 234.16198 234.334 H(22)C(15)O(2) 0.0 Other 498 0.0
+IGBP:13C(2)@C 298.022748 299.1331 H(13)C(10)13C(2)N(2)O(2)Br(1) 0.0 Isotopic label 499 0.0
+Nmethylmaleimide+water@C 129.042593 129.114 H(7)C(5)N(1)O(3) 0.0 Chemical derivative 500 0.0
+PyMIC@Any_N-term 134.048013 134.1353 H(6)C(7)N(2)O(1) 0.0 Chemical derivative 501 0.0
+LG-lactam-K@Protein_N-term 332.19876 332.4339 H(28)C(20)O(4) 0.0 Post-translational 503 0.0
+LG-lactam-K@K 332.19876 332.4339 H(28)C(20)O(4) 0.0 Post-translational 503 0.0
+BisANS@K 594.091928 594.6569 H(22)C(32)N(2)O(6)S(2) 0.0 Chemical derivative 519 0.0
+Piperidine@Any_N-term 68.0626 68.117 H(8)C(5) 0.0 Chemical derivative 520 0.0
+Piperidine@K 68.0626 68.117 H(8)C(5) 0.0 Chemical derivative 520 0.0
+Diethyl@Any_N-term 56.0626 56.1063 H(8)C(4) 0.0 Chemical derivative 518 0.0
+Diethyl@K 56.0626 56.1063 H(8)C(4) 0.0 Chemical derivative 518 0.0
+LG-Hlactam-K@Protein_N-term 348.193674 348.4333 H(28)C(20)O(5) 0.0 Post-translational 504 0.0
+LG-Hlactam-K@K 348.193674 348.4333 H(28)C(20)O(5) 0.0 Post-translational 504 0.0
+Dimethyl:2H(4)13C(2)@Protein_N-term 34.063117 34.0631 2H(4)13C(2) 0.0 Isotopic label 510 [13C]([2H])([2H])([1H]) 0.0
+Dimethyl:2H(4)13C(2)@R 34.063117 34.0631 2H(4)13C(2) 0.0 Isotopic label 510 0.0
+Dimethyl:2H(4)13C(2)@K 34.063117 34.0631 2H(4)13C(2) 0.0 Isotopic label 510 0.0
+Dimethyl:2H(4)13C(2)@Any_N-term 34.063117 34.0631 2H(4)13C(2) 0.0 Isotopic label 510 [13C]([2H])([2H])([1H]) 0.0
+C8-QAT@Any_N-term 227.224915 227.3862 H(29)C(14)N(1)O(1) 0.0 Chemical derivative 513 0.0
+C8-QAT@K 227.224915 227.3862 H(29)C(14)N(1)O(1) 0.0 Chemical derivative 513 0.0
+Hex(2)@R 324.105647 324.2812 H(20)C(12)O(10) 0.0 Other glycosylation 512 0.0
+Hex(2)@K 324.105647 324.2812 H(20)C(12)O(10) 0.0 Other glycosylation 512 0.0
+Hex(2)@S 324.105647 324.2812 H(20)C(12)O(10) 324.105647 H(20)C(12)O(10) O-linked glycosylation 512 0.5
+Hex(2)@T 324.105647 324.2812 H(20)C(12)O(10) 324.105647 H(20)C(12)O(10) O-linked glycosylation 512 0.5
+LG-lactam-R@R 290.176961 290.3939 H(26)C(19)N(-2)O(4) 0.0 Post-translational 505 0.0
+Withaferin@C 470.266839 470.5977 H(38)C(28)O(6) 0.0 Chemical derivative 1036 0.0
+Biotin:Thermo-88317@S 443.291294 443.5603 H(42)C(22)N(3)O(4)P(1) 0.0 Chemical derivative 1037 0.0
+Biotin:Thermo-88317@Y 443.291294 443.5603 H(42)C(22)N(3)O(4)P(1) 0.0 Chemical derivative 1037 0.0
+CLIP_TRAQ_2@Any_N-term 141.098318 141.1756 H(12)C(6)13C(1)N(2)O(1) 0.0 Isotopic label 525 0.0
+CLIP_TRAQ_2@K 141.098318 141.1756 H(12)C(6)13C(1)N(2)O(1) 0.0 Isotopic label 525 0.0
+CLIP_TRAQ_2@Y 141.098318 141.1756 H(12)C(6)13C(1)N(2)O(1) 0.0 Isotopic label 525 0.0
+LG-Hlactam-R@R 306.171876 306.3933 H(26)C(19)N(-2)O(5) 0.0 Post-translational 506 0.0
+Maleimide-PEO2-Biotin@C 525.225719 525.6183 H(35)C(23)N(5)O(7)S(1) 0.0 Chemical derivative 522 0.0
+Sulfo-NHS-LC-LC-Biotin@Any_N-term 452.245726 452.6106 H(36)C(22)N(4)O(4)S(1) 0.0 Chemical derivative 523 0.0
+Sulfo-NHS-LC-LC-Biotin@K 452.245726 452.6106 H(36)C(22)N(4)O(4)S(1) 0.0 Chemical derivative 523 0.0
+FNEM@C 427.069202 427.3625 H(13)C(24)N(1)O(7) 0.0 Chemical derivative 515 0.0
+PropylNAGthiazoline@C 232.064354 232.2768 H(14)C(9)N(1)O(4)S(1) 0.0 Chemical derivative 514 0.0
+Dethiomethyl@M -48.003371 -48.1075 H(-4)C(-1)S(-1) 0.0 Artefact 526 0.0
+iTRAQ4plex114@Y 144.105918 144.168 H(12)C(5)13C(2)N(2)18O(1) 0.0 Isotopic label 532 0.0
+iTRAQ4plex114@Any_N-term 144.105918 144.168 H(12)C(5)13C(2)N(2)18O(1) 0.0 Isotopic label 532 0.0
+iTRAQ4plex114@K 144.105918 144.168 H(12)C(5)13C(2)N(2)18O(1) 0.0 Isotopic label 532 0.0
+iTRAQ4plex114@C 144.105918 144.168 H(12)C(5)13C(2)N(2)18O(1) 0.0 Isotopic label 532 0.0
+iTRAQ4plex115@Y 144.099599 144.1688 H(12)C(6)13C(1)N(1)15N(1)18O(1) 0.0 Isotopic label 533 0.0
+iTRAQ4plex115@Any_N-term 144.099599 144.1688 H(12)C(6)13C(1)N(1)15N(1)18O(1) 0.0 Isotopic label 533 0.0
+iTRAQ4plex115@K 144.099599 144.1688 H(12)C(6)13C(1)N(1)15N(1)18O(1) 0.0 Isotopic label 533 0.0
+iTRAQ4plex115@C 144.099599 144.1688 H(12)C(6)13C(1)N(1)15N(1)18O(1) 0.0 Isotopic label 533 0.0
+Dibromo@Y 155.821022 157.7921 H(-2)Br(2) 0.0 Chemical derivative 534 0.0
+LRGG@K 383.228103 383.446 H(29)C(16)N(7)O(4) 0.0 Chemical derivative 535 0.0
+CLIP_TRAQ_3@Y 271.148736 271.2976 H(20)C(11)13C(1)N(3)O(4) 0.0 Isotopic label 536 0.0
+CLIP_TRAQ_3@Any_N-term 271.148736 271.2976 H(20)C(11)13C(1)N(3)O(4) 0.0 Isotopic label 536 0.0
+CLIP_TRAQ_3@K 271.148736 271.2976 H(20)C(11)13C(1)N(3)O(4) 0.0 Isotopic label 536 0.0
+CLIP_TRAQ_4@Any_N-term 244.101452 244.2292 H(15)C(9)13C(1)N(2)O(5) 0.0 Isotopic label 537 0.0
+CLIP_TRAQ_4@K 244.101452 244.2292 H(15)C(9)13C(1)N(2)O(5) 0.0 Isotopic label 537 0.0
+CLIP_TRAQ_4@Y 244.101452 244.2292 H(15)C(9)13C(1)N(2)O(5) 0.0 Isotopic label 537 0.0
+Biotin:Cayman-10141@C 626.386577 626.8927 H(54)C(35)N(4)O(4)S(1) 0.0 Other 538 0.0
+Biotin:Cayman-10013@C 660.428442 660.9504 H(60)C(36)N(4)O(5)S(1) 0.0 Other 539 0.0
+Ala->Ser@A 15.994915 15.9994 H(0)C(0)N(0)O(1)S(0) 0.0 AA substitution 540 0.0
+Ala->Thr@A 30.010565 30.026 H(2)C(1)N(0)O(1)S(0) 0.0 AA substitution 541 0.0
+Ala->Asp@A 43.989829 44.0095 H(0)C(1)N(0)O(2)S(0) 0.0 AA substitution 542 0.0
+Ala->Pro@A 26.01565 26.0373 H(2)C(2)N(0)O(0)S(0) 0.0 AA substitution 543 0.0
+Ala->Gly@A -14.01565 -14.0266 H(-2)C(-1)N(0)O(0)S(0) 0.0 AA substitution 544 0.0
+Ala->Glu@A 58.005479 58.0361 H(2)C(2)N(0)O(2)S(0) 0.0 AA substitution 545 0.0
+Ala->Val@A 28.0313 28.0532 H(4)C(2)N(0)O(0)S(0) 0.0 AA substitution 546 0.0
+Cys->Phe@C 44.059229 44.031 H(4)C(6)N(0)O(0)S(-1) 0.0 AA substitution 547 0.0
+Cys->Ser@C -15.977156 -16.0656 H(0)C(0)N(0)O(1)S(-1) 0.0 AA substitution 548 0.0
+Cys->Trp@C 83.070128 83.067 H(5)C(8)N(1)O(0)S(-1) 0.0 AA substitution 549 0.0
+Cys->Tyr@C 60.054144 60.0304 H(4)C(6)N(0)O(1)S(-1) 0.0 AA substitution 550 0.0
+Cys->Arg@C 53.091927 53.0428 H(7)C(3)N(3)O(0)S(-1) 0.0 AA substitution 551 0.0
+Cys->Gly@C -45.987721 -46.0916 H(-2)C(-1)N(0)O(0)S(-1) 0.0 AA substitution 552 0.0
+Asp->Ala@D -43.989829 -44.0095 H(0)C(-1)N(0)O(-2)S(0) 0.0 AA substitution 553 0.0
+Asp->His@D 22.031969 22.0519 H(2)C(2)N(2)O(-2)S(0) 0.0 AA substitution 554 0.0
+Asp->Asn@D -0.984016 -0.9848 H(1)C(0)N(1)O(-1)S(0) 0.0 AA substitution 555 0.0
+Asp->Gly@D -58.005479 -58.0361 H(-2)C(-2)N(0)O(-2)S(0) 0.0 AA substitution 556 0.0
+Asp->Tyr@D 48.036386 48.0859 H(4)C(5)N(0)O(-1)S(0) 0.0 AA substitution 557 0.0
+Asp->Glu@D 14.01565 14.0266 H(2)C(1)N(0)O(0)S(0) 0.0 AA substitution 558 0.0
+Asp->Val@D -15.958529 -15.9563 H(4)C(1)N(0)O(-2)S(0) 0.0 AA substitution 559 0.0
+Glu->Ala@E -58.005479 -58.0361 H(-2)C(-2)N(0)O(-2)S(0) 0.0 AA substitution 560 0.0
+Glu->Gln@E -0.984016 -0.9848 H(1)C(0)N(1)O(-1)S(0) 0.0 AA substitution 561 0.0
+Glu->Asp@E -14.01565 -14.0266 H(-2)C(-1)N(0)O(0)S(0) 0.0 AA substitution 562 0.0
+Glu->Lys@E -0.94763 -0.9417 H(5)C(1)N(1)O(-2)S(0) 0.0 AA substitution 563 0.0
+Glu->Gly@E -72.021129 -72.0627 H(-4)C(-3)N(0)O(-2)S(0) 0.0 AA substitution 564 0.0
+Glu->Val@E -29.974179 -29.9829 H(2)C(0)N(0)O(-2)S(0) 0.0 AA substitution 565 0.0
+Phe->Ser@F -60.036386 -60.0966 H(-4)C(-6)N(0)O(1)S(0) 0.0 AA substitution 566 0.0
+Phe->Cys@F -44.059229 -44.031 H(-4)C(-6)N(0)O(0)S(1) 0.0 AA substitution 567 0.0
+Phe->Xle@F -33.98435 -34.0162 H(2)C(-3) 0.0 AA substitution 568 0.0
+Phe->Tyr@F 15.994915 15.9994 H(0)C(0)N(0)O(1)S(0) 0.0 AA substitution 569 0.0
+Phe->Val@F -48.0 -48.0428 H(0)C(-4)N(0)O(0)S(0) 0.0 AA substitution 570 0.0
+Gly->Ala@G 14.01565 14.0266 H(2)C(1)N(0)O(0)S(0) 0.0 AA substitution 571 0.0
+Gly->Ser@G 30.010565 30.026 H(2)C(1)N(0)O(1)S(0) 0.0 AA substitution 572 0.0
+Gly->Trp@G 129.057849 129.1586 H(7)C(9)N(1)O(0)S(0) 0.0 AA substitution 573 0.0
+Gly->Glu@G 72.021129 72.0627 H(4)C(3)N(0)O(2)S(0) 0.0 AA substitution 574 0.0
+Gly->Val@G 42.04695 42.0797 H(6)C(3)N(0)O(0)S(0) 0.0 AA substitution 575 0.0
+Gly->Asp@G 58.005479 58.0361 H(2)C(2)N(0)O(2)S(0) 0.0 AA substitution 576 0.0
+Gly->Cys@G 45.987721 46.0916 H(2)C(1)N(0)O(0)S(1) 0.0 AA substitution 577 0.0
+Gly->Arg@G 99.079647 99.1344 H(9)C(4)N(3)O(0)S(0) 0.0 AA substitution 578 0.0
+dNIC@Any_N-term 109.048119 109.1205 H(1)2H(3)C(6)N(1)O(1) 0.0 Isotopic label 698 0.0
+dNIC@K 109.048119 109.1205 H(1)2H(3)C(6)N(1)O(1) 0.0 Isotopic label 698 0.0
+His->Pro@H -40.006148 -40.0241 H(0)C(-1)N(-2)O(0)S(0) 0.0 AA substitution 580 0.0
+His->Tyr@H 26.004417 26.034 H(2)C(3)N(-2)O(1)S(0) 0.0 AA substitution 581 0.0
+His->Gln@H -9.000334 -9.0101 H(1)C(-1)N(-1)O(1)S(0) 0.0 AA substitution 582 0.0
+NIC@Any_N-term 105.021464 105.0941 H(3)C(6)N(1)O(1) 0.0 Isotopic label 697 0.0
+NIC@K 105.021464 105.0941 H(3)C(6)N(1)O(1) 0.0 Isotopic label 697 0.0
+His->Arg@H 19.042199 19.0464 H(5)C(0)N(1)O(0)S(0) 0.0 AA substitution 584 0.0
+His->Xle@H -23.974848 -23.9816 H(4)N(-2) 0.0 AA substitution 585 0.0
+Xle->Ala@L -42.04695 -42.0797 H(-6)C(-3)N(0)O(0)S(0) 0.0 AA substitution 1125 0.0
+Xle->Ala@I -42.04695 -42.0797 H(-6)C(-3)N(0)O(0)S(0) 0.0 AA substitution 1125 0.0
+Xle->Thr@L -12.036386 -12.0538 H(-4)C(-2)O(1) 0.0 AA substitution 588 0.0
+Xle->Thr@I -12.036386 -12.0538 H(-4)C(-2)O(1) 0.0 AA substitution 588 0.0
+Xle->Asn@L 0.958863 0.945 H(-5)C(-2)N(1)O(1) 0.0 AA substitution 589 0.0
+Xle->Asn@I 0.958863 0.945 H(-5)C(-2)N(1)O(1) 0.0 AA substitution 589 0.0
+Xle->Lys@L 15.010899 15.0146 H(1)N(1) 0.0 AA substitution 590 0.0
+Xle->Lys@I 15.010899 15.0146 H(1)N(1) 0.0 AA substitution 590 0.0
+Lys->Thr@K -27.047285 -27.0684 H(-5)C(-2)N(-1)O(1)S(0) 0.0 AA substitution 594 0.0
+Lys->Asn@K -14.052036 -14.0696 H(-6)C(-2)N(0)O(1)S(0) 0.0 AA substitution 595 0.0
+Lys->Glu@K 0.94763 0.9417 H(-5)C(-1)N(-1)O(2)S(0) 0.0 AA substitution 596 0.0
+Lys->Gln@K -0.036386 -0.0431 H(-4)C(-1)N(0)O(1)S(0) 0.0 AA substitution 597 0.0
+Lys->Met@K 2.945522 3.0238 H(-3)C(-1)N(-1)O(0)S(1) 0.0 AA substitution 598 0.0
+Lys->Arg@K 28.006148 28.0134 H(0)C(0)N(2)O(0)S(0) 0.0 AA substitution 599 0.0
+Lys->Xle@K -15.010899 -15.0146 H(-1)N(-1) 0.0 AA substitution 600 0.0
+Xle->Ser@I -26.052036 -26.0803 H(-6)C(-3)O(1) 0.0 AA substitution 601 0.0
+Xle->Ser@L -26.052036 -26.0803 H(-6)C(-3)O(1) 0.0 AA substitution 601 0.0
+Xle->Phe@I 33.98435 34.0162 H(-2)C(3) 0.0 AA substitution 602 0.0
+Xle->Phe@L 33.98435 34.0162 H(-2)C(3) 0.0 AA substitution 602 0.0
+Xle->Trp@I 72.995249 73.0523 H(-1)C(5)N(1) 0.0 AA substitution 603 0.0
+Xle->Trp@L 72.995249 73.0523 H(-1)C(5)N(1) 0.0 AA substitution 603 0.0
+Xle->Pro@I -16.0313 -16.0425 H(-4)C(-1) 0.0 AA substitution 604 0.0
+Xle->Pro@L -16.0313 -16.0425 H(-4)C(-1) 0.0 AA substitution 604 0.0
+Xle->Val@I -14.01565 -14.0266 H(-2)C(-1) 0.0 AA substitution 605 0.0
+Xle->Val@L -14.01565 -14.0266 H(-2)C(-1) 0.0 AA substitution 605 0.0
+Xle->His@I 23.974848 23.9816 H(-4)N(2) 0.0 AA substitution 606 0.0
+Xle->His@L 23.974848 23.9816 H(-4)N(2) 0.0 AA substitution 606 0.0
+Xle->Gln@I 14.974514 14.9716 H(-3)C(-1)N(1)O(1) 0.0 AA substitution 607 0.0
+Xle->Gln@L 14.974514 14.9716 H(-3)C(-1)N(1)O(1) 0.0 AA substitution 607 0.0
+Xle->Met@I 17.956421 18.0384 H(-2)C(-1)S(1) 0.0 AA substitution 608 0.0
+Xle->Met@L 17.956421 18.0384 H(-2)C(-1)S(1) 0.0 AA substitution 608 0.0
+Xle->Arg@I 43.017047 43.028 H(1)N(3) 0.0 AA substitution 609 0.0
+Xle->Arg@L 43.017047 43.028 H(1)N(3) 0.0 AA substitution 609 0.0
+Met->Thr@M -29.992806 -30.0922 H(-2)C(-1)N(0)O(1)S(-1) 0.0 AA substitution 610 0.0
+Met->Arg@M 25.060626 24.9896 H(3)C(1)N(3)O(0)S(-1) 0.0 AA substitution 611 0.0
+Met->Lys@M -2.945522 -3.0238 H(3)C(1)N(1)O(0)S(-1) 0.0 AA substitution 613 0.0
+Met->Xle@M -17.956421 -18.0384 H(2)C(1)S(-1) 0.0 AA substitution 614 0.0
+Met->Val@M -31.972071 -32.065 H(0)C(0)N(0)O(0)S(-1) 0.0 AA substitution 615 0.0
+Asn->Ser@N -27.010899 -27.0253 H(-1)C(-1)N(-1)O(0)S(0) 0.0 AA substitution 616 0.0
+Asn->Thr@N -12.995249 -12.9988 H(1)C(0)N(-1)O(0)S(0) 0.0 AA substitution 617 0.0
+Asn->Lys@N 14.052036 14.0696 H(6)C(2)N(0)O(-1)S(0) 0.0 AA substitution 618 0.0
+Asn->Tyr@N 49.020401 49.0706 H(3)C(5)N(-1)O(0)S(0) 0.0 AA substitution 619 0.0
+Asn->His@N 23.015984 23.0366 H(1)C(2)N(1)O(-1)S(0) 0.0 AA substitution 620 0.0
+Asn->Asp@N 0.984016 0.9848 H(-1)C(0)N(-1)O(1)S(0) 0.0 AA substitution 621 0.0
+Asn->Xle@N -0.958863 -0.945 H(5)C(2)N(-1)O(-1) 0.0 AA substitution 622 0.0
+Pro->Ser@P -10.020735 -10.0379 H(-2)C(-2)N(0)O(1)S(0) 0.0 AA substitution 623 0.0
+Pro->Ala@P -26.01565 -26.0373 H(-2)C(-2)N(0)O(0)S(0) 0.0 AA substitution 624 0.0
+Pro->His@P 40.006148 40.0241 H(0)C(1)N(2)O(0)S(0) 0.0 AA substitution 625 0.0
+Pro->Gln@P 31.005814 31.014 H(1)C(0)N(1)O(1)S(0) 0.0 AA substitution 626 0.0
+Pro->Thr@P 3.994915 3.9887 H(0)C(-1)N(0)O(1)S(0) 0.0 AA substitution 627 0.0
+Pro->Arg@P 59.048347 59.0705 H(5)C(1)N(3)O(0)S(0) 0.0 AA substitution 628 0.0
+Pro->Xle@P 16.0313 16.0425 H(4)C(1) 0.0 AA substitution 629 0.0
+Gln->Pro@Q -31.005814 -31.014 H(-1)C(0)N(-1)O(-1)S(0) 0.0 AA substitution 630 0.0
+Gln->Lys@Q 0.036386 0.0431 H(4)C(1)N(0)O(-1)S(0) 0.0 AA substitution 631 0.0
+Gln->Glu@Q 0.984016 0.9848 H(-1)C(0)N(-1)O(1)S(0) 0.0 AA substitution 632 0.0
+Gln->His@Q 9.000334 9.0101 H(-1)C(1)N(1)O(-1)S(0) 0.0 AA substitution 633 0.0
+Gln->Arg@Q 28.042534 28.0565 H(4)C(1)N(2)O(-1)S(0) 0.0 AA substitution 634 0.0
+Gln->Xle@Q -14.974514 -14.9716 H(3)C(1)N(-1)O(-1) 0.0 AA substitution 635 0.0
+Arg->Ser@R -69.069083 -69.1084 H(-7)C(-3)N(-3)O(1)S(0) 0.0 AA substitution 636 0.0
+Arg->Trp@R 29.978202 30.0242 H(-2)C(5)N(-2)O(0)S(0) 0.0 AA substitution 637 0.0
+Arg->Thr@R -55.053433 -55.0818 H(-5)C(-2)N(-3)O(1)S(0) 0.0 AA substitution 638 0.0
+Arg->Pro@R -59.048347 -59.0705 H(-5)C(-1)N(-3)O(0)S(0) 0.0 AA substitution 639 0.0
+Arg->Lys@R -28.006148 -28.0134 H(0)C(0)N(-2)O(0)S(0) 0.0 AA substitution 640 0.0
+Arg->His@R -19.042199 -19.0464 H(-5)C(0)N(-1)O(0)S(0) 0.0 AA substitution 641 0.0
+Arg->Gln@R -28.042534 -28.0565 H(-4)C(-1)N(-2)O(1)S(0) 0.0 AA substitution 642 0.0
+Arg->Met@R -25.060626 -24.9896 H(-3)C(-1)N(-3)O(0)S(1) 0.0 AA substitution 643 0.0
+Arg->Cys@R -53.091927 -53.0428 H(-7)C(-3)N(-3)O(0)S(1) 0.0 AA substitution 644 0.0
+Arg->Xle@R -43.017047 -43.028 H(-1)N(-3) 0.0 AA substitution 645 0.0
+Arg->Gly@R -99.079647 -99.1344 H(-9)C(-4)N(-3)O(0)S(0) 0.0 AA substitution 646 0.0
+Ser->Phe@S 60.036386 60.0966 H(4)C(6)N(0)O(-1)S(0) 0.0 AA substitution 647 0.0
+Ser->Ala@S -15.994915 -15.9994 H(0)C(0)N(0)O(-1)S(0) 0.0 AA substitution 648 0.0
+Ser->Trp@S 99.047285 99.1326 H(5)C(8)N(1)O(-1)S(0) 0.0 AA substitution 649 0.0
+Ser->Thr@S 14.01565 14.0266 H(2)C(1)N(0)O(0)S(0) 0.0 AA substitution 650 0.0
+Ser->Asn@S 27.010899 27.0253 H(1)C(1)N(1)O(0)S(0) 0.0 AA substitution 651 0.0
+Ser->Pro@S 10.020735 10.0379 H(2)C(2)N(0)O(-1)S(0) 0.0 AA substitution 652 0.0
+Ser->Tyr@S 76.0313 76.096 H(4)C(6)N(0)O(0)S(0) 0.0 AA substitution 653 0.0
+Ser->Cys@S 15.977156 16.0656 H(0)C(0)N(0)O(-1)S(1) 0.0 AA substitution 654 0.0
+Ser->Arg@S 69.069083 69.1084 H(7)C(3)N(3)O(-1)S(0) 0.0 AA substitution 655 0.0
+Ser->Xle@S 26.052036 26.0803 H(6)C(3)O(-1) 0.0 AA substitution 656 0.0
+Ser->Gly@S -30.010565 -30.026 H(-2)C(-1)N(0)O(-1)S(0) 0.0 AA substitution 657 0.0
+Thr->Ser@T -14.01565 -14.0266 H(-2)C(-1)N(0)O(0)S(0) 0.0 AA substitution 658 0.0
+Thr->Ala@T -30.010565 -30.026 H(-2)C(-1)N(0)O(-1)S(0) 0.0 AA substitution 659 0.0
+Thr->Asn@T 12.995249 12.9988 H(-1)C(0)N(1)O(0)S(0) 0.0 AA substitution 660 0.0
+Thr->Lys@T 27.047285 27.0684 H(5)C(2)N(1)O(-1)S(0) 0.0 AA substitution 661 0.0
+Thr->Pro@T -3.994915 -3.9887 H(0)C(1)N(0)O(-1)S(0) 0.0 AA substitution 662 0.0
+Thr->Met@T 29.992806 30.0922 H(2)C(1)N(0)O(-1)S(1) 0.0 AA substitution 663 0.0
+Thr->Xle@T 12.036386 12.0538 H(4)C(2)O(-1) 0.0 AA substitution 664 0.0
+Thr->Arg@T 55.053433 55.0818 H(5)C(2)N(3)O(-1)S(0) 0.0 AA substitution 665 0.0
+Val->Phe@V 48.0 48.0428 H(0)C(4)N(0)O(0)S(0) 0.0 AA substitution 666 0.0
+Val->Ala@V -28.0313 -28.0532 H(-4)C(-2)N(0)O(0)S(0) 0.0 AA substitution 667 0.0
+Val->Glu@V 29.974179 29.9829 H(-2)C(0)N(0)O(2)S(0) 0.0 AA substitution 668 0.0
+Val->Met@V 31.972071 32.065 H(0)C(0)N(0)O(0)S(1) 0.0 AA substitution 669 0.0
+Val->Asp@V 15.958529 15.9563 H(-4)C(-1)N(0)O(2)S(0) 0.0 AA substitution 670 0.0
+Val->Xle@V 14.01565 14.0266 H(2)C(1) 0.0 AA substitution 671 0.0
+Val->Gly@V -42.04695 -42.0797 H(-6)C(-3)N(0)O(0)S(0) 0.0 AA substitution 672 0.0
+Trp->Ser@W -99.047285 -99.1326 H(-5)C(-8)N(-1)O(1)S(0) 0.0 AA substitution 673 0.0
+Trp->Cys@W -83.070128 -83.067 H(-5)C(-8)N(-1)O(0)S(1) 0.0 AA substitution 674 0.0
+Trp->Arg@W -29.978202 -30.0242 H(2)C(-5)N(2)O(0)S(0) 0.0 AA substitution 675 0.0
+Trp->Gly@W -129.057849 -129.1586 H(-7)C(-9)N(-1)O(0)S(0) 0.0 AA substitution 676 0.0
+Trp->Xle@W -72.995249 -73.0523 H(1)C(-5)N(-1) 0.0 AA substitution 677 0.0
+Tyr->Phe@Y -15.994915 -15.9994 H(0)C(0)N(0)O(-1)S(0) 0.0 AA substitution 678 0.0
+Tyr->Ser@Y -76.0313 -76.096 H(-4)C(-6)N(0)O(0)S(0) 0.0 AA substitution 679 0.0
+Tyr->Asn@Y -49.020401 -49.0706 H(-3)C(-5)N(1)O(0)S(0) 0.0 AA substitution 680 0.0
+Tyr->His@Y -26.004417 -26.034 H(-2)C(-3)N(2)O(-1)S(0) 0.0 AA substitution 681 0.0
+Tyr->Asp@Y -48.036386 -48.0859 H(-4)C(-5)N(0)O(1)S(0) 0.0 AA substitution 682 0.0
+Tyr->Cys@Y -60.054144 -60.0304 H(-4)C(-6)N(0)O(-1)S(1) 0.0 AA substitution 683 0.0
+BDMAPP@W 253.010225 254.1231 H(12)C(11)N(1)O(1)Br(1) 0.0 Artefact 684 0.0
+BDMAPP@Y 253.010225 254.1231 H(12)C(11)N(1)O(1)Br(1) 0.0 Artefact 684 0.0
+BDMAPP@Protein_N-term 253.010225 254.1231 H(12)C(11)N(1)O(1)Br(1) 0.0 Chemical derivative 684 0.0
+BDMAPP@K 253.010225 254.1231 H(12)C(11)N(1)O(1)Br(1) 0.0 Chemical derivative 684 0.0
+BDMAPP@H 253.010225 254.1231 H(12)C(11)N(1)O(1)Br(1) 0.0 Artefact 684 0.0
+NA-LNO2@C 325.225309 325.443 H(31)C(18)N(1)O(4) 0.0 Post-translational 685 0.0
+NA-LNO2@H 325.225309 325.443 H(31)C(18)N(1)O(4) 0.0 Post-translational 685 0.0
+NA-OA-NO2@C 327.240959 327.4589 H(33)C(18)N(1)O(4) 0.0 Post-translational 686 0.0
+NA-OA-NO2@H 327.240959 327.4589 H(33)C(18)N(1)O(4) 0.0 Post-translational 686 0.0
+ICPL:2H(4)@Any_N-term 109.046571 109.1188 H(-1)2H(4)C(6)N(1)O(1) 0.0 Isotopic label 687 0.0
+ICPL:2H(4)@Protein_N-term 109.046571 109.1188 H(-1)2H(4)C(6)N(1)O(1) 0.0 Isotopic label 687 0.0
+ICPL:2H(4)@K 109.046571 109.1188 H(-1)2H(4)C(6)N(1)O(1) 0.0 Isotopic label 687 0.0
+CarboxymethylDTT@C 210.00205 210.2712 H(10)C(6)O(4)S(2) 0.0 Artefact 894 0.0
+iTRAQ8plex@Protein_N-term 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@T 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@S 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@H 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@Y 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@Any_N-term 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@K 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+iTRAQ8plex@C 304.20536 304.3074 H(24)C(7)13C(7)N(3)15N(1)O(3) 0.0 Isotopic label 730 0.0
+Label:13C(6)15N(1)@I 7.017164 6.9493 C(-6)13C(6)N(-1)15N(1) 0.0 Isotopic label 695 0.0
+Label:13C(6)15N(1)@L 7.017164 6.9493 C(-6)13C(6)N(-1)15N(1) 0.0 Isotopic label 695 0.0
+Label:2H(9)13C(6)15N(2)@K 17.07069 16.9982 H(-9)2H(9)C(-6)13C(6)N(-2)15N(2) 0.0 Isotopic label 696 0.0
+HNE-Delta:H(2)O@K 138.104465 138.2069 H(14)C(9)O(1) 0.0 Chemical derivative 720 0.0
+HNE-Delta:H(2)O@H 138.104465 138.2069 H(14)C(9)O(1) 0.0 Chemical derivative 720 0.0
+HNE-Delta:H(2)O@C 138.104465 138.2069 H(14)C(9)O(1) 0.0 Chemical derivative 720 0.0
+4-ONE@K 154.09938 154.2063 H(14)C(9)O(2) 0.0 Chemical derivative 721 0.0
+4-ONE@H 154.09938 154.2063 H(14)C(9)O(2) 0.0 Chemical derivative 721 0.0
+4-ONE@C 154.09938 154.2063 H(14)C(9)O(2) 0.0 Chemical derivative 721 0.0
+O-Dimethylphosphate@Y 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 723 0.0
+O-Dimethylphosphate@T 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 723 0.0
+O-Dimethylphosphate@S 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 723 0.0
+O-Methylphosphate@Y 93.981981 94.0065 H(3)C(1)O(3)P(1) 0.0 Chemical derivative 724 0.0
+O-Methylphosphate@T 93.981981 94.0065 H(3)C(1)O(3)P(1) 0.0 Chemical derivative 724 0.0
+O-Methylphosphate@S 93.981981 94.0065 H(3)C(1)O(3)P(1) 0.0 Chemical derivative 724 0.0
+Diethylphosphate@Any_N-term 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@H 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@C 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@K 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@Y 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@T 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Diethylphosphate@S 136.028931 136.0862 H(9)C(4)O(3)P(1) 0.0 Chemical derivative 725 0.0
+Ethylphosphate@Any_N-term 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 726 0.0
+Ethylphosphate@K 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 726 0.0
+Ethylphosphate@Y 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 726 0.0
+Ethylphosphate@T 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 726 0.0
+Ethylphosphate@S 107.997631 108.0331 H(5)C(2)O(3)P(1) 0.0 Chemical derivative 726 0.0
+O-pinacolylmethylphosphonate@T 162.080967 162.1666 H(15)C(7)O(2)P(1) 0.0 Chemical derivative 727 0.0
+O-pinacolylmethylphosphonate@S 162.080967 162.1666 H(15)C(7)O(2)P(1) 0.0 Chemical derivative 727 0.0
+O-pinacolylmethylphosphonate@K 162.080967 162.1666 H(15)C(7)O(2)P(1) 0.0 Chemical derivative 727 0.0
+O-pinacolylmethylphosphonate@Y 162.080967 162.1666 H(15)C(7)O(2)P(1) 0.0 Chemical derivative 727 0.0
+O-pinacolylmethylphosphonate@H 162.080967 162.1666 H(15)C(7)O(2)P(1) 0.0 Chemical derivative 727 0.0
+Methylphosphonate@Y 77.987066 78.0071 H(3)C(1)O(2)P(1) 0.0 Chemical derivative 728 0.0
+Methylphosphonate@T 77.987066 78.0071 H(3)C(1)O(2)P(1) 0.0 Chemical derivative 728 0.0
+Methylphosphonate@S 77.987066 78.0071 H(3)C(1)O(2)P(1) 0.0 Chemical derivative 728 0.0
+O-Isopropylmethylphosphonate@Y 120.034017 120.0868 H(9)C(4)O(2)P(1) 0.0 Chemical derivative 729 0.0
+O-Isopropylmethylphosphonate@T 120.034017 120.0868 H(9)C(4)O(2)P(1) 0.0 Chemical derivative 729 0.0
+O-Isopropylmethylphosphonate@S 120.034017 120.0868 H(9)C(4)O(2)P(1) 0.0 Chemical derivative 729 0.0
+iTRAQ8plex:13C(6)15N(2)@Y 304.19904 304.3081 H(24)C(8)13C(6)N(2)15N(2)O(3) 0.0 Isotopic label 731 0.0
+iTRAQ8plex:13C(6)15N(2)@Any_N-term 304.19904 304.3081 H(24)C(8)13C(6)N(2)15N(2)O(3) 0.0 Isotopic label 731 0.0
+iTRAQ8plex:13C(6)15N(2)@K 304.19904 304.3081 H(24)C(8)13C(6)N(2)15N(2)O(3) 0.0 Isotopic label 731 0.0
+iTRAQ8plex:13C(6)15N(2)@C 304.19904 304.3081 H(24)C(8)13C(6)N(2)15N(2)O(3) 0.0 Isotopic label 731 0.0
+BEMAD_ST@T 136.001656 136.2357 H(8)C(4)O(1)S(2) 0.0 Chemical derivative 735 0.0
+BEMAD_ST@S 136.001656 136.2357 H(8)C(4)O(1)S(2) 0.0 Chemical derivative 735 0.0
+Ethanolamine@D 43.042199 43.0678 H(5)C(2)N(1) 0.0 Chemical derivative 734 0.0
+Ethanolamine@Any_C-term 43.042199 43.0678 H(5)C(2)N(1) 0.0 Chemical derivative 734 0.0
+Ethanolamine@E 43.042199 43.0678 H(5)C(2)N(1) 0.0 Chemical derivative 734 0.0
+Ethanolamine@C 43.042199 43.0678 H(5)C(2)N(1) 0.0 Chemical derivative 734 0.0
+TMT6plex@T 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+TMT6plex@S 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+TMT6plex@H 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+TMT6plex@Protein_N-term 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+TMT6plex@Any_N-term 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+TMT6plex@K 229.162932 229.2634 H(20)C(8)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 737 0.0
+BEMAD_C@C 120.0245 120.1701 H(8)C(4)O(2)S(1) 0.0 Chemical derivative 736 0.0
+TMT2plex@H 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT2plex@S 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT2plex@T 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT2plex@Protein_N-term 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT2plex@Any_N-term 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT2plex@K 225.155833 225.2921 H(20)C(11)13C(1)N(2)O(2) 0.0 Isotopic label 738 0.0
+TMT@Protein_N-term 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Chemical derivative 739 0.0
+TMT@Any_N-term 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Chemical derivative 739 0.0
+TMT@K 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Chemical derivative 739 0.0
+TMT@H 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Isotopic label 739 0.0
+TMT@S 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Isotopic label 739 0.0
+TMT@T 224.152478 224.2994 H(20)C(12)N(2)O(2) 0.0 Isotopic label 739 0.0
+ExacTagThiol@C 972.365219 972.7268 H(50)C(23)13C(12)N(8)15N(6)O(18) 0.0 Isotopic label 740 0.0
+ExacTagAmine@K 1046.347854 1046.8285 H(52)C(25)13C(12)N(8)15N(6)O(19)S(1) 0.0 Isotopic label 741 0.0
+NO_SMX_SEMD@C 251.036462 251.2618 H(9)C(10)N(3)O(3)S(1) 0.0 Chemical derivative 744 0.0
+4-ONE+Delta:H(-2)O(-1)@K 136.088815 136.191 H(12)C(9)O(1) 0.0 Chemical derivative 743 0.0
+4-ONE+Delta:H(-2)O(-1)@H 136.088815 136.191 H(12)C(9)O(1) 0.0 Chemical derivative 743 0.0
+4-ONE+Delta:H(-2)O(-1)@C 136.088815 136.191 H(12)C(9)O(1) 0.0 Chemical derivative 743 0.0
+Biotin:Aha-DADPS@M 922.465403 923.2022 H(70)C(42)N(8)O(11)S(1)Si(1) 0.0 Chemical derivative 2052 0.0
+NO_SMX_SIMD@C 267.031377 267.2612 H(9)C(10)N(3)O(4)S(1) 0.0 Chemical derivative 746 0.0
+Malonyl@C 86.000394 86.0462 H(2)C(3)O(3) 0.0 Chemical derivative 747 0.0
+Malonyl@S 86.000394 86.0462 H(2)C(3)O(3) 0.0 Chemical derivative 747 0.0
+Malonyl@K 86.000394 86.0462 H(2)C(3)O(3) 0.0 Post-translational 747 N([Xe])([Xe])[C@@H](CCCC(NC(=O)CC(=O)O))C(=O)[Rn] 0.0
+3sulfo@Any_N-term 183.983029 184.1693 H(4)C(7)O(4)S(1) 0.0 Chemical derivative 748 0.0
+trifluoro@L 53.971735 53.9714 H(-3)F(3) 0.0 Non-standard residue 750 0.0
+TNBS@Any_N-term 210.986535 211.0886 H(1)C(6)N(3)O(6) 0.0 Chemical derivative 751 0.0
+TNBS@K 210.986535 211.0886 H(1)C(6)N(3)O(6) 0.0 Chemical derivative 751 0.0
+Biotin-phenacyl@C 626.263502 626.727 H(38)C(29)N(8)O(6)S(1) 0.0 Chemical derivative 774 0.0
+Biotin-phenacyl@H 626.263502 626.727 H(38)C(29)N(8)O(6)S(1) 0.0 Chemical derivative 774 0.0
+Biotin-phenacyl@S 626.263502 626.727 H(38)C(29)N(8)O(6)S(1) 0.0 Chemical derivative 774 0.0
+BEMAD_C:2H(6)@C 126.062161 126.2071 H(2)2H(6)C(4)O(2)S(1) 0.0 Isotopic label 764 0.0
+lapachenole@C 240.11503 240.297 H(16)C(16)O(2) 0.0 Chemical derivative 771 0.0
+Label:13C(5)@P 5.016774 4.9633 C(-5)13C(5) 0.0 Isotopic label 772 0.0
+maleimide@K 97.016378 97.0721 H(3)C(4)N(1)O(2) 0.0 Chemical derivative 773 0.0
+maleimide@C 97.016378 97.0721 H(3)C(4)N(1)O(2) 0.0 Chemical derivative 773 0.0
+IDEnT@C 214.990469 216.064 H(7)C(9)N(1)O(1)Cl(2) 0.0 Isotopic label 762 0.0
+BEMAD_ST:2H(6)@T 142.039317 142.2727 H(2)2H(6)C(4)O(1)S(2) 0.0 Isotopic label 763 0.0
+BEMAD_ST:2H(6)@S 142.039317 142.2727 H(2)2H(6)C(4)O(1)S(2) 0.0 Isotopic label 763 0.0
+Met-loss@M^Protein_N-term -131.040485 -131.1961 H(-9)C(-5)N(-1)O(-1)S(-1) 0.0 Co-translational 765 0.0
+Met-loss+Acetyl@M^Protein_N-term -89.02992 -89.1594 H(-7)C(-3)N(-1)S(-1) 0.0 Co-translational 766 0.0
+Menadione-HQ@K 172.05243 172.18 H(8)C(11)O(2) 0.0 Chemical derivative 767 0.0
+Menadione-HQ@C 172.05243 172.18 H(8)C(11)O(2) 0.0 Chemical derivative 767 0.0
+Carboxymethyl:13C(2)@C 60.012189 60.0214 H(2)13C(2)O(2) 0.0 Chemical derivative 775 0.0
+NEM:2H(5)@C 130.079062 130.1561 H(2)2H(5)C(6)N(1)O(2) 0.0 Chemical derivative 776 0.0
+Gly-loss+Amide@G^Any_C-term -58.005479 -58.0361 H(-2)C(-2)O(-2) 0.0 Post-translational 822 0.0
+TMPP-Ac@Any_N-term 572.181134 572.5401 H(33)C(29)O(10)P(1) 0.0 Chemical derivative 827 0.0
+TMPP-Ac@K 572.181134 572.5401 H(33)C(29)O(10)P(1) 0.0 Artefact 827 0.0
+TMPP-Ac@Y 572.181134 572.5401 H(33)C(29)O(10)P(1) 0.0 Artefact 827 0.0
+Label:13C(6)+GG@K 120.063056 120.0586 H(6)C(-2)13C(6)N(2)O(2) 0.0 Isotopic label 799 0.0
+Arg->Npo@R 80.985078 81.0297 H(-1)C(3)N(1)O(2) 0.0 Chemical derivative 837 0.0
+Label:2H(4)+Acetyl@K 46.035672 46.0613 H(-2)2H(4)C(2)O(1) 0.0 Isotopic label 834 0.0
+Pentylamine@Q 70.07825 70.1329 H(10)C(5) 0.0 Chemical derivative 801 0.0
+Biotin:Thermo-21345@Q 311.166748 311.4429 H(25)C(15)N(3)O(2)S(1) 0.0 Chemical derivative 800 0.0
+Dihydroxyimidazolidine@R 72.021129 72.0627 H(4)C(3)O(2) 0.0 Multiple 830 0.0
+Xlink:DFDNB@N 163.985807 164.0752 C(6)N(2)O(4) 0.0 Chemical derivative 825 0.0
+Xlink:DFDNB@Q 163.985807 164.0752 C(6)N(2)O(4) 0.0 Chemical derivative 825 0.0
+Xlink:DFDNB@R 163.985807 164.0752 C(6)N(2)O(4) 0.0 Chemical derivative 825 0.0
+Xlink:DFDNB@K 163.985807 164.0752 C(6)N(2)O(4) 0.0 Chemical derivative 825 0.0
+Cy3b-maleimide@C 682.24612 682.7852 H(38)C(37)N(4)O(7)S(1) 0.0 Chemical derivative 821 0.0
+Hex(1)HexNAc(1)@N 365.132196 365.3331 H(23)C(14)N(1)O(10) 365.132196 H(23)C(14)N(1)O(10) N-linked glycosylation 793 0.5
+Hex(1)HexNAc(1)@T 365.132196 365.3331 H(23)C(14)N(1)O(10) 365.132196 H(23)C(14)N(1)O(10) O-linked glycosylation 793 0.5
+Hex(1)HexNAc(1)@S 365.132196 365.3331 H(23)C(14)N(1)O(10) 365.132196 H(23)C(14)N(1)O(10) O-linked glycosylation 793 0.5
+AEC-MAEC:2H(4)@S 63.044462 63.158 H(1)2H(4)C(2)N(1)O(-1)S(1) 0.0 Isotopic label 792 0.0
+AEC-MAEC:2H(4)@T 63.044462 63.158 H(1)2H(4)C(2)N(1)O(-1)S(1) 0.0 Isotopic label 792 0.0
+Xlink:BMOE@C 220.048407 220.1815 H(8)C(10)N(2)O(4) 0.0 Chemical derivative 824 0.0
+Biotin:Thermo-21360@Anywhere 487.246455 487.6134 H(37)C(21)N(5)O(6)S(1) 0.0 Chemical derivative 811 0.0
+Label:13C(6)+Acetyl@K 48.030694 47.9926 H(2)C(-4)13C(6)O(1) 0.0 Isotopic label 835 0.0
+Label:13C(6)15N(2)+Acetyl@K 50.024764 49.9794 H(2)C(-4)13C(6)N(-2)15N(2)O(1) 0.0 Isotopic label 836 0.0
+EQIGG@K 484.228162 484.5035 H(32)C(20)N(6)O(8) 0.0 Other 846 0.0
+cGMP@S 343.031785 343.1895 H(10)C(10)N(5)O(7)P(1) 0.0 Post-translational 849 0.0
+cGMP@C 343.031785 343.1895 H(10)C(10)N(5)O(7)P(1) 0.0 Post-translational 849 0.0
+cGMP+RMP-loss@C 150.041585 150.1182 H(4)C(5)N(5)O(1) 0.0 Post-translational 851 0.0
+cGMP+RMP-loss@S 150.041585 150.1182 H(4)C(5)N(5)O(1) 0.0 Post-translational 851 0.0
+mTRAQ@Y 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 0.0
+mTRAQ@Any_N-term 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 C(=O)CN1CCN(CC1)C 0.0
+mTRAQ@K 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 [H]N(CCCC[C@H](N([Xe])([Xe]))C(=O)[Rn])C(=O)CN1CCN(C)CC1 0.0
+mTRAQ@H 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 0.0
+mTRAQ@S 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 0.0
+mTRAQ@T 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Isotopic label 888 0.0
+Arg2PG@R 266.057909 266.2482 H(10)C(16)O(4) 0.0 Chemical derivative 848 0.0
+Label:2H(4)+GG@K 118.068034 118.1273 H(2)2H(4)C(4)N(2)O(2) 0.0 Post-translational 853 0.0
+spermine@Q 185.189198 185.3097 H(23)C(10)N(3) 0.0 Chemical derivative 1420 0.0
+Label:13C(1)2H(3)@M 4.022185 4.0111 H(-3)2H(3)C(-1)13C(1) 0.0 Isotopic label 862 0.0
+ZGB@K 758.380841 758.7261 H(53)C(37)N(6)O(6)F(2)S(1)B(1) 0.0 Other 861 0.0
+ZGB@Any_N-term 758.380841 758.7261 H(53)C(37)N(6)O(6)F(2)S(1)B(1) 0.0 Other 861 0.0
+MG-H1@R 54.010565 54.0474 H(2)C(3)O(1) 0.0 Other 859 0.0
+G-H1@R 39.994915 40.0208 C(2)O(1) 0.0 Other 860 0.0
+Label:13C(6)15N(2)+GG@K 122.057126 122.0454 H(6)C(-2)13C(6)15N(2)O(2) 0.0 Isotopic label 864 0.0
+ICPL:13C(6)2H(4)@Any_N-term 115.0667 115.0747 H(-1)2H(4)13C(6)N(1)O(1) 0.0 Isotopic label 866 0.0
+ICPL:13C(6)2H(4)@K 115.0667 115.0747 H(-1)2H(4)13C(6)N(1)O(1) 0.0 Isotopic label 866 0.0
+ICPL:13C(6)2H(4)@Protein_N-term 115.0667 115.0747 H(-1)2H(4)13C(6)N(1)O(1) 0.0 Isotopic label 866 0.0
+DyLight-maleimide@C 940.1999 941.0762 H(48)C(39)N(4)O(15)S(4) 0.0 Chemical derivative 890 0.0
+mTRAQ:13C(3)15N(1)@S 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 0.0
+mTRAQ:13C(3)15N(1)@T 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 0.0
+mTRAQ:13C(3)15N(1)@H 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 0.0
+mTRAQ:13C(3)15N(1)@Y 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 0.0
+mTRAQ:13C(3)15N(1)@Any_N-term 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 C(=O)[13C]([H])([H])[15N]1[13C]([H])([H])[13C]([H])([H])N(CC1)C 0.0
+mTRAQ:13C(3)15N(1)@K 144.102063 144.1544 H(12)C(4)13C(3)N(1)15N(1)O(1) 0.0 Isotopic label 889 [H]N(CCCC[C@H](N([Xe])([Xe]))C(=O)[Rn])C(=O)[13CH2][15N]1CCN(C)[13CH2][13CH2]1 0.0
+Methyl-PEO12-Maleimide@C 710.383719 710.8073 H(58)C(32)N(2)O(15) 0.0 Chemical derivative 891 0.0
+MDCC@C 383.148121 383.3978 H(21)C(20)N(3)O(5) 0.0 Chemical derivative 887 0.0
+QQQTGG@K 599.266339 599.5942 H(37)C(23)N(9)O(10) 0.0 Other 877 0.0
+QEQTGG@K 600.250354 600.5789 H(36)C(23)N(8)O(11) 0.0 Other 876 0.0
+HydroxymethylOP@K 108.021129 108.0948 H(4)C(6)O(2) 0.0 Other 886 0.0
+Biotin:Thermo-21325@K 695.310118 695.8288 H(45)C(34)N(7)O(7)S(1) 0.0 Chemical derivative 884 0.0
+Label:13C(1)2H(3)+Oxidation@M 20.0171 20.0105 H(-3)2H(3)C(-1)13C(1)O(1) 0.0 Multiple 885 0.0
+shTMTpro@K 313.231019 313.2473 H(25)13C(15)15N(3)O(3) 0.0 Chemical derivative 2050 0.0
+shTMTpro@Protein_N-term 313.231019 313.2473 H(25)13C(15)15N(3)O(3) 0.0 Chemical derivative 2050 0.0
+shTMTpro@Any_N-term 313.231019 313.2473 H(25)13C(15)15N(3)O(3) 0.0 Chemical derivative 2050 0.0
+Biotin-PEG-PRA@M 578.317646 578.6611 H(42)C(26)N(8)O(7) 0.0 Chemical derivative 895 0.0
+Met->Aha@M -4.986324 -5.0794 H(-3)C(-1)N(3)S(-1) 0.0 Non-standard residue 896 0.0
+Label:15N(4)@R 3.98814 3.9736 N(-4)15N(4) 0.0 Isotopic label 897 0.0
+pyrophospho@T 159.932662 159.9598 H(2)O(6)P(2) 176.935402 H(3)O(7)P(2) Post-translational 898 0.5
+pyrophospho@S 159.932662 159.9598 H(2)O(6)P(2) 176.935402 H(3)O(7)P(2) Post-translational 898 0.5
+Met->Hpg@M -21.987721 -22.0702 H(-2)C(1)S(-1) 0.0 Non-standard residue 899 0.0
+4AcAllylGal@C 372.142033 372.3671 H(24)C(17)O(9) 0.0 Chemical derivative 901 0.0
+DimethylArsino@C 103.960719 103.9827 H(5)C(2)As(1) 0.0 Post-translational 902 0.0
+Lys->CamCys@K 31.935685 32.0219 H(-4)C(-1)O(1)S(1) 0.0 Pre-translational 903 0.0
+Phe->CamCys@F 12.962234 13.0204 H(-1)C(-4)N(1)O(1)S(1) 0.0 Pre-translational 904 0.0
+Leu->MetOx@L 33.951335 34.0378 H(-2)C(-1)O(1)S(1) 0.0 Pre-translational 905 0.0
+Lys->MetOx@K 18.940436 19.0232 H(-3)C(-1)N(-1)O(1)S(1) 0.0 Pre-translational 906 0.0
+Galactosyl@Any_N-term 178.047738 178.14 H(10)C(6)O(6) 0.0 Other glycosylation 907 0.0
+Galactosyl@K 178.047738 178.14 H(10)C(6)O(6) 0.0 Other glycosylation 907 0.0
+Xlink:SMCC[321]@C 321.205242 321.4146 H(27)C(17)N(3)O(3) 0.0 Chemical derivative 908 0.0
+Bacillosamine@N 228.111007 228.245 H(16)C(10)N(2)O(4) 228.111007 H(16)C(10)N(2)O(4) N-linked glycosylation 910 0.5
+MTSL@C 184.07961 184.2786 H(14)C(9)N(1)O(1)S(1) 0.0 Chemical derivative 911 0.0
+HNE-BAHAH@H 511.319226 511.7209 H(45)C(25)N(5)O(4)S(1) 0.0 Chemical derivative 912 0.0
+HNE-BAHAH@C 511.319226 511.7209 H(45)C(25)N(5)O(4)S(1) 0.0 Chemical derivative 912 0.0
+HNE-BAHAH@K 511.319226 511.7209 H(45)C(25)N(5)O(4)S(1) 0.0 Chemical derivative 912 0.0
+LTX+Lophotoxin@Y 416.147118 416.4212 H(24)C(22)O(8) 0.0 Post-translational 2039 0.0
+Methylmalonylation@S 100.016044 100.0728 H(4)C(4)O(3) 0.0 Chemical derivative 914 0.0
+AROD@C 820.336015 820.979 H(52)C(35)N(10)O(9)S(2) 0.0 Chemical derivative 938 0.0
+Cys->methylaminoAla@C -2.945522 -3.0238 H(3)C(1)N(1)S(-1) 0.0 Chemical derivative 939 0.0
+Cys->ethylaminoAla@C 11.070128 11.0028 H(5)C(2)N(1)S(-1) 0.0 Chemical derivative 940 0.0
+Label:13C(4)15N(2)+GG@K 120.050417 120.0601 H(6)13C(4)15N(2)O(2) 0.0 Isotopic label 923 0.0
+ethylamino@S 27.047285 27.0684 H(5)C(2)N(1)O(-1) 0.0 Chemical derivative 926 0.0
+ethylamino@T 27.047285 27.0684 H(5)C(2)N(1)O(-1) 0.0 Chemical derivative 926 0.0
+MercaptoEthanol@S 60.003371 60.1182 H(4)C(2)S(1) 0.0 Chemical derivative 928 0.0
+MercaptoEthanol@T 60.003371 60.1182 H(4)C(2)S(1) 0.0 Chemical derivative 928 0.0
+Atto495Maleimide@C 474.250515 474.5747 H(32)C(27)N(5)O(3) 0.0 Chemical derivative 935 0.0
+AMTzHexNAc2@T 502.202341 502.4757 H(30)C(19)N(6)O(10) 0.0 Chemical derivative 934 0.0
+AMTzHexNAc2@S 502.202341 502.4757 H(30)C(19)N(6)O(10) 0.0 Chemical derivative 934 0.0
+AMTzHexNAc2@N 502.202341 502.4757 H(30)C(19)N(6)O(10) 0.0 Chemical derivative 934 0.0
+Ethyl+Deamidated@Q 29.015316 29.0379 H(3)C(2)N(-1)O(1) 0.0 Chemical derivative 931 0.0
+Ethyl+Deamidated@N 29.015316 29.0379 H(3)C(2)N(-1)O(1) 0.0 Chemical derivative 931 0.0
+VFQQQTGG@K 845.403166 845.8991 H(55)C(37)N(11)O(12) 0.0 Other 932 0.0
+VIEVYQEQTGG@K 1203.577168 1204.2859 H(81)C(53)N(13)O(19) 0.0 Other 933 0.0
+Chlorination@W 33.961028 34.4451 H(-1)Cl(1) 0.0 Artefact 936 0.0
+Chlorination@Y 33.961028 34.4451 H(-1)Cl(1) 0.0 Artefact 936 0.0
+dichlorination@C 67.922055 68.8901 H(-2)Cl(2) 0.0 Chemical derivative 937 0.0
+dichlorination@Y 67.922055 68.8901 H(-2)Cl(2) 0.0 Artefact 937 0.0
+DNPS@C 198.981352 199.164 H(3)C(6)N(2)O(4)S(1) 0.0 Chemical derivative 941 0.0
+DNPS@W 198.981352 199.164 H(3)C(6)N(2)O(4)S(1) 0.0 Chemical derivative 941 0.0
+SulfoGMBS@C 458.162391 458.5306 H(26)C(22)N(4)O(5)S(1) 0.0 Other 942 0.0
+DimethylamineGMBS@C 267.158292 267.3241 H(21)C(13)N(3)O(3) 0.0 Chemical derivative 943 0.0
+Label:15N(2)2H(9)@K 11.050561 11.0423 H(-9)2H(9)N(-2)15N(2) 0.0 Isotopic label 944 0.0
+LG-anhydrolactam@Any_N-term 314.188195 314.4186 H(26)C(20)O(3) 0.0 Post-translational 946 0.0
+LG-anhydrolactam@K 314.188195 314.4186 H(26)C(20)O(3) 0.0 Post-translational 946 0.0
+LG-pyrrole@C 316.203845 316.4345 H(28)C(20)O(3) 0.0 Post-translational 947 0.0
+LG-pyrrole@Any_N-term 316.203845 316.4345 H(28)C(20)O(3) 0.0 Post-translational 947 0.0
+LG-pyrrole@K 316.203845 316.4345 H(28)C(20)O(3) 0.0 Post-translational 947 0.0
+LG-anhyropyrrole@Any_N-term 298.19328 298.4192 H(26)C(20)O(2) 0.0 Post-translational 948 0.0
+LG-anhyropyrrole@K 298.19328 298.4192 H(26)C(20)O(2) 0.0 Post-translational 948 0.0
+3-deoxyglucosone@R 144.042259 144.1253 H(8)C(6)O(4) 0.0 Multiple 949 0.0
+Cation:Li@D 6.008178 5.9331 H(-1)Li(1) 0.0 Artefact 950 0.0
+Cation:Li@E 6.008178 5.9331 H(-1)Li(1) 0.0 Artefact 950 0.0
+Cation:Li@Any_C-term 6.008178 5.9331 H(-1)Li(1) 0.0 Artefact 950 O[Li] 0.0
+Cation:Ca[II]@Any_C-term 37.946941 38.0621 H(-2)Ca(1) 0.0 Artefact 951 0.0
+Cation:Ca[II]@E 37.946941 38.0621 H(-2)Ca(1) 0.0 Artefact 951 0.0
+Cation:Ca[II]@D 37.946941 38.0621 H(-2)Ca(1) 0.0 Artefact 951 0.0
+Cation:Fe[II]@D 53.919289 53.8291 H(-2)Fe(1) 0.0 Artefact 952 0.0
+Cation:Fe[II]@E 53.919289 53.8291 H(-2)Fe(1) 0.0 Artefact 952 0.0
+Cation:Fe[II]@Any_C-term 53.919289 53.8291 H(-2)Fe(1) 0.0 Artefact 952 0.0
+Cation:Ni[II]@D 55.919696 56.6775 H(-2)Ni(1) 0.0 Artefact 953 0.0
+Cation:Ni[II]@E 55.919696 56.6775 H(-2)Ni(1) 0.0 Artefact 953 0.0
+Cation:Ni[II]@Any_C-term 55.919696 56.6775 H(-2)Ni(1) 0.0 Artefact 953 0.0
+Cation:Zn[II]@Any_C-term 61.913495 63.3931 H(-2)Zn(1) 0.0 Artefact 954 0.0
+Cation:Zn[II]@E 61.913495 63.3931 H(-2)Zn(1) 0.0 Artefact 954 0.0
+Cation:Zn[II]@D 61.913495 63.3931 H(-2)Zn(1) 0.0 Artefact 954 0.0
+Cation:Zn[II]@H 61.913495 63.3931 H(-2)Zn(1) 0.0 Artefact 954 0.0
+Cation:Ag@D 105.897267 106.8603 H(-1)Ag(1) 0.0 Artefact 955 0.0
+Cation:Ag@E 105.897267 106.8603 H(-1)Ag(1) 0.0 Artefact 955 0.0
+Cation:Ag@Any_C-term 105.897267 106.8603 H(-1)Ag(1) 0.0 Artefact 955 0.0
+Cation:Mg[II]@D 21.969392 22.2891 H(-2)Mg(1) 0.0 Artefact 956 0.0
+Cation:Mg[II]@E 21.969392 22.2891 H(-2)Mg(1) 0.0 Artefact 956 0.0
+Cation:Mg[II]@Any_C-term 21.969392 22.2891 H(-2)Mg(1) 0.0 Artefact 956 0.0
+2-succinyl@C 116.010959 116.0722 H(4)C(4)O(4) 0.0 Chemical derivative 957 0.0
+Propargylamine@D 37.031634 37.0632 H(3)C(3)N(1)O(-1) 0.0 Chemical derivative 958 0.0
+Propargylamine@Any_C-term 37.031634 37.0632 H(3)C(3)N(1)O(-1) 0.0 Chemical derivative 958 0.0
+Propargylamine@E 37.031634 37.0632 H(3)C(3)N(1)O(-1) 0.0 Chemical derivative 958 0.0
+Phosphopropargyl@T 116.997965 117.0431 H(4)C(3)N(1)O(2)P(1) 0.0 Multiple 959 0.0
+Phosphopropargyl@Y 116.997965 117.0431 H(4)C(3)N(1)O(2)P(1) 0.0 Multiple 959 0.0
+Phosphopropargyl@S 116.997965 117.0431 H(4)C(3)N(1)O(2)P(1) 0.0 Multiple 959 0.0
+SUMO2135@K 2135.920496 2137.2343 H(137)C(90)N(21)O(37)S(1) 0.0 Other 960 0.0
+SUMO3549@K 3549.536568 3551.6672 H(224)C(150)N(38)O(60)S(1) 0.0 Other 961 0.0
+serotonylation@Q 159.068414 159.1846 H(9)C(10)N(1)O(1) 0.0 Post-translational 1992 0.0
+BITC@Any_N-term 149.02992 149.2129 H(7)C(8)N(1)S(1) 0.0 Chemical derivative 978 0.0
+BITC@K 149.02992 149.2129 H(7)C(8)N(1)S(1) 0.0 Chemical derivative 978 0.0
+BITC@C 149.02992 149.2129 H(7)C(8)N(1)S(1) 0.0 Chemical derivative 978 0.0
+Carbofuran@S 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 977 0.0
+PEITC@Any_N-term 163.04557 163.2395 H(9)C(9)N(1)S(1) 0.0 Chemical derivative 979 0.0
+PEITC@K 163.04557 163.2395 H(9)C(9)N(1)S(1) 0.0 Chemical derivative 979 0.0
+PEITC@C 163.04557 163.2395 H(9)C(9)N(1)S(1) 0.0 Chemical derivative 979 0.0
+thioacylPA@K 159.035399 159.2062 H(9)C(6)N(1)O(2)S(1) 0.0 Chemical derivative 967 0.0
+maleimide3@K 969.366232 969.8975 H(59)C(37)N(7)O(23) 0.0 Post-translational 971 0.0
+maleimide3@C 969.366232 969.8975 H(59)C(37)N(7)O(23) 0.0 Post-translational 971 0.0
+maleimide5@K 1293.471879 1294.1787 H(79)C(49)N(7)O(33) 0.0 Post-translational 972 0.0
+maleimide5@C 1293.471879 1294.1787 H(79)C(49)N(7)O(33) 0.0 Post-translational 972 0.0
+Puromycin@Any_C-term 453.212452 453.4943 H(27)C(22)N(7)O(4) 0.0 Co-translational 973 0.0
+glucosone@R 160.037173 160.1247 H(8)C(6)O(5) 0.0 Other 981 0.0
+Label:13C(6)+Dimethyl@K 34.051429 34.0091 H(4)C(-4)13C(6) 0.0 Isotopic label 986 0.0
+cysTMT@C 299.166748 299.4322 H(25)C(14)N(3)O(2)S(1) 0.0 Chemical derivative 984 0.0
+cysTMT6plex@C 304.177202 304.3962 H(25)C(10)13C(4)N(2)15N(1)O(2)S(1) 0.0 Isotopic label 985 0.0
+ISD_z+2_ion@Any_N-term -15.010899 -15.0146 H(-1)N(-1) 0.0 Artefact 991 0.0
+Ammonium@E 17.026549 17.0305 H(3)N(1) 0.0 Artefact 989 0.0
+Ammonium@D 17.026549 17.0305 H(3)N(1) 0.0 Artefact 989 0.0
+Ammonium@Any_C-term 17.026549 17.0305 H(3)N(1) 0.0 Artefact 989 0.0
+Biotin:Sigma-B1267@C 449.17329 449.5239 H(27)C(20)N(5)O(5)S(1) 0.0 Chemical derivative 993 0.0
+Label:15N(1)@M 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@E 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@D 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@L 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@I 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@C 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@T 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@V 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@P 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@S 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@A 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@G 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@Y 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(1)@F 0.997035 0.9934 N(-1)15N(1) 0.0 Isotopic label 994 0.0
+Label:15N(2)@W 1.99407 1.9868 N(-2)15N(2) 0.0 Isotopic label 995 0.0
+Label:15N(2)@K 1.99407 1.9868 N(-2)15N(2) 0.0 Isotopic label 995 0.0
+Label:15N(2)@Q 1.99407 1.9868 N(-2)15N(2) 0.0 Isotopic label 995 0.0
+Label:15N(2)@N 1.99407 1.9868 N(-2)15N(2) 0.0 Isotopic label 995 0.0
+Label:15N(3)@H 2.991105 2.9802 N(-3)15N(3) 0.0 Isotopic label 996 0.0
+sulfo+amino@Y 94.967714 95.0778 H(1)N(1)O(3)S(1) 0.0 Chemical derivative 997 0.0
+AHA-Alkyne@M 107.077339 107.0504 H(5)C(4)N(5)O(1)S(-1) 0.0 Chemical derivative 1000 0.0
+AHA-Alkyne-KDDDD@M 695.280074 695.5723 H(37)C(26)N(11)O(14)S(-1) 0.0 Chemical derivative 1001 0.0
+EGCG1@C 456.069261 456.3558 H(16)C(22)O(11) 0.0 Post-translational 1002 0.0
+EGCG2@C 287.055563 287.2442 H(11)C(15)O(6) 0.0 Post-translational 1003 0.0
+Label:13C(6)15N(4)+Methyl@R 24.023919 23.9561 H(2)C(-5)13C(6)N(-4)15N(4) 0.0 Isotopic label 1004 0.0
+Label:13C(6)15N(4)+Dimethyl@R 38.039569 37.9827 H(4)C(-4)13C(6)N(-4)15N(4) 0.0 Isotopic label 1005 0.0
+Label:13C(6)15N(4)+Methyl:2H(3)13C(1)@R 28.046104 27.9673 H(-1)2H(3)C(-6)13C(7)N(-4)15N(4) 0.0 Isotopic label 1006 0.0
+Label:13C(6)15N(4)+Dimethyl:2H(6)13C(2)@R 46.083939 46.005 H(-2)2H(6)C(-6)13C(8)N(-4)15N(4) 0.0 Isotopic label 1007 0.0
+Cys->CamSec@C 104.965913 103.9463 H(3)C(2)N(1)O(1)S(-1)Se(1) 0.0 Non-standard residue 1008 0.0
+Thiazolidine@W 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@Y 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@H 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@R 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@K 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@Protein_N-term 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@C 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+Thiazolidine@F 12.0 12.0107 C(1) 0.0 Chemical derivative 1009 0.0
+DEDGFLYMVYASQETFG@K 1970.824411 1972.088 H(122)C(89)N(18)O(31)S(1) 18.010565 H(2)O(1) Post-translational 1010 0.5
+Biotin:Invitrogen-M1602@C 523.210069 523.6024 H(33)C(23)N(5)O(7)S(1) 0.0 Chemical derivative 1012 0.0
+Xlink:DSS[156]@K 156.078644 156.1791 H(12)C(8)O(3) 0.0 Chemical derivative 1020 0.0
+Xlink:DSS[156]@Protein_N-term 156.078644 156.1791 H(12)C(8)O(3) 0.0 Chemical derivative 1020 0.0
+DMPO@H 111.068414 111.1418 H(9)C(6)N(1)O(1) 0.0 Post-translational 1017 0.0
+DMPO@Y 111.068414 111.1418 H(9)C(6)N(1)O(1) 0.0 Post-translational 1017 0.0
+DMPO@C 111.068414 111.1418 H(9)C(6)N(1)O(1) 0.0 Post-translational 1017 0.0
+glycidamide@K 87.032028 87.0773 H(5)C(3)N(1)O(2) 0.0 Chemical derivative 1014 0.0
+glycidamide@Any_N-term 87.032028 87.0773 H(5)C(3)N(1)O(2) 0.0 Chemical derivative 1014 0.0
+Ahx2+Hsl@Any_C-term 309.205242 309.4039 H(27)C(16)N(3)O(3) 0.0 Non-standard residue 1015 0.0
+ICDID@C 138.06808 138.1638 H(10)C(8)O(2) 0.0 Isotopic label 1018 0.0
+ICDID:2H(6)@C 144.10574 144.2008 H(4)2H(6)C(8)O(2) 0.0 Isotopic label 1019 0.0
+Xlink:EGS[244]@Protein_N-term 244.058303 244.1981 H(12)C(10)O(7) 0.0 Chemical derivative 1021 0.0
+Xlink:EGS[244]@K 244.058303 244.1981 H(12)C(10)O(7) 0.0 Chemical derivative 1021 0.0
+Xlink:DST[132]@Protein_N-term 132.005873 132.0716 H(4)C(4)O(5) 0.0 Chemical derivative 1022 0.0
+Xlink:DST[132]@K 132.005873 132.0716 H(4)C(4)O(5) 0.0 Chemical derivative 1022 0.0
+Xlink:DTSSP[192]@Protein_N-term 191.991486 192.2559 H(8)C(6)O(3)S(2) 0.0 Chemical derivative 1023 0.0
+Xlink:DTSSP[192]@K 191.991486 192.2559 H(8)C(6)O(3)S(2) 0.0 Chemical derivative 1023 0.0
+Xlink:SMCC[237]@C 237.100108 237.2518 H(15)C(12)N(1)O(4) 0.0 Chemical derivative 1024 0.0
+Xlink:SMCC[237]@K 237.100108 237.2518 H(15)C(12)N(1)O(4) 0.0 Chemical derivative 1024 0.0
+Xlink:SMCC[237]@Protein_N-term 237.100108 237.2518 H(15)C(12)N(1)O(4) 0.0 Chemical derivative 1024 0.0
+2-nitrobenzyl@Y 135.032028 135.1201 H(5)C(7)N(1)O(2) 0.0 Chemical derivative 1032 0.0
+Xlink:DMP[140]@Protein_N-term 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Chemical derivative 1027 0.0
+Xlink:DMP[140]@K 140.094963 140.183 H(12)C(7)N(2)O(1) 0.0 Chemical derivative 1027 0.0
+Xlink:EGS[115]@Protein_N-term 115.026943 115.0874 H(5)C(4)N(1)O(3) 0.0 Chemical derivative 1028 0.0
+Xlink:EGS[115]@K 115.026943 115.0874 H(5)C(4)N(1)O(3) 0.0 Chemical derivative 1028 0.0
+Cys->SecNEM@C 172.992127 172.0203 H(7)C(6)N(1)O(2)S(-1)Se(1) 0.0 Non-standard residue 1033 0.0
+Cys->SecNEM:2H(5)@C 178.023511 177.0511 H(2)2H(5)C(6)N(1)O(2)S(-1)Se(1) 0.0 Chemical derivative 1034 0.0
+Thiadiazole@C 174.025169 174.2223 H(6)C(9)N(2)S(1) 0.0 Chemical derivative 1035 0.0
+Biotin:Thermo-88310@K 196.121178 196.2462 H(16)C(10)N(2)O(2) 0.0 Chemical derivative 1031 0.0
+TAMRA-FP@Y 659.312423 659.7514 H(46)C(37)N(3)O(6)P(1) 0.0 Chemical derivative 1038 0.0
+TAMRA-FP@S 659.312423 659.7514 H(46)C(37)N(3)O(6)P(1) 0.0 Chemical derivative 1038 0.0
+Biotin:Thermo-21901+H2O@C 543.236284 543.6336 H(37)C(23)N(5)O(8)S(1) 0.0 Chemical derivative 1039 0.0
+Deoxyhypusine@Q 71.073499 71.121 H(9)C(4)N(1) 0.0 Chemical derivative 1041 0.0
+Deoxyhypusine@K 71.073499 71.121 H(9)C(4)N(1) 0.0 Post-translational 1041 0.0
+Acetyldeoxyhypusine@K 113.084064 113.1576 H(11)C(6)N(1)O(1) 0.0 Post-translational 1042 0.0
+Acetylhypusine@K 129.078979 129.157 H(11)C(6)N(1)O(2) 0.0 Post-translational 1043 0.0
+Ala->Cys@A 31.972071 32.065 H(0)C(0)N(0)O(0)S(1) 0.0 AA substitution 1044 0.0
+Ala->Phe@A 76.0313 76.096 H(4)C(6)N(0)O(0)S(0) 0.0 AA substitution 1045 0.0
+Ala->His@A 66.021798 66.0614 H(2)C(3)N(2)O(0)S(0) 0.0 AA substitution 1046 0.0
+Ala->Xle@A 42.04695 42.0797 H(6)C(3) 0.0 AA substitution 1047 0.0
+Ala->Lys@A 57.057849 57.0944 H(7)C(3)N(1)O(0)S(0) 0.0 AA substitution 1048 0.0
+Ala->Met@A 60.003371 60.1182 H(4)C(2)N(0)O(0)S(1) 0.0 AA substitution 1049 0.0
+Ala->Asn@A 43.005814 43.0247 H(1)C(1)N(1)O(1)S(0) 0.0 AA substitution 1050 0.0
+Ala->Gln@A 57.021464 57.0513 H(3)C(2)N(1)O(1)S(0) 0.0 AA substitution 1051 0.0
+Ala->Arg@A 85.063997 85.1078 H(7)C(3)N(3)O(0)S(0) 0.0 AA substitution 1052 0.0
+Ala->Trp@A 115.042199 115.132 H(5)C(8)N(1)O(0)S(0) 0.0 AA substitution 1053 0.0
+Ala->Tyr@A 92.026215 92.0954 H(4)C(6)N(0)O(1)S(0) 0.0 AA substitution 1054 0.0
+Cys->Ala@C -31.972071 -32.065 H(0)C(0)N(0)O(0)S(-1) 0.0 AA substitution 1055 0.0
+Cys->Asp@C 12.017759 11.9445 H(0)C(1)N(0)O(2)S(-1) 0.0 AA substitution 1056 0.0
+Cys->Glu@C 26.033409 25.9711 H(2)C(2)N(0)O(2)S(-1) 0.0 AA substitution 1057 0.0
+Cys->His@C 34.049727 33.9964 H(2)C(3)N(2)O(0)S(-1) 0.0 AA substitution 1058 0.0
+Cys->Xle@C 10.07488 10.0147 H(6)C(3)S(-1) 0.0 AA substitution 1059 0.0
+Cys->Lys@C 25.085779 25.0294 H(7)C(3)N(1)O(0)S(-1) 0.0 AA substitution 1060 0.0
+Cys->Met@C 28.0313 28.0532 H(4)C(2)N(0)O(0)S(0) 0.0 AA substitution 1061 0.0
+Cys->Asn@C 11.033743 10.9597 H(1)C(1)N(1)O(1)S(-1) 0.0 AA substitution 1062 0.0
+Cys->Pro@C -5.956421 -6.0277 H(2)C(2)N(0)O(0)S(-1) 0.0 AA substitution 1063 0.0
+Cys->Gln@C 25.049393 24.9863 H(3)C(2)N(1)O(1)S(-1) 0.0 AA substitution 1064 0.0
+Cys->Thr@C -1.961506 -2.039 H(2)C(1)N(0)O(1)S(-1) 0.0 AA substitution 1065 0.0
+Cys->Val@C -3.940771 -4.0118 H(4)C(2)N(0)O(0)S(-1) 0.0 AA substitution 1066 0.0
+Asp->Cys@D -12.017759 -11.9445 H(0)C(-1)N(0)O(-2)S(1) 0.0 AA substitution 1067 0.0
+Asp->Phe@D 32.041471 32.0865 H(4)C(5)N(0)O(-2)S(0) 0.0 AA substitution 1068 0.0
+Asp->Xle@D -1.942879 -1.9298 H(6)C(2)O(-2) 0.0 AA substitution 1069 0.0
+Asp->Lys@D 13.06802 13.0849 H(7)C(2)N(1)O(-2)S(0) 0.0 AA substitution 1070 0.0
+Asp->Met@D 16.013542 16.1087 H(4)C(1)N(0)O(-2)S(1) 0.0 AA substitution 1071 0.0
+Asp->Pro@D -17.974179 -17.9722 H(2)C(1)N(0)O(-2)S(0) 0.0 AA substitution 1072 0.0
+Asp->Gln@D 13.031634 13.0418 H(3)C(1)N(1)O(-1)S(0) 0.0 AA substitution 1073 0.0
+Asp->Arg@D 41.074168 41.0983 H(7)C(2)N(3)O(-2)S(0) 0.0 AA substitution 1074 0.0
+Asp->Ser@D -27.994915 -28.0101 H(0)C(-1)N(0)O(-1)S(0) 0.0 AA substitution 1075 0.0
+Asp->Thr@D -13.979265 -13.9835 H(2)C(0)N(0)O(-1)S(0) 0.0 AA substitution 1076 0.0
+Asp->Trp@D 71.05237 71.1225 H(5)C(7)N(1)O(-2)S(0) 0.0 AA substitution 1077 0.0
+Glu->Cys@E -26.033409 -25.9711 H(-2)C(-2)N(0)O(-2)S(1) 0.0 AA substitution 1078 0.0
+Glu->Phe@E 18.025821 18.0599 H(2)C(4)N(0)O(-2)S(0) 0.0 AA substitution 1079 0.0
+Glu->His@E 8.016319 8.0253 H(0)C(1)N(2)O(-2)S(0) 0.0 AA substitution 1080 0.0
+Glu->Xle@E -15.958529 -15.9563 H(4)C(1)O(-2) 0.0 AA substitution 1081 0.0
+Glu->Met@E 1.997892 2.0821 H(2)C(0)N(0)O(-2)S(1) 0.0 AA substitution 1082 0.0
+Glu->Asn@E -14.999666 -15.0113 H(-1)C(-1)N(1)O(-1)S(0) 0.0 AA substitution 1083 0.0
+Glu->Pro@E -31.989829 -31.9988 H(0)C(0)N(0)O(-2)S(0) 0.0 AA substitution 1084 0.0
+Glu->Arg@E 27.058518 27.0717 H(5)C(1)N(3)O(-2)S(0) 0.0 AA substitution 1085 0.0
+Glu->Ser@E -42.010565 -42.0367 H(-2)C(-2)N(0)O(-1)S(0) 0.0 AA substitution 1086 0.0
+Glu->Thr@E -27.994915 -28.0101 H(0)C(-1)N(0)O(-1)S(0) 0.0 AA substitution 1087 0.0
+Glu->Trp@E 57.03672 57.0959 H(3)C(6)N(1)O(-2)S(0) 0.0 AA substitution 1088 0.0
+Glu->Tyr@E 34.020735 34.0593 H(2)C(4)N(0)O(-1)S(0) 0.0 AA substitution 1089 0.0
+Phe->Ala@F -76.0313 -76.096 H(-4)C(-6)N(0)O(0)S(0) 0.0 AA substitution 1090 0.0
+Phe->Asp@F -32.041471 -32.0865 H(-4)C(-5)N(0)O(2)S(0) 0.0 AA substitution 1091 0.0
+Phe->Glu@F -18.025821 -18.0599 H(-2)C(-4)N(0)O(2)S(0) 0.0 AA substitution 1092 0.0
+Phe->Gly@F -90.04695 -90.1225 H(-6)C(-7)N(0)O(0)S(0) 0.0 AA substitution 1093 0.0
+Phe->His@F -10.009502 -10.0346 H(-2)C(-3)N(2)O(0)S(0) 0.0 AA substitution 1094 0.0
+Phe->Lys@F -18.973451 -19.0016 H(3)C(-3)N(1)O(0)S(0) 0.0 AA substitution 1095 0.0
+Phe->Met@F -16.027929 -15.9778 H(0)C(-4)N(0)O(0)S(1) 0.0 AA substitution 1096 0.0
+Phe->Asn@F -33.025486 -33.0712 H(-3)C(-5)N(1)O(1)S(0) 0.0 AA substitution 1097 0.0
+Phe->Pro@F -50.01565 -50.0587 H(-2)C(-4)N(0)O(0)S(0) 0.0 AA substitution 1098 0.0
+Phe->Gln@F -19.009836 -19.0446 H(-1)C(-4)N(1)O(1)S(0) 0.0 AA substitution 1099 0.0
+Phe->Arg@F 9.032697 9.0118 H(3)C(-3)N(3)O(0)S(0) 0.0 AA substitution 1100 0.0
+Phe->Thr@F -46.020735 -46.07 H(-2)C(-5)N(0)O(1)S(0) 0.0 AA substitution 1101 0.0
+Phe->Trp@F 39.010899 39.036 H(1)C(2)N(1)O(0)S(0) 0.0 AA substitution 1102 0.0
+Gly->Phe@G 90.04695 90.1225 H(6)C(7)N(0)O(0)S(0) 0.0 AA substitution 1103 0.0
+Gly->His@G 80.037448 80.088 H(4)C(4)N(2)O(0)S(0) 0.0 AA substitution 1104 0.0
+Gly->Xle@G 56.0626 56.1063 H(8)C(4) 0.0 AA substitution 1105 0.0
+Gly->Lys@G 71.073499 71.121 H(9)C(4)N(1)O(0)S(0) 0.0 AA substitution 1106 0.0
+Gly->Met@G 74.019021 74.1447 H(6)C(3)N(0)O(0)S(1) 0.0 AA substitution 1107 0.0
+Gly->Asn@G 57.021464 57.0513 H(3)C(2)N(1)O(1)S(0) 0.0 AA substitution 1108 0.0
+Gly->Pro@G 40.0313 40.0639 H(4)C(3)N(0)O(0)S(0) 0.0 AA substitution 1109 0.0
+Gly->Gln@G 71.037114 71.0779 H(5)C(3)N(1)O(1)S(0) 0.0 AA substitution 1110 0.0
+Gly->Thr@G 44.026215 44.0526 H(4)C(2)N(0)O(1)S(0) 0.0 AA substitution 1111 0.0
+Gly->Tyr@G 106.041865 106.1219 H(6)C(7)N(0)O(1)S(0) 0.0 AA substitution 1112 0.0
+His->Ala@H -66.021798 -66.0614 H(-2)C(-3)N(-2)O(0)S(0) 0.0 AA substitution 1113 0.0
+His->Cys@H
-34.049727 -33.9964 H(-2)C(-3)N(-2)O(0)S(1) 0.0 AA substitution 1114 0.0 +His->Glu@H -8.016319 -8.0253 H(0)C(-1)N(-2)O(2)S(0) 0.0 AA substitution 1115 0.0 +His->Phe@H 10.009502 10.0346 H(2)C(3)N(-2)O(0)S(0) 0.0 AA substitution 1116 0.0 +His->Gly@H -80.037448 -80.088 H(-4)C(-4)N(-2)O(0)S(0) 0.0 AA substitution 1117 0.0 +His->Lys@H -8.963949 -8.967 H(5)C(0)N(-1)O(0)S(0) 0.0 AA substitution 1119 0.0 +His->Met@H -6.018427 -5.9432 H(2)C(-1)N(-2)O(0)S(1) 0.0 AA substitution 1120 0.0 +His->Ser@H -50.026883 -50.062 H(-2)C(-3)N(-2)O(1)S(0) 0.0 AA substitution 1121 0.0 +His->Thr@H -36.011233 -36.0354 H(0)C(-2)N(-2)O(1)S(0) 0.0 AA substitution 1122 0.0 +His->Val@H -37.990498 -38.0082 H(2)C(-1)N(-2)O(0)S(0) 0.0 AA substitution 1123 0.0 +His->Trp@H 49.020401 49.0706 H(3)C(5)N(-1)O(0)S(0) 0.0 AA substitution 1124 0.0 +Xle->Cys@L -10.07488 -10.0147 H(-6)C(-3)N(0)O(0)S(1) 0.0 AA substitution 1126 0.0 +Xle->Cys@I -10.07488 -10.0147 H(-6)C(-3)N(0)O(0)S(1) 0.0 AA substitution 1126 0.0 +Xle->Asp@L 1.942879 1.9298 H(-6)C(-2)N(0)O(2)S(0) 0.0 AA substitution 1127 0.0 +Xle->Asp@I 1.942879 1.9298 H(-6)C(-2)N(0)O(2)S(0) 0.0 AA substitution 1127 0.0 +Xle->Glu@L 15.958529 15.9563 H(-4)C(-1)N(0)O(2)S(0) 0.0 AA substitution 1128 0.0 +Xle->Glu@I 15.958529 15.9563 H(-4)C(-1)N(0)O(2)S(0) 0.0 AA substitution 1128 0.0 +Xle->Gly@L -56.0626 -56.1063 H(-8)C(-4)N(0)O(0)S(0) 0.0 AA substitution 1129 0.0 +Xle->Gly@I -56.0626 -56.1063 H(-8)C(-4)N(0)O(0)S(0) 0.0 AA substitution 1129 0.0 +Xle->Tyr@L 49.979265 50.0156 H(-2)C(3)N(0)O(1)S(0) 0.0 AA substitution 1130 0.0 +Xle->Tyr@I 49.979265 50.0156 H(-2)C(3)N(0)O(1)S(0) 0.0 AA substitution 1130 0.0 +Lys->Ala@K -57.057849 -57.0944 H(-7)C(-3)N(-1)O(0)S(0) 0.0 AA substitution 1131 0.0 +Lys->Cys@K -25.085779 -25.0294 H(-7)C(-3)N(-1)O(0)S(1) 0.0 AA substitution 1132 0.0 +Lys->Asp@K -13.06802 -13.0849 H(-7)C(-2)N(-1)O(2)S(0) 0.0 AA substitution 1133 0.0 +Lys->Phe@K 18.973451 19.0016 H(-3)C(3)N(-1)O(0)S(0) 0.0 AA substitution 1134 0.0 +Lys->Gly@K -71.073499 -71.121 H(-9)C(-4)N(-1)O(0)S(0) 0.0 AA substitution 1135 0.0 +Lys->His@K 8.963949 8.967 H(-5)C(0)N(1)O(0)S(0) 0.0 AA substitution 1136 0.0 +Lys->Pro@K -31.042199 -31.0571 H(-5)C(-1)N(-1)O(0)S(0) 0.0 AA substitution 1137 0.0 +Lys->Ser@K -41.062935 -41.095 H(-7)C(-3)N(-1)O(1)S(0) 0.0 AA substitution 1138 0.0 +Lys->Val@K -29.026549 -29.0412 H(-3)C(-1)N(-1)O(0)S(0) 0.0 AA substitution 1139 0.0 +Lys->Trp@K 57.98435 58.0376 H(-2)C(5)N(0)O(0)S(0) 0.0 AA substitution 1140 0.0 +Lys->Tyr@K 34.968366 35.001 H(-3)C(3)N(-1)O(1)S(0) 0.0 AA substitution 1141 0.0 +Met->Ala@M -60.003371 -60.1182 H(-4)C(-2)N(0)O(0)S(-1) 0.0 AA substitution 1142 0.0 +Met->Cys@M -28.0313 -28.0532 H(-4)C(-2)N(0)O(0)S(0) 0.0 AA substitution 1143 0.0 +Met->Asp@M -16.013542 -16.1087 H(-4)C(-1)N(0)O(2)S(-1) 0.0 AA substitution 1144 0.0 +Met->Glu@M -1.997892 -2.0821 H(-2)C(0)N(0)O(2)S(-1) 0.0 AA substitution 1145 0.0 +Met->Phe@M 16.027929 15.9778 H(0)C(4)N(0)O(0)S(-1) 0.0 AA substitution 1146 0.0 +Met->Gly@M -74.019021 -74.1447 H(-6)C(-3)N(0)O(0)S(-1) 0.0 AA substitution 1147 0.0 +Met->His@M 6.018427 5.9432 H(-2)C(1)N(2)O(0)S(-1) 0.0 AA substitution 1148 0.0 +Met->Asn@M -16.997557 -17.0934 H(-3)C(-1)N(1)O(1)S(-1) 0.0 AA substitution 1149 0.0 +Met->Pro@M -33.987721 -34.0809 H(-2)C(0)N(0)O(0)S(-1) 0.0 AA substitution 1150 0.0 +Met->Gln@M -2.981907 -3.0668 H(-1)C(0)N(1)O(1)S(-1) 0.0 AA substitution 1151 0.0 +Met->Ser@M -44.008456 -44.1188 H(-4)C(-2)N(0)O(1)S(-1) 0.0 AA substitution 1152 0.0 +Met->Trp@M 55.038828 55.0138 H(1)C(6)N(1)O(0)S(-1) 0.0 AA substitution 1153 0.0 +Met->Tyr@M 
32.022844 31.9772 H(0)C(4)N(0)O(1)S(-1) 0.0 AA substitution 1154 0.0 +Asn->Ala@N -43.005814 -43.0247 H(-1)C(-1)N(-1)O(-1)S(0) 0.0 AA substitution 1155 0.0 +Asn->Cys@N -11.033743 -10.9597 H(-1)C(-1)N(-1)O(-1)S(1) 0.0 AA substitution 1156 0.0 +Asn->Glu@N 14.999666 15.0113 H(1)C(1)N(-1)O(1)S(0) 0.0 AA substitution 1157 0.0 +Asn->Phe@N 33.025486 33.0712 H(3)C(5)N(-1)O(-1)S(0) 0.0 AA substitution 1158 0.0 +Asn->Gly@N -57.021464 -57.0513 H(-3)C(-2)N(-1)O(-1)S(0) 0.0 AA substitution 1159 0.0 +Asn->Met@N 16.997557 17.0934 H(3)C(1)N(-1)O(-1)S(1) 0.0 AA substitution 1160 0.0 +Asn->Pro@N -16.990164 -16.9875 H(1)C(1)N(-1)O(-1)S(0) 0.0 AA substitution 1161 0.0 +Asn->Gln@N 14.01565 14.0266 H(2)C(1)N(0)O(0)S(0) 0.0 AA substitution 1162 0.0 +Asn->Arg@N 42.058184 42.083 H(6)C(2)N(2)O(-1)S(0) 0.0 AA substitution 1163 0.0 +Asn->Val@N -14.974514 -14.9716 H(3)C(1)N(-1)O(-1)S(0) 0.0 AA substitution 1164 0.0 +Asn->Trp@N 72.036386 72.1073 H(4)C(7)N(0)O(-1)S(0) 0.0 AA substitution 1165 0.0 +Pro->Cys@P 5.956421 6.0277 H(-2)C(-2)N(0)O(0)S(1) 0.0 AA substitution 1166 0.0 +Pro->Asp@P 17.974179 17.9722 H(-2)C(-1)N(0)O(2)S(0) 0.0 AA substitution 1167 0.0 +Pro->Glu@P 31.989829 31.9988 H(0)C(0)N(0)O(2)S(0) 0.0 AA substitution 1168 0.0 +Pro->Phe@P 50.01565 50.0587 H(2)C(4)N(0)O(0)S(0) 0.0 AA substitution 1169 0.0 +Pro->Gly@P -40.0313 -40.0639 H(-4)C(-3)N(0)O(0)S(0) 0.0 AA substitution 1170 0.0 +Pro->Lys@P 31.042199 31.0571 H(5)C(1)N(1)O(0)S(0) 0.0 AA substitution 1171 0.0 +Pro->Met@P 33.987721 34.0809 H(2)C(0)N(0)O(0)S(1) 0.0 AA substitution 1172 0.0 +Pro->Asn@P 16.990164 16.9875 H(-1)C(-1)N(1)O(1)S(0) 0.0 AA substitution 1173 0.0 +Pro->Val@P 2.01565 2.0159 H(2)C(0)N(0)O(0)S(0) 0.0 AA substitution 1174 0.0 +Pro->Trp@P 89.026549 89.0947 H(3)C(6)N(1)O(0)S(0) 0.0 AA substitution 1175 0.0 +Pro->Tyr@P 66.010565 66.0581 H(2)C(4)N(0)O(1)S(0) 0.0 AA substitution 1176 0.0 +Gln->Ala@Q -57.021464 -57.0513 H(-3)C(-2)N(-1)O(-1)S(0) 0.0 AA substitution 1177 0.0 +Gln->Cys@Q -25.049393 -24.9863 H(-3)C(-2)N(-1)O(-1)S(1) 0.0 AA substitution 1178 0.0 +Gln->Asp@Q -13.031634 -13.0418 H(-3)C(-1)N(-1)O(1)S(0) 0.0 AA substitution 1179 0.0 +Gln->Phe@Q 19.009836 19.0446 H(1)C(4)N(-1)O(-1)S(0) 0.0 AA substitution 1180 0.0 +Gln->Gly@Q -71.037114 -71.0779 H(-5)C(-3)N(-1)O(-1)S(0) 0.0 AA substitution 1181 0.0 +Gln->Met@Q 2.981907 3.0668 H(1)C(0)N(-1)O(-1)S(1) 0.0 AA substitution 1182 0.0 +Gln->Asn@Q -14.01565 -14.0266 H(-2)C(-1)N(0)O(0)S(0) 0.0 AA substitution 1183 0.0 +Gln->Ser@Q -41.026549 -41.0519 H(-3)C(-2)N(-1)O(0)S(0) 0.0 AA substitution 1184 0.0 +Gln->Thr@Q -27.010899 -27.0253 H(-1)C(-1)N(-1)O(0)S(0) 0.0 AA substitution 1185 0.0 +Gln->Val@Q -28.990164 -28.9982 H(1)C(0)N(-1)O(-1)S(0) 0.0 AA substitution 1186 0.0 +Gln->Trp@Q 58.020735 58.0807 H(2)C(6)N(0)O(-1)S(0) 0.0 AA substitution 1187 0.0 +Gln->Tyr@Q 35.004751 35.044 H(1)C(4)N(-1)O(0)S(0) 0.0 AA substitution 1188 0.0 +Arg->Ala@R -85.063997 -85.1078 H(-7)C(-3)N(-3)O(0)S(0) 0.0 AA substitution 1189 0.0 +Arg->Asp@R -41.074168 -41.0983 H(-7)C(-2)N(-3)O(2)S(0) 0.0 AA substitution 1190 0.0 +Arg->Glu@R -27.058518 -27.0717 H(-5)C(-1)N(-3)O(2)S(0) 0.0 AA substitution 1191 0.0 +Arg->Asn@R -42.058184 -42.083 H(-6)C(-2)N(-2)O(1)S(0) 0.0 AA substitution 1192 0.0 +Arg->Val@R -57.032697 -57.0546 H(-3)C(-1)N(-3)O(0)S(0) 0.0 AA substitution 1193 0.0 +Arg->Tyr@R 6.962218 6.9876 H(-3)C(3)N(-3)O(1)S(0) 0.0 AA substitution 1194 0.0 +Arg->Phe@R -9.032697 -9.0118 H(-3)C(3)N(-3) 0.0 AA substitution 1195 0.0 +Ser->Asp@S 27.994915 28.0101 H(0)C(1)N(0)O(1)S(0) 0.0 AA substitution 1196 0.0 +Ser->Glu@S 42.010565 42.0367 
H(2)C(2)N(0)O(1)S(0) 0.0 AA substitution 1197 0.0 +Ser->His@S 50.026883 50.062 H(2)C(3)N(2)O(-1)S(0) 0.0 AA substitution 1198 0.0 +Ser->Lys@S 41.062935 41.095 H(7)C(3)N(1)O(-1)S(0) 0.0 AA substitution 1199 0.0 +Ser->Met@S 44.008456 44.1188 H(4)C(2)N(0)O(-1)S(1) 0.0 AA substitution 1200 0.0 +Ser->Gln@S 41.026549 41.0519 H(3)C(2)N(1)O(0)S(0) 0.0 AA substitution 1201 0.0 +Ser->Val@S 12.036386 12.0538 H(4)C(2)N(0)O(-1)S(0) 0.0 AA substitution 1202 0.0 +Thr->Cys@T 1.961506 2.039 H(-2)C(-1)N(0)O(-1)S(1) 0.0 AA substitution 1203 0.0 +Thr->Asp@T 13.979265 13.9835 H(-2)C(0)N(0)O(1)S(0) 0.0 AA substitution 1204 0.0 +Thr->Glu@T 27.994915 28.0101 H(0)C(1)N(0)O(1)S(0) 0.0 AA substitution 1205 0.0 +Thr->Phe@T 46.020735 46.07 H(2)C(5)N(0)O(-1)S(0) 0.0 AA substitution 1206 0.0 +Thr->Gly@T -44.026215 -44.0526 H(-4)C(-2)N(0)O(-1)S(0) 0.0 AA substitution 1207 0.0 +Thr->His@T 36.011233 36.0354 H(0)C(2)N(2)O(-1)S(0) 0.0 AA substitution 1208 0.0 +Thr->Gln@T 27.010899 27.0253 H(1)C(1)N(1)O(0)S(0) 0.0 AA substitution 1209 0.0 +Thr->Val@T -1.979265 -1.9728 H(2)C(1)N(0)O(-1)S(0) 0.0 AA substitution 1210 0.0 +Thr->Trp@T 85.031634 85.106 H(3)C(7)N(1)O(-1)S(0) 0.0 AA substitution 1211 0.0 +Thr->Tyr@T 62.01565 62.0694 H(2)C(5)N(0)O(0)S(0) 0.0 AA substitution 1212 0.0 +Val->Cys@V 3.940771 4.0118 H(-4)C(-2)N(0)O(0)S(1) 0.0 AA substitution 1213 0.0 +Val->His@V 37.990498 38.0082 H(-2)C(1)N(2)O(0)S(0) 0.0 AA substitution 1214 0.0 +Val->Lys@V 29.026549 29.0412 H(3)C(1)N(1)O(0)S(0) 0.0 AA substitution 1215 0.0 +Val->Asn@V 14.974514 14.9716 H(-3)C(-1)N(1)O(1)S(0) 0.0 AA substitution 1216 0.0 +Val->Pro@V -2.01565 -2.0159 H(-2)C(0)N(0)O(0)S(0) 0.0 AA substitution 1217 0.0 +Val->Gln@V 28.990164 28.9982 H(-1)C(0)N(1)O(1)S(0) 0.0 AA substitution 1218 0.0 +Val->Arg@V 57.032697 57.0546 H(3)C(1)N(3)O(0)S(0) 0.0 AA substitution 1219 0.0 +Val->Ser@V -12.036386 -12.0538 H(-4)C(-2)N(0)O(1)S(0) 0.0 AA substitution 1220 0.0 +Val->Thr@V 1.979265 1.9728 H(-2)C(-1)N(0)O(1)S(0) 0.0 AA substitution 1221 0.0 +Val->Trp@V 87.010899 87.0788 H(1)C(6)N(1)O(0)S(0) 0.0 AA substitution 1222 0.0 +Val->Tyr@V 63.994915 64.0422 H(0)C(4)N(0)O(1)S(0) 0.0 AA substitution 1223 0.0 +Trp->Ala@W -115.042199 -115.132 H(-5)C(-8)N(-1)O(0)S(0) 0.0 AA substitution 1224 0.0 +Trp->Asp@W -71.05237 -71.1225 H(-5)C(-7)N(-1)O(2)S(0) 0.0 AA substitution 1225 0.0 +Trp->Glu@W -57.03672 -57.0959 H(-3)C(-6)N(-1)O(2)S(0) 0.0 AA substitution 1226 0.0 +Trp->Phe@W -39.010899 -39.036 H(-1)C(-2)N(-1)O(0)S(0) 0.0 AA substitution 1227 0.0 +Trp->His@W -49.020401 -49.0706 H(-3)C(-5)N(1)O(0)S(0) 0.0 AA substitution 1228 0.0 +Trp->Lys@W -57.98435 -58.0376 H(2)C(-5)N(0)O(0)S(0) 0.0 AA substitution 1229 0.0 +Trp->Met@W -55.038828 -55.0138 H(-1)C(-6)N(-1)O(0)S(1) 0.0 AA substitution 1230 0.0 +Trp->Asn@W -72.036386 -72.1073 H(-4)C(-7)N(0)O(1)S(0) 0.0 AA substitution 1231 0.0 +Trp->Pro@W -89.026549 -89.0947 H(-3)C(-6)N(-1)O(0)S(0) 0.0 AA substitution 1232 0.0 +Trp->Gln@W -58.020735 -58.0807 H(-2)C(-6)N(0)O(1)S(0) 0.0 AA substitution 1233 0.0 +Trp->Thr@W -85.031634 -85.106 H(-3)C(-7)N(-1)O(1)S(0) 0.0 AA substitution 1234 0.0 +Trp->Val@W -87.010899 -87.0788 H(-1)C(-6)N(-1)O(0)S(0) 0.0 AA substitution 1235 0.0 +Trp->Tyr@W -23.015984 -23.0366 H(-1)C(-2)N(-1)O(1)S(0) 0.0 AA substitution 1236 0.0 +Tyr->Ala@Y -92.026215 -92.0954 H(-4)C(-6)N(0)O(-1)S(0) 0.0 AA substitution 1237 0.0 +Tyr->Glu@Y -34.020735 -34.0593 H(-2)C(-4)N(0)O(1)S(0) 0.0 AA substitution 1238 0.0 +Tyr->Gly@Y -106.041865 -106.1219 H(-6)C(-7)N(0)O(-1)S(0) 0.0 AA substitution 1239 0.0 +Tyr->Lys@Y -34.968366 -35.001 H(3)C(-3)N(1)O(-1)S(0) 
0.0 AA substitution 1240 0.0 +Tyr->Met@Y -32.022844 -31.9772 H(0)C(-4)N(0)O(-1)S(1) 0.0 AA substitution 1241 0.0 +Tyr->Pro@Y -66.010565 -66.0581 H(-2)C(-4)N(0)O(-1)S(0) 0.0 AA substitution 1242 0.0 +Tyr->Gln@Y -35.004751 -35.044 H(-1)C(-4)N(1)O(0)S(0) 0.0 AA substitution 1243 0.0 +Tyr->Arg@Y -6.962218 -6.9876 H(3)C(-3)N(3)O(-1)S(0) 0.0 AA substitution 1244 0.0 +Tyr->Thr@Y -62.01565 -62.0694 H(-2)C(-5)N(0)O(0)S(0) 0.0 AA substitution 1245 0.0 +Tyr->Val@Y -63.994915 -64.0422 H(0)C(-4)N(0)O(-1)S(0) 0.0 AA substitution 1246 0.0 +Tyr->Trp@Y 23.015984 23.0366 H(1)C(2)N(1)O(-1)S(0) 0.0 AA substitution 1247 0.0 +Tyr->Xle@Y -49.979265 -50.0156 H(2)C(-3)O(-1) 0.0 AA substitution 1248 0.0 +AHA-SS@M 195.075625 195.1787 H(9)C(7)N(5)O(2) 0.0 Multiple 1249 0.0 +AHA-SS_CAM@M 252.097088 252.23 H(12)C(9)N(6)O(3) 0.0 Multiple 1250 0.0 +Biotin:Thermo-33033@Anywhere 548.223945 548.7211 H(36)C(25)N(6)O(4)S(2) 0.0 Chemical derivative 1251 0.0 +Biotin:Thermo-33033-H@Anywhere 546.208295 546.7053 H(34)C(25)N(6)O(4)S(2) 0.0 Chemical derivative 1252 0.0 +2-monomethylsuccinyl@C 130.026609 130.0987 H(6)C(5)O(4) 0.0 Chemical derivative 1253 0.0 +Saligenin@H 106.041865 106.1219 H(6)C(7)O(1) 0.0 Chemical derivative 1254 0.0 +Saligenin@K 106.041865 106.1219 H(6)C(7)O(1) 0.0 Chemical derivative 1254 0.0 +Cresylphosphate@R 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +Cresylphosphate@S 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +Cresylphosphate@T 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +Cresylphosphate@Y 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +Cresylphosphate@K 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +Cresylphosphate@H 170.013281 170.1024 H(7)C(7)O(3)P(1) 0.0 Chemical derivative 1255 0.0 +CresylSaligeninPhosphate@R 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +CresylSaligeninPhosphate@S 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +CresylSaligeninPhosphate@T 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +CresylSaligeninPhosphate@Y 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +CresylSaligeninPhosphate@K 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +CresylSaligeninPhosphate@H 276.055146 276.2244 H(13)C(14)O(4)P(1) 0.0 Chemical derivative 1256 0.0 +Ub-Br2@C 100.063663 100.1191 H(8)C(4)N(2)O(1) 0.0 Chemical derivative 1257 0.0 +Ub-VME@C 172.084792 172.1818 H(12)C(7)N(2)O(3) 0.0 Chemical derivative 1258 0.0 +Ub-fluorescein@C 597.209772 597.598 H(29)C(31)N(6)O(7) 0.0 Chemical derivative 1261 0.0 +2-dimethylsuccinyl@C 144.042259 144.1253 H(8)C(6)O(4) 0.0 Chemical derivative 1262 0.0 +Gly@T 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 1263 0.0 +Gly@S 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 1263 0.0 +Gly@K 57.021464 57.0513 H(3)C(2)N(1)O(1) 0.0 Chemical derivative 1263 0.0 +pupylation@K 243.085521 243.2166 H(13)C(9)N(3)O(5) 0.0 Post-translational 1264 0.0 +Label:13C(4)@M 4.013419 3.9706 C(-4)13C(4) 0.0 Isotopic label 1266 0.0 +HCysteinyl@C 133.019749 133.1689 H(7)C(4)N(1)O(2)S(1) 0.0 Post-translational 1271 0.0 +Label:13C(4)+Oxidation@M 20.008334 19.97 C(-4)13C(4)O(1) 0.0 Isotopic label 1267 0.0 +UgiJoullie@E 1106.48935 1107.1274 H(60)C(47)N(23)O(10) 0.0 Chemical derivative 1276 0.0 +UgiJoullie@D 1106.48935 1107.1274 H(60)C(47)N(23)O(10) 0.0 Chemical derivative 1276 0.0 +HCysThiolactone@K 117.024835 117.1695 H(7)C(4)N(1)O(1)S(1) 
0.0 Post-translational 1270 0.0 +UgiJoullieProGly@D 154.074228 154.1665 H(10)C(7)N(2)O(2) 0.0 Chemical derivative 1282 0.0 +UgiJoullieProGly@E 154.074228 154.1665 H(10)C(7)N(2)O(2) 0.0 Chemical derivative 1282 0.0 +Dipyridyl@C 225.090212 225.2459 H(11)C(13)N(3)O(1) 0.0 Chemical derivative 1277 0.0 +Furan@Y 66.010565 66.0581 H(2)C(4)O(1) 0.0 Chemical derivative 1278 0.0 +Difuran@Y 132.021129 132.1162 H(4)C(8)O(2) 0.0 Chemical derivative 1279 0.0 +BMP-piperidinol@C 263.131014 263.3337 H(17)C(18)N(1)O(1) 0.0 Chemical derivative 1281 0.0 +BMP-piperidinol@M 263.131014 263.3337 H(17)C(18)N(1)O(1) 0.0 Chemical derivative 1281 0.0 +UgiJoullieProGlyProGly@D 308.148455 308.333 H(20)C(14)N(4)O(4) 0.0 Chemical derivative 1283 0.0 +UgiJoullieProGlyProGly@E 308.148455 308.333 H(20)C(14)N(4)O(4) 0.0 Chemical derivative 1283 0.0 +Arg-loss@R^Any_C-term -156.101111 -156.1857 H(-12)C(-6)N(-4)O(-1) 0.0 Other 1287 0.0 +Arg@Any_N-term 156.101111 156.1857 H(12)C(6)N(4)O(1) 0.0 Other 1288 0.0 +IMEHex(2)NeuAc(1)@K 688.199683 688.6527 H(40)C(25)N(2)O(18)S(1) 0.0 Other glycosylation 1286 0.0 +Butyryl@K 70.041865 70.0898 H(6)C(4)O(1) 0.0 Post-translational 1289 CCCC(=O)NCCCCC(C(=O)[Rn])N([Xe])([Xe]) 0.0 +Dicarbamidomethyl@K 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Artefact 1290 0.0 +Dicarbamidomethyl@H 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Artefact 1290 0.0 +Dicarbamidomethyl@C 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Artefact 1290 0.0 +Dicarbamidomethyl@R 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Artefact 1290 0.0 +Dicarbamidomethyl@Any_N-term 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Artefact 1290 0.0 +Dimethyl:2H(6)@K 34.068961 34.0901 H(-2)2H(6)C(2) 0.0 Isotopic label 1291 0.0 +Dimethyl:2H(6)@Any_N-term 34.068961 34.0901 H(-2)2H(6)C(2) 0.0 Isotopic label 1291 0.0 +Dimethyl:2H(6)@R 34.068961 34.0901 H(-2)2H(6)C(2) 0.0 Isotopic label 1291 0.0 +GGQ@K 242.101505 242.2319 H(14)C(9)N(4)O(4) 0.0 Other 1292 0.0 +QTGG@K 343.149184 343.3357 H(21)C(13)N(5)O(6) 0.0 Other 1293 0.0 +Label:13C(3)15N(1)@A 4.007099 3.9714 C(-3)13C(3)N(-1)15N(1) 0.0 Isotopic label 1297 0.0 +Label:13C(3)15N(1)@S 4.007099 3.9714 C(-3)13C(3)N(-1)15N(1) 0.0 Isotopic label 1297 0.0 +Label:13C(3)@A 3.010064 2.978 C(-3)13C(3) 0.0 Isotopic label 1296 0.0 +Label:13C(4)15N(1)@D 5.010454 4.964 C(-4)13C(4)N(-1)15N(1) 0.0 Isotopic label 1298 0.0 +Label:2H(10)@L 10.062767 10.0616 H(-10)2H(10) 0.0 Isotopic label 1299 0.0 +Label:2H(4)13C(1)@R 5.028462 5.0173 H(-4)2H(4)C(-1)13C(1) 0.0 Isotopic label 1300 0.0 +Lys@Any_N-term 128.094963 128.1723 H(12)C(6)N(2)O(1) 0.0 Other 1301 0.0 +mTRAQ:13C(6)15N(2)@K 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 [H]N(CCCC[C@H](N([Xe])([Xe]))C(=O)[Rn])C(=O)[13CH2][15N]1[13CH2][13CH2][15N]([13CH3])[13CH2][13CH2]1 0.0 +mTRAQ:13C(6)15N(2)@Any_N-term 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 C(=O)[13C]([H])([H])[15N]1[13C]([H])([H])[13C]([H])([H])[15N]([13C]([H])([H])[13C]1([H])([H]))[13C]([H])([H])([H]) 0.0 +mTRAQ:13C(6)15N(2)@Y 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 0.0 +mTRAQ:13C(6)15N(2)@H 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 0.0 +mTRAQ:13C(6)15N(2)@S 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 0.0 +mTRAQ:13C(6)15N(2)@T 148.109162 148.1257 H(12)C(1)13C(6)15N(2)O(1) 0.0 Isotopic label 1302 0.0 +NeuAc@T 291.095417 291.2546 H(17)C(11)N(1)O(8) 291.095417 H(17)C(11)N(1)O(8) O-linked glycosylation 1303 0.5 +NeuAc@S 291.095417 291.2546 H(17)C(11)N(1)O(8) 291.095417 H(17)C(11)N(1)O(8) O-linked 
glycosylation 1303 0.5 +NeuAc@N 291.095417 291.2546 H(17)C(11)N(1)O(8) 291.095417 H(17)C(11)N(1)O(8) N-linked glycosylation 1303 0.5 +NeuGc@T 307.090331 307.254 H(17)C(11)N(1)O(9) 307.090331 H(17)C(11)N(1)O(9) O-linked glycosylation 1304 0.5 +NeuGc@S 307.090331 307.254 H(17)C(11)N(1)O(9) 307.090331 H(17)C(11)N(1)O(9) O-linked glycosylation 1304 0.5 +NeuGc@N 307.090331 307.254 H(17)C(11)N(1)O(9) 307.090331 H(17)C(11)N(1)O(9) N-linked glycosylation 1304 0.5 +Propyl@D 42.04695 42.0797 H(6)C(3) 0.0 Chemical derivative 1305 0.0 +Propyl@K 42.04695 42.0797 H(6)C(3) 0.0 Isotopic label 1305 0.0 +Propyl@Any_N-term 42.04695 42.0797 H(6)C(3) 0.0 Isotopic label 1305 0.0 +Propyl@E 42.04695 42.0797 H(6)C(3) 0.0 Chemical derivative 1305 0.0 +Propyl@Any_C-term 42.04695 42.0797 H(6)C(3) 0.0 Chemical derivative 1305 OCCC 0.0 +Propyl@Protein_C-term 42.04695 42.0797 H(6)C(3) 0.0 Chemical derivative 1305 OCCC 0.0 +Propyl:2H(6)@Any_N-term 48.084611 48.1167 2H(6)C(3) 0.0 Isotopic label 1306 0.0 +Propyl:2H(6)@K 48.084611 48.1167 2H(6)C(3) 0.0 Isotopic label 1306 0.0 +Propiophenone@C 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@W 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@T 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@S 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@R 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@K 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +Propiophenone@H 132.057515 132.1592 H(8)C(9)O(1) 0.0 Chemical derivative 1310 0.0 +PS_Hapten@H 120.021129 120.1055 H(4)C(7)O(2) 0.0 Chemical derivative 1345 0.0 +PS_Hapten@C 120.021129 120.1055 H(4)C(7)O(2) 0.0 Chemical derivative 1345 0.0 +PS_Hapten@K 120.021129 120.1055 H(4)C(7)O(2) 0.0 Chemical derivative 1345 0.0 +Cy3-maleimide@C 753.262796 753.9046 H(45)C(37)N(4)O(9)S(2) 0.0 Chemical derivative 1348 0.0 +Delta:H(6)C(3)O(1)@Protein_N-term 58.041865 58.0791 H(6)C(3)O(1) 0.0 Chemical derivative 1312 0.0 +Delta:H(6)C(3)O(1)@K 58.041865 58.0791 H(6)C(3)O(1) 0.0 Chemical derivative 1312 0.0 +Delta:H(6)C(3)O(1)@H 58.041865 58.0791 H(6)C(3)O(1) 0.0 Chemical derivative 1312 0.0 +Delta:H(6)C(3)O(1)@C 58.041865 58.0791 H(6)C(3)O(1) 0.0 Chemical derivative 1312 0.0 +Delta:H(8)C(6)O(1)@Protein_N-term 96.057515 96.1271 H(8)C(6)O(1) 0.0 Chemical derivative 1313 0.0 +Delta:H(8)C(6)O(1)@K 96.057515 96.1271 H(8)C(6)O(1) 0.0 Chemical derivative 1313 0.0 +biotinAcrolein298@H 298.146347 298.4044 H(22)C(13)N(4)O(2)S(1) 0.0 Chemical derivative 1314 0.0 +biotinAcrolein298@K 298.146347 298.4044 H(22)C(13)N(4)O(2)S(1) 0.0 Chemical derivative 1314 0.0 +biotinAcrolein298@Protein_N-term 298.146347 298.4044 H(22)C(13)N(4)O(2)S(1) 0.0 Chemical derivative 1314 0.0 +biotinAcrolein298@C 298.146347 298.4044 H(22)C(13)N(4)O(2)S(1) 0.0 Chemical derivative 1314 0.0 +MM-diphenylpentanone@C 265.146664 265.3496 H(19)C(18)N(1)O(1) 0.0 Chemical derivative 1315 0.0 +EHD-diphenylpentanone@M 266.13068 266.3343 H(18)C(18)O(2) 0.0 Chemical derivative 1317 0.0 +EHD-diphenylpentanone@C 266.13068 266.3343 H(18)C(18)O(2) 0.0 Chemical derivative 1317 0.0 +benzylguanidine@K 132.068748 132.1625 H(8)C(8)N(2) 0.0 Chemical derivative 1349 0.0 +CarboxymethylDMAP@Any_N-term 162.079313 162.1885 H(10)C(9)N(2)O(1) 0.0 Chemical derivative 1350 0.0 +Biotin:Thermo-21901+2H2O@C 561.246849 561.6489 H(39)C(23)N(5)O(9)S(1) 0.0 Chemical derivative 1320 0.0 +DiLeu4plex115@K 145.12 145.1966 H(15)C(7)13C(1)15N(1)18O(1) 0.0 
Isotopic label 1321 0.0 +DiLeu4plex115@Any_N-term 145.12 145.1966 H(15)C(7)13C(1)15N(1)18O(1) 0.0 Isotopic label 1321 0.0 +DiLeu4plex115@Y 145.12 145.1966 H(15)C(7)13C(1)15N(1)18O(1) 0.0 Isotopic label 1321 0.0 +DiLeu4plex@Any_N-term 145.132163 145.2229 H(13)2H(2)C(8)N(1)18O(1) 0.0 Isotopic label 1322 0.0 +DiLeu4plex@K 145.132163 145.2229 H(13)2H(2)C(8)N(1)18O(1) 0.0 Isotopic label 1322 0.0 +DiLeu4plex@Y 145.132163 145.2229 H(13)2H(2)C(8)N(1)18O(1) 0.0 Isotopic label 1322 0.0 +DiLeu4plex117@K 145.128307 145.2092 H(13)2H(2)C(7)13C(1)15N(1)O(1) 0.0 Isotopic label 1323 0.0 +DiLeu4plex117@Any_N-term 145.128307 145.2092 H(13)2H(2)C(7)13C(1)15N(1)O(1) 0.0 Isotopic label 1323 0.0 +DiLeu4plex117@Y 145.128307 145.2092 H(13)2H(2)C(7)13C(1)15N(1)O(1) 0.0 Isotopic label 1323 0.0 +DiLeu4plex118@K 145.140471 145.2354 H(11)2H(4)C(8)N(1)O(1) 0.0 Isotopic label 1324 0.0 +DiLeu4plex118@Any_N-term 145.140471 145.2354 H(11)2H(4)C(8)N(1)O(1) 0.0 Isotopic label 1324 0.0 +DiLeu4plex118@Y 145.140471 145.2354 H(11)2H(4)C(8)N(1)O(1) 0.0 Isotopic label 1324 0.0 +Xlink:BuUrBu[213]@Protein_N-term 213.111341 213.2337 H(15)C(9)N(3)O(3) 0.0 Chemical derivative 1887 0.0 +Xlink:BuUrBu[213]@S 213.111341 213.2337 H(15)C(9)N(3)O(3) 0.0 Chemical derivative 1887 0.0 +Xlink:BuUrBu[213]@K 213.111341 213.2337 H(15)C(9)N(3)O(3) 0.0 Chemical derivative 1887 0.0 +Xlink:BuUrBu[213]@T 213.111341 213.2337 H(15)C(9)N(3)O(3) 0.0 Chemical derivative 1887 0.0 +Xlink:BuUrBu[213]@Y 213.111341 213.2337 H(15)C(9)N(3)O(3) 0.0 Chemical derivative 1887 0.0 +bisANS-sulfonates@T 434.178299 434.5305 H(22)C(32)N(2) 0.0 Chemical derivative 1330 0.0 +bisANS-sulfonates@S 434.178299 434.5305 H(22)C(32)N(2) 0.0 Chemical derivative 1330 0.0 +bisANS-sulfonates@K 434.178299 434.5305 H(22)C(32)N(2) 0.0 Chemical derivative 1330 0.0 +DNCB_hapten@Y 166.001457 166.0911 H(2)C(6)N(2)O(4) 0.0 Chemical derivative 1331 0.0 +DNCB_hapten@H 166.001457 166.0911 H(2)C(6)N(2)O(4) 0.0 Chemical derivative 1331 0.0 +DNCB_hapten@K 166.001457 166.0911 H(2)C(6)N(2)O(4) 0.0 Chemical derivative 1331 0.0 +DNCB_hapten@C 166.001457 166.0911 H(2)C(6)N(2)O(4) 0.0 Chemical derivative 1331 0.0 +NEMsulfur@C 157.019749 157.1903 H(7)C(6)N(1)O(2)S(1) 0.0 Chemical derivative 1326 0.0 +SulfurDioxide@C 63.9619 64.0638 O(2)S(1) 0.0 Post-translational 1327 0.0 +NEMsulfurWater@C 175.030314 175.2056 H(9)C(6)N(1)O(3)S(1) 0.0 Chemical derivative 1328 0.0 +HN3_mustard@C 131.094629 131.1729 H(13)C(6)N(1)O(2) 0.0 Post-translational 1389 0.0 +HN3_mustard@H 131.094629 131.1729 H(13)C(6)N(1)O(2) 0.0 Post-translational 1389 0.0 +HN3_mustard@K 131.094629 131.1729 H(13)C(6)N(1)O(2) 0.0 Post-translational 1389 0.0 +3-phosphoglyceryl@K 167.982375 168.042 H(5)C(3)O(6)P(1) 0.0 Post-translational 1387 0.0 +HN2_mustard@H 101.084064 101.1469 H(11)C(5)N(1)O(1) 0.0 Post-translational 1388 0.0 +HN2_mustard@K 101.084064 101.1469 H(11)C(5)N(1)O(1) 0.0 Post-translational 1388 0.0 +HN2_mustard@C 101.084064 101.1469 H(11)C(5)N(1)O(1) 0.0 Post-translational 1388 0.0 +NEM:2H(5)+H2O@C 148.089627 148.1714 H(4)2H(5)C(6)N(1)O(3) 0.0 Chemical derivative 1358 0.0 +Crotonyl@K 68.026215 68.074 H(4)C(4)O(1) 0.0 Post-translational 1363 CC=CC(=O)NCCCCC(C(=O)[Rn])N([Xe])([Xe]) 0.0 +O-Et-N-diMePhospho@S 135.044916 135.1015 H(10)C(4)N(1)O(2)P(1) 0.0 Chemical derivative 1364 0.0 +N-dimethylphosphate@S 107.013615 107.0483 H(6)C(2)N(1)O(2)P(1) 0.0 Chemical derivative 1365 0.0 +phosphoRibosyl@E 212.00859 212.0945 H(9)C(5)O(7)P(1) 0.0 Post-translational 1356 0.0 +phosphoRibosyl@R 212.00859 212.0945 H(9)C(5)O(7)P(1) 0.0 Post-translational 1356 0.0 
+phosphoRibosyl@D 212.00859 212.0945 H(9)C(5)O(7)P(1) 0.0 Post-translational 1356 0.0 +azole@C -20.026215 -20.0312 H(-4)O(-1) 0.0 Post-translational 1355 0.0 +azole@S -20.026215 -20.0312 H(-4)O(-1) 0.0 Post-translational 1355 0.0 +Biotin:Thermo-21911@C 921.461652 922.0913 H(71)C(41)N(5)O(16)S(1) 0.0 Chemical derivative 1340 0.0 +iodoTMT@K 324.216141 324.4185 H(28)C(16)N(4)O(3) 0.0 Chemical derivative 1341 0.0 +iodoTMT@H 324.216141 324.4185 H(28)C(16)N(4)O(3) 0.0 Chemical derivative 1341 0.0 +iodoTMT@E 324.216141 324.4185 H(28)C(16)N(4)O(3) 0.0 Chemical derivative 1341 0.0 +iodoTMT@D 324.216141 324.4185 H(28)C(16)N(4)O(3) 0.0 Chemical derivative 1341 0.0 +iodoTMT@C 324.216141 324.4185 H(28)C(16)N(4)O(3) 0.0 Chemical derivative 1341 0.0 +iodoTMT6plex@K 329.226595 329.3825 H(28)C(12)13C(4)N(3)15N(1)O(3) 0.0 Chemical derivative 1342 0.0 +iodoTMT6plex@H 329.226595 329.3825 H(28)C(12)13C(4)N(3)15N(1)O(3) 0.0 Chemical derivative 1342 0.0 +iodoTMT6plex@E 329.226595 329.3825 H(28)C(12)13C(4)N(3)15N(1)O(3) 0.0 Chemical derivative 1342 0.0 +iodoTMT6plex@D 329.226595 329.3825 H(28)C(12)13C(4)N(3)15N(1)O(3) 0.0 Chemical derivative 1342 0.0 +iodoTMT6plex@C 329.226595 329.3825 H(28)C(12)13C(4)N(3)15N(1)O(3) 0.0 Chemical derivative 1342 0.0 +Label:13C(2)15N(2)@K 4.00078 3.9721 C(-2)13C(2)N(-2)15N(2) 0.0 Isotopic label 1787 0.0 +Phosphogluconoylation@Any_N-term 258.014069 258.1199 H(11)C(6)O(9)P(1) 0.0 Post-translational 1344 0.0 +Phosphogluconoylation@K 258.014069 258.1199 H(11)C(6)O(9)P(1) 0.0 Post-translational 1344 0.0 +Methyl:2H(3)+Acetyl:2H(3)@K 62.063875 62.1002 H(-2)2H(6)C(3)O(1) 0.0 Isotopic label 1368 0.0 +dHex(1)Hex(1)@T 308.110732 308.2818 H(20)C(12)O(9) 308.110732 H(20)C(12)O(9) O-linked glycosylation 1367 0.5 +dHex(1)Hex(1)@S 308.110732 308.2818 H(20)C(12)O(9) 308.110732 H(20)C(12)O(9) O-linked glycosylation 1367 0.5 +methylsulfonylethyl@K 106.00885 106.1435 H(6)C(3)O(2)S(1) 0.0 Chemical derivative 1380 0.0 +methylsulfonylethyl@H 106.00885 106.1435 H(6)C(3)O(2)S(1) 0.0 Chemical derivative 1380 0.0 +methylsulfonylethyl@C 106.00885 106.1435 H(6)C(3)O(2)S(1) 0.0 Chemical derivative 1380 0.0 +Label:2H(3)+Oxidation@M 19.013745 19.0179 H(-3)2H(3)O(1) 0.0 Isotopic label 1370 0.0 +Trimethyl:2H(9)@R 51.103441 51.1352 H(-3)2H(9)C(3) 0.0 Isotopic label 1371 0.0 +Trimethyl:2H(9)@K 51.103441 51.1352 H(-3)2H(9)C(3) 0.0 Isotopic label 1371 0.0 +Acetyl:13C(2)@K 44.017274 44.022 H(2)13C(2)O(1) 0.0 Isotopic label 1372 0.0 +Acetyl:13C(2)@Protein_N-term 44.017274 44.022 H(2)13C(2)O(1) 0.0 Isotopic label 1372 0.0 +dHex(1)Hex(2)@T 470.163556 470.4224 H(30)C(18)O(14) 470.163556 H(30)C(18)O(14) O-linked glycosylation 1375 0.5 +dHex(1)Hex(2)@S 470.163556 470.4224 H(30)C(18)O(14) 470.163556 H(30)C(18)O(14) O-linked glycosylation 1375 0.5 +dHex(1)Hex(3)@T 632.216379 632.563 H(40)C(24)O(19) 632.216379 H(40)C(24)O(19) O-linked glycosylation 1376 0.5 +dHex(1)Hex(3)@S 632.216379 632.563 H(40)C(24)O(19) 632.216379 H(40)C(24)O(19) O-linked glycosylation 1376 0.5 +dHex(1)Hex(4)@T 794.269203 794.7036 H(50)C(30)O(24) 794.269203 H(50)C(30)O(24) O-linked glycosylation 1377 0.5 +dHex(1)Hex(4)@S 794.269203 794.7036 H(50)C(30)O(24) 794.269203 H(50)C(30)O(24) O-linked glycosylation 1377 0.5 +dHex(1)Hex(5)@T 956.322026 956.8442 H(60)C(36)O(29) 956.322026 H(60)C(36)O(29) O-linked glycosylation 1378 0.5 +dHex(1)Hex(5)@S 956.322026 956.8442 H(60)C(36)O(29) 956.322026 H(60)C(36)O(29) O-linked glycosylation 1378 0.5 +dHex(1)Hex(6)@T 1118.37485 1118.9848 H(70)C(42)O(34) 1118.37485 H(70)C(42)O(34) O-linked glycosylation 1379 0.5 
+dHex(1)Hex(6)@S 1118.37485 1118.9848 H(70)C(42)O(34) 1118.37485 H(70)C(42)O(34) O-linked glycosylation 1379 0.5 +ethylsulfonylethyl@H 120.0245 120.1701 H(8)C(4)O(2)S(1) 0.0 Chemical derivative 1381 0.0 +ethylsulfonylethyl@C 120.0245 120.1701 H(8)C(4)O(2)S(1) 0.0 Chemical derivative 1381 0.0 +ethylsulfonylethyl@K 120.0245 120.1701 H(8)C(4)O(2)S(1) 0.0 Chemical derivative 1381 0.0 +phenylsulfonylethyl@C 168.0245 168.2129 H(8)C(8)O(2)S(1) 0.0 Chemical derivative 1382 0.0 +PyridoxalPhosphateH2@K 231.02966 231.1425 H(10)C(8)N(1)O(5)P(1) 0.0 Chemical derivative 1383 0.0 +Homocysteic_acid@M 33.969094 33.9716 H(-2)C(-1)O(3) 0.0 Artefact 1384 0.0 +Hydroxamic_acid@E 15.010899 15.0146 H(1)N(1) 0.0 Artefact 1385 0.0 +Hydroxamic_acid@D 15.010899 15.0146 H(1)N(1) 0.0 Artefact 1385 0.0 +Oxidation+NEM@C 141.042593 141.1247 H(7)C(6)N(1)O(3) 0.0 Chemical derivative 1390 0.0 +NHS-fluorescein@K 471.131802 471.4581 H(21)C(27)N(1)O(7) 0.0 Chemical derivative 1391 0.0 +DiART6plex@Y 217.162932 217.2527 H(20)C(7)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 1392 0.0 +DiART6plex@Protein_N-term 217.162932 217.2527 H(20)C(7)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 1392 0.0 +DiART6plex@Any_N-term 217.162932 217.2527 H(20)C(7)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 1392 0.0 +DiART6plex@K 217.162932 217.2527 H(20)C(7)13C(4)N(1)15N(1)O(2) 0.0 Isotopic label 1392 0.0 +DiART6plex115@K 217.156612 217.2535 H(20)C(8)13C(3)15N(2)O(2) 0.0 Isotopic label 1393 0.0 +DiART6plex115@Any_N-term 217.156612 217.2535 H(20)C(8)13C(3)15N(2)O(2) 0.0 Isotopic label 1393 0.0 +DiART6plex115@Protein_N-term 217.156612 217.2535 H(20)C(8)13C(3)15N(2)O(2) 0.0 Isotopic label 1393 0.0 +DiART6plex115@Y 217.156612 217.2535 H(20)C(8)13C(3)15N(2)O(2) 0.0 Isotopic label 1393 0.0 +DiART6plex116/119@Y 217.168776 217.2797 H(18)2H(2)C(9)13C(2)N(1)15N(1)O(2) 0.0 Isotopic label 1394 0.0 +DiART6plex116/119@Protein_N-term 217.168776 217.2797 H(18)2H(2)C(9)13C(2)N(1)15N(1)O(2) 0.0 Isotopic label 1394 0.0 +DiART6plex116/119@K 217.168776 217.2797 H(18)2H(2)C(9)13C(2)N(1)15N(1)O(2) 0.0 Isotopic label 1394 0.0 +DiART6plex116/119@Any_N-term 217.168776 217.2797 H(18)2H(2)C(9)13C(2)N(1)15N(1)O(2) 0.0 Isotopic label 1394 0.0 +DiART6plex117@K 217.162456 217.2805 H(18)2H(2)C(10)13C(1)15N(2)O(2) 0.0 Isotopic label 1395 0.0 +DiART6plex117@Any_N-term 217.162456 217.2805 H(18)2H(2)C(10)13C(1)15N(2)O(2) 0.0 Isotopic label 1395 0.0 +DiART6plex117@Protein_N-term 217.162456 217.2805 H(18)2H(2)C(10)13C(1)15N(2)O(2) 0.0 Isotopic label 1395 0.0 +DiART6plex117@Y 217.162456 217.2805 H(18)2H(2)C(10)13C(1)15N(2)O(2) 0.0 Isotopic label 1395 0.0 +DiART6plex118@K 217.175096 217.279 H(18)2H(2)C(8)13C(3)N(2)O(2) 0.0 Isotopic label 1396 0.0 +DiART6plex118@Any_N-term 217.175096 217.279 H(18)2H(2)C(8)13C(3)N(2)O(2) 0.0 Isotopic label 1396 0.0 +DiART6plex118@Protein_N-term 217.175096 217.279 H(18)2H(2)C(8)13C(3)N(2)O(2) 0.0 Isotopic label 1396 0.0 +DiART6plex118@Y 217.175096 217.279 H(18)2H(2)C(8)13C(3)N(2)O(2) 0.0 Isotopic label 1396 0.0 +Iodoacetanilide@K 133.052764 133.1473 H(7)C(8)N(1)O(1) 0.0 Artefact 1397 0.0 +Iodoacetanilide@C 133.052764 133.1473 H(7)C(8)N(1)O(1) 0.0 Chemical derivative 1397 0.0 +Iodoacetanilide@Any_N-term 133.052764 133.1473 H(7)C(8)N(1)O(1) 0.0 Artefact 1397 0.0 +Iodoacetanilide:13C(6)@K 139.072893 139.1032 H(7)C(2)13C(6)N(1)O(1) 0.0 Artefact 1398 0.0 +Iodoacetanilide:13C(6)@C 139.072893 139.1032 H(7)C(2)13C(6)N(1)O(1) 0.0 Chemical derivative 1398 0.0 +Iodoacetanilide:13C(6)@Any_N-term 139.072893 139.1032 H(7)C(2)13C(6)N(1)O(1) 0.0 Artefact 1398 0.0 +Dap-DSP@K 364.076278 
364.4377 H(20)C(13)N(2)O(6)S(2) 0.0 Chemical derivative 1399 0.0 +Dap-DSP@E 364.076278 364.4377 H(20)C(13)N(2)O(6)S(2) 0.0 Non-standard residue 1399 0.0 +Dap-DSP@A 364.076278 364.4377 H(20)C(13)N(2)O(6)S(2) 0.0 Non-standard residue 1399 0.0 +MurNAc@A 275.100502 275.2552 H(17)C(11)N(1)O(7) 0.0 Other glycosylation 1400 0.0 +EEEDVIEVYQEQTGG@K 1705.73189 1706.7153 H(107)C(72)N(17)O(31) 0.0 Chemical derivative 1405 0.0 +Label:2H(7)15N(4)@R 11.032077 11.0168 H(-7)2H(7)N(-4)15N(4) 0.0 Isotopic label 1402 0.0 +Label:2H(6)15N(1)@P 7.034695 7.0304 H(-6)2H(6)N(-1)15N(1) 0.0 Isotopic label 1403 0.0 +EDEDTIDVFQQQTGG@K 1662.700924 1663.6508 H(102)C(69)N(18)O(30) 0.0 Chemical derivative 1406 0.0 +Hex(5)HexNAc(4)NeuAc(2)@N 2204.772441 2205.9822 H(136)C(84)N(6)O(61) 2204.772441 H(136)C(84)N(6)O(61) N-linked glycosylation 1408 0.5 +Hex(5)HexNAc(4)NeuAc(1)@N 1913.677025 1914.7277 H(119)C(73)N(5)O(53) 1913.677025 H(119)C(73)N(5)O(53) N-linked glycosylation 1409 0.5 +dHex(1)Hex(5)HexNAc(4)NeuAc(1)@N 2059.734933 2060.8689 H(129)C(79)N(5)O(57) 2059.734933 H(129)C(79)N(5)O(57) N-linked glycosylation 1410 0.5 +dHex(1)Hex(5)HexNAc(4)NeuAc(2)@N 2350.83035 2352.1234 H(146)C(90)N(6)O(65) 2350.83035 H(146)C(90)N(6)O(65) N-linked glycosylation 1411 0.5 +s-GlcNAc@T 283.036187 283.2557 H(13)C(8)N(1)O(8)S(1) 283.036187 H(13)C(8)N(1)O(8)S(1) O-linked glycosylation 1412 0.5 +s-GlcNAc@S 283.036187 283.2557 H(13)C(8)N(1)O(8)S(1) 283.036187 H(13)C(8)N(1)O(8)S(1) O-linked glycosylation 1412 0.5 +PhosphoHex(2)@N 404.071978 404.2611 H(21)C(12)O(13)P(1) 404.071978 H(21)C(12)O(13)P(1) N-linked glycosylation 1413 0.5 +PhosphoHex(2)@T 404.071978 404.2611 H(21)C(12)O(13)P(1) 404.071978 H(21)C(12)O(13)P(1) O-linked glycosylation 1413 0.5 +PhosphoHex(2)@S 404.071978 404.2611 H(21)C(12)O(13)P(1) 404.071978 H(21)C(12)O(13)P(1) O-linked glycosylation 1413 0.5 +Trimethyl:13C(3)2H(9)@K 54.113505 54.1132 H(-3)2H(9)13C(3) 0.0 Isotopic label 1414 0.0 +Trimethyl:13C(3)2H(9)@R 54.113505 54.1132 H(-3)2H(9)13C(3) 0.0 Isotopic label 1414 0.0 +15N-oxobutanoic@S^Protein_N-term -18.023584 -18.0239 H(-3)15N(-1) 0.0 Post-translational 1419 0.0 +15N-oxobutanoic@C^Any_N-term -18.023584 -18.0239 H(-3)15N(-1) 0.0 Artefact 1419 0.0 +15N-oxobutanoic@T^Protein_N-term -18.023584 -18.0239 H(-3)15N(-1) 0.0 Post-translational 1419 0.0 +spermidine@Q 128.131349 128.2153 H(16)C(7)N(2) 0.0 Chemical derivative 1421 0.0 +Biotin:Thermo-21330@Any_N-term 473.219571 473.5835 H(35)C(21)N(3)O(7)S(1) 0.0 Chemical derivative 1423 0.0 +Biotin:Thermo-21330@K 473.219571 473.5835 H(35)C(21)N(3)O(7)S(1) 0.0 Chemical derivative 1423 0.0 +Hex(1)Pent(2)@T 426.137341 426.3698 H(26)C(16)O(13) 426.137341 H(26)C(16)O(13) O-linked glycosylation 1428 0.5 +Hex(1)Pent(2)@S 426.137341 426.3698 H(26)C(16)O(13) 426.137341 H(26)C(16)O(13) O-linked glycosylation 1428 0.5 +Pentose@T 132.042259 132.1146 H(8)C(5)O(4) 132.042259 H(8)C(5)O(4) O-linked glycosylation 1425 0.5 +Pentose@S 132.042259 132.1146 H(8)C(5)O(4) 132.042259 H(8)C(5)O(4) O-linked glycosylation 1425 0.5 +Hex(1)Pent(1)@T 294.095082 294.2552 H(18)C(11)O(9) 294.095082 H(18)C(11)O(9) O-linked glycosylation 1426 0.5 +Hex(1)Pent(1)@S 294.095082 294.2552 H(18)C(11)O(9) 294.095082 H(18)C(11)O(9) O-linked glycosylation 1426 0.5 +Hex(1)HexA(1)@T 338.084912 338.2647 H(18)C(12)O(11) 338.084912 H(18)C(12)O(11) O-linked glycosylation 1427 0.5 +Hex(1)HexA(1)@S 338.084912 338.2647 H(18)C(12)O(11) 338.084912 H(18)C(12)O(11) O-linked glycosylation 1427 0.5 +Hex(1)HexNAc(1)Phos(1)@T 445.098527 445.313 H(24)C(14)N(1)O(13)P(1) 445.098527 
H(24)C(14)N(1)O(13)P(1) O-linked glycosylation 1429 0.5 +Hex(1)HexNAc(1)Phos(1)@S 445.098527 445.313 H(24)C(14)N(1)O(13)P(1) 445.098527 H(24)C(14)N(1)O(13)P(1) O-linked glycosylation 1429 0.5 +Hex(1)HexNAc(1)Sulf(1)@T 445.089011 445.3963 H(23)C(14)N(1)O(13)S(1) 445.089011 H(23)C(14)N(1)O(13)S(1) O-linked glycosylation 1430 0.5 +Hex(1)HexNAc(1)Sulf(1)@S 445.089011 445.3963 H(23)C(14)N(1)O(13)S(1) 445.089011 H(23)C(14)N(1)O(13)S(1) O-linked glycosylation 1430 0.5 +Hex(1)NeuAc(1)@T 453.14824 453.3952 H(27)C(17)N(1)O(13) 453.14824 H(27)C(17)N(1)O(13) O-linked glycosylation 1431 0.5 +Hex(1)NeuAc(1)@S 453.14824 453.3952 H(27)C(17)N(1)O(13) 453.14824 H(27)C(17)N(1)O(13) O-linked glycosylation 1431 0.5 +Hex(1)NeuGc(1)@T 469.143155 469.3946 H(27)C(17)N(1)O(14) 469.143155 H(27)C(17)N(1)O(14) O-linked glycosylation 1432 0.5 +Hex(1)NeuGc(1)@S 469.143155 469.3946 H(27)C(17)N(1)O(14) 469.143155 H(27)C(17)N(1)O(14) O-linked glycosylation 1432 0.5 +HexNAc(3)@T 609.238118 609.5776 H(39)C(24)N(3)O(15) 609.238118 H(39)C(24)N(3)O(15) O-linked glycosylation 1433 0.5 +HexNAc(3)@S 609.238118 609.5776 H(39)C(24)N(3)O(15) 609.238118 H(39)C(24)N(3)O(15) O-linked glycosylation 1433 0.5 +HexNAc(1)NeuAc(1)@T 494.174789 494.4471 H(30)C(19)N(2)O(13) 494.174789 H(30)C(19)N(2)O(13) O-linked glycosylation 1434 0.5 +HexNAc(1)NeuAc(1)@S 494.174789 494.4471 H(30)C(19)N(2)O(13) 494.174789 H(30)C(19)N(2)O(13) O-linked glycosylation 1434 0.5 +HexNAc(1)NeuGc(1)@T 510.169704 510.4465 H(30)C(19)N(2)O(14) 510.169704 H(30)C(19)N(2)O(14) O-linked glycosylation 1435 0.5 +HexNAc(1)NeuGc(1)@S 510.169704 510.4465 H(30)C(19)N(2)O(14) 510.169704 H(30)C(19)N(2)O(14) O-linked glycosylation 1435 0.5 +Hex(2)NeuAc(1)@T 615.201064 615.5358 H(37)C(23)N(1)O(18) 615.201064 H(37)C(23)N(1)O(18) O-linked glycosylation 1444 0.5 +Hex(2)NeuAc(1)@S 615.201064 615.5358 H(37)C(23)N(1)O(18) 615.201064 H(37)C(23)N(1)O(18) O-linked glycosylation 1444 0.5 +Hex(1)HexNAc(1)dHex(1)Me(1)@T 525.205755 525.5009 H(35)C(21)N(1)O(14) 525.205755 H(35)C(21)N(1)O(14) O-linked glycosylation 1436 0.5 +Hex(1)HexNAc(1)dHex(1)Me(1)@S 525.205755 525.5009 H(35)C(21)N(1)O(14) 525.205755 H(35)C(21)N(1)O(14) O-linked glycosylation 1436 0.5 +Hex(1)HexNAc(1)dHex(1)Me(2)@T 539.221405 539.5275 H(37)C(22)N(1)O(14) 539.221405 H(37)C(22)N(1)O(14) O-linked glycosylation 1437 0.5 +Hex(1)HexNAc(1)dHex(1)Me(2)@S 539.221405 539.5275 H(37)C(22)N(1)O(14) 539.221405 H(37)C(22)N(1)O(14) O-linked glycosylation 1437 0.5 +Xlink:DSS[155]@Protein_N-term 155.094629 155.1943 H(13)C(8)N(1)O(2) 0.0 Chemical derivative 1789 0.0 +Xlink:DSS[155]@K 155.094629 155.1943 H(13)C(8)N(1)O(2) 0.0 Chemical derivative 1789 0.0 +Hex(2)HexNAc(1)@N 527.18502 527.4737 H(33)C(20)N(1)O(15) 527.18502 H(33)C(20)N(1)O(15) N-linked glycosylation 1438 0.5 +Hex(2)HexNAc(1)@T 527.18502 527.4737 H(33)C(20)N(1)O(15) 527.18502 H(33)C(20)N(1)O(15) O-linked glycosylation 1438 0.5 +Hex(2)HexNAc(1)@S 527.18502 527.4737 H(33)C(20)N(1)O(15) 527.18502 H(33)C(20)N(1)O(15) O-linked glycosylation 1438 0.5 +Hex(1)HexA(1)HexNAc(1)@T 541.164284 541.4572 H(31)C(20)N(1)O(16) 541.164284 H(31)C(20)N(1)O(16) O-linked glycosylation 1439 0.5 +Hex(1)HexA(1)HexNAc(1)@S 541.164284 541.4572 H(31)C(20)N(1)O(16) 541.164284 H(31)C(20)N(1)O(16) O-linked glycosylation 1439 0.5 +Hex(2)HexNAc(1)Me(1)@T 541.20067 541.5003 H(35)C(21)N(1)O(15) 541.20067 H(35)C(21)N(1)O(15) O-linked glycosylation 1440 0.5 +Hex(2)HexNAc(1)Me(1)@S 541.20067 541.5003 H(35)C(21)N(1)O(15) 541.20067 H(35)C(21)N(1)O(15) O-linked glycosylation 1440 0.5 +Hex(1)Pent(3)@T 558.1796 558.4845 
H(34)C(21)O(17) 558.1796 H(34)C(21)O(17) O-linked glycosylation 1441 0.5 +Hex(1)Pent(3)@S 558.1796 558.4845 H(34)C(21)O(17) 558.1796 H(34)C(21)O(17) O-linked glycosylation 1441 0.5 +Hex(1)NeuAc(1)Pent(1)@S 585.190499 585.5098 H(35)C(22)N(1)O(17) 585.190499 H(35)C(22)N(1)O(17) O-linked glycosylation 1442 0.5 +Hex(1)NeuAc(1)Pent(1)@T 585.190499 585.5098 H(35)C(22)N(1)O(17) 585.190499 H(35)C(22)N(1)O(17) O-linked glycosylation 1442 0.5 +Hex(2)HexNAc(1)Sulf(1)@T 607.141834 607.5369 H(33)C(20)N(1)O(18)S(1) 607.141834 H(33)C(20)N(1)O(18)S(1) O-linked glycosylation 1443 0.5 +Hex(2)HexNAc(1)Sulf(1)@S 607.141834 607.5369 H(33)C(20)N(1)O(18)S(1) 607.141834 H(33)C(20)N(1)O(18)S(1) O-linked glycosylation 1443 0.5 +dHex(2)Hex(2)@S 616.221465 616.5636 H(40)C(24)O(18) 616.221465 H(40)C(24)O(18) O-linked glycosylation 1445 0.5 +dHex(2)Hex(2)@T 616.221465 616.5636 H(40)C(24)O(18) 616.221465 H(40)C(24)O(18) O-linked glycosylation 1445 0.5 +dHex(1)Hex(2)HexA(1)@S 646.195644 646.5465 H(38)C(24)O(20) 646.195644 H(38)C(24)O(20) O-linked glycosylation 1446 0.5 +dHex(1)Hex(2)HexA(1)@T 646.195644 646.5465 H(38)C(24)O(20) 646.195644 H(38)C(24)O(20) O-linked glycosylation 1446 0.5 +Hex(1)HexNAc(2)Sulf(1)@T 648.168383 648.5888 H(36)C(22)N(2)O(18)S(1) 648.168383 H(36)C(22)N(2)O(18)S(1) O-linked glycosylation 1447 0.5 +Hex(1)HexNAc(2)Sulf(1)@S 648.168383 648.5888 H(36)C(22)N(2)O(18)S(1) 648.168383 H(36)C(22)N(2)O(18)S(1) O-linked glycosylation 1447 0.5 +Hex(4)@S 648.211294 648.5624 H(40)C(24)O(20) 648.211294 H(40)C(24)O(20) O-linked glycosylation 1448 0.5 +Hex(4)@T 648.211294 648.5624 H(40)C(24)O(20) 648.211294 H(40)C(24)O(20) O-linked glycosylation 1448 0.5 +dHex(1)Hex(2)HexNAc(2)Pent(1)@N 1008.36456 1008.9221 H(64)C(39)N(2)O(28) 1008.36456 H(64)C(39)N(2)O(28) N-linked glycosylation 1449 0.5 +Hex(2)HexNAc(2)NeuAc(1)@N 1021.359809 1021.9208 H(63)C(39)N(3)O(28) 1021.359809 H(63)C(39)N(3)O(28) N-linked glycosylation 1450 0.5 +Hex(2)HexNAc(2)NeuAc(1)@S 1021.359809 1021.9208 H(63)C(39)N(3)O(28) 1021.359809 H(63)C(39)N(3)O(28) O-linked glycosylation 1450 0.5 +Hex(2)HexNAc(2)NeuAc(1)@T 1021.359809 1021.9208 H(63)C(39)N(3)O(28) 1021.359809 H(63)C(39)N(3)O(28) O-linked glycosylation 1450 0.5 +Hex(3)HexNAc(2)Pent(1)@N 1024.359475 1024.9215 H(64)C(39)N(2)O(29) 1024.359475 H(64)C(39)N(2)O(29) N-linked glycosylation 1451 0.5 +Hex(4)HexNAc(2)@N 1054.370039 1054.9474 H(66)C(40)N(2)O(30) 1054.370039 H(66)C(40)N(2)O(30) N-linked glycosylation 1452 0.5 +dHex(1)Hex(4)HexNAc(1)Pent(1)@N 1129.390834 1130.0107 H(71)C(43)N(1)O(33) 1129.390834 H(71)C(43)N(1)O(33) N-linked glycosylation 1453 0.5 +dHex(1)Hex(3)HexNAc(2)Pent(1)@N 1170.417383 1171.0627 H(74)C(45)N(2)O(33) 1170.417383 H(74)C(45)N(2)O(33) N-linked glycosylation 1454 0.5 +Hex(3)HexNAc(2)NeuAc(1)@N 1183.412632 1184.0614 H(73)C(45)N(3)O(33) 1183.412632 H(73)C(45)N(3)O(33) N-linked glycosylation 1455 0.5 +Hex(4)HexNAc(2)Pent(1)@N 1186.412298 1187.0621 H(74)C(45)N(2)O(34) 1186.412298 H(74)C(45)N(2)O(34) N-linked glycosylation 1456 0.5 +Hex(3)HexNAc(3)Pent(1)@N 1227.438847 1228.114 H(77)C(47)N(3)O(34) 1227.438847 H(77)C(47)N(3)O(34) N-linked glycosylation 1457 0.5 +Hex(5)HexNAc(2)Phos(1)@N 1296.389194 1297.0679 H(77)C(46)N(2)O(38)P(1) 1296.389194 H(77)C(46)N(2)O(38)P(1) N-linked glycosylation 1458 0.5 +dHex(1)Hex(4)HexNAc(2)Pent(1)@N 1332.470207 1333.2033 H(84)C(51)N(2)O(38) 1332.470207 H(84)C(51)N(2)O(38) N-linked glycosylation 1459 0.5 +Hex(7)HexNAc(1)@N 1337.449137 1338.1767 H(83)C(50)N(1)O(40) 1337.449137 H(83)C(50)N(1)O(40) N-linked glycosylation 1460 0.5 +Hex(4)HexNAc(2)NeuAc(1)@N 
1345.465456 1346.202 H(83)C(51)N(3)O(38) 1345.465456 H(83)C(51)N(3)O(38) N-linked glycosylation 1461 0.5 +Hex(4)HexNAc(2)NeuAc(1)@S 1345.465456 1346.202 H(83)C(51)N(3)O(38) 1345.465456 H(83)C(51)N(3)O(38) O-linked glycosylation 1461 0.5 +Hex(4)HexNAc(2)NeuAc(1)@T 1345.465456 1346.202 H(83)C(51)N(3)O(38) 1345.465456 H(83)C(51)N(3)O(38) O-linked glycosylation 1461 0.5 +dHex(1)Hex(5)HexNAc(2)@N 1362.480772 1363.2292 H(86)C(52)N(2)O(39) 1362.480772 H(86)C(52)N(2)O(39) N-linked glycosylation 1462 0.5 +dHex(1)Hex(3)HexNAc(3)Pent(1)@N 1373.496756 1374.2552 H(87)C(53)N(3)O(38) 1373.496756 H(87)C(53)N(3)O(38) N-linked glycosylation 1463 0.5 +Hex(3)HexNAc(4)Sulf(1)@N 1378.432776 1379.2551 H(82)C(50)N(4)O(38)S(1) 1378.432776 H(82)C(50)N(4)O(38)S(1) N-linked glycosylation 1464 0.5 +Hex(6)HexNAc(2)@N 1378.475686 1379.2286 H(86)C(52)N(2)O(40) 1378.475686 H(86)C(52)N(2)O(40) N-linked glycosylation 1465 0.5 +Hex(4)HexNAc(3)Pent(1)@N 1389.491671 1390.2546 H(87)C(53)N(3)O(39) 1389.491671 H(87)C(53)N(3)O(39) N-linked glycosylation 1466 0.5 +dHex(1)Hex(4)HexNAc(3)@N 1403.507321 1404.2812 H(89)C(54)N(3)O(39) 1403.507321 H(89)C(54)N(3)O(39) N-linked glycosylation 1467 0.5 +Hex(5)HexNAc(3)@N 1419.502235 1420.2806 H(89)C(54)N(3)O(40) 1419.502235 H(89)C(54)N(3)O(40) N-linked glycosylation 1468 0.5 +Hex(3)HexNAc(4)Pent(1)@N 1430.51822 1431.3065 H(90)C(55)N(4)O(39) 1430.51822 H(90)C(55)N(4)O(39) N-linked glycosylation 1469 0.5 +Hex(6)HexNAc(2)Phos(1)@N 1458.442017 1459.2085 H(87)C(52)N(2)O(43)P(1) 1458.442017 H(87)C(52)N(2)O(43)P(1) N-linked glycosylation 1470 0.5 +dHex(1)Hex(4)HexNAc(3)Sulf(1)@N 1483.464135 1484.3444 H(89)C(54)N(3)O(42)S(1) 1483.464135 H(89)C(54)N(3)O(42)S(1) N-linked glycosylation 1471 0.5 +dHex(1)Hex(5)HexNAc(2)Pent(1)@N 1494.52303 1495.3439 H(94)C(57)N(2)O(43) 1494.52303 H(94)C(57)N(2)O(43) N-linked glycosylation 1472 0.5 +Hex(8)HexNAc(1)@N 1499.501961 1500.3173 H(93)C(56)N(1)O(45) 1499.501961 H(93)C(56)N(1)O(45) N-linked glycosylation 1473 0.5 +dHex(1)Hex(3)HexNAc(3)Pent(2)@N 1505.539015 1506.3698 H(95)C(58)N(3)O(42) 1505.539015 H(95)C(58)N(3)O(42) N-linked glycosylation 1474 0.5 +dHex(2)Hex(3)HexNAc(3)Pent(1)@N 1519.554665 1520.3964 H(97)C(59)N(3)O(42) 1519.554665 H(97)C(59)N(3)O(42) N-linked glycosylation 1475 0.5 +dHex(1)Hex(3)HexNAc(4)Sulf(1)@N 1524.490684 1525.3963 H(92)C(56)N(4)O(42)S(1) 1524.490684 H(92)C(56)N(4)O(42)S(1) N-linked glycosylation 1476 0.5 +dHex(1)Hex(6)HexNAc(2)@N 1524.533595 1525.3698 H(96)C(58)N(2)O(44) 1524.533595 H(96)C(58)N(2)O(44) N-linked glycosylation 1477 0.5 +dHex(1)Hex(4)HexNAc(3)Pent(1)@N 1535.549579 1536.3958 H(97)C(59)N(3)O(43) 1535.549579 H(97)C(59)N(3)O(43) N-linked glycosylation 1478 0.5 +Hex(4)HexNAc(4)Sulf(1)@N 1540.485599 1541.3957 H(92)C(56)N(4)O(43)S(1) 1540.485599 H(92)C(56)N(4)O(43)S(1) N-linked glycosylation 1479 0.5 +Hex(7)HexNAc(2)@N 1540.52851 1541.3692 H(96)C(58)N(2)O(45) 1540.52851 H(96)C(58)N(2)O(45) N-linked glycosylation 1480 0.5 +dHex(2)Hex(4)HexNAc(3)@N 1549.56523 1550.4224 H(99)C(60)N(3)O(43) 1549.56523 H(99)C(60)N(3)O(43) N-linked glycosylation 1481 0.5 +Hex(5)HexNAc(3)Pent(1)@N 1551.544494 1552.3952 H(97)C(59)N(3)O(44) 1551.544494 H(97)C(59)N(3)O(44) N-linked glycosylation 1482 0.5 +Hex(4)HexNAc(3)NeuGc(1)@N 1564.539743 1565.3939 H(96)C(59)N(4)O(44) 1564.539743 H(96)C(59)N(4)O(44) N-linked glycosylation 1483 0.5 +dHex(1)Hex(5)HexNAc(3)@N 1565.560144 1566.4218 H(99)C(60)N(3)O(44) 1565.560144 H(99)C(60)N(3)O(44) N-linked glycosylation 1484 0.5 +dHex(1)Hex(3)HexNAc(4)Pent(1)@N 1576.576129 1577.4477 H(100)C(61)N(4)O(43) 1576.576129 
H(100)C(61)N(4)O(43) N-linked glycosylation 1485 0.5 +Hex(3)HexNAc(5)Sulf(1)@N 1581.512148 1582.4476 H(95)C(58)N(5)O(43)S(1) 1581.512148 H(95)C(58)N(5)O(43)S(1) N-linked glycosylation 1486 0.5 +Hex(6)HexNAc(3)@N 1581.555059 1582.4212 H(99)C(60)N(3)O(45) 1581.555059 H(99)C(60)N(3)O(45) N-linked glycosylation 1487 0.5 +Hex(3)HexNAc(4)NeuAc(1)@N 1589.571378 1590.4465 H(99)C(61)N(5)O(43) 1589.571378 H(99)C(61)N(5)O(43) N-linked glycosylation 1488 0.5 +Hex(4)HexNAc(4)Pent(1)@N 1592.571043 1593.4471 H(100)C(61)N(4)O(44) 1592.571043 H(100)C(61)N(4)O(44) N-linked glycosylation 1489 0.5 +Hex(7)HexNAc(2)Phos(1)@N 1620.494841 1621.3491 H(97)C(58)N(2)O(48)P(1) 1620.494841 H(97)C(58)N(2)O(48)P(1) N-linked glycosylation 1490 0.5 +Hex(4)HexNAc(4)Me(2)Pent(1)@N 1620.602343 1621.5003 H(104)C(63)N(4)O(44) 1620.602343 H(104)C(63)N(4)O(44) N-linked glycosylation 1491 0.5 +dHex(1)Hex(3)HexNAc(3)Pent(3)@N 1637.581274 1638.4844 H(103)C(63)N(3)O(46) 1637.581274 H(103)C(63)N(3)O(46) N-linked glycosylation 1492 0.5 +dHex(1)Hex(5)HexNAc(3)Sulf(1)@N 1645.516959 1646.485 H(99)C(60)N(3)O(47)S(1) 1645.516959 H(99)C(60)N(3)O(47)S(1) N-linked glycosylation 1493 0.5 +dHex(2)Hex(3)HexNAc(3)Pent(2)@N 1651.596924 1652.511 H(105)C(64)N(3)O(46) 1651.596924 H(105)C(64)N(3)O(46) N-linked glycosylation 1494 0.5 +Hex(6)HexNAc(3)Phos(1)@N 1661.52139 1662.4011 H(100)C(60)N(3)O(48)P(1) 1661.52139 H(100)C(60)N(3)O(48)P(1) N-linked glycosylation 1495 0.5 +Hex(4)HexNAc(5)@N 1663.608157 1664.525 H(105)C(64)N(5)O(45) 1663.608157 H(105)C(64)N(5)O(45) N-linked glycosylation 1496 0.5 +dHex(3)Hex(3)HexNAc(3)Pent(1)@N 1665.612574 1666.5376 H(107)C(65)N(3)O(46) 1665.612574 H(107)C(65)N(3)O(46) N-linked glycosylation 1497 0.5 +dHex(2)Hex(4)HexNAc(3)Pent(1)@N 1681.607488 1682.537 H(107)C(65)N(3)O(47) 1681.607488 H(107)C(65)N(3)O(47) N-linked glycosylation 1498 0.5 +dHex(1)Hex(4)HexNAc(4)Sulf(1)@N 1686.543508 1687.5369 H(102)C(62)N(4)O(47)S(1) 1686.543508 H(102)C(62)N(4)O(47)S(1) N-linked glycosylation 1499 0.5 +dHex(1)Hex(7)HexNAc(2)@N 1686.586419 1687.5104 H(106)C(64)N(2)O(49) 1686.586419 H(106)C(64)N(2)O(49) N-linked glycosylation 1500 0.5 +dHex(1)Hex(4)HexNAc(3)NeuAc(1)@N 1694.602737 1695.5357 H(106)C(65)N(4)O(47) 1694.602737 H(106)C(65)N(4)O(47) N-linked glycosylation 1501 0.5 +dHex(1)Hex(4)HexNAc(3)NeuAc(1)@S 1694.602737 1695.5357 H(106)C(65)N(4)O(47) 1694.602737 H(106)C(65)N(4)O(47) O-linked glycosylation 1501 0.5 +dHex(1)Hex(4)HexNAc(3)NeuAc(1)@T 1694.602737 1695.5357 H(106)C(65)N(4)O(47) 1694.602737 H(106)C(65)N(4)O(47) O-linked glycosylation 1501 0.5 +Hex(7)HexNAc(2)Phos(2)@N 1700.461172 1701.329 H(98)C(58)N(2)O(51)P(2) 1700.461172 H(98)C(58)N(2)O(51)P(2) N-linked glycosylation 1502 0.5 +Hex(5)HexNAc(4)Sulf(1)@N 1702.538423 1703.5363 H(102)C(62)N(4)O(48)S(1) 1702.538423 H(102)C(62)N(4)O(48)S(1) N-linked glycosylation 1503 0.5 +Hex(8)HexNAc(2)@N 1702.581333 1703.5098 H(106)C(64)N(2)O(50) 1702.581333 H(106)C(64)N(2)O(50) N-linked glycosylation 1504 0.5 +dHex(1)Hex(3)HexNAc(4)Pent(2)@N 1708.618387 1709.5623 H(108)C(66)N(4)O(47) 1708.618387 H(108)C(66)N(4)O(47) N-linked glycosylation 1505 0.5 +dHex(1)Hex(4)HexNAc(3)NeuGc(1)@N 1710.597652 1711.5351 H(106)C(65)N(4)O(48) 1710.597652 H(106)C(65)N(4)O(48) N-linked glycosylation 1506 0.5 +dHex(2)Hex(3)HexNAc(4)Pent(1)@N 1722.634037 1723.5889 H(110)C(67)N(4)O(47) 1722.634037 H(110)C(67)N(4)O(47) N-linked glycosylation 1507 0.5 +dHex(1)Hex(3)HexNAc(5)Sulf(1)@N 1727.570057 1728.5888 H(105)C(64)N(5)O(47)S(1) 1727.570057 H(105)C(64)N(5)O(47)S(1) N-linked glycosylation 1508 0.5 +dHex(1)Hex(6)HexNAc(3)@N 
1727.612968 1728.5624 H(109)C(66)N(3)O(49) 1727.612968 H(109)C(66)N(3)O(49) N-linked glycosylation 1509 0.5 +dHex(1)Hex(3)HexNAc(4)NeuAc(1)@N 1735.629286 1736.5877 H(109)C(67)N(5)O(47) 1735.629286 H(109)C(67)N(5)O(47) N-linked glycosylation 1510 0.5 +dHex(3)Hex(3)HexNAc(4)@N 1736.649688 1737.6155 H(112)C(68)N(4)O(47) 1736.649688 H(112)C(68)N(4)O(47) N-linked glycosylation 1511 0.5 +dHex(1)Hex(4)HexNAc(4)Pent(1)@N 1738.628952 1739.5883 H(110)C(67)N(4)O(48) 1738.628952 H(110)C(67)N(4)O(48) N-linked glycosylation 1512 0.5 +Hex(4)HexNAc(5)Sulf(1)@N 1743.564972 1744.5882 H(105)C(64)N(5)O(48)S(1) 1743.564972 H(105)C(64)N(5)O(48)S(1) N-linked glycosylation 1513 0.5 +Hex(7)HexNAc(3)@N 1743.607882 1744.5618 H(109)C(66)N(3)O(50) 1743.607882 H(109)C(66)N(3)O(50) N-linked glycosylation 1514 0.5 +dHex(1)Hex(4)HexNAc(3)NeuAc(1)Sulf(1)@N 1774.559552 1775.5989 H(106)C(65)N(4)O(50)S(1) 1774.559552 H(106)C(65)N(4)O(50)S(1) N-linked glycosylation 1515 0.5 +Hex(5)HexNAc(4)Me(2)Pent(1)@N 1782.655167 1783.6409 H(114)C(69)N(4)O(49) 1782.655167 H(114)C(69)N(4)O(49) N-linked glycosylation 1516 0.5 +Hex(3)HexNAc(6)Sulf(1)@N 1784.591521 1785.6401 H(108)C(66)N(6)O(48)S(1) 1784.591521 H(108)C(66)N(6)O(48)S(1) N-linked glycosylation 1517 0.5 +dHex(1)Hex(6)HexNAc(3)Sulf(1)@N 1807.569782 1808.6256 H(109)C(66)N(3)O(52)S(1) 1807.569782 H(109)C(66)N(3)O(52)S(1) N-linked glycosylation 1518 0.5 +dHex(1)Hex(4)HexNAc(5)@N 1809.666066 1810.6662 H(115)C(70)N(5)O(49) 1809.666066 H(115)C(70)N(5)O(49) N-linked glycosylation 1519 0.5 +dHex(1)Hex(5)HexA(1)HexNAc(3)Sulf(1)@N 1821.549047 1822.6091 H(107)C(66)N(3)O(53)S(1) 1821.549047 H(107)C(66)N(3)O(53)S(1) N-linked glycosylation 1520 0.5 +Hex(7)HexNAc(3)Phos(1)@N 1823.574213 1824.5417 H(110)C(66)N(3)O(53)P(1) 1823.574213 H(110)C(66)N(3)O(53)P(1) N-linked glycosylation 1521 0.5 +Hex(6)HexNAc(4)Me(3)@N 1826.681382 1827.6934 H(118)C(71)N(4)O(50) 1826.681382 H(118)C(71)N(4)O(50) N-linked glycosylation 1522 0.5 +dHex(2)Hex(4)HexNAc(4)Sulf(1)@N 1832.601417 1833.6781 H(112)C(68)N(4)O(51)S(1) 1832.601417 H(112)C(68)N(4)O(51)S(1) N-linked glycosylation 1523 0.5 +Hex(4)HexNAc(3)NeuAc(2)@N 1839.640245 1840.6491 H(113)C(70)N(5)O(51) 1839.640245 H(113)C(70)N(5)O(51) N-linked glycosylation 1524 0.5 +dHex(1)Hex(3)HexNAc(4)Pent(3)@N 1840.660646 1841.6769 H(116)C(71)N(4)O(51) 1840.660646 H(116)C(71)N(4)O(51) N-linked glycosylation 1525 0.5 +dHex(2)Hex(5)HexNAc(3)Pent(1)@N 1843.660312 1844.6776 H(117)C(71)N(3)O(52) 1843.660312 H(117)C(71)N(3)O(52) N-linked glycosylation 1526 0.5 +dHex(1)Hex(5)HexNAc(4)Sulf(1)@N 1848.596331 1849.6775 H(112)C(68)N(4)O(52)S(1) 1848.596331 H(112)C(68)N(4)O(52)S(1) N-linked glycosylation 1527 0.5 +dHex(2)Hex(3)HexNAc(4)Pent(2)@N 1854.676296 1855.7035 H(118)C(72)N(4)O(51) 1854.676296 H(118)C(72)N(4)O(51) N-linked glycosylation 1528 0.5 +dHex(1)Hex(5)HexNAc(3)NeuAc(1)@N 1856.655561 1857.6763 H(116)C(71)N(4)O(52) 1856.655561 H(116)C(71)N(4)O(52) N-linked glycosylation 1529 0.5 +Hex(3)HexNAc(6)Sulf(2)@N 1864.548335 1865.7033 H(108)C(66)N(6)O(51)S(2) 1864.548335 H(108)C(66)N(6)O(51)S(2) N-linked glycosylation 1530 0.5 +Hex(9)HexNAc(2)@N 1864.634157 1865.6504 H(116)C(70)N(2)O(55) 1864.634157 H(116)C(70)N(2)O(55) N-linked glycosylation 1531 0.5 +Hex(4)HexNAc(6)@N 1866.68753 1867.7175 H(118)C(72)N(6)O(50) 1866.68753 H(118)C(72)N(6)O(50) N-linked glycosylation 1532 0.5 +dHex(3)Hex(3)HexNAc(4)Pent(1)@N 1868.691946 1869.7301 H(120)C(73)N(4)O(51) 1868.691946 H(120)C(73)N(4)O(51) N-linked glycosylation 1533 0.5 +dHex(1)Hex(5)HexNAc(3)NeuGc(1)@N 1872.650475 1873.6757 H(116)C(71)N(4)O(53) 
1872.650475 H(116)C(71)N(4)O(53) N-linked glycosylation 1534 0.5 +dHex(2)Hex(4)HexNAc(4)Pent(1)@N 1884.686861 1885.7295 H(120)C(73)N(4)O(52) 1884.686861 H(120)C(73)N(4)O(52) N-linked glycosylation 1535 0.5 +dHex(1)Hex(4)HexNAc(5)Sulf(1)@N 1889.62288 1890.7294 H(115)C(70)N(5)O(52)S(1) 1889.62288 H(115)C(70)N(5)O(52)S(1) N-linked glycosylation 1536 0.5 +dHex(1)Hex(7)HexNAc(3)@N 1889.665791 1890.703 H(119)C(72)N(3)O(54) 1889.665791 H(119)C(72)N(3)O(54) N-linked glycosylation 1537 0.5 +dHex(1)Hex(5)HexNAc(4)Pent(1)@N 1900.681776 1901.7289 H(120)C(73)N(4)O(53) 1900.681776 H(120)C(73)N(4)O(53) N-linked glycosylation 1538 0.5 +dHex(1)Hex(5)HexA(1)HexNAc(3)Sulf(2)@N 1901.505861 1902.6723 H(107)C(66)N(3)O(56)S(2) 1901.505861 H(107)C(66)N(3)O(56)S(2) N-linked glycosylation 1539 0.5 +Hex(3)HexNAc(7)@N 1907.714079 1908.7694 H(121)C(74)N(7)O(50) 1907.714079 H(121)C(74)N(7)O(50) N-linked glycosylation 1540 0.5 +dHex(2)Hex(5)HexNAc(4)@N 1914.697426 1915.7555 H(122)C(74)N(4)O(53) 1914.697426 H(122)C(74)N(4)O(53) N-linked glycosylation 1541 0.5 +dHex(2)Hex(4)HexNAc(3)NeuAc(1)Sulf(1)@N 1920.617461 1921.7401 H(116)C(71)N(4)O(54)S(1) 1920.617461 H(116)C(71)N(4)O(54)S(1) N-linked glycosylation 1542 0.5 +dHex(1)Hex(5)HexNAc(4)Sulf(2)@N 1928.553146 1929.7407 H(112)C(68)N(4)O(55)S(2) 1928.553146 H(112)C(68)N(4)O(55)S(2) N-linked glycosylation 1543 0.5 +dHex(1)Hex(5)HexNAc(4)Me(2)Pent(1)@N 1928.713076 1929.7821 H(124)C(75)N(4)O(53) 1928.713076 H(124)C(75)N(4)O(53) N-linked glycosylation 1544 0.5 +Hex(5)HexNAc(4)NeuGc(1)@N 1929.671939 1930.7271 H(119)C(73)N(5)O(54) 1929.671939 H(119)C(73)N(5)O(54) N-linked glycosylation 1545 0.5 +dHex(1)Hex(3)HexNAc(6)Sulf(1)@N 1930.64943 1931.7813 H(118)C(72)N(6)O(52)S(1) 1930.64943 H(118)C(72)N(6)O(52)S(1) N-linked glycosylation 1546 0.5 +dHex(1)Hex(6)HexNAc(4)@N 1930.69234 1931.7549 H(122)C(74)N(4)O(54) 1930.69234 H(122)C(74)N(4)O(54) N-linked glycosylation 1547 0.5 +dHex(1)Hex(5)HexNAc(3)NeuAc(1)Sulf(1)@N 1936.612375 1937.7395 H(116)C(71)N(4)O(55)S(1) 1936.612375 H(116)C(71)N(4)O(55)S(1) N-linked glycosylation 1548 0.5 +Hex(7)HexNAc(4)@N 1946.687255 1947.7543 H(122)C(74)N(4)O(55) 1946.687255 H(122)C(74)N(4)O(55) N-linked glycosylation 1549 0.5 +dHex(1)Hex(5)HexNAc(3)NeuGc(1)Sulf(1)@N 1952.60729 1953.7389 H(116)C(71)N(4)O(56)S(1) 1952.60729 H(116)C(71)N(4)O(56)S(1) N-linked glycosylation 1550 0.5 +Hex(4)HexNAc(5)NeuAc(1)@N 1954.703574 1955.7796 H(122)C(75)N(6)O(53) 1954.703574 H(122)C(75)N(6)O(53) N-linked glycosylation 1551 0.5 +Hex(6)HexNAc(4)Me(3)Pent(1)@N 1958.72364 1959.808 H(126)C(76)N(4)O(54) 1958.72364 H(126)C(76)N(4)O(54) N-linked glycosylation 1552 0.5 +dHex(1)Hex(7)HexNAc(3)Sulf(1)@N 1969.622606 1970.7662 H(119)C(72)N(3)O(57)S(1) 1969.622606 H(119)C(72)N(3)O(57)S(1) N-linked glycosylation 1553 0.5 +dHex(1)Hex(7)HexNAc(3)Phos(1)@N 1969.632122 1970.6829 H(120)C(72)N(3)O(57)P(1) 1969.632122 H(120)C(72)N(3)O(57)P(1) N-linked glycosylation 1554 0.5 +dHex(1)Hex(5)HexNAc(5)@N 1971.718889 1972.8068 H(125)C(76)N(5)O(54) 1971.718889 H(125)C(76)N(5)O(54) N-linked glycosylation 1555 0.5 +dHex(1)Hex(4)HexNAc(4)NeuAc(1)Sulf(1)@N 1977.638925 1978.7915 H(119)C(73)N(5)O(55)S(1) 1977.638925 H(119)C(73)N(5)O(55)S(1) N-linked glycosylation 1556 0.5 +dHex(3)Hex(4)HexNAc(4)Sulf(1)@N 1978.659326 1979.8193 H(122)C(74)N(4)O(55)S(1) 1978.659326 H(122)C(74)N(4)O(55)S(1) N-linked glycosylation 1557 0.5 +Hex(3)HexNAc(7)Sulf(1)@N 1987.670893 1988.8326 H(121)C(74)N(7)O(53)S(1) 1987.670893 H(121)C(74)N(7)O(53)S(1) N-linked glycosylation 1558 0.5 +Hex(6)HexNAc(5)@N 1987.713804 1988.8062 
H(125)C(76)N(5)O(55) 1987.713804 H(125)C(76)N(5)O(55) N-linked glycosylation 1559 0.5 +Hex(5)HexNAc(4)NeuAc(1)Sulf(1)@N 1993.633839 1994.7909 H(119)C(73)N(5)O(56)S(1) 1993.633839 H(119)C(73)N(5)O(56)S(1) N-linked glycosylation 1560 0.5 +Hex(3)HexNAc(6)NeuAc(1)@N 1995.730123 1996.8315 H(125)C(77)N(7)O(53) 1995.730123 H(125)C(77)N(7)O(53) N-linked glycosylation 1561 0.5 +dHex(2)Hex(3)HexNAc(6)@N 1996.750524 1997.8593 H(128)C(78)N(6)O(53) 1996.750524 H(128)C(78)N(6)O(53) N-linked glycosylation 1562 0.5 +Hex(1)HexNAc(1)NeuGc(1)@S 672.222527 672.5871 H(40)C(25)N(2)O(19) 672.222527 H(40)C(25)N(2)O(19) O-linked glycosylation 1563 0.5 +Hex(1)HexNAc(1)NeuGc(1)@T 672.222527 672.5871 H(40)C(25)N(2)O(19) 672.222527 H(40)C(25)N(2)O(19) O-linked glycosylation 1563 0.5 +dHex(1)Hex(2)HexNAc(1)@S 673.242928 673.6149 H(43)C(26)N(1)O(19) 673.242928 H(43)C(26)N(1)O(19) O-linked glycosylation 1564 0.5 +dHex(1)Hex(2)HexNAc(1)@T 673.242928 673.6149 H(43)C(26)N(1)O(19) 673.242928 H(43)C(26)N(1)O(19) O-linked glycosylation 1564 0.5 +HexNAc(3)Sulf(1)@T 689.194932 689.6408 H(39)C(24)N(3)O(18)S(1) 689.194932 H(39)C(24)N(3)O(18)S(1) O-linked glycosylation 1565 0.5 +HexNAc(3)Sulf(1)@S 689.194932 689.6408 H(39)C(24)N(3)O(18)S(1) 689.194932 H(39)C(24)N(3)O(18)S(1) O-linked glycosylation 1565 0.5 +Hex(3)HexNAc(1)@T 689.237843 689.6143 H(43)C(26)N(1)O(20) 689.237843 H(43)C(26)N(1)O(20) O-linked glycosylation 1566 0.5 +Hex(3)HexNAc(1)@S 689.237843 689.6143 H(43)C(26)N(1)O(20) 689.237843 H(43)C(26)N(1)O(20) O-linked glycosylation 1566 0.5 +Hex(3)HexNAc(1)@N 689.237843 689.6143 H(43)C(26)N(1)O(20) 689.237843 H(43)C(26)N(1)O(20) N-linked glycosylation 1566 0.5 +Hex(1)HexNAc(1)Kdn(1)Sulf(1)@T 695.157878 695.599 H(37)C(23)N(1)O(21)S(1) 695.157878 H(37)C(23)N(1)O(21)S(1) O-linked glycosylation 1567 0.5 +Hex(1)HexNAc(1)Kdn(1)Sulf(1)@S 695.157878 695.599 H(37)C(23)N(1)O(21)S(1) 695.157878 H(37)C(23)N(1)O(21)S(1) O-linked glycosylation 1567 0.5 +HexNAc(2)NeuAc(1)@S 697.254162 697.6396 H(43)C(27)N(3)O(18) 697.254162 H(43)C(27)N(3)O(18) O-linked glycosylation 1568 0.5 +HexNAc(2)NeuAc(1)@T 697.254162 697.6396 H(43)C(27)N(3)O(18) 697.254162 H(43)C(27)N(3)O(18) O-linked glycosylation 1568 0.5 +HexNAc(1)Kdn(2)@T 703.217108 703.5978 H(41)C(26)N(1)O(21) 703.217108 H(41)C(26)N(1)O(21) O-linked glycosylation 1570 0.5 +HexNAc(1)Kdn(2)@S 703.217108 703.5978 H(41)C(26)N(1)O(21) 703.217108 H(41)C(26)N(1)O(21) O-linked glycosylation 1570 0.5 +Hex(3)HexNAc(1)Me(1)@S 703.253493 703.6409 H(45)C(27)N(1)O(20) 703.253493 H(45)C(27)N(1)O(20) O-linked glycosylation 1571 0.5 +Hex(3)HexNAc(1)Me(1)@T 703.253493 703.6409 H(45)C(27)N(1)O(20) 703.253493 H(45)C(27)N(1)O(20) O-linked glycosylation 1571 0.5 +Hex(2)HexA(1)Pent(1)Sulf(1)@T 712.136808 712.5831 H(36)C(23)O(23)S(1) 712.136808 H(36)C(23)O(23)S(1) O-linked glycosylation 1572 0.5 +Hex(2)HexA(1)Pent(1)Sulf(1)@S 712.136808 712.5831 H(36)C(23)O(23)S(1) 712.136808 H(36)C(23)O(23)S(1) O-linked glycosylation 1572 0.5 +HexNAc(2)NeuGc(1)@S 713.249076 713.639 H(43)C(27)N(3)O(19) 713.249076 H(43)C(27)N(3)O(19) O-linked glycosylation 1573 0.5 +HexNAc(2)NeuGc(1)@T 713.249076 713.639 H(43)C(27)N(3)O(19) 713.249076 H(43)C(27)N(3)O(19) O-linked glycosylation 1573 0.5 +Hex(4)Phos(1)@T 728.177625 728.5423 H(41)C(24)O(23)P(1) 728.177625 H(41)C(24)O(23)P(1) O-linked glycosylation 1575 0.5 +Hex(4)Phos(1)@S 728.177625 728.5423 H(41)C(24)O(23)P(1) 728.177625 H(41)C(24)O(23)P(1) O-linked glycosylation 1575 0.5 +Hex(1)HexNAc(1)NeuAc(1)Sulf(1)@T 736.184427 736.6509 H(40)C(25)N(2)O(21)S(1) 736.184427 H(40)C(25)N(2)O(21)S(1) 
O-linked glycosylation 1577 0.5 +Hex(1)HexNAc(1)NeuAc(1)Sulf(1)@S 736.184427 736.6509 H(40)C(25)N(2)O(21)S(1) 736.184427 H(40)C(25)N(2)O(21)S(1) O-linked glycosylation 1577 0.5 +Hex(1)HexA(1)HexNAc(2)@S 744.243657 744.6498 H(44)C(28)N(2)O(21) 744.243657 H(44)C(28)N(2)O(21) O-linked glycosylation 1578 0.5 +Hex(1)HexA(1)HexNAc(2)@T 744.243657 744.6498 H(44)C(28)N(2)O(21) 744.243657 H(44)C(28)N(2)O(21) O-linked glycosylation 1578 0.5 +dHex(1)Hex(2)HexNAc(1)Sulf(1)@T 753.199743 753.6781 H(43)C(26)N(1)O(22)S(1) 753.199743 H(43)C(26)N(1)O(22)S(1) O-linked glycosylation 1579 0.5 +dHex(1)Hex(2)HexNAc(1)Sulf(1)@S 753.199743 753.6781 H(43)C(26)N(1)O(22)S(1) 753.199743 H(43)C(26)N(1)O(22)S(1) O-linked glycosylation 1579 0.5 +dHex(1)HexNAc(3)@S 755.296027 755.7188 H(49)C(30)N(3)O(19) 755.296027 H(49)C(30)N(3)O(19) O-linked glycosylation 1580 0.5 +dHex(1)HexNAc(3)@T 755.296027 755.7188 H(49)C(30)N(3)O(19) 755.296027 H(49)C(30)N(3)O(19) O-linked glycosylation 1580 0.5 +dHex(1)Hex(1)HexNAc(1)Kdn(1)@T 761.258973 761.677 H(47)C(29)N(1)O(22) 761.258973 H(47)C(29)N(1)O(22) O-linked glycosylation 1581 0.5 +dHex(1)Hex(1)HexNAc(1)Kdn(1)@S 761.258973 761.677 H(47)C(29)N(1)O(22) 761.258973 H(47)C(29)N(1)O(22) O-linked glycosylation 1581 0.5 +Hex(1)HexNAc(3)@S 771.290941 771.7182 H(49)C(30)N(3)O(20) 771.290941 H(49)C(30)N(3)O(20) O-linked glycosylation 1582 0.5 +Hex(1)HexNAc(3)@T 771.290941 771.7182 H(49)C(30)N(3)O(20) 771.290941 H(49)C(30)N(3)O(20) O-linked glycosylation 1582 0.5 +HexNAc(2)NeuAc(1)Sulf(1)@T 777.210976 777.7028 H(43)C(27)N(3)O(21)S(1) 777.210976 H(43)C(27)N(3)O(21)S(1) O-linked glycosylation 1583 0.5 +HexNAc(2)NeuAc(1)Sulf(1)@S 777.210976 777.7028 H(43)C(27)N(3)O(21)S(1) 777.210976 H(43)C(27)N(3)O(21)S(1) O-linked glycosylation 1583 0.5 +dHex(2)Hex(3)@S 778.274288 778.7042 H(50)C(30)O(23) 778.274288 H(50)C(30)O(23) O-linked glycosylation 1584 0.5 +dHex(2)Hex(3)@T 778.274288 778.7042 H(50)C(30)O(23) 778.274288 H(50)C(30)O(23) O-linked glycosylation 1584 0.5 +Hex(2)HexA(1)HexNAc(1)Sulf(1)@T 783.173922 783.661 H(41)C(26)N(1)O(24)S(1) 783.173922 H(41)C(26)N(1)O(24)S(1) O-linked glycosylation 1585 0.5 +Hex(2)HexA(1)HexNAc(1)Sulf(1)@S 783.173922 783.661 H(41)C(26)N(1)O(24)S(1) 783.173922 H(41)C(26)N(1)O(24)S(1) O-linked glycosylation 1585 0.5 +dHex(2)Hex(2)HexA(1)@S 792.253553 792.6877 H(48)C(30)O(24) 792.253553 H(48)C(30)O(24) O-linked glycosylation 1586 0.5 +dHex(2)Hex(2)HexA(1)@T 792.253553 792.6877 H(48)C(30)O(24) 792.253553 H(48)C(30)O(24) O-linked glycosylation 1586 0.5 +dHex(1)Hex(1)HexNAc(2)Sulf(1)@T 794.226292 794.73 H(46)C(28)N(2)O(22)S(1) 794.226292 H(46)C(28)N(2)O(22)S(1) O-linked glycosylation 1587 0.5 +dHex(1)Hex(1)HexNAc(2)Sulf(1)@S 794.226292 794.73 H(46)C(28)N(2)O(22)S(1) 794.226292 H(46)C(28)N(2)O(22)S(1) O-linked glycosylation 1587 0.5 +dHex(1)Hex(1)HexNAc(1)NeuAc(1)@S 802.285522 802.7289 H(50)C(31)N(2)O(22) 802.285522 H(50)C(31)N(2)O(22) O-linked glycosylation 1588 0.5 +dHex(1)Hex(1)HexNAc(1)NeuAc(1)@T 802.285522 802.7289 H(50)C(31)N(2)O(22) 802.285522 H(50)C(31)N(2)O(22) O-linked glycosylation 1588 0.5 +Hex(2)HexNAc(2)Sulf(1)@T 810.221207 810.7294 H(46)C(28)N(2)O(23)S(1) 810.221207 H(46)C(28)N(2)O(23)S(1) O-linked glycosylation 1589 0.5 +Hex(2)HexNAc(2)Sulf(1)@S 810.221207 810.7294 H(46)C(28)N(2)O(23)S(1) 810.221207 H(46)C(28)N(2)O(23)S(1) O-linked glycosylation 1589 0.5 +Hex(5)@S 810.264117 810.703 H(50)C(30)O(25) 810.264117 H(50)C(30)O(25) O-linked glycosylation 1590 0.5 +Hex(5)@T 810.264117 810.703 H(50)C(30)O(25) 810.264117 H(50)C(30)O(25) O-linked glycosylation 1590 0.5 
+HexNAc(4)@S 812.31749 812.7701 H(52)C(32)N(4)O(20) 812.31749 H(52)C(32)N(4)O(20) O-linked glycosylation 1591 0.5
+HexNAc(4)@T 812.31749 812.7701 H(52)C(32)N(4)O(20) 812.31749 H(52)C(32)N(4)O(20) O-linked glycosylation 1591 0.5
+HexNAc(1)NeuGc(2)@S 817.260035 817.7005 H(47)C(30)N(3)O(23) 817.260035 H(47)C(30)N(3)O(23) O-linked glycosylation 1592 0.5
+HexNAc(1)NeuGc(2)@T 817.260035 817.7005 H(47)C(30)N(3)O(23) 817.260035 H(47)C(30)N(3)O(23) O-linked glycosylation 1592 0.5
+dHex(1)Hex(1)HexNAc(1)NeuGc(1)@T 818.280436 818.7283 H(50)C(31)N(2)O(23) 818.280436 H(50)C(31)N(2)O(23) O-linked glycosylation 1593 0.5
+dHex(1)Hex(1)HexNAc(1)NeuGc(1)@S 818.280436 818.7283 H(50)C(31)N(2)O(23) 818.280436 H(50)C(31)N(2)O(23) O-linked glycosylation 1593 0.5
+dHex(2)Hex(2)HexNAc(1)@S 819.300837 819.7561 H(53)C(32)N(1)O(23) 819.300837 H(53)C(32)N(1)O(23) O-linked glycosylation 1594 0.5
+dHex(2)Hex(2)HexNAc(1)@T 819.300837 819.7561 H(53)C(32)N(1)O(23) 819.300837 H(53)C(32)N(1)O(23) O-linked glycosylation 1594 0.5
+Hex(2)HexNAc(1)NeuGc(1)@S 834.275351 834.7277 H(50)C(31)N(2)O(24) 834.275351 H(50)C(31)N(2)O(24) O-linked glycosylation 1595 0.5
+Hex(2)HexNAc(1)NeuGc(1)@T 834.275351 834.7277 H(50)C(31)N(2)O(24) 834.275351 H(50)C(31)N(2)O(24) O-linked glycosylation 1595 0.5
+dHex(1)Hex(3)HexNAc(1)@S 835.295752 835.7555 H(53)C(32)N(1)O(24) 835.295752 H(53)C(32)N(1)O(24) O-linked glycosylation 1596 0.5
+dHex(1)Hex(3)HexNAc(1)@T 835.295752 835.7555 H(53)C(32)N(1)O(24) 835.295752 H(53)C(32)N(1)O(24) O-linked glycosylation 1596 0.5
+dHex(1)Hex(2)HexA(1)HexNAc(1)@S 849.275017 849.739 H(51)C(32)N(1)O(25) 849.275017 H(51)C(32)N(1)O(25) O-linked glycosylation 1597 0.5
+dHex(1)Hex(2)HexA(1)HexNAc(1)@T 849.275017 849.739 H(51)C(32)N(1)O(25) 849.275017 H(51)C(32)N(1)O(25) O-linked glycosylation 1597 0.5
+Hex(1)HexNAc(3)Sulf(1)@T 851.247756 851.7814 H(49)C(30)N(3)O(23)S(1) 851.247756 H(49)C(30)N(3)O(23)S(1) O-linked glycosylation 1598 0.5
+Hex(1)HexNAc(3)Sulf(1)@S 851.247756 851.7814 H(49)C(30)N(3)O(23)S(1) 851.247756 H(49)C(30)N(3)O(23)S(1) O-linked glycosylation 1598 0.5
+Hex(4)HexNAc(1)@T 851.290667 851.7549 H(53)C(32)N(1)O(25) 851.290667 H(53)C(32)N(1)O(25) O-linked glycosylation 1599 0.5
+Hex(4)HexNAc(1)@S 851.290667 851.7549 H(53)C(32)N(1)O(25) 851.290667 H(53)C(32)N(1)O(25) O-linked glycosylation 1599 0.5
+Hex(4)HexNAc(1)@N 851.290667 851.7549 H(53)C(32)N(1)O(25) 851.290667 H(53)C(32)N(1)O(25) N-linked glycosylation 1599 0.5
+Hex(1)HexNAc(2)NeuAc(1)@S 859.306985 859.7802 H(53)C(33)N(3)O(23) 859.306985 H(53)C(33)N(3)O(23) O-linked glycosylation 1600 0.5
+Hex(1)HexNAc(2)NeuAc(1)@T 859.306985 859.7802 H(53)C(33)N(3)O(23) 859.306985 H(53)C(33)N(3)O(23) O-linked glycosylation 1600 0.5
+Hex(1)HexNAc(2)NeuGc(1)@S 875.3019 875.7796 H(53)C(33)N(3)O(24) 875.3019 H(53)C(33)N(3)O(24) O-linked glycosylation 1602 0.5
+Hex(1)HexNAc(2)NeuGc(1)@T 875.3019 875.7796 H(53)C(33)N(3)O(24) 875.3019 H(53)C(33)N(3)O(24) O-linked glycosylation 1602 0.5
+Hex(5)Phos(1)@T 890.230448 890.6829 H(51)C(30)O(28)P(1) 890.230448 H(51)C(30)O(28)P(1) O-linked glycosylation 1604 0.5
+Hex(5)Phos(1)@S 890.230448 890.6829 H(51)C(30)O(28)P(1) 890.230448 H(51)C(30)O(28)P(1) O-linked glycosylation 1604 0.5
+dHex(2)Hex(1)HexNAc(1)Kdn(1)@T 907.316881 907.8182 H(57)C(35)N(1)O(26) 907.316881 H(57)C(35)N(1)O(26) O-linked glycosylation 1606 0.5
+dHex(2)Hex(1)HexNAc(1)Kdn(1)@S 907.316881 907.8182 H(57)C(35)N(1)O(26) 907.316881 H(57)C(35)N(1)O(26) O-linked glycosylation 1606 0.5
+dHex(1)Hex(3)HexNAc(1)Sulf(1)@T 915.252567 915.8187 H(53)C(32)N(1)O(27)S(1) 915.252567 H(53)C(32)N(1)O(27)S(1) O-linked glycosylation 1607 0.5
+dHex(1)Hex(3)HexNAc(1)Sulf(1)@S 915.252567 915.8187 H(53)C(32)N(1)O(27)S(1) 915.252567 H(53)C(32)N(1)O(27)S(1) O-linked glycosylation 1607 0.5
+dHex(1)Hex(1)HexNAc(3)@S 917.34885 917.8594 H(59)C(36)N(3)O(24) 917.34885 H(59)C(36)N(3)O(24) O-linked glycosylation 1608 0.5
+dHex(1)Hex(1)HexNAc(3)@T 917.34885 917.8594 H(59)C(36)N(3)O(24) 917.34885 H(59)C(36)N(3)O(24) O-linked glycosylation 1608 0.5
+dHex(1)Hex(2)HexA(1)HexNAc(1)Sulf(1)@T 929.231831 929.8022 H(51)C(32)N(1)O(28)S(1) 929.231831 H(51)C(32)N(1)O(28)S(1) O-linked glycosylation 1609 0.5
+dHex(1)Hex(2)HexA(1)HexNAc(1)Sulf(1)@S 929.231831 929.8022 H(51)C(32)N(1)O(28)S(1) 929.231831 H(51)C(32)N(1)O(28)S(1) O-linked glycosylation 1609 0.5
+Hex(2)HexNAc(3)@S 933.343765 933.8588 H(59)C(36)N(3)O(25) 933.343765 H(59)C(36)N(3)O(25) O-linked glycosylation 1610 0.5
+Hex(2)HexNAc(3)@N 933.343765 933.8588 H(59)C(36)N(3)O(25) 933.343765 H(59)C(36)N(3)O(25) N-linked glycosylation 1610 0.5
+Hex(2)HexNAc(3)@T 933.343765 933.8588 H(59)C(36)N(3)O(25) 933.343765 H(59)C(36)N(3)O(25) O-linked glycosylation 1610 0.5
+Hex(1)HexNAc(2)NeuAc(1)Sulf(1)@T 939.2638 939.8434 H(53)C(33)N(3)O(26)S(1) 939.2638 H(53)C(33)N(3)O(26)S(1) O-linked glycosylation 1611 0.5
+Hex(1)HexNAc(2)NeuAc(1)Sulf(1)@S 939.2638 939.8434 H(53)C(33)N(3)O(26)S(1) 939.2638 H(53)C(33)N(3)O(26)S(1) O-linked glycosylation 1611 0.5
+dHex(2)Hex(4)@S 940.327112 940.8448 H(60)C(36)O(28) 940.327112 H(60)C(36)O(28) O-linked glycosylation 1612 0.5
+dHex(2)Hex(4)@T 940.327112 940.8448 H(60)C(36)O(28) 940.327112 H(60)C(36)O(28) O-linked glycosylation 1612 0.5
+Hex(1)HexNAc(1)NeuAc(1)Ac(1)@T 698.238177 698.6244 H(42)C(27)N(2)O(19) 698.238177 H(42)C(27)N(2)O(19) O-linked glycosylation 1786 0.5
+Hex(1)HexNAc(1)NeuAc(1)Ac(1)@S 698.238177 698.6244 H(42)C(27)N(2)O(19) 698.238177 H(42)C(27)N(2)O(19) O-linked glycosylation 1786 0.5
+dHex(2)HexNAc(2)Kdn(1)@T 948.34343 948.8701 H(60)C(37)N(2)O(26) 948.34343 H(60)C(37)N(2)O(26) O-linked glycosylation 1614 0.5
+dHex(2)HexNAc(2)Kdn(1)@S 948.34343 948.8701 H(60)C(37)N(2)O(26) 948.34343 H(60)C(37)N(2)O(26) O-linked glycosylation 1614 0.5
+dHex(1)Hex(2)HexNAc(2)Sulf(1)@T 956.279116 956.8706 H(56)C(34)N(2)O(27)S(1) 956.279116 H(56)C(34)N(2)O(27)S(1) O-linked glycosylation 1615 0.5
+dHex(1)Hex(2)HexNAc(2)Sulf(1)@S 956.279116 956.8706 H(56)C(34)N(2)O(27)S(1) 956.279116 H(56)C(34)N(2)O(27)S(1) O-linked glycosylation 1615 0.5
+dHex(1)HexNAc(4)@S 958.375399 958.9113 H(62)C(38)N(4)O(24) 958.375399 H(62)C(38)N(4)O(24) O-linked glycosylation 1616 0.5
+dHex(1)HexNAc(4)@T 958.375399 958.9113 H(62)C(38)N(4)O(24) 958.375399 H(62)C(38)N(4)O(24) O-linked glycosylation 1616 0.5
+Hex(1)HexNAc(1)NeuAc(1)NeuGc(1)@S 963.317944 963.8417 H(57)C(36)N(3)O(27) 963.317944 H(57)C(36)N(3)O(27) O-linked glycosylation 1617 0.5
+Hex(1)HexNAc(1)NeuAc(1)NeuGc(1)@T 963.317944 963.8417 H(57)C(36)N(3)O(27) 963.317944 H(57)C(36)N(3)O(27) O-linked glycosylation 1617 0.5
+dHex(1)Hex(1)HexNAc(2)Kdn(1)@T 964.338345 964.8695 H(60)C(37)N(2)O(27) 964.338345 H(60)C(37)N(2)O(27) O-linked glycosylation 1618 0.5
+dHex(1)Hex(1)HexNAc(2)Kdn(1)@S 964.338345 964.8695 H(60)C(37)N(2)O(27) 964.338345 H(60)C(37)N(2)O(27) O-linked glycosylation 1618 0.5
+Hex(1)HexNAc(1)NeuGc(2)@S 979.312859 979.8411 H(57)C(36)N(3)O(28) 979.312859 H(57)C(36)N(3)O(28) O-linked glycosylation 1619 0.5
+Hex(1)HexNAc(1)NeuGc(2)@T 979.312859 979.8411 H(57)C(36)N(3)O(28) 979.312859 H(57)C(36)N(3)O(28) O-linked glycosylation 1619 0.5
+Hex(1)HexNAc(1)NeuAc(2)Ac(1)@T 989.333594 989.879 H(59)C(38)N(3)O(27) 989.333594 H(59)C(38)N(3)O(27) O-linked glycosylation 1620 0.5
+Hex(1)HexNAc(1)NeuAc(2)Ac(1)@S 989.333594 989.879 H(59)C(38)N(3)O(27) 989.333594 H(59)C(38)N(3)O(27) O-linked glycosylation 1620 0.5
+dHex(2)Hex(2)HexA(1)HexNAc(1)@S 995.332925 995.8802 H(61)C(38)N(1)O(29) 995.332925 H(61)C(38)N(1)O(29) O-linked glycosylation 1621 0.5
+dHex(2)Hex(2)HexA(1)HexNAc(1)@T 995.332925 995.8802 H(61)C(38)N(1)O(29) 995.332925 H(61)C(38)N(1)O(29) O-linked glycosylation 1621 0.5
+dHex(1)Hex(1)HexNAc(3)Sulf(1)@T 997.305665 997.9226 H(59)C(36)N(3)O(27)S(1) 997.305665 H(59)C(36)N(3)O(27)S(1) O-linked glycosylation 1622 0.5
+dHex(1)Hex(1)HexNAc(3)Sulf(1)@S 997.305665 997.9226 H(59)C(36)N(3)O(27)S(1) 997.305665 H(59)C(36)N(3)O(27)S(1) O-linked glycosylation 1622 0.5
+Hex(2)HexA(1)NeuAc(1)Pent(1)Sulf(1)@T 1003.232225 1003.8377 H(53)C(34)N(1)O(31)S(1) 1003.232225 H(53)C(34)N(1)O(31)S(1) O-linked glycosylation 1623 0.5
+Hex(2)HexA(1)NeuAc(1)Pent(1)Sulf(1)@S 1003.232225 1003.8377 H(53)C(34)N(1)O(31)S(1) 1003.232225 H(53)C(34)N(1)O(31)S(1) O-linked glycosylation 1623 0.5
+dHex(1)Hex(1)HexNAc(2)NeuAc(1)@S 1005.364894 1005.9214 H(63)C(39)N(3)O(27) 1005.364894 H(63)C(39)N(3)O(27) O-linked glycosylation 1624 0.5
+dHex(1)Hex(1)HexNAc(2)NeuAc(1)@T 1005.364894 1005.9214 H(63)C(39)N(3)O(27) 1005.364894 H(63)C(39)N(3)O(27) O-linked glycosylation 1624 0.5
+dHex(1)Hex(3)HexA(1)HexNAc(1)@S 1011.32784 1011.8796 H(61)C(38)N(1)O(30) 1011.32784 H(61)C(38)N(1)O(30) O-linked glycosylation 1625 0.5
+dHex(1)Hex(3)HexA(1)HexNAc(1)@T 1011.32784 1011.8796 H(61)C(38)N(1)O(30) 1011.32784 H(61)C(38)N(1)O(30) O-linked glycosylation 1625 0.5
+Hex(2)HexNAc(3)Sulf(1)@T 1013.300579 1013.922 H(59)C(36)N(3)O(28)S(1) 1013.300579 H(59)C(36)N(3)O(28)S(1) O-linked glycosylation 1626 0.5
+Hex(2)HexNAc(3)Sulf(1)@S 1013.300579 1013.922 H(59)C(36)N(3)O(28)S(1) 1013.300579 H(59)C(36)N(3)O(28)S(1) O-linked glycosylation 1626 0.5
+Hex(5)HexNAc(1)@T 1013.34349 1013.8955 H(63)C(38)N(1)O(30) 1013.34349 H(63)C(38)N(1)O(30) O-linked glycosylation 1627 0.5
+Hex(5)HexNAc(1)@S 1013.34349 1013.8955 H(63)C(38)N(1)O(30) 1013.34349 H(63)C(38)N(1)O(30) O-linked glycosylation 1627 0.5
+Hex(5)HexNAc(1)@N 1013.34349 1013.8955 H(63)C(38)N(1)O(30) 1013.34349 H(63)C(38)N(1)O(30) N-linked glycosylation 1627 0.5
+HexNAc(5)@S 1015.396863 1015.9626 H(65)C(40)N(5)O(25) 1015.396863 H(65)C(40)N(5)O(25) O-linked glycosylation 1628 0.5
+HexNAc(5)@T 1015.396863 1015.9626 H(65)C(40)N(5)O(25) 1015.396863 H(65)C(40)N(5)O(25) O-linked glycosylation 1628 0.5
+Hex(1)HexNAc(1)NeuAc(2)Ac(2)@T 1031.344159 1031.9156 H(61)C(40)N(3)O(28) 1031.344159 H(61)C(40)N(3)O(28) O-linked glycosylation 1630 0.5
+Hex(1)HexNAc(1)NeuAc(2)Ac(2)@S 1031.344159 1031.9156 H(61)C(40)N(3)O(28) 1031.344159 H(61)C(40)N(3)O(28) O-linked glycosylation 1630 0.5
+Hex(2)HexNAc(2)NeuGc(1)@S 1037.354723 1037.9202 H(63)C(39)N(3)O(29) 1037.354723 H(63)C(39)N(3)O(29) O-linked glycosylation 1631 0.5
+Hex(2)HexNAc(2)NeuGc(1)@T 1037.354723 1037.9202 H(63)C(39)N(3)O(29) 1037.354723 H(63)C(39)N(3)O(29) O-linked glycosylation 1631 0.5
+Hex(5)Phos(3)@T 1050.16311 1050.6427 H(53)C(30)O(34)P(3) 1050.16311 H(53)C(30)O(34)P(3) O-linked glycosylation 1632 0.5
+Hex(5)Phos(3)@S 1050.16311 1050.6427 H(53)C(30)O(34)P(3) 1050.16311 H(53)C(30)O(34)P(3) O-linked glycosylation 1632 0.5
+Hex(6)Phos(1)@T 1052.283272 1052.8235 H(61)C(36)O(33)P(1) 1052.283272 H(61)C(36)O(33)P(1) O-linked glycosylation 1633 0.5
+Hex(6)Phos(1)@S 1052.283272 1052.8235 H(61)C(36)O(33)P(1) 1052.283272 H(61)C(36)O(33)P(1) O-linked glycosylation 1633 0.5
+dHex(1)Hex(2)HexA(1)HexNAc(2)@S 1052.354389 1052.9316 H(64)C(40)N(2)O(30) 1052.354389 H(64)C(40)N(2)O(30) O-linked glycosylation 1634 0.5 +dHex(1)Hex(2)HexA(1)HexNAc(2)@T 1052.354389 1052.9316 H(64)C(40)N(2)O(30) 1052.354389 H(64)C(40)N(2)O(30) O-linked glycosylation 1634 0.5 +dHex(2)Hex(3)HexNAc(1)Sulf(1)@T 1061.310475 1061.9599 H(63)C(38)N(1)O(31)S(1) 1061.310475 H(63)C(38)N(1)O(31)S(1) O-linked glycosylation 1635 0.5 +dHex(2)Hex(3)HexNAc(1)Sulf(1)@S 1061.310475 1061.9599 H(63)C(38)N(1)O(31)S(1) 1061.310475 H(63)C(38)N(1)O(31)S(1) O-linked glycosylation 1635 0.5 +Hex(1)HexNAc(3)NeuAc(1)@S 1062.386358 1062.9727 H(66)C(41)N(4)O(28) 1062.386358 H(66)C(41)N(4)O(28) O-linked glycosylation 1636 0.5 +Hex(1)HexNAc(3)NeuAc(1)@T 1062.386358 1062.9727 H(66)C(41)N(4)O(28) 1062.386358 H(66)C(41)N(4)O(28) O-linked glycosylation 1636 0.5 +dHex(2)Hex(1)HexNAc(3)@S 1063.406759 1064.0006 H(69)C(42)N(3)O(28) 1063.406759 H(69)C(42)N(3)O(28) O-linked glycosylation 1637 0.5 +dHex(2)Hex(1)HexNAc(3)@T 1063.406759 1064.0006 H(69)C(42)N(3)O(28) 1063.406759 H(69)C(42)N(3)O(28) O-linked glycosylation 1637 0.5 +Hex(1)HexNAc(3)NeuGc(1)@S 1078.381273 1078.9721 H(66)C(41)N(4)O(29) 1078.381273 H(66)C(41)N(4)O(29) O-linked glycosylation 1638 0.5 +Hex(1)HexNAc(3)NeuGc(1)@T 1078.381273 1078.9721 H(66)C(41)N(4)O(29) 1078.381273 H(66)C(41)N(4)O(29) O-linked glycosylation 1638 0.5 +dHex(1)Hex(1)HexNAc(2)NeuAc(1)Sulf(1)@T 1085.321709 1085.9846 H(63)C(39)N(3)O(30)S(1) 1085.321709 H(63)C(39)N(3)O(30)S(1) O-linked glycosylation 1639 0.5 +dHex(1)Hex(1)HexNAc(2)NeuAc(1)Sulf(1)@S 1085.321709 1085.9846 H(63)C(39)N(3)O(30)S(1) 1085.321709 H(63)C(39)N(3)O(30)S(1) O-linked glycosylation 1639 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(1)Sulf(1)@T 1091.284655 1091.9428 H(61)C(38)N(1)O(33)S(1) 1091.284655 H(61)C(38)N(1)O(33)S(1) O-linked glycosylation 1640 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(1)Sulf(1)@S 1091.284655 1091.9428 H(61)C(38)N(1)O(33)S(1) 1091.284655 H(61)C(38)N(1)O(33)S(1) O-linked glycosylation 1640 0.5 +dHex(1)Hex(1)HexA(1)HexNAc(3)@S 1093.380938 1093.9835 H(67)C(42)N(3)O(30) 1093.380938 H(67)C(42)N(3)O(30) O-linked glycosylation 1641 0.5 +dHex(1)Hex(1)HexA(1)HexNAc(3)@T 1093.380938 1093.9835 H(67)C(42)N(3)O(30) 1093.380938 H(67)C(42)N(3)O(30) O-linked glycosylation 1641 0.5 +Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@T 1101.316623 1101.984 H(63)C(39)N(3)O(31)S(1) 1101.316623 H(63)C(39)N(3)O(31)S(1) O-linked glycosylation 1642 0.5 +Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@S 1101.316623 1101.984 H(63)C(39)N(3)O(31)S(1) 1101.316623 H(63)C(39)N(3)O(31)S(1) O-linked glycosylation 1642 0.5 +dHex(2)Hex(2)HexNAc(2)Sulf(1)@T 1102.337025 1103.0118 H(66)C(40)N(2)O(31)S(1) 1102.337025 H(66)C(40)N(2)O(31)S(1) O-linked glycosylation 1643 0.5 +dHex(2)Hex(2)HexNAc(2)Sulf(1)@S 1102.337025 1103.0118 H(66)C(40)N(2)O(31)S(1) 1102.337025 H(66)C(40)N(2)O(31)S(1) O-linked glycosylation 1643 0.5 +dHex(2)Hex(1)HexNAc(2)Kdn(1)@T 1110.396254 1111.0107 H(70)C(43)N(2)O(31) 1110.396254 H(70)C(43)N(2)O(31) O-linked glycosylation 1644 0.5 +dHex(2)Hex(1)HexNAc(2)Kdn(1)@S 1110.396254 1111.0107 H(70)C(43)N(2)O(31) 1110.396254 H(70)C(43)N(2)O(31) O-linked glycosylation 1644 0.5 +dHex(1)Hex(1)HexNAc(4)@S 1120.428223 1121.0519 H(72)C(44)N(4)O(29) 1120.428223 H(72)C(44)N(4)O(29) O-linked glycosylation 1645 0.5 +dHex(1)Hex(1)HexNAc(4)@T 1120.428223 1121.0519 H(72)C(44)N(4)O(29) 1120.428223 H(72)C(44)N(4)O(29) O-linked glycosylation 1645 0.5 +Hex(2)HexNAc(4)@T 1136.423137 1137.0513 H(72)C(44)N(4)O(30) 1136.423137 H(72)C(44)N(4)O(30) O-linked glycosylation 1646 0.5 +Hex(2)HexNAc(4)@S 1136.423137 
1137.0513 H(72)C(44)N(4)O(30) 1136.423137 H(72)C(44)N(4)O(30) O-linked glycosylation 1646 0.5 +Hex(2)HexNAc(4)@N 1136.423137 1137.0513 H(72)C(44)N(4)O(30) 1136.423137 H(72)C(44)N(4)O(30) N-linked glycosylation 1646 0.5 +Hex(2)HexNAc(1)NeuGc(2)@S 1141.365682 1141.9817 H(67)C(42)N(3)O(33) 1141.365682 H(67)C(42)N(3)O(33) O-linked glycosylation 1647 0.5 +Hex(2)HexNAc(1)NeuGc(2)@T 1141.365682 1141.9817 H(67)C(42)N(3)O(33) 1141.365682 H(67)C(42)N(3)O(33) O-linked glycosylation 1647 0.5 +dHex(2)Hex(4)HexNAc(1)@S 1143.406484 1144.0373 H(73)C(44)N(1)O(33) 1143.406484 H(73)C(44)N(1)O(33) O-linked glycosylation 1648 0.5 +dHex(2)Hex(4)HexNAc(1)@T 1143.406484 1144.0373 H(73)C(44)N(1)O(33) 1143.406484 H(73)C(44)N(1)O(33) O-linked glycosylation 1648 0.5 +Hex(1)HexNAc(2)NeuAc(2)@S 1150.402402 1151.0348 H(70)C(44)N(4)O(31) 1150.402402 H(70)C(44)N(4)O(31) O-linked glycosylation 1649 0.5 +Hex(1)HexNAc(2)NeuAc(2)@T 1150.402402 1151.0348 H(70)C(44)N(4)O(31) 1150.402402 H(70)C(44)N(4)O(31) O-linked glycosylation 1649 0.5 +dHex(2)Hex(1)HexNAc(2)NeuAc(1)@S 1151.422803 1152.0626 H(73)C(45)N(3)O(31) 1151.422803 H(73)C(45)N(3)O(31) O-linked glycosylation 1650 0.5 +dHex(2)Hex(1)HexNAc(2)NeuAc(1)@T 1151.422803 1152.0626 H(73)C(45)N(3)O(31) 1151.422803 H(73)C(45)N(3)O(31) O-linked glycosylation 1650 0.5 +dHex(1)Hex(2)HexNAc(3)Sulf(1)@T 1159.358488 1160.0632 H(69)C(42)N(3)O(32)S(1) 1159.358488 H(69)C(42)N(3)O(32)S(1) O-linked glycosylation 1651 0.5 +dHex(1)Hex(2)HexNAc(3)Sulf(1)@S 1159.358488 1160.0632 H(69)C(42)N(3)O(32)S(1) 1159.358488 H(69)C(42)N(3)O(32)S(1) O-linked glycosylation 1651 0.5 +dHex(1)HexNAc(5)@S 1161.454772 1162.1038 H(75)C(46)N(5)O(29) 1161.454772 H(75)C(46)N(5)O(29) O-linked glycosylation 1652 0.5 +dHex(1)HexNAc(5)@T 1161.454772 1162.1038 H(75)C(46)N(5)O(29) 1161.454772 H(75)C(46)N(5)O(29) O-linked glycosylation 1652 0.5 +dHex(2)Hex(1)HexNAc(2)NeuGc(1)@T 1167.417718 1168.062 H(73)C(45)N(3)O(32) 1167.417718 H(73)C(45)N(3)O(32) O-linked glycosylation 1653 0.5 +dHex(2)Hex(1)HexNAc(2)NeuGc(1)@S 1167.417718 1168.062 H(73)C(45)N(3)O(32) 1167.417718 H(73)C(45)N(3)O(32) O-linked glycosylation 1653 0.5 +dHex(3)Hex(2)HexNAc(2)@S 1168.438119 1169.0898 H(76)C(46)N(2)O(32) 1168.438119 H(76)C(46)N(2)O(32) O-linked glycosylation 1654 0.5 +dHex(3)Hex(2)HexNAc(2)@T 1168.438119 1169.0898 H(76)C(46)N(2)O(32) 1168.438119 H(76)C(46)N(2)O(32) O-linked glycosylation 1654 0.5 +Hex(3)HexNAc(3)Sulf(1)@T 1175.353403 1176.0626 H(69)C(42)N(3)O(33)S(1) 1175.353403 H(69)C(42)N(3)O(33)S(1) O-linked glycosylation 1655 0.5 +Hex(3)HexNAc(3)Sulf(1)@S 1175.353403 1176.0626 H(69)C(42)N(3)O(33)S(1) 1175.353403 H(69)C(42)N(3)O(33)S(1) O-linked glycosylation 1655 0.5 +Hex(3)HexNAc(3)Sulf(1)@N 1175.353403 1176.0626 H(69)C(42)N(3)O(33)S(1) 1175.353403 H(69)C(42)N(3)O(33)S(1) N-linked glycosylation 1655 0.5 +dHex(2)Hex(2)HexNAc(2)Sulf(2)@T 1182.293839 1183.075 H(66)C(40)N(2)O(34)S(2) 1182.293839 H(66)C(40)N(2)O(34)S(2) O-linked glycosylation 1656 0.5 +dHex(2)Hex(2)HexNAc(2)Sulf(2)@S 1182.293839 1183.075 H(66)C(40)N(2)O(34)S(2) 1182.293839 H(66)C(40)N(2)O(34)S(2) O-linked glycosylation 1656 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(1)@N 1183.412632 1184.0614 H(73)C(45)N(3)O(33) 1183.412632 H(73)C(45)N(3)O(33) N-linked glycosylation 1657 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(1)@T 1183.412632 1184.0614 H(73)C(45)N(3)O(33) 1183.412632 H(73)C(45)N(3)O(33) O-linked glycosylation 1657 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(1)@S 1183.412632 1184.0614 H(73)C(45)N(3)O(33) 1183.412632 H(73)C(45)N(3)O(33) O-linked glycosylation 1657 0.5 +dHex(1)Hex(1)HexNAc(3)NeuAc(1)@T 
1208.444267 1209.1139 H(76)C(47)N(4)O(32) 1208.444267 H(76)C(47)N(4)O(32) O-linked glycosylation 1658 0.5 +dHex(1)Hex(1)HexNAc(3)NeuAc(1)@S 1208.444267 1209.1139 H(76)C(47)N(4)O(32) 1208.444267 H(76)C(47)N(4)O(32) O-linked glycosylation 1658 0.5 +Hex(6)Phos(3)@T 1212.215934 1212.7833 H(63)C(36)O(39)P(3) 1212.215934 H(63)C(36)O(39)P(3) O-linked glycosylation 1659 0.5 +Hex(6)Phos(3)@S 1212.215934 1212.7833 H(63)C(36)O(39)P(3) 1212.215934 H(63)C(36)O(39)P(3) O-linked glycosylation 1659 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(2)@S 1214.407213 1215.0722 H(74)C(46)N(2)O(35) 1214.407213 H(74)C(46)N(2)O(35) O-linked glycosylation 1660 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(2)@T 1214.407213 1215.0722 H(74)C(46)N(2)O(35) 1214.407213 H(74)C(46)N(2)O(35) O-linked glycosylation 1660 0.5 +dHex(1)Hex(1)HexNAc(3)NeuGc(1)@T 1224.439181 1225.1133 H(76)C(47)N(4)O(33) 1224.439181 H(76)C(47)N(4)O(33) O-linked glycosylation 1661 0.5 +dHex(1)Hex(1)HexNAc(3)NeuGc(1)@S 1224.439181 1225.1133 H(76)C(47)N(4)O(33) 1224.439181 H(76)C(47)N(4)O(33) O-linked glycosylation 1661 0.5 +Hex(1)HexNAc(2)NeuAc(2)Sulf(1)@T 1230.359217 1231.098 H(70)C(44)N(4)O(34)S(1) 1230.359217 H(70)C(44)N(4)O(34)S(1) O-linked glycosylation 1662 0.5 +Hex(1)HexNAc(2)NeuAc(2)Sulf(1)@S 1230.359217 1231.098 H(70)C(44)N(4)O(34)S(1) 1230.359217 H(70)C(44)N(4)O(34)S(1) O-linked glycosylation 1662 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(1)Sulf(1)@T 1237.342563 1238.084 H(71)C(44)N(1)O(37)S(1) 1237.342563 H(71)C(44)N(1)O(37)S(1) O-linked glycosylation 1663 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(1)Sulf(1)@S 1237.342563 1238.084 H(71)C(44)N(1)O(37)S(1) 1237.342563 H(71)C(44)N(1)O(37)S(1) O-linked glycosylation 1663 0.5 +Hex(1)HexNAc(1)NeuAc(3)@S 1238.418446 1239.0969 H(74)C(47)N(4)O(34) 1238.418446 H(74)C(47)N(4)O(34) O-linked glycosylation 1664 0.5 +Hex(1)HexNAc(1)NeuAc(3)@T 1238.418446 1239.0969 H(74)C(47)N(4)O(34) 1238.418446 H(74)C(47)N(4)O(34) O-linked glycosylation 1664 0.5 +Hex(2)HexNAc(3)NeuGc(1)@S 1240.434096 1241.1127 H(76)C(47)N(4)O(34) 1240.434096 H(76)C(47)N(4)O(34) O-linked glycosylation 1665 0.5 +Hex(2)HexNAc(3)NeuGc(1)@T 1240.434096 1241.1127 H(76)C(47)N(4)O(34) 1240.434096 H(76)C(47)N(4)O(34) O-linked glycosylation 1665 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@T 1247.374532 1248.1252 H(73)C(45)N(3)O(35)S(1) 1247.374532 H(73)C(45)N(3)O(35)S(1) O-linked glycosylation 1666 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@S 1247.374532 1248.1252 H(73)C(45)N(3)O(35)S(1) 1247.374532 H(73)C(45)N(3)O(35)S(1) O-linked glycosylation 1666 0.5 +dHex(3)Hex(1)HexNAc(2)Kdn(1)@T 1256.454163 1257.1519 H(80)C(49)N(2)O(35) 1256.454163 H(80)C(49)N(2)O(35) O-linked glycosylation 1667 0.5 +dHex(3)Hex(1)HexNAc(2)Kdn(1)@S 1256.454163 1257.1519 H(80)C(49)N(2)O(35) 1256.454163 H(80)C(49)N(2)O(35) O-linked glycosylation 1667 0.5 +dHex(2)Hex(3)HexNAc(2)Sulf(1)@T 1264.389848 1265.1524 H(76)C(46)N(2)O(36)S(1) 1264.389848 H(76)C(46)N(2)O(36)S(1) O-linked glycosylation 1668 0.5 +dHex(2)Hex(3)HexNAc(2)Sulf(1)@S 1264.389848 1265.1524 H(76)C(46)N(2)O(36)S(1) 1264.389848 H(76)C(46)N(2)O(36)S(1) O-linked glycosylation 1668 0.5 +dHex(2)Hex(2)HexNAc(2)Kdn(1)@T 1272.449077 1273.1513 H(80)C(49)N(2)O(36) 1272.449077 H(80)C(49)N(2)O(36) O-linked glycosylation 1669 0.5 +dHex(2)Hex(2)HexNAc(2)Kdn(1)@S 1272.449077 1273.1513 H(80)C(49)N(2)O(36) 1272.449077 H(80)C(49)N(2)O(36) O-linked glycosylation 1669 0.5 +dHex(2)Hex(2)HexA(1)HexNAc(2)Sulf(1)@T 1278.369113 1279.136 H(74)C(46)N(2)O(37)S(1) 1278.369113 H(74)C(46)N(2)O(37)S(1) O-linked glycosylation 1670 0.5 +dHex(2)Hex(2)HexA(1)HexNAc(2)Sulf(1)@S 1278.369113 1279.136 
H(74)C(46)N(2)O(37)S(1) 1278.369113 H(74)C(46)N(2)O(37)S(1) O-linked glycosylation 1670 0.5 +dHex(1)Hex(2)HexNAc(4)@T 1282.481046 1283.1925 H(82)C(50)N(4)O(34) 1282.481046 H(82)C(50)N(4)O(34) O-linked glycosylation 1671 0.5 +dHex(1)Hex(2)HexNAc(4)@S 1282.481046 1283.1925 H(82)C(50)N(4)O(34) 1282.481046 H(82)C(50)N(4)O(34) O-linked glycosylation 1671 0.5 +dHex(1)Hex(2)HexNAc(4)@N 1282.481046 1283.1925 H(82)C(50)N(4)O(34) 1282.481046 H(82)C(50)N(4)O(34) N-linked glycosylation 1671 0.5 +Hex(1)HexNAc(1)NeuGc(3)@S 1286.40319 1287.0951 H(74)C(47)N(4)O(37) 1286.40319 H(74)C(47)N(4)O(37) O-linked glycosylation 1672 0.5 +Hex(1)HexNAc(1)NeuGc(3)@T 1286.40319 1287.0951 H(74)C(47)N(4)O(37) 1286.40319 H(74)C(47)N(4)O(37) O-linked glycosylation 1672 0.5 +dHex(1)Hex(1)HexNAc(3)NeuAc(1)Sulf(1)@T 1288.401081 1289.1771 H(76)C(47)N(4)O(35)S(1) 1288.401081 H(76)C(47)N(4)O(35)S(1) O-linked glycosylation 1673 0.5 +dHex(1)Hex(1)HexNAc(3)NeuAc(1)Sulf(1)@S 1288.401081 1289.1771 H(76)C(47)N(4)O(35)S(1) 1288.401081 H(76)C(47)N(4)O(35)S(1) O-linked glycosylation 1673 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(2)Sulf(1)@T 1294.364027 1295.1354 H(74)C(46)N(2)O(38)S(1) 1294.364027 H(74)C(46)N(2)O(38)S(1) O-linked glycosylation 1674 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(2)Sulf(1)@S 1294.364027 1295.1354 H(74)C(46)N(2)O(38)S(1) 1294.364027 H(74)C(46)N(2)O(38)S(1) O-linked glycosylation 1674 0.5 +dHex(1)Hex(1)HexNAc(2)NeuAc(2)@S 1296.460311 1297.176 H(80)C(50)N(4)O(35) 1296.460311 H(80)C(50)N(4)O(35) O-linked glycosylation 1675 0.5 +dHex(1)Hex(1)HexNAc(2)NeuAc(2)@T 1296.460311 1297.176 H(80)C(50)N(4)O(35) 1296.460311 H(80)C(50)N(4)O(35) O-linked glycosylation 1675 0.5 +dHex(3)HexNAc(3)Kdn(1)@T 1297.480712 1298.2038 H(83)C(51)N(3)O(35) 1297.480712 H(83)C(51)N(3)O(35) O-linked glycosylation 1676 0.5 +dHex(3)HexNAc(3)Kdn(1)@S 1297.480712 1298.2038 H(83)C(51)N(3)O(35) 1297.480712 H(83)C(51)N(3)O(35) O-linked glycosylation 1676 0.5 +Hex(2)HexNAc(3)NeuAc(1)Sulf(1)@T 1304.395996 1305.1765 H(76)C(47)N(4)O(36)S(1) 1304.395996 H(76)C(47)N(4)O(36)S(1) O-linked glycosylation 1678 0.5 +Hex(2)HexNAc(3)NeuAc(1)Sulf(1)@S 1304.395996 1305.1765 H(76)C(47)N(4)O(36)S(1) 1304.395996 H(76)C(47)N(4)O(36)S(1) O-linked glycosylation 1678 0.5 +dHex(2)Hex(2)HexNAc(3)Sulf(1)@T 1305.416397 1306.2044 H(79)C(48)N(3)O(36)S(1) 1305.416397 H(79)C(48)N(3)O(36)S(1) O-linked glycosylation 1679 0.5 +dHex(2)Hex(2)HexNAc(3)Sulf(1)@S 1305.416397 1306.2044 H(79)C(48)N(3)O(36)S(1) 1305.416397 H(79)C(48)N(3)O(36)S(1) O-linked glycosylation 1679 0.5 +dHex(2)HexNAc(5)@S 1307.512681 1308.245 H(85)C(52)N(5)O(33) 1307.512681 H(85)C(52)N(5)O(33) O-linked glycosylation 1680 0.5 +dHex(2)HexNAc(5)@T 1307.512681 1308.245 H(85)C(52)N(5)O(33) 1307.512681 H(85)C(52)N(5)O(33) O-linked glycosylation 1680 0.5 +Hex(2)HexNAc(2)NeuAc(2)@S 1312.455225 1313.1754 H(80)C(50)N(4)O(36) 1312.455225 H(80)C(50)N(4)O(36) O-linked glycosylation 1681 0.5 +Hex(2)HexNAc(2)NeuAc(2)@T 1312.455225 1313.1754 H(80)C(50)N(4)O(36) 1312.455225 H(80)C(50)N(4)O(36) O-linked glycosylation 1681 0.5 +dHex(2)Hex(2)HexNAc(2)NeuAc(1)@T 1313.475627 1314.2032 H(83)C(51)N(3)O(36) 1313.475627 H(83)C(51)N(3)O(36) O-linked glycosylation 1682 0.5 +dHex(2)Hex(2)HexNAc(2)NeuAc(1)@S 1313.475627 1314.2032 H(83)C(51)N(3)O(36) 1313.475627 H(83)C(51)N(3)O(36) O-linked glycosylation 1682 0.5 +dHex(1)Hex(3)HexNAc(3)Sulf(1)@T 1321.411312 1322.2038 H(79)C(48)N(3)O(37)S(1) 1321.411312 H(79)C(48)N(3)O(37)S(1) O-linked glycosylation 1683 0.5 +dHex(1)Hex(3)HexNAc(3)Sulf(1)@S 1321.411312 1322.2038 H(79)C(48)N(3)O(37)S(1) 1321.411312 
H(79)C(48)N(3)O(37)S(1) O-linked glycosylation 1683 0.5 +dHex(2)Hex(2)HexNAc(2)NeuGc(1)@T 1329.470541 1330.2026 H(83)C(51)N(3)O(37) 1329.470541 H(83)C(51)N(3)O(37) O-linked glycosylation 1684 0.5 +dHex(2)Hex(2)HexNAc(2)NeuGc(1)@S 1329.470541 1330.2026 H(83)C(51)N(3)O(37) 1329.470541 H(83)C(51)N(3)O(37) O-linked glycosylation 1684 0.5 +Hex(2)HexNAc(5)@S 1339.50251 1340.2438 H(85)C(52)N(5)O(35) 1339.50251 H(85)C(52)N(5)O(35) O-linked glycosylation 1685 0.5 +Hex(2)HexNAc(5)@T 1339.50251 1340.2438 H(85)C(52)N(5)O(35) 1339.50251 H(85)C(52)N(5)O(35) O-linked glycosylation 1685 0.5 +dHex(1)Hex(3)HexNAc(2)NeuGc(1)@S 1345.465456 1346.202 H(83)C(51)N(3)O(38) 1345.465456 H(83)C(51)N(3)O(38) O-linked glycosylation 1686 0.5 +dHex(1)Hex(3)HexNAc(2)NeuGc(1)@T 1345.465456 1346.202 H(83)C(51)N(3)O(38) 1345.465456 H(83)C(51)N(3)O(38) O-linked glycosylation 1686 0.5 +Hex(1)HexNAc(3)NeuAc(2)@S 1353.481775 1354.2273 H(83)C(52)N(5)O(36) 1353.481775 H(83)C(52)N(5)O(36) O-linked glycosylation 1687 0.5 +Hex(1)HexNAc(3)NeuAc(2)@T 1353.481775 1354.2273 H(83)C(52)N(5)O(36) 1353.481775 H(83)C(52)N(5)O(36) O-linked glycosylation 1687 0.5 +dHex(1)Hex(2)HexNAc(3)NeuAc(1)@S 1370.49709 1371.2545 H(86)C(53)N(4)O(37) 1370.49709 H(86)C(53)N(4)O(37) O-linked glycosylation 1688 0.5 +dHex(1)Hex(2)HexNAc(3)NeuAc(1)@T 1370.49709 1371.2545 H(86)C(53)N(4)O(37) 1370.49709 H(86)C(53)N(4)O(37) O-linked glycosylation 1688 0.5 +dHex(3)Hex(2)HexNAc(3)@S 1371.517491 1372.2824 H(89)C(54)N(3)O(37) 1371.517491 H(89)C(54)N(3)O(37) O-linked glycosylation 1689 0.5 +dHex(3)Hex(2)HexNAc(3)@T 1371.517491 1372.2824 H(89)C(54)N(3)O(37) 1371.517491 H(89)C(54)N(3)O(37) O-linked glycosylation 1689 0.5 +Hex(7)Phos(3)@T 1374.268757 1374.9239 H(73)C(42)O(44)P(3) 1374.268757 H(73)C(42)O(44)P(3) O-linked glycosylation 1690 0.5 +Hex(7)Phos(3)@S 1374.268757 1374.9239 H(73)C(42)O(44)P(3) 1374.268757 H(73)C(42)O(44)P(3) O-linked glycosylation 1690 0.5 +dHex(1)Hex(4)HexA(1)HexNAc(2)@S 1376.460036 1377.2128 H(84)C(52)N(2)O(40) 1376.460036 H(84)C(52)N(2)O(40) O-linked glycosylation 1691 0.5 +dHex(1)Hex(4)HexA(1)HexNAc(2)@T 1376.460036 1377.2128 H(84)C(52)N(2)O(40) 1376.460036 H(84)C(52)N(2)O(40) O-linked glycosylation 1691 0.5 +Hex(3)HexNAc(3)NeuAc(1)@T 1386.492005 1387.2539 H(86)C(53)N(4)O(38) 1386.492005 H(86)C(53)N(4)O(38) O-linked glycosylation 1692 0.5 +Hex(3)HexNAc(3)NeuAc(1)@S 1386.492005 1387.2539 H(86)C(53)N(4)O(38) 1386.492005 H(86)C(53)N(4)O(38) O-linked glycosylation 1692 0.5 +dHex(1)Hex(3)HexA(2)HexNAc(2)@S 1390.439301 1391.1963 H(82)C(52)N(2)O(41) 1390.439301 H(82)C(52)N(2)O(41) O-linked glycosylation 1693 0.5 +dHex(1)Hex(3)HexA(2)HexNAc(2)@T 1390.439301 1391.1963 H(82)C(52)N(2)O(41) 1390.439301 H(82)C(52)N(2)O(41) O-linked glycosylation 1693 0.5 +Hex(2)HexNAc(2)NeuAc(2)Sulf(1)@T 1392.41204 1393.2386 H(80)C(50)N(4)O(39)S(1) 1392.41204 H(80)C(50)N(4)O(39)S(1) O-linked glycosylation 1694 0.5 +Hex(2)HexNAc(2)NeuAc(2)Sulf(1)@S 1392.41204 1393.2386 H(80)C(50)N(4)O(39)S(1) 1392.41204 H(80)C(50)N(4)O(39)S(1) O-linked glycosylation 1694 0.5 +dHex(2)Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@T 1393.432441 1394.2664 H(83)C(51)N(3)O(39)S(1) 1393.432441 H(83)C(51)N(3)O(39)S(1) O-linked glycosylation 1695 0.5 +dHex(2)Hex(2)HexNAc(2)NeuAc(1)Sulf(1)@S 1393.432441 1394.2664 H(83)C(51)N(3)O(39)S(1) 1393.432441 H(83)C(51)N(3)O(39)S(1) O-linked glycosylation 1695 0.5 +Hex(3)HexNAc(3)NeuGc(1)@S 1402.48692 1403.2533 H(86)C(53)N(4)O(39) 1402.48692 H(86)C(53)N(4)O(39) O-linked glycosylation 1696 0.5 +Hex(3)HexNAc(3)NeuGc(1)@T 1402.48692 1403.2533 H(86)C(53)N(4)O(39) 1402.48692 
H(86)C(53)N(4)O(39) O-linked glycosylation 1696 0.5 +dHex(4)Hex(1)HexNAc(2)Kdn(1)@T 1402.512072 1403.2931 H(90)C(55)N(2)O(39) 1402.512072 H(90)C(55)N(2)O(39) O-linked glycosylation 1697 0.5 +dHex(4)Hex(1)HexNAc(2)Kdn(1)@S 1402.512072 1403.2931 H(90)C(55)N(2)O(39) 1402.512072 H(90)C(55)N(2)O(39) O-linked glycosylation 1697 0.5 +dHex(3)Hex(2)HexNAc(2)Kdn(1)@T 1418.506986 1419.2925 H(90)C(55)N(2)O(40) 1418.506986 H(90)C(55)N(2)O(40) O-linked glycosylation 1698 0.5 +dHex(3)Hex(2)HexNAc(2)Kdn(1)@S 1418.506986 1419.2925 H(90)C(55)N(2)O(40) 1418.506986 H(90)C(55)N(2)O(40) O-linked glycosylation 1698 0.5 +dHex(3)Hex(2)HexA(1)HexNAc(2)Sulf(1)@T 1424.427021 1425.2772 H(84)C(52)N(2)O(41)S(1) 1424.427021 H(84)C(52)N(2)O(41)S(1) O-linked glycosylation 1699 0.5 +dHex(3)Hex(2)HexA(1)HexNAc(2)Sulf(1)@S 1424.427021 1425.2772 H(84)C(52)N(2)O(41)S(1) 1424.427021 H(84)C(52)N(2)O(41)S(1) O-linked glycosylation 1699 0.5 +Hex(2)HexNAc(4)NeuAc(1)@S 1427.518554 1428.3059 H(89)C(55)N(5)O(38) 1427.518554 H(89)C(55)N(5)O(38) O-linked glycosylation 1700 0.5 +Hex(2)HexNAc(4)NeuAc(1)@T 1427.518554 1428.3059 H(89)C(55)N(5)O(38) 1427.518554 H(89)C(55)N(5)O(38) O-linked glycosylation 1700 0.5 +dHex(2)Hex(2)HexNAc(4)@S 1428.538955 1429.3337 H(92)C(56)N(4)O(38) 1428.538955 H(92)C(56)N(4)O(38) O-linked glycosylation 1701 0.5 +dHex(2)Hex(2)HexNAc(4)@T 1428.538955 1429.3337 H(92)C(56)N(4)O(38) 1428.538955 H(92)C(56)N(4)O(38) O-linked glycosylation 1701 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(2)Sulf(1)@T 1440.421936 1441.2766 H(84)C(52)N(2)O(42)S(1) 1440.421936 H(84)C(52)N(2)O(42)S(1) O-linked glycosylation 1702 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(2)Sulf(1)@S 1440.421936 1441.2766 H(84)C(52)N(2)O(42)S(1) 1440.421936 H(84)C(52)N(2)O(42)S(1) O-linked glycosylation 1702 0.5 +dHex(4)HexNAc(3)Kdn(1)@T 1443.538621 1444.345 H(93)C(57)N(3)O(39) 1443.538621 H(93)C(57)N(3)O(39) O-linked glycosylation 1703 0.5 +dHex(4)HexNAc(3)Kdn(1)@S 1443.538621 1444.345 H(93)C(57)N(3)O(39) 1443.538621 H(93)C(57)N(3)O(39) O-linked glycosylation 1703 0.5 +Hex(2)HexNAc(1)NeuGc(3)@S 1448.456013 1449.2357 H(84)C(53)N(4)O(42) 1448.456013 H(84)C(53)N(4)O(42) O-linked glycosylation 1705 0.5 +Hex(2)HexNAc(1)NeuGc(3)@T 1448.456013 1449.2357 H(84)C(53)N(4)O(42) 1448.456013 H(84)C(53)N(4)O(42) O-linked glycosylation 1705 0.5 +dHex(4)Hex(1)HexNAc(1)Kdn(2)@T 1449.501567 1450.3032 H(91)C(56)N(1)O(42) 1449.501567 H(91)C(56)N(1)O(42) O-linked glycosylation 1706 0.5 +dHex(4)Hex(1)HexNAc(1)Kdn(2)@S 1449.501567 1450.3032 H(91)C(56)N(1)O(42) 1449.501567 H(91)C(56)N(1)O(42) O-linked glycosylation 1706 0.5 +dHex(1)Hex(2)HexNAc(3)NeuAc(1)Sulf(1)@T 1450.453905 1451.3177 H(86)C(53)N(4)O(40)S(1) 1450.453905 H(86)C(53)N(4)O(40)S(1) O-linked glycosylation 1707 0.5 +dHex(1)Hex(2)HexNAc(3)NeuAc(1)Sulf(1)@S 1450.453905 1451.3177 H(86)C(53)N(4)O(40)S(1) 1450.453905 H(86)C(53)N(4)O(40)S(1) O-linked glycosylation 1707 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(2)@S 1458.513134 1459.3166 H(90)C(56)N(4)O(40) 1458.513134 H(90)C(56)N(4)O(40) O-linked glycosylation 1708 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(2)@T 1458.513134 1459.3166 H(90)C(56)N(4)O(40) 1458.513134 H(90)C(56)N(4)O(40) O-linked glycosylation 1708 0.5 +dHex(3)Hex(1)HexNAc(3)Kdn(1)@T 1459.533535 1460.3444 H(93)C(57)N(3)O(40) 1459.533535 H(93)C(57)N(3)O(40) O-linked glycosylation 1709 0.5 +dHex(3)Hex(1)HexNAc(3)Kdn(1)@S 1459.533535 1460.3444 H(93)C(57)N(3)O(40) 1459.533535 H(93)C(57)N(3)O(40) O-linked glycosylation 1709 0.5 +Hex(3)HexNAc(3)NeuAc(1)Sulf(1)@T 1466.44882 1467.3171 H(86)C(53)N(4)O(41)S(1) 1466.44882 H(86)C(53)N(4)O(41)S(1) O-linked 
glycosylation 1711 0.5 +Hex(3)HexNAc(3)NeuAc(1)Sulf(1)@S 1466.44882 1467.3171 H(86)C(53)N(4)O(41)S(1) 1466.44882 H(86)C(53)N(4)O(41)S(1) O-linked glycosylation 1711 0.5 +Hex(3)HexNAc(2)NeuAc(2)@S 1474.508049 1475.316 H(90)C(56)N(4)O(41) 1474.508049 H(90)C(56)N(4)O(41) O-linked glycosylation 1712 0.5 +Hex(3)HexNAc(2)NeuAc(2)@T 1474.508049 1475.316 H(90)C(56)N(4)O(41) 1474.508049 H(90)C(56)N(4)O(41) O-linked glycosylation 1712 0.5 +Hex(3)HexNAc(3)NeuGc(1)Sulf(1)@T 1482.443734 1483.3165 H(86)C(53)N(4)O(42)S(1) 1482.443734 H(86)C(53)N(4)O(42)S(1) O-linked glycosylation 1713 0.5 +Hex(3)HexNAc(3)NeuGc(1)Sulf(1)@S 1482.443734 1483.3165 H(86)C(53)N(4)O(42)S(1) 1482.443734 H(86)C(53)N(4)O(42)S(1) O-linked glycosylation 1713 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(2)@S 1490.502964 1491.3154 H(90)C(56)N(4)O(42) 1490.502964 H(90)C(56)N(4)O(42) O-linked glycosylation 1714 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(2)@T 1490.502964 1491.3154 H(90)C(56)N(4)O(42) 1490.502964 H(90)C(56)N(4)O(42) O-linked glycosylation 1714 0.5 +dHex(2)Hex(3)HexNAc(2)NeuGc(1)@T 1491.523365 1492.3432 H(93)C(57)N(3)O(42) 1491.523365 H(93)C(57)N(3)O(42) O-linked glycosylation 1715 0.5 +dHex(2)Hex(3)HexNAc(2)NeuGc(1)@S 1491.523365 1492.3432 H(93)C(57)N(3)O(42) 1491.523365 H(93)C(57)N(3)O(42) O-linked glycosylation 1715 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(3)Sulf(1)@T 1497.4434 1498.3279 H(87)C(54)N(3)O(43)S(1) 1497.4434 H(87)C(54)N(3)O(43)S(1) O-linked glycosylation 1716 0.5 +dHex(1)Hex(3)HexA(1)HexNAc(3)Sulf(1)@S 1497.4434 1498.3279 H(87)C(54)N(3)O(43)S(1) 1497.4434 H(87)C(54)N(3)O(43)S(1) O-linked glycosylation 1716 0.5 +Hex(2)HexNAc(3)NeuAc(2)@S 1515.534598 1516.3679 H(93)C(58)N(5)O(41) 1515.534598 H(93)C(58)N(5)O(41) O-linked glycosylation 1717 0.5 +Hex(2)HexNAc(3)NeuAc(2)@T 1515.534598 1516.3679 H(93)C(58)N(5)O(41) 1515.534598 H(93)C(58)N(5)O(41) O-linked glycosylation 1717 0.5 +dHex(2)Hex(2)HexNAc(3)NeuAc(1)@S 1516.554999 1517.3957 H(96)C(59)N(4)O(41) 1516.554999 H(96)C(59)N(4)O(41) O-linked glycosylation 1718 0.5 +dHex(2)Hex(2)HexNAc(3)NeuAc(1)@T 1516.554999 1517.3957 H(96)C(59)N(4)O(41) 1516.554999 H(96)C(59)N(4)O(41) O-linked glycosylation 1718 0.5 +dHex(4)Hex(2)HexNAc(3)@S 1517.5754 1518.4236 H(99)C(60)N(3)O(41) 1517.5754 H(99)C(60)N(3)O(41) O-linked glycosylation 1719 0.5 +dHex(4)Hex(2)HexNAc(3)@T 1517.5754 1518.4236 H(99)C(60)N(3)O(41) 1517.5754 H(99)C(60)N(3)O(41) O-linked glycosylation 1719 0.5 +Hex(2)HexNAc(3)NeuAc(1)NeuGc(1)@S 1531.529513 1532.3673 H(93)C(58)N(5)O(42) 1531.529513 H(93)C(58)N(5)O(42) O-linked glycosylation 1720 0.5 +Hex(2)HexNAc(3)NeuAc(1)NeuGc(1)@T 1531.529513 1532.3673 H(93)C(58)N(5)O(42) 1531.529513 H(93)C(58)N(5)O(42) O-linked glycosylation 1720 0.5 +dHex(2)Hex(2)HexNAc(3)NeuGc(1)@T 1532.549914 1533.3951 H(96)C(59)N(4)O(42) 1532.549914 H(96)C(59)N(4)O(42) O-linked glycosylation 1721 0.5 +dHex(2)Hex(2)HexNAc(3)NeuGc(1)@S 1532.549914 1533.3951 H(96)C(59)N(4)O(42) 1532.549914 H(96)C(59)N(4)O(42) O-linked glycosylation 1721 0.5 +dHex(3)Hex(3)HexNAc(3)@S 1533.570315 1534.423 H(99)C(60)N(3)O(42) 1533.570315 H(99)C(60)N(3)O(42) O-linked glycosylation 1722 0.5 +dHex(3)Hex(3)HexNAc(3)@T 1533.570315 1534.423 H(99)C(60)N(3)O(42) 1533.570315 H(99)C(60)N(3)O(42) O-linked glycosylation 1722 0.5 +Hex(8)Phos(3)@T 1536.321581 1537.0645 H(83)C(48)O(49)P(3) 1536.321581 H(83)C(48)O(49)P(3) O-linked glycosylation 1723 0.5 +Hex(8)Phos(3)@S 1536.321581 1537.0645 H(83)C(48)O(49)P(3) 1536.321581 H(83)C(48)O(49)P(3) O-linked glycosylation 1723 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(2)Sulf(1)@T 1538.469949 1539.3798 H(90)C(56)N(4)O(43)S(1) 
1538.469949 H(90)C(56)N(4)O(43)S(1) O-linked glycosylation 1724 0.5 +dHex(1)Hex(2)HexNAc(2)NeuAc(2)Sulf(1)@S 1538.469949 1539.3798 H(90)C(56)N(4)O(43)S(1) 1538.469949 H(90)C(56)N(4)O(43)S(1) O-linked glycosylation 1724 0.5 +Hex(2)HexNAc(3)NeuGc(2)@S 1547.524427 1548.3667 H(93)C(58)N(5)O(43) 1547.524427 H(93)C(58)N(5)O(43) O-linked glycosylation 1725 0.5 +Hex(2)HexNAc(3)NeuGc(2)@T 1547.524427 1548.3667 H(93)C(58)N(5)O(43) 1547.524427 H(93)C(58)N(5)O(43) O-linked glycosylation 1725 0.5 +dHex(4)Hex(2)HexNAc(2)Kdn(1)@T 1564.564895 1565.4337 H(100)C(61)N(2)O(44) 1564.564895 H(100)C(61)N(2)O(44) O-linked glycosylation 1726 0.5 +dHex(4)Hex(2)HexNAc(2)Kdn(1)@S 1564.564895 1565.4337 H(100)C(61)N(2)O(44) 1564.564895 H(100)C(61)N(2)O(44) O-linked glycosylation 1726 0.5 +dHex(1)Hex(2)HexNAc(4)NeuAc(1)@S 1573.576463 1574.4471 H(99)C(61)N(5)O(42) 1573.576463 H(99)C(61)N(5)O(42) O-linked glycosylation 1727 0.5 +dHex(1)Hex(2)HexNAc(4)NeuAc(1)@T 1573.576463 1574.4471 H(99)C(61)N(5)O(42) 1573.576463 H(99)C(61)N(5)O(42) O-linked glycosylation 1727 0.5 +dHex(3)Hex(2)HexNAc(4)@S 1574.596864 1575.4749 H(102)C(62)N(4)O(42) 1574.596864 H(102)C(62)N(4)O(42) O-linked glycosylation 1728 0.5 +dHex(3)Hex(2)HexNAc(4)@T 1574.596864 1575.4749 H(102)C(62)N(4)O(42) 1574.596864 H(102)C(62)N(4)O(42) O-linked glycosylation 1728 0.5 +Hex(1)HexNAc(1)NeuGc(4)@S 1593.493521 1594.349 H(91)C(58)N(5)O(46) 1593.493521 H(91)C(58)N(5)O(46) O-linked glycosylation 1729 0.5 +Hex(1)HexNAc(1)NeuGc(4)@T 1593.493521 1594.349 H(91)C(58)N(5)O(46) 1593.493521 H(91)C(58)N(5)O(46) O-linked glycosylation 1729 0.5 +dHex(4)Hex(1)HexNAc(3)Kdn(1)@T 1605.591444 1606.4856 H(103)C(63)N(3)O(44) 1605.591444 H(103)C(63)N(3)O(44) O-linked glycosylation 1730 0.5 +dHex(4)Hex(1)HexNAc(3)Kdn(1)@S 1605.591444 1606.4856 H(103)C(63)N(3)O(44) 1605.591444 H(103)C(63)N(3)O(44) O-linked glycosylation 1730 0.5 +Hex(4)HexNAc(4)Sulf(2)@T 1620.442414 1621.4589 H(92)C(56)N(4)O(46)S(2) 1620.442414 H(92)C(56)N(4)O(46)S(2) O-linked glycosylation 1732 0.5 +Hex(4)HexNAc(4)Sulf(2)@S 1620.442414 1621.4589 H(92)C(56)N(4)O(46)S(2) 1620.442414 H(92)C(56)N(4)O(46)S(2) O-linked glycosylation 1732 0.5 +dHex(3)Hex(2)HexNAc(3)Kdn(1)@T 1621.586359 1622.485 H(103)C(63)N(3)O(45) 1621.586359 H(103)C(63)N(3)O(45) O-linked glycosylation 1733 0.5 +dHex(3)Hex(2)HexNAc(3)Kdn(1)@S 1621.586359 1622.485 H(103)C(63)N(3)O(45) 1621.586359 H(103)C(63)N(3)O(45) O-linked glycosylation 1733 0.5 +dHex(2)Hex(2)HexNAc(5)@S 1631.618328 1632.5262 H(105)C(64)N(5)O(43) 1631.618328 H(105)C(64)N(5)O(43) O-linked glycosylation 1735 0.5 +dHex(2)Hex(2)HexNAc(5)@T 1631.618328 1632.5262 H(105)C(64)N(5)O(43) 1631.618328 H(105)C(64)N(5)O(43) O-linked glycosylation 1735 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(3)Sulf(1)@T 1643.501309 1644.4691 H(97)C(60)N(3)O(47)S(1) 1643.501309 H(97)C(60)N(3)O(47)S(1) O-linked glycosylation 1736 0.5 +dHex(2)Hex(3)HexA(1)HexNAc(3)Sulf(1)@S 1643.501309 1644.4691 H(97)C(60)N(3)O(47)S(1) 1643.501309 H(97)C(60)N(3)O(47)S(1) O-linked glycosylation 1736 0.5 +dHex(1)Hex(4)HexA(1)HexNAc(3)Sulf(1)@T 1659.496223 1660.4685 H(97)C(60)N(3)O(48)S(1) 1659.496223 H(97)C(60)N(3)O(48)S(1) O-linked glycosylation 1737 0.5 +dHex(1)Hex(4)HexA(1)HexNAc(3)Sulf(1)@S 1659.496223 1660.4685 H(97)C(60)N(3)O(48)S(1) 1659.496223 H(97)C(60)N(3)O(48)S(1) O-linked glycosylation 1737 0.5 +Hex(3)HexNAc(3)NeuAc(2)@S 1677.587422 1678.5085 H(103)C(64)N(5)O(46) 1677.587422 H(103)C(64)N(5)O(46) O-linked glycosylation 1738 0.5 +Hex(3)HexNAc(3)NeuAc(2)@T 1677.587422 1678.5085 H(103)C(64)N(5)O(46) 1677.587422 H(103)C(64)N(5)O(46) O-linked 
glycosylation 1738 0.5 +dHex(2)Hex(3)HexNAc(3)NeuAc(1)@T 1678.607823 1679.5363 H(106)C(65)N(4)O(46) 1678.607823 H(106)C(65)N(4)O(46) O-linked glycosylation 1739 0.5 +dHex(2)Hex(3)HexNAc(3)NeuAc(1)@S 1678.607823 1679.5363 H(106)C(65)N(4)O(46) 1678.607823 H(106)C(65)N(4)O(46) O-linked glycosylation 1739 0.5 +dHex(4)Hex(3)HexNAc(3)@S 1679.628224 1680.5642 H(109)C(66)N(3)O(46) 1679.628224 H(109)C(66)N(3)O(46) O-linked glycosylation 1740 0.5 +dHex(4)Hex(3)HexNAc(3)@T 1679.628224 1680.5642 H(109)C(66)N(3)O(46) 1679.628224 H(109)C(66)N(3)O(46) O-linked glycosylation 1740 0.5 +Hex(9)Phos(3)@T 1698.374404 1699.2051 H(93)C(54)O(54)P(3) 1698.374404 H(93)C(54)O(54)P(3) O-linked glycosylation 1742 0.5 +Hex(9)Phos(3)@S 1698.374404 1699.2051 H(93)C(54)O(54)P(3) 1698.374404 H(93)C(54)O(54)P(3) O-linked glycosylation 1742 0.5 +dHex(2)HexNAc(7)@S 1713.671426 1714.63 H(111)C(68)N(7)O(43) 1713.671426 H(111)C(68)N(7)O(43) O-linked glycosylation 1743 0.5 +dHex(2)HexNAc(7)@T 1713.671426 1714.63 H(111)C(68)N(7)O(43) 1713.671426 H(111)C(68)N(7)O(43) O-linked glycosylation 1743 0.5 +Hex(2)HexNAc(1)NeuGc(4)@S 1755.546345 1756.4896 H(101)C(64)N(5)O(51) 1755.546345 H(101)C(64)N(5)O(51) O-linked glycosylation 1744 0.5 +Hex(2)HexNAc(1)NeuGc(4)@T 1755.546345 1756.4896 H(101)C(64)N(5)O(51) 1755.546345 H(101)C(64)N(5)O(51) O-linked glycosylation 1744 0.5 +Hex(3)HexNAc(3)NeuAc(2)Sulf(1)@T 1757.544236 1758.5717 H(103)C(64)N(5)O(49)S(1) 1757.544236 H(103)C(64)N(5)O(49)S(1) O-linked glycosylation 1745 0.5 +Hex(3)HexNAc(3)NeuAc(2)Sulf(1)@S 1757.544236 1758.5717 H(103)C(64)N(5)O(49)S(1) 1757.544236 H(103)C(64)N(5)O(49)S(1) O-linked glycosylation 1745 0.5 +dHex(2)Hex(3)HexNAc(5)@T 1793.671151 1794.6668 H(115)C(70)N(5)O(48) 1793.671151 H(115)C(70)N(5)O(48) O-linked glycosylation 1746 0.5 +dHex(2)Hex(3)HexNAc(5)@S 1793.671151 1794.6668 H(115)C(70)N(5)O(48) 1793.671151 H(115)C(70)N(5)O(48) O-linked glycosylation 1746 0.5 +dHex(2)Hex(3)HexNAc(5)@N 1793.671151 1794.6668 H(115)C(70)N(5)O(48) 1793.671151 H(115)C(70)N(5)O(48) N-linked glycosylation 1746 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(3)@S 1797.593295 1798.5694 H(107)C(67)N(5)O(51) 1797.593295 H(107)C(67)N(5)O(51) O-linked glycosylation 1747 0.5 +dHex(1)Hex(2)HexNAc(2)NeuGc(3)@T 1797.593295 1798.5694 H(107)C(67)N(5)O(51) 1797.593295 H(107)C(67)N(5)O(51) O-linked glycosylation 1747 0.5 +dHex(2)Hex(4)HexA(1)HexNAc(3)Sulf(1)@T 1805.554132 1806.6097 H(107)C(66)N(3)O(52)S(1) 1805.554132 H(107)C(66)N(3)O(52)S(1) O-linked glycosylation 1748 0.5 +dHex(2)Hex(4)HexA(1)HexNAc(3)Sulf(1)@S 1805.554132 1806.6097 H(107)C(66)N(3)O(52)S(1) 1805.554132 H(107)C(66)N(3)O(52)S(1) O-linked glycosylation 1748 0.5 +Hex(2)HexNAc(3)NeuAc(3)@S 1806.630015 1807.6225 H(110)C(69)N(6)O(49) 1806.630015 H(110)C(69)N(6)O(49) O-linked glycosylation 1749 0.5 +Hex(2)HexNAc(3)NeuAc(3)@T 1806.630015 1807.6225 H(110)C(69)N(6)O(49) 1806.630015 H(110)C(69)N(6)O(49) O-linked glycosylation 1749 0.5 +dHex(1)Hex(3)HexNAc(3)NeuAc(2)@S 1823.64533 1824.6497 H(113)C(70)N(5)O(50) 1823.64533 H(113)C(70)N(5)O(50) O-linked glycosylation 1750 0.5 +dHex(1)Hex(3)HexNAc(3)NeuAc(2)@T 1823.64533 1824.6497 H(113)C(70)N(5)O(50) 1823.64533 H(113)C(70)N(5)O(50) O-linked glycosylation 1750 0.5 +dHex(3)Hex(3)HexNAc(3)NeuAc(1)@S 1824.665732 1825.6775 H(116)C(71)N(4)O(50) 1824.665732 H(116)C(71)N(4)O(50) O-linked glycosylation 1751 0.5 +dHex(3)Hex(3)HexNAc(3)NeuAc(1)@T 1824.665732 1825.6775 H(116)C(71)N(4)O(50) 1824.665732 H(116)C(71)N(4)O(50) O-linked glycosylation 1751 0.5 +Hex(2)HexNAc(3)NeuGc(3)@S 1854.614759 1855.6207 H(110)C(69)N(6)O(52) 
1854.614759 H(110)C(69)N(6)O(52) O-linked glycosylation 1752 0.5 +Hex(2)HexNAc(3)NeuGc(3)@T 1854.614759 1855.6207 H(110)C(69)N(6)O(52) 1854.614759 H(110)C(69)N(6)O(52) O-linked glycosylation 1752 0.5 +Hex(10)Phos(3)@T 1860.427228 1861.3457 H(103)C(60)O(59)P(3) 1860.427228 H(103)C(60)O(59)P(3) O-linked glycosylation 1753 0.5 +Hex(10)Phos(3)@S 1860.427228 1861.3457 H(103)C(60)O(59)P(3) 1860.427228 H(103)C(60)O(59)P(3) O-linked glycosylation 1753 0.5 +dHex(1)Hex(2)HexNAc(4)NeuAc(2)@S 1864.67188 1865.7016 H(116)C(72)N(6)O(50) 1864.67188 H(116)C(72)N(6)O(50) O-linked glycosylation 1754 0.5 +dHex(1)Hex(2)HexNAc(4)NeuAc(2)@T 1864.67188 1865.7016 H(116)C(72)N(6)O(50) 1864.67188 H(116)C(72)N(6)O(50) O-linked glycosylation 1754 0.5 +Hex(1)HexNAc(1)NeuGc(5)@S 1900.583852 1901.603 H(108)C(69)N(6)O(55) 1900.583852 H(108)C(69)N(6)O(55) O-linked glycosylation 1755 0.5 +Hex(1)HexNAc(1)NeuGc(5)@T 1900.583852 1901.603 H(108)C(69)N(6)O(55) 1900.583852 H(108)C(69)N(6)O(55) O-linked glycosylation 1755 0.5 +Hex(4)HexNAc(4)NeuAc(1)Sulf(2)@T 1911.53783 1912.7135 H(109)C(67)N(5)O(54)S(2) 1911.53783 H(109)C(67)N(5)O(54)S(2) O-linked glycosylation 1756 0.5 +Hex(4)HexNAc(4)NeuAc(1)Sulf(2)@S 1911.53783 1912.7135 H(109)C(67)N(5)O(54)S(2) 1911.53783 H(109)C(67)N(5)O(54)S(2) O-linked glycosylation 1756 0.5 +Hex(4)HexNAc(4)NeuGc(1)Sulf(2)@T 1927.532745 1928.7129 H(109)C(67)N(5)O(55)S(2) 1927.532745 H(109)C(67)N(5)O(55)S(2) O-linked glycosylation 1757 0.5 +Hex(4)HexNAc(4)NeuGc(1)Sulf(2)@S 1927.532745 1928.7129 H(109)C(67)N(5)O(55)S(2) 1927.532745 H(109)C(67)N(5)O(55)S(2) O-linked glycosylation 1757 0.5 +dHex(2)Hex(3)HexNAc(3)NeuAc(2)@S 1969.703239 1970.7909 H(123)C(76)N(5)O(54) 1969.703239 H(123)C(76)N(5)O(54) O-linked glycosylation 1758 0.5 +dHex(2)Hex(3)HexNAc(3)NeuAc(2)@T 1969.703239 1970.7909 H(123)C(76)N(5)O(54) 1969.703239 H(123)C(76)N(5)O(54) O-linked glycosylation 1758 0.5 +Hex(4)HexNAc(4)NeuAc(1)Sulf(3)@T 1991.494645 1992.7767 H(109)C(67)N(5)O(57)S(3) 1991.494645 H(109)C(67)N(5)O(57)S(3) O-linked glycosylation 1759 0.5 +Hex(4)HexNAc(4)NeuAc(1)Sulf(3)@S 1991.494645 1992.7767 H(109)C(67)N(5)O(57)S(3) 1991.494645 H(109)C(67)N(5)O(57)S(3) O-linked glycosylation 1759 0.5 +dHex(2)Hex(2)HexNAc(2)@S 1022.38021 1022.9486 H(66)C(40)N(2)O(28) 1022.38021 H(66)C(40)N(2)O(28) O-linked glycosylation 1760 0.5 +dHex(2)Hex(2)HexNAc(2)@T 1022.38021 1022.9486 H(66)C(40)N(2)O(28) 1022.38021 H(66)C(40)N(2)O(28) O-linked glycosylation 1760 0.5 +dHex(2)Hex(2)HexNAc(2)@N 1022.38021 1022.9486 H(66)C(40)N(2)O(28) 1022.38021 H(66)C(40)N(2)O(28) N-linked glycosylation 1760 0.5 +dHex(1)Hex(3)HexNAc(2)@S 1038.375125 1038.948 H(66)C(40)N(2)O(29) 1038.375125 H(66)C(40)N(2)O(29) O-linked glycosylation 1761 0.5 +dHex(1)Hex(3)HexNAc(2)@T 1038.375125 1038.948 H(66)C(40)N(2)O(29) 1038.375125 H(66)C(40)N(2)O(29) O-linked glycosylation 1761 0.5 +dHex(1)Hex(3)HexNAc(2)@N 1038.375125 1038.948 H(66)C(40)N(2)O(29) 1038.375125 H(66)C(40)N(2)O(29) N-linked glycosylation 1761 0.5 +dHex(1)Hex(2)HexNAc(3)@S 1079.401674 1080.0 H(69)C(42)N(3)O(29) 1079.401674 H(69)C(42)N(3)O(29) O-linked glycosylation 1762 0.5 +dHex(1)Hex(2)HexNAc(3)@T 1079.401674 1080.0 H(69)C(42)N(3)O(29) 1079.401674 H(69)C(42)N(3)O(29) O-linked glycosylation 1762 0.5 +dHex(1)Hex(2)HexNAc(3)@N 1079.401674 1080.0 H(69)C(42)N(3)O(29) 1079.401674 H(69)C(42)N(3)O(29) N-linked glycosylation 1762 0.5 +Hex(3)HexNAc(3)@S 1095.396588 1095.9994 H(69)C(42)N(3)O(30) 1095.396588 H(69)C(42)N(3)O(30) O-linked glycosylation 1763 0.5 +Hex(3)HexNAc(3)@T 1095.396588 1095.9994 H(69)C(42)N(3)O(30) 1095.396588 
H(69)C(42)N(3)O(30) O-linked glycosylation 1763 0.5 +Hex(3)HexNAc(3)@N 1095.396588 1095.9994 H(69)C(42)N(3)O(30) 1095.396588 H(69)C(42)N(3)O(30) N-linked glycosylation 1763 0.5 +dHex(1)Hex(3)HexNAc(2)Sulf(1)@N 1118.331939 1119.0112 H(66)C(40)N(2)O(32)S(1) 1118.331939 H(66)C(40)N(2)O(32)S(1) N-linked glycosylation 1764 0.5 +dHex(1)Hex(3)HexNAc(2)Sulf(1)@T 1118.331939 1119.0112 H(66)C(40)N(2)O(32)S(1) 1118.331939 H(66)C(40)N(2)O(32)S(1) O-linked glycosylation 1764 0.5 +dHex(1)Hex(3)HexNAc(2)Sulf(1)@S 1118.331939 1119.0112 H(66)C(40)N(2)O(32)S(1) 1118.331939 H(66)C(40)N(2)O(32)S(1) O-linked glycosylation 1764 0.5 +dHex(2)Hex(3)HexNAc(2)@S 1184.433033 1185.0892 H(76)C(46)N(2)O(33) 1184.433033 H(76)C(46)N(2)O(33) O-linked glycosylation 1765 0.5 +dHex(2)Hex(3)HexNAc(2)@T 1184.433033 1185.0892 H(76)C(46)N(2)O(33) 1184.433033 H(76)C(46)N(2)O(33) O-linked glycosylation 1765 0.5 +dHex(2)Hex(3)HexNAc(2)@N 1184.433033 1185.0892 H(76)C(46)N(2)O(33) 1184.433033 H(76)C(46)N(2)O(33) N-linked glycosylation 1765 0.5 +dHex(1)Hex(4)HexNAc(2)@S 1200.427948 1201.0886 H(76)C(46)N(2)O(34) 1200.427948 H(76)C(46)N(2)O(34) O-linked glycosylation 1766 0.5 +dHex(1)Hex(4)HexNAc(2)@T 1200.427948 1201.0886 H(76)C(46)N(2)O(34) 1200.427948 H(76)C(46)N(2)O(34) O-linked glycosylation 1766 0.5 +dHex(1)Hex(4)HexNAc(2)@N 1200.427948 1201.0886 H(76)C(46)N(2)O(34) 1200.427948 H(76)C(46)N(2)O(34) N-linked glycosylation 1766 0.5 +dHex(2)Hex(2)HexNAc(3)@S 1225.459583 1226.1412 H(79)C(48)N(3)O(33) 1225.459583 H(79)C(48)N(3)O(33) O-linked glycosylation 1767 0.5 +dHex(2)Hex(2)HexNAc(3)@T 1225.459583 1226.1412 H(79)C(48)N(3)O(33) 1225.459583 H(79)C(48)N(3)O(33) O-linked glycosylation 1767 0.5 +dHex(2)Hex(2)HexNAc(3)@N 1225.459583 1226.1412 H(79)C(48)N(3)O(33) 1225.459583 H(79)C(48)N(3)O(33) N-linked glycosylation 1767 0.5 +dHex(1)Hex(3)HexNAc(3)@S 1241.454497 1242.1406 H(79)C(48)N(3)O(34) 1241.454497 H(79)C(48)N(3)O(34) O-linked glycosylation 1768 0.5 +dHex(1)Hex(3)HexNAc(3)@T 1241.454497 1242.1406 H(79)C(48)N(3)O(34) 1241.454497 H(79)C(48)N(3)O(34) O-linked glycosylation 1768 0.5 +dHex(1)Hex(3)HexNAc(3)@N 1241.454497 1242.1406 H(79)C(48)N(3)O(34) 1241.454497 H(79)C(48)N(3)O(34) N-linked glycosylation 1768 0.5 +Hex(4)HexNAc(3)@S 1257.449412 1258.14 H(79)C(48)N(3)O(35) 1257.449412 H(79)C(48)N(3)O(35) O-linked glycosylation 1769 0.5 +Hex(4)HexNAc(3)@T 1257.449412 1258.14 H(79)C(48)N(3)O(35) 1257.449412 H(79)C(48)N(3)O(35) O-linked glycosylation 1769 0.5 +Hex(4)HexNAc(3)@N 1257.449412 1258.14 H(79)C(48)N(3)O(35) 1257.449412 H(79)C(48)N(3)O(35) N-linked glycosylation 1769 0.5 +dHex(2)Hex(4)HexNAc(2)@S 1346.485857 1347.2298 H(86)C(52)N(2)O(38) 1346.485857 H(86)C(52)N(2)O(38) O-linked glycosylation 1770 0.5 +dHex(2)Hex(4)HexNAc(2)@T 1346.485857 1347.2298 H(86)C(52)N(2)O(38) 1346.485857 H(86)C(52)N(2)O(38) O-linked glycosylation 1770 0.5 +dHex(2)Hex(4)HexNAc(2)@N 1346.485857 1347.2298 H(86)C(52)N(2)O(38) 1346.485857 H(86)C(52)N(2)O(38) N-linked glycosylation 1770 0.5 +dHex(2)Hex(3)HexNAc(3)@S 1387.512406 1388.2818 H(89)C(54)N(3)O(38) 1387.512406 H(89)C(54)N(3)O(38) O-linked glycosylation 1771 0.5 +dHex(2)Hex(3)HexNAc(3)@T 1387.512406 1388.2818 H(89)C(54)N(3)O(38) 1387.512406 H(89)C(54)N(3)O(38) O-linked glycosylation 1771 0.5 +dHex(2)Hex(3)HexNAc(3)@N 1387.512406 1388.2818 H(89)C(54)N(3)O(38) 1387.512406 H(89)C(54)N(3)O(38) N-linked glycosylation 1771 0.5 +Hex(3)HexNAc(5)@N 1501.555334 1502.3844 H(95)C(58)N(5)O(40) 1501.555334 H(95)C(58)N(5)O(40) N-linked glycosylation 1772 0.5 +Hex(3)HexNAc(5)@T 1501.555334 1502.3844 H(95)C(58)N(5)O(40) 
1501.555334 H(95)C(58)N(5)O(40) O-linked glycosylation 1772 0.5 +Hex(3)HexNAc(5)@S 1501.555334 1502.3844 H(95)C(58)N(5)O(40) 1501.555334 H(95)C(58)N(5)O(40) O-linked glycosylation 1772 0.5 +Hex(4)HexNAc(3)NeuAc(1)@N 1548.544828 1549.3945 H(96)C(59)N(4)O(43) 1548.544828 H(96)C(59)N(4)O(43) N-linked glycosylation 1773 0.5 +Hex(4)HexNAc(3)NeuAc(1)@T 1548.544828 1549.3945 H(96)C(59)N(4)O(43) 1548.544828 H(96)C(59)N(4)O(43) O-linked glycosylation 1773 0.5 +Hex(4)HexNAc(3)NeuAc(1)@S 1548.544828 1549.3945 H(96)C(59)N(4)O(43) 1548.544828 H(96)C(59)N(4)O(43) O-linked glycosylation 1773 0.5 +dHex(2)Hex(3)HexNAc(4)@S 1590.591779 1591.4743 H(102)C(62)N(4)O(43) 1590.591779 H(102)C(62)N(4)O(43) O-linked glycosylation 1774 0.5 +dHex(2)Hex(3)HexNAc(4)@T 1590.591779 1591.4743 H(102)C(62)N(4)O(43) 1590.591779 H(102)C(62)N(4)O(43) O-linked glycosylation 1774 0.5 +dHex(2)Hex(3)HexNAc(4)@N 1590.591779 1591.4743 H(102)C(62)N(4)O(43) 1590.591779 H(102)C(62)N(4)O(43) N-linked glycosylation 1774 0.5 +dHex(1)Hex(3)HexNAc(5)@S 1647.613242 1648.5256 H(105)C(64)N(5)O(44) 1647.613242 H(105)C(64)N(5)O(44) O-linked glycosylation 1775 0.5 +dHex(1)Hex(3)HexNAc(5)@T 1647.613242 1648.5256 H(105)C(64)N(5)O(44) 1647.613242 H(105)C(64)N(5)O(44) O-linked glycosylation 1775 0.5 +dHex(1)Hex(3)HexNAc(5)@N 1647.613242 1648.5256 H(105)C(64)N(5)O(44) 1647.613242 H(105)C(64)N(5)O(44) N-linked glycosylation 1775 0.5 +Hex(3)HexNAc(6)@N 1704.634706 1705.5769 H(108)C(66)N(6)O(45) 1704.634706 H(108)C(66)N(6)O(45) N-linked glycosylation 1776 0.5 +Hex(3)HexNAc(6)@T 1704.634706 1705.5769 H(108)C(66)N(6)O(45) 1704.634706 H(108)C(66)N(6)O(45) O-linked glycosylation 1776 0.5 +Hex(3)HexNAc(6)@S 1704.634706 1705.5769 H(108)C(66)N(6)O(45) 1704.634706 H(108)C(66)N(6)O(45) O-linked glycosylation 1776 0.5 +Hex(4)HexNAc(4)NeuAc(1)@S 1751.624201 1752.5871 H(109)C(67)N(5)O(48) 1751.624201 H(109)C(67)N(5)O(48) O-linked glycosylation 1777 0.5 +Hex(4)HexNAc(4)NeuAc(1)@T 1751.624201 1752.5871 H(109)C(67)N(5)O(48) 1751.624201 H(109)C(67)N(5)O(48) O-linked glycosylation 1777 0.5 +Hex(4)HexNAc(4)NeuAc(1)@N 1751.624201 1752.5871 H(109)C(67)N(5)O(48) 1751.624201 H(109)C(67)N(5)O(48) N-linked glycosylation 1777 0.5 +dHex(2)Hex(4)HexNAc(4)@N 1752.644602 1753.6149 H(112)C(68)N(4)O(48) 1752.644602 H(112)C(68)N(4)O(48) N-linked glycosylation 1778 0.5 +dHex(2)Hex(4)HexNAc(4)@T 1752.644602 1753.6149 H(112)C(68)N(4)O(48) 1752.644602 H(112)C(68)N(4)O(48) O-linked glycosylation 1778 0.5 +dHex(2)Hex(4)HexNAc(4)@S 1752.644602 1753.6149 H(112)C(68)N(4)O(48) 1752.644602 H(112)C(68)N(4)O(48) O-linked glycosylation 1778 0.5 +Hex(6)HexNAc(4)@S 1784.634431 1785.6137 H(112)C(68)N(4)O(50) 1784.634431 H(112)C(68)N(4)O(50) O-linked glycosylation 1779 0.5 +Hex(6)HexNAc(4)@T 1784.634431 1785.6137 H(112)C(68)N(4)O(50) 1784.634431 H(112)C(68)N(4)O(50) O-linked glycosylation 1779 0.5 +Hex(6)HexNAc(4)@N 1784.634431 1785.6137 H(112)C(68)N(4)O(50) 1784.634431 H(112)C(68)N(4)O(50) N-linked glycosylation 1779 0.5 +Hex(5)HexNAc(5)@S 1825.660981 1826.6656 H(115)C(70)N(5)O(50) 1825.660981 H(115)C(70)N(5)O(50) O-linked glycosylation 1780 0.5 +Hex(5)HexNAc(5)@T 1825.660981 1826.6656 H(115)C(70)N(5)O(50) 1825.660981 H(115)C(70)N(5)O(50) O-linked glycosylation 1780 0.5 +Hex(5)HexNAc(5)@N 1825.660981 1826.6656 H(115)C(70)N(5)O(50) 1825.660981 H(115)C(70)N(5)O(50) N-linked glycosylation 1780 0.5 +dHex(1)Hex(3)HexNAc(6)@S 1850.692615 1851.7181 H(118)C(72)N(6)O(49) 1850.692615 H(118)C(72)N(6)O(49) O-linked glycosylation 1781 0.5 +dHex(1)Hex(3)HexNAc(6)@T 1850.692615 1851.7181 H(118)C(72)N(6)O(49) 
1850.692615 H(118)C(72)N(6)O(49) O-linked glycosylation 1781 0.5 +dHex(1)Hex(3)HexNAc(6)@N 1850.692615 1851.7181 H(118)C(72)N(6)O(49) 1850.692615 H(118)C(72)N(6)O(49) N-linked glycosylation 1781 0.5 +dHex(1)Hex(4)HexNAc(4)NeuAc(1)@N 1897.68211 1898.7283 H(119)C(73)N(5)O(52) 1897.68211 H(119)C(73)N(5)O(52) N-linked glycosylation 1782 0.5 +dHex(1)Hex(4)HexNAc(4)NeuAc(1)@T 1897.68211 1898.7283 H(119)C(73)N(5)O(52) 1897.68211 H(119)C(73)N(5)O(52) O-linked glycosylation 1782 0.5 +dHex(1)Hex(4)HexNAc(4)NeuAc(1)@S 1897.68211 1898.7283 H(119)C(73)N(5)O(52) 1897.68211 H(119)C(73)N(5)O(52) O-linked glycosylation 1782 0.5 +dHex(3)Hex(4)HexNAc(4)@S 1898.702511 1899.7561 H(122)C(74)N(4)O(52) 1898.702511 H(122)C(74)N(4)O(52) O-linked glycosylation 1783 0.5 +dHex(3)Hex(4)HexNAc(4)@T 1898.702511 1899.7561 H(122)C(74)N(4)O(52) 1898.702511 H(122)C(74)N(4)O(52) O-linked glycosylation 1783 0.5 +dHex(3)Hex(4)HexNAc(4)@N 1898.702511 1899.7561 H(122)C(74)N(4)O(52) 1898.702511 H(122)C(74)N(4)O(52) N-linked glycosylation 1783 0.5 +dHex(1)Hex(3)HexNAc(5)NeuAc(1)@S 1938.708659 1939.7802 H(122)C(75)N(6)O(52) 1938.708659 H(122)C(75)N(6)O(52) O-linked glycosylation 1784 0.5 +dHex(1)Hex(3)HexNAc(5)NeuAc(1)@T 1938.708659 1939.7802 H(122)C(75)N(6)O(52) 1938.708659 H(122)C(75)N(6)O(52) O-linked glycosylation 1784 0.5 +dHex(1)Hex(3)HexNAc(5)NeuAc(1)@N 1938.708659 1939.7802 H(122)C(75)N(6)O(52) 1938.708659 H(122)C(75)N(6)O(52) N-linked glycosylation 1784 0.5 +dHex(2)Hex(4)HexNAc(5)@S 1955.723975 1956.8074 H(125)C(76)N(5)O(53) 1955.723975 H(125)C(76)N(5)O(53) O-linked glycosylation 1785 0.5 +dHex(2)Hex(4)HexNAc(5)@T 1955.723975 1956.8074 H(125)C(76)N(5)O(53) 1955.723975 H(125)C(76)N(5)O(53) O-linked glycosylation 1785 0.5 +dHex(2)Hex(4)HexNAc(5)@N 1955.723975 1956.8074 H(125)C(76)N(5)O(53) 1955.723975 H(125)C(76)N(5)O(53) N-linked glycosylation 1785 0.5 +NQIGG@K 469.228496 469.4921 H(31)C(19)N(7)O(7) 0.0 Post-translational 1799 0.0 +Carboxyethylpyrrole@K 122.036779 122.1213 H(6)C(7)O(2) 0.0 Other 1800 0.0 +Fluorescein-tyramine@Y 493.116152 493.4637 H(19)C(29)N(1)O(7) 0.0 Chemical derivative 1801 0.0 +dHex(1)Hex(7)HexNAc(4)@N 2092.745164 2093.8955 H(132)C(80)N(4)O(59) 0.0 N-linked glycosylation 1840 0.0 +betaFNA@C 454.210387 454.5155 H(30)C(25)N(2)O(6) 0.0 Chemical derivative 1839 0.0 +betaFNA@K 454.210387 454.5155 H(30)C(25)N(2)O(6) 0.0 Chemical derivative 1839 0.0 +Brij58@Any_N-term 224.250401 224.4253 H(32)C(16) 0.0 Other 1838 0.0 +Brij35@Any_N-term 168.187801 168.319 H(24)C(12) 0.0 Other 1837 0.0 +Triton@Any_N-term 188.156501 188.3086 H(20)C(14) 0.0 Other 1836 0.0 +Triton@Any_C-term 188.156501 188.3086 H(20)C(14) 0.0 Other 1836 0.0 +Tween80@Any_C-term 263.237491 263.4381 H(31)C(18)O(1) 0.0 Other 1835 0.0 +Tween20@Any_N-term 165.164326 165.2951 H(21)C(12) 0.0 Other 1834 0.0 +Tris@N 104.071154 104.1277 H(10)C(4)N(1)O(2) 0.0 Artefact 1831 0.0 +Biotin-tyramide@Y 361.146012 361.4585 H(23)C(18)N(3)O(3)S(1) 0.0 Chemical derivative 1830 0.0 +Biotin-tyramide@W 361.146012 361.4585 H(23)C(18)N(3)O(3)S(1) 0.0 Chemical derivative 1830 0.0 +Biotin-tyramide@C 361.146012 361.4585 H(23)C(18)N(3)O(3)S(1) 0.0 Chemical derivative 1830 0.0 +LRGG+dimethyl@K 411.259403 411.4991 H(33)C(18)N(7)O(4) 0.0 Post-translational 1829 0.0 +RNPXL@R^Any_N-term 324.035867 324.1813 H(13)C(9)N(2)O(9)P(1) 324.035867 H(13)C(9)N(2)O(9)P(1) Other 1825 0.5 +RNPXL@K^Any_N-term 324.035867 324.1813 H(13)C(9)N(2)O(9)P(1) 324.035867 H(13)C(9)N(2)O(9)P(1) Other 1825 0.5 +GEE@Q 86.036779 86.0892 H(6)C(4)O(2) 0.0 Chemical derivative 1824 0.0 
+Glu->pyro-Glu+Methyl@E^Any_N-term -3.994915 -3.9887 C(1)O(-1) 0.0 Artefact 1826 0.0 +Glu->pyro-Glu+Methyl:2H(2)13C(1)@E^Any_N-term -0.979006 -0.9837 H(-2)2H(2)13C(1)O(-1) 0.0 Artefact 1827 0.0 +LRGG+methyl@K 397.243753 397.4725 H(31)C(17)N(7)O(4) 0.0 Post-translational 1828 0.0 +NP40@Any_N-term 220.182715 220.3505 H(24)C(15)O(1) 0.0 Other 1833 0.0 +IASD@C 452.034807 452.4582 H(16)C(18)N(2)O(8)S(2) 0.0 Chemical derivative 1832 0.0 +Biotin:Thermo-21328@K 389.090154 389.5564 H(23)C(15)N(3)O(3)S(3) 0.0 Chemical derivative 1841 0.0 +Biotin:Thermo-21328@Any_N-term 389.090154 389.5564 H(23)C(15)N(3)O(3)S(3) 0.0 Chemical derivative 1841 0.0 +PhosphoCytidine@Y 305.041287 305.1812 H(12)C(9)N(3)O(7)P(1) 0.0 Post-translational 1843 0.0 +PhosphoCytidine@T 305.041287 305.1812 H(12)C(9)N(3)O(7)P(1) 0.0 Post-translational 1843 0.0 +PhosphoCytidine@S 305.041287 305.1812 H(12)C(9)N(3)O(7)P(1) 0.0 Post-translational 1843 0.0 +AzidoF@F 41.001397 41.0122 H(-1)N(3) 0.0 Chemical derivative 1845 0.0 +Dimethylaminoethyl@C 71.073499 71.121 H(9)C(4)N(1) 0.0 Chemical derivative 1846 0.0 +Gluratylation@K 114.031694 114.0993 H(6)C(5)O(3) 0.0 Post-translational 1848 0.0 +hydroxyisobutyryl@K 86.036779 86.0892 H(6)C(4)O(2) 0.0 Post-translational 1849 CC(C)(O)C(=O)NCCCCC(N([Xe])[Xe])C([Rn])=O 0.0 +MeMePhosphorothioate@S 107.979873 108.0993 H(5)C(2)O(1)P(1)S(1) 0.0 Chemical derivative 1868 0.0 +Cation:Fe[III]@D 52.911464 52.8212 H(-3)Fe(1) 0.0 Artefact 1870 0.0 +Cation:Fe[III]@E 52.911464 52.8212 H(-3)Fe(1) 0.0 Artefact 1870 0.0 +Cation:Fe[III]@Any_C-term 52.911464 52.8212 H(-3)Fe(1) 0.0 Artefact 1870 0.0 +DTT@C 151.996571 152.2351 H(8)C(4)O(2)S(2) 0.0 Artefact 1871 0.0 +DYn-2@C 161.09664 161.2203 H(13)C(11)O(1) 0.0 Other 1872 0.0 +Xlink:DSSO[176]@K 176.01433 176.1903 H(8)C(6)O(4)S(1) 0.0 Chemical derivative 1878 0.0 +Xlink:DSSO[176]@Protein_N-term 176.01433 176.1903 H(8)C(6)O(4)S(1) 0.0 Chemical derivative 1878 0.0 +MesitylOxide@K 98.073165 98.143 H(10)C(6)O(1) 0.0 Chemical derivative 1873 0.0 +MesitylOxide@H 98.073165 98.143 H(10)C(6)O(1) 0.0 Chemical derivative 1873 0.0 +MesitylOxide@Protein_N-term 98.073165 98.143 H(10)C(6)O(1) 0.0 Chemical derivative 1873 0.0 +Xlink:DSS[259]@K 259.141973 259.2988 H(21)C(12)N(1)O(5) 0.0 Chemical derivative 1877 0.0 +Xlink:DSS[259]@Protein_N-term 259.141973 259.2988 H(21)C(12)N(1)O(5) 0.0 Chemical derivative 1877 0.0 +methylol@Y 30.010565 30.026 H(2)C(1)O(1) 0.0 Chemical derivative 1875 0.0 +methylol@W 30.010565 30.026 H(2)C(1)O(1) 0.0 Chemical derivative 1875 0.0 +methylol@K 30.010565 30.026 H(2)C(1)O(1) 0.0 Chemical derivative 1875 0.0 +Xlink:DSSO[175]@K 175.030314 175.2056 H(9)C(6)N(1)O(3)S(1) 0.0 Chemical derivative 1879 0.0 +Xlink:DSSO[175]@Protein_N-term 175.030314 175.2056 H(9)C(6)N(1)O(3)S(1) 0.0 Chemical derivative 1879 0.0 +Xlink:DSSO[279]@K 279.077658 279.3101 H(17)C(10)N(1)O(6)S(1) 0.0 Chemical derivative 1880 0.0 +Xlink:DSSO[279]@Protein_N-term 279.077658 279.3101 H(17)C(10)N(1)O(6)S(1) 0.0 Chemical derivative 1880 0.0 +Xlink:DSSO[54]@Protein_N-term 54.010565 54.0474 H(2)C(3)O(1) 0.0 Chemical derivative 1881 0.0 +Xlink:DSSO[54]@K 54.010565 54.0474 H(2)C(3)O(1) 0.0 Chemical derivative 1881 0.0 +Xlink:DSSO[86]@K 85.982635 86.1124 H(2)C(3)O(1)S(1) 0.0 Chemical derivative 1882 0.0 +Xlink:DSSO[86]@Protein_N-term 85.982635 86.1124 H(2)C(3)O(1)S(1) 0.0 Chemical derivative 1882 0.0 +Xlink:DSSO[104]@K 103.9932 104.1277 H(4)C(3)O(2)S(1) 0.0 Chemical derivative 1883 0.0 +Xlink:DSSO[104]@Protein_N-term 103.9932 104.1277 H(4)C(3)O(2)S(1) 0.0 Chemical derivative 1883 0.0 
+Xlink:BuUrBu[111]@S 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 1885 0.0 +Xlink:BuUrBu[111]@Protein_N-term 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 1885 0.0 +Xlink:BuUrBu[111]@K 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 1885 0.0 +Xlink:BuUrBu[111]@T 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 1885 0.0 +Xlink:BuUrBu[111]@Y 111.032028 111.0987 H(5)C(5)N(1)O(2) 0.0 Chemical derivative 1885 0.0 +Xlink:BuUrBu[85]@S 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Chemical derivative 1886 0.0 +Xlink:BuUrBu[85]@Protein_N-term 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Chemical derivative 1886 0.0 +Xlink:BuUrBu[85]@K 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Chemical derivative 1886 0.0 +Xlink:BuUrBu[85]@T 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Chemical derivative 1886 0.0 +Xlink:BuUrBu[85]@Y 85.052764 85.1045 H(7)C(4)N(1)O(1) 0.0 Chemical derivative 1886 0.0 +Xlink:BuUrBu[214]@S 214.095357 214.2185 H(14)C(9)N(2)O(4) 0.0 Chemical derivative 1888 0.0 +Xlink:BuUrBu[214]@Protein_N-term 214.095357 214.2185 H(14)C(9)N(2)O(4) 0.0 Chemical derivative 1888 0.0 +Xlink:BuUrBu[214]@K 214.095357 214.2185 H(14)C(9)N(2)O(4) 0.0 Chemical derivative 1888 0.0 +Xlink:BuUrBu[214]@T 214.095357 214.2185 H(14)C(9)N(2)O(4) 0.0 Chemical derivative 1888 0.0 +Xlink:BuUrBu[214]@Y 214.095357 214.2185 H(14)C(9)N(2)O(4) 0.0 Chemical derivative 1888 0.0 +Xlink:BuUrBu[317]@S 317.158686 317.3382 H(23)C(13)N(3)O(6) 0.0 Chemical derivative 1889 0.0 +Xlink:BuUrBu[317]@Protein_N-term 317.158686 317.3382 H(23)C(13)N(3)O(6) 0.0 Chemical derivative 1889 0.0 +Xlink:BuUrBu[317]@K 317.158686 317.3382 H(23)C(13)N(3)O(6) 0.0 Chemical derivative 1889 0.0 +Xlink:BuUrBu[317]@T 317.158686 317.3382 H(23)C(13)N(3)O(6) 0.0 Chemical derivative 1889 0.0 +Xlink:BuUrBu[317]@Y 317.158686 317.3382 H(23)C(13)N(3)O(6) 0.0 Chemical derivative 1889 0.0 +Xlink:DSSO[158]@K 158.003765 158.175 H(6)C(6)O(3)S(1) 0.0 Chemical derivative 1896 0.0 +Xlink:DSSO[158]@Protein_N-term 158.003765 158.175 H(6)C(6)O(3)S(1) 0.0 Chemical derivative 1896 0.0 +Xlink:DSS[138]@K 138.06808 138.1638 H(10)C(8)O(2) 0.0 Chemical derivative 1898 0.0 +Xlink:DSS[138]@Protein_N-term 138.06808 138.1638 H(10)C(8)O(2) 0.0 Chemical derivative 1898 0.0 +Xlink:BuUrBu[196]@S 196.084792 196.2032 H(12)C(9)N(2)O(3) 0.0 Chemical derivative 1899 0.0 +Xlink:BuUrBu[196]@Protein_N-term 196.084792 196.2032 H(12)C(9)N(2)O(3) 0.0 Chemical derivative 1899 0.0 +Xlink:BuUrBu[196]@K 196.084792 196.2032 H(12)C(9)N(2)O(3) 0.0 Chemical derivative 1899 0.0 +Xlink:BuUrBu[196]@T 196.084792 196.2032 H(12)C(9)N(2)O(3) 0.0 Chemical derivative 1899 0.0 +Xlink:BuUrBu[196]@Y 196.084792 196.2032 H(12)C(9)N(2)O(3) 0.0 Chemical derivative 1899 0.0 +Xlink:DTBP[172]@K 172.01289 172.2711 H(8)C(6)N(2)S(2) 0.0 Chemical derivative 1900 0.0 +Xlink:DTBP[172]@Protein_N-term 172.01289 172.2711 H(8)C(6)N(2)S(2) 0.0 Chemical derivative 1900 0.0 +Xlink:DST[114]@K 113.995309 114.0563 H(2)C(4)O(4) 0.0 Chemical derivative 1901 0.0 +Xlink:DST[114]@Protein_N-term 113.995309 114.0563 H(2)C(4)O(4) 0.0 Chemical derivative 1901 0.0 +Xlink:DTSSP[174]@K 173.980921 174.2406 H(6)C(6)O(2)S(2) 0.0 Chemical derivative 1902 0.0 +Xlink:DTSSP[174]@Protein_N-term 173.980921 174.2406 H(6)C(6)O(2)S(2) 0.0 Chemical derivative 1902 0.0 +Xlink:SMCC[219]@C 219.089543 219.2365 H(13)C(12)N(1)O(3) 0.0 Chemical derivative 1903 0.0 +Xlink:SMCC[219]@K 219.089543 219.2365 H(13)C(12)N(1)O(3) 0.0 Chemical derivative 1903 0.0 +Xlink:SMCC[219]@Protein_N-term 219.089543 219.2365 H(13)C(12)N(1)O(3) 0.0 Chemical derivative 
1903 0.0 +Cation:Al[III]@D 23.958063 23.9577 H(-3)Al(1) 0.0 Artefact 1910 0.0 +Cation:Al[III]@E 23.958063 23.9577 H(-3)Al(1) 0.0 Artefact 1910 0.0 +Cation:Al[III]@Any_C-term 23.958063 23.9577 H(-3)Al(1) 0.0 Artefact 1910 0.0 +Xlink:BS2G[113]@Protein_N-term 113.047679 113.1146 H(7)C(5)N(1)O(2) 0.0 Chemical derivative 1906 0.0 +Xlink:BS2G[113]@K 113.047679 113.1146 H(7)C(5)N(1)O(2) 0.0 Chemical derivative 1906 0.0 +Xlink:BS2G[114]@Protein_N-term 114.031694 114.0993 H(6)C(5)O(3) 0.0 Chemical derivative 1907 0.0 +Xlink:BS2G[114]@K 114.031694 114.0993 H(6)C(5)O(3) 0.0 Chemical derivative 1907 0.0 +Xlink:BS2G[217]@Protein_N-term 217.095023 217.2191 H(15)C(9)N(1)O(5) 0.0 Chemical derivative 1908 0.0 +Xlink:BS2G[217]@K 217.095023 217.2191 H(15)C(9)N(1)O(5) 0.0 Chemical derivative 1908 0.0 +Xlink:DMP[139]@K 139.110947 139.1982 H(13)C(7)N(3) 0.0 Chemical derivative 1911 0.0 +Xlink:DMP[139]@Protein_N-term 139.110947 139.1982 H(13)C(7)N(3) 0.0 Chemical derivative 1911 0.0 +Xlink:DMP[122]@K 122.084398 122.1677 H(10)C(7)N(2) 0.0 Chemical derivative 1912 0.0 +Xlink:DMP[122]@Protein_N-term 122.084398 122.1677 H(10)C(7)N(2) 0.0 Chemical derivative 1912 0.0 +glyoxalAGE@R 21.98435 22.0055 H(-2)C(2) 0.0 Post-translational 1913 0.0 +Met->AspSA@M -32.008456 -32.1081 H(-4)C(-1)O(1)S(-1) 0.0 Chemical derivative 1914 0.0 +Decarboxylation@D -30.010565 -30.026 H(-2)C(-1)O(-1) 0.0 Chemical derivative 1915 0.0 +Decarboxylation@E -30.010565 -30.026 H(-2)C(-1)O(-1) 0.0 Chemical derivative 1915 0.0 +Aspartylurea@H -10.031969 -10.0412 H(-2)C(-1)N(-2)O(2) 0.0 Chemical derivative 1916 0.0 +Formylasparagine@H 4.97893 4.9735 H(-1)C(-1)N(-1)O(2) 0.0 Chemical derivative 1917 0.0 +Carbonyl@S 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@R 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@Q 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@L 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@I 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@E 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@A 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Carbonyl@V 13.979265 13.9835 H(-2)O(1) 0.0 Chemical derivative 1918 0.0 +Pro->HAVA@P 18.010565 18.0153 H(2)O(1) 0.0 Chemical derivative 1922 0.0 +AFB1_Dialdehyde@K 310.047738 310.2577 H(10)C(17)O(6) 0.0 Post-translational 1920 0.0 +Delta:H(-4)O(2)@W 27.958529 27.967 H(-4)O(2) 0.0 Chemical derivative 1923 0.0 +Delta:H(-4)O(3)@W 43.953444 43.9664 H(-4)O(3) 0.0 Chemical derivative 1924 0.0 +Delta:O(4)@W 63.979659 63.9976 O(4) 0.0 Artefact 1925 0.0 +Delta:H(4)C(3)O(2)@K 72.021129 72.0627 H(4)C(3)O(2) 0.0 Artefact 1926 0.0 +Delta:H(4)C(5)O(1)@R 80.026215 80.0847 H(4)C(5)O(1) 0.0 Chemical derivative 1927 0.0 +Delta:H(10)C(8)O(1)@K 122.073165 122.1644 H(10)C(8)O(1) 0.0 Artefact 1928 0.0 +Delta:H(6)C(7)O(4)@R 154.026609 154.1201 H(6)C(7)O(4) 0.0 Chemical derivative 1929 0.0 +Hex(2)Sulf(1)@T 404.062462 404.3444 H(20)C(12)O(13)S(1) 404.062462 H(20)C(12)O(13)S(1) O-linked glycosylation 1932 0.5 +Hex(2)Sulf(1)@S 404.062462 404.3444 H(20)C(12)O(13)S(1) 404.062462 H(20)C(12)O(13)S(1) O-linked glycosylation 1932 0.5 +Pent(2)@T 264.084518 264.2292 H(16)C(10)O(8) 264.084518 H(16)C(10)O(8) O-linked glycosylation 1930 0.5 +Pent(2)@S 264.084518 264.2292 H(16)C(10)O(8) 264.084518 H(16)C(10)O(8) O-linked glycosylation 1930 0.5 +Pent(1)HexNAc(1)@T 335.121631 335.3071 H(21)C(13)N(1)O(9) 335.121631 H(21)C(13)N(1)O(9) O-linked glycosylation 1931 0.5 +Pent(1)HexNAc(1)@S 
335.121631 335.3071 H(21)C(13)N(1)O(9) 335.121631 H(21)C(13)N(1)O(9) O-linked glycosylation 1931 0.5 +Hex(1)Pent(2)Me(1)@T 440.152991 440.3964 H(28)C(17)O(13) 440.152991 H(28)C(17)O(13) O-linked glycosylation 1933 0.5 +Hex(1)Pent(2)Me(1)@S 440.152991 440.3964 H(28)C(17)O(13) 440.152991 H(28)C(17)O(13) O-linked glycosylation 1933 0.5 +HexNAc(2)Sulf(1)@S 486.11556 486.4482 H(26)C(16)N(2)O(13)S(1) 486.11556 H(26)C(16)N(2)O(13)S(1) O-linked glycosylation 1934 0.5 +HexNAc(2)Sulf(1)@T 486.11556 486.4482 H(26)C(16)N(2)O(13)S(1) 486.11556 H(26)C(16)N(2)O(13)S(1) O-linked glycosylation 1934 0.5 +Hex(1)Pent(3)Me(1)@S 572.19525 572.511 H(36)C(22)O(17) 572.19525 H(36)C(22)O(17) O-linked glycosylation 1935 0.5 +Hex(1)Pent(3)Me(1)@T 572.19525 572.511 H(36)C(22)O(17) 572.19525 H(36)C(22)O(17) O-linked glycosylation 1935 0.5 +Hex(2)Pent(2)@S 588.190165 588.5104 H(36)C(22)O(18) 588.190165 H(36)C(22)O(18) O-linked glycosylation 1936 0.5 +Hex(2)Pent(2)@T 588.190165 588.5104 H(36)C(22)O(18) 588.190165 H(36)C(22)O(18) O-linked glycosylation 1936 0.5 +Hex(2)Pent(2)Me(1)@S 602.205815 602.537 H(38)C(23)O(18) 602.205815 H(38)C(23)O(18) O-linked glycosylation 1937 0.5 +Hex(2)Pent(2)Me(1)@T 602.205815 602.537 H(38)C(23)O(18) 602.205815 H(38)C(23)O(18) O-linked glycosylation 1937 0.5 +Hex(4)HexA(1)@S 824.243382 824.6865 H(48)C(30)O(26) 824.243382 H(48)C(30)O(26) O-linked glycosylation 1938 0.5 +Hex(4)HexA(1)@T 824.243382 824.6865 H(48)C(30)O(26) 824.243382 H(48)C(30)O(26) O-linked glycosylation 1938 0.5 +Hex(2)HexNAc(1)Pent(1)HexA(1)@S 835.259366 835.7125 H(49)C(31)N(1)O(25) 835.259366 H(49)C(31)N(1)O(25) O-linked glycosylation 1939 0.5 +Hex(2)HexNAc(1)Pent(1)HexA(1)@T 835.259366 835.7125 H(49)C(31)N(1)O(25) 835.259366 H(49)C(31)N(1)O(25) O-linked glycosylation 1939 0.5 +Hex(3)HexNAc(1)HexA(1)@S 865.269931 865.7384 H(51)C(32)N(1)O(26) 865.269931 H(51)C(32)N(1)O(26) O-linked glycosylation 1940 0.5 +Hex(3)HexNAc(1)HexA(1)@T 865.269931 865.7384 H(51)C(32)N(1)O(26) 865.269931 H(51)C(32)N(1)O(26) O-linked glycosylation 1940 0.5 +Hex(1)HexNAc(2)dHex(2)Sulf(1)@S 940.284201 940.8712 H(56)C(34)N(2)O(26)S(1) 940.284201 H(56)C(34)N(2)O(26)S(1) O-linked glycosylation 1941 0.5 +Hex(1)HexNAc(2)dHex(2)Sulf(1)@T 940.284201 940.8712 H(56)C(34)N(2)O(26)S(1) 940.284201 H(56)C(34)N(2)O(26)S(1) O-linked glycosylation 1941 0.5 +HexA(2)HexNAc(3)@S 961.302294 961.8258 H(55)C(36)N(3)O(27) 961.302294 H(55)C(36)N(3)O(27) O-linked glycosylation 1942 0.5 +HexA(2)HexNAc(3)@T 961.302294 961.8258 H(55)C(36)N(3)O(27) 961.302294 H(55)C(36)N(3)O(27) O-linked glycosylation 1942 0.5 +dHex(1)Hex(4)HexA(1)@T 970.301291 970.8277 H(58)C(36)O(30) 970.301291 H(58)C(36)O(30) O-linked glycosylation 1943 0.5 +dHex(1)Hex(4)HexA(1)@S 970.301291 970.8277 H(58)C(36)O(30) 970.301291 H(58)C(36)O(30) O-linked glycosylation 1943 0.5 +Hex(5)HexA(1)@S 986.296206 986.8271 H(58)C(36)O(31) 986.296206 H(58)C(36)O(31) O-linked glycosylation 1944 0.5 +Hex(5)HexA(1)@T 986.296206 986.8271 H(58)C(36)O(31) 986.296206 H(58)C(36)O(31) O-linked glycosylation 1944 0.5 +Hex(4)HexA(1)HexNAc(1)@T 1027.322755 1027.879 H(61)C(38)N(1)O(31) 1027.322755 H(61)C(38)N(1)O(31) O-linked glycosylation 1945 0.5 +Hex(4)HexA(1)HexNAc(1)@S 1027.322755 1027.879 H(61)C(38)N(1)O(31) 1027.322755 H(61)C(38)N(1)O(31) O-linked glycosylation 1945 0.5 +dHex(3)Hex(3)HexNAc(1)@T 1127.41157 1128.0379 H(73)C(44)N(1)O(32) 1127.41157 H(73)C(44)N(1)O(32) O-linked glycosylation 1946 0.5 +dHex(3)Hex(3)HexNAc(1)@S 1127.41157 1128.0379 H(73)C(44)N(1)O(32) 1127.41157 H(73)C(44)N(1)O(32) O-linked glycosylation 1946 0.5 
+Hex(6)HexNAc(1)@N 1175.396314 1176.0361 H(73)C(44)N(1)O(35) 1175.396314 H(73)C(44)N(1)O(35) N-linked glycosylation 1947 0.5 +Hex(1)HexNAc(4)dHex(1)Sulf(1)@T 1200.385037 1201.1151 H(72)C(44)N(4)O(32)S(1) 1200.385037 H(72)C(44)N(4)O(32)S(1) O-linked glycosylation 1948 0.5 +Hex(1)HexNAc(4)dHex(1)Sulf(1)@S 1200.385037 1201.1151 H(72)C(44)N(4)O(32)S(1) 1200.385037 H(72)C(44)N(4)O(32)S(1) O-linked glycosylation 1948 0.5 +dHex(1)Hex(2)HexNAc(1)NeuAc(2)@T 1255.433762 1256.1241 H(77)C(48)N(3)O(35) 1255.433762 H(77)C(48)N(3)O(35) O-linked glycosylation 1949 0.5 +dHex(1)Hex(2)HexNAc(1)NeuAc(2)@S 1255.433762 1256.1241 H(77)C(48)N(3)O(35) 1255.433762 H(77)C(48)N(3)O(35) O-linked glycosylation 1949 0.5 +dHex(3)Hex(3)HexNAc(2)@T 1330.490942 1331.2304 H(86)C(52)N(2)O(37) 1330.490942 H(86)C(52)N(2)O(37) O-linked glycosylation 1950 0.5 +dHex(3)Hex(3)HexNAc(2)@S 1330.490942 1331.2304 H(86)C(52)N(2)O(37) 1330.490942 H(86)C(52)N(2)O(37) O-linked glycosylation 1950 0.5 +dHex(2)Hex(1)HexNAc(4)Sulf(1)@T 1346.442946 1347.2563 H(82)C(50)N(4)O(36)S(1) 1346.442946 H(82)C(50)N(4)O(36)S(1) O-linked glycosylation 1951 0.5 +dHex(2)Hex(1)HexNAc(4)Sulf(1)@S 1346.442946 1347.2563 H(82)C(50)N(4)O(36)S(1) 1346.442946 H(82)C(50)N(4)O(36)S(1) O-linked glycosylation 1951 0.5 +dHex(1)Hex(2)HexNAc(4)Sulf(2)@T 1442.394675 1443.3189 H(82)C(50)N(4)O(40)S(2) 1442.394675 H(82)C(50)N(4)O(40)S(2) O-linked glycosylation 1952 0.5 +dHex(1)Hex(2)HexNAc(4)Sulf(2)@S 1442.394675 1443.3189 H(82)C(50)N(4)O(40)S(2) 1442.394675 H(82)C(50)N(4)O(40)S(2) O-linked glycosylation 1952 0.5 +Hex(9)@N 1458.475412 1459.2654 H(90)C(54)O(45) 1458.475412 H(90)C(54)O(45) N-linked glycosylation 1953 0.5 +dHex(2)Hex(3)HexNAc(3)Sulf(1)@T 1467.469221 1468.345 H(89)C(54)N(3)O(41)S(1) 1467.469221 H(89)C(54)N(3)O(41)S(1) O-linked glycosylation 1954 0.5 +dHex(2)Hex(3)HexNAc(3)Sulf(1)@S 1467.469221 1468.345 H(89)C(54)N(3)O(41)S(1) 1467.469221 H(89)C(54)N(3)O(41)S(1) O-linked glycosylation 1954 0.5 +dHex(2)Hex(5)HexNAc(2)Me(1)@T 1522.554331 1523.397 H(98)C(59)N(2)O(43) 1522.554331 H(98)C(59)N(2)O(43) O-linked glycosylation 1955 0.5 +dHex(2)Hex(5)HexNAc(2)Me(1)@S 1522.554331 1523.397 H(98)C(59)N(2)O(43) 1522.554331 H(98)C(59)N(2)O(43) O-linked glycosylation 1955 0.5 +dHex(2)Hex(2)HexNAc(4)Sulf(2)@T 1588.452584 1589.4601 H(92)C(56)N(4)O(44)S(2) 1588.452584 H(92)C(56)N(4)O(44)S(2) O-linked glycosylation 1956 0.5 +dHex(2)Hex(2)HexNAc(4)Sulf(2)@S 1588.452584 1589.4601 H(92)C(56)N(4)O(44)S(2) 1588.452584 H(92)C(56)N(4)O(44)S(2) O-linked glycosylation 1956 0.5 +Hex(9)HexNAc(1)@N 1661.554784 1662.4579 H(103)C(62)N(1)O(50) 1661.554784 H(103)C(62)N(1)O(50) N-linked glycosylation 1957 0.5 +dHex(3)Hex(2)HexNAc(4)Sulf(2)@S 1734.510493 1735.6013 H(102)C(62)N(4)O(48)S(2) 1734.510493 H(102)C(62)N(4)O(48)S(2) O-linked glycosylation 1958 0.5 +dHex(3)Hex(2)HexNAc(4)Sulf(2)@T 1734.510493 1735.6013 H(102)C(62)N(4)O(48)S(2) 1734.510493 H(102)C(62)N(4)O(48)S(2) O-linked glycosylation 1958 0.5 +Hex(4)HexNAc(4)NeuGc(1)@N 1767.619116 1768.5865 H(109)C(67)N(5)O(49) 1767.619116 H(109)C(67)N(5)O(49) N-linked glycosylation 1959 0.5 +Hex(4)HexNAc(4)NeuGc(1)@S 1767.619116 1768.5865 H(109)C(67)N(5)O(49) 1767.619116 H(109)C(67)N(5)O(49) O-linked glycosylation 1959 0.5 +Hex(4)HexNAc(4)NeuGc(1)@T 1767.619116 1768.5865 H(109)C(67)N(5)O(49) 1767.619116 H(109)C(67)N(5)O(49) O-linked glycosylation 1959 0.5 +dHex(4)Hex(3)HexNAc(2)NeuAc(1)@T 1767.644268 1768.6262 H(113)C(69)N(3)O(49) 1767.644268 H(113)C(69)N(3)O(49) O-linked glycosylation 1960 0.5 +dHex(4)Hex(3)HexNAc(2)NeuAc(1)@S 1767.644268 1768.6262 
H(113)C(69)N(3)O(49) 1767.644268 H(113)C(69)N(3)O(49) O-linked glycosylation 1960 0.5 +Hex(3)HexNAc(5)NeuAc(1)@N 1792.65075 1793.639 H(112)C(69)N(6)O(48) 1792.65075 H(112)C(69)N(6)O(48) N-linked glycosylation 1961 0.5 +Hex(10)HexNAc(1)@N 1823.607608 1824.5985 H(113)C(68)N(1)O(55) 1823.607608 H(113)C(68)N(1)O(55) N-linked glycosylation 1962 0.5 +dHex(1)Hex(8)HexNAc(2)@N 1848.639242 1849.651 H(116)C(70)N(2)O(54) 1848.639242 H(116)C(70)N(2)O(54) N-linked glycosylation 1963 0.5 +Hex(3)HexNAc(4)NeuAc(2)@N 1880.666794 1881.701 H(116)C(72)N(6)O(51) 1880.666794 H(116)C(72)N(6)O(51) N-linked glycosylation 1964 0.5 +dHex(2)Hex(3)HexNAc(4)NeuAc(1)@N 1881.687195 1882.7289 H(119)C(73)N(5)O(51) 1881.687195 H(119)C(73)N(5)O(51) N-linked glycosylation 1965 0.5 +dHex(2)Hex(2)HexNAc(6)Sulf(1)@S 1914.654515 1915.7819 H(118)C(72)N(6)O(51)S(1) 1914.654515 H(118)C(72)N(6)O(51)S(1) O-linked glycosylation 1966 0.5 +dHex(2)Hex(2)HexNAc(6)Sulf(1)@T 1914.654515 1915.7819 H(118)C(72)N(6)O(51)S(1) 1914.654515 H(118)C(72)N(6)O(51)S(1) O-linked glycosylation 1966 0.5 +Hex(5)HexNAc(4)NeuAc(1)Ac(1)@N 1955.687589 1956.7643 H(121)C(75)N(5)O(54) 1955.687589 H(121)C(75)N(5)O(54) N-linked glycosylation 1967 0.5 +Hex(3)HexNAc(3)NeuAc(3)@S 1968.682838 1969.7631 H(120)C(75)N(6)O(54) 1968.682838 H(120)C(75)N(6)O(54) O-linked glycosylation 1968 0.5 +Hex(3)HexNAc(3)NeuAc(3)@T 1968.682838 1969.7631 H(120)C(75)N(6)O(54) 1968.682838 H(120)C(75)N(6)O(54) O-linked glycosylation 1968 0.5 +Hex(5)HexNAc(4)NeuAc(1)Ac(2)@N 1997.698154 1998.801 H(123)C(77)N(5)O(55) 1997.698154 H(123)C(77)N(5)O(55) N-linked glycosylation 1969 0.5 +Unknown:162@Any_C-term 162.125595 162.2267 H(18)C(8)O(3) 0.0 Artefact 1970 0.0 +Unknown:162@E 162.125595 162.2267 H(18)C(8)O(3) 0.0 Artefact 1970 0.0 +Unknown:162@D 162.125595 162.2267 H(18)C(8)O(3) 0.0 Artefact 1970 0.0 +Unknown:162@Any_N-term 162.125595 162.2267 H(18)C(8)O(3) 0.0 Artefact 1970 0.0 +Unknown:177@D 176.744957 176.4788 H(-7)O(1)Fe(3) 0.0 Artefact 1971 0.0 +Unknown:177@E 176.744957 176.4788 H(-7)O(1)Fe(3) 0.0 Artefact 1971 0.0 +Unknown:177@Any_C-term 176.744957 176.4788 H(-7)O(1)Fe(3) 0.0 Artefact 1971 0.0 +Unknown:177@Any_N-term 176.744957 176.4788 H(-7)O(1)Fe(3) 0.0 Artefact 1971 0.0 +Unknown:210@D 210.16198 210.3126 H(22)C(13)O(2) 0.0 Artefact 1972 0.0 +Unknown:210@E 210.16198 210.3126 H(22)C(13)O(2) 0.0 Artefact 1972 0.0 +Unknown:210@Any_C-term 210.16198 210.3126 H(22)C(13)O(2) 0.0 Artefact 1972 0.0 +Unknown:210@Any_N-term 210.16198 210.3126 H(22)C(13)O(2) 0.0 Artefact 1972 0.0 +Unknown:216@D 216.099774 216.231 H(16)C(10)O(5) 0.0 Artefact 1973 0.0 +Unknown:216@E 216.099774 216.231 H(16)C(10)O(5) 0.0 Artefact 1973 0.0 +Unknown:216@Any_C-term 216.099774 216.231 H(16)C(10)O(5) 0.0 Artefact 1973 0.0 +Unknown:216@Any_N-term 216.099774 216.231 H(16)C(10)O(5) 0.0 Artefact 1973 0.0 +Unknown:234@D 234.073953 234.2033 H(14)C(9)O(7) 0.0 Artefact 1974 0.0 +Unknown:234@E 234.073953 234.2033 H(14)C(9)O(7) 0.0 Artefact 1974 0.0 +Unknown:234@Any_C-term 234.073953 234.2033 H(14)C(9)O(7) 0.0 Artefact 1974 0.0 +Unknown:234@Any_N-term 234.073953 234.2033 H(14)C(9)O(7) 0.0 Artefact 1974 0.0 +Unknown:248@D 248.19876 248.359 H(28)C(13)O(4) 0.0 Artefact 1975 0.0 +Unknown:248@E 248.19876 248.359 H(28)C(13)O(4) 0.0 Artefact 1975 0.0 +Unknown:248@Any_C-term 248.19876 248.359 H(28)C(13)O(4) 0.0 Artefact 1975 0.0 +Unknown:248@Any_N-term 248.19876 248.359 H(28)C(13)O(4) 0.0 Artefact 1975 0.0 +Unknown:250@D 249.981018 250.2075 H(4)C(10)N(1)O(5)S(1) 0.0 Artefact 1976 0.0 +Unknown:250@E 249.981018 250.2075 H(4)C(10)N(1)O(5)S(1) 0.0 
Artefact 1976 0.0 +Unknown:250@Any_C-term 249.981018 250.2075 H(4)C(10)N(1)O(5)S(1) 0.0 Artefact 1976 0.0 +Unknown:250@Any_N-term 249.981018 250.2075 H(4)C(10)N(1)O(5)S(1) 0.0 Artefact 1976 0.0 +Unknown:302@D 301.986514 302.2656 H(8)C(4)N(5)O(7)S(2) 0.0 Artefact 1977 0.0 +Unknown:302@E 301.986514 302.2656 H(8)C(4)N(5)O(7)S(2) 0.0 Artefact 1977 0.0 +Unknown:302@Any_C-term 301.986514 302.2656 H(8)C(4)N(5)O(7)S(2) 0.0 Artefact 1977 0.0 +Unknown:302@Any_N-term 301.986514 302.2656 H(8)C(4)N(5)O(7)S(2) 0.0 Artefact 1977 0.0 +Unknown:306@D 306.095082 306.2659 H(18)C(12)O(9) 0.0 Artefact 1978 0.0 +Unknown:306@E 306.095082 306.2659 H(18)C(12)O(9) 0.0 Artefact 1978 0.0 +Unknown:306@Any_C-term 306.095082 306.2659 H(18)C(12)O(9) 0.0 Artefact 1978 0.0 +Unknown:306@Any_N-term 306.095082 306.2659 H(18)C(12)O(9) 0.0 Artefact 1978 0.0 +Unknown:420@Any_N-term 420.051719 420.5888 H(24)C(12)N(2)O(6)S(4) 420.051719 H(24)C(12)N(2)O(6)S(4) Artefact 1979 0.5 +Unknown:420@Any_C-term 420.051719 420.5888 H(24)C(12)N(2)O(6)S(4) 420.051719 H(24)C(12)N(2)O(6)S(4) Artefact 1979 0.5 +Diethylphosphothione@Y 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +Diethylphosphothione@T 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +Diethylphosphothione@S 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +Diethylphosphothione@K 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +Diethylphosphothione@H 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +Diethylphosphothione@C 152.006087 152.1518 H(9)C(4)O(2)P(1)S(1) 0.0 Chemical derivative 1986 0.0 +CIGG@K 330.136176 330.4032 H(22)C(13)N(4)O(4)S(1) 0.0 Post-translational 1990 0.0 +GNLLFLACYCIGG@K 1324.6308 1325.598 H(92)C(61)N(14)O(15)S(2) 0.0 Post-translational 1991 0.0 +Dimethylphosphothione@S 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +Dimethylphosphothione@K 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +Dimethylphosphothione@H 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +Dimethylphosphothione@C 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +Dimethylphosphothione@Y 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +Dimethylphosphothione@T 123.974787 124.0987 H(5)C(2)O(2)P(1)S(1) 0.0 Chemical derivative 1987 0.0 +monomethylphosphothione@S 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +monomethylphosphothione@K 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +monomethylphosphothione@H 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +monomethylphosphothione@C 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +monomethylphosphothione@T 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +monomethylphosphothione@Y 109.959137 110.0721 H(3)C(1)O(2)P(1)S(1) 0.0 Chemical derivative 1989 0.0 +TMPP-Ac:13C(9)@Y 581.211328 581.474 H(33)C(20)13C(9)O(10)P(1) 0.0 Artefact 1993 0.0 +TMPP-Ac:13C(9)@K 581.211328 581.474 H(33)C(20)13C(9)O(10)P(1) 0.0 Artefact 1993 0.0 +TMPP-Ac:13C(9)@Any_N-term 581.211328 581.474 H(33)C(20)13C(9)O(10)P(1) 0.0 Chemical derivative 1993 0.0 +Lys+O(2)@H 160.084792 160.1711 H(12)C(6)N(2)O(3) 0.0 Post-translational 2036 0.0 +ZQG@K 320.100836 320.2973 H(16)C(15)N(2)O(6) 134.036779 H(6)C(8)O(2) Chemical derivative 2001 0.5 +Xlink:DST[56]@Protein_N-term 55.989829 56.0202 C(2)O(2) 0.0 
Chemical derivative 1999 0.0 +Xlink:DST[56]@K 55.989829 56.0202 C(2)O(2) 0.0 Chemical derivative 1999 0.0 +Haloxon@Y 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Haloxon@T 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Haloxon@S 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Haloxon@K 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Haloxon@H 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Haloxon@C 203.950987 204.9763 H(7)C(4)O(3)P(1)Cl(2) 0.0 Chemical derivative 2006 0.0 +Methamidophos-O@Y 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Methamidophos-O@T 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Methamidophos-O@S 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Methamidophos-O@K 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Methamidophos-O@H 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Methamidophos-O@C 92.997965 93.0217 H(4)C(1)N(1)O(2)P(1) 0.0 Chemical derivative 2008 0.0 +Nitrene@Y 12.995249 12.9988 H(-1)N(1) 0.0 Artefact 2014 0.0 +shTMT@Any_N-term 235.176741 235.2201 H(20)C(3)13C(9)15N(2)O(2) 0.0 Chemical derivative 2015 0.0 +shTMT@Protein_N-term 235.176741 235.2201 H(20)C(3)13C(9)15N(2)O(2) 0.0 Chemical derivative 2015 0.0 +shTMT@K 235.176741 235.2201 H(20)C(3)13C(9)15N(2)O(2) 0.0 Chemical derivative 2015 0.0 +TMTpro@T 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro@S 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro@H 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro@Protein_N-term 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro@Any_N-term 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro@K 304.207146 304.3127 H(25)C(8)13C(7)N(1)15N(2)O(3) 0.0 Isotopic label 2016 0.0 +TMTpro_zero@S 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +TMTpro_zero@H 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +TMTpro_zero@Protein_N-term 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +TMTpro_zero@Any_N-term 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +TMTpro_zero@K 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +TMTpro_zero@T 295.189592 295.3773 H(25)C(15)N(3)O(3) 0.0 Chemical derivative 2017 0.0 +3-hydroxybenzyl-phosphate@S 186.008196 186.1018 H(7)C(7)O(4)P(1) 0.0 Chemical derivative 2041 0.0 +3-hydroxybenzyl-phosphate@K 186.008196 186.1018 H(7)C(7)O(4)P(1) 0.0 Chemical derivative 2041 0.0 +3-hydroxybenzyl-phosphate@T 186.008196 186.1018 H(7)C(7)O(4)P(1) 0.0 Chemical derivative 2041 0.0 +3-hydroxybenzyl-phosphate@Y 186.008196 186.1018 H(7)C(7)O(4)P(1) 0.0 Chemical derivative 2041 0.0 +Hex(6)HexNAc(5)NeuAc(3)@N 2861.000054 2862.5699 H(176)C(109)N(8)O(79) 2861.000054 H(176)C(109)N(8)O(79) N-linked glycosylation 2028 0.5 +Andro-H2O@C 332.19876 332.4339 H(28)C(20)O(4) 0.0 Chemical derivative 2025 0.0 +His+O(2)@H 169.048741 169.1381 H(7)C(6)N(3)O(3) 0.0 Post-translational 2027 0.0 +Hex(7)HexNAc(6)@S 2352.846 2354.1393 H(148)C(90)N(6)O(65) 2352.846 H(148)C(90)N(6)O(65) O-linked glycosylation 2029 0.5 +Hex(7)HexNAc(6)@T 2352.846 2354.1393 H(148)C(90)N(6)O(65) 2352.846 H(148)C(90)N(6)O(65) O-linked glycosylation 2029 0.5 
+Hex(7)HexNAc(6)@N 2352.846 2354.1393 H(148)C(90)N(6)O(65) 2352.846 H(148)C(90)N(6)O(65) N-linked glycosylation 2029 0.5 +Met+O(2)@H 163.030314 163.1949 H(9)C(5)N(1)O(3)S(1) 0.0 Chemical derivative 2033 0.0 +Gly+O(2)@H 89.011293 89.0501 H(3)C(2)N(1)O(3) 0.0 Chemical derivative 2034 0.0 +Glu+O(2)@H 161.032422 161.1128 H(7)C(5)N(1)O(5) 0.0 Post-translational 2037 0.0 +MBS+peptide@C 1482.77 1483.7597 H(108)C(81)N(7)O(19) 0.0 Chemical derivative 2040 0.0 +phenyl-phosphate@S 155.997631 156.0759 H(5)C(6)O(3)P(1) 0.0 Chemical derivative 2042 0.0 +phenyl-phosphate@K 155.997631 156.0759 H(5)C(6)O(3)P(1) 0.0 Chemical derivative 2042 0.0 +phenyl-phosphate@T 155.997631 156.0759 H(5)C(6)O(3)P(1) 0.0 Chemical derivative 2042 0.0 +phenyl-phosphate@Y 155.997631 156.0759 H(5)C(6)O(3)P(1) 0.0 Chemical derivative 2042 0.0 +RBS-ID_Uridine@Y 244.069536 244.2014 H(12)C(9)N(2)O(6) 0.0 Other 2044 0.0 +pRBS-ID_4-thiouridine@F 226.058972 226.1861 H(10)C(9)N(2)O(5) 132.042259 H(8)C(5)O(4) Other 2054 0.5 +Biotin:Aha-PC@M 690.24316 690.7246 H(38)C(29)N(8)O(10)S(1) 0.0 Chemical derivative 2053 0.0 +DBIA@C 296.184841 296.3654 H(24)C(14)N(4)O(3) 0.0 Chemical derivative 2062 0.0 +pRBS-ID_6-thioguanosine@W 265.081104 265.2254 H(11)C(10)N(5)O(4) 132.042259 H(8)C(5)O(4) Other 2055 0.5 +6C-CysPAT@Y 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@T 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@S 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@E 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@D 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@H 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@Any_N-term 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@K 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Artefact 2057 0.0 +6C-CysPAT@C 221.081695 221.1907 H(16)C(8)N(1)O(4)P(1) 0.0 Chemical derivative 2057 0.0 +Xlink:DSPP[210]@Protein_N-term 209.97181 210.0802 H(3)C(8)O(5)P(1) 0.0 Chemical derivative 2058 0.0 +Xlink:DSPP[210]@K 209.97181 210.0802 H(3)C(8)O(5)P(1) 0.0 Chemical derivative 2058 0.0 +Xlink:DSPP[228]@Protein_N-term 227.982375 228.0955 H(5)C(8)O(6)P(1) 0.0 Chemical derivative 2059 0.0 +Xlink:DSPP[228]@K 227.982375 228.0955 H(5)C(8)O(6)P(1) 0.0 Chemical derivative 2059 0.0 +Xlink:DSPP[331]@Protein_N-term 331.045704 331.2152 H(14)C(12)N(1)O(8)P(1) 0.0 Chemical derivative 2060 0.0 +Xlink:DSPP[331]@K 331.045704 331.2152 H(14)C(12)N(1)O(8)P(1) 0.0 Chemical derivative 2060 0.0 +Xlink:DSPP[226]@K 225.990534 226.1028 H(5)C(8)N(1)O(5)P(1) 0.0 Chemical derivative 2061 0.0 +Xlink:DSPP[226]@Protein_N-term 225.990534 226.1028 H(5)C(8)N(1)O(5)P(1) 0.0 Chemical derivative 2061 0.0 +N6pAMP@Y 367.06817 367.2539 H(14)C(13)N(5)O(6)P(1) 0.0 Chemical derivative 2073 0.0 +N6pAMP@T 367.06817 367.2539 H(14)C(13)N(5)O(6)P(1) 0.0 Chemical derivative 2073 0.0 +N6pAMP@S 367.06817 367.2539 H(14)C(13)N(5)O(6)P(1) 0.0 Chemical derivative 2073 0.0 +DABCYL-C2-maleimide@K 391.16444 391.4231 H(21)C(21)N(5)O(3) 251.105862 H(13)C(15)N(3)O(1) Chemical derivative 2074 0.5 +DABCYL-C2-maleimide@C 391.16444 391.4231 H(21)C(21)N(5)O(3) 251.105862 H(13)C(15)N(3)O(1) Chemical derivative 2074 0.5 +Ethynyl@C 24.0 24.0214 C(2) 0.0 Chemical derivative 2081 0.0 +Mono_Nγ-propargyl-L-Gln_desthiobiotin@C 596.328211 596.6764 H(44)C(26)N(8)O(8) 0.0 Chemical derivative 2067 0.0 +Di_L-Glu_Nγ-propargyl-L-Gln_desthiobiotin@E 709.375889 709.7909 H(51)C(31)N(9)O(10) 469.301268 H(39)C(21)N(7)O(5) 
Chemical derivative 2068 0.5 +Di_L-Glu_Nγ-propargyl-L-Gln_desthiobiotin@D 709.375889 709.7909 H(51)C(31)N(9)O(10) 469.301268 H(39)C(21)N(7)O(5) Chemical derivative 2068 0.5 +Di_L-Gln_Nγ-propargyl-L-Gln_desthiobiotin@E 708.391873 708.8062 H(52)C(31)N(10)O(9) 726.402438 H(54)C(31)N(10)O(10) Chemical derivative 2069 0.5 +Di_L-Gln_Nγ-propargyl-L-Gln_desthiobiotin@D 708.391873 708.8062 H(52)C(31)N(10)O(9) 726.402438 H(54)C(31)N(10)O(10) Chemical derivative 2069 0.5 +L-Gln@D 128.058578 128.1292 H(8)C(5)N(2)O(2) 0.0 Post-translational 2070 0.0 +L-Gln@E 128.058578 128.1292 H(8)C(5)N(2)O(2) 0.0 Post-translational 2070 0.0 +Glyceroyl@Protein_N-term 88.016044 88.0621 H(4)C(3)O(3) 0.0 Post-translational 2072 0.0 +Glyceroyl@K 88.016044 88.0621 H(4)C(3)O(3) 0.0 Post-translational 2072 0.0 +NBF@R 163.001791 163.0904 H(1)C(6)N(3)O(3) 0.0 Chemical derivative 2079 0.0 +NBF@K 163.001791 163.0904 H(1)C(6)N(3)O(3) 0.0 Chemical derivative 2079 0.0 +NBF@C 163.001791 163.0904 H(1)C(6)N(3)O(3) 0.0 Chemical derivative 2079 0.0 +DCP@C 168.078644 168.1898 H(12)C(9)O(3) 0.0 Chemical derivative 2080 0.0 +QQTGG@K 471.207761 471.465 H(29)C(18)N(7)O(8) 0.0 Other 2082 0.0 +Pyro-QQTGG@K 454.181212 454.4344 H(26)C(18)N(6)O(8) 0.0 Other 2083 0.0 +NQTGG@K 457.192111 457.4384 H(27)C(17)N(7)O(8) 0.0 Other 2084 0.0 +DVFQQQTGG@K 960.43011 960.9865 H(60)C(41)N(12)O(15) 0.0 Other 2085 0.0 +iST-NHS_specific_cysteine_modification@C 113.084064 113.1576 H(11)C(6)N(1)O(1) 0.0 Chemical derivative 2086 0.0 +Label:13C(2)15N(1)@G 3.003745 2.9787 C(-2)13C(2)N(-1)15N(1) 0.0 Isotopic label 2088 0.0 +GlyGly@K 114.042927 114.1026 H(6)C(4)N(2)O(2) 0.0 Multiple 121 NCC(=O)NCC(=O)NCCCC[C@H](N([Xe])([Xe]))C([Rn])=O 1000000.0 +Pro->(2S,4R)-4-fluoroproline@P 0.0 0.0 F(1)H(-1) 0.0 User-added 0 F[C@@H]1C[C@H](N([Xe])C1)C(=O)[Rn] 0.0 +Pro->(2S,4S)-4fluoroproline@P 0.0 0.0 F(1)H(-1) 0.0 User-added 0 F[C@H]1C[C@H](N([Xe])C1)C(=O)[Rn] 0.0 +Pro->(2S)-1,3-thiazolidine-2-carboxylic_acid@P 0.0 0.0 C(-1)H(-2)S(1) 0.0 User-added 0 S1[C@H](N([Xe])CC1)C(=O)[Rn] 0.0 +Pro->(4R)-1,3-Thiazolidine-4-carboxylic_acid@P 0.0 0.0 C(-1)H(-2)S(1) 0.0 User-added 0 S1CN([Xe])[C@@H](C1)C(=O)[Rn] 0.0 +Pro->(2S,4R)-4-hydroxyproline@P 0.0 0.0 O(1) 0.0 User-added 0 O[C@@H]1C[C@H](N([Xe])C1)C(=O)[Rn] 0.0 +Pro->(DL)-pipecolic_acid@P 0.0 0.0 C(1)H(2) 0.0 User-added 0 C1CCN([Xe])C(C1)C(=O)[Rn] 0.0 +Pro->3,4-Dehydro-L-proline@P 0.0 0.0 H(-2) 0.0 User-added 0 C1C=CC(N1([Xe]))C(=O)[Rn] 0.0 +Pro->(1S,3S,5S)-2-Azabicyclo[3.1.0]hexane-3-carboxylic_acid@P 0.0 0.0 C(1) 0.0 User-added 0 [C@H]12N([Xe])[C@@H](C[C@@H]2C1)C(=O)[Rn] 0.0 +Pro->(1R,3S,5R)-2-Azabicyclo[3.1.0]hexane-3-carboxylic_acid@P 0.0 0.0 C(1) 0.0 User-added 0 [C@@H]12N([Xe])[C@@H](C[C@H]2C1)C(=O)[Rn] 0.0 +Pro->(2S,3aS,7aS)-Octahydro-1H-indole-2-carboxylic_acid@P 0.0 0.0 C(4)H(6) 0.0 User-added 0 N1([Xe])[C@@H](C[C@@H]2CCCC[C@H]12)C(=O)[Rn] 0.0 +Pro->(DL)-5-trifluoromethylproline@P 0.0 0.0 C(1)F(3)H(-1) 0.0 User-added 0 FC(C1CCC(N1([Xe]))C(=O)[Rn])(F)F 0.0 +mTRAQ@Protein_N-term 0.0 0.0 C(7)H(12)N(2)O(1) 0.0 User-added 0 C(=O)CN1CCN(CC1)C 0.0 +mTRAQ:13C(3)15N(1)@Protein_N-term 0.0 0.0 13C(3)15N(1)C(4)H(12)N(1)O(1) 0.0 User-added 0 C(=O)[13C]([H])([H])[15N]1[13C]([H])([H])[13C]([H])([H])N(CC1)C 0.0 +mTRAQ:13C(6)15N(2)@Protein_N-term 0.0 0.0 13C(6)15N(2)C(1)H(12)O(1) 0.0 User-added 0 C(=O)[13C]([H])([H])[15N]1[13C]([H])([H])[13C]([H])([H])[15N]([13C]([H])([H])[13C]1([H])([H]))[13C]([H])([H])([H]) 0.0 +Biotin@Protein_N-term 0.0 0.0 C(10)H(14)N(2)O(2)S(1) 0.0 User-added 0 C(=O)CCCCC1SCC2NC(=O)NC21 0.0 +Carbamidomethyl@Protein_N-term 0.0 
0.0 C(2)H(3)N(1)O(1) 0.0 User-added 0 C(=O)NC 0.0
+Propionamide@Protein_N-term 0.0 0.0 C(3)H(5)N(1)O(1) 0.0 User-added 0 CCC(N)=O 0.0
+Pyridylacetyl@Protein_N-term 0.0 0.0 C(7)H(5)N(1)O(1) 0.0 User-added 0 C(=O)Cc1ccccn1 0.0
+Methyl@Protein_C-term 0.0 0.0 C(1)H(2) 0.0 User-added 0 OC 0.0
+Ethyl@Protein_C-term 0.0 0.0 C(2)H(4) 0.0 User-added 0 OCC 0.0
+Cation:Na@Protein_C-term 0.0 0.0 H(-1)Na(1) 0.0 User-added 0 O[Na] 0.0
+Cation:K@Protein_C-term 0.0 0.0 H(-1)K(1) 0.0 User-added 0 O[K] 0.0
+Cation:Cu[I]@Protein_C-term 0.0 0.0 Cu(1)H(-1) 0.0 User-added 0 O[Cu] 0.0
+Cation:Li@Protein_C-term 0.0 0.0 H(-1)Li(1) 0.0 User-added 0 O[Li] 0.0

From ab4352cae12c4fca677fd01df85aad5bf01740ed Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 22:04:38 -0400
Subject: [PATCH 57/75] refactor: Update normalization field in Redeem CLI
 properties

---
 .../src/properties/inference/inference.rs | 117 +++++++++++++++++-
 .../src/properties/inference/input.rs     |   5 +
 crates/redeem-cli/src/properties/train/input.rs | 6 +-
 .../src/properties/train/trainer.rs       |   5 +-
 .../src/models/model_interface.rs         |  49 +++++---
 5 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs
index 6be1dea..b171f77 100644
--- a/crates/redeem-cli/src/properties/inference/inference.rs
+++ b/crates/redeem-cli/src/properties/inference/inference.rs
@@ -1,16 +1,20 @@
 use anyhow::{Context, Result};
+use maud::{PreEscaped, html};
 use redeem_properties::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
 use redeem_properties::models::ccs_cnn_tf_model::CCSCNNTFModel;
-use redeem_properties::models::ccs_model::load_collision_cross_section_model;
 use redeem_properties::models::model_interface::ModelInterface;
 use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel;
-use redeem_properties::models::rt_model::load_retention_time_model;
 use redeem_properties::utils::data_handling::{PeptideData, TargetNormalization};
-use redeem_properties::utils::peptdeep_utils::load_modifications;
+use redeem_properties::utils::peptdeep_utils::{load_modifications, MODIFICATION_MAP};
 use redeem_properties::utils::utils::get_device;
+use report_builder::{
+    Report, ReportSection,
+    plots::plot_scatter,
+};

 use crate::properties::inference::input::PropertyInferenceConfig;
 use crate::properties::inference::output::write_peptide_data;
+use crate::properties::train::sample_peptides;
 use crate::properties::load_data::load_peptide_data;
 use crate::properties::util::write_bytes_to_file;

@@ -23,7 +27,7 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> {
         &config.model_arch,
         Some(config.nce),
         Some(config.instrument.clone()),
-        Some("min_max".to_string()),
+        Some(config.normalization.clone().unwrap()),
         &modifications,
     )?;
     log::info!("Loaded {} peptides", inference_data.len());
@@ -90,6 +94,111 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> {
     log::info!("Predictions saved to: {}", config.output_file);
     write_peptide_data(&inference_results, &config.output_file)?;

+    // Generate report
+    let mut report = Report::new(
+        "ReDeeM",
+        &config.version,
+        Some("https://github.com/singjc/redeem/blob/master/img/redeem_logo.png?raw=true"),
+        &format!("ReDeeM {:?} Inference Report", config.model_arch),
+    );
+
+    /* Section 1: Overview */
+    {
+        let mut overview_section = ReportSection::new("Overview");
+
+        overview_section.add_content(html! {
+            "This report summarizes the inference process of the ReDeeM model."
+        });
+
+        let modifications = MODIFICATION_MAP.clone();
+
+        let normalize_field = if config.model_arch.contains("ccs") {
+            "ccs"
+        } else {
+            "retention time"
+        };
+
+        // Inference scatter plot
+        let inference_data_sampled: Vec<PeptideData> = sample_peptides(&inference_data, 5000);
+
+        let (true_rt, pred_rt): (Vec<f64>, Vec<f64>) = inference_data_sampled
+            .iter()
+            .zip(&inference_results)
+            .filter_map(|(true_pep, pred_pep)| {
+                match normalize_field {
+                    "ccs" => {
+                        match (true_pep.ccs, pred_pep.ccs) {
+                            (Some(t), Some(p)) => {
+                                let t_denorm = match norm_factor {
+                                    TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64,
+                                    TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64,
+                                    TargetNormalization::None => t as f64,
+                                };
+                                Some((t_denorm, p as f64))
+                            }
+                            _ => None,
+                        }
+                    },
+                    _ => {
+                        match (true_pep.retention_time, pred_pep.retention_time) {
+                            (Some(t), Some(p)) => {
+                                let t_denorm = match norm_factor {
+                                    TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64,
+                                    TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64,
+                                    TargetNormalization::None => t as f64,
+                                };
+                                Some((t_denorm, p as f64))
+                            }
+                            _ => None,
+                        }
+                    }
+                }
+            })
+            .unzip();
+
+
+        let scatter_plot = plot_scatter(
+            &vec![true_rt.clone()],
+            &vec![pred_rt.clone()],
+            vec!["Prediction".to_string()],
+            "Predicted vs True (Random 5000 Inference Peptides)",
+            "Target",
+            "Predicted",
+        )
+        .unwrap();
+        overview_section.add_plot(scatter_plot);
+
+        report.add_section(overview_section);
+    }
+
+
+    /* Section 2: Configuration */
+    {
+        let mut config_section = ReportSection::new("Configuration");
+        config_section.add_content(html! {
+            style {
+                ".code-container {
+                    background-color: #f5f5f5;
+                    padding: 10px;
+                    border-radius: 5px;
+                    overflow-x: auto;
+                    font-family: monospace;
+                    white-space: pre-wrap;
+                }"
+            }
+            div class="code-container" {
+                pre {
+                    code { (PreEscaped(serde_json::to_string_pretty(&config)?)) }
+                }
+            }
+        });
+        report.add_section(config_section);
+    }
+
+    // Save the report to HTML file
+    let path = "redeem_inference_report.html";
+    report.save_to_file(&path.to_string())?;
+
     let path = "redeem_inference_config.json";
     let json = serde_json::to_string_pretty(&config)?;
     println!("{}", json);
diff --git a/crates/redeem-cli/src/properties/inference/input.rs b/crates/redeem-cli/src/properties/inference/input.rs
index fcdacd5..e8ee563 100644
--- a/crates/redeem-cli/src/properties/inference/input.rs
+++ b/crates/redeem-cli/src/properties/inference/input.rs
@@ -8,9 +8,11 @@ use crate::properties::util::validate_tsv_or_csv_file;

 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct PropertyInferenceConfig {
+    pub version: String,
     pub model_path: String,
     pub inference_data: String,
     pub output_file: String,
+    pub normalization: Option<String>,
     pub model_arch: String,
     pub device: String,
     pub batch_size: usize,
@@ -21,9 +23,11 @@ pub struct PropertyInferenceConfig {
 impl Default for PropertyInferenceConfig {
     fn default() -> Self {
         PropertyInferenceConfig {
+            version: clap::crate_version!().to_string(),
             model_path: String::new(),
             inference_data: String::new(),
             output_file: String::from("redeem_inference.csv"),
+            normalization: Some(String::from("min_max")),
             model_arch: String::from("rt_cnn_tf"),
             device: String::from("cpu"),
             batch_size: 64,
@@ -64,6 +68,7 @@ impl PropertyInferenceConfig {
         load_or_default!(model_path);
         load_or_default!(inference_data);
         load_or_default!(output_file);
+        load_or_default!(normalization);
         load_or_default!(model_arch);
         load_or_default!(device);
         load_or_default!(batch_size);
diff --git a/crates/redeem-cli/src/properties/train/input.rs b/crates/redeem-cli/src/properties/train/input.rs
index c8523b9..45356bc 100644
--- a/crates/redeem-cli/src/properties/train/input.rs
+++ b/crates/redeem-cli/src/properties/train/input.rs
@@ -13,7 +13,7 @@ pub struct PropertyTrainConfig {
     pub train_data: String,
     pub validation_data: Option<String>,
     pub output_file: String,
-    pub rt_normalization: Option<String>,
+    pub normalization: Option<String>,
     pub model_arch: String,
     pub device: String,
     pub batch_size: usize,
@@ -33,7 +33,7 @@ impl Default for PropertyTrainConfig {
         train_data: String::new(),
         validation_data: None,
         output_file: String::from("rt_cnn_tf.safetensors"),
-        rt_normalization: Some(String::from("min_max")),
+        normalization: Some(String::from("min_max")),
         model_arch: String::from("rt_cnn_tf"),
         device: String::from("cpu"),
         batch_size: 64,
@@ -79,7 +79,7 @@ impl PropertyTrainConfig {
         load_or_default!(train_data);
         load_or_default!(validation_data);
         load_or_default!(output_file);
-        load_or_default!(rt_normalization);
+        load_or_default!(normalization);
         load_or_default!(model_arch);
         load_or_default!(device);
         load_or_default!(batch_size);
diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index 701bf8e..66dcf2b 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -1,7 +1,6 @@
 use anyhow::{Context, Result};
 use maud::{PreEscaped, html};
 use redeem_properties::models::model_interface::ModelInterface;
-use redeem_properties::models::rt_model::load_retention_time_model;
 use redeem_properties::models::{
     ccs_cnn_lstm_model::CCSCNNLSTMModel, ccs_cnn_tf_model::CCSCNNTFModel,
     rt_cnn_lstm_model::RTCNNLSTMModel, rt_cnn_transformer_model::RTCNNTFModel,
@@ -33,7 +32,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         &config.model_arch,
         Some(config.nce),
         Some(config.instrument.clone()),
-        Some(config.rt_normalization.clone().unwrap()),
+        Some(config.normalization.clone().unwrap()),
         &modifications,
     )?;
     log::info!("Loaded {} training peptides", train_peptides.len());
@@ -45,7 +44,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         &config.model_arch,
         Some(config.nce),
         Some(config.instrument.clone()),
-        Some(config.rt_normalization.clone().unwrap()),
+        Some(config.normalization.clone().unwrap()),
         &modifications,
     )
     .context("Failed to load validation data")?;
diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index eec347a..ff83f6b 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -667,6 +667,16 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                     best_val_loss = avg_val_loss;
                     epochs_without_improvement = 0;

+                    // Check if the prior checkpoint exists, if it does delete it
+                    let checkpoint_path = format!(
+                        "redeem_{}_best_val_ckpt_model_epoch_{}.safetensors",
+                        self.get_model_arch(),
+                        epoch - 1
+                    );
+                    if PathBuf::from(&checkpoint_path).exists() {
+                        std::fs::remove_file(&checkpoint_path)?;
+                    }
+
                     let checkpoint_path = format!(
                         "redeem_{}_best_val_ckpt_model_epoch_{}.safetensors",
                         self.get_model_arch(),
                         epoch
                     );
                     self.get_mut_varmap().save(&checkpoint_path)?;
@@ -910,7 +920,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
             (String, Option<String>),
             crate::utils::peptdeep_utils::ModificationMap,
         >,
-        rt_norm: TargetNormalization,
+        target_norm: TargetNormalization,
    ) -> Result<Vec<PeptideData>> {
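        // Inference runs over `batch_size`-sized chunks in parallel (rayon's
        // `par_chunks` below); each prediction is tagged with `start_idx + i`
        // so the batched results can be written back into a pre-allocated
        // `Vec<Option<PeptideData>>` and returned in the original input order.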
        let num_batches = (inference_data.len() + batch_size - 1) / batch_size;
        info!(
            "Running inference on {} peptides in {} batches",
            inference_data.len(),
            num_batches
        );
-
+
        let progress = Progress::new(inference_data.len(), "[inference] Batch:");
        let mut result: Vec<Option<PeptideData>> = vec![None; inference_data.len()];
-
+
        inference_data
            .par_chunks(batch_size)
            .enumerate()
            .map(|(batch_idx, batch_data)| {
                let start_idx = batch_idx * batch_size;
                let batch: PeptideBatchData = batch_data.into();
-
+
                let naked_sequences = &batch.naked_sequence;
                let mods = &batch.mods;
                let mod_sites = &batch.mod_sites;
-
+
                let charges = if batch.charges.iter().all(|c| c.is_some()) {
                    Some(batch.charges.iter().map(|c| c.unwrap()).collect::<Vec<_>>())
                } else {
                    None
                };
-
+
                let nces = if batch.nces.iter().all(|n| n.is_some()) {
                    Some(batch.nces.iter().map(|n| n.unwrap()).collect::<Vec<_>>())
                } else {
                    None
                };
-
+
                let instruments = if batch.instruments.iter().all(|i| i.is_some()) {
                    Some(batch.instruments.clone())
                } else {
                    None
                };
-
+
                let input_tensor = self
                    .encode_peptides(naked_sequences, mods, mod_sites, charges, nces, instruments)?
                    .to_device(self.get_device())?;
                let output = self.forward(&input_tensor)?;
-
+
                match self.property_type() {
                    PropertyType::RT | PropertyType::CCS => {
                        let predictions = output.to_vec1()?;
                        let batch_results: Vec<(usize, PeptideData)> = predictions
                            .iter()
                            .enumerate()
                            .map(|(i, &pred)| {
                                let mut peptide = batch_data[i].clone();
                                match self.property_type() {
                                    PropertyType::RT => {
                                        peptide.retention_time = Some(match target_norm {
                                            TargetNormalization::ZScore(mean, std) => pred * std + mean,
                                            TargetNormalization::MinMax(min, max) => {
                                                pred * (max - min) + min
                                            }
                                            TargetNormalization::None => pred,
                                        });
                                    }
+                                    PropertyType::CCS => {
+                                        peptide.ccs = Some(match target_norm {
+                                            TargetNormalization::ZScore(mean, std) => pred * std + mean,
+                                            TargetNormalization::MinMax(min, max) => {
+                                                pred * (max - min) + min
+                                            }
+                                            TargetNormalization::None => pred,
+                                        });
+                                    }
-                                    PropertyType::CCS => peptide.ion_mobility = Some(pred),
                                    _ => {}
-                                };
+                                }
                                (start_idx + i, peptide)
                            })
                            .collect();
                        result[idx] = Some(peptide);
                        progress.inc();
                    });
-
+
        progress.finish();
        Ok(result.into_iter().flatten().collect())
    }
+
+    /// Extract encoded input and target tensor for a batch of peptides.
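+    ///
+    /// Targets are taken in normalized space; `predict` applies the inverse
+    /// transform before writing values back onto each peptide. A minimal sketch
+    /// of that inverse, assuming `MinMax` carries `(min, max)` as it does in
+    /// `predict` above (illustrative only):
+    ///
+    /// ```
+    /// use redeem_properties::utils::data_handling::TargetNormalization;
+    ///
+    /// let norm = TargetNormalization::MinMax(10.0_f32, 110.0_f32);
+    /// let pred = 0.25_f32;
+    /// let denorm = match norm {
+    ///     TargetNormalization::ZScore(mean, std) => pred * std + mean,
+    ///     TargetNormalization::MinMax(min, max) => pred * (max - min) + min,
+    ///     TargetNormalization::None => pred,
+    /// };
+    /// assert!((denorm - 35.0).abs() < 1e-6);
+    /// ```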
    fn prepare_batch_inputs(
        &self,
        batch_data: &[PeptideData],
        modifications: &HashMap<
            (String, Option<String>),
            crate::utils::peptdeep_utils::ModificationMap,
        >,
    ) -> Result<(Tensor, Tensor)> {
-        use rayon::prelude::*;
-
        let batch: PeptideBatchData = batch_data.into();

        let naked_sequences = &batch.naked_sequence;
@@ -1045,7 +1062,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
                .map(|v| v.unwrap_or(0.0))
                .collect(),
            PropertyType::CCS => batch
-                .ion_mobilities
+                .ccs
                .iter()
                .map(|v| v.unwrap_or(0.0))
                .collect(),

From 16bba8fb7b96762628b9c075693ce06790fb485e Mon Sep 17 00:00:00 2001
From: singjc
Date: Wed, 14 May 2025 22:04:44 -0400
Subject: [PATCH 58/75] refactor: Update loading of modifications to use byte
 slice instead of file path

---
 crates/redeem-properties/src/utils/peptdeep_utils.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/redeem-properties/src/utils/peptdeep_utils.rs b/crates/redeem-properties/src/utils/peptdeep_utils.rs
index d802348..d30b6ae 100644
--- a/crates/redeem-properties/src/utils/peptdeep_utils.rs
+++ b/crates/redeem-properties/src/utils/peptdeep_utils.rs
@@ -234,7 +234,7 @@ pub fn load_mod_to_feature_arc(
 ///
 /// # Example
 /// ```
-/// use easypqp_core::data_handling::remove_mass_shift;
+/// use redeem_properties::utils::peptdeep_utils::remove_mass_shift;
 ///
 /// let peptide = "MGC[+57.0215]AAR";
 /// assert_eq!(remove_mass_shift(peptide), "MGCAAR");

From 99ae99fcf85e009a590212ef9b92a1d9c8f095bd Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 15 May 2025 01:42:40 -0400
Subject: [PATCH 59/75] refactor: Update training configuration in
 redeem-properties crate

---
 .../src/properties/train/trainer.rs     |   3 +
 .../redeem-properties/src/models/ccs_model.rs |   3 +
 .../src/models/model_interface.rs       | 384 ++++++------------
 .../redeem-properties/src/models/rt_model.rs  |   3 +
 4 files changed, 139 insertions(+), 254 deletions(-)

diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs
index 66dcf2b..cd06a79 100644
--- a/crates/redeem-cli/src/properties/train/trainer.rs
+++ b/crates/redeem-cli/src/properties/train/trainer.rs
@@ -143,6 +143,9 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> {
         config.learning_rate as f64,
         config.epochs,
         config.early_stopping_patience,
+        "training",
+        true,
+        true
     ).with_context(|| "Training failed: an error occurred during the model training process")?;
     log::info!("Training completed in {:?}", start_time.elapsed());
     model.save(&config.output_file)?;
diff --git a/crates/redeem-properties/src/models/ccs_model.rs b/crates/redeem-properties/src/models/ccs_model.rs
index a61b2e3..9e9d129 100644
--- a/crates/redeem-properties/src/models/ccs_model.rs
+++ b/crates/redeem-properties/src/models/ccs_model.rs
@@ -95,6 +95,9 @@ impl CCSModelWrapper {
             learning_rate,
             epochs,
             early_stopping_patience,
+            "training",
+            true,
+            true,
         )
     }
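The three new trailing arguments above map to the `context`, `save_checkpoints`, and `track_metrics` parameters added to `ModelInterface::train` in the next hunk. As a hedged sketch of a call site after this change — the leading arguments are not fully shown in this excerpt, so their order here follows the doc comment below and should be treated as illustrative:

    // Illustrative only — mirrors the trainer.rs/ccs_model.rs hunks above.
    let metrics = model.train(
        &train_peptides,
        Some(&val_peptides),
        batch_size,
        validation_batch_size,
        &modifications,
        learning_rate,
        epochs,
        early_stopping_patience,
        "training", // context: label used in log lines and progress bars
        true,       // save_checkpoints: write per-epoch .safetensors checkpoints
        true,       // track_metrics: record per-step losses and learning rates
    )?;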
diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index ff83f6b..d4b7194 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -465,6 +465,8 @@ pub trait ModelInterface: Send + Sync + ModelClone {
    /// This method initializes model weights from scratch and trains over the given peptide feature data for a specified
    /// number of epochs. Optionally performs validation and tracks both training and validation loss statistics.
    /// Early stopping is applied if the validation loss does not improve for a consecutive number of epochs.
+    ///
+    /// A Cosine Annealing with Warmup learning rate scheduler is used to adjust the learning rate during training. The initial warmup period is set to 10% of the total training steps.
    ///
    /// # Arguments
    /// * `training_data` - Vector of peptide records used for training.
@@ -475,14 +477,12 @@ pub trait ModelInterface: Send + Sync + ModelClone {
    /// * `learning_rate` - Learning rate for the AdamW optimizer.
    /// * `epochs` - Maximum number of training epochs.
    /// * `early_stopping_patience` - Number of epochs to wait before stopping if validation loss does not improve.
+    /// * `context` - A string representing the context for logging, e.g., "training" or "fine-tuning".
+    /// * `save_checkpoints` - Flag to save model checkpoints during training.
+    /// * `track_metrics` - Flag to track training and validation metrics.
    ///
    /// # Returns
-    /// A `Vec` of tuples where each tuple contains:
-    /// * `epoch` - Epoch number.
-    /// * `avg_train_loss` - Average training loss for the epoch.
-    /// * `avg_val_loss` - Optional average validation loss for the epoch.
-    /// * `train_std` - Standard deviation of training loss across batches.
-    /// * `val_std` - Optional standard deviation of validation loss across batches.
+    /// [`TrainingStepMetrics`] - A struct containing training and validation loss statistics, learning rates, and other metrics.
    fn train(
        &mut self,
        training_data: &Vec<PeptideData>,
        validation_data: Option<&Vec<PeptideData>>,
        batch_size: usize,
        validation_batch_size: usize,
        modifications: &HashMap<
            (String, Option<String>),
            crate::utils::peptdeep_utils::ModificationMap,
        >,
        learning_rate: f64,
        epochs: usize,
        early_stopping_patience: usize,
+        context: &str,
+        save_checkpoints: bool,
+        track_metrics: bool,
    ) -> Result<TrainingStepMetrics> {
        let num_batches = (training_data.len() + batch_size - 1) / batch_size;
        let total_steps = num_batches * epochs;
        let warmup_steps = total_steps / 10;

        info!(
-            "Training {} model from on {} peptide features ({} batches) for {} epochs",
+            "{} {} model on {} peptide features ({} batches) for {} epochs",
+            context,
            self.get_model_arch(),
            training_data.len(),
            num_batches,
            epochs
        );

        let mut epoch_losses = vec![];

        for epoch in 0..epochs {
-            let progress = Progress::new(num_batches, &format!("[training] Epoch {}: ", epoch));
+            let progress = Progress::new(num_batches, &format!("[{}] Epoch {}: ", context, epoch));
            let mut batch_losses = vec![];

            training_data.chunks(batch_size).enumerate().try_for_each(
                |(idx, batch)| -> Result<()> {
                        _ => None,
                    };

+                    if track_metrics {
+                        step_metrics.epochs.push(epoch);
+                        step_metrics.steps.push(step_idx);
+                        step_metrics
+                            .learning_rates
+                            .push(lr_scheduler.get_last_lr() as f64);
+                        step_metrics.losses.push(loss_val);
+                        step_metrics.phases.push(TrainingPhase::Train);
+                        step_metrics.accuracies.push(acc);
+                        step_metrics.precisions.push(None);
+                        step_metrics.recalls.push(None);
+                        step_idx += 1;
+                    }
+
                    progress.update_description(&format!(
-                        "[training] Epoch {}: Loss: {:.4}",
-                        epoch, loss_val
+                        "[{}] Epoch {}: Loss: {:.4}",
+                        context, epoch, loss_val
                    ));
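                    // For reference, the schedule driving `lr_scheduler` above
                    // follows the warmup-then-cosine shape described in the doc
                    // comment (a sketch; the concrete scheduler type is not
                    // shown in this hunk):
                    //   step < warmup_steps: lr = base_lr * step / warmup_steps
                    //   otherwise:           lr = base_lr * 0.5 * (1 + cos(pi * t)),
                    //   where t = (step - warmup_steps) / (total_steps - warmup_steps)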
progress.inc(); @@ -633,17 +640,19 @@ pub trait ModelInterface: Send + Sync + ModelClone { }) .collect::>()?; - for (val_loss, idx, lr, acc) in &val_results { - step_metrics.epochs.push(epoch); - step_metrics.steps.push(val_step_idx + idx); - step_metrics.learning_rates.push(*lr); - step_metrics.losses.push(*val_loss); - step_metrics.phases.push(TrainingPhase::Validation); - step_metrics.accuracies.push(*acc); - step_metrics.precisions.push(None); - step_metrics.recalls.push(None); + if track_metrics{ + for (val_loss, idx, lr, acc) in &val_results { + step_metrics.epochs.push(epoch); + step_metrics.steps.push(val_step_idx + idx); + step_metrics.learning_rates.push(*lr); + step_metrics.losses.push(*val_loss); + step_metrics.phases.push(TrainingPhase::Validation); + step_metrics.accuracies.push(*acc); + step_metrics.precisions.push(None); + step_metrics.recalls.push(None); + } + val_step_idx += val_results.len(); } - val_step_idx += val_results.len(); let val_losses: Vec = val_results.iter().map(|(loss, _, _, _)| *loss).collect(); @@ -666,45 +675,18 @@ pub trait ModelInterface: Send + Sync + ModelClone { if avg_val_loss < best_val_loss { best_val_loss = avg_val_loss; epochs_without_improvement = 0; - - // Check if the prior checkpoint exists, if it does delete it - let checkpoint_path = format!( - "redeem_{}_best_val_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - 1 - ); - if PathBuf::from(&checkpoint_path).exists() { - std::fs::remove_file(&checkpoint_path)?; + if save_checkpoints{ + self.save_epoch_checkpoint(epoch, "val")?; } - - let checkpoint_path = format!( - "redeem_{}_best_val_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - ); - self.get_mut_varmap().save(&checkpoint_path)?; } else { epochs_without_improvement += 1; if epochs_without_improvement >= early_stopping_patience { info!("Early stopping triggered after {} epochs without validation loss improvement.", early_stopping_patience); return Ok(step_metrics); } - let checkpoint_path = format!( - "redeem_{}_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - 1 - ); - // Check if the prior checkpoint exists, if it does delete it - if PathBuf::from(&checkpoint_path).exists() { - std::fs::remove_file(&checkpoint_path)?; + if save_checkpoints{ + self.save_epoch_checkpoint(epoch, "train")?; } - // Save the current checkpoint - let checkpoint_path = format!( - "redeem_{}_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - ); - self.get_mut_varmap().save(&checkpoint_path)?; } } else { epoch_losses.push((epoch, avg_loss, None, std_loss, None)); @@ -713,37 +695,17 @@ pub trait ModelInterface: Send + Sync + ModelClone { epoch, avg_loss, std_loss )); progress.finish(); - - let checkpoint_path = format!( - "redeem_{}_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - 1 - ); - // Check if the prior checkpoint exists, if it does delete it - if PathBuf::from(&checkpoint_path).exists() { - std::fs::remove_file(&checkpoint_path)?; + if save_checkpoints{ + self.save_epoch_checkpoint(epoch, "train")?; } - // Save the current checkpoint - let checkpoint_path = format!( - "redeem_{}_ckpt_model_epoch_{}.safetensors", - self.get_model_arch(), - epoch - ); - self.get_mut_varmap().save(&checkpoint_path)?; } } Ok(step_metrics) } - /// Fine-tune the model on a batch of training data. - /// - /// # Arguments - /// * `training_data` - A vector of `PeptideData` instances representing the training data. 
- /// * `modifications` - A map of modifications and their corresponding feature vectors. - /// * `batch_size` - The batch size to use for training. - /// * `learning_rate` - The learning rate to use for training. - /// * `epochs` - The number of epochs to train for. + /// Fine-tune the model on new data using the main [`ModelInterface::train`] method. + /// This is a wrapper that disables validation and early stopping. fn fine_tune( &mut self, training_data: &Vec, @@ -755,160 +717,21 @@ pub trait ModelInterface: Send + Sync + ModelClone { learning_rate: f64, epochs: usize, ) -> Result<()> { - // let num_batches = if training_data.len() < batch_size { - // 1 - // } else { - // let full_batches = training_data.len() / batch_size; - // if training_data.len() % batch_size > 0 { - // full_batches + 1 - // } else { - // full_batches - // } - // }; - - // info!( - // "Fine-tuning {} model on {} peptide features ({} batches) for {} epochs", - // self.get_model_arch(), - // training_data.len(), - // num_batches, - // epochs - // ); - - // let params = candle_nn::ParamsAdamW { - // lr: learning_rate, - // ..Default::default() - // }; - // let mut opt = candle_nn::AdamW::new(self.get_mut_varmap().all_vars(), params)?; - - // for epoch in 0..epochs { - // let progress = Progress::new(num_batches, &format!("[fine-tuning] Epoch {}: ", epoch)); - // let mut total_loss = 0.0; - - // for batch_idx in 0..num_batches { - // let start = batch_idx * batch_size; - // let end = (start + batch_size).min(training_data.len()); - // let batch_data = &training_data[start..end]; - - // let peptides: Vec = batch_data - // .iter() - // .map(|p| remove_mass_shift(&p.sequence)) - // .collect(); - // let mods: Vec = batch_data - // .iter() - // .map(|p| get_modification_string(&p.sequence, &modifications)) - // .collect(); - // let mod_sites: Vec = batch_data - // .iter() - // .map(|p| get_modification_indices(&p.sequence)) - // .collect(); - - // let charges = batch_data - // .iter() - // .filter_map(|p| p.charge) - // .collect::>(); - // let charges = if charges.len() == batch_data.len() { - // Some(charges) - // } else { - // None - // }; - - // let nces = batch_data.iter().filter_map(|p| p.nce).collect::>(); - // let nces = if nces.len() == batch_data.len() { - // Some(nces) - // } else { - // None - // }; - - // let instruments = batch_data - // .iter() - // .filter_map(|p| p.instrument.clone()) - // .collect::>(); - // let instruments = if instruments.len() == batch_data.len() { - // Some(instruments) - // } else { - // None - // }; - - // let input_batch = self - // .encode_peptides(&peptides, &mods, &mod_sites, charges, nces, instruments)? 
- // .to_device(self.get_device())?; - - // log::trace!( - // "[ModelInterface::fine_tune] input_batch shape: {:?}, device: {:?}", - // input_batch.shape(), - // input_batch.device() - // ); - - // let batch_targets = match self.property_type() { - // PropertyType::RT => PredictionResult::RTResult( - // batch_data - // .iter() - // .map(|p| p.retention_time.unwrap_or_default()) - // .collect(), - // ), - // PropertyType::CCS => PredictionResult::CCSResult( - // batch_data - // .iter() - // .map(|p| p.ion_mobility.unwrap_or_default()) - // .collect(), - // ), - // PropertyType::MS2 => PredictionResult::MS2Result( - // batch_data - // .iter() - // .map(|p| p.ms2_intensities.clone().unwrap_or_default()) - // .collect(), - // ), - // }; - - // let target_batch = match batch_targets { - // PredictionResult::RTResult(ref values) - // | PredictionResult::CCSResult(ref values) => { - // Tensor::new(values.clone(), &self.get_device())? - // } - // PredictionResult::MS2Result(ref spectra) => { - // let max_len = spectra.iter().map(|s| s.len()).max().unwrap_or(1); - // let feature_dim = spectra - // .get(0) - // .and_then(|s| s.get(0)) - // .map(|v| v.len()) - // .unwrap_or(1); - // let mut padded_spectra = spectra.clone(); - // for s in &mut padded_spectra { - // s.resize(max_len, vec![0.0; feature_dim]); - // } - // Tensor::new(padded_spectra.concat(), &self.get_device())?.reshape(( - // batch_data.len(), - // max_len, - // feature_dim, - // ))? - // } - // } - // .to_device(self.get_device())?; - - // let predicted = self.forward(&input_batch)?; - // let loss = candle_nn::loss::mse(&predicted, &target_batch)?; - // opt.backward_step(&loss)?; - - // total_loss += loss.to_vec0::().unwrap_or(990.0); - - // progress.update_description(&format!( - // "[fine-tuning] Epoch {}: Loss: {}", - // epoch, - // loss.to_vec0::()? - // )); - // progress.inc(); - // } - - // let avg_loss = total_loss / num_batches as f32; - // progress.update_description(&format!( - // "[fine-tuning] Epoch {}: Avg. Batch Loss: {}", - // epoch, avg_loss - // )); - // progress.finish(); - // } - - // Ok(()) - todo!() + let _metrics = self.train( + training_data, + None, // No validation data + modifications, + batch_size, + batch_size, // Validation batch size is same but unused + learning_rate, + epochs, + usize::MAX, // Disable early stopping + "fine-tuning", + false, // No checkpoints + false, // No metrics + )?; + + Ok(()) } /// Perform inference over a batch of peptides. @@ -1055,23 +878,47 @@ pub trait ModelInterface: Send + Sync + ModelClone { .encode_peptides(naked_sequences, mods, mod_sites, charges, nces, instruments)? .to_device(self.get_device())?; - let target_values: Vec = match self.property_type() { - PropertyType::RT => batch - .retention_times - .iter() - .map(|v| v.unwrap_or(0.0)) - .collect(), - PropertyType::CCS => batch - .ccs - .iter() - .map(|v| v.unwrap_or(0.0)) - .collect(), + let target_tensor = match self.property_type() { + PropertyType::RT => { + let target_values: Vec = batch + .retention_times + .iter() + .map(|v| v.unwrap_or(0.0)) + .collect(); + Tensor::new(target_values, &self.get_device())? + } + PropertyType::CCS => { + let target_values: Vec = batch + .ccs + .iter() + .map(|v| v.unwrap_or(0.0)) + .collect(); + Tensor::new(target_values, &self.get_device())? 
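+                // RT and CCS targets are one scalar per peptide, so each arm above
+                // produces a 1-D tensor of shape (batch,). The MS2 arm below instead
+                // flattens each peptide's intensity matrix and reshapes it to
+                // (batch, seq_len, 8); the hard-coded 8 is presumably the number of
+                // fragment-ion intensity channels per position (inferred from the
+                // reshape, not stated in this patch).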
+ } PropertyType::MS2 => { - return Err(anyhow::anyhow!("MS2 training is not yet implemented")) + let mut targets = Vec::new(); + for (i, opt_peptide) in batch.ms2_intensities.iter().enumerate() { + let peptide = opt_peptide.as_ref().ok_or_else(|| { + anyhow::anyhow!("Missing MS2 intensities for peptide at index {i}") + })?; + for row in peptide { + for val in row { + targets.push(*val); + } + } + } + let shape = ( + batch.ms2_intensities.len(), + batch.ms2_intensities[0] + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Missing MS2 intensities in batch"))? + .len(), + 8, + ); + Tensor::from_vec(targets, shape, &self.get_device())? } }; - let target_tensor = Tensor::new(target_values, &self.get_device())?; Ok((input_batch, target_tensor)) } @@ -1111,6 +958,35 @@ pub trait ModelInterface: Send + Sync + ModelClone { Ok(()) } + /// Save epoch checkpoint and delete prior checkpoint + fn save_epoch_checkpoint(&mut self, epoch: usize, ctx: &str) -> Result<()> { + let insert_ctx = match ctx { + "train" => "_", + "val" => "_best_val_", + _ => panic!("Invalid context for saving checkpoint. Must be 'train' or 'val'."), + }; + + // Check if the prior checkpoint exists, if it does delete it + let checkpoint_path = format!( + "redeem_{}{}ckpt_model_epoch_{}.safetensors", + self.get_model_arch(), + insert_ctx, + epoch - 1 + ); + if PathBuf::from(&checkpoint_path).exists() { + std::fs::remove_file(&checkpoint_path)?; + } + // Save the current checkpoint + let checkpoint_path = format!( + "redeem_{}{}ckpt_model_epoch_{}.safetensors", + self.get_model_arch(), + insert_ctx, + epoch + ); + self.get_mut_varmap().save(&checkpoint_path)?; + Ok(()) + } + fn apply_min_pred_value(&self, tensor: &Tensor, min_pred_value: f32) -> Result { // Create a tensor with the same shape as the input, filled with min_pred_value let min_tensor = Tensor::full(min_pred_value, tensor.shape(), tensor.device())?; diff --git a/crates/redeem-properties/src/models/rt_model.rs b/crates/redeem-properties/src/models/rt_model.rs index 7b2a166..8143b92 100644 --- a/crates/redeem-properties/src/models/rt_model.rs +++ b/crates/redeem-properties/src/models/rt_model.rs @@ -98,6 +98,9 @@ impl RTModelWrapper { learning_rate, epochs, early_stopping_patience, + "training", + true, + true, ) } From 7ae4aa87ed5b19bdf333b4b2bffd345595893ab6 Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 01:42:52 -0400 Subject: [PATCH 60/75] refactor: Update AAEmbedding constructor signature to accept VarBuilder instead of Device --- .../redeem-properties/src/building_blocks/building_blocks.rs | 5 +---- crates/redeem-properties/src/models/ms2_bert_model.rs | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index 2e758fd..d99c914 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -97,10 +97,7 @@ struct AAEmbedding { } impl AAEmbedding { - fn new(hidden_size: usize, device: &Device) -> Result { - // Create a VarBuilder - let vb = nn::VarBuilder::zeros(DType::F32, device); - + fn new(hidden_size: usize, vb: &nn::VarBuilder) -> Result { // Create the embedding layer let embeddings = nn::embedding(AA_EMBEDDING_SIZE, hidden_size, vb.pp("embedding"))?; diff --git a/crates/redeem-properties/src/models/ms2_bert_model.rs b/crates/redeem-properties/src/models/ms2_bert_model.rs index 9bef37c..85a9ef6 100644 --- 
a/crates/redeem-properties/src/models/ms2_bert_model.rs +++ b/crates/redeem-properties/src/models/ms2_bert_model.rs @@ -228,7 +228,7 @@ impl ModelInterface for MS2BertModel { // Forward pass through input_nn with dropout let in_x = self .dropout - .forward(&self.input_nn.forward(&aa_indices_out, &mod_x_out)?, true)?; + .forward(&self.input_nn.forward(&aa_indices_out, &mod_x_out)?, self.is_training)?; log::trace!( "[MS2BertModel::forward] in_x shape (post dropout-input_nn): {:?}, device: {:?}", From c1d18d8c9f4d5e0d2267ec3f1eb4f04b7b556ade Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 08:39:48 -0400 Subject: [PATCH 61/75] refactor: Update semi-supervised learning to return updated ranks along with predictions --- .../examples/gbdt_semi_supervised_learning.rs | 2 +- crates/redeem-classifiers/src/data_handling.rs | 18 ++++++++++++++++++ crates/redeem-classifiers/src/psm_scorer.rs | 12 +++++++----- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs index e0debf9..c7253c7 100644 --- a/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs +++ b/crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs @@ -122,7 +122,7 @@ fn main() -> Result<()> { 3, Some((0.15, 1.0)) ); - let predictions = learner.fit(x, y.clone(), metadata); + let (predictions, _ranks) = learner.fit(x, y.clone(), metadata)?; println!("Labels: {:?}", y); diff --git a/crates/redeem-classifiers/src/data_handling.rs b/crates/redeem-classifiers/src/data_handling.rs index b28e9a5..354b6c8 100644 --- a/crates/redeem-classifiers/src/data_handling.rs +++ b/crates/redeem-classifiers/src/data_handling.rs @@ -143,6 +143,24 @@ impl Experiment { ); } + /// Extracts the "rank" feature column as a 1D array. 
+    ///
+    /// # Returns
+    /// * `Ok(Array1<f32>)` containing the rank values (one per row in `x`)
+    /// * `Err` if "rank" is not found in the feature names
+    pub fn get_rank_column(&self) -> anyhow::Result<Array1<f32>> {
+        let Some(rank_idx) = self
+            .psm_metadata
+            .feature_names
+            .iter()
+            .position(|name| name == "rank")
+        else {
+            anyhow::bail!("'rank' feature not found in feature_names");
+        };
+
+        Ok(self.x.column(rank_idx).to_owned())
+    }
 
     pub fn get_top_test_peaks(&self) -> Experiment {
         let mask = &self.is_train.mapv(|x| !x) & &self.is_top_peak;
 
diff --git a/crates/redeem-classifiers/src/psm_scorer.rs b/crates/redeem-classifiers/src/psm_scorer.rs
index f4bd9ba..04c7626 100644
--- a/crates/redeem-classifiers/src/psm_scorer.rs
+++ b/crates/redeem-classifiers/src/psm_scorer.rs
@@ -321,7 +321,7 @@ impl SemiSupervisedLearner {
     /// # Returns
     ///
     /// The predictions for the input features
-    pub fn fit(&mut self, x: Array2, y: Array1, psm_metadata: PsmMetadata) -> Array1<f32> {
+    pub fn fit(&mut self, x: Array2, y: Array1, psm_metadata: PsmMetadata) -> anyhow::Result<(Array1<f32>, Array1<f32>)> {
 
         let mut experiment = Experiment::new(x.clone(), y.clone(), psm_metadata.clone());
 
@@ -331,9 +331,7 @@ impl SemiSupervisedLearner {
         let (_best_feat, _best_positives, mut new_labels, best_desc, _best_feature_scores) =
             self.init_best_feature(&experiment, self.train_fdr);
 
-        // println!("Original labels: {:?}", experiment.y);
         experiment.y = new_labels.clone();
-        // println!("New labels: {:?}", experiment.y);
 
         let folds = self.create_folds(&experiment, self.xeval_num_iter, self.class_pct.map(|(t, _d)| t), self.class_pct.map(|(_t, d)| d));
 
@@ -380,11 +378,15 @@ impl SemiSupervisedLearner {
 
         // Final prediction on the entire dataset
         log::info!("Final prediction on the entire dataset");
-        let experiment = Experiment::new(x, y, psm_metadata);
+        let mut experiment = Experiment::new(x, y, psm_metadata);
 
         // self.model
         //     .fit(&experiment.x, &experiment.y.to_vec(), None, None);
 
-        Array1::from(self.model.predict_proba(&experiment.x))
+        let final_predictions = Array1::from(self.model.predict_proba(&experiment.x));
+        experiment.update_rank_feature(&final_predictions, &experiment.psm_metadata.clone());
+        let updated_ranks = experiment.get_rank_column()?;
+
+        Ok((final_predictions, updated_ranks))
     }
 }
 
From 60886782074a67b23c3ffe730b24d0ecb9148cf3 Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 15 May 2025 08:40:00 -0400
Subject: [PATCH 62/75] refactor: Update Redeem CLI to use RTCNNTFModel for
 inference

---
 crates/redeem-cli/src/properties/inference/inference.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/redeem-cli/src/properties/inference/inference.rs b/crates/redeem-cli/src/properties/inference/inference.rs
index b171f77..7445b72 100644
--- a/crates/redeem-cli/src/properties/inference/inference.rs
+++ b/crates/redeem-cli/src/properties/inference/inference.rs
@@ -4,6 +4,7 @@ use redeem_properties::models::ccs_cnn_lstm_model::CCSCNNLSTMModel;
 use redeem_properties::models::ccs_cnn_tf_model::CCSCNNTFModel;
 use redeem_properties::models::model_interface::ModelInterface;
 use redeem_properties::models::rt_cnn_lstm_model::RTCNNLSTMModel;
+use redeem_properties::models::rt_cnn_transformer_model::RTCNNTFModel;
 use redeem_properties::utils::data_handling::{PeptideData, TargetNormalization};
 use redeem_properties::utils::peptdeep_utils::{load_modifications, MODIFICATION_MAP};
 use redeem_properties::utils::utils::get_device;
@@ -46,7 +47,7 @@ pub fn run_inference(config: &PropertyInferenceConfig) -> Result<()> {
                 true,
                device.clone(),
            )?),
-        "rt_cnn_tf" => Box::new(RTCNNLSTMModel::new(
+        "rt_cnn_tf" => Box::new(RTCNNTFModel::new(
             &config.model_path,
             None,
             0,

From 2fbe12ce1bf368d757243284c8aff56376201416 Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 15 May 2025 10:03:34 -0400
Subject: [PATCH 63/75] refactor: Update data handling to extract "rank"
 feature column as 1D array of `u32`s

---
 .../redeem-classifiers/src/data_handling.rs | 21 +++++++++++++++----
 crates/redeem-classifiers/src/psm_scorer.rs |  2 +-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/crates/redeem-classifiers/src/data_handling.rs b/crates/redeem-classifiers/src/data_handling.rs
index 354b6c8..f94de6b 100644
--- a/crates/redeem-classifiers/src/data_handling.rs
+++ b/crates/redeem-classifiers/src/data_handling.rs
@@ -143,12 +143,12 @@ impl Experiment {
         );
     }
 
-    /// Extracts the "rank" feature column as a 1D array.
+    /// Extracts the "rank" feature column as a 1D array of `u32`s.
     ///
     /// # Returns
-    /// * `Ok(Array1<f32>)` containing the rank values (one per row in `x`)
+    /// * `Ok(Array1<u32>)` containing the rank values (one per row in `x`)
     /// * `Err` if "rank" is not found in the feature names
-    pub fn get_rank_column(&self) -> anyhow::Result<Array1<f32>> {
+    pub fn get_rank_column(&self) -> anyhow::Result<Array1<u32>> {
         let Some(rank_idx) = self
             .psm_metadata
             .feature_names
             .iter()
             .position(|name| name == "rank")
@@ -158,7 +158,20 @@ impl Experiment {
             anyhow::bail!("'rank' feature not found in feature_names");
         };
 
-        Ok(self.x.column(rank_idx).to_owned())
+        let rank_f32 = self.x.column(rank_idx);
+
+        let rank_u32 = rank_f32
+            .iter()
+            .map(|&val| {
+                if val.is_finite() && val >= 0.0 {
+                    val.round() as u32
+                } else {
+                    0 // fallback: treat NaNs or negatives as rank 0 (could also bail or panic if preferred)
+                }
+            })
+            .collect::<Array1<u32>>();
+
+        Ok(rank_u32)
     }
 
 
diff --git a/crates/redeem-classifiers/src/psm_scorer.rs b/crates/redeem-classifiers/src/psm_scorer.rs
index 04c7626..2280a23 100644
--- a/crates/redeem-classifiers/src/psm_scorer.rs
+++ b/crates/redeem-classifiers/src/psm_scorer.rs
@@ -321,7 +321,7 @@ impl SemiSupervisedLearner {
     /// # Returns
     ///
     /// The predictions for the input features
-    pub fn fit(&mut self, x: Array2, y: Array1, psm_metadata: PsmMetadata) -> anyhow::Result<(Array1<f32>, Array1<f32>)> {
+    pub fn fit(&mut self, x: Array2, y: Array1, psm_metadata: PsmMetadata) -> anyhow::Result<(Array1<f32>, Array1<u32>)> {
 
         let mut experiment = Experiment::new(x.clone(), y.clone(), psm_metadata.clone());
 
From fd447f3d364dce02e1b512873175377572032cec Mon Sep 17 00:00:00 2001
From: singjc
Date: Thu, 15 May 2025 18:00:40 -0400
Subject: [PATCH 64/75] refactor: Improve bidirectional LSTM input handling for
 contiguous tensors

---
 crates/redeem-properties/src/building_blocks/bilstm.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs
index 43d4e1a..dca552a 100644
--- a/crates/redeem-properties/src/building_blocks/bilstm.rs
+++ b/crates/redeem-properties/src/building_blocks/bilstm.rs
@@ -110,8 +110,11 @@ impl BidirectionalLSTM {
         let h0_2 = h0.narrow(0, 2, 2)?;
         let c0_2 = c0.narrow(0, 2, 2)?;
 
-        let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?;
+        let xs = xs.contiguous()?;
+
+        let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(&xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?;
+
+        let out1 = out1.contiguous()?;
         let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2,
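+        // The .contiguous() calls added above materialize the strided views
+        // produced by narrow()/expand()/stack() before they reach the LSTM
+        // layers; candle's matmul-backed LSTM step appears to require
+        // contiguous inputs (an inference from the is_contiguous() debugging
+        // in the patches that follow, not a claim made in this patch).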
&self.backward_lstm2, &h0_2, &c0_2)?; let hn = Tensor::cat(&[hn1, hn2], 0)?; From 2e3bd90ba4c1e530b93905b7955af8be8325ae97 Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 18:45:20 -0400 Subject: [PATCH 65/75] refactor: Improve bidirectional LSTM input handling for contiguous tensors --- crates/redeem-properties/src/building_blocks/bilstm.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index dca552a..bfb11f7 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -58,8 +58,9 @@ impl BidirectionalLSTM { let h0_forward = h0.i(0)?; let c0_forward = c0.i(0)?; let state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward }; - - let out_fw_states = lstm_forward.seq_init(input, &state_fw)?; + + let input = input.contiguous()?; + let out_fw_states = lstm_forward.seq_init(&input, &state_fw)?; let out_fw = Tensor::stack( &out_fw_states.iter().map(|s| s.h()).collect::>(), 1, @@ -74,7 +75,7 @@ impl BidirectionalLSTM { .map(|t| input.i((.., t..=t, ..))) .collect::>>()?, 1, - )?; + )?.contiguous()?; // Initial states for backward let h0_backward = h0.i(1)?; From 930f21a5b119e91b209e44d2c54e3d1f8a7d9044 Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 19:05:22 -0400 Subject: [PATCH 66/75] refactor: Improve initialization of hidden states in BidirectionalLSTM --- crates/redeem-properties/src/building_blocks/bilstm.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index bfb11f7..c39235f 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -103,8 +103,9 @@ impl BidirectionalLSTM { /// Forward with hidden states returned pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> { let (batch_size, _, _) = xs.dims3()?; - let h0 = self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; - let c0 = self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + let h0 = self.h0.unsqueeze(1)?.repeat((1, batch_size, 1))?; + let c0 = self.c0.unsqueeze(1)?.repeat((1, batch_size, 1))?; + let h0_1 = h0.narrow(0, 0, 2)?; let c0_1 = c0.narrow(0, 0, 2)?; From d8a32c54d9d4c64bf42fa033c587a00b5e351b5f Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 19:25:41 -0400 Subject: [PATCH 67/75] debug: bilstm forward with state --- .../src/building_blocks/bilstm.rs | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index c39235f..d4c52ad 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -54,17 +54,20 @@ impl BidirectionalLSTM { ) -> Result<(Tensor, (Tensor, Tensor))> { let (_batch_size, seq_len, _input_size) = input.dims3()?; + log::debug!("Entering apply_bidirectional_layer"); + // Initial states for forward let h0_forward = h0.i(0)?; let c0_forward = c0.i(0)?; - let state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward }; - + let state_fw = rnn::LSTMState { h: h0_forward.clone(), c: c0_forward.clone() }; + let input = input.contiguous()?; + log::debug!("Forward input shape: {:?}, is_contiguous: 
{}", input.shape(), input.is_contiguous()); + let out_fw_states = lstm_forward.seq_init(&input, &state_fw)?; - let out_fw = Tensor::stack( - &out_fw_states.iter().map(|s| s.h()).collect::>(), - 1, - )?; + let out_fw = Tensor::stack(&out_fw_states.iter().map(|s| s.h()).collect::>(), 1)?; + log::debug!("out_fw shape: {:?}, is_contiguous: {}", out_fw.shape(), out_fw.is_contiguous()); + let last_fw_h = out_fw_states.last().unwrap().h().clone(); let last_fw_c = out_fw_states.last().unwrap().c().clone(); @@ -76,17 +79,17 @@ impl BidirectionalLSTM { .collect::>>()?, 1, )?.contiguous()?; - + log::debug!("Backward input_reversed shape: {:?}, is_contiguous: {}", input_reversed.shape(), input_reversed.is_contiguous()); + // Initial states for backward let h0_backward = h0.i(1)?; let c0_backward = c0.i(1)?; - let state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward }; + let state_bw = rnn::LSTMState { h: h0_backward.clone(), c: c0_backward.clone() }; let out_bw_states = lstm_backward.seq_init(&input_reversed, &state_bw)?; - let out_bw = Tensor::stack( - &out_bw_states.iter().map(|s| s.h()).collect::>(), - 1, - )?; + let out_bw = Tensor::stack(&out_bw_states.iter().map(|s| s.h()).collect::>(), 1)?; + log::debug!("out_bw shape: {:?}, is_contiguous: {}", out_bw.shape(), out_bw.is_contiguous()); + let last_bw_h = out_bw_states.last().unwrap().h().clone(); let last_bw_c = out_bw_states.last().unwrap().c().clone(); @@ -94,31 +97,35 @@ impl BidirectionalLSTM { let hn = Tensor::stack(&[last_fw_h.clone(), last_bw_h.clone()], 0)?; let cn = Tensor::stack(&[last_fw_c, last_bw_c], 0)?; let output = Tensor::cat(&[out_fw, out_bw], 2)?; + log::debug!("Combined output shape: {:?}, is_contiguous: {}", output.shape(), output.is_contiguous()); Ok((output, (hn, cn))) } - - + /// Forward with hidden states returned pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> { + log::debug!("Input xs shape: {:?}, is_contiguous: {}", xs.shape(), xs.is_contiguous()); + let (batch_size, _, _) = xs.dims3()?; - let h0 = self.h0.unsqueeze(1)?.repeat((1, batch_size, 1))?; - let c0 = self.c0.unsqueeze(1)?.repeat((1, batch_size, 1))?; - - + let h0 = self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + let c0 = self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + let h0_1 = h0.narrow(0, 0, 2)?; let c0_1 = c0.narrow(0, 0, 2)?; let h0_2 = h0.narrow(0, 2, 2)?; let c0_2 = c0.narrow(0, 2, 2)?; - + let xs = xs.contiguous()?; - + log::debug!("xs after contiguous shape: {:?}, is_contiguous: {}", xs.shape(), xs.is_contiguous()); + let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(&xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?; - + let out1 = out1.contiguous()?; + log::debug!("out1 after first layer shape: {:?}, is_contiguous: {}", out1.shape(), out1.is_contiguous()); + let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?; - + let hn = Tensor::cat(&[hn1, hn2], 0)?; let cn = Tensor::cat(&[cn1, cn2], 0)?; Ok((out2, (hn, cn))) From 1f779fdd0ef885bba52921b1cb5c3651dcd3ba0a Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 19:41:16 -0400 Subject: [PATCH 68/75] refactor: Improve bidirectional LSTM forward and backward processing --- .../src/building_blocks/bilstm.rs | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index 
d4c52ad..cf2eb71 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -56,51 +56,44 @@ impl BidirectionalLSTM { log::debug!("Entering apply_bidirectional_layer"); - // Initial states for forward + // Forward let h0_forward = h0.i(0)?; let c0_forward = c0.i(0)?; - let state_fw = rnn::LSTMState { h: h0_forward.clone(), c: c0_forward.clone() }; + let mut state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward }; - let input = input.contiguous()?; - log::debug!("Forward input shape: {:?}, is_contiguous: {}", input.shape(), input.is_contiguous()); - - let out_fw_states = lstm_forward.seq_init(&input, &state_fw)?; + let mut out_fw_states = Vec::with_capacity(seq_len); + for t in 0..seq_len { + let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?; + state_fw = lstm_forward.step(&xt, &state_fw)?; + out_fw_states.push(state_fw.clone()); + } let out_fw = Tensor::stack(&out_fw_states.iter().map(|s| s.h()).collect::>(), 1)?; - log::debug!("out_fw shape: {:?}, is_contiguous: {}", out_fw.shape(), out_fw.is_contiguous()); - let last_fw_h = out_fw_states.last().unwrap().h().clone(); let last_fw_c = out_fw_states.last().unwrap().c().clone(); - // Reverse sequence - let input_reversed = Tensor::cat( - &(0..seq_len) - .rev() - .map(|t| input.i((.., t..=t, ..))) - .collect::>>()?, - 1, - )?.contiguous()?; - log::debug!("Backward input_reversed shape: {:?}, is_contiguous: {}", input_reversed.shape(), input_reversed.is_contiguous()); - - // Initial states for backward + // Backward let h0_backward = h0.i(1)?; let c0_backward = c0.i(1)?; - let state_bw = rnn::LSTMState { h: h0_backward.clone(), c: c0_backward.clone() }; + let mut state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward }; - let out_bw_states = lstm_backward.seq_init(&input_reversed, &state_bw)?; + let mut out_bw_states = Vec::with_capacity(seq_len); + for t in (0..seq_len).rev() { + let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?; + state_bw = lstm_backward.step(&xt, &state_bw)?; + out_bw_states.push(state_bw.clone()); + } + out_bw_states.reverse(); let out_bw = Tensor::stack(&out_bw_states.iter().map(|s| s.h()).collect::>(), 1)?; - log::debug!("out_bw shape: {:?}, is_contiguous: {}", out_bw.shape(), out_bw.is_contiguous()); - let last_bw_h = out_bw_states.last().unwrap().h().clone(); let last_bw_c = out_bw_states.last().unwrap().c().clone(); - // Combine hidden and cell states - let hn = Tensor::stack(&[last_fw_h.clone(), last_bw_h.clone()], 0)?; + let hn = Tensor::stack(&[last_fw_h, last_bw_h], 0)?; let cn = Tensor::stack(&[last_fw_c, last_bw_c], 0)?; let output = Tensor::cat(&[out_fw, out_bw], 2)?; - log::debug!("Combined output shape: {:?}, is_contiguous: {}", output.shape(), output.is_contiguous()); Ok((output, (hn, cn))) } + /// Forward with hidden states returned From 80469a0c33edbed5823067e468c5e0924401d68d Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 19:55:07 -0400 Subject: [PATCH 69/75] debugging bilstm --- .../src/building_blocks/bilstm.rs | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index cf2eb71..a71fdb0 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -77,11 +77,17 @@ impl BidirectionalLSTM { let mut state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward }; let mut out_bw_states 
= Vec::with_capacity(seq_len); - for t in (0..seq_len).rev() { + for t in 0..seq_len { let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?; - state_bw = lstm_backward.step(&xt, &state_bw)?; - out_bw_states.push(state_bw.clone()); + + log::debug!("[step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); + log::debug!("[step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); + log::debug!("[step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); + + state_fw = lstm_forward.step(&xt, &state_fw)?; + out_fw_states.push(state_fw.clone()); } + out_bw_states.reverse(); let out_bw = Tensor::stack(&out_bw_states.iter().map(|s| s.h()).collect::>(), 1)?; let last_bw_h = out_bw_states.last().unwrap().h().clone(); @@ -112,10 +118,19 @@ impl BidirectionalLSTM { let xs = xs.contiguous()?; log::debug!("xs after contiguous shape: {:?}, is_contiguous: {}", xs.shape(), xs.is_contiguous()); + log::debug!("forward_with_state: xs shape = {:?}, strides = {:?}", xs.shape(), xs.stride()); + log::debug!("h0_1 shape: {:?}, strides: {:?}", h0.shape(), h0.stride()); + log::debug!("c0_1 shape: {:?}, strides: {:?}", c0.shape(), c0.stride()); + let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(&xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?; let out1 = out1.contiguous()?; log::debug!("out1 after first layer shape: {:?}, is_contiguous: {}", out1.shape(), out1.is_contiguous()); + + log::debug!("forward_with_state: out1 shape = {:?}, strides = {:?}", out1.shape(), out1.stride()); + log::debug!("h0_2 shape: {:?}, strides: {:?}", h0.shape(), h0.stride()); + log::debug!("c0_2 shape: {:?}, strides: {:?}", c0.shape(), c0.stride()); + let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?; From 0c56fd27222c953fa1efc527c02152270181c51f Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 21:18:08 -0400 Subject: [PATCH 70/75] add: type annotation for bilstm --- crates/redeem-properties/src/building_blocks/bilstm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index a71fdb0..dfeddb3 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -67,7 +67,7 @@ impl BidirectionalLSTM { state_fw = lstm_forward.step(&xt, &state_fw)?; out_fw_states.push(state_fw.clone()); } - let out_fw = Tensor::stack(&out_fw_states.iter().map(|s| s.h()).collect::>(), 1)?; + let out_fw = Tensor::stack(&out_fw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?; let last_fw_h = out_fw_states.last().unwrap().h().clone(); let last_fw_c = out_fw_states.last().unwrap().c().clone(); @@ -89,7 +89,7 @@ impl BidirectionalLSTM { } out_bw_states.reverse(); - let out_bw = Tensor::stack(&out_bw_states.iter().map(|s| s.h()).collect::>(), 1)?; + let out_bw = Tensor::stack(&out_bw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?; let last_bw_h = out_bw_states.last().unwrap().h().clone(); let last_bw_c = out_bw_states.last().unwrap().c().clone(); From 54af169d3fb014278856d84d0796583fed5cff42 Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 21:45:10 -0400 Subject: [PATCH 71/75] refactor: Clone contiguous tensor in BidirectionalLSTM for improved handling --- crates/redeem-properties/src/building_blocks/bilstm.rs | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index dfeddb3..044750d 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -78,7 +78,9 @@ impl BidirectionalLSTM { let mut out_bw_states = Vec::with_capacity(seq_len); for t in 0..seq_len { - let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?; + let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone(); + + log::debug!("xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); log::debug!("[step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); log::debug!("[step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); From f89c0a810ad59b09fa0e9fb2d0cf839ed883de3f Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 22:22:50 -0400 Subject: [PATCH 72/75] refactor: Improve logging in BidirectionalLSTM backward processing --- .../src/building_blocks/bilstm.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index 044750d..025816b 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -63,7 +63,14 @@ impl BidirectionalLSTM { let mut out_fw_states = Vec::with_capacity(seq_len); for t in 0..seq_len { - let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?; + let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone(); + + log::debug!("[backward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); + + log::debug!("[backward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); + log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); + log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); + state_fw = lstm_forward.step(&xt, &state_fw)?; out_fw_states.push(state_fw.clone()); } @@ -80,11 +87,11 @@ impl BidirectionalLSTM { for t in 0..seq_len { let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone(); - log::debug!("xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); + log::debug!("[backward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); - log::debug!("[step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); - log::debug!("[step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); - log::debug!("[step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); + log::debug!("[backward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); + log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); + log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); state_fw = lstm_forward.step(&xt, &state_fw)?; out_fw_states.push(state_fw.clone()); From b89e9ddadd6c6b484c471d3b113372350d145bea Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 22:47:29 -0400 Subject: [PATCH 73/75] more debugging for bilstm --- .../src/building_blocks/bilstm.rs | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 
deletions(-) diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs index 025816b..3a6d19a 100644 --- a/crates/redeem-properties/src/building_blocks/bilstm.rs +++ b/crates/redeem-properties/src/building_blocks/bilstm.rs @@ -65,18 +65,18 @@ impl BidirectionalLSTM { for t in 0..seq_len { let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone(); - log::debug!("[backward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); + log::debug!("[forward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); - log::debug!("[backward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); - log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); - log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); - + log::debug!("[forward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); + log::debug!("[forward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); + log::debug!("[forward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); + state_fw = lstm_forward.step(&xt, &state_fw)?; out_fw_states.push(state_fw.clone()); } - let out_fw = Tensor::stack(&out_fw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?; - let last_fw_h = out_fw_states.last().unwrap().h().clone(); - let last_fw_c = out_fw_states.last().unwrap().c().clone(); + let out_fw = Tensor::stack(&out_fw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?.contiguous()?; + let last_fw_h = out_fw_states.last().unwrap().h().clone().contiguous()?; + let last_fw_c = out_fw_states.last().unwrap().c().clone().contiguous()?; // Backward let h0_backward = h0.i(1)?; @@ -90,17 +90,17 @@ impl BidirectionalLSTM { log::debug!("[backward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous()); log::debug!("[backward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); - log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride()); - log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride()); + log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_bw.h.shape(), state_bw.h.stride()); + log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_bw.c.shape(), state_bw.c.stride()); - state_fw = lstm_forward.step(&xt, &state_fw)?; - out_fw_states.push(state_fw.clone()); + state_bw = lstm_backward.step(&xt, &state_bw)?; + out_bw_states.push(state_bw.clone()); } out_bw_states.reverse(); - let out_bw = Tensor::stack(&out_bw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?; - let last_bw_h = out_bw_states.last().unwrap().h().clone(); - let last_bw_c = out_bw_states.last().unwrap().c().clone(); + let out_bw = Tensor::stack(&out_bw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?.contiguous()?; + let last_bw_h = out_bw_states.last().unwrap().h().clone().contiguous()?; + let last_bw_c = out_bw_states.last().unwrap().c().clone().contiguous()?; let hn = Tensor::stack(&[last_fw_h, last_bw_h], 0)?; let cn = Tensor::stack(&[last_fw_c, last_bw_c], 0)?; From 74b070303f31e6086ee536797bf9a7115eddb468 Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 15 May 2025 23:00:19 -0400 Subject: 
[PATCH 74/75] revert: apply_bidirectional_layer and forward_with_state in
 bilstm to earlier version before refactoring

---
 .../src/building_blocks/bilstm.rs             | 142 ++++++++----------
 1 file changed, 59 insertions(+), 83 deletions(-)

diff --git a/crates/redeem-properties/src/building_blocks/bilstm.rs b/crates/redeem-properties/src/building_blocks/bilstm.rs
index 3a6d19a..805bc5d 100644
--- a/crates/redeem-properties/src/building_blocks/bilstm.rs
+++ b/crates/redeem-properties/src/building_blocks/bilstm.rs
@@ -1,4 +1,4 @@
-use candle_core::{IndexOp, Result, Tensor};
+use candle_core::{DType, IndexOp, Result, Tensor};
 
 use candle_nn::{rnn, Module, VarBuilder, RNN};
 
@@ -44,108 +44,84 @@ impl BidirectionalLSTM {
         })
     }
 
-    fn apply_bidirectional_layer(
-        &self,
-        input: &Tensor,
-        lstm_forward: &rnn::LSTM,
-        lstm_backward: &rnn::LSTM,
-        h0: &Tensor,
-        c0: &Tensor,
-    ) -> Result<(Tensor, (Tensor, Tensor))> {
-        let (_batch_size, seq_len, _input_size) = input.dims3()?;
+    fn apply_bidirectional_layer(&self, input: &Tensor, lstm_forward: &rnn::LSTM, lstm_backward: &rnn::LSTM, h0: &Tensor, c0: &Tensor, layer_idx: &i32) -> Result<(Tensor, (Tensor, Tensor))> {
+        let (batch_size, seq_len, input_size) = input.dims3()?;
 
-        log::debug!("Entering apply_bidirectional_layer");
+        // Print first and last 5 values of the original input
+        let input_vec = input.to_vec3::()?;
 
-        // Forward
-        let h0_forward = h0.i(0)?;
-        let c0_forward = c0.i(0)?;
-        let mut state_fw = rnn::LSTMState { h: h0_forward, c: c0_forward };
-
-        let mut out_fw_states = Vec::with_capacity(seq_len);
-        for t in 0..seq_len {
-            let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone();
-
-            log::debug!("[forward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(), xt.stride(), xt.is_contiguous());
+        // Forward pass
+        let h0_forward = h0.narrow(0, 0, 1)?.reshape((batch_size, h0.dim(2)?))?;
+        let c0_forward = c0.narrow(0, 0, 1)?.reshape((batch_size, c0.dim(2)?))?;
 
-            log::debug!("[forward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride());
-            log::debug!("[forward] [step][fw] h shape: {:?}, strides: {:?}", state_fw.h.shape(), state_fw.h.stride());
-            log::debug!("[forward] [step][fw] c shape: {:?}, strides: {:?}", state_fw.c.shape(), state_fw.c.stride());
+        let state_forward = rnn::LSTMState{ h: h0_forward.clone(), c: c0_forward.clone() };
 
-            state_fw = lstm_forward.step(&xt, &state_fw)?;
-            out_fw_states.push(state_fw.clone());
-        }
-        let out_fw = Tensor::stack(&out_fw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?.contiguous()?;
-        let last_fw_h = out_fw_states.last().unwrap().h().clone().contiguous()?;
-        let last_fw_c = out_fw_states.last().unwrap().c().clone().contiguous()?;
+        let output_forward_states: Vec = lstm_forward.seq_init(&input, &state_forward)?;
+        let output_forward = Tensor::stack(&output_forward_states.iter().map(|state| state.h().clone()).collect::>(), 1)?;
+        let last_forward_state = output_forward_states.last().unwrap().h().clone();
 
-        // Backward
-        let h0_backward = h0.i(1)?;
-        let c0_backward = c0.i(1)?;
-        let mut state_bw = rnn::LSTMState { h: h0_backward, c: c0_backward };
-
-        let mut out_bw_states = Vec::with_capacity(seq_len);
-        for t in 0..seq_len {
-            let xt = input.i((.., t..=t, ..))?.squeeze(1)?.contiguous()?.clone();
+        // Backward pass
+        let h0_backward = h0.narrow(0, 1, 1)?.reshape((batch_size, h0.dim(2)?))?;
+        let c0_backward = c0.narrow(0, 1, 1)?.reshape((batch_size, c0.dim(2)?))?;
 
-            log::debug!("[backward] xt shape: {:?}, strides: {:?}, is_contiguous: {}", xt.shape(),
xt.stride(), xt.is_contiguous()); - - log::debug!("[backward] [step][fw] xt shape: {:?}, strides: {:?}", xt.shape(), xt.stride()); - log::debug!("[backward] [step][fw] h shape: {:?}, strides: {:?}", state_bw.h.shape(), state_bw.h.stride()); - log::debug!("[backward] [step][fw] c shape: {:?}, strides: {:?}", state_bw.c.shape(), state_bw.c.stride()); - - state_bw = lstm_backward.step(&xt, &state_bw)?; - out_bw_states.push(state_bw.clone()); + let state_backward = rnn::LSTMState{ h: h0_backward.clone(), c: c0_backward.clone() }; + + // Correctly reverse the input sequence + let mut reversed_input = vec![vec![vec![0.0; input_size]; seq_len]; batch_size]; + for b in 0..batch_size { + for t in 0..seq_len { + for i in 0..input_size { + reversed_input[b][seq_len - t - 1][i] = input_vec[b][t][i]; + } + } } + let input_reversed = Tensor::new(reversed_input, input.device())? + .to_dtype(DType::F32)? + .reshape((batch_size, seq_len, input_size))?; + + // Print first and last 5 values of the reversed input + // let reversed_input_vec = input_reversed.to_vec3::()?; + + + let output_backward_states = lstm_backward.seq_init(&input_reversed, &state_backward)?; + let output_backward = Tensor::stack(&output_backward_states.iter().map(|state| state.h().clone()).collect::>(), 1)?; - out_bw_states.reverse(); - let out_bw = Tensor::stack(&out_bw_states.iter().map(|s: &rnn::LSTMState| s.h()).collect::>(), 1)?.contiguous()?; - let last_bw_h = out_bw_states.last().unwrap().h().clone().contiguous()?; - let last_bw_c = out_bw_states.last().unwrap().c().clone().contiguous()?; + // Use the last state of the backward LSTM (which corresponds to the first element of the original sequence) + let last_backward_state = output_backward_states.last().unwrap().h().clone(); + + // Combine the forward and backward hidden states for hn + let hn = Tensor::cat(&[last_forward_state.unsqueeze(0)?, last_backward_state.unsqueeze(0)?], 0)?; // Shape: [2, 1, 128] + let hn_concat = Tensor::cat(&[last_forward_state, last_backward_state], 1)?; // Shape: [1, 256] + + // Combine the forward and backwards cell states for cn + let cn = Tensor::cat(&[output_forward_states.last().unwrap().c().clone(), output_backward_states.last().unwrap().c().clone()], 0)?; // Shape: [2, 1, 128] - let hn = Tensor::stack(&[last_fw_h, last_bw_h], 0)?; - let cn = Tensor::stack(&[last_fw_c, last_bw_c], 0)?; - let output = Tensor::cat(&[out_fw, out_bw], 2)?; + // The output_backward is already in the correct order for the original sequence + let output = Tensor::cat(&[output_forward, output_backward], 2)?; // Shape: [1, 13, 256] Ok((output, (hn, cn))) } - - - /// Forward with hidden states returned - pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> { - log::debug!("Input xs shape: {:?}, is_contiguous: {}", xs.shape(), xs.is_contiguous()); - - let (batch_size, _, _) = xs.dims3()?; - let h0 = self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; - let c0 = self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + // New method that returns output and states + pub fn forward_with_state(&self, xs: &Tensor) -> Result<(Tensor, (Tensor, Tensor))> { + let (batch_size, seq_len, input_size) = xs.dims3()?; + + let h0 = &self.h0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + let c0 = &self.c0.expand((self.num_layers * 2, batch_size, self.hidden_size))?; + let h0_1 = h0.narrow(0, 0, 2)?; - let c0_1 = c0.narrow(0, 0, 2)?; let h0_2 = h0.narrow(0, 2, 2)?; + let c0_1 = c0.narrow(0, 0, 2)?; let c0_2 = 
c0.narrow(0, 2, 2)?; - - let xs = xs.contiguous()?; - log::debug!("xs after contiguous shape: {:?}, is_contiguous: {}", xs.shape(), xs.is_contiguous()); - - log::debug!("forward_with_state: xs shape = {:?}, strides = {:?}", xs.shape(), xs.stride()); - log::debug!("h0_1 shape: {:?}, strides: {:?}", h0.shape(), h0.stride()); - log::debug!("c0_1 shape: {:?}, strides: {:?}", c0.shape(), c0.stride()); - let (out1, (hn1, cn1)) = self.apply_bidirectional_layer(&xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1)?; - - let out1 = out1.contiguous()?; - log::debug!("out1 after first layer shape: {:?}, is_contiguous: {}", out1.shape(), out1.is_contiguous()); + let (layer1_output, (hn1, cn1)) = self.apply_bidirectional_layer(xs, &self.forward_lstm1, &self.backward_lstm1, &h0_1, &c0_1, &1)?; + let (layer2_output, (hn2, cn2)) = self.apply_bidirectional_layer(&layer1_output, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2, &2)?; - log::debug!("forward_with_state: out1 shape = {:?}, strides = {:?}", out1.shape(), out1.stride()); - log::debug!("h0_2 shape: {:?}, strides: {:?}", h0.shape(), h0.stride()); - log::debug!("c0_2 shape: {:?}, strides: {:?}", c0.shape(), c0.stride()); + let final_hn = Tensor::cat(&[hn1, hn2], 0)?; + let final_cn = Tensor::cat(&[cn1, cn2], 0)?; - - let (out2, (hn2, cn2)) = self.apply_bidirectional_layer(&out1, &self.forward_lstm2, &self.backward_lstm2, &h0_2, &c0_2)?; - - let hn = Tensor::cat(&[hn1, hn2], 0)?; - let cn = Tensor::cat(&[cn1, cn2], 0)?; - Ok((out2, (hn, cn))) + Ok((layer2_output, (final_hn, final_cn))) } pub fn input_size(&self) -> usize { From 8c7c70e581d4cc5243cb17790afedc29419a9244 Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 27 May 2025 23:00:04 -0400 Subject: [PATCH 75/75] minor --- .../src/properties/train/trainer.rs | 41 +++++-- .../src/building_blocks/building_blocks.rs | 24 +++-- .../src/models/model_interface.rs | 101 ++++++------------ .../src/models/rt_cnn_transformer_model.rs | 6 +- 4 files changed, 81 insertions(+), 91 deletions(-) diff --git a/crates/redeem-cli/src/properties/train/trainer.rs b/crates/redeem-cli/src/properties/train/trainer.rs index cd06a79..0cec5b3 100644 --- a/crates/redeem-cli/src/properties/train/trainer.rs +++ b/crates/redeem-cli/src/properties/train/trainer.rs @@ -164,7 +164,7 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { let mut overview_section = ReportSection::new("Overview"); overview_section.add_content(html! { - "This report summarizes the training process of the ReDeeM model. It includes epoch-level summaries and step-wise dynamics such as learning rate scheduling and accuracy tracking over time. These plots provide insight into model convergence behavior and training stability." + "This report summarizes the training process of the {} model. It includes epoch-level summaries and step-wise dynamics such as learning rate scheduling and accuracy tracking over time. These plots provide insight into model convergence behavior and training stability." 
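+        // If `html!` here is maud's macro, the "{}" inside the string literal
+        // above is rendered verbatim rather than interpolated; splicing a
+        // value into the markup would use maud's parenthesized splice syntax,
+        // e.g. (config.model_arch). The templating crate is an assumption;
+        // this patch does not identify it.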
}); let epoch_losses = train_step_metrics.summarize_loss_for_plotting(); @@ -209,17 +209,38 @@ pub fn run_training(config: &PropertyTrainConfig) -> Result<()> { .iter() .zip(&inference_results) .filter_map(|(true_pep, pred_pep)| { - match (true_pep.retention_time, pred_pep.retention_time) { - (Some(t), Some(p)) => { - let t_denorm = match norm_factor { - TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64, - TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64, - TargetNormalization::None => t as f64, - }; - Some((t_denorm, p as f64)) + // check if model is RT or CCS + if config.model_arch == "ccs_cnn_lstm" || config.model_arch == "ccs_cnn_tf" { + match (true_pep.ccs, pred_pep.ccs) { + (Some(t), Some(p)) => { + let t_denorm = match norm_factor { + TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64, + TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64, + TargetNormalization::None => t as f64, + }; + Some((t_denorm, p as f64)) + } + _ => None, + } - _ => None, } + else if config.model_arch == "rt_cnn_lstm" || config.model_arch == "rt_cnn_tf" { + match (true_pep.retention_time, pred_pep.retention_time) { + (Some(t), Some(p)) => { + let t_denorm = match norm_factor { + TargetNormalization::ZScore(mean, std) => t as f64 * std as f64 + mean as f64, + TargetNormalization::MinMax(min, range) => t as f64 * range as f64 + min as f64, + TargetNormalization::None => t as f64, + }; + Some((t_denorm, p as f64)) + } + _ => None, + + } + } else { + return None; + } + }) .unzip(); diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs index d99c914..f72823a 100644 --- a/crates/redeem-properties/src/building_blocks/building_blocks.rs +++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs @@ -850,12 +850,15 @@ impl Encoder26aaModCnnLstmAttnSum { .map_err(|e| candle_core::Error::Msg(e.to_string()))?; let (mean, min, max) = get_tensor_stats(&x)?; - log::trace!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); + // log::trace!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}"); - let start_time = Instant::now(); let x = self.input_cnn.forward(&x)?; + let (b, s, d) = x.dims3()?; + println!("x (post input_cnn): batch size: {b}, seq len: {s}, embedding dim: {d}"); let x = self.input_lstm.forward(&x)?; + let (b, s, d) = x.dims3()?; + println!("x (post input_lstm): batch size: {b}, seq len: {s}, embedding dim: {d}"); let x = self.attn_sum.forward(&x)?; @@ -982,8 +985,8 @@ impl Encoder26aaModCnnTransformerAttnSum { names_input_cnn_bias, )?, proj_cnn_to_transformer: candle_nn::Linear::new( - varstore.get((input_dim * 4, hidden_dim), "proj_cnn_to_transformer.weight")?, - Some(varstore.get(hidden_dim, "proj_cnn_to_transformer.bias")?), + varstore.get((hidden_dim, input_dim * 4), "proj_cnn_to_transformer.weight")?, + None, ), input_transformer: SeqTransformer::from_varstore( varstore.pp(transformer_pp).clone(), @@ -1020,7 +1023,7 @@ impl Encoder26aaModCnnTransformerAttnSum { Ok(Self { mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?, input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?, - proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim*4, hidden_dim, varbuilder.pp("proj_cnn_to_transformer"))?, + proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim * 
diff --git a/crates/redeem-properties/src/building_blocks/building_blocks.rs b/crates/redeem-properties/src/building_blocks/building_blocks.rs
index d99c914..f72823a 100644
--- a/crates/redeem-properties/src/building_blocks/building_blocks.rs
+++ b/crates/redeem-properties/src/building_blocks/building_blocks.rs
@@ -850,12 +850,15 @@ impl Encoder26aaModCnnLstmAttnSum {
             .map_err(|e| candle_core::Error::Msg(e.to_string()))?;
 
         let (mean, min, max) = get_tensor_stats(&x)?;
-        log::trace!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}");
+        // log::trace!("[Encoder26aaModCnnLstmAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}");
 
-        let start_time = Instant::now();
         let x = self.input_cnn.forward(&x)?;
+        let (b, s, d) = x.dims3()?;
+        println!("x (post input_cnn): batch size: {b}, seq len: {s}, embedding dim: {d}");
         let x = self.input_lstm.forward(&x)?;
+        let (b, s, d) = x.dims3()?;
+        println!("x (post input_lstm): batch size: {b}, seq len: {s}, embedding dim: {d}");
 
         let x = self.attn_sum.forward(&x)?;
 
@@ -982,8 +985,8 @@ impl Encoder26aaModCnnTransformerAttnSum {
                 names_input_cnn_bias,
             )?,
             proj_cnn_to_transformer: candle_nn::Linear::new(
-                varstore.get((input_dim * 4, hidden_dim), "proj_cnn_to_transformer.weight")?,
-                Some(varstore.get(hidden_dim, "proj_cnn_to_transformer.bias")?),
+                varstore.get((hidden_dim, input_dim * 4), "proj_cnn_to_transformer.weight")?,
+                None,
             ),
             input_transformer: SeqTransformer::from_varstore(
                 varstore.pp(transformer_pp).clone(),
@@ -1020,7 +1023,7 @@ impl Encoder26aaModCnnTransformerAttnSum {
         Ok(Self {
             mod_nn: ModEmbeddingFixFirstK::new(MOD_FEATURE_SIZE, mod_hidden_dim, &varbuilder.pp("mod_nn"))?,
             input_cnn: SeqCNN::new(input_dim, &varbuilder.pp("input_cnn"))?,
-            proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim*4, hidden_dim, varbuilder.pp("proj_cnn_to_transformer"))?,
+            proj_cnn_to_transformer: candle_nn::linear_no_bias(input_dim * 4, hidden_dim, varbuilder.pp("proj_cnn_to_transformer"))?,
             input_transformer: SeqTransformer::new(
                 &varbuilder.pp("input_transformer"),
                 input_dim * 4,
@@ -1045,17 +1048,19 @@ impl Encoder26aaModCnnTransformerAttnSum {
             .map_err(|e| candle_core::Error::Msg(e.to_string()))?;
 
         let (mean, min, max) = get_tensor_stats(&x)?;
-        log::trace!("[Encoder26aaModCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}");
+        // log::trace!("[Encoder26aaModCnnTransformerAttnSum] one-hot output stats - min: {min}, max: {max}, mean: {mean}");
 
         if !mean.is_finite() || !min.is_finite() || !max.is_finite() {
             log::error!("ERROR [Encoder26aaModCnnTransformerAttnSum] aa_one_hot produced non-finite tensor stats: mean={mean}, min={min}, max={max}");
-            candle_core::bail!("ERRORNon-finite values found in peptide encoding output.");
+            candle_core::bail!("ERROR: Non-finite values found in peptide encoding output.");
         }
 
         let x = self.input_cnn.forward(&x)?;
         let x = x.contiguous()?;
+
         let x = self.proj_cnn_to_transformer.forward(&x)?;
         let x = x.contiguous()?;
+
         let x = self.input_transformer.forward(&x)?;
         let x = x.contiguous()?;
         let x = self.attn_sum.forward(&x)?;
@@ -1107,9 +1112,8 @@ impl Encoder26aaModChargeCnnTransformerAttnSum {
                 names_input_cnn_bias,
             )?,
             proj_cnn_to_transformer: candle_nn::Linear::new(
-                varstore.get((input_dim * 4, hidden_dim), "proj_cnn_to_transformer.weight")?,
-                Some(varstore.get(hidden_dim, "proj_cnn_to_transformer.bias")?),
-            ),
+                varstore.get((hidden_dim, input_dim * 4), "proj_cnn_to_transformer.weight")?,
+                None),
             input_transformer: SeqTransformer::from_varstore(
                 varstore.pp(transformer_pp).clone(),
                 input_dim * 4,
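The shape swap in both `from_varstore` constructors follows from how candle stores linear weights: `candle_nn::Linear` keeps its weight as `(out_features, in_features)` and forwards by multiplying with the transpose, so a projection from `input_dim * 4` to `hidden_dim` must load a `(hidden_dim, input_dim * 4)` tensor. A standalone sketch, with illustrative dimensions in place of the real `input_dim * 4` and `hidden_dim`:

use candle_core::{DType, Device, Result, Tensor};
use candle_nn::{Linear, Module};

fn main() -> Result<()> {
    let dev = Device::Cpu;
    let (in_features, out_features) = (32, 8); // stand-ins for input_dim * 4 and hidden_dim

    // candle stores Linear weights as (out_features, in_features) ...
    let w = Tensor::randn(0f32, 1.0, (out_features, in_features), &dev)?;
    let proj = Linear::new(w, None); // bias dropped, as in the patch

    // ... and forward computes x.matmul(w.t()), so a (batch, seq, in_features)
    // input maps to (batch, seq, out_features).
    let x = Tensor::zeros((4, 10, in_features), DType::F32, &dev)?;
    let y = proj.forward(&x)?;
    assert_eq!(y.dims3()?, (4, 10, out_features));
    Ok(())
}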
diff --git a/crates/redeem-properties/src/models/model_interface.rs b/crates/redeem-properties/src/models/model_interface.rs
index d4b7194..d5154a0 100644
--- a/crates/redeem-properties/src/models/model_interface.rs
+++ b/crates/redeem-properties/src/models/model_interface.rs
@@ -308,10 +308,10 @@ pub trait ModelInterface: Send + Sync + ModelClone {
         let mod_feature_size = self.get_mod_element_count();
         let mod_to_feature = self.get_mod_to_feature();
 
-        log::trace!(
-            "[ModelInterface::encode_peptide] peptide_sequence: {:?} | mods: {:?} | mod_sites: {:?} | charge: {:?} | nce: {:?} | instrument: {:?}",
-            peptide_sequence, mods, mod_sites, charge, nce, instrument
-        );
+        // log::trace!(
+        //     "[ModelInterface::encode_peptide] peptide_sequence: {:?} | mods: {:?} | mod_sites: {:?} | charge: {:?} | nce: {:?} | instrument: {:?}",
+        //     peptide_sequence, mods, mod_sites, charge, nce, instrument
+        // );
 
         let aa_tensor = aa_indices_tensor_from_arc(peptide_sequence, device)?;
         let (batch_size, seq_len, _) = aa_tensor.shape().dims3()?;
@@ -627,6 +627,10 @@ pub trait ModelInterface: Send + Sync + ModelClone {
 
         let acc = match self.property_type() {
             PropertyType::RT => {
+                // Print the first few predictions and targets as a quick sanity check.
+                let n = predictions.len().min(5);
+                println!("Predictions: {:?}", &predictions[..n]);
+                println!("Targets: {:?}", &targets[..n]);
                 Some(Metrics::accuracy(&predictions, &targets, 0.5))
             }
             PropertyType::CCS => {
@@ -739,7 +742,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
         &self,
         inference_data: &Vec<PeptideData>,
         batch_size: usize,
-        _modifications: HashMap<
+        modifications: HashMap<
             (String, Option<char>),
             crate::utils::peptdeep_utils::ModificationMap,
         >,
@@ -760,73 +763,33 @@ pub trait ModelInterface: Send + Sync + ModelClone {
             .enumerate()
             .map(|(batch_idx, batch_data)| {
                 let start_idx = batch_idx * batch_size;
-                let batch: PeptideBatchData = batch_data.into();
-                let naked_sequences = &batch.naked_sequence;
-                let mods = &batch.mods;
-                let mod_sites = &batch.mod_sites;
-
-                let charges = if batch.charges.iter().all(|c| c.is_some()) {
-                    Some(batch.charges.iter().map(|c| c.unwrap()).collect::<Vec<_>>())
-                } else {
-                    None
-                };
+                // Extract input features only (ignore targets)
+                let (input_tensor, _) = self.prepare_batch_inputs(batch_data, &modifications)?;
+                let predicted = self.forward(&input_tensor)?;
 
-                let nces = if batch.nces.iter().all(|n| n.is_some()) {
-                    Some(batch.nces.iter().map(|n| n.unwrap()).collect::<Vec<_>>())
-                } else {
-                    None
-                };
-
-                let instruments = if batch.instruments.iter().all(|i| i.is_some()) {
-                    Some(batch.instruments.clone())
-                } else {
-                    None
-                };
+                let predictions = predicted.to_vec1::<f32>()?;
 
-                let input_tensor = self
-                    .encode_peptides(naked_sequences, mods, mod_sites, charges, nces, instruments)?
-                    .to_device(self.get_device())?;
-                let output = self.forward(&input_tensor)?;
+                let updated = predictions
+                    .into_iter()
+                    .enumerate()
+                    .map(|(i, pred)| {
+                        let mut peptide = batch_data[i].clone();
+                        let value = match target_norm {
+                            TargetNormalization::ZScore(mean, std) => pred * std + mean,
+                            TargetNormalization::MinMax(min, max) => pred * (max - min) + min,
+                            TargetNormalization::None => pred,
+                        };
+                        match self.property_type() {
+                            PropertyType::RT => peptide.retention_time = Some(value),
+                            PropertyType::CCS => peptide.ccs = Some(value),
+                            _ => {}
+                        }
+                        (start_idx + i, peptide)
+                    })
+                    .collect::<Vec<_>>();
 
-                match self.property_type() {
-                    PropertyType::RT | PropertyType::CCS => {
-                        let predictions = output.to_vec1()?;
-                        let updated: Vec<(usize, PeptideData)> = predictions
-                            .into_iter()
-                            .enumerate()
-                            .map(|(i, pred)| {
-                                let mut peptide = batch_data[i].clone();
-                                match self.property_type() {
-                                    PropertyType::RT => {
-                                        peptide.retention_time = Some(match target_norm {
-                                            TargetNormalization::ZScore(mean, std) => pred * std + mean,
-                                            TargetNormalization::MinMax(min, max) => {
-                                                pred * (max - min) + min
-                                            }
-                                            TargetNormalization::None => pred,
-                                        });
-                                    }
-                                    PropertyType::CCS => {
-                                        peptide.ccs = Some(match target_norm {
-                                            TargetNormalization::ZScore(mean, std) => pred * std + mean,
-                                            TargetNormalization::MinMax(min, max) => {
-                                                pred * (max - min) + min
-                                            }
-                                            TargetNormalization::None => pred,
-                                        });
-                                    }
-                                    _ => {}
-                                }
-                                (start_idx + i, peptide)
-                            })
-                            .collect();
-                        Ok(updated)
-                    }
-                    PropertyType::MS2 => Err(anyhow::anyhow!(
-                        "Inference not supported for MS2 models in batch mode"
-                    )),
-                }
+                Ok(updated)
             })
             .collect::<Result<Vec<Vec<(usize, PeptideData)>>>>()?
             .into_iter()
@@ -838,7 +801,7 @@ pub trait ModelInterface: Send + Sync + ModelClone {
         progress.finish();
 
         Ok(result.into_iter().flatten().collect())
-    }
+    }
 
 
     /// Extract encoded input and target tensor for a batch of peptides.
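The rewritten inference loop leans on two idioms worth calling out: chunking the input while carrying a `start_idx` so each prediction can be matched back to its global position, and collecting an iterator of `Result<Vec<_>>` so the first failing batch aborts the whole pass. A minimal self-contained sketch of that control flow, with a trivial stand-in for the encode/forward step (no model or tensors involved):

use anyhow::Result;

// Stand-in for encode -> forward -> to_vec1::<f32>().
fn process_batch(batch: &[f32]) -> Result<Vec<f32>> {
    Ok(batch.iter().map(|x| x * 2.0).collect())
}

fn main() -> Result<()> {
    let data: Vec<f32> = (0..10).map(|i| i as f32).collect();
    let batch_size = 4;

    let results: Vec<(usize, f32)> = data
        .chunks(batch_size)
        .enumerate()
        .map(|(batch_idx, batch)| -> Result<Vec<(usize, f32)>> {
            let start_idx = batch_idx * batch_size;
            let preds = process_batch(batch)?;
            // Tag each prediction with its global index, as the patch does with peptides.
            Ok(preds
                .into_iter()
                .enumerate()
                .map(move |(i, p)| (start_idx + i, p))
                .collect())
        })
        .collect::<Result<Vec<Vec<(usize, f32)>>>>()? // first Err short-circuits here
        .into_iter()
        .flatten()
        .collect();

    assert_eq!(results[5], (5, 10.0));
    Ok(())
}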
diff --git a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
index 3743261..1931208 100644
--- a/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
+++ b/crates/redeem-properties/src/models/rt_cnn_transformer_model.rs
@@ -47,6 +47,7 @@ impl ModelInterface for RTCNNTFModel {
         "rt_cnn_tf"
     }
 
+    /// Create a new, untrained RTCNNTFModel with freshly initialized weights.
     fn new_untrained(device: Device) -> Result<Self> {
         let mut varmap = VarMap::new();
         let varbuilder = VarBuilder::from_varmap(&varmap, DType::F32, &device);
@@ -82,7 +83,7 @@ impl ModelInterface for RTCNNTFModel {
         })
     }
 
-    /// Create a new RTCNNTFModel from the given model and constants files.
+    /// Create a new RTCNNTFModel from the given pretrained model and constants files.
     fn new<P: AsRef<Path>>(
         model_path: P,
         constants_path: Option<P>,
@@ -93,6 +94,7 @@ impl ModelInterface for RTCNNTFModel {
         device: Device,
     ) -> Result<Self> {
         let tensor_data = load_tensors_from_model(model_path.as_ref(), &device)?;
+
         let mut varmap = candle_nn::VarMap::new();
         create_var_map(&mut varmap, tensor_data, &device)?;
         let var_store = candle_nn::VarBuilder::from_varmap(&varmap, DType::F32, &device);
@@ -112,7 +114,7 @@ impl ModelInterface for RTCNNTFModel {
             256, // ff_dim
             4,   // num_heads
             2,   // num_layers
-            100, // max_len (set appropriately for your sequence length)
+            100, // max_len (maximum supported sequence length)
             0.1, // dropout_prob
             vec!["rt_encoder.mod_nn.nn.weight"],
             vec![
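Both constructors touched above build their `VarBuilder` from a `VarMap`: `new_untrained` starts from an empty map, so every variable is freshly initialized and tracked for optimization, while `new` first fills the map with tensors loaded from disk via `create_var_map`. A minimal sketch of the untrained path, with the layer name invented for illustration:

use candle_core::{DType, Device, Result, Tensor};
use candle_nn::{Module, VarBuilder, VarMap};

fn main() -> Result<()> {
    let device = Device::Cpu;

    // Fresh VarMap: variables requested through the builder are created
    // (and tracked for an optimizer) rather than loaded from a checkpoint.
    let varmap = VarMap::new();
    let vb = VarBuilder::from_varmap(&varmap, DType::F32, &device);

    // "proj" is an illustrative name, not one from the real model.
    let layer = candle_nn::linear(16, 4, vb.pp("proj"))?;

    let x = Tensor::zeros((2, 16), DType::F32, &device)?;
    let y = layer.forward(&x)?;
    assert_eq!(y.dims2()?, (2, 4));

    // The map now holds proj.weight and proj.bias, ready to hand to an optimizer.
    assert_eq!(varmap.all_vars().len(), 2);
    Ok(())
}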