Merged

79 commits
a991ab9
Update model_interface.rs
singjc May 7, 2025
8f33855
chore: Add Clone trait implementation for ModelInterface
singjc May 7, 2025
669dd00
Merge branch 'master' of github.com:singjc/redeem
singjc May 7, 2025
5860df8
refactor: Update model structs to use 'static lifetime for VarBuilder
singjc May 7, 2025
8be72ea
refactor: Update model structs to use 'static lifetime for VarBuilder
singjc May 7, 2025
ac5afe9
refactor: Update ModelClone trait to include Send and Sync bounds
singjc May 7, 2025
f41eeb5
refactor: Update DLModels struct to remove unnecessary Arc and Mutex …
singjc May 7, 2025
6eecf07
refactor: Update peptide modification handling to support mass shifts…
singjc May 8, 2025
1c70ac6
refactor: peptide encoding
singjc May 8, 2025
1086bd6
chore: Update dependencies in redeem-properties crate
singjc May 8, 2025
c13dabd
refactor: bilstm
singjc May 9, 2025
e0b19c6
refactor: Optimize peptide sequence featurization and one-hot encoding
singjc May 9, 2025
f3a5013
refactor: Update RTCNNLSTMModel forward method to improve performance…
singjc May 9, 2025
d1aea79
refactor: Update redeem-properties crate models to remove unused impo…
singjc May 9, 2025
e676951
add: TransformerEncoder and SeqTransformer block
singjc May 9, 2025
e44dddd
refactor: Update RTCNNLSTMModel forward method to improve performance…
singjc May 9, 2025
999ccf5
refactor: Add RT-CNN Transformer model and update redeem-properties c…
singjc May 9, 2025
50c4a07
feat: Add new modules for training and loading data in redeem-cli
singjc May 9, 2025
b1715c6
Merge pull request #1 from singjc/patch/optimizations
singjc May 9, 2025
c8db11c
Merge pull request #2 from singjc/add/cli
singjc May 9, 2025
b5decf0
refactor: Add early stopping to property training
singjc May 9, 2025
37081ba
feat: Add Dockerfile for CUDA-based application containerization
singjc May 9, 2025
1ceb7e6
refactor: Remove unnecessary cargo update command in Dockerfile
singjc May 9, 2025
480d6c3
refactor: Update dependencies and descriptions in Cargo.toml files
singjc May 9, 2025
4b7f92c
refactor: Update redeem-properties crate models and add new modules f…
singjc May 11, 2025
bf62774
feat: Add inference functionality to redeem-cli
singjc May 11, 2025
eab57a0
refactor: Add new modules for training and loading data in redeem-cli
singjc May 11, 2025
0b68ac3
refactor: Update Dockerfile to optimize build process and clean up ar…
singjc May 11, 2025
e4bfaf9
add: Encoder26aaModChargeCnnTransformerAttnSum implementation
singjc May 11, 2025
146dedd
refactor: Add CCSCNNTFModel implementation
singjc May 11, 2025
ddc39f1
refactor: Update RTCNNTFModel implementation and remove unused code
singjc May 11, 2025
b0e0f22
update: readme
singjc May 11, 2025
87797f3
refactor: Improve regex pattern for extracting modification indices i…
singjc May 11, 2025
4100747
refactor: Update redeem-properties crate models for CCS prediction
singjc May 11, 2025
a6d944f
refactor: Add new fields to load_peptide_data function in redeem-cli
singjc May 11, 2025
b22f28d
refactor: Add stats module to redeem-properties crate, and add lr sch…
singjc May 11, 2025
5891b4e
refactor: Add precursor mass field to PeptideData struct in redeem-pr…
singjc May 11, 2025
874e441
refactor: Add plot_training_metric function to redeem-cli crate
singjc May 11, 2025
487f6b7
refactor: Update early stopping logic in ModelInterface implementation
singjc May 11, 2025
30ad102
refactor: Update plot_losses function in redeem-cli crate
singjc May 12, 2025
f1c74a5
add: RT Norm struct to set type of normalization
singjc May 13, 2025
92c7134
refactor: Update config loading logic in redeem-cli crate
singjc May 13, 2025
f95f087
refactor: Update RT-CNN-LSTM and RT-CNN-Transformer models in redeem-…
singjc May 13, 2025
4dd8900
refactor: Update hidden_dim and decoder size in CCSCNNTFModel
singjc May 13, 2025
47ab976
refactor: clean up trace comments
singjc May 13, 2025
b6ec2a4
refactor: Update peptide data loading logic in redeem-cli crate
singjc May 13, 2025
32e117c
fix: modication name and indice retrieval
singjc May 13, 2025
f0354a8
refactor: Update PeptideData struct to use u8 for string fields
singjc May 13, 2025
67de1dd
refactor: Update mod_to_feature loading to use Arc for key in RTCNNTF…
singjc May 13, 2025
905c80a
refactor: Improve error handling in redeem-cli crate
singjc May 13, 2025
9e6d8c3
refactor: Optimize contiguous operations in building_blocks.rs
singjc May 14, 2025
a55ae3f
refactor: Update rank feature based on new classifier scores
singjc May 14, 2025
2c7e25e
refactor: Update examples in classifiers crate
singjc May 14, 2025
122d5c1
refactor: Update rank feature and log rank changes in Experiment class
singjc May 14, 2025
8f7eea0
refactor: Set log level to debug in main function
singjc May 14, 2025
47c7da1
Merge branch 'patch/rescore_reranking' into develop
singjc May 14, 2025
fb08794
refactor: Update loading of modifications to use byte slice instead o…
singjc May 14, 2025
ddb255c
refactor: Update data handling to use TargetNormalization instead of …
singjc May 14, 2025
14ff5b6
refactor: Add once_cell dependency for redeem-properties crate
singjc May 14, 2025
782d9c3
add: modification.tsv asset
singjc May 14, 2025
ab4352c
refactor: Update normalization field in Redeem CLI properties
singjc May 15, 2025
16bba8f
refactor: Update loading of modifications to use byte slice instead o…
singjc May 15, 2025
99ae99f
refactor: Update training configuration in redeem-properties crate
singjc May 15, 2025
7ae4aa8
refactor: Update AAEmbedding constructor signature to accept VarBuild…
singjc May 15, 2025
c1d18d8
refactor: Update semi-supervised learning to return updated ranks al…
singjc May 15, 2025
6088678
refactor: Update Redeem CLI to use RTCNNTFModel for inference
singjc May 15, 2025
2fbe12c
refactor: Update data handling to extract "rank" feature column as 1D…
singjc May 15, 2025
fd447f3
refactor: Improve bidirectional LSTM input handling for contiguous te…
singjc May 15, 2025
2e3bd90
refactor: Improve bidirectional LSTM input handling for contiguous te…
singjc May 15, 2025
930f21a
refactor: Improve initialization of hidden states in BidirectionalLSTM
singjc May 15, 2025
d8a32c5
debug: bilstm forward with state
singjc May 15, 2025
1f779fd
refactor: Improve bidirectional LSTM forward and backward processing
singjc May 15, 2025
80469a0
debugging bilstm
singjc May 15, 2025
0c56fd2
add: type annotation for bilstm
singjc May 16, 2025
54af169
refactor: Clone contiguous tensor in BidirectionalLSTM for improved h…
singjc May 16, 2025
f89c0a8
refactor: Improve logging in BidirectionalLSTM backward processing
singjc May 16, 2025
b89e9dd
more debugging for bilstm
singjc May 16, 2025
74b0703
revert: apply_bidirectional_layber and forward_wtih_state in bilstm t…
singjc May 16, 2025
8c7c70e
minor
singjc May 28, 2025
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,5 +1,5 @@
[workspace]
members = [ "crates/redeem-classifiers",
members = [ "crates/redeem-classifiers", "crates/redeem-cli",
"crates/redeem-properties"
]

50 changes: 50 additions & 0 deletions Dockerfile
@@ -0,0 +1,50 @@
# Use the official NVIDIA CUDA base image with CUDA 12.2
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04

# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
libssl-dev \
pkg-config \
clang \
libstdc++-12-dev \
cmake \
git \
&& \
update-ca-certificates && \
rm -rf /var/lib/apt/lists/*

# Install Rust using rustup
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Set environment variables for CUDA
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Set the CUDA compute capability for the build process
# Tesla V100 has compute capability 7.0
ENV CUDA_COMPUTE_CAP=70

# Set the working directory
WORKDIR /app

# Copy the source code into the container
COPY Cargo.toml Cargo.lock ./
COPY crates ./crates

# Build the application with CUDA support
RUN cargo build --release --bin redeem --features cuda

# Copy the binary into the PATH
RUN cp target/release/redeem /app/redeem

# clean up build artifacts
RUN cargo clean

# Set the PATH environment variable
ENV PATH="/app:${PATH}"
10 changes: 6 additions & 4 deletions README.md
@@ -30,15 +30,17 @@ The ReDeeM project consists of two primary crates:

1. **redeem-properties**:
- This crate focuses on deep learning models for peptide property prediction. It implements models for predicting retention time (RT), ion mobility (IM), and MS2 fragment intensities using the Candle library.
- The models can be fine-tuned on new data and can be saved in the safetensor format for later use.
- The models can be trained, fine-tuned on new data and can be saved in the safetensor format for later use.

- Current Models

Model | Name | Architecture | Implemented
--- | --- | --- | ---
AlphaPept RT Model | `redeem_properties::RTCNNLSTMModel` | CNN-LSTM | :heavy_check_mark:
AlphaPept MS2 Model | `redeem_properties::MS2BertModel` | Bert | :heavy_check_mark:
AlphaPept IM Model | `redeem_properties::CCSCNNLSTMModel` | CNN-LSTM | :heavy_check_mark:
AlphaPept RT Model | `rt_cnn_lstm` | CNN-LSTM | :heavy_check_mark:
AlphaPept MS2 Model | `ms2_bert` | Bert | :heavy_check_mark:
AlphaPept CCS Model | `ccs_cnn_lstm` | CNN-LSTM | :heavy_check_mark:
RT Model | `rt_tf_lstm` | CNN-Transformer | :heavy_check_mark:
CCS Model | `ccs_tf_lstm` | CNN-Transformer | :heavy_check_mark:

2. **redeem-classifiers**:
- This crate is aimed at developing semi-supervised scoring classifier models. The goal is to create models for separating target peptides from decoys.
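For context on the redeem-properties side of the table above: the README states the models are implemented with the Candle library and serialized as safetensors, and several commits in this PR adjust how a `VarBuilder` is threaded into the model structs. The sketch below shows the generic Candle VarMap/VarBuilder/safetensors pattern those pieces build on. It is plain Candle usage, not the redeem-properties API; the layer name, dimensions, and file path are illustrative assumptions.

```rust
use candle_core::{DType, Device, Result};
use candle_nn::{linear, Linear, VarBuilder, VarMap};

// Generic Candle pattern: variables live in a VarMap, a VarBuilder hands them
// to layer constructors, and the VarMap can persist everything as safetensors.
fn build_and_save() -> Result<()> {
    let device = Device::Cpu;
    let varmap = VarMap::new();
    let vb = VarBuilder::from_varmap(&varmap, DType::F32, &device);

    // Stand-in for a real model; "head", 128, and 1 are illustrative only.
    let _head: Linear = linear(128, 1, vb.pp("head"))?;

    // After training, every variable registered in the VarMap is written out.
    varmap.save("model.safetensors")?;
    Ok(())
}
```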
2 changes: 1 addition & 1 deletion crates/redeem-classifiers/Cargo.toml
@@ -6,7 +6,7 @@ edition = "2021"
rust-version = "1.76"
description = "A repository of deep-learning models for mass spectrometry data"
readme = "README.md"
license = "MIT"


[dependencies]
anyhow = "1.0"
105 changes: 65 additions & 40 deletions crates/redeem-classifiers/examples/gbdt_semi_supervised_learning.rs
@@ -5,57 +5,84 @@ use ndarray::{Array1, Array2};
use std::error::Error;
use std::fs::File;
use std::io::Write;
use std::io::BufReader;

use redeem_classifiers::data_handling::PsmMetadata;
use redeem_classifiers::psm_scorer::SemiSupervisedLearner;
use redeem_classifiers::models::utils::ModelType;
use redeem_classifiers::report::{report::{Report, ReportSection}, plots::{plot_score_histogram, plot_pp}};


fn read_features_tsv(path: &str) -> Result<Array2<f32>, Box<dyn Error>> {
/// Load a test PSM CSV file into feature matrix, labels, and metadata.
///
/// # Arguments
/// * `path` - Path to the CSV file
///
/// # Returns
/// A tuple of (`x`, `y`, `PsmMetadata`)
pub fn load_test_psm_csv(path: &str) -> Result<(Array2<f32>, Array1<i32>, PsmMetadata)> {
let file = File::open(path)?;
let mut reader = ReaderBuilder::new()
.has_headers(false)
.delimiter(b',')
.from_path(path)?;

let mut data = Vec::new();
.has_headers(true)
.from_reader(BufReader::new(file));

let headers = reader
.headers()?
.iter()
.map(|h| h.to_string())
.collect::<Vec<_>>();

// Find indices
let file_id_idx = headers.iter().position(|h| h == "file_id").unwrap();
let spec_id_idx = headers.iter().position(|h| h == "spec_id").unwrap();
let label_idx = headers.iter().position(|h| h == "label").unwrap();

// Everything else is a feature
let feature_indices: Vec<usize> = (0..headers.len())
.filter(|&i| i != file_id_idx && i != spec_id_idx && i != label_idx)
.collect();

let feature_names = feature_indices
.iter()
.map(|&i| headers[i].clone())
.collect::<Vec<_>>();

let mut file_ids = Vec::new();
let mut spec_ids = Vec::new();
let mut labels = Vec::new();
let mut features = Vec::new();

for result in reader.records() {
let record = result?;
let row: Vec<f32> = record

file_ids.push(record[file_id_idx].parse::<usize>()?);
spec_ids.push(record[spec_id_idx].to_string());
labels.push(record[label_idx].parse::<i32>()?);

let row = feature_indices
.iter()
.map(|field| field.parse::<f32>())
.collect::<Result<_, _>>()?;
data.push(row);
.map(|&i| record[i].parse::<f32>().unwrap_or(f32::NAN))
.collect::<Vec<f32>>();

features.extend(row);
}

let n_samples = data.len();
let n_features = data[0].len();
let n_rows = labels.len();
let n_cols = feature_indices.len();

Array2::from_shape_vec(
(n_samples, n_features),
data.into_iter().flatten().collect(),
)
.map_err(|e| e.into())
}
let x = Array2::from_shape_vec((n_rows, n_cols), features)?;
let y = Array1::from_vec(labels);

fn read_labels_tsv(path: &str) -> Result<Array1<i32>, Box<dyn Error>> {
let mut reader = ReaderBuilder::new()
.has_headers(false)
.delimiter(b'\t')
.from_path(path)?;

let labels: Vec<i32> = reader
.records()
.map(|r| {
let record = r?;
let value = record.get(0).ok_or_else(|| "Empty row".to_string())?;
value.parse::<i32>().map_err(|e| e.into())
})
.collect::<Result<_, Box<dyn Error>>>()?;

Ok(Array1::from_vec(labels))
let metadata = PsmMetadata {
file_id: file_ids,
spec_id: spec_ids,
feature_names,
};

Ok((x, y, metadata))
}


fn save_predictions_to_csv(
predictions: &Array1<f32>,
file_path: &str,
@@ -71,12 +98,10 @@ fn save_predictions_to_csv(

fn main() -> Result<()> {
env_logger::init();
// Load the test data from the TSV files
let x = read_features_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_for_testing.csv").unwrap();
// Select first 10 columns of data
let x = x.slice(ndarray::s![.., ..10]).to_owned();
// Set log level to debug
log::set_max_level(log::LevelFilter::Debug);

let y = read_labels_tsv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_labels_for_testing.csv").unwrap();
let (x, y, metadata) = load_test_psm_csv("/home/singjc/Documents/github/sage_bruker/20241115_single_file_redeem/sage_scores_with_metadata_for_testing_redeem.csv")?;

println!("Loaded features shape: {:?}", x.shape());
println!("Loaded labels shape: {:?}", y.shape());
@@ -97,7 +122,7 @@ fn main() -> Result<()> {
3,
Some((0.15, 1.0))
);
let predictions = learner.fit(x, y.clone());
let (predictions, _ranks) = learner.fit(x, y.clone(), metadata)?;

println!("Labels: {:?}", y);

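Taken together, the changes to this example replace the two TSV readers with a single `load_test_psm_csv` helper that also returns `PsmMetadata`, and `SemiSupervisedLearner::fit` now accepts that metadata and returns the updated ranks alongside the predictions. Below is a condensed sketch of the resulting call sequence. It assumes it sits in the same example file so that `load_test_psm_csv` is in scope; the learner's construction is elided because only its trailing arguments are visible in this diff, and the CSV path is a placeholder.

```rust
use anyhow::Result;
use redeem_classifiers::psm_scorer::SemiSupervisedLearner;

// Condensed flow of the updated example; constructing the learner is elided.
fn score_psms(mut learner: SemiSupervisedLearner) -> Result<()> {
    // New helper from this PR: features, target/decoy labels, and PSM metadata
    // come back from a single CSV read.
    let (x, y, metadata) = load_test_psm_csv("scores_with_metadata.csv")?;

    // fit() now consumes the metadata and returns (predictions, ranks).
    let (predictions, _ranks) = learner.fit(x, y.clone(), metadata)?;
    println!("scored {} PSMs", predictions.len());
    Ok(())
}
```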