Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions crates/sage-cli/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ impl Runner {
.format(peptide.proteins.len())
.as_bytes(),
);
record.push_field(peptide.start_position.iter().map(|&x| x.to_string()).collect::<Vec<String>>().join(";").as_bytes());
record.push_field(peptide.end_position.iter().map(|&x| x.to_string()).collect::<Vec<String>>().join(";").as_bytes());
record.push_field(filenames[feature.file_id].as_bytes());
record.push_field(feature.spec_id.as_bytes());
record.push_field(itoa::Buffer::new().format(feature.rank).as_bytes());
Expand Down Expand Up @@ -161,12 +163,14 @@ impl Runner {
"peptide",
"proteins",
"num_proteins",
"start_positions",
"end_positions",
"filename",
"scannr",
"scan",
"rank",
"label",
"expmass",
"calcmass",
"measured_mass",
"calculated_mass",
"charge",
"peptide_len",
"missed_cleavages",
Expand Down
2 changes: 0 additions & 2 deletions crates/sage-cli/tests/integration.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use sage_core::database::Builder;
use sage_core::enzyme::Digest;
use sage_core::mass::Tolerance;
use sage_core::peptide::Peptide;
use sage_core::scoring::Scorer;
use sage_core::spectrum::SpectrumProcessor;

Expand Down
37 changes: 30 additions & 7 deletions crates/sage/src/enzyme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,22 @@ pub struct Digest {
/// Cleaved peptide sequence
pub sequence: String,
/// Protein accession
pub protein: Arc<String>,
pub protein: ProteinAssignment,
/// Missed cleavages
pub missed_cleavages: u8,
/// Is this an N-terminal peptide of the protein?
pub position: Position,
/// What residue position does this start at (1-based inclusive)?
pub start_position: u32,
/// What residue position does this end at (1-based inclusive)?
pub end_position: u32
}

#[derive(Clone, PartialOrd, Ord, Debug, Default)]
pub struct ProteinAssignment {
identifier: Arc<String>,
start_position: u32,
end_position: u32
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
Expand Down Expand Up @@ -58,6 +69,8 @@ impl Digest {
sequence: sequence.into_iter().collect(),
missed_cleavages: self.missed_cleavages,
position: self.position,
start_position: self.start_position,
end_position: self.end_position
}
}
}
Expand Down Expand Up @@ -306,6 +319,8 @@ impl EnzymeParameters {
semi_enzymatic: site.semi_enzymatic,
position,
protein: protein.clone(),
start_position: (start + 1) as u32,
end_position: end as u32
});
}
}
Expand All @@ -330,6 +345,8 @@ mod test {
missed_cleavages: 0,
position: Position::Nterm,
protein: Arc::new(String::default()),
start_position: 1,
end_position: 6
},
Digest {
decoy: false,
Expand All @@ -338,6 +355,8 @@ mod test {
missed_cleavages: 0,
position: Position::Nterm,
protein: Arc::new(String::default()),
start_position: 1,
end_position: 6
},
];

Expand All @@ -353,6 +372,8 @@ mod test {
missed_cleavages: 0,
position: Position::Nterm,
protein: Arc::new(String::default()),
start_position: 1,
end_position: 6
},
Digest {
decoy: false,
Expand All @@ -361,6 +382,8 @@ mod test {
missed_cleavages: 0,
position: Position::Internal,
protein: Arc::new(String::default()),
start_position: 7,
end_position: 12
},
];

Expand All @@ -373,11 +396,11 @@ mod test {
fn trypsin() {
let sequence = "MADEEKLPPGWEKRMSRSSGRVYYFNHITNASQWERPSGN";
let expected = vec![
("MADEEK".into(), Position::Nterm),
("LPPGWEK".into(), Position::Internal),
("MSR".into(), Position::Internal),
("SSGR".into(), Position::Internal),
("VYYFNHITNASQWERPSGN".into(), Position::Cterm),
("MADEEK".into(), Position::Nterm, 1, 6),
("LPPGWEK".into(), Position::Internal, 7, 13),
("MSR".into(), Position::Internal, 14, 16),
("SSGR".into(), Position::Internal, 17, 20),
("VYYFNHITNASQWERPSGN".into(), Position::Cterm, 21, 40),
];

let tryp = EnzymeParameters {
Expand All @@ -391,7 +414,7 @@ mod test {
expected,
tryp.digest(sequence, Arc::default())
.into_iter()
.map(|d| (d.sequence, d.position))
.map(|d| (d.sequence, d.position, d.start_position, d.end_position))
.collect::<Vec<_>>()
);
}
Expand Down
10 changes: 10 additions & 0 deletions crates/sage/src/peptide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ pub struct Peptide {
pub position: Position,

pub proteins: Vec<Arc<String>>,
/// What residue does this peptide start at in the protein (1-based inclusive)?
pub start_position: Vec<u32>,
/// What residue does this peptide end at in the protein (1-based inclusive)?
pub end_position: Vec<u32>,
}

impl Peptide {
Expand Down Expand Up @@ -66,6 +70,8 @@ impl Debug for Peptide {
.field("monoisotopic", &self.monoisotopic)
.field("missed_cleavages", &self.missed_cleavages)
.field("position", &self.position)
.field("start_position", &self.start_position)
.field("end_position", &self.end_position)
.finish()
}
}
Expand Down Expand Up @@ -313,6 +319,8 @@ impl Peptide {
s[1..n].reverse();
pep.sequence = Arc::from(s.into_boxed_slice());
pep.modifications[1..n].reverse();
pep.start_position = pep.start_position; // TODO: calculate start/end in reversed protein sequences?
pep.end_position = pep.end_position;
}
pep
}
Expand Down Expand Up @@ -373,6 +381,8 @@ impl TryFrom<Digest> for Peptide {
missed_cleavages: value.missed_cleavages,
semi_enzymatic: value.semi_enzymatic,
proteins: vec![value.protein],
start_position: vec![value.start_position],
end_position: vec![value.end_position],
})
}
}
Expand Down