Skip to content

Commit bbad0d3

Browse files
committed
Split chromatogram out from spectra
1 parent fd269e0 commit bbad0d3

16 files changed

+928
-18
lines changed

example_scripts/access_spectra_and_chromatograms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def main(mzml_file):
8080
break
8181
if isinstance(item, pymzml.spec.Spectrum):
8282
print(f" Spectrum {item.ID}, MS level {item.ms_level}, RT {item.scan_time_in_minutes():.2f} min")
83-
elif isinstance(item, pymzml.spec.Chromatogram):
83+
elif hasattr(item, 'time') and hasattr(item, 'i'):
8484
print(f" Chromatogram {item.ID}, {len(item.peaks())} data points")
8585
count += 1
8686

pymzml/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2525
SOFTWARE.
2626
"""
27-
__all__ = ["run", "spec", "obo", "minimum", "plot", "file_classes"]
27+
__all__ = ["run", "spec", "chromatogram", "obo", "minimum", "plot", "file_classes"]
2828

2929
import os
3030
import sys
@@ -40,7 +40,9 @@
4040
# Imports of individual modules
4141
import pymzml.run
4242
import pymzml.spec
43+
import pymzml.chromatogram
4344
from pymzml.spec import MSDecoder
45+
from pymzml.chromatogram import Chromatogram
4446
import pymzml.obo
4547
import pymzml.plot
4648
import pymzml.utils

pymzml/chromatogram.py

Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: latin-1 -*-
3+
"""
4+
The chromatogram class offers a python object for mass spectrometry chromatogram data.
5+
The chromatogram object holds the basic information of the chromatogram and offers
6+
methods to interrogate properties of the chromatogram.
7+
Decoding of the data, i.e. the time and intensity arrays, is performed on demand
8+
and the data can be accessed via properties, e.g. :py:attr:`~pymzml.chromatogram.Chromatogram.profile`.
9+
10+
The Chromatogram class is used in the :py:class:`~pymzml.run.Reader` class.
11+
There each chromatogram is accessible as a chromatogram object.
12+
"""
13+
14+
# Python mzML module - pymzml
15+
# Copyright (C) 2010-2019 M. Kösters, C. Fufezan
16+
# The MIT License (MIT)
17+
18+
# Permission is hereby granted, free of charge, to any person obtaining a copy
19+
# of this software and associated documentation files (the "Software"), to deal
20+
# in the Software without restriction, including without limitation the rights
21+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22+
# copies of the Software, and to permit persons to whom the Software is
23+
# furnished to do so, subject to the following conditions:
24+
25+
# The above copyright notice and this permission notice shall be included in all
26+
# copies or substantial portions of the Software.
27+
28+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34+
# SOFTWARE.
35+
36+
import re
37+
import numpy as np
38+
from .ms_spectrum import MS_Spectrum
39+
from .obo import OboTranslator
40+
41+
42+
class Chromatogram(MS_Spectrum):
    """
    Class for Chromatogram access and handling.

    Wraps one chromatogram XML element. The time and intensity arrays are
    decoded lazily on first access and cached on the instance; the same
    holds for the metadata properties (type, polarity, precursor/product m/z).
    """

    # cvParam accessions that are treated as "chromatogram type" terms.
    # NOTE(review): the labels below are the ones given by the original
    # author; several of them (MS:1000810-MS:1000815, MS:1001475-MS:1001480)
    # should be verified against the PSI-MS controlled vocabulary. The set
    # itself is kept unchanged to preserve behaviour.
    _CHROMATOGRAM_TYPE_ACCESSIONS = frozenset(
        (
            "MS:1000235",  # total ion current chromatogram
            "MS:1000627",  # selected ion current chromatogram
            "MS:1000628",  # basepeak intensity chromatogram
            "MS:1000810",  # chromatogram
            "MS:1000811",  # chromatogram created by spectrum aggregation
            "MS:1000812",  # single ion monitoring chromatogram
            "MS:1000813",  # multiple reaction monitoring chromatogram
            "MS:1000814",  # selected reaction monitoring chromatogram
            "MS:1000815",  # consecutive reaction monitoring chromatogram
            "MS:1001472",  # selected ion monitoring chromatogram
            "MS:1001473",  # selected reaction monitoring chromatogram
            "MS:1001474",  # consecutive reaction monitoring chromatogram
            "MS:1001475",  # targeted SIM chromatogram
            "MS:1001476",  # automatic SIM chromatogram
            "MS:1001477",  # targeted SRM chromatogram
            "MS:1001478",  # automatic SRM chromatogram
            "MS:1001479",  # targeted CRM chromatogram
            "MS:1001480",  # automatic CRM chromatogram
        )
    )

    # cvParam accessions that encode the scan polarity.
    _POLARITY_ACCESSIONS = frozenset(
        (
            "MS:1000129",  # negative scan
            "MS:1000130",  # positive scan
        )
    )

    # cvParam accession of the isolation window target m/z.
    _ISOLATION_WINDOW_TARGET_MZ = "MS:1000827"

    def __init__(self, element, measured_precision=5e-6, *, obo_version=None):
        """
        Arguments:
            element (xml.etree.ElementTree.Element): chromatogram as xml Element

        Keyword Arguments:
            measured_precision (float): in ppm, i.e. 5e-6 equals to 5 ppm.
            obo_version (str, optional): obo version number.
        """
        self._measured_precision = measured_precision
        self.element = element
        self.noise_level_estimate = {}
        # Property variables
        self._time = None
        self._ms_level = None
        self._i = None
        self._t_mass_set = None
        self._peaks = None
        self._t_mz_set = None
        self._centroided_peaks = None
        self._reprofiled_peaks = None
        self._deconvoluted_peaks = None
        self._profile = None
        self._extreme_values = None
        self._centroided_peaks_sorted_by_i = None
        self._transformed_mz_with_error = None
        self._transformed_mass_with_error = None
        self._precursors = None
        self._ID = None
        self._chromatogram_type = None
        self._precursor_mz = None
        self._product_mz = None
        self._polarity = None
        self.obo_translator = OboTranslator.from_cache(obo_version)

        # Always define the namespace prefix so later XPath lookups cannot
        # fail with AttributeError. The element is compared against None
        # explicitly: the truth value of an Element depends on whether it
        # has children and testing it is deprecated.
        self.ns = ""
        if element is not None:
            ns_match = re.match(r"\{.*\}", element.tag)
            if ns_match:
                self.ns = ns_match.group(0)

        self._decode = self._decode_to_numpy
        # factory used to turn the profile list into an array
        self._array = np.array

    def __repr__(self):
        """
        Returns representative string for a chromatogram object class
        """
        return "<__main__.Chromatogram object with native ID {0} at {1}>".format(
            self.ID, hex(id(self))
        )

    def __str__(self):
        """
        Returns representative string for a chromatogram object class
        """
        return self.__repr__()

    def _iter_cv_params(self, root):
        """
        Yield every cvParam element found at or below ``root``.

        Both namespaced (``{...}cvParam``) and un-namespaced (``cvParam``)
        tags are accepted; nodes whose tag is not a string are skipped.
        Yields nothing if ``root`` is None.
        """
        if root is None:
            return
        for node in root.iter():
            tag = node.tag
            if isinstance(tag, str) and (
                tag == "cvParam" or tag.endswith("}cvParam")
            ):
                yield node

    def _first_cv_param_name(self, accessions):
        """
        Return the ``name`` attribute of the first cvParam below the
        chromatogram element whose accession is in ``accessions``, or None
        if there is no such cvParam (or no element).
        """
        for param in self._iter_cv_params(self.element):
            if param.get("accession") in accessions:
                return param.get("name")
        return None

    def _isolation_window_target_mz(self, parent_tag):
        """
        Return the isolation window target m/z found below the first
        ``parent_tag`` child (e.g. ``precursor`` or ``product``) as float,
        or None if it cannot be found.
        """
        if self.element is None:
            return None
        parent = self.element.find(f".//{self.ns}{parent_tag}")
        if parent is None:
            return None
        window = parent.find(f".//{self.ns}isolationWindow")
        for param in self._iter_cv_params(window):
            if param.get("accession") == self._ISOLATION_WINDOW_TARGET_MZ:
                value = param.get("value")
                if value is not None:
                    return float(value)
        return None

    @property
    def ID(self):
        """
        Access the native id of the chromatogram.

        Returns:
            ID (str): native ID of the chromatogram
        """
        if self._ID is None and self.element is not None:
            self._ID = self.element.get("id")
        return self._ID

    @property
    def mz(self):
        """
        Chromatogram has no property mz. This property is included for
        compatibility with the Spectrum class.

        Returns:
            time (list): list of time values from the chromatogram
        """
        print("Chromatogram has no property mz.\nReturn retention time instead")
        return self.time

    @property
    def time(self):
        """
        Returns the list of time values. If the time values are encoded, the
        function _decode() is used to decode the encoded data.

        The values are decoded on first access and cached afterwards. To set
        time and intensity values, e.g. for theoretical data, assign a list
        of (time, intensity) tuples to the profile property.

        Returns:
            time (list): list of time values from the analyzed chromatogram

        """
        if self._time is None:
            params = self._get_encoding_parameters("time array")
            self._time = self._decode(*params)
        return self._time

    @property
    def i(self):
        """
        Returns the list of intensity values from the analyzed chromatogram.

        The values are decoded on first access and cached afterwards.

        Returns:
            i (list): list of intensity values from the analyzed chromatogram
        """
        if self._i is None:
            params = self._get_encoding_parameters("intensity array")
            self._i = self._decode(*params)
        return self._i

    @property
    def profile(self):
        """
        Returns the list of peaks of the chromatogram as tuples (time, intensity).

        Returns:
            peaks (list): list of time, i tuples

        Example:

        >>> import pymzml
        >>> run = pymzml.run.Reader(
        ...     "spectra.mzML.gz",
        ...     MS_precisions = {
        ...         1 : 5e-6,
        ...         2 : 20e-6
        ...     }
        ... )
        >>> for entry in run:
        ...     if isinstance(entry, pymzml.chromatogram.Chromatogram):
        ...         for time, intensity in entry.profile:
        ...             print(time, intensity)

        Note:
            The profile property can also be set, e.g. for theoretical data.
            It requires a list of time/intensity tuples.

        """
        if self._profile is None:
            if (self._time is None) != (self._i is None):
                # Only one of the two arrays is available: no pairs to build.
                self._profile = []
            else:
                # Either both arrays are cached or both are decoded on
                # demand by the time/i properties.
                intensities = self.i
                self._profile = [
                    [t, intensities[pos]] for pos, t in enumerate(self.time)
                ]
        return self._array(self._profile)

    @profile.setter
    def profile(self, tuple_list):
        """
        Set the chromatogram profile.

        An empty ``tuple_list`` is ignored and leaves the object unchanged.

        Args:
            tuple_list (list): list of tuples (time, intensity)
        """
        # len() instead of truthiness so array-like input works as well
        if len(tuple_list) == 0:
            return
        times = []
        intensities = []
        for time, intensity in tuple_list:
            times.append(time)
            intensities.append(intensity)
        self._time = times
        self._i = intensities
        self._peaks = tuple_list
        # Invalidate everything derived from the previous data so that the
        # getter rebuilds the profile from the new time/intensity values.
        self._profile = None
        self._reprofiled_peaks = None
        self._centroided_peaks = None

    def peaks(self):
        """
        Return the list of peaks of the chromatogram as tuples (time, intensity).

        This is a method (not a property) that returns the same data as the
        profile property.

        Returns:
            peaks (list): list of time, intensity tuples

        Example:

        >>> import pymzml
        >>> run = pymzml.run.Reader(
        ...     "spectra.mzML.gz",
        ...     MS_precisions = {
        ...         1 : 5e-6,
        ...         2 : 20e-6
        ...     }
        ... )
        >>> for entry in run:
        ...     if isinstance(entry, pymzml.chromatogram.Chromatogram):
        ...         for time, intensity in entry.peaks():
        ...             print(time, intensity)

        Note:
            To set the data, e.g. for theoretical data, assign a list of
            time/intensity tuples to the profile property.

        """
        return self.profile

    @property
    def chromatogram_type(self):
        """
        Returns the chromatogram type.

        The value is the ``name`` attribute of the first cvParam whose
        accession is one of the known chromatogram type accessions.

        Returns:
            chromatogram_type (str): chromatogram type, None if not found
        """
        if self._chromatogram_type is None:
            self._chromatogram_type = self._first_cv_param_name(
                self._CHROMATOGRAM_TYPE_ACCESSIONS
            )
        return self._chromatogram_type

    @property
    def polarity(self):
        """
        Returns the polarity of the chromatogram.

        Returns:
            polarity (str): polarity (positive scan or negative scan),
                None if not found
        """
        if self._polarity is None:
            self._polarity = self._first_cv_param_name(self._POLARITY_ACCESSIONS)
        return self._polarity

    @property
    def precursor_mz(self):
        """
        Returns the precursor m/z value for SRM/MRM chromatograms.

        Returns:
            precursor_mz (float): precursor m/z value, None if not found
        """
        if self._precursor_mz is None:
            self._precursor_mz = self._isolation_window_target_mz("precursor")
        return self._precursor_mz

    @property
    def product_mz(self):
        """
        Returns the product m/z value for SRM/MRM chromatograms.

        Returns:
            product_mz (float): product m/z value, None if not found
        """
        if self._product_mz is None:
            self._product_mz = self._isolation_window_target_mz("product")
        return self._product_mz

    def get_chromatogram_properties(self):
        """
        Returns a dictionary with the main properties of the chromatogram.

        Returns:
            properties (dict): dictionary with the keys ``id``,
                ``chromatogram_type``, ``polarity``, ``precursor_mz`` and
                ``product_mz``
        """
        return {
            "id": self.ID,
            "chromatogram_type": self.chromatogram_type,
            "polarity": self.polarity,
            "precursor_mz": self.precursor_mz,
            "product_mz": self.product_mz,
        }

0 commit comments

Comments
 (0)