diff --git a/CHANGELOG b/CHANGELOG index 4676c663..5d5e120e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,11 +1,22 @@ -5.0b5 +5.0c0 ----- + - Update the standard ion compositions to be more consistent with the adopted ion type notation. + The `"z+1"` and `"c+1"` ions now represent compositions described as "z-ion plus a hydrogen" and "c-ion plus a hydrogen", + respectively. + The `"z-dot"` and `"c-dot"` notations are retained unchanged for partial backward compatibility; + they are now equivalent to `"z+1"` and `"c+1"`. + + .. warning :: + Make sure to check what ion types you are using in mass and composition calculations! + In short, `z+1`, `z+2`, `z+3` have all been reduced by one hydrogen. + - Support **ProForma 2.1** (`#183 `_ by Joshua Klein). You can calculate compositions for :py:class:`ProForma` objects using :py:meth:`pyteomics.proforma.ProForma.composition` and get m/z with annotated or user-provided charge state using :py:meth:`pyteomics.proforma.ProForma.mz`. You can also iterate through possible peptidoforms when a ProForma sequence is annotated with some ambiguity using :py:meth:`pyteomics.proforma.ProForma.generate_proteoforms`. + - Implement **thread-based parallelism**. Following the introduction of `official free-threading Python implementations `_ users are now able to use threads through the :py:meth:`map` interface of :ref:`indexing parsers`. @@ -27,6 +38,7 @@ :py:func:`auxiliary.set_start_method`, to configure it (`#172 `_). Read more about the new behavior and the rationale :ref:`in the docs `. + - **Drop Python 2 support.** See `#167 `_ for the announcement. - Pyteomics now uses the `implicit namespace mechanism `_. - Fix compatibility with :py:mod:`lxml` 5.4.0 and newer (`#170 `_). 
diff --git a/pyteomics/auxiliary/structures.py b/pyteomics/auxiliary/structures.py index 4fc3fd32..25e78d06 100644 --- a/pyteomics/auxiliary/structures.py +++ b/pyteomics/auxiliary/structures.py @@ -190,6 +190,9 @@ def __sub__(self, other): result[elem] -= cnt return result + def __neg__(self): + return self * (-1) + def __isub__(self, other): for elem, cnt in other.items(): self[elem] -= cnt diff --git a/pyteomics/mass/mass.py b/pyteomics/mass/mass.py index 0073edae..8fcafdf5 100644 --- a/pyteomics/mass/mass.py +++ b/pyteomics/mass/mass.py @@ -134,6 +134,7 @@ def _parse_isotope_string(label): and standard H- and -OH terminal groups. """ + std_ion_comp = {} """A dict with relative elemental compositions of the standard peptide fragment ions. An elemental composition of a fragment ion is calculated as a @@ -141,6 +142,7 @@ def _parse_isotope_string(label): and the sum of elemental compositions of its constituting amino acid residues. """ + _isotope_string = r'^((?:[A-Z][a-z+]*)|e-|e\*)(?:\[(\d+)\])?$' _atom = r'([A-Z][a-z+]*)(?:\[(\d+)\])?([+-]?\d+)?' _formula = r'^({})*$'.format(_atom) @@ -151,6 +153,15 @@ def _raise_term_label_exception(what='comp'): " in `aa_{0}`.".format(what)) +def _warn_about_ion_type(ion_type, ion_comp): + """Temporarily warn about the changes in affected std_ion_comp entries.""" + if ion_type in {'z+1', 'z+2', 'z+3'} and ion_type in ion_comp and ion_comp[ion_type] == std_ion_comp.get(ion_type): + warnings.warn( + 'The compositions of z+1, z+2 and z+3 ions in `std_ion_comp` have been changed in Pyteomics v5.0 ' + 'to reflect the most common interpretation of these labels. 
' + ) + + class Composition(BasicComposition): """ A Composition object stores a chemical composition of a @@ -334,7 +345,9 @@ def __init__(self, *args, **kwargs): ion_comp = kwargs.get('ion_comp', std_ion_comp) if 'ion_type' in kwargs: - self += ion_comp[kwargs['ion_type']] + ion_type = kwargs['ion_type'] + _warn_about_ion_type(ion_type, ion_comp) + self += ion_comp[ion_type] # Charge is not supported in kwargs charge = self['H+'] @@ -537,6 +550,7 @@ def mass(self, **kwargs): 'M-H2O': Composition(formula='H-2O-1'), 'M-NH3': Composition(formula='N-1H-3'), 'a': Composition(formula='H-2O-1' + 'C-1O-1'), + 'a+1': Composition(formula='H-2O-1' + 'C-1O-1' + 'H1'), 'a-H2O': Composition(formula='H-2O-1' + 'C-1O-1' + 'H-2O-1'), 'a-NH3': Composition(formula='H-2O-1' + 'C-1O-1' + 'N-1H-3'), 'b': Composition(formula='H-2O-1'), @@ -550,16 +564,18 @@ def mass(self, **kwargs): 'c-H2O': Composition(formula='H-2O-1' + 'NH3' + 'H-2O-1'), 'c-NH3': Composition(formula='H-2O-1'), 'x': Composition(formula='H-2O-1' + 'CO2'), + 'x+1': Composition(formula='H-2O-1' + 'CO2' + 'H1'), 'x-H2O': Composition(formula='H-2O-1' + 'CO2' + 'H-2O-1'), 'x-NH3': Composition(formula='H-2O-1' + 'CO2' + 'N-1H-3'), 'y': Composition(formula=''), 'y-H2O': Composition(formula='H-2O-1'), 'y-NH3': Composition(formula='N-1H-3'), + 'z-1': Composition(formula='H-2O-1' + 'ON-1H-2'), 'z': Composition(formula='H-2O-1' + 'ON-1H-1'), 'z-dot': Composition(formula='H-2O-1' + 'ON-1'), - 'z+1': Composition(formula='H-2O-1' + 'ON-1H1'), - 'z+2': Composition(formula='H-2O-1' + 'ON-1H2'), - 'z+3': Composition(formula='H-2O-1' + 'ON-1H3'), + 'z+1': Composition(formula='H-2O-1' + 'ON-1'), + 'z+2': Composition(formula='H-2O-1' + 'ON-1H1'), + 'z+3': Composition(formula='H-2O-1' + 'ON-1H2'), 'z-H2O': Composition(formula='H-2O-1' + 'ON-1H-1' + 'H-2O-1'), 'z-NH3': Composition(formula='H-2O-1' + 'ON-1H-1' + 'N-1H-3'), }) @@ -960,11 +976,12 @@ def fast_mass(sequence, ion_type=None, charge=None, **kwargs): mass += mass_data['H'][0][0] * 
2 + mass_data['O'][0][0] if ion_type: + ion_comp = kwargs.get('ion_comp', std_ion_comp) try: - icomp = kwargs.get('ion_comp', std_ion_comp)[ion_type] + icomp = ion_comp[ion_type] except KeyError: raise PyteomicsError('Unknown ion type: {}'.format(ion_type)) - + _warn_about_ion_type(ion_type, ion_comp) mass += sum(mass_data[element][0][0] * num for element, num in icomp.items()) if charge: @@ -1043,13 +1060,13 @@ def fast_mass2(sequence, ion_type=None, charge=None, **kwargs): raise PyteomicsError('Unspecified mass for modification: "{}"'.format(e.args[0])) if ion_type: + ion_comp = kwargs.get('ion_comp', std_ion_comp) try: - icomp = kwargs.get('ion_comp', std_ion_comp)[ion_type] + icomp = ion_comp[ion_type] except KeyError: raise PyteomicsError('Unknown ion type: {}'.format(ion_type)) - - mass += sum(mass_data[element][0][0] * num - for element, num in icomp.items()) + _warn_about_ion_type(ion_type, ion_comp) + mass += sum(mass_data[element][0][0] * num for element, num in icomp.items()) if charge: mass = (mass + mass_data['H+'][0][0] * charge) / charge @@ -1230,7 +1247,7 @@ def by_name(self, name, strict=True): The full name of the modification(s). strict : bool, optional If :py:const:`False`, the search will return all modifications - whose full name **contains** `title`, otherwise equality is + whose full name **contains** `name`, otherwise equality is required. :py:const:`True` by default. 
Returns diff --git a/pyteomics/version.py b/pyteomics/version.py index f486185e..f0033221 100644 --- a/pyteomics/version.py +++ b/pyteomics/version.py @@ -19,7 +19,7 @@ """ -__version__ = '5.0b5' +__version__ = '5.0c0' from collections import namedtuple import re diff --git a/tests/test_mass.py b/tests/test_mass.py index f96de547..184b5a17 100644 --- a/tests/test_mass.py +++ b/tests/test_mass.py @@ -297,6 +297,50 @@ def test_Unimod_methods(self): self.assertEqual(record, db.by_title(rec_title)) self.assertEqual(record, db.by_name(rec_name)) + def test_compare_ion_comp_with_Unimod(self): + db = mass.Unimod(gzip.open('unimod.xml.gz')) + for ion_type in 'abcxz': + with self.subTest(ion_type=ion_type): + unimod_ion = db.by_title(f'{ion_type}-type-ion') + if unimod_ion: + self.assertEqual(unimod_ion['composition'], mass.std_ion_comp[ion_type]) + else: + self.skipTest(f'Saved Unimod does not contain a record for {ion_type}-type-ion') + + def test_ion_complementarity(self): + for pair in [('a+1', 'x+1'), ('b', 'y'), ('c', 'z')]: + with self.subTest(pair=pair): + ion1, ion2 = pair + comp1 = mass.std_ion_comp[ion1] + comp2 = mass.std_ion_comp[ion2] + self.assertEqual(comp1 + comp2, -mass.Composition({'H': 2, 'O': 1})) + + def test_ion_dot_notation(self): + for key in mass.std_ion_comp: + if key.endswith('-dot'): + with self.subTest(key=key): + comp_dot = mass.std_ion_comp[key] + comp_p1 = mass.std_ion_comp[key[:-4] + '+1'] + self.assertEqual(comp_dot, comp_p1) + + def test_ion_comp_consistency(self): + for ion_type, comp in mass.std_ion_comp.items(): + with self.subTest(ion_type=ion_type): + if '-dot' in ion_type: + continue + if '-1' in ion_type: + self.assertEqual(comp + {'H': 1}, mass.std_ion_comp[ion_type[:-2]]) + elif '+1' in ion_type: + self.assertEqual(comp - {'H': 1}, mass.std_ion_comp[ion_type[:-2]]) + elif '+2' in ion_type: + self.assertEqual(comp - {'H': 2}, mass.std_ion_comp[ion_type[:-2]]) + elif '+3' in ion_type: + self.assertEqual(comp - {'H': 3}, 
mass.std_ion_comp[ion_type[:-2]]) + elif '-H2O' in ion_type: + self.assertEqual(comp + {'H': 2, 'O': 1}, mass.std_ion_comp[ion_type[:-4]]) + elif '-NH3' in ion_type: + self.assertEqual(comp + {'N': 1, 'H': 3}, mass.std_ion_comp[ion_type[:-4]]) + def test_nist_mass(self): self.assertTrue(all(abs(g[0][1] - 1) < 1e-6 for g in mass.nist_mass.values())) for g in mass.nist_mass.values(): diff --git a/tests/unimod.xml.gz b/tests/unimod.xml.gz index 6c90f0b1..6fc5f78f 100644 Binary files a/tests/unimod.xml.gz and b/tests/unimod.xml.gz differ