From 9fc4b96e53a50dad9406f6327a2c875b38343f7e Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Wed, 11 Mar 2026 21:59:20 -0400 Subject: [PATCH 01/16] feature: add proforma.modify_with --- pyteomics/proforma.py | 124 ++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index b800dbd0..361d1493 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -1614,6 +1614,43 @@ def is_valid(self, aa: str, n_term: bool, c_term: bool) -> bool: return False return self.aa.upper() == aa.upper() or self.aa is None + @classmethod + def from_str(cls, target: str): + target_lower = target.lower() + if target in VALID_AA: + return cls(target, False, False) + elif target_lower in ("n-term", "c-term"): + n_term = target_lower == "n-term" + c_term = target_lower == "c-term" + return cls(None, n_term, c_term) + elif target_lower.startswith(("n-term:", "c-term:")): + tokens = target.split(":") + if len(tokens) == 2: + if tokens[1] in VALID_AA: + t = tokens[0].lower() + n_term = t == "n-term" + c_term = t == "c-term" + cls(tokens[1], n_term, c_term) + else: + raise PyteomicsError( + "Modification target has an invalid amino acid specific terminal target {1} in {0}".format( + target, + tokens[1] + ) + ) + else: + raise PyteomicsError( + "Modification rule target {0} has an empty amino acid specific terminal target".format( + target + ) + ) + else: + raise PyteomicsError( + "Modification rule target {0} is invalid".format( + target + ) + ) + class ModificationRule(object): '''Define a fixed modification rule which dictates a modification tag is @@ -1652,39 +1689,10 @@ def _validate_targets(self): for target in self.targets: if isinstance(target, ModificationTarget): validated_targets.append(target) - elif target in VALID_AA: - validated_targets.append(ModificationTarget(target, False, False)) - elif target in ("N-term", "C-term"): - n_term = target == "N-term" - c_term = target == "C-term" - validated_targets.append(ModificationTarget(None, n_term, c_term)) - elif target.startswith(("N-term:", "C-term:")): - tokens = target.split(":") - if len(tokens) == 2: - if tokens[1] in VALID_AA: - n_term = tokens[0] == "N-term" - c_term = tokens[0] == "C-term" - validated_targets.append(ModificationTarget(tokens[1], n_term, c_term)) - else: - raise PyteomicsError( - "Modification rule {0} has an invalid amino acid specific terminal target {2} in {1}".format( - self, - target, - tokens[1] - ) - ) - else: - raise PyteomicsError( - "Modification rule {0} has an empty amino acid specific terminal target {1}".format( - self, target - ) - ) - else: - raise PyteomicsError( - "Modification rule {0} has an invalid target {1}".format( - self, target - ) - ) + try: + validated_targets.append(ModificationTarget.from_str(target)) + except PyteomicsError as err: + raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err self.targets = validated_targets @@ -3941,7 +3949,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect if not mod: return position_constraints = tag.find_tag_type(TagTypeEnum.position_modifier) - targets = [ModificationTarget(v.value) for v in position_constraints] + targets = [v.value for v in position_constraints] colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) @@ -3978,6 +3986,54 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective": return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True) +def _coerce_string_to_modification(item) -> TagBase: + if isinstance(item, TagBase): + return item.copy() + elif isinstance(item, str): + return TagParser(item)()[0] + else: + raise TypeError(f"Don't know how to coerce {item} of type {type(item)} to a modification") + + +def modify_with(peptide: ProForma, + variable_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None, + fixed_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None, + include_unmodified: bool = True, include_labile: bool = False): + template = peptide.copy() + if variable_modifications: + if isinstance(variable_modifications, list): + template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications)) + elif isinstance(variable_modifications, dict): + extra_rules = [] + for target, tag in variable_modifications.items(): + if isinstance(target, str): + target = PositionModifierTag(target) + tag = _coerce_string_to_modification(tag) + tag.extra.append(target) + extra_rules.append(tag) + template.unlocalized_modifications.extend(extra_rules) + else: + raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}") + if fixed_modifications: + if isinstance(fixed_modifications, list): + template.fixed_modifications.extend(map(_coerce_string_to_modification, fixed_modifications)) + elif isinstance(fixed_modifications, dict): + extra_rules = [] + for target, tag in fixed_modifications.items(): + if isinstance(target, str): + target = PositionModifierTag(target) + tag = _coerce_string_to_modification(tag) + tag.extra.append(target) + extra_rules.append(tag) + template.fixed_modifications.extend(extra_rules) + else: + raise TypeError( + f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}" + ) + + return template.generate_proteoforms(include_unmodified=include_unmodified, include_labile=include_labile) + + class ProteoformCombinator: """ Generate combinations of modification (co)localizations for From 229fb630c448170b2a8287b042d056f2ad0f3077 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 14 Mar 2026 15:45:43 -0400 Subject: [PATCH 02/16] cleanup docs --- pyteomics/proforma.py | 257 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 209 insertions(+), 48 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index 361d1493..dcfdaa40 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -262,6 +262,11 @@ def has_mass(self) -> bool: def has_composition(self) -> bool: return False + def __or__(self, other): + this = self.copy() + this.extra.append(other.copy()) + return this + class GroupLabelBase(TagBase): __slots__ = () @@ -275,6 +280,9 @@ def __str__(self): label = part return '%s' % label + def __hash__(self): + return hash(str(self)) + class PositionLabelTag(GroupLabelBase): '''A tag to mark that a position is involved in a group in some way, but does @@ -1154,16 +1162,38 @@ class GlycanModification(ModificationBase): _tag_type = TagTypeEnum.glycan valid_monosaccharides = { - "Hex": monosaccharide_description(162.0528, Composition("C6H10O5"), 'Hex'), - "HexNAc": monosaccharide_description(203.0793, Composition("C8H13N1O5"), 'HexNAc'), - "HexS": monosaccharide_description(242.009, Composition("C6H10O8S1"), 'HexS'), - "HexP": monosaccharide_description(242.0191, Composition("C6H11O8P1"), 'HexP'), - "HexNAcS": monosaccharide_description(283.0361, Composition("C8H13N1O8S1"), 'HexNAcS'), - "dHex": monosaccharide_description(146.0579, Composition("C6H10O4"), 'dHex'), - "NeuAc": monosaccharide_description(291.0954, Composition("C11H17N1O8"), 'NeuAc'), - "NeuGc": monosaccharide_description(307.0903, Composition("C11H17N1O9"), 'NeuGc'), - "Pen": monosaccharide_description(132.0422, Composition("C5H8O4"), 'Pen'), - "Fuc": monosaccharide_description(146.0579, Composition("C6H10O4"), 'Fuc') + "Hex": monosaccharide_description(162.0528, Composition("C6H10O5"), "Hex"), + "HexNAc": monosaccharide_description( + 203.0793, Composition("C8H13N1O5"), "HexNAc" + ), + "HexS": monosaccharide_description(242.009, Composition("C6H10O8S1"), "HexS"), + "HexP": monosaccharide_description(242.0191, Composition("C6H11O8P1"), "HexP"), + "HexNAcS": monosaccharide_description( + 283.0361, Composition("C8H13N1O8S1"), "HexNAcS" + ), + "dHex": monosaccharide_description(146.0579, Composition("C6H10O4"), "dHex"), + "NeuAc": monosaccharide_description( + 291.0954, Composition("C11H17N1O8"), "NeuAc" + ), + "NeuGc": monosaccharide_description( + 307.0903, Composition("C11H17N1O9"), "NeuGc" + ), + "Pen": monosaccharide_description(132.0422, Composition("C5H8O4"), "Pen"), + "Fuc": monosaccharide_description(146.0579, Composition("C6H10O4"), "Fuc"), + "Kdn": monosaccharide_description( + 250.06886740546, Composition({"C": 9, "H": 14, "O": 8}), "Kdn" + ), + "Kdo": monosaccharide_description( + 220.05830272176, Composition({"C": 8, "H": 12, "O": 7}), "Kdo" + ), + "Phospho": monosaccharide_description( + 79.96633052075, Composition({"P": 1, "O": 3, "H": 1}), "Phospho" + ), + "Sulfo": monosaccharide_description( + 79.95681485867999, + Composition({"S": 1, "O": 3, "H": 0}), + "Sulfo" + ), } valid_monosaccharides['Neu5Ac'] = valid_monosaccharides['NeuAc'] @@ -1173,7 +1203,18 @@ class GlycanModification(ModificationBase): monomer_tokenizer = re.compile( r"|".join(sorted(valid_monosaccharides.keys(), key=len, reverse=True))) - tokenizer = re.compile(r"(%s|[A-Za-z]+)\s*(\d*)\s*" % monomer_tokenizer.pattern) + tokenizer = re.compile( + r"""(?: + (?P%s)| + (?P[A-Za-z]+)| + (?P\{ + [^\}]+? + \}) + ) + \s*(?P\d*)\s*""" + % monomer_tokenizer.pattern, + re.X, + ) @property def monosaccharides(self): @@ -1181,38 +1222,72 @@ def monosaccharides(self): def resolve(self): composite = BasicComposition() - for tok, cnt in self.tokenizer.findall(self.value): + mass = 0 + chemcomp = Composition() + charge = 0 + for hit in self.tokenizer.finditer(self.value): + hit = hit.groupdict() + cnt = hit['count'] + + tok = hit.get('known_name') + base_name = hit.get('base_name') + formula = hit.get('charged_formula') + if cnt: cnt = int(cnt) else: cnt = 1 - if tok not in self.valid_monosaccharides: - parts = self.monomer_tokenizer.findall(tok) + if tok is not None: + if tok not in self.valid_monosaccharides: + parts = self.monomer_tokenizer.findall(tok) + t = 0 + for p in parts: + if p not in self.valid_monosaccharides: + break + t += len(p) + if t != len(tok): + raise ValueError("{tok!r} is not a valid monosaccharide name".format(tok=tok)) + else: + for p in parts: + if p not in self.valid_monosaccharides: + raise UnknownMonosaccharideError(p) + m, c, sym = self.valid_monosaccharides[p] + mass += m * cnt + chemcomp += c * cnt + composite[sym] += cnt + else: + m, c, sym = self.valid_monosaccharides[tok] + mass += m * cnt + chemcomp += c * cnt + composite[sym] += cnt + elif formula is not None: + inner = FormulaModification(formula[1:-1]).resolve() + mass += inner['mass'] * cnt + chemcomp += inner['composition'] * cnt + composite[formula] += cnt + charge += inner['charge'] * cnt + elif base_name is not None: + parts = self.monomer_tokenizer.findall(base_name) t = 0 for p in parts: if p not in self.valid_monosaccharides: break t += len(p) - if t != len(tok): - raise ValueError("{tok!r} is not a valid monosaccharide name".format(tok=tok)) + if t != len(base_name): + raise ValueError( + f"{base_name!r} is not a valid monosaccharide name" + ) else: - for p in parts[:-1]: - sym = self.valid_monosaccharides[p].symbol - composite[sym] += 1 - sym = self.valid_monosaccharides[parts[-1]].symbol - composite[sym] += cnt + for p in parts: + if p not in self.valid_monosaccharides: + raise UnknownMonosaccharideError(p) + m, c, sym = self.valid_monosaccharides[p] + mass += m * cnt + chemcomp += c * cnt + composite[sym] += cnt else: - sym = self.valid_monosaccharides[tok].symbol - composite[sym] += cnt - mass = 0 - chemcomp = Composition() - for key, cnt in composite.items(): - try: - m, c, sym = self.valid_monosaccharides[key] - except KeyError: - raise UnknownMonosaccharideError(key) - mass += m * cnt - chemcomp += c * cnt + raise NotImplementedError(f"I do not know how to decode the impossible, {hit}") + return { "mass": mass, "composition": chemcomp, @@ -3995,22 +4070,104 @@ def _coerce_string_to_modification(item) -> TagBase: raise TypeError(f"Don't know how to coerce {item} of type {type(item)} to a modification") -def modify_with(peptide: ProForma, - variable_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None, - fixed_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None, - include_unmodified: bool = True, include_labile: bool = False): +def peptidoforms( + peptide: Union[ProForma, str], + variable_modifications: Optional[ + Union[ + List[Union[TagBase, str]], + dict[Union[TagBase, str], List[Union[str, TagBase]]], + ] + ] = None, + fixed_modifications: Optional[ + Union[ + List[Union[TagBase, str]], + dict[Union[TagBase, str], List[Union[str, TagBase]]], + ] + ] = None, + include_unmodified: bool = True, + include_labile: bool = False, +) -> Iterator[ProForma]: + """ + Generate the combinatorial cross-product of modifications for ``peptide``, given by + a set of variable and fixed modification rules, as in a classical peptide search engine. + + This is similar to :func:`parser.peptidoforms`, but using :class:`ProForma` as the representation. + This uses ProForma 2.1's position limiting rules to give the caller greater control over how modifications + are applied, if desired. + + Internally, this delegates to :class:`ProteoformCombinator` and would mirror the behavior of embedding all + of the modification rules directly in the sequence and calling :meth:`ProForma.generate_proteoforms`. + + Parameters + ---------- + peptide : :class:`ProForma` or :class:`str` + The base peptide to modify. If a string is provided, it will be parsed with :meth:`ProForma.parse`. + If ``peptide`` itself encodes modification rules or unlocalized modifications of any kind, they **will** + also be applied. + variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets + The variable modifications that will be combined. If a list is provided, the values are assumed to either + be strings encoding a modification tag in ProForma notation or pre-parsed :class:`TagBase` modifications + with position limiting rules added with ``|`` separators. If a :class:`dict` is provided, keys are assumed + to be :class:`TagBase` modifications, as in the list-case, but the values of those keys are expected to be + :class:`TagBase` position limiters like :class:`PositionModifierTag`, or strings that will be coerced as + such. + fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets + The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified`` + is specified. See ``variable_modifications`` for an explanation of type coercion. + include_unmodified : :class:`bool` + For all non-fixed modifications, include the case where the modification is not included anywhere + include_labile : :class:`bool` + For all labile modifications, include the case where the modification is localized at every possible location + + Yields + ------ + :class:`ProForma` + + Examples + -------- + This example shows how to use the :class:`dict`-based modification rule approach. + + >>> from pyteomics import proforma + >>> isos = proforma.peptidoforms( + ... "EMEVTESPEK", + ... variable_modifications={"Oxidation": ['M']}) + >>> for i in isos: + ... print(i) + EMEVTESPEK + EM[Oxidation|Position:M]EVTESPEK + + Using parsed objects to get the equivalent behavior, and avoids needing to re-parse the rules + on every invocation. + + >>> from pyteomics import proforma + >>> isos = proforma.peptidoforms( + ... ProForma.parse("EMEVTESPEK"), + ... variable_modifications={proforma.GenericModification("Oxidation"): [proforma.PositionModifierTag('M')]}) + >>> for i in isos: + ... print(i) + EMEVTESPEK + EM[Oxidation|Position:M]EVTESPEK + + """ + if isinstance(peptide, str): + peptide = ProForma.parse(peptide) template = peptide.copy() + seen = set() if variable_modifications: if isinstance(variable_modifications, list): template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications)) elif isinstance(variable_modifications, dict): extra_rules = [] - for target, tag in variable_modifications.items(): - if isinstance(target, str): - target = PositionModifierTag(target) - tag = _coerce_string_to_modification(tag) - tag.extra.append(target) - extra_rules.append(tag) + for tag, targets in variable_modifications.items(): + seen.clear() + for target in targets: + if isinstance(target, str): + target = PositionModifierTag(target) + if target in seen: + continue + seen.add(target) + tag = _coerce_string_to_modification(tag) + extra_rules.append(tag | target) template.unlocalized_modifications.extend(extra_rules) else: raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}") @@ -4019,12 +4176,16 @@ def modify_with(peptide: ProForma, template.fixed_modifications.extend(map(_coerce_string_to_modification, fixed_modifications)) elif isinstance(fixed_modifications, dict): extra_rules = [] - for target, tag in fixed_modifications.items(): - if isinstance(target, str): - target = PositionModifierTag(target) - tag = _coerce_string_to_modification(tag) - tag.extra.append(target) - extra_rules.append(tag) + for tag, targets in fixed_modifications.items(): + seen.clear() + for target in targets: + if isinstance(target, str): + target = PositionModifierTag(target) + if target in seen: + continue + seen.add(target) + tag = _coerce_string_to_modification(tag) + extra_rules.append(tag | target) template.fixed_modifications.extend(extra_rules) else: raise TypeError( From 473dff836f55a2b4003db3e0169c36cefd330639 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Thu, 19 Mar 2026 14:06:39 +0100 Subject: [PATCH 03/16] Fixes and tests --- pyteomics/proforma.py | 32 ++++++++++++++++---------- tests/test_proforma.py | 52 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index dcfdaa40..bfc98685 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -1705,7 +1705,7 @@ def from_str(cls, target: str): t = tokens[0].lower() n_term = t == "n-term" c_term = t == "c-term" - cls(tokens[1], n_term, c_term) + return cls(tokens[1], n_term, c_term) else: raise PyteomicsError( "Modification target has an invalid amino acid specific terminal target {1} in {0}".format( @@ -1743,9 +1743,9 @@ class ModificationRule(object): modification_tag: TagBase targets: List[ModificationTarget] - def __init__(self, modification_tag: TagBase, targets: Union[ModificationTarget, List[ModificationTarget], None]=None): + def __init__(self, modification_tag: TagBase, targets: Union[ModificationTarget, List[ModificationTarget], List[str], None]=None): self.modification_tag = modification_tag - self.targets = targets + self.targets = targets # type: ignore self._validate_targets() def is_not_specific(self) -> bool: @@ -1764,10 +1764,11 @@ def _validate_targets(self): for target in self.targets: if isinstance(target, ModificationTarget): validated_targets.append(target) - try: - validated_targets.append(ModificationTarget.from_str(target)) - except PyteomicsError as err: - raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err + else: + try: + validated_targets.append(ModificationTarget.from_str(target)) + except PyteomicsError as err: + raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err self.targets = validated_targets @@ -3829,7 +3830,7 @@ def find_tags_by_id(self, tag_id, include_position=True): def tags(self): return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at] - def generate_proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]: + def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]: """ Generate combinatorial localizations of modifications defined on this ProForma sequence. @@ -3848,6 +3849,8 @@ def generate_proteoforms(self, include_unmodified: bool = False, include_labile: """ return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile)) + peptidoforms = proteoforms + def copy(self) -> "ProForma": sequence = [] for (aa, tags) in self: @@ -4096,7 +4099,7 @@ def peptidoforms( are applied, if desired. Internally, this delegates to :class:`ProteoformCombinator` and would mirror the behavior of embedding all - of the modification rules directly in the sequence and calling :meth:`ProForma.generate_proteoforms`. + of the modification rules directly in the sequence and calling :meth:`ProForma.proteoforms`. Parameters ---------- @@ -4104,14 +4107,16 @@ def peptidoforms( The base peptide to modify. If a string is provided, it will be parsed with :meth:`ProForma.parse`. If ``peptide`` itself encodes modification rules or unlocalized modifications of any kind, they **will** also be applied. - variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets + variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` + mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets The variable modifications that will be combined. If a list is provided, the values are assumed to either be strings encoding a modification tag in ProForma notation or pre-parsed :class:`TagBase` modifications with position limiting rules added with ``|`` separators. If a :class:`dict` is provided, keys are assumed to be :class:`TagBase` modifications, as in the list-case, but the values of those keys are expected to be :class:`TagBase` position limiters like :class:`PositionModifierTag`, or strings that will be coerced as such. - fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets + fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` + mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified`` is specified. See ``variable_modifications`` for an explanation of type coercion. include_unmodified : :class:`bool` @@ -4192,7 +4197,10 @@ def peptidoforms( f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}" ) - return template.generate_proteoforms(include_unmodified=include_unmodified, include_labile=include_labile) + return template.proteoforms(include_unmodified=include_unmodified, include_labile=include_labile) + + +proteoforms = peptidoforms class ProteoformCombinator: diff --git a/tests/test_proforma.py b/tests/test_proforma.py index c5a5e072..18f1879e 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -1,13 +1,15 @@ from os import path import unittest import pickle +import math import pyteomics pyteomics.__path__ = [path.abspath( path.join(path.dirname(__file__), path.pardir, 'pyteomics'))] from pyteomics.proforma import ( PSIModModification, ProForma, TaggedInterval, parse, MassModification, ProFormaError, TagTypeEnum, ModificationRule, StableIsotope, GenericModification, Composition, to_proforma, ModificationMassNotFoundError, - AdductParser, ChargeState, + UnimodModification, PSIModModification, ModificationTarget, + AdductParser, ChargeState, proteoforms, _coerce_string_to_modification, std_aa_comp, obo_cache, process_tag_tokens) @@ -487,15 +489,25 @@ def test_range(self): pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): - proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified)) + proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) self.assertEqual(len(proteoforms), 2 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + def test_unlocalized_position_list_and_count(self): + seq = "[Phospho|Position:S|Position:T]^2?EMEVTSESPEK" + nsites = 3 + k = 2 + pf = ProForma.parse(seq) + for include_unmodified in [False, True]: + with self.subTest(include_unmodified=include_unmodified): + proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) + self.assertEqual(len(proteoforms), math.comb(nsites, k) + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + def test_localization_tag(self): seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK" pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): - proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified)) + proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) self.assertEqual(len(proteoforms), 3 + include_unmodified) def test_unlocalized_modification(self): @@ -503,23 +515,49 @@ def test_unlocalized_modification(self): pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): - proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified)) + proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) self.assertEqual(len(proteoforms), len(pf) + include_unmodified) def test_comup_stacking(self): seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK" pf = ProForma.parse(seq) - proteoforms = list(pf.generate_proteoforms()) + proteoforms = list(pf.proteoforms()) self.assertEqual(len(proteoforms), 4) - proteoforms = list(pf.generate_proteoforms(True)) + proteoforms = list(pf.proteoforms(True)) self.assertEqual(len(proteoforms), 9) def test_labile(self): seq = "{Phosphpo}EMEVTESPEK" pf = ProForma.parse(seq) - proteoforms = list(pf.generate_proteoforms(False, True)) + proteoforms = list(pf.proteoforms(False, True)) self.assertEqual(len(proteoforms), 11) +class ProteoformsFunctionTest(unittest.TestCase): + def test_proteoforms(self): + seq = "EMEV(TS)[Phospho]ESPEK" + pf = ProForma.parse(seq) + for include_unmodified in [False, True]: + with self.subTest(include_unmodified=include_unmodified): + forms = list(proteoforms(pf, include_unmodified=include_unmodified)) + self.assertEqual(len(forms), 2 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + + def test_coerce_modification(self): + for s, m in [("Phospho", GenericModification("Phospho")), + ("UNIMOD:21", UnimodModification("21")), + ("MOD:00046", PSIModModification("00046"))]: + with self.subTest(s=s): + self.assertEqual(_coerce_string_to_modification(s), m) + + def test_modification_target_from_str(self): + for s, t in [("S", ModificationTarget('S')), + ("T", ModificationTarget('T')), + ("N-term", ModificationTarget(None, True, False)), + ("C-term", ModificationTarget(None, False, True)), + ("N-term:K", ModificationTarget('K', True, False)), + ("C-term:Y", ModificationTarget('Y', False, True))]: + with self.subTest(s=s): + self.assertEqual(ModificationTarget.from_str(s), t) + if __name__ == '__main__': unittest.main() From c7af7c8025aed9f2392b13f79ddb089c1e1cfbf6 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 27 Mar 2026 23:29:00 -0400 Subject: [PATCH 04/16] fix previous tests --- pyteomics/proforma.py | 12 ++++++++++++ tests/test_proforma.py | 14 ++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index bfc98685..8d8a3053 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -3960,6 +3960,7 @@ class GeneratorModificationRuleDirective: colocal_unknown: bool = False limit: int = 1 labile: bool = False + token: Optional[ModificationToken] = None def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False): self.rule = rule @@ -3968,6 +3969,7 @@ def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unkno self.colocal_unknown = colocal_unknown self.limit = limit self.labile = labile + self.token = getattr(self.rule.modification_tag, "key", None) def create(self) -> TagBase: return self.rule.modification_tag.copy() @@ -4313,13 +4315,17 @@ def generate(self): positions_for = [None] + positions_for position_choices.append(positions_for) + seen = set() + for slots in itertools.product(*position_choices): + state = set() template = self.template.copy() valid = True labile_remaining = [] for rule, idx in zip(self.variable_rules, slots): if idx is None: if rule.labile: + state.add((None, rule.token)) labile_remaining.append(rule.create()) continue if idx not in rule.find_positions(template): @@ -4332,7 +4338,13 @@ def generate(self): tag._generated = ModificationSourceType.Generated tags.append(tag) template[idx] = (aa, tags) + state.add((idx, rule.token)) if valid: + state = frozenset(state) + if state in seen: + continue + else: + seen.add(state) if labile_remaining: template.labile_modifications = labile_remaining yield template diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 18f1879e..7b143566 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -500,7 +500,13 @@ def test_unlocalized_position_list_and_count(self): for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) - self.assertEqual(len(proteoforms), math.comb(nsites, k) + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + if not include_unmodified: + self.assertEqual(len(proteoforms), math.comb(nsites, k)) # Phospho on T or S (+ no phospho if include_unmodified) + else: + self.assertEqual( + len(proteoforms), + sum([math.comb(nsites, i) for i in range(k + 1)]), + ) def test_localization_tag(self): seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK" @@ -522,12 +528,12 @@ def test_comup_stacking(self): seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK" pf = ProForma.parse(seq) proteoforms = list(pf.proteoforms()) - self.assertEqual(len(proteoforms), 4) + self.assertEqual(len(proteoforms), 3) proteoforms = list(pf.proteoforms(True)) - self.assertEqual(len(proteoforms), 9) + self.assertEqual(len(proteoforms), 4) def test_labile(self): - seq = "{Phosphpo}EMEVTESPEK" + seq = "{Phospho}EMEVTESPEK" pf = ProForma.parse(seq) proteoforms = list(pf.proteoforms(False, True)) self.assertEqual(len(proteoforms), 11) From 03a3cffa6c0a6401a23e943e2bab9155c3128500 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Sat, 28 Mar 2026 14:50:32 +0100 Subject: [PATCH 05/16] Add a test for proteoforms function with a dict --- pyteomics/proforma.py | 4 ++-- tests/test_proforma.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index 8d8a3053..b7d6a224 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -4147,10 +4147,10 @@ def peptidoforms( on every invocation. >>> from pyteomics import proforma - >>> isos = proforma.peptidoforms( + >>> pforms = proforma.peptidoforms( ... ProForma.parse("EMEVTESPEK"), ... variable_modifications={proforma.GenericModification("Oxidation"): [proforma.PositionModifierTag('M')]}) - >>> for i in isos: + >>> for i in pforms: ... print(i) EMEVTESPEK EM[Oxidation|Position:M]EVTESPEK diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 7b143566..793a8385 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -565,5 +565,14 @@ def test_modification_target_from_str(self): with self.subTest(s=s): self.assertEqual(ModificationTarget.from_str(s), t) + def test_from_simple_dict(self): + seq = "EMEVTSESPEK" + variable_mods = {"Phospho": ["S", "T"]} + pf = ProForma.parse(seq) + for include_unmodified in [False, True]: + with self.subTest(include_unmodified=include_unmodified): + forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) + self.assertEqual(len(forms), 3 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + if __name__ == '__main__': unittest.main() From f82b8a42b8f8dd6feccaf82222d3af8fcb5f09cd Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 28 Mar 2026 23:33:08 -0400 Subject: [PATCH 06/16] Patch the failing unrelated test, Massive's PROXI server seems to be down --- tests/test_usi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_usi.py b/tests/test_usi.py index e3aeafe4..733103eb 100644 --- a/tests/test_usi.py +++ b/tests/test_usi.py @@ -4,7 +4,7 @@ pyteomics.__path__ = [path.abspath(path.join(path.dirname(__file__), path.pardir, 'pyteomics'))] import unittest -from urllib.error import HTTPError +from urllib.error import HTTPError, URLError from pyteomics.usi import USI, proxi, AGGREGATOR_KEY from pyteomics.auxiliary import PyteomicsError @@ -28,6 +28,9 @@ def test_request(self): usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256" try: response = proxi(usi_str, backend='massive') + except URLError as e: + if e.errno in {110, }: + self.skipTest(f"PROXI service is unavailable: ({e})") except HTTPError as e: if e.code in {500, 502, 503, 504}: self.skipTest(f'PROXI service is unavailable ({e.code})') From 2e8adb4f026161791f134a140e9f7f4d4fac2b12 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 28 Mar 2026 23:45:39 -0400 Subject: [PATCH 07/16] Add expand_rules argument --- pyteomics/proforma.py | 144 ++++++++++++++++++++++++++++++++++++----- tests/test_proforma.py | 25 ++++++- 2 files changed, 150 insertions(+), 19 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index b7d6a224..bb77fd91 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -13,8 +13,8 @@ import itertools import re import warnings -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple -from collections import deque, namedtuple +from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple, DefaultDict +from collections import Counter, deque, namedtuple from functools import partial from itertools import chain from array import array as _array @@ -337,6 +337,12 @@ class PositionModifierTag(TagBase): def __init__(self, value, extra=None, group_id=None): super().__init__(TagTypeEnum.position_modifier, value, extra, group_id) + def __eq__(self, other): + return super().__eq__(other) + + def __hash__(self): + return hash(self.value) + def _format_main(self): return f"{self.prefix_name}:{self.value}" @@ -3830,7 +3836,7 @@ def find_tags_by_id(self, tag_id, include_position=True): def tags(self): return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at] - def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]: + def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False, expand_rules: bool = False) -> Iterator["ProForma"]: """ Generate combinatorial localizations of modifications defined on this ProForma sequence. @@ -3838,16 +3844,21 @@ def proteoforms(self, include_unmodified: bool = False, include_labile: bool = F ---------- include_unmodified : :class:`bool` For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to - how variable modification rules are applied in search engines. + how variable modification rules are applied in search engines. It still respects the number of copies of modifications included + in the input. See ``expand_rules``. include_labile : :class:`bool` For all labile modifications, include the case where the modification is localized at every possible location or as a remaining labile modification. + expand_rules : :class:`bool` + For all variable modifications, allow any number of copies of the modification to be included in the result. + This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly + how ProForma's rules work. This forces ``include_unmodified`` to be :const:`True`. Yields ------ :class:`ProForma` """ - return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile)) + return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile, expand_rules=expand_rules)) peptidoforms = proteoforms @@ -3962,6 +3973,24 @@ class GeneratorModificationRuleDirective: labile: bool = False token: Optional[ModificationToken] = None + def __eq__(self, other): + if other is None: + return False + return ( + self.rule == other.rule and + self.region == other.region and + self.colocal_known == other.colocal_known and + self.colocal_unknown == other.colocal_unknown and + self.limit == other.limit and + self.labile == other.labile + ) + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.token) + def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False): self.rule = rule self.region = region @@ -4091,6 +4120,7 @@ def peptidoforms( ] = None, include_unmodified: bool = True, include_labile: bool = False, + expand_rules: bool = False, ) -> Iterator[ProForma]: """ Generate the combinatorial cross-product of modifications for ``peptide``, given by @@ -4125,6 +4155,11 @@ def peptidoforms( For all non-fixed modifications, include the case where the modification is not included anywhere include_labile : :class:`bool` For all labile modifications, include the case where the modification is localized at every possible location + expand_rules : :class:`bool` + For all variable modifications, allow any number of copies of the modification to be included in the result. + This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly + how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently + incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers. Yields ------ @@ -4155,6 +4190,30 @@ def peptidoforms( EMEVTESPEK EM[Oxidation|Position:M]EVTESPEK + To expand rules so that they might apply to as many positions as are available, as is often done when + build a combinatorial search space, use the ``expand_rules`` argument. + >>> from pyteomics import proforma + >>> isos = proforma.peptidoforms( + ... "EMEVTESPEK", + ... variable_modifications={"Oxidation": ['M'], "Phospho": ['S', 'T']}, expand_rules=True) + >>> for i in isos: + ... print(i) + EM[Oxidation|Position:M]EVT[Phospho|Position:T]S[Phospho|Position:S]ES[Phospho|Position:S]PEK + EMEVT[Phospho|Position:T]S[Phospho|Position:S]ES[Phospho|Position:S]PEK + EM[Oxidation|Position:M]EVTS[Phospho|Position:S]ES[Phospho|Position:S]PEK + EMEVTS[Phospho|Position:S]ES[Phospho|Position:S]PEK + EM[Oxidation|Position:M]EVT[Phospho|Position:T]S[Phospho|Position:S]ESPEK + EMEVT[Phospho|Position:T]S[Phospho|Position:S]ESPEK + EM[Oxidation|Position:M]EVTS[Phospho|Position:S]ESPEK + EMEVTS[Phospho|Position:S]ESPEK + EM[Oxidation|Position:M]EVT[Phospho|Position:T]SES[Phospho|Position:S]PEK + EMEVT[Phospho|Position:T]SES[Phospho|Position:S]PEK + EM[Oxidation|Position:M]EVTSES[Phospho|Position:S]PEK + EMEVTSES[Phospho|Position:S]PEK + EM[Oxidation|Position:M]EVT[Phospho|Position:T]SESPEK + EMEVT[Phospho|Position:T]SESPEK + EM[Oxidation|Position:M]EVTSESPEK + EMEVTSESPEK """ if isinstance(peptide, str): peptide = ProForma.parse(peptide) @@ -4199,7 +4258,11 @@ def peptidoforms( f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}" ) - return template.proteoforms(include_unmodified=include_unmodified, include_labile=include_labile) + return template.proteoforms( + include_unmodified=include_unmodified, + include_labile=include_labile, + expand_rules=expand_rules, + ) proteoforms = peptidoforms @@ -4218,19 +4281,31 @@ class ProteoformCombinator: variable_rules: list[:class:`GeneratorModificationRuleDirective`] The rules to apply in combinations to the template sequence include_unmodified : :class:`bool` - For all non-fixed modifications, include the case where the modification is not included anywhere + For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to + how variable modification rules are applied in search engines. It still respects the number of copies of modifications included + in the input. See :attr:`expand_rules`. include_labile : :class:`bool` For all labile modifications, include the case where the modification is localized at every possible location + expand_rules : :class:`bool` + For all variable modifications, allow any number of copies of the modification to be included in the result. + This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly + how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently + incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers. """ template: ProForma include_unmodified: bool include_labile: bool variable_rules: List[GeneratorModificationRuleDirective] - def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False): + def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False): + if expand_rules: + if not include_unmodified: + warnings.warn("Forcing `include_unmodified = True` from `expand_rules`") + include_unmodified = True self.template = base_proteoform.copy() self.include_unmodified = include_unmodified self.include_labile = include_labile + self.expand_rules = expand_rules self.variable_rules = [] self._extract_rules() self._apply_fixed_modifications() @@ -4305,7 +4380,30 @@ def __iter__(self): def __next__(self): return next(self._iter) - def generate(self): + def _invert_position_rules(self, rules: List[GeneratorModificationRuleDirective], positions: List[List[Optional[int]]]) -> List[List[Tuple[Optional[int], GeneratorModificationRuleDirective]]]: + index = DefaultDict(list) + + for rule, positions in zip(rules, positions): + if rule.labile: + index[None].append(rule) + for position in positions: + if position is None: + continue + index[position].append(rule) + + if self.include_unmodified: + for k in index: + index[k].append(None) + + stacks = [] + for idx, options in index.items(): + stack = [] + for opt in options: + stack.append((idx, opt)) + stacks.append(stack) + return stacks + + def _build_position_map(self): position_choices = [] for rule in self.variable_rules: positions_for = rule.find_positions(self.template) @@ -4314,18 +4412,31 @@ def generate(self): elif self.include_unmodified or not positions_for: positions_for = [None] + positions_for position_choices.append(positions_for) + return position_choices + + def _build_modification_iter(self) -> Iterator[List[Tuple[Optional[int], Optional[GeneratorModificationRuleDirective]]]]: + position_choices = self._build_position_map() + if self.expand_rules: + return itertools.product(*self._invert_position_rules( + self.variable_rules, position_choices + )) + else: + return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices)) + def generate(self): seen = set() - - for slots in itertools.product(*position_choices): - state = set() + for slots in self._build_modification_iter(): + state = Counter() template = self.template.copy() valid = True labile_remaining = [] - for rule, idx in zip(self.variable_rules, slots): + + for idx, rule in slots: + if rule is None: + continue if idx is None: if rule.labile: - state.add((None, rule.token)) + state[((None, rule.token))] += 1 labile_remaining.append(rule.create()) continue if idx not in rule.find_positions(template): @@ -4338,9 +4449,10 @@ def generate(self): tag._generated = ModificationSourceType.Generated tags.append(tag) template[idx] = (aa, tags) - state.add((idx, rule.token)) + state[((idx, rule.token))] += 1 + if valid: - state = frozenset(state) + state = frozenset(state.items()) if state in seen: continue else: diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 793a8385..55bff515 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -10,7 +10,7 @@ ModificationRule, StableIsotope, GenericModification, Composition, to_proforma, ModificationMassNotFoundError, UnimodModification, PSIModModification, ModificationTarget, AdductParser, ChargeState, proteoforms, _coerce_string_to_modification, - std_aa_comp, obo_cache, process_tag_tokens) + std_aa_comp, obo_cache, process_tag_tokens, peptidoforms) class ProFormaTest(unittest.TestCase): @@ -530,7 +530,7 @@ def test_comup_stacking(self): proteoforms = list(pf.proteoforms()) self.assertEqual(len(proteoforms), 3) proteoforms = list(pf.proteoforms(True)) - self.assertEqual(len(proteoforms), 4) + self.assertEqual(len(proteoforms), 6) def test_labile(self): seq = "{Phospho}EMEVTESPEK" @@ -538,6 +538,22 @@ def test_labile(self): proteoforms = list(pf.proteoforms(False, True)) self.assertEqual(len(proteoforms), 11) + def test_expand(self): + seq = "EMEVTSESPEK" + variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"} + pf = ProForma.parse(seq) + combos = peptidoforms( + pf, + variable_modifications=variable_mods, + expand_rules=True, + ) + variants = list(combos) + self.assertEqual(len(variants), 16) + self.assertEqual( + 8, + sum(['Oxidation' in str(p) for p in variants]) + ) + class ProteoformsFunctionTest(unittest.TestCase): def test_proteoforms(self): @@ -572,7 +588,10 @@ def test_from_simple_dict(self): for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) - self.assertEqual(len(forms), 3 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + if include_unmodified: + self.assertEqual(len(forms), 6) + else: + self.assertEqual(len(forms), 2) if __name__ == '__main__': unittest.main() From c9f5f01f0ce3276f7820614f1abd813b45f8572f Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sun, 29 Mar 2026 00:02:55 -0400 Subject: [PATCH 08/16] chore: rework USI test patch --- tests/test_usi.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_usi.py b/tests/test_usi.py index 733103eb..30cbaa9b 100644 --- a/tests/test_usi.py +++ b/tests/test_usi.py @@ -28,14 +28,16 @@ def test_request(self): usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256" try: response = proxi(usi_str, backend='massive') - except URLError as e: - if e.errno in {110, }: - self.skipTest(f"PROXI service is unavailable: ({e})") except HTTPError as e: if e.code in {500, 502, 503, 504}: self.skipTest(f'PROXI service is unavailable ({e.code})') else: raise + except URLError as e: + if e.errno in {110, }: + self.skipTest(f"PROXI service is unavailable: ({e})") + else: + raise assert set(usi_proxi_data.keys()) <= set(response.keys()) From 770b2c2efbd0e32cfaacd69899445604d109eb13 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Sun, 29 Mar 2026 13:29:20 +0200 Subject: [PATCH 09/16] Only warn if expand_rules is true and include_unmodified is explicitly false --- pyteomics/proforma.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index bb77fd91..e23ae126 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -13,8 +13,8 @@ import itertools import re import warnings -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple, DefaultDict -from collections import Counter, deque, namedtuple +from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple +from collections import Counter, deque, namedtuple, defaultdict from functools import partial from itertools import chain from array import array as _array @@ -4297,11 +4297,14 @@ class ProteoformCombinator: include_labile: bool variable_rules: List[GeneratorModificationRuleDirective] - def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False): + def __init__(self, base_proteoform: ProForma, include_unmodified: Optional[bool] = False, include_labile: bool = False, expand_rules: bool = False): if expand_rules: - if not include_unmodified: + if include_unmodified is False: warnings.warn("Forcing `include_unmodified = True` from `expand_rules`") include_unmodified = True + else: + include_unmodified = bool(include_unmodified) + self.template = base_proteoform.copy() self.include_unmodified = include_unmodified self.include_labile = include_labile @@ -4381,12 +4384,12 @@ def __next__(self): return next(self._iter) def _invert_position_rules(self, rules: List[GeneratorModificationRuleDirective], positions: List[List[Optional[int]]]) -> List[List[Tuple[Optional[int], GeneratorModificationRuleDirective]]]: - index = DefaultDict(list) + index = defaultdict(list) - for rule, positions in zip(rules, positions): + for rule, position_list in zip(rules, positions): if rule.labile: index[None].append(rule) - for position in positions: + for position in position_list: if position is None: continue index[position].append(rule) From a6f7f7f5fb211af59f5639b31f943436e30456a9 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Sun, 29 Mar 2026 13:29:41 +0200 Subject: [PATCH 10/16] Update test_from_simple_dict --- tests/test_proforma.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 55bff515..44862446 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -584,14 +584,18 @@ def test_modification_target_from_str(self): def test_from_simple_dict(self): seq = "EMEVTSESPEK" variable_mods = {"Phospho": ["S", "T"]} + nsites = seq.count("S") + seq.count("T") pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) if include_unmodified: - self.assertEqual(len(forms), 6) + self.assertEqual(len(forms), nsites + 1) else: - self.assertEqual(len(forms), 2) + self.assertEqual(len(forms), nsites) + with self.subTest(expand_rules=True): + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), 2 ** nsites) if __name__ == '__main__': unittest.main() From a40f893633241e88ad9435ccde5b1b60c1022cf2 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sun, 29 Mar 2026 22:31:57 -0400 Subject: [PATCH 11/16] refactor `expand_rules` --- pyteomics/proforma.py | 54 +++++++++++++++++++----------------------- tests/test_proforma.py | 7 ++++-- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index bb77fd91..a319862e 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -3836,7 +3836,7 @@ def find_tags_by_id(self, tag_id, include_position=True): def tags(self): return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at] - def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False, expand_rules: bool = False) -> Iterator["ProForma"]: + def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]: """ Generate combinatorial localizations of modifications defined on this ProForma sequence. @@ -3849,16 +3849,12 @@ def proteoforms(self, include_unmodified: bool = False, include_labile: bool = F include_labile : :class:`bool` For all labile modifications, include the case where the modification is localized at every possible location or as a remaining labile modification. - expand_rules : :class:`bool` - For all variable modifications, allow any number of copies of the modification to be included in the result. - This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly - how ProForma's rules work. This forces ``include_unmodified`` to be :const:`True`. Yields ------ :class:`ProForma` """ - return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile, expand_rules=expand_rules)) + return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile)) peptidoforms = proteoforms @@ -4158,8 +4154,7 @@ def peptidoforms( expand_rules : :class:`bool` For all variable modifications, allow any number of copies of the modification to be included in the result. This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly - how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently - incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers. + how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. Yields ------ @@ -4217,23 +4212,40 @@ def peptidoforms( """ if isinstance(peptide, str): peptide = ProForma.parse(peptide) + if expand_rules: + include_unmodified = True template = peptide.copy() seen = set() if variable_modifications: if isinstance(variable_modifications, list): - template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications)) + extra_rules = [] + for rule in map(_coerce_string_to_modification, variable_modifications): + if expand_rules: + parsed_rule = GeneratorModificationRuleDirective.from_unlocalized_rule( + rule + ) + extra_rules.extend([rule] * len(parsed_rule.find_positions(template))) + else: + extra_rules.append(rule) + template.unlocalized_modifications.extend(extra_rules) elif isinstance(variable_modifications, dict): extra_rules = [] for tag, targets in variable_modifications.items(): seen.clear() + tag = _coerce_string_to_modification(tag) for target in targets: if isinstance(target, str): target = PositionModifierTag(target) if target in seen: continue seen.add(target) - tag = _coerce_string_to_modification(tag) - extra_rules.append(tag | target) + tag = tag | target + if expand_rules: + rule = GeneratorModificationRuleDirective.from_unlocalized_rule(tag) + n = len(rule.find_positions(peptide)) + extra_rules.extend([tag] * n) + else: + extra_rules.append(tag) template.unlocalized_modifications.extend(extra_rules) else: raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}") @@ -4261,7 +4273,6 @@ def peptidoforms( return template.proteoforms( include_unmodified=include_unmodified, include_labile=include_labile, - expand_rules=expand_rules, ) @@ -4286,26 +4297,16 @@ class ProteoformCombinator: in the input. See :attr:`expand_rules`. include_labile : :class:`bool` For all labile modifications, include the case where the modification is localized at every possible location - expand_rules : :class:`bool` - For all variable modifications, allow any number of copies of the modification to be included in the result. - This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly - how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently - incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers. """ template: ProForma include_unmodified: bool include_labile: bool variable_rules: List[GeneratorModificationRuleDirective] - def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False): - if expand_rules: - if not include_unmodified: - warnings.warn("Forcing `include_unmodified = True` from `expand_rules`") - include_unmodified = True + def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False): self.template = base_proteoform.copy() self.include_unmodified = include_unmodified self.include_labile = include_labile - self.expand_rules = expand_rules self.variable_rules = [] self._extract_rules() self._apply_fixed_modifications() @@ -4416,12 +4417,7 @@ def _build_position_map(self): def _build_modification_iter(self) -> Iterator[List[Tuple[Optional[int], Optional[GeneratorModificationRuleDirective]]]]: position_choices = self._build_position_map() - if self.expand_rules: - return itertools.product(*self._invert_position_rules( - self.variable_rules, position_choices - )) - else: - return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices)) + return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices)) def generate(self): seen = set() diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 55bff515..12399ea5 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -589,9 +589,12 @@ def test_from_simple_dict(self): with self.subTest(include_unmodified=include_unmodified): forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) if include_unmodified: - self.assertEqual(len(forms), 6) + self.assertEqual(len(forms), 4) else: - self.assertEqual(len(forms), 2) + self.assertEqual(len(forms), 3) + + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), 8) if __name__ == '__main__': unittest.main() From 4736e32c3737258d5afd8f4436fefe47da6d05aa Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Mon, 30 Mar 2026 13:08:51 +0200 Subject: [PATCH 12/16] Expand tests --- tests/test_proforma.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 6dbc6ffd..8340b1db 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -558,11 +558,12 @@ def test_expand(self): class ProteoformsFunctionTest(unittest.TestCase): def test_proteoforms(self): seq = "EMEV(TS)[Phospho]ESPEK" + nsites = 2 # length of the range pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): forms = list(proteoforms(pf, include_unmodified=include_unmodified)) - self.assertEqual(len(forms), 2 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) + self.assertEqual(len(forms), nsites + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) def test_coerce_modification(self): for s, m in [("Phospho", GenericModification("Phospho")), @@ -590,12 +591,35 @@ def test_from_simple_dict(self): with self.subTest(include_unmodified=include_unmodified): forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) if include_unmodified: - self.assertEqual(len(forms), 4) + self.assertEqual(len(forms), nsites + 1) # Phospho on T or S + no phospho else: - self.assertEqual(len(forms), 3) + self.assertEqual(len(forms), nsites) # Phospho on T or S forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) - self.assertEqual(len(forms), 8) + self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho / no phospho on each S or T + + def test_from_str(self): + seq = "EMEVTSESPEK" + variable_mods = ["Phospho|Position:S|Position:T"] + nsites = seq.count("S") + seq.count("T") + pf = ProForma.parse(seq) + for include_unmodified in [False, True]: + with self.subTest(include_unmodified=include_unmodified): + forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified)) + if include_unmodified: + self.assertEqual(len(forms), nsites + 1) # Phospho on T or S + no phospho + else: + self.assertEqual(len(forms), nsites) # Phospho on T or S + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho / no phospho on each S or T + + def test_expand_mods_from_list(self): + seq = "EMEVTSESPEK" + variable_mods = ["Phospho|Position:S", "Phospho|Position:T"] + nsites = seq.count("S") + seq.count("T") + pf = ProForma.parse(seq) + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho on 0, 1, or 2 of the S or T if __name__ == '__main__': unittest.main() From cc29c349ba628faa2782daf373e2db5ab893e807 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Tue, 14 Apr 2026 12:33:50 +0200 Subject: [PATCH 13/16] Skip the PROXI test on timeout --- tests/test_usi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_usi.py b/tests/test_usi.py index 30cbaa9b..b703e79a 100644 --- a/tests/test_usi.py +++ b/tests/test_usi.py @@ -34,7 +34,7 @@ def test_request(self): else: raise except URLError as e: - if e.errno in {110, }: + if getattr(e.reason, 'errno', None) in {110}: self.skipTest(f"PROXI service is unavailable: ({e})") else: raise From 9254a8eeb379bc0d4363aa3ed13a53146976b2ac Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Tue, 14 Apr 2026 18:13:00 +0200 Subject: [PATCH 14/16] Make test code slightly more descriptive --- CHANGELOG | 8 ++++--- pyteomics/proforma.py | 8 +++---- pyteomics/version.py | 2 +- tests/test_proforma.py | 54 +++++++++++++++++++++++------------------- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5d5e120e..e8b1aaeb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -5.0c0 +5.0c1 ----- - Update the standard ion compositions to be more consistent with the adopted ion type notation. @@ -14,8 +14,10 @@ - Support **ProForma 2.1** (`#183 `_ by Joshua Klein). You can calculate compositions for :py:class:`ProForma` objects using :py:meth:`pyteomics.proforma.Proforma.composition` and get m/z with annotated or user-provided charge state using :py:meth:`pyteomics.proforma.Proforma.mz`. - You can also iterate through possible peptidoforms when a ProForma sequence is annotated with some ambiguity using - :py:meth:`pyteomics.proforma.Proforma.generate_proteoforms`. + + - You can also iterate through possible peptidoforms when a ProForma sequence is annotated with some ambiguity using + :py:meth:`pyteomics.proforma.Proforma.proteoforms` and apply additional modification specifications to any ProForma sequence + using :py:func:`pyteomics.proforma.proteoforms` (`#196 `_ by Joshua Klein). - Implement **thread-based parallelism**. Following the introduction of `official free-threading Python implementations `_ diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index 6bb56359..8bb13682 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -4148,9 +4148,9 @@ def peptidoforms( The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified`` is specified. See ``variable_modifications`` for an explanation of type coercion. include_unmodified : :class:`bool` - For all non-fixed modifications, include the case where the modification is not included anywhere + For all non-fixed modifications, include the case where the modification is not included anywhere. include_labile : :class:`bool` - For all labile modifications, include the case where the modification is localized at every possible location + For all labile modifications, include the case where the modification is localized at every possible location. expand_rules : :class:`bool` For all variable modifications, allow any number of copies of the modification to be included in the result. This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly @@ -4290,13 +4290,13 @@ class ProteoformCombinator: template: :class:`ProForma` The template sequence to apply any combination of rules to variable_rules: list[:class:`GeneratorModificationRuleDirective`] - The rules to apply in combinations to the template sequence + The rules to apply in combinations to the template sequence. include_unmodified : :class:`bool` For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to how variable modification rules are applied in search engines. It still respects the number of copies of modifications included in the input. See :attr:`expand_rules`. include_labile : :class:`bool` - For all labile modifications, include the case where the modification is localized at every possible location + For all labile modifications, include the case where the modification is localized at every possible location. """ template: ProForma include_unmodified: bool diff --git a/pyteomics/version.py b/pyteomics/version.py index f0033221..7e7b0f5a 100644 --- a/pyteomics/version.py +++ b/pyteomics/version.py @@ -19,7 +19,7 @@ """ -__version__ = '5.0c0' +__version__ = '5.0c1' from collections import namedtuple import re diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 8340b1db..00ec21a7 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -493,28 +493,29 @@ def test_range(self): self.assertEqual(len(proteoforms), 2 + include_unmodified) # Phospho on T or S (+ no phospho if include_unmodified) def test_unlocalized_position_list_and_count(self): - seq = "[Phospho|Position:S|Position:T]^2?EMEVTSESPEK" - nsites = 3 k = 2 + seq = f"[Phospho|Position:S|Position:T]^{k}?EMEVTSESPEK" + nsites = seq.partition('?')[2].count('S') + seq.partition('?')[2].count('T') pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) if not include_unmodified: - self.assertEqual(len(proteoforms), math.comb(nsites, k)) # Phospho on T or S (+ no phospho if include_unmodified) + self.assertEqual(len(proteoforms), math.comb(nsites, k)) # Phospho on T or S, exactly `k` times else: self.assertEqual( len(proteoforms), - sum([math.comb(nsites, i) for i in range(k + 1)]), + sum([math.comb(nsites, i) for i in range(k + 1)]), # Phospho on T or S, anywhere from 0 to `k` times ) def test_localization_tag(self): seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK" + nsites = seq.count('#g1') pf = ProForma.parse(seq) for include_unmodified in [False, True]: with self.subTest(include_unmodified=include_unmodified): proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified)) - self.assertEqual(len(proteoforms), 3 + include_unmodified) + self.assertEqual(len(proteoforms), nsites + include_unmodified) def test_unlocalized_modification(self): seq = "[Phospho]?EMEVTSESPEK" @@ -525,10 +526,15 @@ def test_unlocalized_modification(self): self.assertEqual(len(proteoforms), len(pf) + include_unmodified) def test_comup_stacking(self): - seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK" + k = 2 # number of modifications to combine + limit = 2 # stack limit + seq = f"[Phospho|Position:S|Position:T|comup|Limit:{limit}]^{k}?EMEVTESPEK" + nsites = seq.partition('?')[2].count('S') + seq.partition('?')[2].count('T') + self.assertGreaterEqual(nsites * limit, k) # otherwise we can't place `k` mods even with stacking + effective_limit = min(limit, k) # if limit >= k, then we can just treat it as a normal combinatorial expansion pf = ProForma.parse(seq) proteoforms = list(pf.proteoforms()) - self.assertEqual(len(proteoforms), 3) + self.assertEqual(len(proteoforms), math.comb(nsites + effective_limit - 1, k)) # number of ways to place `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit` proteoforms = list(pf.proteoforms(True)) self.assertEqual(len(proteoforms), 6) @@ -536,23 +542,7 @@ def test_labile(self): seq = "{Phospho}EMEVTESPEK" pf = ProForma.parse(seq) proteoforms = list(pf.proteoforms(False, True)) - self.assertEqual(len(proteoforms), 11) - - def test_expand(self): - seq = "EMEVTSESPEK" - variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"} - pf = ProForma.parse(seq) - combos = peptidoforms( - pf, - variable_modifications=variable_mods, - expand_rules=True, - ) - variants = list(combos) - self.assertEqual(len(variants), 16) - self.assertEqual( - 8, - sum(['Oxidation' in str(p) for p in variants]) - ) + self.assertEqual(len(proteoforms), len(pf) + 1) # all possible sites and the form where phospho is kept as labile class ProteoformsFunctionTest(unittest.TestCase): @@ -598,6 +588,22 @@ def test_from_simple_dict(self): forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho / no phospho on each S or T + def test_expand(self): + seq = "EMEVTSESPEK" + variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"} + pf = ProForma.parse(seq) + combos = peptidoforms( + pf, + variable_modifications=variable_mods, + expand_rules=True, + ) + variants = list(combos) + self.assertEqual(len(variants), 16) + self.assertEqual( + 8, + sum(['Oxidation' in str(p) for p in variants]) + ) + def test_from_str(self): seq = "EMEVTSESPEK" variable_mods = ["Phospho|Position:S|Position:T"] From b4f2a6cd20a8a16f5c39649801dd3d919e3645a1 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Tue, 14 Apr 2026 18:49:25 +0200 Subject: [PATCH 15/16] Add support for CoMUP with expand_rules --- pyteomics/proforma.py | 10 +++++----- tests/test_proforma.py | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index 8bb13682..da92cbfe 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -4045,7 +4045,7 @@ def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDir rule = ModificationRule(tag, []) colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) - limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit) @classmethod @@ -4058,7 +4058,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit) @classmethod @@ -4073,7 +4073,7 @@ def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModification colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) rules.append(cls(rule, region, colocal_known, colocal_unknown, limit)) return rules @@ -4087,7 +4087,7 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective": colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True) @@ -4224,7 +4224,7 @@ def peptidoforms( parsed_rule = GeneratorModificationRuleDirective.from_unlocalized_rule( rule ) - extra_rules.extend([rule] * len(parsed_rule.find_positions(template))) + extra_rules.extend([rule] * len(parsed_rule.find_positions(template) * parsed_rule.limit)) else: extra_rules.append(rule) template.unlocalized_modifications.extend(extra_rules) diff --git a/tests/test_proforma.py b/tests/test_proforma.py index 00ec21a7..19795df0 100644 --- a/tests/test_proforma.py +++ b/tests/test_proforma.py @@ -536,7 +536,7 @@ def test_comup_stacking(self): proteoforms = list(pf.proteoforms()) self.assertEqual(len(proteoforms), math.comb(nsites + effective_limit - 1, k)) # number of ways to place `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit` proteoforms = list(pf.proteoforms(True)) - self.assertEqual(len(proteoforms), 6) + self.assertEqual(len(proteoforms), sum([math.comb(nsites + min(limit, i) - 1, i) for i in range(k + 1)])) # number of ways to place anywhere from 0 to `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit` def test_labile(self): seq = "{Phospho}EMEVTESPEK" @@ -627,5 +627,23 @@ def test_expand_mods_from_list(self): forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho on 0, 1, or 2 of the S or T + def test_expand_mods_from_dict(self): + seq = "EMEVTSESPEK" + variable_mods = {"Phospho": ["S", "T"]} + nsites = seq.count("S") + seq.count("T") + pf = ProForma.parse(seq) + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), 2 ** nsites) # all combinations of phospho on 0, 1, or 2 of the S or T + + def test_expand_mods_comup(self): + seq = "EMEVTSESPEK" + limit = 2 + variable_mods = [f"Phospho|Position:S|Position:T|comup|Limit:{limit}"] + nsites = seq.count("S") + seq.count("T") + pf = ProForma.parse(seq) + forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True)) + self.assertEqual(len(forms), (limit + 1) ** nsites) # all combinations of 0 to `limit` phosphos on each S or T + + if __name__ == '__main__': unittest.main() From c4c0d58de442fc1f05b3d92728fd417b7c68a16c Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Tue, 14 Apr 2026 18:55:52 +0200 Subject: [PATCH 16/16] Roll back unneeded value type conversions --- pyteomics/proforma.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py index da92cbfe..13db903c 100644 --- a/pyteomics/proforma.py +++ b/pyteomics/proforma.py @@ -4045,7 +4045,7 @@ def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDir rule = ModificationRule(tag, []) colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) - limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit) @classmethod @@ -4058,7 +4058,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit) @classmethod @@ -4073,7 +4073,7 @@ def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModification colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) rules.append(cls(rule, region, colocal_known, colocal_unknown, limit)) return rules @@ -4087,7 +4087,7 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective": colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp)) colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup)) rule = ModificationRule(modification_tag=mod, targets=targets) - limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) + limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1]) return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True)