From 9fc4b96e53a50dad9406f6327a2c875b38343f7e Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Wed, 11 Mar 2026 21:59:20 -0400
Subject: [PATCH 01/16] feature: add proforma.modify_with

---
 pyteomics/proforma.py | 124 ++++++++++++++++++++++++++++++------------
 1 file changed, 90 insertions(+), 34 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index b800dbd0..361d1493 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -1614,6 +1614,43 @@ def is_valid(self, aa: str, n_term: bool, c_term: bool) -> bool:
             return False
         return self.aa.upper() == aa.upper() or self.aa is None
 
+    @classmethod
+    def from_str(cls, target: str):
+        target_lower = target.lower()
+        if target in VALID_AA:
+            return cls(target, False, False)
+        elif target_lower in ("n-term", "c-term"):
+            n_term = target_lower == "n-term"
+            c_term = target_lower == "c-term"
+            return cls(None, n_term, c_term)
+        elif target_lower.startswith(("n-term:", "c-term:")):
+            tokens = target.split(":")
+            if len(tokens) == 2:
+                if tokens[1] in VALID_AA:
+                    t = tokens[0].lower()
+                    n_term = t == "n-term"
+                    c_term = t == "c-term"
+                    cls(tokens[1], n_term, c_term)
+                else:
+                    raise PyteomicsError(
+                        "Modification target has an invalid amino acid specific terminal target {1} in {0}".format(
+                            target,
+                            tokens[1]
+                        )
+                    )
+            else:
+                raise PyteomicsError(
+                    "Modification rule target {0} has an empty amino acid specific terminal target".format(
+                        target
+                    )
+                )
+        else:
+            raise PyteomicsError(
+                "Modification rule target {0} is invalid".format(
+                    target
+                )
+            )
+
 
 class ModificationRule(object):
     '''Define a fixed modification rule which dictates a modification tag is
@@ -1652,39 +1689,10 @@ def _validate_targets(self):
         for target in self.targets:
             if isinstance(target, ModificationTarget):
                 validated_targets.append(target)
-            elif target in VALID_AA:
-                validated_targets.append(ModificationTarget(target, False, False))
-            elif target in ("N-term", "C-term"):
-                n_term = target == "N-term"
-                c_term = target == "C-term"
-                validated_targets.append(ModificationTarget(None, n_term, c_term))
-            elif target.startswith(("N-term:", "C-term:")):
-                tokens = target.split(":")
-                if len(tokens) == 2:
-                    if tokens[1] in VALID_AA:
-                        n_term = tokens[0] == "N-term"
-                        c_term = tokens[0] == "C-term"
-                        validated_targets.append(ModificationTarget(tokens[1], n_term, c_term))
-                    else:
-                        raise PyteomicsError(
-                            "Modification rule {0} has an invalid amino acid specific terminal target {2} in {1}".format(
-                                self,
-                                target,
-                                tokens[1]
-                            )
-                        )
-                else:
-                    raise PyteomicsError(
-                        "Modification rule {0} has an empty amino acid specific terminal target {1}".format(
-                            self, target
-                        )
-                    )
-            else:
-                raise PyteomicsError(
-                    "Modification rule {0} has an invalid target {1}".format(
-                        self, target
-                    )
-                )
+            try:
+                validated_targets.append(ModificationTarget.from_str(target))
+            except PyteomicsError as err:
+                raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err
 
         self.targets = validated_targets
 
@@ -3941,7 +3949,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect
         if not mod:
             return
         position_constraints = tag.find_tag_type(TagTypeEnum.position_modifier)
-        targets = [ModificationTarget(v.value) for v in position_constraints]
+        targets = [v.value for v in position_constraints]
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
         rule = ModificationRule(modification_tag=mod, targets=targets)
@@ -3978,6 +3986,54 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
         return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True)
 
 
+def _coerce_string_to_modification(item) -> TagBase:
+    if isinstance(item, TagBase):
+        return item.copy()
+    elif isinstance(item, str):
+        return TagParser(item)()[0]
+    else:
+        raise TypeError(f"Don't know how to coerce {item} of type {type(item)} to a modification")
+
+
+def modify_with(peptide: ProForma,
+                variable_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None,
+                fixed_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None,
+                include_unmodified: bool = True, include_labile: bool = False):
+    template = peptide.copy()
+    if variable_modifications:
+        if isinstance(variable_modifications, list):
+            template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications))
+        elif isinstance(variable_modifications, dict):
+            extra_rules = []
+            for target, tag in variable_modifications.items():
+                if isinstance(target, str):
+                    target = PositionModifierTag(target)
+                tag = _coerce_string_to_modification(tag)
+                tag.extra.append(target)
+                extra_rules.append(tag)
+            template.unlocalized_modifications.extend(extra_rules)
+        else:
+            raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}")
+    if fixed_modifications:
+        if isinstance(fixed_modifications, list):
+            template.fixed_modifications.extend(map(_coerce_string_to_modification, fixed_modifications))
+        elif isinstance(fixed_modifications, dict):
+            extra_rules = []
+            for target, tag in fixed_modifications.items():
+                if isinstance(target, str):
+                    target = PositionModifierTag(target)
+                tag = _coerce_string_to_modification(tag)
+                tag.extra.append(target)
+                extra_rules.append(tag)
+            template.fixed_modifications.extend(extra_rules)
+        else:
+            raise TypeError(
+                f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}"
+            )
+
+    return template.generate_proteoforms(include_unmodified=include_unmodified, include_labile=include_labile)
+
+
 class ProteoformCombinator:
     """
     Generate combinations of modification (co)localizations for

From 229fb630c448170b2a8287b042d056f2ad0f3077 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 14 Mar 2026 15:45:43 -0400
Subject: [PATCH 02/16] cleanup docs

---
 pyteomics/proforma.py | 257 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 209 insertions(+), 48 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index 361d1493..dcfdaa40 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -262,6 +262,11 @@ def has_mass(self) -> bool:
     def has_composition(self) -> bool:
         return False
 
+    def __or__(self, other):
+        this = self.copy()
+        this.extra.append(other.copy())
+        return this
+
 
 class GroupLabelBase(TagBase):
     __slots__ = ()
@@ -275,6 +280,9 @@ def __str__(self):
             label = part
         return '%s' % label
 
+    def __hash__(self):
+        return hash(str(self))
+
 
 class PositionLabelTag(GroupLabelBase):
     '''A tag to mark that a position is involved in a group in some way, but does
@@ -1154,16 +1162,38 @@ class GlycanModification(ModificationBase):
     _tag_type = TagTypeEnum.glycan
 
     valid_monosaccharides = {
-        "Hex": monosaccharide_description(162.0528, Composition("C6H10O5"), 'Hex'),
-        "HexNAc": monosaccharide_description(203.0793, Composition("C8H13N1O5"), 'HexNAc'),
-        "HexS": monosaccharide_description(242.009, Composition("C6H10O8S1"), 'HexS'),
-        "HexP": monosaccharide_description(242.0191, Composition("C6H11O8P1"), 'HexP'),
-        "HexNAcS": monosaccharide_description(283.0361, Composition("C8H13N1O8S1"), 'HexNAcS'),
-        "dHex": monosaccharide_description(146.0579, Composition("C6H10O4"), 'dHex'),
-        "NeuAc": monosaccharide_description(291.0954, Composition("C11H17N1O8"), 'NeuAc'),
-        "NeuGc": monosaccharide_description(307.0903, Composition("C11H17N1O9"), 'NeuGc'),
-        "Pen": monosaccharide_description(132.0422, Composition("C5H8O4"), 'Pen'),
-        "Fuc": monosaccharide_description(146.0579, Composition("C6H10O4"), 'Fuc')
+        "Hex": monosaccharide_description(162.0528, Composition("C6H10O5"), "Hex"),
+        "HexNAc": monosaccharide_description(
+            203.0793, Composition("C8H13N1O5"), "HexNAc"
+        ),
+        "HexS": monosaccharide_description(242.009, Composition("C6H10O8S1"), "HexS"),
+        "HexP": monosaccharide_description(242.0191, Composition("C6H11O8P1"), "HexP"),
+        "HexNAcS": monosaccharide_description(
+            283.0361, Composition("C8H13N1O8S1"), "HexNAcS"
+        ),
+        "dHex": monosaccharide_description(146.0579, Composition("C6H10O4"), "dHex"),
+        "NeuAc": monosaccharide_description(
+            291.0954, Composition("C11H17N1O8"), "NeuAc"
+        ),
+        "NeuGc": monosaccharide_description(
+            307.0903, Composition("C11H17N1O9"), "NeuGc"
+        ),
+        "Pen": monosaccharide_description(132.0422, Composition("C5H8O4"), "Pen"),
+        "Fuc": monosaccharide_description(146.0579, Composition("C6H10O4"), "Fuc"),
+        "Kdn": monosaccharide_description(
+            250.06886740546, Composition({"C": 9, "H": 14, "O": 8}), "Kdn"
+        ),
+        "Kdo": monosaccharide_description(
+            220.05830272176, Composition({"C": 8, "H": 12, "O": 7}), "Kdo"
+        ),
+        "Phospho": monosaccharide_description(
+            79.96633052075, Composition({"P": 1, "O": 3, "H": 1}), "Phospho"
+        ),
+        "Sulfo": monosaccharide_description(
+            79.95681485867999,
+            Composition({"S": 1, "O": 3, "H": 0}),
+            "Sulfo"
+        ),
     }
 
     valid_monosaccharides['Neu5Ac'] = valid_monosaccharides['NeuAc']
@@ -1173,7 +1203,18 @@ class GlycanModification(ModificationBase):
 
     monomer_tokenizer = re.compile(
         r"|".join(sorted(valid_monosaccharides.keys(), key=len, reverse=True)))
-    tokenizer = re.compile(r"(%s|[A-Za-z]+)\s*(\d*)\s*" % monomer_tokenizer.pattern)
+    tokenizer = re.compile(
+        r"""(?:
+        (?P<known_name>%s)|
+        (?P<base_name>[A-Za-z]+)|
+        (?P<charged_formula>\{
+                [^\}]+?
+        \})
+        )
+        \s*(?P<count>\d*)\s*"""
+        % monomer_tokenizer.pattern,
+        re.X,
+    )
 
     @property
     def monosaccharides(self):
@@ -1181,38 +1222,72 @@ def monosaccharides(self):
 
     def resolve(self):
         composite = BasicComposition()
-        for tok, cnt in self.tokenizer.findall(self.value):
+        mass = 0
+        chemcomp = Composition()
+        charge = 0
+        for hit in self.tokenizer.finditer(self.value):
+            hit = hit.groupdict()
+            cnt = hit['count']
+
+            tok = hit.get('known_name')
+            base_name = hit.get('base_name')
+            formula = hit.get('charged_formula')
+
             if cnt:
                 cnt = int(cnt)
             else:
                 cnt = 1
-            if tok not in self.valid_monosaccharides:
-                parts = self.monomer_tokenizer.findall(tok)
+            if tok is not None:
+                if tok not in self.valid_monosaccharides:
+                    parts = self.monomer_tokenizer.findall(tok)
+                    t = 0
+                    for p in parts:
+                        if p not in self.valid_monosaccharides:
+                            break
+                        t += len(p)
+                    if t != len(tok):
+                        raise ValueError("{tok!r} is not a valid monosaccharide name".format(tok=tok))
+                    else:
+                        for p in parts:
+                            if p not in self.valid_monosaccharides:
+                                raise UnknownMonosaccharideError(p)
+                            m, c, sym = self.valid_monosaccharides[p]
+                            mass += m * cnt
+                            chemcomp += c * cnt
+                            composite[sym] += cnt
+                else:
+                    m, c, sym = self.valid_monosaccharides[tok]
+                    mass += m * cnt
+                    chemcomp += c * cnt
+                    composite[sym] += cnt
+            elif formula is not None:
+                inner = FormulaModification(formula[1:-1]).resolve()
+                mass += inner['mass'] * cnt
+                chemcomp += inner['composition'] * cnt
+                composite[formula] += cnt
+                charge += inner['charge'] * cnt
+            elif base_name is not None:
+                parts = self.monomer_tokenizer.findall(base_name)
                 t = 0
                 for p in parts:
                     if p not in self.valid_monosaccharides:
                         break
                     t += len(p)
-                if t != len(tok):
-                    raise ValueError("{tok!r} is not a valid monosaccharide name".format(tok=tok))
+                if t != len(base_name):
+                    raise ValueError(
+                        f"{base_name!r} is not a valid monosaccharide name"
+                    )
                 else:
-                    for p in parts[:-1]:
-                        sym = self.valid_monosaccharides[p].symbol
-                        composite[sym] += 1
-                    sym = self.valid_monosaccharides[parts[-1]].symbol
-                    composite[sym] += cnt
+                    for p in parts:
+                        if p not in self.valid_monosaccharides:
+                            raise UnknownMonosaccharideError(p)
+                        m, c, sym = self.valid_monosaccharides[p]
+                        mass += m * cnt
+                        chemcomp += c * cnt
+                        composite[sym] += cnt
             else:
-                sym = self.valid_monosaccharides[tok].symbol
-                composite[sym] += cnt
-        mass = 0
-        chemcomp = Composition()
-        for key, cnt in composite.items():
-            try:
-                m, c, sym = self.valid_monosaccharides[key]
-            except KeyError:
-                raise UnknownMonosaccharideError(key)
-            mass += m * cnt
-            chemcomp += c * cnt
+                raise NotImplementedError(f"I do not know how to decode the impossible, {hit}")
+
         return {
             "mass": mass,
             "composition": chemcomp,
@@ -3995,22 +4070,104 @@ def _coerce_string_to_modification(item) -> TagBase:
         raise TypeError(f"Don't know how to coerce {item} of type {type(item)} to a modification")
 
 
-def modify_with(peptide: ProForma,
-                variable_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None,
-                fixed_modifications: Optional[Union[List[TagBase], dict[str, TagBase]]] = None,
-                include_unmodified: bool = True, include_labile: bool = False):
+def peptidoforms(
+    peptide: Union[ProForma, str],
+    variable_modifications: Optional[
+        Union[
+            List[Union[TagBase, str]],
+            dict[Union[TagBase, str], List[Union[str, TagBase]]],
+        ]
+    ] = None,
+    fixed_modifications: Optional[
+        Union[
+            List[Union[TagBase, str]],
+            dict[Union[TagBase, str], List[Union[str, TagBase]]],
+        ]
+    ] = None,
+    include_unmodified: bool = True,
+    include_labile: bool = False,
+) -> Iterator[ProForma]:
+    """
+    Generate the combinatorial cross-product of modifications for ``peptide``, given by
+    a set of variable and fixed modification rules, as in a classical peptide search engine.
+
+    This is similar to :func:`parser.peptidoforms`, but using :class:`ProForma` as the representation.
+    This uses ProForma 2.1's position limiting rules to give the caller greater control over how modifications
+    are applied, if desired.
+
+    Internally, this delegates to :class:`ProteoformCombinator` and would mirror the behavior of embedding all
+    of the modification rules directly in the sequence and calling :meth:`ProForma.generate_proteoforms`.
+
+    Parameters
+    ----------
+    peptide : :class:`ProForma` or :class:`str`
+        The base peptide to modify. If a string is provided, it will be parsed with :meth:`ProForma.parse`.
+        If ``peptide`` itself encodes modification rules or unlocalized modifications of any kind, they **will**
+        also be applied.
+    variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
+        The variable modifications that will be combined. If a list is provided, the values are assumed to either
+        be strings encoding a modification tag in ProForma notation or pre-parsed :class:`TagBase` modifications
+        with position limiting rules added with ``|`` separators. If a :class:`dict` is provided, keys are assumed
+        to be :class:`TagBase` modifications, as in the list-case, but the values of those keys are expected to be
+        :class:`TagBase` position limiters like :class:`PositionModifierTag`, or strings that will be coerced as
+        such.
+    fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
+        The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified``
+        is specified. See ``variable_modifications`` for an explanation of type coercion.
+    include_unmodified : :class:`bool`
+        For all non-fixed modifications, include the case where the modification is not included anywhere
+    include_labile : :class:`bool`
+        For all labile modifications, include the case where the modification is localized at every possible location
+
+    Yields
+    ------
+    :class:`ProForma`
+
+    Examples
+    --------
+    This example shows how to use the :class:`dict`-based modification rule approach.
+
+    >>> from pyteomics import proforma
+    >>> isos = proforma.peptidoforms(
+    ... "EMEVTESPEK",
+    ... variable_modifications={"Oxidation": ['M']})
+    >>> for i in isos:
+    ...     print(i)
+    EMEVTESPEK
+    EM[Oxidation|Position:M]EVTESPEK
+
+    Using pre-parsed objects gives the equivalent behavior and avoids needing to re-parse the rules
+    on every invocation.
+
+    >>> from pyteomics import proforma
+    >>> isos = proforma.peptidoforms(
+    ... proforma.ProForma.parse("EMEVTESPEK"),
+    ... variable_modifications={proforma.GenericModification("Oxidation"): [proforma.PositionModifierTag('M')]})
+    >>> for i in isos:
+    ...     print(i)
+    EMEVTESPEK
+    EM[Oxidation|Position:M]EVTESPEK
+
+    """
+    if isinstance(peptide, str):
+        peptide = ProForma.parse(peptide)
     template = peptide.copy()
+    seen = set()
     if variable_modifications:
         if isinstance(variable_modifications, list):
             template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications))
         elif isinstance(variable_modifications, dict):
             extra_rules = []
-            for target, tag in variable_modifications.items():
-                if isinstance(target, str):
-                    target = PositionModifierTag(target)
-                tag = _coerce_string_to_modification(tag)
-                tag.extra.append(target)
-                extra_rules.append(tag)
+            for tag, targets in variable_modifications.items():
+                seen.clear()
+                for target in targets:
+                    if isinstance(target, str):
+                        target = PositionModifierTag(target)
+                    if target in seen:
+                        continue
+                    seen.add(target)
+                    tag = _coerce_string_to_modification(tag)
+                    extra_rules.append(tag | target)
             template.unlocalized_modifications.extend(extra_rules)
         else:
             raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}")
@@ -4019,12 +4176,16 @@ def modify_with(peptide: ProForma,
             template.fixed_modifications.extend(map(_coerce_string_to_modification, fixed_modifications))
         elif isinstance(fixed_modifications, dict):
             extra_rules = []
-            for target, tag in fixed_modifications.items():
-                if isinstance(target, str):
-                    target = PositionModifierTag(target)
-                tag = _coerce_string_to_modification(tag)
-                tag.extra.append(target)
-                extra_rules.append(tag)
+            for tag, targets in fixed_modifications.items():
+                seen.clear()
+                for target in targets:
+                    if isinstance(target, str):
+                        target = PositionModifierTag(target)
+                    if target in seen:
+                        continue
+                    seen.add(target)
+                    tag = _coerce_string_to_modification(tag)
+                    extra_rules.append(tag | target)
             template.fixed_modifications.extend(extra_rules)
         else:
             raise TypeError(

From 473dff836f55a2b4003db3e0169c36cefd330639 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Thu, 19 Mar 2026 14:06:39 +0100
Subject: [PATCH 03/16] Fixes and tests

---
 pyteomics/proforma.py  | 32 ++++++++++++++++----------
 tests/test_proforma.py | 52 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 65 insertions(+), 19 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index dcfdaa40..bfc98685 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -1705,7 +1705,7 @@ def from_str(cls, target: str):
                     t = tokens[0].lower()
                     n_term = t == "n-term"
                     c_term = t == "c-term"
-                    cls(tokens[1], n_term, c_term)
+                    return cls(tokens[1], n_term, c_term)
                 else:
                     raise PyteomicsError(
                         "Modification target has an invalid amino acid specific terminal target {1} in {0}".format(
@@ -1743,9 +1743,9 @@ class ModificationRule(object):
     modification_tag: TagBase
     targets: List[ModificationTarget]
 
-    def __init__(self, modification_tag: TagBase, targets: Union[ModificationTarget, List[ModificationTarget], None]=None):
+    def __init__(self, modification_tag: TagBase, targets: Union[ModificationTarget, List[ModificationTarget], List[str], None]=None):
         self.modification_tag = modification_tag
-        self.targets = targets
+        self.targets = targets  # type: ignore
         self._validate_targets()
 
     def is_not_specific(self) -> bool:
@@ -1764,10 +1764,11 @@ def _validate_targets(self):
         for target in self.targets:
             if isinstance(target, ModificationTarget):
                 validated_targets.append(target)
-            try:
-                validated_targets.append(ModificationTarget.from_str(target))
-            except PyteomicsError as err:
-                raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err
+            else:
+                try:
+                    validated_targets.append(ModificationTarget.from_str(target))
+                except PyteomicsError as err:
+                    raise PyteomicsError(f"While parsing {self}, encountered error {err}") from err
 
         self.targets = validated_targets
 
@@ -3829,7 +3830,7 @@ def find_tags_by_id(self, tag_id, include_position=True):
     def tags(self):
         return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at]
 
-    def generate_proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]:
+    def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]:
         """
         Generate combinatorial localizations of modifications defined on this ProForma sequence.
 
@@ -3848,6 +3849,8 @@ def generate_proteoforms(self, include_unmodified: bool = False, include_labile:
         """
         return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile))
 
+    peptidoforms = proteoforms
+
     def copy(self) -> "ProForma":
         sequence = []
         for (aa, tags) in self:
@@ -4096,7 +4099,7 @@ def peptidoforms(
     are applied, if desired.
 
     Internally, this delegates to :class:`ProteoformCombinator` and would mirror the behavior of embedding all
-    of the modification rules directly in the sequence and calling :meth:`ProForma.generate_proteoforms`.
+    of the modification rules directly in the sequence and calling :meth:`ProForma.proteoforms`.
 
     Parameters
     ----------
@@ -4104,14 +4107,16 @@ def peptidoforms(
         The base peptide to modify. If a string is provided, it will be parsed with :meth:`ProForma.parse`.
         If ``peptide`` itself encodes modification rules or unlocalized modifications of any kind, they **will**
         also be applied.
-    variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
+    variable_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict`
+        mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
         The variable modifications that will be combined. If a list is provided, the values are assumed to either
         be strings encoding a modification tag in ProForma notation or pre-parsed :class:`TagBase` modifications
         with position limiting rules added with ``|`` separators. If a :class:`dict` is provided, keys are assumed
         to be :class:`TagBase` modifications, as in the list-case, but the values of those keys are expected to be
         :class:`TagBase` position limiters like :class:`PositionModifierTag`, or strings that will be coerced as
         such.
-    fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict` mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
+    fixed_modifications : :class:`list` of :class:`str` or :class:`TagBase` modification rules, or a :class:`dict`
+        mapping :class:`str` or :class:`TagBase` modifications to a list of :class:`str` or :class:`TagBase` targets
         The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified``
         is specified. See ``variable_modifications`` for an explanation of type coercion.
     include_unmodified : :class:`bool`
@@ -4192,7 +4197,10 @@ def peptidoforms(
                 f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}"
             )
 
-    return template.generate_proteoforms(include_unmodified=include_unmodified, include_labile=include_labile)
+    return template.proteoforms(include_unmodified=include_unmodified, include_labile=include_labile)
+
+
+proteoforms = peptidoforms
 
 
 class ProteoformCombinator:
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index c5a5e072..18f1879e 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -1,13 +1,15 @@
 from os import path
 import unittest
 import pickle
+import math
 import pyteomics
 pyteomics.__path__ = [path.abspath(
     path.join(path.dirname(__file__), path.pardir, 'pyteomics'))]
 from pyteomics.proforma import (
     PSIModModification, ProForma, TaggedInterval, parse, MassModification, ProFormaError, TagTypeEnum,
     ModificationRule, StableIsotope, GenericModification, Composition, to_proforma, ModificationMassNotFoundError,
-    AdductParser, ChargeState,
+    UnimodModification, PSIModModification, ModificationTarget,
+    AdductParser, ChargeState, proteoforms, _coerce_string_to_modification,
     std_aa_comp, obo_cache, process_tag_tokens)
 
 
@@ -487,15 +489,25 @@ def test_range(self):
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
-                proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified))
+                proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
                 self.assertEqual(len(proteoforms), 2 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
 
+    def test_unlocalized_position_list_and_count(self):
+        seq = "[Phospho|Position:S|Position:T]^2?EMEVTSESPEK"
+        nsites = 3
+        k = 2
+        pf = ProForma.parse(seq)
+        for include_unmodified in [False, True]:
+            with self.subTest(include_unmodified=include_unmodified):
+                proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
+                self.assertEqual(len(proteoforms), math.comb(nsites, k) + include_unmodified)   # k Phospho placed over the nsites S/T residues (+ no phospho if include_unmodified)
+
     def test_localization_tag(self):
         seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK"
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
-                proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified))
+                proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
                 self.assertEqual(len(proteoforms), 3 + include_unmodified)
 
     def test_unlocalized_modification(self):
@@ -503,23 +515,49 @@ def test_unlocalized_modification(self):
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
-                proteoforms = list(pf.generate_proteoforms(include_unmodified=include_unmodified))
+                proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
                 self.assertEqual(len(proteoforms), len(pf) + include_unmodified)
 
     def test_comup_stacking(self):
         seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK"
         pf = ProForma.parse(seq)
-        proteoforms = list(pf.generate_proteoforms())
+        proteoforms = list(pf.proteoforms())
         self.assertEqual(len(proteoforms), 4)
-        proteoforms = list(pf.generate_proteoforms(True))
+        proteoforms = list(pf.proteoforms(True))
         self.assertEqual(len(proteoforms), 9)
 
     def test_labile(self):
         seq = "{Phosphpo}EMEVTESPEK"
         pf = ProForma.parse(seq)
-        proteoforms = list(pf.generate_proteoforms(False, True))
+        proteoforms = list(pf.proteoforms(False, True))
         self.assertEqual(len(proteoforms), 11)
 
 
+class ProteoformsFunctionTest(unittest.TestCase):
+    def test_proteoforms(self):
+        seq = "EMEV(TS)[Phospho]ESPEK"
+        pf = ProForma.parse(seq)
+        for include_unmodified in [False, True]:
+            with self.subTest(include_unmodified=include_unmodified):
+                forms = list(proteoforms(pf, include_unmodified=include_unmodified))
+                self.assertEqual(len(forms), 2 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
+
+    def test_coerce_modification(self):
+        for s, m in [("Phospho", GenericModification("Phospho")),
+                     ("UNIMOD:21", UnimodModification("21")),
+                     ("MOD:00046", PSIModModification("00046"))]:
+            with self.subTest(s=s):
+                self.assertEqual(_coerce_string_to_modification(s), m)
+
+    def test_modification_target_from_str(self):
+        for s, t in [("S", ModificationTarget('S')),
+                     ("T", ModificationTarget('T')),
+                     ("N-term", ModificationTarget(None, True, False)),
+                     ("C-term", ModificationTarget(None, False, True)),
+                     ("N-term:K", ModificationTarget('K', True, False)),
+                     ("C-term:Y", ModificationTarget('Y', False, True))]:
+            with self.subTest(s=s):
+                self.assertEqual(ModificationTarget.from_str(s), t)
+
 if __name__ == '__main__':
     unittest.main()

From c7af7c8025aed9f2392b13f79ddb089c1e1cfbf6 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 27 Mar 2026 23:29:00 -0400
Subject: [PATCH 04/16] fix previous tests

---
 pyteomics/proforma.py  | 12 ++++++++++++
 tests/test_proforma.py | 14 ++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index bfc98685..8d8a3053 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -3960,6 +3960,7 @@ class GeneratorModificationRuleDirective:
     colocal_unknown: bool = False
     limit: int = 1
     labile: bool = False
+    token: Optional[ModificationToken] = None
 
     def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False):
         self.rule = rule
@@ -3968,6 +3969,7 @@ def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unkno
         self.colocal_unknown = colocal_unknown
         self.limit = limit
         self.labile = labile
+        self.token = getattr(self.rule.modification_tag, "key", None)
 
     def create(self) -> TagBase:
         return self.rule.modification_tag.copy()
@@ -4313,13 +4315,17 @@ def generate(self):
                 positions_for = [None] + positions_for
             position_choices.append(positions_for)
 
+        seen = set()
+
         for slots in itertools.product(*position_choices):
+            state = set()
             template = self.template.copy()
             valid = True
             labile_remaining = []
             for rule, idx in zip(self.variable_rules, slots):
                 if idx is None:
                     if rule.labile:
+                        state.add((None, rule.token))
                         labile_remaining.append(rule.create())
                     continue
                 if idx not in rule.find_positions(template):
@@ -4332,7 +4338,13 @@ def generate(self):
                 tag._generated = ModificationSourceType.Generated
                 tags.append(tag)
                 template[idx] = (aa, tags)
+                state.add((idx, rule.token))
             if valid:
+                state = frozenset(state)
+                if state in seen:
+                    continue
+                else:
+                    seen.add(state)
                 if labile_remaining:
                     template.labile_modifications = labile_remaining
                 yield template
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 18f1879e..7b143566 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -500,7 +500,13 @@ def test_unlocalized_position_list_and_count(self):
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
-                self.assertEqual(len(proteoforms), math.comb(nsites, k) + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
+                if not include_unmodified:
+                    self.assertEqual(len(proteoforms), math.comb(nsites, k))   # Phospho on T or S (+ no phospho if include_unmodified)
+                else:
+                    self.assertEqual(
+                        len(proteoforms),
+                        sum([math.comb(nsites, i) for i in range(k + 1)]),
+                    )
 
     def test_localization_tag(self):
         seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK"
@@ -522,12 +528,12 @@ def test_comup_stacking(self):
         seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK"
         pf = ProForma.parse(seq)
         proteoforms = list(pf.proteoforms())
-        self.assertEqual(len(proteoforms), 4)
+        self.assertEqual(len(proteoforms), 3)
         proteoforms = list(pf.proteoforms(True))
-        self.assertEqual(len(proteoforms), 9)
+        self.assertEqual(len(proteoforms), 4)
 
     def test_labile(self):
-        seq = "{Phosphpo}EMEVTESPEK"
+        seq = "{Phospho}EMEVTESPEK"
         pf = ProForma.parse(seq)
         proteoforms = list(pf.proteoforms(False, True))
         self.assertEqual(len(proteoforms), 11)

From 03a3cffa6c0a6401a23e943e2bab9155c3128500 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.edu>
Date: Sat, 28 Mar 2026 14:50:32 +0100
Subject: [PATCH 05/16] Add a test for proteoforms function with a dict

---
 pyteomics/proforma.py  | 4 ++--
 tests/test_proforma.py | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index 8d8a3053..b7d6a224 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -4147,10 +4147,10 @@ def peptidoforms(
     on every invocation.
 
     >>> from pyteomics import proforma
-    >>> isos = proforma.peptidoforms(
+    >>> pforms = proforma.peptidoforms(
     ... ProForma.parse("EMEVTESPEK"),
     ... variable_modifications={proforma.GenericModification("Oxidation"): [proforma.PositionModifierTag('M')]})
-    >>> for i in isos:
+    >>> for i in pforms:
     ...     print(i)
     EMEVTESPEK
     EM[Oxidation|Position:M]EVTESPEK
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 7b143566..793a8385 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -565,5 +565,14 @@ def test_modification_target_from_str(self):
             with self.subTest(s=s):
                 self.assertEqual(ModificationTarget.from_str(s), t)
 
+    def test_from_simple_dict(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = {"Phospho": ["S", "T"]}
+        pf = ProForma.parse(seq)
+        for include_unmodified in [False, True]:
+            with self.subTest(include_unmodified=include_unmodified):
+                forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
+                self.assertEqual(len(forms), 3 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
+
 if __name__ == '__main__':
     unittest.main()

From f82b8a42b8f8dd6feccaf82222d3af8fcb5f09cd Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 28 Mar 2026 23:33:08 -0400
Subject: [PATCH 06/16] Patch the failing unrelated test, Massive's PROXI
 server seems to be down

---
 tests/test_usi.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_usi.py b/tests/test_usi.py
index e3aeafe4..733103eb 100644
--- a/tests/test_usi.py
+++ b/tests/test_usi.py
@@ -4,7 +4,7 @@
 pyteomics.__path__ = [path.abspath(path.join(path.dirname(__file__), path.pardir, 'pyteomics'))]
 
 import unittest
-from urllib.error import HTTPError
+from urllib.error import HTTPError, URLError
 
 from pyteomics.usi import USI, proxi, AGGREGATOR_KEY
 from pyteomics.auxiliary import PyteomicsError
@@ -28,6 +28,9 @@ def test_request(self):
         usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256"
         try:
             response = proxi(usi_str, backend='massive')
+        except URLError as e:
+            if e.errno in {110, }:
+                self.skipTest(f"PROXI service is unavailable: ({e})")
         except HTTPError as e:
             if e.code in {500, 502, 503, 504}:
                 self.skipTest(f'PROXI service is unavailable ({e.code})')

From 2e8adb4f026161791f134a140e9f7f4d4fac2b12 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 28 Mar 2026 23:45:39 -0400
Subject: [PATCH 07/16] Add expand_rules argument

---
 pyteomics/proforma.py  | 144 ++++++++++++++++++++++++++++++++++++-----
 tests/test_proforma.py |  25 ++++++-
 2 files changed, 150 insertions(+), 19 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index b7d6a224..bb77fd91 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -13,8 +13,8 @@
 import itertools
 import re
 import warnings
-from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple
-from collections import deque, namedtuple
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple, DefaultDict
+from collections import Counter, deque, namedtuple
 from functools import partial
 from itertools import chain
 from array import array as _array
@@ -337,6 +337,12 @@ class PositionModifierTag(TagBase):
     def __init__(self, value, extra=None, group_id=None):
         super().__init__(TagTypeEnum.position_modifier, value, extra, group_id)
 
+    def __eq__(self, other):
+        return super().__eq__(other)
+
+    def __hash__(self):
+        return hash(self.value)
+
     def _format_main(self):
         return f"{self.prefix_name}:{self.value}"
 
@@ -3830,7 +3836,7 @@ def find_tags_by_id(self, tag_id, include_position=True):
     def tags(self):
         return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at]
 
-    def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]:
+    def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False, expand_rules: bool = False) -> Iterator["ProForma"]:
         """
         Generate combinatorial localizations of modifications defined on this ProForma sequence.
 
@@ -3838,16 +3844,21 @@ def proteoforms(self, include_unmodified: bool = False, include_labile: bool = F
         ----------
         include_unmodified : :class:`bool`
             For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to
-            how variable modification rules are applied in search engines.
+            how variable modification rules are applied in search engines. It still respects the number of copies of modifications included
+            in the input. See ``expand_rules``.
         include_labile : :class:`bool`
             For all labile modifications, include the case where the modification is localized at every possible location or as
             a remaining labile modification.
+        expand_rules : :class:`bool`
+            For all variable modifications, allow any number of copies of the modification to be included in the result.
+            This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
+            how ProForma's rules work. This forces ``include_unmodified`` to be :const:`True`.
 
         Yields
         ------
         :class:`ProForma`
         """
-        return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile))
+        return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile, expand_rules=expand_rules))
 
     peptidoforms = proteoforms
 
@@ -3962,6 +3973,24 @@ class GeneratorModificationRuleDirective:
     labile: bool = False
     token: Optional[ModificationToken] = None
 
+    def __eq__(self, other):
+        if other is None:
+            return False
+        return (
+            self.rule == other.rule and
+            self.region == other.region and
+            self.colocal_known == other.colocal_known and
+            self.colocal_unknown == other.colocal_unknown and
+            self.limit == other.limit and
+            self.labile == other.labile
+        )
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        return hash(self.token)
+
     def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False):
         self.rule = rule
         self.region = region
@@ -4091,6 +4120,7 @@ def peptidoforms(
     ] = None,
     include_unmodified: bool = True,
     include_labile: bool = False,
+    expand_rules: bool = False,
 ) -> Iterator[ProForma]:
     """
     Generate the combinatorial cross-product of modifications for ``peptide``, given by
@@ -4125,6 +4155,11 @@ def peptidoforms(
         For all non-fixed modifications, include the case where the modification is not included anywhere
     include_labile : :class:`bool`
         For all labile modifications, include the case where the modification is localized at every possible location
+    expand_rules : :class:`bool`
+        For all variable modifications, allow any number of copies of the modification to be included in the result.
+        This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
+        how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently
+        incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers.
 
     Yields
     ------
@@ -4155,6 +4190,30 @@ def peptidoforms(
     EMEVTESPEK
     EM[Oxidation|Position:M]EVTESPEK
 
+    To expand rules so that they might apply to as many positions as are available, as is often done when
+    building a combinatorial search space, use the ``expand_rules`` argument.
+    >>> from pyteomics import proforma
+    >>> isos = proforma.peptidoforms(
+    ... "EMEVTSESPEK",
+    ... variable_modifications={"Oxidation": ['M'], "Phospho": ['S', 'T']}, expand_rules=True)
+    >>> for i in isos:
+    ...     print(i)
+    EM[Oxidation|Position:M]EVT[Phospho|Position:T]S[Phospho|Position:S]ES[Phospho|Position:S]PEK
+    EMEVT[Phospho|Position:T]S[Phospho|Position:S]ES[Phospho|Position:S]PEK
+    EM[Oxidation|Position:M]EVTS[Phospho|Position:S]ES[Phospho|Position:S]PEK
+    EMEVTS[Phospho|Position:S]ES[Phospho|Position:S]PEK
+    EM[Oxidation|Position:M]EVT[Phospho|Position:T]S[Phospho|Position:S]ESPEK
+    EMEVT[Phospho|Position:T]S[Phospho|Position:S]ESPEK
+    EM[Oxidation|Position:M]EVTS[Phospho|Position:S]ESPEK
+    EMEVTS[Phospho|Position:S]ESPEK
+    EM[Oxidation|Position:M]EVT[Phospho|Position:T]SES[Phospho|Position:S]PEK
+    EMEVT[Phospho|Position:T]SES[Phospho|Position:S]PEK
+    EM[Oxidation|Position:M]EVTSES[Phospho|Position:S]PEK
+    EMEVTSES[Phospho|Position:S]PEK
+    EM[Oxidation|Position:M]EVT[Phospho|Position:T]SESPEK
+    EMEVT[Phospho|Position:T]SESPEK
+    EM[Oxidation|Position:M]EVTSESPEK
+    EMEVTSESPEK
     """
     if isinstance(peptide, str):
         peptide = ProForma.parse(peptide)
@@ -4199,7 +4258,11 @@ def peptidoforms(
                 f"Expected fixed_modifications to be a list or a dict, got {type(fixed_modifications)}"
             )
 
-    return template.proteoforms(include_unmodified=include_unmodified, include_labile=include_labile)
+    return template.proteoforms(
+        include_unmodified=include_unmodified,
+        include_labile=include_labile,
+        expand_rules=expand_rules,
+    )
 
 
 proteoforms = peptidoforms
@@ -4218,19 +4281,31 @@ class ProteoformCombinator:
     variable_rules: list[:class:`GeneratorModificationRuleDirective`]
         The rules to apply in combinations to the template sequence
     include_unmodified : :class:`bool`
-        For all non-fixed modifications, include the case where the modification is not included anywhere
+        For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to
+        how variable modification rules are applied in search engines. It still respects the number of copies of modifications included
+        in the input. See :attr:`expand_rules`.
     include_labile : :class:`bool`
         For all labile modifications, include the case where the modification is localized at every possible location
+    expand_rules : :class:`bool`
+        For all variable modifications, allow any number of copies of the modification to be included in the result.
+        This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
+        how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently
+        incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers.
     """
     template: ProForma
     include_unmodified: bool
     include_labile: bool
     variable_rules: List[GeneratorModificationRuleDirective]
 
-    def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False):
+    def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False):
+        if expand_rules:
+            if not include_unmodified:
+                warnings.warn("Forcing `include_unmodified = True` from `expand_rules`")
+            include_unmodified = True
         self.template = base_proteoform.copy()
         self.include_unmodified = include_unmodified
         self.include_labile = include_labile
+        self.expand_rules = expand_rules
         self.variable_rules = []
         self._extract_rules()
         self._apply_fixed_modifications()
@@ -4305,7 +4380,30 @@ def __iter__(self):
     def __next__(self):
         return next(self._iter)
 
-    def generate(self):
+    def _invert_position_rules(self, rules: List[GeneratorModificationRuleDirective], positions: List[List[Optional[int]]]) -> List[List[Tuple[Optional[int], GeneratorModificationRuleDirective]]]:
+        index = DefaultDict(list)
+
+        for rule, positions in zip(rules, positions):
+            if rule.labile:
+                index[None].append(rule)
+            for position in positions:
+                if position is None:
+                    continue
+                index[position].append(rule)
+
+        if self.include_unmodified:
+            for k in index:
+                index[k].append(None)
+
+        stacks = []
+        for idx, options in index.items():
+            stack = []
+            for opt in options:
+                stack.append((idx, opt))
+            stacks.append(stack)
+        return stacks
+
+    def _build_position_map(self):
         position_choices = []
         for rule in self.variable_rules:
             positions_for = rule.find_positions(self.template)
@@ -4314,18 +4412,31 @@ def generate(self):
             elif self.include_unmodified or not positions_for:
                 positions_for = [None] + positions_for
             position_choices.append(positions_for)
+        return position_choices
+
+    def _build_modification_iter(self) -> Iterator[List[Tuple[Optional[int], Optional[GeneratorModificationRuleDirective]]]]:
+        position_choices = self._build_position_map()
+        if self.expand_rules:
+            return itertools.product(*self._invert_position_rules(
+                self.variable_rules, position_choices
+            ))
+        else:
+            return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices))
 
+    def generate(self):
         seen = set()
-
-        for slots in itertools.product(*position_choices):
-            state = set()
+        for slots in self._build_modification_iter():
+            state = Counter()
             template = self.template.copy()
             valid = True
             labile_remaining = []
-            for rule, idx in zip(self.variable_rules, slots):
+
+            for idx, rule in slots:
+                if rule is None:
+                    continue
                 if idx is None:
                     if rule.labile:
-                        state.add((None, rule.token))
+                        state[((None, rule.token))] += 1
                         labile_remaining.append(rule.create())
                     continue
                 if idx not in rule.find_positions(template):
@@ -4338,9 +4449,10 @@ def generate(self):
                 tag._generated = ModificationSourceType.Generated
                 tags.append(tag)
                 template[idx] = (aa, tags)
-                state.add((idx, rule.token))
+                state[((idx, rule.token))] += 1
+
             if valid:
-                state = frozenset(state)
+                state = frozenset(state.items())
                 if state in seen:
                     continue
                 else:
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 793a8385..55bff515 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -10,7 +10,7 @@
     ModificationRule, StableIsotope, GenericModification, Composition, to_proforma, ModificationMassNotFoundError,
     UnimodModification, PSIModModification, ModificationTarget,
     AdductParser, ChargeState, proteoforms, _coerce_string_to_modification,
-    std_aa_comp, obo_cache, process_tag_tokens)
+    std_aa_comp, obo_cache, process_tag_tokens, peptidoforms)
 
 
 class ProFormaTest(unittest.TestCase):
@@ -530,7 +530,7 @@ def test_comup_stacking(self):
         proteoforms = list(pf.proteoforms())
         self.assertEqual(len(proteoforms), 3)
         proteoforms = list(pf.proteoforms(True))
-        self.assertEqual(len(proteoforms), 4)
+        self.assertEqual(len(proteoforms), 6)
 
     def test_labile(self):
         seq = "{Phospho}EMEVTESPEK"
@@ -538,6 +538,22 @@ def test_labile(self):
         proteoforms = list(pf.proteoforms(False, True))
         self.assertEqual(len(proteoforms), 11)
 
+    def test_expand(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"}
+        pf = ProForma.parse(seq)
+        combos = peptidoforms(
+            pf,
+            variable_modifications=variable_mods,
+            expand_rules=True,
+        )
+        variants = list(combos)
+        self.assertEqual(len(variants), 16)
+        self.assertEqual(
+            8,
+            sum(['Oxidation' in str(p) for p in variants])
+        )
+
 
 class ProteoformsFunctionTest(unittest.TestCase):
     def test_proteoforms(self):
@@ -572,7 +588,10 @@ def test_from_simple_dict(self):
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
-                self.assertEqual(len(forms), 3 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
+                if include_unmodified:
+                    self.assertEqual(len(forms), 6)
+                else:
+                    self.assertEqual(len(forms), 2)
 
 if __name__ == '__main__':
     unittest.main()

From c9f5f01f0ce3276f7820614f1abd813b45f8572f Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sun, 29 Mar 2026 00:02:55 -0400
Subject: [PATCH 08/16] chore: rework USI test patch

---
 tests/test_usi.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_usi.py b/tests/test_usi.py
index 733103eb..30cbaa9b 100644
--- a/tests/test_usi.py
+++ b/tests/test_usi.py
@@ -28,14 +28,16 @@ def test_request(self):
         usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256"
         try:
             response = proxi(usi_str, backend='massive')
-        except URLError as e:
-            if e.errno in {110, }:
-                self.skipTest(f"PROXI service is unavailable: ({e})")
         except HTTPError as e:
             if e.code in {500, 502, 503, 504}:
                 self.skipTest(f'PROXI service is unavailable ({e.code})')
             else:
                 raise
+        except URLError as e:
+            if e.errno in {110, }:
+                self.skipTest(f"PROXI service is unavailable: ({e})")
+            else:
+                raise
 
         assert set(usi_proxi_data.keys()) <= set(response.keys())
 

From 770b2c2efbd0e32cfaacd69899445604d109eb13 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.edu>
Date: Sun, 29 Mar 2026 13:29:20 +0200
Subject: [PATCH 09/16] Only warn if expand_rules is true and
 include_unmodified is explicitly false

---
 pyteomics/proforma.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index bb77fd91..e23ae126 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -13,8 +13,8 @@
 import itertools
 import re
 import warnings
-from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple, DefaultDict
-from collections import Counter, deque, namedtuple
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ClassVar, Sequence, Tuple, Type, Union, Generic, TypeVar, NamedTuple
+from collections import Counter, deque, namedtuple, defaultdict
 from functools import partial
 from itertools import chain
 from array import array as _array
@@ -4297,11 +4297,14 @@ class ProteoformCombinator:
     include_labile: bool
     variable_rules: List[GeneratorModificationRuleDirective]
 
-    def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False):
+    def __init__(self, base_proteoform: ProForma, include_unmodified: Optional[bool] = False, include_labile: bool = False, expand_rules: bool = False):
         if expand_rules:
-            if not include_unmodified:
+            if include_unmodified is False:
                 warnings.warn("Forcing `include_unmodified = True` from `expand_rules`")
             include_unmodified = True
+        else:
+            include_unmodified = bool(include_unmodified)
+
         self.template = base_proteoform.copy()
         self.include_unmodified = include_unmodified
         self.include_labile = include_labile
@@ -4381,12 +4384,12 @@ def __next__(self):
         return next(self._iter)
 
     def _invert_position_rules(self, rules: List[GeneratorModificationRuleDirective], positions: List[List[Optional[int]]]) -> List[List[Tuple[Optional[int], GeneratorModificationRuleDirective]]]:
-        index = DefaultDict(list)
+        index = defaultdict(list)
 
-        for rule, positions in zip(rules, positions):
+        for rule, position_list in zip(rules, positions):
             if rule.labile:
                 index[None].append(rule)
-            for position in positions:
+            for position in position_list:
                 if position is None:
                     continue
                 index[position].append(rule)

From a6f7f7f5fb211af59f5639b31f943436e30456a9 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.edu>
Date: Sun, 29 Mar 2026 13:29:41 +0200
Subject: [PATCH 10/16] Update test_from_simple_dict

---
 tests/test_proforma.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 55bff515..44862446 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -584,14 +584,18 @@ def test_modification_target_from_str(self):
     def test_from_simple_dict(self):
         seq = "EMEVTSESPEK"
         variable_mods = {"Phospho": ["S", "T"]}
+        nsites = seq.count("S") + seq.count("T")
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
                 if include_unmodified:
-                    self.assertEqual(len(forms), 6)
+                    self.assertEqual(len(forms), nsites + 1)
                 else:
-                    self.assertEqual(len(forms), 2)
+                    self.assertEqual(len(forms), nsites)
+        with self.subTest(expand_rules=True):
+            forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+            self.assertEqual(len(forms), 2 ** nsites)
 
 if __name__ == '__main__':
     unittest.main()

From a40f893633241e88ad9435ccde5b1b60c1022cf2 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sun, 29 Mar 2026 22:31:57 -0400
Subject: [PATCH 11/16] refactor `expand_rules`

---
 pyteomics/proforma.py  | 54 +++++++++++++++++++-----------------------
 tests/test_proforma.py |  7 ++++--
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index bb77fd91..a319862e 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -3836,7 +3836,7 @@ def find_tags_by_id(self, tag_id, include_position=True):
     def tags(self):
         return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at]
 
-    def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False, expand_rules: bool = False) -> Iterator["ProForma"]:
+    def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]:
         """
         Generate combinatorial localizations of modifications defined on this ProForma sequence.
 
@@ -3849,16 +3849,12 @@ def proteoforms(self, include_unmodified: bool = False, include_labile: bool = F
         include_labile : :class:`bool`
             For all labile modifications, include the case where the modification is localized at every possible location or as
             a remaining labile modification.
-        expand_rules : :class:`bool`
-            For all variable modifications, allow any number of copies of the modification to be included in the result.
-            This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
-            how ProForma's rules work. This forces ``include_unmodified`` to be :const:`True`.
 
         Yields
         ------
         :class:`ProForma`
         """
-        return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile, expand_rules=expand_rules))
+        return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile))
 
     peptidoforms = proteoforms
 
@@ -4158,8 +4154,7 @@ def peptidoforms(
     expand_rules : :class:`bool`
         For all variable modifications, allow any number of copies of the modification to be included in the result.
         This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
-        how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently
-        incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers.
+        how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`.
 
     Yields
     ------
@@ -4217,23 +4212,40 @@ def peptidoforms(
     """
     if isinstance(peptide, str):
         peptide = ProForma.parse(peptide)
+    if expand_rules:
+        include_unmodified = True
     template = peptide.copy()
     seen = set()
     if variable_modifications:
         if isinstance(variable_modifications, list):
-            template.unlocalized_modifications.extend(map(_coerce_string_to_modification, variable_modifications))
+            extra_rules = []
+            for rule in map(_coerce_string_to_modification, variable_modifications):
+                if expand_rules:
+                    parsed_rule = GeneratorModificationRuleDirective.from_unlocalized_rule(
+                        rule
+                    )
+                    extra_rules.extend([rule] * len(parsed_rule.find_positions(template)))
+                else:
+                    extra_rules.append(rule)
+            template.unlocalized_modifications.extend(extra_rules)
         elif isinstance(variable_modifications, dict):
             extra_rules = []
             for tag, targets in variable_modifications.items():
                 seen.clear()
+                tag = _coerce_string_to_modification(tag)
                 for target in targets:
                     if isinstance(target, str):
                         target = PositionModifierTag(target)
                     if target in seen:
                         continue
                     seen.add(target)
-                    tag = _coerce_string_to_modification(tag)
-                    extra_rules.append(tag | target)
+                    tag = tag | target
+                if expand_rules:
+                    rule = GeneratorModificationRuleDirective.from_unlocalized_rule(tag)
+                    n = len(rule.find_positions(peptide))
+                    extra_rules.extend([tag] * n)
+                else:
+                    extra_rules.append(tag)
             template.unlocalized_modifications.extend(extra_rules)
         else:
             raise TypeError(f"Expected variable_modifications to be a list or a dict, got {type(variable_modifications)}")
@@ -4261,7 +4273,6 @@ def peptidoforms(
     return template.proteoforms(
         include_unmodified=include_unmodified,
         include_labile=include_labile,
-        expand_rules=expand_rules,
     )
 
 
@@ -4286,26 +4297,16 @@ class ProteoformCombinator:
         in the input. See :attr:`expand_rules`.
     include_labile : :class:`bool`
         For all labile modifications, include the case where the modification is localized at every possible location
-    expand_rules : :class:`bool`
-        For all variable modifications, allow any number of copies of the modification to be included in the result.
-        This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
-        how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`. This behavior is currently
-        incompatible with modification stacking with ``Limit`` and ``CoMUP`` tag modifiers.
     """
     template: ProForma
     include_unmodified: bool
     include_labile: bool
     variable_rules: List[GeneratorModificationRuleDirective]
 
-    def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, expand_rules: bool=False):
-        if expand_rules:
-            if not include_unmodified:
-                warnings.warn("Forcing `include_unmodified = True` from `expand_rules`")
-            include_unmodified = True
+    def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False):
         self.template = base_proteoform.copy()
         self.include_unmodified = include_unmodified
         self.include_labile = include_labile
-        self.expand_rules = expand_rules
         self.variable_rules = []
         self._extract_rules()
         self._apply_fixed_modifications()
@@ -4416,12 +4417,7 @@ def _build_position_map(self):
 
     def _build_modification_iter(self) -> Iterator[List[Tuple[Optional[int], Optional[GeneratorModificationRuleDirective]]]]:
         position_choices = self._build_position_map()
-        if self.expand_rules:
-            return itertools.product(*self._invert_position_rules(
-                self.variable_rules, position_choices
-            ))
-        else:
-            return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices))
+        return map(lambda pos: zip(pos, self.variable_rules), itertools.product(*position_choices))
 
     def generate(self):
         seen = set()
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 55bff515..12399ea5 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -589,9 +589,12 @@ def test_from_simple_dict(self):
             with self.subTest(include_unmodified=include_unmodified):
                 forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
                 if include_unmodified:
-                    self.assertEqual(len(forms), 6)
+                    self.assertEqual(len(forms), 4)
                 else:
-                    self.assertEqual(len(forms), 2)
+                    self.assertEqual(len(forms), 3)
+
+        forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+        self.assertEqual(len(forms), 8)
 
 if __name__ == '__main__':
     unittest.main()

From 4736e32c3737258d5afd8f4436fefe47da6d05aa Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Mon, 30 Mar 2026 13:08:51 +0200
Subject: [PATCH 12/16] Expand tests

---
 tests/test_proforma.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 6dbc6ffd..8340b1db 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -558,11 +558,12 @@ def test_expand(self):
 class ProteoformsFunctionTest(unittest.TestCase):
     def test_proteoforms(self):
         seq = "EMEV(TS)[Phospho]ESPEK"
+        nsites = 2  # length of the range
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 forms = list(proteoforms(pf, include_unmodified=include_unmodified))
-                self.assertEqual(len(forms), 2 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
+                self.assertEqual(len(forms), nsites + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
 
     def test_coerce_modification(self):
         for s, m in [("Phospho", GenericModification("Phospho")),
@@ -590,12 +591,35 @@ def test_from_simple_dict(self):
             with self.subTest(include_unmodified=include_unmodified):
                 forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
                 if include_unmodified:
-                    self.assertEqual(len(forms), 4)
+                    self.assertEqual(len(forms), nsites + 1)   # Phospho on T or S + no phospho
                 else:
-                    self.assertEqual(len(forms), 3)
+                    self.assertEqual(len(forms), nsites)  # Phospho on T or S
 
         forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
-        self.assertEqual(len(forms), 8)
+        self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho / no phospho on each S or T
+
+    def test_from_str(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = ["Phospho|Position:S|Position:T"]
+        nsites = seq.count("S") + seq.count("T")
+        pf = ProForma.parse(seq)
+        for include_unmodified in [False, True]:
+            with self.subTest(include_unmodified=include_unmodified):
+                forms = list(proteoforms(pf, variable_modifications=variable_mods, include_unmodified=include_unmodified))
+                if include_unmodified:
+                    self.assertEqual(len(forms), nsites + 1)   # Phospho on T or S + no phospho
+                else:
+                    self.assertEqual(len(forms), nsites)  # Phospho on T or S
+        forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+        self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho / no phospho on each S or T
+
+    def test_expand_mods_from_list(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = ["Phospho|Position:S", "Phospho|Position:T"]
+        nsites = seq.count("S") + seq.count("T")
+        pf = ProForma.parse(seq)
+        forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+        self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho on 0, 1, or 2 of the S or T
 
 if __name__ == '__main__':
     unittest.main()

From cc29c349ba628faa2782daf373e2db5ab893e807 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Tue, 14 Apr 2026 12:33:50 +0200
Subject: [PATCH 13/16] Skip the PROXI test on timeout

---
 tests/test_usi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_usi.py b/tests/test_usi.py
index 30cbaa9b..b703e79a 100644
--- a/tests/test_usi.py
+++ b/tests/test_usi.py
@@ -34,7 +34,7 @@ def test_request(self):
             else:
                 raise
         except URLError as e:
-            if e.errno in {110, }:
+            if getattr(e.reason, 'errno', None) in {110}:
                 self.skipTest(f"PROXI service is unavailable: ({e})")
             else:
                 raise

From 9254a8eeb379bc0d4363aa3ed13a53146976b2ac Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Tue, 14 Apr 2026 18:13:00 +0200
Subject: [PATCH 14/16] Make test code slightly more descriptive

---
 CHANGELOG              |  8 ++++---
 pyteomics/proforma.py  |  8 +++----
 pyteomics/version.py   |  2 +-
 tests/test_proforma.py | 54 +++++++++++++++++++++++-------------------
 4 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 5d5e120e..e8b1aaeb 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-5.0c0
+5.0c1
 -----
 
  - Update the standard ion compositions to be more consistent with the adopted ion type notation.
@@ -14,8 +14,10 @@
  - Support **ProForma 2.1** (`#183 <https://github.com/levitsky/pyteomics/pull/183>`_ by Joshua Klein).
    You can calculate compositions for :py:class:`ProForma` objects using :py:meth:`pyteomics.proforma.Proforma.composition`
    and get m/z with annotated or user-provided charge state using :py:meth:`pyteomics.proforma.Proforma.mz`.
-   You can also iterate through possible peptidoforms when a ProForma sequence is annotated with some ambiguity using
-   :py:meth:`pyteomics.proforma.Proforma.generate_proteoforms`.
+
+ - You can also iterate through possible peptidoforms when a ProForma sequence is annotated with some ambiguity using
+   :py:meth:`pyteomics.proforma.ProForma.proteoforms` and apply additional modification specifications to any ProForma sequence
+   using :py:func:`pyteomics.proforma.proteoforms` (`#196 <https://github.com/levitsky/pyteomics/pull/196>`_ by Joshua Klein).
 
  - Implement **thread-based parallelism**.
    Following the introduction of `official free-threading Python implementations <https://docs.python.org/3/howto/free-threading-python.html>`_
diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index 6bb56359..8bb13682 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -4148,9 +4148,9 @@ def peptidoforms(
         The fixed modifications that will be applied to all combinations, even the unmodified version if ``include_unmodified``
         is specified. See ``variable_modifications`` for an explanation of type coercion.
     include_unmodified : :class:`bool`
-        For all non-fixed modifications, include the case where the modification is not included anywhere
+        For all non-fixed modifications, include the case where the modification is not included anywhere.
     include_labile : :class:`bool`
-        For all labile modifications, include the case where the modification is localized at every possible location
+        For all labile modifications, include the case where the modification is localized at every possible location.
     expand_rules : :class:`bool`
         For all variable modifications, allow any number of copies of the modification to be included in the result.
         This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
@@ -4290,13 +4290,13 @@ class ProteoformCombinator:
     template: :class:`ProForma`
         The template sequence to apply any combination of rules to
     variable_rules: list[:class:`GeneratorModificationRuleDirective`]
-        The rules to apply in combinations to the template sequence
+        The rules to apply in combinations to the template sequence.
     include_unmodified : :class:`bool`
         For all non-fixed modifications, include the case where the modification is not included anywhere. This is equivalent to
         how variable modification rules are applied in search engines. It still respects the number of copies of modifications included
         in the input. See :attr:`expand_rules`.
     include_labile : :class:`bool`
-        For all labile modifications, include the case where the modification is localized at every possible location
+        For all labile modifications, include the case where the modification is localized at every possible location.
     """
     template: ProForma
     include_unmodified: bool
diff --git a/pyteomics/version.py b/pyteomics/version.py
index f0033221..7e7b0f5a 100644
--- a/pyteomics/version.py
+++ b/pyteomics/version.py
@@ -19,7 +19,7 @@
 
 """
 
-__version__ = '5.0c0'
+__version__ = '5.0c1'
 
 from collections import namedtuple
 import re
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 8340b1db..00ec21a7 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -493,28 +493,29 @@ def test_range(self):
                 self.assertEqual(len(proteoforms), 2 + include_unmodified)   # Phospho on T or S (+ no phospho if include_unmodified)
 
     def test_unlocalized_position_list_and_count(self):
-        seq = "[Phospho|Position:S|Position:T]^2?EMEVTSESPEK"
-        nsites = 3
         k = 2
+        seq = f"[Phospho|Position:S|Position:T]^{k}?EMEVTSESPEK"
+        nsites = seq.partition('?')[2].count('S') + seq.partition('?')[2].count('T')
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
                 if not include_unmodified:
-                    self.assertEqual(len(proteoforms), math.comb(nsites, k))   # Phospho on T or S (+ no phospho if include_unmodified)
+                    self.assertEqual(len(proteoforms), math.comb(nsites, k))   # Phospho on T or S, exactly `k` times
                 else:
                     self.assertEqual(
                         len(proteoforms),
-                        sum([math.comb(nsites, i) for i in range(k + 1)]),
+                        sum([math.comb(nsites, i) for i in range(k + 1)]),  # Phospho on T or S, anywhere from 0 to `k` times
                     )
 
     def test_localization_tag(self):
         seq = "EMEVT[#g1]S[#g1]ES[Phospho#g1]PEK"
+        nsites = seq.count('#g1')
         pf = ProForma.parse(seq)
         for include_unmodified in [False, True]:
             with self.subTest(include_unmodified=include_unmodified):
                 proteoforms = list(pf.proteoforms(include_unmodified=include_unmodified))
-                self.assertEqual(len(proteoforms), 3 + include_unmodified)
+                self.assertEqual(len(proteoforms), nsites + include_unmodified)
 
     def test_unlocalized_modification(self):
         seq = "[Phospho]?EMEVTSESPEK"
@@ -525,10 +526,15 @@ def test_unlocalized_modification(self):
                 self.assertEqual(len(proteoforms), len(pf) + include_unmodified)
 
     def test_comup_stacking(self):
-        seq = "[Phospho|Position:S|Position:T|comup|Limit:2]^2?EMEVTESPEK"
+        k = 2  # number of modifications to combine
+        limit = 2  # stack limit
+        seq = f"[Phospho|Position:S|Position:T|comup|Limit:{limit}]^{k}?EMEVTESPEK"
+        nsites = seq.partition('?')[2].count('S') + seq.partition('?')[2].count('T')
+        self.assertGreaterEqual(nsites * limit, k)  # otherwise we can't place `k` mods even with stacking
+        effective_limit = min(limit, k)  # if limit >= k, then we can just treat it as a normal combinatorial expansion
         pf = ProForma.parse(seq)
         proteoforms = list(pf.proteoforms())
-        self.assertEqual(len(proteoforms), 3)
+        self.assertEqual(len(proteoforms), math.comb(nsites + effective_limit - 1, k))  # number of ways to place `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit`
         proteoforms = list(pf.proteoforms(True))
         self.assertEqual(len(proteoforms), 6)
 
@@ -536,23 +542,7 @@ def test_labile(self):
         seq = "{Phospho}EMEVTESPEK"
         pf = ProForma.parse(seq)
         proteoforms = list(pf.proteoforms(False, True))
-        self.assertEqual(len(proteoforms), 11)
-
-    def test_expand(self):
-        seq = "EMEVTSESPEK"
-        variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"}
-        pf = ProForma.parse(seq)
-        combos = peptidoforms(
-            pf,
-            variable_modifications=variable_mods,
-            expand_rules=True,
-        )
-        variants = list(combos)
-        self.assertEqual(len(variants), 16)
-        self.assertEqual(
-            8,
-            sum(['Oxidation' in str(p) for p in variants])
-        )
+        self.assertEqual(len(proteoforms), len(pf) + 1)  # all possible sites and the form where phospho is kept as labile
 
 
 class ProteoformsFunctionTest(unittest.TestCase):
@@ -598,6 +588,22 @@ def test_from_simple_dict(self):
         forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
         self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho / no phospho on each S or T
 
+    def test_expand(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = {"Phospho": ["S", "T"], "Oxidation": "M"}
+        pf = ProForma.parse(seq)
+        combos = peptidoforms(
+            pf,
+            variable_modifications=variable_mods,
+            expand_rules=True,
+        )
+        variants = list(combos)
+        self.assertEqual(len(variants), 16)
+        self.assertEqual(
+            8,
+            sum(['Oxidation' in str(p) for p in variants])
+        )
+
     def test_from_str(self):
         seq = "EMEVTSESPEK"
         variable_mods = ["Phospho|Position:S|Position:T"]

From b4f2a6cd20a8a16f5c39649801dd3d919e3645a1 Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Tue, 14 Apr 2026 18:49:25 +0200
Subject: [PATCH 15/16] Add support for CoMUP with expand_rules

---
 pyteomics/proforma.py  | 10 +++++-----
 tests/test_proforma.py | 20 +++++++++++++++++++-
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index 8bb13682..da92cbfe 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -4045,7 +4045,7 @@ def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDir
         rule = ModificationRule(tag, [])
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
-        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit)
 
     @classmethod
@@ -4058,7 +4058,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
         rule = ModificationRule(modification_tag=mod, targets=targets)
-        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit)
 
     @classmethod
@@ -4073,7 +4073,7 @@ def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModification
             colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
             colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
             rule = ModificationRule(modification_tag=mod, targets=targets)
-            limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+            limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
             rules.append(cls(rule, region, colocal_known, colocal_unknown, limit))
         return rules
 
@@ -4087,7 +4087,7 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
         rule = ModificationRule(modification_tag=mod, targets=targets)
-        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True)
 
 
@@ -4224,7 +4224,7 @@ def peptidoforms(
                     parsed_rule = GeneratorModificationRuleDirective.from_unlocalized_rule(
                         rule
                     )
-                    extra_rules.extend([rule] * len(parsed_rule.find_positions(template)))
+                    extra_rules.extend([rule] * len(parsed_rule.find_positions(template) * parsed_rule.limit))
                 else:
                     extra_rules.append(rule)
             template.unlocalized_modifications.extend(extra_rules)
diff --git a/tests/test_proforma.py b/tests/test_proforma.py
index 00ec21a7..19795df0 100644
--- a/tests/test_proforma.py
+++ b/tests/test_proforma.py
@@ -536,7 +536,7 @@ def test_comup_stacking(self):
         proteoforms = list(pf.proteoforms())
         self.assertEqual(len(proteoforms), math.comb(nsites + effective_limit - 1, k))  # number of ways to place `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit`
         proteoforms = list(pf.proteoforms(True))
-        self.assertEqual(len(proteoforms), 6)
+        self.assertEqual(len(proteoforms), sum([math.comb(nsites + min(limit, i) - 1, i) for i in range(k + 1)]))  # number of ways to place anywhere from 0 to `k` indistinguishable mods on `nsites` distinguishable sites with a stack limit of `effective_limit`
 
     def test_labile(self):
         seq = "{Phospho}EMEVTESPEK"
@@ -627,5 +627,23 @@ def test_expand_mods_from_list(self):
         forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
         self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho on 0, 1, or 2 of the S or T
 
+    def test_expand_mods_from_dict(self):
+        seq = "EMEVTSESPEK"
+        variable_mods = {"Phospho": ["S", "T"]}
+        nsites = seq.count("S") + seq.count("T")
+        pf = ProForma.parse(seq)
+        forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+        self.assertEqual(len(forms), 2 ** nsites)  # all combinations of phospho / no phospho on each S or T
+
+    def test_expand_mods_comup(self):
+        seq = "EMEVTSESPEK"
+        limit = 2
+        variable_mods = [f"Phospho|Position:S|Position:T|comup|Limit:{limit}"]
+        nsites = seq.count("S") + seq.count("T")
+        pf = ProForma.parse(seq)
+        forms = list(proteoforms(pf, variable_modifications=variable_mods, expand_rules=True))
+        self.assertEqual(len(forms), (limit + 1) ** nsites)  # all combinations of 0 to `limit` phosphos on each S or T
+
+
 if __name__ == '__main__':
     unittest.main()

From c4c0d58de442fc1f05b3d92728fd417b7c68a16c Mon Sep 17 00:00:00 2001
From: Lev Levitsky <lev.levitsky@phystech.su>
Date: Tue, 14 Apr 2026 18:55:52 +0200
Subject: [PATCH 16/16] Roll back unneeded value type conversions

---
 pyteomics/proforma.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyteomics/proforma.py b/pyteomics/proforma.py
index da92cbfe..13db903c 100644
--- a/pyteomics/proforma.py
+++ b/pyteomics/proforma.py
@@ -4045,7 +4045,7 @@ def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDir
         rule = ModificationRule(tag, [])
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
-        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit)
 
     @classmethod
@@ -4058,7 +4058,7 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
         rule = ModificationRule(modification_tag=mod, targets=targets)
-        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit)
 
     @classmethod
@@ -4073,7 +4073,7 @@ def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModification
             colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
             colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
             rule = ModificationRule(modification_tag=mod, targets=targets)
-            limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+            limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
             rules.append(cls(rule, region, colocal_known, colocal_unknown, limit))
         return rules
 
@@ -4087,7 +4087,7 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
         colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
         colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
         rule = ModificationRule(modification_tag=mod, targets=targets)
-        limit = max([int(t.value) for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
+        limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
         return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True)