Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 36 additions & 19 deletions pyteomics/proforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -3836,7 +3836,7 @@ def find_tags_by_id(self, tag_id, include_position=True):
def tags(self):
return [tag for tags_at in [pos[1] for pos in self if pos[1]] for tag in tags_at]

def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False) -> Iterator["ProForma"]:
def proteoforms(self, include_unmodified: bool = False, include_labile: bool = False, strip: bool = False) -> Iterator["ProForma"]:
"""
Generate combinatorial localizations of modifications defined on this ProForma sequence.

Expand All @@ -3849,12 +3849,15 @@ def proteoforms(self, include_unmodified: bool = False, include_labile: bool = F
include_labile : :class:`bool`
For all labile modifications, include the case where the modification is localized at every possible location or as
a remaining labile modification.
strip : :class:`bool`
If :const:`True`, the generated peptidoforms will have all modification tags stripped of any extra information,
leaving only the bare modification definition.

Yields
------
:class:`ProForma`
"""
return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile))
return iter(ProteoformCombinator(self, include_unmodified=include_unmodified, include_labile=include_labile, strip=strip))

peptidoforms = proteoforms

Expand Down Expand Up @@ -3968,6 +3971,7 @@ class GeneratorModificationRuleDirective:
limit: int = 1
labile: bool = False
token: Optional[ModificationToken] = None
strip: bool = False

def __eq__(self, other):
if other is None:
Expand All @@ -3987,17 +3991,21 @@ def __ne__(self, other):
# Hashing uses only ``token``, which ``__init__`` reads from the ``key`` attribute of
# ``rule.modification_tag`` (falling back to ``None`` when that attribute is absent).
def __hash__(self):
return hash(self.token)

def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False):
def __init__(self, rule, region=None, colocal_known: bool = False, colocal_unknown: bool = False, limit: int = 1, labile: bool = False, strip: bool = False):
self.rule = rule
self.region = region
self.colocal_known = colocal_known
self.colocal_unknown = colocal_unknown
self.limit = limit
self.labile = labile
self.strip = strip
self.token = getattr(self.rule.modification_tag, "key", None)

def create(self) -> TagBase:
return self.rule.modification_tag.copy()
tag = self.rule.modification_tag.copy()
if self.strip:
tag.extra.clear()
return tag

# Debugging representation: shows the rule, the region and the two colocalization
# flags; ``limit``, ``labile`` and ``strip`` are not part of the output.
def __repr__(self):
return f"{self.__class__.__name__}({self.rule}, {self.region}, {self.colocal_known}, {self.colocal_unknown})"
Expand Down Expand Up @@ -4036,7 +4044,7 @@ def find_positions(self, sequence: ProForma) -> List[int]:
return positions

@classmethod
def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
def from_tagged_modification(cls, tag: TagBase, strip: bool = False) -> "GeneratorModificationRuleDirective":
mod = tag.find_modification()
if not mod:
return
Expand All @@ -4046,10 +4054,10 @@ def from_tagged_modification(cls, tag: TagBase) -> "GeneratorModificationRuleDir
colocal_known = bool(tag.find_tag_type(TagTypeEnum.comkp))
colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
return cls(rule, None, colocal_known, colocal_unknown, limit)
return cls(rule, None, colocal_known, colocal_unknown, limit, strip=strip)

@classmethod
def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
def from_unlocalized_rule(cls, tag: TagBase, strip: bool = False) -> "GeneratorModificationRuleDirective":
mod = tag.find_modification()
if not mod:
return
Expand All @@ -4059,10 +4067,10 @@ def from_unlocalized_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirect
colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
rule = ModificationRule(modification_tag=mod, targets=targets)
limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
return cls(rule, None, colocal_known, colocal_unknown, limit)
return cls(rule, None, colocal_known, colocal_unknown, limit, strip=strip)

@classmethod
def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModificationRuleDirective']:
def from_region_rule(cls, region: TaggedInterval, strip: bool = False) -> List['GeneratorModificationRuleDirective']:
rules = []
for tag in (region.tags or []):
mod = tag.find_modification()
Expand All @@ -4074,11 +4082,11 @@ def from_region_rule(cls, region: TaggedInterval) -> List['GeneratorModification
colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
rule = ModificationRule(modification_tag=mod, targets=targets)
limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
rules.append(cls(rule, region, colocal_known, colocal_unknown, limit))
rules.append(cls(rule, region, colocal_known, colocal_unknown, limit, strip=strip))
return rules

@classmethod
def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
def from_labile_rule(cls, tag: TagBase, strip: bool = False) -> "GeneratorModificationRuleDirective":
mod = tag.find_modification()
if not mod:
return
Expand All @@ -4088,7 +4096,7 @@ def from_labile_rule(cls, tag: TagBase) -> "GeneratorModificationRuleDirective":
colocal_unknown = bool(tag.find_tag_type(TagTypeEnum.comup))
rule = ModificationRule(modification_tag=mod, targets=targets)
limit = max([t.value for t in tag.find_tag_type(TagTypeEnum.limit)] + [1])
return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True)
return cls(rule, None, colocal_known, colocal_unknown, limit, labile=True, strip=strip)


def _coerce_string_to_modification(item) -> TagBase:
Expand Down Expand Up @@ -4117,6 +4125,7 @@ def peptidoforms(
include_unmodified: bool = True,
include_labile: bool = False,
expand_rules: bool = False,
strip: bool = False,
) -> Iterator[ProForma]:
"""
Generate the combinatorial cross-product of modifications for ``peptide``, given by
Expand Down Expand Up @@ -4155,6 +4164,9 @@ def peptidoforms(
For all variable modifications, allow any number of copies of the modification to be included in the result.
This mirrors the expected behavior of many search engines' variable modification rules, though it is not strictly
how ProForma's rules work. This forces :attr:`include_unmodified` to be :const:`True`.
strip : :class:`bool`
If :const:`True`, the generated peptidoforms will have all modification tags stripped of any extra information,
leaving only the bare modification definition.

Yields
------
Expand Down Expand Up @@ -4222,7 +4234,7 @@ def peptidoforms(
for rule in map(_coerce_string_to_modification, variable_modifications):
if expand_rules:
parsed_rule = GeneratorModificationRuleDirective.from_unlocalized_rule(
rule
rule, strip=strip
)
extra_rules.extend([rule] * len(parsed_rule.find_positions(template) * parsed_rule.limit))
else:
Expand All @@ -4241,8 +4253,8 @@ def peptidoforms(
seen.add(target)
tag = tag | target
if expand_rules:
rule = GeneratorModificationRuleDirective.from_unlocalized_rule(tag)
n = len(rule.find_positions(peptide))
rule = GeneratorModificationRuleDirective.from_unlocalized_rule(tag, strip=strip)
n = len(rule.find_positions(peptide)) * rule.limit
extra_rules.extend([tag] * n)
else:
extra_rules.append(tag)
Expand Down Expand Up @@ -4273,6 +4285,7 @@ def peptidoforms(
return template.proteoforms(
include_unmodified=include_unmodified,
include_labile=include_labile,
strip=strip,
)


Expand All @@ -4297,16 +4310,20 @@ class ProteoformCombinator:
in the input. See :attr:`expand_rules`.
include_labile : :class:`bool`
For all labile modifications, include the case where the modification is localized at every possible location.
strip : :class:`bool`
If :const:`True`, the generated peptidoforms will have all modification tags stripped of any extra information,
leaving only the bare modification definition.
"""
template: ProForma
include_unmodified: bool
include_labile: bool
variable_rules: List[GeneratorModificationRuleDirective]

def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False):
def __init__(self, base_proteoform: ProForma, include_unmodified: bool=False, include_labile: bool=False, strip: bool=False):
self.template = base_proteoform.copy()
self.include_unmodified = include_unmodified
self.include_labile = include_labile
self.strip = strip
self.variable_rules = []
self._extract_rules()
self._apply_fixed_modifications()
Expand Down Expand Up @@ -4343,7 +4360,7 @@ def _extract_rules(self) -> None:

remains = []
for rule in self.template.unlocalized_modifications:
rule_ = GeneratorModificationRuleDirective.from_unlocalized_rule(rule)
rule_ = GeneratorModificationRuleDirective.from_unlocalized_rule(rule, strip=self.strip)
if rule_:
rules.append(rule_)
else:
Expand All @@ -4355,7 +4372,7 @@ def _extract_rules(self) -> None:
tmp = []
for tag in tags:
if tag.group_id and tag.is_modification():
rule_ = GeneratorModificationRuleDirective.from_tagged_modification(tag)
rule_ = GeneratorModificationRuleDirective.from_tagged_modification(tag, strip=self.strip)
if rule_:
rules.append(rule_)
tmp.append(PositionLabelTag(group_id=tag.group_id))
Expand All @@ -4366,7 +4383,7 @@ def _extract_rules(self) -> None:
if self.include_labile:
remains = []
for rule in self.template.labile_modifications:
rule_ = GeneratorModificationRuleDirective.from_labile_rule(rule)
rule_ = GeneratorModificationRuleDirective.from_labile_rule(rule, strip=self.strip)
if rule_:
rules.append(rule_)
else:
Expand Down