From 203846c85500211961e4461ff99d1568453f7d50 Mon Sep 17 00:00:00 2001 From: jbloom Date: Fri, 4 Apr 2025 22:11:33 -0700 Subject: [PATCH 1/3] add `arbitrary_sites` option to `MutationParser` to allow arbitrary str as sites --- CHANGELOG.rst | 4 ++++ polyclonal/__init__.py | 2 +- polyclonal/polyclonal.py | 6 +++--- polyclonal/utils.py | 15 +++++++++++++-- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 311fd78..5ab4877 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file. The format is based on `Keep a Changelog `_. +6.15 +---- +- Add ``arbitrary_sites`` option to ``MutationParser`` to allow arbitrary strings as sites. + 6.14 ---- - Fix bug in ``plot.lineplot_and_heatmap`` where the ``minimum max of at site`` failed to keep only the top sites when the hide-not-filter option was being used. Addresses `this issue `_. diff --git a/polyclonal/__init__.py b/polyclonal/__init__.py index 93bc1c7..f7e3827 100644 --- a/polyclonal/__init__.py +++ b/polyclonal/__init__.py @@ -31,7 +31,7 @@ __author__ = "`the Bloom lab `_" __email__ = "jbloom@fredhutch.org" -__version__ = "6.14" +__version__ = "6.15" __url__ = "https://github.com/jbloomlab/polyclonal" from polyclonal.alphabets import AAS diff --git a/polyclonal/polyclonal.py b/polyclonal/polyclonal.py index b9eab93..bcdd617 100644 --- a/polyclonal/polyclonal.py +++ b/polyclonal/polyclonal.py @@ -820,7 +820,7 @@ def __init__( self.alphabet = tuple(alphabet) self._mutparser = polyclonal.utils.MutationParser( alphabet, - letter_suffixed_sites=not self.sequential_integer_sites, + arbitrary_sites=not self.sequential_integer_sites, ) # get any epitope labels as str, not int @@ -1597,13 +1597,13 @@ def site_level_model( site_data_to_fit = polyclonal.utils.site_level_variants( self.data_to_fit, original_alphabet=self.alphabet, - letter_suffixed_sites=not self.sequential_integer_sites, + arbitrary_sites=not 
self.sequential_integer_sites, ) site_escape_df = ( polyclonal.utils.site_level_variants( self.mut_escape_df.rename(columns={"mutation": "aa_substitutions"}), original_alphabet=self.alphabet, - letter_suffixed_sites=not self.sequential_integer_sites, + arbitrary_sites=not self.sequential_integer_sites, ) .rename(columns={"aa_substitutions": "mutation"}) .groupby(["epitope", "mutation"], as_index=False) diff --git a/polyclonal/utils.py b/polyclonal/utils.py index 06b0cf1..74c2937 100644 --- a/polyclonal/utils.py +++ b/polyclonal/utils.py @@ -24,6 +24,9 @@ class MutationParser: letter_suffixed_sites : bool Allow sites suffixed by lowercase letters, such as "214a". In this case, returned sites from :meth:`MutationParser.parse_mut` are str. + arbitrary_sites : bool + Allow arbitrary strings as sites, such as "31(E2)". In this case, returned + sites from :meth:`MutationParser.parse_mut` are str. Example ------- @@ -56,10 +59,11 @@ class MutationParser: """ - def __init__(self, alphabet, letter_suffixed_sites=False): + def __init__(self, alphabet, letter_suffixed_sites=False, arbitrary_sites=False): """See main class docstring.""" chars = [] for char in alphabet: + assert len(char) == 1, f"{char=}, {alphabet=}" if char.isalpha(): chars.append(char) elif char == "*": @@ -69,7 +73,10 @@ def __init__(self, alphabet, letter_suffixed_sites=False): else: raise ValueError(f"invalid alphabet character: {char}") chars = "|".join(chars) - if letter_suffixed_sites: + if arbitrary_sites: + self._sites_as_int = False + site_regex = "(?P.+)" + elif letter_suffixed_sites: self._sites_as_int = False site_regex = r"(?P\-?\d+[a-z]?)" else: @@ -96,6 +103,7 @@ def site_level_variants( wt_char="w", mut_char="m", letter_suffixed_sites=False, + arbitrary_sites=False, ): """Re-define variants simply in terms of which sites are mutated. @@ -116,6 +124,8 @@ def site_level_variants( Single letter used to represent mutant identity at all sites.
letter_suffixed_sites : str Same mutation as for :class:`MutationParser`. + arbitrary_sites : bool + Same meaning as for :class:`MutationParser`. Returns ------- @@ -149,6 +159,7 @@ def site_level_variants( mutparser = MutationParser( original_alphabet, letter_suffixed_sites=letter_suffixed_sites, + arbitrary_sites=arbitrary_sites, ) site_subs_mapping = {} From de19bf953b6a9f7e4342e1dfc252ff685bae22e0 Mon Sep 17 00:00:00 2001 From: jbloom Date: Sat, 5 Apr 2025 20:39:35 -0700 Subject: [PATCH 2/3] remove requirement `sites` passed to `Polyclonal` be natsorted --- CHANGELOG.rst | 1 + polyclonal/polyclonal.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5ab4877..b9a85e9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ The format is based on `Keep a Changelog `_. 6.15 ---- - Add ``arbitrary_sites`` option to ``MutationParser`` to allow arbitrary strings as sites. +- Remove requirement that ``sites`` passed to ``Polyclonal`` be natsorted. 6.14 ---- diff --git a/polyclonal/polyclonal.py b/polyclonal/polyclonal.py index bcdd617..9f02f20 100644 --- a/polyclonal/polyclonal.py +++ b/polyclonal/polyclonal.py @@ -181,8 +181,7 @@ class Polyclonal: from ``data_to_fit`` or ``mut_escape_df``. However, you can also have non-sequential integer sites, or sites with lower-case letter suffixes (eg, `214a`) if your protein is numbered against a reference that it has - indels relative to. In that case, provide list of all expected in order - here; we require that order to be natsorted. + indels relative to. In that case, provide list of all expected in order here. epitope_colors : array-like or dict Maps each epitope to the color used for plotting.
Either a dict keyed by each epitope, or an array of colors that are sequentially assigned @@ -801,8 +800,6 @@ def __init__( if sites is not None: sites = tuple(sites) - if sites != tuple(natsort.natsorted(sites, alg=natsort.ns.SIGNED)): - raise ValueError("`sites` not natsorted") if any(not isinstance(r, int) for r in sites) or sites != tuple( range(sites[0], sites[-1] + 1) ): From a944a62a16d52e37d3b50de257e4d7aab5c1e937 Mon Sep 17 00:00:00 2001 From: jbloom Date: Sat, 5 Apr 2025 21:28:06 -0700 Subject: [PATCH 3/3] upgrade `binarymap` to 0.8 --- CHANGELOG.rst | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b9a85e9..3b23484 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,7 @@ The format is based on `Keep a Changelog `_. ---- - Add ``arbitrary_sites`` option to ``MutationParser`` to allow arbitrary strings as sites. - Remove requirement that ``sites`` passed to ``Polyclonal`` be natsorted. +- Upgrade ``binarymap`` to 0.8. 6.14 ---- diff --git a/setup.py b/setup.py index 17c88ce..d732634 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ license="GPLv3", install_requires=[ "altair>=5.0.0", - "binarymap>=0.7", + "binarymap>=0.8", "biopython>=1.79", "frozendict>=2.0.7", "matplotlib>=3.1",