Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ All notable changes to this project will be documented in this file.

The format is based on `Keep a Changelog <https://keepachangelog.com>`_.

6.15
----
- Add ``arbitrary_sites`` option to ``MutationParser`` to allow arbitrary strings as sites.
- Remove requirement that ``sites`` passed to ``Polyclonal`` be natsorted.
- Upgrade ``binarymap`` to 0.8.

6.14
----
- Fix bug in ``plot.lineplot_and_heatmap`` where the ``minimum max of <stat> at site`` failed to keep only the top sites when the hide-not-filter option was being used. Addresses `this issue <https://github.com/dms-vep/dms-vep-pipeline-3/issues/107>`_.
Expand Down
2 changes: 1 addition & 1 deletion polyclonal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

__author__ = "`the Bloom lab <https://jbloomlab.org>`_"
__email__ = "jbloom@fredhutch.org"
__version__ = "6.14"
__version__ = "6.15"
__url__ = "https://github.com/jbloomlab/polyclonal"

from polyclonal.alphabets import AAS
Expand Down
11 changes: 4 additions & 7 deletions polyclonal/polyclonal.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ class Polyclonal:
from ``data_to_fit`` or ``mut_escape_df``. However, you can also have
non-sequential integer sites, or sites with lower-case letter suffixes
(eg, `214a`) if your protein is numbered against a reference that it has
indels relative to. In that case, provide list of all expected in order
here; we require that order to be natsorted.
indels relative to. In that case, provide a list of all expected sites, in order, here.
epitope_colors : array-like or dict
Maps each epitope to the color used for plotting. Either a dict keyed
by each epitope, or an array of colors that are sequentially assigned
Expand Down Expand Up @@ -801,8 +800,6 @@ def __init__(

if sites is not None:
sites = tuple(sites)
if sites != tuple(natsort.natsorted(sites, alg=natsort.ns.SIGNED)):
raise ValueError("`sites` not natsorted")
if any(not isinstance(r, int) for r in sites) or sites != tuple(
range(sites[0], sites[-1] + 1)
):
Expand All @@ -820,7 +817,7 @@ def __init__(
self.alphabet = tuple(alphabet)
self._mutparser = polyclonal.utils.MutationParser(
alphabet,
letter_suffixed_sites=not self.sequential_integer_sites,
arbitrary_sites=not self.sequential_integer_sites,
)

# get any epitope labels as str, not int
Expand Down Expand Up @@ -1597,13 +1594,13 @@ def site_level_model(
site_data_to_fit = polyclonal.utils.site_level_variants(
self.data_to_fit,
original_alphabet=self.alphabet,
letter_suffixed_sites=not self.sequential_integer_sites,
arbitrary_sites=not self.sequential_integer_sites,
)
site_escape_df = (
polyclonal.utils.site_level_variants(
self.mut_escape_df.rename(columns={"mutation": "aa_substitutions"}),
original_alphabet=self.alphabet,
letter_suffixed_sites=not self.sequential_integer_sites,
arbitrary_sites=not self.sequential_integer_sites,
)
.rename(columns={"aa_substitutions": "mutation"})
.groupby(["epitope", "mutation"], as_index=False)
Expand Down
15 changes: 13 additions & 2 deletions polyclonal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ class MutationParser:
letter_suffixed_sites : bool
Allow sites suffixed by lowercase letters, such as "214a". In this case, returned
sites from :meth:`MutationParser.parse_mut` are str.
arbitrary_sites : bool
Allow arbitrary strings as sites, such as "31(E2)". In this case, returned
sites from :meth:`MutationParser.parse_mut` are str. If both this option and
``letter_suffixed_sites`` are set, this option takes precedence.

Example
-------
Expand Down Expand Up @@ -56,10 +59,11 @@ class MutationParser:

"""

def __init__(self, alphabet, letter_suffixed_sites=False):
def __init__(self, alphabet, letter_suffixed_sites=False, arbitrary_sites=False):
"""See main class docstring."""
chars = []
for char in alphabet:
assert len(char) == 1, f"{char=}, {alphabet=}"
if char.isalpha():
chars.append(char)
elif char == "*":
Expand All @@ -69,7 +73,10 @@ def __init__(self, alphabet, letter_suffixed_sites=False):
else:
raise ValueError(f"invalid alphabet character: {char}")
chars = "|".join(chars)
if letter_suffixed_sites:
if arbitrary_sites:
self._sites_as_int = False
site_regex = "(?P<site>.+)"
elif letter_suffixed_sites:
self._sites_as_int = False
site_regex = r"(?P<site>\-?\d+[a-z]?)"
else:
Expand All @@ -96,6 +103,7 @@ def site_level_variants(
wt_char="w",
mut_char="m",
letter_suffixed_sites=False,
arbitrary_sites=False,
):
"""Re-define variants simply in terms of which sites are mutated.

Expand All @@ -116,6 +124,8 @@ def site_level_variants(
Single letter used to represent mutant identity at all sites.
letter_suffixed_sites : bool
Same meaning as for :class:`MutationParser`.
arbitrary_sites : bool
Same meaning as for :class:`MutationParser`.

Returns
-------
Expand Down Expand Up @@ -149,6 +159,7 @@ def site_level_variants(
mutparser = MutationParser(
original_alphabet,
letter_suffixed_sites=letter_suffixed_sites,
arbitrary_sites=arbitrary_sites,
)

site_subs_mapping = {}
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
license="GPLv3",
install_requires=[
"altair>=5.0.0",
"binarymap>=0.7",
"binarymap>=0.8",
"biopython>=1.79",
"frozendict>=2.0.7",
"matplotlib>=3.1",
Expand Down