From 08584b94feaedc01fa4fe306453129618827505a Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 7 Dec 2025 19:50:36 -0500 Subject: [PATCH 1/2] chore: add pyrefly-inferred type annotations --- pyproject.toml | 6 ++++++ src/ga4gh/core/enderef.py | 17 ++++++++++++----- src/ga4gh/core/identifiers.py | 6 +++--- src/ga4gh/vrs/dataproxy.py | 8 ++++---- src/ga4gh/vrs/enderef.py | 6 ++++-- src/ga4gh/vrs/extras/object_store.py | 10 ++++++---- src/ga4gh/vrs/extras/translator.py | 6 +++--- src/ga4gh/vrs/models.py | 18 +++++++++--------- src/ga4gh/vrs/normalize.py | 13 +++++++++---- src/ga4gh/vrs/utils/hgvs_tools.py | 2 +- 10 files changed, 57 insertions(+), 35 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d86fd7e5..719e9707 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,6 +169,12 @@ ignore = [ "PLC0206", ] +[tool.pyrefly] +project-includes = [ + "**/*.py*", + "**/*.ipynb", +] + [tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type diff --git a/src/ga4gh/core/enderef.py b/src/ga4gh/core/enderef.py index ed6af244..b8feff67 100644 --- a/src/ga4gh/core/enderef.py +++ b/src/ga4gh/core/enderef.py @@ -11,6 +11,8 @@ import logging +from pydantic.main import BaseModel + from .identifiers import ga4gh_identify, is_ga4gh_identifier from .pydantic import ( get_pydantic_root, @@ -22,7 +24,12 @@ _logger = logging.getLogger(__name__) -def ga4gh_enref(o, cra_map, object_store=None, return_id_obj_tuple=False) -> tuple: # noqa: ANN001 +def ga4gh_enref( + o, # noqa: ANN001 + cra_map, # noqa: ANN001 + object_store=None, # noqa: ANN001 + return_id_obj_tuple: bool = False, +) -> tuple: """Recursively convert "referable attributes" from inlined to referenced form. Returns a new object. @@ -35,13 +42,13 @@ def ga4gh_enref(o, cra_map, object_store=None, return_id_obj_tuple=False) -> tup :raise TypeError: if any object IDs are non-GA4GH CURIEs """ - def _id_and_store(o): # noqa: ANN202 ANN001 + def _id_and_store(o) -> str | None: # noqa: ANN001 _id = ga4gh_identify(o) if _id and object_store is not None: object_store[_id] = o return _id - def _enref(o): # noqa: ANN202 ANN001 + def _enref(o: BaseModel) -> str | None: """depth-first recursive, in-place enref of object; returns id of object""" ref_att_names = cra_map.get(o.type, []) for ran in ref_att_names: @@ -76,7 +83,7 @@ def _enref(o): # noqa: ANN202 ANN001 return (_id, o) if return_id_obj_tuple else o -def ga4gh_deref(o, cra_map, object_store): # noqa: ANN201 ANN001 +def ga4gh_deref(o, cra_map, object_store) -> BaseModel: # noqa: ANN001 """Convert "referable attributes" in-place from referenced to inlined form. @@ -87,7 +94,7 @@ def ga4gh_deref(o, cra_map, object_store): # noqa: ANN201 ANN001 """ - def _deref(o): # noqa: ANN202 ANN001 + def _deref(o: BaseModel): # noqa: ANN202 """depth-first recursive, in-place deref of object; returns id of object""" if o.type not in cra_map: _logger.warning("%s not in cra_map %s", o.type, cra_map) diff --git a/src/ga4gh/core/identifiers.py b/src/ga4gh/core/identifiers.py index 2ed76855..07dfc3db 100644 --- a/src/ga4gh/core/identifiers.py +++ b/src/ga4gh/core/identifiers.py @@ -85,14 +85,14 @@ class use_ga4gh_compute_identifier_when(ContextDecorator): # noqa: N801 def my_method(): """ - def __init__(self, when: VrsObjectIdentifierIs): + def __init__(self, when: VrsObjectIdentifierIs) -> None: self.when = when self.token = None - def __enter__(self): # noqa: ANN204 + def __enter__(self) -> None: self.token = ga4gh_compute_identifier_when.set(self.when) - def __exit__(self, exc_type, exc, exc_tb): # noqa: ANN204 ANN001 + def __exit__(self, exc_type, exc, exc_tb) -> None: # noqa: ANN001 ga4gh_compute_identifier_when.reset(self.token) diff --git a/src/ga4gh/vrs/dataproxy.py b/src/ga4gh/vrs/dataproxy.py index 8e593069..50e70fb6 100644 --- a/src/ga4gh/vrs/dataproxy.py +++ b/src/ga4gh/vrs/dataproxy.py @@ -247,7 +247,7 @@ class SeqRepoRESTDataProxy(_SeqRepoDataProxyBase): rest_version = "1" - def __init__(self, base_url: str): + def __init__(self, base_url: str) -> None: """Initialize REST-based dataproxy instance. :param base_url: root URL to server @@ -284,12 +284,12 @@ class SequenceProxy(Sequence): """ - def __init__(self, dp: _DataProxy, alias: str): # noqa: D107 + def __init__(self, dp: _DataProxy, alias: str) -> None: # noqa: D107 self.dp = dp self.alias = alias self._md = self.dp.get_metadata(self.alias) - def __str__(self): # noqa: D105 ANN204 + def __str__(self) -> str: # noqa: D105 return self.dp.get_sequence(self.alias) def __len__(self): # noqa: D105 ANN204 @@ -299,7 +299,7 @@ def __reversed__(self): # noqa: D105 ANN204 msg = "Reversed iteration of a SequenceProxy is not implemented" raise NotImplementedError(msg) - def __getitem__(self, key): # noqa: ANN001 ANN204 + def __getitem__(self, key) -> str: # noqa: ANN001 """Return sequence for key (slice), fetching if necessary""" if isinstance(key, int): key = slice(key, key + 1) diff --git a/src/ga4gh/vrs/enderef.py b/src/ga4gh/vrs/enderef.py index c3c5e104..d4616046 100644 --- a/src/ga4gh/vrs/enderef.py +++ b/src/ga4gh/vrs/enderef.py @@ -1,9 +1,11 @@ +from pydantic.main import BaseModel + from ga4gh.core import ga4gh_deref, ga4gh_enref from .models import class_refatt_map -def vrs_enref(o, object_store=None, return_id_obj_tuple=False): +def vrs_enref(o, object_store=None, return_id_obj_tuple: bool = False): return ga4gh_enref( o, cra_map=class_refatt_map, @@ -12,5 +14,5 @@ def vrs_enref(o, object_store=None, return_id_obj_tuple=False): ) -def vrs_deref(o, object_store): +def vrs_deref(o, object_store) -> BaseModel: return ga4gh_deref(o, cra_map=class_refatt_map, object_store=object_store) diff --git a/src/ga4gh/vrs/extras/object_store.py b/src/ga4gh/vrs/extras/object_store.py index 39a9ad55..8712bb31 100644 --- a/src/ga4gh/vrs/extras/object_store.py +++ b/src/ga4gh/vrs/extras/object_store.py @@ -13,7 +13,9 @@ class Sqlite3MutableMapping(MutableMapping): If not used as a contextmanager, user must call commit and/or close. """ - def __init__(self, sqlite3_db: str | sqlite3.Connection, autocommit: bool = True): + def __init__( + self, sqlite3_db: str | sqlite3.Connection, autocommit: bool = True + ) -> None: """Connect to the sqlite3 database specified by an existing sqlite3.Connection or a connection string. @@ -96,10 +98,10 @@ def __len__(self): finally: cur.close() - def commit(self): + def commit(self) -> None: self.db.commit() - def close(self): + def close(self) -> None: with self._closed_lock: if not self._closed: self.commit() @@ -110,5 +112,5 @@ def __enter__(self): self.db.__enter__() return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: self.db.__exit__(exc_type, exc_value, traceback) diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 5c064447..1fe5e350 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -75,7 +75,7 @@ def __init__( default_assembly_name: str = "GRCh38", identify: bool = True, rle_seq_limit: int | None = 50, - ): + ) -> None: self.default_assembly_name = default_assembly_name self.data_proxy = data_proxy self.identify = identify @@ -175,7 +175,7 @@ def __init__( data_proxy: _DataProxy, default_assembly_name: str = "GRCh38", identify: bool = True, - ): + ) -> None: """Initialize AlleleTranslator class""" super().__init__(data_proxy, default_assembly_name, identify) @@ -528,7 +528,7 @@ def __init__( data_proxy: _DataProxy, default_assembly_name: str = "GRCh38", identify: bool = True, - ): + ) -> None: """Initialize CnvTranslator class""" super().__init__(data_proxy, default_assembly_name, identify) self.from_translators = { diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index 8fffd053..ad2978d5 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -50,7 +50,7 @@ def flatten(vals): """Flatten vals recursively, lazily using yield""" - def is_coll(thing): + def is_coll(thing) -> bool: """Return True if the thing looks like a collection. This is not exhaustive, do not use in general. @@ -77,7 +77,7 @@ def flatten_type(t): return [t] -def overlaps(a: list, b: list): +def overlaps(a: list, b: list) -> bool: """Return true if there are any elements in common between a and b""" return len(set(a).intersection(set(b))) > 0 @@ -248,7 +248,7 @@ class _ValueObject(Entity, ABC): See https://en.wikipedia.org/wiki/Value_object for more on Value Objects. """ - def __hash__(self): + def __hash__(self) -> int: return encode_canonical_json(self.ga4gh_serialize()).decode("utf-8").__hash__() def ga4gh_serialize(self) -> dict: @@ -280,14 +280,14 @@ class Ga4ghIdentifiableObject(_ValueObject, ABC): description="A sha512t24u digest created using the VRS Computed Identifier algorithm.", ) - def __lt__(self, other): + def __lt__(self, other) -> bool: return self.get_or_create_digest() < other.get_or_create_digest() @staticmethod def is_ga4gh_identifiable() -> bool: return True - def has_valid_ga4gh_id(self): + def has_valid_ga4gh_id(self) -> bool | str | None: return self.id and GA4GH_IR_REGEXP.match(self.id) is not None def compute_digest( @@ -354,7 +354,7 @@ def get_or_create_ga4gh_identifier( else: return self.compute_ga4gh_identifier(recompute) - def compute_ga4gh_identifier(self, recompute: bool = False, as_version=None): + def compute_ga4gh_identifier(self, recompute: bool = False, as_version=None) -> str: """Return a GA4GH Computed Identifier. If ``as_version`` is provided, other parameters are ignored and a computed @@ -611,7 +611,7 @@ def validate_start_end( raise ValueError(err_msg) return v - def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion): + def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion) -> str: """Return a serialized string following the conventions for SequenceLocation serialization as defined in the VRS version specified by ``as_version``. @@ -643,7 +643,7 @@ def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion): msg = f"Received an unexpected value for `as_version`: {as_version}. MUST be an instance of `PrevVrsVersion`." raise TypeError(msg) - def get_refget_accession(self): + def get_refget_accession(self) -> str | None: if isinstance(self.sequenceReference, SequenceReference): return self.sequenceReference.refgetAccession if isinstance(self.sequenceReference, iriReference): @@ -685,7 +685,7 @@ class Allele(_VariationBase, BaseModelForbidExtra): Field(..., description="An expression of the sequence state") ) - def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion): + def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion) -> str: """Return a serialized string following the conventions for Allele serialization as defined in the VRS version specified by 'as_version`. diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 17b40c52..78afca24 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -11,6 +11,7 @@ from bioutils.normalize import NormalizationMode from bioutils.normalize import normalize as _normalize +from pydantic.main import BaseModel from ga4gh.core import ga4gh_digest, is_pydantic_instance, pydantic_copy from ga4gh.vrs import models @@ -83,7 +84,7 @@ def _get_new_allele_location_pos( return val -def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): +def _normalize_allele(input_allele, data_proxy, rle_seq_limit: int = 50): """Normalize Allele using "fully-justified" normalization adapted from NCBI's VOCA. Fully-justified normalization expands such ambiguous representation over the entire region of ambiguity, resulting in an unambiguous representation that may be @@ -248,7 +249,7 @@ def denormalize_reference_length_expression( return alt -def _factor_gen(n): +def _factor_gen(n: int): """Yield all factors of an integer `n`, in descending order""" lower_factors = [] i = 1 @@ -262,7 +263,11 @@ def _factor_gen(n): def _define_rle_allele( - allele, length, repeat_subunit_length, rle_seq_limit, extended_alt_seq + allele: BaseModel, + length: int, + repeat_subunit_length: int, + rle_seq_limit, + extended_alt_seq, ): # Otherwise, create the Allele as an RLE allele.state = models.ReferenceLengthExpression( @@ -275,7 +280,7 @@ def _define_rle_allele( return allele -def _is_valid_cycle(template_start, template, target): +def _is_valid_cycle(template_start, template: str, target) -> bool: cycle = itertools.cycle(template[template_start:]) for char in target[len(template) :]: # noqa: SIM110 if char != next(cycle): diff --git a/src/ga4gh/vrs/utils/hgvs_tools.py b/src/ga4gh/vrs/utils/hgvs_tools.py index fbfb10d2..503d4022 100644 --- a/src/ga4gh/vrs/utils/hgvs_tools.py +++ b/src/ga4gh/vrs/utils/hgvs_tools.py @@ -30,7 +30,7 @@ class HgvsTools: hgvs_re = re.compile(r"[^:]+:[cgmnpr]\.") - def __init__(self, data_proxy: _DataProxy | None = None): + def __init__(self, data_proxy: _DataProxy | None = None) -> None: """Initialize object. :param data_proxy: GA4GH data proxy instance From 8bbfc5f89523e72b7e61e3b263a079481498e20b Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 7 Dec 2025 19:52:04 -0500 Subject: [PATCH 2/2] remove pyrefly config --- pyproject.toml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 719e9707..d86fd7e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,12 +169,6 @@ ignore = [ "PLC0206", ] -[tool.pyrefly] -project-includes = [ - "**/*.py*", - "**/*.ipynb", -] - [tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type