diff --git a/.gitmodules b/.gitmodules index 6ee1c44a..a7b8c08e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "submodules/vrs"] path = submodules/vrs url = https://github.com/ga4gh/vrs.git - branch = 2.0 + branch = 2.1.0-snapshot.2026-02 diff --git a/src/ga4gh/core/models.py b/src/ga4gh/core/models.py index aabfb45c..a8149a3d 100644 --- a/src/ga4gh/core/models.py +++ b/src/ga4gh/core/models.py @@ -4,7 +4,7 @@ from abc import ABC from enum import Enum -from typing import Annotated, Any +from typing import Annotated, Any, Literal from pydantic import ( BaseModel, @@ -37,6 +37,20 @@ class Relation(str, Enum): RELATED_MATCH = "relatedMatch" +class MembershipOperator(str, Enum): + """The logical relationship between concepts in the set, in the context of some + knowledge reported about them. The value 'AND' indicates that the concepts are + dependent and occur together in this context - i.e. the reported assertion is not + necessarily true for each concept on its own - only in combination with the + other(s). The value 'OR' indicates that each concept applies independently in this + context - i.e. the reported assertion is necessarily true for each concept on its + own, independent of the presence of the other(s). + """ + + AND = "AND" + OR = "OR" + + ######################################### # Primitive data types ######################################### @@ -185,6 +199,31 @@ class ConceptMapping(Element, BaseModelForbidExtra): ) +class ConceptSet(Element, BaseModelForbidExtra): + """A set of concepts that may be considered as dependent (occurring together), or + independent (existing separately) in the context of some knowledge reported about + them, as indicated by a set membership operator. e.g. a set of independent molecular + consequences that both result from the presence of a particular genetic variant + (membership operator = OR). + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["ConceptSet"] = Field( + default="ConceptSet", + description='MUST be "ConceptSet".', + ) + concepts: list[MappableConcept] | list[ConceptSet] = Field( + ..., + description="A list of concepts that are dependent (occurring together), or independent (existing separately), depending on the membership operator.", + min_length=2, + ) + membershipOperator: MembershipOperator = Field( # noqa: N815 + ..., + description="The logical relationship between concepts in the set, in the context of some knowledge reported about them. The value 'AND' indicates that the concepts are dependent and occur together in this context - i.e. the reported assertion is not necessarily true for each concept on its own - only in combination with the other(s). The value 'OR' indicates that each concept applies independently in this context - i.e. the reported assertion is necessarily true for each concept on its own, independent of the presence of the other(s).", + ) + + class Extension(Element, BaseModelForbidExtra): """The Extension class provides entities with a means to include additional attributes that are outside of the specified standard but needed by a given content diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index ad2978d5..e8029b01 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -161,7 +161,10 @@ class VrsType(str, Enum): LIT_SEQ_EXPR = "LiteralSequenceExpression" SEQ_REF = "SequenceReference" SEQ_LOC = "SequenceLocation" + SEQ_OFFSET_LOCATION = "SequenceOffsetLocation" + RELATIVE_SEQ_LOC = "RelativeSequenceLocation" ALLELE = "Allele" + RELATIVE_ALLELE = "RelativeAllele" CIS_PHASED_BLOCK = "CisPhasedBlock" ADJACENCY = "Adjacency" TERMINUS = "Terminus" @@ -229,6 +232,21 @@ class Syntax(str, Enum): SPDI = "spdi" +class AnchorOrientation(str, Enum): + """Indicates which side of a discontinuous anchor on the sequenceReference is used + as the reference point for interpreting offsetStart/offsetEnd. The anchor is an + inter-residue coordinate on the sequenceReference. When that anchor corresponds to a + boundary whose realization on a base sequence yields two distinct locations (e.g., + an exon junction), this property disambiguates which anchor side on the + sequenceReference is intended. `left` denotes the side immediately preceding the + anchor in sequenceReference coordinate order; `right` denotes the side immediately + following the anchor in sequenceReference coordinate order. + """ + + LEFT = "left" + RIGHT = "right" + + def _recurse_ga4gh_serialize(obj): if isinstance(obj, Ga4ghIdentifiableObject): return obj.get_or_create_digest() @@ -503,7 +521,7 @@ class ReferenceLengthExpression(_ValueObject, BaseModelForbidExtra): ) sequence: sequenceString | None = Field( default=None, - description="the literal Sequence encoded by the Reference Length Expression.", + description="the literal sequence encoded by the Reference Length Expression.", ) repeatSubunitLength: int = Field( ..., description="The number of residues in the repeat subunit." @@ -656,6 +674,72 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801 inherent = ["end", "sequenceReference", "start", "type"] +class SequenceOffsetLocation(_ValueObject, BaseModelForbidExtra): + """A location defined by an offset relative to an anchor on a mapped sequence + reference. + """ + + model_config = ConfigDict(use_enum_values=True) + + type: Literal["SequenceOffsetLocation"] = Field( + default=VrsType.SEQ_OFFSET_LOCATION.value, + description=f'MUST be "{VrsType.SEQ_OFFSET_LOCATION.value}"', + ) + sequenceReference: SequenceReference | iriReference = Field( + ..., + description="A sequence reference that has been mapped from which a relative location is defined.", + ) + anchor: int = Field( + ..., + description="The inter-residue position on the sequence reference from which the relative location offset is calculated.", + ) + anchorOrientation: AnchorOrientation = Field( + ..., + description="Indicates which side of a discontinuous anchor on the sequenceReference is used as the reference point for interpreting offsetStart/offsetEnd. The anchor is an inter-residue coordinate on the sequenceReference. When that anchor corresponds to a boundary whose realization on a base sequence yields two distinct locations (e.g., an exon junction), this property disambiguates which anchor side on the sequenceReference is intended. `left` denotes the side immediately preceding the anchor in sequenceReference coordinate order; `right` denotes the side immediately following the anchor in sequenceReference coordinate order.", + ) + offsetStart: int | Range | None = Field( + default=None, + description="The start offset, in inter-residue coordinates, from the anchor realization selected by anchorOrientation on the sequenceReference.", + ) + offsetEnd: int | Range | None = Field( + default=None, + description="The end offset, in inter-residue coordinates, from the anchor realization selected by anchorOrientation on the sequenceReference.", + ) + + class ga4gh(_ValueObject.ga4gh): + inherent = [ + "sequenceReference", + "anchor", + "anchorOrientation", + "offsetStart", + "offsetEnd", + "type", + ] + + +class RelativeSequenceLocation(Ga4ghIdentifiableObject, BaseModelForbidExtra): + """A location on a base sequence and its position relative to a boundary offset on a + mapped sequence gap. Typically used to describe intronic locations that exist with + respect to a mapped RNA transcript sequence. + """ + + type: Literal["RelativeSequenceLocation"] = Field( + default=VrsType.RELATIVE_SEQ_LOC.value, + description=f'MUST be "{VrsType.RELATIVE_SEQ_LOC.value}"', + ) + baseSequenceLocation: SequenceLocation | iriReference = Field( + ..., description="An absolute location on a sequence." + ) + mappedSequenceLocation: SequenceOffsetLocation | iriReference = Field( + ..., + description="A location relative to an offset on a mapped sequence.", + ) + + class ga4gh(Ga4ghIdentifiableObject.ga4gh): + prefix = "RSL" + inherent = ["baseSequenceLocation", "mappedSequenceLocation", "type"] + + ######################################### # base variation ######################################### @@ -716,6 +800,35 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801 inherent = ["location", "state", "type"] +class RelativeAllele(_VariationBase, BaseModelForbidExtra): + """An Allele defined on a mapped location relative to a base location. Often used to describe intronic variants.""" + + type: Literal["RelativeAllele"] = Field( + default=VrsType.RELATIVE_ALLELE.value, + description=f'MUST be "{VrsType.RELATIVE_ALLELE.value}"', + ) + mappedState: ( + LiteralSequenceExpression | ReferenceLengthExpression | LengthExpression + ) = Field( + ..., + description='The state of the RelativeAllele as expressed on the mapped sequence. This will differ from the base state when mapping to a reverse complement sequence, commonly observed when representing the state on transcripts mapped to the "negative strand" of a chromosome.', + ) + baseState: ( + LiteralSequenceExpression | ReferenceLengthExpression | LengthExpression + ) = Field( + ..., + description="The state of the RelativeAllele as expressed on the base sequence.", + ) + relativeLocation: RelativeSequenceLocation | iriReference = Field( + ..., + description="The relative location at which the baseState and mappedState are expressed.", + ) + + class ga4gh(Ga4ghIdentifiableObject.ga4gh): + prefix = "RA" + inherent = ["mappedState", "baseState", "relativeLocation", "type"] + + class CisPhasedBlock(_VariationBase, BaseModelForbidExtra): """An ordered set of co-occurring `Variation` on the same molecule.""" @@ -921,7 +1034,14 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): class MolecularVariation(RootModel): """A `variation` on a contiguous molecule.""" - root: Allele | CisPhasedBlock | Adjacency | Terminus | DerivativeMolecule = Field( + root: ( + Allele + | RelativeAllele + | CisPhasedBlock + | Adjacency + | Terminus + | DerivativeMolecule + ) = Field( ..., json_schema_extra={"description": "A `variation` on a contiguous molecule."}, discriminator="type", @@ -943,7 +1063,7 @@ class SequenceExpression(RootModel): class Location(RootModel): """A contiguous segment of a biological sequence.""" - root: SequenceLocation = Field( + root: SequenceLocation | RelativeSequenceLocation = Field( ..., json_schema_extra={ "description": "A contiguous segment of a biological sequence." diff --git a/submodules/vrs b/submodules/vrs index d2e87dd8..83791ac4 160000 --- a/submodules/vrs +++ b/submodules/vrs @@ -1 +1 @@ -Subproject commit d2e87dd878a9a344c1910105c5acf5bdf8087af4 +Subproject commit 83791ac4294a59ce48400eb66c34af86a12a3704 diff --git a/tests/test_vrs.py b/tests/test_vrs.py index 975854ab..47e372e0 100644 --- a/tests/test_vrs.py +++ b/tests/test_vrs.py @@ -284,6 +284,8 @@ def test_enref2(): def test_class_refatt_map(): class_refatt_map_expected = { "Allele": ["location"], + "RelativeAllele": ["relativeLocation"], + "RelativeSequenceLocation": ["baseSequenceLocation"], "CisPhasedBlock": ["members"], "CopyNumberCount": ["location"], "CopyNumberChange": ["location"],