Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "submodules/vrs"]
path = submodules/vrs
url = https://github.com/ga4gh/vrs.git
branch = 2.0
branch = 2.1.0-snapshot.2026-02
41 changes: 40 additions & 1 deletion src/ga4gh/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from abc import ABC
from enum import Enum
from typing import Annotated, Any
from typing import Annotated, Any, Literal

from pydantic import (
BaseModel,
Expand Down Expand Up @@ -37,6 +37,20 @@ class Relation(str, Enum):
RELATED_MATCH = "relatedMatch"


class MembershipOperator(str, Enum):
    """The logical relationship between concepts in the set, in the context of some
    knowledge reported about them. The value 'AND' indicates that the concepts are
    dependent and occur together in this context - i.e. the reported assertion is not
    necessarily true for each concept on its own - only in combination with the
    other(s). The value 'OR' indicates that each concept applies independently in this
    context - i.e. the reported assertion is necessarily true for each concept on its
    own, independent of the presence of the other(s).
    """

    # The ``str`` mixin makes every member compare equal to its literal value, so
    # ``MembershipOperator.AND == "AND"`` holds.

    # Dependent concepts: the assertion applies only to the combination.
    AND = "AND"
    # Independent concepts: the assertion applies to each concept on its own.
    OR = "OR"


#########################################
# Primitive data types
#########################################
Expand Down Expand Up @@ -185,6 +199,31 @@ class ConceptMapping(Element, BaseModelForbidExtra):
)


class ConceptSet(Element, BaseModelForbidExtra):
    """A set of concepts that may be considered as dependent (occurring together), or
    independent (existing separately) in the context of some knowledge reported about
    them, as indicated by a set membership operator. e.g. a set of independent molecular
    consequences that both result from the presence of a particular genetic variant
    (membership operator = OR).
    """

    # Keep the plain enum value (e.g. "AND") on the model rather than the enum member.
    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag; the Literal annotation rejects any other value.
    type: Literal["ConceptSet"] = Field(
        default="ConceptSet",
        description='MUST be "ConceptSet".',
    )
    # Required. Either a flat list of concepts or a list of nested sets; the two
    # element kinds cannot be mixed in one list, and at least two entries are needed.
    # NOTE(review): the self-reference to ConceptSet only resolves if annotations are
    # evaluated lazily (e.g. `from __future__ import annotations`) - confirm that the
    # module enables this, otherwise quote the annotation.
    concepts: list[MappableConcept] | list[ConceptSet] = Field(
        ...,
        description="A list of concepts that are dependent (occurring together), or independent (existing separately), depending on the membership operator.",
        min_length=2,
    )
    # Required. camelCase is deliberate (hence the N815 suppression).
    membershipOperator: MembershipOperator = Field(  # noqa: N815
        ...,
        description="The logical relationship between concepts in the set, in the context of some knowledge reported about them. The value 'AND' indicates that the concepts are dependent and occur together in this context - i.e. the reported assertion is not necessarily true for each concept on its own - only in combination with the other(s). The value 'OR' indicates that each concept applies independently in this context - i.e. the reported assertion is necessarily true for each concept on its own, independent of the presence of the other(s).",
    )


class Extension(Element, BaseModelForbidExtra):
"""The Extension class provides entities with a means to include additional
attributes that are outside of the specified standard but needed by a given content
Expand Down
126 changes: 123 additions & 3 deletions src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,10 @@ class VrsType(str, Enum):
LIT_SEQ_EXPR = "LiteralSequenceExpression"
SEQ_REF = "SequenceReference"
SEQ_LOC = "SequenceLocation"
SEQ_OFFSET_LOCATION = "SequenceOffsetLocation"
RELATIVE_SEQ_LOC = "RelativeSequenceLocation"
ALLELE = "Allele"
RELATIVE_ALLELE = "RelativeAllele"
CIS_PHASED_BLOCK = "CisPhasedBlock"
ADJACENCY = "Adjacency"
TERMINUS = "Terminus"
Expand Down Expand Up @@ -229,6 +232,21 @@ class Syntax(str, Enum):
SPDI = "spdi"


class AnchorOrientation(str, Enum):
    """Indicates which side of a discontinuous anchor on the sequenceReference is used
    as the reference point for interpreting offsetStart/offsetEnd. The anchor is an
    inter-residue coordinate on the sequenceReference. When that anchor corresponds to a
    boundary whose realization on a base sequence yields two distinct locations (e.g.,
    an exon junction), this property disambiguates which anchor side on the
    sequenceReference is intended. `left` denotes the side immediately preceding the
    anchor in sequenceReference coordinate order; `right` denotes the side immediately
    following the anchor in sequenceReference coordinate order.
    """

    # The ``str`` mixin makes every member compare equal to its lowercase literal
    # value, so ``AnchorOrientation.LEFT == "left"`` holds.

    # Side immediately preceding the anchor in sequenceReference coordinate order.
    LEFT = "left"
    # Side immediately following the anchor in sequenceReference coordinate order.
    RIGHT = "right"


def _recurse_ga4gh_serialize(obj):
if isinstance(obj, Ga4ghIdentifiableObject):
return obj.get_or_create_digest()
Expand Down Expand Up @@ -503,7 +521,7 @@ class ReferenceLengthExpression(_ValueObject, BaseModelForbidExtra):
)
sequence: sequenceString | None = Field(
default=None,
description="the literal Sequence encoded by the Reference Length Expression.",
description="the literal sequence encoded by the Reference Length Expression.",
)
repeatSubunitLength: int = Field(
..., description="The number of residues in the repeat subunit."
Expand Down Expand Up @@ -656,6 +674,72 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801
inherent = ["end", "sequenceReference", "start", "type"]


class SequenceOffsetLocation(_ValueObject, BaseModelForbidExtra):
    """A location defined by an offset relative to an anchor on a mapped sequence
    reference.
    """

    # Keep the plain enum value (e.g. "left") on the model rather than the enum member.
    model_config = ConfigDict(use_enum_values=True)

    # Fixed type tag; the default is taken from VrsType so the two stay in sync.
    type: Literal["SequenceOffsetLocation"] = Field(
        default=VrsType.SEQ_OFFSET_LOCATION.value,
        description=f'MUST be "{VrsType.SEQ_OFFSET_LOCATION.value}"',
    )
    # Required. Either an inline SequenceReference or an IRI pointing at one.
    sequenceReference: SequenceReference | iriReference = Field(
        ...,
        description="A sequence reference that has been mapped from which a relative location is defined.",
    )
    # Required.
    anchor: int = Field(
        ...,
        description="The inter-residue position on the sequence reference from which the relative location offset is calculated.",
    )
    # Required.
    anchorOrientation: AnchorOrientation = Field(
        ...,
        description="Indicates which side of a discontinuous anchor on the sequenceReference is used as the reference point for interpreting offsetStart/offsetEnd. The anchor is an inter-residue coordinate on the sequenceReference. When that anchor corresponds to a boundary whose realization on a base sequence yields two distinct locations (e.g., an exon junction), this property disambiguates which anchor side on the sequenceReference is intended. `left` denotes the side immediately preceding the anchor in sequenceReference coordinate order; `right` denotes the side immediately following the anchor in sequenceReference coordinate order.",
    )
    # Optional; accepts an exact integer or a Range, and defaults to None.
    offsetStart: int | Range | None = Field(
        default=None,
        description="The start offset, in inter-residue coordinates, from the anchor realization selected by anchorOrientation on the sequenceReference.",
    )
    # Optional; accepts an exact integer or a Range, and defaults to None.
    offsetEnd: int | Range | None = Field(
        default=None,
        description="The end offset, in inter-residue coordinates, from the anchor realization selected by anchorOrientation on the sequenceReference.",
    )

    class ga4gh(_ValueObject.ga4gh):
        # Lists every field declared above plus `type`; no `prefix` is declared in
        # this block.
        # NOTE(review): presumably these are the attributes that feed the GA4GH
        # serialization/digest - confirm against _ValueObject.ga4gh.
        inherent = [
            "sequenceReference",
            "anchor",
            "anchorOrientation",
            "offsetStart",
            "offsetEnd",
            "type",
        ]


class RelativeSequenceLocation(Ga4ghIdentifiableObject, BaseModelForbidExtra):
    """A location on a base sequence and its position relative to a boundary offset on a
    mapped sequence gap. Typically used to describe intronic locations that exist with
    respect to a mapped RNA transcript sequence.
    """

    # Fixed type tag; the default is taken from VrsType so the two stay in sync.
    type: Literal["RelativeSequenceLocation"] = Field(
        default=VrsType.RELATIVE_SEQ_LOC.value,
        description=f'MUST be "{VrsType.RELATIVE_SEQ_LOC.value}"',
    )
    # Required. Either an inline SequenceLocation or an IRI pointing at one.
    baseSequenceLocation: SequenceLocation | iriReference = Field(
        ..., description="An absolute location on a sequence."
    )
    # Required. Either an inline SequenceOffsetLocation or an IRI pointing at one.
    mappedSequenceLocation: SequenceOffsetLocation | iriReference = Field(
        ...,
        description="A location relative to an offset on a mapped sequence.",
    )

    class ga4gh(Ga4ghIdentifiableObject.ga4gh):
        # NOTE(review): presumably `prefix` is the type prefix of the computed
        # identifier and `inherent` the attributes that feed the digest - confirm
        # against Ga4ghIdentifiableObject.ga4gh.
        prefix = "RSL"
        inherent = ["baseSequenceLocation", "mappedSequenceLocation", "type"]


#########################################
# base variation
#########################################
Expand Down Expand Up @@ -716,6 +800,35 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801
inherent = ["location", "state", "type"]


class RelativeAllele(_VariationBase, BaseModelForbidExtra):
    """An Allele defined on a mapped location relative to a base location. Often used to describe intronic variants."""

    # Fixed type tag; the default is taken from VrsType so the two stay in sync.
    type: Literal["RelativeAllele"] = Field(
        default=VrsType.RELATIVE_ALLELE.value,
        description=f'MUST be "{VrsType.RELATIVE_ALLELE.value}"',
    )
    # Required. State as expressed on the mapped sequence; accepts the same three
    # expression types as baseState.
    mappedState: (
        LiteralSequenceExpression | ReferenceLengthExpression | LengthExpression
    ) = Field(
        ...,
        description='The state of the RelativeAllele as expressed on the mapped sequence. This will differ from the base state when mapping to a reverse complement sequence, commonly observed when representing the state on transcripts mapped to the "negative strand" of a chromosome.',
    )
    # Required. State as expressed on the base sequence.
    baseState: (
        LiteralSequenceExpression | ReferenceLengthExpression | LengthExpression
    ) = Field(
        ...,
        description="The state of the RelativeAllele as expressed on the base sequence.",
    )
    # Required. Either an inline RelativeSequenceLocation or an IRI pointing at one.
    relativeLocation: RelativeSequenceLocation | iriReference = Field(
        ...,
        description="The relative location at which the baseState and mappedState are expressed.",
    )

    class ga4gh(Ga4ghIdentifiableObject.ga4gh):
        # NOTE(review): presumably `prefix` is the type prefix of the computed
        # identifier and `inherent` the attributes that feed the digest - confirm
        # against Ga4ghIdentifiableObject.ga4gh.
        prefix = "RA"
        inherent = ["mappedState", "baseState", "relativeLocation", "type"]


class CisPhasedBlock(_VariationBase, BaseModelForbidExtra):
"""An ordered set of co-occurring `Variation` on the same molecule."""

Expand Down Expand Up @@ -921,7 +1034,14 @@ class ga4gh(Ga4ghIdentifiableObject.ga4gh):
class MolecularVariation(RootModel):
"""A `variation` on a contiguous molecule."""

root: Allele | CisPhasedBlock | Adjacency | Terminus | DerivativeMolecule = Field(
root: (
Allele
| RelativeAllele
| CisPhasedBlock
| Adjacency
| Terminus
| DerivativeMolecule
) = Field(
...,
json_schema_extra={"description": "A `variation` on a contiguous molecule."},
discriminator="type",
Expand All @@ -943,7 +1063,7 @@ class SequenceExpression(RootModel):
class Location(RootModel):
"""A contiguous segment of a biological sequence."""

root: SequenceLocation = Field(
root: SequenceLocation | RelativeSequenceLocation = Field(
...,
json_schema_extra={
"description": "A contiguous segment of a biological sequence."
Expand Down
2 changes: 1 addition & 1 deletion submodules/vrs
Submodule vrs updated 88 files
+21 −0 .github/workflows/cqa.yaml
+1 −1 .gitmodules
+2 −2 .requirements.txt
+2 −1 docs/source/appendices/class_diagram.rst
+34 −5 docs/source/appendices/design_decisions.rst
+1 −1 docs/source/appendices/maturity_model.rst
+9 −1 docs/source/concepts/AdditionalDataTypes/index.rst
+9 −0 docs/source/concepts/LocationAndReference/RelativeSequenceLocation.rst
+0 −2 docs/source/concepts/LocationAndReference/SequenceLocation.rst
+9 −0 docs/source/concepts/LocationAndReference/SequenceOffsetLocation.rst
+2 −0 docs/source/concepts/LocationAndReference/index.rst
+9 −0 docs/source/concepts/MolecularVariation/RelativeAllele.rst
+1 −0 docs/source/concepts/MolecularVariation/index.rst
+1 −1 docs/source/concepts/SequenceExpression/LiteralSequenceExpression.rst
+3 −0 docs/source/concepts/SystemicVariation/CopyNumberChange.rst
+25 −22 docs/source/conf.py
+2 −3 docs/source/conventions/computed_identifiers.rst
+1 −20 docs/source/conventions/example.rst
+2 −2 docs/source/conventions/required_data.rst
+135 −0 docs/source/examples/SpliceVariants/SpliceAcceptorDownstream.rst
+135 −0 docs/source/examples/SpliceVariants/SpliceAcceptorUpstream.rst
+135 −0 docs/source/examples/SpliceVariants/SpliceDonorDownstream.rst
+135 −0 docs/source/examples/SpliceVariants/SpliceDonorUpstream.rst
+21 −0 docs/source/examples/SpliceVariants/index.rst
+9 −0 docs/source/examples/index.rst
+2,055 −0 docs/source/images/schema-current.drawio.svg
+ docs/source/images/schema-current.png
+289 −0 docs/source/images/splice_variants/acceptor_downstream/anchor.drawio.svg
+600 −0 docs/source/images/splice_variants/acceptor_downstream/mapping.drawio.svg
+ docs/source/images/splice_variants/acceptor_downstream/ncbi_reference_sequence.png
+ docs/source/images/splice_variants/acceptor_downstream/ucsc-exon15.png
+ docs/source/images/splice_variants/acceptor_downstream/ucsc-exon16.png
+293 −0 docs/source/images/splice_variants/acceptor_upstream/anchor.drawio.svg
+576 −0 docs/source/images/splice_variants/acceptor_upstream/mapping.drawio.svg
+ docs/source/images/splice_variants/acceptor_upstream/ncbi_reference_sequence.png
+ docs/source/images/splice_variants/acceptor_upstream/ucsc-exon7.png
+ docs/source/images/splice_variants/acceptor_upstream/ucsc-exon8.png
+297 −0 docs/source/images/splice_variants/donor_downstream/anchor.drawio.svg
+600 −0 docs/source/images/splice_variants/donor_downstream/mapping.drawio.svg
+ docs/source/images/splice_variants/donor_downstream/ncbi_reference_sequence.png
+ docs/source/images/splice_variants/donor_downstream/ucsc-exon2.png
+ docs/source/images/splice_variants/donor_downstream/ucsc-exon3.png
+302 −0 docs/source/images/splice_variants/donor_upstream/anchor.drawio.svg
+576 −0 docs/source/images/splice_variants/donor_upstream/mapping.drawio.svg
+ docs/source/images/splice_variants/donor_upstream/ncbi_reference_sequence.png
+ docs/source/images/splice_variants/donor_upstream/ucsc-exon29.png
+ docs/source/images/splice_variants/donor_upstream/ucsc-exon30.png
+1 −0 docs/source/index.rst
+2 −2 docs/source/releases/1.1.rst
+10 −11 docs/source/releases/1.2.rst
+5 −6 docs/source/releases/1.3.rst
+18 −10 docs/source/releases/2.0.rst
+2 −2 docs/source/requirements.txt
+1 −1 schema/vrs/def/Allele.rst
+1 −1 schema/vrs/def/CopyNumberChange.rst
+1 −1 schema/vrs/def/CopyNumberCount.rst
+1 −1 schema/vrs/def/ReferenceLengthExpression.rst
+102 −0 schema/vrs/def/RelativeAllele.rst
+89 −0 schema/vrs/def/RelativeSequenceLocation.rst
+1 −1 schema/vrs/def/SequenceExpression.rst
+99 −0 schema/vrs/def/SequenceOffsetLocation.rst
+11 −8 schema/vrs/json/Adjacency
+8 −8 schema/vrs/json/Allele
+6 −6 schema/vrs/json/CisPhasedBlock
+5 −5 schema/vrs/json/CopyNumberChange
+6 −6 schema/vrs/json/CopyNumberCount
+8 −8 schema/vrs/json/DerivativeMolecule
+2 −2 schema/vrs/json/Expression
+3 −3 schema/vrs/json/LengthExpression
+3 −3 schema/vrs/json/LiteralSequenceExpression
+5 −2 schema/vrs/json/Location
+9 −6 schema/vrs/json/MolecularVariation
+1 −1 schema/vrs/json/Range
+5 −5 schema/vrs/json/ReferenceLengthExpression
+113 −0 schema/vrs/json/RelativeAllele
+87 −0 schema/vrs/json/RelativeSequenceLocation
+5 −5 schema/vrs/json/SequenceExpression
+7 −7 schema/vrs/json/SequenceLocation
+109 −0 schema/vrs/json/SequenceOffsetLocation
+3 −3 schema/vrs/json/SequenceReference
+3 −3 schema/vrs/json/SystemicVariation
+8 −5 schema/vrs/json/Terminus
+3 −3 schema/vrs/json/TraversalBlock
+11 −8 schema/vrs/json/Variation
+1 −1 schema/vrs/json/residue
+1 −1 schema/vrs/json/sequenceString
+156 −7 schema/vrs/vrs-source.yaml
+1 −1 submodules/gks-core
2 changes: 2 additions & 0 deletions tests/test_vrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@ def test_enref2():
def test_class_refatt_map():
class_refatt_map_expected = {
"Allele": ["location"],
"RelativeAllele": ["relativeLocation"],
"RelativeSequenceLocation": ["baseSequenceLocation"],
"CisPhasedBlock": ["members"],
"CopyNumberCount": ["location"],
"CopyNumberChange": ["location"],
Expand Down
Loading