diff --git a/civicpy/civic.py b/civicpy/civic.py index fe10fcc..36f143a 100644 --- a/civicpy/civic.py +++ b/civicpy/civic.py @@ -9,6 +9,7 @@ from collections import defaultdict, namedtuple from datetime import datetime, timedelta from backports.datetime_fromisoformat import MonkeyPatch +import time MonkeyPatch.patch_fromisoformat() import re @@ -261,6 +262,7 @@ def update_cache( genes = _get_elements_by_ids("gene", allow_cached=False, get_all=True) factors = _get_elements_by_ids("factor", allow_cached=False, get_all=True) fusions = _get_elements_by_ids("fusion", allow_cached=False, get_all=True) + regions = _get_elements_by_ids("region", allow_cached=False, get_all=True) variants = _get_elements_by_ids("variant", allow_cached=False, get_all=True) evidence = _get_elements_by_ids("evidence", allow_cached=False, get_all=True) assertions = _get_elements_by_ids("assertion", allow_cached=False, get_all=True) @@ -295,6 +297,11 @@ def update_cache( f.variants = [v for v in variants if v.feature_id == f.id] f._partial = False CACHE[hash(f)] = f + for r in regions: + r.sources = [s for s in sources if s.id in r.source_ids] + r.variants = [v for v in variants if v.feature_id == r.id] + r._partial = False + CACHE[hash(r)] = r for v in variants: v.variant_groups = [vg for vg in variant_groups if v.id in vg.variant_ids] v.molecular_profiles = [ @@ -327,6 +334,8 @@ def update_cache( pn = [f for f in factors if f.id == pn.id][0] elif pn.featureType == "FUSION": pn = [f for f in fusions if f.id == pn.id][0] + elif pn.featureType == "REGION": + pn = [r for r in regions if r.id == pn.id][0] elif pn.type == "Variant": pn = [v for v in variants if v.id == pn.id][0] else: @@ -340,6 +349,7 @@ def update_cache( s.genes = [g for g in genes if s.id in g.source_ids] s.factors = [f for f in factors if s.id in f.source_ids] s.fusions = [f for f in fusions if s.id in f.source_ids] + s.regions = [r for r in regions if s.id in r.source_ids] s.molecular_profiles = [ m for m in molecular_profiles if s.id in m.source_ids ] @@ -1010,6 +1020,27 @@ def feature(self): """ return self.fusion +class RegionVariant(Variant): + _SIMPLE_FIELDS = Variant._SIMPLE_FIELDS.union( + { + "iscn_name", + } + ) + + @property + def region(self): + """ + The :class:`Region` record this variant belongs to. + """ + return _get_element_by_id("region", self.feature_id) + + @property + def feature(self): + """ + The :class:`Region` feature this variant belongs to. + """ + return self.region + class VariantGroup(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( @@ -1212,6 +1243,51 @@ def three_prime_gene(self): return None +class Region(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( + {"description", "name", "source_ids"} + ) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union( + { + "aliases", + # 'errors', # TODO: Add support for these fields in advanced search endpoint + # /'lifecycle_actions', + # 'provisional_values', + "sources", + "variants", + } + ) + + def __init__(self, **kwargs): + self._variants = [] + self._sources = [] + super().__init__(**kwargs) + + @property + def variants(self): + """ + A list of :class:`Variant` records associated with this region. + """ + for variant in self._variants: + variant._include_status = self._include_status + return [v for v in self._variants if v.molecular_profiles] + + @variants.setter + def variants(self, value): + self._variants = value + + @property + def sources(self): + """ + A list of :class:`Source` records associated with the region description. + """ + return self._sources + + @sources.setter + def sources(self, value): + self._sources = value + + class Evidence(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( { @@ -1712,6 +1788,7 @@ def __init__(self, **kwargs): self._genes = [] self._factors = [] self._fusions = [] + self._regions = [] self._molecular_profiles = [] super().__init__(**kwargs) @@ -1769,6 +1846,17 @@ def factors(self): def factors(self, value): self._factors = value + @property + def regions(self): + """ + A list of :class:`Region` records supported by this source. + """ + return self._regions + + @regions.setter + def regions(self, value): + self._regions = value + @property def molecular_profiles(self): """ @@ -2036,6 +2124,9 @@ def _postprocess_response_element(e, element): e["five_prime_gene_id"] = e["fivePrimeGene"]["id"] else: e["five_prime_gene_id"] = None + elif element == "region": + e["source_ids"] = [v["id"] for v in e["sources"]] + del e["sources"] elif element == "gene": e["source_ids"] = [v["id"] for v in e["sources"]] del e["sources"] @@ -2084,6 +2175,8 @@ def _postprocess_response_element(e, element): ): e["three_prime_end_exon_coordinates"]["exon_offset"] = 0 e["subtype"] = "fusion_variant" + elif e["__typename"] == "RegionVariant": + e["subtype"] = "region_variant" else: raise Exception("Variant type {} not supported yet".format(e["__typename"])) elif element == "variant_group": @@ -2104,6 +2197,7 @@ def _request_by_ids(element, ids): "gene": graphql_payloads._construct_get_gene_payload, "factor": graphql_payloads._construct_get_factor_payload, "fusion": graphql_payloads._construct_get_fusion_payload, + "region": graphql_payloads._construct_get_region_payload, "variant": graphql_payloads._construct_get_variant_payload, "assertion": graphql_payloads._construct_get_assertion_payload, "variant_group": graphql_payloads._construct_get_variant_group_payload, @@ -2135,6 +2229,7 @@ def _request_all(element): "gene": graphql_payloads._construct_get_all_genes_payload, "factor": graphql_payloads._construct_get_all_factors_payload, "fusion": graphql_payloads._construct_get_all_fusions_payload, + "region": graphql_payloads._construct_get_all_regions_payload, "variant": graphql_payloads._construct_get_all_variants_payload, "assertion": graphql_payloads._construct_get_all_assertions_payload, "variant_group": graphql_payloads._construct_get_all_variant_groups_payload, @@ -2150,7 +2245,7 @@ def _request_all(element): payload = payload_method() after_cursor = None - variables = {"after": after_cursor} + variables = {"after": after_cursor, "page_size": 50} resp = requests.post( API_URL, json={"query": payload, "variables": variables}, timeout=(10, 200) ) @@ -2162,6 +2257,7 @@ def _request_all(element): while has_next_page: variables = {"after": after_cursor} + time.sleep(0.1) resp = requests.post( API_URL, json={"query": payload, "variables": variables}, timeout=(10, 200) ) @@ -2275,6 +2371,7 @@ def get_variants_by_ids(variant_id_list): gene_ids = set() factor_ids = set() fusion_ids = set() + region_ids = set() for variant in variants: if isinstance(variant, GeneVariant): gene_ids.add(variant.feature_id) @@ -2282,6 +2379,8 @@ def get_variants_by_ids(variant_id_list): factor_ids.add(variant.feature_id) elif isinstance(variant, FusionVariant): fusion_ids.add(variant.feature_id) + elif isinstance(variant, RegionVariant): + region_ids.add(variant.feature_id) variant._include_status = ["accepted", "submitted", "rejected"] if gene_ids: logging.info("Caching gene details...") @@ -2292,6 +2391,9 @@ def get_variants_by_ids(variant_id_list): if fusion_ids: logging.info("Caching fusion details...") _get_elements_by_ids("fusion", fusion_ids) + if region_ids: + logging.info("Caching region details...") + _get_elements_by_ids("region", region_ids) return variants @@ -2350,6 +2452,10 @@ def get_features_by_ids(feature_id_list): feature = _get_element_by_id("factor", feature_id) except: pass + try: + feature = _get_element_by_id("region", feature_id) + except: + pass if feature is None: raise Exception("Feature {} not found".format(feature_id)) else: @@ -2463,6 +2569,35 @@ def get_factor_by_id(factor_id): return get_factors_by_ids([factor_id])[0] +def get_regions_by_ids(region_id_list): + """ + :param list region_id_list: A list of CIViC region feature IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Region` objects. + """ + logging.info("Getting regions...") + regions = _get_elements_by_ids("region", region_id_list) + variant_ids = set() + for region in regions: + region._include_status = ["accepted", "submitted", "rejected"] + for variant in region.variants: + variant_ids.add(variant.id) + if variant_ids: + logging.info("Caching variant details...") + _get_elements_by_ids("variant", variant_ids) + for region in regions: + for variant in region.variants: + variant.update() + return regions + + +def get_region_by_id(region_id): + """ + :param int region_id: A single CIViC region feature ID. + :returns: A :class:`Region` object. + """ + return get_regions_by_ids([region_id])[0] + + # Source @@ -2735,6 +2870,20 @@ def get_all_factor_variants( return [v for v in variants if v.subtype == "factor_variant"] +def get_all_region_variants( + include_status=["accepted", "submitted", "rejected"], allow_cached=True +): + """ + Queries CIViC for all region variants. + + :param list include_status: A list of statuses. Only variants and their associated entities matching the given statuses will be returned. Use **None** to include variants without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Variant` objects of **subtype** **region_variant**. + """ + variants = get_all_variants(include_status=include_status, allow_cached=True) + return [v for v in variants if v.subtype == "region_variant"] + + # Variant Group @@ -2767,10 +2916,12 @@ def get_all_features( genes = _get_elements_by_ids("gene", get_all=True, allow_cached=allow_cached) fusions = _get_elements_by_ids("fusion", get_all=True, allow_cached=allow_cached) factors = _get_elements_by_ids("factor", get_all=True, allow_cached=allow_cached) + regions = _get_elements_by_ids("region", get_all=True, allow_cached=allow_cached) features = [] features.extend(genes) features.extend(fusions) features.extend(factors) + features.extend(regions) if include_status: assert CACHE.get("variants_all_ids", False) assert CACHE.get("evidence_items_all_ids", False) @@ -2856,6 +3007,30 @@ def get_all_factors( return factors +def get_all_regions( + include_status=["accepted", "submitted", "rejected"], allow_cached=True +): + """ + Queries CIViC for all region features. + + :param list include_status: A list of statuses. Only regions and their associated entities matching the given statuses will be returned. Use **None** to include regions without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Region` objects. + """ + regions = _get_elements_by_ids("region", get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get("variants_all_ids", False) + assert CACHE.get("evidence_items_all_ids", False) + resp = list() + for r in regions: + r._include_status = include_status + if r.variants: + resp.append(r) + return resp + else: + return regions + + # Evidence @@ -3504,6 +3679,21 @@ def search_fusions_by_partner_gene_id(partner_gene_id): return matching_fusions +# Region + + +def get_region_by_name(name): + """ + :param str name: A region name. + :returns: A :class:`Region` object. + """ + regions = _get_elements_by_ids("region", get_all=True) + matching_regions = [r for r in regions if r.name == name] + if len(matching_regions) == 0: + raise Exception("No Region with name: {}".format(name)) + return matching_regions[0] + + # Variants diff --git a/civicpy/data/test_cache.pkl b/civicpy/data/test_cache.pkl index 450b92e..2747a7d 100644 Binary files a/civicpy/data/test_cache.pkl and b/civicpy/data/test_cache.pkl differ diff --git a/civicpy/graphql_payloads.py b/civicpy/graphql_payloads.py index ce57317..2f2d855 100644 --- a/civicpy/graphql_payloads.py +++ b/civicpy/graphql_payloads.py @@ -129,6 +129,41 @@ def _construct_get_all_fusions_payload(): } }""" +def _construct_get_region_payload(): + return """ + query region($id: Int!) { + region(id: $id) { + name + description + aliases: featureAliases + sources { + id + } + } + }""" + + +def _construct_get_all_regions_payload(): + return """ + query regions($after: String) { + regions(after: $after, evidenceStatusFilter: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + description + aliases: featureAliases + sources { + id + } + } + } + }""" + def _construct_get_molecular_profile_payload(): return """ @@ -307,6 +342,9 @@ def _construct_get_variant_payload(): ... on FactorVariant { ncit_id: ncitId } + ... on RegionVariant { + iscn_name: iscnName + } feature { id name @@ -360,6 +398,9 @@ def _construct_get_all_variants_payload(): ... on FactorVariant { ncit_id: ncitId } + ... on RegionVariant { + iscn_name: iscnName + } ... on FusionVariant { vicc_compliant_name: viccCompliantName five_prime_coordinates: fivePrimeCoordinates { diff --git a/civicpy/tests/test_civic.py b/civicpy/tests/test_civic.py index 4af6df5..3a4b73f 100644 --- a/civicpy/tests/test_civic.py +++ b/civicpy/tests/test_civic.py @@ -176,6 +176,7 @@ def test_get_by_id(self): variant = civic.get_variant_by_id(11) assert variant.id == 11 assert variant.type == "variant" + assert variant.subtype == "gene_variant" def test_attributes(self): variant = civic.get_variant_by_id(11) @@ -202,6 +203,7 @@ def test_get_by_id(self): variant = civic.get_variant_by_id(1) assert variant.id == 1 assert variant.type == "variant" + assert variant.subtype == "fusion_variant" def test_attributes(self): variant = civic.get_variant_by_id(1) @@ -291,6 +293,7 @@ def test_get_by_id(self): variant = civic.get_variant_by_id(4985) assert variant.id == 4985 assert variant.type == "variant" + assert variant.subtype == "factor_variant" def test_attributes(self): variant = civic.get_variant_by_id(4985) @@ -302,6 +305,29 @@ def test_properties(self): assert variant.factor == variant.feature +class TestRegionVariants(object): + def test_get_all(self): + variants = civic.get_all_region_variants() + assert len(variants) >= 3 + for variant in variants: + assert variant.subtype == "region_variant" + + def test_get_by_id(self): + variant = civic.get_variant_by_id(5078) + assert variant.id == 5078 + assert variant.type == "variant" + assert variant.subtype == "region_variant" + + def test_attributes(self): + variant = civic.get_variant_by_id(5078) + assert variant.iscn_name == "amp(17p)" + + def test_properties(self): + variant = civic.get_variant_by_id(5078) + assert variant.region.id == 62048 + assert variant.region == variant.feature + + class TestMolecularProfiles(object): def test_get_all(self): mps = civic.get_all_molecular_profiles() @@ -561,6 +587,39 @@ def test_search_fusions_by_partner_gene_id(self): assert len(fusions) >= 5 +class TestRegions(object): + def test_get_all(self): + regions = civic.get_all_regions() + assert len(regions) >= 2 + + def test_get_non_rejected(self): + regions = civic.get_all_regions(include_status=["accepted", "submitted"]) + assert len(regions) >= 2 + + def test_get_accepted_only(self): + regions = civic.get_all_regions(include_status=["accepted"]) + assert len(regions) >= 1 + + def test_get_by_id(self): + region = civic.get_region_by_id(62048) + assert region.type == "region" + assert region.id == 62048 + + def test_get_by_name(self): + region = civic.get_region_by_name("17p") + assert region.type == "region" + assert region.name == "17p" + + def test_attributes(self): + region = civic.get_region_by_id(62048) + assert region.name == "17p" + + def test_properties(self): + region = civic.get_region_by_id(62048) + assert len(region.variants) == 2 + assert len(region.sources) == 0 + + class TestDiseases(object): def test_get_all(self): diseases = civic.get_all_diseases() diff --git a/docs/civic.rst b/docs/civic.rst index 797ad73..8e2f07b 100644 --- a/docs/civic.rst +++ b/docs/civic.rst @@ -136,6 +136,27 @@ Fusion .. _HGNC Gene Symbol: https://www.genenames.org/ +Region +^^^^^^ + +.. autoclass:: Region + :show-inheritance: + :members: + + .. attribute:: aliases + + A list of alternate names by which this region is referenced. + + .. attribute:: description + + A curated summary of the clinical significance of this region. + + .. attribute:: name + + The name of the region. Either a chromosome, a chromosome arm, or + a cytoband. + + Variant ^^^^^^^ @@ -145,8 +166,8 @@ Variant .. attribute:: feature_id - The :attr:`CivicRecord.id` of the :class:`Gene`, :class:`Factor`, or - :class:`Fusion` the variant belongs to. + The :attr:`CivicRecord.id` of the :class:`Gene`, :class:`Factor`, + :class:`Fusion`, or :class:`Region` the variant belongs to. .. attribute:: name @@ -257,6 +278,20 @@ FusionVariant .. _VICC fusion specification: https://fusions.cancervariants.org/en/latest/nomenclature.html +RegionVariant +""""""""""""" + +.. autoclass:: RegionVariant + :show-inheritance: + :members: + + .. attribute:: iscn_name + + The `International System for Human Cytogenomic Nomenclature Name`_ representing the region variant. + +.. _International System for Human Cytogenomic Nomenclature Name: https://iscn.karger.com/ + + MolecularProfile ^^^^^^^^^^^^^^^^ diff --git a/docs/getting_records.rst b/docs/getting_records.rst index e7a858c..54f51e5 100644 --- a/docs/getting_records.rst +++ b/docs/getting_records.rst @@ -19,6 +19,7 @@ Features .. autofunction:: get_all_genes .. autofunction:: get_all_factors .. autofunction:: get_all_fusions +.. autofunction:: get_all_regions Variants ~~~~~~~~ @@ -27,6 +28,7 @@ Variants .. autofunction:: get_all_gene_variants .. autofunction:: get_all_factor_variants .. autofunction:: get_all_fusion_variants +.. autofunction:: get_all_region_variants Molecular Profiles ~~~~~~~~~~~~~~~~~~ @@ -99,6 +101,9 @@ Features .. autofunction:: get_fusion_by_id .. autofunction:: get_fusions_by_ids +.. autofunction:: get_region_by_id +.. autofunction:: get_regions_by_ids + Variants ~~~~~~~~ @@ -207,6 +212,11 @@ Fusions .. autofunction:: get_fusion_by_name .. autofunction:: search_fusions_by_partner_gene_id +Regions +~~~~~~~ + +.. autofunction:: get_region_by_name + Variants ~~~~~~~~ diff --git a/setup.py b/setup.py index a384b14..bab6b45 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ "requests", "obonet", "networkx", - "pandas", + "pandas<=2.3.3", "Click", "vcfpy~=0.13.8", "pysam",