From 7868bbfa16f0e1d2d3c676fab886bb7fc7351f56 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:04:18 -0700 Subject: [PATCH 01/11] Variant_ontologies: base normalization result data class, base Normalizer: registers handlers for processing RSID_Normalizer: registered for snp db, pharmgkb look up from rsid as well. --- src/ontology_module/variant_ontology.py | 143 ++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 src/ontology_module/variant_ontology.py diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py new file mode 100644 index 0000000..05bf1bb --- /dev/null +++ b/src/ontology_module/variant_ontology.py @@ -0,0 +1,143 @@ +from abc import ABC, abstractmethod +from typing import Callable,Dict, Optional, Any +from dataclasses import dataclass, field +import logging +from Bio import Entrez +import requests + +logger = logging.getLogger(__name__) + +@dataclass +class NormalizationResult: + raw_input: str + normalized_output: str + entity_type: str # e.g. "variant", "gene", "drug", etc. 
+ source: str # where the normalized info came from + metadata: Dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: dict) -> "NormalizationResult": + return cls( + raw_input=data["raw_input"], + normalized_output=data["normalized_output"], + entity_type=data.get("entity_type", "unknown"), + source=data["source"], + metadata=data.get("metadata", {}) + ) + +class BaseNormalizer(ABC): + def __init__(self): + self._handlers: list[Callable[[str], Optional[dict]]] = [] + + def register_handler(self, handler: Callable[[str], Optional[dict]]): + self._handlers.append(handler) + + def normalize(self, raw: str) -> Optional["NormalizationResult"]: + for handler in self._handlers: + try: + result = handler(raw) + if result: + return result # Assuming result is already a NormalizedEntity + except Exception as e: + logger.exception(f"Handler '{handler.__name__}' failed on input: '{raw}'") + return None + + @abstractmethod + def name(self) -> str: + pass + + + + + +class RSIDNormalizer(BaseNormalizer): + def __init__(self, email: str, api_key: Optional[str] = None): + super().__init__() + Entrez.email = email + if api_key: + Entrez.api_key = api_key + + self.register_handler(self.lookup_dbsnp) + self.register_handler(self.lookup_pharmgkb_id) + + def name(self) -> str: + return "RSIDNormalizer" + + def lookup_dbsnp(self, raw: str) -> Optional[NormalizationResult]: + rsid = raw.lower().strip() + if not rsid.startswith("rs") or not rsid[2:].isdigit(): + return None + + try: + handle = Entrez.esummary(db="snp", id=rsid[2:], retmode="json") + response = handle.read() + handle.close() + + # Convert JSON string to Python dict + import json + data = json.loads(response) + + record = data.get("result", {}).get(rsid[2:]) + if not record: + return None + + return NormalizationResult( + raw_input=raw, + normalized_output=rsid, + entity_type="variant", + source="dbSNP", + metadata=record + ) + + except Exception: + logger.exception(f"lookup_dbsnp failed 
for {raw}") + return None + + def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]: + logger.debug(f"Looking up PharmGKB variant by symbol: {raw}") + + base_url = "https://api.pharmgkb.org/v1/data/variant" + params = { + "symbol": raw.strip(), + "view": "max" + } + + try: + response = requests.get(base_url, params=params, timeout=10) + if response.status_code != 200: + logger.warning(f"PharmGKB lookup failed ({response.status_code}) for {raw}") + return None + + data = response.json() + records = data.get("data", []) + if not records: + logger.info(f"No PharmGKB variant match for symbol: {raw}") + return None + + variant = records[0] + + # Extract only required fields + normalized_output = variant.get("id") + entity_type = "variant" + source = "PharmGKB" + + # Remove known fields so everything else is dumped into metadata + metadata = {k: v for k, v in variant.items() if k not in {"id"}} + + return NormalizationResult( + raw_input=raw, + normalized_output=normalized_output, + entity_type=entity_type, + source=source, + metadata=metadata + ) + + except Exception: + logger.exception(f"PharmGKB symbol lookup failed for {raw}") + return None + + + + + + From 750be09864d4644031a4fa66ac578218dc467460 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:09:05 -0700 Subject: [PATCH 02/11] drug ontology initial --- src/ontology_module/drug_ontology.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/ontology_module/drug_ontology.py diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py new file mode 100644 index 0000000..2eeaf85 --- /dev/null +++ b/src/ontology_module/drug_ontology.py @@ -0,0 +1,17 @@ + +from variant_ontology import BaseNormalizer +import requests + +# how to use, you have thew following, + + + +class DrugNormalizer(BaseNormalizer): + def __init__(self): + + ## + pass + def lookup_drug(): + request.get() + + # the followig From 
5c9970d53a4332ff43f4b8119721d2e93f1e53ad Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:09:33 -0700 Subject: [PATCH 03/11] init file for ontology and normalization module base --- src/ontology_module/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/ontology_module/__init__.py diff --git a/src/ontology_module/__init__.py b/src/ontology_module/__init__.py new file mode 100644 index 0000000..e69de29 From 9d7a22b9e94c8fd3a7a72b5a28b62aa025622484 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:20:30 -0700 Subject: [PATCH 04/11] lookupdrugpubchem complete, still need validation on it --- src/ontology_module/drug_ontology.py | 66 +++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py index 2eeaf85..7b67a16 100644 --- a/src/ontology_module/drug_ontology.py +++ b/src/ontology_module/drug_ontology.py @@ -1,5 +1,9 @@ -from variant_ontology import BaseNormalizer + +from typing import Optional +import logging +from variant_ontology import BaseNormalizer,NormalizationResult + import requests # how to use, you have thew following, @@ -7,11 +11,59 @@ class DrugNormalizer(BaseNormalizer): + """Normalizes drug names using PubChem API.""" + def __init__(self): + super().__init__() + + def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: + """ + Normalize a raw drug name via PubChem, return structured result. 
+ """ + query = raw.strip() + if not query: + logger.debug("Empty drug input, skipping.") + return None + + try: + # Step 1: Fetch CID + cid_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{query}/cids/JSON" + cid_resp = requests.get(cid_url, timeout=5) + cid_resp.raise_for_status() + cid_data = cid_resp.json() + cid_list = cid_data.get("IdentifierList", {}).get("CID", []) + if not cid_list: + logger.debug("No CID found for input: %s", query) + return None + cid = cid_list[0] + + # Step 2: Fetch chemical properties + prop_url = ( + f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/" + f"{cid}/property/IUPACName,MolecularFormula,CanonicalSMILES/JSON" + ) + prop_resp = requests.get(prop_url, timeout=5) + prop_resp.raise_for_status() + prop_data = prop_resp.json() + props = prop_data["PropertyTable"]["Properties"][0] + + return NormalizationResult( + raw_input=raw, + normalized_output=props.get("IUPACName", query), + entity_type="drug", + source="PubChem", + metadata={ + "cid": cid, + "molecular_formula": props.get("MolecularFormula"), + "canonical_smiles": props.get("CanonicalSMILES") + } + ) + + except requests.RequestException as exc: + logger.warning("Request failed for '%s': %s", raw, exc) + except Exception as exc: + logger.warning("Unexpected error for '%s': %s", raw, exc) - ## - pass - def lookup_drug(): - request.get() - - # the followig + return None + + def \ No newline at end of file From 9dc4230db127d8f5d7f38726b095bc4627c33c99 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:28:56 -0700 Subject: [PATCH 05/11] included pharmgkb ontology for drugs. 
TODO: need to fix pubchem, and need to order look up, such that pharmgkb gets exact chemical --- src/ontology_module/drug_ontology.py | 46 ++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py index 7b67a16..a7e0b4f 100644 --- a/src/ontology_module/drug_ontology.py +++ b/src/ontology_module/drug_ontology.py @@ -9,12 +9,14 @@ # how to use, you have thew following, +logger = logging.getLogger(__name__) class DrugNormalizer(BaseNormalizer): - """Normalizes drug names using PubChem API.""" + """Normalizes drug names, and connect to common ID's per use.""" def __init__(self): super().__init__() + # register the pubchem first before I register the other. def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: """ @@ -66,4 +68,44 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: return None - def \ No newline at end of file + def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]: + """ + Lookup drug info from PharmGKB using its REST API. + Returns all available metadata without filtering. 
+ """ + query = raw.strip().lower() + if not query: + logger.debug("Empty drug input for PharmGKB lookup.") + return None + + try: + url = ( + "https://api.pharmgkb.org/v1/data/chemical" + f"?name={requests.utils.quote(query)}&view=max" + ) + headers = {"accept": "application/json"} + response = requests.get(url, headers=headers, timeout=5) + response.raise_for_status() + data = response.json() + + results = data.get("data", []) + if not results: + logger.debug("No PharmGKB chemical match found for: %s", query) + return None + + entry = results[0] # Always take the first match + + return NormalizationResult( + raw_input=raw, + normalized_output=entry.get("name", raw), + entity_type="drug", + source="PharmGKB", + metadata=entry # Store the entire returned dictionary + ) + + except requests.RequestException as exc: + logger.warning("PharmGKB request failed for '%s': %s", raw, exc) + except Exception as exc: + logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc) + + return None \ No newline at end of file From c301d70850abddb08226f3891a22ace81ff9db30 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:31:53 -0700 Subject: [PATCH 06/11] register handles, fix import --- src/ontology_module/drug_ontology.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py index a7e0b4f..bcfc134 100644 --- a/src/ontology_module/drug_ontology.py +++ b/src/ontology_module/drug_ontology.py @@ -2,7 +2,7 @@ from typing import Optional import logging -from variant_ontology import BaseNormalizer,NormalizationResult +from .variant_ontology import BaseNormalizer, NormalizationResult import requests @@ -16,6 +16,15 @@ class DrugNormalizer(BaseNormalizer): def __init__(self): super().__init__() + + self.register_handler(self.lookup_drug_pubchem) + + + + #TODO: insert logic to handle base generic instead of what we have + + + 
self.register_handler(self.lookup_drug_pharmgkb) # register the pubchem first before I register the other. def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: From e1a37900af82ece3773eea0c1e8a9a9d3e6a8bc1 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 11:50:02 -0700 Subject: [PATCH 07/11] included some tests in the ontology... brand names will fail on pharmgkb, need to convert first. or rather identify if string is brand or nah --- src/ontology_module/drug_ontology.py | 57 +++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py index bcfc134..2b647ad 100644 --- a/src/ontology_module/drug_ontology.py +++ b/src/ontology_module/drug_ontology.py @@ -23,9 +23,11 @@ def __init__(self): #TODO: insert logic to handle base generic instead of what we have - + self.register_handler(self.lookup_drug_pharmgkb) # register the pubchem first before I register the other. 
+ def name(self): + return "Drug Normalizer" def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: """ @@ -117,4 +119,55 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]: except Exception as exc: logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc) - return None \ No newline at end of file + return None + + + + +def test_lookup_pubchem(): + normalizer = DrugNormalizer() + drug = "Imatinib" + result = normalizer.lookup_drug_pubchem(drug) + + print(f"\n[PubChem] Input: {drug}") + if result is None: + print("❌ No result returned.") + else: + print("✅ Result:") + print(f" Raw: {result.raw_input}") + print(f" Normalized: {result.normalized_output}") + print(f" Source: {result.source}") + print(f" Entity Type: {result.entity_type}") + print(f" CID: {result.metadata.get('cid')}") + print(f" SMILES: {result.metadata.get('canonical_smiles')}") + assert isinstance(result, NormalizationResult) + assert result.source == "PubChem" + assert result.entity_type == "drug" + assert "canonical_smiles" in result.metadata + + +def test_lookup_pharmgkb(): + normalizer = DrugNormalizer() + drug = "Gleevec" # Brand name for Imatinib + result = normalizer.lookup_drug_pharmgkb(drug) + + print(f"\n[PharmGKB] Input: {drug}") + if result is None: + print("❌ No result returned.") + else: + print("✅ Result:") + print(f" Raw: {result.raw_input}") + print(f" Normalized: {result.normalized_output}") + print(f" Source: {result.source}") + print(f" Entity Type: {result.entity_type}") + print(f" PharmGKB ID: {result.metadata.get('id')}") + print(f" Brand Names: {result.metadata.get('brandNames')}") + assert isinstance(result, NormalizationResult) + assert result.source == "PharmGKB" + assert result.entity_type == "drug" + assert "id" in result.metadata + + +if __name__ == "__main__": + test_lookup_pubchem() + test_lookup_pharmgkb() From 6e42bcb00409e631f56d788fc5c07439eb856c23 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 
Aug 2025 12:10:35 -0700 Subject: [PATCH 08/11] created new function with rxnorm to get generic from brand name, feed that into pharmgkb to get drug id. --- src/ontology_module/drug_ontology.py | 78 +++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py index 2b647ad..2ff477f 100644 --- a/src/ontology_module/drug_ontology.py +++ b/src/ontology_module/drug_ontology.py @@ -78,7 +78,17 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]: logger.warning("Unexpected error for '%s': %s", raw, exc) return None - + def get_generic_from_brand_pubchem(self, raw: str) -> Optional[str]: + """ + Resolves a brand name to a generic (IUPAC) name using PubChem. + Returns the normalized_output from lookup_drug_pubchem, or None. + """ + result = self.lookup_drug_pubchem(raw) + if result: + return result.normalized_output + return None + + def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]: """ Lookup drug info from PharmGKB using its REST API. @@ -120,7 +130,63 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]: logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc) return None - + def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]: + """ + Resolves a drug name (brand or generic) using the RxNorm API. + Returns a NormalizationResult with the generic name and RxNorm metadata. 
+ """ + query = raw.strip() + if not query: + logger.debug("Empty drug input for RxNorm lookup.") + return None + + try: + # Step 1: Get RxCUI for input name + rxcui_url = f"https://rxnav.nlm.nih.gov/REST/rxcui.json?name={requests.utils.quote(query)}" + rxcui_resp = requests.get(rxcui_url, timeout=5) + rxcui_resp.raise_for_status() + rxcui_data = rxcui_resp.json() + rxcui_list = rxcui_data.get("idGroup", {}).get("rxnormId", []) + if not rxcui_list: + logger.debug("No RxCUI found for input: %s", query) + return None + rxcui = rxcui_list[0] + + # Step 2: Get related ingredient (generic) names from RxCUI + related_url = f"https://rxnav.nlm.nih.gov/REST/rxcui/{rxcui}/related.json?tty=IN" + related_resp = requests.get(related_url, timeout=5) + related_resp.raise_for_status() + related_data = related_resp.json() + + concepts = related_data.get("relatedGroup", {}).get("conceptGroup", []) + ingredients = [] + for group in concepts: + if group.get("tty") == "IN": + for concept in group.get("conceptProperties", []): + ingredients.append(concept.get("name")) + + if not ingredients: + logger.debug("No generic (IN) concept found for RxCUI: %s", rxcui) + return None + + return NormalizationResult( + raw_input=raw, + normalized_output=ingredients[0], # first generic match + entity_type="drug", + source="RxNorm", + metadata={ + "rxcui": rxcui, + "generic_candidates": ingredients + } + ) + + except requests.RequestException as exc: + logger.warning("RxNorm request failed for '%s': %s", raw, exc) + except Exception as exc: + logger.warning("Unexpected error in RxNorm lookup for '%s': %s", raw, exc) + + return None + @@ -149,6 +215,9 @@ def test_lookup_pubchem(): def test_lookup_pharmgkb(): normalizer = DrugNormalizer() drug = "Gleevec" # Brand name for Imatinib + print("TEST LOOKUP PHARMGKB") + generic = normalizer.get_generic_from_brand_pubchem("Gleevec") + print(generic) result = normalizer.lookup_drug_pharmgkb(drug) print(f"\n[PharmGKB] Input: {drug}") @@ -170,4 +239,9 @@ def 
test_lookup_pharmgkb(): if __name__ == "__main__": test_lookup_pubchem() + test_lookup_pharmgkb() + normalizer = DrugNormalizer() + result = normalizer.lookup_drug_rxnorm("Gleevec") + print(result.normalized_output) # → "imatinib" + From 4a14a9ba17fc2643d64c85f3c8a7670a24305160 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 13:48:17 -0700 Subject: [PATCH 09/11] the beginnings of the haplotype/starallele representation and normalization --- src/ontology_module/variant_ontology.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py index 05bf1bb..28db476 100644 --- a/src/ontology_module/variant_ontology.py +++ b/src/ontology_module/variant_ontology.py @@ -135,7 +135,18 @@ def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]: except Exception: logger.exception(f"PharmGKB symbol lookup failed for {raw}") return None + +class StarAlleleNormalizer(BaseNormalizer): + + def __init__(self): + pass + def name(self): + return "Star Allele Normalizer" + + + def + From 8defc1bed8cb6c7bbd61c37142adf9d3f52ee3e1 Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 14:42:11 -0700 Subject: [PATCH 10/11] semi fixed star allele lookup... no reliable pattern to match.... --- src/ontology_module/variant_ontology.py | 48 ++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py index 28db476..8743eae 100644 --- a/src/ontology_module/variant_ontology.py +++ b/src/ontology_module/variant_ontology.py @@ -143,12 +143,52 @@ def __init__(self): pass def name(self): return "Star Allele Normalizer" - + def fetch_star_alleles(self, term: str) -> list[dict]: + """ + Searches for star alleles matching a term and retrieves full metadata for each. + + Args: + term (str): The star allele search string (e.g., "CYP2D6*4"). 
+ + Returns: + list[dict]: Each dict contains all metadata fields for a matched star allele. + """ + base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search" + fields = [ + "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges", + "GeneNucleotideChange", "ProteinChange", "OtherNames", + "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References", + "ClinicalPhenotype", "Notes" + ] - def - + params = { + "terms": term, + "ef": ",".join(fields), + "maxList": "50" + } + + response = requests.get(base_url, params=params) + response.raise_for_status() + data = response.json() + + if not data or len(data) < 3: + return [] + + codes = data[1] + extra_fields = data[2] + + results = [] + for i, code in enumerate(codes): + allele_data = {field: extra_fields.get(field, [None])[i] for field in fields} + results.append(allele_data) + + return results + + +if __name__ == "__main__": + from pprint import pprint + pprint(StarAlleleNormalizer().fetch_star_alleles("CYP2D6*4")) - From 72592d4bf3c5e65816214e2791751d943ba0e02e Mon Sep 17 00:00:00 2001 From: gtcha2 Date: Mon, 4 Aug 2025 14:51:21 -0700 Subject: [PATCH 11/11] added in the ability to handle multiple star alleles, the star allele normalizer returns all information associated with all possible star alleles referenced to the query, --- src/ontology_module/variant_ontology.py | 108 +++++++++++++++++------- 1 file changed, 78 insertions(+), 30 deletions(-) diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py index 8743eae..de67242 100644 --- a/src/ontology_module/variant_ontology.py +++ b/src/ontology_module/variant_ontology.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from typing import Callable,Dict, Optional, Any -from dataclasses import dataclass, field +from typing import Callable,Dict, Optional, Any, List +from dataclasses import dataclass, field import logging from Bio import Entrez import requests @@ -137,58 +137,106 @@ def 
lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]: return None class StarAlleleNormalizer(BaseNormalizer): - + API_URL = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search" def __init__(self): pass def name(self): return "Star Allele Normalizer" - def fetch_star_alleles(self, term: str) -> list[dict]: - """ - Searches for star alleles matching a term and retrieves full metadata for each. + - Args: - term (str): The star allele search string (e.g., "CYP2D6*4"). + - Returns: - list[dict]: Each dict contains all metadata fields for a matched star allele. + def fetch_star_alleles(self, query: str, max_results: int = 50) -> List[Dict[str, Any]]: + """ + Fetches all star allele records matching the query string from the PharmVar-backed Clinical Tables API. + Returns a list of dictionaries, one per allele, with all available fields populated. """ - base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search" fields = [ "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges", - "GeneNucleotideChange", "ProteinChange", "OtherNames", + "GeneNucleotideChange", "XbaIHaplotype", "RFLP", "OtherNames", "ProteinChange", "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References", "ClinicalPhenotype", "Notes" ] params = { - "terms": term, - "ef": ",".join(fields), - "maxList": "50" + "terms": query, + "count": max_results, + "ef": ",".join(fields) } - response = requests.get(base_url, params=params) - response.raise_for_status() - data = response.json() - - if not data or len(data) < 3: + try: + response = requests.get(self.API_URL, params=params, timeout=10) + response.raise_for_status() + except Exception as e: + logger.error(f"API request failed: {e}") return [] - codes = data[1] - extra_fields = data[2] + try: + total_count, allele_names, extra_fields, *_ = response.json() + except Exception as e: + logger.error(f"Failed to parse API response: {e}") + return [] results = [] - for i, code in enumerate(codes): - 
allele_data = {field: extra_fields.get(field, [None])[i] for field in fields} - results.append(allele_data) + for i, allele in enumerate(allele_names): + allele_info = { + "StarAlleleName": allele + } + for field, values in extra_fields.items(): + allele_info[field] = values[i] if i < len(values) else None + results.append(allele_info) return results + # def fetch_star_alleles(self, term: str) -> list[dict]: + # """ + # Searches for star alleles matching a term and retrieves full metadata for each. + + # Args: + # term (str): The star allele search string (e.g., "CYP2D6*4"). + + # Returns: + # list[dict]: Each dict contains all metadata fields for a matched star allele. + # """ + # base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search" + # fields = [ + # "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges", + # "GeneNucleotideChange", "ProteinChange", "OtherNames", + # "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References", + # "ClinicalPhenotype", "Notes" + # ] + + # params = { + # "terms": term, + # "ef": ",".join(fields), + # "maxList": "50" + # } + + # response = requests.get(base_url, params=params) + # response.raise_for_status() + # data = response.json() + + # if not data or len(data) < 3: + # return [] + + # codes = data[1] + # extra_fields = data[2] + + # results = [] + # for i, code in enumerate(codes): + # allele_data = {field: extra_fields.get(field, [None])[i] for field in fields} + # results.append(allele_data) + + # return results if __name__ == "__main__": - from pprint import pprint - pprint(StarAlleleNormalizer().fetch_star_alleles("CYP2D6*4")) - - - + logging.basicConfig(level=logging.INFO) + normalizer = StarAlleleNormalizer() + data = normalizer.fetch_star_alleles("CYP2D6*4") + + for record in data: + print("\n--- Star Allele Record ---") + for k, v in record.items(): + print(f"{k}: {v}")