From 7868bbfa16f0e1d2d3c676fab886bb7fc7351f56 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:04:18 -0700
Subject: [PATCH 01/11] Variant_ontologies:

base normalization result data class,

base Normalizer:

registers handlers for processing

RSID_Normalizer:
registered for snp db,
pharmgkb look up from rsid as well.
---
 src/ontology_module/variant_ontology.py | 143 ++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 src/ontology_module/variant_ontology.py

diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
new file mode 100644
index 0000000..05bf1bb
--- /dev/null
+++ b/src/ontology_module/variant_ontology.py
@@ -0,0 +1,143 @@
+from abc import ABC, abstractmethod
+from typing import Callable,Dict, Optional, Any
+from dataclasses import dataclass, field
+import logging
+from Bio import Entrez
+import requests
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class NormalizationResult:
+    raw_input: str
+    normalized_output: str
+    entity_type: str         # e.g. "variant", "gene", "drug", etc.
+    source: str              # where the normalized info came from
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "NormalizationResult":
+        return cls(
+            raw_input=data["raw_input"],
+            normalized_output=data["normalized_output"],
+            entity_type=data.get("entity_type", "unknown"),
+            source=data["source"],
+            metadata=data.get("metadata", {})
+        )
+
+class BaseNormalizer(ABC):
+    """Runs registered lookup handlers in order; the first truthy result wins."""
+    def __init__(self):
+        self._handlers: list[Callable[[str], Optional["NormalizationResult"]]] = []
+
+    def register_handler(self, handler: Callable[[str], Optional["NormalizationResult"]]):
+        self._handlers.append(handler)
+
+    def normalize(self, raw: str) -> Optional["NormalizationResult"]:
+        """Return the first truthy handler result for raw, or None if none match."""
+        for handler in self._handlers:
+            try:
+                result = handler(raw)
+                if result:
+                    return result
+            except Exception:  # log and fall through to the next handler
+                label = getattr(handler, "__name__", repr(handler))  # partials lack __name__
+                logger.exception(f"Handler '{label}' failed on input: '{raw}'")
+        return None
+
+    @abstractmethod
+    def name(self) -> str:
+        """Return a display name for this normalizer."""
+
+
+class RSIDNormalizer(BaseNormalizer):
+    def __init__(self, email: str, api_key: Optional[str] = None):
+        super().__init__()
+        Entrez.email = email
+        if api_key:
+            Entrez.api_key = api_key
+
+        self.register_handler(self.lookup_dbsnp)
+        self.register_handler(self.lookup_pharmgkb_id)
+
+    def name(self) -> str:
+        return "RSIDNormalizer"
+
+    def lookup_dbsnp(self, raw: str) -> Optional[NormalizationResult]:
+        """Resolve an rsID (e.g. "rs1234") via Entrez esummary on the snp db.
+
+        Returns None when raw is not "rs" followed by digits, when the response
+        has no record for the id, or when the lookup raises (error is logged).
+        """
+        import json
+        rsid = raw.lower().strip()
+        if not rsid.startswith("rs") or not rsid[2:].isdigit():
+            return None
+        try:
+            handle = Entrez.esummary(db="snp", id=rsid[2:], retmode="json")
+            try:
+                response = handle.read()
+            finally:
+                handle.close()  # release the connection even if read() fails
+            record = json.loads(response).get("result", {}).get(rsid[2:])
+            if not record:
+                return None
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=rsid,
+                entity_type="variant",
+                source="dbSNP",
+                metadata=record
+            )
+        except Exception:
+            logger.exception(f"lookup_dbsnp failed for {raw}")
+            return None
+
+    def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
+        logger.debug(f"Looking up PharmGKB variant by symbol: {raw}")
+
+        base_url = "https://api.pharmgkb.org/v1/data/variant"
+        params = {
+            "symbol": raw.strip(),
+            "view": "max"
+        }
+
+        try:
+            response = requests.get(base_url, params=params, timeout=10)
+            if response.status_code != 200:
+                logger.warning(f"PharmGKB lookup failed ({response.status_code}) for {raw}")
+                return None
+
+            data = response.json()
+            records = data.get("data", [])
+            if not records:
+                logger.info(f"No PharmGKB variant match for symbol: {raw}")
+                return None
+
+            variant = records[0]
+
+            # Extract only required fields
+            normalized_output = variant.get("id")
+            entity_type = "variant"
+            source = "PharmGKB"
+
+            # Remove known fields so everything else is dumped into metadata
+            metadata = {k: v for k, v in variant.items() if k not in {"id"}}
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=normalized_output,
+                entity_type=entity_type,
+                source=source,
+                metadata=metadata
+            )
+
+        except Exception:
+            logger.exception(f"PharmGKB symbol lookup failed for {raw}")
+            return None
+
+
+        
+    
+
+

From 750be09864d4644031a4fa66ac578218dc467460 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:09:05 -0700
Subject: [PATCH 02/11] drug ontology initial

---
 src/ontology_module/drug_ontology.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 src/ontology_module/drug_ontology.py

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
new file mode 100644
index 0000000..2eeaf85
--- /dev/null
+++ b/src/ontology_module/drug_ontology.py
@@ -0,0 +1,17 @@
+
+from variant_ontology import BaseNormalizer
+import requests
+
+# how to use, you have thew following, 
+
+
+
+class DrugNormalizer(BaseNormalizer):
+    def __init__(self):
+
+        ##
+        pass
+    def lookup_drug():
+        request.get()
+        
+        # the followig 

From 5c9970d53a4332ff43f4b8119721d2e93f1e53ad Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:09:33 -0700
Subject: [PATCH 03/11] init file for ontology and normalization module base

---
 src/ontology_module/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/ontology_module/__init__.py

diff --git a/src/ontology_module/__init__.py b/src/ontology_module/__init__.py
new file mode 100644
index 0000000..e69de29

From 9d7a22b9e94c8fd3a7a72b5a28b62aa025622484 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:20:30 -0700
Subject: [PATCH 04/11] lookupdrugpubchem complete, still need validation on it

---
 src/ontology_module/drug_ontology.py | 66 +++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 7 deletions(-)

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
index 2eeaf85..7b67a16 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology_module/drug_ontology.py
@@ -1,5 +1,9 @@
 
-from variant_ontology import BaseNormalizer
+
+from typing import Optional
+import logging
+from variant_ontology import BaseNormalizer,NormalizationResult
+
 import requests
 
 # how to use, you have thew following, 
@@ -7,11 +11,59 @@
 
 
 class DrugNormalizer(BaseNormalizer):
+    """Normalizes drug names using PubChem API."""
+
     def __init__(self):
+        super().__init__()
+
+    def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
+        """
+        Normalize a raw drug name via PubChem, return structured result.
+        """
+        query = raw.strip()
+        if not query:
+            logger.debug("Empty drug input, skipping.")
+            return None
+
+        try:
+            # Step 1: Fetch CID
+            cid_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{query}/cids/JSON"
+            cid_resp = requests.get(cid_url, timeout=5)
+            cid_resp.raise_for_status()
+            cid_data = cid_resp.json()
+            cid_list = cid_data.get("IdentifierList", {}).get("CID", [])
+            if not cid_list:
+                logger.debug("No CID found for input: %s", query)
+                return None
+            cid = cid_list[0]
+
+            # Step 2: Fetch chemical properties
+            prop_url = (
+                f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/"
+                f"{cid}/property/IUPACName,MolecularFormula,CanonicalSMILES/JSON"
+            )
+            prop_resp = requests.get(prop_url, timeout=5)
+            prop_resp.raise_for_status()
+            prop_data = prop_resp.json()
+            props = prop_data["PropertyTable"]["Properties"][0]
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=props.get("IUPACName", query),
+                entity_type="drug",
+                source="PubChem",
+                metadata={
+                    "cid": cid,
+                    "molecular_formula": props.get("MolecularFormula"),
+                    "canonical_smiles": props.get("CanonicalSMILES")
+                }
+            )
+
+        except requests.RequestException as exc:
+            logger.warning("Request failed for '%s': %s", raw, exc)
+        except Exception as exc:
+            logger.warning("Unexpected error for '%s': %s", raw, exc)
 
-        ##
-        pass
-    def lookup_drug():
-        request.get()
-        
-        # the followig 
+        return None
+    
+    def 
\ No newline at end of file

From 9dc4230db127d8f5d7f38726b095bc4627c33c99 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:28:56 -0700
Subject: [PATCH 05/11] included pharmgkb ontology for drugs. TODO: need to fix
 pubchem, and need to order look up, such that pharmgkb gets exact chemical

---
 src/ontology_module/drug_ontology.py | 46 ++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
index 7b67a16..a7e0b4f 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology_module/drug_ontology.py
@@ -9,12 +9,14 @@
 # how to use, you have thew following, 
 
 
+logger = logging.getLogger(__name__)
 
 class DrugNormalizer(BaseNormalizer):
-    """Normalizes drug names using PubChem API."""
+    """Normalizes drug names, and connect to common ID's per use."""
 
     def __init__(self):
         super().__init__()
+        # register the pubchem first before I register the other. 
 
     def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
         """
@@ -66,4 +68,44 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
 
         return None
     
-    def 
\ No newline at end of file
+    def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
+        """
+        Lookup drug info from PharmGKB using its REST API.
+        Returns all available metadata without filtering.
+        """
+        query = raw.strip().lower()  
+        if not query:
+            logger.debug("Empty drug input for PharmGKB lookup.")
+            return None
+
+        try:
+            url = (
+                "https://api.pharmgkb.org/v1/data/chemical"
+                f"?name={requests.utils.quote(query)}&view=max"
+            )
+            headers = {"accept": "application/json"}
+            response = requests.get(url, headers=headers, timeout=5)
+            response.raise_for_status()
+            data = response.json()
+
+            results = data.get("data", [])
+            if not results:
+                logger.debug("No PharmGKB chemical match found for: %s", query)
+                return None
+
+            entry = results[0]  # Always take the first match
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=entry.get("name", raw),
+                entity_type="drug",
+                source="PharmGKB",
+                metadata=entry  # Store the entire returned dictionary
+            )
+
+        except requests.RequestException as exc:
+            logger.warning("PharmGKB request failed for '%s': %s", raw, exc)
+        except Exception as exc:
+            logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc)
+
+        return None
\ No newline at end of file

From c301d70850abddb08226f3891a22ace81ff9db30 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:31:53 -0700
Subject: [PATCH 06/11] register handles, fix import

---
 src/ontology_module/drug_ontology.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
index a7e0b4f..bcfc134 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology_module/drug_ontology.py
@@ -2,7 +2,7 @@
 
 from typing import Optional
 import logging
-from variant_ontology import BaseNormalizer,NormalizationResult
+from .variant_ontology import BaseNormalizer, NormalizationResult
 
 import requests
 
@@ -16,6 +16,15 @@ class DrugNormalizer(BaseNormalizer):
 
     def __init__(self):
         super().__init__()
+        
+        self.register_handler(self.lookup_drug_pubchem)
+        
+
+
+        #TODO: insert logic to handle base generic instead of what we have 
+
+        
+        self.register_handler(self.lookup_drug_pharmgkb)
         # register the pubchem first before I register the other. 
 
     def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:

From e1a37900af82ece3773eea0c1e8a9a9d3e6a8bc1 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 11:50:02 -0700
Subject: [PATCH 07/11] included some tests in the ontology... brand names will
 fail on pharmgkb, need to convert first. or rather identify if string is
 brand or nah

---
 src/ontology_module/drug_ontology.py | 57 +++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
index bcfc134..2b647ad 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology_module/drug_ontology.py
@@ -23,9 +23,11 @@ def __init__(self):
 
         #TODO: insert logic to handle base generic instead of what we have 
 
-        
+
         self.register_handler(self.lookup_drug_pharmgkb)
         # register the pubchem first before I register the other. 
+    def name(self):
+        return "Drug Normalizer"
 
     def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
         """
@@ -117,4 +119,55 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
         except Exception as exc:
             logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc)
 
-        return None
\ No newline at end of file
+        return None
+    
+
+
+
+def test_lookup_pubchem():
+    normalizer = DrugNormalizer()
+    drug = "Imatinib"
+    result = normalizer.lookup_drug_pubchem(drug)
+
+    print(f"\n[PubChem] Input: {drug}")
+    if result is None:
+        print("❌ No result returned.")
+    else:
+        print("✅ Result:")
+        print(f"  Raw:         {result.raw_input}")
+        print(f"  Normalized:  {result.normalized_output}")
+        print(f"  Source:      {result.source}")
+        print(f"  Entity Type: {result.entity_type}")
+        print(f"  CID:         {result.metadata.get('cid')}")
+        print(f"  SMILES:      {result.metadata.get('canonical_smiles')}")
+        assert isinstance(result, NormalizationResult)
+        assert result.source == "PubChem"
+        assert result.entity_type == "drug"
+        assert "canonical_smiles" in result.metadata
+
+
+def test_lookup_pharmgkb():
+    normalizer = DrugNormalizer()
+    drug = "Gleevec"  # Brand name for Imatinib
+    result = normalizer.lookup_drug_pharmgkb(drug)
+
+    print(f"\n[PharmGKB] Input: {drug}")
+    if result is None:
+        print("❌ No result returned.")
+    else:
+        print("✅ Result:")
+        print(f"  Raw:         {result.raw_input}")
+        print(f"  Normalized:  {result.normalized_output}")
+        print(f"  Source:      {result.source}")
+        print(f"  Entity Type: {result.entity_type}")
+        print(f"  PharmGKB ID: {result.metadata.get('id')}")
+        print(f"  Brand Names: {result.metadata.get('brandNames')}")
+        assert isinstance(result, NormalizationResult)
+        assert result.source == "PharmGKB"
+        assert result.entity_type == "drug"
+        assert "id" in result.metadata
+
+
+if __name__ == "__main__":
+    test_lookup_pubchem()
+    test_lookup_pharmgkb()

From 6e42bcb00409e631f56d788fc5c07439eb856c23 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 12:10:35 -0700
Subject: [PATCH 08/11] created new function with rxnorm to get generic from
 brand name, feed that into pharmgkb to get drug id.

---
 src/ontology_module/drug_ontology.py | 78 +++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/src/ontology_module/drug_ontology.py b/src/ontology_module/drug_ontology.py
index 2b647ad..2ff477f 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology_module/drug_ontology.py
@@ -78,7 +78,17 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
             logger.warning("Unexpected error for '%s': %s", raw, exc)
 
         return None
-    
+    def get_generic_from_brand_pubchem(self, raw: str) -> Optional[str]:
+        """
+        Resolves a brand name to a generic (IUPAC) name using PubChem.
+        Returns the normalized_output from lookup_drug_pubchem, or None.
+        """
+        result = self.lookup_drug_pubchem(raw)
+        if result:
+            return result.normalized_output
+        return None
+
+
     def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
         """
         Lookup drug info from PharmGKB using its REST API.
@@ -120,7 +130,63 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
             logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc)
 
         return None
-    
+    def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
+        """
+        Resolves a drug name (brand or generic) using the RxNorm API.
+        Returns a NormalizationResult with the generic name and RxNorm metadata.
+        """
+        query = raw.strip()
+        if not query:
+            logger.debug("Empty drug input for RxNorm lookup.")
+            return None
+
+        try:
+            # Step 1: Get RxCUI for input name
+            rxcui_url = f"https://rxnav.nlm.nih.gov/REST/rxcui.json?name={requests.utils.quote(query)}"
+            rxcui_resp = requests.get(rxcui_url, timeout=5)
+            rxcui_resp.raise_for_status()
+            rxcui_data = rxcui_resp.json()
+            rxcui_list = rxcui_data.get("idGroup", {}).get("rxnormId", [])
+            if not rxcui_list:
+                logger.debug("No RxCUI found for input: %s", query)
+                return None
+            rxcui = rxcui_list[0]
+
+            # Step 2: Get related ingredient (generic) names from RxCUI
+            related_url = f"https://rxnav.nlm.nih.gov/REST/rxcui/{rxcui}/related.json?tty=IN"
+            related_resp = requests.get(related_url, timeout=5)
+            related_resp.raise_for_status()
+            related_data = related_resp.json()
+
+            concepts = related_data.get("relatedGroup", {}).get("conceptGroup", [])
+            ingredients = []
+            for group in concepts:
+                if group.get("tty") == "IN":
+                    for concept in group.get("conceptProperties", []):
+                        ingredients.append(concept.get("name"))
+
+            if not ingredients:
+                logger.debug("No generic (IN) concept found for RxCUI: %s", rxcui)
+                return None
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=ingredients[0],  # first generic match
+                entity_type="drug",
+                source="RxNorm",
+                metadata={
+                    "rxcui": rxcui,
+                    "generic_candidates": ingredients
+                }
+            )
+
+        except requests.RequestException as exc:
+            logger.warning("RxNorm request failed for '%s': %s", raw, exc)
+        except Exception as exc:
+            logger.warning("Unexpected error in RxNorm lookup for '%s': %s", raw, exc)
+
+        return None
+
 
 
 
@@ -149,6 +215,9 @@ def test_lookup_pubchem():
 def test_lookup_pharmgkb():
     normalizer = DrugNormalizer()
     drug = "Gleevec"  # Brand name for Imatinib
+    print("TEST LOOKUP PHARMGKB")
+    generic = normalizer.get_generic_from_brand_pubchem("Gleevec")
+    print(generic)
     result = normalizer.lookup_drug_pharmgkb(drug)
 
     print(f"\n[PharmGKB] Input: {drug}")
@@ -170,4 +239,9 @@ def test_lookup_pharmgkb():
 
 if __name__ == "__main__":
     test_lookup_pubchem()
+    
     test_lookup_pharmgkb()
+    normalizer = DrugNormalizer()
+    result = normalizer.lookup_drug_rxnorm("Gleevec")
+    # lookup_drug_rxnorm returns None on a failed request or when nothing matches
+    print(result.normalized_output if result else None)  # → "imatinib"

From 4a14a9ba17fc2643d64c85f3c8a7670a24305160 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 13:48:17 -0700
Subject: [PATCH 09/11] the beginnings of the haplotype/starallele
 representation and normalization

---
 src/ontology_module/variant_ontology.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
index 05bf1bb..28db476 100644
--- a/src/ontology_module/variant_ontology.py
+++ b/src/ontology_module/variant_ontology.py
@@ -135,7 +135,18 @@ def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
         except Exception:
             logger.exception(f"PharmGKB symbol lookup failed for {raw}")
             return None
+        
+class StarAlleleNormalizer(BaseNormalizer):
+    
 
+    def __init__(self):
+        pass
+    def name(self):
+        return "Star Allele Normalizer"
+    
+
+    def 
+    
 
         
     

From 8defc1bed8cb6c7bbd61c37142adf9d3f52ee3e1 Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 14:42:11 -0700
Subject: [PATCH 10/11] semi fixed star allele lookup... no reliable pattern to
 match....

---
 src/ontology_module/variant_ontology.py | 48 ++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
index 28db476..8743eae 100644
--- a/src/ontology_module/variant_ontology.py
+++ b/src/ontology_module/variant_ontology.py
@@ -143,12 +143,52 @@ def __init__(self):
         pass
     def name(self):
         return "Star Allele Normalizer"
-    
+    def fetch_star_alleles(self, term: str) -> list[dict]:
+        """
+        Searches for star alleles matching a term and retrieves full metadata for each.
+
+        Args:
+            term (str): The star allele search string (e.g., "CYP2D6*4").
+
+        Returns:
+            list[dict]: Each dict contains all metadata fields for a matched star allele.
+        """
+        base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
+        fields = [
+            "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
+            "GeneNucleotideChange", "ProteinChange", "OtherNames",
+            "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
+            "ClinicalPhenotype", "Notes"
+        ]
 
-    def 
-    
+        params = {
+            "terms": term,
+            "ef": ",".join(fields),
+            "maxList": "50"
+        }
+
+        response = requests.get(base_url, params=params)
+        response.raise_for_status()
+        data = response.json()
+
+        if not data or len(data) < 3:
+            return []
+
+        codes = data[1]
+        extra_fields = data[2]
+
+        results = []
+        for i, code in enumerate(codes):
+            allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
+            results.append(allele_data)
+
+        return results
+        
+
+if __name__ == "__main__":
+    from pprint import pprint
+    pprint(StarAlleleNormalizer().fetch_star_alleles("CYP2D6*4"))
 
         
-    
 
 

From 72592d4bf3c5e65816214e2791751d943ba0e02e Mon Sep 17 00:00:00 2001
From: gtcha2 <aron7628@gmail.com>
Date: Mon, 4 Aug 2025 14:51:21 -0700
Subject: [PATCH 11/11] added in the ability to handle multiple star alleles,
 the star allele normalizer returns all information associated with all
 possible star alleles referenced to the query,

---
 src/ontology_module/variant_ontology.py | 108 +++++++++++++++++-------
 1 file changed, 78 insertions(+), 30 deletions(-)

diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
index 8743eae..de67242 100644
--- a/src/ontology_module/variant_ontology.py
+++ b/src/ontology_module/variant_ontology.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Callable,Dict, Optional, Any
-from dataclasses import dataclass, field
+from typing import Callable,Dict, Optional, Any, List
+from dataclasses import dataclass, field 
 import logging
 from Bio import Entrez
 import requests
@@ -137,58 +137,106 @@ def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
             return None
         
 class StarAlleleNormalizer(BaseNormalizer):
-    
+    API_URL = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
 
     def __init__(self):
         pass
     def name(self):
         return "Star Allele Normalizer"
-    def fetch_star_alleles(self, term: str) -> list[dict]:
-        """
-        Searches for star alleles matching a term and retrieves full metadata for each.
+ 
 
-        Args:
-            term (str): The star allele search string (e.g., "CYP2D6*4").
+       
 
-        Returns:
-            list[dict]: Each dict contains all metadata fields for a matched star allele.
+    def fetch_star_alleles(self, query: str, max_results: int = 50) -> List[Dict[str, Any]]:
+        """
+        Fetches all star allele records matching the query string from the PharmVar-backed Clinical Tables API.
+        Returns a list of dictionaries, one per allele, with all available fields populated.
         """
-        base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
         fields = [
             "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
-            "GeneNucleotideChange", "ProteinChange", "OtherNames",
+            "GeneNucleotideChange", "XbaIHaplotype", "RFLP", "OtherNames", "ProteinChange",
             "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
             "ClinicalPhenotype", "Notes"
         ]
 
         params = {
-            "terms": term,
-            "ef": ",".join(fields),
-            "maxList": "50"
+            "terms": query,
+            "count": max_results,
+            "ef": ",".join(fields)
         }
 
-        response = requests.get(base_url, params=params)
-        response.raise_for_status()
-        data = response.json()
-
-        if not data or len(data) < 3:
+        try:
+            response = requests.get(self.API_URL, params=params, timeout=10)
+            response.raise_for_status()
+        except Exception as e:
+            logger.error(f"API request failed: {e}")
             return []
 
-        codes = data[1]
-        extra_fields = data[2]
+        try:
+            total_count, allele_names, extra_fields, *_ = response.json()
+        except Exception as e:
+            logger.error(f"Failed to parse API response: {e}")
+            return []
 
         results = []
-        for i, code in enumerate(codes):
-            allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
-            results.append(allele_data)
+        for i, allele in enumerate(allele_names):
+            allele_info = {
+                "StarAlleleName": allele
+            }
+            for field, values in extra_fields.items():
+                allele_info[field] = values[i] if i < len(values) else None
+            results.append(allele_info)
 
         return results
+    # def fetch_star_alleles(self, term: str) -> list[dict]:
+    #     """
+    #     Searches for star alleles matching a term and retrieves full metadata for each.
+
+    #     Args:
+    #         term (str): The star allele search string (e.g., "CYP2D6*4").
+
+    #     Returns:
+    #         list[dict]: Each dict contains all metadata fields for a matched star allele.
+    #     """
+    #     base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
+    #     fields = [
+    #         "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
+    #         "GeneNucleotideChange", "ProteinChange", "OtherNames",
+    #         "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
+    #         "ClinicalPhenotype", "Notes"
+    #     ]
+
+    #     params = {
+    #         "terms": term,
+    #         "ef": ",".join(fields),
+    #         "maxList": "50"
+    #     }
+
+    #     response = requests.get(base_url, params=params)
+    #     response.raise_for_status()
+    #     data = response.json()
+
+    #     if not data or len(data) < 3:
+    #         return []
+
+    #     codes = data[1]
+    #     extra_fields = data[2]
+
+    #     results = []
+    #     for i, code in enumerate(codes):
+    #         allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
+    #         results.append(allele_data)
+
+    #     return results
         
 
 if __name__ == "__main__":
-    from pprint import pprint
-    pprint(StarAlleleNormalizer().fetch_star_alleles("CYP2D6*4"))
-
-        
-
+    logging.basicConfig(level=logging.INFO)
+    normalizer = StarAlleleNormalizer()
+    data = normalizer.fetch_star_alleles("CYP2D6*4")
+
+    for record in data:
+        print("\n--- Star Allele Record ---")
+        for k, v in record.items():
+            print(f"{k}: {v}")