From 04e84ea4dd25c6e6e242f5c9c8059e0d51f555b2 Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Tue, 5 Aug 2025 13:45:21 -0700
Subject: [PATCH 01/10] feat: initial cleanup + main testing

---
 src/citations/line_citation_generator.py      |  28 +-
 src/ontology/__init__.py                      |  15 +
 .../drug_ontology.py                          | 188 ++++++--
 src/ontology/variant_ontology.py              | 422 ++++++++++++++++++
 src/ontology_module/__init__.py               |   0
 src/ontology_module/variant_ontology.py       | 242 ----------
 src/study_parameters.py                       |  23 +-
 7 files changed, 630 insertions(+), 288 deletions(-)
 create mode 100644 src/ontology/__init__.py
 rename src/{ontology_module => ontology}/drug_ontology.py (59%)
 create mode 100644 src/ontology/variant_ontology.py
 delete mode 100644 src/ontology_module/__init__.py
 delete mode 100644 src/ontology_module/variant_ontology.py

diff --git a/src/citations/line_citation_generator.py b/src/citations/line_citation_generator.py
index c9d1cf9..fe9c3d7 100644
--- a/src/citations/line_citation_generator.py
+++ b/src/citations/line_citation_generator.py
@@ -413,8 +413,8 @@ def add_citations_to_annotations(
             )
 
             # Get p-value citations for this annotation
-            p_value_citations_candidates = self._get_top_p_value_citations_for_annotation(
-                annotation, top_k=3
+            p_value_citations_candidates = (
+                self._get_top_p_value_citations_for_annotation(annotation, top_k=3)
             )
 
             # Filter out duplicate citations within this annotation
@@ -487,8 +487,8 @@ def add_citations_to_annotations(
             # Final fallback for p-value citations: if still no unique p-value citations, use lower similarity threshold
             if len(unique_p_value_citations) == 0:
                 # Try with a lower similarity threshold
-                fallback_p_value_candidates = self._get_top_p_value_citations_for_annotation(
-                    annotation, top_k=5
+                fallback_p_value_candidates = (
+                    self._get_top_p_value_citations_for_annotation(annotation, top_k=5)
                 )
                 for citation in fallback_p_value_candidates:
                     is_duplicate = any(
@@ -511,10 +511,15 @@ def add_citations_to_annotations(
                     p_value_mentions = [
                         s
                         for s in self.sentences
-                        if any(keyword in s.lower() for keyword in ["p-value", "p<", "p =", "significant"])
+                        if any(
+                            keyword in s.lower()
+                            for keyword in ["p-value", "p<", "p =", "significant"]
+                        )
                     ]
                     if p_value_mentions:
-                        unique_p_value_citations = p_value_mentions[:2]  # Take top 2 p-value citations
+                        unique_p_value_citations = p_value_mentions[
+                            :2
+                        ]  # Take top 2 p-value citations
 
             # Create new annotation with unique citations
             updated_annotation = AnnotationRelationship(
@@ -523,7 +528,9 @@ def add_citations_to_annotations(
                 relationship_effect=annotation.relationship_effect,
                 p_value=annotation.p_value,
                 citations=unique_citations[:3],  # Take top 3 unique citations
-                p_value_citations=unique_p_value_citations[:2],  # Take top 2 unique p-value citations
+                p_value_citations=unique_p_value_citations[
+                    :2
+                ],  # Take top 2 unique p-value citations
             )
 
             updated_relationships.append(updated_annotation)
@@ -858,6 +865,7 @@ def _score_sentence_for_p_value(
 
         # Check for numerical patterns that might be p-values
         import re
+
         p_value_patterns = [
             r"p\s*[<>=≤≥]\s*0\.\d+",
             r"p\s*=\s*\d+\.\d+",
@@ -865,7 +873,7 @@ def _score_sentence_for_p_value(
             r"p\s*<\s*0\.01",
             r"p\s*<\s*0\.001",
         ]
-        
+
         for pattern in p_value_patterns:
             if re.search(pattern, sentence_lower):
                 score += 3
@@ -1254,7 +1262,9 @@ def main():
         print()
 
     # Get p-value citations for the annotation
-    p_value_citations = generator._get_top_p_value_citations_for_annotation(test_annotation, top_k=2)
+    p_value_citations = generator._get_top_p_value_citations_for_annotation(
+        test_annotation, top_k=2
+    )
 
     print(f"Found {len(p_value_citations)} p-value citations:")
     for i, citation in enumerate(p_value_citations, 1):
diff --git a/src/ontology/__init__.py b/src/ontology/__init__.py
new file mode 100644
index 0000000..bd85488
--- /dev/null
+++ b/src/ontology/__init__.py
@@ -0,0 +1,15 @@
+from .variant_ontology import (
+    NormalizationResult,
+    BaseNormalizer,
+    RSIDNormalizer,
+    StarAlleleNormalizer,
+)
+from .drug_ontology import DrugNormalizer
+
+__all__ = [
+    "NormalizationResult",
+    "BaseNormalizer",
+    "RSIDNormalizer",
+    "StarAlleleNormalizer",
+    "DrugNormalizer",
+]
diff --git a/src/ontology_module/drug_ontology.py b/src/ontology/drug_ontology.py
similarity index 59%
rename from src/ontology_module/drug_ontology.py
rename to src/ontology/drug_ontology.py
index 2ff477f..53c0f4a 100644
--- a/src/ontology_module/drug_ontology.py
+++ b/src/ontology/drug_ontology.py
@@ -1,31 +1,29 @@
-
-
 from typing import Optional
 import logging
 from .variant_ontology import BaseNormalizer, NormalizationResult
 
 import requests
 
-# how to use, you have thew following, 
+# how to use, you have the following,
 
 
 logger = logging.getLogger(__name__)
 
+
 class DrugNormalizer(BaseNormalizer):
     """Normalizes drug names, and connect to common ID's per use."""
 
     def __init__(self):
         super().__init__()
-        
-        self.register_handler(self.lookup_drug_pubchem)
-        
-
 
-        #TODO: insert logic to handle base generic instead of what we have 
+        self.register_handler(self.lookup_drug_pubchem)
 
+        # TODO: insert logic to handle base generic instead of what we have
 
         self.register_handler(self.lookup_drug_pharmgkb)
-        # register the pubchem first before I register the other. 
+        self.register_handler(self.lookup_drug_rxnorm)
+        # PubChem is registered first so normalize() tries it before the other handlers.
+
     def name(self):
         return "Drug Normalizer"
 
@@ -68,8 +66,8 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
                 metadata={
                     "cid": cid,
                     "molecular_formula": props.get("MolecularFormula"),
-                    "canonical_smiles": props.get("CanonicalSMILES")
-                }
+                    "canonical_smiles": props.get("CanonicalSMILES"),
+                },
             )
 
         except requests.RequestException as exc:
@@ -78,6 +76,7 @@ def lookup_drug_pubchem(self, raw: str) -> Optional[NormalizationResult]:
             logger.warning("Unexpected error for '%s': %s", raw, exc)
 
         return None
+
     def get_generic_from_brand_pubchem(self, raw: str) -> Optional[str]:
         """
         Resolves a brand name to a generic (IUPAC) name using PubChem.
@@ -88,13 +87,12 @@ def get_generic_from_brand_pubchem(self, raw: str) -> Optional[str]:
             return result.normalized_output
         return None
 
-
     def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
         """
         Lookup drug info from PharmGKB using its REST API.
         Returns all available metadata without filtering.
         """
-        query = raw.strip().lower()  
+        query = raw.strip().lower()
         if not query:
             logger.debug("Empty drug input for PharmGKB lookup.")
             return None
@@ -121,15 +119,18 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
                 normalized_output=entry.get("name", raw),
                 entity_type="drug",
                 source="PharmGKB",
-                metadata=entry  # Store the entire returned dictionary
+                metadata=entry,  # Store the entire returned dictionary
             )
 
         except requests.RequestException as exc:
             logger.warning("PharmGKB request failed for '%s': %s", raw, exc)
         except Exception as exc:
-            logger.warning("Unexpected error during PharmGKB lookup for '%s': %s", raw, exc)
+            logger.warning(
+                "Unexpected error during PharmGKB lookup for '%s': %s", raw, exc
+            )
 
         return None
+
     def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
         """
         Resolves a drug name (brand or generic) using the RxNorm API.
@@ -153,7 +154,9 @@ def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
             rxcui = rxcui_list[0]
 
             # Step 2: Get related ingredient (generic) names from RxCUI
-            related_url = f"https://rxnav.nlm.nih.gov/REST/rxcui/{rxcui}/related.json?tty=IN"
+            related_url = (
+                f"https://rxnav.nlm.nih.gov/REST/rxcui/{rxcui}/related.json?tty=IN"
+            )
             related_resp = requests.get(related_url, timeout=5)
             related_resp.raise_for_status()
             related_data = related_resp.json()
@@ -174,10 +177,7 @@ def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
                 normalized_output=ingredients[0],  # first generic match
                 entity_type="drug",
                 source="RxNorm",
-                metadata={
-                    "rxcui": rxcui,
-                    "generic_candidates": ingredients
-                }
+                metadata={"rxcui": rxcui, "generic_candidates": ingredients},
             )
 
         except requests.RequestException as exc:
@@ -188,8 +188,6 @@ def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
         return None
 
 
-
-
 def test_lookup_pubchem():
     normalizer = DrugNormalizer()
     drug = "Imatinib"
@@ -237,11 +235,145 @@ def test_lookup_pharmgkb():
         assert "id" in result.metadata
 
 
+def extract_drugs_from_annotations():
+    """
+    Extract and normalize drugs from annotation files.
+    This demonstrates drug normalization from real annotation data.
+    """
+    import json
+    import os
+    import re
+    from typing import Set, List, Dict, Any
+
+    drug_normalizer = DrugNormalizer()
+
+    annotation_dir = (
+        "/Users/shloknatarajan/stanford/research/daneshjou/AutoGKB/data/annotations"
+    )
+    if not os.path.exists(annotation_dir):
+        print(f"❌ Annotation directory not found: {annotation_dir}")
+        return
+
+    drugs_found: Set[str] = set()
+    normalized_results: List[Dict[str, Any]] = []
+
+    print("🔍 Scanning annotation files for drugs...")
+
+    # Common drug name patterns to look for
+    drug_patterns = [
+        r"\b(?:warfarin|imatinib|gleevec|sitagliptin|gliclazide|metformin|edoxaban)\b",
+        r"\b\w+mab\b",  # monoclonal antibodies
+        r"\b\w+ine\b",  # many drugs end in -ine
+        r"\b\w+ol\b",  # many drugs end in -ol
+    ]
+
+    # Scan all annotation files
+    for filename in os.listdir(annotation_dir):
+        if not filename.endswith(".json"):
+            continue
+
+        filepath = os.path.join(annotation_dir, filename)
+        try:
+            with open(filepath, "r") as f:
+                data = json.load(f)
+
+            # Extract drugs from title and content
+            text_content = data.get("title", "") + " "
+
+            # Also check study parameters for drug mentions
+            if "study_parameters" in data:
+                for section in data["study_parameters"].values():
+                    if isinstance(section, dict) and "content" in section:
+                        if isinstance(section["content"], str):
+                            text_content += section["content"] + " "
+                        elif isinstance(section["content"], list):
+                            text_content += (
+                                " ".join(str(item) for item in section["content"]) + " "
+                            )
+
+            # Apply drug patterns
+            for pattern in drug_patterns:
+                matches = re.findall(pattern, text_content, re.IGNORECASE)
+                drugs_found.update(match.lower() for match in matches)
+
+        except Exception as e:
+            print(f"⚠️  Error processing {filename}: {e}")
+
+    print(f"📊 Found {len(drugs_found)} potential drug names")
+
+    # Normalize each drug
+    for drug in drugs_found:
+        if len(drug) < 3:  # Skip very short matches
+            continue
+
+        print(f"\n💊 Processing drug: {drug}")
+
+        result = drug_normalizer.normalize(drug)
+
+        if result:
+            print(f"✅ Normalization successful:")
+            print(f"   Raw: {result.raw_input}")
+            print(f"   Normalized: {result.normalized_output}")
+            print(f"   Source: {result.source}")
+            print(f"   Type: {result.entity_type}")
+
+            if result.metadata:
+                if "cid" in result.metadata:
+                    print(f"   PubChem CID: {result.metadata['cid']}")
+                if "molecular_formula" in result.metadata:
+                    print(f"   Formula: {result.metadata['molecular_formula']}")
+
+            normalized_results.append({"raw_drug": drug, "result": result})
+        else:
+            print(f"❌ No normalization found for {drug}")
+
+    print(
+        f"\n📈 Summary: {len(normalized_results)}/{len(drugs_found)} drugs successfully normalized"
+    )
+    return normalized_results
+
+
+def test_drug_normalizers():
+    """Test drug normalizer with sample data"""
+    print("\n" + "=" * 50)
+    print("🧪 TESTING DRUG NORMALIZERS")
+    print("=" * 50)
+
+    drug_normalizer = DrugNormalizer()
+    test_drugs = ["imatinib", "Gleevec", "warfarin", "sitagliptin", "metformin"]
+
+    for drug in test_drugs:
+        print(f"\n💊 Testing {drug}:")
+        result = drug_normalizer.normalize(drug)
+        if result:
+            print(f"  ✅ Found: {result.normalized_output} from {result.source}")
+            if result.metadata:
+                if "cid" in result.metadata:
+                    print(f"  🆔 PubChem CID: {result.metadata['cid']}")
+                if "molecular_formula" in result.metadata:
+                    print(f"  🧪 Formula: {result.metadata['molecular_formula']}")
+        else:
+            print(f"  ❌ Not found")
+
+
 if __name__ == "__main__":
-    test_lookup_pubchem()
-    
-    test_lookup_pharmgkb()
-    normalizer = DrugNormalizer()
-    result = normalizer.lookup_drug_rxnorm("Gleevec")
-    print(result.normalized_output)  # → "imatinib"
+    logging.basicConfig(level=logging.INFO)
+
+    print("🎯 AutoGKB Drug Ontology Normalization System")
+    print("=" * 60)
 
+    # Test individual drug normalizers first
+    test_drug_normalizers()
+
+    # Then demonstrate with real annotation data
+    print("\n" + "=" * 50)
+    print("📋 PROCESSING ANNOTATION DATA FOR DRUGS")
+    print("=" * 50)
+
+    results = extract_drugs_from_annotations()
+
+    if results:
+        print(f"\n🎉 Successfully processed annotation data!")
+        print(f"   Normalized {len(results)} drugs")
+    else:
+        print("\n⚠️  No results from annotation processing")
diff --git a/src/ontology/variant_ontology.py b/src/ontology/variant_ontology.py
new file mode 100644
index 0000000..c19af44
--- /dev/null
+++ b/src/ontology/variant_ontology.py
@@ -0,0 +1,422 @@
+from abc import ABC, abstractmethod
+from typing import Callable, Dict, Optional, Any, List
+from dataclasses import dataclass, field
+import logging
+from Bio import Entrez
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class NormalizationResult:
+    raw_input: str
+    normalized_output: str
+    entity_type: str  # e.g. "variant", "gene", "drug", etc.
+    source: str  # where the normalized info came from
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "NormalizationResult":
+        return cls(
+            raw_input=data["raw_input"],
+            normalized_output=data["normalized_output"],
+            entity_type=data.get("entity_type", "unknown"),
+            source=data["source"],
+            metadata=data.get("metadata", {}),
+        )
+
+
+class BaseNormalizer(ABC):
+    def __init__(self):
+        self._handlers: list[Callable[[str], Optional[dict]]] = []
+
+    def register_handler(self, handler: Callable[[str], Optional[dict]]):
+        self._handlers.append(handler)
+
+    def normalize(self, raw: str) -> Optional["NormalizationResult"]:
+        for handler in self._handlers:
+            try:
+                result = handler(raw)
+                if result:
+                    return result  # Assuming result is already a NormalizationResult
+            except Exception as e:
+                logger.exception(
+                    f"Handler '{handler.__name__}' failed on input: '{raw}'"
+                )
+        return None
+
+    @abstractmethod
+    def name(self) -> str:
+        pass
+
+
+class RSIDNormalizer(BaseNormalizer):
+    def __init__(self, email: str, api_key: Optional[str] = None):
+        super().__init__()
+        Entrez.email = email
+        if api_key:
+            Entrez.api_key = api_key
+
+        self.register_handler(self.lookup_dbsnp)
+        self.register_handler(self.lookup_pharmgkb_id)
+
+    def name(self) -> str:
+        return "RSIDNormalizer"
+
+    def lookup_dbsnp(self, raw: str) -> Optional[NormalizationResult]:
+        rsid = raw.lower().strip()
+        if not rsid.startswith("rs") or not rsid[2:].isdigit():
+            return None
+
+        try:
+            handle = Entrez.esummary(db="snp", id=rsid[2:], retmode="json")
+            response = handle.read()
+            handle.close()
+
+            # Convert JSON string to Python dict
+            import json
+
+            data = json.loads(response)
+
+            record = data.get("result", {}).get(rsid[2:])
+            if not record:
+                return None
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=rsid,
+                entity_type="variant",
+                source="dbSNP",
+                metadata=record,
+            )
+
+        except Exception:
+            logger.exception(f"lookup_dbsnp failed for {raw}")
+            return None
+
+    def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
+        logger.debug(f"Looking up PharmGKB variant by symbol: {raw}")
+
+        base_url = "https://api.pharmgkb.org/v1/data/variant"
+        params = {"symbol": raw.strip(), "view": "max"}
+
+        try:
+            response = requests.get(base_url, params=params, timeout=10)
+            if response.status_code != 200:
+                logger.warning(
+                    f"PharmGKB lookup failed ({response.status_code}) for {raw}"
+                )
+                return None
+
+            data = response.json()
+            records = data.get("data", [])
+            if not records:
+                logger.info(f"No PharmGKB variant match for symbol: {raw}")
+                return None
+
+            variant = records[0]
+
+            # Extract only required fields
+            normalized_output = variant.get("id")
+            entity_type = "variant"
+            source = "PharmGKB"
+
+            # Remove known fields so everything else is dumped into metadata
+            metadata = {k: v for k, v in variant.items() if k not in {"id"}}
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=normalized_output,
+                entity_type=entity_type,
+                source=source,
+                metadata=metadata,
+            )
+
+        except Exception:
+            logger.exception(f"PharmGKB symbol lookup failed for {raw}")
+            return None
+
+
+class StarAlleleNormalizer(BaseNormalizer):
+    API_URL = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
+
+    def __init__(self):
+        super().__init__()
+        self.register_handler(self.lookup_star_allele)
+
+    def name(self):
+        return "Star Allele Normalizer"
+
+    def lookup_star_allele(self, raw: str) -> Optional[NormalizationResult]:
+        """
+        Normalize a star allele (e.g., CYP2D6*4) using the Clinical Tables API.
+        Returns a NormalizationResult with detailed metadata.
+        """
+        query = raw.strip()
+        if not query:
+            logger.debug("Empty star allele input, skipping.")
+            return None
+
+        try:
+            alleles = self.fetch_star_alleles(query, max_results=1)
+            if not alleles:
+                logger.debug("No star allele found for input: %s", query)
+                return None
+
+            allele_data = alleles[0]
+
+            return NormalizationResult(
+                raw_input=raw,
+                normalized_output=allele_data.get("StarAlleleName", query),
+                entity_type="variant",
+                source="PharmVar/Clinical Tables",
+                metadata=allele_data,
+            )
+
+        except Exception as exc:
+            logger.warning("Star allele lookup failed for '%s': %s", raw, exc)
+            return None
+
+    def fetch_star_alleles(
+        self, query: str, max_results: int = 50
+    ) -> List[Dict[str, Any]]:
+        """
+        Requests up to max_results star allele records matching the query string from the PharmVar-backed Clinical Tables API.
+        Returns a list of dictionaries, one per allele, with all available fields populated.
+        """
+        fields = [
+            "StarAlleleName",
+            "GenBank",
+            "ProteinAffected",
+            "cDNANucleotideChanges",
+            "GeneNucleotideChange",
+            "XbaIHaplotype",
+            "RFLP",
+            "OtherNames",
+            "ProteinChange",
+            "InVivoEnzymeActivity",
+            "InVitroEnzymeActivity",
+            "References",
+            "ClinicalPhenotype",
+            "Notes",
+        ]
+
+        params = {"terms": query, "count": max_results, "ef": ",".join(fields)}
+
+        try:
+            response = requests.get(self.API_URL, params=params, timeout=10)
+            response.raise_for_status()
+        except Exception as e:
+            logger.error(f"API request failed: {e}")
+            return []
+
+        try:
+            total_count, allele_names, extra_fields, *_ = response.json()
+        except Exception as e:
+            logger.error(f"Failed to parse API response: {e}")
+            return []
+
+        results = []
+        for i, allele in enumerate(allele_names):
+            allele_info = {"StarAlleleName": allele}
+            for field, values in extra_fields.items():
+                allele_info[field] = values[i] if i < len(values) else None
+            results.append(allele_info)
+
+        return results
+
+    # def fetch_star_alleles(self, term: str) -> list[dict]:
+    #     """
+    #     Searches for star alleles matching a term and retrieves full metadata for each.
+
+    #     Args:
+    #         term (str): The star allele search string (e.g., "CYP2D6*4").
+
+    #     Returns:
+    #         list[dict]: Each dict contains all metadata fields for a matched star allele.
+    #     """
+    #     base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
+    #     fields = [
+    #         "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
+    #         "GeneNucleotideChange", "ProteinChange", "OtherNames",
+    #         "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
+    #         "ClinicalPhenotype", "Notes"
+    #     ]
+
+    #     params = {
+    #         "terms": term,
+    #         "ef": ",".join(fields),
+    #         "maxList": "50"
+    #     }
+
+    #     response = requests.get(base_url, params=params)
+    #     response.raise_for_status()
+    #     data = response.json()
+
+    #     if not data or len(data) < 3:
+    #         return []
+
+    #     codes = data[1]
+    #     extra_fields = data[2]
+
+    #     results = []
+    #     for i, code in enumerate(codes):
+    #         allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
+    #         results.append(allele_data)
+
+    #     return results
+
+
+def extract_variants_from_annotations():
+    """
+    Extract and normalize variants from annotation files.
+    This demonstrates the core functionality for mapping variants to normalized ontologies.
+    """
+    import json
+    import os
+    import re
+    from typing import Set, List, Dict, Any
+
+    # Initialize normalizers
+    rsid_normalizer = RSIDNormalizer(email="test@example.com")
+    star_normalizer = StarAlleleNormalizer()
+
+    annotation_dir = (
+        "/Users/shloknatarajan/stanford/research/daneshjou/AutoGKB/data/annotations"
+    )
+    if not os.path.exists(annotation_dir):
+        print(f"❌ Annotation directory not found: {annotation_dir}")
+        return
+
+    variants_found: Set[str] = set()
+    normalized_results: List[Dict[str, Any]] = []
+
+    print("🔍 Scanning annotation files for variants...")
+
+    # Scan all annotation files
+    for filename in os.listdir(annotation_dir):
+        if not filename.endswith(".json"):
+            continue
+
+        filepath = os.path.join(annotation_dir, filename)
+        try:
+            with open(filepath, "r") as f:
+                data = json.load(f)
+
+            # Extract polymorphisms from annotations
+            if "annotations" in data and "relationships" in data["annotations"]:
+                for relationship in data["annotations"]["relationships"]:
+                    polymorphism = relationship.get("polymorphism", "")
+
+                    # Extract rsIDs (rs followed by numbers)
+                    rsids = re.findall(r"rs\d+", polymorphism)
+                    variants_found.update(rsids)
+
+                    # Extract star alleles (gene*number pattern)
+                    star_alleles = re.findall(r"[A-Z0-9]+\*\d+", polymorphism)
+                    variants_found.update(star_alleles)
+
+        except Exception as e:
+            print(f"⚠️  Error processing {filename}: {e}")
+
+    print(f"📊 Found {len(variants_found)} unique variants")
+
+    # Normalize each variant
+    for variant in variants_found:
+        print(f"\n🧬 Processing variant: {variant}")
+
+        result = None
+        normalizer_used = None
+
+        # Try rsID normalization first
+        if variant.startswith("rs"):
+            result = rsid_normalizer.normalize(variant)
+            normalizer_used = "RSIDNormalizer"
+
+        # Try star allele normalization if rsID didn't work
+        if not result and "*" in variant:
+            result = star_normalizer.normalize(variant)
+            normalizer_used = "StarAlleleNormalizer"
+
+        if result:
+            print(f"✅ {normalizer_used} successful:")
+            print(f"   Raw: {result.raw_input}")
+            print(f"   Normalized: {result.normalized_output}")
+            print(f"   Source: {result.source}")
+            print(f"   Type: {result.entity_type}")
+
+            normalized_results.append(
+                {
+                    "raw_variant": variant,
+                    "normalizer": normalizer_used,
+                    "result": result,
+                }
+            )
+        else:
+            print(f"❌ No normalization found for {variant}")
+
+    print(
+        f"\n📈 Summary: {len(normalized_results)}/{len(variants_found)} variants successfully normalized"
+    )
+    return normalized_results
+
+
+def test_individual_normalizers():
+    """Test each normalizer with sample data"""
+    print("\n" + "=" * 50)
+    print("🧪 TESTING INDIVIDUAL NORMALIZERS")
+    print("=" * 50)
+
+    # Test RSIDNormalizer
+    print("\n🧬 Testing RSIDNormalizer:")
+    rsid_normalizer = RSIDNormalizer(email="test@example.com")
+    test_rsids = ["rs1799853", "rs1057910", "rs9923231"]
+
+    for rsid in test_rsids:
+        print(f"\n Testing {rsid}:")
+        result = rsid_normalizer.normalize(rsid)
+        if result:
+            print(f"  ✅ Found: {result.normalized_output} from {result.source}")
+        else:
+            print(f"  ❌ Not found")
+
+    # Test StarAlleleNormalizer
+    print("\n⭐ Testing StarAlleleNormalizer:")
+    star_normalizer = StarAlleleNormalizer()
+    test_alleles = ["CYP2D6*4", "CYP2C9*2", "CYP2C9*3"]
+
+    for allele in test_alleles:
+        print(f"\n Testing {allele}:")
+        result = star_normalizer.normalize(allele)
+        if result:
+            print(f"  ✅ Found: {result.normalized_output} from {result.source}")
+            if result.metadata:
+                activity = result.metadata.get("InVivoEnzymeActivity")
+                if activity:
+                    print(f"  📊 Activity: {activity}")
+        else:
+            print(f"  ❌ Not found")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    print("🎯 AutoGKB Variant Ontology Normalization System")
+    print("=" * 60)
+
+    # Test individual normalizers first
+    test_individual_normalizers()
+
+    # Then demonstrate with real annotation data
+    print("\n" + "=" * 50)
+    print("📋 PROCESSING ANNOTATION DATA")
+    print("=" * 50)
+
+    results = extract_variants_from_annotations()
+
+    if results:
+        print(f"\n🎉 Successfully processed annotation data!")
+        print(f"   Normalized {len(results)} variants")
+    else:
+        print("\n⚠️  No results from annotation processing")
diff --git a/src/ontology_module/__init__.py b/src/ontology_module/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
deleted file mode 100644
index de67242..0000000
--- a/src/ontology_module/variant_ontology.py
+++ /dev/null
@@ -1,242 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Callable,Dict, Optional, Any, List
-from dataclasses import dataclass, field 
-import logging
-from Bio import Entrez
-import requests
-
-logger = logging.getLogger(__name__)
-
-@dataclass
-class NormalizationResult:
-    raw_input: str
-    normalized_output: str
-    entity_type: str         # e.g. "variant", "gene", "drug", etc.
-    source: str              # where the normalized info came from
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "NormalizationResult":
-        return cls(
-            raw_input=data["raw_input"],
-            normalized_output=data["normalized_output"],
-            entity_type=data.get("entity_type", "unknown"),
-            source=data["source"],
-            metadata=data.get("metadata", {})
-        )
-
-class BaseNormalizer(ABC):
-    def __init__(self):
-        self._handlers: list[Callable[[str], Optional[dict]]] = []
-
-    def register_handler(self, handler: Callable[[str], Optional[dict]]):
-        self._handlers.append(handler)
-
-    def normalize(self, raw: str) -> Optional["NormalizationResult"]:
-        for handler in self._handlers:
-            try:
-                result = handler(raw)
-                if result:
-                    return result  # Assuming result is already a NormalizedEntity
-            except Exception as e:
-                logger.exception(f"Handler '{handler.__name__}' failed on input: '{raw}'")
-        return None
-
-    @abstractmethod
-    def name(self) -> str:
-        pass
-
-    
-
-
-
-class RSIDNormalizer(BaseNormalizer):
-    def __init__(self, email: str, api_key: Optional[str] = None):
-        super().__init__()
-        Entrez.email = email
-        if api_key:
-            Entrez.api_key = api_key
-
-        self.register_handler(self.lookup_dbsnp)
-        self.register_handler(self.lookup_pharmgkb_id)
-
-    def name(self) -> str:
-        return "RSIDNormalizer"
-
-    def lookup_dbsnp(self, raw: str) -> Optional[NormalizationResult]:
-        rsid = raw.lower().strip()
-        if not rsid.startswith("rs") or not rsid[2:].isdigit():
-            return None
-
-        try:
-            handle = Entrez.esummary(db="snp", id=rsid[2:], retmode="json")
-            response = handle.read()
-            handle.close()
-
-            # Convert JSON string to Python dict
-            import json
-            data = json.loads(response)
-
-            record = data.get("result", {}).get(rsid[2:])
-            if not record:
-                return None
-
-            return NormalizationResult(
-                raw_input=raw,
-                normalized_output=rsid,
-                entity_type="variant",
-                source="dbSNP",
-                metadata=record
-            )
-
-        except Exception:
-            logger.exception(f"lookup_dbsnp failed for {raw}")
-            return None
-
-    def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
-        logger.debug(f"Looking up PharmGKB variant by symbol: {raw}")
-
-        base_url = "https://api.pharmgkb.org/v1/data/variant"
-        params = {
-            "symbol": raw.strip(),
-            "view": "max"
-        }
-
-        try:
-            response = requests.get(base_url, params=params, timeout=10)
-            if response.status_code != 200:
-                logger.warning(f"PharmGKB lookup failed ({response.status_code}) for {raw}")
-                return None
-
-            data = response.json()
-            records = data.get("data", [])
-            if not records:
-                logger.info(f"No PharmGKB variant match for symbol: {raw}")
-                return None
-
-            variant = records[0]
-
-            # Extract only required fields
-            normalized_output = variant.get("id")
-            entity_type = "variant"
-            source = "PharmGKB"
-
-            # Remove known fields so everything else is dumped into metadata
-            metadata = {k: v for k, v in variant.items() if k not in {"id"}}
-
-            return NormalizationResult(
-                raw_input=raw,
-                normalized_output=normalized_output,
-                entity_type=entity_type,
-                source=source,
-                metadata=metadata
-            )
-
-        except Exception:
-            logger.exception(f"PharmGKB symbol lookup failed for {raw}")
-            return None
-        
-class StarAlleleNormalizer(BaseNormalizer):
-    API_URL = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
-
-    def __init__(self):
-        pass
-    def name(self):
-        return "Star Allele Normalizer"
- 
-
-       
-
-    def fetch_star_alleles(self, query: str, max_results: int = 50) -> List[Dict[str, Any]]:
-        """
-        Fetches all star allele records matching the query string from the PharmVar-backed Clinical Tables API.
-        Returns a list of dictionaries, one per allele, with all available fields populated.
-        """
-        fields = [
-            "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
-            "GeneNucleotideChange", "XbaIHaplotype", "RFLP", "OtherNames", "ProteinChange",
-            "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
-            "ClinicalPhenotype", "Notes"
-        ]
-
-        params = {
-            "terms": query,
-            "count": max_results,
-            "ef": ",".join(fields)
-        }
-
-        try:
-            response = requests.get(self.API_URL, params=params, timeout=10)
-            response.raise_for_status()
-        except Exception as e:
-            logger.error(f"API request failed: {e}")
-            return []
-
-        try:
-            total_count, allele_names, extra_fields, *_ = response.json()
-        except Exception as e:
-            logger.error(f"Failed to parse API response: {e}")
-            return []
-
-        results = []
-        for i, allele in enumerate(allele_names):
-            allele_info = {
-                "StarAlleleName": allele
-            }
-            for field, values in extra_fields.items():
-                allele_info[field] = values[i] if i < len(values) else None
-            results.append(allele_info)
-
-        return results
-    # def fetch_star_alleles(self, term: str) -> list[dict]:
-    #     """
-    #     Searches for star alleles matching a term and retrieves full metadata for each.
-
-    #     Args:
-    #         term (str): The star allele search string (e.g., "CYP2D6*4").
-
-    #     Returns:
-    #         list[dict]: Each dict contains all metadata fields for a matched star allele.
-    #     """
-    #     base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
-    #     fields = [
-    #         "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
-    #         "GeneNucleotideChange", "ProteinChange", "OtherNames",
-    #         "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
-    #         "ClinicalPhenotype", "Notes"
-    #     ]
-
-    #     params = {
-    #         "terms": term,
-    #         "ef": ",".join(fields),
-    #         "maxList": "50"
-    #     }
-
-    #     response = requests.get(base_url, params=params)
-    #     response.raise_for_status()
-    #     data = response.json()
-
-    #     if not data or len(data) < 3:
-    #         return []
-
-    #     codes = data[1]
-    #     extra_fields = data[2]
-
-    #     results = []
-    #     for i, code in enumerate(codes):
-    #         allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
-    #         results.append(allele_data)
-
-    #     return results
-        
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    normalizer = StarAlleleNormalizer()
-    data = normalizer.fetch_star_alleles("CYP2D6*4")
-
-    for record in data:
-        print("\n--- Star Allele Record ---")
-        for k, v in record.items():
-            print(f"{k}: {v}")
-
diff --git a/src/study_parameters.py b/src/study_parameters.py
index 0458f98..f88b605 100644
--- a/src/study_parameters.py
+++ b/src/study_parameters.py
@@ -18,27 +18,28 @@ def parse_bullets_to_list(text: str) -> List[str]:
     """Parse bulleted text into a list of strings."""
     if not text or not text.strip():
         return []
-    
+
     # Split by common bullet patterns
-    lines = text.strip().split('\n')
+    lines = text.strip().split("\n")
     bullets = []
-    
+
     for line in lines:
         line = line.strip()
         if not line:
             continue
-        
+
         # Remove common bullet markers (•, -, numbers) but preserve markdown asterisks
-        cleaned_line = re.sub(r'^[\s]*[\•\-\d+\.\)\]\s]+[\s]*', '', line)
+        cleaned_line = re.sub(r"^[\s]*[\•\-\d+\.\)\]\s]+[\s]*", "", line)
         # Also remove standalone asterisks that are bullet markers (not part of markdown)
-        cleaned_line = re.sub(r'^[\s]*\*[\s]+', '', cleaned_line)
-        
+        cleaned_line = re.sub(r"^[\s]*\*[\s]+", "", cleaned_line)
+
         if cleaned_line:
             bullets.append(cleaned_line)
-    
+
     # If no bullets were found, return the original text as a single item
     return bullets if bullets else [text.strip()]
 
+
 class StudyParameters(BaseModel):
     summary: ParameterWithCitations
     study_type: ParameterWithCitations
@@ -48,8 +49,10 @@ class StudyParameters(BaseModel):
     allele_frequency: ParameterWithCitations
     additional_resource_links: List[str]
 
+
 bulleted_output_queue = "Format the response as a bulleted list. Keep each bullet point concise (1-2 sentences maximum). If the format of the response is term: value, then have the term bolded (**term**) and the value in plain text. Do not include any other text and use markdown formatting for your response."
 
+
 class StudyParametersGenerator:
     """
     Generator for extracting study parameters from PMC articles
@@ -70,7 +73,9 @@ def __init__(self, pmcid: str, model: str = "gpt-4o"):
     def get_summary(self) -> str:
         """Extract a short 2-3 sentence summary of the study."""
         prompt = "Provide a short 2-3 sentence summary of the study motivation, design, and results."
-        output_queues = "Format the response as a short paragraph without using any bullet points."
+        output_queues = (
+            "Format the response as a short paragraph without using any bullet points."
+        )
         return self.generator.generate(prompt + output_queues)
 
     def get_study_type(self) -> str:

From 5595f0a1a4a0d53e1bc79b1f8c2ed383b6926aac Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Tue, 5 Aug 2025 14:05:32 -0700
Subject: [PATCH 02/10] chore: switched to loguru

---
 src/ontology/drug_ontology.py    | 5 ++---
 src/ontology/variant_ontology.py | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/ontology/drug_ontology.py b/src/ontology/drug_ontology.py
index 53c0f4a..436017b 100644
--- a/src/ontology/drug_ontology.py
+++ b/src/ontology/drug_ontology.py
@@ -1,5 +1,5 @@
 from typing import Optional
-import logging
+from loguru import logger
 from .variant_ontology import BaseNormalizer, NormalizationResult
 
 import requests
@@ -7,7 +7,6 @@
 # how to use, you have thew following,
 
 
-logger = logging.getLogger(__name__)
 
 
 class DrugNormalizer(BaseNormalizer):
@@ -357,7 +356,7 @@ def test_drug_normalizers():
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
+    pass
 
     print("🎯 AutoGKB Drug Ontology Normalization System")
     print("=" * 60)
diff --git a/src/ontology/variant_ontology.py b/src/ontology/variant_ontology.py
index c19af44..d098960 100644
--- a/src/ontology/variant_ontology.py
+++ b/src/ontology/variant_ontology.py
@@ -1,11 +1,10 @@
 from abc import ABC, abstractmethod
 from typing import Callable, Dict, Optional, Any, List
 from dataclasses import dataclass, field
-import logging
+from loguru import logger
 from Bio import Entrez
 import requests
 
-logger = logging.getLogger(__name__)
 
 
 @dataclass
@@ -400,7 +399,7 @@ def test_individual_normalizers():
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
+    pass
 
     print("🎯 AutoGKB Variant Ontology Normalization System")
     print("=" * 60)

From 8135cb46766c255f88b82a2d188a560ccba3cdad Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Tue, 5 Aug 2025 14:06:37 -0700
Subject: [PATCH 03/10] fix: removed absolute path

---
 src/ontology/variant_ontology.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ontology/variant_ontology.py b/src/ontology/variant_ontology.py
index d098960..87ff62e 100644
--- a/src/ontology/variant_ontology.py
+++ b/src/ontology/variant_ontology.py
@@ -282,7 +282,7 @@ def extract_variants_from_annotations():
     star_normalizer = StarAlleleNormalizer()
 
     annotation_dir = (
-        "/Users/shloknatarajan/stanford/research/daneshjou/AutoGKB/data/annotations"
+        "data/annotations"
     )
     if not os.path.exists(annotation_dir):
         print(f"❌ Annotation directory not found: {annotation_dir}")

From bbf8aa051cde291fa2e8028003c43bab5e63310f Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Tue, 5 Aug 2025 14:12:18 -0700
Subject: [PATCH 04/10] feat: ontology readme

---
 src/ontology/README.md | 179 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 src/ontology/README.md

diff --git a/src/ontology/README.md b/src/ontology/README.md
new file mode 100644
index 0000000..79087de
--- /dev/null
+++ b/src/ontology/README.md
@@ -0,0 +1,179 @@
+# Ontology Module
+
+This module provides normalization and standardization capabilities for biological entities (variants and drugs) used in the AutoGKB system. It enables consistent identification and mapping of genetic variants and pharmaceutical compounds to standardized ontologies and databases.
+
+## Overview
+
+The ontology module contains normalizers that:
+- **Standardize variant nomenclature** using dbSNP, PharmGKB, and star allele databases
+- **Normalize drug names** using PubChem, PharmGKB, and RxNorm APIs
+- **Provide consistent data structures** for normalized results
+- **Handle multiple input formats** (rsIDs like "rs1799853", star alleles like "CYP2D6*4", brand/generic drug names like "Gleevec"/"imatinib") and return structured metadata like PubChem CIDs, SMILES strings, enzyme activity levels, PharmGKB IDs, and molecular formulas
+
+## Module Structure
+
+```
+src/ontology/
+├── __init__.py           # Module exports
+├── variant_ontology.py   # Variant normalization classes
+├── drug_ontology.py      # Drug normalization classes
+└── README.md             # This file
+```
+
+## Core Classes
+
+### Data Structures
+
+#### `NormalizationResult`
+A dataclass that standardizes normalization outputs across all normalizers.
+
+**Fields:**
+- `raw_input: str` - Original input string
+- `normalized_output: str` - Standardized/normalized result
+- `entity_type: str` - Type of entity ("variant", "drug", etc.)
+- `source: str` - Database/API source of normalization
+- `metadata: Dict[str, Any]` - Additional structured data
+
+### Base Classes
+
+#### `BaseNormalizer` (Abstract)
+Base class for all normalizers providing:
+- Handler registration system
+- Standardized normalization workflow
+- Error handling and logging
+- Abstract `name()` method
+
+## Variant Normalization
+
+### `RSIDNormalizer`
+Normalizes rsID variants (e.g., `rs1799853`) using:
+- **dbSNP** via NCBI Entrez API
+- **PharmGKB** variant database
+
+**Usage:**
+```python
+from src.ontology import RSIDNormalizer
+
+normalizer = RSIDNormalizer(email="your@email.com", api_key="optional")
+result = normalizer.normalize("rs1799853")
+```
+
+### `StarAlleleNormalizer`
+Normalizes star allele nomenclature (e.g., `CYP2D6*4`) using:
+- **PharmVar** via Clinical Tables API
+- Comprehensive metadata including enzyme activity, protein changes
+
+**Usage:**
+```python
+from src.ontology import StarAlleleNormalizer
+
+normalizer = StarAlleleNormalizer()
+result = normalizer.normalize("CYP2D6*4")
+```
+
+## Drug Normalization
+
+### `DrugNormalizer`
+Normalizes drug names using multiple cascading sources:
+1. **PubChem** - Chemical structure and IUPAC names
+2. **PharmGKB** - Pharmacogenomic drug information
+3. **RxNorm** - Clinical drug terminology
+
+**Usage:**
+```python
+from src.ontology import DrugNormalizer
+
+normalizer = DrugNormalizer()
+result = normalizer.normalize("imatinib")  # or "Gleevec"
+```
+
+**Features:**
+- Brand name to generic conversion
+- Chemical structure data (SMILES, molecular formula)
+- Cross-reference between multiple drug databases
+- Comprehensive metadata preservation
+
+## API Endpoints Used
+
+| Database | Endpoint | Purpose |
+|----------|----------|---------|
+| dbSNP | NCBI Entrez | Variant information lookup |
+| PharmGKB | `api.pharmgkb.org/v1/data/variant` | Variant annotations |
+| PharmGKB | `api.pharmgkb.org/v1/data/chemical` | Drug information |
+| PubChem | `pubchem.ncbi.nlm.nih.gov/rest/pug` | Chemical properties |
+| RxNorm | `rxnav.nlm.nih.gov/REST` | Clinical drug terminology |
+| Clinical Tables | `clinicaltables.nlm.nih.gov/api/star_alleles` | Star allele data |
+
+## Example Usage
+
+### Basic Normalization
+```python
+from src.ontology import RSIDNormalizer, DrugNormalizer
+
+# Normalize a variant
+variant_normalizer = RSIDNormalizer(email="test@example.com")
+variant_result = variant_normalizer.normalize("rs1799853")
+
+if variant_result:
+    print(f"Normalized: {variant_result.normalized_output}")
+    print(f"Source: {variant_result.source}")
+    print(f"Metadata: {variant_result.metadata}")
+
+# Normalize a drug
+drug_normalizer = DrugNormalizer()
+drug_result = drug_normalizer.normalize("imatinib")
+
+if drug_result:
+    print(f"IUPAC Name: {drug_result.normalized_output}")
+    print(f"PubChem CID: {drug_result.metadata.get('cid')}")
+    print(f"SMILES: {drug_result.metadata.get('canonical_smiles')}")
+```
+
+### Processing Annotation Files
+Both modules include demonstration functions:
+- `extract_variants_from_annotations()` - Extract and normalize variants from JSON files
+- `extract_drugs_from_annotations()` - Extract and normalize drugs from JSON files
+
+## Error Handling
+
+All normalizers implement robust error handling:
+- **Network timeouts** (5-10 second limits)
+- **API rate limiting** awareness
+- **Graceful degradation** when services are unavailable
+- **Comprehensive logging** using loguru
+
+## Configuration Requirements
+
+### Required Dependencies
+- `requests` - HTTP API calls
+- `biopython` - NCBI Entrez access
+- `loguru` - Structured logging
+
+### API Keys/Configuration
+- **NCBI Entrez**: Email required, API key optional but recommended
+- **Other APIs**: No authentication required
+
+### Environment Setup
+```python
+from Bio import Entrez  # NCBI access is configured through Biopython's Entrez module
+Entrez.email = "your@email.com"  # Required
+Entrez.api_key = "your_api_key"  # Optional but recommended
+```
+
+## Testing
+
+Run the test functions included in each module:
+```bash
+python -m src.ontology.variant_ontology
+python -m src.ontology.drug_ontology
+```
+
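+These commands exercise the individual normalizers against their live remote APIs (network access is required) and then demonstrate processing of annotation files from the `data/annotations` directory. That path is resolved relative to the current working directory, so run the commands from the repository root.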
+
+## Integration Points
+
+This module integrates with:
+- **Annotation processing pipeline** - Normalizes extracted entities
+- **Knowledge graph construction** - Provides standardized identifiers
+- **Cross-reference mapping** - Links entities across databases
+- **Data validation** - Ensures entity consistency
\ No newline at end of file

From f460d43e265ab7a1b5403430edbd7487263f17ce Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Wed, 6 Aug 2025 08:42:28 -0700
Subject: [PATCH 05/10] fix: file paths

---
 src/deprecated/all_associations.py                 | 2 +-
 src/deprecated/all_variants.py                     | 2 +-
 src/deprecated/association_types.py                | 2 +-
 src/deprecated/functional_annotation_extraction.py | 2 +-
 src/deprecated/phenotype_annotation_extraction.py  | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/deprecated/all_associations.py b/src/deprecated/all_associations.py
index 8ae3b4c..d32d9c4 100644
--- a/src/deprecated/all_associations.py
+++ b/src/deprecated/all_associations.py
@@ -1,5 +1,5 @@
 from src.inference import Generator, Fuser
-from src.variants import QuotedStr
+from src.deprecated.variants import QuotedStr
 from src.prompts import GeneratorPrompt, ArticlePrompt
 from src.utils import get_article_text
 from loguru import logger
diff --git a/src/deprecated/all_variants.py b/src/deprecated/all_variants.py
index 0ac1cd2..f94e1c5 100644
--- a/src/deprecated/all_variants.py
+++ b/src/deprecated/all_variants.py
@@ -1,5 +1,5 @@
 from src.inference import Generator
-from src.variants import Variant, VariantList
+from src.deprecated.variants import Variant, VariantList
 from src.prompts import GeneratorPrompt, PromptVariables
 from src.utils import get_article_text
 from loguru import logger
diff --git a/src/deprecated/association_types.py b/src/deprecated/association_types.py
index 77c63ec..48f4320 100644
--- a/src/deprecated/association_types.py
+++ b/src/deprecated/association_types.py
@@ -2,7 +2,7 @@
 Given a list of variants and the article text, determine the type of association (drug, phenotype, functional association)
 """
 
-from src.variants import Variant
+from src.deprecated.variants import Variant
 from typing import List, Optional
 from src.prompts import PromptVariables, GeneratorPrompt, ParserPrompt
 from src.inference import Generator, Parser
diff --git a/src/deprecated/functional_annotation_extraction.py b/src/deprecated/functional_annotation_extraction.py
index 35ea98e..4d4dfc7 100644
--- a/src/deprecated/functional_annotation_extraction.py
+++ b/src/deprecated/functional_annotation_extraction.py
@@ -5,7 +5,7 @@
 from typing import List
 from loguru import logger
 from pydantic import BaseModel
-from src.variants import Variant, FunctionalAnnotation, FunctionalAnnotationList
+from src.deprecated.variants import Variant, FunctionalAnnotation, FunctionalAnnotationList
 from src.prompts import PromptVariables, GeneratorPrompt, ParserPrompt
 from src.inference import Generator, Parser
 from src.utils import get_article_text
diff --git a/src/deprecated/phenotype_annotation_extraction.py b/src/deprecated/phenotype_annotation_extraction.py
index 3ec422f..5a95608 100644
--- a/src/deprecated/phenotype_annotation_extraction.py
+++ b/src/deprecated/phenotype_annotation_extraction.py
@@ -5,7 +5,7 @@
 from typing import List
 from loguru import logger
 from pydantic import BaseModel
-from src.variants import Variant, PhenotypeAnnotation, PhenotypeAnnotationList
+from src.deprecated.variants import Variant, PhenotypeAnnotation, PhenotypeAnnotationList
 from src.prompts import PromptVariables, GeneratorPrompt, ParserPrompt
 from src.inference import Generator, Parser
 from src.utils import get_article_text

From 90398766d54117e73a1eb91df718ef0302763c1b Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Wed, 6 Aug 2025 09:32:39 -0700
Subject: [PATCH 06/10] fix: removed merge conflicts

---
 src/ontology/drug_ontology.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/src/ontology/drug_ontology.py b/src/ontology/drug_ontology.py
index d70a6fd..436017b 100644
--- a/src/ontology/drug_ontology.py
+++ b/src/ontology/drug_ontology.py
@@ -5,10 +5,6 @@
 import requests
 
 # how to use, you have thew following,
-<<<<<<< HEAD:src/ontology/drug_ontology.py
-
-=======
->>>>>>> origin/main:src/ontology_module/drug_ontology.py
 
 
 
@@ -24,10 +20,7 @@ def __init__(self):
         # TODO: insert logic to handle base generic instead of what we have
 
         self.register_handler(self.lookup_drug_pharmgkb)
-<<<<<<< HEAD:src/ontology/drug_ontology.py
         self.register_handler(self.lookup_drug_rxnorm)
-=======
->>>>>>> origin/main:src/ontology_module/drug_ontology.py
         # register the pubchem first before I register the other.
 
     def name(self):
@@ -241,7 +234,6 @@ def test_lookup_pharmgkb():
         assert "id" in result.metadata
 
 
-<<<<<<< HEAD:src/ontology/drug_ontology.py
 def extract_drugs_from_annotations():
     """
     Extract and normalize drugs from annotation files.
@@ -384,12 +376,3 @@ def test_drug_normalizers():
         print(f"   Normalized {len(results)} drugs")
     else:
         print("\n⚠️  No results from annotation processing")
-=======
-if __name__ == "__main__":
-    test_lookup_pubchem()
-
-    test_lookup_pharmgkb()
-    normalizer = DrugNormalizer()
-    result = normalizer.lookup_drug_rxnorm("Gleevec")
-    print(result.normalized_output)  # → "imatinib"
->>>>>>> origin/main:src/ontology_module/drug_ontology.py

From 6ff73ab87f8edd30d017f5e097af5b012c9c5a20 Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Wed, 6 Aug 2025 11:12:54 -0700
Subject: [PATCH 07/10] feat: small prompt updates

---
 pixi.toml               | 1 +
 src/annotation_table.py | 5 ++++-
 src/study_parameters.py | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pixi.toml b/pixi.toml
index cf9ca6a..2497377 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -20,6 +20,7 @@ setup-repo = "pixi install && pixi run download-data"
 copy-markdown = "python -m src.copy_markdown"
 annotation-pipeline = "python -m src.annotation_pipeline"
 test-citations = "python -m src.citations.one_shot_citations"
+study-parameters = "python -m src.study_parameters"
 
 [dependencies]
 seaborn = ">=0.13.2,<0.14"
diff --git a/src/annotation_table.py b/src/annotation_table.py
index 5298450..2dc906e 100644
--- a/src/annotation_table.py
+++ b/src/annotation_table.py
@@ -47,6 +47,7 @@ def __init__(self, pmcid: str, model: str = "gpt-4.1"):
         self.prompt = """
 What are all the pharmacogenomic relationships found in this paper?
 Output your response in markdown table format with nothing except the table. The columns should be Gene, Polymorphism, Relationship/Effect, and p-value.
+Make sure that every polymorphism gets its own row, even if they have the same effect/p-value.
 """
 
     def generate_table_json(self) -> AnnotationTable:
@@ -62,10 +63,12 @@ def generate_table_json(self) -> AnnotationTable:
 Please extract all pharmacogenomic relationships and format them as structured data with the following fields for each relationship:
 - gene: The gene name
 - polymorphism: The genetic polymorphism or variant
+- drug: The drug name if a drug is part of this relationship. If a drug is not part of this association, fill this field with "None".
 - relationship_effect: Description of the relationship or effect
-- p_value: The statistical p-value
+- p_value: The statistical p-value. If confidence intervals are provided, display that information here as well.
 
 Return the data as a JSON object with a 'relationships' array containing all the pharmacogenomic relationships found.
+Make sure that every polymorphism/relationship gets its own entry, even if they have the same effect/p-value.
 """
 
         response = self.generator.generate(
diff --git a/src/study_parameters.py b/src/study_parameters.py
index f88b605..7733707 100644
--- a/src/study_parameters.py
+++ b/src/study_parameters.py
@@ -97,7 +97,9 @@ def get_study_type(self) -> str:
         Retrospective: Uses existing records to look backward at exposures and outcomes.
         Replication: Repeating a study to confirm findings.
 
-        Your output should be a string similar to these examples: "case/control, GWAS", "Cohort, replication", etc. Do not include a descriptor that's not included in the list above."""
+        Your output should be a string similar to these examples: "case/control, GWAS", "Cohort, replication", etc. Do not include a descriptor that's not included in the list above.
+        If the study type is not clear, return "Unknown".
+        Don't include any other text or formatting (e.g. don't include quotation marks in your response)."""
 
         return self.generator.generate(prompt)
 

From 2c28531cf34779fd6d1a69d7285b354e627e62ab Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Mon, 11 Aug 2025 11:51:19 -0700
Subject: [PATCH 08/10] chore: moved files

---
 src/citations/line_citation_generator.py | 174 ++++++++++++++--
 src/ontology_module/variant_ontology.py  | 248 -----------------------
 2 files changed, 157 insertions(+), 265 deletions(-)
 delete mode 100644 src/ontology_module/variant_ontology.py

diff --git a/src/citations/line_citation_generator.py b/src/citations/line_citation_generator.py
index fe9c3d7..bb12699 100644
--- a/src/citations/line_citation_generator.py
+++ b/src/citations/line_citation_generator.py
@@ -899,7 +899,13 @@ def _score_sentence_for_study_param(
             Relevance score from 1-10
         """
         sentence_lower = sentence.lower()
-        parameter_lower = parameter_content.lower()
+        
+        # Handle case where parameter_content is a list
+        if isinstance(parameter_content, list):
+            parameter_lower = " ".join(str(item) for item in parameter_content).lower()
+        else:
+            parameter_lower = str(parameter_content).lower()
+        
         score = 0
 
         # Define keywords for each parameter type
@@ -1197,6 +1203,37 @@ def create_citation_generator(
         return LMCitationGenerator(pmcid, model)
 
 
+def process_annotation_file_with_citations(pmcid: str, model: str = "local") -> AnnotationTable:
+    """
+    Convenience function to load annotations from file, add citations, and save back to file.
+    
+    Args:
+        pmcid: PubMed Central ID
+        model: Model to use for citation generation
+        
+    Returns:
+        AnnotationTable with citations added
+    """
+    # Load annotations from file
+    annotations = load_annotations_from_file(pmcid)
+    
+    if not annotations.relationships:
+        logger.warning(f"No annotations found for {pmcid}")
+        return annotations
+    
+    # Create citation generator
+    generator = create_citation_generator(pmcid, model)
+    
+    # Add citations to annotations
+    updated_annotations = generator.add_citations_to_annotations(annotations)
+    
+    # Save updated annotations back to file
+    update_annotations_in_file(pmcid, updated_annotations)
+    
+    logger.info(f"Successfully processed {len(updated_annotations.relationships)} annotations for {pmcid}")
+    return updated_annotations
+
+
 # Maintain backward compatibility
 def CitationGenerator(
     pmcid: str, model: str = "local", approach: str = None
@@ -1226,33 +1263,136 @@ def CitationGenerator(
         return create_citation_generator(pmcid, model)
 
 
+def update_annotations_in_file(pmcid: str, updated_annotations: AnnotationTable) -> None:
+    """
+    Save updated annotations back to the JSON file in the new schema format.
+    
+    Args:
+        pmcid: PubMed Central ID
+        updated_annotations: AnnotationTable with updated relationships
+    """
+    import json
+    import os
+    
+    annotation_file = f"data/annotations/{pmcid}.json"
+    
+    if not os.path.exists(annotation_file):
+        logger.error(f"Annotation file not found: {annotation_file}")
+        return
+    
+    try:
+        # Load existing data
+        with open(annotation_file, 'r') as f:
+            data = json.load(f)
+        
+        # Update the relationships in the new schema format
+        if "annotations" not in data:
+            data["annotations"] = {}
+        
+        data["annotations"]["relationships"] = []
+        for rel in updated_annotations.relationships:
+            rel_dict = {
+                "gene": rel.gene,
+                "polymorphism": rel.polymorphism,
+                "relationship_effect": rel.relationship_effect,
+                "p_value": rel.p_value,
+                "citations": rel.citations,
+                "p_value_citations": rel.p_value_citations
+            }
+            data["annotations"]["relationships"].append(rel_dict)
+        
+        # Write back to file
+        with open(annotation_file, 'w') as f:
+            json.dump(data, f, indent=4, ensure_ascii=False)
+            
+        logger.info(f"Updated annotations saved to {annotation_file}")
+        
+    except Exception as e:
+        logger.error(f"Error updating annotations in {annotation_file}: {e}")
+
+
+def load_annotations_from_file(pmcid: str) -> AnnotationTable:
+    """
+    Load annotations from the new JSON schema format.
+    
+    Args:
+        pmcid: PubMed Central ID
+        
+    Returns:
+        AnnotationTable with relationships loaded from the file
+    """
+    import json
+    import os
+    
+    annotation_file = f"data/annotations/{pmcid}.json"
+    
+    if not os.path.exists(annotation_file):
+        logger.warning(f"Annotation file not found: {annotation_file}")
+        return AnnotationTable(relationships=[])
+    
+    try:
+        with open(annotation_file, 'r') as f:
+            data = json.load(f)
+            
+        # Extract relationships from the new schema format
+        if "annotations" in data and "relationships" in data["annotations"]:
+            relationships = []
+            for rel_data in data["annotations"]["relationships"]:
+                # Convert the dict to AnnotationRelationship object
+                relationship = AnnotationRelationship(
+                    gene=rel_data.get("gene", ""),
+                    polymorphism=rel_data.get("polymorphism", ""),
+                    relationship_effect=rel_data.get("relationship_effect", ""),
+                    p_value=rel_data.get("p_value", ""),
+                    citations=rel_data.get("citations", []),
+                    p_value_citations=rel_data.get("p_value_citations", [])
+                )
+                relationships.append(relationship)
+            
+            return AnnotationTable(relationships=relationships)
+        else:
+            logger.warning(f"No annotations found in file: {annotation_file}")
+            return AnnotationTable(relationships=[])
+            
+    except Exception as e:
+        logger.error(f"Error loading annotations from {annotation_file}: {e}")
+        return AnnotationTable(relationships=[])
+
+
 def main():
     """
-    Test function for citation generator using PMC11730665 and a single sentence.
+    Test function for citation generator using PMC11730665 and loading annotations from file.
     """
     # Test parameters
     pmcid = "PMC11730665"
-    test_sentence = "Patients with the GG genotype had a trend toward lower efficacy of sitagliptin and higher efficacy of gliclazide, likely due to slower metabolism of gliclazide."
 
     # Create citation generator
-    generator = create_citation_generator(pmcid, model="gemini/gemini-2.5-flash-lite")
-
-    # Create a mock annotation for testing
-    from src.annotation_table import AnnotationRelationship
-
-    test_annotation = AnnotationRelationship(
-        gene="CYP2C9",
-        polymorphism="rs1057910 GG",
-        relationship_effect="Patients with the GG genotype had a trend toward lower efficacy of sitagliptin and higher efficacy of gliclazide, likely due to slower metabolism of gliclazide.",
-        p_value=".464",
-        citations=[],
-    )
+    generator = create_citation_generator(pmcid, model="local")
+
+    # Load annotations from the updated schema file
+    annotations = load_annotations_from_file(pmcid)
+    
+    if not annotations.relationships:
+        logger.error("No annotations loaded from file. Creating a test annotation instead.")
+        # Fallback to creating a mock annotation for testing
+        from src.annotation_table import AnnotationRelationship
+        test_annotation = AnnotationRelationship(
+            gene="CYP2C9",
+            polymorphism="rs1057910 GG",
+            relationship_effect="Patients with the GG genotype had a trend toward lower efficacy of sitagliptin and higher efficacy of gliclazide, likely due to slower metabolism of gliclazide.",
+            p_value=".464",
+            citations=[],
+        )
+        annotations = AnnotationTable(relationships=[test_annotation])
 
     print(f"Testing citation generator with PMCID: {pmcid}")
-    print(f"Test sentence: {test_sentence}")
-    print(f"Test annotation: {test_annotation.gene} {test_annotation.polymorphism}")
+    print(f"Loaded {len(annotations.relationships)} annotations from file")
     print("-" * 50)
 
+    # Test with first annotation
+    test_annotation = annotations.relationships[0]
+    print(f"Test annotation: {test_annotation.gene} {test_annotation.polymorphism}")
+    
     # Get citations for the annotation
     citations = generator._get_top_citations_for_annotation(test_annotation, top_k=3)
 
diff --git a/src/ontology_module/variant_ontology.py b/src/ontology_module/variant_ontology.py
deleted file mode 100644
index 3659a8f..0000000
--- a/src/ontology_module/variant_ontology.py
+++ /dev/null
@@ -1,248 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Callable, Dict, Optional, Any, List
-from dataclasses import dataclass, field
-import logging
-from Bio import Entrez
-import requests
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class NormalizationResult:
-    raw_input: str
-    normalized_output: str
-    entity_type: str  # e.g. "variant", "gene", "drug", etc.
-    source: str  # where the normalized info came from
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "NormalizationResult":
-        return cls(
-            raw_input=data["raw_input"],
-            normalized_output=data["normalized_output"],
-            entity_type=data.get("entity_type", "unknown"),
-            source=data["source"],
-            metadata=data.get("metadata", {}),
-        )
-
-
-class BaseNormalizer(ABC):
-    def __init__(self):
-        self._handlers: list[Callable[[str], Optional[dict]]] = []
-
-    def register_handler(self, handler: Callable[[str], Optional[dict]]):
-        self._handlers.append(handler)
-
-    def normalize(self, raw: str) -> Optional["NormalizationResult"]:
-        for handler in self._handlers:
-            try:
-                result = handler(raw)
-                if result:
-                    return result  # Assuming result is already a NormalizedEntity
-            except Exception as e:
-                logger.exception(
-                    f"Handler '{handler.__name__}' failed on input: '{raw}'"
-                )
-        return None
-
-    @abstractmethod
-    def name(self) -> str:
-        pass
-
-
-class RSIDNormalizer(BaseNormalizer):
-    def __init__(self, email: str, api_key: Optional[str] = None):
-        super().__init__()
-        Entrez.email = email
-        if api_key:
-            Entrez.api_key = api_key
-
-        self.register_handler(self.lookup_dbsnp)
-        self.register_handler(self.lookup_pharmgkb_id)
-
-    def name(self) -> str:
-        return "RSIDNormalizer"
-
-    def lookup_dbsnp(self, raw: str) -> Optional[NormalizationResult]:
-        rsid = raw.lower().strip()
-        if not rsid.startswith("rs") or not rsid[2:].isdigit():
-            return None
-
-        try:
-            handle = Entrez.esummary(db="snp", id=rsid[2:], retmode="json")
-            response = handle.read()
-            handle.close()
-
-            # Convert JSON string to Python dict
-            import json
-
-            data = json.loads(response)
-
-            record = data.get("result", {}).get(rsid[2:])
-            if not record:
-                return None
-
-            return NormalizationResult(
-                raw_input=raw,
-                normalized_output=rsid,
-                entity_type="variant",
-                source="dbSNP",
-                metadata=record,
-            )
-
-        except Exception:
-            logger.exception(f"lookup_dbsnp failed for {raw}")
-            return None
-
-    def lookup_pharmgkb_id(self, raw: str) -> Optional[NormalizationResult]:
-        logger.debug(f"Looking up PharmGKB variant by symbol: {raw}")
-
-        base_url = "https://api.pharmgkb.org/v1/data/variant"
-        params = {"symbol": raw.strip(), "view": "max"}
-
-        try:
-            response = requests.get(base_url, params=params, timeout=10)
-            if response.status_code != 200:
-                logger.warning(
-                    f"PharmGKB lookup failed ({response.status_code}) for {raw}"
-                )
-                return None
-
-            data = response.json()
-            records = data.get("data", [])
-            if not records:
-                logger.info(f"No PharmGKB variant match for symbol: {raw}")
-                return None
-
-            variant = records[0]
-
-            # Extract only required fields
-            normalized_output = variant.get("id")
-            entity_type = "variant"
-            source = "PharmGKB"
-
-            # Remove known fields so everything else is dumped into metadata
-            metadata = {k: v for k, v in variant.items() if k not in {"id"}}
-
-            return NormalizationResult(
-                raw_input=raw,
-                normalized_output=normalized_output,
-                entity_type=entity_type,
-                source=source,
-                metadata=metadata,
-            )
-
-        except Exception:
-            logger.exception(f"PharmGKB symbol lookup failed for {raw}")
-            return None
-
-
-class StarAlleleNormalizer(BaseNormalizer):
-    API_URL = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
-
-    def __init__(self):
-        pass
-
-    def name(self):
-        return "Star Allele Normalizer"
-
-    def fetch_star_alleles(
-        self, query: str, max_results: int = 50
-    ) -> List[Dict[str, Any]]:
-        """
-        Fetches all star allele records matching the query string from the PharmVar-backed Clinical Tables API.
-        Returns a list of dictionaries, one per allele, with all available fields populated.
-        """
-        fields = [
-            "StarAlleleName",
-            "GenBank",
-            "ProteinAffected",
-            "cDNANucleotideChanges",
-            "GeneNucleotideChange",
-            "XbaIHaplotype",
-            "RFLP",
-            "OtherNames",
-            "ProteinChange",
-            "InVivoEnzymeActivity",
-            "InVitroEnzymeActivity",
-            "References",
-            "ClinicalPhenotype",
-            "Notes",
-        ]
-
-        params = {"terms": query, "count": max_results, "ef": ",".join(fields)}
-
-        try:
-            response = requests.get(self.API_URL, params=params, timeout=10)
-            response.raise_for_status()
-        except Exception as e:
-            logger.error(f"API request failed: {e}")
-            return []
-
-        try:
-            total_count, allele_names, extra_fields, *_ = response.json()
-        except Exception as e:
-            logger.error(f"Failed to parse API response: {e}")
-            return []
-
-        results = []
-        for i, allele in enumerate(allele_names):
-            allele_info = {"StarAlleleName": allele}
-            for field, values in extra_fields.items():
-                allele_info[field] = values[i] if i < len(values) else None
-            results.append(allele_info)
-
-        return results
-
-    # def fetch_star_alleles(self, term: str) -> list[dict]:
-    #     """
-    #     Searches for star alleles matching a term and retrieves full metadata for each.
-
-    #     Args:
-    #         term (str): The star allele search string (e.g., "CYP2D6*4").
-
-    #     Returns:
-    #         list[dict]: Each dict contains all metadata fields for a matched star allele.
-    #     """
-    #     base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
-    #     fields = [
-    #         "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
-    #         "GeneNucleotideChange", "ProteinChange", "OtherNames",
-    #         "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
-    #         "ClinicalPhenotype", "Notes"
-    #     ]
-
-    #     params = {
-    #         "terms": term,
-    #         "ef": ",".join(fields),
-    #         "maxList": "50"
-    #     }
-
-    #     response = requests.get(base_url, params=params)
-    #     response.raise_for_status()
-    #     data = response.json()
-
-    #     if not data or len(data) < 3:
-    #         return []
-
-    #     codes = data[1]
-    #     extra_fields = data[2]
-
-    #     results = []
-    #     for i, code in enumerate(codes):
-    #         allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
-    #         results.append(allele_data)
-
-    #     return results
-
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    normalizer = StarAlleleNormalizer()
-    data = normalizer.fetch_star_alleles("CYP2D6*4")
-
-    for record in data:
-        print("\n--- Star Allele Record ---")
-        for k, v in record.items():
-            print(f"{k}: {v}")

From 7838555a75447644f8f39780e820e2ecd7b924bb Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Mon, 11 Aug 2025 14:35:49 -0700
Subject: [PATCH 09/10] fix: got rid of unused code

---
 src/ontology/drug_ontology.py    | 153 ++-----------------------------
 src/ontology/variant_ontology.py |  42 ---------
 2 files changed, 6 insertions(+), 189 deletions(-)

diff --git a/src/ontology/drug_ontology.py b/src/ontology/drug_ontology.py
index 436017b..e1f7376 100644
--- a/src/ontology/drug_ontology.py
+++ b/src/ontology/drug_ontology.py
@@ -122,11 +122,9 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
             )
 
         except requests.RequestException as exc:
-            logger.warning("PharmGKB request failed for '%s': %s", raw, exc)
+            logger.warning(f"PharmGKB request failed for '{raw}': {exc}")
         except Exception as exc:
-            logger.warning(
-                "Unexpected error during PharmGKB lookup for '%s': %s", raw, exc
-            )
+            logger.warning(f"Unexpected error during PharmGKB lookup for '{raw}': {exc}")
 
         return None
 
@@ -180,9 +178,9 @@ def lookup_drug_rxnorm(self, raw: str) -> Optional[NormalizationResult]:
             )
 
         except requests.RequestException as exc:
-            logger.warning("RxNorm request failed for '%s': %s", raw, exc)
+            logger.warning(f"RxNorm request failed for '{raw}': {exc}")
         except Exception as exc:
-            logger.warning("Unexpected error in RxNorm lookup for '%s': %s", raw, exc)
+            logger.warning(f"Unexpected error in RxNorm lookup for '{raw}': {exc}")
 
         return None
 
@@ -234,145 +232,6 @@ def test_lookup_pharmgkb():
         assert "id" in result.metadata
 
 
-def extract_drugs_from_annotations():
-    """
-    Extract and normalize drugs from annotation files.
-    This demonstrates drug normalization from real annotation data.
-    """
-    import json
-    import os
-    import re
-    from typing import Set, List, Dict, Any
-
-    drug_normalizer = DrugNormalizer()
-
-    annotation_dir = (
-        "/Users/shloknatarajan/stanford/research/daneshjou/AutoGKB/data/annotations"
-    )
-    if not os.path.exists(annotation_dir):
-        print(f"❌ Annotation directory not found: {annotation_dir}")
-        return
-
-    drugs_found: Set[str] = set()
-    normalized_results: List[Dict[str, Any]] = []
-
-    print("🔍 Scanning annotation files for drugs...")
-
-    # Common drug name patterns to look for
-    drug_patterns = [
-        r"\b(?:warfarin|imatinib|gleevec|sitagliptin|gliclazide|metformin|edoxaban)\b",
-        r"\b\w+mab\b",  # monoclonal antibodies
-        r"\b\w+ine\b",  # many drugs end in -ine
-        r"\b\w+ol\b",  # many drugs end in -ol
-    ]
-
-    # Scan all annotation files
-    for filename in os.listdir(annotation_dir):
-        if not filename.endswith(".json"):
-            continue
-
-        filepath = os.path.join(annotation_dir, filename)
-        try:
-            with open(filepath, "r") as f:
-                data = json.load(f)
-
-            # Extract drugs from title and content
-            text_content = data.get("title", "") + " "
-
-            # Also check study parameters for drug mentions
-            if "study_parameters" in data:
-                for section in data["study_parameters"].values():
-                    if isinstance(section, dict) and "content" in section:
-                        if isinstance(section["content"], str):
-                            text_content += section["content"] + " "
-                        elif isinstance(section["content"], list):
-                            text_content += (
-                                " ".join(str(item) for item in section["content"]) + " "
-                            )
-
-            # Apply drug patterns
-            for pattern in drug_patterns:
-                matches = re.findall(pattern, text_content, re.IGNORECASE)
-                drugs_found.update(match.lower() for match in matches)
-
-        except Exception as e:
-            print(f"⚠️  Error processing {filename}: {e}")
-
-    print(f"📊 Found {len(drugs_found)} potential drug names")
-
-    # Normalize each drug
-    for drug in drugs_found:
-        if len(drug) < 3:  # Skip very short matches
-            continue
-
-        print(f"\n💊 Processing drug: {drug}")
-
-        result = drug_normalizer.normalize(drug)
-
-        if result:
-            print(f"✅ Normalization successful:")
-            print(f"   Raw: {result.raw_input}")
-            print(f"   Normalized: {result.normalized_output}")
-            print(f"   Source: {result.source}")
-            print(f"   Type: {result.entity_type}")
-
-            if result.metadata:
-                if "cid" in result.metadata:
-                    print(f"   PubChem CID: {result.metadata['cid']}")
-                if "molecular_formula" in result.metadata:
-                    print(f"   Formula: {result.metadata['molecular_formula']}")
-
-            normalized_results.append({"raw_drug": drug, "result": result})
-        else:
-            print(f"❌ No normalization found for {drug}")
-
-    print(
-        f"\n📈 Summary: {len(normalized_results)}/{len(drugs_found)} drugs successfully normalized"
-    )
-    return normalized_results
-
-
-def test_drug_normalizers():
-    """Test drug normalizer with sample data"""
-    print("\n" + "=" * 50)
-    print("🧪 TESTING DRUG NORMALIZERS")
-    print("=" * 50)
-
-    drug_normalizer = DrugNormalizer()
-    test_drugs = ["imatinib", "Gleevec", "warfarin", "sitagliptin", "metformin"]
-
-    for drug in test_drugs:
-        print(f"\n💊 Testing {drug}:")
-        result = drug_normalizer.normalize(drug)
-        if result:
-            print(f"  ✅ Found: {result.normalized_output} from {result.source}")
-            if result.metadata:
-                if "cid" in result.metadata:
-                    print(f"  🆔 PubChem CID: {result.metadata['cid']}")
-                if "molecular_formula" in result.metadata:
-                    print(f"  🧪 Formula: {result.metadata['molecular_formula']}")
-        else:
-            print(f"  ❌ Not found")
-
-
 if __name__ == "__main__":
-    pass
-
-    print("🎯 AutoGKB Drug Ontology Normalization System")
-    print("=" * 60)
-
-    # Test individual drug normalizers first
-    test_drug_normalizers()
-
-    # Then demonstrate with real annotation data
-    print("\n" + "=" * 50)
-    print("📋 PROCESSING ANNOTATION DATA FOR DRUGS")
-    print("=" * 50)
-
-    results = extract_drugs_from_annotations()
-
-    if results:
-        print(f"\n🎉 Successfully processed annotation data!")
-        print(f"   Normalized {len(results)} drugs")
-    else:
-        print("\n⚠️  No results from annotation processing")
+    test_lookup_pharmgkb()
+    test_lookup_pubchem()
diff --git a/src/ontology/variant_ontology.py b/src/ontology/variant_ontology.py
index 87ff62e..8092949 100644
--- a/src/ontology/variant_ontology.py
+++ b/src/ontology/variant_ontology.py
@@ -225,48 +225,6 @@ def fetch_star_alleles(
 
         return results
 
-    # def fetch_star_alleles(self, term: str) -> list[dict]:
-    #     """
-    #     Searches for star alleles matching a term and retrieves full metadata for each.
-
-    #     Args:
-    #         term (str): The star allele search string (e.g., "CYP2D6*4").
-
-    #     Returns:
-    #         list[dict]: Each dict contains all metadata fields for a matched star allele.
-    #     """
-    #     base_url = "https://clinicaltables.nlm.nih.gov/api/star_alleles/v3/search"
-    #     fields = [
-    #         "StarAlleleName", "GenBank", "ProteinAffected", "cDNANucleotideChanges",
-    #         "GeneNucleotideChange", "ProteinChange", "OtherNames",
-    #         "InVivoEnzymeActivity", "InVitroEnzymeActivity", "References",
-    #         "ClinicalPhenotype", "Notes"
-    #     ]
-
-    #     params = {
-    #         "terms": term,
-    #         "ef": ",".join(fields),
-    #         "maxList": "50"
-    #     }
-
-    #     response = requests.get(base_url, params=params)
-    #     response.raise_for_status()
-    #     data = response.json()
-
-    #     if not data or len(data) < 3:
-    #         return []
-
-    #     codes = data[1]
-    #     extra_fields = data[2]
-
-    #     results = []
-    #     for i, code in enumerate(codes):
-    #         allele_data = {field: extra_fields.get(field, [None])[i] for field in fields}
-    #         results.append(allele_data)
-
-    #     return results
-
-
 def extract_variants_from_annotations():
     """
     Extract and normalize variants from annotation files.

From 76f234f118afb606f6efada71c12ae1fdf1b717a Mon Sep 17 00:00:00 2001
From: Shlok Natarajan <shlok.natarajan@gmail.com>
Date: Mon, 11 Aug 2025 18:14:50 -0700
Subject: [PATCH 10/10] chore: reverted change + black formatting

---
 data/annotations/PMC11730665.json             | 128 ++++----
 data/annotations/PMC4737107.json              | 189 +++++++-----
 data/annotations/PMC5712579.json              | 176 +++++------
 data/annotations/PMC5728534.json              | 277 +++++++++++++-----
 data/annotations/PMC5749368.json              | 109 +++----
 notebooks/test.ipynb                          |  16 +
 pixi.toml                                     |   2 +
 src/citations/line_citation_generator.py      |  83 +++---
 .../functional_annotation_extraction.py       |   6 +-
 .../phenotype_annotation_extraction.py        |   6 +-
 src/ontology/drug_ontology.py                 |   8 +-
 src/ontology/variant_ontology.py              |   6 +-
 12 files changed, 586 insertions(+), 420 deletions(-)
 create mode 100644 notebooks/test.ipynb

diff --git a/data/annotations/PMC11730665.json b/data/annotations/PMC11730665.json
index d384978..3b20273 100644
--- a/data/annotations/PMC11730665.json
+++ b/data/annotations/PMC11730665.json
@@ -3,7 +3,7 @@
     "title": "Comparative efficacy and safety of sitagliptin or gliclazide combined with metformin in treatment-naive patients with type 2 diabetes: A single-center, prospective, randomized, controlled, noninferiority study with genetic polymorphism analysis",
     "study_parameters": {
         "summary": {
-            "content": "The study aimed to compare the efficacy and safety of sitagliptin versus gliclazide, both combined with metformin, in treatment-naive patients with type 2 diabetes mellitus and glucotoxicity. Conducted as a single-center, randomized, controlled noninferiority trial, it involved 129 participants who were treated for 12 weeks. The results demonstrated that sitagliptin combined with metformin was noninferior to gliclazide combined with metformin, with sitagliptin achieving faster glycemic targets, greater weight reductions, and similar safety profiles, while genetic polymorphisms significantly influenced drug efficacy, underscoring the importance of personalized medicine.",
+            "content": "The study aimed to compare the efficacy and safety of sitagliptin versus gliclazide, both combined with metformin, in treatment-naive patients with type 2 diabetes mellitus and glucotoxicity. Conducted as a single-center, prospective, randomized, controlled noninferiority trial, it involved 129 participants who were treated for 12 weeks. Results demonstrated that sitagliptin was noninferior to gliclazide in reducing glycated hemoglobin levels, with sitagliptin achieving faster glycemic targets and greater weight reductions, while genetic polymorphisms significantly influenced drug efficacy, underscoring the importance of personalized medicine.",
             "citations": [
                 "Sitagliptin combined with metformin is noninferior to gliclazide combined with metformin in treatment-naive patients with T2DM with glucotoxicity.",
                 "The sitagliptin group achieved glycemic targets more quickly and had greater weight reductions without increased adverse effects.",
@@ -11,28 +11,28 @@
             ]
         },
         "study_type": {
-            "content": "\"Clinical trial, GWAS, prospective\"",
+            "content": "Clinical trial, GWAS, Prospective",
             "citations": [
                 "This single-center, prospective, randomized, controlled, noninferiority study, which is of utmost importance, included treatment-naive patients with T2DM.",
                 "Whole genome analysis techniques were employed to investigate the impact of genetic polymorphisms on drug efficacy and safety.",
-                "Whole genome sequencing was performed on the extracted DNA samples using the Illumina HiSeq X Ten platform (Illumina, San Diego, CA), achieving a coverage depth of over 1000-fold per sample to ensure high-quality genomic data."
+                "The Manhattan plot shows the overall distribution of single-nucleotide polymorphism associations across the genome, highlighting key loci such as rs2909451, rs4664443, rs163184, and rs2285676, which are strongly associated with differential HbA1c improvements and underscore the genetic influence on therapeutic response (Fig. [6](#F6)6)."
             ]
         },
         "participant_info": {
             "content": [
                 "**Age Range:** 18 to 70 years",
                 "**Gender:** Study group: 47 males, 14 females; Control group: 33 males, 20 females",
-                "**Body Mass Index (BMI):** 18 to 30 kg/m\u00b2",
                 "**Ethnicity:** Not specified",
-                "**Pre-existing Conditions:** Treatment-naive individuals with type 2 diabetes mellitus (T2DM) and glucotoxicity",
-                "**Fasting Plasma Glucose (FPG):** \u2265 200 mg/dL (11.1 mmol/L)",
-                "**Glycated Hemoglobin (HbA1c):** \u2265 9.0%",
-                "**Hepatic and Renal Function:** Normal, with alanine aminotransferase and aspartate aminotransferase levels not exceeding 2.5\u00d7 the upper limit of normal, serum creatinine within normal limits, and urine ketone bodies not exceeding (1+)"
+                "**Body Mass Index (BMI):** Study group: 25.00 \u00b1 2.19 kg/m\u00b2; Control group: 24.51 \u00b1 2.65 kg/m\u00b2",
+                "**Fasting Plasma Glucose (FPG):** Study group: 12.66 \u00b1 1.69 mmol/L; Control group: 12.82 \u00b1 1.78 mmol/L",
+                "**Glycated Hemoglobin (HbA1c):** Study group: 10.36 \u00b1 1.18%; Control group: 10.64 \u00b1 1.27%",
+                "**Pre-existing Conditions:** Newly diagnosed, treatment-naive type 2 diabetes mellitus with glucotoxicity (FPG \u2265 200 mg/dL and HbA1c \u2265 9.0%)",
+                "**Other Characteristics:** Normal hepatic and renal function, no previous use of hypoglycemic medications, and no major systemic diseases."
             ],
             "citations": [
                 "The inclusion criteria are given as follows: demonstrably understand the study objectives with voluntary participation, documented by signed informed consent; newly diagnosed, treatment-naive individuals with T2DM; the age range is between 18 and 70 years, with a body mass index (BMI) ranging from 18 to 30 kg/m\u00b2; normal hepatic and renal function, defined as alanine aminotransferase and aspartate aminotransferase levels not exceeding 2.5\u00d7 the upper limit of normal, serum creatinine within normal limits, and urine ketone bodies not exceeding (1+); fasting plasma glucose (FPG) levels \u2265 200 mg/dL (11.1 mmol/L) and glycated hemoglobin (HbA1c) \u2265 9.0%; and capability to adhere to the prescribed antidiabetic regimen, follow dietary guidelines, and self-monitor fasting and postprandial blood glucose levels.",
-                "## Table 1. Baseline data characteristics of patients in the 2 groups (x\u0304\u00b1s).",
-                "There were no statistically significant differences between the groups in terms of gender, age, height, weight, BMI, FPG, or HbA1c at baseline, indicating homogeneity across the cohorts (P > .05; Table [1]1)."
+                "Exclusion criteria included the following: a diagnosis of type 1 diabetes mellitus; hepatic or renal dysfunction indicated by serum creatinine levels above 1.2\u00d7 the upper limit of normal; previous use of hypoglycemic medications before screening; a history of severe ketosis, ketoacidosis, or hyperosmolar hyperglycemic state; ongoing treatment with corticosteroids, immunosuppressive agents, or cytotoxic drugs or a history of pancreatitis or pancreatic surgery; major systemic diseases such as cardiovascular, respiratory, gastrointestinal, neurological, endocrine, or genitourinary disorders, severe anemia, malignancies, psychiatric disorders, or other conditions likely to interfere with study results; pregnant or breastfeeding women; known allergies to sitagliptin or gliclazide; and poor compliance potential as assessed by the investigator, which may preclude completion of study requirements.",
+                "## Table 1. Baseline data characteristics of patients in the 2 groups (x\u0304\u00b1s)."
             ]
         },
         "study_design": {
@@ -40,53 +40,40 @@
                 "**Study Design:** Single-center, prospective, randomized, controlled, noninferiority trial.",
                 "**Study Population:** Treatment-naive patients with type 2 diabetes mellitus (T2DM) and glucotoxicity.",
                 "**Inclusion Criteria:** Newly diagnosed T2DM, age 18-70 years, BMI 18-30 kg/m\u00b2, FPG \u2265 200 mg/dL, HbA1c \u2265 9.0%.",
-                "**Exclusion Criteria:** Type 1 diabetes, hepatic or renal dysfunction, prior hypoglycemic medication use, major systemic diseases, pregnancy, or poor compliance potential.",
-                "**Sample Size:** 129 participants randomized; 66 in the sitagliptin group and 63 in the gliclazide group.",
-                "**Intervention:** Sitagliptin plus metformin or gliclazide plus metformin for 4 weeks, followed by metformin monotherapy for 8 weeks.",
-                "**Follow-up Protocol:** Regular assessments at baseline, 2, 4, 8, and 12 weeks, with weekly telephone follow-ups for hypoglycemic events and adverse effects.",
+                "**Exclusion Criteria:** Type 1 diabetes, hepatic or renal dysfunction, previous hypoglycemic medication use, major systemic diseases, pregnancy, or poor compliance potential.",
+                "**Sample Size:** 129 participants randomized; 66 in the sitagliptin plus metformin group, 63 in the gliclazide plus metformin group.",
+                "**Intervention:** Sitagliptin (100 mg daily) or gliclazide MR (2 mg daily) combined with metformin (500 mg 3\u00d7 daily) for 4 weeks, followed by metformin monotherapy for 8 weeks.",
+                "**Follow-up Protocol:** Regular follow-up visits at baseline, 2, 4, 8, and 12 weeks, with weekly telephone contact to document hypoglycemic events and adverse effects.",
                 "**Primary Endpoint:** Change in HbA1c from baseline to week 12.",
                 "**Secondary Endpoints:** Changes in FPG, body weight, BMI, and achievement of specific glycemic targets."
             ],
             "citations": [
                 "This single-center, prospective, randomized, controlled, noninferiority study, which is of utmost importance, included treatment-naive patients with T2DM.",
                 "The inclusion criteria are given as follows: demonstrably understand the study objectives with voluntary participation, documented by signed informed consent; newly diagnosed, treatment-naive individuals with T2DM; the age range is between 18 and 70 years, with a body mass index (BMI) ranging from 18 to 30 kg/m\u00b2; normal hepatic and renal function, defined as alanine aminotransferase and aspartate aminotransferase levels not exceeding 2.5\u00d7 the upper limit of normal, serum creatinine within normal limits, and urine ketone bodies not exceeding (1+); fasting plasma glucose (FPG) levels \u2265 200 mg/dL (11.1 mmol/L) and glycated hemoglobin (HbA1c) \u2265 9.0%; and capability to adhere to the prescribed antidiabetic regimen, follow dietary guidelines, and self-monitor fasting and postprandial blood glucose levels.",
-                "In the intervention arm, participants were administered sitagliptin phosphate (100 mg daily, manufactured by Merck) and metformin (500 mg 3\u00d7 daily, orally manufactured by Bristol Myers Squibb) for 4 weeks. Following this phase, participants were transitioned to monotherapy with metformin for an additional 8 weeks. Conversely, the control group received gliclazide MR (2 mg daily, orally, manufactured by Sanofi) combined with metformin (500 mg 3\u00d7 daily, orally, manufactured by Bristol Myers Squibb) for 4 weeks, after which they too were transitioned to monotherapy with metformin for 8 weeks."
+                "Exclusion criteria included the following: a diagnosis of type 1 diabetes mellitus; hepatic or renal dysfunction indicated by serum creatinine levels above 1.2\u00d7 the upper limit of normal; previous use of hypoglycemic medications before screening; a history of severe ketosis, ketoacidosis, or hyperosmolar hyperglycemic state; ongoing treatment with corticosteroids, immunosuppressive agents, or cytotoxic drugs or a history of pancreatitis or pancreatic surgery; major systemic diseases such as cardiovascular, respiratory, gastrointestinal, neurological, endocrine, or genitourinary disorders, severe anemia, malignancies, psychiatric disorders, or other conditions likely to interfere with study results; pregnant or breastfeeding women; known allergies to sitagliptin or gliclazide; and poor compliance potential as assessed by the investigator, which may preclude completion of study requirements."
             ]
         },
         "study_results": {
             "content": [
                 "**Mean glycated hemoglobin reduction:** 4.03% in the sitagliptin group and 4.13% in the gliclazide group, with a mean difference of \u22120.097 (95% CI, \u22120.648 to 0.453), confirming noninferiority.",
                 "**FPG reduction at 4 weeks:** Significant in both groups (*P* < .05).",
-                "**FPG < 6.1 mmol/L achievement rate at 4 weeks:** 26.2% in the sitagliptin group vs 5.7% in the gliclazide group (*P* = .012).",
-                "**Time to achieve normal FPG levels:** 24.56 \u00b1 7.43 days in the sitagliptin group vs 28.30 \u00b1 10.29 days in the gliclazide group (*P* = .027).",
-                "**Weight change after 12 weeks:** \u22120.53 \u00b1 1.85 kg in the sitagliptin group vs 0.49 \u00b1 1.57 kg in the gliclazide group (*P* < .001).",
-                "**BMI change after 12 weeks:** \u22120.18 \u00b1 0.63 kg/m\u00b2 in the sitagliptin group vs 0.18 \u00b1 0.59 kg/m\u00b2 in the gliclazide group (*P* < .001).",
-                "**Hypoglycemia incidence:** 6.6% in the sitagliptin group vs 9.4% in the gliclazide group (*P* = .605).",
-                "**Genetic polymorphisms affecting sitagliptin efficacy:** DPP-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin."
+                "**FPG < 6.1 mmol/L at 4 weeks:** 26.2% in the sitagliptin group vs 5.7% in the gliclazide group (*P* = .012).",
+                "**Body weight change:** The sitagliptin group showed greater weight reduction than the gliclazide group (*P* = .009).",
+                "**\u03b2-cell function and hypoglycemia incidence:** No significant differences between groups (*P* > .05).",
+                "**Genetic polymorphisms:** DPP-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin."
             ],
             "citations": [
                 "After 12 weeks, mean glycated hemoglobin reductions were 4.03% in the sitagliptin group and 4.13% in the gliclazide group, with a mean difference of \u22120.097 (95% confidence interval, \u22120.648 to 0.453), confirming noninferiority.",
-                "At 4 weeks, the proportion of patients achieving FPG < 6.1 mmol/L was significantly higher in the study group (26.2% [16/61]) compared with the control group (5.7% [3/53]; *P* = .012; Fig. [2]2B).",
-                "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin."
+                "Both groups showed significant FPG reductions at 4 weeks (P < .05).",
+                "The sitagliptin group achieved faster glycemic targets, greater FPG and body weight reductions, and higher rates of FPG < 6.1 mmol/L (26.2% vs 5.7%; P = .012)."
             ]
         },
         "allele_frequency": {
-            "content": [
-                "**rs2909451 TT genotype:** Lower efficacy of sitagliptin with a median HbA1c improvement of 0.57 (IQR, 0.18\u20130.85) in the study group.",
-                "**rs4664443 GG genotype:** Lower efficacy of sitagliptin with a median HbA1c improvement of 0.69 (IQR, 0.48\u20130.91) in the study group.",
-                "**rs6923761 AA genotype:** Reduced glycemic response to sitagliptin with a median HbA1c improvement of 0.90 (IQR, 0.61\u20131.01) in the study group.",
-                "**rs3765467 AG genotype:** Favorable response to sitagliptin with a median HbA1c improvement of 1.42 (IQR, 1.22\u20131.68) in the study group.",
-                "**rs163184 GG allele:** Lower responsiveness to sitagliptin with a median HbA1c improvement of 0.81 (IQR, 0.62\u20130.92) in the study group.",
-                "**rs2285676 CC genotype:** More substantial insulin secretion capability with sitagliptin with a median HbA1c improvement of 1.02 (IQR, 0.90\u20131.22) in the study group.",
-                "**rs7754840 CG genotype:** More significant HbA1c reduction in the study group with a median improvement of 1.44 (IQR, 1.38\u20131.72).",
-                "**rs756992 AG genotype:** More significant HbA1c reduction in the study group with a median improvement of 1.43 (IQR, 1.28\u20131.52).",
-                "**rs1799853 TT genotype:** Slower metabolism of gliclazide with a median HbA1c improvement of 0.70 (IQR, 0.69\u20130.72) in the study group.",
-                "**rs1057910 GG genotype:** Slower metabolism of gliclazide with a median HbA1c improvement of 0.93 (IQR, 0.66\u20131.21) in the study group."
-            ],
+            "content": "| Gene   | Polymorphism | Frequency                                                                 |\n|--------|--------------|---------------------------------------------------------------------------|\n| DPP-4  | rs2909451    | The TT genotype showed lower efficacy with sitagliptin.                    |\n| DPP-4  | rs4664443    | The GG genotype showed lower efficacy with sitagliptin.                    |\n| GLP1R  | rs3765467    | The AG genotype responded better to sitagliptin.                           |\n| KCNJ11 | rs2285676    | The CC genotype responded better to sitagliptin.                           |\n| CYP2C9 | rs1799853    | The TT genotype showed slower metabolism of gliclazide.                    |\n| CYP2C9 | rs1057910    | The GG genotype showed slower metabolism of gliclazide.                    |",
             "citations": [
+                "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin.",
                 "The analysis revealed significant effects of DPP-4 gene polymorphisms on the efficacy of sitagliptin.",
-                "Patients with the rs2909451 TT genotype in the study group (treated with sitagliptin) exhibited a median HbA1c improvement of 0.57 (interquartile range [IQR], 0.18\u20130.85), whereas the control group (treated with gliclazide) showed a median improvement of 1.11 (IQR, 0.86\u20131.35; *P*P < .001).",
-                "## Table 6. Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                "CYP2C9 gene polymorphisms also significantly influenced treatment efficacy."
             ]
         },
         "additional_resource_links": [
@@ -101,63 +88,60 @@
             {
                 "gene": "DPP-4",
                 "polymorphism": "rs2909451 TT",
-                "relationship_effect": "Patients with the rs2909451 TT genotype showed lower efficacy of sitagliptin (smaller HbA1c improvement) compared to gliclazide.",
+                "relationship_effect": "Patients with the rs2909451 TT genotype showed lower efficacy with sitagliptin (median HbA1c improvement 0.57) compared to gliclazide (median 1.11).",
                 "p_value": "<.001",
                 "citations": [
-                    "The analysis revealed significant effects of DPP-4 gene polymorphisms on the efficacy of sitagliptin.",
-                    "Patients with the rs2909451 TT genotype in the study group (treated with sitagliptin) exhibited a median HbA1c improvement of 0.57 (interquartile range [IQR], 0.18\u20130.85), whereas the control group (treated with gliclazide) showed a median improvement of 1.11 (IQR, 0.86\u20131.35; *P*P < .001).",
-                    "## Table 6. Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "The analysis revealed significant effects of DPP-4 gene polymorphisms on the efficacy of sitagliptin. Patients with the rs2909451 TT genotype in the study group (treated with sitagliptin) exhibited a median HbA1c improvement of 0.57 (interquartile range [IQR], 0.18\u20130.85), whereas the control group (treated with gliclazide) showed a median improvement of 1.11 (IQR, 0.86\u20131.35; *P*P < .001).",
+                    "## Table 6. Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes.",
+                    "Genetic polymorphisms, such as DPP-4 rs2909451 TT and rs4664443 GG, significantly influenced the efficacy of sitagliptin, highlighting the importance of personalized medicine."
                 ],
                 "p_value_citations": [
-                    "P < .001"
+                    "Patients with the rs2909451 TT genotype in the study group (treated with sitagliptin) exhibited a median HbA1c improvement of 0.57 (IQR, 0.18\u20130.85), whereas the control group (treated with gliclazide) showed a median improvement of 1.11 (IQR, 0.86\u20131.35; P < .001)."
                 ]
             },
             {
                 "gene": "DPP-4",
                 "polymorphism": "rs4664443 GG",
-                "relationship_effect": "Patients with the rs4664443 GG genotype showed lower efficacy of sitagliptin (smaller HbA1c improvement) compared to gliclazide.",
+                "relationship_effect": "Patients with the rs4664443 GG genotype showed lower efficacy with sitagliptin (median HbA1c improvement 0.69) compared to gliclazide (median 1.25).",
                 "p_value": "<.001",
                 "citations": [
                     "Similarly, for the rs4664443 GG genotype, the median HbA1c improvement in the study group was 0.69 (IQR, 0.48\u20130.91) compared with 1.25 (IQR, 1.00\u20131.46) in the control group (*P*P < .001), indicating lower efficacy of sitagliptin.",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes.",
                     "Genetic polymorphisms, such as DPP-4 rs2909451 TT and rs4664443 GG, significantly influenced the efficacy of sitagliptin, highlighting the importance of personalized medicine."
                 ],
                 "p_value_citations": [
-                    "P < .001"
+                    "For the rs4664443 GG genotype, the median HbA1c improvement in the study group was 0.69 (IQR, 0.48\u20130.91) compared with 1.25 (IQR, 1.00\u20131.46) in the control group (P < .001), indicating lower efficacy of sitagliptin."
                 ]
             },
             {
                 "gene": "GLP1R",
                 "polymorphism": "rs6923761 AA",
-                "relationship_effect": "Patients with the rs6923761 AA genotype showed reduced glycemic response to sitagliptin compared to gliclazide.",
+                "relationship_effect": "Patients with the rs6923761 AA genotype showed reduced glycemic response to sitagliptin (median HbA1c improvement 0.90) compared to gliclazide (median 1.41).",
                 "p_value": ".010",
                 "citations": [
                     "Regarding GLP1R gene polymorphisms, patients with the rs6923761 AA homozygous genotype in the study group had a median HbA1c improvement of 0.90 (IQR, 0.61\u20131.01), while the control group showed 1.41 (IQR, 1.12\u20131.45; *P*P = .010), suggesting reduced glycemic response to sitagliptin.",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes.",
-                    "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin."
+                    "These findings provide new insights into optimizing treatment strategies and support the integration of genetic information into clinical decision-making to develop personalized therapeutic approaches."
                 ],
                 "p_value_citations": [
-                    "P = .010"
+                    "Regarding GLP1R gene polymorphisms, patients with the rs6923761 AA homozygous genotype in the study group had a median HbA1c improvement of 0.90 (IQR, 0.61\u20131.01), while the control group showed 1.41 (IQR, 1.12\u20131.45; P = .010), suggesting reduced glycemic response to sitagliptin."
                 ]
             },
             {
                 "gene": "GLP1R",
                 "polymorphism": "rs3765467 AG",
-                "relationship_effect": "Patients with the rs3765467 AG genotype responded better to sitagliptin (greater HbA1c improvement) than gliclazide.",
+                "relationship_effect": "Patients with the rs3765467 AG genotype responded better to sitagliptin (median HbA1c improvement 1.42) than gliclazide (median 1.08).",
                 "p_value": ".023",
                 "citations": [
                     "Conversely, patients with the rs3765467 AG genotype in the study group demonstrated a median HbA1c improvement of 1.42 (IQR, 1.22\u20131.68) compared with 1.08 (IQR, 0.97\u20131.15) in the control group (*P*P = .023), indicating favorable responses to both treatments.",
-                    "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin.",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "| rs3765467 AG | 1.42 (1.22\u20131.68) | 1.08 (0.97\u20131.15) | .023 |"
                 ],
                 "p_value_citations": [
-                    "P = .023"
+                    "Patients with the rs3765467 AG genotype in the study group demonstrated a median HbA1c improvement of 1.42 (IQR, 1.22\u20131.68) compared with 1.08 (IQR, 0.97\u20131.15) in the control group (P = .023), indicating favorable responses to both treatments."
                 ]
             },
             {
                 "gene": "KCNQ1",
                 "polymorphism": "rs163184 GG",
-                "relationship_effect": "Patients with the rs163184 GG allele had lower responsiveness to sitagliptin and better response to gliclazide.",
+                "relationship_effect": "Patients with the rs163184 GG allele showed lower responsiveness to sitagliptin (median HbA1c improvement 0.81) and better response to gliclazide (median 1.16).",
                 "p_value": "<.001",
                 "citations": [
                     "KCNQ1 gene polymorphisms also significantly affected treatment outcomes. Patients with the rs163184 GG allele in the study group had a median HbA1c improvement of 0.81 (IQR, 0.62\u20130.92) compared with 1.16 (IQR, 0.91\u20131.32) in the control group (*P*P < .001), suggesting lower responsiveness to sitagliptin and better response to gliclazide.",
@@ -165,41 +149,41 @@
                     "The Manhattan plot shows the overall distribution of single-nucleotide polymorphism associations across the genome, highlighting key loci such as rs2909451, rs4664443, rs163184, and rs2285676, which are strongly associated with differential HbA1c improvements and underscore the genetic influence on therapeutic response (Fig. [6](#F6)6)."
                 ],
                 "p_value_citations": [
-                    "P < .001"
+                    "Patients with the rs163184 GG allele in the study group had a median HbA1c improvement of 0.81 (IQR, 0.62\u20130.92) compared with 1.16 (IQR, 0.91\u20131.32) in the control group (P < .001), suggesting lower responsiveness to sitagliptin and better response to gliclazide."
                 ]
             },
             {
                 "gene": "KCNJ11",
                 "polymorphism": "rs2285676 CC",
-                "relationship_effect": "Patients with the rs2285676 CC genotype had more substantial insulin secretion capability and greater HbA1c improvement with sitagliptin than gliclazide.",
+                "relationship_effect": "Patients with the rs2285676 CC genotype had a median HbA1c improvement of 1.02 with sitagliptin versus 1.31 with gliclazide; the authors nonetheless report this genotype as responding better to sitagliptin, interpreting the result as indicating more substantial insulin secretion capability with sitagliptin.",
                 "p_value": "<.001",
                 "citations": [
                     "For KCNJ11 gene polymorphisms, patients with the rs2285676 CC genotype in the study group had a median HbA1c improvement of 1.02 (IQR, 0.90\u20131.22), while the control group showed 1.31 (IQR, 1.08\u20131.42; *P*P < .001), indicating more substantial insulin secretion capability with sitagliptin.",
-                    "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin.",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "## Table 6. Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes.",
+                    "Genetic analysis showed specific single-nucleotide polymorphisms affected drug efficacy: dipeptidyl peptidase-4 rs2909451 TT and rs4664443 GG genotypes showed lower efficacy with sitagliptin, while GLP1R rs3765467 AG and KCNJ11 rs2285676 CC genotypes responded better to sitagliptin."
                 ],
                 "p_value_citations": [
-                    "P < .001"
+                    "For KCNJ11 gene polymorphisms, patients with the rs2285676 CC genotype in the study group had a median HbA1c improvement of 1.02 (IQR, 0.90\u20131.22), while the control group showed 1.31 (IQR, 1.08\u20131.42; P < .001), indicating more substantial insulin secretion capability with sitagliptin."
                 ]
             },
             {
                 "gene": "CDKAL1",
                 "polymorphism": "rs7754840 CG",
-                "relationship_effect": "Patients with the rs7754840 CG genotype had a trend toward greater HbA1c reduction with sitagliptin than gliclazide.",
+                "relationship_effect": "Patients with the rs7754840 CG genotype showed a trend toward greater HbA1c reduction with sitagliptin (median 1.44) than gliclazide (median 1.09).",
                 "p_value": ".053",
                 "citations": [
                     "CDKAL1 gene variants, specifically rs7754840 CG and rs756992 AG, were associated with more significant HbA1c reductions in the study group.",
                     "Patients with the rs7754840 CG genotype showed a median improvement of 1.44 (IQR, 1.38\u20131.72) in the study group compared with 1.09 (IQR, 0.79\u20131.17) in the control group (*P*P = .053).",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
                 ],
                 "p_value_citations": [
-                    "P = .053"
+                    "Patients with the rs7754840 CG genotype showed a median improvement of 1.44 (IQR, 1.38\u20131.72) in the study group compared with 1.09 (IQR, 0.79\u20131.17) in the control group (P = .053)."
                 ]
             },
             {
                 "gene": "CDKAL1",
                 "polymorphism": "rs756992 AG",
-                "relationship_effect": "Patients with the rs756992 AG genotype had a trend toward greater HbA1c reduction with sitagliptin than gliclazide.",
+                "relationship_effect": "Patients with the rs756992 AG genotype showed a trend toward greater HbA1c reduction with sitagliptin (median 1.43) than gliclazide (median 1.10).",
                 "p_value": ".081",
                 "citations": [
                     "CDKAL1 gene variants, specifically rs7754840 CG and rs756992 AG, were associated with more significant HbA1c reductions in the study group.",
@@ -207,35 +191,35 @@
                     "Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
                 ],
                 "p_value_citations": [
-                    "P = .081"
+                    "Patients with the rs756992 AG genotype exhibited a median improvement of 1.43 (IQR, 1.28\u20131.52) in the study group compared with 1.10 (IQR, 0.87\u20131.18) in the control group (P = .081)."
                 ]
             },
             {
                 "gene": "CYP2C9",
                 "polymorphism": "rs1799853 TT",
-                "relationship_effect": "Patients with the rs1799853 TT genotype had lower HbA1c improvement with sitagliptin than gliclazide, suggesting slower metabolism of gliclazide impacts efficacy.",
+                "relationship_effect": "Patients with the rs1799853 TT genotype had lower efficacy with sitagliptin (median HbA1c improvement 0.70) compared to gliclazide (median 1.07); the authors suggest this variant leads to slower metabolism of gliclazide, thereby impacting drug efficacy and adverse event rates.",
                 "p_value": "<.001",
                 "citations": [
-                    "Patients with the rs1799853 TT genotype in the study group had a median HbA1c improvement of 0.70 (IQR, 0.69\u20130.72), while the control group showed 1.07 (IQR, 0.82\u20131.42; *P*P < .001).",
+                    "CYP2C9 gene polymorphisms also significantly influenced treatment efficacy. Patients with the rs1799853 TT genotype in the study group had a median HbA1c improvement of 0.70 (IQR, 0.69\u20130.72), while the control group showed 1.07 (IQR, 0.82\u20131.42; *P*P < .001).",
                     "These findings suggest that rs1799853 and rs1057910 variants lead to slower metabolism of gliclazide, thereby impacting drug efficacy and adverse event rates (Table [6](#T6)6; Fig. [5](#F5)5).",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "## Table 6. Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
                 ],
                 "p_value_citations": [
-                    "P < .001"
+                    "Patients with the rs1799853 TT genotype in the study group had a median HbA1c improvement of 0.70 (IQR, 0.69\u20130.72), while the control group showed 1.07 (IQR, 0.82\u20131.42; P < .001)."
                 ]
             },
             {
                 "gene": "CYP2C9",
                 "polymorphism": "rs1057910 GG",
-                "relationship_effect": "Patients with the rs1057910 GG genotype had similar HbA1c improvement with sitagliptin and gliclazide; this variant leads to slower metabolism of gliclazide, impacting efficacy and adverse event rates.",
+                "relationship_effect": "Patients with the rs1057910 GG genotype had lower efficacy with sitagliptin (median HbA1c improvement 0.93) compared to gliclazide (median 1.20), but the difference was not statistically significant.",
                 "p_value": ".464",
                 "citations": [
+                    "CYP2C9 gene polymorphisms also significantly influenced treatment efficacy.",
                     "For the rs1057910 GG genotype, the study group exhibited a median improvement of 0.93 (IQR, 0.66\u20131.21) compared with 1.20 (IQR, 0.89\u20131.30) in the control group (*P*P = .464).",
-                    "These findings suggest that rs1799853 and rs1057910 variants lead to slower metabolism of gliclazide, thereby impacting drug efficacy and adverse event rates (Table [6](#T6)6; Fig. [5](#F5)5).",
-                    "## Table 6: Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
+                    "Comparison of the median (IQR) glycated hemoglobin improvement and P values between the study group and control group across different genotypes."
                 ],
                 "p_value_citations": [
-                    "P = .464"
+                    "For the rs1057910 GG genotype, the study group exhibited a median improvement of 0.93 (IQR, 0.66\u20131.21) compared with 1.20 (IQR, 0.89\u20131.30) in the control group (P = .464)."
                 ]
             }
         ]
diff --git a/data/annotations/PMC4737107.json b/data/annotations/PMC4737107.json
index 5bac275..b7bbd0b 100644
--- a/data/annotations/PMC4737107.json
+++ b/data/annotations/PMC4737107.json
@@ -3,32 +3,30 @@
     "title": "Thiopurine dose intensity and treatment outcome in childhood lymphoblastic leukaemia: the influence of thiopurine methyltransferase pharmacogenetics",
     "study_parameters": {
         "summary": {
-            "content": "The study aimed to investigate the impact of thiopurine methyltransferase (TPMT) genotype on thiopurine dose intensity, myelosuppression, and treatment outcomes in childhood acute lymphoblastic leukemia (ALL) within the UK ALL97 trial. The trial compared the effects of mercaptopurine versus thioguanine and prednisone versus dexamethasone in children aged 1 to 18 years, with TPMT genotyping performed to assess its influence on treatment. Results indicated that TPMT*1/*3A heterozygotes had better event-free survival (EFS) compared to TPMT wild-type patients, and while TPMT heterozygotes experienced more cytopenias, these did not negatively impact treatment outcomes.",
+            "content": "The study aimed to investigate the impact of thiopurine methyltransferase (TPMT) genotype on thiopurine dose intensity, myelosuppression, and treatment outcomes in childhood acute lymphoblastic leukemia (ALL) within the UK ALL97 trial. The trial compared the effects of different TPMT genotypes on treatment outcomes, finding that TPMT heterozygotes, particularly those with the TPMT*1/*3A variant, had better event-free survival (EFS) compared to wild-type patients, despite experiencing more frequent cytopenias and requiring dose adjustments. The study concluded that thiopurine-induced cytopenias did not negatively affect treatment outcomes, and TPMT*1/*3A heterozygotes had a better EFS than TPMT wild-type patients.",
             "citations": [
-                "EFS differed significantly by *TPMT*TPMT genotype (Fig [2](#bjh13240-fig-0002)2).",
-                "*TPMT*TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the *TPMT*TPMT wild\u2010type patients and experienced more cytopenias.",
-                "Within the UK ALL trials, thiopurine\u2010induced cytopenias did not have a detrimental effect on EFS."
+                "TPMT heterozygotes had significantly more frequent cytopenias and therefore required dose adjustments below target levels significantly more often than TPMT wild\u2010type patients although the average dose range was similar for both genotypes.",
+                "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis.",
+                "In conclusion, TPMT*1/*3A heterozygotes had a better EFS than TPMT wild\u2010type patients. Thiopurine induced cytopenias were not detrimental to treatment outcome."
             ]
         },
         "study_type": {
             "content": "Clinical trial, cohort, retrospective",
             "citations": [
                 "ALL97 [International Standard Randomized Controlled Trial Number (ISRCTN) registration number ISRCTN26727615] was a randomized comparison of dexamethasone versus prednisone and mercaptopurine versus thioguanine in patients aged 1 to 18 years.",
-                "The patient cohort has been previously described (Vora *et al*et al, [2006](#bjh13240-bib-0042)2006).",
-                "Dose intensity data was available for 818 of the 1334 (61%) children with *TPMT*TPMT genotypes. There was a significant heterogeneity in dose tolerance by genotype (Table [4](#bjh13240-tbl-0004)4)."
+                "The impact of thiopurine methyltransferase (TPMT) genotype on thiopurine dose intensity, myelosuppression and treatment outcome was investigated in the United Kingdom childhood acute lymphoblastic leukaemia (ALL) trial ALL97.",
+                "The patient cohort has been previously described (Vora *et al*et al, [2006](#bjh13240-bib-0042)2006)."
             ]
         },
         "participant_info": {
             "content": [
                 "**Age:** Participants were aged 1 to 18 years.",
-                "**Gender:** The study included both male and female participants, with no significant gender differences in thiopurine data availability.",
+                "**Gender:** The study included both male and female participants, with specific gender-related analyses indicating differences in myelosuppression and dose tolerance.",
                 "**Ethnicity:** Of the 1334 patients with TPMT genotype data, 1160 were white, and 174 belonged to other ethnic groups (71 Asian, 44 mixed race, 20 black, 6 Oriental, and 33 unknown or non-Caucasian).",
                 "**Pre-existing Conditions:** None of the TPMT*1/*3C patients had CNS disease at diagnosis or Down syndrome; two had T-cell immunophenotype.",
                 "**Study Group Breakdown:**",
-                "**TPMT*1/*1:** 1206 patients were homozygous wild-type.",
-                "**TPMT*1/*3A:** 99 patients were heterozygous for the TPMT*1/*3A variant allele.",
-                "**TPMT*1/*3C:** 17 patients were heterozygous for the TPMT*1/*3C variant allele.",
-                "**Other TPMT Variants:** 12 patients had other low activity variant alleles."
+                "**TPMT Genotype:** 1206 patients were homozygous wild-type TPMT*1/*1, and 128 had low activity variant alleles (99 TPMT*1/*3A, 17 TPMT*1/*3C, 4 TPMT*1/*2, and others with rare alleles).",
+                "**Trial Phase:** Participants were part of the ALL97 and ALL97/99 trial phases, with differences in treatment protocols and maintenance duration."
             ],
             "citations": [
                 "ALL97 [International Standard Randomized Controlled Trial Number (ISRCTN) registration number ISRCTN26727615] was a randomized comparison of dexamethasone versus prednisone and mercaptopurine versus thioguanine in patients aged 1 to 18 years.",
@@ -38,19 +36,22 @@
         },
         "study_design": {
             "content": [
-                "**Study Design:** Randomized controlled trial (RCT) with an add-on thiopurine biological study.",
-                "**Study Population:** Children aged 1 to 18 years with acute lymphoblastic leukemia (ALL) in the United Kingdom.",
-                "**Sample Size:** 1,948 patients were enrolled in the ALL97 trial, with TPMT genotype data available for 1,334 patients.",
-                "**Trial Phases:** ALL97 and its modification ALL97/99, with the latter extending maintenance therapy to 3 years for boys.",
-                "**Randomization:** Patients were randomized to receive either dexamethasone or prednisone and either mercaptopurine or thioguanine.",
-                "**Ethics Approval:** Local ethics committee approval and informed consent were obtained from patients and/or parents.",
-                "**Data Collection:** Weekly drug dosage and cell counts were recorded, and TPMT genotype and thiopurine metabolite concentrations were measured.",
-                "**Exclusion Criteria:** High-risk patients and those who relapsed or died during the first year of chemotherapy were excluded from thiopurine dosage analysis."
+                "**Study Design**: Randomized controlled trial (ALL97) with an add-on thiopurine biological study.",
+                "**Study Population**: Children aged 1 to 18 years with acute lymphoblastic leukemia (ALL) in the United Kingdom.",
+                "**Sample Size**: 1334 patients with TPMT genotype data; 1948 total trial participants.",
+                "**Trial Phases**: ALL97 and ALL97/99, with modifications in November 1999.",
+                "**Randomization**: Comparison of dexamethasone versus prednisone and mercaptopurine versus thioguanine.",
+                "**Chemotherapy Regimen**: Daily oral thiopurine, weekly methotrexate, monthly intravenous vincristine, and 5 days of randomized steroid.",
+                "**Maintenance Duration**: 2 years for ALL97; increased to 3 years for boys in ALL97/99.",
+                "**Dose Titration**: Based on cell counts, with adjustments every 4 weeks in ALL97 and every 12 weeks in ALL97/99.",
+                "**Exclusion Criteria**: High-risk patients and those who relapsed or died during the first year of chemotherapy.",
+                "**Data Collection**: Weekly drug dosage and cell counts recorded, with databases capturing thiopurine dosage and cell count information.",
+                "**TPMT Genotyping**: Conducted using blood samples, with TPMT activity measured and reported to clinicians."
             ],
             "citations": [
                 "ALL97 [International Standard Randomized Controlled Trial Number (ISRCTN) registration number ISRCTN26727615] was a randomized comparison of dexamethasone versus prednisone and mercaptopurine versus thioguanine in patients aged 1 to 18 years.",
                 "The trial had an add\u2010on thiopurine biological study.",
-                "High\u2010risk patients (ALL97 protocol HR1 and ALL97/99 regimen C, which contained additional multi\u2010drug chemotherapy during the first year) were excluded from the thiopurine dosage analysis, as were children who relapsed or died during the first year of chemotherapy."
+                "*TPMT*TPMT genotype was available for 1334 patients (69% of patients entered onto ALL97); 1160 were white and 174 belonged to other ethnic groups (71 Asian (Indian sub\u2010continent), 44 mixed race, 20 black, 6 Oriental and 33 unknown or non\u2010Caucasian)."
             ]
         },
         "study_results": {
@@ -62,7 +63,7 @@
                 "**Thiopurine-induced cytopenias**: Not detrimental to treatment outcome.",
                 "**Odds ratio for non-compliance with thioguanine**: 2.58 (95% CI: 1.11\u20135.7, P = 0.04).",
                 "**Hazard ratio for TPMT*1/*3C vs. TPMT*1/*3A**: 4.5 (95% CI: 1.7\u201311.8, P = 0.003).",
-                "**Hazard ratio for TPMT*1/*3C vs. TPMT*1/*1 and other variants**: 3.2 (95% CI: 1.5\u20136.8, P = 0.003)."
+                "**Hazard ratio for TPMT*1/*3C vs. TPMT*1/*1 and other heterozygous genotypes**: 3.2 (95% CI: 1.5\u20136.8, P = 0.003)."
             ],
             "citations": [
                 "TPMT heterozygotes had significantly more frequent cytopenias and therefore required dose adjustments below target levels significantly more often than TPMT wild\u2010type patients although the average dose range was similar for both genotypes.",
@@ -71,110 +72,102 @@
             ]
         },
         "allele_frequency": {
-            "content": [
-                "**TPMT*1/*1**: 1206 patients were homozygous wild-type.",
-                "**TPMT*1/*3A**: 99 patients were heterozygous for this variant allele.",
-                "**TPMT*1/*3C**: 17 patients were heterozygous for this variant allele.",
-                "**TPMT*1/*2**: 4 patients were heterozygous for this variant allele.",
-                "**Rare alleles**: Two children had the rare alleles *TPMT*1/*9 and *TPMT*1/*21.",
-                "**Novel alleles**: Three children had novel alleles *TPMT*1/*32, *TPMT*1/*33, and *TPMT*1/*34.",
-                "**Compound heterozygote**: One child was *TPMT*2/*3A.",
-                "**Homozygous mutants**: One child was *TPMT*3A/*3A and one was *TPMT*3C/*3C."
-            ],
+            "content": "| Gene | Polymorphism | Frequency |\n|------|--------------|-----------|\n| TPMT | *1/*1 | 1206 out of 1334 patients were homozygous wild-type, which is approximately 90% of the studied cohort. |\n| TPMT | *1/*3A | 99 out of 1334 patients were heterozygous for the *1/*3A variant, which is approximately 7.4% of the studied cohort. |\n| TPMT | *1/*3C | 17 out of 1334 patients were heterozygous for the *1/*3C variant, which is approximately 1.3% of the studied cohort. |\n| TPMT | *1/*2 | 4 out of 1334 patients were heterozygous for the *1/*2 variant, which is approximately 0.3% of the studied cohort. |\n| TPMT | *1/*9 | 1 out of 1334 patients was heterozygous for the *1/*9 variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *1/*21 | 1 out of 1334 patients was heterozygous for the *1/*21 variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *1/*32 | 1 out of 1334 patients was heterozygous for the *1/*32 variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *1/*33 | 1 out of 1334 patients was heterozygous for the *1/*33 variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *1/*34 | 1 out of 1334 patients was heterozygous for the *1/*34 variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *2/*3A | 1 out of 1334 patients was a compound heterozygote for the *2/*3A variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *3A/*3A | 1 out of 1334 patients was homozygous for the *3A variant, which is approximately 0.075% of the studied cohort. |\n| TPMT | *3C/*3C | 1 out of 1334 patients was homozygous for the *3C variant, which is approximately 0.075% of the studied cohort. |",
             "citations": [
-                "*TPMT*TPMT genotype was available for 1334 patients (69% of patients entered onto ALL97); 1160 were white and 174 belonged to other ethnic groups (71 Asian (Indian sub\u2010continent), 44 mixed race, 20 black, 6 Oriental and 33 unknown or non\u2010Caucasian).",
-                "1206 patients were homozygous wild\u2010type *TPMT*1/*1*TPMT*1/*1 and 128 patients had low activity variant alleles (99 *TPMT*1/*3A*TPMT*1/*3A; 17 *TPMT*1/*3C*TPMT*1/*3C; 4 *TPMT*1/*2*TPMT*1/*2; two children with the rare alleles *TPMT*1/*9*TPMT*1/*9,* TPMT*1/*21* TPMT*1/*21; three children with novel alleles *TPMT*1/*32, TPMT*1/*33, TPMT*1/*34*TPMT*1/*32, TPMT*1/*33, TPMT*1/*34; one compound heterozygote *TPMT*2/*3A*TPMT*2/*3A; one homozygous *TPMT*3A/*3A*TPMT*3A/*3A and one *TPMT*3C/*3C*TPMT*3C/*3C).",
-                "The *TPMT*3B*TPMT*3B allele was not detected."
+                "*TPMT*TPMT genotype was available for 1334 patients (69% of patients entered onto ALL97); 1160 were white and 174 belonged to other ethnic groups (71 Asian (Indian sub\u2010continent), 44 mixed race, 20 black, 6 Oriental and 33 unknown or non\u2010Caucasian). 1206 patients were homozygous wild\u2010type *TPMT*1/*1*TPMT*1/*1 and 128 patients had low activity variant alleles (99 *TPMT*1/*3A*TPMT*1/*3A; 17 *TPMT*1/*3C*TPMT*1/*3C; 4 *TPMT*1/*2*TPMT*1/*2; two children with the rare alleles *TPMT*1/*9*TPMT*1/*9,* TPMT*1/*21* TPMT*1/*21; three children with novel alleles *TPMT*1/*32, TPMT*1/*33, TPMT*1/*34*TPMT*1/*32, TPMT*1/*33, TPMT*1/*34; one compound heterozygote *TPMT*2/*3A*TPMT*2/*3A; one homozygous *TPMT*3A/*3A*TPMT*3A/*3A and one *TPMT*3C/*3C*TPMT*3C/*3C).",
+                "1206 patients were homozygous wild\u2010type TPMT*1/*1 (90%), 99 were heterozygous for TPMT*1/*3A (7.4%), 17 for TPMT*1/*3C (1.3%), 4 for TPMT*1/*2 (0.3%), two children with the rare alleles TPMT*1/*9, TPMT*1/*21 (0.075% each), three children with novel alleles TPMT*1/*32, TPMT*1/*33, TPMT*1/*34 (0.075% each), one compound heterozygote TPMT*2/*3A (0.075%), one homozygous TPMT*3A/*3A (0.075%) and one TPMT*3C/*3C (0.075%)."
             ]
         },
         "additional_resource_links": [
             "The study provides the following additional resources or links related to its design and execution:",
             "- [DOI link to the article](https://doi.org/10.1111/bjh.13240)",
             "- [PubMed Central (PMC) link to the article](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4737107/)",
-            "- [PDF version of the article](https://pmc.ncbi.nlm.nih.gov/articles/PMC4737107/pdf/BJH-169-228.pdf)"
+            "- [PDF of the article](https://pmc.ncbi.nlm.nih.gov/articles/PMC4737107/pdf/BJH-169-228.pdf)",
+            "These resources provide access to the full text of the study and its supplementary materials."
         ]
     },
     "annotations": {
         "relationships": [
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT*1/*3A (heterozygote)",
-                "relationship_effect": "TPMT*1/*3A heterozygotes had significantly better 5-year event-free survival (EFS) than both TPMT wild-type (TPMT*1/*1) and TPMT*1/*3C patients. EFS for TPMT*1/*3A was 88%, compared to 80% for TPMT*1/*1 and 53% for TPMT*1/*3C.",
-                "p_value": "TPMT*1/*3A vs TPMT*1/*1: P = 0.05; TPMT*1/*3A vs TPMT*1/*3C: P = 0.002",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "TPMT*1/*3A heterozygotes had significantly more frequent cytopenias and required dose adjustments below target levels more often than TPMT wild-type patients, although the average dose range was similar for both genotypes.",
+                "p_value": "Not specified for this comparison in the abstract; for wild-type vs all heterozygotes, P < 0.0002 (average dose) and P < 0.001 (time with dose withdrawn due to cytopenias); see also Table 4",
                 "citations": [
-                    "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis.",
-                    "*TPMT*1/*3A*TPMT*1/*3A patients (EFS 88%, 95%CI 81\u201394%) fared significantly better than *TPMT*1/*1*TPMT*1/*1 (EFS 80%, 95%CI 78\u201382%) patients (Fig [2](#bjh13240-fig-0002)2).",
-                    "In conclusion, TPMT*1/*3A heterozygotes had a better EFS than TPMT wild\u2010type patients."
+                    "The TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the TPMT wild\u2010type patients and experienced more cytopenias.",
+                    "Comparing the TPMT wild\u2010type (TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, P < 0\u00b7001).",
+                    "## Table 4. Thiopurine dosage and myelosuppression by TPMT genotype"
                 ],
                 "p_value_citations": [
-                    "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis."
+                    "Comparing the TPMT wild\u2010type (TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, P < 0\u00b7001)."
                 ]
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT*1/*3C (heterozygote)",
-                "relationship_effect": "TPMT*1/*3C heterozygotes had significantly worse 5-year event-free survival (EFS) compared to both TPMT*1/*3A and TPMT*1/*1 patients. EFS for TPMT*1/*3C was 53%.",
-                "p_value": "TPMT*1/*3C vs TPMT*1/*3A: P = 0.002; TPMT*1/*3C vs TPMT*1/*1: P = 0.03",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "Event-free survival (EFS) for patients heterozygous for TPMT*1/*3A was better than for both wild-type TPMT*1/*1 and TPMT*1/*3C patients.",
+                "p_value": "5-year EFS 88% (TPMT*1/*3A) vs 80% (TPMT*1/*1), P = 0.05; vs 53% (TPMT*1/*3C), P = 0.002",
                 "citations": [
-                    "Heterozygous TPMT*1/*3C patients fared worse (5\u2010year EFS, 53%, 95%CI 29\u201377%) compared to the other heterozygous TPMT patients (TPMT*1/*3A, *1/*2, *1/*21, *1/*9, *1/*32, *1/*33, *1/*34; EFS 89%, 95%CI 83\u201395%, P = 0\u00b7002) and homozygous TPMT*1/*1 patients (EFS 80%%, 95%CI 78\u201382%, P = 0\u00b703).",
-                    "In a multivariate Cox regression analysis, the worse survival for TPMT*1/*3C against TPMT*1/*3A (TPMT*1/*3C hazard ratio = 4\u00b75, 95% CI 1\u00b77\u201311\u00b78, P = 0\u00b7003) and for TPMT*1/*3C against TPMT*1/*1 and all other TPMT variant heterozygous genotypes (Table 5) retained significance in models for overall EFS that also included the covariates trial, age group, WBC at diagnosis and steroid randomization.",
-                    "Survival was inexplicably worse for patients with TPMT*1/*3C than for TPMT*1/*3A patients."
+                    "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis.",
+                    "TPMT*1/*3A patients (EFS 88%, 95%CI 81\u201394%) fared significantly better than TPMT*1/*1 (EFS 80%, 95%CI 78\u201382%) patients (Fig [2](#bjh13240-fig-0002)2).",
+                    "In a multivariate Cox regression analysis, the worse survival for TPMT*1/*3C against TPMT*1/*3A (TPMT*1/*3C hazard ratio = 4\u00b75, 95% CI 1\u00b77\u201311\u00b78, P = 0\u00b7003) and for TPMT*1/*3C against TPMT*1/*1 and all other TPMT variant heterozygous genotypes (Table [5](#bjh13240-tbl-0005)5) retained significance in models for overall EFS that also included the covariates trial, age group, WBC at diagnosis and steroid randomization."
                 ],
                 "p_value_citations": [
-                    "Heterozygous TPMT*1/*3C patients fared worse (5\u2010year EFS, 53%, 95%CI 29\u201377%) compared to the other heterozygous TPMT patients (TPMT*1/*3A, *1/*2, *1/*21, *1/*9, *1/*32, *1/*33, *1/*34; EFS 89%, 95%CI 83\u201395%, P = 0\u00b7002) and homozygous TPMT*1/*1 patients (EFS 80%%, 95%CI 78\u201382%, P = 0\u00b703)."
+                    "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis."
                 ]
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT*1/*3A (heterozygote)",
-                "relationship_effect": "TPMT*1/*3A heterozygotes tolerated a significantly lower average daily thiopurine dose than TPMT wild-type (TPMT*1/*1) patients (70% vs 78% of protocol dose) and experienced more cytopenias.",
-                "p_value": "P < 0.0002 (dose); P < 0.001 (time with dose withdrawn)",
+                "polymorphism": "*1/*3C",
+                "relationship_effect": "TPMT*1/*3C heterozygotes had worse event-free survival (EFS) compared to both TPMT*1/*3A and TPMT*1/*1 patients.",
+                "p_value": "5-year EFS 53% (TPMT*1/*3C) vs 88% (TPMT*1/*3A), P = 0.002; vs 80% (TPMT*1/*1), P = 0.03; Multivariate Cox regression: hazard ratio = 4.5 (95% CI 1.7\u201311.8), P = 0.003",
                 "citations": [
-                    "Comparing the *TPMT*TPMT wild\u2010type (*TPMT*1/*1*TPMT*1/*1) patients with the *TPMT*TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, *P *P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, *P *P < 0\u00b7001).",
-                    "The *TPMT*TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the *TPMT*TPMT wild\u2010type patients and experienced more cytopenias.",
-                    "## Table 4. Thiopurine dosage and myelosuppression by TPMT genotype"
+                    "EFS differed significantly by *TPMT*TPMT genotype (Fig [2](#bjh13240-fig-0002)2). Heterozygous *TPMT*1/*3C*TPMT*1/*3C patients fared worse (5\u2010year EFS, 53%, 95%CI 29\u201377%) compared to the other heterozygous *TPMT*TPMT patients (*TPMT*1/*3A, *1/*2, *1/*21, *1/*9, *1/*32, *1/*33, *1/*34*TPMT*1/*3A, *1/*2, *1/*21, *1/*9, *1/*32, *1/*33, *1/*34; EFS 89%, 95%CI 83\u201395%, *P *P = 0\u00b7002) and homozygous *TPMT*1/*1*TPMT*1/*1 patients (EFS 80%%, 95%CI 78\u201382%, *P *P = 0\u00b703).",
+                    "In a multivariate Cox regression analysis, the worse survival for *TPMT*1/*3C*TPMT*1/*3C against *TPMT*1/*3A*TPMT*1/*3A (*TPMT*1/*3C*TPMT*1/*3C hazard ratio = 4\u00b75, 95% CI 1\u00b77\u201311\u00b78, *P *P = 0\u00b7003) and for *TPMT*1/*3C*TPMT*1/*3C against *TPMT*1/*1*TPMT*1/*1 and all other *TPMT*TPMT variant heterozygous genotypes (Table [5](#bjh13240-tbl-0005)5) retained significance in models for overall EFS that also included the covariates trial, age group, WBC at diagnosis and steroid randomization.",
+                    "Survival was inexplicably worse for patients with *TPMT*1/*3C*TPMT*1/*3C than for *TPMT*1/*3A*TPMT*1/*3A patients."
                 ],
                 "p_value_citations": [
-                    "Comparing the TPMT wild\u2010type (TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, P < 0\u00b7001)."
+                    "Event\u2010free survival (EFS) for patients heterozygous for the more common TPMT*1/*3A variant allele (n = 99, 5\u2010year EFS 88%) was better than for both wild\u2010type TPMT*1/*1 (n = 1206, EFS 80%, P = 0\u00b705) and TPMT*1/*3C patients (n = 17, EFS 53%, P = 0\u00b7002); outcomes supported by a multivariate Cox regression analysis.",
+                    "In a multivariate Cox regression analysis, the worse survival for TPMT*1/*3C against TPMT*1/*3A (TPMT*1/*3C hazard ratio = 4\u00b75, 95% CI 1\u00b77\u201311\u00b78, P = 0\u00b7003) and for TPMT*1/*3C against TPMT*1/*1 and all other TPMT variant heterozygous genotypes (Table 5) retained significance in models for overall EFS that also included the covariates trial, age group, WBC at diagnosis and steroid randomization."
                 ]
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT*1/*3A (heterozygote)",
-                "relationship_effect": "TPMT*1/*3A heterozygotes accumulated significantly higher thioguanine nucleotide (TGN) concentrations compared to TPMT wild-type (TPMT*1/*1) patients.",
-                "p_value": "P < 0.0001 (mercaptopurine); P = 0.0009 (thioguanine)",
+                "polymorphism": "*1/*3C",
+                "relationship_effect": "TPMT*1/*3C patients had significantly lower TGN concentrations than TPMT*1/*3A patients despite similar drug dosages and TPMT activities.",
+                "p_value": "Median difference 192 pmol (95% CI 10 to 425), P = 0.05",
                 "citations": [
-                    "For both genders and both thiopurines, *TPMT*TPMT heterozygous patients accumulated significantly higher TGN concentrations compared to the *TPMT*TPMT wild\u2010type cohort (Relling *et al*et al, [1999a](#bjh13240-bib-0027)1999a; Lennard *et al*et al, [2013](#bjh13240-bib-0015)2013).",
-                    "## Table 1. Thiopurine methyltransferase genotype and metabolite formation",
-                    "The *TPMT*TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the *TPMT*TPMT wild\u2010type patients and experienced more cytopenias."
+                    "Comparisons between *TPMT*TPMT heterozygotes were not possible with thioguanine (only four *TPMT*1/*3C*TPMT*1/*3C children), but within the mercaptopurine cohort *TPMT *1/*3C*TPMT *1/*3C children (*n *n = 12) had significantly lower TGN concentrations than *TPMT *1/*3A*TPMT *1/*3A children (*n *n = 53) despite having similar drug dosages and TPMT activities (Lennard *et al*et al, [2013](#bjh13240-bib-0015)2013); *TPMT *1/*3C*TPMT *1/*3C median TGNs 608 pmol/8 \u00d7 10^8^8 red cells (range 288 to 910) and *TPMT *1/*3A*TPMT *1/*3A median TGNs 802 (range 132 to 2228), median difference 192 pmol (95% confidence interval [CI] 10 to 425), *P *P = 0\u00b705).",
+                    "Despite similar mercaptopurine dosages and TPMT activities, the *TPMT*1/*3C*TPMT*1/*3C patients accumulated significantly less TGNs and lower MeMPN concentrations than *TPMT*1/*3A*TPMT*1/*3A patients; this could indicate an increased frequency of non\u2010adherence and suboptimal metabolite exposure in the *TPMT*1/*3C*TPMT*1/*3C cohort.",
+                    "There was no significant difference between the *TPMT*1/*3C*TPMT*1/*3C and *TPMT*1/*3A*TPMT*1/*3A patients with respect to mean daily dose or incidence of cytopenias, although the number of *TPMT*1/*3C*TPMT*1/*3C patients with full dose intensity data available was small (*n*n = 9), (Table [4](#bjh13240-tbl-0004)4)."
                 ],
                 "p_value_citations": [
-                    "MP\u2010TGNs pmol: 360 (0\u20131216) [TPMT*1/*1] vs 754 (132\u20132228) [TPMT heterozygote], median difference 394 (326 to 466), P < 0\u00b70001; TG\u2010TGNs pmol: 1904 (36\u20134336) [TPMT*1/*1] vs 2468 (174\u20136730) [TPMT heterozygote], median difference 504 (206 to 802), P = 0\u00b70009."
+                    "TPMT *1/*3C median TGNs 608 pmol/8 \u00d7 10^8 red cells (range 288 to 910) and TPMT *1/*3A median TGNs 802 (range 132 to 2228), median difference 192 pmol (95% confidence interval [CI] 10 to 425), P = 0\u00b705)."
                 ]
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT*1/*3C (heterozygote)",
-                "relationship_effect": "TPMT*1/*3C patients had significantly lower TGN concentrations than TPMT*1/*3A patients despite similar drug dosages and TPMT activities.",
-                "p_value": "P = 0.05 (TGN); P = 0.06 (MeMPN)",
+                "polymorphism": "*1/*3C",
+                "relationship_effect": "TPMT*1/*3C patients had lower MeMPN concentrations than TPMT*1/*3A patients.",
+                "p_value": "Median difference 2190 pmol (95% CI \u221254 to 5180), P = 0.06",
                 "citations": [
-                    "Comparisons between *TPMT*TPMT heterozygotes were not possible with thioguanine (only four *TPMT*1/*3C*TPMT*1/*3C children), but within the mercaptopurine cohort *TPMT *1/*3C*TPMT *1/*3C children (*n *n = 12) had significantly lower TGN concentrations than *TPMT *1/*3A*TPMT *1/*3A children (*n *n = 53) despite having similar drug dosages and TPMT activities (Lennard *et al*et al, [2013](#bjh13240-bib-0015)2013); *TPMT *1/*3C*TPMT *1/*3C median TGNs 608 pmol/8 \u00d7 10^8^8 red cells (range 288 to 910) and *TPMT *1/*3A*TPMT *1/*3A median TGNs 802 (range 132 to 2228), median difference 192 pmol (95% confidence interval [CI] 10 to 425), *P *P = 0\u00b705).",
                     "MeMPN concentrations were also lower in *TPMT *1/*3C*TPMT *1/*3C patients; *TPMT *1/*3C*TPMT *1/*3C median MeMPN 2061 pmol/8 \u00d7 10^8^8 red cells (range 60 to 10746) and *TPMT *1/*3A*TPMT *1/*3A median MeMPN 4542 (range 84 to 38 386), median difference 2190 pmol (95% CI \u221254 to 5180), *P *P = 0\u00b706).",
-                    "Despite similar mercaptopurine dosages and TPMT activities, the *TPMT*1/*3C*TPMT*1/*3C patients accumulated significantly less TGNs and lower MeMPN concentrations than *TPMT*1/*3A*TPMT*1/*3A patients; this could indicate an increased frequency of non\u2010adherence and suboptimal metabolite exposure in the *TPMT*1/*3C*TPMT*1/*3C cohort."
+                    "Despite similar mercaptopurine dosages and TPMT activities, the *TPMT*1/*3C*TPMT*1/*3C patients accumulated significantly less TGNs and lower MeMPN concentrations than *TPMT*1/*3A*TPMT*1/*3A patients; this could indicate an increased frequency of non\u2010adherence and suboptimal metabolite exposure in the *TPMT*1/*3C*TPMT*1/*3C cohort.",
+                    "## Table 1. Thiopurine methyltransferase genotype and metabolite formation"
                 ],
                 "p_value_citations": [
-                    "TPMT*1/*3C median TGNs 608 pmol/8 \u00d7 10^8 red cells (range 288 to 910) and TPMT*1/*3A median TGNs 802 (range 132 to 2228), median difference 192 pmol (95% confidence interval [CI] 10 to 425), P = 0\u00b705. MeMPN concentrations were also lower in TPMT*1/*3C patients; TPMT*1/*3C median MeMPN 2061 pmol/8 \u00d7 10^8 red cells (range 60 to 10746) and TPMT*1/*3A median MeMPN 4542 (range 84 to 38 386), median difference 2190 pmol (95% CI \u221254 to 5180), P = 0\u00b706."
+                    "TPMT *1/*3C median MeMPN 2061 pmol/8 \u00d7 10^8 red cells (range 60 to 10746) and TPMT *1/*3A median MeMPN 4542 (range 84 to 38 386), median difference 2190 pmol (95% CI \u221254 to 5180), P = 0\u00b706)."
                 ]
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT heterozygotes (all variants)",
-                "relationship_effect": "TPMT heterozygotes experienced more frequent cytopenias and required dose adjustments below target levels significantly more often than TPMT wild-type patients, although the average dose range was similar for both genotypes.",
-                "p_value": "P < 0.0002 (dose); P < 0.001 (time with dose withdrawn)",
+                "polymorphism": "*1/*1",
+                "relationship_effect": "TPMT*1/*1 (wild-type) patients tolerated a higher average dose than TPMT heterozygotes (78% vs 70% of protocol dose), and experienced less time with the dose withdrawn due to cytopenias (15.5% vs 20.8%).",
+                "p_value": "Average dose: P < 0.0002; Time withdrawn: P < 0.001",
                 "citations": [
-                    "Comparing the *TPMT*TPMT wild\u2010type (*TPMT*1/*1*TPMT*1/*1) patients with the *TPMT*TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, *P *P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, *P *P < 0\u00b7001).",
-                    "The *TPMT*TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the *TPMT*TPMT wild\u2010type patients and experienced more cytopenias.",
-                    "However, the range of thiopurine dosages tolerated was wide, with the upper and lower limits similar for both *TPMT*TPMT genotypes."
+                    "Comparing the TPMT wild\u2010type (*TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, *P* < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, *P* < 0\u00b7001).",
+                    "## Table 4. Thiopurine dosage and myelosuppression by TPMT genotype",
+                    "The TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the TPMT wild\u2010type patients and experienced more cytopenias."
                 ],
                 "p_value_citations": [
                     "Comparing the TPMT wild\u2010type (TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, P < 0\u00b7001)."
@@ -182,16 +175,54 @@
             },
             {
                 "gene": "TPMT",
-                "polymorphism": "TPMT heterozygotes (all variants)",
-                "relationship_effect": "TPMT heterozygosity was not associated with a higher rate of second cancers in this cohort.",
-                "p_value": "Not significant (P = 0.41 for thiopurine type)",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "TPMT*1/*3A patients accumulated higher TGN concentrations than TPMT*1/*1 (wild-type) patients.",
+                "p_value": "MP-TGNs: median difference 394 pmol (326 to 466), P < 0.0001",
+                "citations": [
+                    "For both genders and both thiopurines, *TPMT*TPMT heterozygous patients accumulated significantly higher TGN concentrations compared to the *TPMT*TPMT wild\u2010type cohort (Relling *et al*et al, [1999a](#bjh13240-bib-0027)1999a; Lennard *et al*et al, [2013](#bjh13240-bib-0015)2013).",
+                    "*TPMT *1/*3A*TPMT *1/*3A patients had a better EFS than *TPMT *1/*1*TPMT *1/*1 patients, the former also experienced more cytopenias and accumulated higher TGN concentrations than the latter."
+                ],
+                "p_value_citations": [
+                    "MP\u2010TGNs pmol: 360 (0\u20131216) [wild-type] vs 754 (132\u20132228) [heterozygous]; median difference 394 (326 to 466), P < 0\u00b70001"
+                ]
+            },
+            {
+                "gene": "TPMT",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "TPMT*1/*3A patients had lower MeMPN concentrations than TPMT*1/*1 (wild-type) patients.",
+                "p_value": "MP-MeMPNs: median difference \u22125464 (\u22127278 to \u22123808), P < 0.0001",
+                "citations": [
+                    "MP\u2010MeMPNs pmol | 10702 (0\u2013141772) | 4078 (60\u201338386) | \u22125464 (\u22127278 to \u22123808), P < 0\u00b70001 |",
+                    "*TPMT *1/*3A*TPMT *1/*3A patients had a better EFS than *TPMT *1/*1*TPMT *1/*1 patients, the former also experienced more cytopenias and accumulated higher TGN concentrations than the latter."
+                ],
+                "p_value_citations": [
+                    "MP\u2010MeMPNs pmol: 10702 (0\u2013141772) [wild-type] vs 4078 (60\u201338386) [heterozygous]; median difference \u22125464 (\u22127278 to \u22123808), P < 0\u00b70001"
+                ]
+            },
+            {
+                "gene": "TPMT",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "TPMT*1/*3A patients experienced more cytopenias than TPMT*1/*1 (wild-type) patients.",
+                "p_value": "Not specified for this comparison, but see Table 4 for % time with neutropenia and thrombocytopenia.",
+                "citations": [
+                    "The TPMT heterozygotes tolerated a significantly lower average daily thiopurine dose than the TPMT wild\u2010type patients and experienced more cytopenias.",
+                    "Comparing the TPMT wild\u2010type (TPMT*1/*1) patients with the TPMT wild\u2010type/variant allele heterozygotes, the former had a higher average dose than the latter (78% versus 70% respectively, P < 0\u00b70002) and experienced less time with the dose withdrawn due to cytopenias (15\u00b75% vs. 20\u00b78% of time respectively, P < 0\u00b7001).",
+                    "## Table 4. Thiopurine dosage and myelosuppression by TPMT genotype"
+                ],
+                "p_value_citations": []
+            },
+            {
+                "gene": "TPMT",
+                "polymorphism": "*1/*3A",
+                "relationship_effect": "TPMT*1/*3A heterozygosity was not associated with a higher rate of second cancers.",
+                "p_value": "Not significant (P = 0.41 for difference by thiopurine type; no association with genotype)",
                 "citations": [
                     "In contrast to reports from some USA and Nordic trials, TPMT heterozygosity was not associated with a higher rate of second cancers.",
                     "There was no association between second cancer and genotype, thiopurine metabolites, thiopurine average % dose or frequency of cytopenias.",
                     "There was no association between second cancer and type of thiopurine: although a higher proportion of second cancers occurred in those randomized to thioguanine, this difference was not significant (P = 0\u00b741)."
                 ],
                 "p_value_citations": [
-                    "There was no association between second cancer and type of thiopurine: although a higher proportion of second cancers occurred in those randomized to thioguanine, this difference was not significant (P = 0\u00b741)."
+                    "There was no association between second cancer and genotype, thiopurine metabolites, thiopurine average % dose or frequency of cytopenias. There was no association between second cancer and type of thiopurine: although a higher proportion of second cancers occurred in those randomized to thioguanine, this difference was not significant (P = 0\u00b741)."
                 ]
             }
         ]
diff --git a/data/annotations/PMC5712579.json b/data/annotations/PMC5712579.json
index a6e1614..644c64e 100644
--- a/data/annotations/PMC5712579.json
+++ b/data/annotations/PMC5712579.json
@@ -3,33 +3,36 @@
     "title": "Association of HLA-A and HLA-B Alleles with Lamotrigine-Induced Cutaneous Adverse Drug Reactions in the Thai Population",
     "study_parameters": {
         "summary": {
-            "content": "The study aimed to investigate the association between specific HLA alleles and lamotrigine-induced cutaneous adverse drug reactions (CADR) in the Thai population, given the known genetic predispositions to such reactions with other antiepileptic drugs. Conducted as a case-control study, it involved 15 patients with lamotrigine-induced CADR and 50 lamotrigine-tolerant controls, with HLA-A and HLA-B genotyping performed. The results revealed significant associations of HLA-A*02:07 and HLA-B*15:02 with lamotrigine-induced CADR, and HLA-A*33:03, HLA-B*15:02, and HLA-B*44:03 with lamotrigine-induced maculopapular exanthema (MPE), suggesting these alleles could serve as potential screening markers for preventing CADR in Thai patients.",
+            "content": "The study aimed to investigate the association between specific HLA alleles and lamotrigine-induced cutaneous adverse drug reactions (CADR) in the Thai population, given the known genetic predispositions to such reactions with other antiepileptic drugs. Conducted as a case-control study, it involved 15 patients with lamotrigine-induced CADR and 50 lamotrigine-tolerant controls, with HLA genotyping performed to identify potential genetic markers. The results revealed significant associations of HLA-A*02:07 and HLA-B*15:02 with lamotrigine-induced CADR, suggesting these alleles could serve as useful screening markers to prevent adverse reactions in Thai patients.",
             "citations": [
                 "The proportion of HLA-A\u221702:07 and HLA-B\u221715:02 allele carriers were significantly higher in the LTG-induced CADR group than in the tolerant controls [odds ratio (OR): 7.83; 95% confidence interval (CI): 1.60\u201338.25; P = 0.013, and OR: 4.89; 95% CI: 1.28\u201318.67; P = 0.014].",
-                "In addition, subjects with HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 were significantly higher in the LTG-induced MPE group than in the tolerant controls (OR: 8.27; 95% CI: 1.83\u201337.41; P = 0.005, OR: 7.33; 95% CI: 1.63\u201333.02; P = 0.005; and OR: 10.29; 95% CI: 1.45\u201372.81; P = 0.029).",
-                "These results suggest that these alleles could be useful screening markers for preventing CADR before LTG treatment in Thai patients, but further replication studies with larger sample sizes are needed."
+                "Conclusion: HLA-A\u221702:07 and HLA-B\u221715:02 were associated with LTG-induced CADR in Thai patients.",
+                "We found a statistically significant association of the HLA-A\u221702:07 and HLA-B\u221715:02 alleles with LTG-induced CADR in the Thai population."
             ]
         },
         "study_type": {
-            "content": "case-control, retrospective",
+            "content": "case/control",
             "citations": [
                 "A case\u2013control study was performed at the Laboratory for Pharmacogenomics, Somdech Phra Debaratana Medical Center (SDMC), Ramathibodi Hospital, Thailand.",
                 "Fifteen LTG-induced CADR (4 cases of SJS, 1 case of DRESS, and 10 cases of MPE) were recruited from the Faculty of Medicine, Ramathibodi Hospital, Mahidol University, Manarom Hospital, and Srinagarind Hospital between 2011 and 2015.",
-                "The dermatological diagnosis was made by a dermatologist or allergist who reviewed photographs, pathological slides, clinical morphology, and medical records."
+                "Patients who had been taking LTG for more than 6 months without evidence of cutaneous adverse effects were recruited as LTG-tolerant controls."
             ]
         },
         "participant_info": {
             "content": [
-                "**Age**: The mean age of the LTG-induced CADR patients was 35.2 \u00b1 22.1 years, while the LTG-tolerant group had a mean age of 38.2 \u00b1 19.0 years.",
-                "**Gender**: In the LTG-induced CADR group, 73.3% were female and 26.7% were male. In the LTG-tolerant group, 76.0% were female and 24.0% were male.",
-                "**Ethnicity**: All participants were from the Thai population.",
-                "**Pre-existing Conditions**: In the LTG-induced CADR group, 40.0% had epilepsy, 33.3% had bipolar disorder, 20.0% had depressive disorder, and 6.7% had major depressive disorder. In the LTG-tolerant group, 40.0% had epilepsy, 16.0% had bipolar disorder, 12.0% had depressive disorder, 6.0% had major depressive disorder, 8.0% used LTG as a mood stabilizer, and 18.0% had other conditions.",
-                "**Concomitant Use of Valproic Acid**: 6.7% of the LTG-induced CADR group and 8.0% of the LTG-tolerant group used valproic acid."
+                "**Age:** The mean age of the LTG-induced CADR patients was 35.2 \u00b1 22.1 years.",
+                "**Gender:** 73.3% of the LTG-induced CADR patients were female.",
+                "**Ethnicity:** All participants were from the Thai population.",
+                "**Study Groups:**",
+                "LTG-induced CADR group: 15 patients (10 with MPE, 4 with SJS, 1 with DRESS).",
+                "LTG-tolerant group: 50 patients.",
+                "**Pre-existing Conditions:** Indications for LTG use included epilepsy, bipolar disorder, depressive disorder, and major depressive disorder.",
+                "**Concomitant Medication:** 6.7% of LTG-induced CADR patients used valproic acid concomitantly."
             ],
             "citations": [
+                "DNA samples from 15 LTG-induced CADR patients (10 cases with MPE, 4 cases with SJS, and 1 case with DRESS) and 50 LTG-tolerant controls and the general population group were genotyped.",
                 "The mean age of the LTG-induced CADR patients was 35.2 \u00b1 22.1 and 73.3% were female.",
-                "There were no significant differences in gender, age, dosage of LTG, and concomitant use of valproic acid between the LTG-induced CADR patients and the LTG-tolerant patients (Table 1).",
-                "Table 1. Clinical characteristics of patients in the lamotrigine (LTG)-induced cutaneous adverse drug reactions group and the LTG-tolerant group."
+                "All patients who developed CADR, as SJS, MPE, or DRESS, within 2 months after initiating LTG treatment were recruited for the study."
             ]
         },
         "study_design": {
@@ -38,55 +41,35 @@
                 "**Study Population:** Thai patients with lamotrigine-induced cutaneous adverse drug reactions (CADR) and lamotrigine-tolerant controls.",
                 "**Sample Size:** 15 patients with LTG-induced CADR (10 with maculopapular exanthema, 4 with Stevens\u2013Johnson syndrome, and 1 with drug reaction with eosinophilia and systemic symptoms) and 50 LTG-tolerant controls.",
                 "**Recruitment Period:** Between 2011 and 2015.",
-                "**Recruitment Sites:** Faculty of Medicine, Ramathibodi Hospital, Mahidol University, Manarom Hospital, and Srinagarind Hospital, Thailand.",
+                "**Recruitment Sites:** Faculty of Medicine, Ramathibodi Hospital, Mahidol University, Manarom Hospital, and Srinagarind Hospital in Thailand.",
                 "**Genotyping Method:** HLA-A and HLA-B genotyping using polymerase chain reaction-sequence-specific oligonucleotides probes.",
-                "**Ethical Approval:** Approved by the Ramathibodi Hospital Ethical Review Board."
+                "**Ethical Approval:** Approved by the Ramathibodi Hospital Ethical Review Board, with informed, written consent obtained from all participants."
             ],
             "citations": [
                 "A case\u2013control study was performed at the Laboratory for Pharmacogenomics, Somdech Phra Debaratana Medical Center (SDMC), Ramathibodi Hospital, Thailand.",
                 "Fifteen LTG-induced CADR (4 cases of SJS, 1 case of DRESS, and 10 cases of MPE) were recruited from the Faculty of Medicine, Ramathibodi Hospital, Mahidol University, Manarom Hospital, and Srinagarind Hospital between 2011 and 2015.",
-                "HLA-A and HLA-B genotyping was performed using polymerase chain reaction-sequence-specific oligonucleotides probes."
+                "The study was approved by the Ramathibodi Hospital Ethical Review Board, and informed, written consent was obtained from all participants."
             ]
         },
         "study_results": {
             "content": [
-                "**HLA-A\u221702:07 and HLA-B\u221715:02 association with LTG-induced CADR:** Odds ratio (OR) = 7.83, 95% confidence interval (CI) = 1.60\u201338.25, P = 0.013 for HLA-A\u221702:07; OR = 4.89, 95% CI = 1.28\u201318.67, P = 0.014 for HLA-B\u221715:02.",
-                "**HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 association with LTG-induced MPE:** OR = 8.27, 95% CI = 1.83\u201337.41, P = 0.005 for HLA-A\u221733:03; OR = 7.33, 95% CI = 1.63\u201333.02, P = 0.005 for HLA-B\u221715:02; OR = 10.29, 95% CI = 1.45\u201372.81, P = 0.029 for HLA-B\u221744:03.",
-                "**HLA-B\u221735:08 and HLA-B\u221739:01 association with LTG-induced CADR:** OR = 70.36, 95% CI = 4.19\u20131182.21, P = 0.030 for HLA-B\u221735:08; OR = 10.68, 95% CI = 2.20\u201351.83, P = 0.022 for HLA-B\u221739:01.",
-                "**HLA-A\u221702:07 and HLA-A\u221733:03 association with LTG-induced CADR compared to general population:** OR = 3.27, 95% CI = 1.07\u20139.96, P = 0.029 for HLA-A\u221702:07; OR = 3.16, 95% CI = 1.11\u20138.98, P = 0.023 for HLA-A\u221733:03.",
-                "**HLA-B\u221715:02 association with LTG-induced MPE compared to general population:** OR = 5.44, 95% CI = 1.56\u201319.03, P = 0.003.",
-                "**HLA-B\u221744:03 association with LTG-induced MPE compared to general population:** OR = 4.73, 95% CI = 1.20\u201318.62, P = 0.046."
+                "**HLA-A\u221702:07 and HLA-B\u221715:02 Association**: These alleles were significantly associated with lamotrigine-induced cutaneous adverse drug reactions (CADR) in Thai patients, with odds ratios (OR) of 7.83 (95% CI: 1.60\u201338.25, P = 0.013) and 4.89 (95% CI: 1.28\u201318.67, P = 0.014), respectively.",
+                "**HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 in MPE**: These alleles were significantly associated with lamotrigine-induced maculopapular exanthema (MPE), with ORs of 8.27 (95% CI: 1.83\u201337.41, P = 0.005), 7.33 (95% CI: 1.63\u201333.02, P = 0.005), and 10.29 (95% CI: 1.45\u201372.81, P = 0.029), respectively.",
+                "**HLA-B\u221735:08 and HLA-B\u221739:01**: These alleles showed significant associations with lamotrigine-induced CADR when compared with the general population, with ORs of 70.36 (95% CI: 4.19\u20131182.21, P = 0.030) and 10.68 (95% CI: 2.20\u201351.83, P = 0.022), respectively.",
+                "**No Significant Association in SCAR**: There were no significant differences in HLA alleles between the lamotrigine-induced severe cutaneous adverse reactions (SCAR) group and controls."
             ],
             "citations": [
                 "The proportion of HLA-A\u221702:07 and HLA-B\u221715:02 allele carriers were significantly higher in the LTG-induced CADR group than in the tolerant controls [odds ratio (OR): 7.83; 95% confidence interval (CI): 1.60\u201338.25; P = 0.013, and OR: 4.89; 95% CI: 1.28\u201318.67; P = 0.014].",
                 "In addition, subjects with HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 were significantly higher in the LTG-induced MPE group than in the tolerant controls (OR: 8.27; 95% CI: 1.83\u201337.41; P = 0.005, OR: 7.33; 95% CI: 1.63\u201333.02; P = 0.005; and OR: 10.29; 95% CI: 1.45\u201372.81; P = 0.029).",
-                "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
+                "In contrast to the LTG-induced MPE group, there were no significant differences between HLA alleles and LTG-induced SCAR group."
             ]
         },
         "allele_frequency": {
-            "content": [
-                "**HLA-A\u221702:07 frequency in LTG-induced CADR group**: 33.3% (5/15)",
-                "**HLA-A\u221702:07 frequency in LTG-tolerant group**: 6.0% (3/50)",
-                "**HLA-A\u221702:07 frequency in general population**: 13.3% (49/369)",
-                "**HLA-A\u221733:03 frequency in LTG-induced CADR group**: 46.7% (7/15)",
-                "**HLA-A\u221733:03 frequency in LTG-tolerant group**: 22.0% (11/50)",
-                "**HLA-A\u221733:03 frequency in general population**: 21.7% (80/369)",
-                "**HLA-B\u221715:02 frequency in LTG-induced CADR group**: 40.0% (6/15)",
-                "**HLA-B\u221715:02 frequency in LTG-tolerant group**: 12.0% (6/50)",
-                "**HLA-B\u221715:02 frequency in general population**: 15.5% (153/986)",
-                "**HLA-B\u221735:08 frequency in LTG-induced CADR group**: 6.7% (1/15)",
-                "**HLA-B\u221735:08 frequency in LTG-tolerant group**: 0% (0/50)",
-                "**HLA-B\u221735:08 frequency in general population**: 0.1% (1/986)",
-                "**HLA-B\u221739:01 frequency in LTG-induced CADR group**: 13.3% (2/15)",
-                "**HLA-B\u221739:01 frequency in LTG-tolerant group**: 2.0% (1/50)",
-                "**HLA-B\u221739:01 frequency in general population**: 1.4% (14/986)",
-                "**HLA-B\u221744:03 frequency in LTG-induced MPE group**: 30.0% (3/10)",
-                "**HLA-B\u221744:03 frequency in LTG-tolerant group**: 4.0% (2/50)",
-                "**HLA-B\u221744:03 frequency in general population**: 8.3% (82/986)"
-            ],
+            "content": "| Gene  | Polymorphism | Frequency                                                                 |\n|-------|--------------|---------------------------------------------------------------------------|\n| HLA-A | HLA-A*02:07  | Present in 33.3% of LTG-induced CADR patients, significantly higher than both the treatment control and general population groups. |\n| HLA-A | HLA-A*33:03  | Present at a significantly higher rate in LTG-induced CADR patients than in the general population controls. |\n| HLA-B | HLA-B*15:02  | Found in 40.0% of patients who developed CADR and in 12.0% of the tolerant patients. Significantly higher in LTG-induced CADR cases than in both the treatment controls and general population groups. |\n| HLA-B | HLA-B*35:08  | Reported in only one case of LTG-induced MPE and once in the general population, very rare in the Thai population (less than 1%). |\n| HLA-B | HLA-B*39:01  | Found in 2/15 LTG-induced CADR patients, significantly associated with LTG-induced CADR when compared with the general population. |\n| HLA-B | HLA-B*44:03  | Associated with LTG-induced MPE when compared with both control groups. |",
             "citations": [
-                "The HLA-A\u221702:07 and HLA-B\u221715:02 allele carriers were significantly higher in the LTG-induced CADR group than in the tolerant controls [odds ratio (OR): 7.83; 95% confidence interval (CI): 1.60\u201338.25; P = 0.013, and OR: 4.89; 95% CI: 1.28\u201318.67; P = 0.014].",
-                "Compared with the HLA-B allele, HLA-A\u221702:07 was present in 33.3% of LTG-induced CADR patients and showed significantly higher frequencies than both the treatment control and general population groups with OR = 7.83, 95% CI = 1.60\u201338.25, P-value = 0.013 and OR = 3.27, 95% CI = 1.07\u20139.96, P-value = 0.029, respectively; in addition, HLA-A\u221733:03 also had a significantly higher frequency than in the general population (OR = 3.16, 95% CI = 1.11\u20138.98, P-value = 0.023)."
+                "The proportion of HLA-A\u221702:07 and HLA-B\u221715:02 allele carriers were significantly higher in the LTG-induced CADR group than in the tolerant controls [odds ratio (OR): 7.83; 95% confidence interval (CI): 1.60\u201338.25; P = 0.013, and OR: 4.89; 95% CI: 1.28\u201318.67; P = 0.014].",
+                "In addition, subjects with HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 were significantly higher in the LTG-induced MPE group than in the tolerant controls (OR: 8.27; 95% CI: 1.83\u201337.41; P = 0.005, OR: 7.33; 95% CI: 1.63\u201333.02; P = 0.005; and OR: 10.29; 95% CI: 1.45\u201372.81; P = 0.029).",
+                "We found the HLA-B\u221715:02 allele in 40.0% of patients who developed CADR and in 12.0% of the tolerant patients."
             ]
         },
         "additional_resource_links": [
@@ -101,97 +84,124 @@
                 "gene": "HLA-A",
                 "polymorphism": "HLA-A*02:07",
                 "relationship_effect": "HLA-A*02:07 is associated with increased risk of lamotrigine-induced cutaneous adverse drug reactions (CADR) in Thai patients.",
-                "p_value": "0.013",
+                "p_value": "P = 0.013 (OR: 7.83; 95% CI: 1.60\u201338.25, vs LTG-tolerant controls); P = 0.029 (OR: 3.27; 95% CI: 1.07\u20139.96, vs general population)",
                 "citations": [
-                    "The proportion of HLA-A\u221702:07 and HLA-B\u221715:02 allele carriers were significantly higher in the LTG-induced CADR group than in the tolerant controls [odds ratio (OR): 7.83; 95% confidence interval (CI): 1.60\u201338.25; P = 0.013, and OR: 4.89; 95% CI: 1.28\u201318.67; P = 0.014].",
                     "Compared with the HLA-B allele, HLA-A\u221702:07 was present in 33.3% of LTG-induced CADR patients and showed significantly higher frequencies than both the treatment control and general population groups with OR = 7.83, 95% CI = 1.60\u201338.25, P-value = 0.013 and OR = 3.27, 95% CI = 1.07\u20139.96, P-value = 0.029, respectively; in addition, HLA-A\u221733:03 also had a significantly higher frequency than in the general population (OR = 3.16, 95% CI = 1.11\u20138.98, P-value = 0.023).",
+                    "We found a statistically significant association of the HLA-A\u221702:07 and HLA-B\u221715:02 alleles with LTG-induced CADR in the Thai population.",
                     "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ],
                 "p_value_citations": [
-                    "...P = 0.013..."
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ]
             },
             {
                 "gene": "HLA-B",
                 "polymorphism": "HLA-B*15:02",
                 "relationship_effect": "HLA-B*15:02 is associated with increased risk of lamotrigine-induced cutaneous adverse drug reactions (CADR) in Thai patients.",
-                "p_value": "0.014",
+                "p_value": "P = 0.014 (OR: 4.89; 95% CI: 1.28\u201318.66, vs LTG-tolerant controls); P = 0.027 (OR: 3.63; 95% CI: 1.27\u201310.34, vs general population)",
                 "citations": [
                     "The proportion of patients carrying the HLA-B\u221715:02 allele was significantly higher in LTG-induced CADR cases than in both the treatment controls and general population groups with odds ratios (OR) of 4.89, 95% CI = 1.28\u201318.66, P-value = 0.014 and OR = 3.63, 95% CI = 1.27\u201310.34, P-value = 0.027, respectively.",
                     "Conclusion: HLA-A\u221702:07 and HLA-B\u221715:02 were associated with LTG-induced CADR in Thai patients.",
-                    "In the present study, we found the significant association between LTG-induced CADR and HLA-A\u221702:07 and HLA-B\u221715:02 when compared with both tolerant and general population controls."
+                    "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ],
                 "p_value_citations": [
-                    "...P = 0.014..."
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ]
             },
             {
                 "gene": "HLA-A",
                 "polymorphism": "HLA-A*33:03",
-                "relationship_effect": "HLA-A*33:03 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
-                "p_value": "0.005",
+                "relationship_effect": "HLA-A*33:03 is associated with increased risk of lamotrigine-induced CADR in Thai patients (significant vs general population, trend vs tolerant controls).",
+                "p_value": "P = 0.061 (OR: 3.10; 95% CI: 0.92\u201310.46, vs LTG-tolerant controls); P = 0.023 (OR: 3.16; 95% CI: 1.11\u20138.98, vs general population)",
                 "citations": [
-                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221715:02*HLA-B^\u2217^\u221715:02 was found when compared with the tolerant controls and general population (OR = 7.33, 95% CI = 1.63\u201333.02, *P*P-value = 0.005 and OR = 5.44, 95% CI = 1.56\u201319.03, *P*P-value = 0.003, respectively).",
-                    "Moreover, we found a significant association of LTG-induced MPE with *HLA-A\u221733:03*HLA-A^\u2217^\u221733:03 compared with the tolerant controls group (OR = 8.27, 95% CI = 1.83\u201337.41, *P*P-value = 0.005) and general population group (OR = 8.43, 95% CI = 2.13\u201333.34, *P*P-value = 0.002) as shown in **Table 4**Table [4](#T4)4."
+                    "HLA-A\u221733:03 also had a significantly higher frequency than in the general population (OR = 3.16, 95% CI = 1.11\u20138.98, *P*P-value = 0.023).",
+                    "In addition, subjects with HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 were significantly higher in the LTG-induced MPE group than in the tolerant controls (OR: 8.27; 95% CI: 1.83\u201337.41; P = 0.005, OR: 7.33; 95% CI: 1.63\u201333.02; P = 0.005; and OR: 10.29; 95% CI: 1.45\u201372.81; P = 0.029).",
+                    "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ],
                 "p_value_citations": [
-                    "...P = 0.005..."
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ]
             },
             {
                 "gene": "HLA-B",
-                "polymorphism": "HLA-B*15:02",
-                "relationship_effect": "HLA-B*15:02 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
-                "p_value": "0.005",
+                "polymorphism": "HLA-B*35:08",
+                "relationship_effect": "HLA-B*35:08 is associated with increased risk of lamotrigine-induced CADR in Thai patients (significant vs general population, not significant vs tolerant controls).",
+                "p_value": "P = 0.231 (OR: 10.45; 95% CI: 0.40\u2013270.41, vs LTG-tolerant controls); P = 0.030 (OR: 70.36; 95% CI: 4.19\u20131182.21, vs general population)",
                 "citations": [
-                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221715:02*HLA-B^\u2217^\u221715:02 was found when compared with the tolerant controls and general population (OR = 7.33, 95% CI = 1.63\u201333.02, *P*P-value = 0.005 and OR = 5.44, 95% CI = 1.56\u201319.03, *P*P-value = 0.003, respectively).",
-                    "The subgroup analysis revealed that the proportion of patients carrying the *HLA-B\u221715:02*HLA-B^\u2217^\u221715:02 allele was significantly higher in LTG-induced MPE cases than in both the tolerant control and general population groups, which is very different from previous studies in which *HLA-B\u221715:02*HLA-B^\u2217^\u221715:02 was not found to be associated with LTG-induced MPE ([An et al., 2010](#B2)An et al., 2010; [Shi et al., 2011](#B27)Shi et al., 2011).",
-                    "## Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
+                    "In addition, we also found a significant association between LTG-induced CADR patients and both HLA-B\u221735:08 and HLA-B\u221739:01 when compared with the general population with OR = 70.36, 95% CI = 4.19\u20131182.21, P-value = 0.030 and OR = 10.68, 95% CI = 2.20\u201351.83, P-value = 0.022, respectively.",
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions.",
+                    "In this study, we report for the first time a significant association between HLA-B\u221735:08 and LTG-induced CADR or MPE, although this allele has been reported in only one case of LTG-induced MPE and once in the general population, as a result of this allele being very rare in the Thai population (less than 1%, data from [Puangpetch et al., 2015](#B24)Puangpetch et al., 2015)."
                 ],
                 "p_value_citations": [
-                    "...P = 0.005..."
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
                 ]
             },
             {
                 "gene": "HLA-B",
-                "polymorphism": "HLA-B*44:03",
-                "relationship_effect": "HLA-B*44:03 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
-                "p_value": "0.029",
+                "polymorphism": "HLA-B*39:01",
+                "relationship_effect": "HLA-B*39:01 is associated with increased risk of lamotrigine-induced CADR in Thai patients (significant vs general population, not significant vs tolerant controls).",
+                "p_value": "P = 0.131 (OR: 7.54; 95% CI: 0.63\u201389.76, vs LTG-tolerant controls); P = 0.022 (OR: 10.68; 95% CI: 2.20\u201351.83, vs general population)",
+                "citations": [
+                    "In addition, we also found a significant association between LTG-induced CADR patients and both HLA-B\u221735:08 and HLA-B\u221739:01 when compared with the general population with OR = 70.36, 95% CI = 4.19\u20131182.21, P-value = 0.030 and OR = 10.68, 95% CI = 2.20\u201351.83, P-value = 0.022, respectively.",
+                    "HLA-B\u221739:01 | 2/15 | 1/50 | 7.54 (0.63\u201389.76) | 0.131 | 14/986 | 10.68 (2.20\u201351.83) | 0.022 |"
+                ],
+                "p_value_citations": [
+                    "Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
+                ]
+            },
+            {
+                "gene": "HLA-A",
+                "polymorphism": "HLA-A*33:03",
+                "relationship_effect": "HLA-A*33:03 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
+                "p_value": "P = 0.005 (OR: 8.27; 95% CI: 1.83\u201337.41, vs LTG-tolerant controls); P = 0.002 (OR: 8.43; 95% CI: 2.13\u201333.34, vs general population)",
                 "citations": [
-                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221744:03* was found when compared with both control groups (OR = 10.29, 95% CI = 1.45\u201372.81, *P*P-value = 0.029 and OR = 4.73, 95% CI = 1.20\u201318.62, *P*P-value = 0.046, respectively), whereas *HLA-B\u221735:08* was significantly associated only with the general population (OR = 109.44, 95% CI = 6.34\u20131889.11, *P*P-value = 0.020) (**Table 4**); nevertheless, no significant associations were found in LTG-induced SCAR.",
-                    "Similarly to *HLA-B\u221735:08*, the association of LTG-induced MPE and *HLA-B\u221744:03* alleles was firstly reported in the Thai population.",
-                    "We also identified an association between HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 and LTG-induced MPE in this population."
+                    "In addition, subjects with HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 were significantly higher in the LTG-induced MPE group than in the tolerant controls (OR: 8.27; 95% CI: 1.83\u201337.41; P = 0.005, OR: 7.33; 95% CI: 1.63\u201333.02; P = 0.005; and OR: 10.29; 95% CI: 1.45\u201372.81; P = 0.029).",
+                    "## Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups.",
+                    "Moreover, we found a significant association of LTG-induced MPE with HLA-A\u221733:03 compared with the tolerant controls group (OR = 8.27, 95% CI = 1.83\u201337.41, P-value = 0.005) and general population group (OR = 8.43, 95% CI = 2.13\u201333.34, P-value = 0.002) as shown in Table 4."
                 ],
                 "p_value_citations": [
-                    "...P = 0.029..."
+                    "Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
                 ]
             },
             {
                 "gene": "HLA-B",
-                "polymorphism": "HLA-B*35:08",
-                "relationship_effect": "HLA-B*35:08 is associated with increased risk of lamotrigine-induced cutaneous adverse drug reactions (CADR) in Thai patients compared to the general population, but the allele is very rare.",
-                "p_value": "0.030",
+                "polymorphism": "HLA-B*15:02",
+                "relationship_effect": "HLA-B*15:02 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
+                "p_value": "P = 0.005 (OR: 7.33; 95% CI: 1.63\u201333.02, vs LTG-tolerant controls); P = 0.003 (OR: 5.44; 95% CI: 1.56\u201319.03, vs general population)",
                 "citations": [
-                    "In addition, we also found a significant association between LTG-induced CADR patients and both HLA-B\u221735:08 and HLA-B\u221739:01 when compared with the general population with OR = 70.36, 95% CI = 4.19\u20131182.21, P-value = 0.030 and OR = 10.68, 95% CI = 2.20\u201351.83, P-value = 0.022, respectively.",
-                    "In this study, we report for the first time a significant association between HLA-B\u221735:08 and LTG-induced CADR or MPE, although this allele has been reported in only one case of LTG-induced MPE and once in the general population, as a result of this allele being very rare in the Thai population (less than 1%, data from [Puangpetch et al., 2015](#B24)Puangpetch et al., 2015).",
-                    "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions."
+                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221715:02* was found when compared with the tolerant controls and general population (OR = 7.33, 95% CI = 1.63\u201333.02, *P* = 0.005 and OR = 5.44, 95% CI = 1.56\u201319.03, *P* = 0.003, respectively).",
+                    "We also identified an association between HLA-A\u221733:03, HLA-B\u221715:02, and HLA-B\u221744:03 and LTG-induced MPE in this population.",
+                    "The subgroup analysis revealed that the proportion of patients carrying the *HLA-B\u221715:02* allele was significantly higher in LTG-induced MPE cases than in both the tolerant control and general population groups, which is very different from previous studies in which *HLA-B\u221715:02* was not found to be associated with LTG-induced MPE ([An et al., 2010](#B2); [Shi et al., 2011](#B27))."
                 ],
                 "p_value_citations": [
-                    "...P-value = 0.030..."
+                    "Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
                 ]
             },
             {
                 "gene": "HLA-B",
-                "polymorphism": "HLA-B*39:01",
-                "relationship_effect": "HLA-B*39:01 is associated with increased risk of lamotrigine-induced cutaneous adverse drug reactions (CADR) in Thai patients compared to the general population, but the allele is rare.",
-                "p_value": "0.022",
+                "polymorphism": "HLA-B*44:03",
+                "relationship_effect": "HLA-B*44:03 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients.",
+                "p_value": "P = 0.029 (OR: 10.29; 95% CI: 1.45\u201372.81, vs LTG-tolerant controls); P = 0.046 (OR: 4.73; 95% CI: 1.20\u201318.62, vs general population)",
                 "citations": [
-                    "In addition, we also found a significant association between LTG-induced CADR patients and both HLA-B\u221735:08 and HLA-B\u221739:01 when compared with the general population with OR = 70.36, 95% CI = 4.19\u20131182.21, P-value = 0.030 and OR = 10.68, 95% CI = 2.20\u201351.83, P-value = 0.022, respectively.",
-                    "## Table 3. List of HLA alleles that showed a significant association with LTG-induced cutaneous adverse drug reactions.",
-                    "HLA-B\u221739:01 | 2/15 | 1/50 | 7.54 (0.63\u201389.76) | 0.131 | 14/986 | 10.68 (2.20\u201351.83) | 0.022 |"
+                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221744:03*HLA-B^\u2217^\u221744:03 was found when compared with both control groups (OR = 10.29, 95% CI = 1.45\u201372.81, *P*P-value = 0.029 and OR = 4.73, 95% CI = 1.20\u201318.62, *P*P-value = 0.046, respectively), whereas *HLA-B\u221735:08*HLA-B^\u2217^\u221735:08 was significantly associated only with the general population (OR = 109.44, 95% CI = 6.34\u20131889.11, *P*P-value = 0.020) (**Table 4**Table [4](#T4)4); nevertheless, no significant associations were found in LTG-induced SCAR.",
+                    "Similarly to *HLA-B\u221735:08*HLA-B^\u2217^\u221735:08, the association of LTG-induced MPE and *HLA-B\u221744:03*HLA-B^\u2217^\u221744:03 alleles was firstly reported in the Thai population.",
+                    "## Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
+                ],
+                "p_value_citations": [
+                    "Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
+                ]
+            },
+            {
+                "gene": "HLA-B",
+                "polymorphism": "HLA-B*35:08",
+                "relationship_effect": "HLA-B*35:08 is associated with increased risk of lamotrigine-induced maculopapular exanthema (MPE) in Thai patients (significant vs general population, not significant vs tolerant controls).",
+                "p_value": "P = 0.167 (OR: 15.95; 95% CI: 0.60\u2013421.64, vs LTG-tolerant controls); P = 0.020 (OR: 109.44; 95% CI: 6.34\u20131889.11, vs general population)",
+                "citations": [
+                    "In subgroup analysis of LTG-induced CADR, a significant association between LTG-induced MPE and *HLA-B\u221735:08* was significantly associated only with the general population (OR = 109.44, 95% CI = 6.34\u20131889.11, *P*-value = 0.020) (**Table 4**); nevertheless, no significant associations were found in LTG-induced SCAR.",
+                    "In this study, we report for the first time a significant association between *HLA-B\u221735:08* and LTG-induced CADR or MPE, although this allele has been reported in only one case of LTG-induced MPE and once in the general population, as a result of this allele being very rare in the Thai population (less than 1%, data from [Puangpetch et al., 2015](#B24))."
                 ],
                 "p_value_citations": [
-                    "...P-value = 0.022..."
+                    "Table 4. The associations of individual HLA alleles with LTG-induced cutaneous adverse drug reactions among the different subgroups."
                 ]
             }
         ]
diff --git a/data/annotations/PMC5728534.json b/data/annotations/PMC5728534.json
index d2496ac..49d0dde 100644
--- a/data/annotations/PMC5728534.json
+++ b/data/annotations/PMC5728534.json
@@ -3,42 +3,37 @@
     "title": "Effects of genetic polymorphisms on the OCT1 and OCT2-mediated uptake of ranitidine",
     "study_parameters": {
         "summary": {
-            "content": "The study aimed to investigate the effects of genetic polymorphisms in the organic cation transporters OCT1 and OCT2 on the uptake of ranitidine, a commonly used H2-receptor antagonist. Using HEK293 and CHO cells overexpressing various OCT1 and OCT2 allelic variants, the researchers found that ranitidine is a substrate of OCT1, with significant variability in uptake depending on the OCT1 allele, while OCT2 showed limited ranitidine uptake unaffected by its common polymorphism. The findings highlight the potential for genetic variations in OCT1 to influence ranitidine pharmacokinetics and drug-drug interactions, although the clinical implications remain to be explored.",
+            "content": "The study aimed to investigate the effects of genetic polymorphisms in the organic cation transporters OCT1 and OCT2 on the uptake of ranitidine, a commonly used H2-receptor antagonist. Using HEK293 and CHO cells overexpressing various OCT1 and OCT2 alleles, the researchers found that ranitidine is a substrate of OCT1, with significant variability in uptake depending on the OCT1 genetic variants. While certain OCT1 alleles completely lacked ranitidine uptake, others showed reduced or increased uptake, and OCT2 showed limited ranitidine uptake unaffected by its common polymorphism. These findings highlight the potential for genetic variability in OCT1 to influence ranitidine pharmacokinetics and drug-drug interactions.",
             "citations": [
                 "We confirmed ranitidine as an OCT1 substrate and demonstrated that common genetic polymorphisms in OCT1 strongly affect ranitidine uptake and modulate ranitidine\u2019s potential to cause drug-drug interactions.",
-                "OCT2 showed only a limited uptake of ranitidine that was not significantly affected by the Ala270Ser polymorphism.",
-                "The effects of the frequent OCT1 polymorphisms on ranitidine pharmacokinetics in humans remain to be analyzed."
+                "Ranitidine was transported by wild-type OCT1 with a Km of 62.9 \u03bcM and a vmax of 1125 pmol/min/mg protein.",
+                "Alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake."
             ]
         },
         "study_type": {
-            "content": "In vitro study, prospective",
+            "content": "In vitro study, Unknown",
             "citations": [
                 "We characterized ranitidine uptake using HEK293 and CHO cells stably transfected to overexpress wild type OCT1, OCT2, or their naturally occurring allelic variants.",
-                "The major limitation of our study is that it contains only *in vitro* analyses.",
-                "The effects of alleles *OCT1*1* to *6* on ranitidine uptake were confirmed using an alternative cell model\u2014stably transfected CHO cells ([Fig 2D](#pone.0189521.g002)Fig 2D)."
+                "The major limitation of our study is that it contains only *in vitro*in vitro analyses.",
+                "HEK293 or CHO cells overexpressing the human *OCT1*OCT1 alleles **1A**1A (characterized by the amino acid substitution Met408Val), **1B**1B, **1C**1C (Phe160Leu), **1D**1D (Pro341Leu/Met408Val), **2**2 (Met420del), **3**3 (Arg61Cys), **4**4 (Gly401Ser), **5**5 (Gly465Arg/Met420del), **6**6 (Cys88Arg/Met420del), **7**7 (Ser14Phe), **8A**8A (Arg488Met), **8B**8B (Arg488Met/Met408Val), **9**9 (Pro117Leu), **10**10 (Ser189Leu), **11**11 (Ile449Thr), **12**12 (Ser29Leu), or **13**13 (Thr245Met), the human *OCT2*OCT2 reference and variant alleles (Ala270Ser), human *OCT3*OCT3, and the control cells (transfected with the empty pcDNA5 vector) were generated by targeted chromosomal integration using the Flp-In\u2122 System (Life Technologies, Darmstadt, Germany)."
             ]
         },
         "participant_info": {
             "content": [
-                "**Participants:** The study did not involve human participants directly; it was conducted using HEK293 and CHO cell lines.",
-                "**Age:** Not applicable, as the study used cell lines.",
-                "**Gender:** Not applicable, as the study used cell lines.",
-                "**Ethnicity:** Not applicable, as the study used cell lines.",
-                "**Pre-existing conditions:** Not applicable, as the study used cell lines.",
-                "**Study groups:** The study groups consisted of HEK293 and CHO cells overexpressing various OCT1 and OCT2 alleles."
+                "The article does not provide specific details about participants in this study, as it is an in vitro study using cell lines. Therefore, there are no details about age, gender, ethnicity, pre-existing conditions, or other participant characteristics."
             ],
             "citations": [
                 "We characterized ranitidine uptake using HEK293 and CHO cells stably transfected to overexpress wild type OCT1, OCT2, or their naturally occurring allelic variants.",
-                "The effects of alleles OCT1*5, *6, *12, and *13 on ranitidine uptake were confirmed using an alternative cell model\u2014stably transfected CHO cells ([Fig 2D](#pone.0189521.g002)Fig 2D).",
-                "HEK293 or CHO cells overexpressing the human OCT1 alleles 1A (characterized by the amino acid substitution Met408Val), 1B, 1C (Phe160Leu), 1D (Pro341Leu/Met408Val), 2 (Met420del), 3 (Arg61Cys), 4 (Gly401Ser), 5 (Gly465Arg/Met420del), 6 (Cys88Arg/Met420del), 7 (Ser14Phe), 8A (Arg488Met), 8B (Arg488Met/Met408Val), 9 (Pro117Leu), 10 (Ser189Leu), 11 (Ile449Thr), 12 (Ser29Leu), or 13 (Thr245Met), the human OCT2 reference and variant alleles (Ala270Ser), human OCT3, and the control cells (transfected with the empty pcDNA5 vector) were generated by targeted chromosomal integration using the Flp-In\u2122 System (Life Technologies, Darmstadt, Germany)."
+                "The major limitation of our study is that it contains only *in vitro*in vitro analyses.",
+                "HEK293 or CHO cells overexpressing the human *OCT1*OCT1 alleles **1A**1A (characterized by the amino acid substitution Met408Val), **1B**1B, **1C**1C (Phe160Leu), **1D**1D (Pro341Leu/Met408Val), **2**2 (Met420del), **3**3 (Arg61Cys), **4**4 (Gly401Ser), **5**5 (Gly465Arg/Met420del), **6**6 (Cys88Arg/Met420del), **7**7 (Ser14Phe), **8A**8A (Arg488Met), **8B**8B (Arg488Met/Met408Val), **9**9 (Pro117Leu), **10**10 (Ser189Leu), **11**11 (Ile449Thr), **12**12 (Ser29Leu), or **13**13 (Thr245Met), the human *OCT2*OCT2 reference and variant alleles (Ala270Ser), human *OCT3*OCT3, and the control cells (transfected with the empty pcDNA5 vector) were generated by targeted chromosomal integration using the Flp-In\u2122 System (Life Technologies, Darmstadt, Germany)."
             ]
         },
         "study_design": {
             "content": [
                 "**Study Design:** The study utilized an in vitro experimental design.",
-                "**Study Population:** HEK293 and CHO cells were used, stably transfected to overexpress wild-type OCT1, OCT2, or their naturally occurring allelic variants.",
-                "**Sample Size:** The exact number of cell samples is not specified, but experiments were conducted with at least three independent replicates.",
-                "**Methods:** Ranitidine uptake was characterized using these cell lines, and the effects of genetic polymorphisms on OCT1 and OCT2-mediated uptake were analyzed.",
+                "**Cell Lines:** HEK293 and CHO cells were used, stably transfected to overexpress wild-type OCT1, OCT2, or their naturally occurring allelic variants.",
+                "**Sample Size:** The study does not specify a numerical sample size but mentions conducting at least three independent experiments for each condition.",
+                "**Experimental Conditions:** Ranitidine uptake was characterized in the presence of varying concentrations, and the effects of genetic polymorphisms on OCT1 and OCT2 were analyzed.",
                 "**Data Analysis:** Nonlinear regression to the Michaelis-Menten equation was used to determine kinetic parameters, and ANOVA followed by Tukey\u2019s HSD post hoc analyses were used for statistical comparisons."
             ],
             "citations": [
@@ -50,28 +45,23 @@
         "study_results": {
             "content": [
                 "**OCT1-mediated ranitidine uptake**: Ranitidine is confirmed as a substrate of OCT1 with a Km of 62.9 \u03bcM and a vmax of 1125 pmol/min/mg protein.",
-                "**OCT1 alleles with no uptake**: Alleles OCT1*5, *6, *12, and *13 completely lack ranitidine uptake.",
-                "**Reduced vmax in OCT1 alleles**: Alleles OCT1*2, *3, *4, and *10 show a significant decrease in vmax, ranging from 50% to 91% reduction.",
-                "**Increased vmax in OCT1*8**: OCT1*8 shows a 25% increase in vmax, though not statistically significant (P = 0.5).",
+                "**OCT1 alleles lacking ranitidine uptake**: Alleles OCT1*5, *6, *12, and *13 completely lack ranitidine uptake.",
+                "**Reduced vmax in OCT1 alleles**: Alleles OCT1*2, *3, *4, and *10 show more than 50% reduction in vmax.",
+                "**Increased vmax in OCT1*8**: The OCT1*8 allele shows a 25% increase in vmax.",
                 "**Correlation with morphine uptake**: Effects of OCT1 alleles on ranitidine uptake strongly correlate with effects on morphine uptake (r\u00b2 = 0.961).",
-                "**OCT2-mediated ranitidine uptake**: OCT2 shows limited uptake of ranitidine, not significantly affected by the Ala270Ser polymorphism.",
-                "**Ranitidine inhibition of OCT1 substrates**: Ranitidine inhibits OCT1-mediated uptake of metformin and morphine, with IC50 values of 20.9 \u03bcM for metformin and 45.5 \u03bcM for morphine.",
-                "**Genotype-dependent inhibition**: Ranitidine is more potent in inhibiting the OCT1*2 allele compared to the reference allele, especially for morphine (IC50 of 19.5 \u03bcM for OCT1*2 vs. 45.5 \u03bcM for reference)."
+                "**Ranitidine inhibition of OCT1 substrates**: Ranitidine inhibits OCT1-mediated uptake of metformin and morphine, with genotype-dependent differences in inhibitory potency.",
+                "**OCT2-mediated ranitidine uptake**: OCT2 shows limited uptake of ranitidine, unaffected by the Ala270Ser polymorphism.",
+                "**Genotype-dependent inhibition**: Ranitidine is more potent in inhibiting the OCT1*2 variant than the reference allele, especially for morphine (IC50 of 19.5 \u03bcM for OCT1*2 vs. 45.5 \u03bcM for reference).",
+                "**Statistical significance**: The reduction in vmax for certain alleles and the genotype-dependent differences in inhibition are statistically significant (p < 0.05)."
             ],
             "citations": [
                 "Ranitidine was transported by wild-type OCT1 with a Km of 62.9 \u03bcM and a vmax of 1125 pmol/min/mg protein.",
                 "Alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake.",
-                "The effects of OCT1 alleles on ranitidine uptake strongly correlated with the effects on morphine uptake suggesting common interaction mechanisms of both drugs with OCT1."
+                "The alleles OCT1*2, *3, *4, and *10 showed a significant decrease of vmax ([Fig 2E](#pone.0189521.g002)Fig 2E and [Table 2](#pone.0189521.t002)Table 2)."
             ]
         },
         "allele_frequency": {
-            "content": [
-                "**OCT1*2 allele frequency**: Global allele frequency of 12.2%.",
-                "**Poor OCT1 transporters in Europeans and White Americans**: 9% are homozygous or compound heterozygous carriers of loss-of-function alleles.",
-                "**Heterozygous carriers of loss-of-function alleles in Europeans and White Americans**: 40% have only one active copy of OCT1.",
-                "**Poor OCT1 transporters in East Asia**: Rare.",
-                "**Poor OCT1 transporters in certain South American populations**: More than 80% (e.g., Surui Indians)."
-            ],
+            "content": "Specific allele frequencies for the studied cohorts and experiments are not detailed in the article; it mentions general allele frequencies and prevalence of OCT1 polymorphisms in broader populations.\n\n| Allele | Frequency | Population |\n|--------|-----------|------------|\n| OCT1*2 | 12.2% | Global |\n| OCT1*2, *3, *4, *5, *6 | 9% | Homozygous or compound heterozygous carriers in Europeans and White Americans |\n| OCT1*2, *3, *4, *5, *6 | 40% | Heterozygous carriers in Europeans and White Americans |\n| Poor OCT1 transporters | 80% | Certain populations in South America (e.g., Surui Indians) |",
             "citations": [
                 "Nine percent of Europeans and White Americans are homozygous or compound heterozygous carriers of these loss-of-function alleles (so called poor OCT1 transporters) [[14](#pone.0189521.ref014)14, [16](#pone.0189521.ref016)16, [30](#pone.0189521.ref030)30].",
                 "An additional 40% of Europeans and White Americans are heterozygous carriers of these alleles and have only one active copy of *OCT1*OCT1 in their genomes.",
@@ -83,108 +73,247 @@
             "- [DOI link to the article](https://doi.org/10.1371/journal.pone.0189521)",
             "- [PubMed Central (PMC) link to the article](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5728534/)",
             "- [PDF version of the article](https://pmc.ncbi.nlm.nih.gov/articles/PMC5728534/pdf/pone.0189521.pdf)",
-            "- [German Research Foundation (DFG) website](http://www.dfg.de)",
-            "These resources provide access to the full text of the study, its PDF version, and information about the funding organization."
+            "These resources provide access to the full text of the study, including detailed methodologies and data."
         ]
     },
     "annotations": {
         "relationships": [
             {
                 "gene": "OCT1 (SLC22A1)",
-                "polymorphism": "OCT1*5, OCT1*6, OCT1*12, OCT1*13",
-                "relationship_effect": "These alleles completely lacked ranitidine uptake, indicating a complete loss of OCT1-mediated ranitidine transport.",
-                "p_value": "Not explicitly stated for this comparison, but described as significant in the text.",
+                "polymorphism": "OCT1*5 (Gly465Arg/Met420del)",
+                "relationship_effect": "Completely lacked ranitidine uptake (complete loss of function for ranitidine transport).",
+                "p_value": "Not explicitly stated, but described as complete loss; see Table 2 and Fig 2B.",
                 "citations": [
                     "The alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake.",
-                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM ([Fig 2B](#pone.0189521.g002)Fig 2B) and 10 \u03bcM concentration of ranitidine ([S1 Fig](#pone.0189521.s001)S1 Fig).",
-                    "In conclusion, we demonstrated that ranitidine is a substrate of OCT1 and that common genetic polymorphisms in OCT1 lead to a substantial reduction or even complete abolishment of OCT1-mediated ranitidine uptake."
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM (Fig 2B) and 10 \u03bcM concentration of ranitidine (S1 Fig).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM and 10 \u03bcM concentration of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*6 (Cys88Arg/Met420del)",
+                "relationship_effect": "Completely lacked ranitidine uptake (complete loss of function for ranitidine transport).",
+                "p_value": "Not explicitly stated, but described as complete loss; see Table 2 and Fig 2B.",
+                "citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake.",
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM (Fig 2B) and 10 \u03bcM concentration of ranitidine (S1 Fig).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ],
                 "p_value_citations": [
-                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM and 10 \u03bcM concentration of ranitidine. (Results)"
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM and 10 \u03bcM concentration of ranitidine."
                 ]
             },
             {
                 "gene": "OCT1 (SLC22A1)",
-                "polymorphism": "OCT1*2 (Met420del), OCT1*3 (Arg61Cys), OCT1*4 (Gly401Ser), OCT1*10 (Ser189Leu)",
-                "relationship_effect": "These alleles had vmax values decreased by more than 50% for ranitidine uptake, indicating a strong reduction in OCT1-mediated ranitidine transport. The decrease ranged from 50% (OCT1*10) to 91% (OCT1*4).",
-                "p_value": "OCT1*2: *** P<0.001; OCT1*3: *** P<0.001; OCT1*4: *** P<0.001; OCT1*10: * P<0.05 (from Table 2)",
+                "polymorphism": "OCT1*12 (Ser29Leu)",
+                "relationship_effect": "Completely lacked ranitidine uptake (complete loss of function for ranitidine transport).",
+                "p_value": "Not explicitly stated, but described as complete loss; see Table 2 and Fig 2B.",
+                "citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake.",
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM (Fig 2B) and 10 \u03bcM concentration of ranitidine (S1 Fig).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM and 10 \u03bcM concentration of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*13 (Thr245Met)",
+                "relationship_effect": "Completely lacked ranitidine uptake (complete loss of function for ranitidine transport).",
+                "p_value": "Not explicitly stated, but described as complete loss; see Table 2 and Fig 2B.",
+                "citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 completely lacked ranitidine uptake.",
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM (Fig 2B) and 10 \u03bcM concentration of ranitidine (S1 Fig).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "The alleles OCT1*5, *6, *12, and *13 showed a complete lack of ranitidine transport activity after incubation with both 1 \u03bcM and 10 \u03bcM concentration of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*2 (Met420del)",
+                "relationship_effect": "Significant decrease in ranitidine uptake (vmax decreased by ~64%, CLint decreased by ~56% compared to reference).",
+                "p_value": "vmax: 402.04 \u00b1 51.24 vs 1125.41 \u00b1 86.12 pmol/min/mg, ***P<0.001; CLint: 8.18 \u00b1 1.39 vs 18.48 \u00b1 1.83 ml/min/mg, **P<0.01 (Table 2).",
+                "citations": [
+                    "The alleles OCT1*2, *3, *4, and *10 showed a significant decrease of vmax (Fig 2E and Table 2).",
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine.",
+                    "In this study we confirmed that ranitidine is a substrate of the human hepatic uptake transporter OCT1 and demonstrated that genetic polymorphisms in OCT1 lead to a significant reduction or even complete loss of ranitidine uptake."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*3 (Arg61Cys)",
+                "relationship_effect": "Significant decrease in ranitidine uptake (vmax decreased by ~77%, CLint decreased by ~58% compared to reference).",
+                "p_value": "vmax: 255.08 \u00b1 12.75 vs 1125.41 \u00b1 86.12 pmol/min/mg, ***P<0.001; CLint: 7.76 \u00b1 1.80 vs 18.48 \u00b1 1.83 ml/min/mg, **P<0.01 (Table 2).",
                 "citations": [
                     "The alleles OCT1*2, *3, *4, and *10 showed a significant decrease of vmax (Fig 2E and Table 2).",
                     "The decrease ranged from 50% (OCT1*10) to 91% (OCT1*4).",
                     "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ],
                 "p_value_citations": [
-                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine. (Table 2)"
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*4 (Gly401Ser)",
+                "relationship_effect": "Significant decrease in ranitidine uptake (vmax decreased by ~91%, CLint decreased by ~83% compared to reference).",
+                "p_value": "vmax: 106.65 \u00b1 26.70 vs 1125.41 \u00b1 86.12 pmol/min/mg, ***P<0.001; CLint: 3.14 \u00b1 0.82 vs 18.48 \u00b1 1.83 ml/min/mg, ***P<0.001 (Table 2).",
+                "citations": [
+                    "The alleles OCT1*2, *3, *4, and *10 showed a significant decrease of vmax (Fig 2E and Table 2).",
+                    "The decrease ranged from 50% (OCT1*10) to 91% (OCT1*4).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*10 (Ser189Leu)",
+                "relationship_effect": "Significant decrease in ranitidine uptake (vmax decreased by ~50%, CLint decreased by ~52% compared to reference).",
+                "p_value": "vmax: 567.50 \u00b1 59.10 vs 1125.41 \u00b1 86.12 pmol/min/mg, *P<0.05; CLint: 8.96 \u00b1 0.75 vs 18.48 \u00b1 1.83 ml/min/mg, *P<0.05 (Table 2).",
+                "citations": [
+                    "The alleles OCT1*2, *3, *4, and *10 showed a significant decrease of vmax (Fig 2E and Table 2).",
+                    "The decrease ranged from 50% (OCT1*10) to 91% (OCT1*4).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ]
             },
             {
                 "gene": "OCT1 (SLC22A1)",
                 "polymorphism": "OCT1*8 (Arg488Met)",
-                "relationship_effect": "OCT1*8 showed an increase of vmax by 25% for ranitidine uptake, though the difference was not statistically significant (P = 0.5).",
-                "p_value": "P = 0.5 (not significant)",
+                "relationship_effect": "Showed an increase of vmax by 25% for ranitidine uptake (not statistically significant).",
+                "p_value": "vmax: 1412.43 \u00b1 108.88 vs 1125.41 \u00b1 86.12 pmol/min/mg, P = 0.5 (Table 2).",
                 "citations": [
+                    "In contrast, OCT1*8 showed an increase of vmax by 25%.",
                     "The OCT1*8 allele showed a 25% increase in vmax, though the difference was not statistically significant (P = 0.5, [Table 2](#pone.0189521.t002)Table 2, [Fig 2E](#pone.0189521.g002)Fig 2E).",
-                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine.",
-                    "In contrast, OCT1*8 showed an increase of vmax by 25%."
+                    "### Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ],
                 "p_value_citations": [
-                    "The OCT1*8 allele showed a 25% increase in vmax, though the difference was not statistically significant (P = 0.5, Table 2, Fig 2E). (Results)"
+                    "The OCT1*8 allele showed a 25% increase in vmax, though the difference was not statistically significant (P = 0.5, Table 2, Fig 2E)."
                 ]
             },
             {
                 "gene": "OCT1 (SLC22A1)",
-                "polymorphism": "OCT1*1A, *1C, *1D, *7, *9, *11",
-                "relationship_effect": "These alleles showed no significant difference in the uptake of ranitidine compared to the reference allele.",
-                "p_value": "Not significant (exact p-values not stated)",
+                "polymorphism": "OCT1*1A (Met408Val)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 1047.13 \u00b1 123.39 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
                 "citations": [
-                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
-                    "The OCT1*8 allele showed a 25% increase in vmax, though the difference was not statistically significant (P = 0.5, Table 2, Fig 2E).",
-                    "In contrast, none of the analyzed polymorphisms significantly affected the affinity (Km) of ranitidine uptake (P = 0.17, Table 2)."
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "The effects of alleles OCT1*1 to 6 on ranitidine uptake were confirmed using an alternative cell model\u2014stably transfected CHO cells (Fig 2D).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ],
                 "p_value_citations": [
-                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele. (Results)"
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ]
             },
             {
                 "gene": "OCT1 (SLC22A1)",
-                "polymorphism": "OCT1*2 (Met420del)",
-                "relationship_effect": "The inhibitory potency of ranitidine for morphine uptake was affected by the OCT1*2 allele; ranitidine was on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference OCT1 allele (IC50 for morphine: 19.5 \u03bcM for OCT1*2 vs 45.5 \u03bcM for reference).",
-                "p_value": "Not explicitly stated, but described as significant difference in IC50 values.",
+                "polymorphism": "OCT1*1C (Phe160Leu)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 1245.87 \u00b1 209.39 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
                 "citations": [
-                    "The inhibitory potency for morphine uptake was affected by the OCT1*2 allele.",
-                    "Depending on the genotype, ranitidine was on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference OCT1 allele.",
-                    "The genotype-dependent differences were most prominent when inhibiting morphine (IC50 of 19.5 and 45.5 \u03bcM for OCT1*2 and 1, respectively) and least prominent when inhibiting metformin (IC50 of 14.8 and 20.9 \u03bcM for OCT1*2 and 1, respectively)."
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "The alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ],
                 "p_value_citations": [
-                    "Depending on the genotype, ranitidine was on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference OCT1 allele. The genotype-dependent differences were most prominent when inhibiting morphine (IC50 of 19.5 and 45.5 \u03bcM for OCT1*2 and 1, respectively). (Results)"
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ]
             },
             {
                 "gene": "OCT1 (SLC22A1)",
-                "polymorphism": "OCT1*2 (Met420del)",
-                "relationship_effect": "For metformin uptake, ranitidine was more potent in inhibiting the OCT1*2 allele (IC50 14.8 \u03bcM) than the reference allele (IC50 20.9 \u03bcM), but the genotype-dependent difference was less pronounced than for morphine.",
-                "p_value": "Not explicitly stated, but IC50 values provided.",
+                "polymorphism": "OCT1*1D (Pro341Leu/Met408Val)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 883.10 \u00b1 76.48 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
+                "citations": [
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "The remaining OCT1 variants were further analyzed by performing concentration-dependent uptake measurements.",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*7 (Ser14Phe)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 753.08 \u00b1 49.95 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
                 "citations": [
-                    "The inhibitory potency for morphine uptake was affected by the OCT1*2 allele.",
-                    "Depending on the genotype, ranitidine was on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference OCT1 allele.",
-                    "The genotype-dependent differences were most prominent when inhibiting morphine (IC50 of 19.5 and 45.5 \u03bcM for OCT1*2 and 1, respectively) and least prominent when inhibiting metformin (IC50 of 14.8 and 20.9 \u03bcM for OCT1*2 and 1, respectively)."
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "OCT1*7 | Ser14Phe | 61.58 (\u00b113.19) | 753.08 (\u00b149.95) | 14.94 (\u00b13.61) |",
+                    "In contrast, none of the analyzed polymorphisms significantly affected the affinity (K_m) of ranitidine uptake (P = 0.17, Table 2)."
                 ],
                 "p_value_citations": [
-                    "The genotype-dependent differences were most prominent when inhibiting morphine (IC50 of 19.5 and 45.5 \u03bcM for OCT1*2 and 1, respectively) and least prominent when inhibiting metformin (IC50 of 14.8 and 20.9 \u03bcM for OCT1*2 and 1, respectively). (Results)"
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*9 (Pro117Leu)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 1138.47 \u00b1 279.72 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
+                "citations": [
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "The OCT1*8 allele showed a 25% increase in vmax, though the difference was not statistically significant (P = 0.5, Table 2, Fig 2E).",
+                    "## Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*11 (Ile449Thr)",
+                "relationship_effect": "No significant difference in ranitidine uptake compared to reference allele.",
+                "p_value": "vmax: 1283.33 \u00b1 319.87 vs 1125.41 \u00b1 86.12 pmol/min/mg, not significant (Table 2).",
+                "citations": [
+                    "Alleles OCT1*1A, *1C, *1D, *7, *9, and *11 showed no significant difference in the uptake of ranitidine compared to the reference allele (Fig 2B and 2C, S1 Fig).",
+                    "The alleles OCT1*10 and OCT1*11 did not affect ranitidine uptake, but strongly reduced TEA^+ and metformin uptake (Fig 5B).",
+                    "### Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
+                ],
+                "p_value_citations": [
+                    "Table 2. Effects of common amino acid substitutions on the kinetics of the OCT1-mediated uptake of ranitidine."
                 ]
             },
             {
                 "gene": "OCT2 (SLC22A2)",
                 "polymorphism": "Ala270Ser",
-                "relationship_effect": "OCT2 showed only a limited uptake of ranitidine that was not significantly affected by the Ala270Ser polymorphism (9% reduction, not significant).",
-                "p_value": "Not significant (exact p-value not stated)",
+                "relationship_effect": "No significant effect on OCT2-mediated ranitidine uptake (Ser270 allele showed a limited reduction of ranitidine uptake by 9% which was not significant).",
+                "p_value": "Not significant (see Fig 4B).",
                 "citations": [
                     "In comparison to the Ala270 allele, the Ser270 allele showed a limited reduction of ranitidine uptake by 9% which was not significant ([Fig 4B](#pone.0189521.g004)Fig 4B).",
-                    "Furthermore, the OCT2-mediated uptake of ranitidine was not substantially affected by the Ala270Ser substitution, the only common genetic polymorphism suggested to affect OCT2 function [[47](#pone.0189521.ref047)47].",
-                    "OCT2 showed only a limited uptake of ranitidine that was not significantly affected by the Ala270Ser polymorphism."
+                    "OCT2 showed only a limited uptake of ranitidine that was not significantly affected by the Ala270Ser polymorphism.",
+                    "However, we observed only a limited uptake of ranitidine via OCT2 compared to OCT1. The limited ability of OCT2 to transport ranitidine we observed here is in concordance with previously published data [[11](#pone.0189521.ref011)11]. Furthermore, the OCT2-mediated uptake of ranitidine was not substantially affected by the Ala270Ser substitution, the only common genetic polymorphism suggested to affect OCT2 function [[47](#pone.0189521.ref047)47]."
+                ],
+                "p_value_citations": [
+                    "In comparison to the Ala270 allele, the Ser270 allele showed a limited reduction of ranitidine uptake by 9% which was not significant (Fig 4B)."
+                ]
+            },
+            {
+                "gene": "OCT1 (SLC22A1)",
+                "polymorphism": "OCT1*2 (Met420del)",
+                "relationship_effect": "Ranitidine is on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference allele for morphine uptake (IC50 for morphine: 19.5 \u03bcM for OCT1*2 vs 45.5 \u03bcM for reference).",
+                "p_value": "IC50 for morphine: 19.5 \u03bcM (OCT1*2) vs 45.5 \u03bcM (reference); for metformin: 14.8 \u03bcM (OCT1*2) vs 20.9 \u03bcM (reference) (Table 3, Fig 3).",
+                "citations": [
+                    "Depending on the genotype, ranitidine was on average two-fold more potent in inhibiting the common OCT1*2 variant than the reference OCT1 allele.",
+                    "The genotype-dependent differences were most prominent when inhibiting morphine (IC50 of 19.5 and 45.5 \u03bcM for OCT1*2 and *1, respectively) and least prominent when inhibiting metformin (IC50 of 14.8 and 20.9 \u03bcM for OCT1*2 and *1, respectively).",
+                    "### Table 3. Genotype and substrate-dependent variations in the potency of ranitidine to inhibit OCT1-mediated uptake related to the expected ranitidine concentration in the gastrointestinal tract [I]2 and in plasma [I]1."
                 ],
                 "p_value_citations": [
-                    "In comparison to the Ala270 allele, the Ser270 allele showed a limited reduction of ranitidine uptake by 9% which was not significant. (Results)"
+                    "Table 3. Genotype and substrate-dependent variations in the potency of ranitidine to inhibit OCT1-mediated uptake related to the expected ranitidine concentration in the gastrointestinal tract [I]2 and in plasma [I]1."
                 ]
             }
         ]
diff --git a/data/annotations/PMC5749368.json b/data/annotations/PMC5749368.json
index 1b0cad1..6cfd098 100644
--- a/data/annotations/PMC5749368.json
+++ b/data/annotations/PMC5749368.json
@@ -3,7 +3,7 @@
     "title": "Genetics and clinical response to warfarin and edoxaban in patients with venous thromboembolism",
     "study_parameters": {
         "summary": {
-            "content": "The study aimed to determine if genetic variants could identify patients with venous thromboembolism (VTE) at increased risk of bleeding when treated with warfarin. Utilizing data from the Hokusai VTE trial, a randomized, double-blind study comparing edoxaban and warfarin, the researchers genotyped patients for CYP2C9 and VKORC1 variants to classify them into warfarin sensitivity groups. The results showed that sensitive and highly sensitive responders, as defined by their genotypes, had a higher risk of bleeding, spent more time overanticoagulated, and required lower warfarin doses compared to normal responders, highlighting the potential utility of genetic testing in managing warfarin therapy.",
+            "content": "The study aimed to determine if genetic variants could identify patients with venous thromboembolism (VTE) at increased risk of bleeding when treated with warfarin. Conducted as a subanalysis of the Hokusai VTE trial, it involved genotyping patients for CYP2C9 and VKORC1 variants to classify them into warfarin sensitivity types. The results showed that sensitive and highly sensitive responders, based on their genotypes, had a higher risk of bleeding, spent more time overanticoagulated, and required lower warfarin doses compared to normal responders, highlighting the potential of genetic testing in managing warfarin therapy.",
             "citations": [
                 "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
                 "In this study, CYP2C9 and VKORC1 genotypes identified patients with VTE at increased bleeding risk with warfarin.",
@@ -11,7 +11,7 @@
             ]
         },
         "study_type": {
-            "content": "Clinical trial, cohort, prospective, replication",
+            "content": "Clinical trial, prospective, replication",
             "citations": [
                 "Hokusai-venous thromboembolism (Hokusai VTE), a randomised, multinational, double-blind, non-inferiority trial, evaluated the safety and efficacy of edoxaban versus warfarin in patients with VTE initially treated with heparin.",
                 "The present analysis was designed to replicate the ENGAGE AF-TIMI 48 findings and examine whether the previous results could be extended to patients with VTE.",
@@ -37,29 +37,29 @@
         },
         "study_design": {
             "content": [
-                "**Study Design:** Randomised, multinational, double-blind, non-inferiority trial.",
-                "**Trial Name:** Hokusai-venous thromboembolism (Hokusai VTE).",
-                "**Objective:** Evaluate the safety and efficacy of edoxaban versus warfarin in patients with venous thromboembolism (VTE) initially treated with heparin.",
-                "**Study Population:** Patients with acute, symptomatic VTE (deep vein thrombosis, pulmonary embolism, or both).",
-                "**Sample Size:** 8292 patients enrolled; 3956 patients included in the pharmacogenetic subanalysis.",
-                "**Genotyping:** Patients were genotyped for variants in CYP2C9 and VKORC1 genes.",
-                "**Warfarin Sensitivity Types:** Patients were categorized into normal, sensitive, and highly sensitive responders based on genotypes.",
-                "**Treatment Groups:** Patients were randomized 1:1 to receive either edoxaban 60 mg once daily or warfarin.",
-                "**Initial Treatment:** All patients received open-label heparin treatment for at least 5 days before randomization.",
-                "**Warfarin Dosing:** Adjusted to maintain an INR between 2.0 and 3.0.",
-                "**Edoxaban Dosing:** Dose halved in patients with specific conditions (creatinine clearance, body weight, or concurrent treatment with P-glycoprotein inhibitors).",
-                "**Trial Registration Number:** NCT00986154."
+                "**Study design:** Randomised, multinational, double-blind, non-inferiority trial.",
+                "**Trial name:** Hokusai-venous thromboembolism (Hokusai VTE).",
+                "**Objective:** Evaluate the safety and efficacy of edoxaban versus warfarin in patients with VTE initially treated with heparin.",
+                "**Genetic analysis:** Subanalysis focused on genotyping for variants in *CYP2C9* and *VKORC1* genes.",
+                "**Warfarin sensitivity types:** Patients were divided into normal, sensitive, and highly sensitive responders based on genotypes.",
+                "**Sample size:** 3956 patients included in the genetic analysis, with 1978 patients randomized to warfarin.",
+                "**Patient population:** Patients with acute, symptomatic VTE (DVT, PE, or both) initially treated with heparin.",
+                "**Randomisation:** 1:1 to receive either edoxaban 60 mg once daily or warfarin.",
+                "**Inclusion criteria:** Patients \u226518 years with diagnosed acute, symptomatic DVT or PE.",
+                "**Exclusion criteria:** Contraindications to heparin or warfarin, prior extensive treatment with heparin or VKA, certain cancer treatments, and specific medication use.",
+                "**Genotyping method:** Sequenom technology used for *CYP2C9* and *VKORC1* alleles.",
+                "**Statistical analysis:** Conducted using SAS V.9.3 or higher, with significance level set at 0.05."
             ],
             "citations": [
                 "Hokusai-venous thromboembolism (Hokusai VTE), a randomised, multinational, double-blind, non-inferiority trial, evaluated the safety and efficacy of edoxaban versus warfarin in patients with VTE initially treated with heparin.",
-                "The Hokusai VTE trial enrolled 8292 patients from 439 centres worldwide.",
-                "All patients received open-label heparin treatment for at least 5 days. Patients were randomised 1:1 to receive either edoxaban 60 mg once daily or warfarin. The edoxaban dose was halved to 30 mg once daily in patients with a creatinine clearance of 30 to 50 mL/min, a body weight of 60 kg or less or who were receiving concurrent treatment with the P-glycoprotein (inhibitors verapamil or quinidine. Warfarin was started concurrently with heparin and the warfarin dose was adjusted to maintain an INR between 2.0 and 3.0."
+                "In this subanalysis of Hokusai VTE, patients genotyped for variants in CYP2C9 and VKORC1 genes were divided into three warfarin sensitivity types (normal, sensitive and highly sensitive) based on their genotypes.",
+                "Genotypes were determined for CYP2C9 *2 and CYP2C9 *3 (rs1799853 and rs1057910, respectively) and VKORC1 -\u20131639 G\u2192A alleles (rs9923231) in accordance with Good Laboratory Practice guidelines by ILS Genomics (Morrisville, North Carolina, USA) with an analytically validated assay using Sequenom (San Diego, California, USA) technology."
             ]
         },
         "study_results": {
             "content": [
-                "**Study Population:** 47.7% (3956/8292) of patients in Hokusai VTE were included; 1978 were randomized to warfarin.",
-                "**Warfarin Sensitivity Distribution:** 63.0% normal responders, 34.1% sensitive responders, 2.8% highly sensitive responders.",
+                "**Study Population:** The analysis included 47.7% (3956/8292) of the patients in Hokusai VTE, with 1978 patients randomized to warfarin.",
+                "**Warfarin Sensitivity Distribution:** Among warfarin patients, 63.0% were normal responders, 34.1% were sensitive responders, and 2.8% were highly sensitive responders.",
                 "**Heparin Discontinuation:** Sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001).",
                 "**Warfarin Dose:** Sensitive and highly sensitive responders had a decreased final weekly warfarin dose (p<0.001).",
                 "**Overanticoagulation:** Sensitive and highly sensitive responders spent more time overanticoagulated (p<0.001).",
@@ -72,81 +72,62 @@
             ]
         },
         "allele_frequency": {
-            "content": [
-                "**CYP2C9 and VKORC1 polymorphisms:** The polymorphisms were in Hardy-Weinberg equilibrium for all three race groups examined (Caucasians, African Americans, and East Asians).",
-                "**Observed allele frequencies:** The allele frequencies within each race group were consistent with previously published findings."
-            ],
+            "content": "| Gene     | Polymorphism          | Frequency                                                                 |\n|----------|-----------------------|---------------------------------------------------------------------------|\n| CYP2C9   | *CYP2C9* *2*          | The *CYP2C9* alleles associated with increased warfarin sensitivity are more common in individuals of European descent compared with African or Asian descent. |\n| CYP2C9   | *CYP2C9* *3*          | The *CYP2C9* alleles associated with increased warfarin sensitivity are more common in individuals of European descent compared with African or Asian descent. |\n| VKORC1   | -1639 G\u2192A             | The prevalence of the *VKORC1* allele associated with the greatest increase in warfarin sensitivity is highest in individuals of Asian descent. |",
             "citations": [
-                "The CYP2C9 and VKORC1 polymorphisms were in Hardy-Weinberg equilibrium for all three race groups examined (ie, Caucasians, African Americans and East Asians), and the observed allele frequencies within each race group were consistent with previously published findings (online supplementary table 1).[11]",
-                "This variation in warfarin sensitivity by race is expected based on the known differences in allele frequencies[11] and is consistent with the ENGAGE AF-TIMI 48 pharmacogenetic subanalysis.[13]",
-                "PCR primers were validated using a total of 165 independent samples of known genotype representing African, Asian, Caucasian and Hispanic populations."
+                "The CYP2C9 alleles that are the most associated with increased warfarin sensitivity are more common in individuals of European descent compared with African or Asian descent.[11]",
+                "Conversely, the prevalence of the VKORC1 allele that is associated with greatest increase in warfarin sensitivity is the highest in individuals of Asian descent.[11 12]",
+                "The CYP2C9 and VKORC1 polymorphisms were in Hardy-Weinberg equilibrium for all three race groups examined (ie, Caucasians, African Americans and East Asians), and the observed allele frequencies within each race group were consistent with previously published findings (online supplementary table 1).[11]"
             ]
         },
         "additional_resource_links": [
             "The study provides the following additional resources or links related to its design and execution:",
-            "- [DOI link to the article](https://doi.org/10.1136/heartjnl-2016-310901)",
-            "- [PubMed Central (PMC) link to the article](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5749368/)",
-            "- [PDF of the article on PMC](https://pmc.ncbi.nlm.nih.gov/articles/PMC5749368/pdf/heartjnl-2016-310901.pdf)",
-            "- [ClinicalTrials.gov registration for Hokusai VTE trial](https://clinicaltrials.gov/ct2/show/NCT00986154)",
+            "- [Trial registration number: NCT00986154](https://clinicaltrials.gov/ct2/show/NCT00986154)",
+            "- [PDF of the article](https://pmc.ncbi.nlm.nih.gov/articles/PMC5749368/pdf/heartjnl-2016-310901.pdf)",
             "- [Supplementary Materials PDF](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5749368/bin/heartjnl-2016-310901supp001.pdf)"
         ]
     },
     "annotations": {
         "relationships": [
             {
-                "gene": "CYP2C9 and VKORC1",
-                "polymorphism": "CYP2C9*2 (rs1799853), CYP2C9*3 (rs1057910), VKORC1 -1639 G>A (rs9923231)",
-                "relationship_effect": "Patients with sensitive or highly sensitive warfarin genotypes (based on CYP2C9 and VKORC1 variants) have increased risk of bleeding with warfarin compared to normal responders.",
-                "p_value": "Sensitive responders: HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; Highly sensitive responders: HR 1.79 [1.09 to 2.99], p=0.0252",
+                "gene": "CYP2C9",
+                "polymorphism": "*2 (rs1799853)",
+                "relationship_effect": "Carriers of CYP2C9*2 have increased sensitivity to warfarin, require lower final weekly warfarin dose, spend more time overanticoagulated, and have increased bleeding risk with warfarin compared to normal responders.",
+                "p_value": "Final weekly warfarin dose: p<0.001; Time overanticoagulated: p<0.001; Bleeding risk (sensitive responders): HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; Bleeding risk (highly sensitive responders): HR 1.79 [1.09 to 2.99], p=0.0252",
                 "citations": [
                     "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
-                    "The present pharmacogenetic subanalysis of the Hokusai-venous thromboembolism (Hokusai VTE) trial, a trial evaluating edoxaban versus warfarin in patients with VTE initially treated with heparin, demonstrates that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated (p<0.001), require a lower warfarin dose (p<0.001) and have higher bleeding rates with warfarin therapy (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
-                    "The results of this analysis extend the results of the previous ENGAGE AF-TIMI 48 pharmacogenetic analysis to demonstrate that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated, require a lower warfarin dose and have higher bleeding rates with warfarin therapy."
+                    "In warfarin-treated patients, but not edoxaban-treated patients, as genetically defined warfarin sensitivity increased, heparin therapy was discontinued sooner (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) and the final mean weekly warfarin doses decreased (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders).",
+                    "The present pharmacogenetic subanalysis of the Hokusai-venous thromboembolism (Hokusai VTE) trial, a trial evaluating edoxaban versus warfarin in patients with VTE initially treated with heparin, demonstrates that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated (p<0.001), require a lower warfarin dose (p<0.001) and have higher bleeding rates with warfarin therapy (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252)."
                 ],
                 "p_value_citations": [
-                    "Compared with normal responders, sensitive and highly sensitive responders ... had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252)."
+                    "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252)."
                 ]
             },
             {
-                "gene": "CYP2C9 and VKORC1",
-                "polymorphism": "CYP2C9*2 (rs1799853), CYP2C9*3 (rs1057910), VKORC1 -1639 G>A (rs9923231)",
-                "relationship_effect": "As genetically defined warfarin sensitivity increases, patients require a lower final weekly warfarin dose.",
-                "p_value": "p<0.001",
+                "gene": "CYP2C9",
+                "polymorphism": "*3 (rs1057910)",
+                "relationship_effect": "Carriers of CYP2C9*3 have increased sensitivity to warfarin, require lower final weekly warfarin dose, spend more time overanticoagulated, and have increased bleeding risk with warfarin compared to normal responders.",
+                "p_value": "Final weekly warfarin dose: p<0.001; Time overanticoagulated: p<0.001; Bleeding risk (sensitive responders): HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; Bleeding risk (highly sensitive responders): HR 1.79 [1.09 to 2.99], p=0.0252",
                 "citations": [
-                    "In warfarin-treated patients, but not edoxaban-treated patients, as genetically defined warfarin sensitivity increased, heparin therapy was discontinued sooner (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) and the final mean weekly warfarin doses decreased (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) ([figure 2A and B](#F2)figure 2A and B).",
+                    "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
                     "The present pharmacogenetic subanalysis of the Hokusai-venous thromboembolism (Hokusai VTE) trial, a trial evaluating edoxaban versus warfarin in patients with VTE initially treated with heparin, demonstrates that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated (p<0.001), require a lower warfarin dose (p<0.001) and have higher bleeding rates with warfarin therapy (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
-                    "Specifically, as genetically defined warfarin sensitivity increased, heparin therapy was discontinued earlier and the final weekly warfarin dose decreased.[13](#R13)^13^13"
+                    "In warfarin-treated patients, but not edoxaban-treated patients, as genetically defined warfarin sensitivity increased, heparin therapy was discontinued sooner (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) and the final mean weekly warfarin doses decreased (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) ([figure 2A and B](#F2)figure 2A and B)."
                 ],
                 "p_value_citations": [
-                    "...the final mean weekly warfarin doses decreased (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders)."
+                    "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252)."
                 ]
             },
             {
-                "gene": "CYP2C9 and VKORC1",
-                "polymorphism": "CYP2C9*2 (rs1799853), CYP2C9*3 (rs1057910), VKORC1 -1639 G>A (rs9923231)",
-                "relationship_effect": "As genetically defined warfarin sensitivity increases, patients spend more time overanticoagulated (INR >3) during warfarin therapy.",
-                "p_value": "First 90 days: p<0.0001; Entire study: p=0.0004",
+                "gene": "VKORC1",
+                "polymorphism": "-1639 G>A (rs9923231)",
+                "relationship_effect": "Carriers of VKORC1 -1639 A allele have increased sensitivity to warfarin, require lower final weekly warfarin dose, spend more time overanticoagulated, and have increased bleeding risk with warfarin compared to normal responders.",
+                "p_value": "Final weekly warfarin dose: p<0.001; Time overanticoagulated: p<0.001; Bleeding risk (sensitive responders): HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; Bleeding risk (highly sensitive responders): HR 1.79 [1.09 to 2.99], p=0.0252",
                 "citations": [
-                    "However, as genetically defined warfarin sensitivity increased, warfarin-treated patients tended to spend a higher percentage of time with supratherapeutic INRs during the first 90 days of treatment (global p<0.0001) and over the entire study duration (global p=0.0004) ([figure 3](#F3)figure 3).",
+                    "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
                     "The present pharmacogenetic subanalysis of the Hokusai-venous thromboembolism (Hokusai VTE) trial, a trial evaluating edoxaban versus warfarin in patients with VTE initially treated with heparin, demonstrates that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated (p<0.001), require a lower warfarin dose (p<0.001) and have higher bleeding rates with warfarin therapy (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252).",
-                    "The results of this analysis extend the results of the previous ENGAGE AF-TIMI 48 pharmacogenetic analysis to demonstrate that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated, require a lower warfarin dose and have higher bleeding rates with warfarin therapy."
-                ],
-                "p_value_citations": [
-                    "...higher percentage of time with supratherapeutic INRs during the first 90 days of treatment (global p<0.0001) and over the entire study duration (global p=0.0004)."
-                ]
-            },
-            {
-                "gene": "CYP2C9 and VKORC1",
-                "polymorphism": "CYP2C9*2 (rs1799853), CYP2C9*3 (rs1057910), VKORC1 -1639 G>A (rs9923231)",
-                "relationship_effect": "Pooled sensitive responders (sensitive and highly sensitive) have a higher risk of any bleeding event with warfarin compared to normal responders during the first 90 days of treatment.",
-                "p_value": "HR 1.41 [95% CI 1.14 to 1.74]; p=0.0013",
-                "citations": [
-                    "In an exploratory analysis using a two-bin system in which sensitive and highly sensitive responders were pooled (ie, pooled sensitive responders) and compared with normal responders, a statistically higher proportion of pooled sensitive warfarin responders experienced any bleeding events compared with normal warfarin responders during the first 90 days of treatment (HR 1.41 [95% CI 1.14 to 1.74]; p=0.0013) (online supplementary figure 4).",
-                    "In addition, among patients randomised to warfarin, sensitive and highly sensitive responders experienced significantly more bleeding events sooner than normal responders during the first 90 days of treatment (HR: sensitive responder, 1.38 [95% CI 1.11 to 1.71], p=0.004; highly sensitive responders, 1.79 [1.09 to 2.99]; p=0.03) (figure 4).",
-                    "The results of this analysis extend the results of the previous ENGAGE AF-TIMI 48 pharmacogenetic analysis to demonstrate that patients with VTE who have a sensitive or highly sensitive warfarin genotype spend more time overanticoagulated, require a lower warfarin dose and have higher bleeding rates with warfarin therapy."
+                    "In warfarin-treated patients, but not edoxaban-treated patients, as genetically defined warfarin sensitivity increased, heparin therapy was discontinued sooner (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders) and the final mean weekly warfarin doses decreased (global p<0.001; p<0.0001 vs normal responders for both sensitive and highly sensitive responders)."
                 ],
                 "p_value_citations": [
-                    "...a statistically higher proportion of pooled sensitive warfarin responders experienced any bleeding events compared with normal warfarin responders during the first 90 days of treatment (HR 1.41 [95% CI 1.14 to 1.74]; p=0.0013)."
+                    "Compared with normal responders, sensitive and highly sensitive responders had heparin therapy discontinued earlier (p<0.001), had a decreased final weekly warfarin dose (p<0.001), spent more time overanticoagulated (p<0.001) and had an increased bleeding risk with warfarin (sensitive responders HR 1.38 [95% CI 1.11 to 1.71], p=0.0035; highly sensitive responders 1.79 [1.09 to 2.99]; p=0.0252)."
                 ]
             }
         ]
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb
new file mode 100644
index 0000000..9300f99
--- /dev/null
+++ b/notebooks/test.ipynb
@@ -0,0 +1,16 @@
+{
+ "cells": [],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "default",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.13.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pixi.toml b/pixi.toml
index 2497377..72b5063 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -21,6 +21,8 @@ copy-markdown = "python -m src.copy_markdown"
 annotation-pipeline = "python -m src.annotation_pipeline"
 test-citations = "python -m src.citations.one_shot_citations"
 study-parameters = "python -m src.study_parameters"
+variant-ontology = "python -m src.ontology.variant_ontology"
+drug-ontology = "python -m src.ontology.drug_ontology"
 
 [dependencies]
 seaborn = ">=0.13.2,<0.14"
diff --git a/src/citations/line_citation_generator.py b/src/citations/line_citation_generator.py
index bb12699..2926f60 100644
--- a/src/citations/line_citation_generator.py
+++ b/src/citations/line_citation_generator.py
@@ -899,13 +899,13 @@ def _score_sentence_for_study_param(
             Relevance score from 1-10
         """
         sentence_lower = sentence.lower()
-        
+
         # Handle case where parameter_content is a list
         if isinstance(parameter_content, list):
             parameter_lower = " ".join(str(item) for item in parameter_content).lower()
         else:
             parameter_lower = str(parameter_content).lower()
-        
+
         score = 0
 
         # Define keywords for each parameter type
@@ -1203,34 +1203,38 @@ def create_citation_generator(
         return LMCitationGenerator(pmcid, model)
 
 
-def process_annotation_file_with_citations(pmcid: str, model: str = "local") -> AnnotationTable:
+def process_annotation_file_with_citations(
+    pmcid: str, model: str = "local"
+) -> AnnotationTable:
     """
     Convenience function to load annotations from file, add citations, and save back to file.
-    
+
     Args:
         pmcid: PubMed Central ID
         model: Model to use for citation generation
-        
+
     Returns:
         AnnotationTable with citations added
     """
     # Load annotations from file
     annotations = load_annotations_from_file(pmcid)
-    
+
     if not annotations.relationships:
         logger.warning(f"No annotations found for {pmcid}")
         return annotations
-    
+
     # Create citation generator
     generator = create_citation_generator(pmcid, model)
-    
+
     # Add citations to annotations
     updated_annotations = generator.add_citations_to_annotations(annotations)
-    
+
     # Save updated annotations back to file
     update_annotations_in_file(pmcid, updated_annotations)
-    
-    logger.info(f"Successfully processed {len(updated_annotations.relationships)} annotations for {pmcid}")
+
+    logger.info(
+        f"Successfully processed {len(updated_annotations.relationships)} annotations for {pmcid}"
+    )
     return updated_annotations
 
 
@@ -1263,32 +1267,34 @@ def CitationGenerator(
         return create_citation_generator(pmcid, model)
 
 
-def update_annotations_in_file(pmcid: str, updated_annotations: AnnotationTable) -> None:
+def update_annotations_in_file(
+    pmcid: str, updated_annotations: AnnotationTable
+) -> None:
     """
     Save updated annotations back to the JSON file in the new schema format.
-    
+
     Args:
         pmcid: PubMed Central ID
         updated_annotations: AnnotationTable with updated relationships
     """
     import json
     import os
-    
+
     annotation_file = f"data/annotations/{pmcid}.json"
-    
+
     if not os.path.exists(annotation_file):
         logger.error(f"Annotation file not found: {annotation_file}")
         return
-    
+
     try:
         # Load existing data
-        with open(annotation_file, 'r') as f:
+        with open(annotation_file, "r") as f:
             data = json.load(f)
-        
+
         # Update the relationships in the new schema format
         if "annotations" not in data:
             data["annotations"] = {}
-        
+
         data["annotations"]["relationships"] = []
         for rel in updated_annotations.relationships:
             rel_dict = {
@@ -1297,16 +1303,16 @@ def update_annotations_in_file(pmcid: str, updated_annotations: AnnotationTable)
                 "relationship_effect": rel.relationship_effect,
                 "p_value": rel.p_value,
                 "citations": rel.citations,
-                "p_value_citations": rel.p_value_citations
+                "p_value_citations": rel.p_value_citations,
             }
             data["annotations"]["relationships"].append(rel_dict)
-        
+
         # Write back to file
-        with open(annotation_file, 'w') as f:
+        with open(annotation_file, "w") as f:
             json.dump(data, f, indent=4, ensure_ascii=False)
-            
+
         logger.info(f"Updated annotations saved to {annotation_file}")
-        
+
     except Exception as e:
         logger.error(f"Error updating annotations in {annotation_file}: {e}")
 
@@ -1314,26 +1320,26 @@ def update_annotations_in_file(pmcid: str, updated_annotations: AnnotationTable)
 def load_annotations_from_file(pmcid: str) -> AnnotationTable:
     """
     Load annotations from the new JSON schema format.
-    
+
     Args:
         pmcid: PubMed Central ID
-        
+
     Returns:
         AnnotationTable with relationships loaded from the file
     """
     import json
     import os
-    
+
     annotation_file = f"data/annotations/{pmcid}.json"
-    
+
     if not os.path.exists(annotation_file):
         logger.warning(f"Annotation file not found: {annotation_file}")
         return AnnotationTable(relationships=[])
-    
+
     try:
-        with open(annotation_file, 'r') as f:
+        with open(annotation_file, "r") as f:
             data = json.load(f)
-            
+
         # Extract relationships from the new schema format
         if "annotations" in data and "relationships" in data["annotations"]:
             relationships = []
@@ -1345,15 +1351,15 @@ def load_annotations_from_file(pmcid: str) -> AnnotationTable:
                     relationship_effect=rel_data.get("relationship_effect", ""),
                     p_value=rel_data.get("p_value", ""),
                     citations=rel_data.get("citations", []),
-                    p_value_citations=rel_data.get("p_value_citations", [])
+                    p_value_citations=rel_data.get("p_value_citations", []),
                 )
                 relationships.append(relationship)
-            
+
             return AnnotationTable(relationships=relationships)
         else:
             logger.warning(f"No annotations found in file: {annotation_file}")
             return AnnotationTable(relationships=[])
-            
+
     except Exception as e:
         logger.error(f"Error loading annotations from {annotation_file}: {e}")
         return AnnotationTable(relationships=[])
@@ -1371,11 +1377,14 @@ def main():
 
     # Load annotations from the updated schema file
     annotations = load_annotations_from_file(pmcid)
-    
+
     if not annotations.relationships:
-        logger.error("No annotations loaded from file. Creating a test annotation instead.")
+        logger.error(
+            "No annotations loaded from file. Creating a test annotation instead."
+        )
         # Fallback to creating a mock annotation for testing
         from src.annotation_table import AnnotationRelationship
+
         test_annotation = AnnotationRelationship(
             gene="CYP2C9",
             polymorphism="rs1057910 GG",
@@ -1392,7 +1401,7 @@ def main():
     # Test with first annotation
     test_annotation = annotations.relationships[0]
     print(f"Test annotation: {test_annotation.gene} {test_annotation.polymorphism}")
-    
+
     # Get citations for the annotation
     citations = generator._get_top_citations_for_annotation(test_annotation, top_k=3)
 
diff --git a/src/deprecated/functional_annotation_extraction.py b/src/deprecated/functional_annotation_extraction.py
index 4d4dfc7..64fb642 100644
--- a/src/deprecated/functional_annotation_extraction.py
+++ b/src/deprecated/functional_annotation_extraction.py
@@ -5,7 +5,11 @@
 from typing import List
 from loguru import logger
 from pydantic import BaseModel
-from src.deprecated.variants import Variant, FunctionalAnnotation, FunctionalAnnotationList
+from src.deprecated.variants import (
+    Variant,
+    FunctionalAnnotation,
+    FunctionalAnnotationList,
+)
 from src.prompts import PromptVariables, GeneratorPrompt, ParserPrompt
 from src.inference import Generator, Parser
 from src.utils import get_article_text
diff --git a/src/deprecated/phenotype_annotation_extraction.py b/src/deprecated/phenotype_annotation_extraction.py
index 5a95608..72feaba 100644
--- a/src/deprecated/phenotype_annotation_extraction.py
+++ b/src/deprecated/phenotype_annotation_extraction.py
@@ -5,7 +5,11 @@
 from typing import List
 from loguru import logger
 from pydantic import BaseModel
-from src.deprecated.variants import Variant, PhenotypeAnnotation, PhenotypeAnnotationList
+from src.deprecated.variants import (
+    Variant,
+    PhenotypeAnnotation,
+    PhenotypeAnnotationList,
+)
 from src.prompts import PromptVariables, GeneratorPrompt, ParserPrompt
 from src.inference import Generator, Parser
 from src.utils import get_article_text
diff --git a/src/ontology/drug_ontology.py b/src/ontology/drug_ontology.py
index e1f7376..c38ab1d 100644
--- a/src/ontology/drug_ontology.py
+++ b/src/ontology/drug_ontology.py
@@ -4,10 +4,6 @@
 
 import requests
 
-# how to use, you have thew following,
-
-
-
 
 class DrugNormalizer(BaseNormalizer):
     """Normalizes drug names, and connect to common ID's per use."""
@@ -124,7 +120,9 @@ def lookup_drug_pharmgkb(self, raw: str) -> Optional[NormalizationResult]:
         except requests.RequestException as exc:
             logger.warning(f"PharmGKB request failed for '{raw}': {exc}")
         except Exception as exc:
-            logger.warning(f"Unexpected error during PharmGKB lookup for '{raw}': {exc}")
+            logger.warning(
+                f"Unexpected error during PharmGKB lookup for '{raw}': {exc}"
+            )
 
         return None
 
diff --git a/src/ontology/variant_ontology.py b/src/ontology/variant_ontology.py
index 8092949..06d070f 100644
--- a/src/ontology/variant_ontology.py
+++ b/src/ontology/variant_ontology.py
@@ -6,7 +6,6 @@
 import requests
 
 
-
 @dataclass
 class NormalizationResult:
     raw_input: str
@@ -225,6 +224,7 @@ def fetch_star_alleles(
 
         return results
 
+
 def extract_variants_from_annotations():
     """
     Extract and normalize variants from annotation files.
@@ -239,9 +239,7 @@ def extract_variants_from_annotations():
     rsid_normalizer = RSIDNormalizer(email="test@example.com")
     star_normalizer = StarAlleleNormalizer()
 
-    annotation_dir = (
-        "data/annotations"
-    )
+    annotation_dir = "data/annotations"
     if not os.path.exists(annotation_dir):
         print(f"❌ Annotation directory not found: {annotation_dir}")
         return