From 36ed54225d2806b93d76b0dbec034ec7ac7287fd Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Mon, 1 Dec 2025 14:31:26 +0000 Subject: [PATCH 01/12] ML cookbooks WIP --- cookbook/sepsis_cds_hooks.py | 166 +++++++++++++++++++ cookbook/sepsis_fhir_batch.py | 139 ++++++++++++++++ healthchain/io/containers/dataset.py | 8 +- healthchain/sandbox/sandboxclient.py | 77 +++++---- scripts/extract_mimic_demo_patients.py | 217 +++++++++++++++++++++++++ 5 files changed, 571 insertions(+), 36 deletions(-) create mode 100644 cookbook/sepsis_cds_hooks.py create mode 100644 cookbook/sepsis_fhir_batch.py create mode 100644 scripts/extract_mimic_demo_patients.py diff --git a/cookbook/sepsis_cds_hooks.py b/cookbook/sepsis_cds_hooks.py new file mode 100644 index 00000000..7afcdd75 --- /dev/null +++ b/cookbook/sepsis_cds_hooks.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +""" +Sepsis Risk Prediction via CDS Hooks + +Real-time sepsis alerts when clinician opens patient chart. +Uses pre-extracted MIMIC patient data for fast demos. + +Demo patients extracted from MIMIC-on-FHIR using: + python scripts/extract_mimic_demo_patients.py + +Requirements: +- pip install healthchain joblib xgboost + +Run: +- python sepsis_cds_hooks.py +""" + +from pathlib import Path + +import joblib +from dotenv import load_dotenv + +from healthchain.gateway import HealthChainAPI, CDSHooksService +from healthchain.io import Dataset +from healthchain.models import CDSRequest, CDSResponse +from healthchain.models.responses.cdsresponse import Card +from healthchain.pipeline import Pipeline + +load_dotenv() + +# Configuration +SCRIPT_DIR = Path(__file__).parent +MODEL_PATH = SCRIPT_DIR / "models" / "sepsis_model.pkl" +SCHEMA_PATH = ( + SCRIPT_DIR / ".." / "healthchain" / "configs" / "features" / "sepsis_vitals.yaml" +) +DEMO_PATIENTS_DIR = SCRIPT_DIR / "data" / "mimic_demo_patients" + +# Load model +model_data = joblib.load(MODEL_PATH) +model = model_data["model"] +feature_names = model_data["metadata"]["feature_names"] +threshold = model_data["metadata"]["metrics"].get("optimal_threshold", 0.5) + + +def create_pipeline() -> Pipeline[Dataset]: + """Build sepsis prediction pipeline.""" + pipeline = Pipeline[Dataset]() + + @pipeline.add_node + def impute_missing(dataset: Dataset) -> Dataset: + dataset.data = dataset.data.fillna(dataset.data.median(numeric_only=True)) + return dataset + + @pipeline.add_node + def run_inference(dataset: Dataset) -> Dataset: + features = dataset.data[feature_names] + probabilities = model.predict_proba(features)[:, 1] + dataset.metadata["probabilities"] = probabilities + return dataset + + return pipeline + + +def create_app(): + pipeline = create_pipeline() + cds = CDSHooksService() + + @cds.hook("patient-view", id="sepsis-risk") + def sepsis_alert(request: CDSRequest) -> CDSResponse: + prefetch = request.prefetch or {} + if not prefetch: + return CDSResponse(cards=[]) + + # Merge keyed prefetch into single bundle + # Format: {"patient": {...}, "heart_rate": {"entry": [...]}, ...} + entries = [] + for key, value in prefetch.items(): + if key == "patient": + entries.append({"resource": value}) + elif isinstance(value, dict) and "entry" in value: + entries.extend(value["entry"]) + + bundle = {"type": "collection", "entry": entries} + + # FHIR → Dataset → Prediction + dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) + result = pipeline(dataset) + + # print("Result:") + # print(result.data.head(10)) + # print("Metadata:") + # print(result.metadata) + + probability = 
float(result.metadata["probabilities"][0]) + risk = ( + "high" if probability > 0.7 else "moderate" if probability > 0.4 else "low" + ) + + if risk in ["high", "moderate"]: + summary = f"⚠️ Sepsis Risk: {risk.upper()} ({probability:.0%})" + indicator = "critical" if risk == "high" else "warning" + detail = ( + "**AI Guidance:**\n" + f"- Predicted risk: **{risk.upper()}** ({probability:.0%})\n" + "- Recommend sepsis workup and early intervention." + ) + title = "Sepsis Alert (AI Prediction)" + source = { + "label": "HealthChain Sepsis Predictor", + "url": "https://www.sccm.org/SurvivingSepsisCampaign/Guidelines/Adult-Patients", + } + return CDSResponse( + cards=[ + Card( + summary=summary, + indicator=indicator, + detail=detail, + source=source, + title=title, + ) + ] + ) + + return CDSResponse(cards=[]) + + app = HealthChainAPI(title="Sepsis CDS Hooks") + app.register_service(cds, path="/cds") + return app + + +app = create_app() + + +if __name__ == "__main__": + import threading + import uvicorn + from time import sleep + from healthchain.sandbox import SandboxClient + + # Start server + def run_server(): + uvicorn.run(app, port=8000, log_level="warning") + + server = threading.Thread(target=run_server, daemon=True) + server.start() + sleep(2) + + # Test with pre-extracted demo patients (fast, realistic per-patient data) + client = SandboxClient( + url="http://localhost:8000/cds/cds-services/sepsis-risk", + workflow="patient-view", + ) + client.load_from_path(DEMO_PATIENTS_DIR) + responses = client.send_requests() + client.save_results(save_request=True, save_response=True, directory="./output/") + + print(f"\nProcessed {len(responses)} requests") + for i, resp in enumerate(responses): + cards = resp.get("cards", []) + if cards: + print(f" Patient {i+1}: {cards[0].get('summary', 'No alert')}") + else: + print(f" Patient {i+1}: Low risk (no alert)") + + server.join() diff --git a/cookbook/sepsis_fhir_batch.py b/cookbook/sepsis_fhir_batch.py new file mode 100644 index 00000000..252c455c --- /dev/null +++ b/cookbook/sepsis_fhir_batch.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +""" +Sepsis Batch Screening with FHIR Gateway + +Batch process patients and write RiskAssessment resources to FHIR server. +Demonstrates querying FHIR server and writing results back. 
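+
+Rough shape of the flow (all names as defined below in this file):
+
+    bundle = MimicOnFHIRLoader().load(...)      # demo stand-in for a server query
+    dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH)
+    result = pipeline(dataset)                  # impute missing values, predict
+    risk_assessments = result.to_risk_assessment(...)
+    for ra in risk_assessments:
+        gateway.create(ra, source="fhir")       # write back to the FHIR server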
+ +Requirements: +- pip install healthchain joblib xgboost python-dotenv + +Environment Variables: +- MEDPLUM_CLIENT_ID, MEDPLUM_CLIENT_SECRET, MEDPLUM_BASE_URL + +Run: +- python sepsis_fhir_batch.py +""" + +from pathlib import Path + +import joblib +from dotenv import load_dotenv + +from healthchain.gateway import HealthChainAPI, FHIRGateway +from healthchain.gateway.clients.fhir.base import FHIRAuthConfig +from healthchain.io import Dataset +from healthchain.pipeline import Pipeline + +load_dotenv() + +# Configuration +SCRIPT_DIR = Path(__file__).parent +MODEL_PATH = SCRIPT_DIR / "models" / "sepsis_model.pkl" +SCHEMA_PATH = "healthchain/configs/features/sepsis_vitals.yaml" + +# Load model +model_data = joblib.load(MODEL_PATH) +model = model_data["model"] +feature_names = model_data["metadata"]["feature_names"] +threshold = model_data["metadata"]["metrics"].get("optimal_threshold", 0.5) + +# FHIR Gateway +config = FHIRAuthConfig.from_env("MEDPLUM") +gateway = FHIRGateway() +gateway.add_source("fhir", config.to_connection_string()) + + +def create_pipeline() -> Pipeline[Dataset]: + """Build sepsis prediction pipeline.""" + pipeline = Pipeline[Dataset]() + + @pipeline.add_node + def impute_missing(dataset: Dataset) -> Dataset: + dataset.data = dataset.data.fillna(dataset.data.median(numeric_only=True)) + return dataset + + @pipeline.add_node + def run_inference(dataset: Dataset) -> Dataset: + features = dataset.data[feature_names] + probabilities = model.predict_proba(features)[:, 1] + predictions = (probabilities >= threshold).astype(int) + dataset.metadata["predictions"] = predictions + dataset.metadata["probabilities"] = probabilities + return dataset + + return pipeline + + +def run_batch_screening(): + """ + Run batch sepsis screening. + + In production: query FHIR server for ICU patients + For demo: load from MIMIC-on-FHIR + """ + from healthchain.sandbox.loaders import MimicOnFHIRLoader + + pipeline = create_pipeline() + + # Load data (production would use: gateway.search(Patient, {"location": "ICU"})) + loader = MimicOnFHIRLoader() + bundle = loader.load( + data_dir="../datasets/mimic-iv-clinical-database-demo-on-fhir-2.1.0/", + resource_types=[ + "MimicObservationChartevents", + "MimicObservationLabevents", + "MimicPatient", + ], + as_dict=True, + ) + + # FHIR → Dataset → Predictions → RiskAssessments + dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) + result = pipeline(dataset) + + risk_assessments = result.to_risk_assessment( + result.metadata["predictions"], + result.metadata["probabilities"], + outcome_code="A41.9", + outcome_display="Sepsis", + model_name="XGBoost", + ) + + print(f"Processed {len(result)} patients") + high_risk = sum( + 1 + for ra in risk_assessments + if ra.prediction[0].qualitativeRisk.coding[0].code == "high" + ) + print(f"High risk: {high_risk}") + + # Write to FHIR server + for ra in risk_assessments: + gateway.create(ra, source="fhir") + print(f"Created RiskAssessment/{ra.id}") + + return risk_assessments + + +def create_app(): + """Expose batch endpoint via API.""" + app = HealthChainAPI(title="Sepsis Batch Screening") + app.register_gateway(gateway, path="/fhir") + return app + + +app = create_app() + + +if __name__ == "__main__": + import uvicorn + + # Run batch screening + print("=== Batch Sepsis Screening ===") + run_batch_screening() + + # Start API server + print("\n=== FHIR Gateway Server ===") + print("http://localhost:8000/fhir/") + uvicorn.run(app, port=8000) diff --git a/healthchain/io/containers/dataset.py 
b/healthchain/io/containers/dataset.py index 39740be5..99f7966f 100644 --- a/healthchain/io/containers/dataset.py +++ b/healthchain/io/containers/dataset.py @@ -1,7 +1,7 @@ import pandas as pd import numpy as np -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, Iterator, List, Union, Optional @@ -23,6 +23,10 @@ class Dataset(DataContainer[pd.DataFrame]): """ A container for tabular data optimized for ML inference, lightweight wrapper around a pandas DataFrame. + Attributes: + data: The pandas DataFrame containing the dataset. + metadata: Dict for storing pipeline results (predictions, probabilities, etc.) + Methods: from_csv: Load Dataset from CSV. from_dict: Load Dataset from dict. @@ -31,6 +35,8 @@ class Dataset(DataContainer[pd.DataFrame]): to_risk_assessment: Convert predictions to FHIR RiskAssessment. """ + metadata: Dict[str, Any] = field(default_factory=dict) + def __post_init__(self): if not isinstance(self.data, pd.DataFrame): raise TypeError("data must be a pandas DataFrame") diff --git a/healthchain/sandbox/sandboxclient.py b/healthchain/sandbox/sandboxclient.py index da121501..c3f59576 100644 --- a/healthchain/sandbox/sandboxclient.py +++ b/healthchain/sandbox/sandboxclient.py @@ -464,7 +464,7 @@ def send_requests(self) -> List[Dict]: log.debug(f"Making POST request to: {self.url}") response = client.post( url=str(self.url), - json=request.model_dump(exclude_none=True), + json=request.model_dump(exclude_none=True, mode="json"), timeout=timeout, ) response.raise_for_status() @@ -472,7 +472,9 @@ def send_requests(self) -> List[Dict]: try: response_data = response.json() cds_response = CDSResponse(**response_data) - responses.append(cds_response.model_dump(exclude_none=True)) + responses.append( + cds_response.model_dump(mode="json", exclude_none=True) + ) except json.JSONDecodeError: log.error( f"Invalid JSON response from {self.url}. " @@ -507,51 +509,56 @@ def send_requests(self) -> List[Dict]: return responses - def save_results(self, directory: Union[str, Path] = "./output/") -> None: + def save_results( + self, + directory: Union[str, Path] = "./output/", + save_request: bool = True, + save_response: bool = True, + ) -> None: """ - Save request and response data to disk. + Save request and/or response data to disk. Args: directory: Directory to save data to (default: "./output/") + save_request: Whether to save request data (default: True) + save_response: Whether to save response data (default: True) """ - if not self.responses: + if not self.responses and save_response: raise RuntimeError( "No responses to save. 
Send requests first using send_requests()" ) save_dir = Path(directory) - request_path = ensure_directory_exists(save_dir / "requests") - - # Determine file extension based on protocol extension = "xml" if self.protocol == ApiProtocol.soap else "json" - # Save requests - if self.protocol == ApiProtocol.soap: - request_data = [request.model_dump_xml() for request in self.requests] - else: - request_data = [ - request.model_dump(exclude_none=True) for request in self.requests - ] - - save_data_to_directory( - request_data, - "request", - self.sandbox_id, - request_path, - extension, - ) - log.info(f"Saved request data at {request_path}/") - - # Save responses - response_path = ensure_directory_exists(save_dir / "responses") - save_data_to_directory( - self.responses, - "response", - self.sandbox_id, - response_path, - extension, - ) - log.info(f"Saved response data at {response_path}/") + if save_request: + request_path = ensure_directory_exists(save_dir / "requests") + if self.protocol == ApiProtocol.soap: + request_data = [request.model_dump_xml() for request in self.requests] + else: + request_data = [ + request.model_dump(mode="json", exclude_none=True) + for request in self.requests + ] + save_data_to_directory( + request_data, + "request", + self.sandbox_id, + request_path, + extension, + ) + log.info(f"Saved request data at {request_path}/") + + if save_response: + response_path = ensure_directory_exists(save_dir / "responses") + save_data_to_directory( + self.responses, + "response", + self.sandbox_id, + response_path, + extension, + ) + log.info(f"Saved response data at {response_path}/") def get_status(self) -> Dict[str, Any]: """ diff --git a/scripts/extract_mimic_demo_patients.py b/scripts/extract_mimic_demo_patients.py new file mode 100644 index 00000000..834df100 --- /dev/null +++ b/scripts/extract_mimic_demo_patients.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Extract Demo Patient Prefetch from MIMIC-on-FHIR + +Creates CDS Hooks prefetch files with only the observations needed for +sepsis prediction, keyed by feature name. Much smaller than full bundles! + +Customize: + - MIMIC_DIR: Path to your MIMIC-on-FHIR dataset + - MODEL_PATH: Path to your trained model pickle + - SCHEMA_PATH: Feature schema defining which observations to extract + - OUTPUT_DIR: Where to save extracted patient files + - NUM_PATIENTS_PER_RISK: How many patients to extract per risk level + +Run: + python scripts/extract_mimic_demo_patients.py + +Output format: + { + "patient": {...Patient resource...}, + "heart_rate": {"resourceType": "Bundle", "entry": [...]}, + "temperature": {"resourceType": "Bundle", "entry": [...]}, + ... + } +""" + +import json +from pathlib import Path + +import joblib +import yaml + +from healthchain.sandbox.loaders import MimicOnFHIRLoader +from healthchain.io import Dataset +from healthchain.pipeline import Pipeline + +import os + +try: + from dotenv import load_dotenv + + load_dotenv() +except ImportError: + print( + "Warning: dotenv not installed. Please manually set the MIMIC_FHIR_PATH environment variable." 
+ ) + + +# ============================================================================= +# CUSTOMIZE THESE +# ============================================================================= + +MIMIC_DIR = os.getenv("MIMIC_FHIR_PATH") +MODEL_PATH = "cookbook/models/sepsis_model.pkl" +SCHEMA_PATH = "healthchain/configs/features/sepsis_vitals.yaml" +OUTPUT_DIR = Path("cookbook/data/mimic_demo_patients") + +# Number of patients to extract per risk level (high/moderate/low) +NUM_PATIENTS_PER_RISK = 1 + +# ============================================================================= + + +def load_observation_codes(schema_path: str) -> dict: + """Load feature schema and extract observation codes.""" + with open(schema_path) as f: + schema = yaml.safe_load(f) + + codes = {} + for feature_name, config in schema["features"].items(): + if config.get("fhir_resource") == "Observation": + codes[config["code"]] = feature_name + return codes + + +def create_pipeline(model, feature_names) -> Pipeline[Dataset]: + """Build prediction pipeline.""" + pipeline = Pipeline[Dataset]() + + @pipeline.add_node + def impute_missing(dataset: Dataset) -> Dataset: + dataset.data = dataset.data.fillna(dataset.data.median(numeric_only=True)) + return dataset + + @pipeline.add_node + def run_inference(dataset: Dataset) -> Dataset: + features = dataset.data[feature_names] + probabilities = model.predict_proba(features)[:, 1] + dataset.metadata["probabilities"] = probabilities + return dataset + + return pipeline + + +def get_observation_code(resource: dict) -> str: + """Extract MIMIC code from Observation resource.""" + for coding in resource.get("code", {}).get("coding", []): + if "mimic" in coding.get("system", ""): + return coding.get("code", "") + return "" + + +def extract_patient_prefetch(bundle: dict, patient_ref: str, obs_codes: dict) -> dict: + """Extract keyed prefetch for a patient with only needed observations.""" + patient_id = patient_ref.split("/")[-1] + prefetch = {} + feature_obs = {name: [] for name in obs_codes.values()} + + for entry in bundle["entry"]: + resource = entry.get("resource", {}) + resource_type = resource.get("resourceType", "") + + if resource_type == "Patient" and resource.get("id") == patient_id: + prefetch["patient"] = resource + + elif resource_type == "Observation": + subject = resource.get("subject", {}) + if subject.get("reference", "").endswith(patient_id): + code = get_observation_code(resource) + if code in obs_codes: + feature_obs[obs_codes[code]].append(entry) + + for feature_name, entries in feature_obs.items(): + if entries: + prefetch[feature_name] = { + "resourceType": "Bundle", + "type": "searchset", + "entry": entries, + } + + return prefetch + + +def main(): + print("=" * 60) + print("MIMIC Demo Patient Extraction") + print("=" * 60) + + if MIMIC_DIR is None: + print("Error: MIMIC_FHIR_PATH environment variable is not set.") + return + + # Load configs + obs_codes = load_observation_codes(SCHEMA_PATH) + print(f"Features to extract: {list(obs_codes.values())}") + + model_data = joblib.load(MODEL_PATH) + model = model_data["model"] + feature_names = model_data["metadata"]["feature_names"] + print(f"Model features: {feature_names}") + + # Load MIMIC data + print("\nLoading MIMIC-on-FHIR...") + loader = MimicOnFHIRLoader() + bundle = loader.load( + data_dir=MIMIC_DIR, + resource_types=[ + "MimicObservationChartevents", + "MimicObservationLabevents", + "MimicPatient", + ], + as_dict=True, + ) + print(f"Loaded {len(bundle['entry']):,} resources") + + # Run predictions + 
print("\nExtracting features and predicting...") + dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) + pipeline = create_pipeline(model, feature_names) + result = pipeline(dataset) + + df = result.data.copy() + df["probability"] = result.metadata["probabilities"] + df["risk"] = df["probability"].apply( + lambda p: "high" if p > 0.7 else "moderate" if p > 0.4 else "low" + ) + + print(f"\nRisk distribution ({len(df)} patients):") + print(df["risk"].value_counts().to_string()) + + # Select patients per risk level + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + patients_to_extract = [] + + for risk_level in ["high", "moderate", "low"]: + risk_patients = df[df["risk"] == risk_level] + for i in range(min(NUM_PATIENTS_PER_RISK, len(risk_patients))): + patient = risk_patients.iloc[i] + label = ( + f"{risk_level}_risk" + if NUM_PATIENTS_PER_RISK == 1 + else f"{risk_level}_risk_{i+1}" + ) + patients_to_extract.append((label, patient)) + + # Extract and save + print(f"\nExtracting to {OUTPUT_DIR}/") + for label, patient in patients_to_extract: + prefetch = extract_patient_prefetch(bundle, patient["patient_ref"], obs_codes) + + output_file = OUTPUT_DIR / f"{label}_patient.json" + with open(output_file, "w") as f: + json.dump(prefetch, f, indent=2, default=str) + + obs_count = sum( + len(v.get("entry", [])) for k, v in prefetch.items() if k != "patient" + ) + features_with_data = [k for k in prefetch if k != "patient"] + print( + f" {label}: {patient['probability']:.1%} risk, {obs_count} obs ({', '.join(features_with_data)})" + ) + + print("\nDone! Use these files with SandboxClient.load_from_path()") + + +if __name__ == "__main__": + main() From 4a00c9b5b89b85f2ae3f86d97b6b64ebc283252c Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 10:39:18 +0000 Subject: [PATCH 02/12] Working cookbooks --- cookbook/sepsis_cds_hooks.py | 12 +- cookbook/sepsis_fhir_batch.py | 175 +++++++++----- scripts/extract_mimic_demo_patients.py | 315 +++++++++++++++++-------- 3 files changed, 340 insertions(+), 162 deletions(-) mode change 100644 => 100755 scripts/extract_mimic_demo_patients.py diff --git a/cookbook/sepsis_cds_hooks.py b/cookbook/sepsis_cds_hooks.py index 7afcdd75..d80249af 100644 --- a/cookbook/sepsis_cds_hooks.py +++ b/cookbook/sepsis_cds_hooks.py @@ -2,17 +2,17 @@ """ Sepsis Risk Prediction via CDS Hooks -Real-time sepsis alerts when clinician opens patient chart. -Uses pre-extracted MIMIC patient data for fast demos. +Real-time sepsis alerts triggered when clinician opens a patient chart. +Uses pre-extracted MIMIC patient data for demos. 
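+
+Each demo file is a CDS Hooks prefetch keyed by feature name, roughly:
+
+    {"patient": {...}, "heart_rate": {"entry": [...]}, ...}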
 
 Demo patients extracted from MIMIC-on-FHIR using:
     python scripts/extract_mimic_demo_patients.py
 
 Requirements:
-- pip install healthchain joblib xgboost
+    pip install healthchain joblib xgboost python-dotenv
 
 Run:
-- python sepsis_cds_hooks.py
+    python cookbook/sepsis_cds_hooks.py
 """
 
 from pathlib import Path
@@ -89,8 +89,6 @@ def sepsis_alert(request: CDSRequest) -> CDSResponse:
 
         # print("Result:")
         # print(result.data.head(10))
-        # print("Metadata:")
-        # print(result.metadata)
 
         probability = float(result.metadata["probabilities"][0])
         risk = (
@@ -98,7 +96,7 @@ def sepsis_alert(request: CDSRequest) -> CDSResponse:
         )
 
         if risk in ["high", "moderate"]:
-            summary = f"⚠️ Sepsis Risk: {risk.upper()} ({probability:.0%})"
+            summary = f"Sepsis Risk: {risk.upper()} ({probability:.0%})"
             indicator = "critical" if risk == "high" else "warning"
             detail = (
                 "**AI Guidance:**\n"
diff --git a/cookbook/sepsis_fhir_batch.py b/cookbook/sepsis_fhir_batch.py
index 252c455c..a9ae5880 100644
--- a/cookbook/sepsis_fhir_batch.py
+++ b/cookbook/sepsis_fhir_batch.py
@@ -2,23 +2,26 @@
 """
 Sepsis Batch Screening with FHIR Gateway
 
-Batch process patients and write RiskAssessment resources to FHIR server.
-Demonstrates querying FHIR server and writing results back.
+Query patients from a FHIR server, run batch sepsis predictions, and write
+RiskAssessment resources back. Demonstrates real FHIR server integration.
 
-Requirements:
-- pip install healthchain joblib xgboost python-dotenv
-
-Environment Variables:
-- MEDPLUM_CLIENT_ID, MEDPLUM_CLIENT_SECRET, MEDPLUM_BASE_URL
+Setup:
+    1. Extract and upload demo patients:
+       python scripts/extract_mimic_demo_patients.py --minimal --upload
+    2. Update DEMO_PATIENT_IDS below with the server-assigned IDs
+    3. Set env vars: MEDPLUM_CLIENT_ID, MEDPLUM_CLIENT_SECRET, MEDPLUM_BASE_URL
 
 Run:
-- python sepsis_fhir_batch.py
+    python cookbook/sepsis_fhir_batch.py
 """
 
 from pathlib import Path
+from typing import List
 
 import joblib
 from dotenv import load_dotenv
+from fhir.resources.patient import Patient
+from fhir.resources.observation import Observation
 
 from healthchain.gateway import HealthChainAPI, FHIRGateway
 from healthchain.gateway.clients.fhir.base import FHIRAuthConfig
@@ -30,7 +33,9 @@
 # Configuration
 SCRIPT_DIR = Path(__file__).parent
 MODEL_PATH = SCRIPT_DIR / "models" / "sepsis_model.pkl"
-SCHEMA_PATH = "healthchain/configs/features/sepsis_vitals.yaml"
+SCHEMA_PATH = (
+    SCRIPT_DIR / ".." / "healthchain" / "configs" / "features" / "sepsis_vitals.yaml"
+)
 
 # Load model
 model_data = joblib.load(MODEL_PATH)
@@ -38,10 +43,21 @@
 feature_names = model_data["metadata"]["feature_names"]
 threshold = model_data["metadata"]["metrics"].get("optimal_threshold", 0.5)
 
-# FHIR Gateway
-config = FHIRAuthConfig.from_env("MEDPLUM")
-gateway = FHIRGateway()
-gateway.add_source("fhir", config.to_connection_string())
+# FHIR sources (configure via environment)
+MEDPLUM_URL = None
+EPIC_URL = None
+
+try:
+    config = FHIRAuthConfig.from_env("MEDPLUM")
+    MEDPLUM_URL = config.to_connection_string()
+except Exception:
+    pass
+
+try:
+    config = FHIRAuthConfig.from_env("EPIC")
+    EPIC_URL = config.to_connection_string()
+except Exception:
+    pass
 
 
 def create_pipeline() -> Pipeline[Dataset]:
@@ -65,75 +81,114 @@ def run_inference(dataset: Dataset) -> Dataset:
     return pipeline
 
 
-def run_batch_screening():
-    """
-    Run batch sepsis screening.
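+# Sketch of the per-patient flow implemented by screen_patient below:
+# search the source for the patient's Observations and Patient resource,
+# merge them into a collection Bundle, map to a Dataset via the feature
+# schema, run the pipeline, and write a RiskAssessment back to the source.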
+def screen_patient( + gateway: FHIRGateway, pipeline: Pipeline, patient_id: str, source: str +): + """Screen a single patient for sepsis risk.""" + # Query patient data from FHIR server + obs_bundle = gateway.search( + Observation, {"patient": patient_id, "_count": "100"}, source + ) + patient_bundle = gateway.search(Patient, {"_id": patient_id}, source) - In production: query FHIR server for ICU patients - For demo: load from MIMIC-on-FHIR - """ - from healthchain.sandbox.loaders import MimicOnFHIRLoader + # Merge into single bundle + entries = [] + if patient_bundle.entry: + entries.extend([e.model_dump() for e in patient_bundle.entry]) + if obs_bundle.entry: + entries.extend([e.model_dump() for e in obs_bundle.entry]) - pipeline = create_pipeline() + if not entries: + return None, "No data found" - # Load data (production would use: gateway.search(Patient, {"location": "ICU"})) - loader = MimicOnFHIRLoader() - bundle = loader.load( - data_dir="../datasets/mimic-iv-clinical-database-demo-on-fhir-2.1.0/", - resource_types=[ - "MimicObservationChartevents", - "MimicObservationLabevents", - "MimicPatient", - ], - as_dict=True, - ) + # FHIR → Dataset → Prediction + bundle = {"type": "collection", "entry": entries} + dataset = Dataset.from_fhir_bundle(bundle, schema=str(SCHEMA_PATH)) + + if len(dataset.data) == 0: + return None, "No matching features" - # FHIR → Dataset → Predictions → RiskAssessments - dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) result = pipeline(dataset) + probability = float(result.metadata["probabilities"][0]) + risk = "high" if probability > 0.7 else "moderate" if probability > 0.4 else "low" + # Create and save RiskAssessment risk_assessments = result.to_risk_assessment( result.metadata["predictions"], result.metadata["probabilities"], outcome_code="A41.9", outcome_display="Sepsis", - model_name="XGBoost", - ) - - print(f"Processed {len(result)} patients") - high_risk = sum( - 1 - for ra in risk_assessments - if ra.prediction[0].qualitativeRisk.coding[0].code == "high" + model_name="sepsis_xgboost_v1", ) - print(f"High risk: {high_risk}") - # Write to FHIR server for ra in risk_assessments: - gateway.create(ra, source="fhir") - print(f"Created RiskAssessment/{ra.id}") + gateway.create(ra, source=source) - return risk_assessments + return risk_assessments[ + 0 + ] if risk_assessments else None, f"{risk.upper()} ({probability:.0%})" + + +def batch_screen(gateway: FHIRGateway, patient_ids: List[str], source: str = "medplum"): + """Screen multiple patients for sepsis risk.""" + pipeline = create_pipeline() + results = [] + + for patient_id in patient_ids: + try: + ra, status = screen_patient(gateway, pipeline, patient_id, source) + if ra: + results.append( + {"patient": patient_id, "status": status, "risk_assessment": ra.id} + ) + print(f" {patient_id}: {status} → RiskAssessment/{ra.id}") + else: + results.append({"patient": patient_id, "status": status}) + print(f" {patient_id}: {status}") + except Exception as e: + results.append({"patient": patient_id, "error": str(e)}) + print(f" {patient_id}: Error - {e}") + + return results def create_app(): - """Expose batch endpoint via API.""" + """Create FHIR gateway app with configured sources.""" + gateway = FHIRGateway() + + # Add configured sources + if MEDPLUM_URL: + gateway.add_source("medplum", MEDPLUM_URL) + print("✓ Medplum configured") + if EPIC_URL: + gateway.add_source("epic", EPIC_URL) + print("✓ Epic configured") + app = HealthChainAPI(title="Sepsis Batch Screening") app.register_gateway(gateway, 
path="/fhir") - return app - -app = create_app() + return app, gateway -if __name__ == "__main__": - import uvicorn +# Create app at module level +app, gateway = create_app() - # Run batch screening - print("=== Batch Sepsis Screening ===") - run_batch_screening() - # Start API server - print("\n=== FHIR Gateway Server ===") - print("http://localhost:8000/fhir/") - uvicorn.run(app, port=8000) +if __name__ == "__main__": + # Demo patient IDs from: python scripts/extract_mimic_demo_patients.py --minimal --upload + # (Update these with server-assigned IDs after upload) + DEMO_PATIENT_IDS = [ + "702e11e8-6d21-41dd-9b48-31715fdc0fb1", # high risk + "3b0da7e9-0379-455a-8d35-bedd3a6ee459", # moderate risk + "f490ceb4-6262-4f1e-8b72-5515e6c46741", # low risk + ] + + # Screen Medplum patients + if MEDPLUM_URL: + print("\n=== Screening patients from Medplum ===") + batch_screen(gateway, DEMO_PATIENT_IDS, source="medplum") + + # Demo Epic connectivity (data may not match sepsis features) + if EPIC_URL: + print("\n=== Epic Sandbox (demo connectivity) ===") + batch_screen(gateway, ["e0w0LEDCYtfckT6N.CkJKCw3"], source="epic") diff --git a/scripts/extract_mimic_demo_patients.py b/scripts/extract_mimic_demo_patients.py old mode 100644 new mode 100755 index 834df100..13526dfb --- a/scripts/extract_mimic_demo_patients.py +++ b/scripts/extract_mimic_demo_patients.py @@ -1,80 +1,76 @@ #!/usr/bin/env python3 """ -Extract Demo Patient Prefetch from MIMIC-on-FHIR - -Creates CDS Hooks prefetch files with only the observations needed for -sepsis prediction, keyed by feature name. Much smaller than full bundles! - -Customize: - - MIMIC_DIR: Path to your MIMIC-on-FHIR dataset - - MODEL_PATH: Path to your trained model pickle - - SCHEMA_PATH: Feature schema defining which observations to extract - - OUTPUT_DIR: Where to save extracted patient files - - NUM_PATIENTS_PER_RISK: How many patients to extract per risk level - -Run: - python scripts/extract_mimic_demo_patients.py - -Output format: - { - "patient": {...Patient resource...}, - "heart_rate": {"resourceType": "Bundle", "entry": [...]}, - "temperature": {"resourceType": "Bundle", "entry": [...]}, - ... - } +Extract Demo Patients from MIMIC-on-FHIR + +Extracts patient data for sepsis prediction demos. Creates small files with +only the observations needed for the model. + +Usage: + # For CDS Hooks demo (prefetch format) + python scripts/extract_mimic_demo_patients.py --minimal + + # For FHIR batch demo (upload to Medplum) + python scripts/extract_mimic_demo_patients.py --minimal --upload + +Output formats: + Default (prefetch for CDS Hooks): + {"patient": {...}, "heart_rate": {"entry": [...]}, ...} + + --bundle (for FHIR server upload): + {"resourceType": "Bundle", "type": "transaction", "entry": [...]} + +Requires: + - MIMIC_FHIR_PATH env var (or --mimic flag) + - MEDPLUM_* env vars (if using --upload) """ +import argparse import json +import os +import uuid from pathlib import Path import joblib import yaml -from healthchain.sandbox.loaders import MimicOnFHIRLoader from healthchain.io import Dataset from healthchain.pipeline import Pipeline - -import os +from healthchain.sandbox.loaders import MimicOnFHIRLoader try: from dotenv import load_dotenv load_dotenv() except ImportError: - print( - "Warning: dotenv not installed. Please manually set the MIMIC_FHIR_PATH environment variable." 
- ) - + pass # ============================================================================= -# CUSTOMIZE THESE +# CONFIGURATION # ============================================================================= -MIMIC_DIR = os.getenv("MIMIC_FHIR_PATH") -MODEL_PATH = "cookbook/models/sepsis_model.pkl" -SCHEMA_PATH = "healthchain/configs/features/sepsis_vitals.yaml" -OUTPUT_DIR = Path("cookbook/data/mimic_demo_patients") +DEFAULT_MODEL_PATH = "cookbook/models/sepsis_model.pkl" +DEFAULT_SCHEMA_PATH = "healthchain/configs/features/sepsis_vitals.yaml" +DEFAULT_OUTPUT_DIR = Path("cookbook/data/mimic_demo_patients") -# Number of patients to extract per risk level (high/moderate/low) -NUM_PATIENTS_PER_RISK = 1 +# ============================================================================= +# HELPER FUNCTIONS # ============================================================================= def load_observation_codes(schema_path: str) -> dict: - """Load feature schema and extract observation codes.""" + """Extract observation codes from feature schema.""" with open(schema_path) as f: schema = yaml.safe_load(f) - - codes = {} - for feature_name, config in schema["features"].items(): - if config.get("fhir_resource") == "Observation": - codes[config["code"]] = feature_name - return codes + return { + config["code"]: name + for name, config in schema["features"].items() + if config.get("fhir_resource") == "Observation" + } def create_pipeline(model, feature_names) -> Pipeline[Dataset]: - """Build prediction pipeline.""" + """Build prediction pipeline for risk stratification.""" pipeline = Pipeline[Dataset]() @pipeline.add_node @@ -85,44 +81,51 @@ def impute_missing(dataset: Dataset) -> Dataset: @pipeline.add_node def run_inference(dataset: Dataset) -> Dataset: features = dataset.data[feature_names] - probabilities = model.predict_proba(features)[:, 1] - dataset.metadata["probabilities"] = probabilities + dataset.metadata["probabilities"] = model.predict_proba(features)[:, 1] return dataset return pipeline def get_observation_code(resource: dict) -> str: - """Extract MIMIC code from Observation resource.""" + """Extract MIMIC code from Observation.""" for coding in resource.get("code", {}).get("coding", []): if "mimic" in coding.get("system", ""): return coding.get("code", "") return "" -def extract_patient_prefetch(bundle: dict, patient_ref: str, obs_codes: dict) -> dict: - """Extract keyed prefetch for a patient with only needed observations.""" +# ============================================================================= +# EXTRACTION FUNCTIONS +# ============================================================================= + + +def extract_patient_prefetch( + bundle: dict, patient_ref: str, obs_codes: dict, minimal: bool = False +) -> dict: + """Extract keyed prefetch for a patient (CDS Hooks format).""" patient_id = patient_ref.split("/")[-1] prefetch = {} feature_obs = {name: [] for name in obs_codes.values()} for entry in bundle["entry"]: resource = entry.get("resource", {}) - resource_type = resource.get("resourceType", "") + rtype = resource.get("resourceType", "") - if resource_type == "Patient" and resource.get("id") == patient_id: + if rtype == "Patient" and resource.get("id") == patient_id: prefetch["patient"] = resource - - elif resource_type == "Observation": - subject = resource.get("subject", {}) - if subject.get("reference", "").endswith(patient_id): + elif rtype == "Observation": + ref = resource.get("subject", {}).get("reference", "") + if ref.endswith(patient_id): code = 
get_observation_code(resource) if code in obs_codes: feature_obs[obs_codes[code]].append(entry) - for feature_name, entries in feature_obs.items(): + for name, entries in feature_obs.items(): if entries: - prefetch[feature_name] = { + if minimal: + entries = entries[-1:] # Keep only latest + prefetch[name] = { "resourceType": "Bundle", "type": "searchset", "entry": entries, @@ -131,29 +134,125 @@ def extract_patient_prefetch(bundle: dict, patient_ref: str, obs_codes: dict) -> return prefetch +def prefetch_to_bundle(prefetch: dict) -> dict: + """Convert prefetch to FHIR transaction Bundle (for server upload).""" + entries = [] + # Use urn:uuid references so Medplum properly links Observations to Patient. + patient_uuid = f"urn:uuid:{uuid.uuid4()}" + + # Patient + if "patient" in prefetch: + entries.append( + { + "fullUrl": patient_uuid, + "resource": prefetch["patient"].copy(), + "request": {"method": "POST", "url": "Patient"}, + } + ) + + # Observations (with updated subject reference) + for key, value in prefetch.items(): + if key == "patient" or not isinstance(value, dict): + continue + for entry in value.get("entry", []): + resource = entry.get("resource", {}) + if resource.get("resourceType") == "Observation": + obs = resource.copy() + obs["subject"] = {"reference": patient_uuid} + entries.append( + { + "fullUrl": f"urn:uuid:{uuid.uuid4()}", + "resource": obs, + "request": {"method": "POST", "url": "Observation"}, + } + ) + + return {"resourceType": "Bundle", "type": "transaction", "entry": entries} + + +def upload_bundle(gateway, bundle_data: dict) -> str: + """Upload bundle to Medplum, return server-assigned Patient ID.""" + from fhir.resources.bundle import Bundle as FHIRBundle + + response = gateway.transaction(FHIRBundle(**bundle_data), source="medplum") + + # Extract Patient ID from response + if response.entry: + for entry in response.entry: + if entry.response and entry.response.location: + loc = entry.response.location + if "Patient/" in loc: + return loc.split("Patient/")[1].split("/")[0] + return None + + +# ============================================================================= +# MAIN +# ============================================================================= + + def main(): + parser = argparse.ArgumentParser( + description="Extract demo patients from MIMIC-on-FHIR" + ) + parser.add_argument("--mimic", type=str, help="Path to MIMIC-on-FHIR dataset") + parser.add_argument( + "--model", type=str, default=DEFAULT_MODEL_PATH, help="Model pickle path" + ) + parser.add_argument( + "--schema", type=str, default=DEFAULT_SCHEMA_PATH, help="Feature schema YAML" + ) + parser.add_argument( + "--minimal", action="store_true", help="Keep only 1 obs per feature (~12KB)" + ) + parser.add_argument("--bundle", action="store_true", help="Output as FHIR Bundle") + parser.add_argument("--upload", action="store_true", help="Upload to Medplum") + parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT_DIR) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--num-patients-per-risk", type=int, default=1) + args = parser.parse_args() + + mimic_dir = args.mimic or os.getenv("MIMIC_FHIR_PATH") + if not mimic_dir: + print("Error: Set MIMIC_FHIR_PATH or use --mimic") + return + + # --upload implies --bundle + if args.upload: + args.bundle = True + + # Set up FHIRGateway for upload + gateway = None + if args.upload: + from healthchain.gateway import FHIRGateway + from healthchain.gateway.clients.fhir.base import FHIRAuthConfig + + try: + config = 
FHIRAuthConfig.from_env("MEDPLUM") + gateway = FHIRGateway() + gateway.add_source("medplum", config.to_connection_string()) + print("✓ Medplum configured") + except Exception as e: + print(f"✗ Medplum failed: {e}") + return + print("=" * 60) - print("MIMIC Demo Patient Extraction") + print("MIMIC Demo Patient Extraction" + (" (MINIMAL)" if args.minimal else "")) print("=" * 60) - if MIMIC_DIR is None: - print("Error: MIMIC_FHIR_PATH environment variable is not set.") - return - - # Load configs - obs_codes = load_observation_codes(SCHEMA_PATH) - print(f"Features to extract: {list(obs_codes.values())}") + # Load schema and model + obs_codes = load_observation_codes(args.schema) + print(f"Features: {list(obs_codes.values())}") - model_data = joblib.load(MODEL_PATH) + model_data = joblib.load(args.model) model = model_data["model"] feature_names = model_data["metadata"]["feature_names"] - print(f"Model features: {feature_names}") # Load MIMIC data print("\nLoading MIMIC-on-FHIR...") loader = MimicOnFHIRLoader() bundle = loader.load( - data_dir=MIMIC_DIR, + data_dir=mimic_dir, resource_types=[ "MimicObservationChartevents", "MimicObservationLabevents", @@ -164,10 +263,9 @@ def main(): print(f"Loaded {len(bundle['entry']):,} resources") # Run predictions - print("\nExtracting features and predicting...") - dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) - pipeline = create_pipeline(model, feature_names) - result = pipeline(dataset) + print("\nExtracting features...") + dataset = Dataset.from_fhir_bundle(bundle, schema=args.schema) + result = create_pipeline(model, feature_names)(dataset) df = result.data.copy() df["probability"] = result.metadata["probabilities"] @@ -178,39 +276,66 @@ def main(): print(f"\nRisk distribution ({len(df)} patients):") print(df["risk"].value_counts().to_string()) - # Select patients per risk level - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - patients_to_extract = [] + # Extract patients + args.output.mkdir(parents=True, exist_ok=True) + print(f"\nExtracting to {args.output}/") for risk_level in ["high", "moderate", "low"]: - risk_patients = df[df["risk"] == risk_level] - for i in range(min(NUM_PATIENTS_PER_RISK, len(risk_patients))): - patient = risk_patients.iloc[i] + risk_df = df[df["risk"] == risk_level] + if len(risk_df) == 0: + continue + + risk_df = risk_df.sample( + n=min(args.num_patients_per_risk, len(risk_df)), random_state=args.seed + ) + + for i, (_, patient) in enumerate(risk_df.iterrows()): label = ( f"{risk_level}_risk" - if NUM_PATIENTS_PER_RISK == 1 + if args.num_patients_per_risk == 1 else f"{risk_level}_risk_{i+1}" ) - patients_to_extract.append((label, patient)) + prefetch = extract_patient_prefetch( + bundle, patient["patient_ref"], obs_codes, args.minimal + ) - # Extract and save - print(f"\nExtracting to {OUTPUT_DIR}/") - for label, patient in patients_to_extract: - prefetch = extract_patient_prefetch(bundle, patient["patient_ref"], obs_codes) + # Output format + if args.bundle: + output_data = prefetch_to_bundle(prefetch) + suffix = "_bundle.json" + else: + output_data = prefetch + suffix = "_patient.json" - output_file = OUTPUT_DIR / f"{label}_patient.json" - with open(output_file, "w") as f: - json.dump(prefetch, f, indent=2, default=str) + # Save file + with open(args.output / f"{label}{suffix}", "w") as f: + json.dump(output_data, f, indent=2, default=str) - obs_count = sum( - len(v.get("entry", [])) for k, v in prefetch.items() if k != "patient" - ) - features_with_data = [k for k in prefetch if k != "patient"] - 
print( - f" {label}: {patient['probability']:.1%} risk, {obs_count} obs ({', '.join(features_with_data)})" - ) + obs_count = sum( + len(v.get("entry", [])) for k, v in prefetch.items() if k != "patient" + ) + patient_id = patient["patient_ref"].split("/")[-1] + + # Upload if requested + status = "" + if args.upload and gateway: + server_id = upload_bundle(gateway, output_data) + status = ( + f" ✓ uploaded (ID: {server_id})" if server_id else " ✓ uploaded" + ) + + print( + f" {label}: {patient_id} ({patient['probability']:.1%}, {obs_count} obs){status}" + ) - print("\nDone! Use these files with SandboxClient.load_from_path()") + # Print next steps + print("\n" + "=" * 60) + if args.upload: + print("✓ Uploaded to Medplum! Update patient IDs in sepsis_fhir_batch.py") + elif args.bundle: + print("Re-run with --upload to upload to Medplum") + else: + print("CDS: client.load_from_path('cookbook/data/mimic_demo_patients/')") if __name__ == "__main__": From 0d041b6dc268bdb79d9470d30a6eaf6a8243a705 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 10:39:48 +0000 Subject: [PATCH 03/12] Add mimic demo patients --- .../mimic_demo_patients/high_risk_bundle.json | 413 ++++++++++++++++++ .../high_risk_patient.json | 390 +++++++++++++++++ .../mimic_demo_patients/low_risk_bundle.json | 413 ++++++++++++++++++ .../mimic_demo_patients/low_risk_patient.json | 407 +++++++++++++++++ .../moderate_risk_bundle.json | 413 ++++++++++++++++++ .../moderate_risk_patient.json | 408 +++++++++++++++++ 6 files changed, 2444 insertions(+) create mode 100644 cookbook/data/mimic_demo_patients/high_risk_bundle.json create mode 100644 cookbook/data/mimic_demo_patients/high_risk_patient.json create mode 100644 cookbook/data/mimic_demo_patients/low_risk_bundle.json create mode 100644 cookbook/data/mimic_demo_patients/low_risk_patient.json create mode 100644 cookbook/data/mimic_demo_patients/moderate_risk_bundle.json create mode 100644 cookbook/data/mimic_demo_patients/moderate_risk_patient.json diff --git a/cookbook/data/mimic_demo_patients/high_risk_bundle.json b/cookbook/data/mimic_demo_patients/high_risk_bundle.json new file mode 100644 index 00000000..ae2ca225 --- /dev/null +++ b/cookbook/data/mimic_demo_patients/high_risk_bundle.json @@ -0,0 +1,413 @@ +{ + "resourceType": "Bundle", + "type": "transaction", + "entry": [ + { + "fullUrl": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26", + "resource": { + "id": "1cf9e585-806c-513b-80af-4ca565a28231", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10015860" + } + ], + "gender": "male", + "birthDate": "2133-09-15", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2106-3", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "White" + } + }, + { + "url": "text", + "valueString": "White" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2186-5", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "Not Hispanic or Latino" + } + }, + { + "url": "text", + "valueString": "Not Hispanic or Latino" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10015860", + "system": 
"http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "S", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "managingOrganization": { + "reference": "Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "request": { + "method": "POST", + "url": "Patient" + } + }, + { + "fullUrl": "urn:uuid:7242a670-6b9d-4a67-a7a9-374658ac6b03", + "resource": { + "id": "1e00686c-4ed2-5acd-bc59-e8c305b95af7", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2192-05-12T17:15:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/36ad3455-d2af-514b-ac42-265954a07a0e" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 108, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2192-05-12T16:49:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:c29b9720-0c2d-4c02-aeeb-7b515ce318b4", + "resource": { + "id": "8f10f571-8183-5b55-b659-107b69ab6fba", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2192-05-12T09:46:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/36ad3455-d2af-514b-ac42-265954a07a0e" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 98.4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2192-05-12T09:46:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:4c37b18e-e4a6-4070-83af-c29224ba4fe1", + "resource": { + "id": "3a294f91-6d99-5a23-bc6b-c44d5f69c5db", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2192-05-12T17:15:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": 
{ + "reference": "Encounter/36ad3455-d2af-514b-ac42-265954a07a0e" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 17, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2192-05-12T16:49:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:4e3d2d87-f47a-4eb3-bb09-a707f13b5e1e", + "resource": { + "id": "ff849940-4858-59d3-9da8-da8d43aaa808", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2188-08-06T08:15:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/5f29e513-36c0-5435-a7ab-e6119d10fcc1" + }, + "encounter": { + "reference": "Encounter/dcd2507e-f200-5bfa-a719-c49d94f17fce" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "196686", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 17.8, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 11, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2188-08-06T06:57:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:9e170dfc-714e-4afc-889f-9d15d6f5f6b5", + "resource": { + "id": "5bdf7562-d8cd-5611-9177-4cbafa9b8b19", + "code": { + "coding": [ + { + "code": "50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2188-08-06T08:54:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f1f8064e-a37d-4c2b-8002-3efd94d43a26" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/2a72c2e8-48b9-5d28-92c6-a0cf35c1ca7c" + }, + "encounter": { + "reference": "Encounter/dcd2507e-f200-5bfa-a719-c49d94f17fce" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "196668", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + 
"low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2188-08-06T06:57:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + } + ] +} diff --git a/cookbook/data/mimic_demo_patients/high_risk_patient.json b/cookbook/data/mimic_demo_patients/high_risk_patient.json new file mode 100644 index 00000000..2d20486d --- /dev/null +++ b/cookbook/data/mimic_demo_patients/high_risk_patient.json @@ -0,0 +1,390 @@ +{ + "patient": { + "id": "f5efdf3f-5b53-5c9f-95a6-047275107c46", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10002495" + } + ], + "gender": "male", + "birthDate": "2060-05-22", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "UNK", + "system": "http://terminology.hl7.org/CodeSystem/v3-NullFlavor", + "display": "unknown" + } + }, + { + "url": "text", + "valueString": "unknown" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10002495", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "M", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "managingOrganization": { + "reference": "Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "heart_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "041a0657-63c0-5828-8301-6dd389649892", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2141-05-23T21:50:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/f5efdf3f-5b53-5c9f-95a6-047275107c46" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/25e05468-7cbf-5a04-9209-79cb07703326" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 113, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2141-05-23T17:55:00-04:00" + } + } + ] + }, + "temperature": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "69958710-586e-5a29-994c-0f93f9da43dd", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2141-05-22T20:32:00-04:00", + "status": "final", + 
"subject": { + "reference": "Patient/f5efdf3f-5b53-5c9f-95a6-047275107c46" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/25e05468-7cbf-5a04-9209-79cb07703326" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 98.7, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2141-05-22T20:32:00-04:00" + } + } + ] + }, + "respiratory_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "9f0d5f36-58c1-5e3c-a645-9ac6eed4eeca", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2141-05-23T21:50:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/f5efdf3f-5b53-5c9f-95a6-047275107c46" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/25e05468-7cbf-5a04-9209-79cb07703326" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 25, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2141-05-23T17:55:00-04:00" + } + } + ] + }, + "wbc": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "943aa613-4410-5792-8286-eb0a7637de4b", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2141-05-23T15:28:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/f5efdf3f-5b53-5c9f-95a6-047275107c46" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/122994a4-a976-501c-813d-994c1ab5742d" + }, + "encounter": { + "reference": "Encounter/3e802913-a3f3-573f-90b3-a85dffdec47b" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "32427", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 28.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 10, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2141-05-23T14:53:00-04:00" + } + } + ] + }, + "creatinine": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": 
"9c9eab28-b23d-56ce-a2e0-e3fa0201cc9a", + "code": { + "coding": [ + { + "code": "50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2141-05-23T12:34:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/f5efdf3f-5b53-5c9f-95a6-047275107c46" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/c191d9dd-50e4-5d7c-bb05-cbe8b2e72772" + }, + "encounter": { + "reference": "Encounter/3e802913-a3f3-573f-90b3-a85dffdec47b" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "32407", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.6, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2141-05-23T10:52:00-04:00" + } + } + ] + } +} diff --git a/cookbook/data/mimic_demo_patients/low_risk_bundle.json b/cookbook/data/mimic_demo_patients/low_risk_bundle.json new file mode 100644 index 00000000..aa30f6ad --- /dev/null +++ b/cookbook/data/mimic_demo_patients/low_risk_bundle.json @@ -0,0 +1,413 @@ +{ + "resourceType": "Bundle", + "type": "transaction", + "entry": [ + { + "fullUrl": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3", + "resource": { + "id": "afa7c67f-82b9-5f51-bd04-8b7d7c4456c0", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10016150" + } + ], + "gender": "male", + "birthDate": "2073-05-10", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2106-3", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "White" + } + }, + { + "url": "text", + "valueString": "White" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2186-5", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "Not Hispanic or Latino" + } + }, + { + "url": "text", + "valueString": "Not Hispanic or Latino" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10016150", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "S", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "managingOrganization": { + "reference": 
"Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "request": { + "method": "POST", + "url": "Patient" + } + }, + { + "fullUrl": "urn:uuid:bc20d95c-18b8-4fa8-9095-f05abec6aa78", + "resource": { + "id": "867fe01b-3930-5adf-a45d-f666fecbe864", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2142-05-10T16:59:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/13987fde-e7cc-5dfb-b5e8-cdf2b709a1d4" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 71, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2142-05-10T16:59:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:e4ce72da-4ab9-4619-adb6-07bc027c728a", + "resource": { + "id": "93664731-abf1-57e1-a3fb-693fa9b07479", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2142-05-10T16:59:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/13987fde-e7cc-5dfb-b5e8-cdf2b709a1d4" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 98.4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2142-05-10T16:59:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:8e23c576-644b-4908-9271-1935d06aff54", + "resource": { + "id": "e9532f81-3f62-5af3-a095-d027492f7e01", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2142-05-10T16:59:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/13987fde-e7cc-5dfb-b5e8-cdf2b709a1d4" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 33, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2142-05-10T16:59:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": 
"urn:uuid:46fe4954-70f4-4652-9d40-15c9598ce545", + "resource": { + "id": "759f2c85-3345-5d7a-8bbb-252d4d7ac5b0", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2142-05-10T16:09:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/b0afc9eb-baad-5d87-a7d8-d6eaf699cf96" + }, + "encounter": { + "reference": "Encounter/ef3c0803-f981-59f7-a022-0d1223377142" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "202620", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 5.4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 11, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2142-05-10T15:37:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:7d4a5e58-ad87-43d3-bc7c-b89df25c2308", + "resource": { + "id": "7461ac2f-33f1-508b-9eef-bf5c23dd9b8d", + "code": { + "coding": [ + { + "code": "50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2142-05-14T10:23:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:f6f0dd59-e75c-4562-9150-ea4a0b5321b3" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/d936d365-0a43-52e0-b440-26db28d3bbf0" + }, + "encounter": { + "reference": "Encounter/ef3c0803-f981-59f7-a022-0d1223377142" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "202713", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.9, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2142-05-14T08:30:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + 
} + ] +} diff --git a/cookbook/data/mimic_demo_patients/low_risk_patient.json b/cookbook/data/mimic_demo_patients/low_risk_patient.json new file mode 100644 index 00000000..5cb18e13 --- /dev/null +++ b/cookbook/data/mimic_demo_patients/low_risk_patient.json @@ -0,0 +1,407 @@ +{ + "patient": { + "id": "5f3dcdb5-bd27-58f5-b990-859b6bcc2d73", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10038999" + } + ], + "gender": "male", + "birthDate": "2086-05-22", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2106-3", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "White" + } + }, + { + "url": "text", + "valueString": "White" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2186-5", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "Not Hispanic or Latino" + } + }, + { + "url": "text", + "valueString": "Not Hispanic or Latino" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10038999", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "S", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "managingOrganization": { + "reference": "Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "heart_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "ccbb50b3-c1e2-5a78-8f40-fda91f209773", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2131-05-22T22:38:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/5f3dcdb5-bd27-58f5-b990-859b6bcc2d73" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/24339e36-0b8e-5f30-91bc-d4b7d9919c3c" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 110, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2131-05-22T22:38:00-04:00" + } + } + ] + }, + "temperature": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "3c53284d-5069-54bd-8496-3a739180babe", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2131-05-22T22:38:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/5f3dcdb5-bd27-58f5-b990-859b6bcc2d73" + }, + 
"category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/24339e36-0b8e-5f30-91bc-d4b7d9919c3c" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 98.8, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2131-05-22T22:38:00-04:00" + } + } + ] + }, + "respiratory_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "9331ed77-b563-5abe-bf84-4ac7053b9fe9", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2131-05-22T22:38:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/5f3dcdb5-bd27-58f5-b990-859b6bcc2d73" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/24339e36-0b8e-5f30-91bc-d4b7d9919c3c" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 20, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2131-05-22T22:38:00-04:00" + } + } + ] + }, + "wbc": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "0b9aaaa5-43c1-51a5-b1d2-a6156968513e", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2131-05-28T03:24:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/5f3dcdb5-bd27-58f5-b990-859b6bcc2d73" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/6025339e-8821-54a8-99dc-ae56d8c705d1" + }, + "encounter": { + "reference": "Encounter/7f95fc8e-1f36-54a6-96f9-798fd9c7e93b" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "455202", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 8.6, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 10, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2131-05-28T02:56:00-04:00" + } + } + ] + }, + "creatinine": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "e9ea65d0-b198-58c1-bcbe-e436150d6e4d", + "code": { + "coding": [ + { + "code": 
"50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2131-05-28T03:37:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/5f3dcdb5-bd27-58f5-b990-859b6bcc2d73" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/00d5feac-4fe1-5fdb-ac62-01bff201f55c" + }, + "encounter": { + "reference": "Encounter/7f95fc8e-1f36-54a6-96f9-798fd9c7e93b" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "455210", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.8, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2131-05-28T02:56:00-04:00" + } + } + ] + } +} diff --git a/cookbook/data/mimic_demo_patients/moderate_risk_bundle.json b/cookbook/data/mimic_demo_patients/moderate_risk_bundle.json new file mode 100644 index 00000000..869c0676 --- /dev/null +++ b/cookbook/data/mimic_demo_patients/moderate_risk_bundle.json @@ -0,0 +1,413 @@ +{ + "resourceType": "Bundle", + "type": "transaction", + "entry": [ + { + "fullUrl": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122", + "resource": { + "id": "72d56b49-a7ee-5b9a-a679-25d1c836d3c3", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10018845" + } + ], + "gender": "male", + "birthDate": "2093-10-07", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2106-3", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "White" + } + }, + { + "url": "text", + "valueString": "White" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2186-5", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "Not Hispanic or Latino" + } + }, + { + "url": "text", + "valueString": "Not Hispanic or Latino" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10018845", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "M", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "deceasedDateTime": "2184-11-22", + "managingOrganization": { + "reference": 
"Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "request": { + "method": "POST", + "url": "Patient" + } + }, + { + "fullUrl": "urn:uuid:7b9f0473-b12c-49ea-9cec-0af73dccb83c", + "resource": { + "id": "ff7c1328-fe32-5574-b842-144ba3ac8fb0", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2184-10-08T04:31:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/625b0a9e-a378-5e68-b8d6-10c655f7579d" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 58, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2184-10-08T04:31:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:9eebab6a-909d-424a-baa9-0a155a54f13a", + "resource": { + "id": "4664e5eb-efaa-5062-a594-f20c0b10d901", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2184-10-08T04:31:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/625b0a9e-a378-5e68-b8d6-10c655f7579d" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 98, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2184-10-08T04:31:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:f1358348-72a0-411b-9413-d9261ae6b92a", + "resource": { + "id": "1ac7341b-9efc-5101-a4bf-b5fa5de755dd", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2184-10-08T04:31:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/625b0a9e-a378-5e68-b8d6-10c655f7579d" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 13, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2184-10-08T04:31:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": 
"urn:uuid:b868d1ac-4537-43a1-aafa-3438e8c6a28d", + "resource": { + "id": "41599527-6e37-53ce-b710-8d1d071d28eb", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2184-10-08T02:08:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/f3e2cbca-c799-5acd-85f1-4c3fd56f7dd9" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "222110", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 5.9, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 11, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2184-10-08T00:50:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + }, + { + "fullUrl": "urn:uuid:8375eaf0-8196-485b-84a2-7a200610d2e4", + "resource": { + "id": "164745e6-16e5-5ded-95c2-3094a9cc0ac6", + "code": { + "coding": [ + { + "code": "50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "note": [ + { + "text": "VERIFIED - CONSISTENT WITH OTHER DATA." 
+ } + ], + "issued": "2184-10-08T02:09:00-04:00", + "status": "final", + "subject": { + "reference": "urn:uuid:a6f47e5c-2ee6-4a71-ae52-6cdb2edd5122" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/7991a26f-45ad-5d40-b4b9-2a17467b13c9" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "STAT" + } + ], + "identifier": [ + { + "value": "222087", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2184-10-08T00:50:00-04:00" + }, + "request": { + "method": "POST", + "url": "Observation" + } + } + ] +} diff --git a/cookbook/data/mimic_demo_patients/moderate_risk_patient.json b/cookbook/data/mimic_demo_patients/moderate_risk_patient.json new file mode 100644 index 00000000..c8aaf1b3 --- /dev/null +++ b/cookbook/data/mimic_demo_patients/moderate_risk_patient.json @@ -0,0 +1,408 @@ +{ + "patient": { + "id": "22a3e422-663a-561c-b305-a0c04bf42235", + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-patient" + ] + }, + "name": [ + { + "use": "official", + "family": "Patient_10021666" + } + ], + "gender": "male", + "birthDate": "2085-03-12", + "extension": [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2106-3", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "White" + } + }, + { + "url": "text", + "valueString": "White" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "code": "2186-5", + "system": "urn:oid:2.16.840.1.113883.6.238", + "display": "Not Hispanic or Latino" + } + }, + { + "url": "text", + "valueString": "Not Hispanic or Latino" + } + ] + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex", + "valueCode": "M" + } + ], + "identifier": [ + { + "value": "10021666", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/patient" + } + ], + "resourceType": "Patient", + "communication": [ + { + "language": { + "coding": [ + { + "code": "en", + "system": "urn:ietf:bcp:47" + } + ] + } + } + ], + "maritalStatus": { + "coding": [ + { + "code": "M", + "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus" + } + ] + }, + "deceasedDateTime": "2172-04-19", + "managingOrganization": { + "reference": "Organization/ee172322-118b-5716-abbc-18e4c5437e15" + } + }, + "heart_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "01093aef-0cf5-5af0-b5c1-92ca3d7deaf2", + "code": { + "coding": [ + { + "code": "220045", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Heart Rate" + } + ] + }, + "meta": { + "profile": [ + 
"http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2172-03-13T02:02:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/22a3e422-663a-561c-b305-a0c04bf42235" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/ffce7398-83de-5c56-833d-dfcb02d1abac" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "bpm", + "unit": "bpm", + "value": 70, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2172-03-13T01:56:00-04:00" + } + } + ] + }, + "temperature": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "476c79e7-cdba-5f17-8bee-f0f5bcbaa845", + "code": { + "coding": [ + { + "code": "223761", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Temperature Fahrenheit" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2172-03-13T02:02:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/22a3e422-663a-561c-b305-a0c04bf42235" + }, + "category": [ + { + "coding": [ + { + "code": "Routine Vital Signs", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/ffce7398-83de-5c56-833d-dfcb02d1abac" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "\u00b0F", + "unit": "\u00b0F", + "value": 99.4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2172-03-13T02:01:00-04:00" + } + } + ] + }, + "respiratory_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "e22290e7-e08c-5e0d-9929-eba8ad24c97a", + "code": { + "coding": [ + { + "code": "220210", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-chartevents-d-items", + "display": "Respiratory Rate" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-chartevents" + ] + }, + "issued": "2172-03-13T02:02:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/22a3e422-663a-561c-b305-a0c04bf42235" + }, + "category": [ + { + "coding": [ + { + "code": "Respiratory", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-observation-category" + } + ] + } + ], + "encounter": { + "reference": "Encounter/ffce7398-83de-5c56-833d-dfcb02d1abac" + }, + "resourceType": "Observation", + "valueQuantity": { + "code": "insp/min", + "unit": "insp/min", + "value": 14, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "effectiveDateTime": "2172-03-13T01:56:00-04:00" + } + } + ] + }, + "wbc": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "b78e2882-469d-566f-bcfe-f47388cb72f0", + "code": { + "coding": [ + { + "code": "51301", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "White Blood Cells" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2172-03-15T13:18:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/22a3e422-663a-561c-b305-a0c04bf42235" + }, + "category": [ 
+ { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/3d1deb9c-3aa2-5bd7-a3c8-3f1766530dc2" + }, + "encounter": { + "reference": "Encounter/f96dcfb3-1c84-5040-b9b9-c227d21a21a1" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "257293", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "K/uL", + "unit": "K/uL", + "value": 10.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "K/uL", + "unit": "K/uL", + "value": 4, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "K/uL", + "unit": "K/uL", + "value": 11, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2172-03-15T11:56:00-04:00" + } + } + ] + }, + "creatinine": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "id": "ab2d4a21-fd85-5263-b909-4d92d0c50dac", + "code": { + "coding": [ + { + "code": "50912", + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-d-labitems", + "display": "Creatinine" + } + ] + }, + "meta": { + "profile": [ + "http://mimic.mit.edu/fhir/mimic/StructureDefinition/mimic-observation-labevents" + ] + }, + "issued": "2172-03-15T14:02:00-04:00", + "status": "final", + "subject": { + "reference": "Patient/22a3e422-663a-561c-b305-a0c04bf42235" + }, + "category": [ + { + "coding": [ + { + "code": "laboratory", + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "display": "Laboratory" + } + ] + } + ], + "specimen": { + "reference": "Specimen/99911b05-1540-5236-9689-e9594cc8aeed" + }, + "encounter": { + "reference": "Encounter/f96dcfb3-1c84-5040-b9b9-c227d21a21a1" + }, + "extension": [ + { + "url": "http://mimic.mit.edu/fhir/mimic/StructureDefinition/lab-priority", + "valueString": "ROUTINE" + } + ], + "identifier": [ + { + "value": "257298", + "system": "http://mimic.mit.edu/fhir/mimic/identifier/observation-labevents" + } + ], + "resourceType": "Observation", + "valueQuantity": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "referenceRange": [ + { + "low": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 0.5, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + }, + "high": { + "code": "mg/dL", + "unit": "mg/dL", + "value": 1.2, + "system": "http://mimic.mit.edu/fhir/mimic/CodeSystem/mimic-units" + } + } + ], + "effectiveDateTime": "2172-03-15T11:56:00-04:00" + } + } + ] + } +} From a28d85627c4882ad88c7b18097031d15d68fba67 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 11:49:31 +0000 Subject: [PATCH 04/12] to_risk_assessment reads preds from metadata --- docs/reference/io/containers/dataset.md | 8 +++----- healthchain/io/containers/dataset.py | 23 +++++++++++++++-------- tests/io/test_dataset.py | 21 +++++++++------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/docs/reference/io/containers/dataset.md b/docs/reference/io/containers/dataset.md index 731e5310..af2bd421 100644 --- a/docs/reference/io/containers/dataset.md +++ b/docs/reference/io/containers/dataset.md 
@@ -27,14 +27,12 @@ print("Columns:", dataset.columns) validation_result = dataset.validate(schema="path/to/schema.yaml") print("Validation Result:", validation_result) -# 4. Run inference using your ML model -predictions = model.predict(dataset.data) -probabilities = model.predict_proba(dataset.data)[:, 1] +# 4. Run inference using your ML model and store in metadata +dataset.metadata["predictions"] = model.predict(dataset.data) +dataset.metadata["probabilities"] = model.predict_proba(dataset.data)[:, 1] # 5. Convert predictions to FHIR RiskAssessment resources for downstream use risk_assessments = dataset.to_risk_assessment( - predictions=predictions, - probabilities=probabilities, outcome_code="A41.9", outcome_display="Sepsis, unspecified", model_name="SepsisRiskModel", diff --git a/healthchain/io/containers/dataset.py b/healthchain/io/containers/dataset.py index 99f7966f..20a4e36c 100644 --- a/healthchain/io/containers/dataset.py +++ b/healthchain/io/containers/dataset.py @@ -200,8 +200,6 @@ def _dtypes_compatible(self, actual: str, expected: str) -> bool: def to_risk_assessment( self, - predictions: np.ndarray, - probabilities: np.ndarray, outcome_code: str, outcome_display: str, outcome_system: str = "http://hl7.org/fhir/sid/icd-10", @@ -209,6 +207,8 @@ def to_risk_assessment( model_version: Optional[str] = None, high_threshold: float = 0.7, moderate_threshold: float = 0.4, + predictions: Optional[np.ndarray] = None, + probabilities: Optional[np.ndarray] = None, ) -> List[RiskAssessment]: """Convert model predictions to FHIR RiskAssessment resources. @@ -216,8 +216,6 @@ def to_risk_assessment( including in FHIR Bundles or sending to FHIR servers. Args: - predictions: Binary predictions array (0/1) - probabilities: Probability scores array (0-1) outcome_code: Code for the predicted outcome (e.g., "A41.9" for sepsis) outcome_display: Display text for the outcome (e.g., "Sepsis") outcome_system: Code system for the outcome (default: ICD-10) @@ -225,22 +223,31 @@ def to_risk_assessment( model_version: Version of the ML model (optional) high_threshold: Threshold for high risk (default: 0.7) moderate_threshold: Threshold for moderate risk (default: 0.4) + predictions: Binary predictions array (0/1). Defaults to metadata["predictions"] + probabilities: Probability scores array (0-1). Defaults to metadata["probabilities"] Returns: List of RiskAssessment resources, one per patient Example: - >>> predictions = np.array([0, 1, 0]) - >>> probabilities = np.array([0.15, 0.85, 0.32]) >>> risk_assessments = dataset.to_risk_assessment( - ... predictions, - ... probabilities, ... outcome_code="A41.9", ... outcome_display="Sepsis, unspecified", ... model_name="RandomForest", ... model_version="1.0" ... 
) """ + # Fall back to metadata if not provided + if predictions is None: + predictions = self.metadata.get("predictions") + if probabilities is None: + probabilities = self.metadata.get("probabilities") + + if predictions is None or probabilities is None: + raise ValueError( + "predictions and probabilities must be provided or available in metadata" + ) + if len(predictions) != len(self.data): raise ValueError( f"Predictions length ({len(predictions)}) must match " diff --git a/tests/io/test_dataset.py b/tests/io/test_dataset.py index be2e25f1..272f26b2 100644 --- a/tests/io/test_dataset.py +++ b/tests/io/test_dataset.py @@ -128,9 +128,9 @@ def test_dataset_to_risk_assessment_creates_resources_with_metadata(sample_datas probabilities = np.array([0.15, 0.85]) # Test with model metadata + sample_dataset.metadata["predictions"] = predictions + sample_dataset.metadata["probabilities"] = probabilities risks = sample_dataset.to_risk_assessment( - predictions, - probabilities, outcome_code="A41.9", outcome_display="Sepsis", model_name="RandomForest", @@ -183,10 +183,10 @@ def test_dataset_to_risk_assessment_categorizes_risk_levels( } ) dataset = Dataset(data) + dataset.metadata["predictions"] = np.array(predictions) + dataset.metadata["probabilities"] = np.array(probabilities) risks = dataset.to_risk_assessment( - np.array(predictions), - np.array(probabilities), outcome_code="A41.9", outcome_display="Sepsis", ) @@ -224,11 +224,11 @@ def test_dataset_to_risk_assessment_validation_errors( """Dataset.to_risk_assessment validates required columns and array lengths.""" data = pd.DataFrame(data_dict) dataset = Dataset(data) + dataset.metadata["predictions"] = np.array(predictions) + dataset.metadata["probabilities"] = np.array(probabilities) with pytest.raises(ValueError, match=expected_error): dataset.to_risk_assessment( - np.array(predictions), - np.array(probabilities), outcome_code="A41.9", outcome_display="Sepsis", ) @@ -284,11 +284,8 @@ def test_dataset_to_risk_assessment_validates_probability_length(): """Dataset.to_risk_assessment validates probabilities array length.""" data = pd.DataFrame({"patient_ref": ["Patient/1", "Patient/2"], "value": [1, 2]}) dataset = Dataset(data) - - predictions = np.array([0, 1]) - probabilities = np.array([0.15]) # Wrong length + dataset.metadata["predictions"] = np.array([0, 1]) + dataset.metadata["probabilities"] = np.array([0.15]) # Wrong length with pytest.raises(ValueError, match="Probabilities length .* must match"): - dataset.to_risk_assessment( - predictions, probabilities, outcome_code="A41.9", outcome_display="Sepsis" - ) + dataset.to_risk_assessment(outcome_code="A41.9", outcome_display="Sepsis") From dba972cce4f56dde56dd40ad7aa36ac5bb0f5434 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 11:49:50 +0000 Subject: [PATCH 05/12] Add prefetch to bundle helper --- healthchain/fhir/__init__.py | 2 ++ healthchain/fhir/readers.py | 28 ++++++++++++++++++++++++ tests/fhir/test_converters.py | 40 +++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/healthchain/fhir/__init__.py b/healthchain/fhir/__init__.py index 9193ccd4..b33116d2 100644 --- a/healthchain/fhir/__init__.py +++ b/healthchain/fhir/__init__.py @@ -23,6 +23,7 @@ from healthchain.fhir.readers import ( create_resource_from_dict, convert_prefetch_to_fhir_objects, + prefetch_to_bundle, read_content_attachment, ) @@ -74,6 +75,7 @@ # Conversions and readers "create_resource_from_dict", "convert_prefetch_to_fhir_objects", + "prefetch_to_bundle", 
"read_content_attachment", # Bundle operations "create_bundle", diff --git a/healthchain/fhir/readers.py b/healthchain/fhir/readers.py index 7d7bbd06..d55fe7be 100644 --- a/healthchain/fhir/readers.py +++ b/healthchain/fhir/readers.py @@ -37,6 +37,34 @@ def create_resource_from_dict( return None +def prefetch_to_bundle(prefetch: Dict[str, Any]) -> Dict[str, Any]: + """Flatten CDS Hooks prefetch into a collection Bundle dict. + + Converts the keyed prefetch format (used in CDS Hooks) into a flat bundle + suitable for Dataset.from_fhir_bundle(). + + Args: + prefetch: CDS Hooks prefetch dict with format: + {"patient": {...}, "observations": {"entry": [...]}, ...} + + Returns: + Bundle dict with type "collection" and flattened entries + + Example: + >>> prefetch = request.prefetch + >>> bundle = prefetch_to_bundle(prefetch) + >>> dataset = Dataset.from_fhir_bundle(bundle, schema=schema) + """ + entries = [] + for key, value in prefetch.items(): + if isinstance(value, dict): + if "entry" in value: # Searchset bundle + entries.extend(value["entry"]) + elif "resourceType" in value: # Single resource + entries.append({"resource": value}) + return {"type": "collection", "entry": entries} + + def convert_prefetch_to_fhir_objects( prefetch_dict: Dict[str, Any], ) -> Dict[str, Resource]: diff --git a/tests/fhir/test_converters.py b/tests/fhir/test_converters.py index 40e06725..aa16c20a 100644 --- a/tests/fhir/test_converters.py +++ b/tests/fhir/test_converters.py @@ -23,6 +23,7 @@ create_value_quantity_observation, create_condition, create_medication_statement, + prefetch_to_bundle, ) @@ -523,3 +524,42 @@ def test_bundle_to_dataframe_skips_unsupported_resources_gracefully(): # Should not raise error, just skip unsupported types df = bundle_to_dataframe(bundle, config=config) assert len(df) == 1 + + +def test_prefetch_to_bundle_flattens_cds_prefetch(): + """prefetch_to_bundle converts CDS Hooks prefetch to collection bundle.""" + prefetch = { + "patient": {"resourceType": "Patient", "id": "123", "gender": "male"}, + "heart_rate": { + "resourceType": "Bundle", + "type": "searchset", + "entry": [ + { + "resource": { + "resourceType": "Observation", + "code": {"coding": [{"code": "8867-4"}]}, + "valueQuantity": {"value": 85.0}, + } + } + ], + }, + } + + bundle = prefetch_to_bundle(prefetch) + + assert bundle["type"] == "collection" + assert len(bundle["entry"]) == 2 + # Patient should be wrapped in resource + patient_entry = next( + e + for e in bundle["entry"] + if e.get("resource", {}).get("resourceType") == "Patient" + ) + assert patient_entry["resource"]["id"] == "123" + + +def test_prefetch_to_bundle_handles_empty_prefetch(): + """prefetch_to_bundle handles empty prefetch gracefully.""" + bundle = prefetch_to_bundle({}) + assert bundle["type"] == "collection" + assert bundle["entry"] == [] From 020dd050db22fe3326c2747c55afbb684f716a44 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 11:50:26 +0000 Subject: [PATCH 06/12] Cleanup cookbooks --- cookbook/sepsis_cds_hooks.py | 16 +++------ cookbook/sepsis_fhir_batch.py | 64 ++++++++++++++++------------------- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/cookbook/sepsis_cds_hooks.py b/cookbook/sepsis_cds_hooks.py index d80249af..ff70be67 100644 --- a/cookbook/sepsis_cds_hooks.py +++ b/cookbook/sepsis_cds_hooks.py @@ -21,6 +21,7 @@ from dotenv import load_dotenv from healthchain.gateway import HealthChainAPI, CDSHooksService +from healthchain.fhir import prefetch_to_bundle from healthchain.io import Dataset from 
healthchain.models import CDSRequest, CDSResponse from healthchain.models.responses.cdsresponse import Card @@ -72,16 +73,8 @@ def sepsis_alert(request: CDSRequest) -> CDSResponse: if not prefetch: return CDSResponse(cards=[]) - # Merge keyed prefetch into single bundle - # Format: {"patient": {...}, "heart_rate": {"entry": [...]}, ...} - entries = [] - for key, value in prefetch.items(): - if key == "patient": - entries.append({"resource": value}) - elif isinstance(value, dict) and "entry" in value: - entries.extend(value["entry"]) - - bundle = {"type": "collection", "entry": entries} + # Flatten keyed prefetch into single bundle + bundle = prefetch_to_bundle(prefetch) # FHIR → Dataset → Prediction dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) @@ -124,6 +117,7 @@ def sepsis_alert(request: CDSRequest) -> CDSResponse: app = HealthChainAPI(title="Sepsis CDS Hooks") app.register_service(cds, path="/cds") + return app @@ -149,7 +143,7 @@ def run_server(): url="http://localhost:8000/cds/cds-services/sepsis-risk", workflow="patient-view", ) - client.load_from_path(DEMO_PATIENTS_DIR) + client.load_from_path(DEMO_PATIENTS_DIR, pattern="*_patient.json") responses = client.send_requests() client.save_results(save_request=True, save_response=True, directory="./output/") diff --git a/cookbook/sepsis_fhir_batch.py b/cookbook/sepsis_fhir_batch.py index a9ae5880..3c41dcb7 100644 --- a/cookbook/sepsis_fhir_batch.py +++ b/cookbook/sepsis_fhir_batch.py @@ -19,17 +19,22 @@ from typing import List import joblib +import logging from dotenv import load_dotenv from fhir.resources.patient import Patient from fhir.resources.observation import Observation +from fhir.resources.riskassessment import RiskAssessment from healthchain.gateway import HealthChainAPI, FHIRGateway from healthchain.gateway.clients.fhir.base import FHIRAuthConfig +from healthchain.fhir import merge_bundles from healthchain.io import Dataset from healthchain.pipeline import Pipeline load_dotenv() +logger = logging.getLogger(__name__) + # Configuration SCRIPT_DIR = Path(__file__).parent MODEL_PATH = SCRIPT_DIR / "models" / "sepsis_model.pkl" @@ -51,13 +56,19 @@ config = FHIRAuthConfig.from_env("MEDPLUM") MEDPLUM_URL = config.to_connection_string() except Exception: - pass + logger.warning("Failed to load Medplum config") try: config = FHIRAuthConfig.from_env("EPIC") EPIC_URL = config.to_connection_string() except Exception: - pass + logger.warning("Failed to load Epic config") + + +def get_risk_summary(ra: RiskAssessment) -> tuple[str, float]: + """Extract risk level and probability from a RiskAssessment.""" + pred = ra.prediction[0] + return pred.qualitativeRisk.coding[0].code, pred.probabilityDecimal def create_pipeline() -> Pipeline[Dataset]: @@ -83,8 +94,8 @@ def run_inference(dataset: Dataset) -> Dataset: def screen_patient( gateway: FHIRGateway, pipeline: Pipeline, patient_id: str, source: str -): - """Screen a single patient for sepsis risk.""" +) -> RiskAssessment | None: + """Screen a single patient for sepsis risk. 
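+    Queries the patient's Observations and Patient resource from the given
+    FHIR source, merges them into a bundle, runs the prediction pipeline,
+    and writes a RiskAssessment back to the same server. A usage sketch
+    (the patient ID here is illustrative):
+
+        >>> ra = screen_patient(gateway, pipeline, "patient-123", "medplum")
+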
Returns RiskAssessment or None.""" # Query patient data from FHIR server obs_bundle = gateway.search( Observation, {"patient": patient_id, "_count": "100"}, source @@ -92,30 +103,21 @@ def screen_patient( patient_bundle = gateway.search(Patient, {"_id": patient_id}, source) # Merge into single bundle - entries = [] - if patient_bundle.entry: - entries.extend([e.model_dump() for e in patient_bundle.entry]) - if obs_bundle.entry: - entries.extend([e.model_dump() for e in obs_bundle.entry]) + bundle = merge_bundles([patient_bundle, obs_bundle]) - if not entries: - return None, "No data found" + if not bundle.entry: + return None # FHIR → Dataset → Prediction - bundle = {"type": "collection", "entry": entries} dataset = Dataset.from_fhir_bundle(bundle, schema=str(SCHEMA_PATH)) if len(dataset.data) == 0: - return None, "No matching features" + return None result = pipeline(dataset) - probability = float(result.metadata["probabilities"][0]) - risk = "high" if probability > 0.7 else "moderate" if probability > 0.4 else "low" # Create and save RiskAssessment risk_assessments = result.to_risk_assessment( - result.metadata["predictions"], - result.metadata["probabilities"], outcome_code="A41.9", outcome_display="Sepsis", model_name="sepsis_xgboost_v1", @@ -124,33 +126,28 @@ def screen_patient( for ra in risk_assessments: gateway.create(ra, source=source) - return risk_assessments[ - 0 - ] if risk_assessments else None, f"{risk.upper()} ({probability:.0%})" + return risk_assessments[0] if risk_assessments else None -def batch_screen(gateway: FHIRGateway, patient_ids: List[str], source: str = "medplum"): +def batch_screen( + gateway: FHIRGateway, patient_ids: List[str], source: str = "medplum" +) -> None: """Screen multiple patients for sepsis risk.""" pipeline = create_pipeline() - results = [] for patient_id in patient_ids: try: - ra, status = screen_patient(gateway, pipeline, patient_id, source) + ra = screen_patient(gateway, pipeline, patient_id, source) if ra: - results.append( - {"patient": patient_id, "status": status, "risk_assessment": ra.id} + risk, prob = get_risk_summary(ra) + print( + f" {patient_id}: {risk.upper()} ({prob:.0%}) → RiskAssessment/{ra.id}" ) - print(f" {patient_id}: {status} → RiskAssessment/{ra.id}") else: - results.append({"patient": patient_id, "status": status}) - print(f" {patient_id}: {status}") + print(f" {patient_id}: No data") except Exception as e: - results.append({"patient": patient_id, "error": str(e)}) print(f" {patient_id}: Error - {e}") - return results - def create_app(): """Create FHIR gateway app with configured sources.""" @@ -159,10 +156,10 @@ def create_app(): # Add configured sources if MEDPLUM_URL: gateway.add_source("medplum", MEDPLUM_URL) - print("✓ Medplum configured") + logger.info("✓ Medplum configured") if EPIC_URL: gateway.add_source("epic", EPIC_URL) - print("✓ Epic configured") + logger.info("✓ Epic configured") app = HealthChainAPI(title="Sepsis Batch Screening") app.register_gateway(gateway, path="/fhir") @@ -170,7 +167,6 @@ def create_app(): return app, gateway -# Create app at module level app, gateway = create_app() From ad1fefaa8870fa8b09aa79d9ae4563ff97bdc49f Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 11:51:37 +0000 Subject: [PATCH 07/12] Make patient ids easier to copy and paste --- scripts/extract_mimic_demo_patients.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/extract_mimic_demo_patients.py b/scripts/extract_mimic_demo_patients.py index 
13526dfb..8e68e86e 100755 --- a/scripts/extract_mimic_demo_patients.py +++ b/scripts/extract_mimic_demo_patients.py @@ -280,6 +280,8 @@ def main(): args.output.mkdir(parents=True, exist_ok=True) print(f"\nExtracting to {args.output}/") + uploaded_ids = [] # Track server-assigned IDs for copy-paste output + for risk_level in ["high", "moderate", "low"]: risk_df = df[df["risk"] == risk_level] if len(risk_df) == 0: @@ -320,9 +322,11 @@ def main(): status = "" if args.upload and gateway: server_id = upload_bundle(gateway, output_data) - status = ( - f" ✓ uploaded (ID: {server_id})" if server_id else " ✓ uploaded" - ) + if server_id: + uploaded_ids.append((server_id, risk_level)) + status = f" ✓ uploaded (ID: {server_id})" + else: + status = " ✓ uploaded" print( f" {label}: {patient_id} ({patient['probability']:.1%}, {obs_count} obs){status}" @@ -331,7 +335,13 @@ def main(): # Print next steps print("\n" + "=" * 60) if args.upload: - print("✓ Uploaded to Medplum! Update patient IDs in sepsis_fhir_batch.py") + print("✓ Uploaded to Medplum!") + if uploaded_ids: + print("\nCopy this into sepsis_fhir_batch.py:\n") + print("DEMO_PATIENT_IDS = [") + for server_id, risk in uploaded_ids: + print(f' "{server_id}", # {risk} risk') + print("]") elif args.bundle: print("Re-run with --upload to upload to Medplum") else: From a1ba549c7278dca516ccce5025b183b1309c64ad Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 11:52:20 +0000 Subject: [PATCH 08/12] Move sepsis training scripts to scripts folder and add epic connection check --- scripts/check_epic_connection.py | 96 +++ scripts/sepsis_prediction_inference.py | 206 +++++ scripts/sepsis_prediction_training.py | 1039 ++++++++++++++++++++++++ 3 files changed, 1341 insertions(+) create mode 100644 scripts/check_epic_connection.py create mode 100644 scripts/sepsis_prediction_inference.py create mode 100644 scripts/sepsis_prediction_training.py diff --git a/scripts/check_epic_connection.py b/scripts/check_epic_connection.py new file mode 100644 index 00000000..e55ad023 --- /dev/null +++ b/scripts/check_epic_connection.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Quick Epic FHIR connection test. + +Run: python scripts/check_epic_connection.py +""" + +from dotenv import load_dotenv + +load_dotenv() + + +def main(): + print("=" * 50) + print("Epic FHIR Connection Test") + print("=" * 50) + + # 1. Load config + print("\n1. Loading config from environment...") + try: + from healthchain.gateway.clients.fhir.base import FHIRAuthConfig + + config = FHIRAuthConfig.from_env("EPIC") + print(f" ✓ client_id: {config.client_id[:8]}...") + print(f" ✓ token_url: {config.token_url}") + print(f" ✓ base_url: {config.base_url}") + print(f" ✓ use_jwt_assertion: {config.use_jwt_assertion}") + except Exception as e: + print(f" ✗ Failed to load config: {e}") + return False + + # 2. Test JWT creation + print("\n2. Creating JWT assertion...") + try: + oauth_config = config.to_oauth2_config() + from healthchain.gateway.clients.auth import OAuth2TokenManager + + manager = OAuth2TokenManager(oauth_config) + jwt = manager._create_jwt_assertion() + print(f" ✓ JWT created ({len(jwt)} chars)") + except Exception as e: + print(f" ✗ JWT creation failed: {e}") + return False + + # 3. Get access token + print("\n3. 
Requesting access token from Epic...") + try: + token = manager.get_access_token() + print(f" ✓ Token received: {token[:20]}...") + except Exception as e: + print(f" ✗ Token request failed: {e}") + print("\n Possible causes:") + print(" - App changes still propagating (wait 15-30 min)") + print(" - Public key not registered in Epic App Orchard") + print(" - App not in 'Ready for Sandbox' state") + return False + + # 4. Test FHIR endpoint + print("\n4. Testing FHIR endpoint (CapabilityStatement)...") + try: + from healthchain.gateway.clients.fhir.sync.client import FHIRClient + + client = FHIRClient(config) + caps = client.capabilities() + print(f" ✓ FHIR server: {caps.software.name if caps.software else 'Unknown'}") + print(f" ✓ FHIR version: {caps.fhirVersion}") + except Exception as e: + print(f" ✗ FHIR request failed: {e}") + return False + + # 5. Test patient read (optional) + print("\n5. Testing Patient read...") + test_patient_id = "e0w0LEDCYtfckT6N.CkJKCw3" # Epic sandbox patient + try: + from fhir.resources.patient import Patient + + patient = client.read(Patient, test_patient_id) + name = patient.name[0] if patient.name else None + print( + f" ✓ Patient: {name.given[0] if name and name.given else '?'} {name.family if name else '?'}" + ) + except Exception as e: + print(f" ⚠ Patient read failed: {e}") + print(" (This may be a permissions issue, not a connection issue)") + + print("\n" + "=" * 50) + print("✓ Epic connection working!") + print("=" * 50) + return True + + +if __name__ == "__main__": + import sys + + success = main() + sys.exit(0 if success else 1) diff --git a/scripts/sepsis_prediction_inference.py b/scripts/sepsis_prediction_inference.py new file mode 100644 index 00000000..33edb858 --- /dev/null +++ b/scripts/sepsis_prediction_inference.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +""" +Sepsis Prediction Inference Script + +Demonstrates how to load and use the trained sepsis prediction model. + +Requirements: +- pip install scikit-learn xgboost joblib pandas numpy + +Usage: +- python sepsis_prediction_inference.py +""" + +import pandas as pd +import numpy as np +from pathlib import Path +from typing import Dict, Union, Tuple +import joblib + + +def load_model(model_path: Union[str, Path]) -> Dict: + """ + Load trained sepsis prediction model. + + Args: + model_path: Path to saved model file + + Returns: + Dictionary containing model, scaler, and metadata + """ + print(f"Loading model from {model_path}...") + model_data = joblib.load(model_path) + + metadata = model_data["metadata"] + print(f" Model: {metadata['model_name']}") + print(f" Training date: {metadata['training_date']}") + print(f" Features: {', '.join(metadata['feature_names'])}") + print(f" Test F1-score: {metadata['metrics']['f1']:.4f}") + print(f" Test AUC-ROC: {metadata['metrics']['auc']:.4f}") + + if "optimal_threshold" in metadata["metrics"]: + print(f" Optimal threshold: {metadata['metrics']['optimal_threshold']:.4f}") + print(f" Optimal F1-score: {metadata['metrics']['optimal_f1']:.4f}") + + return model_data + + +def predict_sepsis( + model_data: Dict, patient_features: pd.DataFrame, use_optimal_threshold: bool = True +) -> Tuple[np.ndarray, np.ndarray]: + """ + Predict sepsis risk for patient(s). 
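+
+    Example (illustrative; assumes a model saved by the training script):
+        >>> model_data = load_model("models/sepsis_model.pkl")
+        >>> preds, probs = predict_sepsis(model_data, create_example_patients())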
+ + Args: + model_data: Dictionary containing model, scaler, and metadata + patient_features: DataFrame with patient features + use_optimal_threshold: Whether to use optimal threshold (default: True) + + Returns: + Tuple of (predictions, probabilities) + """ + model = model_data["model"] + scaler = model_data["scaler"] + metadata = model_data["metadata"] + feature_names = metadata["feature_names"] + + # Ensure features are in correct order + patient_features = patient_features[feature_names] + + # Apply scaling if Logistic Regression + if scaler is not None: + patient_features_scaled = scaler.transform(patient_features) + probabilities = model.predict_proba(patient_features_scaled)[:, 1] + else: + probabilities = model.predict_proba(patient_features)[:, 1] + + # Use optimal threshold if available and requested + if use_optimal_threshold and "optimal_threshold" in metadata["metrics"]: + threshold = metadata["metrics"]["optimal_threshold"] + else: + threshold = 0.5 + + predictions = (probabilities >= threshold).astype(int) + + return predictions, probabilities + + +def create_example_patients() -> pd.DataFrame: + """ + Create example patient data for demonstration. + + Returns: + DataFrame with example patient features + """ + # Example patient data + # Patient 1: Healthy patient (low risk) + # Patient 2: Moderate risk (some abnormal values) + # Patient 3: Low risk (normal values) + # Patient 4: High risk for sepsis (multiple severe abnormalities) + # Patient 5: Critical sepsis risk (severe multi-organ dysfunction) + patients = pd.DataFrame( + { + "heart_rate": [85, 110, 75, 130, 145], # beats/min (normal: 60-100) + "temperature": [ + 37.2, + 38.5, + 36.8, + 39.2, + 35.5, + ], # Celsius (normal: 36.5-37.5, hypothermia <36) + "respiratory_rate": [16, 24, 14, 30, 35], # breaths/min (normal: 12-20) + "wbc": [8.5, 15.2, 7.0, 18.5, 22.0], # x10^9/L (normal: 4-11) + "lactate": [ + 1.2, + 3.5, + 0.9, + 4.8, + 6.5, + ], # mmol/L (normal: <2, severe sepsis: >4) + "creatinine": [0.9, 1.8, 0.8, 2.5, 3.2], # mg/dL (normal: 0.6-1.2) + "age": [45, 68, 35, 72, 78], # years + "gender_encoded": [1, 0, 1, 1, 0], # 1=Male, 0=Female + } + ) + + return patients + + +def interpret_results( + predictions: np.ndarray, probabilities: np.ndarray, patient_features: pd.DataFrame +) -> None: + """ + Interpret and display prediction results. 
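+    Risk bands: probability >= 70% is HIGH, >= 40% is MODERATE, otherwise LOW.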
+ + Args: + predictions: Binary predictions (0=no sepsis, 1=sepsis) + probabilities: Probability scores + patient_features: Original patient features + """ + print("\n" + "=" * 80) + print("SEPSIS PREDICTION RESULTS") + print("=" * 80) + + for i in range(len(predictions)): + print(f"\nPatient {i+1}:") + print(f" Risk Score: {probabilities[i]:.2%}") + print(f" Prediction: {'SEPSIS RISK' if predictions[i] == 1 else 'Low Risk'}") + + # Show key vital signs + print(" Key Features:") + print(f" Heart Rate: {patient_features.iloc[i]['heart_rate']:.1f} bpm") + print(f" Temperature: {patient_features.iloc[i]['temperature']:.1f}°C") + print( + f" Respiratory Rate: {patient_features.iloc[i]['respiratory_rate']:.1f} /min" + ) + print(f" WBC: {patient_features.iloc[i]['wbc']:.1f} x10^9/L") + print(f" Lactate: {patient_features.iloc[i]['lactate']:.1f} mmol/L") + print(f" Creatinine: {patient_features.iloc[i]['creatinine']:.2f} mg/dL") + + # Risk interpretation + if probabilities[i] >= 0.7: + risk_level = "HIGH" + elif probabilities[i] >= 0.4: + risk_level = "MODERATE" + else: + risk_level = "LOW" + + print(f" Clinical Interpretation: {risk_level} RISK") + + print("\n" + "=" * 80) + + +def main(): + """Main inference pipeline.""" + # Model path (relative to script location) + script_dir = Path(__file__).parent + model_path = script_dir / "models" / "sepsis_model.pkl" + + print("=" * 80) + print("Sepsis Prediction Inference") + print("=" * 80 + "\n") + + # Load model + model_data = load_model(model_path) + + # Create example patients + print("\nCreating example patient data...") + patient_features = create_example_patients() + print(f"Number of patients: {len(patient_features)}") + + # Make predictions + print("\nMaking predictions...") + predictions, probabilities = predict_sepsis( + model_data, patient_features, use_optimal_threshold=True + ) + + # Interpret results + interpret_results(predictions, probabilities, patient_features) + + print("\n" + "=" * 80) + print("Inference complete!") + print("=" * 80) + + +if __name__ == "__main__": + main() diff --git a/scripts/sepsis_prediction_training.py b/scripts/sepsis_prediction_training.py new file mode 100644 index 00000000..a0ea85ce --- /dev/null +++ b/scripts/sepsis_prediction_training.py @@ -0,0 +1,1039 @@ +#!/usr/bin/env python3 +""" +Sepsis Prediction Training Script + +Trains Random Forest, XGBoost, and Logistic Regression models for sepsis prediction +using MIMIC-IV clinical database data. 
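+
+Features are aggregated over the first 24 hours of each ICU stay; labels come
+from ICD-9 and ICD-10 sepsis diagnosis codes on the matching admission.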
+ +Requirements: +- pip install scikit-learn xgboost joblib pandas numpy + +Run: +- python sepsis_prediction_training.py +""" + +import pandas as pd +import numpy as np +from pathlib import Path +from datetime import datetime +from typing import Dict, Tuple, List, Any, Union + +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import ( + accuracy_score, + precision_score, + recall_score, + f1_score, + roc_auc_score, + precision_recall_curve, +) +import xgboost as xgb +import joblib + + +# MIMIC-IV ItemID mappings for features +CHARTEVENTS_ITEMIDS = { + "heart_rate": 220050, + "temperature_f": 223761, + "temperature_c": 223762, + "respiratory_rate": 220210, +} + +LABEVENTS_ITEMIDS = { + "wbc": [51300, 51301], # White Blood Cell Count + "lactate": 50813, + "creatinine": 50912, +} + +# Sepsis ICD-10 codes +SEPSIS_ICD10_CODES = [ + "A41.9", # Sepsis, unspecified organism + "A40", # Streptococcal sepsis (starts with) + "A41", # Other sepsis (starts with) + "R65.20", # Severe sepsis without shock + "R65.21", # Severe sepsis with shock + "R65.1", # SIRS (Systemic Inflammatory Response Syndrome) + "A41.0", # Sepsis due to Streptococcus, group A + "A41.1", # Sepsis due to Streptococcus, group B + "A41.2", # Sepsis due to other specified streptococci + "A41.3", # Sepsis due to Haemophilus influenzae + "A41.4", # Sepsis due to anaerobes + "A41.5", # Sepsis due to other Gram-negative organisms + "A41.50", # Sepsis due to unspecified Gram-negative organism + "A41.51", # Sepsis due to Escherichia coli + "A41.52", # Sepsis due to Pseudomonas + "A41.53", # Sepsis due to Serratia + "A41.59", # Sepsis due to other Gram-negative organisms + "A41.8", # Other specified sepsis + "A41.81", # Sepsis due to Enterococcus + "A41.89", # Other specified sepsis +] + +# Sepsis ICD-9 codes (for older data) +SEPSIS_ICD9_CODES = [ + "038", # Septicemia (starts with) + "99591", # Sepsis + "99592", # Severe sepsis + "78552", # Septic shock +] + + +def load_mimic_data(data_dir: str) -> Dict[str, pd.DataFrame]: + """ + Load all required MIMIC-IV CSV tables. + + Args: + data_dir: Path to MIMIC-IV dataset directory + + Returns: + Dictionary mapping table names to DataFrames + """ + data_dir = Path(data_dir) + + print("Loading MIMIC-IV data...") + + tables = { + "patients": pd.read_csv( + data_dir / "hosp" / "patients.csv.gz", compression="gzip", low_memory=False + ), + "admissions": pd.read_csv( + data_dir / "hosp" / "admissions.csv.gz", + compression="gzip", + low_memory=False, + ), + "icustays": pd.read_csv( + data_dir / "icu" / "icustays.csv.gz", compression="gzip", low_memory=False + ), + "chartevents": pd.read_csv( + data_dir / "icu" / "chartevents.csv.gz", + compression="gzip", + low_memory=False, + ), + "labevents": pd.read_csv( + data_dir / "hosp" / "labevents.csv.gz", compression="gzip", low_memory=False + ), + "diagnoses_icd": pd.read_csv( + data_dir / "hosp" / "diagnoses_icd.csv.gz", + compression="gzip", + low_memory=False, + ), + } + + print(f"Loaded {len(tables)} tables") + for name, df in tables.items(): + print(f" {name}: {len(df)} rows") + + return tables + + +def extract_chartevents_features( + chartevents: pd.DataFrame, icustays: pd.DataFrame +) -> pd.DataFrame: + """ + Extract 2-3 vital signs from chartevents table. 
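+    Specifically: heart_rate, temperature (Celsius; Fahrenheit readings are
+    converted), and respiratory_rate, each averaged over the first 24 hours of
+    the stay.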
+ + Args: + chartevents: Chart events DataFrame + icustays: ICU stays DataFrame + + Returns: + DataFrame with features per stay_id + """ + print("Extracting chartevents features...") + + # Filter to relevant itemids + relevant_itemids = list(CHARTEVENTS_ITEMIDS.values()) + chartevents_filtered = chartevents[ + chartevents["itemid"].isin(relevant_itemids) + ].copy() + + # Merge with icustays to get stay times + chartevents_merged = chartevents_filtered.merge( + icustays[["stay_id", "intime", "outtime"]], on="stay_id", how="inner" + ) + + # Convert charttime to datetime + chartevents_merged["charttime"] = pd.to_datetime(chartevents_merged["charttime"]) + chartevents_merged["intime"] = pd.to_datetime(chartevents_merged["intime"]) + + # Filter to first 24 hours of ICU stay + chartevents_merged = chartevents_merged[ + (chartevents_merged["charttime"] >= chartevents_merged["intime"]) + & ( + chartevents_merged["charttime"] + <= chartevents_merged["intime"] + pd.Timedelta(hours=24) + ) + ] + + # Extract numeric values + chartevents_merged["valuenum"] = pd.to_numeric( + chartevents_merged["valuenum"], errors="coerce" + ) + + # Aggregate by stay_id and itemid (take mean) + features = [] + + for stay_id in icustays["stay_id"].unique(): + stay_data = chartevents_merged[chartevents_merged["stay_id"] == stay_id] + + feature_row = {"stay_id": stay_id} + + # Heart Rate + hr_data = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["heart_rate"]][ + "valuenum" + ] + feature_row["heart_rate"] = hr_data.mean() if not hr_data.empty else np.nan + + # Temperature (prefer Celsius, convert Fahrenheit if needed) + temp_c = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["temperature_c"]][ + "valuenum" + ] + temp_f = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["temperature_f"]][ + "valuenum" + ] + + if not temp_c.empty: + feature_row["temperature"] = temp_c.mean() + elif not temp_f.empty: + # Convert Fahrenheit to Celsius + feature_row["temperature"] = (temp_f.mean() - 32) * 5 / 9 + else: + feature_row["temperature"] = np.nan + + # Respiratory Rate + rr_data = stay_data[ + stay_data["itemid"] == CHARTEVENTS_ITEMIDS["respiratory_rate"] + ]["valuenum"] + feature_row["respiratory_rate"] = ( + rr_data.mean() if not rr_data.empty else np.nan + ) + + features.append(feature_row) + + return pd.DataFrame(features) + + +def extract_labevents_features( + labevents: pd.DataFrame, icustays: pd.DataFrame +) -> pd.DataFrame: + """ + Extract 2-3 lab values from labevents table. 
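+    Specifically: wbc, lactate, and creatinine, linked to ICU stays via hadm_id
+    and averaged over the first 24 hours of the stay.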
+ + Args: + labevents: Lab events DataFrame + icustays: ICU stays DataFrame + + Returns: + DataFrame with features per stay_id + """ + print("Extracting labevents features...") + + # Get relevant itemids + relevant_itemids = [ + LABEVENTS_ITEMIDS["lactate"], + LABEVENTS_ITEMIDS["creatinine"], + ] + LABEVENTS_ITEMIDS["wbc"] + + labevents_filtered = labevents[labevents["itemid"].isin(relevant_itemids)].copy() + + # Merge with icustays via admissions + # First need to get hadm_id from icustays + icustays_with_hadm = icustays[["stay_id", "hadm_id", "intime"]].copy() + + # Labevents links via hadm_id, then we need to link to stay_id + labevents_merged = labevents_filtered.merge( + icustays_with_hadm, on="hadm_id", how="inner" + ) + + # Convert charttime to datetime + labevents_merged["charttime"] = pd.to_datetime(labevents_merged["charttime"]) + labevents_merged["intime"] = pd.to_datetime(labevents_merged["intime"]) + + # Filter to first 24 hours of ICU stay + labevents_merged = labevents_merged[ + (labevents_merged["charttime"] >= labevents_merged["intime"]) + & ( + labevents_merged["charttime"] + <= labevents_merged["intime"] + pd.Timedelta(hours=24) + ) + ] + + # Extract numeric values + labevents_merged["valuenum"] = pd.to_numeric( + labevents_merged["valuenum"], errors="coerce" + ) + + # Aggregate by stay_id and itemid + features = [] + + for stay_id in icustays["stay_id"].unique(): + stay_data = labevents_merged[labevents_merged["stay_id"] == stay_id] + + feature_row = {"stay_id": stay_id} + + # WBC (check both itemids) + wbc_data = stay_data[stay_data["itemid"].isin(LABEVENTS_ITEMIDS["wbc"])][ + "valuenum" + ] + feature_row["wbc"] = wbc_data.mean() if not wbc_data.empty else np.nan + + # Lactate + lactate_data = stay_data[stay_data["itemid"] == LABEVENTS_ITEMIDS["lactate"]][ + "valuenum" + ] + feature_row["lactate"] = ( + lactate_data.mean() if not lactate_data.empty else np.nan + ) + + # Creatinine + creatinine_data = stay_data[ + stay_data["itemid"] == LABEVENTS_ITEMIDS["creatinine"] + ]["valuenum"] + feature_row["creatinine"] = ( + creatinine_data.mean() if not creatinine_data.empty else np.nan + ) + + features.append(feature_row) + + return pd.DataFrame(features) + + +def extract_demographics( + patients: pd.DataFrame, admissions: pd.DataFrame, icustays: pd.DataFrame +) -> pd.DataFrame: + """ + Extract age and gender from patients table. + + Args: + patients: Patients DataFrame + admissions: Admissions DataFrame (not used, kept for compatibility) + icustays: ICU stays DataFrame + + Returns: + DataFrame with demographics per stay_id + """ + print("Extracting demographics...") + + # icustays already has subject_id, so merge directly with patients + icustays_with_patient = icustays[["stay_id", "subject_id"]].merge( + patients[["subject_id", "gender", "anchor_age"]], on="subject_id", how="left" + ) + + # Use anchor_age if available, otherwise calculate from anchor_year and anchor_age + # For demo data, anchor_age should be available + demographics = icustays_with_patient[["stay_id", "anchor_age", "gender"]].copy() + demographics.rename(columns={"anchor_age": "age"}, inplace=True) + + # Encode gender (M=1, F=0) + demographics["gender_encoded"] = (demographics["gender"] == "M").astype(int) + + return demographics[["stay_id", "age", "gender_encoded"]] + + +def extract_sepsis_labels( + diagnoses_icd: pd.DataFrame, icustays: pd.DataFrame +) -> pd.DataFrame: + """ + Extract sepsis labels from diagnoses_icd table. + Checks both ICD-9 and ICD-10 codes to maximize positive samples. 
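+    Short codes (e.g. "A41", "038") are treated as prefixes; a stay is labelled
+    positive if its admission (hadm_id) carries any matching diagnosis.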
+ + Args: + diagnoses_icd: Diagnoses ICD DataFrame + icustays: ICU stays DataFrame + + Returns: + DataFrame with sepsis labels per stay_id + """ + print("Extracting sepsis labels...") + + # Check what ICD versions are available + icd_versions = diagnoses_icd["icd_version"].unique() + print(f" Available ICD versions: {sorted(icd_versions)}") + + all_sepsis_diagnoses = [] + + # Check ICD-10 codes + if 10 in icd_versions: + diagnoses_icd10 = diagnoses_icd[diagnoses_icd["icd_version"] == 10].copy() + print(f" ICD-10 diagnoses: {len(diagnoses_icd10)} rows") + + sepsis_mask = pd.Series( + [False] * len(diagnoses_icd10), index=diagnoses_icd10.index + ) + + for code in SEPSIS_ICD10_CODES: + if "." not in code or code.endswith("."): + # Pattern match (e.g., "A40" matches "A40.x") + code_prefix = code.rstrip(".") + mask = diagnoses_icd10["icd_code"].str.startswith(code_prefix, na=False) + sepsis_mask |= mask + if mask.sum() > 0: + print( + f" Found {mask.sum()} ICD-10 diagnoses matching pattern '{code}'" + ) + else: + # Exact match + mask = diagnoses_icd10["icd_code"] == code + sepsis_mask |= mask + if mask.sum() > 0: + print( + f" Found {mask.sum()} ICD-10 diagnoses with exact code '{code}'" + ) + + sepsis_icd10 = diagnoses_icd10[sepsis_mask].copy() + if len(sepsis_icd10) > 0: + all_sepsis_diagnoses.append(sepsis_icd10) + print(f" Total ICD-10 sepsis diagnoses: {len(sepsis_icd10)}") + + # Check ICD-9 codes + if 9 in icd_versions: + diagnoses_icd9 = diagnoses_icd[diagnoses_icd["icd_version"] == 9].copy() + print(f" ICD-9 diagnoses: {len(diagnoses_icd9)} rows") + + sepsis_mask = pd.Series( + [False] * len(diagnoses_icd9), index=diagnoses_icd9.index + ) + + for code in SEPSIS_ICD9_CODES: + if len(code) <= 3 or code.endswith("."): + # Pattern match (e.g., "038" matches "038.x") + code_prefix = code.rstrip(".") + mask = diagnoses_icd9["icd_code"].str.startswith(code_prefix, na=False) + sepsis_mask |= mask + if mask.sum() > 0: + print( + f" Found {mask.sum()} ICD-9 diagnoses matching pattern '{code}'" + ) + else: + # Exact match + mask = diagnoses_icd9["icd_code"] == code + sepsis_mask |= mask + if mask.sum() > 0: + print( + f" Found {mask.sum()} ICD-9 diagnoses with exact code '{code}'" + ) + + sepsis_icd9 = diagnoses_icd9[sepsis_mask].copy() + if len(sepsis_icd9) > 0: + all_sepsis_diagnoses.append(sepsis_icd9) + print(f" Total ICD-9 sepsis diagnoses: {len(sepsis_icd9)}") + + # Combine all sepsis diagnoses + if all_sepsis_diagnoses: + sepsis_diagnoses = pd.concat(all_sepsis_diagnoses, ignore_index=True) + print(f" Total sepsis diagnoses (ICD-9 + ICD-10): {len(sepsis_diagnoses)}") + + if len(sepsis_diagnoses) > 0: + print( + f" Sample sepsis ICD codes: {sepsis_diagnoses['icd_code'].unique()[:15].tolist()}" + ) + print( + f" Unique hadm_id with sepsis: {sepsis_diagnoses['hadm_id'].nunique()}" + ) + else: + sepsis_diagnoses = pd.DataFrame(columns=diagnoses_icd.columns) + print(" No sepsis diagnoses found") + + # Merge with icustays to get stay_id + icustays_with_hadm = icustays[["stay_id", "hadm_id"]].copy() + + if len(sepsis_diagnoses) > 0: + sepsis_labels = icustays_with_hadm.merge( + sepsis_diagnoses[["hadm_id"]].drop_duplicates(), + on="hadm_id", + how="left", + indicator=True, + ) + else: + sepsis_labels = icustays_with_hadm.copy() + sepsis_labels["_merge"] = "left_only" + + # Create binary label (1 if sepsis, 0 otherwise) + sepsis_labels["sepsis"] = (sepsis_labels["_merge"] == "both").astype(int) + + sepsis_count = sepsis_labels["sepsis"].sum() + print( + f" ICU stays with sepsis: 
{sepsis_count}/{len(sepsis_labels)} ({sepsis_count/len(sepsis_labels)*100:.2f}%)" + ) + + return sepsis_labels[["stay_id", "sepsis"]] + + +def print_feature_summary(X: pd.DataFrame): + """Print feature statistics with FHIR mapping information. + + Args: + X: Feature matrix with actual data + """ + print("\n" + "=" * 120) + print("FEATURE SUMMARY: MIMIC-IV → Model → FHIR Mapping") + print("=" * 120) + + # Define FHIR mappings for each feature + fhir_mappings = { + "heart_rate": { + "mimic_table": "chartevents", + "mimic_itemid": "220050", + "fhir_resource": "Observation", + "fhir_code": "8867-4", + "fhir_system": "LOINC", + "fhir_display": "Heart rate", + }, + "temperature": { + "mimic_table": "chartevents", + "mimic_itemid": "223762/223761", + "fhir_resource": "Observation", + "fhir_code": "8310-5", + "fhir_system": "LOINC", + "fhir_display": "Body temperature", + }, + "respiratory_rate": { + "mimic_table": "chartevents", + "mimic_itemid": "220210", + "fhir_resource": "Observation", + "fhir_code": "9279-1", + "fhir_system": "LOINC", + "fhir_display": "Respiratory rate", + }, + "wbc": { + "mimic_table": "labevents", + "mimic_itemid": "51300/51301", + "fhir_resource": "Observation", + "fhir_code": "6690-2", + "fhir_system": "LOINC", + "fhir_display": "Leukocytes [#/volume] in Blood", + }, + "lactate": { + "mimic_table": "labevents", + "mimic_itemid": "50813", + "fhir_resource": "Observation", + "fhir_code": "2524-7", + "fhir_system": "LOINC", + "fhir_display": "Lactate [Moles/volume] in Blood", + }, + "creatinine": { + "mimic_table": "labevents", + "mimic_itemid": "50912", + "fhir_resource": "Observation", + "fhir_code": "2160-0", + "fhir_system": "LOINC", + "fhir_display": "Creatinine [Mass/volume] in Serum or Plasma", + }, + "age": { + "mimic_table": "patients", + "mimic_itemid": "anchor_age", + "fhir_resource": "Patient", + "fhir_code": "birthDate", + "fhir_system": "FHIR Core", + "fhir_display": "Patient birth date (calculate age)", + }, + "gender_encoded": { + "mimic_table": "patients", + "mimic_itemid": "gender", + "fhir_resource": "Patient", + "fhir_code": "gender", + "fhir_system": "FHIR Core", + "fhir_display": "Administrative Gender (M/F)", + }, + } + + print( + f"\n{'Feature':<20} {'Mean±SD':<20} {'MIMIC Source':<20} {'FHIR Resource':<20} {'FHIR Code (System)':<30}" + ) + print("-" * 120) + + for feature in X.columns: + mapping = fhir_mappings.get(feature, {}) + + # Calculate statistics + mean_val = X[feature].mean() + std_val = X[feature].std() + + # Format based on feature type + if feature == "gender_encoded": + stats = f"{mean_val:.2f} (M={X[feature].sum():.0f})" + else: + stats = f"{mean_val:.2f}±{std_val:.2f}" + + mimic_source = f"{mapping.get('mimic_table', 'N/A')} ({mapping.get('mimic_itemid', 'N/A')})" + fhir_resource = mapping.get("fhir_resource", "N/A") + fhir_code = ( + f"{mapping.get('fhir_code', 'N/A')} ({mapping.get('fhir_system', 'N/A')})" + ) + + print( + f"{feature:<20} {stats:<20} {mimic_source:<20} {fhir_resource:<20} {fhir_code:<30}" + ) + + print("\n" + "=" * 120) + print( + "Note: Statistics calculated from first 24 hours of ICU stay. Missing values imputed with median." + ) + print("=" * 120 + "\n") + + +def create_feature_matrix( + chartevents_features: pd.DataFrame, + labevents_features: pd.DataFrame, + demographics: pd.DataFrame, + sepsis_labels: pd.DataFrame, +) -> Tuple[pd.DataFrame, pd.Series]: + """ + Create feature matrix and labels from extracted features. 
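+    Output columns: heart_rate, temperature, respiratory_rate, wbc, lactate,
+    creatinine, age, gender_encoded; labels are the binary sepsis indicator.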
+ + Args: + chartevents_features: Chart events features + labevents_features: Lab events features + demographics: Demographics features + sepsis_labels: Sepsis labels + + Returns: + Tuple of (feature matrix, labels) + """ + print("Creating feature matrix...") + + # Merge all features on stay_id + features = ( + chartevents_features.merge(labevents_features, on="stay_id", how="outer") + .merge(demographics, on="stay_id", how="outer") + .merge(sepsis_labels, on="stay_id", how="inner") + ) + + # Select feature columns (exclude stay_id and sepsis) + feature_cols = [ + "heart_rate", + "temperature", + "respiratory_rate", + "wbc", + "lactate", + "creatinine", + "age", + "gender_encoded", + ] + + X = features[feature_cols].copy() + y = features["sepsis"].copy() + + print(f"Feature matrix shape: {X.shape}") + print(f"Sepsis cases: {y.sum()} ({y.sum() / len(y) * 100:.2f}%)") + + return X, y + + +def train_models(X_train: pd.DataFrame, y_train: pd.Series) -> Dict[str, Any]: + """ + Train all three models (Random Forest, XGBoost, Logistic Regression). + + Args: + X_train: Training features + y_train: Training labels + + Returns: + Dictionary of trained models + """ + print("\nTraining models...") + + models = {} + + # Check if we have any positive samples + positive_samples = y_train.sum() + total_samples = len(y_train) + positive_rate = positive_samples / total_samples if total_samples > 0 else 0.0 + + print( + f" Positive samples: {positive_samples}/{total_samples} ({positive_rate*100:.2f}%)" + ) + + # Random Forest - use class_weight to handle imbalance + print(" Training Random Forest...") + rf = RandomForestClassifier( + n_estimators=100, + random_state=42, + n_jobs=-1, + class_weight="balanced", # Automatically adjust for class imbalance + ) + rf.fit(X_train, y_train) + models["RandomForest"] = rf + + # XGBoost - handle case with no positive samples + print(" Training XGBoost...") + if positive_samples == 0: + # When there are no positive samples, set base_score to a small value + # and use scale_pos_weight to avoid errors + xgb_model = xgb.XGBClassifier( + random_state=42, + n_jobs=-1, + eval_metric="logloss", + base_score=0.01, # Small positive value instead of 0 + scale_pos_weight=1.0, + ) + else: + # Calculate scale_pos_weight for imbalanced data + scale_pos_weight = (total_samples - positive_samples) / positive_samples + xgb_model = xgb.XGBClassifier( + random_state=42, + n_jobs=-1, + eval_metric="logloss", + scale_pos_weight=scale_pos_weight, + ) + xgb_model.fit(X_train, y_train) + models["XGBoost"] = xgb_model + + # Logistic Regression (with scaling) - use class_weight to handle imbalance + print(" Training Logistic Regression...") + scaler = StandardScaler() + X_train_scaled = scaler.fit_transform(X_train) + lr = LogisticRegression( + random_state=42, + max_iter=1000, + class_weight="balanced", # Automatically adjust for class imbalance + ) + lr.fit(X_train_scaled, y_train) + models["LogisticRegression"] = lr + models["scaler"] = scaler # Store scaler for later use + + return models + + +def evaluate_models( + models: Dict[str, Any], + X_test: pd.DataFrame, + y_test: pd.Series, + feature_names: List[str], +) -> Dict[str, Dict[str, float]]: + """ + Evaluate and compare all models. 
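+    Metrics are reported at the default 0.5 threshold, plus an F1-optimal
+    threshold derived from the precision-recall curve.
+
+    Example (illustrative, given trained models and a held-out split):
+        >>> results = evaluate_models(models, X_test, y_test, X.columns.tolist())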
+ + Args: + models: Dictionary of trained models + X_test: Test features + y_test: Test labels + feature_names: List of feature names + + Returns: + Dictionary of evaluation metrics for each model + """ + print("\nEvaluating models...") + print( + f"Test set: {len(y_test)} samples, {y_test.sum()} positive ({y_test.sum()/len(y_test)*100:.2f}%)" + ) + + results = {} + + for name, model in models.items(): + if name == "scaler": + continue + + # Get probability predictions + if name == "LogisticRegression": + X_test_scaled = models["scaler"].transform(X_test) + y_pred_proba = model.predict_proba(X_test_scaled)[:, 1] + else: + y_pred_proba = model.predict_proba(X_test)[:, 1] + + # Use default threshold (0.5) for predictions + y_pred = (y_pred_proba >= 0.5).astype(int) + + # Calculate metrics with default threshold + metrics = { + "accuracy": accuracy_score(y_test, y_pred), + "precision": precision_score(y_test, y_pred, zero_division=0), + "recall": recall_score(y_test, y_pred, zero_division=0), + "f1": f1_score(y_test, y_pred, zero_division=0), + "auc": roc_auc_score(y_test, y_pred_proba) + if len(np.unique(y_test)) > 1 + else 0.0, + } + + # Try to find optimal threshold for F1 score + if len(np.unique(y_test)) > 1 and y_test.sum() > 0: + precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba) + f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10) + optimal_idx = np.argmax(f1_scores) + optimal_threshold = ( + thresholds[optimal_idx] if optimal_idx < len(thresholds) else 0.5 + ) + optimal_f1 = f1_scores[optimal_idx] + + # Predictions with optimal threshold + y_pred_optimal = (y_pred_proba >= optimal_threshold).astype(int) + metrics["optimal_threshold"] = optimal_threshold + metrics["optimal_f1"] = optimal_f1 + metrics["optimal_precision"] = precision_score( + y_test, y_pred_optimal, zero_division=0 + ) + metrics["optimal_recall"] = recall_score( + y_test, y_pred_optimal, zero_division=0 + ) + else: + metrics["optimal_threshold"] = 0.5 + metrics["optimal_f1"] = 0.0 + metrics["optimal_precision"] = 0.0 + metrics["optimal_recall"] = 0.0 + + results[name] = metrics + + print(f"\n{name}:") + print( + f" Predictions: {y_pred.sum()} positive predicted (actual: {y_test.sum()})" + ) + print(f" Accuracy: {metrics['accuracy']:.4f}") + print(f" Precision: {metrics['precision']:.4f}") + print(f" Recall: {metrics['recall']:.4f}") + print(f" F1-score: {metrics['f1']:.4f}") + print(f" AUC-ROC: {metrics['auc']:.4f}") + if metrics["optimal_f1"] > 0: + print(f" Optimal threshold: {metrics['optimal_threshold']:.4f}") + print(f" Optimal F1-score: {metrics['optimal_f1']:.4f}") + print(f" Optimal Precision: {metrics['optimal_precision']:.4f}") + print(f" Optimal Recall: {metrics['optimal_recall']:.4f}") + + # Show feature importance for tree-based models + if hasattr(model, "feature_importances_"): + print("\n Top 5 Feature Importances:") + importances = model.feature_importances_ + indices = np.argsort(importances)[::-1][:5] + for idx in indices: + print(f" {feature_names[idx]}: {importances[idx]:.4f}") + + return results + + +def select_best_model( + models: Dict[str, Any], + results: Dict[str, Dict[str, float]], + metric: str = "f1", +) -> Tuple[str, Any, Dict[str, float]]: + """ + Select best model based on specified metric. 
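+    Prefers the threshold-optimised variant of a metric when present
+    (e.g. optimal_f1 over f1).
+
+    Example (illustrative; recall is often prioritised for sepsis screening):
+        >>> name, model, metrics = select_best_model(models, results, metric="recall")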
+ + Args: + models: Dictionary of trained models + results: Evaluation results + metric: Metric to optimize ("f1", "recall", "precision", "auc") + + Returns: + Tuple of (best model name, best model, best metrics) + """ + print(f"\nSelecting best model based on {metric}...") + + # Get the appropriate metric value (prefer optimal if available) + def get_metric_value(metrics, metric_name): + if metric_name == "f1": + return metrics.get("optimal_f1", metrics["f1"]) + elif metric_name == "recall": + return metrics.get("optimal_recall", metrics["recall"]) + elif metric_name == "precision": + return metrics.get("optimal_precision", metrics["precision"]) + elif metric_name == "auc": + return metrics.get("auc", 0.0) + else: + return metrics.get("optimal_f1", metrics["f1"]) + + best_name = max(results.keys(), key=lambda k: get_metric_value(results[k], metric)) + best_model = models[best_name] + best_metrics = results[best_name] + + best_value = get_metric_value(best_metrics, metric) + print(f"Best model: {best_name} ({metric}: {best_value:.4f})") + + return best_name, best_model, best_metrics + + +def save_model( + model: Any, + model_name: str, + feature_names: List[str], + metrics: Dict[str, float], + scaler: Any, + output_path: Union[str, Path], +) -> None: + """ + Save the best model with metadata. + + Args: + model: Trained model + model_name: Name of the model + feature_names: List of feature names + metrics: Evaluation metrics + scaler: StandardScaler (if Logistic Regression, None otherwise) + output_path: Path to save model + """ + print(f"\nSaving model to {output_path}...") + + # Create output directory if it doesn't exist + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Prepare metadata + metadata = { + "model_name": model_name, + "training_date": datetime.now().isoformat(), + "feature_names": feature_names, + "metrics": metrics, + "itemid_mappings": { + "chartevents": CHARTEVENTS_ITEMIDS, + "labevents": LABEVENTS_ITEMIDS, + }, + "sepsis_icd_codes": { + "icd10": SEPSIS_ICD10_CODES, + "icd9": SEPSIS_ICD9_CODES, + }, + } + + # Save model and metadata + model_data = { + "model": model, + "scaler": scaler, + "metadata": metadata, + } + + joblib.dump(model_data, output_path) + + print("Model saved successfully!") + + +def main(): + """Main training pipeline.""" + # Data directory + data_dir = "../datasets/mimic-iv-clinical-database-demo-2.2" + + # Output path (relative to script location) + script_dir = Path(__file__).parent + output_path = script_dir / "models" / "sepsis_model.pkl" + + print("=" * 60) + print("Sepsis Prediction Model Training") + print("=" * 60) + + # Load data + tables = load_mimic_data(data_dir) + + # Extract features + chartevents_features = extract_chartevents_features( + tables["chartevents"], tables["icustays"] + ) + labevents_features = extract_labevents_features( + tables["labevents"], tables["icustays"] + ) + demographics = extract_demographics( + tables["patients"], tables["admissions"], tables["icustays"] + ) + + # Extract labels + sepsis_labels = extract_sepsis_labels(tables["diagnoses_icd"], tables["icustays"]) + + # Create feature matrix + X, y = create_feature_matrix( + chartevents_features, + labevents_features, + demographics, + sepsis_labels, + ) + + # Handle missing values (impute with median) + print("\nHandling missing values...") + missing_before = X.isnull().sum().sum() + print(f" Missing values before imputation: {missing_before}") + X = X.fillna(X.median()) + + # Print feature summary with actual data 
statistics
+    print_feature_summary(X)
+
+    # Split data with careful stratification to ensure positive samples in both sets
+    print("\nSplitting data...")
+    if len(np.unique(y)) > 1 and y.sum() > 0:
+        # Use stratification to ensure positive samples in both train and test
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42, stratify=y
+        )
+        print(
+            f" Training set: {len(X_train)} samples ({y_train.sum()} positive, {y_train.sum()/len(y_train)*100:.2f}%)"
+        )
+        print(
+            f" Test set: {len(X_test)} samples ({y_test.sum()} positive, {y_test.sum()/len(y_test)*100:.2f}%)"
+        )
+
+        # Warn if test set has no positive samples (shouldn't happen with stratify, but check anyway)
+        if y_test.sum() == 0:
+            print(
+                " WARNING: Test set has no positive samples! Consider using a different random seed."
+            )
+    else:
+        print(
+            " Warning: No positive samples or only one class. Skipping stratification."
+        )
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+        print(f" Training set: {len(X_train)} samples")
+        print(f" Test set: {len(X_test)} samples")
+
+    # Apply oversampling to training data to balance classes
+    print("\nApplying oversampling to training data...")
+    try:
+        from imblearn.over_sampling import SMOTE
+
+        # Only apply SMOTE if we have positive samples
+        if y_train.sum() > 0 and len(np.unique(y_train)) > 1:
+            print(
+                f" Before oversampling: {len(X_train)} samples ({y_train.sum()} positive, {y_train.sum()/len(y_train)*100:.2f}%)"
+            )
+            # Ensure k_neighbors doesn't exceed available positive samples
+            k_neighbors = min(5, max(1, y_train.sum() - 1))
+            smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
+            X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
+            print(
+                f" After oversampling: {len(X_train_resampled)} samples ({y_train_resampled.sum()} positive, {y_train_resampled.sum()/len(X_train_resampled)*100:.2f}%)"
+            )
+            # SMOTE returns more rows than the original split, so rebuild the
+            # training frame with a fresh index instead of slicing the shorter
+            # original index (which would raise a length-mismatch ValueError).
+            X_train = pd.DataFrame(X_train_resampled, columns=X_train.columns)
+            y_train = pd.Series(y_train_resampled)
+        else:
+            print(" Skipping oversampling: insufficient positive samples")
+    except (ImportError, ModuleNotFoundError) as e:
+        print(
+            " imbalanced-learn not installed. 
Install with: pip install imbalanced-learn" + ) + print(f" Error: {e}") + print(" Proceeding without oversampling...") + + # Train models + models = train_models(X_train, y_train) + + # Evaluate models + feature_names = X.columns.tolist() + results = evaluate_models(models, X_test, y_test, feature_names) + + # Select best model (can change metric: "f1", "recall", "precision", "auc") + # For sepsis prediction, recall (sensitivity) is often most important + best_name, best_model, best_metrics = select_best_model( + models, results, metric="f1" + ) + + # Save best model + scaler = models.get("scaler") + save_model( + best_model, + best_name, + feature_names, + best_metrics, + scaler, + output_path, + ) + + print("\n" + "=" * 60) + print("Training complete!") + print("=" * 60) + + +if __name__ == "__main__": + main() From 8d68b7eddb214091356e3519198eb8a311c49c98 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 12:20:44 +0000 Subject: [PATCH 09/12] Remove output directory from git tracking --- output/README.md | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 output/README.md diff --git a/output/README.md b/output/README.md deleted file mode 100644 index 2b90777e..00000000 --- a/output/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Output Directory - -This is where the requests and responses generated by sandbox runs will be saved. From b63660709221b3a99cfbe6e029d7270ee7f8dcc8 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 12:24:09 +0000 Subject: [PATCH 10/12] Add notebooks --- cookbook/sepsis_prediction_inference.py | 206 ----- cookbook/sepsis_prediction_training.py | 1039 ----------------------- notebooks/fhir_ml_workflow.ipynb | 567 +++++++++++++ 3 files changed, 567 insertions(+), 1245 deletions(-) delete mode 100644 cookbook/sepsis_prediction_inference.py delete mode 100644 cookbook/sepsis_prediction_training.py create mode 100644 notebooks/fhir_ml_workflow.ipynb diff --git a/cookbook/sepsis_prediction_inference.py b/cookbook/sepsis_prediction_inference.py deleted file mode 100644 index 33edb858..00000000 --- a/cookbook/sepsis_prediction_inference.py +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env python3 -""" -Sepsis Prediction Inference Script - -Demonstrates how to load and use the trained sepsis prediction model. - -Requirements: -- pip install scikit-learn xgboost joblib pandas numpy - -Usage: -- python sepsis_prediction_inference.py -""" - -import pandas as pd -import numpy as np -from pathlib import Path -from typing import Dict, Union, Tuple -import joblib - - -def load_model(model_path: Union[str, Path]) -> Dict: - """ - Load trained sepsis prediction model. 
- - Args: - model_path: Path to saved model file - - Returns: - Dictionary containing model, scaler, and metadata - """ - print(f"Loading model from {model_path}...") - model_data = joblib.load(model_path) - - metadata = model_data["metadata"] - print(f" Model: {metadata['model_name']}") - print(f" Training date: {metadata['training_date']}") - print(f" Features: {', '.join(metadata['feature_names'])}") - print(f" Test F1-score: {metadata['metrics']['f1']:.4f}") - print(f" Test AUC-ROC: {metadata['metrics']['auc']:.4f}") - - if "optimal_threshold" in metadata["metrics"]: - print(f" Optimal threshold: {metadata['metrics']['optimal_threshold']:.4f}") - print(f" Optimal F1-score: {metadata['metrics']['optimal_f1']:.4f}") - - return model_data - - -def predict_sepsis( - model_data: Dict, patient_features: pd.DataFrame, use_optimal_threshold: bool = True -) -> Tuple[np.ndarray, np.ndarray]: - """ - Predict sepsis risk for patient(s). - - Args: - model_data: Dictionary containing model, scaler, and metadata - patient_features: DataFrame with patient features - use_optimal_threshold: Whether to use optimal threshold (default: True) - - Returns: - Tuple of (predictions, probabilities) - """ - model = model_data["model"] - scaler = model_data["scaler"] - metadata = model_data["metadata"] - feature_names = metadata["feature_names"] - - # Ensure features are in correct order - patient_features = patient_features[feature_names] - - # Apply scaling if Logistic Regression - if scaler is not None: - patient_features_scaled = scaler.transform(patient_features) - probabilities = model.predict_proba(patient_features_scaled)[:, 1] - else: - probabilities = model.predict_proba(patient_features)[:, 1] - - # Use optimal threshold if available and requested - if use_optimal_threshold and "optimal_threshold" in metadata["metrics"]: - threshold = metadata["metrics"]["optimal_threshold"] - else: - threshold = 0.5 - - predictions = (probabilities >= threshold).astype(int) - - return predictions, probabilities - - -def create_example_patients() -> pd.DataFrame: - """ - Create example patient data for demonstration. - - Returns: - DataFrame with example patient features - """ - # Example patient data - # Patient 1: Healthy patient (low risk) - # Patient 2: Moderate risk (some abnormal values) - # Patient 3: Low risk (normal values) - # Patient 4: High risk for sepsis (multiple severe abnormalities) - # Patient 5: Critical sepsis risk (severe multi-organ dysfunction) - patients = pd.DataFrame( - { - "heart_rate": [85, 110, 75, 130, 145], # beats/min (normal: 60-100) - "temperature": [ - 37.2, - 38.5, - 36.8, - 39.2, - 35.5, - ], # Celsius (normal: 36.5-37.5, hypothermia <36) - "respiratory_rate": [16, 24, 14, 30, 35], # breaths/min (normal: 12-20) - "wbc": [8.5, 15.2, 7.0, 18.5, 22.0], # x10^9/L (normal: 4-11) - "lactate": [ - 1.2, - 3.5, - 0.9, - 4.8, - 6.5, - ], # mmol/L (normal: <2, severe sepsis: >4) - "creatinine": [0.9, 1.8, 0.8, 2.5, 3.2], # mg/dL (normal: 0.6-1.2) - "age": [45, 68, 35, 72, 78], # years - "gender_encoded": [1, 0, 1, 1, 0], # 1=Male, 0=Female - } - ) - - return patients - - -def interpret_results( - predictions: np.ndarray, probabilities: np.ndarray, patient_features: pd.DataFrame -) -> None: - """ - Interpret and display prediction results. 
- - Args: - predictions: Binary predictions (0=no sepsis, 1=sepsis) - probabilities: Probability scores - patient_features: Original patient features - """ - print("\n" + "=" * 80) - print("SEPSIS PREDICTION RESULTS") - print("=" * 80) - - for i in range(len(predictions)): - print(f"\nPatient {i+1}:") - print(f" Risk Score: {probabilities[i]:.2%}") - print(f" Prediction: {'SEPSIS RISK' if predictions[i] == 1 else 'Low Risk'}") - - # Show key vital signs - print(" Key Features:") - print(f" Heart Rate: {patient_features.iloc[i]['heart_rate']:.1f} bpm") - print(f" Temperature: {patient_features.iloc[i]['temperature']:.1f}°C") - print( - f" Respiratory Rate: {patient_features.iloc[i]['respiratory_rate']:.1f} /min" - ) - print(f" WBC: {patient_features.iloc[i]['wbc']:.1f} x10^9/L") - print(f" Lactate: {patient_features.iloc[i]['lactate']:.1f} mmol/L") - print(f" Creatinine: {patient_features.iloc[i]['creatinine']:.2f} mg/dL") - - # Risk interpretation - if probabilities[i] >= 0.7: - risk_level = "HIGH" - elif probabilities[i] >= 0.4: - risk_level = "MODERATE" - else: - risk_level = "LOW" - - print(f" Clinical Interpretation: {risk_level} RISK") - - print("\n" + "=" * 80) - - -def main(): - """Main inference pipeline.""" - # Model path (relative to script location) - script_dir = Path(__file__).parent - model_path = script_dir / "models" / "sepsis_model.pkl" - - print("=" * 80) - print("Sepsis Prediction Inference") - print("=" * 80 + "\n") - - # Load model - model_data = load_model(model_path) - - # Create example patients - print("\nCreating example patient data...") - patient_features = create_example_patients() - print(f"Number of patients: {len(patient_features)}") - - # Make predictions - print("\nMaking predictions...") - predictions, probabilities = predict_sepsis( - model_data, patient_features, use_optimal_threshold=True - ) - - # Interpret results - interpret_results(predictions, probabilities, patient_features) - - print("\n" + "=" * 80) - print("Inference complete!") - print("=" * 80) - - -if __name__ == "__main__": - main() diff --git a/cookbook/sepsis_prediction_training.py b/cookbook/sepsis_prediction_training.py deleted file mode 100644 index a0ea85ce..00000000 --- a/cookbook/sepsis_prediction_training.py +++ /dev/null @@ -1,1039 +0,0 @@ -#!/usr/bin/env python3 -""" -Sepsis Prediction Training Script - -Trains Random Forest, XGBoost, and Logistic Regression models for sepsis prediction -using MIMIC-IV clinical database data. 
- -Requirements: -- pip install scikit-learn xgboost joblib pandas numpy - -Run: -- python sepsis_prediction_training.py -""" - -import pandas as pd -import numpy as np -from pathlib import Path -from datetime import datetime -from typing import Dict, Tuple, List, Any, Union - -from sklearn.ensemble import RandomForestClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.metrics import ( - accuracy_score, - precision_score, - recall_score, - f1_score, - roc_auc_score, - precision_recall_curve, -) -import xgboost as xgb -import joblib - - -# MIMIC-IV ItemID mappings for features -CHARTEVENTS_ITEMIDS = { - "heart_rate": 220050, - "temperature_f": 223761, - "temperature_c": 223762, - "respiratory_rate": 220210, -} - -LABEVENTS_ITEMIDS = { - "wbc": [51300, 51301], # White Blood Cell Count - "lactate": 50813, - "creatinine": 50912, -} - -# Sepsis ICD-10 codes -SEPSIS_ICD10_CODES = [ - "A41.9", # Sepsis, unspecified organism - "A40", # Streptococcal sepsis (starts with) - "A41", # Other sepsis (starts with) - "R65.20", # Severe sepsis without shock - "R65.21", # Severe sepsis with shock - "R65.1", # SIRS (Systemic Inflammatory Response Syndrome) - "A41.0", # Sepsis due to Streptococcus, group A - "A41.1", # Sepsis due to Streptococcus, group B - "A41.2", # Sepsis due to other specified streptococci - "A41.3", # Sepsis due to Haemophilus influenzae - "A41.4", # Sepsis due to anaerobes - "A41.5", # Sepsis due to other Gram-negative organisms - "A41.50", # Sepsis due to unspecified Gram-negative organism - "A41.51", # Sepsis due to Escherichia coli - "A41.52", # Sepsis due to Pseudomonas - "A41.53", # Sepsis due to Serratia - "A41.59", # Sepsis due to other Gram-negative organisms - "A41.8", # Other specified sepsis - "A41.81", # Sepsis due to Enterococcus - "A41.89", # Other specified sepsis -] - -# Sepsis ICD-9 codes (for older data) -SEPSIS_ICD9_CODES = [ - "038", # Septicemia (starts with) - "99591", # Sepsis - "99592", # Severe sepsis - "78552", # Septic shock -] - - -def load_mimic_data(data_dir: str) -> Dict[str, pd.DataFrame]: - """ - Load all required MIMIC-IV CSV tables. - - Args: - data_dir: Path to MIMIC-IV dataset directory - - Returns: - Dictionary mapping table names to DataFrames - """ - data_dir = Path(data_dir) - - print("Loading MIMIC-IV data...") - - tables = { - "patients": pd.read_csv( - data_dir / "hosp" / "patients.csv.gz", compression="gzip", low_memory=False - ), - "admissions": pd.read_csv( - data_dir / "hosp" / "admissions.csv.gz", - compression="gzip", - low_memory=False, - ), - "icustays": pd.read_csv( - data_dir / "icu" / "icustays.csv.gz", compression="gzip", low_memory=False - ), - "chartevents": pd.read_csv( - data_dir / "icu" / "chartevents.csv.gz", - compression="gzip", - low_memory=False, - ), - "labevents": pd.read_csv( - data_dir / "hosp" / "labevents.csv.gz", compression="gzip", low_memory=False - ), - "diagnoses_icd": pd.read_csv( - data_dir / "hosp" / "diagnoses_icd.csv.gz", - compression="gzip", - low_memory=False, - ), - } - - print(f"Loaded {len(tables)} tables") - for name, df in tables.items(): - print(f" {name}: {len(df)} rows") - - return tables - - -def extract_chartevents_features( - chartevents: pd.DataFrame, icustays: pd.DataFrame -) -> pd.DataFrame: - """ - Extract 2-3 vital signs from chartevents table. 
- - Args: - chartevents: Chart events DataFrame - icustays: ICU stays DataFrame - - Returns: - DataFrame with features per stay_id - """ - print("Extracting chartevents features...") - - # Filter to relevant itemids - relevant_itemids = list(CHARTEVENTS_ITEMIDS.values()) - chartevents_filtered = chartevents[ - chartevents["itemid"].isin(relevant_itemids) - ].copy() - - # Merge with icustays to get stay times - chartevents_merged = chartevents_filtered.merge( - icustays[["stay_id", "intime", "outtime"]], on="stay_id", how="inner" - ) - - # Convert charttime to datetime - chartevents_merged["charttime"] = pd.to_datetime(chartevents_merged["charttime"]) - chartevents_merged["intime"] = pd.to_datetime(chartevents_merged["intime"]) - - # Filter to first 24 hours of ICU stay - chartevents_merged = chartevents_merged[ - (chartevents_merged["charttime"] >= chartevents_merged["intime"]) - & ( - chartevents_merged["charttime"] - <= chartevents_merged["intime"] + pd.Timedelta(hours=24) - ) - ] - - # Extract numeric values - chartevents_merged["valuenum"] = pd.to_numeric( - chartevents_merged["valuenum"], errors="coerce" - ) - - # Aggregate by stay_id and itemid (take mean) - features = [] - - for stay_id in icustays["stay_id"].unique(): - stay_data = chartevents_merged[chartevents_merged["stay_id"] == stay_id] - - feature_row = {"stay_id": stay_id} - - # Heart Rate - hr_data = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["heart_rate"]][ - "valuenum" - ] - feature_row["heart_rate"] = hr_data.mean() if not hr_data.empty else np.nan - - # Temperature (prefer Celsius, convert Fahrenheit if needed) - temp_c = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["temperature_c"]][ - "valuenum" - ] - temp_f = stay_data[stay_data["itemid"] == CHARTEVENTS_ITEMIDS["temperature_f"]][ - "valuenum" - ] - - if not temp_c.empty: - feature_row["temperature"] = temp_c.mean() - elif not temp_f.empty: - # Convert Fahrenheit to Celsius - feature_row["temperature"] = (temp_f.mean() - 32) * 5 / 9 - else: - feature_row["temperature"] = np.nan - - # Respiratory Rate - rr_data = stay_data[ - stay_data["itemid"] == CHARTEVENTS_ITEMIDS["respiratory_rate"] - ]["valuenum"] - feature_row["respiratory_rate"] = ( - rr_data.mean() if not rr_data.empty else np.nan - ) - - features.append(feature_row) - - return pd.DataFrame(features) - - -def extract_labevents_features( - labevents: pd.DataFrame, icustays: pd.DataFrame -) -> pd.DataFrame: - """ - Extract 2-3 lab values from labevents table. 
- - Args: - labevents: Lab events DataFrame - icustays: ICU stays DataFrame - - Returns: - DataFrame with features per stay_id - """ - print("Extracting labevents features...") - - # Get relevant itemids - relevant_itemids = [ - LABEVENTS_ITEMIDS["lactate"], - LABEVENTS_ITEMIDS["creatinine"], - ] + LABEVENTS_ITEMIDS["wbc"] - - labevents_filtered = labevents[labevents["itemid"].isin(relevant_itemids)].copy() - - # Merge with icustays via admissions - # First need to get hadm_id from icustays - icustays_with_hadm = icustays[["stay_id", "hadm_id", "intime"]].copy() - - # Labevents links via hadm_id, then we need to link to stay_id - labevents_merged = labevents_filtered.merge( - icustays_with_hadm, on="hadm_id", how="inner" - ) - - # Convert charttime to datetime - labevents_merged["charttime"] = pd.to_datetime(labevents_merged["charttime"]) - labevents_merged["intime"] = pd.to_datetime(labevents_merged["intime"]) - - # Filter to first 24 hours of ICU stay - labevents_merged = labevents_merged[ - (labevents_merged["charttime"] >= labevents_merged["intime"]) - & ( - labevents_merged["charttime"] - <= labevents_merged["intime"] + pd.Timedelta(hours=24) - ) - ] - - # Extract numeric values - labevents_merged["valuenum"] = pd.to_numeric( - labevents_merged["valuenum"], errors="coerce" - ) - - # Aggregate by stay_id and itemid - features = [] - - for stay_id in icustays["stay_id"].unique(): - stay_data = labevents_merged[labevents_merged["stay_id"] == stay_id] - - feature_row = {"stay_id": stay_id} - - # WBC (check both itemids) - wbc_data = stay_data[stay_data["itemid"].isin(LABEVENTS_ITEMIDS["wbc"])][ - "valuenum" - ] - feature_row["wbc"] = wbc_data.mean() if not wbc_data.empty else np.nan - - # Lactate - lactate_data = stay_data[stay_data["itemid"] == LABEVENTS_ITEMIDS["lactate"]][ - "valuenum" - ] - feature_row["lactate"] = ( - lactate_data.mean() if not lactate_data.empty else np.nan - ) - - # Creatinine - creatinine_data = stay_data[ - stay_data["itemid"] == LABEVENTS_ITEMIDS["creatinine"] - ]["valuenum"] - feature_row["creatinine"] = ( - creatinine_data.mean() if not creatinine_data.empty else np.nan - ) - - features.append(feature_row) - - return pd.DataFrame(features) - - -def extract_demographics( - patients: pd.DataFrame, admissions: pd.DataFrame, icustays: pd.DataFrame -) -> pd.DataFrame: - """ - Extract age and gender from patients table. - - Args: - patients: Patients DataFrame - admissions: Admissions DataFrame (not used, kept for compatibility) - icustays: ICU stays DataFrame - - Returns: - DataFrame with demographics per stay_id - """ - print("Extracting demographics...") - - # icustays already has subject_id, so merge directly with patients - icustays_with_patient = icustays[["stay_id", "subject_id"]].merge( - patients[["subject_id", "gender", "anchor_age"]], on="subject_id", how="left" - ) - - # Use anchor_age if available, otherwise calculate from anchor_year and anchor_age - # For demo data, anchor_age should be available - demographics = icustays_with_patient[["stay_id", "anchor_age", "gender"]].copy() - demographics.rename(columns={"anchor_age": "age"}, inplace=True) - - # Encode gender (M=1, F=0) - demographics["gender_encoded"] = (demographics["gender"] == "M").astype(int) - - return demographics[["stay_id", "age", "gender_encoded"]] - - -def extract_sepsis_labels( - diagnoses_icd: pd.DataFrame, icustays: pd.DataFrame -) -> pd.DataFrame: - """ - Extract sepsis labels from diagnoses_icd table. - Checks both ICD-9 and ICD-10 codes to maximize positive samples. 
- - Args: - diagnoses_icd: Diagnoses ICD DataFrame - icustays: ICU stays DataFrame - - Returns: - DataFrame with sepsis labels per stay_id - """ - print("Extracting sepsis labels...") - - # Check what ICD versions are available - icd_versions = diagnoses_icd["icd_version"].unique() - print(f" Available ICD versions: {sorted(icd_versions)}") - - all_sepsis_diagnoses = [] - - # Check ICD-10 codes - if 10 in icd_versions: - diagnoses_icd10 = diagnoses_icd[diagnoses_icd["icd_version"] == 10].copy() - print(f" ICD-10 diagnoses: {len(diagnoses_icd10)} rows") - - sepsis_mask = pd.Series( - [False] * len(diagnoses_icd10), index=diagnoses_icd10.index - ) - - for code in SEPSIS_ICD10_CODES: - if "." not in code or code.endswith("."): - # Pattern match (e.g., "A40" matches "A40.x") - code_prefix = code.rstrip(".") - mask = diagnoses_icd10["icd_code"].str.startswith(code_prefix, na=False) - sepsis_mask |= mask - if mask.sum() > 0: - print( - f" Found {mask.sum()} ICD-10 diagnoses matching pattern '{code}'" - ) - else: - # Exact match - mask = diagnoses_icd10["icd_code"] == code - sepsis_mask |= mask - if mask.sum() > 0: - print( - f" Found {mask.sum()} ICD-10 diagnoses with exact code '{code}'" - ) - - sepsis_icd10 = diagnoses_icd10[sepsis_mask].copy() - if len(sepsis_icd10) > 0: - all_sepsis_diagnoses.append(sepsis_icd10) - print(f" Total ICD-10 sepsis diagnoses: {len(sepsis_icd10)}") - - # Check ICD-9 codes - if 9 in icd_versions: - diagnoses_icd9 = diagnoses_icd[diagnoses_icd["icd_version"] == 9].copy() - print(f" ICD-9 diagnoses: {len(diagnoses_icd9)} rows") - - sepsis_mask = pd.Series( - [False] * len(diagnoses_icd9), index=diagnoses_icd9.index - ) - - for code in SEPSIS_ICD9_CODES: - if len(code) <= 3 or code.endswith("."): - # Pattern match (e.g., "038" matches "038.x") - code_prefix = code.rstrip(".") - mask = diagnoses_icd9["icd_code"].str.startswith(code_prefix, na=False) - sepsis_mask |= mask - if mask.sum() > 0: - print( - f" Found {mask.sum()} ICD-9 diagnoses matching pattern '{code}'" - ) - else: - # Exact match - mask = diagnoses_icd9["icd_code"] == code - sepsis_mask |= mask - if mask.sum() > 0: - print( - f" Found {mask.sum()} ICD-9 diagnoses with exact code '{code}'" - ) - - sepsis_icd9 = diagnoses_icd9[sepsis_mask].copy() - if len(sepsis_icd9) > 0: - all_sepsis_diagnoses.append(sepsis_icd9) - print(f" Total ICD-9 sepsis diagnoses: {len(sepsis_icd9)}") - - # Combine all sepsis diagnoses - if all_sepsis_diagnoses: - sepsis_diagnoses = pd.concat(all_sepsis_diagnoses, ignore_index=True) - print(f" Total sepsis diagnoses (ICD-9 + ICD-10): {len(sepsis_diagnoses)}") - - if len(sepsis_diagnoses) > 0: - print( - f" Sample sepsis ICD codes: {sepsis_diagnoses['icd_code'].unique()[:15].tolist()}" - ) - print( - f" Unique hadm_id with sepsis: {sepsis_diagnoses['hadm_id'].nunique()}" - ) - else: - sepsis_diagnoses = pd.DataFrame(columns=diagnoses_icd.columns) - print(" No sepsis diagnoses found") - - # Merge with icustays to get stay_id - icustays_with_hadm = icustays[["stay_id", "hadm_id"]].copy() - - if len(sepsis_diagnoses) > 0: - sepsis_labels = icustays_with_hadm.merge( - sepsis_diagnoses[["hadm_id"]].drop_duplicates(), - on="hadm_id", - how="left", - indicator=True, - ) - else: - sepsis_labels = icustays_with_hadm.copy() - sepsis_labels["_merge"] = "left_only" - - # Create binary label (1 if sepsis, 0 otherwise) - sepsis_labels["sepsis"] = (sepsis_labels["_merge"] == "both").astype(int) - - sepsis_count = sepsis_labels["sepsis"].sum() - print( - f" ICU stays with sepsis: 
{sepsis_count}/{len(sepsis_labels)} ({sepsis_count/len(sepsis_labels)*100:.2f}%)" - ) - - return sepsis_labels[["stay_id", "sepsis"]] - - -def print_feature_summary(X: pd.DataFrame): - """Print feature statistics with FHIR mapping information. - - Args: - X: Feature matrix with actual data - """ - print("\n" + "=" * 120) - print("FEATURE SUMMARY: MIMIC-IV → Model → FHIR Mapping") - print("=" * 120) - - # Define FHIR mappings for each feature - fhir_mappings = { - "heart_rate": { - "mimic_table": "chartevents", - "mimic_itemid": "220050", - "fhir_resource": "Observation", - "fhir_code": "8867-4", - "fhir_system": "LOINC", - "fhir_display": "Heart rate", - }, - "temperature": { - "mimic_table": "chartevents", - "mimic_itemid": "223762/223761", - "fhir_resource": "Observation", - "fhir_code": "8310-5", - "fhir_system": "LOINC", - "fhir_display": "Body temperature", - }, - "respiratory_rate": { - "mimic_table": "chartevents", - "mimic_itemid": "220210", - "fhir_resource": "Observation", - "fhir_code": "9279-1", - "fhir_system": "LOINC", - "fhir_display": "Respiratory rate", - }, - "wbc": { - "mimic_table": "labevents", - "mimic_itemid": "51300/51301", - "fhir_resource": "Observation", - "fhir_code": "6690-2", - "fhir_system": "LOINC", - "fhir_display": "Leukocytes [#/volume] in Blood", - }, - "lactate": { - "mimic_table": "labevents", - "mimic_itemid": "50813", - "fhir_resource": "Observation", - "fhir_code": "2524-7", - "fhir_system": "LOINC", - "fhir_display": "Lactate [Moles/volume] in Blood", - }, - "creatinine": { - "mimic_table": "labevents", - "mimic_itemid": "50912", - "fhir_resource": "Observation", - "fhir_code": "2160-0", - "fhir_system": "LOINC", - "fhir_display": "Creatinine [Mass/volume] in Serum or Plasma", - }, - "age": { - "mimic_table": "patients", - "mimic_itemid": "anchor_age", - "fhir_resource": "Patient", - "fhir_code": "birthDate", - "fhir_system": "FHIR Core", - "fhir_display": "Patient birth date (calculate age)", - }, - "gender_encoded": { - "mimic_table": "patients", - "mimic_itemid": "gender", - "fhir_resource": "Patient", - "fhir_code": "gender", - "fhir_system": "FHIR Core", - "fhir_display": "Administrative Gender (M/F)", - }, - } - - print( - f"\n{'Feature':<20} {'Mean±SD':<20} {'MIMIC Source':<20} {'FHIR Resource':<20} {'FHIR Code (System)':<30}" - ) - print("-" * 120) - - for feature in X.columns: - mapping = fhir_mappings.get(feature, {}) - - # Calculate statistics - mean_val = X[feature].mean() - std_val = X[feature].std() - - # Format based on feature type - if feature == "gender_encoded": - stats = f"{mean_val:.2f} (M={X[feature].sum():.0f})" - else: - stats = f"{mean_val:.2f}±{std_val:.2f}" - - mimic_source = f"{mapping.get('mimic_table', 'N/A')} ({mapping.get('mimic_itemid', 'N/A')})" - fhir_resource = mapping.get("fhir_resource", "N/A") - fhir_code = ( - f"{mapping.get('fhir_code', 'N/A')} ({mapping.get('fhir_system', 'N/A')})" - ) - - print( - f"{feature:<20} {stats:<20} {mimic_source:<20} {fhir_resource:<20} {fhir_code:<30}" - ) - - print("\n" + "=" * 120) - print( - "Note: Statistics calculated from first 24 hours of ICU stay. Missing values imputed with median." - ) - print("=" * 120 + "\n") - - -def create_feature_matrix( - chartevents_features: pd.DataFrame, - labevents_features: pd.DataFrame, - demographics: pd.DataFrame, - sepsis_labels: pd.DataFrame, -) -> Tuple[pd.DataFrame, pd.Series]: - """ - Create feature matrix and labels from extracted features. 
- - Args: - chartevents_features: Chart events features - labevents_features: Lab events features - demographics: Demographics features - sepsis_labels: Sepsis labels - - Returns: - Tuple of (feature matrix, labels) - """ - print("Creating feature matrix...") - - # Merge all features on stay_id - features = ( - chartevents_features.merge(labevents_features, on="stay_id", how="outer") - .merge(demographics, on="stay_id", how="outer") - .merge(sepsis_labels, on="stay_id", how="inner") - ) - - # Select feature columns (exclude stay_id and sepsis) - feature_cols = [ - "heart_rate", - "temperature", - "respiratory_rate", - "wbc", - "lactate", - "creatinine", - "age", - "gender_encoded", - ] - - X = features[feature_cols].copy() - y = features["sepsis"].copy() - - print(f"Feature matrix shape: {X.shape}") - print(f"Sepsis cases: {y.sum()} ({y.sum() / len(y) * 100:.2f}%)") - - return X, y - - -def train_models(X_train: pd.DataFrame, y_train: pd.Series) -> Dict[str, Any]: - """ - Train all three models (Random Forest, XGBoost, Logistic Regression). - - Args: - X_train: Training features - y_train: Training labels - - Returns: - Dictionary of trained models - """ - print("\nTraining models...") - - models = {} - - # Check if we have any positive samples - positive_samples = y_train.sum() - total_samples = len(y_train) - positive_rate = positive_samples / total_samples if total_samples > 0 else 0.0 - - print( - f" Positive samples: {positive_samples}/{total_samples} ({positive_rate*100:.2f}%)" - ) - - # Random Forest - use class_weight to handle imbalance - print(" Training Random Forest...") - rf = RandomForestClassifier( - n_estimators=100, - random_state=42, - n_jobs=-1, - class_weight="balanced", # Automatically adjust for class imbalance - ) - rf.fit(X_train, y_train) - models["RandomForest"] = rf - - # XGBoost - handle case with no positive samples - print(" Training XGBoost...") - if positive_samples == 0: - # When there are no positive samples, set base_score to a small value - # and use scale_pos_weight to avoid errors - xgb_model = xgb.XGBClassifier( - random_state=42, - n_jobs=-1, - eval_metric="logloss", - base_score=0.01, # Small positive value instead of 0 - scale_pos_weight=1.0, - ) - else: - # Calculate scale_pos_weight for imbalanced data - scale_pos_weight = (total_samples - positive_samples) / positive_samples - xgb_model = xgb.XGBClassifier( - random_state=42, - n_jobs=-1, - eval_metric="logloss", - scale_pos_weight=scale_pos_weight, - ) - xgb_model.fit(X_train, y_train) - models["XGBoost"] = xgb_model - - # Logistic Regression (with scaling) - use class_weight to handle imbalance - print(" Training Logistic Regression...") - scaler = StandardScaler() - X_train_scaled = scaler.fit_transform(X_train) - lr = LogisticRegression( - random_state=42, - max_iter=1000, - class_weight="balanced", # Automatically adjust for class imbalance - ) - lr.fit(X_train_scaled, y_train) - models["LogisticRegression"] = lr - models["scaler"] = scaler # Store scaler for later use - - return models - - -def evaluate_models( - models: Dict[str, Any], - X_test: pd.DataFrame, - y_test: pd.Series, - feature_names: List[str], -) -> Dict[str, Dict[str, float]]: - """ - Evaluate and compare all models. 
- - Args: - models: Dictionary of trained models - X_test: Test features - y_test: Test labels - feature_names: List of feature names - - Returns: - Dictionary of evaluation metrics for each model - """ - print("\nEvaluating models...") - print( - f"Test set: {len(y_test)} samples, {y_test.sum()} positive ({y_test.sum()/len(y_test)*100:.2f}%)" - ) - - results = {} - - for name, model in models.items(): - if name == "scaler": - continue - - # Get probability predictions - if name == "LogisticRegression": - X_test_scaled = models["scaler"].transform(X_test) - y_pred_proba = model.predict_proba(X_test_scaled)[:, 1] - else: - y_pred_proba = model.predict_proba(X_test)[:, 1] - - # Use default threshold (0.5) for predictions - y_pred = (y_pred_proba >= 0.5).astype(int) - - # Calculate metrics with default threshold - metrics = { - "accuracy": accuracy_score(y_test, y_pred), - "precision": precision_score(y_test, y_pred, zero_division=0), - "recall": recall_score(y_test, y_pred, zero_division=0), - "f1": f1_score(y_test, y_pred, zero_division=0), - "auc": roc_auc_score(y_test, y_pred_proba) - if len(np.unique(y_test)) > 1 - else 0.0, - } - - # Try to find optimal threshold for F1 score - if len(np.unique(y_test)) > 1 and y_test.sum() > 0: - precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba) - f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10) - optimal_idx = np.argmax(f1_scores) - optimal_threshold = ( - thresholds[optimal_idx] if optimal_idx < len(thresholds) else 0.5 - ) - optimal_f1 = f1_scores[optimal_idx] - - # Predictions with optimal threshold - y_pred_optimal = (y_pred_proba >= optimal_threshold).astype(int) - metrics["optimal_threshold"] = optimal_threshold - metrics["optimal_f1"] = optimal_f1 - metrics["optimal_precision"] = precision_score( - y_test, y_pred_optimal, zero_division=0 - ) - metrics["optimal_recall"] = recall_score( - y_test, y_pred_optimal, zero_division=0 - ) - else: - metrics["optimal_threshold"] = 0.5 - metrics["optimal_f1"] = 0.0 - metrics["optimal_precision"] = 0.0 - metrics["optimal_recall"] = 0.0 - - results[name] = metrics - - print(f"\n{name}:") - print( - f" Predictions: {y_pred.sum()} positive predicted (actual: {y_test.sum()})" - ) - print(f" Accuracy: {metrics['accuracy']:.4f}") - print(f" Precision: {metrics['precision']:.4f}") - print(f" Recall: {metrics['recall']:.4f}") - print(f" F1-score: {metrics['f1']:.4f}") - print(f" AUC-ROC: {metrics['auc']:.4f}") - if metrics["optimal_f1"] > 0: - print(f" Optimal threshold: {metrics['optimal_threshold']:.4f}") - print(f" Optimal F1-score: {metrics['optimal_f1']:.4f}") - print(f" Optimal Precision: {metrics['optimal_precision']:.4f}") - print(f" Optimal Recall: {metrics['optimal_recall']:.4f}") - - # Show feature importance for tree-based models - if hasattr(model, "feature_importances_"): - print("\n Top 5 Feature Importances:") - importances = model.feature_importances_ - indices = np.argsort(importances)[::-1][:5] - for idx in indices: - print(f" {feature_names[idx]}: {importances[idx]:.4f}") - - return results - - -def select_best_model( - models: Dict[str, Any], - results: Dict[str, Dict[str, float]], - metric: str = "f1", -) -> Tuple[str, Any, Dict[str, float]]: - """ - Select best model based on specified metric. 
- - Args: - models: Dictionary of trained models - results: Evaluation results - metric: Metric to optimize ("f1", "recall", "precision", "auc") - - Returns: - Tuple of (best model name, best model, best metrics) - """ - print(f"\nSelecting best model based on {metric}...") - - # Get the appropriate metric value (prefer optimal if available) - def get_metric_value(metrics, metric_name): - if metric_name == "f1": - return metrics.get("optimal_f1", metrics["f1"]) - elif metric_name == "recall": - return metrics.get("optimal_recall", metrics["recall"]) - elif metric_name == "precision": - return metrics.get("optimal_precision", metrics["precision"]) - elif metric_name == "auc": - return metrics.get("auc", 0.0) - else: - return metrics.get("optimal_f1", metrics["f1"]) - - best_name = max(results.keys(), key=lambda k: get_metric_value(results[k], metric)) - best_model = models[best_name] - best_metrics = results[best_name] - - best_value = get_metric_value(best_metrics, metric) - print(f"Best model: {best_name} ({metric}: {best_value:.4f})") - - return best_name, best_model, best_metrics - - -def save_model( - model: Any, - model_name: str, - feature_names: List[str], - metrics: Dict[str, float], - scaler: Any, - output_path: Union[str, Path], -) -> None: - """ - Save the best model with metadata. - - Args: - model: Trained model - model_name: Name of the model - feature_names: List of feature names - metrics: Evaluation metrics - scaler: StandardScaler (if Logistic Regression, None otherwise) - output_path: Path to save model - """ - print(f"\nSaving model to {output_path}...") - - # Create output directory if it doesn't exist - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - - # Prepare metadata - metadata = { - "model_name": model_name, - "training_date": datetime.now().isoformat(), - "feature_names": feature_names, - "metrics": metrics, - "itemid_mappings": { - "chartevents": CHARTEVENTS_ITEMIDS, - "labevents": LABEVENTS_ITEMIDS, - }, - "sepsis_icd_codes": { - "icd10": SEPSIS_ICD10_CODES, - "icd9": SEPSIS_ICD9_CODES, - }, - } - - # Save model and metadata - model_data = { - "model": model, - "scaler": scaler, - "metadata": metadata, - } - - joblib.dump(model_data, output_path) - - print("Model saved successfully!") - - -def main(): - """Main training pipeline.""" - # Data directory - data_dir = "../datasets/mimic-iv-clinical-database-demo-2.2" - - # Output path (relative to script location) - script_dir = Path(__file__).parent - output_path = script_dir / "models" / "sepsis_model.pkl" - - print("=" * 60) - print("Sepsis Prediction Model Training") - print("=" * 60) - - # Load data - tables = load_mimic_data(data_dir) - - # Extract features - chartevents_features = extract_chartevents_features( - tables["chartevents"], tables["icustays"] - ) - labevents_features = extract_labevents_features( - tables["labevents"], tables["icustays"] - ) - demographics = extract_demographics( - tables["patients"], tables["admissions"], tables["icustays"] - ) - - # Extract labels - sepsis_labels = extract_sepsis_labels(tables["diagnoses_icd"], tables["icustays"]) - - # Create feature matrix - X, y = create_feature_matrix( - chartevents_features, - labevents_features, - demographics, - sepsis_labels, - ) - - # Handle missing values (impute with median) - print("\nHandling missing values...") - missing_before = X.isnull().sum().sum() - print(f" Missing values before imputation: {missing_before}") - X = X.fillna(X.median()) - - # Print feature summary with actual data 
statistics - print_feature_summary(X) - - # Split data with careful stratification to ensure positive samples in both sets - print("\nSplitting data...") - if len(np.unique(y)) > 1 and y.sum() > 0: - # Use stratification to ensure positive samples in both train and test - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42, stratify=y - ) - print( - f" Training set: {len(X_train)} samples ({y_train.sum()} positive, {y_train.sum()/len(y_train)*100:.2f}%)" - ) - print( - f" Test set: {len(X_test)} samples ({y_test.sum()} positive, {y_test.sum()/len(y_test)*100:.2f}%)" - ) - - # Warn if test set has no positive samples (shouldn't happen with stratify, but check anyway) - if y_test.sum() == 0: - print( - " WARNING: Test set has no positive samples! Consider using a different random seed." - ) - else: - print( - " Warning: No positive samples or only one class. Skipping stratification." - ) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 - ) - print(f" Training set: {len(X_train)} samples") - print(f" Test set: {len(X_test)} samples") - - # Apply oversampling to training data to balance classes - print("\nApplying oversampling to training data...") - try: - from imblearn.over_sampling import SMOTE - - # Only apply SMOTE if we have positive samples - if y_train.sum() > 0 and len(np.unique(y_train)) > 1: - print( - f" Before oversampling: {len(X_train)} samples ({y_train.sum()} positive, {y_train.sum()/len(y_train)*100:.2f}%)" - ) - # Ensure k_neighbors doesn't exceed available positive samples - k_neighbors = min(5, max(1, y_train.sum() - 1)) - smote = SMOTE(random_state=42, k_neighbors=k_neighbors) - X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train) - print( - f" After oversampling: {len(X_train_resampled)} samples ({y_train_resampled.sum()} positive, {y_train_resampled.sum()/len(X_train_resampled)*100:.2f}%)" - ) - X_train = pd.DataFrame( - X_train_resampled, - columns=X_train.columns, - index=X_train.index[: len(X_train_resampled)], - ) - y_train = pd.Series( - y_train_resampled, index=y_train.index[: len(y_train_resampled)] - ) - else: - print(" Skipping oversampling: insufficient positive samples") - except (ImportError, ModuleNotFoundError) as e: - print( - " imbalanced-learn not installed. 
Install with: pip install imbalanced-learn" - ) - print(f" Error: {e}") - print(" Proceeding without oversampling...") - - # Train models - models = train_models(X_train, y_train) - - # Evaluate models - feature_names = X.columns.tolist() - results = evaluate_models(models, X_test, y_test, feature_names) - - # Select best model (can change metric: "f1", "recall", "precision", "auc") - # For sepsis prediction, recall (sensitivity) is often most important - best_name, best_model, best_metrics = select_best_model( - models, results, metric="f1" - ) - - # Save best model - scaler = models.get("scaler") - save_model( - best_model, - best_name, - feature_names, - best_metrics, - scaler, - output_path, - ) - - print("\n" + "=" * 60) - print("Training complete!") - print("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/notebooks/fhir_ml_workflow.ipynb b/notebooks/fhir_ml_workflow.ipynb new file mode 100644 index 00000000..5a73c1dd --- /dev/null +++ b/notebooks/fhir_ml_workflow.ipynb @@ -0,0 +1,567 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from healthchain.sandbox.loaders import MimicOnFHIRLoader\n", + "from healthchain.io import Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "MIMIC_DIR = \"../../datasets/mimic-iv-clinical-database-demo-on-fhir-2.1.0/\"\n", + "RESOURCES_TO_LOAD = [\"MimicObservationChartevents\", \"MimicObservationLabevents\", \"MimicPatient\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO: 2025-11-27 18:55:49,156 [healthchain.sandbox.loaders.mimic]: Loaded 668862 resources from MimicObservationChartevents.ndjson.gz\n", + "INFO: 2025-11-27 18:55:54,360 [healthchain.sandbox.loaders.mimic]: Loaded 107727 resources from MimicObservationLabevents.ndjson.gz\n", + "INFO: 2025-11-27 18:55:54,366 [healthchain.sandbox.loaders.mimic]: Loaded 100 resources from MimicPatient.ndjson.gz\n" + ] + } + ], + "source": [ + "# Load MIMIC data as single bundle dict (fast, no validation)\n", + "loader = MimicOnFHIRLoader()\n", + "bundle = loader.load(\n", + " data_dir=MIMIC_DIR,\n", + " resource_types=RESOURCES_TO_LOAD,\n", + " as_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "776689" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(bundle[\"entry\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert to DataFrame using schema\n", + "tabular = Dataset.from_fhir_bundle(\n", + " bundle, \n", + " schema=\"../healthchain/configs/features/sepsis_vitals.yaml\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'patient_ref': 'object',\n", + " 'heart_rate': 'float64',\n", + " 'temperature': 'float64',\n", + " 'respiratory_rate': 'float64',\n", + " 'wbc': 'float64',\n", + " 'lactate': 'float64',\n", + " 'creatinine': 'float64',\n", + " 'age': 'int64',\n", + " 'gender_encoded': 'int64'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tabular.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(tabular)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patient_refheart_ratetemperaturerespiratory_ratewbclactatecreatinineagegender_encoded
0Patient/0a8eebfd-a352-522e-89f0-1d4a13abdebc96.50000098.96666720.7000005.81538531.00.466667520
1Patient/0c2243d2-987b-5cbd-8eb1-170a8064769385.37931098.13529412.1034487.73750066.50.594937571
2Patient/13df78e7-150e-5eb7-be5f-5f62b2baee87104.30769298.37500015.9615388.337500NaN0.966667661
3Patient/158f3a39-e3d7-5e7a-93aa-57af894aadd999.73913098.39000018.35869611.509091NaN0.628571400
4Patient/1ab119a5-aac8-5002-9d2f-b8ff6962338793.40298598.82424221.75373115.578571NaN0.646667341
5Patient/1bb918ba-e04e-5e7a-87ca-dbcbbb4c72c378.18181898.72500016.96153822.906250NaN0.756667380
6Patient/1cf9e585-806c-513b-80af-4ca565a28231104.66666798.81000019.38888912.520952NaN3.847321531
7Patient/22a3e422-663a-561c-b305-a0c04bf4223569.05000098.80000016.90000010.754545NaN1.353846871
8Patient/23069939-0c4c-517b-a3ec-baae0d4e398878.69444498.32500016.0555569.383333NaN0.860000521
9Patient/23f959c1-6ac2-562b-9cbe-c111f338e27b87.18452498.82709117.25149714.954054NaN0.642105511
\n", + "
" + ], + "text/plain": [ + " patient_ref heart_rate temperature \\\n", + "0 Patient/0a8eebfd-a352-522e-89f0-1d4a13abdebc 96.500000 98.966667 \n", + "1 Patient/0c2243d2-987b-5cbd-8eb1-170a80647693 85.379310 98.135294 \n", + "2 Patient/13df78e7-150e-5eb7-be5f-5f62b2baee87 104.307692 98.375000 \n", + "3 Patient/158f3a39-e3d7-5e7a-93aa-57af894aadd9 99.739130 98.390000 \n", + "4 Patient/1ab119a5-aac8-5002-9d2f-b8ff69623387 93.402985 98.824242 \n", + "5 Patient/1bb918ba-e04e-5e7a-87ca-dbcbbb4c72c3 78.181818 98.725000 \n", + "6 Patient/1cf9e585-806c-513b-80af-4ca565a28231 104.666667 98.810000 \n", + "7 Patient/22a3e422-663a-561c-b305-a0c04bf42235 69.050000 98.800000 \n", + "8 Patient/23069939-0c4c-517b-a3ec-baae0d4e3988 78.694444 98.325000 \n", + "9 Patient/23f959c1-6ac2-562b-9cbe-c111f338e27b 87.184524 98.827091 \n", + "\n", + " respiratory_rate wbc lactate creatinine age gender_encoded \n", + "0 20.700000 5.815385 31.0 0.466667 52 0 \n", + "1 12.103448 7.737500 66.5 0.594937 57 1 \n", + "2 15.961538 8.337500 NaN 0.966667 66 1 \n", + "3 18.358696 11.509091 NaN 0.628571 40 0 \n", + "4 21.753731 15.578571 NaN 0.646667 34 1 \n", + "5 16.961538 22.906250 NaN 0.756667 38 0 \n", + "6 19.388889 12.520952 NaN 3.847321 53 1 \n", + "7 16.900000 10.754545 NaN 1.353846 87 1 \n", + "8 16.055556 9.383333 NaN 0.860000 52 1 \n", + "9 17.251497 14.954054 NaN 0.642105 51 1 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = tabular.data\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import joblib\n", + "MODEL_PATH = '../cookbook/models/sepsis_model.pkl'\n", + "model_data = joblib.load(MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'model': XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric='logloss',\n", + " feature_types=None, feature_weights=None, gamma=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=-1,\n", + " num_parallel_tree=None, ...),\n", + " 'scaler': StandardScaler(),\n", + " 'metadata': {'model_name': 'XGBoost',\n", + " 'training_date': '2025-11-22T13:52:14.144052',\n", + " 'feature_names': ['heart_rate',\n", + " 'temperature',\n", + " 'respiratory_rate',\n", + " 'wbc',\n", + " 'lactate',\n", + " 'creatinine',\n", + " 'age',\n", + " 'gender_encoded'],\n", + " 'metrics': {'accuracy': 0.8214285714285714,\n", + " 'precision': 0.5,\n", + " 'recall': 0.2,\n", + " 'f1': 0.2857142857142857,\n", + " 'auc': 0.7391304347826086,\n", + " 'optimal_threshold': 0.19611828,\n", + " 'optimal_f1': 0.5454545454049586,\n", + " 'optimal_precision': 0.5,\n", + " 'optimal_recall': 0.6},\n", + " 
'itemid_mappings': {'chartevents': {'heart_rate': 220050,\n", + " 'temperature_f': 223761,\n", + " 'temperature_c': 223762,\n", + " 'respiratory_rate': 220210},\n", + " 'labevents': {'wbc': [51300, 51301],\n", + " 'lactate': 50813,\n", + " 'creatinine': 50912}},\n", + " 'sepsis_icd_codes': {'icd10': ['A41.9',\n", + " 'A40',\n", + " 'A41',\n", + " 'R65.20',\n", + " 'R65.21',\n", + " 'R65.1',\n", + " 'A41.0',\n", + " 'A41.1',\n", + " 'A41.2',\n", + " 'A41.3',\n", + " 'A41.4',\n", + " 'A41.5',\n", + " 'A41.50',\n", + " 'A41.51',\n", + " 'A41.52',\n", + " 'A41.53',\n", + " 'A41.59',\n", + " 'A41.8',\n", + " 'A41.81',\n", + " 'A41.89'],\n", + " 'icd9': ['038', '99591', '99592', '78552']}}}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "model = model_data[\"model\"]\n", + "patient_features = df[model_data[\"metadata\"][\"feature_names\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# ML inference\n", + "probabilities = model.predict_proba(patient_features)[:, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "threshold = model_data[\"metadata\"][\"metrics\"][\"optimal_threshold\"]\n", + "predictions = (probabilities >= threshold).astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert back to FHIR\n", + "risk_assessments = tabular.to_risk_assessment(\n", + " outcome_code=\"A41.9\",\n", + " outcome_display=\"Sepsis\",\n", + " model_name=\"XGBoost\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(risk_assessments)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'resourceType': 'RiskAssessment',\n", + " 'id': 'hc-71012a5d-cf7f-436d-864b-327efe28b483',\n", + " 'status': 'final',\n", + " 'method': {'coding': [{'system': 'https://healthchain.github.io/ml-models',\n", + " 'code': 'XGBoost',\n", + " 'display': 'XGBoost'}]},\n", + " 'subject': {'reference': 'Patient/1bb918ba-e04e-5e7a-87ca-dbcbbb4c72c3'},\n", + " 'occurrenceDateTime': datetime.datetime(2025, 11, 27, 18, 56, 51, tzinfo=TzInfo(UTC)),\n", + " 'prediction': [{'outcome': {'coding': [{'system': 'http://hl7.org/fhir/sid/icd-10',\n", + " 'code': 'A41.9',\n", + " 'display': 'Sepsis'}]},\n", + " 'probabilityDecimal': 0.07619287073612213,\n", + " 'qualitativeRisk': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/risk-probability',\n", + " 'code': 'low',\n", + " 'display': 'Low'}]}}],\n", + " 'note': [{'text': 'ML prediction: Negative (probability: 7.62%, risk: low)'}]}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "risk_assessments[5].model_dump()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From d46d1678af924b2a8e78b313bbde640785a29776 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 12:33:15 +0000 Subject: [PATCH 11/12] gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8709beb5..f5547d53 100644 --- a/.gitignore +++ b/.gitignore @@ -167,7 +167,8 @@ scrap/ .ruff_cache/ .python-version .cursor/ -.private/ +.local/ +.keys/ .idea/ # Personal AI context (keep local) From ecbcff55b1a6eee8930709bd2237341e6f311d28 Mon Sep 17 00:00:00 2001 From: jenniferjiangkells Date: Tue, 2 Dec 2025 20:22:43 +0000 Subject: [PATCH 12/12] Update cookbook docs --- docs/cookbook/index.md | 3 + docs/cookbook/ml_model_deployment.md | 539 ++++++++++++++++++++++++-- docs/index.md | 2 +- mkdocs.yml | 1 + scripts/sepsis_prediction_training.py | 7 +- 5 files changed, 506 insertions(+), 46 deletions(-) diff --git a/docs/cookbook/index.md b/docs/cookbook/index.md index e64b6d3c..165624b6 100644 --- a/docs/cookbook/index.md +++ b/docs/cookbook/index.md @@ -13,6 +13,9 @@ Dive into real-world, production-ready examples to learn how to build interopera ## 📚 How-To Guides +- 🔬 **[Deploy ML Models: Real-Time Alerts & Batch Screening](./ml_model_deployment.md)** + *Deploy the same ML model two ways: CDS Hooks for point-of-care sepsis alerts, and FHIR Gateway for population-level batch screening with RiskAssessment resources.* + - 🚦 **[Multi-Source Patient Data Aggregation](./multi_ehr_aggregation.md)** *Merge patient data from multiple FHIR sources (Epic, Cerner, etc.), deduplicate conditions, prove provenance, and robustly handle cross-vendor errors. Foundation for retrieval-augmented generation (RAG) and analytics workflows.* diff --git a/docs/cookbook/ml_model_deployment.md b/docs/cookbook/ml_model_deployment.md index 74fa87c1..9c878a15 100644 --- a/docs/cookbook/ml_model_deployment.md +++ b/docs/cookbook/ml_model_deployment.md @@ -1,64 +1,517 @@ -# Deploy ML Models as Healthcare APIs +# Deploy ML Models: Real-Time Alerts & Batch Screening -*This example is coming soon! 🚧* +You trained a model on CSVs. Now you need to deploy it against FHIR data from EHRs. This tutorial shows how to bridge that gap with two production patterns: **real-time CDS Hooks alerts** and **batch FHIR Gateway screening**—both using the same model and a simple YAML schema that maps FHIR resources to your training features. -
- ML Model Deployment Architecture -
+Check out the full working examples: -## Overview +- [Real-time CDS Hooks](https://github.com/dotimplement/HealthChain/tree/main/cookbook/sepsis_cds_hooks.py) +- [Batch FHIR Gateway](https://github.com/dotimplement/HealthChain/tree/main/cookbook/sepsis_fhir_batch.py) -This tutorial will demonstrate how to deploy any trained ML model as a production-ready healthcare API with FHIR input/output, multi-EHR connectivity, and comprehensive monitoring. +![](../assets/images/hc-use-cases-ml-deployment.png) -## What You'll Learn +## When to Use Each Pattern -- **Model serving architecture** - Deploy Hugging Face, scikit-learn, PyTorch, and custom models -- **FHIR-native endpoints** - Serve predictions with structured healthcare data formats -- **Multi-EHR integration** - Connect your model to live FHIR servers for real-time inference -- **Healthcare data validation** - Ensure type-safe input/output with Pydantic models -- **Production monitoring** - Track model performance, data drift, and API health -- **Scalable deployment** - Configure auto-scaling and load balancing for healthcare workloads +| Pattern | Trigger | Output | Best For | +|---------|---------|--------|----------| +| **CDS Hooks** | Clinician opens chart | Alert cards in EHR UI | Point-of-care decision support | +| **FHIR Gateway** | Scheduled job / API call | [RiskAssessment](https://www.hl7.org/fhir/riskassessment.html) resources | Population screening, quality measures | -## Architecture +Both patterns share the same trained model and feature extraction—only the integration layer differs. -The example will showcase: +## Setup -1. **Model Packaging** - Wrap any ML model with HealthChain's deployment framework -2. **FHIR Endpoint Creation** - Automatically generate OpenAPI-compliant healthcare APIs -3. **Real-time Inference** - Process FHIR resources and return structured predictions -4. **Multi-source Integration** - Connect to Epic, Cerner, and other FHIR systems -5. **Performance Monitoring** - Track latency, throughput, and prediction quality -6. **Security & Compliance** - Implement OAuth2, audit logging, and data governance +### Install Dependencies -## Use Cases +```bash +pip install healthchain joblib xgboost scikit-learn python-dotenv +``` -Perfect for: -- **Clinical Decision Support** - Deploy diagnostic or prognostic models in EHR workflows -- **Population Health** - Serve risk stratification models for large patient cohorts -- **Research Platforms** - Make trained models available to clinical researchers -- **AI-powered Applications** - Build healthcare apps with ML-driven features +### Train the Model (or Bring Your Own) -## Example Models +The cookbook includes a training script that builds an XGBoost classifier from MIMIC-IV data. 
From the project root: -We'll show deployment patterns for: -- **Clinical NLP models** - Named entity recognition, clinical coding, text classification -- **Diagnostic models** - Medical imaging analysis, lab result interpretation -- **Risk prediction models** - Readmission risk, mortality prediction, drug interactions -- **Recommendation systems** - Treatment recommendations, medication optimization +```bash +cd scripts +python sepsis_prediction_training.py +``` -## Prerequisites +This script: -- A trained ML model (any framework supported) -- Understanding of FHIR resources and healthcare data standards -- Python environment with HealthChain installed -- Basic knowledge of API deployment concepts +- Loads MIMIC-IV CSV tables (chartevents, labevents, patients, diagnoses) +- Extracts vitals features (heart rate, temperature, respiratory rate, WBC, lactate, creatinine, age, gender) +- Labels ICU stays with sepsis diagnoses (ICD-9/ICD-10) +- Trains Random Forest, XGBoost, and Logistic Regression models +- Saves the best model (by F1 score) to `scripts/models/sepsis_model.pkl` -## Coming Soon +After training, copy the model to the cookbook directory: -We're building comprehensive examples covering multiple model types and deployment scenarios! +```bash +cp scripts/models/sepsis_model.pkl cookbook/models/ +``` -In the meantime, explore our [Gateway documentation](../reference/gateway/gateway.md) to understand the deployment infrastructure. +!!! note "MIMIC-IV Demo Dataset" + + The training script uses the [MIMIC-IV Clinical Database Demo](https://physionet.org/content/mimic-iv-demo/2.2/) (~50MB, freely downloadable). Set the path: + + ```bash + export MIMIC_CSV_PATH=/path/to/mimic-iv-clinical-database-demo-2.2 + ``` + + *This is a quick-start workflow for demo purposes. Full MIMIC requires credentialed access. Most researchers use BigQuery or a PostgreSQL database. + +**Using your own model?** The pipeline is flexible—just save any scikit-learn-compatible model as a pickle with this structure: + +```python +import joblib + +model_data = { + "model": your_trained_model, # Must have .predict_proba() + "metadata": { + "feature_names": ["heart_rate", "temperature", ...], + "metrics": {"optimal_threshold": 0.5} + } +} +joblib.dump(model_data, "cookbook/models/sepsis_model.pkl") +``` + +The pipeline will work with any model that implements `predict_proba()` - XGBoost, Random Forest, LightGBM, or even PyTorch/TensorFlow models wrapped with a sklearn-compatible interface. + +### Prepare Demo Patient Data + +The two patterns have different data requirements: + +| Pattern | Data Source | What You Need | +|---------|-------------|---------------| +| **CDS Hooks** | Local JSON files | Download pre-extracted patients (quick start) | +| **FHIR Gateway** | FHIR server | Upload patients to Medplum and get server-assigned IDs | + +=== "CDS Hooks Only (Quick Start)" + + Download pre-extracted patient bundles—these are already in the repo if you cloned it: + + ```bash + mkdir -p cookbook/data/mimic_demo_patients + cd cookbook/data/mimic_demo_patients + wget https://github.com/dotimplement/HealthChain/raw/main/cookbook/data/mimic_demo_patients/high_risk_patient.json + wget https://github.com/dotimplement/HealthChain/raw/main/cookbook/data/mimic_demo_patients/moderate_risk_patient.json + wget https://github.com/dotimplement/HealthChain/raw/main/cookbook/data/mimic_demo_patients/low_risk_patient.json + ``` + + That's it! Skip to [Pattern 1: CDS Hooks](#pattern-1-real-time-cds-hooks-alerts). 
+ +=== "FHIR Gateway (Full Setup)" + + The batch screening pattern queries patients from a FHIR server. This tutorial uses [Medplum](https://www.medplum.com/) (a free, hosted FHIR server), but any FHIR R4-compliant API works - just swap the credentials. + + **1. Configure FHIR Credentials** + + Add Medplum credentials to your `.env` file. See [FHIR Sandbox Setup](./setup_fhir_sandboxes.md#medplum) for details: + + ```bash + MEDPLUM_BASE_URL=https://api.medplum.com/fhir/R4 + MEDPLUM_CLIENT_ID=your_client_id + MEDPLUM_CLIENT_SECRET=your_client_secret + MEDPLUM_TOKEN_URL=https://api.medplum.com/oauth2/token + MEDPLUM_SCOPE=openid + ``` + + **2. Extract and Upload Demo Patients** + + ```bash + # Set MIMIC-on-FHIR path (or use --mimic flag) + export MIMIC_FHIR_PATH=/path/to/mimic-iv-on-fhir + + # Extract and upload to Medplum + cd scripts + python extract_mimic_demo_patients.py --minimal --upload + ``` + + This script: + + - Loads patient data from [MIMIC-IV on FHIR](https://physionet.org/content/mimic-iv-demo/2.2/) + - Runs the sepsis model to find high/moderate/low risk patients + - Creates minimal FHIR bundles with only the observations needed + - Uploads them to your Medplum instance as transaction bundles + + **3. Copy Patient IDs** + + After upload, the script prints server-assigned patient IDs: + + ``` + ✓ Uploaded to Medplum! + + Copy this into sepsis_fhir_batch.py: + + DEMO_PATIENT_IDS = [ + "702e11e8-6d21-41dd-9b48-31715fdc0fb1", # high risk + "3b0da7e9-0379-455a-8d35-bedd3a6ee459", # moderate risk + "f490ceb4-6262-4f1e-8b72-5515e6c46741", # low risk + ] + ``` + + Copy these IDs into the `DEMO_PATIENT_IDS` list in `sepsis_fhir_batch.py`. + + !!! tip "Generate More Patients" + + The script has options for generating larger test sets: + + ```bash + python extract_mimic_demo_patients.py --help + + # Examples: + --num-patients-per-risk 5 # 5 patients per risk level (15 total) + --seed 123 # Different random sample + --minimal # Keep only latest observation per feature (~12KB each) + ``` + + !!! tip "Alternative: Manual Upload" + + If you prefer, run without `--upload` to generate bundle JSON files, then upload them manually via the [Medplum → Batch](https://app.medplum.com/batch) page. + +--- + +**Setup complete!** You should now have: + +- ✅ A trained model at `cookbook/models/sepsis_model.pkl` +- ✅ Demo patient data (local JSON or uploaded to Medplum) + +If using the **FHIR Gateway pattern**, also confirm: + +- ✅ FHIR credentials in `.env` +- ✅ Patient IDs copied into `DEMO_PATIENT_IDS` in `sepsis_fhir_batch.py` + +## The Shared Model Pipeline + +Both patterns reuse the same pipeline. 
Here's what you'll write: + +```python +def create_pipeline() -> Pipeline[Dataset]: + pipeline = Pipeline[Dataset]() + + @pipeline.add_node + def impute_missing(dataset: Dataset) -> Dataset: + dataset.data = dataset.data.fillna(dataset.data.median(numeric_only=True)) + return dataset + + @pipeline.add_node + def run_inference(dataset: Dataset) -> Dataset: + features = dataset.data[feature_names] + probabilities = model.predict_proba(features)[:, 1] + dataset.metadata["probabilities"] = probabilities + return dataset + + return pipeline +``` + +The pipeline operates on a `Dataset`, which you create from a FHIR bundle: + +```python +dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) +``` + +**How does FHIR become a DataFrame?** The schema maps FHIR resources to your training features: + +```yaml +# sepsis_vitals.yaml (excerpt) +features: + heart_rate: + fhir_resource: Observation + code: "220045" # MIMIC chartevents code + wbc: + fhir_resource: Observation + code: "51301" # MIMIC labevents code + age: + fhir_resource: Patient + field: birthDate + transform: calculate_age +``` + +No FHIR parsing code needed—define the mapping once, use it everywhere. + +!!! tip "Explore Interactively" + + Step through the full flow in [notebooks/fhir_ml_workflow.ipynb](../../notebooks/fhir_ml_workflow.ipynb): FHIR bundle → Dataset → DataFrame → inference → RiskAssessment. + +Now let's see how this pipeline plugs into each deployment pattern. --- -**Want to be notified when this example is ready?** Join our [Discord community](https://discord.gg/UQC6uAepUz) for updates! +## Pattern 1: Real-Time CDS Hooks Alerts + +Use CDS Hooks when you need **instant alerts** during clinical workflows. The EHR triggers your service and pushes patient data via prefetch—no server queries needed. + +### How It Works + +``` +Clinician opens chart → EHR fires patient-view hook → Your service runs prediction → CDS card appears in EHR +``` + +### Set Up the CDS Hook Handler + +Create a [CDSHooksService](../reference/gateway/cdshooks.md) that listens for `patient-view` events: + +```python +from healthchain.gateway import CDSHooksService +from healthchain.fhir import prefetch_to_bundle +from healthchain.models import CDSRequest, CDSResponse +from healthchain.models.responses.cdsresponse import Card + +cds = CDSHooksService() + +@cds.hook("patient-view", id="sepsis-risk") +def sepsis_alert(request: CDSRequest) -> CDSResponse: + if not request.prefetch: + return CDSResponse(cards=[]) + + # FHIR prefetch → Dataset → Prediction + bundle = prefetch_to_bundle(request.prefetch) + dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) + result = pipeline(dataset) + + # Generate alert card if risk is elevated + prob = float(result.metadata["probabilities"][0]) + risk = "high" if prob > 0.7 else "moderate" if prob > 0.4 else "low" + + if risk in ["high", "moderate"]: + return CDSResponse(cards=[ + Card( + summary=f"Sepsis Risk: {risk.upper()} ({prob:.0%})", + indicator="critical" if risk == "high" else "warning", + detail=f"Predicted sepsis risk: {risk.upper()}. 
Recommend workup.", + source={"label": "HealthChain Sepsis Predictor"}, + ) + ]) + + return CDSResponse(cards=[]) +``` + +### Build the Service + +Register with [HealthChainAPI](../reference/gateway/api.md): + +```python +app = HealthChainAPI(title="Sepsis CDS Hooks") +app.register_service(cds, path="/cds") +``` + +### Test with Sandbox Client + +The [SandboxClient](../reference/utilities/sandbox.md) simulates EHR requests using your demo patient files: + +```python +from healthchain.sandbox import SandboxClient + +client = SandboxClient( + url="http://localhost:8000/cds/cds-services/sepsis-risk", + workflow="patient-view", +) +client.load_from_path("data/mimic_demo_patients", pattern="*_patient.json") +responses = client.send_requests() +client.save_results(save_request=True, save_response=True, directory="./output/") +``` + +### Expected Output + +``` +Processed 3 requests + Patient 1: Sepsis Risk: HIGH (85%) + Patient 2: Sepsis Risk: MODERATE (52%) + Patient 3: Low risk (no alert) +``` + +??? example "Example CDS Response" + + ```json + { + "cards": [ + { + "summary": "Sepsis Risk: HIGH (85%)", + "indicator": "critical", + "source": { + "label": "HealthChain Sepsis Predictor", + "url": "https://www.sccm.org/SurvivingSepsisCampaign/Guidelines/Adult-Patients" + }, + "detail": "**AI Guidance:**\n- Predicted risk: **HIGH** (85%)\n- Recommend sepsis workup and early intervention.", + "title": "Sepsis Alert (AI Prediction)" + } + ] + } + ``` + +--- + +## Pattern 2: Batch FHIR Gateway Screening + +Use the FHIR Gateway when you need to **screen multiple patients** from a FHIR server. Unlike CDS Hooks (ephemeral alerts), this pattern **persists predictions back to the FHIR server** as RiskAssessment resources, making them available for dashboards, reports, and downstream workflows. + +### How It Works + +``` +Query patients from FHIR server → Run predictions → Write RiskAssessment back to FHIR server +``` + +### Set Up FHIR Gateway + +Configure the [FHIRGateway](../reference/gateway/fhir_gateway.md) with your FHIR source: + +```python +from fhir.resources.patient import Patient +from fhir.resources.observation import Observation +from healthchain.gateway import FHIRGateway +from healthchain.gateway.clients.fhir.base import FHIRAuthConfig +from healthchain.fhir import merge_bundles + +gateway = FHIRGateway() +config = FHIRAuthConfig.from_env("MEDPLUM") +gateway.add_source("medplum", config.to_connection_string()) +``` + +### Screen Individual Patients + +Query patient data, run prediction, and write back a [RiskAssessment](https://www.hl7.org/fhir/riskassessment.html) resource: + +```python +def screen_patient(gateway: FHIRGateway, patient_id: str, source: str): + # Query patient + observations from FHIR server + patient_bundle = gateway.search(Patient, {"_id": patient_id}, source) + obs_bundle = gateway.search(Observation, {"patient": patient_id}, source) + bundle = merge_bundles([patient_bundle, obs_bundle]) + + # FHIR → Dataset → Prediction + dataset = Dataset.from_fhir_bundle(bundle, schema=SCHEMA_PATH) + result = pipeline(dataset) + + # Convert to RiskAssessment and write back + for ra in result.to_risk_assessment( + outcome_code="A41.9", + outcome_display="Sepsis", + model_name="sepsis_xgboost_v1", + ): + gateway.create(ra, source=source) +``` + +### Batch Screen Multiple Patients + +Loop over patient IDs and screen each one: + +```python +for patient_id in patient_ids: + screen_patient(gateway, patient_id, source="medplum") +``` + +!!! 
note "Demo vs Production" + + This demo uses a fixed list of patient IDs. In production, you'd query for patients dynamically—for example, ICU admissions in the last hour: + + ```python + # Find patients with recent ICU encounters + encounters = gateway.search( + Encounter, + { + "class": "IMP", # inpatient + "location": "icu", + "date": "ge2024-01-01", + }, + source="ehr" + ) + patient_ids = [e.subject.reference.split("/")[1] for e in encounters] + ``` + +### Build the Service + +```python +app = HealthChainAPI(title="Sepsis Batch Screening") +app.register_gateway(gateway, path="/fhir") +``` + +### Expected Output + +After uploading demo patients to Medplum and running batch screening: + +``` +=== Screening patients from Medplum === + 702e11e8-6d21-41dd-9b48-31715fdc0fb1: HIGH (85%) → RiskAssessment/abc123 + 3b0da7e9-0379-455a-8d35-bedd3a6ee459: MODERATE (52%) → RiskAssessment/def456 + f490ceb4-6262-4f1e-8b72-5515e6c46741: LOW (15%) → RiskAssessment/ghi789 +``` +You should be able to see the RiskAssessment resources in the [Medplum console](https://app.medplum.com) (search for "RiskAssessment" in "Resource Type" search bar in top left corner) + +??? example "Example RiskAssessment Resource" + + ```json + { + "resourceType": "RiskAssessment", + "id": "abc123", + "status": "final", + "subject": { + "reference": "Patient/702e11e8-6d21-41dd-9b48-31715fdc0fb1" + }, + "method": { + "coding": [{ + "system": "https://healthchain.io/models", + "code": "sepsis_xgboost_v1", + "display": "Sepsis XGBoost Model v1" + }] + }, + "prediction": [{ + "outcome": { + "coding": [{ + "system": "http://hl7.org/fhir/sid/icd-10", + "code": "A41.9", + "display": "Sepsis" + }] + }, + "probabilityDecimal": 0.85, + "qualitativeRisk": { + "coding": [{ + "system": "http://terminology.hl7.org/CodeSystem/risk-probability", + "code": "high", + "display": "High likelihood" + }] + } + }] + } + ``` + +--- + +## What You've Built + +Two deployment patterns for the same ML model: + +| | CDS Hooks | FHIR Gateway | +|-|-----------|--------------| +| **Integration** | Event-driven (EHR pushes data) | Pull-based (service queries server) | +| **Latency** | Real-time (<1s) | Batch (seconds to minutes) | +| **Output** | CDS Cards (ephemeral alerts) | RiskAssessment (persisted resources) | +| **Scaling** | Per-patient on demand | Parallel/scheduled batch jobs | + +Both patterns: + +- **Share the same model** - Train once, deploy multiple ways +- **Use YAML feature schemas** - Declarative FHIR → features mapping +- **Handle FHIR natively** - No custom data wrangling per integration + +!!! info "Use Cases" + + **CDS Hooks (Real-time)** + + - Sepsis early warning alerts when opening ICU patient charts + - Drug interaction warnings during medication ordering + - Clinical guideline reminders triggered by diagnosis codes + + **FHIR Gateway (Batch)** + + - Nightly population health screening + - Quality measure calculation for reporting + - Research cohort identification + - Pre-visit risk stratification + +!!! 
tip "Next Steps" + + - **Train your own model**: Replace `sepsis_model.pkl` with your model; update the feature schema to match + - **Add more features**: Extend `sepsis_vitals.yaml` with lab values, medications, or other Observations + - **Add more FHIR sources**: The gateway supports multiple sources—see the cookbook script for Epic sandbox configuration, or the [FHIR Sandbox Setup guide](./setup_fhir_sandboxes.md) + - **Automate batch runs**: Schedule screening jobs with cron, Airflow, or cloud schedulers; or use [FHIR Subscriptions](https://www.hl7.org/fhir/subscription.html) to trigger on new ICU admissions ([PRs welcome!](https://github.com/dotimplement/HealthChain/pulls)) + - **Combine patterns**: Use batch screening to identify high-risk patients, then enable CDS + alerts for those patients diff --git a/docs/index.md b/docs/index.md index e314ef41..f3681305 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,7 +10,7 @@ HealthChain is an open-source Python toolkit that streamlines productionizing he
-- :material-tools:{ .lg .middle } __FHIR-native ML Pipelines__ +- :material-tools:{ .lg .middle } __FHIR-native Pipelines__ --- diff --git a/mkdocs.yml b/mkdocs.yml index 8692f0d3..00d9ebc0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -18,6 +18,7 @@ nav: - Multi-Source Data Integration: cookbook/multi_ehr_aggregation.md - Automated Clinical Coding: cookbook/clinical_coding.md - Discharge Summarizer: cookbook/discharge_summarizer.md + - ML Model Deployment: cookbook/ml_model_deployment.md - Docs: - Welcome: reference/index.md - Gateway: diff --git a/scripts/sepsis_prediction_training.py b/scripts/sepsis_prediction_training.py index a0ea85ce..16d630de 100644 --- a/scripts/sepsis_prediction_training.py +++ b/scripts/sepsis_prediction_training.py @@ -12,6 +12,7 @@ - python sepsis_prediction_training.py """ +import os import pandas as pd import numpy as np from pathlib import Path @@ -898,8 +899,10 @@ def save_model( def main(): """Main training pipeline.""" - # Data directory - data_dir = "../datasets/mimic-iv-clinical-database-demo-2.2" + # Data directory (set via MIMIC_CSV_PATH or use default) + data_dir = os.getenv( + "MIMIC_CSV_PATH", "../datasets/mimic-iv-clinical-database-demo-2.2" + ) # Output path (relative to script location) script_dir = Path(__file__).parent