dotimplement · jenniferjiangkells · Mar 5, 2025 · Jan 21, 2025 · Jan 22, 2025 · Feb 13, 2025
diff --git a/README.md b/README.md
@@ -10,20 +10,22 @@
 
 </div>
 
-Simplify developing, testing and validating AI and NLP applications in a healthcare context 💫 🏥.
+Build simple, portable, and scalable AI and NLP applications in a healthcare context 💫 🏥.
 
-Building applications that integrate with electronic health record systems (EHRs) is complex, and so is designing reliable, reactive algorithms involving unstructured data. Let's try to change that.
+Integrating electronic health record (EHR) system data is complex, and so is designing reliable, reactive algorithms involving unstructured healthcare data. Let's try to change that.
 
 ```bash
 pip install healthchain
 ```
 First time here? Check out our [Docs](https://dotimplement.github.io/HealthChain/) page!
 
-Came here from NHS RPySOC 2024 ✨? [CDS sandbox walkthrough](https://dotimplement.github.io/HealthChain/cookbook/cds_sandbox/)
+Came here from NHS RPySOC 2024 ✨?
+[CDS sandbox walkthrough](https://dotimplement.github.io/HealthChain/cookbook/cds_sandbox/)
+[Slides](https://speakerdeck.com/jenniferjiangkells/building-healthcare-context-aware-applications-with-healthchain)
 
 ## Features
-- [x] 🛠️ Build custom pipelines or use [pre-built ones](https://dotimplement.github.io/HealthChain/reference/pipeline/pipeline/#prebuilt) for your healthcare NLP and ML tasks
-- [x] 🏗️ Add built-in [CDA and FHIR parsers](https://dotimplement.github.io/HealthChain/reference/utilities/cda_parser/) to connect your pipeline to interoperability standards
+- [x] 🔥 Build FHIR-native pipelines or use [pre-built ones](https://dotimplement.github.io/HealthChain/reference/pipeline/pipeline/#prebuilt) for your healthcare NLP and ML tasks
+- [x] 🔌 Connect pipelines to any EHR system with built-in [CDA and FHIR Connectors](https://dotimplement.github.io/HealthChain/reference/pipeline/connectors/connectors/)
 - [x] 🧪 Test your pipelines in full healthcare-context aware [sandbox](https://dotimplement.github.io/HealthChain/reference/sandbox/sandbox/) environments
 - [x] 🗃️ Generate [synthetic healthcare data](https://dotimplement.github.io/HealthChain/reference/utilities/data_generator/) for testing and development
 - [x] 🚀 Deploy sandbox servers locally with [FastAPI](https://fastapi.tiangolo.com/)
@@ -42,24 +44,28 @@ Pipelines provide a flexible way to build and manage processing pipelines for NL
 ```python
 from healthchain.io.containers import Document
 from healthchain.pipeline import Pipeline
-from healthchain.pipeline.components import TextPreProcessor, SpacyNLP, TextPostProcessor
+from healthchain.pipeline.components import (
+    TextPreProcessor,
+    SpacyNLP,
+    TextPostProcessor,
+)
 
 # Initialize the pipeline
 nlp_pipeline = Pipeline[Document]()
 
 # Add TextPreProcessor component
-preprocessor = TextPreProcessor(tokenizer="spacy")
+preprocessor = TextPreProcessor()
 nlp_pipeline.add_node(preprocessor)
 
 # Add Model component (assuming we have a pre-trained model)
-spacy_nlp = SpacyNLP.from_model_id("en_core_sci_md", source="spacy")
+spacy_nlp = SpacyNLP.from_model_id("en_core_sci_sm")
 nlp_pipeline.add_node(spacy_nlp)
 
 # Add TextPostProcessor component
 postprocessor = TextPostProcessor(
     postcoordination_lookup={
         "heart attack": "myocardial infarction",
-        "high blood pressure": "hypertension"
+        "high blood pressure": "hypertension",
     }
 )
 nlp_pipeline.add_node(postprocessor)
@@ -70,7 +76,7 @@ nlp = nlp_pipeline.build()
 # Use the pipeline
 result = nlp(Document("Patient has a history of heart attack and high blood pressure."))
 
-print(f"Entities: {result.nlp.spacy_doc.ents}")
+print(f"Entities: {result.nlp.get_entities()}")
 ```
 
 #### Adding connectors
@@ -89,6 +95,7 @@ pipe = pipeline.build()
 
 cda_data = CdaRequest(document="<CDA XML content>")
 output = pipe(cda_data)
+# output: CdsResponse model
 ```
 
 ### Using pre-built pipelines
@@ -130,7 +137,7 @@ import healthchain as hc
 
 from healthchain.pipeline import SummarizationPipeline
 from healthchain.use_cases import ClinicalDecisionSupport
-from healthchain.models import Card, CdsFhirData, CDSRequest
+from healthchain.models import Card, Prefetch, CDSRequest
 from healthchain.data_generator import CdsDataGenerator
 from typing import List
 
@@ -144,8 +151,8 @@ class MyCDS(ClinicalDecisionSupport):
 
     # Sets up an instance of a mock EHR client of the specified workflow
     @hc.ehr(workflow="encounter-discharge")
-    def ehr_database_client(self) -> CdsFhirData:
-        return self.data_generator.generate()
+    def ehr_database_client(self) -> Prefetch:
+        return self.data_generator.generate_prefetch()
 
     # Define your application logic here
     @hc.api
@@ -167,7 +174,8 @@ import healthchain as hc
 
 from healthchain.pipeline import MedicalCodingPipeline
 from healthchain.use_cases import ClinicalDocumentation
-from healthchain.models import CcdData, CdaRequest, CdaResponse
+from healthchain.models import CdaRequest, CdaResponse
+from fhir.resources.documentreference import DocumentReference
 
 @hc.sandbox
 class NotereaderSandbox(ClinicalDocumentation):
@@ -178,11 +186,16 @@ class NotereaderSandbox(ClinicalDocumentation):
 
     # Load an existing CDA file
     @hc.ehr(workflow="sign-note-inpatient")
-    def load_data_in_client(self) -> CcdData:
+    def load_data_in_client(self) -> DocumentReference:
         with open("/path/to/cda/data.xml", "r") as file:
             xml_string = file.read()
 
-        return CcdData(cda_xml=xml_string)
+        cda_document_reference = create_document_reference(
+            data=xml_string,
+            content_type="text/xml",
+            description="Original CDA Document loaded from my sandbox",
+        )
+        return cda_document_reference
 
     @hc.api
     def my_service(self, data: CdaRequest) -> CdaResponse:
@@ -206,17 +219,17 @@ healthchain run mycds.py
 By default, the server runs at `http://127.0.0.1:8000`, and you can interact with the exposed endpoints at `/docs`.
 
 ## Road Map
-- [ ] 🎛️ Versioning and artifact management for pipelines sandbox EHR configurations
-- [ ] ❓ Testing and evaluation framework for pipelines and use cases
+- [ ] 🔄 Transform and validate healthcare data from HL7v2 and CDA to FHIR with a template-based interop engine
+- [ ] 🏥 Runtime connection health and EHR integration management - connect to FHIR APIs and legacy systems
+- [ ] 📊 Track configurations, data provenance, and monitor model performance with MLFlow integration
+- [ ] 🚀 Compliance monitoring, auditing at deployment as a sidecar service
+- [ ] 🔒 Built-in HIPAA compliance validation and PHI detection
 - [ ] 🧠 Multi-modal pipelines that have built-in NLP to utilize unstructured data
-- [ ] ✨ Improvements to synthetic data generator methods
-- [ ] 👾 Frontend UI for EHR client and visualization features
-- [ ] 🚀 Production deployment options
 
 ## Contribute
 We are always eager to hear feedback and suggestions, especially if you are a developer or researcher working with healthcare systems!
 - 💡 Let's chat! [Discord](https://discord.gg/UQC6uAepUz)
 - 🛠️ [Contribution Guidelines](CONTRIBUTING.md)
 
 ## Acknowledgement
-This repository makes use of CDS Hooks developed by Boston Children’s Hospital.
+This repository makes use of [fhir.resources](https://github.com/nazrulworld/fhir.resources) and [CDS Hooks](https://cds-hooks.org/) developed by [HL7](https://www.hl7.org/) and [Boston Children’s Hospital](https://www.childrenshospital.org/).
diff --git a/cookbook/cds_discharge_summarizer_hf_chat.py b/cookbook/cds_discharge_summarizer_hf_chat.py
@@ -2,7 +2,7 @@
 
 from healthchain.pipeline import SummarizationPipeline
 from healthchain.use_cases import ClinicalDecisionSupport
-from healthchain.models import CdsFhirData, CDSRequest, CDSResponse
+from healthchain.models import CDSRequest, CDSResponse, Prefetch
 from healthchain.data_generators import CdsDataGenerator
 
 from langchain_huggingface.llms import HuggingFaceEndpoint
@@ -47,9 +47,9 @@ def __init__(self):
         self.data_generator = CdsDataGenerator()
 
     @hc.ehr(workflow="encounter-discharge")
-    def load_data_in_client(self) -> CdsFhirData:
+    def load_data_in_client(self) -> Prefetch:
         # Generate synthetic FHIR data for testing
-        data = self.data_generator.generate(
+        data = self.data_generator.generate_prefetch(
             free_text_path="data/discharge_notes.csv", column_name="text"
         )
         return data

diff --git a/cookbook/cds_discharge_summarizer_hf_trf.py b/cookbook/cds_discharge_summarizer_hf_trf.py
@@ -2,7 +2,7 @@
 
 from healthchain.pipeline import SummarizationPipeline
 from healthchain.use_cases import ClinicalDecisionSupport
-from healthchain.models import CdsFhirData, CDSRequest, CDSResponse
+from healthchain.models import Prefetch, CDSRequest, CDSResponse
 from healthchain.data_generators import CdsDataGenerator
 
 import getpass
@@ -22,8 +22,8 @@ def __init__(self):
         self.data_generator = CdsDataGenerator()
 
     @hc.ehr(workflow="encounter-discharge")
-    def load_data_in_client(self) -> CdsFhirData:
-        data = self.data_generator.generate(
+    def load_data_in_client(self) -> Prefetch:
+        data = self.data_generator.generate_prefetch(
             free_text_path="data/discharge_notes.csv", column_name="text"
         )
         return data

diff --git a/docs/api/data_models.md b/docs/api/data_models.md
diff --git a/docs/api/fhir_helpers.md b/docs/api/fhir_helpers.md
@@ -0,0 +1,3 @@
+# FHIR Helpers
+
+::: healthchain.fhir
diff --git a/docs/cookbook/cds_sandbox.md b/docs/cookbook/cds_sandbox.md
@@ -127,7 +127,7 @@ print(data.model_dump())
 # }
 ```
 
-The data generator returns a `CdsFhirData` object, which ensures that the data is parsed correctly inside the sandbox.
+The data generator returns a `Prefetch` object, which ensures that the data is parsed correctly inside the sandbox.
 
 ## Define client workflow
 
@@ -137,7 +137,7 @@ To finish our sandbox, we'll define a client function that loads the data genera
 import healthchain as hc
 
 from healthchain.use_cases import ClinicalDecisionSupport
-from healthchain.models import CDSRequest, CDSResponse, CdsFhirData
+from healthchain.models import CDSRequest, CDSResponse, Prefetch
 
 @hc.sandbox
 class DischargeNoteSummarizer(ClinicalDecisionSupport):
@@ -151,8 +151,8 @@ class DischargeNoteSummarizer(ClinicalDecisionSupport):
     return result
 
   @hc.ehr(workflow="encounter-discharge")
-  def load_data_in_client(self) -> CdsFhirData:
-    data = self.data_generator.generate()
+  def load_data_in_client(self) -> Prefetch:
+    data = self.data_generator.generate_prefetch()
     return data
 ```
 

diff --git a/docs/cookbook/notereader_sandbox.md b/docs/cookbook/notereader_sandbox.md
@@ -1,38 +1,74 @@
 # NoteReader Sandbox
 
-A sandbox example of NoteReader clinical documentation improvement which extracts problems, medications, and allergies entries from the progress note section of a pre-configured CDA document.
+A sandbox example of NoteReader clinical documentation improvement which extracts problem, medication, and allergy entries from the progress note section of a pre-configured CDA document using [scispacy](https://github.com/allenai/scispacy) with a custom entity linker component.
 
 Full example coming soon!
 
 ```python
 import healthchain as hc
-from healthchain.use_cases import ClinicalDocumentation
-from healthchain.models import (
-    CcdData,
-    AllergyConcept,
-    Concept,
-    MedicationConcept,
-    ProblemConcept,
-    Quantity,
-)
+
+from healthchain.io import Document
+from healthchain.models.requests.cda import CdaRequest, CdaResponse
+from healthchain.pipeline.medicalcodingpipeline import MedicalCodingPipeline
+from healthchain.use_cases.clindoc import ClinicalDocumentation
+from healthchain.fhir import create_document_reference
+
+from spacy.tokens import Span
+
+from fhir.resources.documentreference import DocumentReference
+
+pipeline = MedicalCodingPipeline.from_model_id("en_core_sci_sm", source="spacy")
+
+@pipeline.add_node(position="after", reference="SpacyNLP")
+def link_entities(doc: Document) -> Document:
+    # Register the extension if it doesn't exist already
+    if not Span.has_extension("cui"):
+        Span.set_extension("cui", default=None)
+    spacy_doc = doc.nlp.get_spacy_doc()
+
+    dummy_linker = {"fever": "C0006477",
+                    "cough": "C0006477",
+                    "cold": "C0006477",
+                    "flu": "C0006477",
+                    "headache": "C0006477",
+                    "sore throat": "C0006477",
+                    }
+
+    for ent in spacy_doc.ents:
+        if ent.text in dummy_linker:
+            ent._.cui = dummy_linker[ent.text]
+
+    doc.update_problem_list_from_nlp()
+
+    return doc
+
 
 @hc.sandbox
 class NotereaderSandbox(ClinicalDocumentation):
-  def __init__(self):
-      self.cda_path = "./resources/uclh_cda.xml"
-      self.pipeline = MedicalCodingPipeline.from_local_model(
-          "./resources/models/medcat_model.zip", source="spacy"
-      )
-
-  @hc.ehr(workflow="sign-note-inpatient")
-  def load_data_in_client(self) -> CcdData:
-      with open(self.cda_path, "r") as file:
-          xml_string = file.read()
-
-      return CcdData(cda_xml=xml_string)
-
-  @hc.api
-  def my_service(self, request: CdaRequest) -> CdaResponse:
-    response = self.pipeline(request)
-    return response
+    def __init__(self):
+        self.pipeline = pipeline
+
+    @hc.ehr(workflow="sign-note-inpatient")
+    def load_data_in_client(self) -> DocumentReference:
+        with open("./resources/uclh_cda.xml", "r") as file:
+            xml_string = file.read()
+
+        cda_document_reference = create_document_reference(
+            data=xml_string,
+            content_type="text/xml",
+            description="Original CDA Document loaded from my sandbox",
+        )
+
+        return cda_document_reference
+
+    @hc.api
+    def my_service(self, request: CdaRequest) -> CdaResponse:
+        result = self.pipeline(request)
+
+        return result
+
+
+if __name__ == "__main__":
+    clindoc = NotereaderSandbox()
+    clindoc.start_sandbox()
 ```