From 04eea434ab2e9d9a9e0469b1bb75ec34c393a397 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 24 Mar 2025 18:49:13 +0000 Subject: [PATCH 01/10] introduce log_text function to log qualitative text block --- validmind/api_client.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index 3adc5a832..8a2b86dc0 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -407,6 +407,12 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A return run_async(alog_input, input_id, type, metadata) +def log_text( + content_id: str, text: str, _json: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + return run_async(alog_metadata, content_id, text, _json) + + async def alog_metric( key: str, value: Union[int, float], @@ -476,7 +482,15 @@ def log_metric( recorded_at (str, optional): Timestamp when the metric was recorded thresholds (Dict[str, Any], optional): Thresholds for the metric """ - return run_async(alog_metric, key=key, value=value, inputs=inputs, params=params, recorded_at=recorded_at, thresholds=thresholds) + return run_async( + alog_metric, + key=key, + value=value, + inputs=inputs, + params=params, + recorded_at=recorded_at, + thresholds=thresholds, + ) def get_ai_key() -> Dict[str, Any]: From be874fed8d1bdb2d496e9b2e5949e2dece783b03 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 24 Mar 2025 18:50:02 +0000 Subject: [PATCH 02/10] introduce log_text function to log qualitative text block --- validmind/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validmind/__init__.py b/validmind/__init__.py index 55b2dd1d2..c99f3a537 100644 --- a/validmind/__init__.py +++ b/validmind/__init__.py @@ -43,7 +43,7 @@ warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning) from .__version__ import __version__ # noqa: E402 -from .api_client import init, log_metric, reload +from .api_client import init, log_metric, log_text, reload from .client import ( # noqa: E402 get_test_suite, init_dataset, @@ -125,4 +125,5 @@ def check_version(): "tests", "unit_metrics", "test_suites", + "log_text", ] From 297ded9a65a5bbb23735c0c9f6d4a5dd41e3b29a Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 24 Mar 2025 20:25:40 +0000 Subject: [PATCH 03/10] introduce log_text function to log qualitative text block --- validmind/api_client.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index 8a2b86dc0..802fb3c08 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -407,9 +407,7 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A return run_async(alog_input, input_id, type, metadata) -def log_text( - content_id: str, text: str, _json: Optional[Dict[str, Any]] = None -) -> Dict[str, Any]: +def log_text(content_id: str, text: str, _json: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: return run_async(alog_metadata, content_id, text, _json) @@ -482,15 +480,7 @@ def log_metric( recorded_at (str, optional): Timestamp when the metric was recorded thresholds (Dict[str, Any], optional): Thresholds for the metric """ - return run_async( - alog_metric, - key=key, - value=value, - inputs=inputs, - params=params, - recorded_at=recorded_at, - thresholds=thresholds, - ) + return run_async(alog_metric, key=key, value=value, inputs=inputs, params=params, recorded_at=recorded_at, thresholds=thresholds) def 
get_ai_key() -> Dict[str, Any]: From d6fd2abfba85632baf645faac9d2753eadaca4c9 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Wed, 26 Mar 2025 16:32:57 +0000 Subject: [PATCH 04/10] validate parameter types in log_text interface --- validmind/api_client.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/validmind/api_client.py b/validmind/api_client.py index 802fb3c08..cc72a9515 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -408,6 +408,23 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A def log_text(content_id: str, text: str, _json: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """Logs free-form text to ValidMind API. + + Args: + content_id (str): Unique content identifier for the text. + text (str): The text to log. + _json (dict, optional): Free-form key-value pairs to assign to the text. Defaults to None. + + Raises: + Exception: If the API call fails. + + Returns: + dict: The response from the API. + """ + if not content_id or not isinstance(content_id, str): + raise ValueError("`content_id` must be a non-empty string") + if not text or not isinstance(text, str): + raise ValueError("`text` must be a non-empty string") return run_async(alog_metadata, content_id, text, _json) From c53f94f0775d53e74b0e507ef50c446a766365f8 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 27 Mar 2025 09:57:08 +0000 Subject: [PATCH 05/10] convert str into text markup --- validmind/api_client.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index cc72a9515..9d8a16b9d 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -22,7 +22,7 @@ from .client_config import client_config from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error from .logging import get_logger, init_sentry, send_single_error -from .utils import NumpyEncoder, run_async +from .utils import NumpyEncoder, md_to_html, run_async from .vm_models import Figure logger = get_logger(__name__) @@ -407,7 +407,9 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A return run_async(alog_input, input_id, type, metadata) -def log_text(content_id: str, text: str, _json: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: +def log_text( + content_id: str, text: str, _json: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: """Logs free-form text to ValidMind API. 
Args: @@ -425,7 +427,7 @@ def log_text(content_id: str, text: str, _json: Optional[Dict[str, Any]] = None) raise ValueError("`content_id` must be a non-empty string") if not text or not isinstance(text, str): raise ValueError("`text` must be a non-empty string") - return run_async(alog_metadata, content_id, text, _json) + return run_async(alog_metadata, content_id, md_to_html(text, mathml=True), _json) async def alog_metric( @@ -497,7 +499,15 @@ def log_metric( recorded_at (str, optional): Timestamp when the metric was recorded thresholds (Dict[str, Any], optional): Thresholds for the metric """ - return run_async(alog_metric, key=key, value=value, inputs=inputs, params=params, recorded_at=recorded_at, thresholds=thresholds) + return run_async( + alog_metric, + key=key, + value=value, + inputs=inputs, + params=params, + recorded_at=recorded_at, + thresholds=thresholds, + ) def get_ai_key() -> Dict[str, Any]: From 2a46ca4c938328ed9846d5c40843d5f58f421bfc Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 27 Mar 2025 09:57:57 +0000 Subject: [PATCH 06/10] format files --- validmind/client.py | 9 +- validmind/datasets/classification/__init__.py | 5 +- .../datasets/credit_risk/lending_club.py | 7 +- validmind/datasets/nlp/cnn_dailymail.py | 6 +- validmind/datasets/regression/__init__.py | 7 +- validmind/logging.py | 15 ++-- validmind/models/function.py | 3 +- validmind/template.py | 19 ++--- validmind/tests/_store.py | 7 +- validmind/tests/decorator.py | 4 +- validmind/tests/load.py | 84 +++++++++++-------- .../ClassifierThresholdOptimization.py | 7 +- .../sklearn/SHAPGlobalImportance.py | 12 ++- validmind/tests/output.py | 13 ++- validmind/tests/test_providers.py | 2 +- validmind/tests/utils.py | 8 +- validmind/utils.py | 10 +-- validmind/vm_models/dataset/dataset.py | 6 +- 18 files changed, 124 insertions(+), 100 deletions(-) diff --git a/validmind/client.py b/validmind/client.py index 956a0ac78..fe0517085 100644 --- a/validmind/client.py +++ b/validmind/client.py @@ -6,11 +6,12 @@ Client interface for all data and model validation functions """ +from typing import Any, Callable, Dict, List, Optional, Union + +import numpy as np import pandas as pd import polars as pl -import numpy as np import torch -from typing import Any, Callable, Dict, List, Optional, Union from .api_client import log_input as log_input from .client_config import client_config @@ -45,7 +46,9 @@ def init_dataset( - dataset: Union[pd.DataFrame, pl.DataFrame, "np.ndarray", "torch.utils.data.TensorDataset"], + dataset: Union[ + pd.DataFrame, pl.DataFrame, "np.ndarray", "torch.utils.data.TensorDataset" + ], model: Optional[VMModel] = None, index: Optional[Any] = None, index_name: Optional[str] = None, diff --git a/validmind/datasets/classification/__init__.py b/validmind/datasets/classification/__init__.py index 94df363af..b18241295 100644 --- a/validmind/datasets/classification/__init__.py +++ b/validmind/datasets/classification/__init__.py @@ -6,6 +6,7 @@ Entrypoint for classification datasets. """ from typing import List + import pandas as pd __all__ = [ @@ -37,7 +38,9 @@ def simple_preprocess_booleans(df: pd.DataFrame, columns: List[str]) -> pd.DataF return df -def simple_preprocess_categoricals(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame: +def simple_preprocess_categoricals( + df: pd.DataFrame, columns: List[str] +) -> pd.DataFrame: """ Preprocess categorical columns. 
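Taken together, patches 04 and 05 give log_text input validation plus markdown-to-HTML conversion. A minimal usage sketch based on the signature above — the content_id values and text are illustrative, and vm.init() is assumed to have been called with valid credentials:

import validmind as vm

# markdown is converted via md_to_html(text, mathml=True) before upload
vm.log_text(
    content_id="model_overview",  # illustrative content_id
    text="## Overview\n\nLogistic scorecard model; see metrics below.",
)

# both arguments are checked for type and emptiness
vm.log_text(content_id="", text="note")    # raises ValueError (`content_id`)
vm.log_text(content_id="note", text=None)  # raises ValueError (`text`)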
diff --git a/validmind/datasets/credit_risk/lending_club.py b/validmind/datasets/credit_risk/lending_club.py index 958082ad0..283c4fd22 100644 --- a/validmind/datasets/credit_risk/lending_club.py +++ b/validmind/datasets/credit_risk/lending_club.py @@ -5,7 +5,7 @@ import logging import os import warnings -from typing import Dict, Optional, Tuple, Any +from typing import Any, Dict, Optional, Tuple import numpy as np import pandas as pd @@ -389,7 +389,7 @@ def split( validation_split: Optional[float] = None, test_size: float = 0.2, add_constant: bool = False, - verbose: bool = True + verbose: bool = True, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Split dataset into train, validation (optional), and test sets. @@ -476,8 +476,7 @@ def compute_scores(probabilities: np.ndarray) -> np.ndarray: def get_demo_test_config( - x_test: Optional[np.ndarray] = None, - y_test: Optional[np.ndarray] = None + x_test: Optional[np.ndarray] = None, y_test: Optional[np.ndarray] = None ) -> Dict[str, Any]: """Get demo test configuration. diff --git a/validmind/datasets/nlp/cnn_dailymail.py b/validmind/datasets/nlp/cnn_dailymail.py index 4f47c3b74..80ced3ef8 100644 --- a/validmind/datasets/nlp/cnn_dailymail.py +++ b/validmind/datasets/nlp/cnn_dailymail.py @@ -4,7 +4,7 @@ import os import textwrap -from typing import Tuple, Optional +from typing import Optional, Tuple import pandas as pd from datasets import load_dataset @@ -23,7 +23,9 @@ dataset_path = os.path.join(current_path, "datasets") -def load_data(source: str = "online", dataset_size: Optional[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]: +def load_data( + source: str = "online", dataset_size: Optional[str] = None +) -> Tuple[pd.DataFrame, pd.DataFrame]: """ Load data from either online source or offline files. diff --git a/validmind/datasets/regression/__init__.py b/validmind/datasets/regression/__init__.py index 045e201c8..110fd7199 100644 --- a/validmind/datasets/regression/__init__.py +++ b/validmind/datasets/regression/__init__.py @@ -5,9 +5,10 @@ """ Entrypoint for regression datasets """ -import pandas as pd from typing import List +import pandas as pd + __all__: List[str] = [ "fred", "lending_club", @@ -40,7 +41,9 @@ def identify_frequencies(df: pd.DataFrame) -> pd.DataFrame: return freq_df -def resample_to_common_frequency(df: pd.DataFrame, common_frequency: str = "MS") -> pd.DataFrame: +def resample_to_common_frequency( + df: pd.DataFrame, common_frequency: str = "MS" +) -> pd.DataFrame: """ Resample time series data to a common frequency. 
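On the resample_to_common_frequency helper reformatted above: the default common_frequency="MS" is the pandas month-start alias. A plain-pandas illustration of that kind of resampling (this is not the helper's actual body):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=90, freq="D")
daily = pd.DataFrame({"rate": range(90)}, index=idx)

# downsample daily observations to month-start ("MS") frequency,
# averaging the values that fall inside each month
monthly = daily.resample("MS").mean()
print(monthly.head(3))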
diff --git a/validmind/logging.py b/validmind/logging.py index 41b563610..1cb81ec73 100644 --- a/validmind/logging.py +++ b/validmind/logging.py @@ -7,7 +7,7 @@ import logging import os import time -from typing import Any, Callable, Dict, Optional, TypeVar, Awaitable +from typing import Any, Awaitable, Callable, Dict, Optional, TypeVar import sentry_sdk from sentry_sdk.utils import event_from_exception, exc_info_from_error @@ -28,8 +28,7 @@ def _get_log_level() -> int: def get_logger( - name: str = "validmind", - log_level: Optional[int] = None + name: str = "validmind", log_level: Optional[int] = None ) -> logging.Logger: """Get a logger for the given module name.""" formatter = logging.Formatter( @@ -95,14 +94,14 @@ def init_sentry(server_config: Dict[str, Any]) -> None: logger.debug(f"Sentry error: {str(e)}") -F = TypeVar('F', bound=Callable[..., Any]) -AF = TypeVar('AF', bound=Callable[..., Awaitable[Any]]) +F = TypeVar("F", bound=Callable[..., Any]) +AF = TypeVar("AF", bound=Callable[..., Awaitable[Any]]) def log_performance( name: Optional[str] = None, logger: Optional[logging.Logger] = None, - force: bool = False + force: bool = False, ) -> Callable[[F], F]: """Decorator to log the time it takes to run a function. @@ -114,6 +113,7 @@ def log_performance( Returns: Callable: The decorated function. """ + def decorator(func: F) -> F: # check if log level is set to debug if _get_log_level() != logging.DEBUG and not force: @@ -137,6 +137,7 @@ def wrapped(*args: Any, **kwargs: Any) -> Any: return return_val return wrapped + return decorator @@ -144,7 +145,7 @@ async def log_performance_async( func: AF, name: Optional[str] = None, logger: Optional[logging.Logger] = None, - force: bool = False + force: bool = False, ) -> AF: """Async version of log_performance decorator""" # check if log level is set to debug diff --git a/validmind/models/function.py b/validmind/models/function.py index 730325653..a8c6067a1 100644 --- a/validmind/models/function.py +++ b/validmind/models/function.py @@ -2,8 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List + from validmind.vm_models.model import VMModel -from typing import Dict, Any, List # semi-immutable dict diff --git a/validmind/template.py b/validmind/template.py index 1a3ef5c2a..315b9449a 100644 --- a/validmind/template.py +++ b/validmind/template.py @@ -2,9 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from ipywidgets import HTML, Accordion, VBox -from typing import Any, Dict, List, Optional, Union, Type -from ipywidgets import Widget +from typing import Any, Dict, List, Optional, Type, Union + +from ipywidgets import HTML, Accordion, VBox, Widget from .html_templates.content_blocks import ( failed_content_block_html, @@ -33,7 +33,7 @@ def _convert_sections_to_section_tree( sections: List[Dict[str, Any]], parent_id: str = "_root_", - start_section_id: Optional[str] = None + start_section_id: Optional[str] = None, ) -> List[Dict[str, Any]]: section_tree = [] @@ -80,8 +80,7 @@ def _create_content_widget(content: Dict[str, Any]) -> Widget: def _create_sub_section_widget( - sub_sections: List[Dict[str, Any]], - section_number: str + sub_sections: List[Dict[str, Any]], section_number: str ) -> Union[HTML, Accordion]: if not sub_sections: return HTML("
<p>Empty Section</p>
") @@ -205,8 +204,7 @@ def _create_test_suite_section(section: Dict[str, Any]) -> Dict[str, Any]: def _create_template_test_suite( - template: str, - section: Optional[str] = None + template: str, section: Optional[str] = None ) -> Type[TestSuite]: """ Create and run a test suite from a template. @@ -239,10 +237,7 @@ def _create_template_test_suite( ) -def get_template_test_suite( - template: str, - section: Optional[str] = None -) -> TestSuite: +def get_template_test_suite(template: str, section: Optional[str] = None) -> TestSuite: """Get a TestSuite instance containing all tests in a template. This function will collect all tests used in a template into a dynamically-created diff --git a/validmind/tests/_store.py b/validmind/tests/_store.py index 9103bff47..569094d6f 100644 --- a/validmind/tests/_store.py +++ b/validmind/tests/_store.py @@ -5,9 +5,10 @@ """Module for storing loaded tests and test providers""" -from .test_providers import TestProvider, ValidMindTestProvider from typing import Any, Callable, Optional +from .test_providers import TestProvider, ValidMindTestProvider + def singleton(cls): """Decorator to make a class a singleton""" @@ -77,7 +78,9 @@ def get_test(self, test_id: str) -> Optional[Callable[..., Any]]: """ return self.tests.get(test_id) - def register_test(self, test_id: str, test: Optional[Callable[..., Any]] = None) -> None: + def register_test( + self, test_id: str, test: Optional[Callable[..., Any]] = None + ) -> None: """Register a test Args: diff --git a/validmind/tests/decorator.py b/validmind/tests/decorator.py index 4abb71c5c..26aa78f90 100644 --- a/validmind/tests/decorator.py +++ b/validmind/tests/decorator.py @@ -7,7 +7,7 @@ import inspect import os from functools import wraps -from typing import Any, Callable, List, Optional, Union, TypeVar +from typing import Any, Callable, List, Optional, TypeVar, Union from validmind.logging import get_logger @@ -16,7 +16,7 @@ logger = get_logger(__name__) -F = TypeVar('F', bound=Callable[..., Any]) +F = TypeVar("F", bound=Callable[..., Any]) def _get_save_func(func: Callable[..., Any], test_id: str) -> Callable[..., None]: diff --git a/validmind/tests/load.py b/validmind/tests/load.py index cbf40fb23..799c5c06e 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -31,7 +31,9 @@ } -def _inspect_signature(test_func: Callable[..., Any]) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: +def _inspect_signature( + test_func: Callable[..., Any] +) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: """Inspect a test function's signature to get inputs and parameters""" inputs = {} params = {} @@ -58,6 +60,7 @@ def _inspect_signature(test_func: Callable[..., Any]) -> Tuple[Dict[str, Dict[st def _create_mock_test(test_id: str) -> Callable[..., Any]: """Create a mock test function for unit testing purposes""" + def mock_test(*args, **kwargs): return {"test_id": test_id, "args": args, "kwargs": kwargs} @@ -75,9 +78,7 @@ def mock_test(*args, **kwargs): def _load_test_from_provider(test_id: str, namespace: str) -> Callable[..., Any]: """Load a test from the appropriate provider""" if not test_provider_store.has_test_provider(namespace): - raise LoadTestError( - f"No test provider found for namespace: {namespace}" - ) + raise LoadTestError(f"No test provider found for namespace: {namespace}") provider = test_provider_store.get_test_provider(namespace) @@ -90,7 +91,9 @@ def _load_test_from_provider(test_id: str, namespace: str) -> Callable[..., Any] ) from e -def 
_prepare_test_function(test_func: Callable[..., Any], test_id: str) -> Callable[..., Any]: +def _prepare_test_function( + test_func: Callable[..., Any], test_id: str +) -> Callable[..., Any]: """Prepare a test function by adding necessary attributes""" # Add test_id as an attribute to the test function test_func.test_id = test_id @@ -106,9 +109,7 @@ def _prepare_test_function(test_func: Callable[..., Any], test_id: str) -> Calla def load_test( - test_id: str, - test_func: Optional[Callable[..., Any]] = None, - reload: bool = False + test_id: str, test_func: Optional[Callable[..., Any]] = None, reload: bool = False ) -> Callable[..., Any]: """Load a test by test ID @@ -184,7 +185,9 @@ def _test_description(test_description: str, num_lines: int = 5) -> str: return test_description -def _pretty_list_tests(tests: Dict[str, Callable[..., Any]], truncate: bool = True) -> None: +def _pretty_list_tests( + tests: Dict[str, Callable[..., Any]], truncate: bool = True +) -> None: """Pretty print a list of tests""" for test_id, test_func in sorted(tests.items()): print(f"\n{test_id_to_name(test_id)}") @@ -213,10 +216,9 @@ def list_tasks_and_tags(as_json: bool = False) -> Union[str, Dict[str, List[str] # Import this here to avoid circular import import pandas as pd - df = pd.DataFrame({ - "Task": tasks, - "Tags": [", ".join(tags) for _ in range(len(tasks))] - }) + df = pd.DataFrame( + {"Task": tasks, "Tags": [", ".join(tags) for _ in range(len(tasks))]} + ) return df # Return DataFrame instead of df.style except (ImportError, AttributeError): # Fallback if pandas is not available or styling doesn't work @@ -249,9 +251,7 @@ def _filter_test_ids(test_ids: List[str], filter_text: Optional[str]) -> List[st elif filter_text: # Normal filtering logic return [ - test_id - for test_id in test_ids - if filter_text.lower() in test_id.lower() + test_id for test_id in test_ids if filter_text.lower() in test_id.lower() ] return test_ids @@ -274,7 +274,9 @@ def _filter_tests_by_task(tests: Dict[str, Any], task: Optional[str]) -> Dict[st return {test_id: tests[test_id] for test_id in task_test_ids} -def _filter_tests_by_tags(tests: Dict[str, Any], tags: Optional[List[str]]) -> Dict[str, Any]: +def _filter_tests_by_tags( + tests: Dict[str, Any], tags: Optional[List[str]] +) -> Dict[str, Any]: """Filter tests by tags""" if not tags: return tests @@ -285,7 +287,9 @@ def _filter_tests_by_tags(tests: Dict[str, Any], tags: Optional[List[str]]) -> D if isinstance(test_func, str): # For mock test functions, add all tags tag_test_ids.append(test_id) - elif hasattr(test_func, "__tags__") and all(tag in test_func.__tags__ for tag in tags): + elif hasattr(test_func, "__tags__") and all( + tag in test_func.__tags__ for tag in tags + ): tag_test_ids.append(test_id) # Create a new tests dictionary with only the filtered tests @@ -302,29 +306,37 @@ def _create_tests_dataframe(tests: Dict[str, Any], truncate: bool) -> Any: for test_id, test_func in tests.items(): if isinstance(test_func, str): # If it's a mock test, add minimal info - data.append({ - "ID": test_id, - "Name": test_id_to_name(test_id), - "Description": f"Mock test for {test_id}", - "Required Inputs": [], - "Params": {} - }) + data.append( + { + "ID": test_id, + "Name": test_id_to_name(test_id), + "Description": f"Mock test for {test_id}", + "Required Inputs": [], + "Params": {}, + } + ) else: # If it's a real test, add full info - data.append({ - "ID": test_id, - "Name": test_id_to_name(test_id), - "Description": inspect.getdoc(test_func) or "", - "Required Inputs": 
list(test_func.inputs.keys()) if hasattr(test_func, "inputs") else [], - "Params": test_func.params if hasattr(test_func, "params") else {} - }) + data.append( + { + "ID": test_id, + "Name": test_id_to_name(test_id), + "Description": inspect.getdoc(test_func) or "", + "Required Inputs": list(test_func.inputs.keys()) + if hasattr(test_func, "inputs") + else [], + "Params": test_func.params if hasattr(test_func, "params") else {}, + } + ) if not data: return None df = pd.DataFrame(data) if truncate: - df["Description"] = df["Description"].apply(lambda x: x.split("\n")[0] if x else "") + df["Description"] = df["Description"].apply( + lambda x: x.split("\n")[0] if x else "" + ) return df @@ -333,7 +345,7 @@ def list_tests( task: Optional[str] = None, tags: Optional[List[str]] = None, pretty: bool = True, - truncate: bool = True + truncate: bool = True, ) -> Union[List[str], None]: """List all tests in the tests directory. @@ -379,9 +391,7 @@ def list_tests( def describe_test( - test_id: Optional[TestID] = None, - raw: bool = False, - show: bool = True + test_id: Optional[TestID] = None, raw: bool = False, show: bool = True ) -> Union[str, HTML, Dict[str, Any]]: """Describe a test's functionality and parameters""" test = load_test(test_id) diff --git a/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py b/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py index adad0190d..73edf7044 100644 --- a/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +++ b/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py @@ -2,12 +2,13 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Optional, Union + import numpy as np import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_curve -from typing import Dict, List, Optional, Union from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset, VMModel @@ -17,7 +18,7 @@ def find_optimal_threshold( y_true: np.ndarray, y_prob: np.ndarray, method: str = "youden", - target_recall: Optional[float] = None + target_recall: Optional[float] = None, ) -> Dict[str, Union[str, float]]: """ Find the optimal classification threshold using various methods. @@ -89,7 +90,7 @@ def ClassifierThresholdOptimization( dataset: VMDataset, model: VMModel, methods: Optional[List[str]] = None, - target_recall: Optional[float] = None + target_recall: Optional[float] = None, ) -> Dict[str, Union[pd.DataFrame, go.Figure]]: """ Analyzes and visualizes different threshold optimization methods for binary classification models. 
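The "youden" default in find_optimal_threshold above refers to Youden's J statistic (J = TPR - FPR). A standalone sketch of the idea — not the module's exact implementation:

import numpy as np
from sklearn.metrics import roc_curve

def youden_threshold(y_true: np.ndarray, y_prob: np.ndarray) -> float:
    # Youden's J: pick the threshold maximizing TPR - FPR on the ROC curve
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    return float(thresholds[np.argmax(tpr - fpr)])

y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
y_prob = np.array([0.1, 0.4, 0.8, 0.65, 0.9, 0.3, 0.55, 0.2])
print(youden_threshold(y_true, y_prob))  # 0.55 for this toy data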
diff --git a/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py b/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py index bb02108dd..c91b4f9d2 100644 --- a/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +++ b/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py @@ -3,13 +3,13 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings -from warnings import filters as _warnings_filters from typing import Dict, List, Optional, Union +from warnings import filters as _warnings_filters import matplotlib.pyplot as plt import numpy as np -import shap import pandas as pd +import shap from validmind import RawData, tags, tasks from validmind.errors import UnsupportedModelForSHAPError @@ -22,7 +22,7 @@ def select_shap_values( shap_values: Union[np.ndarray, List[np.ndarray]], - class_of_interest: Optional[int] = None + class_of_interest: Optional[int] = None, ) -> np.ndarray: """Selects SHAP values for binary or multiclass classification. @@ -72,9 +72,7 @@ def select_shap_values( def generate_shap_plot( - type_: str, - shap_values: np.ndarray, - x_test: Union[np.ndarray, pd.DataFrame] + type_: str, shap_values: np.ndarray, x_test: Union[np.ndarray, pd.DataFrame] ) -> plt.Figure: """Plots two types of SHAP global importance (SHAP). @@ -126,7 +124,7 @@ def SHAPGlobalImportance( dataset: VMDataset, kernel_explainer_samples: int = 10, tree_or_linear_explainer_samples: int = 200, - class_of_interest: Optional[int] = None + class_of_interest: Optional[int] = None, ) -> Dict[str, Union[plt.Figure, Dict[str, float]]]: """ Evaluates and visualizes global feature importance using SHAP values for model explanation and risk identification. diff --git a/validmind/tests/output.py b/validmind/tests/output.py index 2d6fae71b..d99a28f3b 100644 --- a/validmind/tests/output.py +++ b/validmind/tests/output.py @@ -84,7 +84,7 @@ def _convert_simple_type(self, data: Any) -> pd.DataFrame: if isinstance(data, dict): return pd.DataFrame([data]) elif isinstance(data, str): - return pd.DataFrame({'Value': [data]}) + return pd.DataFrame({"Value": [data]}) elif data is None: return pd.DataFrame() else: @@ -99,8 +99,11 @@ def _convert_list(self, data_list: List) -> pd.DataFrame: return pd.DataFrame(data_list) except Exception as e: # If conversion fails, try to handle common cases - if all(isinstance(item, (int, float, str, bool, type(None))) for item in data_list): - return pd.DataFrame({'Values': data_list}) + if all( + isinstance(item, (int, float, str, bool, type(None))) + for item in data_list + ): + return pd.DataFrame({"Values": data_list}) else: raise ValueError(f"Could not convert list to DataFrame: {e}") @@ -123,7 +126,9 @@ def _convert_to_dataframe(self, table_data: Any) -> pd.DataFrame: def process( self, - item: Union[List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable, str, tuple], + item: Union[ + List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable, str, tuple + ], result: TestResult, ) -> None: # Convert to a dictionary of tables if not already diff --git a/validmind/tests/test_providers.py b/validmind/tests/test_providers.py index 44d8746b0..827780749 100644 --- a/validmind/tests/test_providers.py +++ b/validmind/tests/test_providers.py @@ -7,7 +7,7 @@ import re import sys from pathlib import Path -from typing import List, Protocol, Callable, Any +from typing import Any, Callable, List, Protocol from validmind.logging import get_logger diff --git a/validmind/tests/utils.py b/validmind/tests/utils.py index 
e2fdce465..7ef416071 100644 --- a/validmind/tests/utils.py +++ b/validmind/tests/utils.py @@ -5,7 +5,7 @@ """Test Module Utils""" import inspect -from typing import Any, Optional, Tuple, Union, Type +from typing import Any, Optional, Tuple, Type, Union import numpy as np import pandas as pd @@ -27,7 +27,7 @@ def test_description(test_class: Type[Any], truncate: bool = True) -> str: def remove_nan_pairs( y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list], - dataset_id: Optional[str] = None + dataset_id: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: """ Remove pairs where either true or predicted values are NaN/None. @@ -60,7 +60,7 @@ def remove_nan_pairs( def ensure_equal_lengths( y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list], - dataset_id: Optional[str] = None + dataset_id: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: """ Check if true and predicted values have matching lengths, log warning if they don't, @@ -94,7 +94,7 @@ def ensure_equal_lengths( def validate_prediction( y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list], - dataset_id: Optional[str] = None + dataset_id: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: """ Comprehensive validation of true and predicted value pairs. diff --git a/validmind/utils.py b/validmind/utils.py index 4b69c6e8b..a3d2444e4 100644 --- a/validmind/utils.py +++ b/validmind/utils.py @@ -12,7 +12,7 @@ import warnings from datetime import date, datetime, time from platform import python_version -from typing import Any, Dict, List, Optional, TypeVar, Callable, Awaitable +from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar import matplotlib.pylab as pylab import mistune @@ -59,7 +59,7 @@ logger = get_logger(__name__) -T = TypeVar('T') +T = TypeVar("T") def parse_version(version: str) -> tuple[int, ...]: @@ -363,7 +363,7 @@ def run_async( func: Callable[..., Awaitable[T]], *args: Any, name: Optional[str] = None, - **kwargs: Any + **kwargs: Any, ) -> T: """Helper function to run functions asynchronously. @@ -397,9 +397,7 @@ def run_async( def run_async_check( - func: Callable[..., Awaitable[T]], - *args: Any, - **kwargs: Any + func: Callable[..., Awaitable[T]], *args: Any, **kwargs: Any ) -> Optional[asyncio.Task[T]]: """Helper function to run functions asynchronously if the task doesn't already exist. diff --git a/validmind/vm_models/dataset/dataset.py b/validmind/vm_models/dataset/dataset.py index 87c4c30e4..e953dece7 100644 --- a/validmind/vm_models/dataset/dataset.py +++ b/validmind/vm_models/dataset/dataset.py @@ -258,8 +258,10 @@ def assign_predictions( prediction_values: Optional[List[Any]] = None, probability_column: Optional[str] = None, probability_values: Optional[List[float]] = None, - prediction_probabilities: Optional[List[float]] = None, # DEPRECATED: use probability_values - **kwargs: Dict[str, Any] + prediction_probabilities: Optional[ + List[float] + ] = None, # DEPRECATED: use probability_values + **kwargs: Dict[str, Any], ) -> None: """Assign predictions and probabilities to the dataset. 
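One behavioral note buried in the patch-06 reformat above: assign_predictions now spells out that prediction_probabilities is deprecated in favor of probability_values. A hedged sketch of the current keywords (the dataset/model setup is illustrative, and vm.init() is assumed to have been called):

import pandas as pd
import validmind as vm
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=4, random_state=0)
df = pd.DataFrame(X, columns=[f"f{i}" for i in range(4)]).assign(target=y)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=0)

clf = LogisticRegression().fit(train_df.drop(columns="target"), train_df["target"])

vm_model = vm.init_model(clf, input_id="log_reg")
vm_test_ds = vm.init_dataset(test_df, input_id="test_ds", target_column="target")

# use probability_values; prediction_probabilities is deprecated per the diff
vm_test_ds.assign_predictions(
    model=vm_model,
    prediction_values=clf.predict(test_df.drop(columns="target")),
    probability_values=clf.predict_proba(test_df.drop(columns="target"))[:, 1],
)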
From e1b86440a9c7c450563ab87e7ed8120e93b996da Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 31 Mar 2025 12:27:34 +0100 Subject: [PATCH 07/10] ux for log_text output --- validmind/api_client.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index 9d8a16b9d..e6165e4dc 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -18,6 +18,7 @@ import aiohttp import requests from aiohttp import FormData +from ipywidgets import HTML, Accordion from .client_config import client_config from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error @@ -414,20 +415,29 @@ def log_text( Args: content_id (str): Unique content identifier for the text. - text (str): The text to log. - _json (dict, optional): Free-form key-value pairs to assign to the text. Defaults to None. + text (str): The text to log. Will be converted to HTML with MathML support. + _json (dict, optional): Additional metadata to associate with the text. Defaults to None. Raises: + ValueError: If content_id or text are empty or not strings. Exception: If the API call fails. Returns: - dict: The response from the API. + ipywidgets.Accordion: An accordion widget containing the logged text as HTML. """ if not content_id or not isinstance(content_id, str): raise ValueError("`content_id` must be a non-empty string") if not text or not isinstance(text, str): raise ValueError("`text` must be a non-empty string") - return run_async(alog_metadata, content_id, md_to_html(text, mathml=True), _json) + + log_text = run_async( + alog_metadata, content_id, md_to_html(text, mathml=True), _json + ) + + return Accordion( + children=[HTML(log_text["text"])], + titles=[f"Text Block: '{log_text['content_id']}'"], + ) async def alog_metric( From 5f4141816feec83cec1972147d344ca263e4d018 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 31 Mar 2025 16:51:16 +0100 Subject: [PATCH 08/10] handle html text --- validmind/api_client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index e6165e4dc..8f444861e 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -431,7 +431,12 @@ def log_text( raise ValueError("`text` must be a non-empty string") log_text = run_async( - alog_metadata, content_id, md_to_html(text, mathml=True), _json + alog_metadata, + content_id, + text + if text.startswith("<") and text.endswith(">") + else md_to_html(text, mathml=True), + _json, ) return Accordion( From 22a2aefb9046c77fc6cb6c7bf1f11f872ec81bfb Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Mon, 31 Mar 2025 18:14:51 +0100 Subject: [PATCH 09/10] checking if text is html --- validmind/api_client.py | 11 +++----- validmind/utils.py | 58 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index 8f444861e..7937cadf8 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -23,7 +23,7 @@ from .client_config import client_config from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error from .logging import get_logger, init_sentry, send_single_error -from .utils import NumpyEncoder, md_to_html, run_async +from .utils import NumpyEncoder, is_html, md_to_html, run_async from .vm_models import Figure logger = get_logger(__name__) @@ -429,14 +429,11 @@ def log_text( raise ValueError("`content_id` must be a non-empty 
string") if not text or not isinstance(text, str): raise ValueError("`text` must be a non-empty string") + if is_html(text): + raise ValueError("`text` must be a markdown or plain text string") log_text = run_async( - alog_metadata, - content_id, - text - if text.startswith("<") and text.endswith(">") - else md_to_html(text, mathml=True), - _json, + alog_metadata, content_id, md_to_html(text, mathml=True), _json ) return Accordion( diff --git a/validmind/utils.py b/validmind/utils.py index a3d2444e4..5d8306a05 100644 --- a/validmind/utils.py +++ b/validmind/utils.py @@ -20,6 +20,7 @@ import numpy as np import pandas as pd import seaborn as sns +from bs4 import BeautifulSoup from IPython.core import getipython from IPython.display import HTML from IPython.display import display as ipy_display @@ -576,6 +577,63 @@ def md_to_html(md: str, mathml=False) -> str: return html +def is_html(text: str) -> bool: + """Check if a string is HTML. + + Uses more robust heuristics to determine if a string contains HTML content. + + Args: + text (str): The string to check + + Returns: + bool: True if the string likely contains HTML, False otherwise + """ + # Strip whitespace first + text = text.strip() + + # Basic check: Must at least start with < and end with > + if not (text.startswith("<") and text.endswith(">")): + return False + + # Look for common HTML tags + common_html_patterns = [ + r"", # HTML tag + r"", # Body tag + r"", # Div tag + r"

.*?

", # Paragraph with content + r".*?", # Headers + r"", # Script tags + r"", # Style tags + r"", # Links + r"", # Images + r"", # Tables + r"", # DOCTYPE declaration + ] + + for pattern in common_html_patterns: + if re.search(pattern, text, re.IGNORECASE | re.DOTALL): + return True + + # If we have at least 2 matching tags, it's likely HTML + # This helps detect custom elements or patterns not in our list + tags = re.findall(r"", text) + if len(tags) >= 2: + return True + + # Try parsing with BeautifulSoup as a last resort + try: + soup = BeautifulSoup(text, "html.parser") + # If we find any tags that weren't in the original text, BeautifulSoup + # likely tried to fix broken HTML, meaning it's not valid HTML + return len(soup.find_all()) > 0 + + except Exception as e: + logger.error(f"Error checking if text is HTML: {e}") + return False + + return False + + def inspect_obj(obj): # Filtering only attributes print(len("Attributes:") * "-") From 6d24000ada1ccda959a52db21a8cf35cde1bd700 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Tue, 1 Apr 2025 10:18:09 +0100 Subject: [PATCH 10/10] support html text input --- validmind/api_client.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/validmind/api_client.py b/validmind/api_client.py index 7937cadf8..c5755daaa 100644 --- a/validmind/api_client.py +++ b/validmind/api_client.py @@ -429,12 +429,11 @@ def log_text( raise ValueError("`content_id` must be a non-empty string") if not text or not isinstance(text, str): raise ValueError("`text` must be a non-empty string") - if is_html(text): - raise ValueError("`text` must be a markdown or plain text string") - log_text = run_async( - alog_metadata, content_id, md_to_html(text, mathml=True), _json - ) + if not is_html(text): + text = md_to_html(text, mathml=True) + + log_text = run_async(alog_metadata, content_id, text, _json) return Accordion( children=[HTML(log_text["text"])],