Skip to content
Merged
3 changes: 2 additions & 1 deletion validmind/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)

from .__version__ import __version__ # noqa: E402
from .api_client import init, log_metric, reload
from .api_client import init, log_metric, log_text, reload
from .client import ( # noqa: E402
get_test_suite,
init_dataset,
Expand Down Expand Up @@ -125,4 +125,5 @@ def check_version():
"tests",
"unit_metrics",
"test_suites",
"log_text",
]
36 changes: 35 additions & 1 deletion validmind/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
import aiohttp
import requests
from aiohttp import FormData
from ipywidgets import HTML, Accordion

from .client_config import client_config
from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
from .logging import get_logger, init_sentry, send_single_error
from .utils import NumpyEncoder, run_async
from .utils import NumpyEncoder, is_html, md_to_html, run_async
from .vm_models import Figure

logger = get_logger(__name__)
Expand Down Expand Up @@ -407,6 +408,39 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A
return run_async(alog_input, input_id, type, metadata)


def log_text(
    content_id: str, text: str, _json: Optional[Dict[str, Any]] = None
) -> "Accordion":
    """Logs free-form text to ValidMind API.

    Args:
        content_id (str): Unique content identifier for the text.
        text (str): The text to log. Will be converted to HTML with MathML support.
        _json (dict, optional): Additional metadata to associate with the text. Defaults to None.

    Raises:
        ValueError: If content_id or text are empty or not strings.
        Exception: If the API call fails.

    Returns:
        ipywidgets.Accordion: An accordion widget containing the logged text as HTML.
    """
    if not content_id or not isinstance(content_id, str):
        raise ValueError("`content_id` must be a non-empty string")
    if not text or not isinstance(text, str):
        raise ValueError("`text` must be a non-empty string")

    # Markdown input is converted to HTML (with MathML support) before upload;
    # input that already looks like HTML is sent as-is.
    if not is_html(text):
        text = md_to_html(text, mathml=True)

    # Named `logged` (not `log_text`) to avoid shadowing this function's name.
    logged = run_async(alog_metadata, content_id, text, _json)

    return Accordion(
        children=[HTML(logged["text"])],
        titles=[f"Text Block: '{logged['content_id']}'"],
    )


async def alog_metric(
key: str,
value: Union[int, float],
Expand Down
58 changes: 58 additions & 0 deletions validmind/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import numpy as np
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup
from IPython.core import getipython
from IPython.display import HTML
from IPython.display import display as ipy_display
Expand Down Expand Up @@ -576,6 +577,63 @@ def md_to_html(md: str, mathml=False) -> str:
return html


def is_html(text: str) -> bool:
    """Check if a string is HTML.

    Uses layered heuristics: a cheap bracket check, a list of common HTML tag
    patterns, a generic tag count, and finally a BeautifulSoup parse.

    Args:
        text (str): The string to check

    Returns:
        bool: True if the string likely contains HTML, False otherwise
    """
    # Strip whitespace so leading/trailing newlines don't defeat the checks.
    text = text.strip()

    # Cheap rejection: must at least start with < and end with >.
    if not (text.startswith("<") and text.endswith(">")):
        return False

    # Look for common HTML tags.
    common_html_patterns = [
        r"<html.*?>",  # HTML tag
        r"<body.*?>",  # Body tag
        r"<div.*?>",  # Div tag
        r"<p>.*?</p>",  # Paragraph with content
        r"<h[1-6]>.*?</h[1-6]>",  # Headers
        r"<script.*?>",  # Script tags
        r"<style.*?>",  # Style tags
        r"<a href=.*?>",  # Links
        r"<img.*?>",  # Images
        r"<table.*?>",  # Tables
        r"<!DOCTYPE html>",  # DOCTYPE declaration
    ]

    for pattern in common_html_patterns:
        if re.search(pattern, text, re.IGNORECASE | re.DOTALL):
            return True

    # If we have at least 2 tag-like sequences, it's likely HTML.
    # This helps detect custom elements or patterns not in our list.
    tags = re.findall(r"</?[a-zA-Z][a-zA-Z0-9]*.*?>", text)
    if len(tags) >= 2:
        return True

    # Last resort: parse with BeautifulSoup and treat the text as HTML if the
    # parser recognizes at least one element. Any parser failure is logged and
    # treated as "not HTML" rather than propagated.
    try:
        soup = BeautifulSoup(text, "html.parser")
        return len(soup.find_all()) > 0
    except Exception as e:
        logger.error(f"Error checking if text is HTML: {e}")
        return False


def inspect_obj(obj):
# Filtering only attributes
print(len("Attributes:") * "-")
Expand Down