Skip to content
Merged
3 changes: 2 additions & 1 deletion validmind/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)

from .__version__ import __version__ # noqa: E402
from .api_client import init, log_metric, reload
from .api_client import init, log_metric, log_text, reload
from .client import ( # noqa: E402
get_test_suite,
init_dataset,
Expand Down Expand Up @@ -125,4 +125,5 @@ def check_version():
"tests",
"unit_metrics",
"test_suites",
"log_text",
]
36 changes: 35 additions & 1 deletion validmind/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
import aiohttp
import requests
from aiohttp import FormData
from ipywidgets import HTML, Accordion

from .client_config import client_config
from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
from .logging import get_logger, init_sentry, send_single_error
from .utils import NumpyEncoder, run_async
from .utils import NumpyEncoder, is_html, md_to_html, run_async
from .vm_models import Figure

logger = get_logger(__name__)
Expand Down Expand Up @@ -407,6 +408,39 @@ def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, A
return run_async(alog_input, input_id, type, metadata)


def log_text(
    content_id: str, text: str, _json: Optional[Dict[str, Any]] = None
) -> "Accordion":
    """Logs free-form text to ValidMind API.

    Args:
        content_id (str): Unique content identifier for the text.
        text (str): The text to log. Will be converted to HTML with MathML support.
        _json (dict, optional): Additional metadata to associate with the text. Defaults to None.

    Raises:
        ValueError: If content_id or text are empty or not strings.
        Exception: If the API call fails.

    Returns:
        ipywidgets.Accordion: An accordion widget containing the logged text as HTML.
    """
    if not content_id or not isinstance(content_id, str):
        raise ValueError("`content_id` must be a non-empty string")
    if not text or not isinstance(text, str):
        raise ValueError("`text` must be a non-empty string")

    # Markdown input is converted to HTML (with MathML support) before upload;
    # input that already looks like HTML is sent as-is.
    if not is_html(text):
        text = md_to_html(text, mathml=True)

    # Named `logged` (not `log_text`) to avoid shadowing this function's name.
    logged = run_async(alog_metadata, content_id, text, _json)

    return Accordion(
        children=[HTML(logged["text"])],
        titles=[f"Text Block: '{logged['content_id']}'"],
    )


async def alog_metric(
key: str,
value: Union[int, float],
Expand Down
58 changes: 58 additions & 0 deletions validmind/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import numpy as np
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup
from IPython.core import getipython
from IPython.display import HTML
from IPython.display import display as ipy_display
Expand Down Expand Up @@ -576,6 +577,63 @@ def md_to_html(md: str, mathml=False) -> str:
return html


def is_html(text: str) -> bool:
    """Check if a string is HTML.

    Uses layered heuristics: a cheap bracket check, a list of common HTML tag
    patterns, a generic tag count, and finally a BeautifulSoup parse.

    Args:
        text (str): The string to check

    Returns:
        bool: True if the string likely contains HTML, False otherwise
    """
    # Strip whitespace so leading/trailing newlines don't defeat the checks.
    text = text.strip()

    # Cheap rejection: must at least start with < and end with >.
    if not (text.startswith("<") and text.endswith(">")):
        return False

    # Look for common HTML tags.
    common_html_patterns = [
        r"<html.*?>",  # HTML tag
        r"<body.*?>",  # Body tag
        r"<div.*?>",  # Div tag
        r"<p>.*?</p>",  # Paragraph with content
        r"<h[1-6]>.*?</h[1-6]>",  # Headers
        r"<script.*?>",  # Script tags
        r"<style.*?>",  # Style tags
        r"<a href=.*?>",  # Links
        r"<img.*?>",  # Images
        r"<table.*?>",  # Tables
        r"<!DOCTYPE html>",  # DOCTYPE declaration
    ]

    for pattern in common_html_patterns:
        if re.search(pattern, text, re.IGNORECASE | re.DOTALL):
            return True

    # If we have at least 2 tag-like sequences, it's likely HTML.
    # This helps detect custom elements or patterns not in our list.
    tags = re.findall(r"</?[a-zA-Z][a-zA-Z0-9]*.*?>", text)
    if len(tags) >= 2:
        return True

    # Last resort: parse with BeautifulSoup and treat the text as HTML if the
    # parser recognizes at least one element. Any parser failure is logged and
    # treated as "not HTML" rather than propagated.
    try:
        soup = BeautifulSoup(text, "html.parser")
        return len(soup.find_all()) > 0
    except Exception as e:
        logger.error(f"Error checking if text is HTML: {e}")
        return False


def inspect_obj(obj):
# Filtering only attributes
print(len("Attributes:") * "-")
Expand Down