64 changes: 63 additions & 1 deletion notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
@@ -37,7 +37,9 @@
" - [Custom Test: External API Call](#toc9_2_) \n",
" - [Custom Test: Passing Parameters](#toc9_3_) \n",
" - [Custom Test: Multiple Tables and Plots in a Single Test](#toc9_4_) \n",
" - [Custom Test: Images](#toc9_5_) \n",
" - [Custom Test: Images](#toc9_5_)\n",
" - [Custom Test: Description](#toc9_6_)\n",
"\n",
"- [Conclusion](#toc10_) \n",
"- [Next steps](#toc11_) \n",
" - [Work with your model documentation](#toc11_1_) \n",
@@ -867,6 +869,66 @@
"![screenshot showing image from file](../../images/pearson-correlation-matrix-test-output.png)"
]
},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"<a id='toc9_6_'></a>\n",
+"\n",
+"### Custom Test: Description\n",
+"\n",
+"If you want to provide your own description for a custom test's result instead of having one generated by an LLM, you can do so by returning a string from your test function."
+]
+},
+{
+"cell_type": "code",
+"execution_count": 19,
+"metadata": {},
+"outputs": [],
+"source": [
+"import matplotlib.pyplot as plt\n",
+"import pandas as pd\n",
+"from sklearn import metrics\n",
+"\n",
+"@vm.test(\"my_custom_tests.MyCustomTest\")\n",
+"def my_custom_test(dataset, model):\n",
+"    \"\"\"\n",
+"    A custom test that computes the confusion matrix for a binary classification model and returns a string as the test description.\n",
+"    \"\"\"\n",
+"    y_true = dataset.y\n",
+"    y_pred = dataset.y_pred(model)\n",
+"\n",
+"    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n",
+"\n",
+"    cm_display = metrics.ConfusionMatrixDisplay(\n",
+"        confusion_matrix=confusion_matrix, display_labels=[False, True]\n",
+"    )\n",
+"    cm_display.plot()\n",
+"\n",
+"    plt.close()  # close the plot to avoid displaying it\n",
+"\n",
+"    # return a figure, a custom description string, and a table\n",
+"    return cm_display.figure_, \"Test Description - Confusion Matrix\", pd.DataFrame({\"Value\": [1, 2, 3]})\n",
+"\n"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Here you can see that the test result description has been customized. The same description will be displayed in the UI."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"result = run_test(\n",
+"    \"my_custom_tests.MyCustomTest\",\n",
+"    inputs={\"model\": \"model\", \"dataset\": \"test_dataset\"},\n",
+")\n",
+"result.log()"
+]
+},
{
"cell_type": "markdown",
"metadata": {},
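Taken together, these notebook cells show that the simplest form of this feature is a test that returns nothing but a string. Below is a minimal sketch of that case, assuming the `run_test` import and the registered `test_dataset` input from earlier in the notebook, and assuming the dataset wrapper exposes its rows via `.df`; the test ID and the message are made up for illustration:

import validmind as vm
from validmind.tests import run_test

@vm.test("my_custom_tests.StringOnlyTest")
def string_only_test(dataset):
    """A custom test whose only output is a hand-written description."""
    # Returning a bare string sets the result description directly,
    # so no LLM-generated interpretation is produced for this result.
    return f"Manually written description: the dataset has {len(dataset.df)} rows."

result = run_test(
    "my_custom_tests.StringOnlyTest",
    inputs={"dataset": "test_dataset"},
)
result.log()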
17 changes: 14 additions & 3 deletions validmind/tests/output.py
@@ -9,6 +9,7 @@
import numpy as np
import pandas as pd

+from validmind.utils import is_html, md_to_html
from validmind.vm_models.figure import (
    Figure,
    is_matplotlib_figure,
@@ -77,14 +78,12 @@ def process(self, item: Any, result: TestResult) -> None:

class TableOutputHandler(OutputHandler):
    def can_handle(self, item: Any) -> bool:
-        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, str, tuple))
+        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple))

    def _convert_simple_type(self, data: Any) -> pd.DataFrame:
        """Convert a simple data type to a DataFrame."""
        if isinstance(data, dict):
            return pd.DataFrame([data])
-        elif isinstance(data, str):
-            return pd.DataFrame({"Value": [data]})
        elif data is None:
            return pd.DataFrame()
        else:
@@ -155,6 +154,17 @@ def process(self, item: Any, result: TestResult) -> None:
        result.raw_data = item


+class StringOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, str)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        if not is_html(item):
+            item = md_to_html(item, mathml=True)
+
+        result.description = item
+
+
def process_output(item: Any, result: TestResult) -> None:
    """Process a single test output item and update the TestResult."""
    handlers = [
@@ -163,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None:
        FigureOutputHandler(),
        TableOutputHandler(),
        RawDataOutputHandler(),
+        StringOutputHandler(),
    ]

    for handler in handlers:
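The handler ordering above is the crux of this file's change: since `str` was dropped from `TableOutputHandler.can_handle`, a returned string is no longer coerced into a one-row "Value" table and instead falls through to the new `StringOutputHandler`, which stores it as the result description. A rough sketch of that conversion step, reusing the two helpers imported in this diff:

from validmind.utils import is_html, md_to_html

returned = "**Confusion matrix** looks healthy; see the figure above."

# Mirrors StringOutputHandler.process: Markdown is normalized to HTML,
# while strings that already contain HTML are stored unchanged.
description = returned if is_html(returned) else md_to_html(returned, mathml=True)
print(description)  # e.g. "<p><strong>Confusion matrix</strong> ...</p>"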
19 changes: 10 additions & 9 deletions validmind/tests/run.py
@@ -390,15 +390,16 @@ def run_test( # noqa: C901
    if post_process_fn:
        result = post_process_fn(result)

-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=result.doc,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-        title=title,
-    )
+    if not result.description:
+        result.description = get_result_description(
+            test_id=test_id,
+            test_description=result.doc,
+            tables=result.tables,
+            figures=result.figures,
+            metric=result.metric,
+            should_generate=generate_description,
+            title=title,
+        )

    if show:
        result.show()
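This guard makes a handler-set description authoritative: `get_result_description`, which may call an LLM when description generation is enabled, now runs only when nothing upstream has populated `result.description`. A sketch of the two resulting paths, using the custom test defined in the notebook above:

# Path 1: the test returns a string, so StringOutputHandler has already
# set result.description and the generation fallback is skipped.
result = run_test(
    "my_custom_tests.MyCustomTest",
    inputs={"model": "model", "dataset": "test_dataset"},
)
assert result.description  # the returned string, stored as HTML

# Path 2: a test that returns only tables and figures leaves
# result.description empty, so run_test falls back to
# get_result_description to produce one.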
2 changes: 1 addition & 1 deletion validmind/vm_models/result/result.py
@@ -464,7 +464,7 @@ async def log_async(
)
)

-        if self.tables or self.figures:
+        if self.tables or self.figures or self.description:
            tasks.append(
                api_client.alog_test_result(
                    result=self.serialize(),