64 changes: 63 additions & 1 deletion notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
@@ -37,7 +37,9 @@
" - [Custom Test: External API Call](#toc9_2_) \n",
" - [Custom Test: Passing Parameters](#toc9_3_) \n",
" - [Custom Test: Multiple Tables and Plots in a Single Test](#toc9_4_) \n",
" - [Custom Test: Images](#toc9_5_) \n",
" - [Custom Test: Images](#toc9_5_)\n",
" - [Custom Test: Description](#toc9_6_)\n",
"\n",
"- [Conclusion](#toc10_) \n",
"- [Next steps](#toc11_) \n",
" - [Work with your model documentation](#toc11_1_) \n",
@@ -867,6 +869,66 @@
"![screenshot showing image from file](../../images/pearson-correlation-matrix-test-output.png)"
]
},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"<a id='toc9_6_'></a>\n",
+"\n",
+"### Custom Test: Description\n",
+"\n",
+"If you want to provide your own description for a custom test's result instead of having one generated by an LLM, you can do so by returning a string from your test function."
+]
+},
+{
+"cell_type": "code",
+"execution_count": 19,
+"metadata": {},
+"outputs": [],
+"source": [
+"import matplotlib.pyplot as plt\n",
+"import pandas as pd\n",
+"from sklearn import metrics\n",
+"\n",
+"@vm.test(\"my_custom_tests.MyCustomTest\")\n",
+"def my_custom_test(dataset, model):\n",
+"    \"\"\"\n",
+"    A custom test that computes the confusion matrix for a binary classification model and returns a string as the test description.\n",
+"    \"\"\"\n",
+"    y_true = dataset.y\n",
+"    y_pred = dataset.y_pred(model)\n",
+"\n",
+"    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n",
+"\n",
+"    cm_display = metrics.ConfusionMatrixDisplay(\n",
+"        confusion_matrix=confusion_matrix, display_labels=[False, True]\n",
+"    )\n",
+"    cm_display.plot()\n",
+"\n",
+"    plt.close()  # close the plot to avoid displaying it\n",
+"\n",
+"    # return a figure, a custom description string, and a table\n",
+"    return cm_display.figure_, \"Test Description - Confusion Matrix\", pd.DataFrame({\"Value\": [1, 2, 3]})\n",
+"\n"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Here you can see that the test result description has been customized. The same description will be displayed in the UI."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"result = run_test(\n",
+"    \"my_custom_tests.MyCustomTest\",\n",
+"    inputs={\"model\": \"model\", \"dataset\": \"test_dataset\"},\n",
+")\n",
+"result.log()"
+]
+},
{
"cell_type": "markdown",
"metadata": {},
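Taken together, these notebook cells show that the simplest form of this feature is a test that returns nothing but a string. Below is a minimal sketch of that case, assuming the `run_test` import and the registered `test_dataset` input from earlier in the notebook, and assuming the dataset wrapper exposes its rows via `.df`; the test ID and the message are made up for illustration:

import validmind as vm
from validmind.tests import run_test

@vm.test("my_custom_tests.StringOnlyTest")
def string_only_test(dataset):
    """A custom test whose only output is a hand-written description."""
    # Returning a bare string sets the result description directly,
    # so no LLM-generated interpretation is produced for this result.
    return f"Manually written description: the dataset has {len(dataset.df)} rows."

result = run_test(
    "my_custom_tests.StringOnlyTest",
    inputs={"dataset": "test_dataset"},
)
result.log()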
17 changes: 14 additions & 3 deletions validmind/tests/output.py
@@ -9,6 +9,7 @@
import numpy as np
import pandas as pd

+from validmind.utils import is_html, md_to_html
from validmind.vm_models.figure import (
    Figure,
    is_matplotlib_figure,
@@ -77,14 +78,12 @@ def process(self, item: Any, result: TestResult) -> None:

class TableOutputHandler(OutputHandler):
    def can_handle(self, item: Any) -> bool:
-        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, str, tuple))
+        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple))

    def _convert_simple_type(self, data: Any) -> pd.DataFrame:
        """Convert a simple data type to a DataFrame."""
        if isinstance(data, dict):
            return pd.DataFrame([data])
-        elif isinstance(data, str):
-            return pd.DataFrame({"Value": [data]})
        elif data is None:
            return pd.DataFrame()
        else:
@@ -155,6 +154,17 @@ def process(self, item: Any, result: TestResult) -> None:
        result.raw_data = item


+class StringOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, str)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        if not is_html(item):
+            item = md_to_html(item, mathml=True)
+
+        result.description = item
+
+
def process_output(item: Any, result: TestResult) -> None:
    """Process a single test output item and update the TestResult."""
    handlers = [
@@ -163,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None:
        FigureOutputHandler(),
        TableOutputHandler(),
        RawDataOutputHandler(),
+        StringOutputHandler(),
    ]

    for handler in handlers:
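The handler ordering above is the crux of this file's change: since `str` was dropped from `TableOutputHandler.can_handle`, a returned string is no longer coerced into a one-row "Value" table and instead falls through to the new `StringOutputHandler`, which stores it as the result description. A rough sketch of that conversion step, reusing the two helpers imported in this diff:

from validmind.utils import is_html, md_to_html

returned = "**Confusion matrix** looks healthy; see the figure above."

# Mirrors StringOutputHandler.process: Markdown is normalized to HTML,
# while strings that already contain HTML are stored unchanged.
description = returned if is_html(returned) else md_to_html(returned, mathml=True)
print(description)  # e.g. "<p><strong>Confusion matrix</strong> ...</p>"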
19 changes: 10 additions & 9 deletions validmind/tests/run.py
@@ -390,15 +390,16 @@ def run_test( # noqa: C901
    if post_process_fn:
        result = post_process_fn(result)

-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=result.doc,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-        title=title,
-    )
+    if not result.description:
+        result.description = get_result_description(
+            test_id=test_id,
+            test_description=result.doc,
+            tables=result.tables,
+            figures=result.figures,
+            metric=result.metric,
+            should_generate=generate_description,
+            title=title,
+        )

    if show:
        result.show()
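This guard makes a handler-set description authoritative: `get_result_description`, which may call an LLM when description generation is enabled, now runs only when nothing upstream has populated `result.description`. A sketch of the two resulting paths, using the custom test defined in the notebook above:

# Path 1: the test returns a string, so StringOutputHandler has already
# set result.description and the generation fallback is skipped.
result = run_test(
    "my_custom_tests.MyCustomTest",
    inputs={"model": "model", "dataset": "test_dataset"},
)
assert result.description  # the returned string, stored as HTML

# Path 2: a test that returns only tables and figures leaves
# result.description empty, so run_test falls back to
# get_result_description to produce one.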
2 changes: 1 addition & 1 deletion validmind/vm_models/result/result.py
@@ -464,7 +464,7 @@ async def log_async(
)
)

-        if self.tables or self.figures:
+        if self.tables or self.figures or self.description:
            tasks.append(
                api_client.alog_test_result(
                    result=self.serialize(),