diff --git a/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb b/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
index 80393d5f3..963179129 100644
--- a/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
+++ b/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
@@ -37,7 +37,9 @@
 " - [Custom Test: External API Call](#toc9_2_) \n",
 " - [Custom Test: Passing Parameters](#toc9_3_) \n",
 " - [Custom Test: Multiple Tables and Plots in a Single Test](#toc9_4_) \n",
- " - [Custom Test: Images](#toc9_5_) \n",
+ " - [Custom Test: Images](#toc9_5_)\n",
+ " - [Custom Test: Description](#toc9_6_)\n",
+ "\n",
 "- [Conclusion](#toc10_) \n",
 "- [Next steps](#toc11_) \n",
 " - [Work with your model documentation](#toc11_1_) \n",
@@ -867,6 +869,68 @@
 "![screenshot showing image from file](../../images/pearson-correlation-matrix-test-output.png)"
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "### Custom Test: Description\n",
+ "\n",
+ "If you want to write your own description for a custom test instead of having one generated by the LLM, you can do so by returning a string from your test."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "from sklearn import metrics\n",
+ "\n",
+ "@vm.test(\"my_custom_tests.MyCustomTest\")\n",
+ "def my_custom_test(dataset, model):\n",
+ " \"\"\"\n",
+ " A custom test that computes the confusion matrix for a binary classification model and returns a string as the test description.\n",
+ " \"\"\"\n",
+ " y_true = dataset.y\n",
+ " y_pred = dataset.y_pred(model)\n",
+ "\n",
+ " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n",
+ "\n",
+ " cm_display = metrics.ConfusionMatrixDisplay(\n",
+ " confusion_matrix=confusion_matrix, display_labels=[False, True]\n",
+ " )\n",
+ " cm_display.plot()\n",
+ "\n",
+ " plt.close() # close the plot to avoid displaying it\n",
+ "\n",
+ " return cm_display.figure_, \"Test Description - Confusion Matrix\", pd.DataFrame({\"Value\": [1, 2, 3]}) # return a figure, a description string, and a table\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see that the test result description has been customized. The same description will be displayed in the UI."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = run_test(\n", + " \"my_custom_tests.MyCustomTest\",\n", + " inputs={\"model\": \"model\", \"dataset\": \"test_dataset\"},\n", + ")\n", + "result.log()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/validmind/tests/output.py b/validmind/tests/output.py index d99a28f3b..52ee23d1b 100644 --- a/validmind/tests/output.py +++ b/validmind/tests/output.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from validmind.utils import is_html, md_to_html from validmind.vm_models.figure import ( Figure, is_matplotlib_figure, @@ -77,14 +78,12 @@ def process(self, item: Any, result: TestResult) -> None: class TableOutputHandler(OutputHandler): def can_handle(self, item: Any) -> bool: - return isinstance(item, (list, pd.DataFrame, dict, ResultTable, str, tuple)) + return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple)) def _convert_simple_type(self, data: Any) -> pd.DataFrame: """Convert a simple data type to a DataFrame.""" if isinstance(data, dict): return pd.DataFrame([data]) - elif isinstance(data, str): - return pd.DataFrame({"Value": [data]}) elif data is None: return pd.DataFrame() else: @@ -155,6 +154,17 @@ def process(self, item: Any, result: TestResult) -> None: result.raw_data = item +class StringOutputHandler(OutputHandler): + def can_handle(self, item: Any) -> bool: + return isinstance(item, str) + + def process(self, item: Any, result: TestResult) -> None: + if not is_html(item): + item = md_to_html(item, mathml=True) + + result.description = item + + def process_output(item: Any, result: TestResult) -> None: """Process a single test output item and update the TestResult.""" handlers = [ @@ -163,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None: FigureOutputHandler(), TableOutputHandler(), RawDataOutputHandler(), + StringOutputHandler(), ] for handler in handlers: diff --git a/validmind/tests/run.py b/validmind/tests/run.py index e61a3fa46..09fed2a16 100644 --- a/validmind/tests/run.py +++ b/validmind/tests/run.py @@ -390,15 +390,16 @@ def run_test( # noqa: C901 if post_process_fn: result = post_process_fn(result) - result.description = get_result_description( - test_id=test_id, - test_description=result.doc, - tables=result.tables, - figures=result.figures, - metric=result.metric, - should_generate=generate_description, - title=title, - ) + if not result.description: + result.description = get_result_description( + test_id=test_id, + test_description=result.doc, + tables=result.tables, + figures=result.figures, + metric=result.metric, + should_generate=generate_description, + title=title, + ) if show: result.show() diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py index 54ae176aa..ba34bcd7a 100644 --- a/validmind/vm_models/result/result.py +++ b/validmind/vm_models/result/result.py @@ -464,7 +464,7 @@ async def log_async( ) ) - if self.tables or self.figures: + if self.tables or self.figures or self.description: tasks.append( api_client.alog_test_result( result=self.serialize(),
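
Taken together, these changes let a custom test supply its own result description: a returned string is routed to the new `StringOutputHandler`, converted from markdown to HTML when it is not already HTML, and stored on `result.description`, which `run_test` then leaves untouched instead of calling `get_result_description`. Below is a minimal sketch of the new behavior, assuming the `@vm.test` decorator and `run_test` helper used in the notebook; the test ID, function body, and input names are illustrative, not part of this change.

```python
import pandas as pd
import validmind as vm
from validmind.tests import run_test

# Hypothetical test ID; any namespaced ID accepted by @vm.test works.
@vm.test("my_custom_tests.ClassBalance")
def class_balance(dataset):
    """Tabulates target class counts and supplies its own markdown description."""
    counts = (
        pd.Series(dataset.y)
        .value_counts()
        .rename_axis("Class")
        .reset_index(name="Count")
    )

    # The returned string is picked up by StringOutputHandler: markdown is
    # converted to HTML via md_to_html, and run_test skips LLM description
    # generation because result.description is already set.
    return counts, "**Class balance** - count of each target class in the dataset."

# Assumes "test_dataset" was registered earlier with vm.init_dataset().
result = run_test(
    "my_custom_tests.ClassBalance",
    inputs={"dataset": "test_dataset"},
)
# With the result.py change, a non-empty description alone is enough to
# trigger alog_test_result, even for results with no tables or figures.
result.log()
```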