validmind · juanmleng · Dec 16, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 16, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.7.0"
+version = "2.7.1"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}

diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
@@ -1,6 +1,7 @@
 import unittest
 import time
 import os
+import sys
 
 from tabulate import tabulate
 
@@ -151,3 +152,7 @@ def run_test_files():
 
 # Print coverage statistics
 print_coverage_statistics()
+
+# Exit with a non-zero status code if any test failed
+if not all_tests_passed:
+    sys.exit(1)
diff --git a/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py b/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
@@ -65,10 +65,16 @@ def setUp(self):
         self.vm_test_ds.assign_predictions(self.vm_model)
 
     def test_roc_curve_structure(self):
-        fig = ROCCurve(self.vm_model, self.vm_test_ds)
+        result = ROCCurve(self.vm_model, self.vm_test_ds)
 
-        # Check return type
-        self.assertIsInstance(fig, go.Figure)
+        # Check return type is tuple with RawData and Figure
+        self.assertIsInstance(result, tuple)
+        self.assertEqual(len(result), 2)
+        self.assertIsInstance(result[0], vm.RawData)
+        self.assertIsInstance(result[1], go.Figure)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check figure has two traces (ROC curve and random baseline)
         self.assertEqual(len(fig.data), 2)
@@ -82,6 +88,11 @@ def test_roc_curve_structure(self):
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.5)
 
+        # Check RawData contains expected fields
+        self.assertTrue(hasattr(result[0], "fpr"))
+        self.assertTrue(hasattr(result[0], "tpr"))
+        self.assertTrue(hasattr(result[0], "auc"))
+
     def test_perfect_separation(self):
         # Create perfectly separable dataset
         X = np.random.randn(1000, 2)
@@ -132,8 +143,14 @@ def test_perfect_separation(self):
         vm_train_ds.assign_predictions(vm_perfect_model)
         vm_test_ds.assign_predictions(vm_perfect_model)
 
-        fig = ROCCurve(vm_perfect_model, vm_test_ds)
+        result = ROCCurve(vm_perfect_model, vm_test_ds)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check AUC score (should be very close to 1.0)
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.95)
+
+        # Verify the RawData AUC matches the AUC parsed from the figure trace name
+        self.assertAlmostEqual(result[0].auc, auc, places=2)
diff --git a/validmind/__version__.py b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.7.0"
+__version__ = "2.7.1"
diff --git a/validmind/tests/run.py b/validmind/tests/run.py
@@ -136,6 +136,7 @@ def build_test_result(
     test_id: str,
     inputs: Dict[str, Union[VMInput, List[VMInput]]],
     params: Union[Dict[str, Any], None],
+    doc: str,
     description: str,
     generate_description: bool = True,
     title: Optional[str] = None,
@@ -149,6 +150,7 @@ def build_test_result(
         ref_id=ref_id,
         inputs=inputs,
         params=params if params else None,  # None if empty dict or None
+        doc=doc,
     )
 
     if not isinstance(outputs, tuple):
@@ -199,6 +201,11 @@ def _run_composite_test(
     if not all(result.metric is not None for result in results):
         raise ValueError("All tests must return a metric when used as a composite test")
 
+    # Create composite doc from all test results
+    composite_doc = "\n\n".join(
+        [f"{test_id_to_name(result.result_id)}:\n{result.doc}" for result in results]
+    )
+
     return build_test_result(
         outputs=[
             {
@@ -210,6 +217,7 @@ def _run_composite_test(
         test_id=test_id,
         inputs=results[0].inputs,
         params=results[0].params,
+        doc=composite_doc,
         description="\n\n".join(
             [_test_description(result.description, num_lines=1) for result in results]
         ),  # join truncated (first line only) test descriptions
@@ -261,11 +269,14 @@ def _run_comparison_test(
 
     combined_outputs, combined_inputs, combined_params = combine_results(results)
 
+    doc = getdoc(load_test(test_id))
+
     return build_test_result(
         outputs=tuple(combined_outputs),
         test_id=test_id,
         inputs=combined_inputs,
         params=combined_params,
+        doc=doc,
         description=description,
         generate_description=generate_description,
         title=title,
@@ -383,12 +394,15 @@ def run_test(
 
         raw_result = test_func(**input_kwargs, **param_kwargs)
 
+        doc = getdoc(test_func)
+
         result = build_test_result(
             outputs=raw_result,
             test_id=test_id,
             inputs=input_kwargs,
             params=param_kwargs,
-            description=getdoc(test_func),
+            doc=doc,
+            description=doc,
             generate_description=generate_description,
             title=title,
         )

diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py
@@ -159,6 +159,7 @@ class TestResult(Result):
     name: str = "Test Result"
     ref_id: str = None
     title: Optional[str] = None
+    doc: Optional[str] = None
     description: Optional[Union[str, DescriptionFuture]] = None
     metric: Optional[Union[int, float]] = None
     tables: Optional[List[ResultTable]] = None
@@ -180,6 +181,7 @@ def __repr__(self) -> str:
         attrs = [
             attr
             for attr in [
+                "doc",
                 "description",
                 "params",
                 "tables",