From 7e261812ecbee835cfff9961100b7d6e121ef77c Mon Sep 17 00:00:00 2001
From: Juan <juan@validmind.ai>
Date: Fri, 13 Dec 2024 16:57:39 +0100
Subject: [PATCH 1/5] Add doc property to results

---
 validmind/tests/run.py               | 16 +++++++++++++++-
 validmind/vm_models/result/result.py |  2 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/validmind/tests/run.py b/validmind/tests/run.py
index 9c806f306..6fcce782b 100644
--- a/validmind/tests/run.py
+++ b/validmind/tests/run.py
@@ -136,6 +136,7 @@ def build_test_result(
     test_id: str,
     inputs: Dict[str, Union[VMInput, List[VMInput]]],
     params: Union[Dict[str, Any], None],
+    doc: str,
     description: str,
     generate_description: bool = True,
     title: Optional[str] = None,
@@ -149,6 +150,7 @@ def build_test_result(
         ref_id=ref_id,
         inputs=inputs,
         params=params if params else None,  # None if empty dict or None
+        doc=doc,
     )
 
     if not isinstance(outputs, tuple):
@@ -199,6 +201,11 @@ def _run_composite_test(
     if not all(result.metric is not None for result in results):
         raise ValueError("All tests must return a metric when used as a composite test")
 
+    # Create composite doc from all test results
+    composite_doc = "\n\n".join(
+        [f"{test_id_to_name(result.result_id)}:\n{result.doc}" for result in results]
+    )
+
     return build_test_result(
         outputs=[
             {
@@ -210,6 +217,7 @@ def _run_composite_test(
         test_id=test_id,
         inputs=results[0].inputs,
         params=results[0].params,
+        doc=composite_doc,
         description="\n\n".join(
             [_test_description(result.description, num_lines=1) for result in results]
         ),  # join truncated (first line only) test descriptions
@@ -261,11 +269,14 @@ def _run_comparison_test(
 
     combined_outputs, combined_inputs, combined_params = combine_results(results)
 
+    doc = getdoc(load_test(test_id))
+
     return build_test_result(
         outputs=tuple(combined_outputs),
         test_id=test_id,
         inputs=combined_inputs,
         params=combined_params,
+        doc=doc,
         description=description,
         generate_description=generate_description,
         title=title,
@@ -381,12 +392,15 @@ def run_test(
 
         raw_result = test_func(**input_kwargs, **param_kwargs)
 
+        doc = getdoc(test_func)
+
         result = build_test_result(
             outputs=raw_result,
             test_id=test_id,
             inputs=input_kwargs,
             params=param_kwargs,
-            description=getdoc(test_func),
+            doc=doc,
+            description=doc,
             generate_description=generate_description,
             title=title,
         )
diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py
index 08c26a7b8..5cae5e9bb 100644
--- a/validmind/vm_models/result/result.py
+++ b/validmind/vm_models/result/result.py
@@ -115,6 +115,7 @@ class TestResult(Result):
     name: str = "Test Result"
     ref_id: str = None
     title: Optional[str] = None
+    doc: Optional[str] = None
     description: Optional[Union[str, DescriptionFuture]] = None
     metric: Optional[Union[int, float]] = None
     tables: Optional[List[ResultTable]] = None
@@ -136,6 +137,7 @@ def __repr__(self) -> str:
         attrs = [
             attr
             for attr in [
+                "doc",
                 "description",
                 "params",
                 "tables",

From 8c67d7150e550c332b7bc1fec879f6b0c617cfa6 Mon Sep 17 00:00:00 2001
From: Juan <juan@validmind.ai>
Date: Mon, 16 Dec 2024 18:12:41 +0100
Subject: [PATCH 2/5] Bump version to 2.7.1

---
 pyproject.toml           | 2 +-
 validmind/__version__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8b137eaec..280b9725b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.7.0"
+version = "2.7.1"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}
diff --git a/validmind/__version__.py b/validmind/__version__.py
index 2614ce9d9..7a38ae062 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.7.0"
+__version__ = "2.7.1"

From 7b9affc9e7f2fc04a9f348430310a94627f31217 Mon Sep 17 00:00:00 2001
From: Juan <juan@validmind.ai>
Date: Mon, 16 Dec 2024 18:40:43 +0100
Subject: [PATCH 3/5] Add exit condition when unit tests fail

---
 tests/test_unit_tests.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
index c34852ddb..cd8d7b678 100644
--- a/tests/test_unit_tests.py
+++ b/tests/test_unit_tests.py
@@ -1,6 +1,7 @@
 import unittest
 import time
 import os
+import sys
 
 from tabulate import tabulate
 
@@ -151,3 +152,7 @@ def run_test_files():
 
 # Print coverage statistics
 print_coverage_statistics()
+
+# Exit with failure if any tests failed
+if not all_tests_passed:
+    sys.exit(1)  # Add this line to exit with error code

From 6134e2c78a36da843bc69b0e8b6edc6ac63c4299 Mon Sep 17 00:00:00 2001
From: Juan <juan@validmind.ai>
Date: Mon, 16 Dec 2024 19:02:36 +0100
Subject: [PATCH 4/5] Output unit test errors before exiting

---
 tests/test_unit_tests.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
index cd8d7b678..9d95ae76b 100644
--- a/tests/test_unit_tests.py
+++ b/tests/test_unit_tests.py
@@ -155,4 +155,26 @@ def run_test_files():
 
 # Exit with failure if any tests failed
 if not all_tests_passed:
-    sys.exit(1)  # Add this line to exit with error code
+    logger.error("\n=== FAILED TESTS SUMMARY ===")
+    for test_file in FAILED_TESTS:
+        logger.error(f"\nTest file that failed: {test_file}")
+        try:
+            # Load and run the failed test again to get detailed error output
+            suite = unittest.TestLoader().loadTestsFromName(f"tests.{test_file}")
+            result = unittest.TextTestRunner(verbosity=2).run(suite)
+
+            # Output specific test failures
+            for failure in result.failures:
+                logger.error(f"\nFailed test: {failure[0]}")
+                logger.error(f"Error message:\n{failure[1]}")
+
+            # Output specific test errors
+            for error in result.errors:
+                logger.error(f"\nTest error: {error[0]}")
+                logger.error(f"Error message:\n{error[1]}")
+
+        except Exception as e:
+            logger.error(f"Error re-running test {test_file}: {str(e)}")
+
+    logger.error("\nSome tests failed. Check the detailed errors above.")
+    sys.exit(1)

From 970e7ab17e56f1a5b4b7b2b8f63e30ccb617aeaf Mon Sep 17 00:00:00 2001
From: Juan <juan@validmind.ai>
Date: Mon, 16 Dec 2024 19:15:30 +0100
Subject: [PATCH 5/5] Update ROCCurve unit test and drop failed-test re-run

---
 tests/test_unit_tests.py                      | 22 ----------------
 .../model_validation/sklearn/test_ROCCurve.py | 25 ++++++++++++++++---
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
index 9d95ae76b..73ceb884f 100644
--- a/tests/test_unit_tests.py
+++ b/tests/test_unit_tests.py
@@ -155,26 +155,4 @@ def run_test_files():
 
 # Exit with failure if any tests failed
 if not all_tests_passed:
-    logger.error("\n=== FAILED TESTS SUMMARY ===")
-    for test_file in FAILED_TESTS:
-        logger.error(f"\nTest file that failed: {test_file}")
-        try:
-            # Load and run the failed test again to get detailed error output
-            suite = unittest.TestLoader().loadTestsFromName(f"tests.{test_file}")
-            result = unittest.TextTestRunner(verbosity=2).run(suite)
-
-            # Output specific test failures
-            for failure in result.failures:
-                logger.error(f"\nFailed test: {failure[0]}")
-                logger.error(f"Error message:\n{failure[1]}")
-
-            # Output specific test errors
-            for error in result.errors:
-                logger.error(f"\nTest error: {error[0]}")
-                logger.error(f"Error message:\n{error[1]}")
-
-        except Exception as e:
-            logger.error(f"Error re-running test {test_file}: {str(e)}")
-
-    logger.error("\nSome tests failed. Check the detailed errors above.")
     sys.exit(1)
diff --git a/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py b/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
index 555379cd7..277913fc3 100644
--- a/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
+++ b/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
@@ -65,10 +65,16 @@ def setUp(self):
         self.vm_test_ds.assign_predictions(self.vm_model)
 
     def test_roc_curve_structure(self):
-        fig = ROCCurve(self.vm_model, self.vm_test_ds)
+        result = ROCCurve(self.vm_model, self.vm_test_ds)
 
-        # Check return type
-        self.assertIsInstance(fig, go.Figure)
+        # Check return type is tuple with RawData and Figure
+        self.assertIsInstance(result, tuple)
+        self.assertEqual(len(result), 2)
+        self.assertIsInstance(result[0], vm.RawData)
+        self.assertIsInstance(result[1], go.Figure)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check figure has two traces (ROC curve and random baseline)
         self.assertEqual(len(fig.data), 2)
@@ -82,6 +88,11 @@ def test_roc_curve_structure(self):
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.5)
 
+        # Check RawData contains expected fields
+        self.assertTrue(hasattr(result[0], "fpr"))
+        self.assertTrue(hasattr(result[0], "tpr"))
+        self.assertTrue(hasattr(result[0], "auc"))
+
     def test_perfect_separation(self):
         # Create perfectly separable dataset
         X = np.random.randn(1000, 2)
@@ -132,8 +143,14 @@ def test_perfect_separation(self):
         vm_train_ds.assign_predictions(vm_perfect_model)
         vm_test_ds.assign_predictions(vm_perfect_model)
 
-        fig = ROCCurve(vm_perfect_model, vm_test_ds)
+        result = ROCCurve(vm_perfect_model, vm_test_ds)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check AUC score (should be very close to 1.0)
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.95)
+
+        # Verify RawData AUC matches figure
+        self.assertAlmostEqual(result[0].auc, auc, places=2)