diff --git a/pyproject.toml b/pyproject.toml
index 8b137eaec..280b9725b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.7.0"
+version = "2.7.1"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}
diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py
index c34852ddb..73ceb884f 100644
--- a/tests/test_unit_tests.py
+++ b/tests/test_unit_tests.py
@@ -1,6 +1,7 @@
 import unittest
 import time
 import os
+import sys
 
 from tabulate import tabulate
 
@@ -151,3 +152,7 @@ def run_test_files():
 
     # Print coverage statistics
     print_coverage_statistics()
+
+# Exit with failure if any tests failed
+if not all_tests_passed:
+    sys.exit(1)
diff --git a/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py b/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
index 555379cd7..277913fc3 100644
--- a/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
+++ b/tests/unit_tests/model_validation/sklearn/test_ROCCurve.py
@@ -65,10 +65,16 @@ def setUp(self):
         self.vm_test_ds.assign_predictions(self.vm_model)
 
     def test_roc_curve_structure(self):
-        fig = ROCCurve(self.vm_model, self.vm_test_ds)
+        result = ROCCurve(self.vm_model, self.vm_test_ds)
 
-        # Check return type
-        self.assertIsInstance(fig, go.Figure)
+        # Check return type is tuple with RawData and Figure
+        self.assertIsInstance(result, tuple)
+        self.assertEqual(len(result), 2)
+        self.assertIsInstance(result[0], vm.RawData)
+        self.assertIsInstance(result[1], go.Figure)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check figure has two traces (ROC curve and random baseline)
         self.assertEqual(len(fig.data), 2)
@@ -82,6 +88,11 @@ def test_roc_curve_structure(self):
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.5)
 
+        # Check RawData contains expected fields
+        self.assertTrue(hasattr(result[0], "fpr"))
+        self.assertTrue(hasattr(result[0], "tpr"))
+        self.assertTrue(hasattr(result[0], "auc"))
+
     def test_perfect_separation(self):
         # Create perfectly separable dataset
         X = np.random.randn(1000, 2)
@@ -132,8 +143,14 @@ def test_perfect_separation(self):
         vm_train_ds.assign_predictions(vm_perfect_model)
         vm_test_ds.assign_predictions(vm_perfect_model)
 
-        fig = ROCCurve(vm_perfect_model, vm_test_ds)
+        result = ROCCurve(vm_perfect_model, vm_test_ds)
+
+        # Get the figure from the tuple
+        fig = result[1]
 
         # Check AUC score (should be very close to 1.0)
         auc = float(fig.data[0].name.split("=")[1].strip().rstrip(")"))
         self.assertGreater(auc, 0.95)
+
+        # Verify RawData AUC matches figure
+        self.assertAlmostEqual(result[0].auc, auc, places=2)
diff --git a/validmind/__version__.py b/validmind/__version__.py
index 2614ce9d9..7a38ae062 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.7.0"
+__version__ = "2.7.1"
diff --git a/validmind/tests/run.py b/validmind/tests/run.py
index c3b28f050..24690e7f4 100644
--- a/validmind/tests/run.py
+++ b/validmind/tests/run.py
@@ -136,6 +136,7 @@ def build_test_result(
     test_id: str,
     inputs: Dict[str, Union[VMInput, List[VMInput]]],
     params: Union[Dict[str, Any], None],
+    doc: str,
     description: str,
     generate_description: bool = True,
     title: Optional[str] = None,
@@ -149,6 +150,7 @@
         ref_id=ref_id,
         inputs=inputs,
         params=params if params else None,  # None if empty dict or None
+        doc=doc,
     )
 
     if not isinstance(outputs, tuple):
@@ -199,6 +201,11 @@ def _run_composite_test(
     if not all(result.metric is not None for result in results):
         raise ValueError("All tests must return a metric when used as a composite test")
 
+    # Create composite doc from all test results
+    composite_doc = "\n\n".join(
+        [f"{test_id_to_name(result.result_id)}:\n{result.doc}" for result in results]
+    )
+
     return build_test_result(
         outputs=[
             {
@@ -210,6 +217,7 @@ def _run_composite_test(
         test_id=test_id,
         inputs=results[0].inputs,
         params=results[0].params,
+        doc=composite_doc,
         description="\n\n".join(
            [_test_description(result.description, num_lines=1) for result in results]
        ),  # join truncated (first line only) test descriptions
@@ -261,11 +269,14 @@ def _run_comparison_test(
 
     combined_outputs, combined_inputs, combined_params = combine_results(results)
 
+    doc = getdoc(load_test(test_id))
+
     return build_test_result(
         outputs=tuple(combined_outputs),
         test_id=test_id,
         inputs=combined_inputs,
         params=combined_params,
+        doc=doc,
         description=description,
         generate_description=generate_description,
         title=title,
@@ -383,12 +394,15 @@ def run_test(
 
     raw_result = test_func(**input_kwargs, **param_kwargs)
 
+    doc = getdoc(test_func)
+
     result = build_test_result(
         outputs=raw_result,
         test_id=test_id,
         inputs=input_kwargs,
         params=param_kwargs,
-        description=getdoc(test_func),
+        doc=doc,
+        description=doc,
         generate_description=generate_description,
         title=title,
     )
diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py
index 8cea3641b..eebf0fc62 100644
--- a/validmind/vm_models/result/result.py
+++ b/validmind/vm_models/result/result.py
@@ -159,6 +159,7 @@ class TestResult(Result):
     name: str = "Test Result"
     ref_id: str = None
     title: Optional[str] = None
+    doc: Optional[str] = None
     description: Optional[Union[str, DescriptionFuture]] = None
     metric: Optional[Union[int, float]] = None
     tables: Optional[List[ResultTable]] = None
@@ -180,6 +181,7 @@ def __repr__(self) -> str:
         attrs = [
             attr
             for attr in [
+                "doc",
                 "description",
                 "params",
                 "tables",
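
Note: the `doc` value threaded through `build_test_result` above comes from the bare `getdoc` call in `validmind/tests/run.py`, presumably Python's standard `inspect.getdoc` (the import is not shown in this patch). It returns the callable's docstring with common indentation stripped, or `None` when there is no docstring, which is consistent with the new `TestResult.doc` field being typed `Optional[str]`. A minimal sketch of that behavior; the `example_test` function is hypothetical, for illustration only:

    from inspect import getdoc

    def example_test():
        """Compute an example metric.

        Continuation lines have their common indentation stripped.
        """

    # Docstring is returned dedented; a callable without one yields None,
    # matching the Optional[str] annotation on TestResult.doc.
    print(getdoc(example_test))
    print(getdoc(lambda x: x))  # None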