def _combine_raw_data(results: List[TestResult]) -> RawData:
    """Combine the RawData of several test results into a single RawData.

    Each attribute of the combined object holds a list containing that
    attribute's value from every result, in the same order as ``results``.

    Args:
        results: Test results whose ``raw_data`` objects should be merged.

    Returns:
        A new RawData constructed with one keyword argument per attribute
        found on the input RawData objects.

    Raises:
        ValueError: If ``results`` is empty, if any ``raw_data`` is not a
            RawData instance, or if the RawData objects do not all have the
            same attribute names.
    """
    if not results:
        raise ValueError("Cannot combine raw data from an empty list of results")

    attribute_names = None

    for result in results:
        raw_data = result.raw_data

        # Check the type before touching `__dict__` so an invalid entry
        # (including the first one) is reported as a ValueError rather than
        # failing with an AttributeError on attribute access.
        if not isinstance(raw_data, RawData):
            raise ValueError("All raw data objects must be of type RawData")

        current_names = vars(raw_data).keys()
        if attribute_names is None:
            # the first result defines the attribute set all others must match
            attribute_names = current_names
        elif current_names != attribute_names:
            raise ValueError("RawData objects must have the same attributes")

    # NOTE(review): every entry of the instance `__dict__` is forwarded to the
    # RawData constructor as a keyword argument holding a list - confirm that
    # RawData keeps no non-data state there that should not become a list.
    return RawData(
        **{
            key: [getattr(result.raw_data, key) for result in results]
            for key in attribute_names
        }
    )