From 20c8999611c89c590a9c4002f52d519778ee06a5 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 15 Jan 2026 13:55:26 +0000 Subject: [PATCH 1/3] Create and add scorer store in the list_tests interface --- validmind/tests/load.py | 52 +++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/validmind/tests/load.py b/validmind/tests/load.py index af3018905..59bed2bc2 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -31,7 +31,7 @@ from ..vm_models.model import VMModel from ..vm_models.result import ResultTable from .__types__ import TestID -from ._store import test_provider_store, test_store +from ._store import scorer_store, test_provider_store, test_store logger = get_logger(__name__) @@ -147,6 +147,9 @@ def load_test( test_id = test_id.split(":", 1)[0] namespace = test_id.split(".", 1)[0] + # # Import scorer_store for checking custom scorers + # from ._store import scorer_store + # if not already loaded, load it from appropriate provider if test_id not in test_store.tests or reload: if test_id.startswith("validmind.composite_metric"): @@ -154,20 +157,24 @@ def load_test( pass if not test_func: - if not test_provider_store.has_test_provider(namespace): + # Handle custom scorers from scorer_store first (before checking providers) + custom_scorer = scorer_store.get_scorer(test_id) + if custom_scorer is not None: + test_func = custom_scorer + elif not test_provider_store.has_test_provider(namespace): raise LoadTestError( f"No test provider found for namespace: {namespace}" ) + else: + provider = test_provider_store.get_test_provider(namespace) - provider = test_provider_store.get_test_provider(namespace) - - try: - test_func = provider.load_test(test_id.split(".", 1)[1]) - except Exception as e: - raise LoadTestError( - f"Unable to load test '{test_id}' from {namespace} test provider", - original_error=e, - ) from e + try: + test_func = provider.load_test(test_id.split(".", 1)[1]) + except Exception as e: + raise LoadTestError( + f"Unable to load test '{test_id}' from {namespace} test provider", + original_error=e, + ) from e # add test_id as an attribute to the test function test_func.test_id = test_id @@ -191,15 +198,30 @@ def load_test( def _list_test_ids() -> List[str]: - """List all available test IDs""" - test_ids = [] + """List all available test IDs, including scorers""" + test_ids_set = set() for namespace, test_provider in test_provider_store.test_providers.items(): - test_ids.extend( + test_ids_set.update( [f"{namespace}.{test_id}" for test_id in sorted(test_provider.list_tests())] ) - return test_ids + # Add built-in scorers from validmind provider + if test_provider_store.has_test_provider("validmind"): + vm_provider = test_provider_store.get_test_provider("validmind") + if hasattr(vm_provider, "scorers_provider"): + scorer_ids = [ + f"validmind.scorers.{scorer_id}" + for scorer_id in sorted(vm_provider.scorers_provider.list_tests()) + ] + test_ids_set.update(scorer_ids) + + # Add custom scorers from scorer_store + from ._store import scorer_store + + test_ids_set.update(scorer_store.scorers.keys()) + + return sorted(list(test_ids_set)) def _load_tests(test_ids: List[str]) -> Dict[str, Callable[..., Any]]: From 7bc246a58cef24a95bb2e74e3ad24179c1321ec1 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 15 Jan 2026 14:14:19 +0000 Subject: [PATCH 2/3] refactor code to remove lint error --- .../scorers/llm/deepeval/PlanAdherence.py | 4 +- validmind/tests/load.py | 97 ++++++++++++------- 2 files changed, 63 insertions(+), 38 deletions(-) diff --git a/validmind/scorers/llm/deepeval/PlanAdherence.py b/validmind/scorers/llm/deepeval/PlanAdherence.py index 0335a6fdd..fb606ef1b 100644 --- a/validmind/scorers/llm/deepeval/PlanAdherence.py +++ b/validmind/scorers/llm/deepeval/PlanAdherence.py @@ -27,7 +27,9 @@ @scorer() -@tags("llm", "PlanAdherence", "deepeval", "agent_evaluation", "reasoning_layer", "agentic") +@tags( + "llm", "PlanAdherence", "deepeval", "agent_evaluation", "reasoning_layer", "agentic" +) @tasks("llm") def PlanAdherence( dataset: VMDataset, diff --git a/validmind/tests/load.py b/validmind/tests/load.py index 59bed2bc2..17e94f98a 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -127,6 +127,64 @@ def _inspect_signature( return inputs, params +def _get_test_function_from_provider(test_id: str, namespace: str) -> Callable[..., Any]: + """Load a test function from the appropriate provider or scorer store. + + Args: + test_id: The full test ID + namespace: The namespace extracted from the test ID + + Returns: + The loaded test function + + Raises: + LoadTestError: If the test cannot be loaded + """ + # Handle custom scorers from scorer_store first (before checking providers) + custom_scorer = scorer_store.get_scorer(test_id) + if custom_scorer is not None: + return custom_scorer + + if not test_provider_store.has_test_provider(namespace): + raise LoadTestError( + f"No test provider found for namespace: {namespace}" + ) + + provider = test_provider_store.get_test_provider(namespace) + + try: + return provider.load_test(test_id.split(".", 1)[1]) + except Exception as e: + raise LoadTestError( + f"Unable to load test '{test_id}' from {namespace} test provider", + original_error=e, + ) from e + + +def _configure_test_function(test_func: Callable[..., Any], test_id: str) -> None: + """Configure a test function with required attributes. + + Args: + test_func: The test function to configure + test_id: The test ID to assign to the function + """ + # add test_id as an attribute to the test function + test_func.test_id = test_id + + # fallback to using func name if no docstring is found + if not inspect.getdoc(test_func): + test_func.__doc__ = f"{test_func.__name__} ({test_id})" + + # add inputs and params as attributes to the test function + test_func.inputs, test_func.params = _inspect_signature(test_func) + + # ensure tags and tasks attributes exist, default to empty list if not present + if not hasattr(test_func, "__tags__"): + test_func.__tags__ = [] + if not hasattr(test_func, "__tasks__"): + test_func.__tasks__ = [] + + def load_test( test_id: str, test_func: Optional[Callable[..., Any]] = None, reload: bool = False ) -> Callable[..., Any]: @@ -147,9 +205,6 @@ def load_test( test_id = test_id.split(":", 1)[0] namespace = test_id.split(".", 1)[0] - # # Import scorer_store for checking custom scorers - # from ._store import scorer_store - # if not already loaded, load it from appropriate provider if test_id not in test_store.tests or reload: if test_id.startswith("validmind.composite_metric"): @@ -157,41 +212,9 @@ def load_test( pass if not test_func: - # Handle custom scorers from scorer_store first (before checking providers) - custom_scorer = scorer_store.get_scorer(test_id) - if custom_scorer is not None: - test_func = custom_scorer - elif not test_provider_store.has_test_provider(namespace): - raise LoadTestError( - f"No test provider found for namespace: {namespace}" - ) - else: - provider = test_provider_store.get_test_provider(namespace) - - try: - test_func = provider.load_test(test_id.split(".", 1)[1]) - except Exception as e: - raise LoadTestError( - f"Unable to load test '{test_id}' from {namespace} test provider", - original_error=e, - ) from e - - # add test_id as an attribute to the test function - test_func.test_id = test_id - - # fallback to using func name if no docstring is found - if not inspect.getdoc(test_func): - test_func.__doc__ = f"{test_func.__name__} ({test_id})" - - # add inputs and params as attributes to the test function - test_func.inputs, test_func.params = _inspect_signature(test_func) - - # ensure tags and tasks attributes exist, default to empty list if not present - if not hasattr(test_func, "__tags__"): - test_func.__tags__ = [] - if not hasattr(test_func, "__tasks__"): - test_func.__tasks__ = [] + test_func = _get_test_function_from_provider(test_id, namespace) + _configure_test_function(test_func, test_id) test_store.register_test(test_id, test_func) return test_store.get_test(test_id) From 8fd8591161105eef9726971f78c2d6984260f403 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 15 Jan 2026 14:17:30 +0000 Subject: [PATCH 3/3] remove duplicate import statement --- validmind/tests/load.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/validmind/tests/load.py b/validmind/tests/load.py index 17e94f98a..811db9aee 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -239,9 +239,6 @@ def _list_test_ids() -> List[str]: ] test_ids_set.update(scorer_ids) - # Add custom scorers from scorer_store - from ._store import scorer_store - test_ids_set.update(scorer_store.scorers.keys()) return sorted(list(test_ids_set))