From d2e1d80b35c1c76e0bd69323af94870474ca68c9 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Fri, 25 Apr 2025 15:53:55 +0100 Subject: [PATCH 1/7] interfaces to support code explainer --- validmind/__init__.py | 3 +++ validmind/experimental/__init__.py | 0 validmind/experimental/agent.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 validmind/experimental/__init__.py create mode 100644 validmind/experimental/agent.py diff --git a/validmind/__init__.py b/validmind/__init__.py index c99f3a537..0929ee797 100644 --- a/validmind/__init__.py +++ b/validmind/__init__.py @@ -53,6 +53,7 @@ run_documentation_tests, run_test_suite, ) +from .experimental import agent as experimental_agent from .tests.decorator import tags, tasks, test from .tests.run import print_env from .utils import is_notebook, parse_version @@ -126,4 +127,6 @@ def check_version(): "unit_metrics", "test_suites", "log_text", + # experimental features + "experimental_agent", ] diff --git a/validmind/experimental/__init__.py b/validmind/experimental/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/validmind/experimental/agent.py b/validmind/experimental/agent.py new file mode 100644 index 000000000..2cd770407 --- /dev/null +++ b/validmind/experimental/agent.py @@ -0,0 +1,40 @@ +import requests + +from validmind.api_client import _get_api_headers, _get_url, raise_api_error +from validmind.vm_models.result import TestResult + + +def run_task(generation_type: str, input: dict) -> TestResult: + """ + Run text generation for different purposes like code explanation. + + Args: + generation_type (str): Type of text generation ('code_explainer' or 'qualitative_text') + input (dict): Dictionary containing source_code and parameters + + Returns: + TestResult: Test result object containing the generated text + """ + if generation_type == "code_explainer": + r = requests.post( + url=_get_url("ai/generate/code_explainer"), + headers=_get_api_headers(), + json=input, + ) + + if r.status_code != 200: + raise_api_error(r.text) + + generated_text = r.json()["content"] + else: + raise ValueError(f"Unsupported generation type: {generation_type}") + + # Create a test result with the generated text + result = TestResult( + result_id=f"{generation_type}", + description=generated_text, + title=f"Text Generation: {generation_type}", + doc=f"Generated {generation_type}", + ) + + return result From 77dc9ee377b4065052d4770924294fdbfc974989 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Fri, 25 Apr 2025 16:23:34 +0100 Subject: [PATCH 2/7] add copyright statement --- validmind/experimental/agent.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/validmind/experimental/agent.py b/validmind/experimental/agent.py index 2cd770407..65a7ef5c0 100644 --- a/validmind/experimental/agent.py +++ b/validmind/experimental/agent.py @@ -1,3 +1,11 @@ +# Copyright © 2023-2024 ValidMind Inc. All rights reserved. +# See the LICENSE file in the root of this repository for details. +# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +""" +Agent interface for all text generation tasks +""" + import requests from validmind.api_client import _get_api_headers, _get_url, raise_api_error From f9c8f61d492e7cfd7710fdf58fa7bca404923b9f Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Tue, 29 Apr 2025 15:20:27 +0100 Subject: [PATCH 3/7] add TextGenerationResult class for code explainer --- validmind/experimental/agent.py | 18 ++- validmind/vm_models/result/__init__.py | 18 ++- validmind/vm_models/result/result.py | 186 ++++++++++++++++++++++++- 3 files changed, 210 insertions(+), 12 deletions(-) diff --git a/validmind/experimental/agent.py b/validmind/experimental/agent.py index 65a7ef5c0..495c388e0 100644 --- a/validmind/experimental/agent.py +++ b/validmind/experimental/agent.py @@ -9,10 +9,15 @@ import requests from validmind.api_client import _get_api_headers, _get_url, raise_api_error -from validmind.vm_models.result import TestResult +from validmind.utils import is_html, md_to_html +from validmind.vm_models.result import TextGenerationResult -def run_task(generation_type: str, input: dict) -> TestResult: +def run_task( + generation_type: str, + input: dict, + show: bool = True, +) -> TextGenerationResult: """ Run text generation for different purposes like code explanation. @@ -37,12 +42,17 @@ def run_task(generation_type: str, input: dict) -> TestResult: else: raise ValueError(f"Unsupported generation type: {generation_type}") + if not is_html(generated_text): + generated_text = md_to_html(generated_text, mathml=True) + # Create a test result with the generated text - result = TestResult( - result_id=f"{generation_type}", + result = TextGenerationResult( + result_type=f"{generation_type}", description=generated_text, title=f"Text Generation: {generation_type}", doc=f"Generated {generation_type}", ) + if show: + result.show() return result diff --git a/validmind/vm_models/result/__init__.py b/validmind/vm_models/result/__init__.py index aca6c17e6..a092c4da9 100644 --- a/validmind/vm_models/result/__init__.py +++ b/validmind/vm_models/result/__init__.py @@ -2,6 +2,20 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from .result import ErrorResult, RawData, Result, ResultTable, TestResult +from .result import ( + ErrorResult, + RawData, + Result, + ResultTable, + TestResult, + TextGenerationResult, +) -__all__ = ["ErrorResult", "RawData", "Result", "ResultTable", "TestResult"] +__all__ = [ + "ErrorResult", + "RawData", + "Result", + "ResultTable", + "TestResult", + "TextGenerationResult", +] diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py index ba34bcd7a..a0fab447a 100644 --- a/validmind/vm_models/result/result.py +++ b/validmind/vm_models/result/result.py @@ -129,6 +129,7 @@ class Result: result_id: str = None name: str = None + result_type: str = None def __str__(self) -> str: """May be overridden by subclasses.""" @@ -445,6 +446,7 @@ def serialize(self): async def log_async( self, section_id: str = None, + content_id: str = None, position: int = None, config: Dict[str, bool] = None, ): @@ -477,7 +479,6 @@ async def log_async( tasks.extend( [api_client.alog_figure(figure) for figure in (self.figures or [])] ) - if self.description: revision_name = ( AI_REVISION_NAME @@ -485,18 +486,19 @@ async def log_async( else DEFAULT_REVISION_NAME ) - tasks.append( - update_metadata( - content_id=f"test_description:{self.result_id}::{revision_name}", - text=self.description, - ) + tasks.append( + update_metadata( + content_id=f"{content_id}:{revision_name}", + text=self.description, ) + ) return await asyncio.gather(*tasks) def log( self, section_id: str = None, + content_id: str = None, position: int = None, unsafe: bool = False, config: Dict[str, bool] = None, @@ -506,6 +508,7 @@ def log( Args: section_id (str): The section ID within the model document to insert the test result. + content_id (str): The content ID to log the result to. position (int): The position (index) within the section to insert the test result. unsafe (bool): If True, log the result even if it contains sensitive data @@ -533,6 +536,177 @@ def log( run_async( self.log_async, section_id=section_id, + content_id=content_id, + position=position, + config=config, + ) + + def validate_log_config(self, config: Dict[str, bool]): + """Validate the configuration options for logging a test result + + Args: + config (Dict[str, bool]): Configuration options to validate + + Raises: + InvalidParameterError: If config contains invalid keys or non-boolean values + """ + valid_keys = { + "hideTitle", + "hideText", + "hideParams", + "hideTables", + "hideFigures", + } + invalid_keys = set(config.keys()) - valid_keys + if invalid_keys: + raise InvalidParameterError( + f"Invalid config keys: {', '.join(invalid_keys)}. " + f"Valid keys are: {', '.join(valid_keys)}" + ) + + # Ensure all values are boolean + non_bool_keys = [ + key for key, value in config.items() if not isinstance(value, bool) + ] + if non_bool_keys: + raise InvalidParameterError( + f"Values for config keys must be boolean. Non-boolean values found for keys: {', '.join(non_bool_keys)}" + ) + + +@dataclass +class TextGenerationResult(Result): + """Test result.""" + + name: str = "Text Generation Result" + ref_id: str = None + title: Optional[str] = None + doc: Optional[str] = None + description: Optional[Union[str, DescriptionFuture]] = None + params: Optional[Dict[str, Any]] = None + metadata: Optional[Dict[str, Any]] = None + _was_description_generated: bool = False + _unsafe: bool = False + _client_config_cache: Optional[Any] = None + + def __post_init__(self): + if self.ref_id is None: + self.ref_id = str(uuid4()) + + def __repr__(self) -> str: + attrs = [ + attr + for attr in [ + "doc", + "description", + "params", + ] + if getattr(self, attr) is not None + and ( + len(getattr(self, attr)) > 0 + if isinstance(getattr(self, attr), list) + else True + ) + ] + + return f'TextGenerationResult("{self.result_id}", {", ".join(attrs)})' + + def __getattribute__(self, name): + # lazy load description if its a DescriptionFuture (generated in background) + if name == "description": + description = super().__getattribute__("description") + + if isinstance(description, DescriptionFuture): + self._was_description_generated = True + self.description = description.get_description() + + return super().__getattribute__(name) + + @property + def test_name(self) -> str: + """Get the test name, using custom title if available.""" + return self.title or test_id_to_name(self.result_id) + + def to_widget(self): + template_data = { + "test_name": self.test_name, + "description": self.description.replace("h3", "strong"), + "params": ( + json.dumps(self.params, cls=NumpyEncoder, indent=2) + if self.params + else None + ), + } + rendered = get_result_template().render(**template_data) + + widgets = [HTML(rendered)] + + return VBox(widgets) + + def serialize(self): + """Serialize the result for the API.""" + return { + "test_name": self.result_id, + "title": self.title, + "ref_id": self.ref_id, + "params": self.params, + "metadata": self.metadata, + } + + async def log_async( + self, + section_id: str = None, + content_id: str = None, + position: int = None, + config: Dict[str, bool] = None, + ): + tasks = [] # collect tasks to run in parallel (async) + + # Default empty dict if None + config = config or {} + + if self.description: + tasks.append( + update_metadata( + content_id=f"{content_id}", + text=self.description, + ) + ) + + return await asyncio.gather(*tasks) + + def log( + self, + section_id: str = None, + content_id: str = None, + position: int = None, + unsafe: bool = False, + config: Dict[str, bool] = None, + ): + """Log the result to ValidMind. + + Args: + section_id (str): The section ID within the model document to insert the + test result. + content_id (str): The content ID to log the result to. + position (int): The position (index) within the section to insert the test + result. + unsafe (bool): If True, log the result even if it contains sensitive data + i.e. raw data from input datasets. + config (Dict[str, bool]): Configuration options for displaying the test result. + Available config options: + - hideTitle: Hide the title in the document view + - hideText: Hide the description text in the document view + - hideParams: Hide the parameters in the document view + - hideTables: Hide tables in the document view + - hideFigures: Hide figures in the document view + """ + if config: + self.validate_log_config(config) + run_async( + self.log_async, + section_id=section_id, + content_id=content_id, position=position, config=config, ) From b0991983ccb634e57da69dcb898d22bd01325901 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Wed, 30 Apr 2025 15:31:11 +0100 Subject: [PATCH 4/7] remove unnecessary parameters while logging metadata --- validmind/experimental/agent.py | 15 +++++-- validmind/vm_models/result/result.py | 58 ---------------------------- 2 files changed, 11 insertions(+), 62 deletions(-) diff --git a/validmind/experimental/agent.py b/validmind/experimental/agent.py index 495c388e0..77e778291 100644 --- a/validmind/experimental/agent.py +++ b/validmind/experimental/agent.py @@ -19,14 +19,21 @@ def run_task( show: bool = True, ) -> TextGenerationResult: """ - Run text generation for different purposes like code explanation. + Run text generation tasks using AI models. Args: - generation_type (str): Type of text generation ('code_explainer' or 'qualitative_text') - input (dict): Dictionary containing source_code and parameters + generation_type (str): Type of text generation task to run. Currently supports: + - 'code_explainer': Generates natural language explanations of code + input (dict): Input parameters for the generation task: + - For code_explainer: Must contain 'source_code' and optional parameters + show (bool): Whether to display the generated result. Defaults to True. Returns: - TestResult: Test result object containing the generated text + TextGenerationResult: Result object containing the generated text and metadata + + Raises: + ValueError: If an unsupported generation_type is provided + requests.exceptions.RequestException: If the API request fails """ if generation_type == "code_explainer": r = requests.post( diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py index a0fab447a..61e1f4736 100644 --- a/validmind/vm_models/result/result.py +++ b/validmind/vm_models/result/result.py @@ -586,8 +586,6 @@ class TextGenerationResult(Result): params: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None _was_description_generated: bool = False - _unsafe: bool = False - _client_config_cache: Optional[Any] = None def __post_init__(self): if self.ref_id is None: @@ -655,16 +653,10 @@ def serialize(self): async def log_async( self, - section_id: str = None, content_id: str = None, - position: int = None, - config: Dict[str, bool] = None, ): tasks = [] # collect tasks to run in parallel (async) - # Default empty dict if None - config = config or {} - if self.description: tasks.append( update_metadata( @@ -677,11 +669,7 @@ async def log_async( def log( self, - section_id: str = None, content_id: str = None, - position: int = None, - unsafe: bool = False, - config: Dict[str, bool] = None, ): """Log the result to ValidMind. @@ -691,54 +679,8 @@ def log( content_id (str): The content ID to log the result to. position (int): The position (index) within the section to insert the test result. - unsafe (bool): If True, log the result even if it contains sensitive data - i.e. raw data from input datasets. - config (Dict[str, bool]): Configuration options for displaying the test result. - Available config options: - - hideTitle: Hide the title in the document view - - hideText: Hide the description text in the document view - - hideParams: Hide the parameters in the document view - - hideTables: Hide tables in the document view - - hideFigures: Hide figures in the document view """ - if config: - self.validate_log_config(config) run_async( self.log_async, - section_id=section_id, content_id=content_id, - position=position, - config=config, ) - - def validate_log_config(self, config: Dict[str, bool]): - """Validate the configuration options for logging a test result - - Args: - config (Dict[str, bool]): Configuration options to validate - - Raises: - InvalidParameterError: If config contains invalid keys or non-boolean values - """ - valid_keys = { - "hideTitle", - "hideText", - "hideParams", - "hideTables", - "hideFigures", - } - invalid_keys = set(config.keys()) - valid_keys - if invalid_keys: - raise InvalidParameterError( - f"Invalid config keys: {', '.join(invalid_keys)}. " - f"Valid keys are: {', '.join(valid_keys)}" - ) - - # Ensure all values are boolean - non_bool_keys = [ - key for key, value in config.items() if not isinstance(value, bool) - ] - if non_bool_keys: - raise InvalidParameterError( - f"Values for config keys must be boolean. Non-boolean values found for keys: {', '.join(non_bool_keys)}" - ) From 1c4f3bbe3a6ce022b33bc1aeebe396338c229cc3 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Wed, 30 Apr 2025 16:03:48 +0100 Subject: [PATCH 5/7] rename parameter from generation type to task --- validmind/experimental/agent.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/validmind/experimental/agent.py b/validmind/experimental/agent.py index 77e778291..4b16eb792 100644 --- a/validmind/experimental/agent.py +++ b/validmind/experimental/agent.py @@ -14,7 +14,7 @@ def run_task( - generation_type: str, + task: str, input: dict, show: bool = True, ) -> TextGenerationResult: @@ -22,7 +22,7 @@ def run_task( Run text generation tasks using AI models. Args: - generation_type (str): Type of text generation task to run. Currently supports: + task (str): Type of text generation task to run. Currently supports: - 'code_explainer': Generates natural language explanations of code input (dict): Input parameters for the generation task: - For code_explainer: Must contain 'source_code' and optional parameters @@ -32,10 +32,10 @@ def run_task( TextGenerationResult: Result object containing the generated text and metadata Raises: - ValueError: If an unsupported generation_type is provided + ValueError: If an unsupported task is provided requests.exceptions.RequestException: If the API request fails """ - if generation_type == "code_explainer": + if task == "code_explainer": r = requests.post( url=_get_url("ai/generate/code_explainer"), headers=_get_api_headers(), @@ -47,17 +47,17 @@ def run_task( generated_text = r.json()["content"] else: - raise ValueError(f"Unsupported generation type: {generation_type}") + raise ValueError(f"Unsupported task: {task}") if not is_html(generated_text): generated_text = md_to_html(generated_text, mathml=True) # Create a test result with the generated text result = TextGenerationResult( - result_type=f"{generation_type}", + result_type=f"{task}", description=generated_text, - title=f"Text Generation: {generation_type}", - doc=f"Generated {generation_type}", + title=f"Text Generation: {task}", + doc=f"Generated {task}", ) if show: result.show() From e118a4e5bc0b1a5db177fb9ecac6a87f77dfeb0e Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Wed, 30 Apr 2025 20:39:25 +0100 Subject: [PATCH 6/7] address review comment --- validmind/vm_models/result/result.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/validmind/vm_models/result/result.py b/validmind/vm_models/result/result.py index 61e1f4736..8cab8cddc 100644 --- a/validmind/vm_models/result/result.py +++ b/validmind/vm_models/result/result.py @@ -655,17 +655,12 @@ async def log_async( self, content_id: str = None, ): - tasks = [] # collect tasks to run in parallel (async) - - if self.description: - tasks.append( - update_metadata( - content_id=f"{content_id}", - text=self.description, - ) + return await asyncio.gather( + update_metadata( + content_id=f"{content_id}", + text=self.description, ) - - return await asyncio.gather(*tasks) + ) def log( self, From a51405ef676d52047a9955f20b13699be112f19c Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Thu, 1 May 2025 10:11:23 +0100 Subject: [PATCH 7/7] rename module name from agent to agents --- validmind/__init__.py | 2 +- validmind/experimental/{agent.py => agents.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename validmind/experimental/{agent.py => agents.py} (100%) diff --git a/validmind/__init__.py b/validmind/__init__.py index 0929ee797..216c26d20 100644 --- a/validmind/__init__.py +++ b/validmind/__init__.py @@ -53,7 +53,7 @@ run_documentation_tests, run_test_suite, ) -from .experimental import agent as experimental_agent +from .experimental import agents as experimental_agent from .tests.decorator import tags, tasks, test from .tests.run import print_env from .utils import is_notebook, parse_version diff --git a/validmind/experimental/agent.py b/validmind/experimental/agents.py similarity index 100% rename from validmind/experimental/agent.py rename to validmind/experimental/agents.py