From 7eb6e20f4144ec145739b0abbbaadf4e9489af86 Mon Sep 17 00:00:00 2001
From: John Walz
Date: Fri, 17 Jan 2025 11:42:24 -0500
Subject: [PATCH 1/4] 2.7.8

---
 pyproject.toml           | 2 +-
 validmind/__version__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c9663eb1c..c1884bc04 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.7.7"
+version = "2.7.8"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}
diff --git a/validmind/__version__.py b/validmind/__version__.py
index 3da1ab5e9..dc6ddde7a 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.7.7"
+__version__ = "2.7.8"

From d0fbdc9b0ce1264ba7e5bce6307217dbd23cca30 Mon Sep 17 00:00:00 2001
From: John Walz
Date: Fri, 17 Jan 2025 11:42:42 -0500
Subject: [PATCH 2/4] feat: use backend to generate descriptions

---
 validmind/ai/test_descriptions.py | 35 +++++++++++++++++--------------
 validmind/api_client.py           | 13 ++++++++++++
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/validmind/ai/test_descriptions.py b/validmind/ai/test_descriptions.py
index e52a37e46..b1ec76d68 100644
--- a/validmind/ai/test_descriptions.py
+++ b/validmind/ai/test_descriptions.py
@@ -15,7 +15,7 @@
 from ..utils import NumpyEncoder, md_to_html, test_id_to_name
 from ..vm_models.figure import Figure
 from ..vm_models.result import ResultTable
-from .utils import DescriptionFuture, get_client_and_model
+from .utils import DescriptionFuture
 
 __executor = ThreadPoolExecutor()
 __prompt = None
@@ -91,13 +91,13 @@ def generate_description(
     title: Optional[str] = None,
 ):
     """Generate the description for the test results"""
+    from validmind.api_client import generate_test_result_description
+
     if not tables and not figures and not metric:
         raise ValueError(
             "No tables, unit metric or figures provided - cannot generate description"
         )
 
-    client, model = get_client_and_model()
-
     # get last part of test id
     test_name = title or test_id.split(".")[-1]
 
@@ -131,19 +131,22 @@ def generate_description(
         "figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
         "context": context,
     }
-    system, user = _load_prompt()
-
-    messages = [
-        prompt_to_message("system", system.render(input_data)),
-        prompt_to_message("user", user.render(input_data)),
-    ]
-    response = client.chat.completions.create(
-        model=model,
-        temperature=0.0,
-        messages=messages,
-    )
-
-    return response.choices[0].message.content
+    # system, user = _load_prompt()
+
+    # messages = [
+    #     prompt_to_message("system", system.render(input_data)),
+    #     prompt_to_message("user", user.render(input_data)),
+    # ]
+    # response = client.chat.completions.create(
+    #     model=model,
+    #     temperature=0.0,
+    #     messages=messages,
+    # )
+
+    # return response.choices[0].message.content
+    response = generate_test_result_description(input_data)
+
+    return response["content"]
 
 
 def background_generate_description(
diff --git a/validmind/api_client.py b/validmind/api_client.py
index a93372c76..a207174bc 100644
--- a/validmind/api_client.py
+++ b/validmind/api_client.py
@@ -487,3 +487,16 @@ def get_ai_key() -> Dict[str, Any]:
         raise_api_error(r.text)
 
     return r.json()
+
+
+def generate_test_result_description(test_result_data: Dict[str, Any]) -> Dict[str, Any]:
+    r = requests.post(
+        url=_get_url("ai/generate/test_result_description"),
+        headers=_get_api_headers(),
+        json=test_result_data,
+    )
+
+    if r.status_code != 200:
+        raise_api_error(r.text)
+
+    return r.json()
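
After this patch, the library no longer calls an LLM client directly: generate_description() posts its input_data payload to the ValidMind backend and reads the rendered markdown back. A minimal usage sketch, assuming an already-configured API client — the payload keys mirror the input_data dict above, and every value shown is an illustrative placeholder rather than real test output:

    from validmind.api_client import generate_test_result_description

    # Hypothetical payload: "summary" takes whatever the test's result tables
    # serialize to, and "figures" holds base64-encoded image data URLs.
    payload = {
        "test_name": "ClassImbalance",
        "test_description": "Checks whether the target classes are balanced.",
        "title": None,
        "summary": [{"Class": "0", "Count": 900}, {"Class": "1", "Count": 100}],
        "figures": [],
        "context": "",
    }

    response = generate_test_result_description(payload)
    print(response["content"])  # markdown description rendered by the backend
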
From 18910dd038451dbcf75a515d440ac962e66ce013 Mon Sep 17 00:00:00 2001
From: John Walz
Date: Tue, 21 Jan 2025 11:31:17 -0500
Subject: [PATCH 3/4] 2.8.0

---
 pyproject.toml           | 2 +-
 validmind/__version__.py | 2 +-
 validmind/tests/run.py   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c1884bc04..424285bc7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.7.8"
+version = "2.8.0"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}
diff --git a/validmind/__version__.py b/validmind/__version__.py
index dc6ddde7a..892994aa6 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.7.8"
+__version__ = "2.8.0"
diff --git a/validmind/tests/run.py b/validmind/tests/run.py
index 9401b2d1b..a86047c44 100644
--- a/validmind/tests/run.py
+++ b/validmind/tests/run.py
@@ -3,11 +3,11 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import platform
+import pprint
 import subprocess
 import time
 from datetime import datetime
 from inspect import getdoc
-import pprint
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from uuid import uuid4

From f3cf8710f7c7c584a1c1915c6a35aeb29f5c0ac1 Mon Sep 17 00:00:00 2001
From: John Walz
Date: Tue, 21 Jan 2025 12:06:42 -0500
Subject: [PATCH 4/4] chore: remove prompts for test result description
 generation

---
 validmind/ai/test_descriptions.py             |  85 ++----------
 .../ai/test_result_description/config.yaml    |  29 ----
 .../ai/test_result_description/context.py     |  73 -----------
 .../image_processing.py                       | 124 ------------------
 .../ai/test_result_description/system.jinja   |  39 ------
 .../ai/test_result_description/user.jinja     |  30 -----
 6 files changed, 12 insertions(+), 368 deletions(-)
 delete mode 100644 validmind/ai/test_result_description/config.yaml
 delete mode 100644 validmind/ai/test_result_description/context.py
 delete mode 100644 validmind/ai/test_result_description/image_processing.py
 delete mode 100644 validmind/ai/test_result_description/system.jinja
 delete mode 100644 validmind/ai/test_result_description/user.jinja

diff --git a/validmind/ai/test_descriptions.py b/validmind/ai/test_descriptions.py
index b1ec76d68..1e954f26c 100644
--- a/validmind/ai/test_descriptions.py
+++ b/validmind/ai/test_descriptions.py
@@ -4,12 +4,9 @@
 
 import json
 import os
-import re
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Optional, Union
 
-from jinja2 import Template
-
 from ..client_config import client_config
 from ..logging import get_logger
 from ..utils import NumpyEncoder, md_to_html, test_id_to_name
@@ -18,55 +15,11 @@
 from .utils import DescriptionFuture
 
 __executor = ThreadPoolExecutor()
-__prompt = None
 
 logger = get_logger(__name__)
 
 
-def _load_prompt():
-    global __prompt
-
-    if not __prompt:
-        folder_path = os.path.join(os.path.dirname(__file__), "test_result_description")
-        with open(os.path.join(folder_path, "system.jinja"), "r") as f:
-            system_prompt = f.read()
-        with open(os.path.join(folder_path, "user.jinja"), "r") as f:
-            user_prompt = f.read()
-
-        __prompt = (Template(system_prompt), Template(user_prompt))
-
-    return __prompt
-
-
-def prompt_to_message(role, prompt):
-    if "[[IMAGE:" not in prompt:
-        return {"role": role, "content": prompt}
-
-    content = []
-
-    # Regex pattern to find [[IMAGE:]] markers
-    pattern = re.compile(r"\[\[IMAGE:(.*?)\]\]", re.DOTALL)
-
-    last_index = 0
-    for match in pattern.finditer(prompt):
-        # Text before the image marker
-        start, end = match.span()
-        if start > last_index:
-            content.append({"type": "text", "text": prompt[last_index:start]})
-
-        content.append({"type": "image_url", "image_url": {"url": match.group(1)}})
-
-        last_index = end
-
-    # Text after the last image
-    if last_index < len(prompt):
-        content.append({"type": "text", "text": prompt[last_index:]})
-
-    return {"role": role, "content": content}
-
-
 def _get_llm_global_context():
-    # Get the context from the environment variable
     context = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_CONTEXT", "")
@@ -121,32 +74,18 @@ def generate_description(
     else:
         summary = None
 
-    context = _get_llm_global_context()
-
-    input_data = {
-        "test_name": test_name,
-        "test_description": test_description,
-        "title": title,
-        "summary": summary,
-        "figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
-        "context": context,
-    }
-    # system, user = _load_prompt()
-
-    # messages = [
-    #     prompt_to_message("system", system.render(input_data)),
-    #     prompt_to_message("user", user.render(input_data)),
-    # ]
-    # response = client.chat.completions.create(
-    #     model=model,
-    #     temperature=0.0,
-    #     messages=messages,
-    # )
-
-    # return response.choices[0].message.content
-    response = generate_test_result_description(input_data)
-
-    return response["content"]
+    return generate_test_result_description(
+        {
+            "test_name": test_name,
+            "test_description": test_description,
+            "title": title,
+            "summary": summary,
+            "figures": [
+                figure._get_b64_url() for figure in ([] if tables else figures)
+            ],
+            "context": _get_llm_global_context(),
+        }
+    )["content"]
 
 
 def background_generate_description(
diff --git a/validmind/ai/test_result_description/config.yaml b/validmind/ai/test_result_description/config.yaml
deleted file mode 100644
index 0c81672c7..000000000
--- a/validmind/ai/test_result_description/config.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-id: test_result_description
-name: Test Result Description
-description: Generate a description for a test result
-version: 0.1.0
-model: gpt-4o
-temperature: 0.0
-output_type: markdown
-prompts:
-  system:
-    role: system
-    path: system.jinja
-  user:
-    role: user
-    path: user.jinja
-inputs:
-  test_name:
-    description: The name of the test that produced the result (usually the last part of the test ID)
-    type: string
-  test_description:
-    description: The description (docstring) of the test that was run
-    type: string
-  summary:
-    description: The json result summary (i.e. the table(s) returned by the test)
-    type: list
-    optional: true
-  figures:
-    description: A list of base64 encoded images of the figures returned by the test
-    type: list
-    optional: true
diff --git a/validmind/ai/test_result_description/context.py b/validmind/ai/test_result_description/context.py
deleted file mode 100644
index cba5180aa..000000000
--- a/validmind/ai/test_result_description/context.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-import multiprocessing
-
-MIN_IMAGES_FOR_PARALLEL = 4
-MAX_WORKERS = multiprocessing.cpu_count()
-
-
-def parallel_downsample_images(base64_strings):
-    import os
-    import sys
-
-    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
-    from test_result_description.image_processing import (
-        downsample_image,  # type: ignore
-    )
-
-    num_images = len(base64_strings)
-
-    if num_images < MIN_IMAGES_FOR_PARALLEL:
-        return [downsample_image(img) for img in base64_strings]
-
-    num_workers = min(num_images, MAX_WORKERS)
-
-    with multiprocessing.Pool(processes=num_workers) as pool:
-        results = pool.map(downsample_image, base64_strings)
-
-    sys.path.pop(0)
-
-    return results
-
-
-class Context:
-    def __init__(self, mode="local"):
-        pass
-
-    def load(self, input_data):
-        # this task can accept a dict or a test result object from the ValidMind Library
-        if isinstance(input_data, dict):
-            return input_data
-
-        # we are likely running outside of the ValidMind Library and need to convert
-        # the test result object to a dictionary
-        test_result = input_data
-
-        try:
-            from markdownify import markdownify as md
-        except ImportError as e:
-            raise ImportError(
-                "Failed to import markdownify. Please install the package to use this task."
-            ) from e
-
-        input_data = {
-            "test_name": test_result.result_id.split(".")[-1],
-            "test_description": md(test_result.result_metadata[0]["text"]),
-        }
-
-        if hasattr(test_result, "metric") and test_result.metric.summary is not None:
-            input_data["summary"] = test_result.metric.summary.serialize()
-        elif (
-            hasattr(test_result, "test_results")
-            and test_result.test_results.summary is not None
-        ):
-            input_data["summary"] = test_result.test_results.summary.serialize()
-
-        if test_result.figures:
-            input_data["figures"] = parallel_downsample_images(
-                [figure._get_b64_url() for figure in test_result.figures]
-            )
-
-        return input_data
diff --git a/validmind/ai/test_result_description/image_processing.py b/validmind/ai/test_result_description/image_processing.py
deleted file mode 100644
index 01ecfbdf4..000000000
--- a/validmind/ai/test_result_description/image_processing.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-import base64
-import io
-
-import numpy as np
-from PIL import Image, ImageEnhance, ImageFilter
-
-DOWNSAMPLE_PERCENTAGE = 50
-
-
-def open_base64_image(base64_string):
-    if base64_string.startswith("data:image/png;base64,"):
-        base64_string = base64_string.split(",")[1]
-
-    image_data = base64.b64decode(base64_string)
-    image_buffer = io.BytesIO(image_data)
-    image = Image.open(image_buffer)
-
-    return image
-
-
-def downsample_image(base64_string):
-    image = open_base64_image(base64_string)
-
-    # Calculate the target dimensions based on the reduction percentage
-    target_width = int(image.width * (1 - DOWNSAMPLE_PERCENTAGE / 100))
-    target_height = int(image.height * (1 - DOWNSAMPLE_PERCENTAGE / 100))
-
-    # If the image is already smaller than the target size, return the original
-    if image.width <= target_width and image.height <= target_height:
-        return base64_string
-
-    # remove any margins from the image
-    # Find the bounding box of non-uniform pixels (margin detection)
-    width, height = image.size
-    background = image.getpixel((0, 0))  # Assume top-left pixel is background color
-
-    def is_different(pixel):
-        return pixel != background
-
-    left = next(
-        x
-        for x in range(width)
-        if any(is_different(image.getpixel((x, y))) for y in range(height))
-    )
-    right = next(
-        x
-        for x in range(width - 1, -1, -1)
-        if any(is_different(image.getpixel((x, y))) for y in range(height))
-    )
-    top = next(
-        y
-        for y in range(height)
-        if any(is_different(image.getpixel((x, y))) for x in range(width))
-    )
-    bottom = next(
-        y
-        for y in range(height - 1, -1, -1)
-        if any(is_different(image.getpixel((x, y))) for x in range(width))
-    )
-
-    # Crop the image to remove the uniform margin (with some padding)
-    bbox = (left - 5, top - 5, right + 6, bottom + 6)
-    image = image.crop(bbox)
-
-    # If the image has an alpha channel, remove any transparent margins
-    if image.mode in ("RGBA", "LA"):
-        alpha = image.getchannel("A")
-        bbox = alpha.getbbox()
-        if bbox:
-            image = image.crop(bbox)
-
-    # Apply unsharp mask to enhance edges
-    image = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))
-
-    # Calculate new dimensions
-    aspect_ratio = image.width / image.height
-    new_height = target_height
-    new_width = int(new_height * aspect_ratio)
-
-    # print(f"downsampling from {width}x{height} to {new_width}x{new_height}")
-
-    # Ensure we don't exceed the target width
-    if new_width > target_width:
-        new_width = target_width
-        new_height = int(new_width / aspect_ratio)
-
-    # print(f"downsampling from {image.width}x{image.height} to {new_width}x{new_height}")
-
-    # Convert to numpy array for custom downsampling
-    img_array = np.array(image)
-
-    # Optimized area interpolation
-    h_factor = img_array.shape[0] / new_height
-    w_factor = img_array.shape[1] / new_width
-
-    h_indices = (np.arange(new_height).reshape(-1, 1) * h_factor).astype(int)
-    w_indices = (np.arange(new_width).reshape(1, -1) * w_factor).astype(int)
-
-    h_indices = np.minimum(h_indices, img_array.shape[0] - 1)
-    w_indices = np.minimum(w_indices, img_array.shape[1] - 1)
-
-    # Convert back to PIL Image
-    image = Image.fromarray(img_array[h_indices, w_indices].astype(np.uint8))
-
-    # Enhance contrast slightly
-    enhancer = ImageEnhance.Contrast(image)
-    image = enhancer.enhance(1.2)
-
-    # Sharpen the image
-    image = image.filter(ImageFilter.SHARPEN)
-
-    # Convert the image to bytes in PNG format
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    img_bytes = buffered.getvalue()
-
-    # Encode the bytes to base64
-    b64_encoded = base64.b64encode(img_bytes).decode("utf-8")
-
-    return f"data:image/png;base64,{b64_encoded}"
diff --git a/validmind/ai/test_result_description/system.jinja b/validmind/ai/test_result_description/system.jinja
deleted file mode 100644
index aff51b363..000000000
--- a/validmind/ai/test_result_description/system.jinja
+++ /dev/null
@@ -1,39 +0,0 @@
-You are an expert data scientist and MRM specialist.
-You are tasked with analyzing the results of a quantitative test run on some model or dataset.
-Your goal is to create a test description that will act as part of the model documentation.
-You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
-The overarching theme to maintain is MRM documentation.
-
-Examine the provided statistical test results and compose a description of the results.
-The results are either in the form of serialized tables or images of plots.
-Compose a description and interpretation of the result to accompany it in MRM documentation.
-It will be read by other data scientists and developers and by validators and stakeholders.
-
-Use valid Markdown syntax to format the response.
-Avoid long sentences and complex vocabulary.
-Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
-Structure the response clearly and logically.
-Respond only with your analysis and insights, not the verbatim test results.
-Respond only with the markdown content, no explanation or context for your response is necessary.
-Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
-
-Explain the test, its purpose, its mechanism/formula etc and why it is useful.
-If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
-Highlight the key insights from the test results. The key insights should be concise and easily understood.
-An insight should only be included if it is something not entirely obvious from the test results.
-End the response with any closing remarks, summary or additional useful information.
-
-Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
-
-<format>
-**<Test Name>** calculates the xyz ...
-
-This test is useful for ...
-
-**Key Insights:**
-
-The following key insights can be identified in the test results:
-
-- **<Insight Name>**: <insight description>
-- ...
-</format>
diff --git a/validmind/ai/test_result_description/user.jinja b/validmind/ai/test_result_description/user.jinja
deleted file mode 100644
index 1db866c9e..000000000
--- a/validmind/ai/test_result_description/user.jinja
+++ /dev/null
@@ -1,30 +0,0 @@
-**Test ID**: `{{ test_name }}`
-
-**Test Description**:
-
-{{ test_description }}
-
----
-
-Generate a description of the following result of the test using the instructions given in your system prompt.
-
-{%- if context %}
-**Context**:
-{{ context }}
-{%- endif %}
-
-{%- if summary %}
-**Test Result Tables** *(Raw Data)*:
-{{ summary }}
-{%- endif %}
-
-{%- if figures %}
-The following images make up the results of the test.
-{%- for b64_image_url in figures %}
-[[IMAGE:{{ b64_image_url }}]]
-{%- endfor %}
-{%- endif %}
-
-Keep your response concise and to the point!
-Only include content in your response if its something truly insightful or interesting!
-DO NOT VERBOSELY EXPLAIN THE TEST OR THE RESULTS!!!
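
With the prompt templates and client-side image processing deleted, the one remaining client-side input that shapes the backend-generated text is the global context string read by _get_llm_global_context() from the VALIDMIND_LLM_DESCRIPTIONS_CONTEXT environment variable. A small sketch of supplying it, assuming tests run in the same process; the wording of the context value is purely illustrative:

    import os

    # Must be set before tests run so generate_description() picks it up and
    # forwards it to the backend in the "context" field of the payload.
    os.environ["VALIDMIND_LLM_DESCRIPTIONS_CONTEXT"] = (
        "Retail credit scoring model; keep interpretations aligned with SR 11-7."
    )
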