Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }}
TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
TEST_RESPONSES_MODEL: ${{ vars.TEST_RESPONSES_MODEL }}
TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }}
TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
TEST_RESPONSES_MODEL: ${{ vars.TEST_RESPONSES_MODEL }}
TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
Expand Down
129 changes: 129 additions & 0 deletions examples/responses.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": "## Using Responses with Prediction Guard"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import necessary packages\n",
"import os\n",
"import json\n",
"\n",
"from predictionguard import PredictionGuard\n",
"\n",
"\n",
"# Set your Prediction Guard token and url as an environmental variable.\n",
"os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"<api key>\"\n",
"os.environ[\"PREDICTIONGUARD_URL\"] = \"<url>\"\n",
"\n",
"# Or set your Prediction Guard token and url when initializing the PredictionGuard class.\n",
"client = PredictionGuard(\n",
" api_key=\"<api_key>\",\n",
" url=\"<url>\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### Basic Responses"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = client.responses.create(\n",
" model=\"Hermes-3-Llama-3.1-8B\",\n",
" input=\"Tell me a funny joke about pirates.\"\n",
")\n",
"\n",
"print(json.dumps(\n",
" response,\n",
" sort_keys=True,\n",
" indent=4,\n",
" separators=(',', ': ')\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### Response with Images"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" # Text to use for inference.\n",
" \"type\": \"input_text\",\n",
" \"text\": \"What's in this image?\"\n",
" },\n",
" {\n",
" # Image to use for inference. Accepts image urls, files, and base64 encoded images, all under the \"image_url\" param.\n",
" \"type\": \"input_image\",\n",
" \"image_url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
" }\n",
" ]\n",
" },\n",
"]\n",
"\n",
"image_response = client.responses.create(\n",
" model=\"Qwen2.5-VL-7B-Instruct\",\n",
" input=input\n",
")\n",
"\n",
"print(json.dumps(\n",
" image_response,\n",
" sort_keys=True,\n",
" indent=4,\n",
" separators=(',', ': ')\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### List Responses Models"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_list = client.responses.list_models()\n",
"\n",
"print(model_list)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
22 changes: 17 additions & 5 deletions predictionguard/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional, Union

from .src.audio import Audio
from .src.responses import Responses
from .src.chat import Chat
from .src.completions import Completions
from .src.detokenize import Detokenize
Expand All @@ -16,13 +17,15 @@
from .src.toxicity import Toxicity
from .src.pii import Pii
from .src.injection import Injection
from .src.mcp_servers import MCPServers
from .src.mcp_tools import MCPTools
from .src.models import Models
from .version import __version__

__all__ = [
"PredictionGuard", "Chat", "Completions", "Embeddings",
"Audio", "Documents", "Rerank", "Tokenize", "Translate",
"Detokenize", "Factuality", "Toxicity", "Pii", "Injection",
"PredictionGuard", "Responses", "Chat", "Completions", "Embeddings",
"Audio", "Documents", "Rerank", "Tokenize", "Translate", "Detokenize",
"Factuality", "Toxicity", "Pii", "Injection", "MCPServers", "MCPTools",
"Models"
]

Expand Down Expand Up @@ -81,11 +84,14 @@ def __init__(
self._connect_client()

# Pass Prediction Guard class variables to inner classes
self.responses: Responses = Responses(self.api_key, self.url, self.timeout)
"""Responses allows for the usage of LLMs intended for agentic usages."""

self.chat: Chat = Chat(self.api_key, self.url, self.timeout)
"""Chat generates chat completions based on a conversation history"""
"""Chat generates chat completions based on a conversation history."""

self.completions: Completions = Completions(self.api_key, self.url, self.timeout)
"""Completions generates text completions based on the provided input"""
"""Completions generates text completions based on the provided input."""

self.embeddings: Embeddings = Embeddings(self.api_key, self.url, self.timeout)
"""Embedding generates chat completions based on a conversation history."""
Expand Down Expand Up @@ -120,6 +126,12 @@ def __init__(
self.detokenize: Detokenize = Detokenize(self.api_key, self.url, self.timeout)
"""Detokenizes generates text for input tokens."""

self.mcp_servers: MCPServers = MCPServers(self.api_key, self.url, self.timeout)
"""MCPServers lists all the MCP servers available in the Prediction Guard API."""

self.mcp_tools: MCPTools = MCPTools(self.api_key, self.url, self.timeout)
"""MCPTools lists all the MCP tools available in the Prediction Guard API."""

self.models: Models = Models(self.api_key, self.url, self.timeout)
"""Models lists all of the models available in the Prediction Guard API."""

Expand Down
2 changes: 1 addition & 1 deletion predictionguard/src/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def create(
:param model: The ID(s) of the model to use.
:param messages: The content of the call, an array of dictionaries containing a role and content.
:param input: A dictionary containing the PII and injection arguments.
:param output: A dictionary containing the consistency, factuality, and toxicity arguments.
:param output: A dictionary containing the factuality and toxicity arguments.
:param frequency_penalty: The frequency penalty to use.
:param logit_bias: The logit bias to use.
:param max_completion_tokens: The maximum amount of tokens the model should return.
Expand Down
2 changes: 1 addition & 1 deletion predictionguard/src/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def create(
:param model: The ID(s) of the model to use.
:param prompt: The prompt(s) to generate completions for.
:param input: A dictionary containing the PII and injection arguments.
:param output: A dictionary containing the consistency, factuality, and toxicity arguments.
:param output: A dictionary containing the factuality and toxicity arguments.
:param echo: A boolean indicating whether to echo the prompt(s) to the output.
:param frequency_penalty: The frequency penalty to use.
:param logit_bias: The logit bias to use.
Expand Down
85 changes: 85 additions & 0 deletions predictionguard/src/mcp_servers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import requests
from typing import Any, Dict, Optional

from ..version import __version__


class MCPServers:
    """
    MCPServers lists all the MCP servers available in the Prediction Guard API.

    Usage::

        import os
        import json

        from predictionguard import PredictionGuard

        # Set your Prediction Guard token and url as an environmental variable.
        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
        os.environ["PREDICTIONGUARD_URL"] = "<url>"

        # Or set your Prediction Guard token and url when initializing the PredictionGuard class.
        client = PredictionGuard(
            api_key="<api_key>",
            url="<url>"
        )

        response = client.mcp_servers.list()

        print(json.dumps(
            response,
            sort_keys=True,
            indent=4,
            separators=(",", ": ")
        ))
    """

    def __init__(self, api_key, url, timeout):
        # Connection settings handed down from the PredictionGuard client.
        self.api_key = api_key
        self.url = url
        self.timeout = timeout

    def list(self) -> Dict[str, Any]:
        """
        Creates an mcp_servers list request in the Prediction Guard REST API.

        :return: A dictionary containing the metadata of all the MCP servers.
        :raises ValueError: If the API request fails or the rate limit is exceeded.
        """

        # Delegate to the private helper that performs the HTTP request.
        return self._list_mcp_servers()

    def _list_mcp_servers(self):
        """
        Issues the GET /mcp_servers request and translates errors into ValueError.
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
            "User-Agent": "Prediction Guard Python Client: " + __version__,
        }

        response = requests.request(
            "GET", self.url + "/mcp_servers", headers=headers, timeout=self.timeout
        )

        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            raise ValueError(
                "Could not connect to Prediction Guard API. "
                "Too many requests, rate limit or quota exceeded."
            )
        else:
            # Check if there is a JSON body in the response. Read that in,
            # include the error field in the raised exception message.
            err = ""
            try:
                err = response.json()["error"]
            except Exception:
                pass
            # Fixed: original message said "Could not check for injection."
            # (copy-pasted from injection.py); report the actual operation.
            raise ValueError("Could not list MCP servers. " + err)
85 changes: 85 additions & 0 deletions predictionguard/src/mcp_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import requests
from typing import Any, Dict, Optional

from ..version import __version__


class MCPTools:
    """
    MCPTools lists all the MCP tools available in the Prediction Guard API.

    Usage::

        import os
        import json

        from predictionguard import PredictionGuard

        # Set your Prediction Guard token and url as an environmental variable.
        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
        os.environ["PREDICTIONGUARD_URL"] = "<url>"

        # Or set your Prediction Guard token and url when initializing the PredictionGuard class.
        client = PredictionGuard(
            api_key="<api_key>",
            url="<url>"
        )

        response = client.mcp_tools.list()

        print(json.dumps(
            response,
            sort_keys=True,
            indent=4,
            separators=(",", ": ")
        ))
    """

    def __init__(self, api_key, url, timeout):
        # Connection settings handed down from the PredictionGuard client.
        self.api_key = api_key
        self.url = url
        self.timeout = timeout

    def list(self) -> Dict[str, Any]:
        """
        Creates an mcp_tools list request in the Prediction Guard REST API.

        :return: A dictionary containing the metadata of all the MCP tools.
        :raises ValueError: If the API request fails or the rate limit is exceeded.
        """

        # Delegate to the private helper that performs the HTTP request.
        return self._list_mcp_tools()

    def _list_mcp_tools(self):
        """
        Issues the GET /mcp_tools request and translates errors into ValueError.
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
            "User-Agent": "Prediction Guard Python Client: " + __version__,
        }

        response = requests.request(
            "GET", self.url + "/mcp_tools", headers=headers, timeout=self.timeout
        )

        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            raise ValueError(
                "Could not connect to Prediction Guard API. "
                "Too many requests, rate limit or quota exceeded."
            )
        else:
            # Check if there is a JSON body in the response. Read that in,
            # include the error field in the raised exception message.
            err = ""
            try:
                err = response.json()["error"]
            except Exception:
                pass
            # Fixed: original message said "Could not check for injection."
            # (copy-pasted from injection.py); report the actual operation.
            raise ValueError("Could not list MCP tools. " + err)
Loading