Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }}
TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
TEST_RESPONSES_MODEL: ${{ vars.TEST_RESPONSES_MODEL }}
TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }}
TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
TEST_RESPONSES_MODEL: ${{ vars.TEST_RESPONSES_MODEL }}
TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
Expand Down
129 changes: 129 additions & 0 deletions examples/responses.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": "## Using Responses with Prediction Guard"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import necessary packages\n",
"import os\n",
"import json\n",
"\n",
"from predictionguard import PredictionGuard\n",
"\n",
"\n",
"# Set your Prediction Guard token and url as an environmental variable.\n",
"os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"<api key>\"\n",
"os.environ[\"PREDICTIONGUARD_URL\"] = \"<url>\"\n",
"\n",
"# Or set your Prediction Guard token and url when initializing the PredictionGuard class.\n",
"client = PredictionGuard(\n",
" api_key=\"<api_key>\",\n",
" url=\"<url>\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### Basic Responses"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = client.responses.create(\n",
" model=\"Hermes-3-Llama-3.1-8B\",\n",
" input=\"Tell me a funny joke about pirates.\"\n",
")\n",
"\n",
"print(json.dumps(\n",
" response,\n",
" sort_keys=True,\n",
" indent=4,\n",
" separators=(',', ': ')\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### Response with Images"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" # Text to use for inference.\n",
" \"type\": \"input_text\",\n",
" \"text\": \"What's in this image?\"\n",
" },\n",
" {\n",
" # Image to use for inference. Accepts image urls, files, and base64 encoded images, all under the \"image_url\" param.\n",
" \"type\": \"input_image\",\n",
" \"image_url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
" }\n",
" ]\n",
" },\n",
"]\n",
"\n",
"image_response = client.responses.create(\n",
" model=\"Qwen2.5-VL-7B-Instruct\",\n",
" input=input\n",
")\n",
"\n",
"print(json.dumps(\n",
" image_response,\n",
" sort_keys=True,\n",
" indent=4,\n",
" separators=(',', ': ')\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "### List Responses Models"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_list = client.responses.list_models()\n",
"\n",
"print(model_list)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
22 changes: 17 additions & 5 deletions predictionguard/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional, Union

from .src.audio import Audio
from .src.responses import Responses
from .src.chat import Chat
from .src.completions import Completions
from .src.detokenize import Detokenize
Expand All @@ -16,13 +17,15 @@
from .src.toxicity import Toxicity
from .src.pii import Pii
from .src.injection import Injection
from .src.mcp_servers import MCPServers
from .src.mcp_tools import MCPTools
from .src.models import Models
from .version import __version__

__all__ = [
"PredictionGuard", "Chat", "Completions", "Embeddings",
"Audio", "Documents", "Rerank", "Tokenize", "Translate",
"Detokenize", "Factuality", "Toxicity", "Pii", "Injection",
"PredictionGuard", "Responses", "Chat", "Completions", "Embeddings",
"Audio", "Documents", "Rerank", "Tokenize", "Translate", "Detokenize",
"Factuality", "Toxicity", "Pii", "Injection", "MCPServers", "MCPTools",
"Models"
]

Expand Down Expand Up @@ -81,11 +84,14 @@ def __init__(
self._connect_client()

# Pass Prediction Guard class variables to inner classes
self.responses: Responses = Responses(self.api_key, self.url, self.timeout)
"""Responses allows for the usage of LLMs intended for agentic usages."""

self.chat: Chat = Chat(self.api_key, self.url, self.timeout)
"""Chat generates chat completions based on a conversation history"""
"""Chat generates chat completions based on a conversation history."""

self.completions: Completions = Completions(self.api_key, self.url, self.timeout)
"""Completions generates text completions based on the provided input"""
"""Completions generates text completions based on the provided input."""

self.embeddings: Embeddings = Embeddings(self.api_key, self.url, self.timeout)
"""Embedding generates chat completions based on a conversation history."""
Expand Down Expand Up @@ -120,6 +126,12 @@ def __init__(
self.detokenize: Detokenize = Detokenize(self.api_key, self.url, self.timeout)
"""Detokenizes generates text for input tokens."""

self.mcp_servers: MCPServers = MCPServers(self.api_key, self.url, self.timeout)
"""MCPServers lists all the MCP servers available in the Prediction Guard API."""

self.mcp_tools: MCPTools = MCPTools(self.api_key, self.url, self.timeout)
"""MCPTools lists all the MCP tools available in the Prediction Guard API."""

self.models: Models = Models(self.api_key, self.url, self.timeout)
"""Models lists all of the models available in the Prediction Guard API."""

Expand Down
2 changes: 1 addition & 1 deletion predictionguard/src/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def create(
:param model: The ID(s) of the model to use.
:param messages: The content of the call, an array of dictionaries containing a role and content.
:param input: A dictionary containing the PII and injection arguments.
:param output: A dictionary containing the consistency, factuality, and toxicity arguments.
:param output: A dictionary containing the factuality and toxicity arguments.
:param frequency_penalty: The frequency penalty to use.
:param logit_bias: The logit bias to use.
:param max_completion_tokens: The maximum amount of tokens the model should return.
Expand Down
2 changes: 1 addition & 1 deletion predictionguard/src/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def create(
:param model: The ID(s) of the model to use.
:param prompt: The prompt(s) to generate completions for.
:param input: A dictionary containing the PII and injection arguments.
:param output: A dictionary containing the consistency, factuality, and toxicity arguments.
:param output: A dictionary containing the factuality and toxicity arguments.
:param echo: A boolean indicating whether to echo the prompt(s) to the output.
:param frequency_penalty: The frequency penalty to use.
:param logit_bias: The logit bias to use.
Expand Down
85 changes: 85 additions & 0 deletions predictionguard/src/mcp_servers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import requests
from typing import Any, Dict, Optional

from ..version import __version__


class MCPServers:
    """
    MCPServers lists all the MCP servers available in the Prediction Guard API.

    Usage::

        import os
        import json

        from predictionguard import PredictionGuard

        # Set your Prediction Guard token and url as an environmental variable.
        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
        os.environ["PREDICTIONGUARD_URL"] = "<url>"

        # Or set your Prediction Guard token and url when initializing the PredictionGuard class.
        client = PredictionGuard(
            api_key="<api_key>",
            url="<url>"
        )

        response = client.mcp_servers.list()

        print(json.dumps(
            response,
            sort_keys=True,
            indent=4,
            separators=(",", ": ")
        ))
    """

    def __init__(self, api_key, url, timeout):
        # Connection settings handed down from the PredictionGuard client.
        self.api_key = api_key
        self.url = url
        self.timeout = timeout

    def list(self) -> Dict[str, Any]:
        """
        Creates an mcp_servers list request in the Prediction Guard REST API.

        :return: A dictionary containing the metadata of all the MCP servers.
        :raises ValueError: If the API request fails or the rate limit is exceeded.
        """

        # Delegate to the private helper that performs the HTTP request.
        return self._list_mcp_servers()

    def _list_mcp_servers(self):
        """
        Issues the GET /mcp_servers request and translates errors into ValueError.
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
            "User-Agent": "Prediction Guard Python Client: " + __version__,
        }

        response = requests.request(
            "GET", self.url + "/mcp_servers", headers=headers, timeout=self.timeout
        )

        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            raise ValueError(
                "Could not connect to Prediction Guard API. "
                "Too many requests, rate limit or quota exceeded."
            )
        else:
            # Check if there is a JSON body in the response. Read that in,
            # include the error field in the raised exception message.
            err = ""
            try:
                err = response.json()["error"]
            except Exception:
                pass
            # Fixed: original message said "Could not check for injection."
            # (copy-pasted from injection.py); report the actual operation.
            raise ValueError("Could not list MCP servers. " + err)
85 changes: 85 additions & 0 deletions predictionguard/src/mcp_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import requests
from typing import Any, Dict, Optional

from ..version import __version__


class MCPTools:
    """
    MCPTools lists all the MCP tools available in the Prediction Guard API.

    Usage::

        import os
        import json

        from predictionguard import PredictionGuard

        # Set your Prediction Guard token and url as an environmental variable.
        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
        os.environ["PREDICTIONGUARD_URL"] = "<url>"

        # Or set your Prediction Guard token and url when initializing the PredictionGuard class.
        client = PredictionGuard(
            api_key="<api_key>",
            url="<url>"
        )

        response = client.mcp_tools.list()

        print(json.dumps(
            response,
            sort_keys=True,
            indent=4,
            separators=(",", ": ")
        ))
    """

    def __init__(self, api_key, url, timeout):
        # Connection settings handed down from the PredictionGuard client.
        self.api_key = api_key
        self.url = url
        self.timeout = timeout

    def list(self) -> Dict[str, Any]:
        """
        Creates an mcp_tools list request in the Prediction Guard REST API.

        :return: A dictionary containing the metadata of all the MCP tools.
        :raises ValueError: If the API request fails or the rate limit is exceeded.
        """

        # Delegate to the private helper that performs the HTTP request.
        return self._list_mcp_tools()

    def _list_mcp_tools(self):
        """
        Issues the GET /mcp_tools request and translates errors into ValueError.
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
            "User-Agent": "Prediction Guard Python Client: " + __version__,
        }

        response = requests.request(
            "GET", self.url + "/mcp_tools", headers=headers, timeout=self.timeout
        )

        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            raise ValueError(
                "Could not connect to Prediction Guard API. "
                "Too many requests, rate limit or quota exceeded."
            )
        else:
            # Check if there is a JSON body in the response. Read that in,
            # include the error field in the raised exception message.
            err = ""
            try:
                err = response.json()["error"]
            except Exception:
                pass
            # Fixed: original message said "Could not check for injection."
            # (copy-pasted from injection.py); report the actual operation.
            raise ValueError("Could not list MCP tools. " + err)
Loading