diff --git a/fixtures/test_audio.wav b/fixtures/test_audio.wav new file mode 100644 index 0000000..3184d37 Binary files /dev/null and b/fixtures/test_audio.wav differ diff --git a/fixtures/test_csv.csv b/fixtures/test_csv.csv new file mode 100644 index 0000000..e7bba0d --- /dev/null +++ b/fixtures/test_csv.csv @@ -0,0 +1,6 @@ +John,Doe,120 jefferson st.,Riverside, NJ, 08075 +Jack,McGinnis,220 hobo Av.,Phila, PA,09119 +"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075 +Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234 +,Blankman,,SomeTown, SD, 00298 +"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123 diff --git a/fixtures/test_pdf.pdf b/fixtures/test_pdf.pdf new file mode 100644 index 0000000..c01805e Binary files /dev/null and b/fixtures/test_pdf.pdf differ diff --git a/predictionguard/client.py b/predictionguard/client.py index c4762de..0bc9e9f 100644 --- a/predictionguard/client.py +++ b/predictionguard/client.py @@ -3,8 +3,10 @@ import requests from typing import Optional +from .src.audio import Audio from .src.chat import Chat from .src.completions import Completions +from .src.documents import Documents from .src.embeddings import Embeddings from .src.rerank import Rerank from .src.tokenize import Tokenize @@ -17,9 +19,9 @@ from .version import __version__ __all__ = [ - "PredictionGuard", "Chat", "Completions", "Embeddings", "Rerank", - "Tokenize", "Translate", "Factuality", "Toxicity", "Pii", "Injection", - "Models" + "PredictionGuard", "Chat", "Completions", "Embeddings", + "Audio", "Documents", "Rerank", "Tokenize", "Translate", + "Factuality", "Toxicity", "Pii", "Injection", "Models" ] class PredictionGuard: @@ -65,6 +67,12 @@ def __init__( self.embeddings: Embeddings = Embeddings(self.api_key, self.url) """Embedding generates chat completions based on a conversation history.""" + self.audio: Audio = Audio(self.api_key, self.url) + """Audio allows for the transcription of audio files.""" + + self.documents: Documents = Documents(self.api_key, self.url) + """Documents allows you to extract text from various document file types.""" + self.rerank: Rerank = Rerank(self.api_key, self.url) """Rerank sorts text inputs by semantic relevance to a specified query.""" diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py new file mode 100644 index 0000000..a7348c3 --- /dev/null +++ b/predictionguard/src/audio.py @@ -0,0 +1,98 @@ +import json + +import requests +from typing import Any, Dict, Optional + +from ..version import __version__ + + +class Audio: + """Audio generates a response based on audio data. + + Usage:: + + import os + import json + + from predictionguard import PredictionGuard + + # Set your Prediction Guard token as an environmental variable. + os.environ["PREDICTIONGUARD_API_KEY"] = "" + + client = PredictionGuard() + + result = client.audio.transcriptions.create( + model="whisper-3-large-instruct", file=sample_audio.wav + ) + + print(json.dumps(result, sort_keys=True, indent=4, separators=(",", ": "))) + """ + + def __init__(self, api_key, url): + self.api_key = api_key + self.url = url + + self.transcriptions: AudioTranscriptions = AudioTranscriptions(self.api_key, self.url) + +class AudioTranscriptions: + def __init__(self, api_key, url): + self.api_key = api_key + self.url = url + + def create( + self, + model: str, + file: str + ) -> Dict[str, Any]: + """ + Creates a audio transcription request to the Prediction Guard /audio/transcriptions API + + :param model: The model to use + :param file: Audio file to be transcribed + :result: A dictionary containing the transcribed text. + """ + + # Create a list of tuples, each containing all the parameters for + # a call to _transcribe_audio + args = (model, file) + + # Run _transcribe_audio + choices = self._transcribe_audio(*args) + return choices + + def _transcribe_audio(self, model, file): + """ + Function to transcribe an audio file. + """ + + headers = { + "Authorization": "Bearer " + self.api_key, + "User-Agent": "Prediction Guard Python Client: " + __version__, + } + + with open(file, "rb") as audio_file: + files = {"file": (file, audio_file, "audio/wav")} + data = {"model": model} + + response = requests.request( + "POST", self.url + "/audio/transcriptions", headers=headers, files=files, data=data + ) + + # If the request was successful, print the proxies. + if response.status_code == 200: + ret = response.json() + return ret + elif response.status_code == 429: + raise ValueError( + "Could not connect to Prediction Guard API. " + "Too many requests, rate limit or quota exceeded." + ) + else: + # Check if there is a json body in the response. Read that in, + # print out the error field in the json body, and raise an exception. + err = "" + try: + err = response.json()["error"] + except Exception: + pass + raise ValueError("Could not transcribe the audio file. " + err) \ No newline at end of file diff --git a/predictionguard/src/chat.py b/predictionguard/src/chat.py index 332c91e..753fbe2 100644 --- a/predictionguard/src/chat.py +++ b/predictionguard/src/chat.py @@ -44,7 +44,7 @@ class Chat: { "role": "user", "content": "Haha. Good one." - }, + } ] result = client.chat.completions.create( @@ -69,15 +69,36 @@ def __init__(self, api_key, url): def create( self, model: str, - messages: Union[str, List[Dict[str, Any]]], + messages: Union[ + str, List[ + Dict[str, Any] + ] + ], input: Optional[Dict[str, Any]] = None, output: Optional[Dict[str, Any]] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[ + Dict[str, int] + ] = None, max_completion_tokens: Optional[int] = 100, max_tokens: Optional[int] = None, + parallel_tool_calls: Optional[bool] = None, + presence_penalty: Optional[float] = None, + stop: Optional[ + Union[ + str, List[str] + ] + ] = None, + stream: Optional[bool] = False, temperature: Optional[float] = 1.0, + tool_choice: Optional[Union[ + str, Dict[ + str, Dict[str, str] + ] + ]] = "none", + tools: Optional[List[Dict[str, Union[str, Dict[str, str]]]]] = None, top_p: Optional[float] = 0.99, top_k: Optional[float] = 50, - stream: Optional[bool] = False, ) -> Dict[str, Any]: """ Creates a chat request for the Prediction Guard /chat API. @@ -86,11 +107,18 @@ def create( :param messages: The content of the call, an array of dictionaries containing a role and content. :param input: A dictionary containing the PII and injection arguments. :param output: A dictionary containing the consistency, factuality, and toxicity arguments. + :param frequency_penalty: The frequency penalty to use. + :param logit_bias: The logit bias to use. :param max_completion_tokens: The maximum amount of tokens the model should return. + :param parallel_tool_calls: The parallel tool calls to use. + :param presence_penalty: The presence penalty to use. + :param stop: The completion stopping criteria. + :param stream: Option to stream the API response :param temperature: The consistency of the model responses to the same prompt. The higher the more consistent. + :param tool_choice: The tool choice to use. + :param tools: Options to pass to the tool choice. :param top_p: The sampling for the model to use. :param top_k: The Top-K sampling for the model to use. - :param stream: Option to stream the API response :return: A dictionary containing the chat response. """ @@ -110,11 +138,18 @@ def create( messages, input, output, + frequency_penalty, + logit_bias, max_completion_tokens, + parallel_tool_calls, + presence_penalty, + stop, + stream, temperature, + tool_choice, + tools, top_p, - top_k, - stream, + top_k ) # Run _generate_chat @@ -128,11 +163,18 @@ def _generate_chat( messages, input, output, + frequency_penalty, + logit_bias, max_completion_tokens, + parallel_tool_calls, + presence_penalty, + stop, + stream, temperature, + tool_choice, + tools, top_p, top_k, - stream, ): """ Function to generate a single chat response. @@ -257,11 +299,18 @@ def stream_generator(url, headers, payload, stream): payload_dict = { "model": model, "messages": messages, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, "max_completion_tokens": max_completion_tokens, + "parallel_tool_calls": parallel_tool_calls, + "presence_penalty": presence_penalty, + "stop": stop, + "stream": stream, "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, "top_p": top_p, "top_k": top_k, - "stream": stream, } if input: diff --git a/predictionguard/src/completions.py b/predictionguard/src/completions.py index 845baa1..9a03920 100644 --- a/predictionguard/src/completions.py +++ b/predictionguard/src/completions.py @@ -22,8 +22,14 @@ def create( prompt: Union[str, List[str]], input: Optional[Dict[str, Any]] = None, output: Optional[Dict[str, Any]] = None, + echo: Optional[bool] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[Dict[str, int]] = None, max_completion_tokens: Optional[int] = 100, max_tokens: Optional[int] = None, + presence_penalty: Optional[float] = None, + stop: Optional[Union[str, List[str]]] = None, + stream: Optional[bool] = False, temperature: Optional[float] = 1.0, top_p: Optional[float] = 0.99, top_k: Optional[int] = 50 @@ -35,7 +41,13 @@ def create( :param prompt: The prompt(s) to generate completions for. :param input: A dictionary containing the PII and injection arguments. :param output: A dictionary containing the consistency, factuality, and toxicity arguments. + :param echo: A boolean indicating whether to echo the prompt(s) to the output. + :param frequency_penalty: The frequency penalty to use. + :param logit_bias: The logit bias to use. :param max_completion_tokens: The maximum number of tokens to generate in the completion(s). + :param presence_penalty: The presence penalty to use. + :param stop: The completion stopping criteria. + :param stream: The stream to use for HTTP requests. :param temperature: The sampling temperature to use. :param top_p: The nucleus sampling probability to use. :param top_k: The Top-K sampling for the model to use. @@ -53,7 +65,22 @@ def create( # Create a list of tuples, each containing all the parameters for # a call to _generate_completion - args = (model, prompt, input, output, max_completion_tokens, temperature, top_p, top_k) + args = ( + model, + prompt, + input, + output, + echo, + frequency_penalty, + logit_bias, + max_completion_tokens, + presence_penalty, + stop, + stream, + temperature, + top_p, + top_k + ) # Run _generate_completion choices = self._generate_completion(*args) @@ -61,14 +88,76 @@ def create( return choices def _generate_completion( - self, model, prompt, - input, output, max_completion_tokens, - temperature, top_p, top_k + self, + model, + prompt, + input, + output, + echo, + frequency_penalty, + logit_bias, + max_completion_tokens, + presence_penalty, + stop, + stream, + temperature, + top_p, + top_k ): """ Function to generate a single completion. """ + def return_dict(url, headers, payload): + response = requests.request( + "POST", url + "/completions", headers=headers, data=payload + ) + # If the request was successful, print the proxies. + if response.status_code == 200: + ret = response.json() + return ret + elif response.status_code == 429: + raise ValueError( + "Could not connect to Prediction Guard API. " + "Too many requests, rate limit or quota exceeded." + ) + else: + # Check if there is a json body in the response. Read that in, + # then print out the error field in the json body, and raise an exception. + err = "" + try: + err = response.json()["error"] + except Exception: + pass + raise ValueError("Could not make prediction. " + err) + + def stream_generator(url, headers, payload, stream): + with requests.post( + url + "/completions", + headers=headers, + data=payload, + stream=stream, + ) as response: + response.raise_for_status() + + for line in response.iter_lines(): + if line: + decoded_line = line.decode("utf-8") + formatted_return = ( + "{" + (decoded_line.replace("data", '"data"', 1)) + "}" + ) + try: + dict_return = json.loads(formatted_return) + except json.decoder.JSONDecodeError: + pass + else: + try: + dict_return["data"]["choices"][0]["text"] + except KeyError: + pass + else: + yield dict_return + # Make a prediction using the proxy. headers = { "Content-Type": "application/json", @@ -79,7 +168,13 @@ def _generate_completion( payload_dict = { "model": model, "prompt": prompt, + "echo": echo, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, "max_completion_tokens": max_completion_tokens, + "presence_penalty": presence_penalty, + "stop": stop, + "stream": stream, "temperature": temperature, "top_p": top_p, "top_k": top_k @@ -87,30 +182,19 @@ def _generate_completion( if input: payload_dict["input"] = input if output: - payload_dict["output"] = output + if stream: + raise ValueError( + "Factuality and toxicity checks are not supported when streaming is enabled." + ) + else: + payload_dict["output"] = output payload = json.dumps(payload_dict) - response = requests.request( - "POST", self.url + "/completions", headers=headers, data=payload - ) - # If the request was successful, print the proxies. - if response.status_code == 200: - ret = response.json() - return ret - elif response.status_code == 429: - raise ValueError( - "Could not connect to Prediction Guard API. " - "Too many requests, rate limit or quota exceeded." - ) + if stream: + return stream_generator(self.url, headers, payload, stream) + else: - # Check if there is a json body in the response. Read whether the API response should be streamed in, - # print out the error field in the json body, and raise an exception. - err = "" - try: - err = response.json()["error"] - except Exception: - pass - raise ValueError("Could not make prediction. " + err) + return return_dict(self.url, headers, payload) def list_models(self) -> List[str]: # Get the list of current models. diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py new file mode 100644 index 0000000..69b3ae9 --- /dev/null +++ b/predictionguard/src/documents.py @@ -0,0 +1,89 @@ +import json +from pyexpat import model + +import requests +from typing import Any, Dict, Optional + +from ..version import __version__ + + +class Documents: + """Documents allows you to extract text from various document file types. + + Usage:: + + from predictionguard import PredictionGuard + + # Set your Prediction Guard token as an environmental variable. + os.environ["PREDICTIONGUARD_API_KEY"] = "" + + client = PredictionGuard() + + response = client.documents.extract.create( + file="sample.pdf" + ) + + print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": "))) + """ + + def __init__(self, api_key, url): + self.api_key = api_key + self.url = url + + self.extract: DocumentsExtract = DocumentsExtract(self.api_key, self.url) + +class DocumentsExtract: + def __init__(self, api_key, url): + self.api_key = api_key + self.url = url + + def create( + self, + file: str + ) -> Dict[str, Any]: + """ + Creates a documents request to the Prediction Guard /documents/extract API + + :param file: Document to be parsed + :result: A dictionary containing the title, content, and length of the document. + """ + + # Run _extract_documents + choices = self._extract_documents(file) + return choices + + def _extract_documents(self, file): + """ + Function to extract a document. + """ + + headers = { + "Authorization": "Bearer " + self.api_key, + "User-Agent": "Prediction Guard Python Client: " + __version__, + } + + with open(file, "rb") as doc_file: + files = {"file": (file, doc_file)} + + response = requests.request( + "POST", self.url + "/documents/extract", headers=headers, files=files + ) + + # If the request was successful, print the proxies. + if response.status_code == 200: + ret = response.json() + return ret + elif response.status_code == 429: + raise ValueError( + "Could not connect to Prediction Guard API. " + "Too many requests, rate limit or quota exceeded." + ) + else: + # Check if there is a json body in the response. Read that in, + # print out the error field in the json body, and raise an exception. + err = "" + try: + err = response.json()["error"] + except Exception: + pass + raise ValueError("Could not extract document. " + err) \ No newline at end of file diff --git a/predictionguard/src/translate.py b/predictionguard/src/translate.py index 43c59be..336a12f 100644 --- a/predictionguard/src/translate.py +++ b/predictionguard/src/translate.py @@ -7,6 +7,28 @@ class Translate: + # UNCOMMENT WHEN DEPRECATED + # """No longer supported. + # """ + # + # def __init__(self, api_key, url): + # self.api_key = api_key + # self.url = url + # + # def create( + # self, + # text: Optional[str], + # source_lang: Optional[str], + # target_lang: Optional[str], + # use_third_party_engine: Optional[bool] = False + # ) -> Dict[str, Any]: + # """ + # No longer supported + # """ + # + # raise ValueError( + # "The translate functionality is no longer supported." + # ) """Translate converts text from one language to another. Usage:: @@ -28,6 +50,7 @@ class Translate: print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": "))) """ + # REMOVE BELOW HERE FOR DEPRECATION def __init__(self, api_key, url): self.api_key = api_key self.url = url @@ -96,4 +119,4 @@ def _generate_translation(self, text, source_lang, target_lang, use_third_party_ err = response.json()["error"] except Exception: pass - raise ValueError("Could not make translation. " + err) + raise ValueError("Could not make translation. " + err) \ No newline at end of file diff --git a/predictionguard/version.py b/predictionguard/version.py index 9c132bc..c77fef4 100644 --- a/predictionguard/version.py +++ b/predictionguard/version.py @@ -1,2 +1,2 @@ # Setting the package version -__version__ = "2.7.1" +__version__ = "2.8.0" diff --git a/pyproject.toml b/pyproject.toml index 31718e0..da06461 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,10 @@ build-backend = "flit_core.buildapi" [project] name = "predictionguard" -authors = [{name = "Daniel Whitenack", email = "dan@predictionguard.com"}] +authors = [ + {name = "Daniel Whitenack", email = "dan@predictionguard.com"}, + {name = "Jacob Mansdorfer", email = "jacob@predictionguard.com"} + ] readme = "README.md" license = {file = "LICENSE"} classifiers = ["License :: OSI Approved :: MIT License"] diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000..d95ccf9 --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,16 @@ +import os + +from predictionguard import PredictionGuard + + +def test_audio_transcribe_success(): + test_client = PredictionGuard() + + response = test_client.audio.transcriptions.create( + model="base", + file="fixtures/test_audio.wav" + ) + + print(response) + + assert len(response["text"]) > 0 \ No newline at end of file diff --git a/tests/test_chat.py b/tests/test_chat.py index 7c45a71..c4c0a13 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -187,6 +187,21 @@ def test_chat_completions_create_vision_stream_fail(): response_list.append(res) +def test_chat_completions_create_tool_call(): + test_client = PredictionGuard() + + response = test_client.chat.completions.create( + model=os.environ["TEST_MODEL_NAME"], + messages=[ + {"role": "system", "content": "You are a helpful chatbot."}, + {"role": "user", "content": "Tell me a joke."}, + ], + + ) + + assert len(response["choices"][0]["message"]["content"]) > 0 + + def test_chat_completions_list_models(): test_client = PredictionGuard() diff --git a/tests/test_completions.py b/tests/test_completions.py index 778583b..f3adcb7 100644 --- a/tests/test_completions.py +++ b/tests/test_completions.py @@ -1,5 +1,7 @@ import os +import pytest + from predictionguard import PredictionGuard @@ -33,3 +35,35 @@ def test_completions_list_models(): assert len(response) > 0 assert type(response[0]) is str + + +def test_completions_create_stream(): + test_client = PredictionGuard() + + response_list = [] + for res in test_client.completions.create( + model=os.environ["TEST_MODEL_NAME"], + prompt="Tell me a joke.", + stream=True, + ): + response_list.append(res) + + assert len(response_list) > 1 + + +def test_completions_create_stream_output_fail(): + test_client = PredictionGuard() + + streaming_error = "Factuality and toxicity checks are not supported when streaming is enabled.".replace( + "\n", "" + ) + + response_list = [] + with pytest.raises(ValueError, match=streaming_error): + for res in test_client.completions.create( + model=os.environ["TEST_MODEL_NAME"], + prompt="Tell me a joke.", + stream=True, + output={"toxicity": True}, + ): + response_list.append(res) \ No newline at end of file diff --git a/tests/test_documents.py b/tests/test_documents.py new file mode 100644 index 0000000..c877c0e --- /dev/null +++ b/tests/test_documents.py @@ -0,0 +1,13 @@ +import os + +from predictionguard import PredictionGuard + + +def test_documents_extract_success(): + test_client = PredictionGuard() + + response = test_client.documents.extract.create( + file="fixtures/test_pdf.pdf" + ) + + assert len(response["contents"]) > 0 \ No newline at end of file diff --git a/tests/test_translate.py b/tests/test_translate.py index a0b4372..ee16e46 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -9,4 +9,4 @@ def test_translate_create(): ) assert type(response["best_score"]) is float - assert len(response["best_translation"]) + assert len(response["best_translation"]) \ No newline at end of file