From 469a55d88a872621e646c90eab9671113a79562b Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Mon, 14 Apr 2025 10:34:10 -0400 Subject: [PATCH 1/2] adding exts to audio and document, along with new doc params --- predictionguard/src/audio.py | 26 ++++++++++++++--- predictionguard/src/documents.py | 50 +++++++++++++++++++++++++++----- predictionguard/version.py | 2 +- 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py index dce5c46..b655072 100644 --- a/predictionguard/src/audio.py +++ b/predictionguard/src/audio.py @@ -1,5 +1,3 @@ -import json - import requests from typing import Any, Dict, Optional @@ -46,6 +44,10 @@ def create( language: Optional[str] = "auto", temperature: Optional[float] = 0.0, prompt: Optional[str] = "", + toxicity: Optional[bool] = False, + pii: Optional[str] = "", + replace_method: Optional[str] = "", + injection: Optional[bool] = False, ) -> Dict[str, Any]: """ Creates a audio transcription request to the Prediction Guard /audio/transcriptions API @@ -55,18 +57,30 @@ def create( :param language: The language of the audio file :param temperature: The temperature parameter for model transcription :param prompt: A prompt to assist in transcription styling + :param toxicity: Whether to check for output toxicity + :param pii: Whether to check for or replace pii + :param replace_method: Replace method for any PII that is present. + :param injection: Whether to check for prompt injection :result: A dictionary containing the transcribed text. """ # Create a list of tuples, each containing all the parameters for # a call to _transcribe_audio - args = (model, file, language, temperature, prompt) + args = ( + model, file, language, temperature, + prompt, toxicity, pii, replace_method, + injection + ) # Run _transcribe_audio choices = self._transcribe_audio(*args) return choices - def _transcribe_audio(self, model, file, language, temperature, prompt): + def _transcribe_audio( + self, model, file, + language, temperature, prompt, + toxicity, pii, replace_method, injection + ): """ Function to transcribe an audio file. """ @@ -74,6 +88,10 @@ def _transcribe_audio(self, model, file, language, temperature, prompt): headers = { "Authorization": "Bearer " + self.api_key, "User-Agent": "Prediction Guard Python Client: " + __version__, + "Toxicity": toxicity, + "Pii": pii, + "Replace-Method": replace_method, + "Injection": injection } with open(file, "rb") as audio_file: diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py index 69b3ae9..9395247 100644 --- a/predictionguard/src/documents.py +++ b/predictionguard/src/documents.py @@ -1,6 +1,3 @@ -import json -from pyexpat import model - import requests from typing import Any, Dict, Optional @@ -10,7 +7,7 @@ class Documents: """Documents allows you to extract text from various document file types. - Usage:: + Usage: from predictionguard import PredictionGuard @@ -39,20 +36,45 @@ def __init__(self, api_key, url): def create( self, - file: str + file: str, + embed_images: Optional[bool] = False, + output_format: Optional[str] = None, + chunk_document: Optional[bool] = False, + chunk_size: Optional[int] = None, + toxicity: Optional[bool] = False, + pii: Optional[str] = "", + replace_method: Optional[str] = "", + injection: Optional[bool] = False, ) -> Dict[str, Any]: """ Creates a documents request to the Prediction Guard /documents/extract API :param file: Document to be parsed + :param embed_images: Whether to embed images into documents + :param output_format: Output format + :param chunk_document: Whether to chunk documents into chunks + :param chunk_size: Chunk size + :param toxicity: Whether to check for output toxicity + :param pii: Whether to check for or replace pii + :param replace_method: Replace method for any PII that is present. + :param injection: Whether to check for prompt injection :result: A dictionary containing the title, content, and length of the document. """ # Run _extract_documents - choices = self._extract_documents(file) + choices = self._extract_documents( + file, embed_images, output_format, + chunk_document, chunk_size, toxicity, + pii, replace_method, injection + ) return choices - def _extract_documents(self, file): + def _extract_documents( + self, file, embed_images, + output_format, chunk_document, + chunk_size, toxicity, pii, + replace_method, injection + ): """ Function to extract a document. """ @@ -60,13 +82,25 @@ def _extract_documents(self, file): headers = { "Authorization": "Bearer " + self.api_key, "User-Agent": "Prediction Guard Python Client: " + __version__, + "Toxicity": toxicity, + "Pii": pii, + "Replace-Method": replace_method, + "Injection": injection + } + + data = { + "embedImages": embed_images, + "outputFormat": output_format, + "chunkDocument": chunk_document, + "chunkSize": chunk_size, } with open(file, "rb") as doc_file: files = {"file": (file, doc_file)} response = requests.request( - "POST", self.url + "/documents/extract", headers=headers, files=files + "POST", self.url + "/documents/extract", + headers=headers, files=files, data=data ) # If the request was successful, print the proxies. diff --git a/predictionguard/version.py b/predictionguard/version.py index 5c1c3b7..5e64910 100644 --- a/predictionguard/version.py +++ b/predictionguard/version.py @@ -1,2 +1,2 @@ # Setting the package version -__version__ = "2.8.1" +__version__ = "2.8.2" From 4a865aabca23aad809d3a2d74bd69823483b0de3 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Mon, 14 Apr 2025 11:22:33 -0400 Subject: [PATCH 2/2] fixing header types --- predictionguard/src/audio.py | 4 ++-- predictionguard/src/documents.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py index b655072..1a1b781 100644 --- a/predictionguard/src/audio.py +++ b/predictionguard/src/audio.py @@ -88,10 +88,10 @@ def _transcribe_audio( headers = { "Authorization": "Bearer " + self.api_key, "User-Agent": "Prediction Guard Python Client: " + __version__, - "Toxicity": toxicity, + "Toxicity": str(toxicity), "Pii": pii, "Replace-Method": replace_method, - "Injection": injection + "Injection": str(injection) } with open(file, "rb") as audio_file: diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py index 9395247..20b97c1 100644 --- a/predictionguard/src/documents.py +++ b/predictionguard/src/documents.py @@ -82,10 +82,10 @@ def _extract_documents( headers = { "Authorization": "Bearer " + self.api_key, "User-Agent": "Prediction Guard Python Client: " + __version__, - "Toxicity": toxicity, + "Toxicity": str(toxicity), "Pii": pii, "Replace-Method": replace_method, - "Injection": injection + "Injection": str(injection) } data = {