From 469a55d88a872621e646c90eab9671113a79562b Mon Sep 17 00:00:00 2001
From: jmansdorfer <jmansdorfer19@gmail.com>
Date: Mon, 14 Apr 2025 10:34:10 -0400
Subject: [PATCH 1/2] adding exts to audio and document, along with new doc
 params

---
 predictionguard/src/audio.py     | 26 ++++++++++++++---
 predictionguard/src/documents.py | 50 +++++++++++++++++++++++++++-----
 predictionguard/version.py       |  2 +-
 3 files changed, 65 insertions(+), 13 deletions(-)

diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py
index dce5c46..b655072 100644
--- a/predictionguard/src/audio.py
+++ b/predictionguard/src/audio.py
@@ -1,5 +1,3 @@
-import json
-
 import requests
 from typing import Any, Dict, Optional
 
@@ -46,6 +44,10 @@ def create(
         language: Optional[str] = "auto",
         temperature: Optional[float] = 0.0,
         prompt: Optional[str] = "",
+        toxicity: Optional[bool] = False,
+        pii: Optional[str] = "",
+        replace_method: Optional[str] = "",
+        injection: Optional[bool] = False,
     ) -> Dict[str, Any]:
         """
         Creates a audio transcription request to the Prediction Guard /audio/transcriptions API
@@ -55,18 +57,30 @@ def create(
         :param language: The language of the audio file
         :param temperature: The temperature parameter for model transcription
         :param prompt: A prompt to assist in transcription styling
+        :param toxicity: Whether to check for output toxicity
+        :param pii: Whether to check for or replace PII
+        :param replace_method: Replace method for any PII that is present.
+        :param injection: Whether to check for prompt injection
         :result: A dictionary containing the transcribed text.
         """
 
         # Create a list of tuples, each containing all the parameters for
         # a call to _transcribe_audio
-        args = (model, file, language, temperature, prompt)
+        args = (
+            model, file, language, temperature,
+            prompt, toxicity, pii, replace_method,
+            injection
+        )
 
         # Run _transcribe_audio
         choices = self._transcribe_audio(*args)
         return choices
 
-    def _transcribe_audio(self, model, file, language, temperature, prompt):
+    def _transcribe_audio(
+            self, model, file,
+            language, temperature, prompt,
+            toxicity, pii, replace_method, injection
+    ):
         """
         Function to transcribe an audio file.
         """
@@ -74,6 +88,10 @@ def _transcribe_audio(self, model, file, language, temperature, prompt):
         headers = {
             "Authorization": "Bearer " + self.api_key,
             "User-Agent": "Prediction Guard Python Client: " + __version__,
+            "Toxicity": toxicity,
+            "Pii": pii,
+            "Replace-Method": replace_method,
+            "Injection": injection
         }
 
         with open(file, "rb") as audio_file:
diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py
index 69b3ae9..9395247 100644
--- a/predictionguard/src/documents.py
+++ b/predictionguard/src/documents.py
@@ -1,6 +1,3 @@
-import json
-from pyexpat import model
-
 import requests
 from typing import Any, Dict, Optional
 
@@ -10,7 +7,7 @@
 class Documents:
     """Documents allows you to extract text from various document file types.
 
-    Usage::
+    Usage:
 
         from predictionguard import PredictionGuard
 
@@ -39,20 +36,45 @@ def __init__(self, api_key, url):
 
     def create(
         self,
-        file: str
+        file: str,
+        embed_images: Optional[bool] = False,
+        output_format: Optional[str] = None,
+        chunk_document: Optional[bool] = False,
+        chunk_size: Optional[int] = None,
+        toxicity: Optional[bool] = False,
+        pii: Optional[str] = "",
+        replace_method: Optional[str] = "",
+        injection: Optional[bool] = False,
     ) -> Dict[str, Any]:
         """
         Creates a documents request to the Prediction Guard /documents/extract API
 
         :param file: Document to be parsed
+        :param embed_images: Whether to embed images into documents
+        :param output_format: Output format
+        :param chunk_document: Whether to split the document into chunks
+        :param chunk_size: Chunk size
+        :param toxicity: Whether to check for output toxicity
+        :param pii: Whether to check for or replace PII
+        :param replace_method: Replace method for any PII that is present.
+        :param injection: Whether to check for prompt injection
         :result: A dictionary containing the title, content, and length of the document.
         """
 
         # Run _extract_documents
-        choices = self._extract_documents(file)
+        choices = self._extract_documents(
+            file, embed_images, output_format,
+            chunk_document, chunk_size, toxicity,
+            pii, replace_method, injection
+        )
         return choices
 
-    def _extract_documents(self, file):
+    def _extract_documents(
+            self, file, embed_images,
+            output_format, chunk_document,
+            chunk_size, toxicity, pii,
+            replace_method, injection
+    ):
         """
         Function to extract a document.
         """
@@ -60,13 +82,25 @@ def _extract_documents(self, file):
         headers = {
             "Authorization": "Bearer " + self.api_key,
             "User-Agent": "Prediction Guard Python Client: " + __version__,
+            "Toxicity": toxicity,
+            "Pii": pii,
+            "Replace-Method": replace_method,
+            "Injection": injection
+        }
+
+        data = {
+            "embedImages": embed_images,
+            "outputFormat": output_format,
+            "chunkDocument": chunk_document,
+            "chunkSize": chunk_size,
         }
 
         with open(file, "rb") as doc_file:
             files = {"file": (file, doc_file)}
 
             response = requests.request(
-                "POST", self.url + "/documents/extract", headers=headers, files=files
+                "POST", self.url + "/documents/extract",
+                headers=headers, files=files, data=data
             )
 
         # If the request was successful, print the proxies.
diff --git a/predictionguard/version.py b/predictionguard/version.py
index 5c1c3b7..5e64910 100644
--- a/predictionguard/version.py
+++ b/predictionguard/version.py
@@ -1,2 +1,2 @@
 # Setting the package version
-__version__ = "2.8.1"
+__version__ = "2.8.2"

From 4a865aabca23aad809d3a2d74bd69823483b0de3 Mon Sep 17 00:00:00 2001
From: jmansdorfer <jmansdorfer19@gmail.com>
Date: Mon, 14 Apr 2025 11:22:33 -0400
Subject: [PATCH 2/2] fixing header types

---
 predictionguard/src/audio.py     | 4 ++--
 predictionguard/src/documents.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py
index b655072..1a1b781 100644
--- a/predictionguard/src/audio.py
+++ b/predictionguard/src/audio.py
@@ -88,10 +88,10 @@ def _transcribe_audio(
         headers = {
             "Authorization": "Bearer " + self.api_key,
             "User-Agent": "Prediction Guard Python Client: " + __version__,
-            "Toxicity": toxicity,
+            "Toxicity": str(toxicity),
             "Pii": pii,
             "Replace-Method": replace_method,
-            "Injection": injection
+            "Injection": str(injection)
         }
 
         with open(file, "rb") as audio_file:
diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py
index 9395247..20b97c1 100644
--- a/predictionguard/src/documents.py
+++ b/predictionguard/src/documents.py
@@ -82,10 +82,10 @@ def _extract_documents(
         headers = {
             "Authorization": "Bearer " + self.api_key,
             "User-Agent": "Prediction Guard Python Client: " + __version__,
-            "Toxicity": toxicity,
+            "Toxicity": str(toxicity),
             "Pii": pii,
             "Replace-Method": replace_method,
-            "Injection": injection
+            "Injection": str(injection)
         }
 
         data = {