diff --git a/fixtures/test_audio.wav b/fixtures/test_audio.wav
new file mode 100644
index 0000000..3184d37
Binary files /dev/null and b/fixtures/test_audio.wav differ
diff --git a/fixtures/test_csv.csv b/fixtures/test_csv.csv
new file mode 100644
index 0000000..e7bba0d
--- /dev/null
+++ b/fixtures/test_csv.csv
@@ -0,0 +1,6 @@
+John,Doe,120 jefferson st.,Riverside, NJ, 08075
+Jack,McGinnis,220 hobo Av.,Phila, PA,09119
+"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
+Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
+,Blankman,,SomeTown, SD, 00298
+"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123
diff --git a/fixtures/test_pdf.pdf b/fixtures/test_pdf.pdf
new file mode 100644
index 0000000..c01805e
Binary files /dev/null and b/fixtures/test_pdf.pdf differ
diff --git a/predictionguard/client.py b/predictionguard/client.py
index c4762de..0bc9e9f 100644
--- a/predictionguard/client.py
+++ b/predictionguard/client.py
@@ -3,8 +3,10 @@
 import requests
 from typing import Optional
 
+from .src.audio import Audio
 from .src.chat import Chat
 from .src.completions import Completions
+from .src.documents import Documents
 from .src.embeddings import Embeddings
 from .src.rerank import Rerank
 from .src.tokenize import Tokenize
@@ -17,9 +19,9 @@
 from .version import __version__
 
 __all__ = [
-    "PredictionGuard", "Chat", "Completions", "Embeddings", "Rerank",
-    "Tokenize", "Translate", "Factuality", "Toxicity", "Pii", "Injection",
-    "Models"
+    "PredictionGuard", "Chat", "Completions", "Embeddings",
+    "Audio", "Documents", "Rerank", "Tokenize", "Translate",
+    "Factuality", "Toxicity", "Pii", "Injection", "Models"
 ]
 
 class PredictionGuard:
@@ -65,6 +67,12 @@ def __init__(
         self.embeddings: Embeddings = Embeddings(self.api_key, self.url)
         """Embedding generates chat completions based on a conversation history."""
 
+        self.audio: Audio = Audio(self.api_key, self.url)
+        """Audio allows for the transcription of audio files."""
+
+        self.documents: Documents = Documents(self.api_key, self.url)
+        """Documents allows you to extract text from various document file types."""
+
         self.rerank: Rerank = Rerank(self.api_key, self.url)
         """Rerank sorts text inputs by semantic relevance to a specified query."""
 
diff --git a/predictionguard/src/audio.py b/predictionguard/src/audio.py
new file mode 100644
index 0000000..a7348c3
--- /dev/null
+++ b/predictionguard/src/audio.py
@@ -0,0 +1,98 @@
+import json
+
+import requests
+from typing import Any, Dict, Optional
+
+from ..version import __version__
+
+
+class Audio:
+    """Audio generates a response based on audio data.
+
+    Usage::
+
+        import os
+        import json
+
+        from predictionguard import PredictionGuard
+
+        # Set your Prediction Guard token as an environmental variable.
+        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
+
+        client = PredictionGuard()
+
+        result = client.audio.transcriptions.create(
+            model="whisper-3-large-instruct", file="sample_audio.wav"
+        )
+
+        print(json.dumps(result, sort_keys=True, indent=4, separators=(",", ": ")))
+    """
+
+    def __init__(self, api_key, url):
+        self.api_key = api_key
+        self.url = url
+        # Sub-client for the /audio/transcriptions endpoint; it receives the same key and URL.
+        self.transcriptions: AudioTranscriptions = AudioTranscriptions(self.api_key, self.url)
+
+class AudioTranscriptions:  # Client for the Prediction Guard /audio/transcriptions endpoint.
+    def __init__(self, api_key, url):
+        self.api_key = api_key
+        self.url = url
+
+    def create(
+        self,
+        model: str,
+        file: str
+    ) -> Dict[str, Any]:
+        """Creates an audio transcription request to the Prediction Guard /audio/transcriptions API.
+
+        :param model: The model to use
+        :param file: Path to the audio file to be transcribed
+        :return: A dictionary containing the transcribed text.
+        :raises ValueError: If the API responds with a status code other than 200.
+        """
+
+        # Bundle the parameters into a single tuple so they can be unpacked
+        # into the call to _transcribe_audio
+        args = (model, file)
+
+        # Run _transcribe_audio and hand its result back unchanged
+        choices = self._transcribe_audio(*args)
+        return choices
+
+    def _transcribe_audio(self, model, file):
+        """
+        Uploads the file at path `file` together with `model` and returns the decoded JSON response.
+        """
+
+        headers = {
+            "Authorization": "Bearer " + self.api_key,
+            "User-Agent": "Prediction Guard Python Client: " + __version__,
+        }
+
+        with open(file, "rb") as audio_file:  # the request is sent while the file is still open
+            files = {"file": (file, audio_file, "audio/wav")}  # NOTE(review): MIME type is always audio/wav, whatever the file is
+            data = {"model": model}
+
+            response = requests.request(
+                "POST", self.url + "/audio/transcriptions", headers=headers, files=files, data=data
+            )
+
+        # If the request was successful, return the decoded JSON body.
+        if response.status_code == 200:
+            ret = response.json()
+            return ret
+        elif response.status_code == 429:
+            raise ValueError(
+                "Could not connect to Prediction Guard API. "
+                "Too many requests, rate limit or quota exceeded."
+            )
+        else:
+            # Check if there is a json body in the response. Read that in,
+            # append its error field (if any) to the message, and raise an exception.
+            err = ""
+            try:
+                err = response.json()["error"]
+            except Exception:
+                pass  # best effort: fall back to the generic message
+            raise ValueError("Could not transcribe the audio file. " + err)
\ No newline at end of file
diff --git a/predictionguard/src/chat.py b/predictionguard/src/chat.py
index 332c91e..753fbe2 100644
--- a/predictionguard/src/chat.py
+++ b/predictionguard/src/chat.py
@@ -44,7 +44,7 @@ class Chat:
             {
                 "role": "user",
                 "content": "Haha. Good one."
-            },
+            }
         ]
 
         result = client.chat.completions.create(
@@ -69,15 +69,36 @@ def __init__(self, api_key, url):
     def create(
         self,
         model: str,
-        messages: Union[str, List[Dict[str, Any]]],
+        messages: Union[
+            str, List[
+                Dict[str, Any]
+            ]
+        ],
         input: Optional[Dict[str, Any]] = None,
         output: Optional[Dict[str, Any]] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[
+            Dict[str, int]
+        ] = None,
         max_completion_tokens: Optional[int] = 100,
         max_tokens: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        stop: Optional[
+            Union[
+                str, List[str]
+            ]
+        ] = None,
+        stream: Optional[bool] = False,
         temperature: Optional[float] = 1.0,
+        tool_choice: Optional[Union[
+            str, Dict[
+                str, Dict[str, str]
+            ]
+        ]] = "none",
+        tools: Optional[List[Dict[str, Union[str, Dict[str, str]]]]] = None,
         top_p: Optional[float] = 0.99,
         top_k: Optional[float] = 50,
-        stream: Optional[bool] = False,
     ) -> Dict[str, Any]:
         """
         Creates a chat request for the Prediction Guard /chat API.
@@ -86,11 +107,18 @@ def create(
         :param messages: The content of the call, an array of dictionaries containing a role and content.
         :param input: A dictionary containing the PII and injection arguments.
         :param output: A dictionary containing the consistency, factuality, and toxicity arguments.
+        :param frequency_penalty: The frequency penalty to use.
+        :param logit_bias: The logit bias to use.
         :param max_completion_tokens: The maximum amount of tokens the model should return.
+        :param parallel_tool_calls: Whether to enable parallel tool calls.
+        :param presence_penalty: The presence penalty to use.
+        :param stop: The completion stopping criteria.
+        :param stream: Option to stream the API response
         :param temperature: The consistency of the model responses to the same prompt. The higher the more consistent.
+        :param tool_choice: The tool choice to use.
+        :param tools: Options to pass to the tool choice.
         :param top_p: The sampling for the model to use.
         :param top_k: The Top-K sampling for the model to use.
-        :param stream: Option to stream the API response
         :return: A dictionary containing the chat response.
         """
 
@@ -110,11 +138,18 @@ def create(
             messages,
             input,
             output,
+            frequency_penalty,
+            logit_bias,
             max_completion_tokens,
+            parallel_tool_calls,
+            presence_penalty,
+            stop,
+            stream,
             temperature,
+            tool_choice,
+            tools,
             top_p,
-            top_k,
-            stream,
+            top_k
         )
 
         # Run _generate_chat
@@ -128,11 +163,18 @@ def _generate_chat(
         messages,
         input,
         output,
+        frequency_penalty,
+        logit_bias,
         max_completion_tokens,
+        parallel_tool_calls,
+        presence_penalty,
+        stop,
+        stream,
         temperature,
+        tool_choice,
+        tools,
         top_p,
         top_k,
-        stream,
     ):
         """
         Function to generate a single chat response.
@@ -257,11 +299,18 @@ def stream_generator(url, headers, payload, stream):
         payload_dict = {
             "model": model,
             "messages": messages,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
             "max_completion_tokens": max_completion_tokens,
+            "parallel_tool_calls": parallel_tool_calls,
+            "presence_penalty": presence_penalty,
+            "stop": stop,
+            "stream": stream,
             "temperature": temperature,
+            "tool_choice": tool_choice,
+            "tools": tools,
             "top_p": top_p,
             "top_k": top_k,
-            "stream": stream,
         }
 
         if input:
diff --git a/predictionguard/src/completions.py b/predictionguard/src/completions.py
index 845baa1..9a03920 100644
--- a/predictionguard/src/completions.py
+++ b/predictionguard/src/completions.py
@@ -22,8 +22,14 @@ def create(
         prompt: Union[str, List[str]],
         input: Optional[Dict[str, Any]] = None,
         output: Optional[Dict[str, Any]] = None,
+        echo: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[str, int]] = None,
         max_completion_tokens: Optional[int] = 100,
         max_tokens: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        stop: Optional[Union[str, List[str]]] = None,
+        stream: Optional[bool] = False,
         temperature: Optional[float] = 1.0,
         top_p: Optional[float] = 0.99,
         top_k: Optional[int] = 50
@@ -35,7 +41,13 @@ def create(
         :param prompt: The prompt(s) to generate completions for.
         :param input: A dictionary containing the PII and injection arguments.
         :param output: A dictionary containing the consistency, factuality, and toxicity arguments.
+        :param echo: A boolean indicating whether to echo the prompt(s) to the output.
+        :param frequency_penalty: The frequency penalty to use.
+        :param logit_bias: The logit bias to use.
         :param max_completion_tokens: The maximum number of tokens to generate in the completion(s).
+        :param presence_penalty: The presence penalty to use.
+        :param stop: The completion stopping criteria.
+        :param stream: Option to stream the API response
         :param temperature: The sampling temperature to use.
         :param top_p: The nucleus sampling probability to use.
         :param top_k: The Top-K sampling for the model to use.
@@ -53,7 +65,22 @@ def create(
 
         # Create a list of tuples, each containing all the parameters for
         # a call to _generate_completion
-        args = (model, prompt, input, output, max_completion_tokens, temperature, top_p, top_k)
+        args = (
+            model,
+            prompt,
+            input,
+            output,
+            echo,
+            frequency_penalty,
+            logit_bias,
+            max_completion_tokens,
+            presence_penalty,
+            stop,
+            stream,
+            temperature,
+            top_p,
+            top_k
+        )
 
         # Run _generate_completion
         choices = self._generate_completion(*args)
@@ -61,14 +88,76 @@ def create(
         return choices
 
     def _generate_completion(
-        self, model, prompt,
-        input, output, max_completion_tokens,
-        temperature, top_p, top_k
+        self,
+        model,
+        prompt,
+        input,
+        output,
+        echo,
+        frequency_penalty,
+        logit_bias,
+        max_completion_tokens,
+        presence_penalty,
+        stop,
+        stream,
+        temperature,
+        top_p,
+        top_k
     ):
         """
         Function to generate a single completion.
         """
 
+        def return_dict(url, headers, payload):  # non-streaming path: one POST, returns the decoded JSON body
+            response = requests.request(
+                "POST", url + "/completions", headers=headers, data=payload
+            )
+            # If the request was successful, return the decoded JSON body.
+            if response.status_code == 200:
+                ret = response.json()
+                return ret
+            elif response.status_code == 429:
+                raise ValueError(
+                    "Could not connect to Prediction Guard API. "
+                    "Too many requests, rate limit or quota exceeded."
+            )
+            else:
+                # Check if there is a json body in the response. Read that in,
+                # then append its error field (if any) to the message, and raise an exception.
+                err = ""
+                try:
+                    err = response.json()["error"]
+                except Exception:
+                    pass  # best effort: fall back to the generic message
+                raise ValueError("Could not make prediction. " + err)
+
+        def stream_generator(url, headers, payload, stream):  # streaming path: yields one dict per usable chunk
+            with requests.post(
+                url + "/completions",
+                headers=headers,
+                data=payload,
+                stream=stream,
+            ) as response:
+                response.raise_for_status()
+                # Assumes each non-empty line looks like `data: {...}` - TODO confirm against the API's stream format.
+                for line in response.iter_lines():
+                    if line:
+                        decoded_line = line.decode("utf-8")
+                        formatted_return = (
+                            "{" + (decoded_line.replace("data", '"data"', 1)) + "}"  # quote the first "data" and wrap in braces
+                        )
+                        try:
+                            dict_return = json.loads(formatted_return)
+                        except json.decoder.JSONDecodeError:
+                            pass  # skip lines that are not valid JSON after wrapping
+                        else:
+                            try:
+                                dict_return["data"]["choices"][0]["text"]  # probe only; the value is discarded
+                            except (KeyError, IndexError, TypeError):
+                                pass  # skip chunks without choice text (missing key, empty choices, non-dict data)
+                            else:
+                                yield dict_return
+
         # Make a prediction using the proxy.
         headers = {
             "Content-Type": "application/json",
@@ -79,7 +168,13 @@ def _generate_completion(
         payload_dict = {
             "model": model,
             "prompt": prompt,
+            "echo": echo,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
             "max_completion_tokens": max_completion_tokens,
+            "presence_penalty": presence_penalty,
+            "stop": stop,
+            "stream": stream,
             "temperature": temperature,
             "top_p": top_p,
             "top_k": top_k
@@ -87,30 +182,19 @@ def _generate_completion(
         if input:
             payload_dict["input"] = input
         if output:
-            payload_dict["output"] = output
+            if stream:
+                raise ValueError(
+                    "Factuality and toxicity checks are not supported when streaming is enabled."
+                )
+            else:
+                payload_dict["output"] = output
         payload = json.dumps(payload_dict)
 
-        response = requests.request(
-            "POST", self.url + "/completions", headers=headers, data=payload
-        )
-        # If the request was successful, print the proxies.
-        if response.status_code == 200:
-            ret = response.json()
-            return ret
-        elif response.status_code == 429:
-            raise ValueError(
-                "Could not connect to Prediction Guard API. "
-                "Too many requests, rate limit or quota exceeded."
-            )
+        if stream:
+            return stream_generator(self.url, headers, payload, stream)
+
         else:
-            # Check if there is a json body in the response. Read whether the API response should be streamed in,
-            # print out the error field in the json body, and raise an exception.
-            err = ""
-            try:
-                err = response.json()["error"]
-            except Exception:
-                pass
-            raise ValueError("Could not make prediction. " + err)
+            return return_dict(self.url, headers, payload)
 
     def list_models(self) -> List[str]:
         # Get the list of current models.
diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py
new file mode 100644
index 0000000..69b3ae9
--- /dev/null
+++ b/predictionguard/src/documents.py
@@ -0,0 +1,89 @@
+import json
+from pyexpat import model
+
+import requests
+from typing import Any, Dict, Optional
+
+from ..version import __version__
+
+
+class Documents:
+    """Documents allows you to extract text from various document file types.
+
+    Usage::
+
+        import os
+        import json
+
+        from predictionguard import PredictionGuard
+
+        # Set your Prediction Guard token as an environmental variable.
+        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
+
+        client = PredictionGuard()
+        response = client.documents.extract.create(file="sample.pdf")
+
+        print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": ")))
+    """
+
+    def __init__(self, api_key, url):
+        self.api_key = api_key
+        self.url = url
+        # Sub-client for the /documents/extract endpoint; it receives the same key and URL.
+        self.extract: DocumentsExtract = DocumentsExtract(self.api_key, self.url)
+
+class DocumentsExtract:  # Client for the Prediction Guard /documents/extract endpoint.
+    def __init__(self, api_key, url):
+        self.api_key = api_key
+        self.url = url
+
+    def create(
+        self,
+        file: str
+    ) -> Dict[str, Any]:
+        """Creates a documents request to the Prediction Guard /documents/extract API.
+
+        :param file: Path to the document to be parsed
+        :return: A dictionary containing the title, content, and length of the document.
+        :raises ValueError: If the API responds with a status code other than 200.
+        """
+
+        # Run _extract_documents and hand its result back unchanged
+        choices = self._extract_documents(file)
+        return choices
+
+    def _extract_documents(self, file):
+        """
+        Uploads the file at path `file` and returns the decoded JSON response.
+        """
+
+        headers = {
+            "Authorization": "Bearer " + self.api_key,
+            "User-Agent": "Prediction Guard Python Client: " + __version__,
+        }
+
+        with open(file, "rb") as doc_file:  # the request is sent while the file is still open
+            files = {"file": (file, doc_file)}
+
+            response = requests.request(
+                "POST", self.url + "/documents/extract", headers=headers, files=files
+            )
+
+        # If the request was successful, return the decoded JSON body.
+        if response.status_code == 200:
+            ret = response.json()
+            return ret
+        elif response.status_code == 429:
+            raise ValueError(
+                "Could not connect to Prediction Guard API. "
+                "Too many requests, rate limit or quota exceeded."
+            )
+        else:
+            # Check if there is a json body in the response. Read that in,
+            # append its error field (if any) to the message, and raise an exception.
+            err = ""
+            try:
+                err = response.json()["error"]
+            except Exception:
+                pass  # best effort: fall back to the generic message
+            raise ValueError("Could not extract document. " + err)
\ No newline at end of file
diff --git a/predictionguard/src/translate.py b/predictionguard/src/translate.py
index 43c59be..336a12f 100644
--- a/predictionguard/src/translate.py
+++ b/predictionguard/src/translate.py
@@ -7,6 +7,28 @@
 
 
 class Translate:
+    # UNCOMMENT WHEN DEPRECATED
+    # """No longer supported.
+    # """
+    #
+    # def __init__(self, api_key, url):
+    #     self.api_key = api_key
+    #     self.url = url
+    #
+    # def create(
+    #         self,
+    #         text: Optional[str],
+    #         source_lang: Optional[str],
+    #         target_lang: Optional[str],
+    #         use_third_party_engine: Optional[bool] = False
+    #     ) -> Dict[str, Any]:
+    #     """
+    #     No longer supported
+    #     """
+    #
+    #     raise ValueError(
+    #         "The translate functionality is no longer supported."
+    #     )
     """Translate converts text from one language to another.
 
     Usage::
@@ -28,6 +50,7 @@ class Translate:
         print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": ")))
     """
 
+    # REMOVE BELOW HERE FOR DEPRECATION
     def __init__(self, api_key, url):
         self.api_key = api_key
         self.url = url
@@ -96,4 +119,4 @@ def _generate_translation(self, text, source_lang, target_lang, use_third_party_
                 err = response.json()["error"]
             except Exception:
                 pass
-            raise ValueError("Could not make translation. " + err)
+            raise ValueError("Could not make translation. " + err)
\ No newline at end of file
diff --git a/predictionguard/version.py b/predictionguard/version.py
index 9c132bc..c77fef4 100644
--- a/predictionguard/version.py
+++ b/predictionguard/version.py
@@ -1,2 +1,2 @@
 # Setting the package version
-__version__ = "2.7.1"
+__version__ = "2.8.0"
diff --git a/pyproject.toml b/pyproject.toml
index 31718e0..da06461 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,10 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "predictionguard"
-authors = [{name = "Daniel Whitenack", email = "dan@predictionguard.com"}]
+authors = [
+    {name = "Daniel Whitenack", email = "dan@predictionguard.com"},
+    {name = "Jacob Mansdorfer", email = "jacob@predictionguard.com"}
+    ]
 readme = "README.md"
 license = {file = "LICENSE"}
 classifiers = ["License :: OSI Approved :: MIT License"]
diff --git a/tests/test_audio.py b/tests/test_audio.py
new file mode 100644
index 0000000..d95ccf9
--- /dev/null
+++ b/tests/test_audio.py
@@ -0,0 +1,16 @@
+import os
+
+from predictionguard import PredictionGuard
+
+
+def test_audio_transcribe_success():
+    test_client = PredictionGuard()
+    # The fixture path is relative, so it is resolved against the current working directory.
+    response = test_client.audio.transcriptions.create(
+        model="base",
+        file="fixtures/test_audio.wav"
+    )
+
+    print(response)
+    # The transcription is read from the "text" key and must not be empty.
+    assert len(response["text"]) > 0
\ No newline at end of file
diff --git a/tests/test_chat.py b/tests/test_chat.py
index 7c45a71..c4c0a13 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -187,6 +187,21 @@ def test_chat_completions_create_vision_stream_fail():
             response_list.append(res)
 
 
+def test_chat_completions_create_tool_call():
+    test_client = PredictionGuard()
+
+    response = test_client.chat.completions.create(
+        model=os.environ["TEST_MODEL_NAME"],
+        messages=[
+            {"role": "system", "content": "You are a helpful chatbot."},
+            {"role": "user", "content": "Tell me a joke."},
+        ],
+        # NOTE(review): no `tools` or `tool_choice` is passed despite the test name - confirm intent.
+    )
+
+    assert len(response["choices"][0]["message"]["content"]) > 0
+
+
 def test_chat_completions_list_models():
     test_client = PredictionGuard()
 
diff --git a/tests/test_completions.py b/tests/test_completions.py
index 778583b..f3adcb7 100644
--- a/tests/test_completions.py
+++ b/tests/test_completions.py
@@ -1,5 +1,7 @@
 import os
 
+import pytest
+
 from predictionguard import PredictionGuard
 
 
@@ -33,3 +35,35 @@ def test_completions_list_models():
 
     assert len(response) > 0
     assert type(response[0]) is str
+
+
+def test_completions_create_stream():
+    test_client = PredictionGuard()
+    # Collect every chunk produced by the streaming call.
+    response_list = []
+    for res in test_client.completions.create(
+        model=os.environ["TEST_MODEL_NAME"],
+        prompt="Tell me a joke.",
+        stream=True,
+    ):
+        response_list.append(res)
+    # More than one chunk must have been received.
+    assert len(response_list) > 1
+
+
+def test_completions_create_stream_output_fail():
+    test_client = PredictionGuard()
+    # Must match the ValueError message raised when `output` checks are combined with stream=True.
+    streaming_error = "Factuality and toxicity checks are not supported when streaming is enabled.".replace(
+        "\n", ""  # no-op here: the literal above contains no newline
+    )
+
+    response_list = []
+    with pytest.raises(ValueError, match=streaming_error):
+        for res in test_client.completions.create(
+            model=os.environ["TEST_MODEL_NAME"],
+            prompt="Tell me a joke.",
+            stream=True,
+            output={"toxicity": True},
+        ):
+            response_list.append(res)
\ No newline at end of file
diff --git a/tests/test_documents.py b/tests/test_documents.py
new file mode 100644
index 0000000..c877c0e
--- /dev/null
+++ b/tests/test_documents.py
@@ -0,0 +1,13 @@
+import os
+
+from predictionguard import PredictionGuard
+
+
+def test_documents_extract_success():
+    test_client = PredictionGuard()
+    # The fixture path is relative, so it is resolved against the current working directory.
+    response = test_client.documents.extract.create(
+        file="fixtures/test_pdf.pdf"
+    )
+    # NOTE(review): DocumentsExtract.create's docstring mentions "content" but this reads "contents" - confirm the key.
+    assert len(response["contents"]) > 0
\ No newline at end of file
diff --git a/tests/test_translate.py b/tests/test_translate.py
index a0b4372..ee16e46 100644
--- a/tests/test_translate.py
+++ b/tests/test_translate.py
@@ -9,4 +9,4 @@ def test_translate_create():
     )
 
     assert type(response["best_score"]) is float
-    assert len(response["best_translation"])
+    assert len(response["best_translation"])
\ No newline at end of file