TLBC-pl · llamaonsecurity · Nov 24, 2025 · Jun 13, 2025 · Jun 13, 2025
diff --git a/.gitignore b/.gitignore
@@ -110,3 +110,4 @@ cython_debug/
 /recorded_audio
 LOGI POSZLO.txt
 test.txt
+*.txt
diff --git a/core/config.py b/core/config.py
@@ -38,6 +38,11 @@ def __init__(self) -> None:
             "GPT_EVALUATION_MODEL",
             "gpt-4o-mini",
         )
+        self.openai_tts_char_limit: Final[int] = int(
+            os.getenv(
+                "OPENAI_TTS_CHAR_LIMIT",
+                "4096",
+            ))
 
         self.microphone_name: Final[
             Optional[str]] = self.__resolve_microphone_name()

diff --git a/requirements.txt b/requirements.txt
@@ -5,4 +5,6 @@ sounddevice~=0.5.1
 openai~=1.71.0
 pydantic~=2.11.2
 numpy~=2.2.4
-soundfile~=0.13.1
+soundfile~=0.13.1
+audioop-lts~=0.2.1
+pydub~=0.25.1
diff --git a/services/tts_service.py b/services/tts_service.py
@@ -1,20 +1,31 @@
-"""Text-to-speech service using OpenAI TTS API."""
+"""Text-to-speech service using the OpenAI TTS API.
+
+This module provides a service to convert long texts into audio files
+by chunking the text, generating audio for each chunk, and concatenating
+the results.
+"""
 import hashlib
 import logging
 import shutil
+import tempfile
 from pathlib import Path
-from typing import Final
+from typing import Final, List
 
 from openai import OpenAI
+from pydub import AudioSegment
 
 from core.config import config
 
 
 class TTSService:
-    """Service for text-to-speech conversion using OpenAI TTS API."""
+    """Manages text-to-speech conversion, handling API limits gracefully."""
 
     def __init__(self) -> None:
-        """Initialize the TTS service."""
+        """Initializes the TTS service and the OpenAI client.
+
+        Raises:
+            ValueError: If the OPENAI_API_KEY is not set in the environment.
+        """
         self.__logger = logging.getLogger(self.__class__.__name__)
         if not config.openai_api_key:
             raise ValueError("OPENAI_API_KEY is not set in the environment.")
@@ -23,40 +34,139 @@ def __init__(self) -> None:
         self.__model: Final[str] = config.openai_tts_model
         self.__voice: Final[str] = config.openai_tts_voice
         self.__format: Final[str] = config.openai_tts_output_format
+        self.__api_char_limit: Final[int] = config.openai_tts_char_limit
+
+    def __chunk_text(self, text: str) -> List[str]:
+        """Splits a long text into chunks that respect the API character limit.
+
+        The text is cut after sentence-ending punctuation (".", "!" or "?")
+        that is followed by whitespace, and the sentences are packed greedily
+        into chunks. A single sentence longer than the limit is wrapped at
+        the last space or newline that still fits, or cut hard at the limit
+        when it contains no such whitespace. No punctuation is added or
+        removed; only whitespace at chunk boundaries is trimmed.
+
+        Args:
+            text (str): The input text to be split.
+
+        Returns:
+            List[str]: Non-empty chunks, each at most the API limit long.
+
+        Raises:
+            ValueError: If the configured character limit is not positive.
+        """
+        # Imported locally because this method is the only user of `re`.
+        import re
+
+        limit = self.__api_char_limit
+        if limit < 1:
+            # A non-positive limit could never be satisfied by any chunk.
+            raise ValueError(
+                "OPENAI_TTS_CHAR_LIMIT must be a positive integer.")
+
+        chunks: List[str] = []
+        current = ""
+        # Zero-width split: every piece keeps its own punctuation and leading
+        # whitespace, so joining the pieces reproduces the text exactly.
+        for piece in re.split(r"(?<=[.!?])(?=\s)", text):
+            if len((current + piece).strip()) <= limit:
+                current += piece
+                continue
+
+            if current.strip():
+                chunks.append(current.strip())
+            current = piece.strip()
+
+            # The sentence alone is too long: wrap it on whitespace.
+            while len(current) > limit:
+                cut = max(current.rfind(" ", 0, limit + 1),
+                          current.rfind("\n", 0, limit + 1))
+                if cut <= 0:
+                    # No usable whitespace: cut hard at the limit.
+                    cut = limit
+                chunks.append(current[:cut].rstrip())
+                # Dropping the leading whitespace guarantees progress; keeping
+                # it would let the next search find position 0 forever.
+                current = current[cut:].lstrip()
+
+        if current.strip():
+            chunks.append(current.strip())
+
+        return chunks
+
+    def __generate_chunk_audio(self, text_chunk: str, file_path: Path) -> None:
+        """Synthesizes one text chunk and writes the audio to disk.
+
+        The request uses the model, voice and output format stored on the
+        service; the streamed response is written to ``file_path``.
+
+        Args:
+            text_chunk (str): The text to convert to speech.
+            file_path (Path): Destination of the generated audio file.
+
+        Raises:
+            Exception: Any error raised by the OpenAI client propagates
+                unchanged.
+        """
+        speech_api = self.__client.audio.speech.with_streaming_response
+        request_options = {
+            "model": self.__model,
+            "voice": self.__voice,
+            "input": text_chunk,
+            "response_format": self.__format,
+        }
+        with speech_api.create(**request_options) as streamed_audio:
+            streamed_audio.stream_to_file(file_path)
+
+    def __process_chunks(self, text_chunks: List[str]) -> AudioSegment:
+        """Renders every text chunk to audio and joins the results.
+
+        Each chunk is synthesized into its own file inside a temporary
+        directory and loaded back as an AudioSegment. The loaded segments
+        are then added together in their original order.
+
+        Args:
+            text_chunks (List[str]): The text chunks to synthesize, in order.
+
+        Returns:
+            AudioSegment: A pydub AudioSegment holding the combined audio.
+
+        Raises:
+            RuntimeError: If no audio segment was produced.
+        """
+        total = len(text_chunks)
+        loaded_segments = []
+        with tempfile.TemporaryDirectory() as scratch_dir:
+            scratch_root = Path(scratch_dir)
+            for index, piece in enumerate(text_chunks):
+                target = scratch_root / f"chunk_{index}.{self.__format}"
+                self.__logger.info(
+                    f"Generating audio for chunk {index+1}/{total}")
+
+                self.__generate_chunk_audio(piece, target)
+                loaded_segments.append(
+                    AudioSegment.from_file(target, format=self.__format))
+
+        if len(loaded_segments) == 0:
+            raise RuntimeError("Audio generation resulted in no segments.")
+
+        self.__logger.info("Concatenating audio chunks...")
+        return sum(loaded_segments)
 
     async def generate_audio(self, text: str, output_path: Path) -> bool:
-        """Generate audio from text using OpenAI TTS API.
+        """Generates an audio file from text, using cache if available.
+
+        The cache file lives next to ``output_path`` and is named after the
+        SHA-256 hash of the text plus the output format. On a cache hit the
+        cached file is copied to ``output_path``. Otherwise the text is
+        chunked, each chunk is synthesized, the combined audio is exported to
+        ``output_path`` and a copy is stored as the new cache file.
+
+        NOTE(review): the body contains no ``await``, so the work appears to
+        run synchronously inside the coroutine -- confirm this is intended.
 
         Args:
-            text: Text to convert to speech.
-            output_path: Path where to save the generated audio.
+            text (str): The full text to be converted to speech.
+            output_path (Path): The path to save the final audio file.
 
         Returns:
-            True if cached audio was used, False if new audio was generated.
+            bool: True if a cached audio file was used, False otherwise.
 
         Raises:
-            Exception: If audio generation fails.
+            Exception: If audio generation or export fails; the error is
+                logged and re-raised.
         """
         output_path.parent.mkdir(parents=True, exist_ok=True)
-        # noinspection PyTypeChecker
+        # NOTE(review): the cache key covers only the text and the output
+        # format; a changed model or voice would still hit an old cache file.
         prompt_hash: str = hashlib.sha256(text.encode("utf-8")).hexdigest()
-        cache_file: Path = (output_path.parent /
-                            f"jailbreak_prompt_{prompt_hash}.{self.__format}")
+        cache_file_name = f"jailbreak_prompt_{prompt_hash}.{self.__format}"
+        cache_file: Path = output_path.parent / cache_file_name
 
         if cache_file.exists():
             shutil.copy(cache_file, output_path)
             return True
 
+        # Chunking runs before the try block, so an error raised here is not
+        # logged by the handler below.
+        text_chunks = self.__chunk_text(text)
         try:
-            with self.__client.audio.speech.with_streaming_response.create(
-                    model=self.__model,
-                    voice=self.__voice,
-                    input=text,
-                    response_format=self.__format,
-            ) as response:
-                response.stream_to_file(output_path)
-            shutil.copy(output_path, cache_file)
-            return False
+            combined_audio = self.__process_chunks(text_chunks)
+            combined_audio.export(output_path, format=self.__format)
         except Exception:
             self.__logger.exception("Failed to generate audio with OpenAI TTS.")
             raise
+
+        # Reached only after a successful export: store the result in the
+        # cache so the same text is not synthesized again.
+        shutil.copy(output_path, cache_file)
+        return False