44 changes: 23 additions & 21 deletions backend/app/background_tasks.py
@@ -9,9 +9,11 @@
 import requests
 import string
 import json
+import httpx
 
 MODEL_SIZE = os.getenv("WHISPER_MODEL", "base")
 
+
 def load_whisper():
     preferred = os.getenv("WHISPER_DEVICE") # cuda | metal | cpu (optional)
 
@@ -39,10 +41,11 @@ def load_whisper():
 
     raise RuntimeError(f"Whisper init failed. Last error: {last_err}")
 
+
 whisper_model = load_whisper()
 
 
-def process_file(original_path: str, patch_duration_sec: int, overlap_sec: int):
+async def process_file(original_path: str, patch_duration_sec: int, overlap_sec: int):
     # Import db session inside background task
     with tempfile.TemporaryDirectory() as tmpdir:
         # Work on a copy
@@ -57,29 +60,31 @@ def process_file(original_path: str, patch_duration_sec: int, overlap_sec: int):
         audio_path = working_path
 
         # Split audio
-        patches = split_audio_to_patches(str(audio_path), patch_duration_sec, overlap_sec)
+        patches = await split_audio_to_patches(str(audio_path), patch_duration_sec, overlap_sec)
 
         # Transcribe
-        all_results = transcribe_patches(patches, whisper_model)
+        all_results = await transcribe_patches(patches, whisper_model)
 
         return all_results
 
 
-def send_to_llm(transcribed_text: str, default_definitions: list = None, positive_examples: list = None, negative_examples: list = None):
+async def send_to_llm(transcribed_text: str, default_definitions: list = None, positive_examples: list = None, negative_examples: list = None):
     """Send transcribed text to LLM for analysis"""
-    response = requests.post(
-        "http://localhost:8001/detect",
-        json={
-            "transcription": transcribed_text,
-            "default_definitions": default_definitions or [],
-            "positive_examples": positive_examples or [],
-            "negative_examples": negative_examples or []
-        }
-    )
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            "http://localhost:8001/detect",
+            json={
+                "transcription": transcribed_text,
+                "default_definitions": default_definitions or [],
+                "positive_examples": positive_examples or [],
+                "negative_examples": negative_examples or []
+            },
+            timeout=1800
+        )
 
-    result = response.json()
+        result = response.json()
 
-    return result
+        return result
 
 
 def normalize_word(word):
@@ -118,17 +123,15 @@ def find_matching_spans(transcribed_patches: dict, llm_spans: list):
     return processed_spans
 
 
-def main_background_function(job_id: str, original_path: str, patch_duration_sec: int, overlap_sec: int, db: Session, default_definitions: list = None, positive_examples: list = None, negative_examples: list = None):
-
+async def main_background_function(job_id: str, original_path: str, patch_duration_sec: int, overlap_sec: int, db: Session, default_definitions: list = None, positive_examples: list = None, negative_examples: list = None):
    job = db.get(Job, job_id)
     job.status = "transcribing"
     db.commit()
     db.expire_all()
 
-    transcribed_patches = process_file(original_path, patch_duration_sec, overlap_sec)
+    transcribed_patches = await process_file(original_path, patch_duration_sec, overlap_sec)
 
     print(f"Got {len(transcribed_patches)} batches")
 
-
     cleaned_list = []
 
@@ -149,11 +152,10 @@ def main_background_function(job_id: str, original_path: str, patch_duration_sec: int, overlap_sec: int, db: Session, default_definitions: list = None, positive_examples: list = None, negative_examples: list = None):
 
     for i, batch in enumerate(cleaned_list):
         print(f"Evaluating {i + 1}/{len(transcribed_patches)} ")
-        result_from_llm = send_to_llm(batch, default_definitions, positive_examples, negative_examples)
+        result_from_llm = await send_to_llm(batch, default_definitions, positive_examples, negative_examples)
         llm_spans = result_from_llm["spans"]
         processed_spans = find_matching_spans(transcribed_patches[i], llm_spans)
         all_processed_spans.extend(processed_spans)
 
-
     # Read the final transcribed text from file
     with open(Path(original_path).with_suffix('.txt'), 'r', encoding='utf-8') as f:
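
Note on the send_to_llm change above: requests.post blocks the event loop for the entire LLM round trip, whereas httpx.AsyncClient lets other tasks run while the response is pending, and the explicit timeout=1800 overrides httpx's 5-second default, which a long LLM analysis would otherwise trip. A minimal self-contained sketch of the same pattern (the /detect URL and payload shape come from the diff; raise_for_status and the sample payload are illustrative additions, not part of this PR):

import asyncio
import httpx

async def detect(transcription: str) -> dict:
    # One client per call keeps the sketch simple; a long-lived shared
    # client would avoid reopening connections on every request.
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://localhost:8001/detect",
            json={
                "transcription": transcription,
                "default_definitions": [],
                "positive_examples": [],
                "negative_examples": [],
            },
            timeout=1800,  # LLM analysis can run far past the 5 s default
        )
        response.raise_for_status()  # illustrative: surface HTTP errors early
        return response.json()

if __name__ == "__main__":
    print(asyncio.run(detect("sample transcript text")))
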
4 changes: 2 additions & 2 deletions backend/app/transcribe.py
@@ -20,7 +20,7 @@ def convert_video_to_audio(video_path: str, output_audio_path: str) -> str:
         raise
 
 
-def split_audio_to_patches(audio_path: str, patch_duration_sec: int = 120, overlap_sec: int = 30):
+async def split_audio_to_patches(audio_path: str, patch_duration_sec: int = 120, overlap_sec: int = 30):
     print(f"[INFO] Loading audio for patching: {audio_path}")
     y, sr = librosa.load(audio_path, sr=None)
     total_duration = librosa.get_duration(y=y, sr=sr)
@@ -42,7 +42,7 @@ def split_audio_to_patches(audio_path: str, patch_duration_sec: int = 120, overlap_sec: int = 30):
     return patches
 
 
-def transcribe_patches(patches, model):
+async def transcribe_patches(patches, model):
     all_results = []
     for i, patch_path in enumerate(patches):
         print(f"[INFO] Transcribing patch {i}: {patch_path}")
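
A caveat on this file: adding async to split_audio_to_patches and transcribe_patches changes their signatures but not their behavior; librosa decoding and Whisper inference are synchronous CPU/GPU work, so the event loop is still blocked while each patch is transcribed. A sketch of one way to keep the loop responsive, assuming Python 3.9+ and the same function shape (transcribe_one is a hypothetical stand-in for whatever synchronous transcription call the real code makes):

import asyncio

def transcribe_one(model, patch_path: str):
    # Stand-in for the blocking per-patch Whisper call.
    return model.transcribe(patch_path)

async def transcribe_patches(patches, model):
    all_results = []
    for i, patch_path in enumerate(patches):
        print(f"[INFO] Transcribing patch {i}: {patch_path}")
        # Run the blocking call on a worker thread so other coroutines
        # (e.g. incoming HTTP requests) keep being served.
        result = await asyncio.to_thread(transcribe_one, model, patch_path)
        all_results.append(result)
    return all_results
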
24 changes: 13 additions & 11 deletions backend/app/upload_api.py
@@ -9,6 +9,7 @@
 from typing import List, Optional
 from app.models import Job, Batch
 from app.background_tasks import main_background_function
+import asyncio
 
 router = routing.APIRouter()
 
@@ -51,7 +52,7 @@ def extract_audio_from_zip(zip_file: UploadFile, extract_dir: Path) -> List[Path]:
 
 @router.post("")
 async def upload_batch(
-    background_tasks: BackgroundTasks,
+    # background_tasks: BackgroundTasks,
     name: str = Form(...),
     description: Optional[str] = Form(None),
     default_definitions: str = Form("[]"),
@@ -141,16 +142,17 @@ async def upload_batch(
     db.refresh(job)
 
     # Launch background task for each job
-    background_tasks.add_task(
-        main_background_function,
-        str(job.id),
-        str(file_path),
-        patch_duration_sec,
-        overlap_sec,
-        db,
-        default_defs_list,
-        positive_examples_list,
-        negative_examples_list
+    asyncio.create_task(
+        main_background_function(
+            str(job.id),
+            str(file_path),
+            patch_duration_sec,
+            overlap_sec,
+            db,
+            default_defs_list,
+            positive_examples_list,
+            negative_examples_list
+        )
     )
 
     return {"batch_id": batch.id}
3 changes: 2 additions & 1 deletion backend/requirements.txt
@@ -12,4 +12,5 @@ python-multipart
 ffmpeg-python==0.2.0
 pydub==0.25.1
 librosa
-soundfile
+soundfile
+httpx