Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,12 @@
.idea
venv
.venv
*.db
src/inputs/*.pdf
src/outputs/*.pdf
src/inputs/*.pdf
src/outputs/*.pdf
fireform.db
*.bak
ngrok.exe
out.txt
benchmark_proof.py
20 changes: 17 additions & 3 deletions api/db/repositories.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,33 @@
from sqlmodel import Session, select
from api.db.models import Template, FormSubmission

# Templates

# ── Templates ─────────────────────────────────────────────────

def create_template(session: Session, template: Template) -> Template:
    """Persist ``template`` through ``session`` and hand the same instance back.

    The instance is added to the session, the transaction is committed, and
    the instance is then refreshed from the database before being returned.
    """
    session.add(template)
    session.commit()
    # Reload the instance's attributes from the database after the commit.
    session.refresh(template)
    return template


def get_template(session: Session, template_id: int) -> Template | None:
    """Look up a template by primary key.

    Returns whatever ``session.get`` yields for ``Template`` and
    ``template_id``: the matching row, or ``None`` when there is none.
    """
    found = session.get(Template, template_id)
    return found

# Forms

def get_all_templates(session: Session, limit: int = 100, offset: int = 0) -> list[Template]:
    """Return one page of templates in a stable order.

    Args:
        session: Open database session used to run the query.
        limit: Maximum number of templates to return.
        offset: Number of templates to skip before the page starts.

    Returns:
        A list with at most ``limit`` templates.
    """
    # OFFSET/LIMIT without ORDER BY lets the database return rows in any
    # order, so consecutive pages could overlap or skip rows. Ordering by the
    # id column makes the pagination deterministic.
    statement = select(Template).order_by(Template.id).offset(offset).limit(limit)
    # ``.all()`` yields a generic sequence; materialise it so the value
    # matches the declared ``list[Template]`` return type.
    return list(session.exec(statement).all())


# ── Forms ─────────────────────────────────────────────────────

def create_form(session: Session, form: FormSubmission) -> FormSubmission:
    """Persist a form submission and return the refreshed instance.

    Args:
        session: Open database session used for the write.
        form: Submission to store.

    Returns:
        The same ``form`` instance, refreshed from the database after the
        commit.
    """
    session.add(form)
    session.commit()
    # Reload the instance's attributes from the database after the commit.
    session.refresh(form)
    # The source carried this return twice; the second copy was unreachable.
    return form


def get_form(session: Session, submission_id: int) -> FormSubmission | None:
    """Look up a form submission by primary key.

    Returns whatever ``session.get`` yields for ``FormSubmission`` and
    ``submission_id``: the matching row, or ``None`` when there is none.
    """
    found = session.get(FormSubmission, submission_id)
    return found
31 changes: 28 additions & 3 deletions api/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,32 @@
from fastapi import FastAPI
from api.routes import templates, forms
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from api.routes import templates, forms, transcribe
from api.errors.base import AppError
from typing import Union
import os

# Application instance that the middleware, exception handler, routers and
# static mount below are attached to.
app = FastAPI()

# CORS middleware configured with wildcards: any origin, method and header
# is accepted.
# NOTE(review): a wildcard origin is very permissive — confirm this is
# intended outside local development.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)

@app.exception_handler(AppError)
def app_error_handler(request: Request, exc: AppError):
    """Turn an ``AppError`` into a JSON response.

    The response status is taken from ``exc.status_code`` and the body is a
    single-key object whose ``detail`` entry holds ``exc.message``.
    """
    payload = {"detail": exc.message}
    return JSONResponse(status_code=exc.status_code, content=payload)

# Register each router exactly once. The source included ``forms.router``
# twice, which would add every /forms route to the application a second time.
app.include_router(templates.router)
app.include_router(forms.router)
app.include_router(transcribe.router)

# Serve mobile PWA at /mobile
# Only mount when "mobile" is an actual directory: a plain file with that
# name would pass an existence check but is not a usable static root.
if os.path.isdir("mobile"):
    app.mount("/mobile", StaticFiles(directory="mobile", html=True), name="mobile")
129 changes: 122 additions & 7 deletions api/routes/forms.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,140 @@
import os
from fastapi import APIRouter, Depends
from fastapi.responses import FileResponse
from sqlmodel import Session
from api.deps import get_db
from api.schemas.forms import FormFill, FormFillResponse
from api.db.repositories import create_form, get_template
from api.schemas.forms import FormFill, FormFillResponse, BatchFormFill, BatchFormFillResponse, BatchResultItem
from api.db.repositories import create_form, get_template, get_form
from api.db.models import FormSubmission
from api.errors.base import AppError
from src.controller import Controller
from src.llm import LLM
from src.filler import Filler

# Router for the form endpoints; every route declared on it is served under
# the "/forms" prefix and tagged "forms".
router = APIRouter(prefix="/forms", tags=["forms"])


@router.post("/fill", response_model=FormFillResponse)
async def fill_form(form: FormFill, db: Session = Depends(get_db)):
    """Fill a single template PDF from free text and store the submission.

    The template is looked up once, its PDF is checked on disk, the LLM
    extracts values for the template's fields, and the filler writes them
    into a new PDF. The resulting path is stored in a ``FormSubmission``.

    Raises:
        AppError: 404 when the template or its PDF file is missing; 500 when
            extraction or filling raises, or when no output PDF exists
            afterwards.
    """
    # Fetch the template once. The source also carried a second, stale lookup
    # and a Controller-based fill whose result was immediately overwritten.
    template = get_template(db, form.template_id)
    if not template:
        raise AppError("Template not found", status_code=404)

    if not os.path.exists(template.pdf_path):
        raise AppError(f"Template PDF not found: {template.pdf_path}", status_code=404)

    try:
        # Step 1: LLM Extraction (Async)
        llm = LLM(transcript_text=form.input_text, target_fields=template.fields)
        await llm.async_main_loop()
        extracted_data = llm.get_data()

        # Step 2: PDF Filling (Sync)
        # Using filler directly to avoid redundant extraction in controller
        filler = Filler()
        path = filler.fill_form_with_data(
            pdf_form=template.pdf_path,
            data=extracted_data,
        )
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise AppError(f"Processing failed: {str(e)}", status_code=500) from e

    # The filler may return nothing or point at a file that was never written.
    if not path or not os.path.exists(path):
        raise AppError("PDF generation failed.", status_code=500)

    submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
    return create_form(db, submission)


def _template_field_map(fields):
    """Normalise a template's ``fields`` into a ``{name: label}`` dict.

    A dict is copied as-is; any other iterable of names maps each name to
    itself; a missing value yields an empty dict.
    """
    if isinstance(fields, dict):
        return dict(fields)
    return {name: name for name in (fields or ())}


@router.post("/fill/batch", response_model=BatchFormFillResponse)
async def fill_batch(batch: BatchFormFill, db: Session = Depends(get_db)):
    """Fill several template PDFs from one input text.

    All requested templates are validated up front, their fields are merged
    and extracted with a single LLM run, and each template is then filled
    and stored independently so that one failure does not abort the others.

    Raises:
        AppError: 400 when ``template_ids`` is empty; 404 when a template is
            unknown or its PDF is missing; 500 when extraction fails.

    Returns:
        A ``BatchFormFillResponse`` with totals and one ``BatchResultItem``
        per template.
    """
    if not batch.template_ids:
        raise AppError("template_ids must not be empty", status_code=400)

    # Validate everything before doing any expensive work.
    templates = []
    for tid in batch.template_ids:
        tpl = get_template(db, tid)
        if not tpl or not os.path.exists(tpl.pdf_path):
            raise AppError(f"Template {tid} invalid or PDF missing", status_code=404)
        templates.append(tpl)

    # Step 1: LLM Extraction (Async - ONE call for all templates)
    merged_fields = {}
    for tpl in templates:
        merged_fields.update(_template_field_map(tpl.fields))

    try:
        llm = LLM(transcript_text=batch.input_text, target_fields=merged_fields)
        await llm.async_main_loop()
        extracted_json = llm.get_data()
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise AppError(f"Extraction failed: {str(e)}", status_code=500) from e

    # Step 2: PDF Filling (Sync - per template)
    results = []
    success_count = 0
    filler = Filler()

    for tpl in templates:
        try:
            # Hand each template only the values for its own fields.
            tpl_data = {
                key: extracted_json.get(key)
                for key in _template_field_map(tpl.fields)
            }

            output_path = filler.fill_form_with_data(pdf_form=tpl.pdf_path, data=tpl_data)
            # Same guard as the single-form endpoint: without it a missing
            # output file would be stored and reported as a success with a
            # dead download link.
            if not output_path or not os.path.exists(output_path):
                raise RuntimeError("PDF generation failed.")

            submission = FormSubmission(
                template_id=tpl.id,
                input_text=batch.input_text,
                output_pdf_path=output_path,
            )
            saved = create_form(db, submission)

            results.append(BatchResultItem(
                template_id=tpl.id,
                template_name=tpl.name,
                success=True,
                submission_id=saved.id,
                download_url=f"/forms/download/{saved.id}",
            ))
            success_count += 1
        except Exception as e:
            # Deliberate best-effort: record the failure and keep going.
            results.append(BatchResultItem(
                template_id=tpl.id,
                template_name=tpl.name,
                success=False,
                error=str(e),
            ))

    return BatchFormFillResponse(
        total=len(templates),
        succeeded=success_count,
        failed=len(templates) - success_count,
        results=results,
    )


@router.get("/{submission_id}", response_model=FormFillResponse)
def get_submission(submission_id: int, db: Session = Depends(get_db)):
    """Return the stored submission with the given id.

    Raises:
        AppError: 404 when ``get_form`` finds no submission for the id.
    """
    record = get_form(db, submission_id)
    if record:
        return record
    raise AppError("Submission not found", status_code=404)


@router.get("/download/{submission_id}")
def download_filled_pdf(submission_id: int, db: Session = Depends(get_db)):
    """Send the filled PDF that belongs to a submission.

    Raises:
        AppError: 404 when the submission does not exist, has no stored
            output path, or the path does not point at a regular file.

    Returns:
        A ``FileResponse`` with media type ``application/pdf`` whose download
        name is the file's base name.
    """
    submission = get_form(db, submission_id)
    if not submission:
        raise AppError("Submission not found", status_code=404)

    file_path = submission.output_pdf_path
    # ``os.path.exists(None)`` raises TypeError, so reject an empty path first.
    # ``isfile`` also rejects a directory, which cannot be sent as a file.
    if not file_path or not os.path.isfile(file_path):
        raise AppError("PDF file not found on server", status_code=404)

    return FileResponse(
        path=file_path,
        media_type="application/pdf",
        filename=os.path.basename(file_path),
    )
91 changes: 82 additions & 9 deletions api/routes/templates.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,89 @@
from fastapi import APIRouter, Depends
import os
import shutil
import uuid
from fastapi import APIRouter, Depends, UploadFile, File, Form
from sqlmodel import Session
from api.deps import get_db
from api.schemas.templates import TemplateCreate, TemplateResponse
from api.db.repositories import create_template
from api.schemas.templates import TemplateResponse
from api.db.repositories import create_template, get_all_templates
from api.db.models import Template
from src.controller import Controller
from api.errors.base import AppError

# Router for the template endpoints; every route declared on it is served
# under the "/templates" prefix and tagged "templates".
router = APIRouter(prefix="/templates", tags=["templates"])

# Save directly into src/inputs/ — stable location, won't get wiped
# The directory is created when this module is imported (no error if it
# already exists), so uploads can be written without a further check.
TEMPLATES_DIR = os.path.join("src", "inputs")
os.makedirs(TEMPLATES_DIR, exist_ok=True)


@router.post("/create", response_model=TemplateResponse)
async def create(
    name: str = Form(...),
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
):
    """Store an uploaded PDF as a new template and record its form fields.

    The upload is written to ``TEMPLATES_DIR`` under a unique name, the PDF's
    form fields are read with pypdf, and a ``Template`` row is created with a
    ``{internal_name: label}`` mapping.

    Args:
        name: Display name of the template (multipart form field).
        file: Uploaded PDF.
        db: Database session.

    Raises:
        AppError: 400 when the upload has no ``.pdf`` file name.
    """
    # Function-scope import: ``re`` is not among this module's header imports.
    import re

    # The client controls the file name. Keep only its last path component
    # (handling both separator styles) so it cannot steer the save path, and
    # tolerate a missing name instead of failing on ``None.endswith``.
    original_name = os.path.basename((file.filename or "").replace("\\", "/"))
    # Validate PDF (case-insensitively, so "FORM.PDF" is accepted as well).
    if not original_name.lower().endswith(".pdf"):
        raise AppError("Only PDF files are allowed", status_code=400)

    # Save uploaded file with unique name into src/inputs/
    unique_name = f"{uuid.uuid4().hex}_{original_name}"
    save_path = os.path.join(TEMPLATES_DIR, unique_name)

    with open(save_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    # Extract fields with pypdf. The previous body also imported
    # ``commonforms.prepare_form`` without using it, so a missing optional
    # package silently produced a template with no fields.
    try:
        from pypdf import PdfReader

        # Read real field names directly from original PDF
        # Real names like "JobTitle", "Phone Number" are already human-readable
        reader = PdfReader(save_path)
        raw_fields = reader.get_fields() or {}

        fields = {}
        for internal_name, field_data in raw_fields.items():
            # Use /TU tooltip if available, otherwise prettify /T name
            label = field_data.get("/TU") if isinstance(field_data, dict) else None
            if not label:
                # Prettify: "JobTitle" → "Job Title", "DATE7_af_date" → "Date"
                label = re.sub(r'([a-z])([A-Z])', r'\1 \2', internal_name)
                label = re.sub(r'_af_.*$', '', label)  # strip "_af_date" suffix
                label = label.replace('_', ' ').strip().title()
            fields[internal_name] = label

    except Exception as e:
        # Best-effort: the template is still stored, just without fields.
        # NOTE(review): the fallback is a list while the success path builds
        # a dict — confirm which shape the Template model expects.
        print(f"Field extraction failed: {e}")
        fields = []

    # Save to DB
    tpl = Template(name=name, pdf_path=save_path, fields=fields)
    return create_template(db, tpl)


@router.get("", response_model=list[TemplateResponse])
def list_templates(limit: int = 100, offset: int = 0, db: Session = Depends(get_db)):
    """Return one page of templates.

    ``limit`` and ``offset`` are passed straight through to
    ``get_all_templates``.
    """
    page = get_all_templates(db, limit=limit, offset=offset)
    return page


@router.get("/{template_id}", response_model=TemplateResponse)
def get_template_by_id(template_id: int, db: Session = Depends(get_db)):
    """Return the template with the given id.

    Raises:
        AppError: 404 when ``get_template`` finds no template for the id.
    """
    # Function-scope import, as in the original body.
    from api.db.repositories import get_template

    found = get_template(db, template_id)
    if found:
        return found
    raise AppError("Template not found", status_code=404)
Loading