Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .

# Set Python path so imports work correctly
ENV PYTHONPATH=/app/src
ENV PYTHONPATH=/app

# Keep container running for interactive use
CMD ["tail", "-f", "/dev/null"]
3 changes: 3 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from fastapi import FastAPI
from api.routes import templates, forms
from api.errors.handlers import register_exception_handlers

app = FastAPI()

register_exception_handlers(app)

app.include_router(templates.router)
app.include_router(forms.router)
5 changes: 4 additions & 1 deletion api/routes/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ def fill_form(form: FormFill, db: Session = Depends(get_db)):
fetched_template = get_template(db, form.template_id)

controller = Controller()
path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)
try:
path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)
except FileNotFoundError:
raise AppError("Template PDF file not found on disk", status_code=404)

submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
return create_form(db, submission)
Expand Down
9 changes: 4 additions & 5 deletions api/schemas/forms.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict

class FormFill(BaseModel):
template_id: int
input_text: str


class FormFillResponse(BaseModel):
model_config = ConfigDict(from_attributes=True)

id: int
template_id: int
input_text: str
output_pdf_path: str

class Config:
from_attributes = True
output_pdf_path: str
9 changes: 4 additions & 5 deletions api/schemas/templates.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict

class TemplateCreate(BaseModel):
name: str
pdf_path: str
fields: dict

class TemplateResponse(BaseModel):
model_config = ConfigDict(from_attributes=True)

id: int
name: str
pdf_path: str
fields: dict

class Config:
from_attributes = True
fields: dict
3 changes: 1 addition & 2 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
print(f"[2] PDF template path: {pdf_form_path}")

if not os.path.exists(pdf_form_path):
print(f"Error: PDF template not found at {pdf_form_path}")
return None # Or raise an exception
raise FileNotFoundError(f"PDF template not found at {pdf_form_path}")

print("[3] Starting extraction and PDF filling process...")
try:
Expand Down
11 changes: 4 additions & 7 deletions src/filler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pdfrw import PdfReader, PdfWriter
from src.llm import LLM
from datetime import datetime
Expand All @@ -12,12 +13,8 @@ def fill_form(self, pdf_form: str, llm: LLM):
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
"""
output_pdf = (
pdf_form[:-4]
+ "_"
+ datetime.now().strftime("%Y%m%d_%H%M%S")
+ "_filled.pdf"
)
base, _ = os.path.splitext(pdf_form)
output_pdf = base + "_" + datetime.now().strftime("%Y%m%d_%H%M%S") + "_filled.pdf"

# Generate dictionary of answers from your original function
t2j = llm.main_loop()
Expand All @@ -29,13 +26,13 @@ def fill_form(self, pdf_form: str, llm: LLM):
pdf = PdfReader(pdf_form)

# Loop through pages
i = 0
for page in pdf.pages:
if page.Annots:
sorted_annots = sorted(
page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0]))
)

i = 0
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
Expand Down
14 changes: 11 additions & 3 deletions src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,18 @@ def main_loop(self):
}

try:
response = requests.post(ollama_url, json=payload)
response = requests.post(ollama_url, json=payload, timeout=60)
response.raise_for_status()
except requests.exceptions.ConnectionError:
raise ConnectionError(
f"Could not connect to Ollama at {ollama_url}. "
"Please ensure Ollama is running and accessible."
)
except requests.exceptions.Timeout:
raise TimeoutError(
f"Ollama request timed out after 60 seconds at {ollama_url}. "
"The model may be overloaded or unavailable."
)
except requests.exceptions.HTTPError as e:
raise RuntimeError(f"Ollama returned an error: {e}")

Expand Down Expand Up @@ -97,8 +102,11 @@ def add_response_to_json(self, field, value):
if ";" in value:
parsed_value = self.handle_plural_values(value)

if field in self._json.keys():
self._json[field].append(parsed_value)
if field in self._json:
existing = self._json[field]
if not isinstance(existing, list):
existing = [existing]
self._json[field] = existing + [parsed_value]
else:
self._json[field] = parsed_value

Expand Down
5 changes: 3 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
# from backend import Fill
from commonforms import prepare_form
from typing import Union
# from backend import Fill
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller

Expand Down
Empty file added tests/__init__.py
Empty file.