From 489f1e4ee6cca739c9be6d6240683983a07b2b07 Mon Sep 17 00:00:00 2001 From: vidhip222 Date: Fri, 20 Mar 2026 23:49:11 -0400 Subject: [PATCH 1/2] fix: missing Union import, hardcoded extension, multi-page index reset, Ollama timeout --- src/filler.py | 11 ++++------- src/llm.py | 7 ++++++- src/main.py | 5 +++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/filler.py b/src/filler.py index e31e535..5eea8b7 100644 --- a/src/filler.py +++ b/src/filler.py @@ -1,3 +1,4 @@ +import os from pdfrw import PdfReader, PdfWriter from src.llm import LLM from datetime import datetime @@ -12,12 +13,8 @@ def fill_form(self, pdf_form: str, llm: LLM): Fill a PDF form with values from user_input using LLM. Fields are filled in the visual order (top-to-bottom, left-to-right). """ - output_pdf = ( - pdf_form[:-4] - + "_" - + datetime.now().strftime("%Y%m%d_%H%M%S") - + "_filled.pdf" - ) + base, _ = os.path.splitext(pdf_form) + output_pdf = base + "_" + datetime.now().strftime("%Y%m%d_%H%M%S") + "_filled.pdf" # Generate dictionary of answers from your original function t2j = llm.main_loop() @@ -29,13 +26,13 @@ def fill_form(self, pdf_form: str, llm: LLM): pdf = PdfReader(pdf_form) # Loop through pages + i = 0 for page in pdf.pages: if page.Annots: sorted_annots = sorted( page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0])) ) - i = 0 for annot in sorted_annots: if annot.Subtype == "/Widget" and annot.T: if i < len(answers_list): diff --git a/src/llm.py b/src/llm.py index 70937f9..1166e63 100644 --- a/src/llm.py +++ b/src/llm.py @@ -60,13 +60,18 @@ def main_loop(self): } try: - response = requests.post(ollama_url, json=payload) + response = requests.post(ollama_url, json=payload, timeout=60) response.raise_for_status() except requests.exceptions.ConnectionError: raise ConnectionError( f"Could not connect to Ollama at {ollama_url}. " "Please ensure Ollama is running and accessible." ) + except requests.exceptions.Timeout: + raise TimeoutError( + f"Ollama request timed out after 60 seconds at {ollama_url}. " + "The model may be overloaded or unavailable." + ) except requests.exceptions.HTTPError as e: raise RuntimeError(f"Ollama returned an error: {e}") diff --git a/src/main.py b/src/main.py index 5bb632b..95b4ddc 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,7 @@ import os -# from backend import Fill -from commonforms import prepare_form +from typing import Union +# from backend import Fill +from commonforms import prepare_form from pypdf import PdfReader from controller import Controller From f8f7f224449a5a8a490b37399bf6b083e03acff0 Mon Sep 17 00:00:00 2001 From: vidhip222 Date: Sun, 22 Mar 2026 00:07:55 -0400 Subject: [PATCH 2/2] fix: register AppError handler, PDF validation, duplicate field crash, Pydantic V2, tests init, Dockerfile PYTHONPATH --- Dockerfile | 2 +- api/main.py | 3 +++ api/routes/forms.py | 5 ++++- api/schemas/forms.py | 9 ++++----- api/schemas/templates.py | 9 ++++----- src/file_manipulator.py | 3 +-- src/llm.py | 7 +++++-- tests/__init__.py | 0 8 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 tests/__init__.py diff --git a/Dockerfile b/Dockerfile index 833fcc3..5001082 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . # Set Python path so imports work correctly -ENV PYTHONPATH=/app/src +ENV PYTHONPATH=/app # Keep container running for interactive use CMD ["tail", "-f", "/dev/null"] diff --git a/api/main.py b/api/main.py index d0b8c79..4b996ac 100644 --- a/api/main.py +++ b/api/main.py @@ -1,7 +1,10 @@ from fastapi import FastAPI from api.routes import templates, forms +from api.errors.handlers import register_exception_handlers app = FastAPI() +register_exception_handlers(app) + app.include_router(templates.router) app.include_router(forms.router) \ No newline at end of file diff --git a/api/routes/forms.py b/api/routes/forms.py index f3430ed..9abffb5 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -17,7 +17,10 @@ def fill_form(form: FormFill, db: Session = Depends(get_db)): fetched_template = get_template(db, form.template_id) controller = Controller() - path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) + try: + path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) + except FileNotFoundError: + raise AppError("Template PDF file not found on disk", status_code=404) submission = FormSubmission(**form.model_dump(), output_pdf_path=path) return create_form(db, submission) diff --git a/api/schemas/forms.py b/api/schemas/forms.py index 3cce650..f421513 100644 --- a/api/schemas/forms.py +++ b/api/schemas/forms.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class FormFill(BaseModel): template_id: int @@ -6,10 +6,9 @@ class FormFill(BaseModel): class FormFillResponse(BaseModel): + model_config = ConfigDict(from_attributes=True) + id: int template_id: int input_text: str - output_pdf_path: str - - class Config: - from_attributes = True \ No newline at end of file + output_pdf_path: str \ No newline at end of file diff --git a/api/schemas/templates.py b/api/schemas/templates.py index 961f219..0e10716 100644 --- a/api/schemas/templates.py +++ b/api/schemas/templates.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class TemplateCreate(BaseModel): name: str @@ -6,10 +6,9 @@ class TemplateCreate(BaseModel): fields: dict class TemplateResponse(BaseModel): + model_config = ConfigDict(from_attributes=True) + id: int name: str pdf_path: str - fields: dict - - class Config: - from_attributes = True \ No newline at end of file + fields: dict \ No newline at end of file diff --git a/src/file_manipulator.py b/src/file_manipulator.py index b7815cc..a1da36c 100644 --- a/src/file_manipulator.py +++ b/src/file_manipulator.py @@ -26,8 +26,7 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str): print(f"[2] PDF template path: {pdf_form_path}") if not os.path.exists(pdf_form_path): - print(f"Error: PDF template not found at {pdf_form_path}") - return None # Or raise an exception + raise FileNotFoundError(f"PDF template not found at {pdf_form_path}") print("[3] Starting extraction and PDF filling process...") try: diff --git a/src/llm.py b/src/llm.py index 1166e63..7423225 100644 --- a/src/llm.py +++ b/src/llm.py @@ -102,8 +102,11 @@ def add_response_to_json(self, field, value): if ";" in value: parsed_value = self.handle_plural_values(value) - if field in self._json.keys(): - self._json[field].append(parsed_value) + if field in self._json: + existing = self._json[field] + if not isinstance(existing, list): + existing = [existing] + self._json[field] = existing + [parsed_value] else: self._json[field] = parsed_value diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29