Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 11 additions & 18 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,19 @@
class FileManipulator:
def __init__(self):
self.filler = Filler()
self.llm = LLM()

def create_template(self, pdf_path: str):
"""
By using commonforms, we create an editable .pdf template and we store it.
"""
template_path = pdf_path[:-4] + "_template.pdf"
prepare_form(pdf_path, template_path)
return template_path

def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
"""
It receives the raw data, runs the PDF filling logic,
and returns the path to the newly created file.
"""
print("[1] Received request from frontend.")
print(f"[2] PDF template path: {pdf_form_path}")

if not os.path.exists(pdf_form_path):
print(f"Error: PDF template not found at {pdf_form_path}")
return None # Or raise an exception
return None

print("[3] Starting extraction and PDF filling process...")
try:
self.llm._target_fields = fields
self.llm._transcript_text = user_input
output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm)
llm = LLM(transcript_text=user_input, target_fields=fields, json={})
output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=llm)

print("\n----------------------------------")
print("✅ Process Complete.")
Expand All @@ -43,5 +29,12 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):

except Exception as e:
print(f"An error occurred during PDF generation: {e}")
# Re-raise the exception so the frontend can handle it
raise e

def create_template(self, pdf_path: str):
"""
By using commonforms, we create an editable .pdf template and we store it.
"""
template_path = pdf_path[:-4] + "_template.pdf"
prepare_form(pdf_path, template_path)
return template_path
16 changes: 12 additions & 4 deletions src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class Filler:
def __init__(self):
pass
self._template_cache = {}

def fill_form(self, pdf_form: str, llm: LLM):
"""
Expand All @@ -24,9 +24,17 @@ def fill_form(self, pdf_form: str, llm: LLM):
textbox_answers = t2j.get_data() # This is a dictionary

answers_list = list(textbox_answers.values())

# Read PDF
pdf = PdfReader(pdf_form)
# --- NEW: CACHING LOGIC ---
if pdf_form not in self._template_cache:
print(f"[LOG] Template Cache Miss, parsing new PDF for {pdf_form}...")
# Read the file from the hard drive once and store it in RAM
with open(pdf_form, "rb") as f:
self._template_cache[pdf_form] = f.read()
else:
print(f"[LOG] Template Cache Hit! Reusing memory for {pdf_form}...")

# Load PDF instantly from RAM instead of hitting the slow hard drive
pdf = PdfReader(fdata=self._template_cache[pdf_form])

# Loop through pages
for page in pdf.pages:
Expand Down
2 changes: 1 addition & 1 deletion src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_prompt(self, current_field):

def main_loop(self):
# self.type_check_all()
for field in self._target_fields.keys():
for field in self._target_fields:
prompt = self.build_prompt(field)
# print(prompt)
# ollama_url = "http://localhost:11434/api/generate"
Expand Down
14 changes: 11 additions & 3 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller
from typing import Union

def input_fields(num_fields: int):
fields = []
Expand Down Expand Up @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand All @@ -78,6 +79,13 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
num_fields = len(fields)
else:
num_fields = 0

controller = Controller()
controller.fill_form(user_input, fields, file)

# --- TEST RUN 1 (Should be a Miss) ---
print("\n🚀 STARTING RUN 1...")
controller.fill_form(user_input, descriptive_fields, file)

# --- TEST RUN 2 (Should be a Hit!) ---
print("\n🚀 STARTING RUN 2...")
controller.fill_form(user_input, descriptive_fields, file)