diff --git a/src/file_manipulator.py b/src/file_manipulator.py index b7815cc..4210d5b 100644 --- a/src/file_manipulator.py +++ b/src/file_manipulator.py @@ -7,33 +7,19 @@ class FileManipulator: def __init__(self): self.filler = Filler() - self.llm = LLM() - - def create_template(self, pdf_path: str): - """ - By using commonforms, we create an editable .pdf template and we store it. - """ - template_path = pdf_path[:-4] + "_template.pdf" - prepare_form(pdf_path, template_path) - return template_path def fill_form(self, user_input: str, fields: list, pdf_form_path: str): - """ - It receives the raw data, runs the PDF filling logic, - and returns the path to the newly created file. - """ print("[1] Received request from frontend.") print(f"[2] PDF template path: {pdf_form_path}") if not os.path.exists(pdf_form_path): print(f"Error: PDF template not found at {pdf_form_path}") - return None # Or raise an exception + return None print("[3] Starting extraction and PDF filling process...") try: - self.llm._target_fields = fields - self.llm._transcript_text = user_input - output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm) + llm = LLM(transcript_text=user_input, target_fields=fields, json={}) + output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=llm) print("\n----------------------------------") print("✅ Process Complete.") @@ -43,5 +29,12 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str): except Exception as e: print(f"An error occurred during PDF generation: {e}") - # Re-raise the exception so the frontend can handle it raise e + + def create_template(self, pdf_path: str): + """ + By using commonforms, we create an editable .pdf template and we store it. + """ + template_path = pdf_path[:-4] + "_template.pdf" + prepare_form(pdf_path, template_path) + return template_path diff --git a/src/filler.py b/src/filler.py index e31e535..879929e 100644 --- a/src/filler.py +++ b/src/filler.py @@ -5,7 +5,7 @@ class Filler: def __init__(self): - pass + self._template_cache = {} def fill_form(self, pdf_form: str, llm: LLM): """ @@ -24,9 +24,17 @@ def fill_form(self, pdf_form: str, llm: LLM): textbox_answers = t2j.get_data() # This is a dictionary answers_list = list(textbox_answers.values()) - - # Read PDF - pdf = PdfReader(pdf_form) + # --- NEW: CACHING LOGIC --- + if pdf_form not in self._template_cache: + print(f"[LOG] Template Cache Miss, parsing new PDF for {pdf_form}...") + # Read the file from the hard drive once and store it in RAM + with open(pdf_form, "rb") as f: + self._template_cache[pdf_form] = f.read() + else: + print(f"[LOG] Template Cache Hit! Reusing memory for {pdf_form}...") + + # Load PDF instantly from RAM instead of hitting the slow hard drive + pdf = PdfReader(fdata=self._template_cache[pdf_form]) # Loop through pages for page in pdf.pages: diff --git a/src/llm.py b/src/llm.py index 70937f9..3ed6761 100644 --- a/src/llm.py +++ b/src/llm.py @@ -46,7 +46,7 @@ def build_prompt(self, current_field): def main_loop(self): # self.type_check_all() - for field in self._target_fields.keys(): + for field in self._target_fields: prompt = self.build_prompt(field) # print(prompt) # ollama_url = "http://localhost:11434/api/generate" diff --git a/src/main.py b/src/main.py index 5bb632b..92f0d5d 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,7 @@ from commonforms import prepare_form from pypdf import PdfReader from controller import Controller +from typing import Union def input_fields(num_fields: int): fields = [] @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio if __name__ == "__main__": file = "./src/inputs/file.pdf" user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is , and the date is 01/02/2005" - fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] + descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] prepared_pdf = "temp_outfile.pdf" prepare_form(file, prepared_pdf) @@ -78,6 +79,13 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio num_fields = len(fields) else: num_fields = 0 - + controller = Controller() - controller.fill_form(user_input, fields, file) + + # --- TEST RUN 1 (Should be a Miss) --- + print("\n🚀 STARTING RUN 1...") + controller.fill_form(user_input, descriptive_fields, file) + + # --- TEST RUN 2 (Should be a Hit!) --- + print("\n🚀 STARTING RUN 2...") + controller.fill_form(user_input, descriptive_fields, file)