From 1a273eef6271751f7de3e8c0e8cc05cd56c1a6ec Mon Sep 17 00:00:00 2001 From: Cubix33 Date: Sat, 21 Mar 2026 22:21:51 +0000 Subject: [PATCH] #315 - remove metadata from output pdf files --- src/filler.py | 9 +++++++-- src/llm.py | 2 +- src/main.py | 5 +++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/filler.py b/src/filler.py index e31e535..b592785 100644 --- a/src/filler.py +++ b/src/filler.py @@ -1,4 +1,4 @@ -from pdfrw import PdfReader, PdfWriter +from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName from src.llm import LLM from datetime import datetime @@ -45,7 +45,12 @@ def fill_form(self, pdf_form: str, llm: LLM): else: # Stop if we run out of answers break - + # --- NEW: Metadata Scrubbing --- + # Create a proper PdfDict instead of a plain {} + pdf.Info = PdfDict() + + # Wrap the key in PdfName so the writer recognizes it + pdf.Info[PdfName("Title")] = "FireForm Auto-Generated Report" PdfWriter().write(output_pdf, pdf) # Your main.py expects this function to return the path diff --git a/src/llm.py b/src/llm.py index 70937f9..3ed6761 100644 --- a/src/llm.py +++ b/src/llm.py @@ -46,7 +46,7 @@ def build_prompt(self, current_field): def main_loop(self): # self.type_check_all() - for field in self._target_fields.keys(): + for field in self._target_fields: prompt = self.build_prompt(field) # print(prompt) # ollama_url = "http://localhost:11434/api/generate" diff --git a/src/main.py b/src/main.py index 5bb632b..54f6b0a 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,7 @@ from commonforms import prepare_form from pypdf import PdfReader from controller import Controller +from typing import Union def input_fields(num_fields: int): fields = [] @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio if __name__ == "__main__": file = "./src/inputs/file.pdf" user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is , and the date is 01/02/2005" - fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] + descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] prepared_pdf = "temp_outfile.pdf" prepare_form(file, prepared_pdf) @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio num_fields = 0 controller = Controller() - controller.fill_form(user_input, fields, file) + controller.fill_form(user_input, descriptive_fields, file)