From 84c30f062d3c55e5506b0facbe4c5f9db75ddca6 Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:49:28 +0530 Subject: [PATCH 01/10] Add basic validation utility for extracted data Adds a function to validate required fields in extracted data. --- src/validation.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/validation.py diff --git a/src/validation.py b/src/validation.py new file mode 100644 index 0000000..0c41938 --- /dev/null +++ b/src/validation.py @@ -0,0 +1,13 @@ +def validate_extracted_data(data: dict) -> bool: + """ + Basic validation for extracted form data. + Ensures required fields are present and non-empty. + """ + + required_fields = ["patient_name", "age", "diagnosis"] + + for field in required_fields: + if field not in data or not data[field]: + return False + + return True From fc7251ca80850eb2211f505ff58044da60dffe7c Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:58:24 +0530 Subject: [PATCH 02/10] Implement data validation in fill_form method Integrate validation into controller flow --- src/controller.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/controller.py b/src/controller.py index d31ec9c..7f78e60 100644 --- a/src/controller.py +++ b/src/controller.py @@ -1,3 +1,4 @@ +from src.validation import validate_extracted_data from src.file_manipulator import FileManipulator class Controller: @@ -5,7 +6,12 @@ def __init__(self): self.file_manipulator = FileManipulator() def fill_form(self, user_input: str, fields: list, pdf_form_path: str): - return self.file_manipulator.fill_form(user_input, fields, pdf_form_path) + data = self.file_manipulator.fill_form(user_input, fields, pdf_form_path) + + if not validate_extracted_data(data): + raise ValueError("Invalid extracted data") + + return data def create_template(self, pdf_path: str): - return 
self.file_manipulator.create_template(pdf_path) \ No newline at end of file + return self.file_manipulator.create_template(pdf_path) From 9404cbaa54c0d0528adc929bac6298886aa0e0db Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 11:02:06 +0530 Subject: [PATCH 03/10] Add tests for validate_extracted_data function Add unit tests for validation utility --- tests/src/tests/test_validation.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tests/src/tests/test_validation.py diff --git a/tests/src/tests/test_validation.py b/tests/src/tests/test_validation.py new file mode 100644 index 0000000..1f30f7a --- /dev/null +++ b/tests/src/tests/test_validation.py @@ -0,0 +1,28 @@ +import pytest +from src.validation import validate_extracted_data + + +def test_valid_data(): + data = { + "patient_name": "John Doe", + "age": 30, + "diagnosis": "Flu" + } + assert validate_extracted_data(data) == True + + +def test_missing_field(): + data = { + "patient_name": "John Doe", + "age": 30 + } + assert validate_extracted_data(data) == False + + +def test_empty_field(): + data = { + "patient_name": "", + "age": 30, + "diagnosis": "Flu" + } + assert validate_extracted_data(data) == False From ba1638467352b75401d8ea0f53a89accb5f2403f Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 11:06:13 +0530 Subject: [PATCH 04/10] Implement test for fill_form validation failure Add a test for form validation failure in Controller. 
Add validation + tests --- tests/src/tests/test_controller.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/src/tests/test_controller.py diff --git a/tests/src/tests/test_controller.py b/tests/src/tests/test_controller.py new file mode 100644 index 0000000..a182333 --- /dev/null +++ b/tests/src/tests/test_controller.py @@ -0,0 +1,16 @@ +from src.controller import Controller + + +def test_fill_form_validation_fail(monkeypatch): + controller = Controller() + + def mock_fill_form(user_input, fields, pdf_form_path): + return {"patient_name": "", "age": 30, "diagnosis": "Flu"} + + monkeypatch.setattr(controller.file_manipulator, "fill_form", mock_fill_form) + + try: + controller.fill_form("input", [], "file.pdf") + assert False # Should not reach here + except ValueError: + assert True From 20abde0d82467f5327c4efc137148026e60c772a Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 12:59:04 +0530 Subject: [PATCH 05/10] Add schema definition for extracted data validation --- src/schema.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/schema.py diff --git a/src/schema.py b/src/schema.py new file mode 100644 index 0000000..ba1866c --- /dev/null +++ b/src/schema.py @@ -0,0 +1,7 @@ +REQUIRED_FIELDS = { + "incident_type": str, + "location": str, + "incident_time": str, + "units_involved": list, + "summary": str +} From 271f68dbf0848897c09f4e3fe06e60b5058a5c6b Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:00:47 +0530 Subject: [PATCH 06/10] Update validation logic to use schema-based enforcement Refactor validation to use REQUIRED_FIELDS from schema. 
--- src/validation.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/validation.py b/src/validation.py index 0c41938..eeaa16c 100644 --- a/src/validation.py +++ b/src/validation.py @@ -1,13 +1,11 @@ -def validate_extracted_data(data: dict) -> bool: - """ - Basic validation for extracted form data. - Ensures required fields are present and non-empty. - """ - - required_fields = ["patient_name", "age", "diagnosis"] +from src.schema import REQUIRED_FIELDS - for field in required_fields: - if field not in data or not data[field]: +def validate_extracted_data(data: dict) -> bool: + for field, field_type in REQUIRED_FIELDS.items(): + if field not in data: + return False + if not isinstance(data[field], field_type): + return False + if data[field] in ["", None]: return False - return True From c0c48ce317f6886455fb53aacae8ac21c2230c77 Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:20:47 +0530 Subject: [PATCH 07/10] Update tests for schema-based validation and add type-check test --- tests/src/tests/test_validation.py | 31 +++++++++++------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/tests/src/tests/test_validation.py b/tests/src/tests/test_validation.py index 1f30f7a..3ebb8e9 100644 --- a/tests/src/tests/test_validation.py +++ b/tests/src/tests/test_validation.py @@ -1,28 +1,19 @@ -import pytest -from src.validation import validate_extracted_data - - def test_valid_data(): data = { - "patient_name": "John Doe", - "age": 30, - "diagnosis": "Flu" + "incident_type": "Fire", + "location": "Downtown", + "incident_time": "2026-03-20 10:00", + "units_involved": ["Unit1", "Unit2"], + "summary": "Fire contained" } assert validate_extracted_data(data) == True - -def test_missing_field(): - data = { - "patient_name": "John Doe", - "age": 30 - } - assert validate_extracted_data(data) == False - - -def test_empty_field(): +def test_wrong_type(): 
data = { - "patient_name": "", - "age": 30, - "diagnosis": "Flu" + "incident_type": "Fire", + "location": "Downtown", + "incident_time": "2026-03-20 10:00", + "units_involved": "Unit1", # should be list + "summary": "Fire contained" } assert validate_extracted_data(data) == False From b2f0b7c7a85a013b91caf40b0c0ecea267a0f823 Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:49:10 +0530 Subject: [PATCH 08/10] Add template mapping utility for JSON to PDF field conversion --- src/template_mapper.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/template_mapper.py diff --git a/src/template_mapper.py b/src/template_mapper.py new file mode 100644 index 0000000..7e458d8 --- /dev/null +++ b/src/template_mapper.py @@ -0,0 +1,15 @@ +class TemplateMapper: + def __init__(self, mapping_config: dict): + """ + mapping_config: dict mapping JSON fields → PDF field names + """ + self.mapping = mapping_config + + def map_to_pdf_fields(self, structured_data: dict) -> dict: + mapped_data = {} + + for json_field, pdf_field in self.mapping.items(): + if json_field in structured_data: + mapped_data[pdf_field] = structured_data[json_field] + + return mapped_data From 9f352d69af1c6ad01641c635477ddd4afac3a73c Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 19:11:03 +0530 Subject: [PATCH 09/10] Add unit tests for template mapper Add tests for TemplateMapper to validate mapping success and handling of missing fields. 
--- tests/test_template_mapper.py | 43 +++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/test_template_mapper.py diff --git a/tests/test_template_mapper.py b/tests/test_template_mapper.py new file mode 100644 index 0000000..27ee868 --- /dev/null +++ b/tests/test_template_mapper.py @@ -0,0 +1,43 @@ +from src.template_mapper import TemplateMapper + + +def test_mapping_success(): + mapping = { + "patient_name": "NameField", + "age": "AgeField", + "diagnosis": "DiagnosisField" + } + + data = { + "patient_name": "John Doe", + "age": 45, + "diagnosis": "Burn injury" + } + + mapper = TemplateMapper(mapping) + result = mapper.map_to_pdf_fields(data) + + assert result == { + "NameField": "John Doe", + "AgeField": 45, + "DiagnosisField": "Burn injury" + } + + +def test_missing_fields(): + mapping = { + "patient_name": "NameField", + "age": "AgeField", + "diagnosis": "DiagnosisField" + } + + data = { + "patient_name": "John Doe" + } + + mapper = TemplateMapper(mapping) + result = mapper.map_to_pdf_fields(data) + + assert result == { + "NameField": "John Doe" + } From 3da146694f1622ed7f64501d9d30bc76c2415ec5 Mon Sep 17 00:00:00 2001 From: Ayush Basu <98446798+ayushhbasu@users.noreply.github.com> Date: Fri, 20 Mar 2026 19:18:11 +0530 Subject: [PATCH 10/10] Integrate template mapper into controller workflow Add TemplateMapper to Controller for data mapping --- src/controller.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/controller.py b/src/controller.py index 7f78e60..7bc315c 100644 --- a/src/controller.py +++ b/src/controller.py @@ -1,10 +1,18 @@ +from src.template_mapper import TemplateMapper from src.validation import validate_extracted_data from src.file_manipulator import FileManipulator class Controller: def __init__(self): self.file_manipulator = FileManipulator() - + self.mapper = TemplateMapper({ + "patient_name": "NameField", + "age": "AgeField", + "diagnosis": "DiagnosisField" +}) + def 
map_data(self, validated_data: dict): + return self.mapper.map_to_pdf_fields(validated_data) + def fill_form(self, user_input: str, fields: list, pdf_form_path: str): data = self.file_manipulator.fill_form(user_input, fields, pdf_form_path)