From 10705ba7b113fd3b314f5a5580ea9ee95e4b8a0a Mon Sep 17 00:00:00 2001
From: PedroSoaresNHS
Date: Thu, 5 Mar 2026 11:35:14 +0000
Subject: [PATCH 1/5] [PRMP-1527] adjusted file name

---
 lambdas/services/bulk_upload_metadata_processor_service.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py
index 3265d65629..8027105d02 100644
--- a/lambdas/services/bulk_upload_metadata_processor_service.py
+++ b/lambdas/services/bulk_upload_metadata_processor_service.py
@@ -394,11 +394,13 @@ def copy_metadata_to_dated_folder(self):
         """Copy processed metadata CSV into a dated archive folder in S3."""
         logger.info("Copying metadata CSV to dated folder")
         current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")
+        original_path = Path(self.file_key)
+        destination_key = f"metadata/{original_path.stem}_{current_datetime}.csv"
         self.s3_service.copy_across_bucket(
             self.staging_bucket_name,
             self.file_key,
             self.staging_bucket_name,
-            f"metadata/{current_datetime}.csv",
+            destination_key,
         )
         self.s3_service.delete_object(self.staging_bucket_name, self.file_key)
 

From aaaf9131008686ecbc1246f3d9a848a329e14d5f Mon Sep 17 00:00:00 2001
From: PedroSoaresNHS
Date: Thu, 5 Mar 2026 13:23:49 +0000
Subject: [PATCH 2/5] [PRMP-1527] added logging

---
 lambdas/services/bulk_upload_metadata_processor_service.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py
index 8027105d02..4f8f8fcae8 100644
--- a/lambdas/services/bulk_upload_metadata_processor_service.py
+++ b/lambdas/services/bulk_upload_metadata_processor_service.py
@@ -395,6 +395,7 @@ def copy_metadata_to_dated_folder(self):
         logger.info("Copying metadata CSV to dated folder")
         current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")
         original_path = Path(self.file_key)
+        logger.info(f"Original file key is {self.file_key}")
         destination_key = f"metadata/{original_path.stem}_{current_datetime}.csv"
         self.s3_service.copy_across_bucket(
             self.staging_bucket_name,

From af39be84daae8327c270273ff952d36624681bf4 Mon Sep 17 00:00:00 2001
From: PedroSoaresNHS
Date: Thu, 5 Mar 2026 14:00:21 +0000
Subject: [PATCH 3/5] [PRMP-1527] added logging

---
 lambdas/services/bulk_upload_metadata_processor_service.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py
index 4f8f8fcae8..39ddb22766 100644
--- a/lambdas/services/bulk_upload_metadata_processor_service.py
+++ b/lambdas/services/bulk_upload_metadata_processor_service.py
@@ -394,9 +394,9 @@ def copy_metadata_to_dated_folder(self):
         """Copy processed metadata CSV into a dated archive folder in S3."""
         logger.info("Copying metadata CSV to dated folder")
         current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")
-        original_path = Path(self.file_key)
+        original_path_directory = str(Path(self.file_key).parent)
         logger.info(f"Original file key is {self.file_key}")
-        destination_key = f"metadata/{original_path.stem}_{current_datetime}.csv"
+        destination_key = f"metadata/{original_path_directory}_{current_datetime}.csv"
         self.s3_service.copy_across_bucket(
             self.staging_bucket_name,
             self.file_key,

From b83cfd53582fd67f62f229725c4ee0767eb8ee6c Mon Sep 17 00:00:00 2001
From: PedroSoaresNHS
Date: Thu, 5 Mar 2026 14:20:13 +0000
Subject: [PATCH 4/5] [PRMP-1527]
 added tests

---
 .../bulk_upload_metadata_processor_service.py | 57 +++++++-------
 .../test_bulk_upload_metadata_handler.py      | 75 +++++++++++++++----
 2 files changed, 88 insertions(+), 44 deletions(-)

diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py
index 39ddb22766..0a54e2cbf1 100644
--- a/lambdas/services/bulk_upload_metadata_processor_service.py
+++ b/lambdas/services/bulk_upload_metadata_processor_service.py
@@ -10,6 +10,7 @@
 
 import pydantic
 from botocore.exceptions import ClientError
+
 from enums.document_review_reason import DocumentReviewReason
 from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat
 from enums.upload_status import UploadStatus
@@ -86,7 +87,7 @@ def download_metadata_from_s3(self) -> str:
         local_file_path = f"{self.temp_download_dir}/{self.file_key.split('/')[-1]}"
 
         logger.info(
-            f"Fetching {local_file_path} from bucket {self.staging_bucket_name}"
+            f"Fetching {local_file_path} from bucket {self.staging_bucket_name}",
         )
 
         try:
@@ -97,7 +98,7 @@ def download_metadata_from_s3(self) -> str:
             )
         except ClientError:
             raise BulkUploadMetadataException(
-                f"Could not retrieve the following metadata file: {self.file_key}"
+                f"Could not retrieve the following metadata file: {self.file_key}",
             )
 
         return local_file_path
@@ -144,12 +145,12 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
         )
 
         with open(
-            csv_file_path, mode="r", encoding="utf-8-sig", errors="replace"
+            csv_file_path, mode="r", encoding="utf-8-sig", errors="replace",
         ) as csv_file:
             csv_reader = csv.DictReader(csv_file)
             if csv_reader.fieldnames is None:
                 raise BulkUploadMetadataException(
-                    "Metadata file is empty or missing headers."
+                    "Metadata file is empty or missing headers.",
                 )
 
             headers = [h.strip() for h in csv_reader.fieldnames]
@@ -160,7 +161,7 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
 
             validated_rows, rejected_rows, rejected_reasons = (
                 self.metadata_mapping_validator_service.validate_and_normalize_metadata(
-                    records, self.fixed_values, self.metadata_heading_remap
+                    records, self.fixed_values, self.metadata_heading_remap,
                 )
             )
             if rejected_reasons:
@@ -168,12 +169,12 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
                     logger.warning(f"Rejected due to: {reason['REASON']}")
 
             logger.info(
-                f"There are {len(validated_rows)} valid rows, and {len(rejected_rows)} rejected rows"
+                f"There are {len(validated_rows)} valid rows, and {len(rejected_rows)} rejected rows",
             )
 
             if not validated_rows:
                 raise BulkUploadMetadataException(
-                    "No valid metadata rows found after alias validation."
+                    "No valid metadata rows found after alias validation.",
                 )
 
             for row in validated_rows:
@@ -183,7 +184,7 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
             self.send_failed_files_to_review_queue(failed_files)
         else:
             logger.info(
-                f"Send to review is disabled. Skipping review queue for {len(failed_files)} failed file"
+                f"Send to review is disabled. Skipping review queue for {len(failed_files)} failed file",
             )
 
         return [
@@ -209,7 +210,7 @@ def process_metadata_row(
             correct_file_name = self.validate_and_correct_filename(file_metadata)
         except InvalidFileNameException as error:
             self.handle_invalid_filename(
-                file_metadata, error, nhs_number, ods_code, failed_files
+                file_metadata, error, nhs_number, ods_code, failed_files,
             )
             return
 
@@ -231,18 +232,18 @@ def apply_fixed_values(self, file_metadata: MetadataFile) -> MetadataFile:
         for field_name, fixed_value in self.fixed_values.items():
             metadata_dict[field_name] = fixed_value
             logger.info(
-                f"Applied fixed value for field '{field_name}': '{fixed_value}'"
+                f"Applied fixed value for field '{field_name}': '{fixed_value}'",
             )
 
         return MetadataFile.model_validate(metadata_dict)
 
     @staticmethod
     def convert_to_sqs_metadata(
-        file: MetadataFile, stored_file_name: str
+        file: MetadataFile, stored_file_name: str,
     ) -> BulkUploadQueueMetadata:
         """Convert a MetadataFile into BulkUploadQueueMetadata."""
         return BulkUploadQueueMetadata(
-            **file.model_dump(), stored_file_name=stored_file_name
+            **file.model_dump(), stored_file_name=stored_file_name,
         )
 
     def create_expedite_sqs_metadata(self, key) -> StagingSqsMetadata:
@@ -256,7 +257,7 @@ def create_expedite_sqs_metadata(self, key) -> StagingSqsMetadata:
                     stored_file_name=file_path,
                     gp_practice_code=ods_code,
                     scan_date=scan_date,
-                )
+                ),
             ],
         )
 
@@ -272,7 +273,7 @@ def validate_and_correct_filename(self, file_metadata: MetadataFile) -> str:
             return file_metadata.file_path
         except LGInvalidFilesException:
             return self.metadata_formatter_service.validate_record_filename(
-                file_metadata.file_path, file_metadata.nhs_number
+                file_metadata.file_path, file_metadata.nhs_number,
             )
 
     def validate_expedite_file(self, s3_object_key: str):
@@ -290,7 +291,7 @@ def validate_expedite_file(self, s3_object_key: str):
         nhs_number = extract_nhs_number_from_bulk_upload_file_name(file_path)[0]
 
         file_name = self.metadata_formatter_service.validate_record_filename(
-            s3_object_key
+            s3_object_key,
         )
         ods_code = Path(s3_object_key).parent.name
         scan_date = datetime.now().strftime("%Y-%m-%d")
@@ -302,7 +303,7 @@ def handle_expedite_event(self, event):
         try:
             unparsed_s3_object_key = event["detail"]["object"]["key"]
             s3_object_key = urllib.parse.unquote_plus(
-                unparsed_s3_object_key, encoding="utf-8"
+                unparsed_s3_object_key, encoding="utf-8",
             )
 
             if s3_object_key.startswith("expedite/"):
@@ -334,26 +335,26 @@ def handle_invalid_filename(
     ) -> None:
         """Handle invalid filenames by logging, storing failure in Dynamo, and tracking for review."""
         logger.error(
-            f"Failed to process {file_metadata.file_path} due to error: {error}"
+            f"Failed to process {file_metadata.file_path} due to error: {error}",
         )
         failed_file = self.convert_to_sqs_metadata(
-            file_metadata, file_metadata.file_path
+            file_metadata, file_metadata.file_path,
         )
         failed_file.file_path = self.add_directory_path_to_file_path(file_metadata)
         failed_files[(nhs_number, ods_code)].append(failed_file)
 
         failed_entry = StagingSqsMetadata(nhs_number=nhs_number, files=[failed_file])
         self.dynamo_repository.write_report_upload_to_dynamo(
-            failed_entry, UploadStatus.FAILED, str(error)
+            failed_entry, UploadStatus.FAILED, str(error),
         )
 
     def send_failed_files_to_review_queue(
-        self, failed_files: dict[tuple[str, str], list[BulkUploadQueueMetadata]]
+        self, failed_files: dict[tuple[str, str], list[BulkUploadQueueMetadata]],
    ) -> None:
         for (nhs_number, ods_code), files in failed_files.items():
             staging_metadata = StagingSqsMetadata(nhs_number=nhs_number, files=files)
             logger.info(
-                f"Sending {len(files)} failed file(s) to review queue for NHS number: {nhs_number}"
+                f"Sending {len(files)} failed file(s) to review queue for NHS number: {nhs_number}",
             )
             self.sqs_repository.send_message_to_review_queue(
                 staging_metadata=staging_metadata,
@@ -362,7 +363,7 @@ def send_failed_files_to_review_queue(
             )
 
     def send_metadata_to_fifo_sqs(
-        self, staging_sqs_metadata_list: list[StagingSqsMetadata]
+        self, staging_sqs_metadata_list: list[StagingSqsMetadata],
     ) -> None:
         """Send validated metadata entries to SQS FIFO queue."""
         for staging_sqs_metadata in staging_sqs_metadata_list:
@@ -377,7 +378,7 @@ def send_metadata_to_fifo_sqs(
         logger.info("Sent bulk upload metadata to sqs queue")
 
     def send_metadata_to_expedite_sqs(
-        self, staging_sqs_metadata: StagingSqsMetadata
+        self, staging_sqs_metadata: StagingSqsMetadata,
     ) -> None:
         """Send validated metadata entries to SQS expedite queue."""
         sqs_group_id = f"bulk_upload_{uuid.uuid4()}"
@@ -418,12 +419,12 @@ def check_file_status(self, file_key: str):
         if scan_result != VirusScanResult.CLEAN:
             logger.info(f"Found an issue with the file {file_key}.")
             raise VirusScanFailedException(
-                f"Encountered an issue when scanning the file {file_key}, scan result was {scan_result}"
+                f"Encountered an issue when scanning the file {file_key}, scan result was {scan_result}",
             )
 
     def enforce_virus_scanner(self, file_key: str):
         logger.info(
-            f"Checking virus scan result for file: {file_key} in {self.staging_bucket_name}"
+            f"Checking virus scan result for file: {file_key} in {self.staging_bucket_name}",
         )
 
         try:
@@ -439,7 +440,7 @@ def enforce_virus_scanner(self, file_key: str):
             if "NoSuchKey" in error_message or "AccessDenied" in error_message:
                 logger.error(f"S3 access error when checking tag for {file_key}.")
                 raise BulkUploadMetadataException(
-                    f"Failed to access S3 file {file_key} during tag check."
+                    f"Failed to access S3 file {file_key} during tag check.",
                 )
             else:
                 raise
@@ -451,11 +452,11 @@ def get_formatter_service(raw_pre_format_type):
         if pre_format_type == LloydGeorgePreProcessFormat.GENERAL:
             logger.info("Using general preFormatType")
             return MetadataGeneralPreprocessor
-        elif pre_format_type == LloydGeorgePreProcessFormat.USB:
+        if pre_format_type == LloydGeorgePreProcessFormat.USB:
            logger.info("Using usb preFormatType")
            return MetadataUsbPreprocessorService
     except ValueError:
         logger.warning(
-            f"Invalid preFormatType: '{raw_pre_format_type}', defaulting to {LloydGeorgePreProcessFormat.GENERAL}."
+            f"Invalid preFormatType: '{raw_pre_format_type}', defaulting to {LloydGeorgePreProcessFormat.GENERAL}.",
         )
     return MetadataGeneralPreprocessor
diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
index 988a86d8ad..ff1d63ffa8 100644
--- a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
+++ b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
@@ -1,21 +1,64 @@
-import pytest
-from handlers.bulk_upload_metadata_handler import lambda_handler
-from models.staging_metadata import METADATA_FILENAME
-from services.bulk_upload_metadata_service import BulkUploadMetadataService
+from unittest.mock import Mock
+
+from services.bulk_upload_metadata_processor_service import (
+    BulkUploadMetadataProcessorService,
+)
 
 
-def test_lambda_call_process_metadata_of_service_class(
-    set_env, event, context, mock_metadata_service
-):
-    lambda_handler(event, context)
-    mock_metadata_service.process_metadata.assert_called_once_with(METADATA_FILENAME)
+def test_copy_metadata_to_dated_folder_copies_and_deletes(mocker, monkeypatch):
+    monkeypatch.setenv("STAGING_STORE_BUCKET_NAME", "staging-bucket")
+    monkeypatch.setenv("METADATA_SQS_QUEUE_URL", "https://example.com/metadata-queue")
+    monkeypatch.setenv("EXPEDITE_SQS_QUEUE_URL", "https://example.com/expedite-queue")
+
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.S3Service", autospec=True,
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.SQSService", autospec=True,
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository",
+        autospec=True,
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadSqsRepository",
+        autospec=True,
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.BulkUploadS3Repository",
+        autospec=True,
+    )
+    mocker.patch(
+        "services.bulk_upload_metadata_processor_service.get_virus_scan_service",
+        autospec=True,
+    )
 
-
-@pytest.fixture
-def mock_metadata_service(mocker):
-    mocked_instance = mocker.patch(
-        "handlers.bulk_upload_metadata_handler.BulkUploadMetadataService",
-        spec=BulkUploadMetadataService,
-    ).return_value
-    yield mocked_instance
+    mocked_datetime = mocker.patch(
+        "services.bulk_upload_metadata_processor_service.datetime",
+    )
+    mocked_datetime.now.return_value.strftime.return_value = "2026-03-05_12-34"
+
+    formatter_service = Mock()
+
+    service = BulkUploadMetadataProcessorService(
+        metadata_formatter_service=formatter_service,
+        metadata_heading_remap={},
+        input_file_location="some/dir/metadata.csv",
+    )
+
+    service.s3_service = Mock()
+
+    service.copy_metadata_to_dated_folder()
+
+    expected_destination_key = "metadata/some/dir_2026-03-05_12-34.csv"
+
+    service.s3_service.copy_across_bucket.assert_called_once_with(
+        "staging-bucket",
+        "some/dir/metadata.csv",
+        "staging-bucket",
+        expected_destination_key,
+    )
+    service.s3_service.delete_object.assert_called_once_with(
+        "staging-bucket",
+        "some/dir/metadata.csv",
+    )

From df3c90b21f3c52d1d7321730177e5c674afae8d9 Mon Sep 17 00:00:00 2001
From: robg-test <106234256+robg-test@users.noreply.github.com>
Date: Fri, 6 Mar 2026 16:27:00 +0000
Subject: [PATCH 5/5] [PRMP-1527] Reformat

---
 .../unit/handlers/test_bulk_upload_metadata_handler.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
index ff1d63ffa8..385562ebdc 100644
--- a/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
+++ b/lambdas/tests/unit/handlers/test_bulk_upload_metadata_handler.py
@@ -11,10 +11,12 @@ def test_copy_metadata_to_dated_folder_copies_and_deletes(mocker, monkeypatch):
     monkeypatch.setenv("EXPEDITE_SQS_QUEUE_URL", "https://example.com/expedite-queue")
 
     mocker.patch(
-        "services.bulk_upload_metadata_processor_service.S3Service", autospec=True,
+        "services.bulk_upload_metadata_processor_service.S3Service",
+        autospec=True,
     )
     mocker.patch(
-        "services.bulk_upload_metadata_processor_service.SQSService", autospec=True,
+        "services.bulk_upload_metadata_processor_service.SQSService",
+        autospec=True,
     )
     mocker.patch(
         "services.bulk_upload_metadata_processor_service.BulkUploadDynamoRepository",
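
For reference, a minimal standalone sketch of the destination-key behaviour these patches converge on from PATCH 3 onwards. The helper name build_destination_key and the example key "some/dir/metadata.csv" are illustrative only, mirroring the new unit test rather than the service's full wiring:

    from datetime import datetime
    from pathlib import Path

    def build_destination_key(file_key: str) -> str:
        # The archive name is derived from the parent directory of the incoming
        # key, e.g. "some/dir/metadata.csv" -> "some/dir", plus a timestamp.
        current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")
        original_path_directory = str(Path(file_key).parent)
        return f"metadata/{original_path_directory}_{current_datetime}.csv"

    # With the timestamp mocked to "2026-03-05_12-34", as in the test, this
    # yields "metadata/some/dir_2026-03-05_12-34.csv".
    print(build_destination_key("some/dir/metadata.csv"))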