From 19f84b6008a565d1c1c5975a8e39ba6867a2afee Mon Sep 17 00:00:00 2001 From: Christian Oertlin Date: Thu, 21 Nov 2024 15:47:36 +0100 Subject: [PATCH 1/5] decompose sample file formatter --- .../utils/sample_concatenation_service.py | 32 ++++++- .../file_formatter/utils/sample_service.py | 91 +++++++++++++------ .../utils/test_formatter_utils.py | 13 ++- 3 files changed, 98 insertions(+), 38 deletions(-) diff --git a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py index 2091204fa2..1a9f558ee1 100644 --- a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py +++ b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py @@ -10,25 +10,47 @@ from cg.services.deliver_files.file_formatter.models import FormattedFile from cg.services.deliver_files.file_formatter.utils.sample_service import ( SampleFileFormatter, + SampleFileNameFormatter, + FileManagingService, ) -class SampleFileConcatenationFormatter(SampleFileFormatter): +class SampleFileConcatenationFormatter: """ Format the sample files to deliver, concatenate fastq files and return the formatted files. - Used for workflows: Microsalt and Mutant. + Used for workflows: Microsalt. """ - def __init__(self, concatenation_service: FastqConcatenationService): + def __init__( + self, + file_manager: FileManagingService, + file_formatter: SampleFileNameFormatter, + concatenation_service: FastqConcatenationService, + ): + self.file_manager = file_manager + self.file_name_formatter = file_formatter self.concatenation_service = concatenation_service def format_files( self, moved_files: list[SampleFile], ticket_dir_path: Path ) -> list[FormattedFile]: """Format the sample files to deliver, concatenate fastq files and return the formatted files.""" - formatted_files: list[FormattedFile] = super().format_files( - moved_files=moved_files, ticket_dir_path=ticket_dir_path + sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) + [ + self.file_manager.create_directories( + base_path=ticket_dir_path, directories={sample_name} + ) + for sample_name in sample_names + ] + formatted_files: list[FormattedFile] = self.file_name_formatter.generate_formatted_files( + sample_files=moved_files ) + [ + self.file_manager.rename_file( + src=formatted_file.original_path, dst=formatted_file.formatted_path + ) + for formatted_file in formatted_files + ] forward_paths, reverse_path = self._concatenate_fastq_files(formatted_files=formatted_files) self._replace_fastq_paths( reverse_paths=reverse_path, diff --git a/cg/services/deliver_files/file_formatter/utils/sample_service.py b/cg/services/deliver_files/file_formatter/utils/sample_service.py index f13aa4c30e..74821dc82e 100644 --- a/cg/services/deliver_files/file_formatter/utils/sample_service.py +++ b/cg/services/deliver_files/file_formatter/utils/sample_service.py @@ -4,54 +4,85 @@ from cg.services.deliver_files.file_formatter.models import FormattedFile -class SampleFileFormatter: +class FileManagingService: """ - Format the sample files to deliver. - Used for all workflows except Microsalt and Mutant. + Service to manage files. + Handles operations that create or rename files and directories. """ - def format_files( - self, moved_files: list[SampleFile], ticket_dir_path: Path - ) -> list[FormattedFile]: - """Format the sample files to deliver and return the formatted files.""" - sample_names: set[str] = self._get_sample_names(moved_files) - self._create_sample_folders(ticket_dir_path=ticket_dir_path, sample_names=sample_names) - return self._format_sample_files(moved_files) + @staticmethod + def create_directories(base_path: Path, directories: set[str]) -> None: + """Create directories for given names under the base path.""" + for directory in directories: + Path(base_path, directory).mkdir(exist_ok=True) @staticmethod - def _get_sample_names(sample_files: list[SampleFile]) -> set[str]: - return set(sample_file.sample_name for sample_file in sample_files) + def rename_file(src: Path, dst: Path) -> None: + """Rename a file from src to dst.""" + os.rename(src, dst) + + +class SampleFileNameFormatter: + """ + Format sample file names. Takes a list of SampleFile objects and returns a list of FormattedFile objects. + """ @staticmethod - def _create_sample_folders(ticket_dir_path: Path, sample_names: set[str]): - for sample_name in sample_names: - sample_dir_path = Path(ticket_dir_path, sample_name) - sample_dir_path.mkdir(exist_ok=True) - - def _format_sample_files(self, sample_files: list[SampleFile]) -> list[FormattedFile]: - formatted_files: list[FormattedFile] = self._get_formatted_files(sample_files) - for formatted_file in formatted_files: - os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path) - return formatted_files + def get_sample_names(sample_files: list[SampleFile]) -> set[str]: + """Extract sample names from the sample files.""" + return {sample_file.sample_name for sample_file in sample_files} @staticmethod - def _get_formatted_files(sample_files: list[SampleFile]) -> list[FormattedFile]: + def generate_formatted_files(sample_files: list[SampleFile]) -> list[FormattedFile]: """ Returns formatted files: 1. Adds a folder with sample name to the path of the sample files. 2. Replaces sample id by sample name. """ - formatted_files: list[FormattedFile] = [] + formatted_files = [] for sample_file in sample_files: - replaced_sample_file_name: str = sample_file.file_path.name.replace( + replaced_name = sample_file.file_path.name.replace( sample_file.sample_id, sample_file.sample_name ) - formatted_file_path = Path( - sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name + formatted_path = Path( + sample_file.file_path.parent, sample_file.sample_name, replaced_name ) formatted_files.append( - FormattedFile( - original_path=sample_file.file_path, formatted_path=formatted_file_path - ) + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_path) + ) + return formatted_files + + +class SampleFileFormatter: + """ + Format the sample files to deliver. + Used for all workflows except Microsalt and Mutant. + """ + + def __init__( + self, file_manager: FileManagingService, file_name_formatter: SampleFileNameFormatter + ): + self.file_manager = file_manager + self.file_name_formatter = file_name_formatter + + def format_files( + self, moved_files: list[SampleFile], ticket_dir_path: Path + ) -> list[FormattedFile]: + """Format the sample files to deliver and return the formatted files.""" + sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) + [ + self.file_manager.create_directories( + base_path=ticket_dir_path, directories={sample_name} + ) + for sample_name in sample_names + ] + formatted_files: list[FormattedFile] = self.file_name_formatter.generate_formatted_files( + sample_files=moved_files + ) + [ + self.file_manager.rename_file( + src=formatted_file.original_path, dst=formatted_file.formatted_path ) + for formatted_file in formatted_files + ] return formatted_files diff --git a/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py b/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py index 4db3345b83..2245fb7f78 100644 --- a/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py +++ b/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py @@ -6,7 +6,6 @@ FastqConcatenationService, ) from cg.services.deliver_files.file_fetcher.models import ( - DeliveryFiles, CaseFile, SampleFile, ) @@ -19,6 +18,8 @@ ) from cg.services.deliver_files.file_formatter.utils.sample_service import ( SampleFileFormatter, + FileManagingService, + SampleFileNameFormatter, ) @@ -33,12 +34,18 @@ ( "expected_moved_analysis_sample_delivery_files", "expected_formatted_analysis_sample_files", - SampleFileFormatter(), + SampleFileFormatter( + file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() + ), ), ( "fastq_concatenation_sample_files", "expected_concatenated_fastq_formatted_files", - SampleFileConcatenationFormatter(FastqConcatenationService()), + SampleFileConcatenationFormatter( + file_manager=FileManagingService(), + file_formatter=SampleFileNameFormatter(), + concatenation_service=FastqConcatenationService(), + ), ), ], ) From 82ad36248dc3c4bd6f6cb45c340e5c0c770f3a5b Mon Sep 17 00:00:00 2001 From: Christian Oertlin Date: Thu, 21 Nov 2024 15:59:14 +0100 Subject: [PATCH 2/5] fix test --- .../deliver_files_service_factory.py | 12 ++++++++++-- .../utils/sample_concatenation_service.py | 1 - .../delivery_fixtures/delivery_services_fixtures.py | 7 ++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py b/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py index 9f357abd8c..18f7ff7eed 100644 --- a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py +++ b/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py @@ -49,6 +49,8 @@ ) from cg.services.deliver_files.file_formatter.utils.sample_service import ( SampleFileFormatter, + FileManagingService, + SampleFileNameFormatter, ) from cg.services.deliver_files.file_mover.service import ( DeliveryFilesMover, @@ -108,8 +110,14 @@ def _get_sample_file_formatter( ) -> SampleFileFormatter | SampleFileConcatenationFormatter: """Get the file formatter service based on the workflow.""" if workflow in [Workflow.MICROSALT]: - return SampleFileConcatenationFormatter(FastqConcatenationService()) - return SampleFileFormatter() + return SampleFileConcatenationFormatter( + file_manager=FileManagingService(), + file_formatter=SampleFileNameFormatter(), + concatenation_service=FastqConcatenationService(), + ) + return SampleFileFormatter( + file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() + ) @staticmethod def _validate_delivery_type(delivery_type: DataDelivery): diff --git a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py index 1a9f558ee1..040941b1b6 100644 --- a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py +++ b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py @@ -9,7 +9,6 @@ from cg.services.deliver_files.file_fetcher.models import SampleFile from cg.services.deliver_files.file_formatter.models import FormattedFile from cg.services.deliver_files.file_formatter.utils.sample_service import ( - SampleFileFormatter, SampleFileNameFormatter, FileManagingService, ) diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py index 6e55d2070e..5d81346d36 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py @@ -27,6 +27,8 @@ ) from cg.services.deliver_files.file_formatter.utils.sample_service import ( SampleFileFormatter, + FileManagingService, + SampleFileNameFormatter, ) from cg.store.store import Store @@ -119,5 +121,8 @@ def analysis_delivery_service_no_housekeeper_bundle( def generic_delivery_file_formatter() -> DeliveryFileFormatter: """Fixture to get an instance of GenericDeliveryFileFormatter.""" return DeliveryFileFormatter( - sample_file_formatter=SampleFileFormatter(), case_file_formatter=CaseFileFormatter() + sample_file_formatter=SampleFileFormatter( + file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() + ), + case_file_formatter=CaseFileFormatter(), ) From 271fb269906af8e4365ac5ed8075047f60ec4543 Mon Sep 17 00:00:00 2001 From: Christian Oertlin Date: Fri, 22 Nov 2024 08:55:24 +0100 Subject: [PATCH 3/5] address comments --- .../utils/sample_concatenation_service.py | 10 +++------- .../file_formatter/utils/sample_service.py | 16 ++++++---------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py index 040941b1b6..c7eaea6b63 100644 --- a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py +++ b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py @@ -35,21 +35,17 @@ def format_files( ) -> list[FormattedFile]: """Format the sample files to deliver, concatenate fastq files and return the formatted files.""" sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) - [ + for sample_name in sample_names: self.file_manager.create_directories( base_path=ticket_dir_path, directories={sample_name} ) - for sample_name in sample_names - ] - formatted_files: list[FormattedFile] = self.file_name_formatter.generate_formatted_files( + formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names( sample_files=moved_files ) - [ + for formatted_file in formatted_files: self.file_manager.rename_file( src=formatted_file.original_path, dst=formatted_file.formatted_path ) - for formatted_file in formatted_files - ] forward_paths, reverse_path = self._concatenate_fastq_files(formatted_files=formatted_files) self._replace_fastq_paths( reverse_paths=reverse_path, diff --git a/cg/services/deliver_files/file_formatter/utils/sample_service.py b/cg/services/deliver_files/file_formatter/utils/sample_service.py index 74821dc82e..8efc383d1c 100644 --- a/cg/services/deliver_files/file_formatter/utils/sample_service.py +++ b/cg/services/deliver_files/file_formatter/utils/sample_service.py @@ -24,7 +24,7 @@ def rename_file(src: Path, dst: Path) -> None: class SampleFileNameFormatter: """ - Format sample file names. Takes a list of SampleFile objects and returns a list of FormattedFile objects. + Class to format sample file names. """ @staticmethod @@ -33,9 +33,9 @@ def get_sample_names(sample_files: list[SampleFile]) -> set[str]: return {sample_file.sample_name for sample_file in sample_files} @staticmethod - def generate_formatted_files(sample_files: list[SampleFile]) -> list[FormattedFile]: + def format_sample_file_names(sample_files: list[SampleFile]) -> list[FormattedFile]: """ - Returns formatted files: + Returns formatted files with original and formatted file names: 1. Adds a folder with sample name to the path of the sample files. 2. Replaces sample id by sample name. """ @@ -70,19 +70,15 @@ def format_files( ) -> list[FormattedFile]: """Format the sample files to deliver and return the formatted files.""" sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) - [ + for sample_name in sample_names: self.file_manager.create_directories( base_path=ticket_dir_path, directories={sample_name} ) - for sample_name in sample_names - ] - formatted_files: list[FormattedFile] = self.file_name_formatter.generate_formatted_files( + formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names( sample_files=moved_files ) - [ + for formatted_file in formatted_files: self.file_manager.rename_file( src=formatted_file.original_path, dst=formatted_file.formatted_path ) - for formatted_file in formatted_files - ] return formatted_files From 7505fd5f1c3149a046051ebc714bad5ad19c11bf Mon Sep 17 00:00:00 2001 From: Christian Oertlin Date: Mon, 25 Nov 2024 08:48:19 +0100 Subject: [PATCH 4/5] fix import --- .../deliver_files_service/deliver_files_service_factory.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py b/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py index 46f1fdbffc..5b616d67ec 100644 --- a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py +++ b/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py @@ -24,20 +24,17 @@ from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( SampleFileConcatenationFormatter, ) -<<<<<<< HEAD from cg.services.deliver_files.file_formatter.utils.sample_service import ( SampleFileFormatter, FileManagingService, SampleFileNameFormatter, -======= -from cg.services.deliver_files.file_formatter.utils.sample_service import SampleFileFormatter +) from cg.services.deliver_files.file_mover.service import DeliveryFilesMover from cg.services.deliver_files.rsync.service import DeliveryRsyncService from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService from cg.services.deliver_files.tag_fetcher.bam_service import BamDeliveryTagsFetcher from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( SampleAndCaseDeliveryTagsFetcher, ->>>>>>> master ) from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( FastqConcatenationService, @@ -150,6 +147,7 @@ def _get_sample_file_formatter( return SampleFileFormatter( file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() ) + def build_delivery_service( self, case: Case, delivery_type: DataDelivery | None = None ) -> DeliverFilesService: From 57cd880c79de7695792aeae5099d7cf78b5e82a4 Mon Sep 17 00:00:00 2001 From: Christian Oertlin Date: Mon, 25 Nov 2024 09:24:45 +0100 Subject: [PATCH 5/5] fix test scenario --- .../file_delivery/delivery_file_service/test_service_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/services/file_delivery/delivery_file_service/test_service_builder.py b/tests/services/file_delivery/delivery_file_service/test_service_builder.py index 3ecba5b6f2..1a16496a0a 100644 --- a/tests/services/file_delivery/delivery_file_service/test_service_builder.py +++ b/tests/services/file_delivery/delivery_file_service/test_service_builder.py @@ -37,7 +37,7 @@ class DeliveryServiceScenario(BaseModel): expected_tag_fetcher: type[FetchDeliveryFileTagsService] expected_file_fetcher: type[FetchDeliveryFilesService] expected_file_mover: type[DeliveryFilesMover] - expected_sample_file_formatter: type[SampleFileFormatter] + expected_sample_file_formatter: type[SampleFileFormatter | SampleFileConcatenationFormatter] store_name: str