From ca3953416fafed2dcd63957f41387d9d8e149d53 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 15 Oct 2025 14:54:30 +0200 Subject: [PATCH 1/3] add case insensitive matching --- .../simcore_service_storage/simcore_s3_dsm.py | 2 +- .../storage/tests/unit/test_simcore_s3_dsm.py | 71 +++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index ff80405e186..8ad4378e286 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -1010,7 +1010,7 @@ async def _search_project_s3_files( filename = Path(s3_obj.object_key).name if not ( - fnmatch.fnmatch(filename, name_pattern) + fnmatch.fnmatch(filename.lower(), name_pattern.lower()) and len(s3_obj.object_key.split("/")) >= min_parts_for_valid_s3_object ): diff --git a/services/storage/tests/unit/test_simcore_s3_dsm.py b/services/storage/tests/unit/test_simcore_s3_dsm.py index 5fbe57f4aa3..3477edef6a8 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm.py @@ -315,6 +315,77 @@ async def test_search_files( assert len(paginated_results) == len(test_files) +@pytest.mark.parametrize( + "location_id", + [SimcoreS3DataManager.get_location_id()], + ids=[SimcoreS3DataManager.get_location_name()], + indirect=True, +) +async def test_search_files_case_insensitive( + simcore_s3_dsm: SimcoreS3DataManager, + upload_file: Callable[..., Awaitable[tuple[Path, SimcoreS3FileID]]], + file_size: ByteSize, + user_id: UserID, + project_id: ProjectID, + faker: Faker, +): + mixed_case_files = [ + ("TestFile.TXT", "*.txt"), + ("MyDocument.PDF", "*.pdf"), + ("DataFile.CSV", "data*.csv"), + ("ConfigFile.JSON", "config*"), + ("BackupData.BAK", "*.bak"), + ("CamelCaseFile.txt", "camelcase*"), + ("XMLDataFile.xml", "*.XML"), + ("config.json", "CONFIG*"), + ] + + for file_name, _ in mixed_case_files: + checksum: SHA256Str = TypeAdapter(SHA256Str).validate_python(faker.sha256()) + await upload_file(file_size, file_name, sha256_checksum=checksum) + + # Test case-insensitive extension matching + case_insensitive_txt = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "*.txt", project_id + ) + txt_file_names = {file.file_name for file in case_insensitive_txt} + assert "TestFile.TXT" in txt_file_names + assert "CamelCaseFile.txt" in txt_file_names + + # Test case-insensitive prefix matching + case_insensitive_data = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "data*", project_id + ) + data_file_names = {file.file_name for file in case_insensitive_data} + assert "DataFile.CSV" in data_file_names + + # Test mixed case pattern matching + case_insensitive_config = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "CONFIG*", project_id + ) + config_file_names = {file.file_name for file in case_insensitive_config} + assert "ConfigFile.JSON" in config_file_names + assert "config.json" in config_file_names + + case_insensitive_xml = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "*.XML", project_id + ) + xml_file_names = {file.file_name for file in case_insensitive_xml} + assert "XMLDataFile.xml" in xml_file_names + + camelcase_results = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "camelcase*", project_id + ) + assert len(camelcase_results) == 1 + assert camelcase_results[0].file_name == "CamelCaseFile.txt" + + pdf_results = await _search_files_by_pattern( + simcore_s3_dsm, user_id, "*.PDF", project_id + ) + pdf_file_names = {file.file_name for file in pdf_results} + assert "MyDocument.PDF" in pdf_file_names + + @pytest.fixture async def paths_for_export( random_project_with_files: Callable[ From 566512d40e869eedd7ffb75c17b0837f8afd2c2a Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 15 Oct 2025 14:57:54 +0200 Subject: [PATCH 2/3] change task name --- .../src/models_library/api_schemas_storage/search_async_jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/api_schemas_storage/search_async_jobs.py b/packages/models-library/src/models_library/api_schemas_storage/search_async_jobs.py index 5de7cf9d1c0..5d4a1487742 100644 --- a/packages/models-library/src/models_library/api_schemas_storage/search_async_jobs.py +++ b/packages/models-library/src/models_library/api_schemas_storage/search_async_jobs.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, ByteSize, ConfigDict from pydantic.alias_generators import to_camel -SEARCH_TASK_NAME: Final[str] = "files.search" +SEARCH_TASK_NAME: Final[str] = "files_search" class SearchResultItem(BaseModel): From 60374bd91ea947474e70d68dccf362db725c9cb1 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 15 Oct 2025 15:48:32 +0200 Subject: [PATCH 3/3] move out from loop --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 8ad4378e286..4f274d1b571 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -1001,6 +1001,7 @@ async def _search_project_s3_files( min_parts_for_valid_s3_object = 2 try: + name_pattern_lower = name_pattern.lower() async for s3_objects in s3_client.list_objects_paginated( bucket=self.simcore_bucket_name, prefix=f"{proj_id}/", @@ -1010,7 +1011,7 @@ async def _search_project_s3_files( filename = Path(s3_obj.object_key).name if not ( - fnmatch.fnmatch(filename.lower(), name_pattern.lower()) + fnmatch.fnmatch(filename.lower(), name_pattern_lower) and len(s3_obj.object_key.split("/")) >= min_parts_for_valid_s3_object ):