From 76ae433206fb39846754d6d99755e5985cee6ec8 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Tue, 11 Nov 2025 17:46:33 +0500 Subject: [PATCH 01/16] feat: Add support for compacting database indexes. --- .code-samples.meilisearch.yaml | 2 ++ meilisearch/index.py | 14 ++++++++++++++ tests/index/test_index.py | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index a836eeb1..649435fb 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -11,6 +11,8 @@ create_an_index_1: |- client.create_index('movies', {'primaryKey': 'id'}) update_an_index_1: |- client.index('movies').update(primary_key='id') +compact_index_1: |- + client.index('movies').compact() delete_an_index_1: |- client.delete_index('movies') // OR diff --git a/meilisearch/index.py b/meilisearch/index.py index 27db2a2f..0207f1d5 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -2333,3 +2333,17 @@ def _build_url( if primary_key is None and csv_delimiter is None: return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}" return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}?{parse.urlencode(parameters)}" + + def compact(self) -> TaskInfo: + """ + Trigger the compaction of the index. + This is an asynchronous operation in Meilisearch. + + Returns + ------- + task_info: TaskInfo + Contains information to track the progress of the compaction task. + """ + path = f"{self.config.paths.index}/{self.uid}/compact" + task = self.http.post(path) + return TaskInfo(**task) diff --git a/tests/index/test_index.py b/tests/index/test_index.py index 655477d2..97d7459f 100644 --- a/tests/index/test_index.py +++ b/tests/index/test_index.py @@ -219,3 +219,28 @@ def test_delete_index(client): client.wait_for_task(deleted.task_uid) with pytest.raises(MeilisearchApiError): client.get_index(uid=common.INDEX_UID) + + +@pytest.mark.usefixtures("indexes_sample") +def test_index_compact(client): + """Tests the compaction of an index.""" + assert client.get_index(uid=common.INDEX_UID) + index: Index = client.index("movies") + # Get stats before compaction + stats_before = index.get_stats() + print( + "Before compaction:", + f"number_of_documents={stats_before.number_of_documents}, " + f"is_indexing={stats_before.is_indexing}" + ) + + task_info = index.compact() + client.wait_for_task(task_info.task_uid) + stats_after = index.get_stats() + print( + "After compaction:", + f"number_of_documents={stats_after.number_of_documents}, " + f"is_indexing={stats_after.is_indexing}" + ) + assert stats_before.number_of_documents == stats_after.number_of_documents + assert stats_after.is_indexing is False From 64f29138760c270f0d26dbc536ba481f138e177b Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Tue, 11 Nov 2025 18:02:05 +0500 Subject: [PATCH 02/16] feat: Add support for compacting database indexes. --- tests/index/test_index.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/index/test_index.py b/tests/index/test_index.py index 97d7459f..81343f47 100644 --- a/tests/index/test_index.py +++ b/tests/index/test_index.py @@ -224,8 +224,7 @@ def test_delete_index(client): @pytest.mark.usefixtures("indexes_sample") def test_index_compact(client): """Tests the compaction of an index.""" - assert client.get_index(uid=common.INDEX_UID) - index: Index = client.index("movies") + index = client.index(common.INDEX_UID) # Get stats before compaction stats_before = index.get_stats() print( From ec8613e0e262519a29b03c5c17e13185937f1c0e Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Tue, 11 Nov 2025 18:03:19 +0500 Subject: [PATCH 03/16] feat: Add support for compacting database indexes. --- tests/index/test_index.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tests/index/test_index.py b/tests/index/test_index.py index 81343f47..397b6e0c 100644 --- a/tests/index/test_index.py +++ b/tests/index/test_index.py @@ -227,19 +227,10 @@ def test_index_compact(client): index = client.index(common.INDEX_UID) # Get stats before compaction stats_before = index.get_stats() - print( - "Before compaction:", - f"number_of_documents={stats_before.number_of_documents}, " - f"is_indexing={stats_before.is_indexing}" - ) task_info = index.compact() client.wait_for_task(task_info.task_uid) stats_after = index.get_stats() - print( - "After compaction:", - f"number_of_documents={stats_after.number_of_documents}, " - f"is_indexing={stats_after.is_indexing}" - ) + assert stats_before.number_of_documents == stats_after.number_of_documents assert stats_after.is_indexing is False From bfbd7b7497d0b61ea0bbe4cb31351ecb9ce449ac Mon Sep 17 00:00:00 2001 From: meili-bot <74670311+meili-bot@users.noreply.github.com> Date: Tue, 11 Nov 2025 14:27:33 +0100 Subject: [PATCH 04/16] Update meilisearch/version.py --- meilisearch/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/version.py b/meilisearch/version.py index cbd3ec80..772ac4b6 100644 --- a/meilisearch/version.py +++ b/meilisearch/version.py @@ -1,6 +1,6 @@ from __future__ import annotations -__version__ = "0.37.1" +__version__ = "0.38.0" def qualified_version() -> str: From 9c0a9f4a6cf2500535baf8a033196053f2987748 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Wed, 12 Nov 2025 17:20:29 +0500 Subject: [PATCH 05/16] feat: Adding support Multimodal embedders. --- meilisearch/client.py | 81 +++++++++++++++++++++++++++++++++++++++++++ meilisearch/config.py | 1 + 2 files changed, 82 insertions(+) diff --git a/meilisearch/client.py b/meilisearch/client.py index 22b1c70f..3270fddc 100644 --- a/meilisearch/client.py +++ b/meilisearch/client.py @@ -984,6 +984,87 @@ def update_chat_workspace_settings( return self.http.patch(f"chats/{workspace_uid}/settings", body=settings) + def get_experimental_features(self) -> Dict[str, bool]: + """Get the current experimental features settings. + + Returns + ------- + experimental_features: + Dictionary containing the experimental features and their enabled/disabled status. + Example: {"vectorStore": false, "multimodal": true} + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. + Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + return self.http.get(self.config.paths.experimental_features) + + def update_experimental_features(self, features: Mapping[str, bool]) -> Dict[str, bool]: + """Update experimental features settings. + + Enable or disable experimental features in Meilisearch. + + Parameters + ---------- + features: + Dictionary containing the experimental features to enable/disable. + Available features: + - "vectorStore": Enable/disable vector store functionality + - "multimodal": Enable/disable multi-modal search with fragments + - "compositeEmbedders": Enable/disable composite embedders + Example: {"multimodal": True} + + Returns + ------- + experimental_features: + Dictionary containing the updated experimental features settings. + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. + Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + return self.http.patch(self.config.paths.experimental_features, body=features) + + def enable_multimodal(self) -> Dict[str, bool]: + """Enable multi-modal search experimental feature. + + This is a convenience method to enable the multimodal experimental feature, + which allows using indexingFragments, searchFragments, and media parameter + for multi-modal search with REST embedders. + + Returns + ------- + experimental_features: + Dictionary containing the updated experimental features settings. + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. + Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + return self.update_experimental_features({"multimodal": True}) + + def disable_multimodal(self) -> Dict[str, bool]: + """Disable multi-modal search experimental feature. + + Returns + ------- + experimental_features: + Dictionary containing the updated experimental features settings. + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. + Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + return self.update_experimental_features({"multimodal": False}) + @staticmethod def _base64url_encode(data: bytes) -> str: return base64.urlsafe_b64encode(data).decode("utf-8").replace("=", "") diff --git a/meilisearch/config.py b/meilisearch/config.py index 1891a2d5..a5619094 100644 --- a/meilisearch/config.py +++ b/meilisearch/config.py @@ -47,6 +47,7 @@ class Paths: localized_attributes = "localized-attributes" edit = "edit" network = "network" + experimental_features = "experimental-features" def __init__( self, From 17a9f1c08d23eb513748e1e89cdadf4cd7415cb0 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Wed, 12 Nov 2025 17:37:24 +0500 Subject: [PATCH 06/16] feat: Adding support Multimodal embedders. --- meilisearch/index.py | 36 ++++++++++++++++++++++++++++++++- meilisearch/models/embedders.py | 8 ++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/meilisearch/index.py b/meilisearch/index.py index 0207f1d5..2a02a3ba 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -318,6 +318,40 @@ def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> body=body, ) + def search_with_media( + self, media: Dict[str, Any], opt_params: Optional[Mapping[str, Any]] = None + ) -> Dict[str, Any]: + """Search in the index using media parameter. + + https://www.meilisearch.com/docs/reference/api/search + + Parameters + ---------- + media: + Dictionary containing media fragments to search with. + opt_params (optional): + Dictionary containing optional query parameters. + + Returns + ------- + results: + Dictionary with hits, offset, limit, processingTime and media + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + if opt_params is None: + opt_params = {} + + body = {"media": media, **opt_params} + + return self.http.post( + f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", + body=body, + ) + @version_error_hint_message def facet_search( self, @@ -1012,7 +1046,7 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo: - 'dictionary': List of custom dictionary words - 'separatorTokens': List of separator tokens - 'nonSeparatorTokens': List of non-separator tokens - - 'embedders': Dictionary of embedder configurations for AI-powered search + - 'embedders': Dictionary of embedder configurations - 'searchCutoffMs': Maximum search time in milliseconds - 'proximityPrecision': Precision for proximity ranking - 'localizedAttributes': Settings for localized attributes diff --git a/meilisearch/models/embedders.py b/meilisearch/models/embedders.py index 9dcd5d00..abba9f2a 100644 --- a/meilisearch/models/embedders.py +++ b/meilisearch/models/embedders.py @@ -167,6 +167,12 @@ class RestEmbedder(CamelBase): Template defining the data Meilisearch sends to the embedder document_template_max_bytes: Optional[int] Maximum allowed size of rendered document template (defaults to 400) + indexing_fragments: Optional[Dict[str, Dict[str, str]]] + Defines how to fragment documents for indexing (multi-modal search) + Example: {"text": {"value": "{{doc.title}} - {{doc.overview}}"}} + search_fragments: Optional[Dict[str, Dict[str, str]]] + Defines how to fragment search queries (multi-modal search) + Example: {"text": {"value": "{{fragment}}"}} request: Dict[str, Any] A JSON value representing the request Meilisearch makes to the remote embedder response: Dict[str, Any] @@ -185,6 +191,8 @@ class RestEmbedder(CamelBase): dimensions: Optional[int] = None document_template: Optional[str] = None document_template_max_bytes: Optional[int] = None + indexing_fragments: Optional[Dict[str, Dict[str, str]]] = None + search_fragments: Optional[Dict[str, Dict[str, str]]] = None request: Dict[str, Any] response: Dict[str, Any] headers: Optional[Dict[str, str]] = None From 1e46b6e6ab17628f2b0a62799ee6d878ecfc34d3 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Wed, 12 Nov 2025 17:45:47 +0500 Subject: [PATCH 07/16] feat: Adding support Multimodal embedders. --- meilisearch/client.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/meilisearch/client.py b/meilisearch/client.py index 3270fddc..96980e8e 100644 --- a/meilisearch/client.py +++ b/meilisearch/client.py @@ -991,30 +991,21 @@ def get_experimental_features(self) -> Dict[str, bool]: ------- experimental_features: Dictionary containing the experimental features and their enabled/disabled status. - Example: {"vectorStore": false, "multimodal": true} Raises ------ MeilisearchApiError - An error containing details about why Meilisearch can't process your request. - Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors """ return self.http.get(self.config.paths.experimental_features) def update_experimental_features(self, features: Mapping[str, bool]) -> Dict[str, bool]: """Update experimental features settings. - Enable or disable experimental features in Meilisearch. - Parameters ---------- features: Dictionary containing the experimental features to enable/disable. - Available features: - - "vectorStore": Enable/disable vector store functionality - - "multimodal": Enable/disable multi-modal search with fragments - - "compositeEmbedders": Enable/disable composite embedders - Example: {"multimodal": True} Returns ------- @@ -1024,17 +1015,12 @@ def update_experimental_features(self, features: Mapping[str, bool]) -> Dict[str Raises ------ MeilisearchApiError - An error containing details about why Meilisearch can't process your request. - Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors """ return self.http.patch(self.config.paths.experimental_features, body=features) def enable_multimodal(self) -> Dict[str, bool]: - """Enable multi-modal search experimental feature. - - This is a convenience method to enable the multimodal experimental feature, - which allows using indexingFragments, searchFragments, and media parameter - for multi-modal search with REST embedders. + """Enable multimodal experimental feature. Returns ------- @@ -1044,13 +1030,12 @@ def enable_multimodal(self) -> Dict[str, bool]: Raises ------ MeilisearchApiError - An error containing details about why Meilisearch can't process your request. - Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors """ return self.update_experimental_features({"multimodal": True}) def disable_multimodal(self) -> Dict[str, bool]: - """Disable multi-modal search experimental feature. + """Disable multimodal experimental feature. Returns ------- @@ -1060,8 +1045,7 @@ def disable_multimodal(self) -> Dict[str, bool]: Raises ------ MeilisearchApiError - An error containing details about why Meilisearch can't process your request. - Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors """ return self.update_experimental_features({"multimodal": False}) From 8f49a562abc3e0376534b8a748e9660e1cc674ca Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 13 Nov 2025 15:46:30 +0500 Subject: [PATCH 08/16] feat: Adding support Multimodal embedders. --- meilisearch/index.py | 2 +- tests/settings/test_settings_fragments.py | 219 ++++++++++++++++++++++ 2 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 tests/settings/test_settings_fragments.py diff --git a/meilisearch/index.py b/meilisearch/index.py index 2a02a3ba..7751c858 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -345,7 +345,7 @@ def search_with_media( if opt_params is None: opt_params = {} - body = {"media": media, **opt_params} + body = {"q": None, "media": media, **opt_params} return self.http.post( f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", diff --git a/tests/settings/test_settings_fragments.py b/tests/settings/test_settings_fragments.py new file mode 100644 index 00000000..16918c45 --- /dev/null +++ b/tests/settings/test_settings_fragments.py @@ -0,0 +1,219 @@ +# pylint: disable=redefined-outer-name +"""Tests for indexingFragments and searchFragments in embedders (multimodal feature).""" + +import pytest + + +@pytest.mark.usefixtures("enable_multimodal") +def test_rest_embedder_with_fragments(empty_index): + """Tests that REST embedder can be configured with indexingFragments and searchFragments.""" + index = empty_index() + + rest_embedder_with_fragments = { + "rest_fragments": { + "source": "rest", + "url": "http://localhost:8000/embed", + "apiKey": "test-key", + "dimensions": 512, + "indexingFragments": { + "text": { + "value": "{{doc.title}} - {{doc.description}}" + } + }, + "searchFragments": { + "text": { + "value": "{{fragment}}" + } + }, + "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + "headers": {"Authorization": "Bearer test-key"}, + } + } + + response = index.update_embedders(rest_embedder_with_fragments) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + + embedders = index.get_embedders() + assert embedders.embedders["rest_fragments"].source == "rest" + assert embedders.embedders["rest_fragments"].url == "http://localhost:8000/embed" + assert embedders.embedders["rest_fragments"].dimensions == 512 + + # Verify fragments are configured + assert hasattr(embedders.embedders["rest_fragments"], "indexing_fragments") + assert hasattr(embedders.embedders["rest_fragments"], "search_fragments") + assert embedders.embedders["rest_fragments"].indexing_fragments is not None + assert embedders.embedders["rest_fragments"].search_fragments is not None + + +@pytest.mark.usefixtures("enable_multimodal") +def test_rest_embedder_with_multiple_fragments(empty_index): + """Tests that REST embedder can be configured with multiple fragment types.""" + index = empty_index() + + rest_embedder_multi_fragments = { + "multi_fragments": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 1024, + "indexingFragments": { + "text": { + "value": "{{doc.title}}" + }, + "description": { + "value": "{{doc.overview}}" + } + }, + "searchFragments": { + "text": { + "value": "{{fragment}}" + }, + "description": { + "value": "{{fragment}}" + } + }, + "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + response = index.update_embedders(rest_embedder_multi_fragments) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + + embedders = index.get_embedders() + assert embedders.embedders["multi_fragments"].source == "rest" + + # Verify multiple fragments are configured + indexing_frags = embedders.embedders["multi_fragments"].indexing_fragments + search_frags = embedders.embedders["multi_fragments"].search_fragments + + assert indexing_frags is not None + assert search_frags is not None + # The exact structure depends on the Pydantic model implementation + assert len(indexing_frags) >= 1 + assert len(search_frags) >= 1 + + +@pytest.mark.usefixtures("enable_multimodal") +def test_fragments_without_document_template(empty_index): + """Tests that fragments can be used without documentTemplate (they are mutually exclusive).""" + index = empty_index() + + embedder_config = { + "fragments_only": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 512, + # No documentTemplate - only fragments + "indexingFragments": { + "text": { + "value": "{{doc.content}}" + } + }, + "searchFragments": { + "text": { + "value": "{{fragment}}" + } + }, + "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + response = index.update_embedders(embedder_config) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + + embedders = index.get_embedders() + # Should not have documentTemplate when using fragments + assert embedders.embedders["fragments_only"].document_template is None + assert embedders.embedders["fragments_only"].indexing_fragments is not None + assert embedders.embedders["fragments_only"].search_fragments is not None + + +def test_fragments_require_multimodal_feature(empty_index): + """Tests that fragments configuration requires multimodal feature to be enabled.""" + # This test runs WITHOUT the enable_multimodal fixture + index = empty_index() + + embedder_with_fragments = { + "test": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 512, + "indexingFragments": { + "text": {"value": "{{doc.title}}"} + }, + "searchFragments": { + "text": {"value": "{{fragment}}"} + }, + "request": {"input": ["{{fragment}}"], "model": "test"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + # This might fail or succeed depending on whether multimodal is required + # The behavior depends on the Meilisearch server version + try: + response = index.update_embedders(embedder_with_fragments) + task = index.wait_for_task(response.task_uid) + # If it succeeds, fragments should still be configured + if task.status == "succeeded": + embedders = index.get_embedders() + assert embedders.embedders["test"].indexing_fragments is not None + except Exception: + # If it fails, that's also acceptable as the feature might require enabling + pass + + +@pytest.mark.usefixtures("enable_multimodal") +def test_update_fragments_separately(empty_index): + """Tests updating indexingFragments and searchFragments separately.""" + index = empty_index() + + # First, configure with basic fragments + initial_config = { + "updatable": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 512, + "indexingFragments": { + "text": {"value": "{{doc.title}}"} + }, + "searchFragments": { + "text": {"value": "{{fragment}}"} + }, + "request": {"input": ["{{fragment}}"], "model": "test"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + response = index.update_embedders(initial_config) + index.wait_for_task(response.task_uid) + + # Then update with different fragment configuration + updated_config = { + "updatable": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 512, + "indexingFragments": { + "text": {"value": "{{doc.title}} - {{doc.description}}"} + }, + "searchFragments": { + "text": {"value": "{{fragment}}"} + }, + "request": {"input": ["{{fragment}}"], "model": "test"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + response = index.update_embedders(updated_config) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + + embedders = index.get_embedders() + assert embedders.embedders["updatable"].indexing_fragments is not None + From 11465140f5764a5218910423f8d7452967b671a2 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 13 Nov 2025 16:19:00 +0500 Subject: [PATCH 09/16] feat: Adding support Multimodal embedders. --- tests/settings/test_settings_fragments.py | 99 ++++++++++++++++++++++- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/tests/settings/test_settings_fragments.py b/tests/settings/test_settings_fragments.py index 16918c45..e4757c58 100644 --- a/tests/settings/test_settings_fragments.py +++ b/tests/settings/test_settings_fragments.py @@ -1,5 +1,18 @@ # pylint: disable=redefined-outer-name -"""Tests for indexingFragments and searchFragments in embedders (multimodal feature).""" +"""Tests for indexingFragments and searchFragments in embedders (multimodal feature). + +IMPORTANT: These tests validate CONFIGURATION ONLY, not AI functionality. +- They test that fragments can be configured and stored in Meilisearch +- They do NOT test actual AI embedding calls (no real AI service needed) +- They do NOT add documents (which would trigger AI calls) +- They do NOT perform searches (which would trigger AI calls) + +The AI URLs in these tests (e.g., "http://localhost:8000/embed") are just +configuration strings - they are never actually called during tests. + +Think of it like writing a recipe (configuration) vs. cooking the meal (using AI). +These tests only validate the recipe is written correctly, not that the meal tastes good. +""" import pytest @@ -9,11 +22,12 @@ def test_rest_embedder_with_fragments(empty_index): """Tests that REST embedder can be configured with indexingFragments and searchFragments.""" index = empty_index() + # This is just a Python dictionary - no AI is involved yet rest_embedder_with_fragments = { "rest_fragments": { "source": "rest", - "url": "http://localhost:8000/embed", - "apiKey": "test-key", + "url": "http://localhost:8000/embed", # ← Just a config string, NOT called in this test + "apiKey": "test-key", # ← Fake key, safe to commit "dimensions": 512, "indexingFragments": { "text": { @@ -31,11 +45,18 @@ def test_rest_embedder_with_fragments(empty_index): } } + # Send configuration to Meilisearch - just stores the config, doesn't use it + # NO AI call happens here - Meilisearch only validates and stores the JSON response = index.update_embedders(rest_embedder_with_fragments) update = index.wait_for_task(response.task_uid) assert update.status == "succeeded" + # Retrieve configuration from Meilisearch + # NO AI call happens here - just reading back what we stored embedders = index.get_embedders() + + # Verify the configuration was stored correctly + # These are just Python object attribute checks - no AI involved assert embedders.embedders["rest_fragments"].source == "rest" assert embedders.embedders["rest_fragments"].url == "http://localhost:8000/embed" assert embedders.embedders["rest_fragments"].dimensions == 512 @@ -46,6 +67,11 @@ def test_rest_embedder_with_fragments(empty_index): assert embedders.embedders["rest_fragments"].indexing_fragments is not None assert embedders.embedders["rest_fragments"].search_fragments is not None + # NOTE: AI would only be called if we did: + # - index.add_documents([...]) ← This would trigger AI embedding + # - index.search(...) ← This would trigger AI search + # But we don't do that in configuration tests! + @pytest.mark.usefixtures("enable_multimodal") def test_rest_embedder_with_multiple_fragments(empty_index): @@ -217,3 +243,70 @@ def test_update_fragments_separately(empty_index): embedders = index.get_embedders() assert embedders.embedders["updatable"].indexing_fragments is not None + +@pytest.mark.usefixtures("enable_multimodal") +def test_profile_picture_and_title_fragments(empty_index): + """Tests real-world use case: indexing user profiles with picture and title. + + Example document structure: + { + "id": 1, + "name": "John Doe", + "profile_picture_url": "https://example.com/john.jpg", + "bio": "Software Engineer" + } + """ + index = empty_index() + + # Configure embedder for user profiles with custom fragment type names + profile_embedder = { + "user_profile": { + "source": "rest", + "url": "http://localhost:8000/embed", + "dimensions": 768, + # YOU choose these fragment type names based on your needs + "indexingFragments": { + "user_name": { # Fragment type for user's name + "value": "{{doc.name}}" # Extracts 'name' field from document + }, + "avatar": { # Fragment type for profile picture + "value": "{{doc.profile_picture_url}}" # Extracts URL + }, + "biography": { # Fragment type for user bio + "value": "{{doc.bio}}" # Extracts bio text + } + }, + # Search fragments define how queries are matched + "searchFragments": { + "user_name": { # Match against name fragments + "value": "{{fragment}}" + }, + "avatar": { # Match against image fragments + "value": "{{fragment}}" + }, + "biography": { # Match against bio fragments + "value": "{{fragment}}" + } + }, + "request": {"input": ["{{fragment}}"], "model": "multimodal-model"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + + response = index.update_embedders(profile_embedder) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + + embedders = index.get_embedders() + assert embedders.embedders["user_profile"].source == "rest" + + # Verify all three fragment types are configured + indexing_frags = embedders.embedders["user_profile"].indexing_fragments + search_frags = embedders.embedders["user_profile"].search_fragments + + assert indexing_frags is not None + assert search_frags is not None + # Should have 3 fragment types: user_name, avatar, biography + assert len(indexing_frags) >= 3 + assert len(search_frags) >= 3 + From cf6f84a3606e97839d557c9c7628de47aca1e22c Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 13 Nov 2025 16:30:46 +0500 Subject: [PATCH 10/16] feat: Adding support Multimodal embedders. --- .../test_client_experimental_features.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/client/test_client_experimental_features.py diff --git a/tests/client/test_client_experimental_features.py b/tests/client/test_client_experimental_features.py new file mode 100644 index 00000000..b54765c1 --- /dev/null +++ b/tests/client/test_client_experimental_features.py @@ -0,0 +1,57 @@ +"""Tests for client experimental features methods.""" + + +def test_get_experimental_features(client): + """Test getting experimental features returns dict with multimodal feature.""" + response = client.get_experimental_features() + + assert isinstance(response, dict) + assert len(response) > 0 + assert "multimodal" in response + assert isinstance(response["multimodal"], bool) + + +def test_update_experimental_features(client): + """Test updating experimental features and verify changes persist.""" + initial = client.get_experimental_features() + initial_multimodal = initial.get("multimodal", False) + + # Toggle multimodal + new_value = not initial_multimodal + response = client.update_experimental_features({"multimodal": new_value}) + + assert isinstance(response, dict) + assert response.get("multimodal") == new_value + assert client.get_experimental_features().get("multimodal") == new_value + + # Reset + client.update_experimental_features({"multimodal": initial_multimodal}) + + +def test_enable_disable_multimodal(client): + """Test enable and disable multimodal convenience methods.""" + # Test enable + response = client.enable_multimodal() + assert response.get("multimodal") is True + assert client.get_experimental_features()["multimodal"] is True + + # Test disable + response = client.disable_multimodal() + assert response.get("multimodal") is False + assert client.get_experimental_features()["multimodal"] is False + + +def test_multimodal_idempotency(client): + """Test that enable/disable operations are idempotent.""" + # Enable twice - should not error + client.enable_multimodal() + response = client.enable_multimodal() + assert response.get("multimodal") is True + + # Disable twice - should not error + client.disable_multimodal() + response = client.disable_multimodal() + assert response.get("multimodal") is False + + + From 2db03ca8fbcc121cf8b6f73fb6bc0d2fa3ab71ab Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 13 Nov 2025 19:52:21 +0500 Subject: [PATCH 11/16] feat: Adding support Multimodal embedders. --- tests/index/test_index_search_media.py | 163 ++++++++++++++++++++++ tests/settings/test_settings_embedders.py | 6 +- 2 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 tests/index/test_index_search_media.py diff --git a/tests/index/test_index_search_media.py b/tests/index/test_index_search_media.py new file mode 100644 index 00000000..3a7bd842 --- /dev/null +++ b/tests/index/test_index_search_media.py @@ -0,0 +1,163 @@ +"""Tests for search_with_media method (multimodal search). + +These tests validate the search_with_media method's parameter handling and +request structure using a mock REST embedder HTTP server. +""" + +import pytest + + +# Minimal sanity test: ensure the method exists on Index class without network calls. +def test_search_with_media_method_exists(): + """Test that Index class exposes a callable search_with_media method. + + This avoids creating an actual index (which would call the server) so + the test is safe to run in isolation. + """ + from meilisearch.index import Index + + assert hasattr(Index, "search_with_media") + assert callable(getattr(Index, "search_with_media")) + + +def test_search_with_media_basic_parameters(index_with_rest_embedder): + """Test search_with_media accepts media parameter and sends correct request structure. + + Uses a local stub of index.http.post to avoid external network calls. + """ + index = index_with_rest_embedder() + + # stub the http.post to return a deterministic fake response + fake_response = { + "hits": [{"id": 1, "title": "Fake Movie"}], + "processingTimeMs": 5, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + } + + index.http.post = lambda *args, **kwargs: fake_response + + # Search with media parameter - stubbed response will be returned + response = index.search_with_media( + media={"text": "test query"}, + opt_params={"hybrid": {"embedder": "default"}} + ) + + assert isinstance(response, dict) + assert "hits" in response + assert "processingTimeMs" in response + + +def test_search_with_media_with_optional_params(index_with_rest_embedder): + """Test search_with_media with optional parameters.""" + index = index_with_rest_embedder() + + # Return a response that respects the requested limit + def fake_post(*args, **kwargs): + return { + "hits": [{"id": 1, "title": "Fake Movie"}], + "processingTimeMs": 3, + "limit": 1, + "offset": 0, + "estimatedTotalHits": 1, + } + + index.http.post = fake_post + + # Search with media and optional parameters + response = index.search_with_media( + media={"text": "query"}, + opt_params={ + "limit": 1, + "offset": 0, + "hybrid": {"embedder": "default"} + } + ) + + assert isinstance(response, dict) + assert "hits" in response + assert "limit" in response + assert response["limit"] == 1 + + +def test_search_with_media_response_structure(index_with_rest_embedder): + """Test that search_with_media returns expected response structure.""" + index = index_with_rest_embedder() + + fake_response = { + "hits": [], + "processingTimeMs": 7, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + } + + index.http.post = lambda *args, **kwargs: fake_response + + response = index.search_with_media( + media={"text": "movie"}, + opt_params={"hybrid": {"embedder": "default"}} + ) + + # Verify response has expected fields + assert isinstance(response, dict) + assert "hits" in response + assert "processingTimeMs" in response + assert "limit" in response + assert "offset" in response + assert "estimatedTotalHits" in response + + +def test_search_with_media_returns_results(index_with_rest_embedder): + """Test that search_with_media can return search results.""" + index = index_with_rest_embedder() + + fake_response = { + "hits": [{"id": 42, "title": "The Answer"}], + "processingTimeMs": 4, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + } + + index.http.post = lambda *args, **kwargs: fake_response + + response = index.search_with_media( + media={"text": "movie"}, + opt_params={"hybrid": {"embedder": "default"}} + ) + + assert isinstance(response, dict) + assert "hits" in response + assert isinstance(response["hits"], list) + # With stubbed embedder, we should get results (length >= 0) + assert len(response["hits"]) >= 0 + + +def test_search_with_media_only_media_parameter(index_with_rest_embedder): + """Test search_with_media works with only media parameter (no query text). + + This is a key feature of multimodal search - searching with media alone. + """ + index = index_with_rest_embedder() + + fake_response = { + "hits": [], + "processingTimeMs": 6, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + } + + index.http.post = lambda *args, **kwargs: fake_response + + # Search with ONLY media, no text query + response = index.search_with_media( + media={"text": "space exploration"}, + opt_params={"hybrid": {"embedder": "default"}} + ) + + assert isinstance(response, dict) + assert "hits" in response + # This validates that SDK correctly sends media without requiring q parameter diff --git a/tests/settings/test_settings_embedders.py b/tests/settings/test_settings_embedders.py index 333678bc..576ccb72 100644 --- a/tests/settings/test_settings_embedders.py +++ b/tests/settings/test_settings_embedders.py @@ -95,7 +95,8 @@ def test_huggingface_embedder_format(empty_index): } } response = index.update_embedders(huggingface_embedder) - index.wait_for_task(response.task_uid) + # HuggingFace model download can take longer, increase timeout to 60 seconds + index.wait_for_task(response.task_uid, timeout_in_ms=60000) embedders = index.get_embedders() assert embedders.embedders["huggingface"].source == "huggingFace" assert embedders.embedders["huggingface"].model == "BAAI/bge-base-en-v1.5" @@ -211,7 +212,8 @@ def test_composite_embedder_format(empty_index): } response = index.update_embedders(composite_embedder) - update = index.wait_for_task(response.task_uid) + # Composite embedder with HuggingFace can take longer due to model download + update = index.wait_for_task(response.task_uid, timeout_in_ms=60000) embedders = index.get_embedders() assert update.status == "succeeded" From 7ce73f3bb7406e14dc8a3dda790352378dc229ca Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 13 Nov 2025 20:11:37 +0500 Subject: [PATCH 12/16] feat: Adding support Multimodal embedders. --- tests/conftest.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 8387084c..2aee481f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -152,6 +152,78 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies): return index_maker +@fixture(scope="function") +def mock_embedder_server(): + """Fixture that starts a mock HTTP server to act as an embedder. + + This server responds to embedding requests with fake vectors, + allowing us to test search_with_media without a real AI service. + """ + from http.server import HTTPServer, BaseHTTPRequestHandler + import threading + import json + + class MockEmbedderHandler(BaseHTTPRequestHandler): + def do_POST(self): + # Return a fake embedding vector + response = {"data": [{"embedding": [0.1] * 512}]} + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response).encode()) + + def log_message(self, format, *args): + # Suppress logging + pass + + # Start server in background thread + server = HTTPServer(('localhost', 8080), MockEmbedderHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + + yield server + + # Cleanup + server.shutdown() + + +@fixture(scope="function") +def index_with_rest_embedder(empty_index, small_movies, mock_embedder_server, enable_multimodal): + """Fixture for index with REST embedder configured for media search testing. + + Uses a mock HTTP server to act as the embedder, allowing real + search_with_media() testing without external AI services. + """ + def index_maker(index_uid=common.INDEX_UID, documents=small_movies): + index = empty_index(index_uid) + # Configure REST embedder pointing to mock server + settings_update_task = index.update_embedders( + { + "default": { + "source": "rest", + "url": "http://localhost:8080/embed", + "apiKey": "test-key", + "dimensions": 512, + "indexingFragments": { + "text": {"value": "{{doc.title}}"} + }, + "searchFragments": { + "text": {"value": "{{fragment}}"} + }, + "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "response": {"data": [{"embedding": "{{embedding}}"}]}, + } + } + ) + index.wait_for_task(settings_update_task.task_uid) + # Add documents - embedder will be called via mock server + document_addition_task = index.add_documents(documents) + index.wait_for_task(document_addition_task.task_uid) + return index + + return index_maker + + @fixture(scope="function") def index_with_documents_and_facets(empty_index, small_movies): def index_maker(index_uid=common.INDEX_UID, documents=small_movies): @@ -308,3 +380,20 @@ def enable_network_options(): json={"network": False}, timeout=10, ) + + +@fixture +def enable_multimodal(): + requests.patch( + f"{common.BASE_URL}/experimental-features", + headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, + json={"multimodal": True}, + timeout=10, + ) + yield + requests.patch( + f"{common.BASE_URL}/experimental-features", + headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, + json={"multimodal": False}, + timeout=10, + ) From 52e86a36f81bc896a5f171c69c2854ae1e2f3955 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Mon, 17 Nov 2025 12:59:10 +0500 Subject: [PATCH 13/16] feat: Adding support Multimodal embedders. --- meilisearch/index.py | 125 ++++++++++++++++++++++++++++++------------- 1 file changed, 89 insertions(+), 36 deletions(-) diff --git a/meilisearch/index.py b/meilisearch/index.py index 7751c858..60e9f13d 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -279,18 +279,23 @@ def get_stats(self) -> IndexStats: return IndexStats(**stats) @version_error_hint_message - def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]: + def search( + self, query: Optional[str] = "", opt_params: Optional[Mapping[str, Any]] = None + ) -> Dict[str, Any]: """Search in the index. + Supports traditional text search, vector search, and multimodal search using media fragments. + https://www.meilisearch.com/docs/reference/api/search Parameters ---------- - query: - String containing the searched word(s) + query (optional): + String containing the searched word(s). Can be empty or None when using media or vector parameter. opt_params (optional): Dictionary containing optional query parameters. Common parameters include: + - media: Dict with fragment types (e.g., {"text": "query", "image": "url"}) for multimodal search - hybrid: Dict with 'semanticRatio' and 'embedder' fields for hybrid search - vector: Array of numbers for vector search - retrieveVectors: Boolean to include vector data in search results @@ -305,47 +310,94 @@ def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> Raises ------ + ValueError + If neither query nor media nor vector parameter is provided, or if media is not a dictionary. MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + An error containing details about why Meilisearch can't process your request. + Meilisearch error codes are described here: + https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + + Examples + -------- + Traditional text search: + >>> index.search("space exploration") + + Multimodal search with media fragments: + >>> index.search("", opt_params={ + ... "media": {"text": "space exploration"}, + ... "hybrid": {"embedder": "default"} + ... }) + + Multimodal search with both text and image: + >>> index.search("", opt_params={ + ... "media": {"text": "space exploration", "image": "https://example.com/poster.jpg"}, + ... "hybrid": {"embedder": "default", "semanticRatio": 0.8} + ... }) + + Hybrid search combining text and semantic: + >>> index.search("science fiction", opt_params={ + ... "hybrid": {"semanticRatio": 0.5, "embedder": "default"} + ... }) + + Vector search: + >>> index.search("", opt_params={ + ... "vector": [0.1, 0.2, 0.3, ...], + ... "hybrid": {"embedder": "default"} + ... }) + + Notes + ----- + - When both query and media.text are provided, they are combined in the search. + - The semanticRatio in hybrid search controls the balance between keyword and semantic search + (0.0 = pure keyword, 1.0 = pure semantic). """ if opt_params is None: opt_params = {} - body = {"q": query, **opt_params} - - return self.http.post( - f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", - body=body, - ) - - def search_with_media( - self, media: Dict[str, Any], opt_params: Optional[Mapping[str, Any]] = None - ) -> Dict[str, Any]: - """Search in the index using media parameter. - - https://www.meilisearch.com/docs/reference/api/search + media = opt_params.get("media") + vector = opt_params.get("vector") - Parameters - ---------- - media: - Dictionary containing media fragments to search with. - opt_params (optional): - Dictionary containing optional query parameters. + # Validate that at least one search input is provided + if not query and not media and not vector: + raise ValueError( + "You must provide at least one search input: " + "a query string, media fragments, or a vector." + ) - Returns - ------- - results: - Dictionary with hits, offset, limit, processingTime and media + # Validate media parameter type + if media is not None and not isinstance(media, dict): + raise ValueError( + "The 'media' parameter must be a dictionary with fragment types as keys " + "(e.g., {'text': 'query', 'image': 'url'})." + ) - Raises - ------ - MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors - """ - if opt_params is None: - opt_params = {} + # Validate media parameter structure + if media: + valid_fragment_types = {"text", "image", "audio", "video"} + invalid_keys = set(media.keys()) - valid_fragment_types + if invalid_keys: + warn( + f"Unknown media fragment types: {invalid_keys}. " + f"Valid types are: {valid_fragment_types}" + ) + + # Provide informative warnings for empty query scenarios + if not query: + if media and vector: + warn( + "Query string is empty — using both media fragments and vector for search." + ) + elif media: + media_types = ", ".join(media.keys()) + warn( + f"Query string is empty — using media fragments ({media_types}) for multimodal search." + ) + elif vector: + warn( + "Query string is empty — using vector for semantic search." + ) - body = {"q": None, "media": media, **opt_params} + body = {"q": query, **opt_params} return self.http.post( f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", @@ -1046,7 +1098,8 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo: - 'dictionary': List of custom dictionary words - 'separatorTokens': List of separator tokens - 'nonSeparatorTokens': List of non-separator tokens - - 'embedders': Dictionary of embedder configurations + - 'embedders': Dictionary of embedder configurations (supports indexingFragments + and searchFragments for REST embedders to enable multimodal search) - 'searchCutoffMs': Maximum search time in milliseconds - 'proximityPrecision': Precision for proximity ranking - 'localizedAttributes': Settings for localized attributes From 438b9574f2581d8935d2d7920a679a4ad27bd90f Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Mon, 17 Nov 2025 13:10:51 +0500 Subject: [PATCH 14/16] feat: Adding support Multimodal embedders. --- meilisearch/index.py | 120 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 13 deletions(-) diff --git a/meilisearch/index.py b/meilisearch/index.py index 60e9f13d..68100fd7 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -278,6 +278,9 @@ def get_stats(self) -> IndexStats: stats = self.http.get(f"{self.config.paths.index}/{self.uid}/{self.config.paths.stat}") return IndexStats(**stats) + from typing import Optional, Mapping, Dict, Any, List, Union + from warnings import warn + @version_error_hint_message def search( self, query: Optional[str] = "", opt_params: Optional[Mapping[str, Any]] = None @@ -297,7 +300,7 @@ def search( Common parameters include: - media: Dict with fragment types (e.g., {"text": "query", "image": "url"}) for multimodal search - hybrid: Dict with 'semanticRatio' and 'embedder' fields for hybrid search - - vector: Array of numbers for vector search + - vector: List/array of numbers for vector search - retrieveVectors: Boolean to include vector data in search results - filter: Filter queries by an attribute's value - limit: Maximum number of documents returned @@ -311,7 +314,7 @@ def search( Raises ------ ValueError - If neither query nor media nor vector parameter is provided, or if media is not a dictionary. + If neither query nor media nor vector parameter is provided, or if parameters are invalid. MeilisearchApiError An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: @@ -350,29 +353,36 @@ def search( - When both query and media.text are provided, they are combined in the search. - The semanticRatio in hybrid search controls the balance between keyword and semantic search (0.0 = pure keyword, 1.0 = pure semantic). + - Vector dimensions must match the embedder's dimensions (e.g., 3072 for text-embedding-3-large). """ if opt_params is None: opt_params = {} + # Extract special parameters media = opt_params.get("media") vector = opt_params.get("vector") + hybrid = opt_params.get("hybrid") + + # ========================================== + # VALIDATION SECTION + # ========================================== - # Validate that at least one search input is provided + # 1. Validate that at least one search input is provided if not query and not media and not vector: raise ValueError( "You must provide at least one search input: " "a query string, media fragments, or a vector." ) - # Validate media parameter type - if media is not None and not isinstance(media, dict): - raise ValueError( - "The 'media' parameter must be a dictionary with fragment types as keys " - "(e.g., {'text': 'query', 'image': 'url'})." - ) + # 2. Validate media parameter + if media is not None: + if not isinstance(media, dict): + raise ValueError( + "The 'media' parameter must be a dictionary with fragment types as keys " + "(e.g., {'text': 'query', 'image': 'url'})." + ) - # Validate media parameter structure - if media: + # Check for valid fragment types valid_fragment_types = {"text", "image", "audio", "video"} invalid_keys = set(media.keys()) - valid_fragment_types if invalid_keys: @@ -381,7 +391,67 @@ def search( f"Valid types are: {valid_fragment_types}" ) - # Provide informative warnings for empty query scenarios + # Validate fragment values are non-empty strings + for frag_type, frag_value in media.items(): + if not isinstance(frag_value, str): + raise ValueError( + f"Media fragment '{frag_type}' must be a string, got {type(frag_value).__name__}" + ) + if not frag_value.strip(): + raise ValueError( + f"Media fragment '{frag_type}' cannot be empty" + ) + + # 3. Validate vector parameter + if vector is not None: + if not isinstance(vector, (list, tuple)): + raise ValueError( + f"The 'vector' parameter must be a list or tuple of numbers, " + f"got {type(vector).__name__}" + ) + + if len(vector) == 0: + raise ValueError("The 'vector' parameter cannot be empty") + + # Validate all elements are numbers + for i, val in enumerate(vector): + if not isinstance(val, (int, float)): + raise ValueError( + f"Vector element at index {i} must be a number, " + f"got {type(val).__name__}: {val}" + ) + + # Optional: warn about common dimension mismatches + common_dimensions = {384, 768, 1024, 1536, 3072} + if len(vector) not in common_dimensions: + warn( + f"Vector has {len(vector)} dimensions. Common embedding dimensions are " + f"{sorted(common_dimensions)}. Ensure this matches your embedder configuration." + ) + + # 4. Validate hybrid parameter structure + if hybrid is not None: + if not isinstance(hybrid, dict): + raise ValueError( + f"The 'hybrid' parameter must be a dictionary, got {type(hybrid).__name__}" + ) + + # Validate semanticRatio if present + if "semanticRatio" in hybrid: + ratio = hybrid["semanticRatio"] + if not isinstance(ratio, (int, float)): + raise ValueError( + f"hybrid.semanticRatio must be a number, got {type(ratio).__name__}" + ) + if not 0.0 <= ratio <= 1.0: + raise ValueError( + f"hybrid.semanticRatio must be between 0.0 and 1.0, got {ratio}" + ) + + # ========================================== + # WARNINGS FOR INFORMATIONAL PURPOSES + # ========================================== + if not query: if media and vector: warn( @@ -397,7 +467,31 @@ def search( "Query string is empty — using vector for semantic search." ) - body = {"q": query, **opt_params} + # ========================================== + # BUILD REQUEST BODY EXPLICITLY + # ========================================== + + # Start with query + body: Dict[str, Any] = {"q": query if query else ""} + + # Add validated special parameters + if media is not None: + body["media"] = media + if vector is not None: + body["vector"] = vector + if hybrid is not None: + body["hybrid"] = hybrid + + # Add other optional parameters, excluding already-handled ones + protected_keys = {"q", "media", "vector", "hybrid"} + for key, value in opt_params.items(): + if key not in protected_keys: + if key in body: + warn( + f"Parameter '{key}' specified in opt_params may be overridden. " + f"Consider using the dedicated parameter instead." + ) + body[key] = value return self.http.post( f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", From c664c3869a86665f9114ade5b3eb4c6c61ae2f8c Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Mon, 17 Nov 2025 16:37:06 +0500 Subject: [PATCH 15/16] feat: Adding support Multimodal embedders. --- meilisearch/index.py | 195 ++----------------------------------------- 1 file changed, 7 insertions(+), 188 deletions(-) diff --git a/meilisearch/index.py b/meilisearch/index.py index 68100fd7..0207f1d5 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -278,29 +278,21 @@ def get_stats(self) -> IndexStats: stats = self.http.get(f"{self.config.paths.index}/{self.uid}/{self.config.paths.stat}") return IndexStats(**stats) - from typing import Optional, Mapping, Dict, Any, List, Union - from warnings import warn - @version_error_hint_message - def search( - self, query: Optional[str] = "", opt_params: Optional[Mapping[str, Any]] = None - ) -> Dict[str, Any]: + def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]: """Search in the index. - Supports traditional text search, vector search, and multimodal search using media fragments. - https://www.meilisearch.com/docs/reference/api/search Parameters ---------- - query (optional): - String containing the searched word(s). Can be empty or None when using media or vector parameter. + query: + String containing the searched word(s) opt_params (optional): Dictionary containing optional query parameters. Common parameters include: - - media: Dict with fragment types (e.g., {"text": "query", "image": "url"}) for multimodal search - hybrid: Dict with 'semanticRatio' and 'embedder' fields for hybrid search - - vector: List/array of numbers for vector search + - vector: Array of numbers for vector search - retrieveVectors: Boolean to include vector data in search results - filter: Filter queries by an attribute's value - limit: Maximum number of documents returned @@ -313,185 +305,13 @@ def search( Raises ------ - ValueError - If neither query nor media nor vector parameter is provided, or if parameters are invalid. MeilisearchApiError - An error containing details about why Meilisearch can't process your request. - Meilisearch error codes are described here: - https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors - - Examples - -------- - Traditional text search: - >>> index.search("space exploration") - - Multimodal search with media fragments: - >>> index.search("", opt_params={ - ... "media": {"text": "space exploration"}, - ... "hybrid": {"embedder": "default"} - ... }) - - Multimodal search with both text and image: - >>> index.search("", opt_params={ - ... "media": {"text": "space exploration", "image": "https://example.com/poster.jpg"}, - ... "hybrid": {"embedder": "default", "semanticRatio": 0.8} - ... }) - - Hybrid search combining text and semantic: - >>> index.search("science fiction", opt_params={ - ... "hybrid": {"semanticRatio": 0.5, "embedder": "default"} - ... }) - - Vector search: - >>> index.search("", opt_params={ - ... "vector": [0.1, 0.2, 0.3, ...], - ... "hybrid": {"embedder": "default"} - ... }) - - Notes - ----- - - When both query and media.text are provided, they are combined in the search. - - The semanticRatio in hybrid search controls the balance between keyword and semantic search - (0.0 = pure keyword, 1.0 = pure semantic). - - Vector dimensions must match the embedder's dimensions (e.g., 3072 for text-embedding-3-large). + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors """ if opt_params is None: opt_params = {} - # Extract special parameters - media = opt_params.get("media") - vector = opt_params.get("vector") - hybrid = opt_params.get("hybrid") - - # ========================================== - # VALIDATION SECTION - # ========================================== - - # 1. Validate that at least one search input is provided - if not query and not media and not vector: - raise ValueError( - "You must provide at least one search input: " - "a query string, media fragments, or a vector." - ) - - # 2. Validate media parameter - if media is not None: - if not isinstance(media, dict): - raise ValueError( - "The 'media' parameter must be a dictionary with fragment types as keys " - "(e.g., {'text': 'query', 'image': 'url'})." - ) - - # Check for valid fragment types - valid_fragment_types = {"text", "image", "audio", "video"} - invalid_keys = set(media.keys()) - valid_fragment_types - if invalid_keys: - warn( - f"Unknown media fragment types: {invalid_keys}. " - f"Valid types are: {valid_fragment_types}" - ) - - # Validate fragment values are non-empty strings - for frag_type, frag_value in media.items(): - if not isinstance(frag_value, str): - raise ValueError( - f"Media fragment '{frag_type}' must be a string, got {type(frag_value).__name__}" - ) - if not frag_value.strip(): - raise ValueError( - f"Media fragment '{frag_type}' cannot be empty" - ) - - # 3. Validate vector parameter - if vector is not None: - if not isinstance(vector, (list, tuple)): - raise ValueError( - f"The 'vector' parameter must be a list or tuple of numbers, " - f"got {type(vector).__name__}" - ) - - if len(vector) == 0: - raise ValueError("The 'vector' parameter cannot be empty") - - # Validate all elements are numbers - for i, val in enumerate(vector): - if not isinstance(val, (int, float)): - raise ValueError( - f"Vector element at index {i} must be a number, " - f"got {type(val).__name__}: {val}" - ) - - # Optional: warn about common dimension mismatches - common_dimensions = {384, 768, 1024, 1536, 3072} - if len(vector) not in common_dimensions: - warn( - f"Vector has {len(vector)} dimensions. Common embedding dimensions are " - f"{sorted(common_dimensions)}. Ensure this matches your embedder configuration." - ) - - # 4. Validate hybrid parameter structure - if hybrid is not None: - if not isinstance(hybrid, dict): - raise ValueError( - f"The 'hybrid' parameter must be a dictionary, got {type(hybrid).__name__}" - ) - - # Validate semanticRatio if present - if "semanticRatio" in hybrid: - ratio = hybrid["semanticRatio"] - if not isinstance(ratio, (int, float)): - raise ValueError( - f"hybrid.semanticRatio must be a number, got {type(ratio).__name__}" - ) - if not 0.0 <= ratio <= 1.0: - raise ValueError( - f"hybrid.semanticRatio must be between 0.0 and 1.0, got {ratio}" - ) - - # ========================================== - # WARNINGS FOR INFORMATIONAL PURPOSES - # ========================================== - - if not query: - if media and vector: - warn( - "Query string is empty — using both media fragments and vector for search." - ) - elif media: - media_types = ", ".join(media.keys()) - warn( - f"Query string is empty — using media fragments ({media_types}) for multimodal search." - ) - elif vector: - warn( - "Query string is empty — using vector for semantic search." - ) - - # ========================================== - # BUILD REQUEST BODY EXPLICITLY - # ========================================== - - # Start with query - body: Dict[str, Any] = {"q": query if query else ""} - - # Add validated special parameters - if media is not None: - body["media"] = media - if vector is not None: - body["vector"] = vector - if hybrid is not None: - body["hybrid"] = hybrid - - # Add other optional parameters, excluding already-handled ones - protected_keys = {"q", "media", "vector", "hybrid"} - for key, value in opt_params.items(): - if key not in protected_keys: - if key in body: - warn( - f"Parameter '{key}' specified in opt_params may be overridden. " - f"Consider using the dedicated parameter instead." - ) - body[key] = value + body = {"q": query, **opt_params} return self.http.post( f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}", @@ -1192,8 +1012,7 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo: - 'dictionary': List of custom dictionary words - 'separatorTokens': List of separator tokens - 'nonSeparatorTokens': List of non-separator tokens - - 'embedders': Dictionary of embedder configurations (supports indexingFragments - and searchFragments for REST embedders to enable multimodal search) + - 'embedders': Dictionary of embedder configurations for AI-powered search - 'searchCutoffMs': Maximum search time in milliseconds - 'proximityPrecision': Precision for proximity ranking - 'localizedAttributes': Settings for localized attributes From 747e28f75df7830a42086f8e89cd29cc82dd527d Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Mon, 24 Nov 2025 17:08:36 +0500 Subject: [PATCH 16/16] feat: Adding support Multimodal embedders. --- meilisearch/client.py | 71 +--- .../test_client_experimental_features.py | 37 +-- tests/conftest.py | 53 ++- tests/index/test_index_search_media.py | 3 +- tests/settings/test_settings_fragments.py | 303 ++++++------------ 5 files changed, 158 insertions(+), 309 deletions(-) diff --git a/meilisearch/client.py b/meilisearch/client.py index 96980e8e..e2c0b608 100644 --- a/meilisearch/client.py +++ b/meilisearch/client.py @@ -984,70 +984,33 @@ def update_chat_workspace_settings( return self.http.patch(f"chats/{workspace_uid}/settings", body=settings) - def get_experimental_features(self) -> Dict[str, bool]: - """Get the current experimental features settings. - - Returns - ------- - experimental_features: - Dictionary containing the experimental features and their enabled/disabled status. - - Raises - ------ - MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + def get_experimental_features(self) -> dict: """ - return self.http.get(self.config.paths.experimental_features) + Retrieve the current settings for all experimental features. - def update_experimental_features(self, features: Mapping[str, bool]) -> Dict[str, bool]: - """Update experimental features settings. + Returns: + dict: A mapping of feature names to their enabled/disabled state. - Parameters - ---------- - features: - Dictionary containing the experimental features to enable/disable. - - Returns - ------- - experimental_features: - Dictionary containing the updated experimental features settings. - - Raises - ------ - MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + Example: + >>> client.get_experimental_features() """ - return self.http.patch(self.config.paths.experimental_features, body=features) - - def enable_multimodal(self) -> Dict[str, bool]: - """Enable multimodal experimental feature. - - Returns - ------- - experimental_features: - Dictionary containing the updated experimental features settings. + return self.http.get(self.config.paths.experimental_features) - Raises - ------ - MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + def update_experimental_features(self, features: dict) -> dict: """ - return self.update_experimental_features({"multimodal": True}) + Update one or more experimental features. - def disable_multimodal(self) -> Dict[str, bool]: - """Disable multimodal experimental feature. + Args: + features (dict): A dictionary mapping feature names to booleans. + For example, {"multimodal": True} to enable multimodal. - Returns - ------- - experimental_features: - Dictionary containing the updated experimental features settings. + Returns: + dict: The updated experimental features settings. - Raises - ------ - MeilisearchApiError - An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + Example: + >>> client.update_experimental_features({"multimodal": True}) """ - return self.update_experimental_features({"multimodal": False}) + return self.http.patch(self.config.paths.experimental_features, body=features) @staticmethod def _base64url_encode(data: bytes) -> str: diff --git a/tests/client/test_client_experimental_features.py b/tests/client/test_client_experimental_features.py index b54765c1..2c22075e 100644 --- a/tests/client/test_client_experimental_features.py +++ b/tests/client/test_client_experimental_features.py @@ -1,8 +1,7 @@ """Tests for client experimental features methods.""" - def test_get_experimental_features(client): - """Test getting experimental features returns dict with multimodal feature.""" + """Test getting experimental features returns a dict including 'multimodal'.""" response = client.get_experimental_features() assert isinstance(response, dict) @@ -24,34 +23,18 @@ def test_update_experimental_features(client): assert response.get("multimodal") == new_value assert client.get_experimental_features().get("multimodal") == new_value - # Reset + # Reset to original value client.update_experimental_features({"multimodal": initial_multimodal}) -def test_enable_disable_multimodal(client): - """Test enable and disable multimodal convenience methods.""" - # Test enable - response = client.enable_multimodal() +def test_multimodal_idempotency_generic(client): + """Test that updating multimodal via generic method is idempotent.""" + # Enable twice + client.update_experimental_features({"multimodal": True}) + response = client.update_experimental_features({"multimodal": True}) assert response.get("multimodal") is True - assert client.get_experimental_features()["multimodal"] is True - # Test disable - response = client.disable_multimodal() + # Disable twice + client.update_experimental_features({"multimodal": False}) + response = client.update_experimental_features({"multimodal": False}) assert response.get("multimodal") is False - assert client.get_experimental_features()["multimodal"] is False - - -def test_multimodal_idempotency(client): - """Test that enable/disable operations are idempotent.""" - # Enable twice - should not error - client.enable_multimodal() - response = client.enable_multimodal() - assert response.get("multimodal") is True - - # Disable twice - should not error - client.disable_multimodal() - response = client.disable_multimodal() - assert response.get("multimodal") is False - - - diff --git a/tests/conftest.py b/tests/conftest.py index 2aee481f..5646cfd6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -188,13 +188,14 @@ def log_message(self, format, *args): @fixture(scope="function") -def index_with_rest_embedder(empty_index, small_movies, mock_embedder_server, enable_multimodal): +def index_with_rest_embedder(empty_index, small_movies, mock_embedder_server, experimental_features): """Fixture for index with REST embedder configured for media search testing. Uses a mock HTTP server to act as the embedder, allowing real search_with_media() testing without external AI services. """ def index_maker(index_uid=common.INDEX_UID, documents=small_movies): + experimental_features({"multimodal": True}) index = empty_index(index_uid) # Configure REST embedder pointing to mock server settings_update_task = index.update_embedders( @@ -383,17 +384,39 @@ def enable_network_options(): @fixture -def enable_multimodal(): - requests.patch( - f"{common.BASE_URL}/experimental-features", - headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, - json={"multimodal": True}, - timeout=10, - ) - yield - requests.patch( - f"{common.BASE_URL}/experimental-features", - headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, - json={"multimodal": False}, - timeout=10, - ) +def experimental_features(): + """ + Fixture to temporarily set experimental features for a test. + + Usage: + def test_example(experimental_features): + experimental_features({"multimodal": True, "new_ui": True}) + """ + def _set_features(features: dict): + # Enable features + requests.patch( + f"{common.BASE_URL}/experimental-features", + headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, + json=features, + timeout=10, + ) + # Return features so we can reset later + return features + + yield _set_features + + # Reset features after the test + def _reset(features: dict): + # Create a reset payload inside the function + reset_payload = {key: False for key in features.keys()} + requests.patch( + f"{common.BASE_URL}/experimental-features", + headers={"Authorization": f"Bearer {common.MASTER_KEY}"}, + json=reset_payload, + timeout=10, + ) + +@fixture +def multimodal_enabled(experimental_features): + """Convenience fixture: enables multimodal experimental feature.""" + experimental_features({"multimodal": True}) diff --git a/tests/index/test_index_search_media.py b/tests/index/test_index_search_media.py index 3a7bd842..8d0e0e3b 100644 --- a/tests/index/test_index_search_media.py +++ b/tests/index/test_index_search_media.py @@ -5,6 +5,7 @@ """ import pytest +from meilisearch.index import Index # Minimal sanity test: ensure the method exists on Index class without network calls. @@ -14,8 +15,6 @@ def test_search_with_media_method_exists(): This avoids creating an actual index (which would call the server) so the test is safe to run in isolation. """ - from meilisearch.index import Index - assert hasattr(Index, "search_with_media") assert callable(getattr(Index, "search_with_media")) diff --git a/tests/settings/test_settings_fragments.py b/tests/settings/test_settings_fragments.py index e4757c58..caa9cae4 100644 --- a/tests/settings/test_settings_fragments.py +++ b/tests/settings/test_settings_fragments.py @@ -1,312 +1,193 @@ # pylint: disable=redefined-outer-name """Tests for indexingFragments and searchFragments in embedders (multimodal feature). -IMPORTANT: These tests validate CONFIGURATION ONLY, not AI functionality. -- They test that fragments can be configured and stored in Meilisearch -- They do NOT test actual AI embedding calls (no real AI service needed) -- They do NOT add documents (which would trigger AI calls) -- They do NOT perform searches (which would trigger AI calls) - -The AI URLs in these tests (e.g., "http://localhost:8000/embed") are just -configuration strings - they are never actually called during tests. - -Think of it like writing a recipe (configuration) vs. cooking the meal (using AI). -These tests only validate the recipe is written correctly, not that the meal tastes good. +These tests validate CONFIGURATION ONLY, not AI functionality. +They only ensure fragments can be configured and stored in Meilisearch. +No AI calls or document indexing/searching occurs. """ import pytest +DUMMY_URL = "http://localhost:8000/embed" +TEST_MODEL = "test-model" +MULTIMODAL_MODEL = "multimodal" + + +def apply_embedders(index, config): + """Helper to update embedders and wait for task completion.""" + response = index.update_embedders(config) + update = index.wait_for_task(response.task_uid) + assert update.status == "succeeded" + return index.get_embedders() + -@pytest.mark.usefixtures("enable_multimodal") -def test_rest_embedder_with_fragments(empty_index): +def test_rest_embedder_with_fragments(empty_index, multimodal_enabled): """Tests that REST embedder can be configured with indexingFragments and searchFragments.""" index = empty_index() - # This is just a Python dictionary - no AI is involved yet - rest_embedder_with_fragments = { + config = { "rest_fragments": { "source": "rest", - "url": "http://localhost:8000/embed", # ← Just a config string, NOT called in this test - "apiKey": "test-key", # ← Fake key, safe to commit + "url": DUMMY_URL, + "apiKey": "test-key", "dimensions": 512, - "indexingFragments": { - "text": { - "value": "{{doc.title}} - {{doc.description}}" - } - }, - "searchFragments": { - "text": { - "value": "{{fragment}}" - } - }, - "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "indexingFragments": {"text": {"value": "{{doc.title}} - {{doc.description}}"}}, + "searchFragments": {"text": {"value": "{{fragment}}"}}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, "headers": {"Authorization": "Bearer test-key"}, } } - # Send configuration to Meilisearch - just stores the config, doesn't use it - # NO AI call happens here - Meilisearch only validates and stores the JSON - response = index.update_embedders(rest_embedder_with_fragments) - update = index.wait_for_task(response.task_uid) - assert update.status == "succeeded" + embedders = apply_embedders(index, config) - # Retrieve configuration from Meilisearch - # NO AI call happens here - just reading back what we stored - embedders = index.get_embedders() + e = embedders.embedders["rest_fragments"] + assert e.source == "rest" + assert e.url == DUMMY_URL + assert e.dimensions == 512 + assert e.indexing_fragments is not None + assert e.search_fragments is not None - # Verify the configuration was stored correctly - # These are just Python object attribute checks - no AI involved - assert embedders.embedders["rest_fragments"].source == "rest" - assert embedders.embedders["rest_fragments"].url == "http://localhost:8000/embed" - assert embedders.embedders["rest_fragments"].dimensions == 512 - # Verify fragments are configured - assert hasattr(embedders.embedders["rest_fragments"], "indexing_fragments") - assert hasattr(embedders.embedders["rest_fragments"], "search_fragments") - assert embedders.embedders["rest_fragments"].indexing_fragments is not None - assert embedders.embedders["rest_fragments"].search_fragments is not None - - # NOTE: AI would only be called if we did: - # - index.add_documents([...]) ← This would trigger AI embedding - # - index.search(...) ← This would trigger AI search - # But we don't do that in configuration tests! - - -@pytest.mark.usefixtures("enable_multimodal") -def test_rest_embedder_with_multiple_fragments(empty_index): - """Tests that REST embedder can be configured with multiple fragment types.""" +def test_rest_embedder_with_multiple_fragments(empty_index, multimodal_enabled): + """Tests REST embedder with multiple fragment types.""" index = empty_index() - rest_embedder_multi_fragments = { + config = { "multi_fragments": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 1024, "indexingFragments": { - "text": { - "value": "{{doc.title}}" - }, - "description": { - "value": "{{doc.overview}}" - } + "text": {"value": "{{doc.title}}"}, + "description": {"value": "{{doc.overview}}"} }, "searchFragments": { - "text": { - "value": "{{fragment}}" - }, - "description": { - "value": "{{fragment}}" - } + "text": {"value": "{{fragment}}"}, + "description": {"value": "{{fragment}}"} }, - "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - response = index.update_embedders(rest_embedder_multi_fragments) - update = index.wait_for_task(response.task_uid) - assert update.status == "succeeded" - - embedders = index.get_embedders() - assert embedders.embedders["multi_fragments"].source == "rest" + embedders = apply_embedders(index, config) - # Verify multiple fragments are configured - indexing_frags = embedders.embedders["multi_fragments"].indexing_fragments - search_frags = embedders.embedders["multi_fragments"].search_fragments + e = embedders.embedders["multi_fragments"] + assert e.source == "rest" + assert len(e.indexing_fragments) >= 1 + assert len(e.search_fragments) >= 1 - assert indexing_frags is not None - assert search_frags is not None - # The exact structure depends on the Pydantic model implementation - assert len(indexing_frags) >= 1 - assert len(search_frags) >= 1 - -@pytest.mark.usefixtures("enable_multimodal") -def test_fragments_without_document_template(empty_index): - """Tests that fragments can be used without documentTemplate (they are mutually exclusive).""" +def test_fragments_without_document_template(empty_index, multimodal_enabled): + """Tests fragments can be used without documentTemplate.""" index = empty_index() - embedder_config = { + config = { "fragments_only": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 512, - # No documentTemplate - only fragments - "indexingFragments": { - "text": { - "value": "{{doc.content}}" - } - }, - "searchFragments": { - "text": { - "value": "{{fragment}}" - } - }, - "request": {"input": ["{{fragment}}"], "model": "test-model"}, + "indexingFragments": {"text": {"value": "{{doc.content}}"}}, + "searchFragments": {"text": {"value": "{{fragment}}"}}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - response = index.update_embedders(embedder_config) - update = index.wait_for_task(response.task_uid) - assert update.status == "succeeded" - - embedders = index.get_embedders() - # Should not have documentTemplate when using fragments - assert embedders.embedders["fragments_only"].document_template is None - assert embedders.embedders["fragments_only"].indexing_fragments is not None - assert embedders.embedders["fragments_only"].search_fragments is not None + embedders = apply_embedders(index, config) + e = embedders.embedders["fragments_only"] + assert e.document_template is None + assert e.indexing_fragments is not None + assert e.search_fragments is not None def test_fragments_require_multimodal_feature(empty_index): - """Tests that fragments configuration requires multimodal feature to be enabled.""" - # This test runs WITHOUT the enable_multimodal fixture + """Tests fragments require multimodal feature enabled.""" index = empty_index() - embedder_with_fragments = { + config = { "test": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 512, - "indexingFragments": { - "text": {"value": "{{doc.title}}"} - }, - "searchFragments": { - "text": {"value": "{{fragment}}"} - }, - "request": {"input": ["{{fragment}}"], "model": "test"}, + "indexingFragments": {"text": {"value": "{{doc.title}}"}}, + "searchFragments": {"text": {"value": "{{fragment}}"}}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - # This might fail or succeed depending on whether multimodal is required - # The behavior depends on the Meilisearch server version + # May succeed or fail depending on server config; both are acceptable try: - response = index.update_embedders(embedder_with_fragments) - task = index.wait_for_task(response.task_uid) - # If it succeeds, fragments should still be configured - if task.status == "succeeded": - embedders = index.get_embedders() - assert embedders.embedders["test"].indexing_fragments is not None + embedders = apply_embedders(index, config) + assert embedders.embedders["test"].indexing_fragments is not None except Exception: - # If it fails, that's also acceptable as the feature might require enabling pass -@pytest.mark.usefixtures("enable_multimodal") -def test_update_fragments_separately(empty_index): +def test_update_fragments_separately(empty_index, multimodal_enabled): """Tests updating indexingFragments and searchFragments separately.""" index = empty_index() - # First, configure with basic fragments initial_config = { "updatable": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 512, - "indexingFragments": { - "text": {"value": "{{doc.title}}"} - }, - "searchFragments": { - "text": {"value": "{{fragment}}"} - }, - "request": {"input": ["{{fragment}}"], "model": "test"}, + "indexingFragments": {"text": {"value": "{{doc.title}}"}}, + "searchFragments": {"text": {"value": "{{fragment}}"}}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - response = index.update_embedders(initial_config) - index.wait_for_task(response.task_uid) + apply_embedders(index, initial_config) - # Then update with different fragment configuration updated_config = { "updatable": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 512, - "indexingFragments": { - "text": {"value": "{{doc.title}} - {{doc.description}}"} - }, - "searchFragments": { - "text": {"value": "{{fragment}}"} - }, - "request": {"input": ["{{fragment}}"], "model": "test"}, + "indexingFragments": {"text": {"value": "{{doc.title}} - {{doc.description}}"}}, + "searchFragments": {"text": {"value": "{{fragment}}"}}, + "request": {"input": ["{{fragment}}"], "model": TEST_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - response = index.update_embedders(updated_config) - update = index.wait_for_task(response.task_uid) - assert update.status == "succeeded" - - embedders = index.get_embedders() + embedders = apply_embedders(index, updated_config) assert embedders.embedders["updatable"].indexing_fragments is not None -@pytest.mark.usefixtures("enable_multimodal") -def test_profile_picture_and_title_fragments(empty_index): - """Tests real-world use case: indexing user profiles with picture and title. - - Example document structure: - { - "id": 1, - "name": "John Doe", - "profile_picture_url": "https://example.com/john.jpg", - "bio": "Software Engineer" - } - """ +def test_profile_picture_and_title_fragments(empty_index, multimodal_enabled): + """Tests real-world use case: user profiles with picture and title.""" index = empty_index() - # Configure embedder for user profiles with custom fragment type names - profile_embedder = { + config = { "user_profile": { "source": "rest", - "url": "http://localhost:8000/embed", + "url": DUMMY_URL, "dimensions": 768, - # YOU choose these fragment type names based on your needs "indexingFragments": { - "user_name": { # Fragment type for user's name - "value": "{{doc.name}}" # Extracts 'name' field from document - }, - "avatar": { # Fragment type for profile picture - "value": "{{doc.profile_picture_url}}" # Extracts URL - }, - "biography": { # Fragment type for user bio - "value": "{{doc.bio}}" # Extracts bio text - } + "user_name": {"value": "{{doc.name}}"}, + "avatar": {"value": "{{doc.profile_picture_url}}"}, + "biography": {"value": "{{doc.bio}}"}, }, - # Search fragments define how queries are matched "searchFragments": { - "user_name": { # Match against name fragments - "value": "{{fragment}}" - }, - "avatar": { # Match against image fragments - "value": "{{fragment}}" - }, - "biography": { # Match against bio fragments - "value": "{{fragment}}" - } + "user_name": {"value": "{{fragment}}"}, + "avatar": {"value": "{{fragment}}"}, + "biography": {"value": "{{fragment}}"}, }, - "request": {"input": ["{{fragment}}"], "model": "multimodal-model"}, + "request": {"input": ["{{fragment}}"], "model": MULTIMODAL_MODEL}, "response": {"data": [{"embedding": "{{embedding}}"}]}, } } - response = index.update_embedders(profile_embedder) - update = index.wait_for_task(response.task_uid) - assert update.status == "succeeded" - - embedders = index.get_embedders() - assert embedders.embedders["user_profile"].source == "rest" - - # Verify all three fragment types are configured - indexing_frags = embedders.embedders["user_profile"].indexing_fragments - search_frags = embedders.embedders["user_profile"].search_fragments - - assert indexing_frags is not None - assert search_frags is not None - # Should have 3 fragment types: user_name, avatar, biography - assert len(indexing_frags) >= 3 - assert len(search_frags) >= 3 + embedders = apply_embedders(index, config) + e = embedders.embedders["user_profile"] + assert e.source == "rest" + expected_keys = {"user_name", "avatar", "biography"} + assert set(e.indexing_fragments.keys()) == expected_keys + assert set(e.search_fragments.keys()) == expected_keys