diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index a836eeb1..649435fb 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -11,6 +11,8 @@ create_an_index_1: |- client.create_index('movies', {'primaryKey': 'id'}) update_an_index_1: |- client.index('movies').update(primary_key='id') +compact_index_1: |- + client.index('movies').compact() delete_an_index_1: |- client.delete_index('movies') // OR diff --git a/meilisearch/client.py b/meilisearch/client.py index 22b1c70f..e2c0b608 100644 --- a/meilisearch/client.py +++ b/meilisearch/client.py @@ -984,6 +984,34 @@ def update_chat_workspace_settings( return self.http.patch(f"chats/{workspace_uid}/settings", body=settings) + def get_experimental_features(self) -> dict: + """ + Retrieve the current settings for all experimental features. + + Returns: + dict: A mapping of feature names to their enabled/disabled state. + + Example: + >>> client.get_experimental_features() + """ + return self.http.get(self.config.paths.experimental_features) + + def update_experimental_features(self, features: dict) -> dict: + """ + Update one or more experimental features. + + Args: + features (dict): A dictionary mapping feature names to booleans. + For example, {"multimodal": True} to enable multimodal. + + Returns: + dict: The updated experimental features settings. 
+ + Example: + >>> client.update_experimental_features({"multimodal": True}) + """ + return self.http.patch(self.config.paths.experimental_features, body=features) + @staticmethod def _base64url_encode(data: bytes) -> str: return base64.urlsafe_b64encode(data).decode("utf-8").replace("=", "") diff --git a/meilisearch/config.py b/meilisearch/config.py index 1891a2d5..a5619094 100644 --- a/meilisearch/config.py +++ b/meilisearch/config.py @@ -47,6 +47,7 @@ class Paths: localized_attributes = "localized-attributes" edit = "edit" network = "network" + experimental_features = "experimental-features" def __init__( self, diff --git a/meilisearch/index.py b/meilisearch/index.py index 27db2a2f..0207f1d5 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -2333,3 +2333,17 @@ def _build_url( if primary_key is None and csv_delimiter is None: return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}" return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}?{parse.urlencode(parameters)}" + + def compact(self) -> TaskInfo: + """ + Trigger the compaction of the index. + This is an asynchronous operation in Meilisearch. + + Returns + ------- + task_info: TaskInfo + Contains information to track the progress of the compaction task. 
+ """ + path = f"{self.config.paths.index}/{self.uid}/compact" + task = self.http.post(path) + return TaskInfo(**task) diff --git a/meilisearch/models/embedders.py b/meilisearch/models/embedders.py index 9dcd5d00..abba9f2a 100644 --- a/meilisearch/models/embedders.py +++ b/meilisearch/models/embedders.py @@ -167,6 +167,12 @@ class RestEmbedder(CamelBase): Template defining the data Meilisearch sends to the embedder document_template_max_bytes: Optional[int] Maximum allowed size of rendered document template (defaults to 400) + indexing_fragments: Optional[Dict[str, Dict[str, str]]] + Defines how to fragment documents for indexing (multi-modal search) + Example: {"text": {"value": "{{doc.title}} - {{doc.overview}}"}} + search_fragments: Optional[Dict[str, Dict[str, str]]] + Defines how to fragment search queries (multi-modal search) + Example: {"text": {"value": "{{fragment}}"}} request: Dict[str, Any] A JSON value representing the request Meilisearch makes to the remote embedder response: Dict[str, Any] @@ -185,6 +191,8 @@ class RestEmbedder(CamelBase): dimensions: Optional[int] = None document_template: Optional[str] = None document_template_max_bytes: Optional[int] = None + indexing_fragments: Optional[Dict[str, Dict[str, str]]] = None + search_fragments: Optional[Dict[str, Dict[str, str]]] = None request: Dict[str, Any] response: Dict[str, Any] headers: Optional[Dict[str, str]] = None diff --git a/meilisearch/version.py b/meilisearch/version.py index cbd3ec80..772ac4b6 100644 --- a/meilisearch/version.py +++ b/meilisearch/version.py @@ -1,6 +1,6 @@ from __future__ import annotations -__version__ = "0.37.1" +__version__ = "0.38.0" def qualified_version() -> str: diff --git a/tests/client/test_client_experimental_features.py b/tests/client/test_client_experimental_features.py new file mode 100644 index 00000000..2c22075e --- /dev/null +++ b/tests/client/test_client_experimental_features.py @@ -0,0 +1,40 @@ +"""Tests for client experimental features methods.""" + 
+def test_get_experimental_features(client):
+    """Test getting experimental features returns a dict including 'multimodal'."""
+    response = client.get_experimental_features()
+
+    assert isinstance(response, dict)
+    assert len(response) > 0
+    assert "multimodal" in response
+    assert isinstance(response["multimodal"], bool)
+
+
+def test_update_experimental_features(client):
+    """Test updating experimental features and verify changes persist."""
+    initial = client.get_experimental_features()
+    initial_multimodal = initial.get("multimodal", False)
+
+    # Toggle multimodal
+    new_value = not initial_multimodal
+    response = client.update_experimental_features({"multimodal": new_value})
+
+    assert isinstance(response, dict)
+    assert response.get("multimodal") == new_value
+    assert client.get_experimental_features().get("multimodal") == new_value
+
+    # Reset to original value
+    client.update_experimental_features({"multimodal": initial_multimodal})
+
+
+def test_multimodal_idempotency_generic(client):
+    """Test that updating multimodal via generic method is idempotent."""
+    # Enable twice
+    client.update_experimental_features({"multimodal": True})
+    response = client.update_experimental_features({"multimodal": True})
+    assert response.get("multimodal") is True
+
+    # Disable twice
+    client.update_experimental_features({"multimodal": False})
+    response = client.update_experimental_features({"multimodal": False})
+    assert response.get("multimodal") is False
diff --git a/tests/conftest.py b/tests/conftest.py
index 8387084c..5646cfd6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -152,6 +152,85 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
     return index_maker
 
 
+@fixture(scope="function")
+def mock_embedder_server():
+    """Fixture that starts a mock HTTP server to act as an embedder.
+
+    This server responds to embedding requests with fake vectors,
+    allowing us to test search_with_media without a real AI service.
+    On teardown the server thread is stopped and its socket is closed.
+    """
+    from http.server import HTTPServer, BaseHTTPRequestHandler
+    import threading
+    import json
+
+    class MockEmbedderHandler(BaseHTTPRequestHandler):
+        def do_POST(self):
+            # Drain the request body first: closing a socket that still has
+            # unread data can reset the connection before the reply arrives.
+            self.rfile.read(int(self.headers.get("Content-Length", 0)))
+            # Return a fake embedding vector
+            response = {"data": [{"embedding": [0.1] * 512}]}
+            self.send_response(200)
+            self.send_header('Content-Type', 'application/json')
+            self.end_headers()
+            self.wfile.write(json.dumps(response).encode())
+
+        def log_message(self, format, *args):
+            # Suppress logging
+            pass
+
+    # Start server in background thread
+    server = HTTPServer(('localhost', 8080), MockEmbedderHandler)
+    thread = threading.Thread(target=server.serve_forever, daemon=True)
+    thread.start()
+
+    yield server
+
+    # Cleanup: stop serve_forever(), wait for the thread, then release the port
+    server.shutdown()
+    thread.join()
+    server.server_close()
+
+
+@fixture(scope="function")
+def index_with_rest_embedder(empty_index, small_movies, mock_embedder_server, experimental_features):
+    """Fixture for index with REST embedder configured for media search testing.
+
+    Uses a mock HTTP server to act as the embedder, allowing real
+    search_with_media() testing without external AI services.
+    """
+    def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
+        experimental_features({"multimodal": True})
+        index = empty_index(index_uid)
+        # Configure REST embedder pointing to mock server
+        settings_update_task = index.update_embedders(
+            {
+                "default": {
+                    "source": "rest",
+                    "url": "http://localhost:8080/embed",
+                    "apiKey": "test-key",
+                    "dimensions": 512,
+                    "indexingFragments": {
+                        "text": {"value": "{{doc.title}}"}
+                    },
+                    "searchFragments": {
+                        "text": {"value": "{{fragment}}"}
+                    },
+                    "request": {"input": ["{{fragment}}"], "model": "test-model"},
+                    "response": {"data": [{"embedding": "{{embedding}}"}]},
+                }
+            }
+        )
+        index.wait_for_task(settings_update_task.task_uid)
+        # Add documents - embedder will be called via mock server
+        document_addition_task = index.add_documents(documents)
+        index.wait_for_task(document_addition_task.task_uid)
+        return index
+
+    return index_maker
+
+
 @fixture(scope="function")
 def index_with_documents_and_facets(empty_index, small_movies):
     def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
@@ -308,3 +387,44 @@ def enable_network_options():
         json={"network": False},
         timeout=10,
     )
+
+
+@fixture
+def experimental_features():
+    """
+    Fixture to temporarily set experimental features for a test.
+
+    Every feature passed to the returned callable is switched back to
+    False once the test has finished.
+
+    Usage:
+        def test_example(experimental_features):
+            experimental_features({"multimodal": True})
+    """
+    touched = set()
+
+    def _patch_features(payload: dict):
+        requests.patch(
+            f"{common.BASE_URL}/experimental-features",
+            headers={"Authorization": f"Bearer {common.MASTER_KEY}"},
+            json=payload,
+            timeout=10,
+        )
+
+    def _set_features(features: dict):
+        # Record the keys before patching so teardown undoes them even if
+        # the request below raises part-way through.
+        touched.update(features)
+        _patch_features(features)
+        return features
+
+    yield _set_features
+
+    # Reset features after the test
+    if touched:
+        _patch_features({key: False for key in touched})
+
+
+@fixture
+def multimodal_enabled(experimental_features):
+    """Convenience fixture: enables multimodal experimental feature."""
+    experimental_features({"multimodal": True})
diff --git a/tests/index/test_index.py b/tests/index/test_index.py
index 655477d2..397b6e0c 100644
--- a/tests/index/test_index.py
+++ b/tests/index/test_index.py
@@ -219,3 +219,18 @@ def test_delete_index(client):
     client.wait_for_task(deleted.task_uid)
     with pytest.raises(MeilisearchApiError):
         client.get_index(uid=common.INDEX_UID)
+
+
+@pytest.mark.usefixtures("indexes_sample")
+def test_index_compact(client):
+    """Tests the compaction of an index."""
+    index = client.index(common.INDEX_UID)
+    # Get stats before compaction
+    stats_before = index.get_stats()
+
+    task_info = index.compact()
+    client.wait_for_task(task_info.task_uid)
+    stats_after = index.get_stats()
+
+    assert stats_before.number_of_documents == stats_after.number_of_documents
+    assert stats_after.is_indexing is False
diff --git a/tests/index/test_index_search_media.py b/tests/index/test_index_search_media.py
new file mode 100644
index 00000000..8d0e0e3b
--- /dev/null
+++ b/tests/index/test_index_search_media.py
@@ -0,0 +1,162 @@
+"""Tests for search_with_media method (multimodal
search).
+
+These tests validate the search_with_media method's parameter handling and
+request structure using a mock REST embedder HTTP server.
+"""
+
+import pytest
+from meilisearch.index import Index
+
+
+# Minimal sanity test: ensure the method exists on Index class without network calls.
+def test_search_with_media_method_exists():
+    """Test that Index class exposes a callable search_with_media method.
+
+    This avoids creating an actual index (which would call the server) so
+    the test is safe to run in isolation.
+    """
+    assert hasattr(Index, "search_with_media")
+    assert callable(getattr(Index, "search_with_media"))
+
+
+def test_search_with_media_basic_parameters(index_with_rest_embedder):
+    """Test search_with_media accepts media parameter and sends correct request structure.
+
+    Uses a local stub of index.http.post to avoid external network calls.
+    """
+    index = index_with_rest_embedder()
+
+    # stub the http.post to return a deterministic fake response
+    fake_response = {
+        "hits": [{"id": 1, "title": "Fake Movie"}],
+        "processingTimeMs": 5,
+        "limit": 20,
+        "offset": 0,
+        "estimatedTotalHits": 1,
+    }
+
+    index.http.post = lambda *args, **kwargs: fake_response
+
+    # Search with media parameter - stubbed response will be returned
+    response = index.search_with_media(
+        media={"text": "test query"},
+        opt_params={"hybrid": {"embedder": "default"}}
+    )
+
+    assert isinstance(response, dict)
+    assert "hits" in response
+    assert "processingTimeMs" in response
+
+
+def test_search_with_media_with_optional_params(index_with_rest_embedder):
+    """Test search_with_media with optional parameters."""
+    index = index_with_rest_embedder()
+
+    # Return a response that respects the requested limit
+    def fake_post(*args, **kwargs):
+        return {
+            "hits": [{"id": 1, "title": "Fake Movie"}],
+            "processingTimeMs": 3,
+            "limit": 1,
+            "offset": 0,
+            "estimatedTotalHits": 1,
+        }
+
+    index.http.post = fake_post
+
+    # Search with media and optional parameters
+    response = index.search_with_media(
+        media={"text": "query"},
+        opt_params={
+            "limit": 1,
+            "offset": 0,
+            "hybrid": {"embedder": "default"}
+        }
+    )
+
+    assert isinstance(response, dict)
+    assert "hits" in response
+    assert "limit" in response
+    assert response["limit"] == 1
+
+
+def test_search_with_media_response_structure(index_with_rest_embedder):
+    """Test that search_with_media returns expected response structure."""
+    index = index_with_rest_embedder()
+
+    fake_response = {
+        "hits": [],
+        "processingTimeMs": 7,
+        "limit": 20,
+        "offset": 0,
+        "estimatedTotalHits": 0,
+    }
+
+    index.http.post = lambda *args, **kwargs: fake_response
+
+    response = index.search_with_media(
+        media={"text": "movie"},
+        opt_params={"hybrid": {"embedder": "default"}}
+    )
+
+    # Verify response has expected fields
+    assert isinstance(response, dict)
+    assert "hits" in response
+    assert "processingTimeMs" in response
+    assert "limit" in response
+    assert "offset" in response
+    assert "estimatedTotalHits" in response
+
+
+def test_search_with_media_returns_results(index_with_rest_embedder):
+    """Test that search_with_media can return search results."""
+    index = index_with_rest_embedder()
+
+    fake_response = {
+        "hits": [{"id": 42, "title": "The Answer"}],
+        "processingTimeMs": 4,
+        "limit": 20,
+        "offset": 0,
+        "estimatedTotalHits": 1,
+    }
+
+    index.http.post = lambda *args, **kwargs: fake_response
+
+    response = index.search_with_media(
+        media={"text": "movie"},
+        opt_params={"hybrid": {"embedder": "default"}}
+    )
+
+    assert isinstance(response, dict)
+    assert "hits" in response
+    assert isinstance(response["hits"], list)
+    # http.post is stubbed, so the stubbed hits are expected back as-is
+    assert response["hits"] == fake_response["hits"]
+
+
+def test_search_with_media_only_media_parameter(index_with_rest_embedder):
+    """Test search_with_media works with only media parameter (no query text).
+
+    This is a key feature of multimodal search - searching with media alone.
+    """
+    index = index_with_rest_embedder()
+
+    fake_response = {
+        "hits": [],
+        "processingTimeMs": 6,
+        "limit": 20,
+        "offset": 0,
+        "estimatedTotalHits": 0,
+    }
+
+    index.http.post = lambda *args, **kwargs: fake_response
+
+    # Search with ONLY media, no text query
+    response = index.search_with_media(
+        media={"text": "space exploration"},
+        opt_params={"hybrid": {"embedder": "default"}}
+    )
+
+    assert isinstance(response, dict)
+    assert "hits" in response
+    # This validates that SDK correctly sends media without requiring q parameter
diff --git a/tests/settings/test_settings_embedders.py b/tests/settings/test_settings_embedders.py
index 333678bc..576ccb72 100644
--- a/tests/settings/test_settings_embedders.py
+++ b/tests/settings/test_settings_embedders.py
@@ -95,7 +95,8 @@ def test_huggingface_embedder_format(empty_index):
         }
     }
     response = index.update_embedders(huggingface_embedder)
-    index.wait_for_task(response.task_uid)
+    # HuggingFace model download can take longer, increase timeout to 60 seconds
+    index.wait_for_task(response.task_uid, timeout_in_ms=60000)
     embedders = index.get_embedders()
     assert embedders.embedders["huggingface"].source == "huggingFace"
     assert embedders.embedders["huggingface"].model == "BAAI/bge-base-en-v1.5"
@@ -211,7 +212,8 @@ def test_composite_embedder_format(empty_index):
     }
 
     response = index.update_embedders(composite_embedder)
-    update = index.wait_for_task(response.task_uid)
+    # Composite embedder with HuggingFace can take longer due to model download
+    update = index.wait_for_task(response.task_uid, timeout_in_ms=60000)
     embedders = index.get_embedders()
 
     assert update.status == "succeeded"
diff --git a/tests/settings/test_settings_fragments.py b/tests/settings/test_settings_fragments.py
new file mode 100644
index 00000000..caa9cae4
--- /dev/null
+++ b/tests/settings/test_settings_fragments.py
@@ -0,0 +1,194 @@
+# pylint: disable=redefined-outer-name
+"""Tests for indexingFragments and searchFragments in embedders (multimodal feature).
+
+These tests validate CONFIGURATION ONLY, not AI functionality.
+They only ensure fragments can be configured and stored in Meilisearch.
+No AI calls or document indexing/searching occurs.
+"""
+
+from meilisearch.errors import MeilisearchApiError
+
+DUMMY_URL = "http://localhost:8000/embed"
+TEST_MODEL = "test-model"
+MULTIMODAL_MODEL = "multimodal"
+
+
+def apply_embedders(index, config):
+    """Helper to update embedders and wait for task completion."""
+    response = index.update_embedders(config)
+    update = index.wait_for_task(response.task_uid)
+    assert update.status == "succeeded"
+    return index.get_embedders()
+
+
+def test_rest_embedder_with_fragments(empty_index, multimodal_enabled):
+    """Tests that REST embedder can be configured with indexingFragments and searchFragments."""
+    index = empty_index()
+
+    config = {
+        "rest_fragments": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "apiKey": "test-key",
+            "dimensions": 512,
+            "indexingFragments": {"text": {"value": "{{doc.title}} - {{doc.description}}"}},
+            "searchFragments": {"text": {"value": "{{fragment}}"}},
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+            "headers": {"Authorization": "Bearer test-key"},
+        }
+    }
+
+    embedders = apply_embedders(index, config)
+
+    e = embedders.embedders["rest_fragments"]
+    assert e.source == "rest"
+    assert e.url == DUMMY_URL
+    assert e.dimensions == 512
+    assert e.indexing_fragments is not None
+    assert e.search_fragments is not None
+
+
+def test_rest_embedder_with_multiple_fragments(empty_index, multimodal_enabled):
+    """Tests REST embedder with multiple fragment types."""
+    index = empty_index()
+
+    config = {
+        "multi_fragments": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 1024,
+            "indexingFragments": {
+                "text": {"value": "{{doc.title}}"},
+                "description": {"value": "{{doc.overview}}"}
+            },
+            "searchFragments": {
+                "text": {"value": "{{fragment}}"},
+                "description": {"value": "{{fragment}}"}
+            },
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    embedders = apply_embedders(index, config)
+
+    e = embedders.embedders["multi_fragments"]
+    assert e.source == "rest"
+    assert set(e.indexing_fragments) == {"text", "description"}
+    assert set(e.search_fragments) == {"text", "description"}
+
+
+def test_fragments_without_document_template(empty_index, multimodal_enabled):
+    """Tests fragments can be used without documentTemplate."""
+    index = empty_index()
+
+    config = {
+        "fragments_only": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 512,
+            "indexingFragments": {"text": {"value": "{{doc.content}}"}},
+            "searchFragments": {"text": {"value": "{{fragment}}"}},
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    embedders = apply_embedders(index, config)
+    e = embedders.embedders["fragments_only"]
+    assert e.document_template is None
+    assert e.indexing_fragments is not None
+    assert e.search_fragments is not None
+
+
+def test_fragments_require_multimodal_feature(empty_index):
+    """Tests fragments require multimodal feature enabled."""
+    index = empty_index()
+
+    config = {
+        "test": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 512,
+            "indexingFragments": {"text": {"value": "{{doc.title}}"}},
+            "searchFragments": {"text": {"value": "{{fragment}}"}},
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    # May succeed or fail depending on server config; both are acceptable
+    try:
+        embedders = apply_embedders(index, config)
+    except (AssertionError, MeilisearchApiError):
+        # Rejected: either the request errored or the settings task did not succeed
+        return
+    assert embedders.embedders["test"].indexing_fragments is not None
+
+
+def test_update_fragments_separately(empty_index, multimodal_enabled):
+    """Tests updating indexingFragments and searchFragments separately."""
+    index = empty_index()
+
+    initial_config = {
+        "updatable": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 512,
+            "indexingFragments": {"text": {"value": "{{doc.title}}"}},
+            "searchFragments": {"text": {"value": "{{fragment}}"}},
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    apply_embedders(index, initial_config)
+
+    updated_config = {
+        "updatable": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 512,
+            "indexingFragments": {"text": {"value": "{{doc.title}} - {{doc.description}}"}},
+            "searchFragments": {"text": {"value": "{{fragment}}"}},
+            "request": {"input": ["{{fragment}}"], "model": TEST_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    embedders = apply_embedders(index, updated_config)
+    assert embedders.embedders["updatable"].indexing_fragments is not None
+
+
+def test_profile_picture_and_title_fragments(empty_index, multimodal_enabled):
+    """Tests real-world use case: user profiles with picture and title."""
+    index = empty_index()
+
+    config = {
+        "user_profile": {
+            "source": "rest",
+            "url": DUMMY_URL,
+            "dimensions": 768,
+            "indexingFragments": {
+                "user_name": {"value": "{{doc.name}}"},
+                "avatar": {"value": "{{doc.profile_picture_url}}"},
+                "biography": {"value": "{{doc.bio}}"},
+            },
+            "searchFragments": {
+                "user_name": {"value": "{{fragment}}"},
+                "avatar": {"value": "{{fragment}}"},
+                "biography": {"value": "{{fragment}}"},
+            },
+            "request": {"input": ["{{fragment}}"], "model": MULTIMODAL_MODEL},
+            "response": {"data": [{"embedding": "{{embedding}}"}]},
+        }
+    }
+
+    embedders = apply_embedders(index, config)
+    e = embedders.embedders["user_profile"]
+
+    assert e.source == "rest"
+    expected_keys = {"user_name", "avatar", "biography"}
+    assert set(e.indexing_fragments.keys()) == expected_keys
+    assert set(e.search_fragments.keys()) == expected_keys