diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index 74322d79c..9c95cbaac 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -525,6 +525,12 @@ ->stringNode('embedder')->end() ->stringNode('vector_field')->end() ->integerNode('dimensions')->end() + ->floatNode('semantic_ratio') + ->info('The ratio between semantic (vector) and full-text search (0.0 to 1.0). Default: 1.0 (100% semantic)') + ->defaultValue(1.0) + ->min(0.0) + ->max(1.0) + ->end() ->end() ->end() ->end() diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index 71e948f46..59ae66e3a 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -919,6 +919,10 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde $arguments[6] = $store['dimensions']; } + if (\array_key_exists('semantic_ratio', $store)) { + $arguments[7] = $store['semantic_ratio']; + } + $definition = new Definition(MeilisearchStore::class); $definition ->addTag('ai.store') diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 87f42f4f9..36d5f4b45 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -2741,6 +2741,30 @@ public function testVectorizerModelBooleanOptionsArePreserved() $this->assertSame('text-embedding-3-small?normalize=false&cache=true&nested%5Bbool%5D=false', $vectorizerDefinition->getArgument(1)); } + #[TestDox('Meilisearch store with custom semantic_ratio can be configured')] + public function testMeilisearchStoreWithCustomSemanticRatioCanBeConfigured() + { + $container = $this->buildContainer([ + 'ai' => [ + 'store' => [ + 'meilisearch' => [ + 'test_store' => [ + 'endpoint' => 'http://127.0.0.1:7700', + 'api_key' => 'test_key', + 'index_name' => 'test_index', + 'semantic_ratio' => 0.5, + ], + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.store.meilisearch.test_store')); + $definition = $container->getDefinition('ai.store.meilisearch.test_store'); + $arguments = $definition->getArguments(); + $this->assertSame(0.5, $arguments[7]); + } + private function buildContainer(array $configuration): ContainerBuilder { $container = new ContainerBuilder(); @@ -2904,6 +2928,7 @@ private function getFullConfig(): array 'embedder' => 'default', 'vector_field' => '_vectors', 'dimensions' => 768, + 'semantic_ratio' => 0.5, ], ], 'memory' => [ diff --git a/src/store/CHANGELOG.md b/src/store/CHANGELOG.md index e337546bd..c18e69156 100644 --- a/src/store/CHANGELOG.md +++ b/src/store/CHANGELOG.md @@ -59,5 +59,6 @@ CHANGELOG - Minimum score filtering - Result limiting - Distance/similarity scoring + * Add Meilisearch hybrid search support with a configurable `semanticRatio` parameter to control the balance between semantic (vector) and full-text search. * Add custom exception hierarchy with `ExceptionInterface` * Add support for specific exceptions for invalid arguments and runtime errors diff --git a/src/store/src/Bridge/Meilisearch/Store.php b/src/store/src/Bridge/Meilisearch/Store.php index 7aed6ce16..da4ec99d2 100644 --- a/src/store/src/Bridge/Meilisearch/Store.php +++ b/src/store/src/Bridge/Meilisearch/Store.php @@ -28,7 +28,11 @@ { /** * @param string $embedder The name of the embedder where vectors are stored - * @param string $vectorFieldName The name of the field int the index that contains the vector + * @param string $vectorFieldName The name of the field in the index that contains the vector + * @param float $semanticRatio The ratio between semantic (vector) and full-text search (0.0 to 1.0) + * - 0.0 = 100% full-text search + * - 0.5 = balanced hybrid search + * - 1.0 = 100% semantic search (vector only) */ public function __construct( private HttpClientInterface $httpClient, @@ -38,7 +42,11 @@ public function __construct( private string $embedder = 'default', private string $vectorFieldName = '_vectors', private int $embeddingsDimension = 1536, + private float $semanticRatio = 1.0, ) { + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } } public function setup(array $options = []): void @@ -71,13 +79,20 @@ public function add(VectorDocument ...$documents): void public function query(Vector $vector, array $options = []): array { + $semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio; + + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } + $result = $this->request('POST', \sprintf('indexes/%s/search', $this->indexName), [ + 'q' => $options['q'] ?? '', 'vector' => $vector->getData(), 'showRankingScore' => true, 'retrieveVectors' => true, 'hybrid' => [ 'embedder' => $this->embedder, - 'semanticRatio' => 1.0, + 'semanticRatio' => $semanticRatio, ], ]); diff --git a/src/store/tests/Bridge/Meilisearch/StoreTest.php b/src/store/tests/Bridge/Meilisearch/StoreTest.php index e06f01ef2..9afb0a63b 100644 --- a/src/store/tests/Bridge/Meilisearch/StoreTest.php +++ b/src/store/tests/Bridge/Meilisearch/StoreTest.php @@ -15,9 +15,11 @@ use Symfony\AI\Platform\Vector\Vector; use Symfony\AI\Store\Bridge\Meilisearch\Store; use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; use Symfony\Component\HttpClient\Exception\ClientException; use Symfony\Component\HttpClient\MockHttpClient; use Symfony\Component\HttpClient\Response\JsonMockResponse; +use Symfony\Component\HttpClient\Response\MockResponse; use Symfony\Component\Uid\Uuid; final class StoreTest extends TestCase @@ -275,4 +277,148 @@ public function testMetadataWithoutIDRankingandVector() $this->assertSame($expected, $vectors[0]->metadata->getArrayCopy()); } + + public function testConstructorWithValidSemanticRatio() + { + $httpClient = new MockHttpClient(); + + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $this->assertInstanceOf(Store::class, $store); + } + + public function testConstructorThrowsExceptionForInvalidSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $httpClient = new MockHttpClient(); + new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 1.5); + } + + public function testConstructorThrowsExceptionForNegativeSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $httpClient = new MockHttpClient(); + new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: -0.1); + } + + public function testQueryUsesDefaultSemanticRatio() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [ + [ + 'id' => '550e8400-e29b-41d4-a716-446655440000', + '_vectors' => [ + 'default' => [ + 'embeddings' => [0.1, 0.2, 0.3], + ], + ], + '_rankingScore' => 0.95, + 'content' => 'Test document', + ], + ], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.7); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector); + + $request = $httpClient->getRequestsCount() > 0 ? $responses[0]->getRequestOptions() : null; + $this->assertNotNull($request); + + $body = json_decode($request['body'], true); + $this->assertSame(0.7, $body['hybrid']['semanticRatio']); + } + + public function testQueryCanOverrideSemanticRatio() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector, ['semanticRatio' => 0.2]); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + + $this->assertSame(0.2, $body['hybrid']['semanticRatio']); + } + + public function testQueryThrowsExceptionForInvalidSemanticRatioOption() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $httpClient = new MockHttpClient(); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index'); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector, ['semanticRatio' => 2.0]); + } + + public function testQueryWithPureKeywordSearch() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [ + [ + 'id' => '550e8400-e29b-41d4-a716-446655440000', + '_vectors' => [ + 'default' => [ + 'embeddings' => [0.1, 0.2, 0.3], + ], + ], + '_rankingScore' => 0.85, + 'title' => 'Symfony Framework', + ], + ], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index'); + + $vector = new Vector([0.1, 0.2, 0.3]); + $results = $store->query($vector, ['semanticRatio' => 0.0]); + + $this->assertCount(1, $results); + $this->assertInstanceOf(VectorDocument::class, $results[0]); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + $this->assertSame(0.0, $body['hybrid']['semanticRatio']); + } + + public function testQueryWithBalancedHybridSearch() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + + $this->assertSame(0.5, $body['hybrid']['semanticRatio']); + } }