Skip to content
6 changes: 6 additions & 0 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,12 @@
->stringNode('embedder')->end()
->stringNode('vector_field')->end()
->integerNode('dimensions')->end()
->floatNode('semantic_ratio')
->info('The ratio between semantic (vector) and full-text search (0.0 to 1.0). Default: 1.0 (100% semantic)')
->defaultValue(1.0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have ->min() and ->max(); can you please check? I'm unsure whether we have ->between().

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes @OskarStark, using min() and max() is cleaner. Unfortunately, a between() method doesn't exist in the symfony/config component.

->min(0.0)
->max(1.0)
->end()
->end()
->end()
->end()
Expand Down
4 changes: 4 additions & 0 deletions src/ai-bundle/src/AiBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,10 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
$arguments[6] = $store['dimensions'];
}

if (\array_key_exists('semantic_ratio', $store)) {
$arguments[7] = $store['semantic_ratio'];
}

$definition = new Definition(MeilisearchStore::class);
$definition
->addTag('ai.store')
Expand Down
25 changes: 25 additions & 0 deletions src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2741,6 +2741,30 @@ public function testVectorizerModelBooleanOptionsArePreserved()
$this->assertSame('text-embedding-3-small?normalize=false&cache=true&nested%5Bbool%5D=false', $vectorizerDefinition->getArgument(1));
}

#[TestDox('Meilisearch store with custom semantic_ratio can be configured')]
public function testMeilisearchStoreWithCustomSemanticRatioCanBeConfigured()
{
    // Minimal Meilisearch store configuration carrying a non-default hybrid ratio.
    $storeConfig = [
        'endpoint' => 'http://127.0.0.1:7700',
        'api_key' => 'test_key',
        'index_name' => 'test_index',
        'semantic_ratio' => 0.5,
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'meilisearch' => ['test_store' => $storeConfig],
            ],
        ],
    ]);

    $serviceId = 'ai.store.meilisearch.test_store';
    $this->assertTrue($container->hasDefinition($serviceId));

    // The configured ratio is expected at constructor argument index 7.
    $constructorArguments = $container->getDefinition($serviceId)->getArguments();
    $this->assertSame(0.5, $constructorArguments[7]);
}

private function buildContainer(array $configuration): ContainerBuilder
{
$container = new ContainerBuilder();
Expand Down Expand Up @@ -2904,6 +2928,7 @@ private function getFullConfig(): array
'embedder' => 'default',
'vector_field' => '_vectors',
'dimensions' => 768,
'semantic_ratio' => 0.5,
],
],
'memory' => [
Expand Down
1 change: 1 addition & 0 deletions src/store/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,6 @@ CHANGELOG
- Minimum score filtering
- Result limiting
- Distance/similarity scoring
* Add Meilisearch hybrid search support with a configurable `semanticRatio` parameter to control the balance between semantic (vector) and full-text search.
* Add custom exception hierarchy with `ExceptionInterface`
* Add support for specific exceptions for invalid arguments and runtime errors
19 changes: 17 additions & 2 deletions src/store/src/Bridge/Meilisearch/Store.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
{
/**
* @param string $embedder The name of the embedder where vectors are stored
* @param string $vectorFieldName The name of the field int the index that contains the vector
* @param string $vectorFieldName The name of the field in the index that contains the vector
* @param float $semanticRatio The ratio between semantic (vector) and full-text search (0.0 to 1.0)
* - 0.0 = 100% full-text search
* - 0.5 = balanced hybrid search
* - 1.0 = 100% semantic search (vector only)
*/
public function __construct(
private HttpClientInterface $httpClient,
Expand All @@ -38,7 +42,11 @@ public function __construct(
private string $embedder = 'default',
private string $vectorFieldName = '_vectors',
private int $embeddingsDimension = 1536,
private float $semanticRatio = 1.0,
) {
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
}
}

public function setup(array $options = []): void
Expand Down Expand Up @@ -71,13 +79,20 @@ public function add(VectorDocument ...$documents): void

public function query(Vector $vector, array $options = []): array
{
$semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio;

if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
}

$result = $this->request('POST', \sprintf('indexes/%s/search', $this->indexName), [
'q' => $options['q'] ?? '',
'vector' => $vector->getData(),
'showRankingScore' => true,
'retrieveVectors' => true,
'hybrid' => [
'embedder' => $this->embedder,
'semanticRatio' => 1.0,
'semanticRatio' => $semanticRatio,
],
]);

Expand Down
146 changes: 146 additions & 0 deletions src/store/tests/Bridge/Meilisearch/StoreTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Bridge\Meilisearch\Store;
use Symfony\AI\Store\Document\VectorDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\Component\HttpClient\Exception\ClientException;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\JsonMockResponse;
use Symfony\Component\HttpClient\Response\MockResponse;
use Symfony\Component\Uid\Uuid;

final class StoreTest extends TestCase
Expand Down Expand Up @@ -275,4 +277,148 @@ public function testMetadataWithoutIDRankingandVector()

$this->assertSame($expected, $vectors[0]->metadata->getArrayCopy());
}

public function testConstructorWithValidSemanticRatio()
{
    // 0.5 lies within the accepted 0.0 to 1.0 range, so construction must not throw.
    $store = new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5);

    $this->assertInstanceOf(Store::class, $store);
}

public function testConstructorThrowsExceptionForInvalidSemanticRatio()
{
    // 1.5 is above the upper bound of 1.0, so the constructor must reject it.
    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index', semanticRatio: 1.5);
}

public function testConstructorThrowsExceptionForNegativeSemanticRatio()
{
    // -0.1 is below the lower bound of 0.0, so the constructor must reject it.
    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index', semanticRatio: -0.1);
}

/**
 * Verifies that query() sends the ratio given to the constructor when no
 * per-query "semanticRatio" option is supplied.
 */
public function testQueryUsesDefaultSemanticRatio()
{
    $response = new JsonMockResponse([
        'hits' => [
            [
                'id' => '550e8400-e29b-41d4-a716-446655440000',
                '_vectors' => [
                    'default' => [
                        'embeddings' => [0.1, 0.2, 0.3],
                    ],
                ],
                '_rankingScore' => 0.95,
                'content' => 'Test document',
            ],
        ],
    ]);

    $httpClient = new MockHttpClient([$response]);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.7);

    $store->query(new Vector([0.1, 0.2, 0.3]));

    // Assert the request was actually issued before inspecting its options,
    // so a missing request fails here with a clear message.
    $this->assertSame(1, $httpClient->getRequestsCount());

    // JSON_THROW_ON_ERROR turns a malformed request body into an exception
    // instead of a silent null that would fail on the array access below.
    $body = json_decode($response->getRequestOptions()['body'], true, 512, \JSON_THROW_ON_ERROR);

    $this->assertSame(0.7, $body['hybrid']['semanticRatio']);
}

/**
 * Verifies that a per-query "semanticRatio" option takes precedence over the
 * ratio given to the constructor.
 */
public function testQueryCanOverrideSemanticRatio()
{
    $response = new JsonMockResponse(['hits' => []]);

    $httpClient = new MockHttpClient([$response]);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5);

    $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 0.2]);

    // Make sure a request was issued before reading the recorded options.
    $this->assertSame(1, $httpClient->getRequestsCount());

    // JSON_THROW_ON_ERROR surfaces a malformed body as an exception rather than a silent null.
    $body = json_decode($response->getRequestOptions()['body'], true, 512, \JSON_THROW_ON_ERROR);

    $this->assertSame(0.2, $body['hybrid']['semanticRatio']);
}

public function testQueryThrowsExceptionForInvalidSemanticRatioOption()
{
    // The store itself is built with the default ratio; only the per-query option is out of range.
    $store = new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index');
    $queryVector = new Vector([0.1, 0.2, 0.3]);

    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    $store->query($queryVector, ['semanticRatio' => 2.0]);
}

/**
 * Verifies that a per-query "semanticRatio" of 0.0 is forwarded unchanged in
 * the request body and that hits are still returned as VectorDocument objects.
 */
public function testQueryWithPureKeywordSearch()
{
    $response = new JsonMockResponse([
        'hits' => [
            [
                'id' => '550e8400-e29b-41d4-a716-446655440000',
                '_vectors' => [
                    'default' => [
                        'embeddings' => [0.1, 0.2, 0.3],
                    ],
                ],
                '_rankingScore' => 0.85,
                'title' => 'Symfony Framework',
            ],
        ],
    ]);

    $httpClient = new MockHttpClient([$response]);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index');

    $results = $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 0.0]);

    $this->assertCount(1, $results);
    $this->assertInstanceOf(VectorDocument::class, $results[0]);

    // Make sure a request was issued before reading the recorded options.
    $this->assertSame(1, $httpClient->getRequestsCount());

    // JSON_THROW_ON_ERROR surfaces a malformed body as an exception rather than a silent null.
    $body = json_decode($response->getRequestOptions()['body'], true, 512, \JSON_THROW_ON_ERROR);

    $this->assertSame(0.0, $body['hybrid']['semanticRatio']);
}

/**
 * Verifies that a store constructed with a semanticRatio of 0.5 sends that
 * value in the "hybrid" section of the search request.
 */
public function testQueryWithBalancedHybridSearch()
{
    $response = new JsonMockResponse(['hits' => []]);

    $httpClient = new MockHttpClient([$response]);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5);

    $store->query(new Vector([0.1, 0.2, 0.3]));

    // Make sure a request was issued before reading the recorded options.
    $this->assertSame(1, $httpClient->getRequestsCount());

    // JSON_THROW_ON_ERROR surfaces a malformed body as an exception rather than a silent null.
    $body = json_decode($response->getRequestOptions()['body'], true, 512, \JSON_THROW_ON_ERROR);

    $this->assertSame(0.5, $body['hybrid']['semanticRatio']);
}
}