diff --git a/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php new file mode 100644 index 0000000000..b701620361 --- /dev/null +++ b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php @@ -0,0 +1,70 @@ +arrayNode('embedding_models') + ->normalizeKeys(false) + ->info('Defines available embedding models') + ->arrayPrototype() + ->children() + ->scalarNode('name')->isRequired()->end() + ->integerNode('dimensions')->isRequired()->end() + ->scalarNode('field_suffix')->isRequired()->end() + ->scalarNode('embedding_provider')->isRequired()->end() + ->end() + ->end() + ->end() + ->scalarNode('default_embedding_model') + ->info('Default embedding model identifier') + ->defaultValue('text-embedding-ada-002') + ->end(); + } + + /** + * @param array $config + */ + public function preMap(array $config, ContextualizerInterface $contextualizer): void + { + $contextualizer->mapConfigArray('embedding_models', $config); + $contextualizer->mapSetting('default_embedding_model', $config); + } + + /** + * @param array $scopeSettings + */ + public function mapConfig(array &$scopeSettings, $currentScope, ContextualizerInterface $contextualizer): void + { + // Nothing to do here. + } +} diff --git a/src/bundle/Core/IbexaCoreBundle.php b/src/bundle/Core/IbexaCoreBundle.php index be9b0ab962..c063430758 100644 --- a/src/bundle/Core/IbexaCoreBundle.php +++ b/src/bundle/Core/IbexaCoreBundle.php @@ -123,6 +123,7 @@ public function getContainerExtension() new ConfigParser\UrlChecker(), new ConfigParser\TwigVariablesParser(), new ConfigParser\UserContentTypeIdentifier(), + new ConfigParser\Embeddings(), ], [ new RepositoryConfigParser\Storage(), diff --git a/src/bundle/Core/Resources/config/default_settings.yml b/src/bundle/Core/Resources/config/default_settings.yml index 18e683ada3..6dc43995e5 100644 --- a/src/bundle/Core/Resources/config/default_settings.yml +++ b/src/bundle/Core/Resources/config/default_settings.yml @@ -273,3 +273,6 @@ parameters: writeFlags: ~ linkHandling: ~ permissions: [ ] + + ibexa.site_access.config.default.embedding_models: [] + ibexa.site_access.config.default.default_embedding_model: 'text-embedding-ada-002' diff --git a/src/bundle/Core/Resources/config/embeddings.yml b/src/bundle/Core/Resources/config/embeddings.yml new file mode 100644 index 0000000000..569a1c5da4 --- /dev/null +++ b/src/bundle/Core/Resources/config/embeddings.yml @@ -0,0 +1,24 @@ +services: + _defaults: + autowire: true + autoconfigure: true + public: false + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderRegistryInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry + + Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry: + arguments: + $embeddingProviders: !tagged_iterator { tag: 'ibexa.embedding_provider', index_by: 'provider_name' } + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderResolverInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingProviderResolver + + Ibexa\Core\Search\Embedding\EmbeddingProviderResolver: ~ + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingConfiguration + + Ibexa\Core\Search\Embedding\EmbeddingConfiguration: ~ + + Ibexa\Contracts\Core\Search\FieldType\EmbeddingFieldFactory: ~ diff --git a/src/bundle/Core/Resources/config/services.yml b/src/bundle/Core/Resources/config/services.yml index 8babb6f451..170fe884f2 100644 --- a/src/bundle/Core/Resources/config/services.yml +++ b/src/bundle/Core/Resources/config/services.yml @@ -1,5 +1,6 @@ imports: - { resource: commands.yml } + - { resource: embeddings.yml } parameters: ibexa.site_access.default.name: default diff --git a/src/contracts/Repository/Values/Content/EmbeddingQuery.php b/src/contracts/Repository/Values/Content/EmbeddingQuery.php new file mode 100644 index 0000000000..b0e4a957bb --- /dev/null +++ b/src/contracts/Repository/Values/Content/EmbeddingQuery.php @@ -0,0 +1,116 @@ +embedding; + } + + public function setEmbedding(?Embedding $embedding): void + { + $this->embedding = $embedding; + } + + public function getFilter(): ?Criterion + { + return $this->filter; + } + + public function setFilter(Criterion $filter): void + { + $this->filter = $filter; + } + + /** + * @return \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] + */ + public function getAggregations(): array + { + return $this->aggregations; + } + + /** + * @param \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] $aggregations + */ + public function setAggregations(array $aggregations): void + { + $this->aggregations = $aggregations; + } + + public function getOffset(): int + { + return $this->offset; + } + + public function setOffset(int $offset): void + { + $this->offset = $offset; + } + + public function getLimit(): int + { + return $this->limit; + } + + public function setLimit(int $limit): void + { + $this->limit = $limit; + } + + public function setPerformCount(bool $performCount): void + { + $this->performCount = $performCount; + } + + public function getPerformCount(): bool + { + return $this->performCount; + } + + public function isValid(): bool + { + $invalid = []; + + if ($this->query !== null) { + $invalid[] = 'query'; + } + if (!empty($this->sortClauses)) { + $invalid[] = 'sortClauses'; + } + if (!empty($this->facetBuilders)) { + $invalid[] = 'facetBuilders'; + } + if ($this->spellcheck !== null) { + $invalid[] = 'spellcheck'; + } + + if (count($invalid) > 0) { + throw new InvalidArgumentException( + sprintf( + 'EmbeddingQuery may not set [%s].', + implode(', ', $invalid) + ) + ); + } + + return true; + } +} diff --git a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php new file mode 100644 index 0000000000..b69e96b220 --- /dev/null +++ b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php @@ -0,0 +1,77 @@ +query = new EmbeddingQuery(); + } + + public static function create(): self + { + return new self(); + } + + public function withEmbedding(Embedding $embed): self + { + $this->query->setEmbedding($embed); + + return $this; + } + + public function setLimit(int $limit): self + { + $this->query->setLimit($limit); + + return $this; + } + + public function setOffset(int $offset): self + { + $this->query->setOffset($offset); + + return $this; + } + + public function setFilter(Criterion $filter): self + { + $this->query->setFilter($filter); + + return $this; + } + + /** + * @param array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> $aggregations + */ + public function setAggregations(array $aggregations): self + { + $this->query->setAggregations($aggregations); + + return $this; + } + + public function setPerformCount(bool $performCount): self + { + $this->query->setPerformCount($performCount); + + return $this; + } + + public function build(): EmbeddingQuery + { + return $this->query; + } +} diff --git a/src/contracts/Repository/Values/Content/Query.php b/src/contracts/Repository/Values/Content/Query.php index ebcb9aebc7..c32f98826a 100644 --- a/src/contracts/Repository/Values/Content/Query.php +++ b/src/contracts/Repository/Values/Content/Query.php @@ -14,7 +14,7 @@ /** * This class is used to perform a Content query. */ -class Query extends ValueObject +class Query extends ValueObject implements QueryValidatorInterface { public const SORT_ASC = 'ascending'; public const SORT_DESC = 'descending'; @@ -102,6 +102,11 @@ class Query extends ValueObject * @var bool */ public $performCount = true; + + public function isValid(): bool + { + return true; + } } class_alias(Query::class, 'eZ\Publish\API\Repository\Values\Content\Query'); diff --git a/src/contracts/Repository/Values/Content/Query/Embedding.php b/src/contracts/Repository/Values/Content/Query/Embedding.php new file mode 100644 index 0000000000..21a4c00a5d --- /dev/null +++ b/src/contracts/Repository/Values/Content/Query/Embedding.php @@ -0,0 +1,31 @@ +value = $value; + } + + /** @return float[] */ + public function getValue(): array + { + return $this->value; + } +} diff --git a/src/contracts/Repository/Values/Content/QueryValidatorInterface.php b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php new file mode 100644 index 0000000000..4a3ae23f4f --- /dev/null +++ b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php @@ -0,0 +1,12 @@ + + */ + public function getEmbeddingModels(): array; + + /** + * @return string[] + */ + public function getEmbeddingModelIdentifiers(): array; + + /** + * @return array{name: string, dimensions: int, field_suffix: string, embedding_provider: string} + */ + public function getEmbeddingModel(string $identifier): array; + + public function getDefaultEmbeddingModelIdentifier(): string; + + /** + * @return array{name: string, dimensions: int, field_suffix: string, 'embedding_provider': string} + */ + public function getDefaultEmbeddingModel(): array; + + public function getDefaultEmbeddingProvider(): string; + + public function getDefaultEmbeddingModelFieldSuffix(): string; +} diff --git a/src/contracts/Search/Embedding/EmbeddingProviderInterface.php b/src/contracts/Search/Embedding/EmbeddingProviderInterface.php new file mode 100644 index 0000000000..dbaf439a20 --- /dev/null +++ b/src/contracts/Search/Embedding/EmbeddingProviderInterface.php @@ -0,0 +1,17 @@ + $type]); + } + + /** + * @param string $type Has to be handled by configured search engine (ibexa_dense_vector_ada002). + */ + public static function create(string $type): self + { + return new self($type); + } +} diff --git a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php new file mode 100644 index 0000000000..517a8aa1ed --- /dev/null +++ b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php @@ -0,0 +1,32 @@ +config = $config; + } + + public function create(?string $type = null): EmbeddingField + { + if ($type !== null) { + return EmbeddingField::create($type); + } + + $suffix = $this->config->getDefaultEmbeddingModelFieldSuffix(); + + return EmbeddingField::create('ibexa_dense_vector_' . $suffix); + } +} diff --git a/src/lib/Resources/settings/search_engines/common.yml b/src/lib/Resources/settings/search_engines/common.yml index 9e31bcb722..64e6ac650c 100644 --- a/src/lib/Resources/settings/search_engines/common.yml +++ b/src/lib/Resources/settings/search_engines/common.yml @@ -22,6 +22,9 @@ parameters: ez_geolocation: 'gl' ez_document: 'doc' ez_fulltext: 'fulltext' + ibexa_dense_vector_ada002: 'ada002_dv' + ibexa_dense_vector_3small: '3small_dv' + ibexa_dense_vector_3large: '3large_dv' services: # Note: services tagged with 'ibexa.field_type.indexable' diff --git a/src/lib/Resources/settings/search_engines/field_value_mappers.yml b/src/lib/Resources/settings/search_engines/field_value_mappers.yml index 0d5295ba6d..eb80c572db 100644 --- a/src/lib/Resources/settings/search_engines/field_value_mappers.yml +++ b/src/lib/Resources/settings/search_engines/field_value_mappers.yml @@ -73,3 +73,7 @@ services: Ibexa\Core\Search\Common\FieldValueMapper\MultipleRemoteIdentifierMapper: tags: - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\MultipleRemoteIdentifierField } + + Ibexa\Core\Search\Common\FieldValueMapper\EmbeddingMapper: + tags: + - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\EmbeddingField } diff --git a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php new file mode 100644 index 0000000000..0b182792c6 --- /dev/null +++ b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php @@ -0,0 +1,29 @@ +getType() instanceof EmbeddingField; + } + + public function map(Field $field) + { + return $field->getValue(); + } +} diff --git a/src/lib/Search/Embedding/EmbeddingConfiguration.php b/src/lib/Search/Embedding/EmbeddingConfiguration.php new file mode 100644 index 0000000000..206ef0575e --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingConfiguration.php @@ -0,0 +1,81 @@ +configResolver = $configResolver; + } + + /** + * @return array + */ + public function getEmbeddingModels(): array + { + return (array)$this->configResolver->getParameter('embedding_models'); + } + + /** + * @return string[] + */ + public function getEmbeddingModelIdentifiers(): array + { + return array_keys($this->getEmbeddingModels()); + } + + /** + * @return array{name: string, dimensions: int, field_suffix: string, embedding_provider: string} + */ + public function getEmbeddingModel(string $identifier): array + { + $models = $this->getEmbeddingModels(); + + if (!isset($models[$identifier])) { + throw new InvalidArgumentException( + sprintf('Embedding model "%s" is not configured.', $identifier) + ); + } + + return $models[$identifier]; + } + + public function getDefaultEmbeddingModelIdentifier(): string + { + return (string)$this->configResolver->getParameter('default_embedding_model'); + } + + /** + * @return array{name: string, dimensions: int, field_suffix: string, 'embedding_provider': string} + */ + public function getDefaultEmbeddingModel(): array + { + return $this->getEmbeddingModel( + $this->getDefaultEmbeddingModelIdentifier() + ); + } + + public function getDefaultEmbeddingProvider(): string + { + return (string)$this->getDefaultEmbeddingModel()['embedding_provider']; + } + + public function getDefaultEmbeddingModelFieldSuffix(): string + { + return (string)$this->getDefaultEmbeddingModel()['field_suffix']; + } +} diff --git a/src/lib/Search/Embedding/EmbeddingProviderRegistry.php b/src/lib/Search/Embedding/EmbeddingProviderRegistry.php new file mode 100644 index 0000000000..8dd1bf0317 --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingProviderRegistry.php @@ -0,0 +1,45 @@ + */ + private PoolInterface $pool; + + /** + * @param iterable<\Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderInterface> $embeddingProviders + */ + public function __construct(iterable $embeddingProviders = []) + { + $this->pool = new Pool(EmbeddingProviderInterface::class, $embeddingProviders); + $this->pool->setExceptionArgumentName('embedding_provider'); + $this->pool->setExceptionMessageTemplate('Could not find %s for \'%s\' embedding provider.'); + } + + public function getEmbeddingProviders(): iterable + { + return $this->pool->getEntries(); + } + + public function hasEmbeddingProvider(string $identifier): bool + { + return $this->pool->has($identifier); + } + + public function getEmbeddingProvider(string $identifier): EmbeddingProviderInterface + { + return $this->pool->get($identifier); + } +} diff --git a/src/lib/Search/Embedding/EmbeddingProviderResolver.php b/src/lib/Search/Embedding/EmbeddingProviderResolver.php new file mode 100644 index 0000000000..ea78212ce9 --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingProviderResolver.php @@ -0,0 +1,43 @@ +embeddingConfiguration = $embeddingConfiguration; + $this->registry = $registry; + } + + public function resolve(): EmbeddingProviderInterface + { + $defaultEmbeddingProvider = $this->embeddingConfiguration->getDefaultEmbeddingProvider(); + + if (!$this->registry->hasEmbeddingProvider($defaultEmbeddingProvider)) { + throw new EmbeddingResolverNotFoundException( + $defaultEmbeddingProvider + ); + } + + return $this->registry->getEmbeddingProvider($defaultEmbeddingProvider); + } +} diff --git a/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php new file mode 100644 index 0000000000..d758838147 --- /dev/null +++ b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php @@ -0,0 +1,165 @@ + + */ + protected function getMinimalConfiguration(): array + { + $input = file_get_contents(__DIR__ . '/../../Fixtures/ezpublish_minimal.yml'); + + if ($input === false) { + self::fail('Failed to load ezpublish_minimal.yml'); + } + + return Yaml::parse($input); + } + + public function testDefaultEmbeddingsSettings(): void + { + $this->load(); + + $this->assertConfigResolverParameterValue('embedding_models', [], 'ibexa_demo_site'); + $this->assertConfigResolverParameterValue('default_embedding_model', 'text-embedding-ada-002', 'ibexa_demo_site'); + } + + /** + * @param array $config + * @param array $expected + * + * @dataProvider embeddingsSettingsProvider + */ + public function testEmbeddingsSettings(array $config, array $expected): void + { + $this->load( + [ + 'system' => [ + 'ibexa_demo_site' => $config, + ], + ] + ); + + foreach ($expected as $key => $val) { + $this->assertConfigResolverParameterValue($key, $val, 'ibexa_demo_site'); + } + } + + /** + * @return array, + * default_embedding_model?: string + * }, + * array{ + * embedding_models: array, + * default_embedding_model: string + * } + * }> + */ + public function embeddingsSettingsProvider(): array + { + return [ + [ + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-small' => [ + 'name' => 'text-embedding-3-small', + 'dimensions' => 1536, + 'field_suffix' => '3small', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-large' => [ + 'name' => 'text-embedding-3-large', + 'dimensions' => 3072, + 'field_suffix' => '3large', + 'embedding_provider' => 'ibexa_openai', + ], + ], + ], + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-small' => [ + 'name' => 'text-embedding-3-small', + 'dimensions' => 1536, + 'field_suffix' => '3small', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-large' => [ + 'name' => 'text-embedding-3-large', + 'dimensions' => 3072, + 'field_suffix' => '3large', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-ada-002', + ], + ], + [ + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-foo', + ], + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-foo', + ], + ], + ]; + } +} diff --git a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php new file mode 100644 index 0000000000..98c9e8cbca --- /dev/null +++ b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php @@ -0,0 +1,79 @@ +createMock(Embedding::class); + $aggregations = [$this->createMock(Aggregation::class), $this->createMock(Aggregation::class)]; + + $builder = EmbeddingQueryBuilder::create() + ->withEmbedding($embedding) + ->setLimit(10) + ->setOffset(5) + ->setPerformCount(true) + ->setAggregations($aggregations); + + $query = $builder->build(); + + $this->assertSame( + $embedding, + $query->getEmbedding(), + 'Embedding should be set by builder' + ); + + $this->assertEquals(10, $query->getLimit(), 'Limit should be set by builder'); + $this->assertEquals(5, $query->getOffset(), 'Offset should be set by builder'); + $this->assertTrue($query->getPerformCount(), 'PerformCount flag should be true'); + + $aggregations = $query->getAggregations(); + $this->assertIsArray($aggregations, 'Aggregations must be array'); + $this->assertCount(2, $aggregations, 'Two aggregations added'); + } + + public function testIsValidReturnsTrueForCleanQuery(): void + { + $query = EmbeddingQueryBuilder::create() + ->withEmbedding($this->createMock(Embedding::class)) + ->build(); + + $this->assertTrue($query->isValid()); + } + + public function testSettingSortClausesThenIsValidThrows(): void + { + $query = EmbeddingQueryBuilder::create() + ->withEmbedding($this->createMock(Embedding::class)) + ->build(); + + // bypass setter via array-append magic + $query->sortClauses[] = new ContentName(BaseQuery::SORT_ASC); + $query->query = $this->createMock(Criterion::class); + $query->facetBuilders = [$this->createMock(FacetBuilder::class)]; + $query->spellcheck = new Spellcheck('foo'); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('EmbeddingQuery may not set [query, sortClauses, facetBuilders, spellcheck].'); + + $query->isValid(); + } +} diff --git a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php new file mode 100644 index 0000000000..6c9ffad555 --- /dev/null +++ b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php @@ -0,0 +1,55 @@ +createMock(EmbeddingConfigurationInterface::class); + $config + ->expects($this->once()) + ->method('getDefaultEmbeddingModelFieldSuffix') + ->willReturn($suffix); + + $factory = new EmbeddingFieldFactory($config); + + $field = $factory->create(); + + $this->assertSame( + 'ibexa_dense_vector_model_123', + $field->getType(), + 'Factory should prepend "ibexa_dense_vector_" to the suffix from the config' + ); + } + + public function testCreateWithCustomType(): void + { + $config = $this->createMock(EmbeddingConfigurationInterface::class); + $config + ->expects($this->never()) + ->method('getDefaultEmbeddingModelFieldSuffix'); + + $factory = new EmbeddingFieldFactory($config); + $customType = 'custom_model'; + + $field = $factory->create($customType); + + $this->assertSame( + $customType, + $field->getType(), + 'Factory should use the explicit type when provided' + ); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php new file mode 100644 index 0000000000..599b3c0ae6 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php @@ -0,0 +1,140 @@ + ['name' => 'text-embedding-3-small', 'dimensions' => 1536, 'field_suffix' => '3small', 'embedding_provider' => 'ibexa_openai'], + 'text-embedding-3-large' => ['name' => 'text-embedding-3-large', 'dimensions' => 3072, 'field_suffix' => '3large', 'embedding_provider' => 'ibexa_openai'], + 'text-embedding-ada-002' => ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + ]; + + /** @var \Ibexa\Contracts\Core\SiteAccess\ConfigResolverInterface&\PHPUnit\Framework\MockObject\MockObject */ + private ConfigResolverInterface $configResolver; + + private EmbeddingConfiguration $config; + + protected function setUp(): void + { + $this->configResolver = $this->createMock(ConfigResolverInterface::class); + $this->config = new EmbeddingConfiguration( + $this->configResolver + ); + } + + public function testGetDefaultEmbeddingModel(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame( + ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + $this->config->getDefaultEmbeddingModel() + ); + } + + public function testGetEmbeddingModelIdentifiers(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame( + ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], + $this->config->getEmbeddingModelIdentifiers() + ); + } + + public function testGetEmbeddingModels(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + $this->assertSame(self::MODELS, $this->config->getEmbeddingModels()); + $this->assertSame( + ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], + $this->config->getEmbeddingModelIdentifiers() + ); + } + + public function testGetEmbeddingModel(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + $this->assertSame( + ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + $this->config->getEmbeddingModel('text-embedding-ada-002') + ); + } + + public function testGetEmbeddingModelWillThrowException(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + self::expectException(InvalidArgumentException::class); + self::expectExceptionMessage('Embedding model "non-existing-model" is not configured.'); + + $this->config->getEmbeddingModel('non-existing-model'); + } + + public function testGetDefaultEmbeddingModelIdentifier(): void + { + $this->configResolver + ->method('getParameter') + ->with('default_embedding_model') + ->willReturn('text-embedding-ada-002'); + + $this->assertSame('text-embedding-ada-002', $this->config->getDefaultEmbeddingModelIdentifier()); + } + + public function testGetDefaultEmbeddingProvider(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame('ibexa_openai', $this->config->getDefaultEmbeddingProvider()); + } + + public function getDefaultEmbeddingModelFieldSuffix(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame('ada002', $this->config->getDefaultEmbeddingModelFieldSuffix()); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php b/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php new file mode 100644 index 0000000000..3f116e9c02 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php @@ -0,0 +1,76 @@ + $this->createMock(EmbeddingProviderInterface::class), + ]); + + self::assertTrue($registry->hasEmbeddingProvider('existing')); + self::assertFalse($registry->hasEmbeddingProvider('non-existing')); + } + + public function testGetEmbeddingProvider(): void + { + $expectedEmbeddingProvider = $this->createMock(EmbeddingProviderInterface::class); + + $registry = new EmbeddingProviderRegistry([ + 'example' => $expectedEmbeddingProvider, + ]); + + self::assertSame($expectedEmbeddingProvider, $registry->getEmbeddingProvider('example')); + } + + public function testGetEmbeddingProviderThrowsInvalidArgumentException(): void + { + $message = "Argument 'embedding_provider' is invalid: Could not find " + . "Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderInterface for 'non-existing' embedding provider."; + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage($message); + + $registry = new EmbeddingProviderRegistry([/* Empty registry */]); + $registry->getEmbeddingProvider('non-existing'); + } + + public function testGetEmbeddingProviders(): void + { + $embeddingProviderA = $this->createMock(EmbeddingProviderInterface::class); + $embeddingProviderB = $this->createMock(EmbeddingProviderInterface::class); + + $registry = new EmbeddingProviderRegistry([ + 'existingA' => $embeddingProviderA, + 'existingB' => $embeddingProviderB, + ]); + + $embeddingProviders = $registry->getEmbeddingProviders(); + + $this->assertIsArray( + $embeddingProviders, + 'getProviders() should return an array of embedding providers' + ); + + self::assertSame( + [ + 'existingA' => $embeddingProviderA, + 'existingB' => $embeddingProviderB, + ], + $embeddingProviders + ); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php new file mode 100644 index 0000000000..7ba3471411 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php @@ -0,0 +1,82 @@ +configuration = $this->createMock(EmbeddingConfigurationInterface::class); + $this->registry = $this->createMock(EmbeddingProviderRegistryInterface::class); + $this->resolver = new EmbeddingProviderResolver( + $this->configuration, + $this->registry + ); + } + + public function testResolveReturnsProviderWhenAvailable(): void + { + $embeddingProviderIdentifier = 'ibexa_openai'; + $mockProvider = $this->createMock(EmbeddingProviderInterface::class); + + $this->configuration + ->method('getDefaultEmbeddingProvider') + ->willReturn($embeddingProviderIdentifier); + + $this->registry + ->method('hasEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn(true); + + $this->registry + ->method('getEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn($mockProvider); + + $resolved = $this->resolver->resolve(); + + $this->assertSame($mockProvider, $resolved); + } + + public function testResolveThrowsWhenProviderMissing(): void + { + $embeddingProviderIdentifier = 'foo'; + + $this->configuration + ->method('getDefaultEmbeddingProvider') + ->willReturn($embeddingProviderIdentifier); + + $this->registry + ->method('hasEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn(false); + + $this->expectException(EmbeddingResolverNotFoundException::class); + $this->expectExceptionMessage( + sprintf('No embedding provider registered for identifier "%s".', $embeddingProviderIdentifier) + ); + + $this->resolver->resolve(); + } +}