From 3b5b5671c7f1a4a716d7eef8b0ce54e8f45d2a11 Mon Sep 17 00:00:00 2001 From: Anik Bhattacharjee Date: Thu, 23 Apr 2026 11:04:10 -0400 Subject: [PATCH] LCORE-1446: Add support for metadata filters in Solr vector search Updates the Solr vector search integration to support the new Filter API introduced in llama-stack 0.6.0, enabling metadata-based filtering of RAG results using comparison and compound filters. Filter format examples (each object below is the value of the request's solr.filters field; its inner "filters" entry is extracted and passed to vector_io.query as the top-level "filters" parameter, while any other keys are still forwarded under "solr"): Simple: {"filters": {"type": "eq", "key": "platform", "value": "openshift"}} Compound: {"filters": {"type": "and", "filters": [...]}} Note: This change requires lightspeed-providers with solr_vector_io filter support, introduced in https://github.com/lightspeed-core/lightspeed-providers/pull/119 --- src/models/requests.py | 34 ++++++++-- src/utils/vector_search.py | 21 +++++- tests/unit/utils/test_vector_search.py | 90 ++++++++++++++++++++++++-- 3 files changed, 131 insertions(+), 14 deletions(-) diff --git a/src/models/requests.py b/src/models/requests.py index 9299bb4e1..0cfdf9f2b 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -282,12 +282,34 @@ class QueryRequest(BaseModel): solr: Optional[SolrVectorSearchRequest] = Field( None, description=( - "Solr inline RAG config: mode (semantic, hybrid, lexical) and filters; " - "a legacy filter-only object (e.g. fq) is still accepted." + "Solr inline RAG config: mode (semantic, hybrid, lexical) and filters. " + "Supports structured metadata filters (eq, ne, in, nin comparison operators). " + "Legacy filter-only objects (e.g. fq) are still accepted." 
), examples=[ - {"mode": "hybrid", "filters": {"fq": ["product:*openshift*"]}}, - {"filters": {"fq": ["product:*openshift*", "product_version:*4.16*"]}}, + { + "mode": "hybrid", + "filters": { + "filters": { + "type": "eq", + "key": "platform", + "value": "openshift" + } + } + }, + { + "mode": "semantic", + "filters": { + "filters": { + "type": "and", + "filters": [ + {"type": "eq", "key": "platform", "value": "openshift"}, + {"type": "in", "key": "version", "value": ["4.14", "4.15", "4.16"]} + ] + } + } + }, + {"filters": {"fq": ["product:*openshift*"]}}, ], ) @@ -752,7 +774,9 @@ class ResponsesRequest(BaseModel): topic summary for new conversations. Defaults to True. shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all configured shields are used. - solr: Optional Solr inline RAG options (mode, filters) or legacy filter-only dict. + solr: Optional Solr inline RAG options (mode, filters). Supports + structured metadata filters (eq, ne, in, nin comparison operators). + Legacy filter-only dicts are still accepted. """ input: ResponseInput diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index dc7c69c9b..b0116fa10 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -60,9 +60,11 @@ def _build_query_params( Args: solr: Optional structured Solr request (mode and filters from the API). + - mode: Solr search mode (semantic, hybrid, lexical) + - filters: Solr filter payload, may contain structured metadata filters Returns: - Parameter dictionary for ``vector_io.query``. + Parameter dictionary for ``vector_io.query`` with extracted filters at top level. 
""" resolved_mode = ( solr.mode @@ -78,8 +80,21 @@ def _build_query_params( logger.debug("query_request.solr: %s", solr) if solr is not None and solr.filters is not None: - params["solr"] = solr.filters - logger.debug("Final params with solr filters: %s", params) + # Extract structured metadata filters if present in solr.filters dict + # Filters need to be at top-level params for vector_io.query + if isinstance(solr.filters, dict) and "filters" in solr.filters: + params["filters"] = solr.filters["filters"] + logger.debug("Extracted filters from solr.filters: %s", params["filters"]) + + # Pass remaining solr.filters content (legacy fq, etc.) to params["solr"] + remaining_filters = {k: v for k, v in solr.filters.items() if k != "filters"} + if remaining_filters: + params["solr"] = remaining_filters + logger.debug("Remaining solr.filters: %s", remaining_filters) + else: + # Legacy format: entire solr.filters dict is passed as params["solr"] + params["solr"] = solr.filters + logger.debug("Legacy solr.filters format: %s", params["solr"]) else: logger.debug("No solr filters provided") diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index 2aafab0a7..1697cb9dd 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py @@ -67,14 +67,89 @@ def test_default_params(self) -> None: assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE assert "solr" not in params - def test_with_solr_filters(self) -> None: - """Test parameters when solr filters are provided.""" - solr = SolrVectorSearchRequest.model_validate({"filter": "value"}) + def test_with_legacy_solr_filters(self) -> None: + """Test parameters when legacy solr filters are provided.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "fq": ["platform:openshift"], + }, + }, + ) + params = _build_query_params(solr=solr) + + assert params["solr"] == {"fq": ["platform:openshift"]} + assert params["k"] == 
constants.SOLR_VECTOR_SEARCH_DEFAULT_K + assert "filters" not in params + + def test_with_structured_metadata_filters(self) -> None: + """Test parameters with structured metadata filter format.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "eq", + "key": "platform", + "value": "openshift", + }, + }, + }, + ) + params = _build_query_params(solr=solr) + + # Filters should be extracted to top-level + assert "filters" in params + assert params["filters"]["type"] == "eq" + assert params["filters"]["key"] == "platform" + assert params["filters"]["value"] == "openshift" + assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K + # No remaining solr params + assert "solr" not in params + + def test_with_filters_and_other_solr_params(self) -> None: + """Test parameters with both filters and other solr-specific params.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "in", + "key": "version", + "value": ["4.14", "4.15"], + }, + "custom_param": "value", + }, + }, + ) params = _build_query_params(solr=solr) - assert params["solr"] == {"filter": "value"} + # Filters extracted to top-level + assert params["filters"]["type"] == "in" + assert params["filters"]["key"] == "version" + # Other params remain under solr key + assert params["solr"] == {"custom_param": "value"} assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K + def test_with_compound_filter(self) -> None: + """Test parameters with compound AND filter.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "and", + "filters": [ + {"type": "eq", "key": "platform", "value": "openshift"}, + {"type": "ne", "key": "status", "value": "archived"}, + ], + }, + }, + }, + ) + params = _build_query_params(solr=solr) + + assert params["filters"]["type"] == "and" + assert len(params["filters"]["filters"]) == 2 + assert "solr" not in params + def test_custom_mode(self) -> None: 
"""Request mode overrides the default Solr vector_io mode.""" solr = SolrVectorSearchRequest(mode="lexical") @@ -86,7 +161,8 @@ def test_custom_mode(self) -> None: def test_mode_with_solr_filters(self) -> None: """Custom mode is combined with solr filter payload.""" solr = SolrVectorSearchRequest( - mode="semantic", filters={"fq": ["product:*openshift*"]} + mode="semantic", + filters={"fq": ["product:*openshift*"]}, ) params = _build_query_params(solr=solr) @@ -95,7 +171,9 @@ def test_mode_with_solr_filters(self) -> None: def test_mode_with_only_filters(self) -> None: """Mode is set to default value when only filters are provided.""" - solr = SolrVectorSearchRequest(filters={"fq": ["product:*openshift*"]}) + solr = SolrVectorSearchRequest( + filters={"fq": ["product:*openshift*"]}, + ) params = _build_query_params(solr=solr) assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE