840 changes: 840 additions & 0 deletions docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md

Large diffs are not rendered by default.

502 changes: 502 additions & 0 deletions docs/design/llama-stack-config-merge/llama-stack-config-merge.md

Large diffs are not rendered by default.

docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md
@@ -0,0 +1,26 @@
# Library-mode PoC evidence

Command:
```bash
export OPENAI_API_KEY=<redacted>
export E2E_OPENAI_MODEL=gpt-4o-mini
uv run lightspeed-stack -c docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml
```
Comment on lines +3 to +8
⚠️ Potential issue | 🟡 Minor

Minor: blank line around fenced block (MD031).

markdownlint flags missing blank lines around the fenced bash block. Trivial; this evidence doc is slated for removal before merge.

🧰 Tools
🪛 markdownlint-cli2 (0.22.0)

[warning] 4-4: Fenced code blocks should be surrounded by blank lines

(MD031, blanks-around-fences)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md`
around lines 3-8: the fenced bash code block starting with "```bash" is missing
blank lines before and/or after (markdownlint MD031); add a single blank line
immediately above the opening "```bash" line and a single blank line immediately
below the closing "```" line so the fenced block is separated from the
surrounding text and satisfies MD031.


## What the unified config does

- `llama_stack.config.profile: /abs/path/to/tests/e2e/configs/run-ci.yaml` — the baseline is loaded from the CI profile
- `llama_stack.config.native_override.safety.default_shield_id: llama-guard` — an override that proves the merge works (see the sketch below)
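
A minimal sketch of the merge semantics exercised here, assuming a plain recursive dict merge: the `profile` supplies the baseline and `native_override` is layered on top. The `deep_merge` helper below is hypothetical and only illustrates the intent; the real `synthesize_configuration` in `llama_stack_configuration.py` may handle lists, env-var references, and validation differently.

```python
# Hedged sketch: baseline-from-profile plus native_override, as a recursive merge.
# `deep_merge` is a hypothetical helper, not the project's implementation.
from copy import deepcopy
from typing import Any


def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict with `override` recursively merged onto `base`."""
    merged = deepcopy(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


# Tiny stand-in for what run-ci.yaml provides, plus the PoC's override.
profile_baseline = {"safety": {"default_shield_id": None}, "server": {"port": 8321}}
native_override = {"safety": {"default_shield_id": "llama-guard"}}

print(deep_merge(profile_baseline, native_override))
# {'safety': {'default_shield_id': 'llama-guard'}, 'server': {'port': 8321}}
```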

## Evidence

- `synthesized-run.yaml` — the full run.yaml LCORE produced from the unified config
- `query-response.json` — a successful `/v1/query` round-trip

## Proves

- `llama_stack.library_client_config_path` was NOT used (no external run.yaml needed)
- `llama_stack.config.profile` was used as the synthesis baseline (path resolution works with absolute paths)
- `llama_stack.config.native_override` was merged onto the baseline
- `AsyncLlamaStackAsLibraryClient` accepts the synthesized file path (answered item #24: file-only, not dict; see the sketch below)
- `/v1/query` succeeded end-to-end through the synthesized stack
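
For context, the file-path handoff proven here reduces to roughly the snippet below. The constructor and `initialize()` calls mirror `src/client.py`; the import path for `AsyncLlamaStackAsLibraryClient` is an assumption (the corresponding import in `src/client.py` sits outside the visible diff), and `OPENAI_API_KEY` / `E2E_OPENAI_MODEL` must be exported as in the command above.

```python
# Sketch only: construct the library client from a run.yaml *file path* (not a
# dict), matching the calls in src/client.py. The import path is an assumption.
import asyncio

from llama_stack import AsyncLlamaStackAsLibraryClient  # assumed import location


async def main() -> None:
    # The synthesized run.yaml committed as evidence alongside this README.
    config_path = (
        "docs/design/llama-stack-config-merge/poc-evidence/library-mode/"
        "synthesized-run.yaml"
    )
    client = AsyncLlamaStackAsLibraryClient(config_path)  # file path, not a dict
    await client.initialize()


asyncio.run(main())
```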
docs/design/llama-stack-config-merge/poc-evidence/library-mode/query-response.json
@@ -0,0 +1 @@
{"conversation_id":"976ef32527283085ba2f1d0cfb4c16d97071bf64391a8200","response":"The three primary colors are red, blue, and yellow.","rag_chunks":[],"referenced_documents":[],"truncated":false,"input_tokens":24,"output_tokens":12,"available_quotas":{},"tool_calls":[],"tool_results":[]}
docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml
@@ -0,0 +1,148 @@
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- safety
- scoring
- tool_runtime
- vector_io
benchmarks: []
datasets: []
image_name: starter
providers:
agents:
- config:
persistence:
agent_state:
backend: kv_default
namespace: agents_state
responses:
backend: sql_default
table_name: agents_responses
provider_id: meta-reference
provider_type: inline::meta-reference
batches:
- config:
kvstore:
backend: kv_default
namespace: batches_store
provider_id: reference
provider_type: inline::reference
datasetio:
- config:
kvstore:
backend: kv_default
namespace: huggingface_datasetio
provider_id: huggingface
provider_type: remote::huggingface
- config:
kvstore:
backend: kv_default
namespace: localfs_datasetio
provider_id: localfs
provider_type: inline::localfs
eval:
- config:
kvstore:
backend: kv_default
namespace: eval_store
provider_id: meta-reference
provider_type: inline::meta-reference
files:
- config:
metadata_store:
backend: sql_default
table_name: files_metadata
storage_dir: ~/.llama/storage/files
provider_id: meta-reference-files
provider_type: inline::localfs
inference:
- config:
allowed_models:
- ${env.E2E_OPENAI_MODEL:=gpt-4o-mini}
api_key: ${env.OPENAI_API_KEY}
provider_id: openai
provider_type: remote::openai
- config: {}
provider_id: sentence-transformers
provider_type: inline::sentence-transformers
safety:
- config:
excluded_categories: []
provider_id: llama-guard
provider_type: inline::llama-guard
scoring:
- config: {}
provider_id: basic
provider_type: inline::basic
- config: {}
provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- config:
openai_api_key: '********'
provider_id: braintrust
provider_type: inline::braintrust
tool_runtime:
- config: {}
provider_id: rag-runtime
provider_type: inline::rag-runtime
- config: {}
provider_id: model-context-protocol
provider_type: remote::model-context-protocol
vector_io: []
registered_resources:
benchmarks: []
datasets: []
models:
- metadata:
embedding_dimension: 768
model_id: all-mpnet-base-v2
model_type: embedding
provider_id: sentence-transformers
provider_model_id: all-mpnet-base-v2
scoring_fns: []
shields:
- provider_id: llama-guard
provider_shield_id: openai/gpt-4o-mini
shield_id: llama-guard
Comment on lines +107 to +110
⚠️ Potential issue | 🟡 Minor

Suspicious provider_shield_id value in evidence artifact.

The llama-guard shield lists provider_shield_id: openai/gpt-4o-mini, i.e. an OpenAI chat model id is being registered as the Llama Guard shield id. This almost certainly reflects a misconfigured native_override in the PoC input rather than a working safety shield (Llama Guard expects a guard model id such as meta-llama/Llama-Guard-3-8B). The artifact is being removed pre-merge, but worth noting so the same shape is not copied into the spec or any follow-up defaults.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml`
around lines 107-110: the shields entry has an incorrect provider_shield_id
value. Replace the OpenAI chat model id in the shields list (symbols: shields,
provider_id, provider_shield_id, shield_id) with the correct Llama Guard model
id, or remove the mistaken native_override that injected it; specifically, ensure
provider_shield_id uses a guard model identifier such as
"meta-llama/Llama-Guard-3-8B" (or the intended guard model), and fix the
native_override/source that produced the OpenAI id so future artifacts don't
carry an OpenAI model as a Llama Guard shield.

tool_groups:
- provider_id: rag-runtime
toolgroup_id: builtin::rag
vector_stores: []
safety:
default_shield_id: llama-guard
scoring_fns: []
server:
port: 8321
storage:
backends:
kv_default:
db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db}
type: kv_sqlite
sql_default:
db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
type: sql_sqlite
stores:
conversations:
backend: sql_default
table_name: openai_conversations
inference:
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
table_name: inference_store
metadata:
backend: kv_default
namespace: registry
prompts:
backend: kv_default
namespace: prompts
vector_stores:
default_embedding_model:
model_id: all-mpnet-base-v2
provider_id: sentence-transformers
default_provider_id: faiss
version: 2
docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml
@@ -0,0 +1,33 @@
name: Lightspeed Core Service (LCS) - Unified PoC
service:
host: 0.0.0.0
port: 8080
base_url: http://localhost:8080
auth_enabled: false
workers: 1
color_log: true
access_log: true
# Unified mode: no `library_client_config_path`. Operational LS config is
# synthesized by LCORE from `llama_stack.config` below.
llama_stack:
use_as_library_client: true
config:
# Use the CI-friendly baseline via `profile` (no EXTERNAL_PROVIDERS_DIR
# env var required). Equivalent to what tests/e2e/configs/run-ci.yaml
# provides; this exercises the `profile:` path of the synthesizer.
profile: /home/msvistun/repos/lightspeed/stack/tests/e2e/configs/run-ci.yaml
⚠️ Potential issue | 🟡 Minor

Remove the machine-local profile path before merge.

This config will fail anywhere except the author’s workstation and should not be committed as reusable evidence. If this PoC artifact must remain, use a repo-relative path instead.

Suggested portable path if the artifact is retained
-    profile: /home/msvistun/repos/lightspeed/stack/tests/e2e/configs/run-ci.yaml
+    profile: ../../../../tests/e2e/configs/run-ci.yaml
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml`
at line 18: the committed YAML contains a machine-local absolute path in the
profile key ('profile:
/home/msvistun/repos/lightspeed/stack/tests/e2e/configs/run-ci.yaml'). Remove or
replace it with a portable repo-relative path (for example
'./tests/e2e/configs/run-ci.yaml'), or delete the profile entry if it is not
needed, so the PoC artifact is reusable outside the author's workstation; update
any references that expect the old absolute path (search for the profile key) to
use the new relative path.

# Small native_override: prove overrides take effect end-to-end.
native_override:
safety:
default_shield_id: llama-guard
user_data_collection:
feedback_enabled: false
feedback_storage: "/tmp/lcore-836-poc/feedback"
transcripts_enabled: false
transcripts_storage: "/tmp/lcore-836-poc/transcripts"
conversation_cache:
type: "sqlite"
sqlite:
db_path: "/tmp/lcore-836-poc/conversation-cache.db"
authentication:
module: "noop"
12 changes: 9 additions & 3 deletions scripts/llama-stack-entrypoint.sh
@@ -1,6 +1,12 @@
#!/bin/bash
# Entrypoint for llama-stack container.
# Enriches config with lightspeed dynamic values, then starts llama-stack.
# Produces the run.yaml from lightspeed-stack.yaml then starts llama-stack.
#
# Two modes, auto-detected by the Python CLI (llama_stack_configuration.py):
# - Unified (LCORE-836): `llama_stack.config` present in lightspeed-stack.yaml.
# The full run.yaml is SYNTHESIZED from the unified block; -i is ignored.
# - Legacy: `run.yaml` is mounted separately and ENRICHED with BYOK RAG / Solr /
# Azure Entra ID values from lightspeed-stack.yaml.

set -e

@@ -9,9 +15,9 @@ ENRICHED_CONFIG="/opt/app-root/run.yaml"
LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}"
ENV_FILE="/opt/app-root/.env"

# Enrich config if lightspeed config exists
# Run the config producer if lightspeed config exists
if [ -f "$LIGHTSPEED_CONFIG" ]; then
echo "Enriching llama-stack config..."
echo "Preparing llama-stack config from $LIGHTSPEED_CONFIG ..."
ENRICHMENT_FAILED=0
python3 /opt/app-root/llama_stack_configuration.py \
-c "$LIGHTSPEED_CONFIG" \
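
The two-mode auto-detection described in the new entrypoint header comment might look roughly like the sketch below. `synthesize_configuration` follows the call shape used in `src/client.py`; the legacy enrichment step is only indicated in a comment because the `enrich_byok_rag` / `enrich_solr` signatures are not shown in this diff, and the real CLI's argument handling may differ.

```python
# Hedged sketch of the unified-vs-legacy selection done by llama_stack_configuration.py.
from pathlib import Path

import yaml

from llama_stack_configuration import synthesize_configuration


def produce_run_yaml(lightspeed_config: str, mounted_run_yaml: str, output: str) -> None:
    """Pick the mode from lightspeed-stack.yaml and write the resulting run.yaml."""
    lcs = yaml.safe_load(Path(lightspeed_config).read_text(encoding="utf-8"))

    if lcs.get("llama_stack", {}).get("config") is not None:
        # Unified (LCORE-836): synthesize the full run.yaml; any mounted -i input is ignored.
        run_config = synthesize_configuration(
            lcs, config_file_dir=Path(lightspeed_config).resolve().parent
        )
    else:
        # Legacy: start from the mounted run.yaml; the real CLI then applies
        # BYOK RAG / Solr / Azure Entra ID enrichment (enrich_byok_rag, enrich_solr).
        run_config = yaml.safe_load(Path(mounted_run_yaml).read_text(encoding="utf-8"))

    Path(output).write_text(yaml.safe_dump(run_config), encoding="utf-8")
```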
65 changes: 57 additions & 8 deletions src/client.py
@@ -3,6 +3,7 @@
import json
import os
import tempfile
from pathlib import Path
from typing import Optional

import yaml
@@ -11,7 +12,12 @@
from llama_stack_client import APIConnectionError, AsyncLlamaStackClient

from configuration import configuration
from llama_stack_configuration import YamlDumper, enrich_byok_rag, enrich_solr
from llama_stack_configuration import (
YamlDumper,
enrich_byok_rag,
enrich_solr,
synthesize_configuration,
)
from log import get_logger
from models.config import LlamaStackConfiguration
from models.responses import ServiceUnavailableResponse
@@ -44,22 +50,65 @@ async def load(self, llama_stack_config: LlamaStackConfiguration) -> None:
async def _load_library_client(self, config: LlamaStackConfiguration) -> None:
"""Initialize client in library mode.

Two paths:
- Unified mode (`config.config` set): synthesize full run.yaml from the
lightspeed-stack config and write to a deterministic path.
- Legacy mode (`config.library_client_config_path` set): read the
external run.yaml and apply in-place enrichment.

Stores the final config path for use in reload.
"""
if config.library_client_config_path is None:
if config.config is not None:
logger.info("Using Llama stack as library client (unified mode)")
self._config_path = self._synthesize_library_config()
elif config.library_client_config_path is not None:
logger.info("Using Llama stack as library client (legacy mode)")
self._config_path = self._enrich_library_config(
config.library_client_config_path
)
else:
raise ValueError(
"Configuration problem: library_client_config_path is not set"
"Configuration problem: neither `llama_stack.config` (unified) "
"nor `llama_stack.library_client_config_path` (legacy) is set"
)
logger.info("Using Llama stack as library client")

self._config_path = self._enrich_library_config(
config.library_client_config_path
)

client = AsyncLlamaStackAsLibraryClient(self._config_path)
await client.initialize()
self._lsc = client

def _synthesize_library_config(self) -> str:
"""Synthesize the full Llama Stack run.yaml from unified-mode config.

Library-client-friendly: writes to a file since the Llama Stack library
client only accepts a file path (not a dict). Returns the path to the
synthesized file.

The synthesizer preserves env-var references (`${env.FOO}`) verbatim;
secrets are not resolved into the file on disk.

Returns:
str: Path to the synthesized run.yaml.
"""
lcs_config_dict = configuration.configuration.model_dump(
exclude_none=True, mode="python"
)
config_file_dir: Optional[Path] = None
env_path = os.environ.get("LIGHTSPEED_STACK_CONFIG_PATH")
if env_path:
config_file_dir = Path(env_path).resolve().parent

ls_config = synthesize_configuration(
lcs_config_dict, config_file_dir=config_file_dir
)

synthesized_path = os.path.join(
tempfile.gettempdir(), "llama_stack_synthesized_config.yaml"
)
with open(synthesized_path, "w", encoding="utf-8") as f:
yaml.dump(ls_config, f, Dumper=YamlDumper, default_flow_style=False)
Comment on lines +104 to +108
⚠️ Potential issue | 🟠 Major

Do not write the synthesized config to a fixed /tmp filename.

/tmp/llama_stack_synthesized_config.yaml is predictable and shared across processes, so concurrent instances can overwrite each other and a pre-existing symlink can redirect the write. Use a securely created file or an app-owned directory with restrictive permissions.

Safer temp-file creation
-        synthesized_path = os.path.join(
-            tempfile.gettempdir(), "llama_stack_synthesized_config.yaml"
-        )
-        with open(synthesized_path, "w", encoding="utf-8") as f:
-            yaml.dump(ls_config, f, Dumper=YamlDumper, default_flow_style=False)
+        with tempfile.NamedTemporaryFile(
+            "w",
+            encoding="utf-8",
+            prefix="llama_stack_synthesized_config_",
+            suffix=".yaml",
+            delete=False,
+        ) as f:
+            synthesized_path = f.name
+            yaml.dump(ls_config, f, Dumper=YamlDumper, default_flow_style=False)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/client.py` around lines 104-108: the code currently writes the
synthesized config to a predictable temp path via the synthesized_path variable
and open(...), which is unsafe against concurrent processes and symlink attacks.
Create a secure, unique temp file instead (e.g. via
tempfile.NamedTemporaryFile(delete=False) or os.open with tempfile.mkstemp),
write ls_config to that securely opened file using yaml.dump, set restrictive
permissions (chmod 0o600) on the new file, and close it before returning/using
the path; reference the synthesized_path creation, the open(...) write block, and
the yaml.dump call when making the change.

logger.info("Wrote synthesized Llama Stack config to %s", synthesized_path)
return synthesized_path

def _load_service_client(self, config: LlamaStackConfiguration) -> None:
"""Initialize client in service mode (remote HTTP)."""
logger.info("Using Llama stack running as a service")