From 3fbfe5ace6a8815b9e39297ec76b9a44343409f5 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Thu, 23 Apr 2026 14:59:16 +0200 Subject: [PATCH 1/3] =?UTF-8?q?LCORE-836=20spike:=20unified=20mode=20PoC?= =?UTF-8?q?=20=E2=80=94=20schema,=20synthesizer,=20migration=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a unified `llama_stack.config` sub-section to `lightspeed-stack.yaml` that lets operators express the Llama Stack operational configuration in one place, eliminating the need for a separately maintained `run.yaml`. Legacy mode (`llama_stack.library_client_config_path` + external run.yaml) is preserved and mutually exclusive with the new path. New Pydantic classes `UnifiedLlamaStackConfig`, `UnifiedInferenceSection`, and `UnifiedInferenceProvider` define the unified schema; a new `synthesize_configuration` pipeline applies profile (or baseline) → existing BYOK RAG / Solr OKP enrichment → high-level sections → `native_override` (deep-merge, list-replacement). A `baseline: default | empty` field enables strict lossless round-trip for the migration tool. Library-mode wiring in `src/client.py` detects the unified form and writes the synthesized file to disk for `AsyncLlamaStackAsLibraryClient` (which the PoC confirmed requires a file path, not a dict). Legacy enrichment path is unchanged. A `--migrate-config` flag on the `lightspeed-stack` CLI produces a unified single-file config from a legacy (run.yaml, lightspeed-stack.yaml) pair (dumb lift-and-shift: content goes under `native_override` with `baseline: empty`, and `library_client_config_path` is removed). The LS container's `llama_stack_configuration.py` CLI now auto-detects unified vs legacy based on the presence of `llama_stack.config`; the entrypoint script requires no functional change (comment clarified). `test.containerfile` copies `src/data/` into the container so the shipped default baseline resolves at runtime. Tests: 22 new unit tests covering merge semantics, high-level inference expansion, the full synthesize pipeline, profile loading, precedence (profile < high-level < native_override), and migrate-then-synthesize round-trip lossless equality. 3 new schema tests cover unified/legacy mutual exclusion. 5 existing dump-configuration expectations updated for the new `config: None` field; 1 client error-message regex updated. Full `uv run make verify` passes (black, pylint 10/10, ruff, docstyle, mypy). `uv run pytest tests/unit/` — 2098 passed, 1 skipped, 0 failed. --- scripts/llama-stack-entrypoint.sh | 12 +- src/client.py | 65 ++- src/data/default_run.yaml | 155 ++++++++ src/lightspeed_stack.py | 47 ++- src/llama_stack_configuration.py | 269 ++++++++++++- src/models/config.py | 148 +++++-- test.containerfile | 7 +- .../models/config/test_dump_configuration.py | 5 + .../config/test_llama_stack_configuration.py | 59 ++- tests/unit/test_client.py | 2 +- tests/unit/test_llama_stack_synthesize.py | 371 ++++++++++++++++++ 11 files changed, 1088 insertions(+), 52 deletions(-) create mode 100644 src/data/default_run.yaml create mode 100644 tests/unit/test_llama_stack_synthesize.py diff --git a/scripts/llama-stack-entrypoint.sh b/scripts/llama-stack-entrypoint.sh index a7eeb797b..6917017c5 100755 --- a/scripts/llama-stack-entrypoint.sh +++ b/scripts/llama-stack-entrypoint.sh @@ -1,6 +1,12 @@ #!/bin/bash # Entrypoint for llama-stack container. -# Enriches config with lightspeed dynamic values, then starts llama-stack. 
+# Produces the run.yaml from lightspeed-stack.yaml then starts llama-stack. +# +# Two modes, auto-detected by the Python CLI (llama_stack_configuration.py): +# - Unified (LCORE-836): `llama_stack.config` present in lightspeed-stack.yaml. +# The full run.yaml is SYNTHESIZED from the unified block; -i is ignored. +# - Legacy: `run.yaml` is mounted separately and ENRICHED with BYOK RAG / Solr / +# Azure Entra ID values from lightspeed-stack.yaml. set -e @@ -9,9 +15,9 @@ ENRICHED_CONFIG="/opt/app-root/run.yaml" LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}" ENV_FILE="/opt/app-root/.env" -# Enrich config if lightspeed config exists +# Run the config producer if lightspeed config exists if [ -f "$LIGHTSPEED_CONFIG" ]; then - echo "Enriching llama-stack config..." + echo "Preparing llama-stack config from $LIGHTSPEED_CONFIG ..." ENRICHMENT_FAILED=0 python3 /opt/app-root/llama_stack_configuration.py \ -c "$LIGHTSPEED_CONFIG" \ diff --git a/src/client.py b/src/client.py index 0c77c2d49..f42544a13 100644 --- a/src/client.py +++ b/src/client.py @@ -3,6 +3,7 @@ import json import os import tempfile +from pathlib import Path from typing import Optional import yaml @@ -11,7 +12,12 @@ from llama_stack_client import APIConnectionError, AsyncLlamaStackClient from configuration import configuration -from llama_stack_configuration import YamlDumper, enrich_byok_rag, enrich_solr +from llama_stack_configuration import ( + YamlDumper, + enrich_byok_rag, + enrich_solr, + synthesize_configuration, +) from log import get_logger from models.config import LlamaStackConfiguration from models.responses import ServiceUnavailableResponse @@ -44,22 +50,65 @@ async def load(self, llama_stack_config: LlamaStackConfiguration) -> None: async def _load_library_client(self, config: LlamaStackConfiguration) -> None: """Initialize client in library mode. + Two paths: + - Unified mode (`config.config` set): synthesize full run.yaml from the + lightspeed-stack config and write to a deterministic path. + - Legacy mode (`config.library_client_config_path` set): read the + external run.yaml and apply in-place enrichment. + Stores the final config path for use in reload. """ - if config.library_client_config_path is None: + if config.config is not None: + logger.info("Using Llama stack as library client (unified mode)") + self._config_path = self._synthesize_library_config() + elif config.library_client_config_path is not None: + logger.info("Using Llama stack as library client (legacy mode)") + self._config_path = self._enrich_library_config( + config.library_client_config_path + ) + else: raise ValueError( - "Configuration problem: library_client_config_path is not set" + "Configuration problem: neither `llama_stack.config` (unified) " + "nor `llama_stack.library_client_config_path` (legacy) is set" ) - logger.info("Using Llama stack as library client") - - self._config_path = self._enrich_library_config( - config.library_client_config_path - ) client = AsyncLlamaStackAsLibraryClient(self._config_path) await client.initialize() self._lsc = client + def _synthesize_library_config(self) -> str: + """Synthesize the full Llama Stack run.yaml from unified-mode config. + + Library-client-friendly: writes to a file since the Llama Stack library + client only accepts a file path (not a dict). Returns the path to the + synthesized file. + + The synthesizer preserves env-var references (`${env.FOO}`) verbatim; + secrets are not resolved into the file on disk. 
+ + Returns: + str: Path to the synthesized run.yaml. + """ + lcs_config_dict = configuration.configuration.model_dump( + exclude_none=True, mode="python" + ) + config_file_dir: Optional[Path] = None + env_path = os.environ.get("LIGHTSPEED_STACK_CONFIG_PATH") + if env_path: + config_file_dir = Path(env_path).resolve().parent + + ls_config = synthesize_configuration( + lcs_config_dict, config_file_dir=config_file_dir + ) + + synthesized_path = os.path.join( + tempfile.gettempdir(), "llama_stack_synthesized_config.yaml" + ) + with open(synthesized_path, "w", encoding="utf-8") as f: + yaml.dump(ls_config, f, Dumper=YamlDumper, default_flow_style=False) + logger.info("Wrote synthesized Llama Stack config to %s", synthesized_path) + return synthesized_path + def _load_service_client(self, config: LlamaStackConfiguration) -> None: """Initialize client in service mode (remote HTTP).""" logger.info("Using Llama stack running as a service") diff --git a/src/data/default_run.yaml b/src/data/default_run.yaml new file mode 100644 index 000000000..7a4a78efa --- /dev/null +++ b/src/data/default_run.yaml @@ -0,0 +1,155 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +datasets: [] +image_name: starter +external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR} + +providers: + inference: + - provider_id: openai # This ID is a reference to 'providers.inference' + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) is saved together with the RAG chunks
+      type: kv_sqlite
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: llama-guard
+    provider_shield_id: openai/gpt-4o-mini
+  vector_stores: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag # Register the RAG tool
+    provider_id: rag-runtime
+# REQUIRED: This section is necessary for file_search tool calls to work.
+# Without it, llama-stack's rag-runtime silently fails all file_search operations
+# with no error logged.
+vector_stores:
+  # LCORE-1498: Disables Llama Stack RAG annotation generation,
+  # which causes unwanted citation/file markers in model output.
+  annotation_prompt_params:
+    enable_annotations: false
+  default_provider_id: faiss
+  default_embedding_model: # Define the default embedding model for RAG
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+
diff --git a/src/lightspeed_stack.py b/src/lightspeed_stack.py
index 858799c36..261092063 100644
--- a/src/lightspeed_stack.py
+++ b/src/lightspeed_stack.py
@@ -48,10 +48,11 @@ def create_argument_parser() -> ArgumentParser:
     - -d / --dump-configuration: dump the loaded configuration to JSON and exit
     - -s / --dump-schema: dump the configuration schema to OpenAPI JSON and exit
     - -c / --config: path to the configuration file (default "lightspeed-stack.yaml")
-    - -g / --generate-llama-stack-configuration: generate a Llama Stack
-      configuration from the service configuration
-    - -i / --input-config-file: Llama Stack input configuration filename (default "run.yaml")
-    - -o / --output-config-file: Llama Stack output configuration filename (default "run_.yaml")
+    - --migrate-config: migrate a legacy (run.yaml + lightspeed-stack.yaml)
+      setup into a unified single-file config and exit
+    - --run-yaml: input run.yaml for --migrate-config (default "run.yaml")
+    - --migrate-output: output path for --migrate-config
+      (default "lightspeed-stack-unified.yaml")

     Returns:
         Configured ArgumentParser for parsing the service CLI options.
@@ -88,6 +89,27 @@ def create_argument_parser() -> ArgumentParser: help="path to configuration file (default: lightspeed-stack.yaml)", default="lightspeed-stack.yaml", ) + parser.add_argument( + "--migrate-config", + dest="migrate_config", + help="migrate legacy (run.yaml + lightspeed-stack.yaml) into a unified " + "single-file configuration and exit", + action="store_true", + default=False, + ) + parser.add_argument( + "--run-yaml", + dest="run_yaml", + help="path to legacy run.yaml for --migrate-config (default: run.yaml)", + default="run.yaml", + ) + parser.add_argument( + "--migrate-output", + dest="migrate_output", + help="output path for --migrate-config " + "(default: lightspeed-stack-unified.yaml)", + default="lightspeed-stack-unified.yaml", + ) return parser @@ -125,6 +147,23 @@ def main() -> None: if isinstance(existing_logger, logging.Logger): existing_logger.setLevel(logging.DEBUG) + # --migrate-config runs standalone; does not load config into the singleton, + # since the input may be in legacy form and we are producing its successor. + if args.migrate_config: + # pylint: disable=import-outside-toplevel + from llama_stack_configuration import migrate_config_dumb + + try: + migrate_config_dumb(args.run_yaml, args.config_file, args.migrate_output) + logger.info( + "Migration complete. Wrote unified config to %s", + args.migrate_output, + ) + except Exception as e: + logger.error("Migration failed: %s", e) + raise SystemExit(1) from e + return + configuration.load_configuration(args.config_file) logger.info("Configuration: %s", configuration.configuration) logger.info( diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index 3ac4a8ec4..3776eefbf 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -589,6 +589,246 @@ def enrich_solr( # pylint: disable=too-many-locals logger.info("Added OKP embedding model to registered_resources.models") +# ============================================================================= +# Synthesis for Unified Mode (LCORE-836) +# ============================================================================= + + +DEFAULT_BASELINE_RESOURCE = "default_run.yaml" + +PROVIDER_TYPE_MAP: dict[str, str] = { + "openai": "remote::openai", + "sentence_transformers": "inline::sentence-transformers", + "azure": "remote::azure", + "vertexai": "remote::vertexai", + "watsonx": "remote::watsonx", + "vllm_rhaiis": "remote::vllm", + "vllm_rhel_ai": "remote::vllm", +} + + +def load_default_baseline() -> dict[str, Any]: + """Load LCORE's built-in default Llama Stack baseline config. + + Returns: + dict[str, Any]: The default baseline run.yaml parsed as a dict. + """ + # importlib.resources-style load; `src/data/default_run.yaml` is shipped + # with the package. + baseline_path = Path(__file__).parent / "data" / DEFAULT_BASELINE_RESOURCE + logger.info("Loading built-in default baseline from %s", baseline_path) + with open(baseline_path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + + +def deep_merge_list_replace( + base: dict[str, Any], overlay: dict[str, Any] +) -> dict[str, Any]: + """Deep-merge `overlay` onto `base`. + + Maps are merged recursively. Lists and scalars in `overlay` replace the + corresponding entry in `base` (no append semantics). Result is a new dict; + neither argument is mutated. + + Parameters: + base: The base mapping. + overlay: The mapping whose values take precedence. + + Returns: + dict[str, Any]: A new mapping with overlay applied on top of base. 
+ """ + import copy # pylint: disable=import-outside-toplevel + + result: dict[str, Any] = copy.deepcopy(base) + for key, value in overlay.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = deep_merge_list_replace(result[key], value) + else: + result[key] = copy.deepcopy(value) + return result + + +def apply_high_level_inference( + ls_config: dict[str, Any], inference: dict[str, Any] +) -> None: + """Apply a high-level `inference` block into `ls_config['providers']['inference']`. + + Replaces the inference provider list entirely. Use `native_override` for + additive tweaks. + + Parameters: + ls_config: Llama Stack config dict (modified in place). + inference: High-level inference section as a dict (with 'providers' list). + """ + providers_out: list[dict[str, Any]] = [] + for provider in inference.get("providers", []): + p_type = provider["type"] + entry: dict[str, Any] = { + "provider_id": p_type, + "provider_type": PROVIDER_TYPE_MAP[p_type], + } + cfg: dict[str, Any] = {} + if provider.get("api_key_env"): + cfg["api_key"] = f"${{env.{provider['api_key_env']}}}" + if provider.get("allowed_models"): + cfg["allowed_models"] = provider["allowed_models"] + if provider.get("extra"): + cfg.update(provider["extra"]) + if cfg: + entry["config"] = cfg + providers_out.append(entry) + + if "providers" not in ls_config: + ls_config["providers"] = {} + ls_config["providers"]["inference"] = providers_out + logger.info( + "Applied high-level inference section: %s provider entries", + len(providers_out), + ) + + +def synthesize_configuration( + lcs_config: dict[str, Any], + config_file_dir: Optional[Path] = None, + default_baseline: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + """Synthesize a full Llama Stack run.yaml from a unified-mode LCORE config. + + Pipeline: + 1. Baseline = profile file (if set) else `default_baseline` (if provided) + else LCORE's built-in default. + 2. Apply existing top-level enrichment (BYOK RAG, Solr/OKP). + Azure Entra ID is intentionally not run here (side-effect on .env). + 3. Apply high-level sections (inference, and later storage/safety/...). + 4. Deep-merge (list-replace) `native_override`. + + Precedence: profile < high-level sections < native_override. + + Parameters: + lcs_config: Full lightspeed-stack.yaml content as a dict (env-expanded). + config_file_dir: Directory containing the lightspeed-stack.yaml, used + to resolve relative `profile:` paths. If None, relative paths are + resolved against the current working directory. + default_baseline: Override for the baseline when `profile:` is unset + (primarily for tests). If None, LCORE's built-in baseline is used. + + Returns: + dict[str, Any]: The synthesized Llama Stack run.yaml as a dict. + + Raises: + ValueError: If llama_stack.config is not present in `lcs_config`. + """ + unified = (lcs_config.get("llama_stack") or {}).get("config") + if unified is None: + raise ValueError( + "synthesize_configuration called without llama_stack.config set" + ) + + # 1. 
Baseline + profile = unified.get("profile") + baseline_kind = unified.get("baseline", "default") + if profile: + profile_path = Path(profile) + if not profile_path.is_absolute() and config_file_dir is not None: + profile_path = config_file_dir / profile_path + logger.info("Loading unified-mode profile baseline from %s", profile_path) + with open(profile_path, "r", encoding="utf-8") as f: + ls_config: dict[str, Any] = yaml.safe_load(f) + elif baseline_kind == "empty": + logger.info("Unified mode: starting from empty baseline") + ls_config = {} + elif default_baseline is not None: + import copy # pylint: disable=import-outside-toplevel + + ls_config = copy.deepcopy(default_baseline) + else: + ls_config = load_default_baseline() + + dedupe_providers_vector_io(ls_config) + + # 2. Existing enrichment (BYOK RAG, Solr/OKP) — Azure stays out (file side-effect). + enrich_byok_rag(ls_config, lcs_config.get("byok_rag", [])) + enrich_solr(ls_config, lcs_config.get("rag", {}), lcs_config.get("okp", {})) + + # 3. High-level sections + inference = unified.get("inference") + if inference is not None: + apply_high_level_inference(ls_config, inference) + + # 4. native_override — deep-merge (list-replace) + native_override = unified.get("native_override") or {} + if native_override: + ls_config = deep_merge_list_replace(ls_config, native_override) + + dedupe_providers_vector_io(ls_config) + return ls_config + + +def migrate_config_dumb( + run_yaml_path: str, + lightspeed_yaml_path: str, + output_path: str, +) -> None: + """Lossless lift-and-shift migration: fold run.yaml into lightspeed-stack.yaml. + + Reads the legacy two-file configuration (run.yaml + lightspeed-stack.yaml) + and writes a unified single-file configuration where the entire run.yaml + content is placed under `llama_stack.config.native_override`. Removes any + `llama_stack.library_client_config_path` that referenced the old run.yaml. + + This is the "dumb" migration mode — preserves 100% of the existing + Llama Stack schema content. A future `--smart` mode (out of scope for this + PoC) would factor portions into high-level sections. + + Parameters: + run_yaml_path: Path to the existing Llama Stack run.yaml. + lightspeed_yaml_path: Path to the existing lightspeed-stack.yaml. + output_path: Path to write the unified lightspeed-stack.yaml. + """ + logger.info("Reading %s and %s for migration", lightspeed_yaml_path, run_yaml_path) + + with open(run_yaml_path, "r", encoding="utf-8") as f: + run_yaml_content: dict[str, Any] = yaml.safe_load(f) + + with open(lightspeed_yaml_path, "r", encoding="utf-8") as f: + lcs_yaml: dict[str, Any] = yaml.safe_load(f) + + llama_stack_section = lcs_yaml.setdefault("llama_stack", {}) + llama_stack_section.pop("library_client_config_path", None) + # `baseline: empty` is required for true lossless round-trip: default baseline + # would add extra keys not present in the source run.yaml. + llama_stack_section["config"] = { + "baseline": "empty", + "native_override": run_yaml_content, + } + + logger.info("Writing unified configuration to %s", output_path) + with open(output_path, "w", encoding="utf-8") as f: + yaml.dump(lcs_yaml, f, Dumper=YamlDumper, default_flow_style=False) + + +def synthesize_to_file( + lcs_config: dict[str, Any], + output_file: str, + config_file_dir: Optional[Path] = None, +) -> None: + """Synthesize unified-mode Llama Stack config and write it to disk. + + Secrets are never resolved — env-var references like `${env.FOO}` are + preserved verbatim in the output. 
+
+    Parameters:
+        lcs_config: lightspeed-stack.yaml as a dict.
+        output_file: Path to write the synthesized run.yaml.
+        config_file_dir: Directory for resolving relative profile paths.
+    """
+    ls_config = synthesize_configuration(lcs_config, config_file_dir=config_file_dir)
+    logger.info("Writing synthesized Llama Stack configuration to %s", output_file)
+    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
+    with open(output_file, "w", encoding="utf-8") as f:
+        yaml.dump(ls_config, f, Dumper=YamlDumper, default_flow_style=False)
+
+
 # =============================================================================
 # Main Generation Function (service/container mode only)
 # =============================================================================
@@ -638,9 +878,15 @@ def generate_configuration(


 def main() -> None:
-    """CLI entry point."""
+    """CLI entry point.
+
+    Auto-detects the mode:
+    - Unified mode: `llama_stack.config` present in the lightspeed config file.
+      Synthesizes the full run.yaml (no `-i/--input` needed); writes to `-o`.
+    - Legacy mode: requires `-i/--input` run.yaml; enriches it and writes to `-o`.
+    """
     parser = ArgumentParser(
-        description="Enrich Llama Stack config with Lightspeed values",
+        description="Produce Llama Stack run.yaml from Lightspeed config.",
     )
     parser.add_argument(
         "-c",
@@ -652,13 +898,14 @@ def main() -> None:
         "-i",
         "--input",
         default="run.yaml",
-        help="Input Llama Stack config (default: run.yaml)",
+        help="Input Llama Stack config for legacy-mode enrichment "
+        "(default: run.yaml; ignored in unified mode)",
     )
     parser.add_argument(
         "-o",
         "--output",
         default="run_.yaml",
-        help="Output enriched config (default: run_.yaml)",
+        help="Output run.yaml path (default: run_.yaml)",
     )
     parser.add_argument(
         "-e",
@@ -672,7 +919,19 @@ def main() -> None:
         config = yaml.safe_load(f)
     config = replace_env_vars(config)

-    generate_configuration(args.input, args.output, config, args.env_file)
+    unified_present = (config.get("llama_stack") or {}).get("config") is not None
+    if unified_present:
+        logger.info("Unified mode detected (llama_stack.config present)")
+        # Azure Entra ID side-effect (writes .env) stays part of boot — still run it.
+        setup_azure_entra_id_token(config.get("azure_entra_id"), args.env_file)
+        synthesize_to_file(
+            config,
+            args.output,
+            config_file_dir=Path(args.config).resolve().parent,
+        )
+    else:
+        logger.info("Legacy mode detected (no llama_stack.config)")
+        generate_configuration(args.input, args.output, config, args.env_file)


 if __name__ == "__main__":
diff --git a/src/models/config.py b/src/models/config.py
index 95bfc4782..9252ccdba 100644
--- a/src/models/config.py
+++ b/src/models/config.py
@@ -582,6 +582,97 @@ def resolve_auth_headers(self) -> Self:
         return self


+class UnifiedInferenceProvider(ConfigurationBase):
+    """High-level inference provider entry (unified-mode schema).
+
+    Expanded by the synthesizer into a Llama Stack `providers.inference` entry.
+    Unknown provider types must be expressed via `native_override` instead.
+    """
+
+    type: Literal[
+        "openai",
+        "sentence_transformers",
+        "azure",
+        "vertexai",
+        "watsonx",
+        "vllm_rhaiis",
+        "vllm_rhel_ai",
+    ] = Field(
+        ...,
+        description="High-level provider type. Mapped to Llama Stack provider_type.",
+    )
+
+    api_key_env: Optional[str] = Field(
+        None,
+        description="Environment variable name from which the api_key will be read "
+        "at Llama Stack start time (as `${env.<api_key_env>}`).
Kept as a reference; " + "secrets are never resolved into the synthesized file on disk.", + ) + + allowed_models: Optional[list[str]] = Field( + None, + description="Optional list of model ids allowed for this provider.", + ) + + extra: dict[str, Any] = Field( + default_factory=dict, + description="Extra per-provider-type config fields merged into the emitted " + "`config` map (escape hatch for per-type oddities).", + ) + + +class UnifiedInferenceSection(ConfigurationBase): + """High-level inference section (unified-mode schema).""" + + providers: list[UnifiedInferenceProvider] = Field( + default_factory=list, + description="High-level list of inference providers; replaces " + "`providers.inference` in the synthesized run.yaml.", + ) + + +class UnifiedLlamaStackConfig(ConfigurationBase): + """Operational Llama Stack config synthesized by LCORE at runtime. + + When present (unified mode), LCORE produces a full Llama Stack run.yaml + from this block. Precedence (lowest to highest): + + baseline (default / empty / profile) < high-level sections < native_override + + This section is mutually exclusive with + `llama_stack.library_client_config_path` (legacy mode). + """ + + baseline: Literal["default", "empty"] = Field( + "default", + description="Starting point before profile / high-level / native_override " + "are applied. 'default' uses LCORE's built-in baseline run.yaml; 'empty' " + "starts from an empty dict (useful when `native_override` specifies the " + "entire Llama Stack schema, as produced by dumb-mode migration). Ignored " + "when `profile` is set.", + ) + + profile: Optional[str] = Field( + None, + description="Path to a profile YAML file (absolute or relative to the " + "lightspeed-stack.yaml location). Loaded as the baseline if set; " + "overrides the `baseline` field.", + ) + + inference: Optional[UnifiedInferenceSection] = Field( + None, + description="High-level inference section. Additional high-level sections " + "(storage, safety, tools, ...) may be added in future versions.", + ) + + native_override: dict[str, Any] = Field( + default_factory=dict, + description="Raw Llama Stack schema fragment, deep-merged onto the result " + "of profile + high-level expansion. Lists are replaced (not appended). " + "Escape hatch for anything not expressible via high-level keys.", + ) + + class LlamaStackConfiguration(ConfigurationBase): """Llama stack configuration. @@ -620,7 +711,16 @@ class LlamaStackConfiguration(ConfigurationBase): library_client_config_path: Optional[str] = Field( None, title="Llama Stack configuration path", - description="Path to configuration file used when Llama Stack is run in library mode", + description="Path to configuration file used when Llama Stack is run in library " + "mode (legacy mode). Mutually exclusive with `config`.", + ) + + config: Optional[UnifiedLlamaStackConfig] = Field( + None, + title="Unified Llama Stack configuration", + description="Operational Llama Stack config synthesized by LCORE at runtime " + "(unified mode). When present, LCORE produces run.yaml from this block. " + "Mutually exclusive with `library_client_config_path`.", ) timeout: PositiveInt = Field( @@ -635,21 +735,26 @@ def check_llama_stack_model(self) -> Self: """ Validate the Llama Stack configuration and enforce mode-specific requirements. - If no URL is provided, requires explicit library-client mode selection. 
- When library-client mode is enabled, requires a non-empty - `library_client_config_path` that points to a regular, readable YAML - file (checked via checks.file_check). Also normalizes a None - `use_as_library_client` to False. + Unified mode (`config` set) and legacy mode (`library_client_config_path` + set) are mutually exclusive. If no URL is provided, requires explicit + library-client mode selection. When library-client mode is enabled, + requires either `config` (unified) or `library_client_config_path` + (legacy) to be set. Legacy paths are validated via checks.file_check. Returns: Self: The validated LlamaStackConfiguration instance. Raises: - ValueError: If the configuration is invalid, e.g. no - URL and library-client mode is unspecified or - disabled, or library-client mode is enabled but - `library_client_config_path` is not provided. + ValueError: If the configuration is invalid. """ + if self.config is not None and self.library_client_config_path is not None: + raise ValueError( + "llama_stack.config (unified mode) and " + "llama_stack.library_client_config_path (legacy mode) are mutually " + "exclusive. Migrate legacy configurations with: " + "lightspeed-stack --migrate-config" + ) + if self.url is None: # when URL is not set, it is supposed that Llama Stack should be run in library mode # it means that use_as_library_client attribute must be set to True @@ -667,20 +772,19 @@ def check_llama_stack_model(self) -> Self: self.use_as_library_client = False if self.use_as_library_client: - # when use_as_library_client is set to true, Llama Stack will be run in library mode - # it means that: - # - Llama Stack URL should not be set, and - # - library_client_config_path attribute must be set and must point to - # a regular readable YAML file - if self.library_client_config_path is None: - # pylint: disable=line-too-long + # library mode requires either unified config or legacy config path + if self.library_client_config_path is None and self.config is None: raise ValueError( - "Llama stack library client mode is enabled but a configuration file path is not specified" + "Llama stack library client mode is enabled but neither " + "`config` (unified) nor `library_client_config_path` (legacy) " + "is specified" + ) + if self.library_client_config_path is not None: + # legacy: the configuration file must exist and be a regular readable file + checks.file_check( + Path(self.library_client_config_path), + "Llama Stack configuration file", ) - # the configuration file must exists and be regular readable file - checks.file_check( - Path(self.library_client_config_path), "Llama Stack configuration file" - ) return self diff --git a/test.containerfile b/test.containerfile index 884fd8525..dff715b9f 100644 --- a/test.containerfile +++ b/test.containerfile @@ -36,11 +36,14 @@ RUN mkdir -p /opt/app-root/src/.llama/storage \ chown -R 1001:0 /opt/app-root && \ chmod -R 775 /opt/app-root -# Copy enrichment scripts for runtime config enrichment +# Copy enrichment / unified-mode synthesis scripts for runtime config production COPY src/llama_stack_configuration.py /opt/app-root/llama_stack_configuration.py +COPY src/data /opt/app-root/data COPY scripts/llama-stack-entrypoint.sh /opt/app-root/enrich-entrypoint.sh RUN chmod +x /opt/app-root/enrich-entrypoint.sh && \ - chown 1001:0 /opt/app-root/enrich-entrypoint.sh /opt/app-root/llama_stack_configuration.py + chown -R 1001:0 /opt/app-root/enrich-entrypoint.sh \ + /opt/app-root/llama_stack_configuration.py \ + /opt/app-root/data # Switch back to the 
original user USER 1001 diff --git a/tests/unit/models/config/test_dump_configuration.py b/tests/unit/models/config/test_dump_configuration.py index 06a3ef08c..c5954a478 100644 --- a/tests/unit/models/config/test_dump_configuration.py +++ b/tests/unit/models/config/test_dump_configuration.py @@ -144,6 +144,7 @@ def test_dump_configuration(tmp_path: Path) -> None: "use_as_library_client": True, "api_key": "**********", "library_client_config_path": "tests/configuration/run.yaml", + "config": None, "timeout": 180, }, "user_data_collection": { @@ -486,6 +487,7 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None: "use_as_library_client": True, "api_key": "**********", "library_client_config_path": "tests/configuration/run.yaml", + "config": None, "timeout": 180, }, "user_data_collection": { @@ -720,6 +722,7 @@ def test_dump_configuration_with_quota_limiters_different_values( "use_as_library_client": True, "api_key": "**********", "library_client_config_path": "tests/configuration/run.yaml", + "config": None, "timeout": 180, }, "user_data_collection": { @@ -934,6 +937,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: "use_as_library_client": True, "api_key": "**********", "library_client_config_path": "tests/configuration/run.yaml", + "config": None, "timeout": 180, }, "user_data_collection": { @@ -1138,6 +1142,7 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None: "use_as_library_client": True, "api_key": "**********", "library_client_config_path": "tests/configuration/run.yaml", + "config": None, "timeout": 180, }, "user_data_collection": { diff --git a/tests/unit/models/config/test_llama_stack_configuration.py b/tests/unit/models/config/test_llama_stack_configuration.py index deec7e765..57ba66861 100644 --- a/tests/unit/models/config/test_llama_stack_configuration.py +++ b/tests/unit/models/config/test_llama_stack_configuration.py @@ -86,20 +86,65 @@ def test_llama_stack_wrong_configuration_no_config_file() -> None: """Test the LlamaStackConfiguration constructor. Verify that enabling library-client mode without providing a configuration - file path raises a ValueError. - - Asserts that constructing LlamaStackConfiguration with - use_as_library_client=True and no library_client_config_path raises a - ValueError whose message is "Llama stack library client mode is enabled but - a configuration file path is not specified". + file path or unified config raises a ValueError. 
""" - m = "Llama stack library client mode is enabled but a configuration file path is not specified" + m = ( + "Llama stack library client mode is enabled but neither `config` " + "\\(unified\\) nor `library_client_config_path` \\(legacy\\) is specified" + ) with pytest.raises(ValueError, match=m): LlamaStackConfiguration( use_as_library_client=True ) # pyright: ignore[reportCallIssue] +# ============================================================================= +# Unified mode (LCORE-836) +# ============================================================================= + + +def test_llama_stack_unified_mode_library_client() -> None: + """Unified mode in library mode: `config` set, no library_client_config_path.""" + # pylint: disable=import-outside-toplevel + from models.config import UnifiedLlamaStackConfig + + cfg = LlamaStackConfiguration( + use_as_library_client=True, + config=UnifiedLlamaStackConfig(), + ) # pyright: ignore[reportCallIssue] + assert cfg.config is not None + assert cfg.library_client_config_path is None + + +def test_llama_stack_unified_and_legacy_are_mutually_exclusive() -> None: + """Setting both `config` and `library_client_config_path` is rejected.""" + # pylint: disable=import-outside-toplevel + from models.config import UnifiedLlamaStackConfig + + with pytest.raises( + ValueError, + match="mutually exclusive", + ): + LlamaStackConfiguration( + use_as_library_client=True, + library_client_config_path="tests/configuration/run.yaml", + config=UnifiedLlamaStackConfig(), + ) # pyright: ignore[reportCallIssue] + + +def test_llama_stack_unified_mode_with_remote_url() -> None: + """Unified config is also allowed when connecting to a remote Llama Stack.""" + # pylint: disable=import-outside-toplevel + from models.config import UnifiedLlamaStackConfig + + cfg = LlamaStackConfiguration( + url="http://remote-ls:8321", + config=UnifiedLlamaStackConfig(), + ) # pyright: ignore[reportCallIssue] + assert cfg.config is not None + assert str(cfg.url) == "http://remote-ls:8321/" + + def test_llama_stack_configuration_valid_http_url() -> None: """Test that valid HTTP URLs are accepted.""" config = LlamaStackConfiguration( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bcd5ca0d6..999abf7f6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -84,7 +84,7 @@ async def test_get_async_llama_stack_wrong_configuration() -> None: cfg.library_client_config_path = None with pytest.raises( ValueError, - match="Configuration problem: library_client_config_path is not set", + match="neither .*unified.* nor .*legacy.* is set", ): client = AsyncLlamaStackClientHolder() await client.load(cfg) diff --git a/tests/unit/test_llama_stack_synthesize.py b/tests/unit/test_llama_stack_synthesize.py new file mode 100644 index 000000000..9e86bcaa0 --- /dev/null +++ b/tests/unit/test_llama_stack_synthesize.py @@ -0,0 +1,371 @@ +"""Unit tests for unified-mode synthesizer and migration tool (LCORE-836).""" + +from pathlib import Path +from typing import Any + +import pytest +import yaml + +from llama_stack_configuration import ( + PROVIDER_TYPE_MAP, + apply_high_level_inference, + deep_merge_list_replace, + load_default_baseline, + migrate_config_dumb, + synthesize_configuration, +) + +# ============================================================================= +# deep_merge_list_replace +# ============================================================================= + + +def test_deep_merge_scalar_replace() -> None: + """Overlay scalar replaces base 
scalar.""" + result = deep_merge_list_replace({"a": 1}, {"a": 2}) + assert result == {"a": 2} + + +def test_deep_merge_adds_new_keys() -> None: + """Overlay keys not in base are added.""" + result = deep_merge_list_replace({"a": 1}, {"b": 2}) + assert result == {"a": 1, "b": 2} + + +def test_deep_merge_nested_map_merges() -> None: + """Nested maps merge recursively.""" + base = {"a": {"x": 1, "y": 2}} + overlay = {"a": {"y": 20, "z": 30}} + result = deep_merge_list_replace(base, overlay) + assert result == {"a": {"x": 1, "y": 20, "z": 30}} + + +def test_deep_merge_list_replaces() -> None: + """Lists are replaced, not appended.""" + base = {"items": [1, 2, 3]} + overlay = {"items": [9]} + result = deep_merge_list_replace(base, overlay) + assert result == {"items": [9]} + + +def test_deep_merge_does_not_mutate_inputs() -> None: + """Neither base nor overlay are mutated.""" + base = {"a": {"x": 1}} + overlay = {"a": {"x": 2}} + result = deep_merge_list_replace(base, overlay) + assert base == {"a": {"x": 1}} + assert overlay == {"a": {"x": 2}} + assert result == {"a": {"x": 2}} + + +def test_deep_merge_type_mismatch_replaces() -> None: + """If overlay type != base type at same key, overlay wins.""" + # base is map, overlay is scalar + result = deep_merge_list_replace({"a": {"x": 1}}, {"a": "replaced"}) + assert result == {"a": "replaced"} + + +# ============================================================================= +# apply_high_level_inference +# ============================================================================= + + +def test_apply_high_level_inference_single_provider() -> None: + """Single provider with api_key_env and allowed_models.""" + ls_config: dict[str, Any] = {} + inference = { + "providers": [ + { + "type": "openai", + "api_key_env": "OPENAI_API_KEY", + "allowed_models": ["gpt-4o-mini"], + } + ] + } + apply_high_level_inference(ls_config, inference) + assert ls_config["providers"]["inference"] == [ + { + "provider_id": "openai", + "provider_type": "remote::openai", + "config": { + "api_key": "${env.OPENAI_API_KEY}", + "allowed_models": ["gpt-4o-mini"], + }, + } + ] + + +def test_apply_high_level_inference_replaces_existing() -> None: + """Providers list is replaced entirely, not merged.""" + ls_config = {"providers": {"inference": [{"provider_id": "stale"}]}} + apply_high_level_inference( + ls_config, {"providers": [{"type": "sentence_transformers"}]} + ) + assert ls_config["providers"]["inference"] == [ + { + "provider_id": "sentence_transformers", + "provider_type": "inline::sentence-transformers", + } + ] + + +def test_apply_high_level_inference_extra_merged() -> None: + """`extra` dict fields merge into emitted config.""" + ls_config: dict[str, Any] = {} + inference = { + "providers": [ + { + "type": "vertexai", + "extra": {"project_id": "my-project", "location": "us-central1"}, + } + ] + } + apply_high_level_inference(ls_config, inference) + assert ls_config["providers"]["inference"][0]["config"] == { + "project_id": "my-project", + "location": "us-central1", + } + + +def test_provider_type_map_covers_all_literals() -> None: + """Every Literal value declared on UnifiedInferenceProvider.type has a mapping.""" + # pylint: disable=import-outside-toplevel + from models.config import UnifiedInferenceProvider + + literal_values = ( + UnifiedInferenceProvider.model_fields[ # pylint: disable=unsubscriptable-object + "type" + ].annotation.__args__ + ) + for value in literal_values: + assert value in PROVIDER_TYPE_MAP + + +# 
============================================================================= +# synthesize_configuration +# ============================================================================= + + +MINIMAL_BASELINE: dict[str, Any] = { + "version": 2, + "apis": ["inference"], + "providers": { + "inference": [ + {"provider_id": "stock", "provider_type": "remote::stock", "config": {}} + ] + }, + "safety": {"default_shield_id": "llama-guard"}, +} + + +def test_synthesize_errors_without_config() -> None: + """Without llama_stack.config present, synthesize raises ValueError.""" + with pytest.raises(ValueError, match="llama_stack.config"): + synthesize_configuration({"llama_stack": {}}) + + +def test_synthesize_uses_default_baseline_when_no_profile() -> None: + """With neither profile nor native_override, result is the baseline (through enrichment).""" + lcs_config: dict[str, Any] = {"llama_stack": {"config": {}}} + result = synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + # Baseline preserved (enrichment is a no-op without byok_rag/rag/okp) + assert result["safety"] == {"default_shield_id": "llama-guard"} + assert result["providers"]["inference"] == [ + {"provider_id": "stock", "provider_type": "remote::stock", "config": {}} + ] + + +def test_synthesize_loads_profile_from_path(tmp_path: Path) -> None: + """Profile path is loaded as the baseline.""" + profile_data = { + "version": 2, + "apis": ["inference"], + "providers": {"inference": [{"provider_id": "profile_p"}]}, + } + profile_path = tmp_path / "profile.yaml" + profile_path.write_text(yaml.dump(profile_data)) + + lcs_config: dict[str, Any] = { + "llama_stack": {"config": {"profile": str(profile_path)}} + } + result = synthesize_configuration(lcs_config) + assert result["providers"]["inference"] == [{"provider_id": "profile_p"}] + + +def test_synthesize_profile_relative_path(tmp_path: Path) -> None: + """Relative profile path resolves against config_file_dir.""" + profile_data = {"version": 2} + (tmp_path / "p.yaml").write_text(yaml.dump(profile_data)) + lcs_config: dict[str, Any] = {"llama_stack": {"config": {"profile": "p.yaml"}}} + result = synthesize_configuration(lcs_config, config_file_dir=tmp_path) + assert result == {"version": 2} + + +def test_synthesize_applies_high_level_inference() -> None: + """High-level inference section expands into native providers list.""" + lcs_config: dict[str, Any] = { + "llama_stack": { + "config": { + "inference": { + "providers": [{"type": "openai", "api_key_env": "OPENAI_API_KEY"}] + } + } + } + } + result = synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + assert result["providers"]["inference"] == [ + { + "provider_id": "openai", + "provider_type": "remote::openai", + "config": {"api_key": "${env.OPENAI_API_KEY}"}, + } + ] + + +def test_synthesize_native_override_deep_merges() -> None: + """native_override deep-merges on top (scalar path).""" + lcs_config: dict[str, Any] = { + "llama_stack": { + "config": { + "native_override": { + "safety": {"default_shield_id": "overridden"}, + } + } + } + } + result = synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + assert result["safety"]["default_shield_id"] == "overridden" + + +def test_synthesize_native_override_list_replaces() -> None: + """native_override replaces lists, not appends.""" + lcs_config: dict[str, Any] = { + "llama_stack": { + "config": { + "native_override": { + "providers": { + "inference": [{"provider_id": "override-only"}], + } + } + } + } + } + result = 
synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + assert result["providers"]["inference"] == [{"provider_id": "override-only"}] + + +def test_synthesize_precedence_override_beats_high_level() -> None: + """When high-level and native_override both touch the same path, override wins.""" + lcs_config: dict[str, Any] = { + "llama_stack": { + "config": { + "inference": {"providers": [{"type": "openai"}]}, + "native_override": { + "providers": { + "inference": [{"provider_id": "override-wins"}], + } + }, + } + } + } + result = synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + assert result["providers"]["inference"] == [{"provider_id": "override-wins"}] + + +def test_synthesize_preserves_env_var_refs_verbatim() -> None: + """Secrets stay as ${env.FOO} references; never resolved into the output.""" + lcs_config: dict[str, Any] = { + "llama_stack": { + "config": { + "inference": { + "providers": [{"type": "openai", "api_key_env": "OPENAI_API_KEY"}] + } + } + } + } + result = synthesize_configuration(lcs_config, default_baseline=MINIMAL_BASELINE) + api_key_value = result["providers"]["inference"][0]["config"]["api_key"] + assert api_key_value == "${env.OPENAI_API_KEY}" + + +# ============================================================================= +# Built-in default baseline loader +# ============================================================================= + + +def test_load_default_baseline_returns_dict() -> None: + """The shipped default baseline loads as a dict with expected keys.""" + baseline = load_default_baseline() + assert isinstance(baseline, dict) + assert baseline.get("version") == 2 + assert "providers" in baseline + + +# ============================================================================= +# migrate_config_dumb +# ============================================================================= + + +def test_migrate_dumb_lossless_roundtrip(tmp_path: Path) -> None: + """Dumb migration places full run.yaml under config.native_override.""" + run_yaml_content = { + "version": 2, + "apis": ["inference"], + "providers": {"inference": [{"provider_id": "opa"}]}, + } + lcs_yaml_content = { + "name": "LCS", + "llama_stack": { + "use_as_library_client": True, + "library_client_config_path": str(tmp_path / "run.yaml"), + }, + } + + run_yaml_path = tmp_path / "run.yaml" + run_yaml_path.write_text(yaml.dump(run_yaml_content)) + lcs_yaml_path = tmp_path / "lightspeed-stack.yaml" + lcs_yaml_path.write_text(yaml.dump(lcs_yaml_content)) + output_path = tmp_path / "unified.yaml" + + migrate_config_dumb(str(run_yaml_path), str(lcs_yaml_path), str(output_path)) + + result = yaml.safe_load(output_path.read_text()) + + # Legacy path is gone + assert "library_client_config_path" not in result["llama_stack"] + # Unified config has full run.yaml under native_override + assert result["llama_stack"]["config"]["native_override"] == run_yaml_content + # Other fields preserved + assert result["llama_stack"]["use_as_library_client"] is True + assert result["name"] == "LCS" + + +def test_migrate_then_synthesize_reproduces_run_yaml(tmp_path: Path) -> None: + """End-to-end round trip: run.yaml → migrate → synthesize → original content.""" + run_yaml_content = { + "version": 2, + "apis": ["inference", "vector_io"], + "providers": { + "inference": [{"provider_id": "rt", "provider_type": "remote::rt"}] + }, + "safety": {"default_shield_id": "guard"}, + } + lcs_yaml_content = { + "name": "LCS", + "llama_stack": { + "use_as_library_client": True, + 
"library_client_config_path": str(tmp_path / "run.yaml"), + }, + } + run_yaml_path = tmp_path / "run.yaml" + run_yaml_path.write_text(yaml.dump(run_yaml_content)) + lcs_yaml_path = tmp_path / "lightspeed-stack.yaml" + lcs_yaml_path.write_text(yaml.dump(lcs_yaml_content)) + output_path = tmp_path / "unified.yaml" + migrate_config_dumb(str(run_yaml_path), str(lcs_yaml_path), str(output_path)) + + unified = yaml.safe_load(output_path.read_text()) + synthesized = synthesize_configuration(unified) + + # Synthesized == original run.yaml (lossless round trip in dumb mode) + assert synthesized == run_yaml_content From 06ef987a861360ab1584af8a1ed12934ee67eb38 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Thu, 23 Apr 2026 14:59:40 +0200 Subject: [PATCH 2/3] LCORE-836 spike: design docs and PoC evidence Add the spike doc (decisions up front, background below, 7 proposed JIRAs) and the spec doc (requirements R1..R11, architecture, implementation guide, migration worked example) under `docs/design/llama-stack-config-merge/`. Key decisions captured for reviewer confirmation: - Overall shape: Option C (high-level + native_override) with Option E (profile feature, no shipped profiles) as an optional layer. - Deprecation: calendar-based (e.g., "legacy path removed no sooner than 6 months after WARN begins"); concrete timing deferred to PM review. - Override precedence: deep-merge with list replacement at leaf level. - Secrets handling: env-var references preserved verbatim in synthesized files; never resolved to disk. - Format detection: shape-based, with an optional `config_format_version` field that, if present, must agree with the shape. - Migration tool shape: `--migrate-config` flag (no CLI refactor); dumb lift-and-shift mode only in v1; smart mode deferred. - Profile distribution: feature only, LCORE ships no profiles of its own beyond reference examples under `examples/profiles/`. - LS process supervision and hot-reload: out of scope (LCORE-777, LCORE-778, LCORE-781 territory). The spike's PoC validated library-mode end-to-end: a `lightspeed-stack.yaml` containing only `llama_stack.config` (no external run.yaml) boots LCORE, serves /v1/query with a real model response, and a `native_override` value demonstrably takes effect in the synthesized run.yaml. Server-mode end-to-end through docker-compose was skipped because the LS container image rebuild (~2 GB, UBI + llama-stack llslibdev dependency sync) was impractical for the spike timeline; the same synthesis code path is exercised by the unit tests, including the lossless migrate-then-synthesize round-trip. PoC evidence is under `poc-evidence/library-mode/` as reference material for reviewers, and per the spike howto it is intended to be removed from the branch prior to merge. The spike doc and spec doc remain permanent. 
--- .../llama-stack-config-merge-spike.md | 717 ++++++++++++++++++ .../llama-stack-config-merge.md | 502 ++++++++++++ .../poc-evidence/library-mode/README.md | 26 + .../library-mode/query-response.json | 1 + .../library-mode/synthesized-run.yaml | 148 ++++ .../lightspeed-stack-unified-library.yaml | 33 + 6 files changed, 1427 insertions(+) create mode 100644 docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md create mode 100644 docs/design/llama-stack-config-merge/llama-stack-config-merge.md create mode 100644 docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md create mode 100644 docs/design/llama-stack-config-merge/poc-evidence/library-mode/query-response.json create mode 100644 docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml create mode 100644 docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml diff --git a/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md b/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md new file mode 100644 index 000000000..c8db06ff4 --- /dev/null +++ b/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md @@ -0,0 +1,717 @@ +# Spike: Llama Stack config merge (unified `lightspeed-stack.yaml`) + +## Overview + +**The problem**: Operators today must maintain two configuration files — +`lightspeed-stack.yaml` (LCORE settings) and `run.yaml` (Llama Stack +operational config: providers, storage, APIs, safety, registered resources). +This split increases the chance of misconfiguration, makes downstream +deployment templates larger, and forces every Lightspeed team to understand +Llama Stack's internal schema. LCORE-836 asks for a single source of truth. + +**The recommendation**: A layered approach — "Option C + Option E layer": + +- **High-level keys** in `lightspeed-stack.yaml` under a new `llama_stack.config` + section (inference, later storage/safety/...). Most downstream teams write + only these. +- **`native_override`** escape hatch under the same section — raw Llama Stack + schema, deep-merged last. Covers anything the high-level schema doesn't + express. +- **`profile`** field that points to a YAML file used as the baseline — the + "profiles" feature is mechanism-only; LCORE ships no profiles of its own + beyond one or two reference examples under `examples/profiles/`. +- **`baseline: default | empty`** selects whether the synthesis starts from + LCORE's built-in baseline or a blank slate. +- **Legacy mode preserved**: existing `llama_stack.library_client_config_path` + works unchanged through a deprecation window. Mutual exclusion with the new + `llama_stack.config` block is enforced at load time. +- **Migration tool**: `lightspeed-stack --migrate-config` produces a unified + single-file config from an existing (`run.yaml` + `lightspeed-stack.yaml`) + pair, lossless round-trip. + +**PoC validation**: A Level 3' PoC (per the spike howto) proves the mechanism +end-to-end in library mode. A unified `lightspeed-stack.yaml` containing only +`llama_stack.config` (no external `run.yaml`) successfully drives LCORE: +liveness/readiness green, `/v1/query` returns a real model response, +`native_override` demonstrably takes effect. Full unit-test suite passes +(2098 tests), including a lossless migrate-then-synthesize round-trip. 
+Server-mode end-to-end was not re-run through docker-compose — the container +rebuild time was impractical and unrelated to PoC quality (the container image +is ~2 GB of LS dependencies); the same synthesis code path is exercised by the +library-mode PoC and unit tests. + +--- + +## Strategic decisions — for @sbunciak (PM) and @tisnik + +These set scope, approach, and rollout shape. Each has a recommendation — +please confirm or override. + +### Decision S1: Overall shape (Option C + optional Option E) + +See [Design alternatives considered](#design-alternatives-considered) for the +full option set and scoring. + +| Option | Summary | +|---|---| +| A (Embedded native only) | `lightspeed-stack.yaml.llama_stack.config` is raw Llama Stack schema | +| **B + C (High-level + native override)** | High-level keys cover the common path, `native_override` as escape hatch | +| E (Profiles) | Named or path-based pre-built config bundles, layered on top of A/B/C | +| G (Kustomize-style patches) | Ship a default baseline, operator writes JSON-Patch-like overlays | + +**Recommendation**: **C** (high-level + native_override) with **E** (profile +feature, no shipped profiles) as an optional layer. Best balance of UX, +escape-hatch power, validation rigor, and dynamic-reconfig fit for the +broader feature roadmap (LCORE-777/781). + +### Decision S2: Deprecation timeline for the legacy path + +Legacy mode (`llama_stack.library_client_config_path` + external `run.yaml`) +must coexist with unified mode through a deprecation window to avoid breaking +downstream teams. Three candidate cadences: + +| Cadence | Timing | +|---|---| +| N+2 releases | Opt-in → warning → removed over two releases after landing | +| N+3 releases | Opt-in → warning (N+1) → removed at N+3 | +| **Calendar-based** | e.g., "removed no sooner than 6 months after warning starts" | + +**Recommendation**: **calendar-based**, because the right number depends on +LCORE's release cadence and downstream consumers' update latency — both of +which the spike author does not own. @sbunciak to set the actual numbers. + +### Decision S3: Downstream implications we may not have seen + +The spike author has direct evidence of Konflux/Tekton usage (`.tekton/` dir) +and RHOAI testing (`tests/e2e-prow/rhoai/`). Other downstream consumers — +RHOAI operator CRs, Helm charts, Kustomize overlays, any other products — +are not visible from this repo alone. + +**Ask**: Reviewers from downstream teams to confirm whether their deployment +setup treats `run.yaml` as a separate artifact (ConfigMap, templated file, +build-time asset) that this design would need to accommodate. + +### Decision S4: Scope of this spike — what is deliberately left out + +The following related work streams are **not** included in this spike and +should be tracked as separate future JIRAs: + +- **Llama Stack process supervision** from LCORE (restart-on-crash, signal + propagation, merged logs). Orthogonal to config merging; covered by + LCORE-777 / LCORE-778. +- **Hot-reload / dynamic reconfig** (e.g., live `POST /v1/rag` that adds a + BYOK RAG without restart). Llama Stack does not natively support + hot-reload; achieving it would require supervision + restart flows. + Covered by LCORE-781. + +**Recommendation**: confirm this scope split. If reviewers want any of the +above pulled in, this spike's JIRAs grow accordingly. + +--- + +## Technical decisions — for @tisnik and team leads + +Architecture-level and implementation-level. Each has a recommendation +grounded in the PoC. 
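+
+To make the recommended shape concrete before the individual decisions, here
+is a minimal unified-mode sketch. Field names follow the PoC schema on this
+branch; the provider and model values are illustrative only:
+
+```yaml
+llama_stack:
+  use_as_library_client: true
+  config:
+    baseline: default              # or "empty"; ignored when `profile` is set
+    # profile: profiles/my-team.yaml   # optional baseline YAML file (T6)
+    inference:
+      providers:
+        - type: openai
+          api_key_env: OPENAI_API_KEY  # emitted as ${env.OPENAI_API_KEY} (T3)
+          allowed_models: ["gpt-4o-mini"]
+    native_override:               # raw Llama Stack schema, deep-merged last (T2)
+      server:
+        port: 8321
+```
+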
+
+### Decision T1: Format detection (shape vs version field vs both)
+
+How does LCORE tell unified-mode configs from legacy-mode configs?
+
+| Option | Works by |
+|---|---|
+| Shape only | Presence of `llama_stack.config` → unified; else legacy |
+| Version field only | Explicit `config_format_version: 2` required |
+| **Both (soft-coupled)** | Shape decides; version field optional but must agree when present |
+
+**Recommendation**: **both, soft-coupled**. Gives a cheap upgrade path for
+future real schema bumps without forcing every existing user to add a
+version field today. Confidence: 75%.
+
+### Decision T2: Override precedence (inside Option C)
+
+When `llama_stack.config.native_override` overlaps with a high-level key,
+what semantics?
+
+| Strategy | Example: `safety: {excluded_categories: [a, b]}` vs override `{excluded_categories: [c]}` |
+|---|---|
+| Deep-merge, append lists | result: `[a, b, c]` |
+| **Deep-merge, replace lists** | result: `[c]`; other keys in `safety` preserved |
+| Whole-key override | result: whole `safety` replaced; lose `default_shield_id` unless restated |
+| JSON Patch (ops) | explicit — `{op: replace, path: /safety/excluded_categories, value: [c]}` |
+
+**Recommendation**: **deep-merge with list replacement**. Simple mental model,
+no list-merge tarpit, keeps scalar + map overrides minimal. Implemented in
+`deep_merge_list_replace()`. Confidence: 70%.
+
+See [Merge semantics worked examples](#merge-semantics-worked-examples).
+
+### Decision T3: Secrets in synthesized files
+
+The synthesized run.yaml lives on disk (library mode: `$TMPDIR`; server mode:
+inside the LS container). Option space:
+
+| Option | On-disk content |
+|---|---|
+| **Keep env-var refs verbatim** | `api_key: ${env.OPENAI_API_KEY}` (resolved by LS at start) |
+| Resolve before writing | `api_key: sk-...` |
+
+**Recommendation**: **keep env-var refs verbatim**. Security-leaning default;
+resolved secrets never touch the disk. Implemented in
+`apply_high_level_inference` (emits `${env.}` strings). Confidence: 95%.
+
+### Decision T4: Synthesized file location
+
+Where the synthesized `run.yaml` goes at runtime:
+
+| Option | Path |
+|---|---|
+| Temp file | `$TMPDIR/llama_stack_synthesized_config.yaml` |
+| **Persistent known path** | Local: `./.generated/run.yaml` or `~/.local/state/lightspeed-stack/run.yaml`; Container: `/app-root/.generated/run.yaml`. Overwrite on each boot. |
+
+**Recommendation**: **persistent known path, overwrite on boot**. Debuggable,
+no stale-file risk (always overwritten before LS starts). The PoC used
+`$TMPDIR` for expediency; production should use the persistent path. CLI flag
+`--synthesized-config-output <path>` for debugging. Confidence: 85%.
+
+### Decision T5: Migration tool invocation
+
+How operators invoke the migration tool:
+
+| Option | Example |
+|---|---|
+| Separate script under `scripts/` | `uv run python scripts/migrate-config.py ...` |
+| **Flag on main entry point** | `lightspeed-stack --migrate-config --run-yaml X -c Y --migrate-output Z` |
+| Subcommand refactor | `lightspeed-stack migrate-config ...` (BREAKS existing invocations) |
+
+**Recommendation**: **flag on main entry point**. Parallels the existing
+`--dump-configuration` / `--dump-schema` flags; zero breaking change to
+existing invocations. Implemented in `src/lightspeed_stack.py` + a
+companion `migrate_config_dumb()` function. Confidence: 90%.
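+
+Decision T2's recommended semantics are compact enough to pin down in code.
+The sketch below is illustrative rather than the shipped helper: the name
+and signature match the `deep_merge_list_replace(base, overlay) -> dict`
+listed in the spec doc, but edge cases (for example, a map overriding a
+scalar) are left to the implementation.
+
+```python
+def deep_merge_list_replace(base: dict, overlay: dict) -> dict:
+    """Deep-merge overlay into base: maps merge recursively; lists and
+    scalars are replaced wholesale, never appended."""
+    merged = dict(base)
+    for key, value in overlay.items():
+        if isinstance(merged.get(key), dict) and isinstance(value, dict):
+            merged[key] = deep_merge_list_replace(merged[key], value)
+        else:
+            # List replacement: overriding excluded_categories with [c]
+            # yields [c], while sibling keys like default_shield_id survive.
+            merged[key] = value
+    return merged
+```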
+
+### Decision T6: Profile distribution
+
+How profiles (Option E layer) reach downstream teams:
+
+| Option | Details |
+|---|---|
+| Ship named profiles in `src/profiles/` | LCORE ships a pre-curated set; `profile: openai-remote` resolves |
+| **Feature only, no shipped profiles** | `profile: <path>` is the only invocation; teams author their own; LCORE ships 1–2 reference examples under `examples/profiles/` |
+
+**Recommendation**: **feature only, no shipped profiles**. Avoids
+profile-sprawl and the burden of keeping "blessed" profiles in sync with
+downstream products. 1–2 reference examples in `examples/profiles/` are
+documentation, not shipped runtime assets. Confidence: 85%.
+
+### Decision T7: The `baseline` field (added during PoC)
+
+During the PoC, strict lossless round-trip for the migration tool surfaced
+a need: when `native_override` contains an entire run.yaml body, the default
+baseline's keys still leak into the result via deep-merge. Fix: a
+`baseline: "default" | "empty"` field.
+
+- `baseline: default` (default value) — start from LCORE's built-in baseline
+- `baseline: empty` — start from `{}`. Used by the dumb migration tool so
+  round-trip is exact.
+
+**Recommendation**: **accept this field**. Alternatives (`inherit_defaults:
+bool`, `starting_point: ...`) are cosmetic. Confidence: 80%. Reviewers: any
+preference on naming before this ships?
+
+### Decision T8: Konflux / Tekton pipelines
+
+The `.tekton/` directory exists in this repo. If any Konflux/Tekton pipeline
+templates `run.yaml` or mounts it separately, unified mode needs that pipeline
+to either (a) keep using legacy mode during the deprecation window, or
+(b) mount the unified `lightspeed-stack.yaml` and drop the `run.yaml` mount.
+
+**Ask**: owner of `.tekton/` to confirm current pipeline shape and plan
+migration.
+
+### Decision T9: Library client API (resolved by PoC)
+
+**Finding from PoC**: `AsyncLlamaStackAsLibraryClient` in `llama-stack` only
+accepts a file-path string. It does not accept a dict. This means library
+mode must write the synthesized config to disk — no dict-only shortcut
+available. Not a decision; a fact to note in the spec doc.
+
+---
+
+## Proposed JIRAs
+
+Each JIRA's agentic-tool instruction points to the spec doc
+(`llama-stack-config-merge.md`), the permanent reference.
+
+
+
+### LCORE-???? Unified `llama_stack.config` schema + synthesizer
+
+**Description**: Implement the unified-mode config schema
+(`UnifiedLlamaStackConfig`, `UnifiedInferenceSection`,
+`UnifiedInferenceProvider`) and the synthesizer that produces a full Llama
+Stack `run.yaml` from it. Wire library mode to the synthesizer. Preserve
+legacy mode through mutual-exclusion validation.
+
+**Scope**:
+- New Pydantic classes in `src/models/config.py`.
+- New functions in `src/llama_stack_configuration.py`:
+  `synthesize_configuration`, `deep_merge_list_replace`,
+  `apply_high_level_inference`, `load_default_baseline`, `synthesize_to_file`.
+- A shipped default baseline at `src/data/default_run.yaml`.
+- Library-mode wiring in `src/client.py`: detect unified vs legacy, write
+  synthesized file, pass path to library client.
+- Cross-field validation: reject both `config` and
+  `library_client_config_path` set simultaneously.
+- Legacy behavior (`llama_stack.library_client_config_path` path) unchanged.
+
+**Acceptance criteria**:
+- Unified `lightspeed-stack.yaml` (no external `run.yaml`) boots LCORE in
+  library mode and serves `/v1/query`.
+- Legacy configs continue to work with no change.
+- Mutual-exclusion error message fires cleanly when both forms are set.
+- Unit tests for synthesizer, merge semantics, schema validation.
+
+**Agentic tool instruction**:
+```text
+Read the "Architecture" and "Implementation Suggestions" sections of
+docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files to create or modify:
+  src/models/config.py (new classes; modify LlamaStackConfiguration)
+  src/llama_stack_configuration.py (synthesize_configuration + helpers)
+  src/data/default_run.yaml (new)
+  src/client.py (library-mode wiring)
+To verify: run a unified-mode config end-to-end via `uv run lightspeed-stack -c <config>` and confirm /v1/query succeeds.
+```
+
+
+
+### LCORE-???? Migration tool — dumb-mode lift-and-shift
+
+**Description**: Implement `--migrate-config` on the `lightspeed-stack` CLI
+that produces a unified single-file config from an existing
+(`run.yaml` + `lightspeed-stack.yaml`) pair. Dumb mode places the entire
+`run.yaml` body under `llama_stack.config.native_override` with
+`baseline: empty`, removes `library_client_config_path`.
+
+**Scope**:
+- `migrate_config_dumb()` function in `src/llama_stack_configuration.py`.
+- `--migrate-config`, `--run-yaml`, `--migrate-output` flags in
+  `src/lightspeed_stack.py`.
+- Round-trip test: migrate → synthesize → byte-identical to original
+  `run.yaml`.
+
+**Acceptance criteria**:
+- `lightspeed-stack --migrate-config --run-yaml X -c Y --migrate-output Z`
+  produces a unified config that boots LCORE in library mode with the same
+  Llama Stack behavior as the original pair.
+- Round-trip unit test passes.
+- `--help` describes the flag clearly.
+
+**Agentic tool instruction**:
+```text
+Read "Migration tool" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files: src/lightspeed_stack.py, src/llama_stack_configuration.py,
+tests/unit/test_llama_stack_synthesize.py.
+To verify: migrate the repo's root run.yaml + lightspeed-stack.yaml, then
+start LCORE with the output; confirm /v1/query works.
+```
+
+
+
+### LCORE-???? LS container entrypoint + deployment artifacts for unified mode
+
+**Description**: Update the Llama Stack container entrypoint and deployment
+manifests so server mode works end-to-end from a unified
+`lightspeed-stack.yaml`. Provide rebuild guidance for container images that
+bundle the synthesizer script and default baseline.
+
+**Scope**:
+- Update `scripts/llama-stack-entrypoint.sh` — the existing script already
+  defers to the Python CLI for auto-detection; document that behavior.
+- Update `test.containerfile` to copy `src/data/` into the LS container so
+  `load_default_baseline()` resolves.
+- Provide a unified-mode `docker-compose.yaml` (or update the existing one)
+  that mounts only `lightspeed-stack.yaml` into the LS container.
+- Update `.tekton/` pipelines as needed (coordinate with pipeline owner,
+  see Decision T8).
+
+**Acceptance criteria**:
+- `docker compose up` with a unified `lightspeed-stack.yaml` starts both
+  containers healthy; `/v1/query` works through LCORE → LS.
+- Legacy docker-compose layout (with external `run.yaml` mount) still works.
+
+**Agentic tool instruction**:
+```text
+Read "Architecture → Server mode" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files: scripts/llama-stack-entrypoint.sh, test.containerfile,
+docker-compose.yaml, .tekton/*.yaml.
+To verify: docker compose up with the unified config; curl LCORE /v1/query.
+```
+
+
+
+### LCORE-???? Migrate in-repo e2e / integration test configurations
+
+**User story**: As a Lightspeed Core maintainer, I want the in-repo e2e and
+integration tests to use the unified-mode config format, so that the
+reference configuration shapes downstream teams see are the new ones.
+
+**Description**: Convert `tests/e2e/configs/run-*.yaml` and
+`tests/e2e/configuration/**/lightspeed-stack*.yaml` into unified form
+(or delete the `run-*.yaml` side and fold the content into the
+corresponding `lightspeed-stack*.yaml`). Migrate `tests/e2e-prow/rhoai/`
+configs similarly.
+
+**Scope**:
+- Identify every test config that references `run.yaml`.
+- Mechanically migrate using the migration tool (dumb mode).
+- Re-run the full e2e suite and resolve any differences.
+
+**Acceptance criteria**:
+- No in-repo test config references an external `run.yaml`.
+- `uv run make test-e2e` passes.
+- Existing test coverage is preserved (no tests deleted solely to make the
+  migration pass).
+
+**Agentic tool instruction**:
+```text
+Read "Migration paths" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files: tests/e2e/configs/, tests/e2e/configuration/, tests/e2e-prow/rhoai/.
+To verify: `uv run make test-e2e` green.
+```
+
+
+
+### LCORE-???? Docs migration to unified mode as primary
+
+**User story**: As an operator reading Lightspeed Core docs, I want the
+single-file unified configuration documented as the primary path, with
+legacy mode clearly marked as deprecated.
+
+**Description**: Update
+`docs/deployment_guide.md`, `docs/byok_guide.md`, `docs/okp_guide.md`,
+`docs/rag_guide.md`, `docs/providers.md`, `docs/config.md`, `README.md`,
+`docs/local-stack-testing.md` to document unified mode as primary. Add a
+migration section with the migration tool command. Clean up the stale
+`create_argument_parser` docstring in `src/lightspeed_stack.py` that still
+mentions the removed `-g/-i/-o` flags.
+
+**Scope**:
+- Each doc file touched.
+- A new migration section (step-by-step).
+- Update the `create_argument_parser` docstring in
+  `src/lightspeed_stack.py`.
+
+**Acceptance criteria**:
+- Every doc page that showed a two-file setup also shows the unified-mode
+  equivalent.
+- Migration tool invocation documented with a worked example.
+- `docs/openapi.md` / `docs/config.html` regenerated.
+
+**Agentic tool instruction**:
+```text
+Read "Deprecation timeline" and "Migration paths" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files: docs/*.md, docs/*.html, docs/*.json, README.md, src/lightspeed_stack.py docstring.
+To verify: rendered docs present the unified mode first; legacy mode is visibly deprecated.
+```
+
+
+
+### LCORE-???? Reference profile examples and profile-path doc
+
+**Description**: Add `examples/profiles/` with two reference profile YAML
+files — one remote-provider (OpenAI) and one inline-provider
+(sentence-transformers + FAISS) — purely as reference material. Document
+how operators write and reference their own profiles via
+`llama_stack.config.profile: <path>`.
+
+**Scope**:
+- `examples/profiles/openai-remote.yaml`
+- `examples/profiles/inline-faiss.yaml`
+- Docs section: how to author a profile, where to place it, how to
+  reference it from `lightspeed-stack.yaml`.
+
+**Acceptance criteria**:
+- Both examples load cleanly via the synthesizer (sanity test).
+- A docs section titled "Profiles" exists and has a worked example.
+
+**Agentic tool instruction**:
+```text
+Read "Profiles" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files to create: examples/profiles/*.yaml, a "Profiles" section in docs/config.md or docs/deployment_guide.md.
+To verify: load the example via `uv run lightspeed-stack -c <config>` referencing the profile; confirm LS boots.
+```
+
+
+
+### LCORE-???? Deprecation warning for legacy mode
+
+**Description**: One release after the unified-mode feature lands, emit a
+one-line startup WARN when `library_client_config_path` is set. Link to
+the migration doc. Legacy mode continues to function fully.
+
+**Scope**:
+- Warning emission point: on load in the `LlamaStackConfiguration`
+  `check_llama_stack_model` validator, or at LCORE startup.
+- Log line format includes a stable URL fragment to the migration doc.
+
+**Acceptance criteria**:
+- Legacy configs still load and run.
+- A single WARN line appears at startup when legacy fields are used.
+- The warning is not emitted in unified mode.
+
+**Agentic tool instruction**:
+```text
+Read "Deprecation timeline" in docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files: src/models/config.py (or src/lightspeed_stack.py startup).
+To verify: run LCORE with a legacy config; confirm WARN line; run with unified config; confirm no WARN.
+```
+
+---
+
+## PoC results
+
+### What the PoC does
+
+The PoC is at Level 3' (per the spike howto): unified config works
+end-to-end in library mode, with overrides and a profile. Server-mode
+end-to-end validation was skipped: the library-mode PoC and unit tests
+exercise the same synthesis code path, and container rebuild time made
+re-running it impractical.
+
+**Important**: The PoC diverges from the production design in these ways:
+
+- Uses `$TMPDIR` for the synthesized `run.yaml` instead of the persistent
+  known path recommended in Decision T4.
+- No `--synthesized-config-output` CLI flag yet.
+- Migration tool has only the "dumb" mode; "smart" factoring into
+  high-level keys is out of scope.
+- No deprecation warning yet (that's its own JIRA).
+- High-level inference's emitted `provider_id` uses the Literal value
+  directly (`sentence_transformers` with underscore), which differs from
+  the baseline's `sentence-transformers` (hyphen). Acceptable in the PoC
+  because the validation used `baseline: default` + a `native_override`
+  path, not high-level inference, to avoid this naming collision. Resolution
+  before production: align the emitted `provider_id` with the Literal
+  values that already exist in common baselines (hyphenated form).
+
+### Results
+
+See [poc-evidence/library-mode/](poc-evidence/library-mode/) for the full
+evidence bundle:
+
+- `lightspeed-stack-unified-library.yaml` — the unified-mode config used
+- `synthesized-run.yaml` — what LCORE produced (3.7 KB)
+- `query-response.json` — a real `/v1/query` round-trip
+
+Summary of validation:
+
+| Check | Evidence |
+|---|---|
+| Liveness 200 | `curl /liveness` → `{"alive":true}` |
+| Readiness 200 | `curl /readiness` → `{"ready":true,"reason":"All providers are healthy","providers":[]}` |
+| `/v1/query` works | `{"response":"The three primary colors are red, blue, and yellow.",...}` |
+| Profile loaded | `profile: /.../tests/e2e/configs/run-ci.yaml` resolved |
+| `native_override` took effect | `safety.default_shield_id: llama-guard` in synthesized output |
+| No external `run.yaml` needed | No `library_client_config_path` in config |
+| Secrets preserved as env refs | `api_key: ${env.OPENAI_API_KEY}` in synthesized file |
+| Full unit suite | 2098 passed, 1 skipped, 0 failed |
+| Round-trip lossless | `test_migrate_then_synthesize_reproduces_run_yaml` green |
+
+### Surprises discovered during PoC
+
+- **`AsyncLlamaStackAsLibraryClient` takes a file path, not a dict** (Decision
+  T9). The library client reads the file itself. Consequence: library mode
+  must write a synthesized file to disk. No dict-only shortcut.
+- **`profile:` path resolution** uses the directory of the
+  `lightspeed-stack.yaml`. Relative paths work only when the profile is
+  co-located with the LCORE config. Absolute paths always work. Spec doc
+  recommends documenting this clearly.
+- **Default baseline requires `EXTERNAL_PROVIDERS_DIR`**. `src/data/default_run.yaml`
+  (copied from the repo's `run.yaml`) references `${env.EXTERNAL_PROVIDERS_DIR}`
+  without a default. Either ship a thinner default baseline, or change the
+  reference to `${env.EXTERNAL_PROVIDERS_DIR:=~/.llama/providers.d}`. Flagging
+  for the implementation JIRA.
+- **High-level inference naming collision** (described above in "divergence
+  from production design").
+
+---
+
+## Background sections
+
+### Current architecture (before LCORE-836)
+
+Two files:
+
+- **`lightspeed-stack.yaml`** — LCORE settings: service host/port,
+  conversation cache, user data collection, MCP servers, authentication,
+  authorization, quota, etc. Also contains `llama_stack:` with
+  connection-to-LS settings (URL/api_key or library-client mode with a path
+  to an external `run.yaml`).
+- **`run.yaml`** — Llama Stack operational config: `apis`, `providers`
+  (inference, safety, tool_runtime, vector_io, agents, ...), `storage`,
+  `registered_resources`, `vector_stores`, `safety`.
+
+**Existing enrichment** (`src/llama_stack_configuration.py`):
+
+- LCORE already enriches an input `run.yaml` with dynamic values from
+  `lightspeed-stack.yaml`: Azure Entra ID tokens (side-effect to `.env`),
+  BYOK RAG entries, Solr/OKP provider/store/model registration. Output is
+  an enriched `run.yaml`.
+- Called in two places: `scripts/llama-stack-entrypoint.sh` at LS container
+  boot (server mode) and `src/client.py:_enrich_library_config()` (library
+  mode).
+- LCORE-779 made this automatic; LCORE-518 (closed spike) proved (re)generation
+  feasibility. Both are the groundwork the current spike builds on.
+
+The new synthesizer *subsumes* the enrichment: it builds the full run.yaml
+(baseline + enrichment + high-level + native_override) rather than
+incrementally enriching an existing one.
+ +### Design alternatives considered + +Attributes (★ = high-weight for LCORE-836): + +| Attribute | A | B+C | C+E | E | G | +|---|---|---|---|---|---| +| ★ Operator UX | 2 | 4–5 | **4** | 5 | 3 | +| Abstraction cleanliness | 1 | 4 | 3 | 4 | 2 | +| LS schema resilience | 1 | 4 | 3 | 3 | 2 | +| ★ Escape-hatch power | 5 | 3 | 5 | 5 | 5 | +| Implementation cost | 4 | 2 | 2 | 3 | 3 | +| Maintenance load | 2 | 3 | 3 | 2 | 3 | +| ★ Backward compatibility | 3 | 3 | 3 | 3 | 4 | +| Validation rigor | 2 | 5 | 4 | 3 | 2 | +| ★ Dynamic-reconfig fit | 2 | 5 | 4 | 4 | 2 | +| ★ Library+server parity | 5 | 4 | 4 | 5 | 5 | +| Provider plurality | 5 | 4 | 5 | 4 | 5 | +| Testability | 3 | 4 | 3 | 5 | 3 | + +- **A (Embedded native)** — no abstraction win; same LS schema exposure as today. +- **B (High-level only)** — best UX when everything maps, painful at the edges. +- **C (B + `native_override`)** — recommended; combines B's UX with A's escape hatch. +- **E (Profiles, feature-only)** — optional layer on top of C. +- **G (Kustomize-style patches)** — strong for backward compat, weak on + validation and dynamic reconfig. + +### Merge semantics — worked examples + +Given the baseline: +```yaml +safety: + default_shield_id: llama-guard + excluded_categories: [violence, sexual_content] +providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} +``` + +And `native_override`: +```yaml +safety: + excluded_categories: [spam] +``` + +**Deep-merge-with-list-replacement (chosen)** produces: +```yaml +safety: + default_shield_id: llama-guard # preserved (not in override) + excluded_categories: [spam] # list replaced +providers: # not in override — preserved + inference: + - provider_id: openai + ... +``` + +The recommendation's appeal: to keep `default_shield_id`, the user doesn't +have to restate it. To replace `excluded_categories`, the user provides the +new list — they don't need to know a patch syntax. + +### Process-model recap (no LCORE supervision of LS) + +**Library mode**: LCORE process embeds the Llama Stack library client. LCORE +synthesizes `run.yaml` to a file, calls `AsyncLlamaStackAsLibraryClient(path)`, +initializes, serves. One process. + +**Server mode**: Llama Stack runs as a separate process (container). LCORE +connects to it over HTTP. Under unified mode, the LS container's entrypoint +reads the mounted `lightspeed-stack.yaml`, the Python CLI auto-detects +unified mode, synthesizes `run.yaml`, then `exec llama stack run` with it. +LCORE container reads the same `lightspeed-stack.yaml`, ignores the +`config` sub-block (server mode — only connection fields matter), connects. +Two processes. LCORE does **not** start, monitor, or supervise the LS +process — the orchestrator (docker-compose, systemd, k8s) does. Supervision +is out of scope for this spike (see Decision S4). + +### What must not break during rollout + +See [Backward compatibility scope](#backward-compatibility-scope). The four +must-not-break surfaces: + +1. Existing `lightspeed-stack.yaml` with `library_client_config_path`. +2. Existing `run.yaml` content, including fields LCORE doesn't model. +3. Existing CI/CD templating that treats `run.yaml` as a separate artifact. +4. Existing enrichment behavior (Azure Entra ID, BYOK RAG, Solr/OKP). 
+ +### Backward compatibility scope + +Detection rule at load time: + +| `lightspeed-stack.yaml` shape | Interpretation | +|---|---| +| `llama_stack.library_client_config_path` set, no `llama_stack.config` | **Legacy** — today's behavior | +| `llama_stack.config.*` present | **Unified** — new path | +| Both present | Error at load time — clear message | +| Neither (remote URL only, no config) | Existing remote mode — unchanged | + +Three migration paths operators can choose: + +| Path | Effort | Result | +|---|---|---| +| Do nothing | 0 | Legacy keeps working until deprecation window closes | +| Lift-and-shift (via migration tool) | seconds | Unified single file, zero semantic change | +| Re-express | hours+ | Unified single file, fully adopts the high-level schema | + +--- + +## Appendix A — Files changed in the PoC + +Relative to `upstream/main`: + +| File | Purpose | +|---|---| +| `src/models/config.py` | New classes: `UnifiedInferenceProvider`, `UnifiedInferenceSection`, `UnifiedLlamaStackConfig`; modified `LlamaStackConfiguration` (adds `config` field + mutual-exclusion validator) | +| `src/llama_stack_configuration.py` | New: `synthesize_configuration`, `deep_merge_list_replace`, `apply_high_level_inference`, `load_default_baseline`, `synthesize_to_file`, `migrate_config_dumb`. CLI `main()` auto-detects unified vs legacy. | +| `src/data/default_run.yaml` | Built-in default baseline (copied from repo root `run.yaml` for the PoC — implementation JIRA should slim it down; see PoC surprise about `EXTERNAL_PROVIDERS_DIR`) | +| `src/client.py` | Library-mode path picks synthesis for unified configs, enrichment for legacy | +| `src/lightspeed_stack.py` | `--migrate-config`, `--run-yaml`, `--migrate-output` flags | +| `scripts/llama-stack-entrypoint.sh` | Comment updated — script itself needs no change (Python CLI auto-detects) | +| `test.containerfile` | Copies `src/data/` into the LS container | +| `tests/unit/test_llama_stack_synthesize.py` | 22 new tests: merge semantics, high-level inference, synthesize pipeline, migration round-trip | +| `tests/unit/models/config/test_llama_stack_configuration.py` | 3 new tests: unified/legacy mutual exclusion | +| `tests/unit/models/config/test_dump_configuration.py` | 5 expected-dict updates (new `config: None` field appears in dumps) | +| `tests/unit/test_client.py` | Error-message regex updated | +| `docs/design/llama-stack-config-merge/` | Spike doc, spec doc, PoC evidence, proposed JIRAs | + +## Appendix B — Commands to reproduce the library-mode PoC + +```bash +# 1. Start LCORE in library mode with a unified config +export OPENAI_API_KEY= +export E2E_OPENAI_MODEL=gpt-4o-mini +mkdir -p /tmp/lcore-836-poc +uv run lightspeed-stack \ + -c docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml + +# 2. In another shell — query +curl -s http://localhost:8080/liveness +curl -s http://localhost:8080/readiness +curl -s -X POST http://localhost:8080/v1/query \ + -H 'Content-Type: application/json' \ + -d '{"query": "Name three primary colors. One sentence."}' + +# 3. 
Inspect what was synthesized +cat /tmp/llama_stack_synthesized_config.yaml +``` diff --git a/docs/design/llama-stack-config-merge/llama-stack-config-merge.md b/docs/design/llama-stack-config-merge/llama-stack-config-merge.md new file mode 100644 index 000000000..9847cc2fd --- /dev/null +++ b/docs/design/llama-stack-config-merge/llama-stack-config-merge.md @@ -0,0 +1,502 @@ +# Feature design: Llama Stack config merge (unified `lightspeed-stack.yaml`) + +| | | +|--------------------|----------------------------------------------------------------------------------| +| **Date** | 2026-04-23 | +| **Component** | Lightspeed Core Stack (src/models/config.py, src/llama_stack_configuration.py, src/client.py, src/lightspeed_stack.py, scripts/llama-stack-entrypoint.sh) | +| **Authors** | Maxim Svistunov | +| **Feature** | [LCORE-836](https://redhat.atlassian.net/browse/LCORE-836) | +| **Spike** | [llama-stack-config-merge-spike.md](llama-stack-config-merge-spike.md) | +| **Links** | LCORE-509 (Epic), LCORE-777 (Epic), LCORE-518 (prior spike, Closed), LCORE-779 (auto-regen, Closed) | + +## What + +This feature collapses the two Lightspeed Core configuration files — +`lightspeed-stack.yaml` (LCORE settings) and `run.yaml` (Llama Stack +operational config) — into a single `lightspeed-stack.yaml`. At runtime, +LCORE synthesizes a full Llama Stack `run.yaml` from a new +`llama_stack.config` sub-section and hands it to Llama Stack (library +client or subprocess, mode-dependent). + +Key shape: + +- High-level keys under `llama_stack.config` for the common path + (v1: `inference`; future: `storage`, `safety`, `tools`). +- `llama_stack.config.native_override` escape hatch — raw Llama Stack + schema, deep-merged with list replacement. Covers anything the + high-level schema doesn't express. +- `llama_stack.config.profile` — path to a user-authored YAML that serves + as the synthesis baseline. +- `llama_stack.config.baseline: default | empty` — pick between LCORE's + built-in baseline and an empty dict (used by the migration tool for + exact round-trip). +- Legacy two-file mode (`llama_stack.library_client_config_path` + + external `run.yaml`) is preserved during a deprecation window; + mutually exclusive with `llama_stack.config`. + +## Why + +Two-file configuration multiplies the surface area for misconfiguration +and forces every downstream Lightspeed team (RHOAI, Konflux pipelines, +any product integrating LCORE) to understand Llama Stack's full internal +schema. A single source of truth: + +- Reduces the number of artifacts deployment tooling must manage + (Helm values, ConfigMaps, Kustomize overlays). +- Lets downstream teams express their intent at a high level (e.g. "use + OpenAI with these allowed models") rather than authoring raw LS + provider entries. +- Preserves an escape hatch so edge cases don't block adoption. + +LCORE-518 (closed) proved a generation PoC in principle; LCORE-779 +(closed) made configuration regeneration automatic at startup. This +feature completes the picture by making `run.yaml` an implementation +detail that LCORE owns, not an operator-facing artifact. + +## Requirements + +- **R1:** `lightspeed-stack.yaml` with a `llama_stack.config` sub-section + and no external `run.yaml` boots LCORE in both library and server modes + and serves `/v1/query` successfully. +- **R2:** Legacy mode (`llama_stack.library_client_config_path` + + external `run.yaml`) works unchanged until the deprecation window + closes. A startup WARN is emitted one release after unified mode lands. 
+- **R3:** Setting both `llama_stack.config` and + `llama_stack.library_client_config_path` in the same file fails at + configuration load time with a clear error message pointing to the + migration tool. +- **R4:** `lightspeed-stack --migrate-config --run-yaml X -c Y + --migrate-output Z` produces a unified configuration from the legacy + two-file pair. Running the migrated file drives Llama Stack to + byte-identical behavior as the original pair (dumb-mode lossless + round-trip). +- **R5:** When `llama_stack.config.native_override` overlaps a key set + by the high-level section or by the baseline, deep-merge semantics + apply with list replacement (maps merge recursively; lists are + replaced wholesale; scalars are replaced). +- **R6:** Secrets are never resolved into the synthesized file on disk. + `${env.FOO}` references appear verbatim in the synthesized `run.yaml`. +- **R7:** Existing enrichment behavior (Azure Entra ID, BYOK RAG, + Solr/OKP) produces the same result in unified mode as in legacy mode + for equivalent inputs. +- **R8:** A profile referenced by a relative `profile:` path resolves + against the directory of the loaded `lightspeed-stack.yaml`. +- **R9:** The unified schema extends current `LlamaStackConfiguration` + pydantic model with a new `config: Optional[UnifiedLlamaStackConfig]` + field; validation enforces mutual exclusion with legacy mode and + rejects unknown fields (`extra="forbid"`). +- **R10:** The synthesized `run.yaml` is written to a persistent known + path (overwritten each boot), logged, and a CLI flag + `--synthesized-config-output` lets operators override the location for + debugging. +- **R11:** Shape detection determines mode (unified vs legacy); an + optional `config_format_version` field is accepted but must agree with + the shape when present. + +## Use Cases + +- **U1:** As an operator setting up LCORE for the first time, I want to + write one config file with high-level provider choices (OpenAI, Azure, + …) so that I don't have to learn Llama Stack's internal schema. +- **U2:** As a downstream team maintainer with an existing heavily + customized `run.yaml`, I want a mechanical one-shot migration so that + I can move to the unified format without re-expressing my edge cases. +- **U3:** As an operator whose deployment sits behind a vLLM serving + stack not covered by the high-level schema, I want to drop my custom + configuration into `native_override` and still benefit from the rest + of the unified schema. +- **U4:** As a Lightspeed Core maintainer, I want a single authoritative + place for docs, examples, and test configs so that downstream teams + find the same patterns everywhere. +- **U5:** As a Red Hat release manager, I want legacy configs to keep + working throughout a deprecation window so that downstream products + can migrate on their own cadence. + +## Architecture + +### Overview + +```text +lightspeed-stack.yaml (unified mode) + │ + ▼ + ┌────────────────────────────┐ + │ Configuration load │ Pydantic validation, mutual-exclusion + │ src/configuration.py │ check between `config` and + │ src/models/config.py │ `library_client_config_path`. + └────────────┬───────────────┘ + │ Configuration (typed) + ▼ + ┌────────────────────────────┐ Baseline selection (profile / + │ Synthesizer │ default / empty) + enrichment + │ synthesize_configuration │ (BYOK RAG, Solr/OKP) + high-level + │ (llama_stack_config…) │ sections + native_override deep-merge. 
+ └────────────┬───────────────┘ + │ synthesized run.yaml (dict) + ▼ + Library mode Server mode + ──────────── ─────────── + Write to deterministic path. Written by LS container's entrypoint + AsyncLlamaStackAsLibraryClient script (same synthesizer, same CLI, + reads the path and initializes. auto-detects unified via Python). + `llama stack run ` starts LS. + LCORE connects by URL. +``` + +### Trigger mechanism + +At LCORE startup (library mode): if `llama_stack.config` is set in the +loaded `lightspeed-stack.yaml`, the synthesizer produces a `run.yaml` +dict, writes it to disk, and passes the path to the library client. + +At Llama Stack container startup (server mode): the container's +entrypoint script invokes +`python3 /opt/app-root/llama_stack_configuration.py -c +-o /opt/app-root/run.yaml`. The Python CLI auto-detects unified vs legacy +by `llama_stack.config` presence; in unified mode it synthesizes and +writes the output; in legacy mode it performs in-place enrichment as +before. + +### Storage / data model changes + +No persistent storage is added. The synthesized `run.yaml` is written +once per boot to a deterministic path; not a database. `src/data/ +default_run.yaml` is a new package-shipped file, the built-in baseline +Llama Stack configuration. + +### Configuration + +New sub-section under the existing `llama_stack` block: + +```yaml +llama_stack: + use_as_library_client: true + # NOTE: library_client_config_path intentionally OMITTED in unified mode. + # Setting both `config` and `library_client_config_path` is a validation error. + config: + # Baseline selection + baseline: default # default | empty; ignored if `profile` is set + profile: ./my-profile.yaml # optional; resolves relative to lightspeed-stack.yaml + + # High-level sections (v1: inference; future: storage, safety, tools, ...) + inference: + providers: + - type: openai # mapped to remote::openai + api_key_env: OPENAI_API_KEY + allowed_models: [gpt-4o-mini] + - type: sentence_transformers + + # Escape hatch — raw Llama Stack schema, deep-merged with list replacement + native_override: + safety: + excluded_categories: [spam] +``` + +Pydantic classes (see `src/models/config.py`): + +```python +class UnifiedInferenceProvider(ConfigurationBase): + type: Literal[ + "openai", "sentence_transformers", "azure", "vertexai", + "watsonx", "vllm_rhaiis", "vllm_rhel_ai", + ] + api_key_env: Optional[str] = None + allowed_models: Optional[list[str]] = None + extra: dict[str, Any] = Field(default_factory=dict) + + +class UnifiedInferenceSection(ConfigurationBase): + providers: list[UnifiedInferenceProvider] = Field(default_factory=list) + + +class UnifiedLlamaStackConfig(ConfigurationBase): + baseline: Literal["default", "empty"] = "default" + profile: Optional[str] = None + inference: Optional[UnifiedInferenceSection] = None + native_override: dict[str, Any] = Field(default_factory=dict) + + +class LlamaStackConfiguration(ConfigurationBase): + # existing fields unchanged (url, api_key, use_as_library_client, + # library_client_config_path, timeout) + config: Optional[UnifiedLlamaStackConfig] = None + + @model_validator(mode="after") + def check_llama_stack_model(self) -> Self: + if self.config is not None and self.library_client_config_path is not None: + raise ValueError("... mutually exclusive ... use --migrate-config") + # ...legacy checks preserved... + return self +``` + +### API changes + +None at the REST API surface. 
Internal API additions in
+`src/llama_stack_configuration.py`:
+
+- `synthesize_configuration(lcs_config, config_file_dir, default_baseline)
+  -> dict` — the synthesis pipeline.
+- `synthesize_to_file(lcs_config, output_file, config_file_dir) -> None` —
+  synthesis + write.
+- `migrate_config_dumb(run_yaml_path, lightspeed_yaml_path, output_path)
+  -> None` — dumb-mode migration (lossless round-trip).
+- `deep_merge_list_replace(base, overlay) -> dict` — merge helper.
+- `apply_high_level_inference(ls_config, inference)` — high-level expansion.
+- `load_default_baseline() -> dict` — loads `src/data/default_run.yaml`.
+
+CLI additions in `src/lightspeed_stack.py`:
+
+- `--migrate-config` — invoke the migration tool.
+- `--run-yaml <path>` — input for `--migrate-config`.
+- `--migrate-output <path>` — output for `--migrate-config`.
+- (recommended for R10) `--synthesized-config-output <path>` — override
+  the default deterministic synthesis location.
+
+The legacy CLI docstring in `create_argument_parser()` referencing the
+removed `-g/-i/-o` flags is cleaned up as part of the docs JIRA.
+
+### Error handling
+
+- **Unified + legacy set simultaneously**: raised during
+  `LlamaStackConfiguration.check_llama_stack_model`. Error message
+  directs to `--migrate-config`.
+- **Library mode with neither `config` nor `library_client_config_path`**:
+  raised during the same validator. Error identifies the two valid paths.
+- **`profile:` path does not exist**: surfaced as `FileNotFoundError`
+  from `open(profile_path)` during synthesis. The implementation JIRA
+  should wrap this with context about where the path was resolved.
+- **Unknown provider `type` in high-level inference**: rejected by the
+  Pydantic `Literal` — operator sees a validation error naming the
+  allowed types. Escape: use `native_override`.
+- **Unknown fields in any unified-mode section**: rejected by
+  `extra="forbid"` on `ConfigurationBase`.
+- **Llama Stack rejects the synthesized `run.yaml`**: surfaces as
+  whatever LS itself raises (ValidationError from LS's own config
+  parsing). The implementation JIRA should log the synthesized file path
+  before handing to LS so operators can inspect what failed.
+
+### Security considerations
+
+- **No secrets written to disk**: `apply_high_level_inference` emits
+  `${env.}` references, never the resolved secret. The synthesized
+  `run.yaml` is safe to log path-wise; its contents only contain env
+  references for secrets.
+- **`native_override` is raw YAML**: content is operator-controlled, so
+  no new injection surface — same trust model as the existing
+  `run.yaml`. LCORE does no template expansion other than the existing
+  `replace_env_vars()` step in the load pipeline.
+- **Synthesized file location**: persistent known path, world-readable
+  by default in a container. This is acceptable because the file
+  contains only env-var references for secrets; operators who want
+  stricter filesystem permissions should tighten the mount.
+
+### Migration / backwards compatibility
+
+Coexistence mechanism: shape detection (see R11). Legacy configs with
+`llama_stack.library_client_config_path` continue to work through the
+configured deprecation window.
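+
+A minimal sketch of the detection rule follows. The helper name is
+hypothetical (in the implementation the same logic lives in the
+`LlamaStackConfiguration` validator), and both where `config_format_version`
+sits in the file and the assumption that `2` means unified are illustrative,
+per Decision T1 in the spike doc.
+
+```python
+def detect_config_mode(lightspeed_cfg: dict) -> str:
+    """Classify a parsed lightspeed-stack.yaml per the R11 shape rule (sketch)."""
+    llama = lightspeed_cfg.get("llama_stack") or {}
+    has_unified = llama.get("config") is not None
+    has_legacy = llama.get("library_client_config_path") is not None
+    if has_unified and has_legacy:
+        raise ValueError(
+            "llama_stack.config and llama_stack.library_client_config_path "
+            "are mutually exclusive; see `lightspeed-stack --migrate-config`"
+        )
+    version = llama.get("config_format_version")
+    if version is not None and (version == 2) != has_unified:
+        # Soft-coupled: the optional version field must agree with the shape.
+        raise ValueError("config_format_version disagrees with the config shape")
+    if has_unified:
+        return "unified"
+    return "legacy" if has_legacy else "remote"
+```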
+ +Three operator-facing migration paths (choose per deployment): + +| Path | Effort | Result | +|---|---|---| +| Do nothing | 0 | Legacy keeps working until deprecation closes | +| Lift-and-shift | seconds — `lightspeed-stack --migrate-config ...` | Single-file, byte-equivalent LS behavior | +| Re-express | hours+ | Single-file; high-level sections replace `native_override` | + +Deprecation schedule: calendar-based (per Decision S2 in the spike); +concrete numbers set by @sbunciak at release time. Default recommended +shape: unified mode ships as opt-in at release N; legacy-mode WARN +begins one release later; legacy-mode removal no sooner than 6 months +after WARN begins. + +## Implementation Suggestions + +### Key files and insertion points + +| File | What to do | +|---|---| +| `src/models/config.py` | Add `UnifiedInferenceProvider`, `UnifiedInferenceSection`, `UnifiedLlamaStackConfig`. Modify `LlamaStackConfiguration` — add `config` field, extend the `model_validator` for mutual-exclusion check. | +| `src/llama_stack_configuration.py` | Add `synthesize_configuration`, `deep_merge_list_replace`, `apply_high_level_inference`, `load_default_baseline`, `synthesize_to_file`, `migrate_config_dumb`, `PROVIDER_TYPE_MAP`, `DEFAULT_BASELINE_RESOURCE`. Update `main()` to auto-detect unified vs legacy. | +| `src/data/default_run.yaml` | New file — a thinner baseline than today's repo-root `run.yaml`. Notably do **not** reference `${env.EXTERNAL_PROVIDERS_DIR}` without a default (see PoC surprise in the spike doc). | +| `src/client.py` | In `_load_library_client`: branch on `config.config` presence. Add `_synthesize_library_config()` that calls the synthesizer and writes to the deterministic path (R10). Keep `_enrich_library_config` for legacy. | +| `src/lightspeed_stack.py` | Add `--migrate-config`, `--run-yaml`, `--migrate-output`, `--synthesized-config-output` flags. Add an early-exit branch in `main()` that dispatches to `migrate_config_dumb` when `--migrate-config` is set. Clean up stale docstring. | +| `scripts/llama-stack-entrypoint.sh` | No functional change — the Python CLI already auto-detects. Update the comment to document both modes. | +| `test.containerfile` | Copy `src/data/` into `/opt/app-root/data/` so `load_default_baseline()` resolves inside the LS container. | +| `docker-compose.yaml` | Provide a unified-mode variant (either a new compose file or env-var-switched mount list). Legacy compose continues to work. | + +### Insertion point detail + +**`synthesize_configuration` pipeline** (the core new function): + +1. Retrieve `unified = lcs_config["llama_stack"]["config"]` — raise if absent. +2. Baseline: if `unified.profile` set → load that file. Else if + `unified.baseline == "empty"` → `{}`. Else → `default_baseline` arg or + `load_default_baseline()`. +3. Run `dedupe_providers_vector_io` on the baseline. +4. Apply existing enrichment: `enrich_byok_rag`, `enrich_solr` (Azure + Entra ID intentionally stays separate because it's a `.env` + side-effect, not an `ls_config` mutation). +5. If `unified.inference` present → `apply_high_level_inference`. +6. If `unified.native_override` non-empty → + `deep_merge_list_replace(ls_config, native_override)`. +7. `dedupe_providers_vector_io` again for good measure. +8. Return the final dict. 
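+
+Condensed, the pipeline above reads roughly as follows. This is a sketch,
+not the shipped code: the helper names are the ones this document lists,
+but their exact signatures are illustrative, and in the real implementation
+`unified` is a Pydantic model rather than a plain dict.
+
+```python
+from pathlib import Path
+
+import yaml
+
+# Assumed imports: these helpers are the ones this document names; their
+# real signatures in src/llama_stack_configuration.py may differ.
+from llama_stack_configuration import (
+    apply_high_level_inference,
+    dedupe_providers_vector_io,
+    deep_merge_list_replace,
+    enrich_byok_rag,
+    enrich_solr,
+    load_default_baseline,
+)
+
+
+def synthesize_configuration(
+    lcs_config: dict, config_file_dir: Path, default_baseline: dict | None = None
+) -> dict:
+    """Sketch of steps 1-8 above; argument shapes are illustrative."""
+    unified = lcs_config["llama_stack"]["config"]         # 1. raises if absent
+    if unified.get("profile"):                            # 2. pick the baseline
+        profile_path = (config_file_dir / unified["profile"]).resolve()
+        ls_config = yaml.safe_load(profile_path.read_text(encoding="utf-8"))
+    elif unified.get("baseline") == "empty":
+        ls_config = {}
+    else:
+        ls_config = default_baseline or load_default_baseline()
+    dedupe_providers_vector_io(ls_config)                 # 3.
+    enrich_byok_rag(lcs_config, ls_config)                # 4. existing enrichment
+    enrich_solr(lcs_config, ls_config)
+    if unified.get("inference"):                          # 5. high-level expansion
+        apply_high_level_inference(ls_config, unified["inference"])
+    if unified.get("native_override"):                    # 6. escape hatch, merged last
+        ls_config = deep_merge_list_replace(ls_config, unified["native_override"])
+    dedupe_providers_vector_io(ls_config)                 # 7.
+    return ls_config                                      # 8.
+```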
+ +**`_load_library_client` fork point** (in `src/client.py`): + +```python +if config.config is not None: + self._config_path = self._synthesize_library_config() +elif config.library_client_config_path is not None: + self._config_path = self._enrich_library_config(config.library_client_config_path) +else: + raise ValueError(...) # caught by the validator at load time; belt-and-suspenders here +``` + +### Config pattern + +All new config classes extend `ConfigurationBase` (`extra="forbid"`). +Use `Field()` with defaults, title, and description for every attribute. +Cross-field validation in `UnifiedLlamaStackConfig` is not currently +needed — the precedence is strictly ordered and handled by the +synthesizer, not by the model. + +Example config files live in `examples/profiles/` (two reference +profiles — one remote-provider, one inline-provider) and in +`examples/lightspeed-stack-unified.yaml` as the canonical "unified mode" +reference. + +### Test patterns + +- Framework: pytest + pytest-mock. Unit tests live in + `tests/unit/test_llama_stack_synthesize.py` (synthesizer + migration) + and `tests/unit/models/config/test_llama_stack_configuration.py` + (schema validation). +- Merge semantics: parametric tests over scalar / map / list / + type-mismatch / precedence cases. +- Round-trip test: migrate → synthesize → assert dict equality with the + original `run.yaml`. Pattern already live in + `test_migrate_then_synthesize_reproduces_run_yaml`. +- Schema validation tests: mutual exclusion, remote URL + config, + library mode + config without legacy path. +- Feature-specific: provider_type map completeness test asserts every + `Literal` value on `UnifiedInferenceProvider.type` has a + `PROVIDER_TYPE_MAP` entry. +- e2e behave tests: migrate `tests/e2e/configuration/**` configs to + unified form as part of LCORE-???? (test migration JIRA). + +## Open Questions for Future Work + +- **Smart migration mode** (`--migrate-config --smart`): factoring an + existing `run.yaml` into high-level sections rather than dumping to + `native_override`. Valuable ergonomic win; deferred because the + factoring rules require careful design per provider type. +- **Additional high-level sections** beyond `inference` — `storage`, + `safety`, `tools`, `vector_stores`, etc. Add as real demand appears, + not speculatively. +- **User-supplied profile directory**: `profile_dir: /etc/lcore/profiles/` + with name-based lookup. Deferred to v2. +- **LS process supervision** (restart on crash, signal propagation, + merged logs) — covered by LCORE-777 / LCORE-778, not this feature. +- **Dynamic reconfig / hot-reload** (live `POST /v1/rag` that adds a BYOK + RAG without restart) — covered by LCORE-781, not this feature. Llama + Stack's lack of native hot-reload means any implementation requires + supervised restart, which is out of scope here. +- **`config_format_version`** as an explicit schema version, accepted + but not required. Will become load-bearing the first time the unified + schema undergoes a real breaking change. +- **Validation pre-flight against the Llama Stack schema**: today LCORE + only validates its own schema; LS validates its own at startup. + Introducing a pre-flight validator would catch bad synthesis earlier + but creates a heavy dependency on LS internals. 
+ +## Changelog + +| Date | Change | Reason | +|---|---|---| +| 2026-04-23 | Initial version | Spike completion | + +## Appendix A — Worked example: legacy → unified migration + +Given legacy: + +```yaml +# run.yaml +version: 2 +apis: [agents, inference, vector_io, ...] +providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] +# ... more ... +``` + +```yaml +# lightspeed-stack.yaml +name: LCS +llama_stack: + use_as_library_client: true + library_client_config_path: ./run.yaml +# ... rest ... +``` + +Run: + +```bash +lightspeed-stack --migrate-config \ + --run-yaml run.yaml \ + -c lightspeed-stack.yaml \ + --migrate-output lightspeed-stack-unified.yaml +``` + +Produces: + +```yaml +# lightspeed-stack-unified.yaml +name: LCS +llama_stack: + use_as_library_client: true + # library_client_config_path is REMOVED + config: + baseline: empty + native_override: + version: 2 + apis: [agents, inference, vector_io, ...] + providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + # ... rest of run.yaml content under native_override ... +# ... rest of lightspeed-stack.yaml content ... +``` + +Operator uses the unified file directly and can delete the original +`run.yaml`. Subsequent re-expression (moving from `native_override` into +high-level sections) is optional and per-deployment. + +## Appendix B — Reference profile example + +```yaml +# examples/profiles/openai-remote.yaml +# A minimal profile for an OpenAI-backed remote Llama Stack. +# Referenced via `llama_stack.config.profile: examples/profiles/openai-remote.yaml`. +version: 2 +apis: [agents, inference, safety, tool_runtime, vector_io] +providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.OPENAI_MODEL:=gpt-4o-mini}"] + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers +# ... the rest is the same shape as a working run.yaml ... 
+``` diff --git a/docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md new file mode 100644 index 000000000..6bfa7f5e9 --- /dev/null +++ b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/README.md @@ -0,0 +1,26 @@ +# Library-mode PoC evidence + +Command: +```bash +export OPENAI_API_KEY= +export E2E_OPENAI_MODEL=gpt-4o-mini +uv run lightspeed-stack -c docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml +``` + +## What the unified config does + +- `llama_stack.config.profile: /abs/path/to/tests/e2e/configs/run-ci.yaml` — baseline loaded from the CI profile +- `llama_stack.config.native_override.safety.default_shield_id: llama-guard` — override proves merge works + +## Evidence + +- `synthesized-run.yaml` — the full run.yaml LCORE produced from the unified config +- `query-response.json` — a successful `/v1/query` round-trip + +## Proves + +- `llama_stack.library_client_config_path` was NOT used (no external run.yaml needed) +- `llama_stack.config.profile` was used as the synthesis baseline (path resolution works with absolute paths) +- `llama_stack.config.native_override` was merged onto the baseline +- `AsyncLlamaStackAsLibraryClient` accepts the synthesized file path (answered item #24: file-only, not dict) +- `/v1/query` succeeded end-to-end through the synthesized stack diff --git a/docs/design/llama-stack-config-merge/poc-evidence/library-mode/query-response.json b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/query-response.json new file mode 100644 index 000000000..5664cbd00 --- /dev/null +++ b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/query-response.json @@ -0,0 +1 @@ +{"conversation_id":"976ef32527283085ba2f1d0cfb4c16d97071bf64391a8200","response":"The three primary colors are red, blue, and yellow.","rag_chunks":[],"referenced_documents":[],"truncated":false,"input_tokens":24,"output_tokens":12,"available_quotas":{},"tool_calls":[],"tool_results":[]} \ No newline at end of file diff --git a/docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml new file mode 100644 index 000000000..34e3e1fc9 --- /dev/null +++ b/docs/design/llama-stack-config-merge/poc-evidence/library-mode/synthesized-run.yaml @@ -0,0 +1,148 @@ +apis: + - agents + - batches + - datasetio + - eval + - files + - inference + - safety + - scoring + - tool_runtime + - vector_io +benchmarks: [] +datasets: [] +image_name: starter +providers: + agents: + - config: + persistence: + agent_state: + backend: kv_default + namespace: agents_state + responses: + backend: sql_default + table_name: agents_responses + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + backend: kv_default + namespace: batches_store + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + backend: kv_default + namespace: huggingface_datasetio + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + backend: kv_default + namespace: localfs_datasetio + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + backend: kv_default + namespace: eval_store + provider_id: meta-reference + provider_type: inline::meta-reference + files: + - config: + metadata_store: + backend: sql_default + table_name: 
files_metadata + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs + inference: + - config: + allowed_models: + - ${env.E2E_OPENAI_MODEL:=gpt-4o-mini} + api_key: ${env.OPENAI_API_KEY} + provider_id: openai + provider_type: remote::openai + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + - config: {} + provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - config: + openai_api_key: '********' + provider_id: braintrust + provider_type: inline::braintrust + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + - config: {} + provider_id: model-context-protocol + provider_type: remote::model-context-protocol + vector_io: [] +registered_resources: + benchmarks: [] + datasets: [] + models: + - metadata: + embedding_dimension: 768 + model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + scoring_fns: [] + shields: + - provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini + shield_id: llama-guard + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_stores: [] +safety: + default_shield_id: llama-guard +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + type: kv_sqlite + sql_default: + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} + type: sql_sqlite + stores: + conversations: + backend: sql_default + table_name: openai_conversations + inference: + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + table_name: inference_store + metadata: + backend: kv_default + namespace: registry + prompts: + backend: kv_default + namespace: prompts +vector_stores: + default_embedding_model: + model_id: all-mpnet-base-v2 + provider_id: sentence-transformers + default_provider_id: faiss +version: 2 diff --git a/docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml b/docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml new file mode 100644 index 000000000..a75ad5bf6 --- /dev/null +++ b/docs/design/llama-stack-config-merge/poc-evidence/lightspeed-stack-unified-library.yaml @@ -0,0 +1,33 @@ +name: Lightspeed Core Service (LCS) - Unified PoC +service: + host: 0.0.0.0 + port: 8080 + base_url: http://localhost:8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +# Unified mode: no `library_client_config_path`. Operational LS config is +# synthesized by LCORE from `llama_stack.config` below. +llama_stack: + use_as_library_client: true + config: + # Use the CI-friendly baseline via `profile` (no EXTERNAL_PROVIDERS_DIR + # env var required). Equivalent to what tests/e2e/configs/run-ci.yaml + # provides; this exercises the `profile:` path of the synthesizer. + profile: /home/msvistun/repos/lightspeed/stack/tests/e2e/configs/run-ci.yaml + # Small native_override: prove overrides take effect end-to-end. 
+ native_override: + safety: + default_shield_id: llama-guard +user_data_collection: + feedback_enabled: false + feedback_storage: "/tmp/lcore-836-poc/feedback" + transcripts_enabled: false + transcripts_storage: "/tmp/lcore-836-poc/transcripts" +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/lcore-836-poc/conversation-cache.db" +authentication: + module: "noop" From 6e0a95bc5a931845950d61275a277322cc3607a4 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Thu, 23 Apr 2026 15:29:47 +0200 Subject: [PATCH 3/3] LCORE-836 spike: add e2e-kickoff JIRA (feature files first, no implementation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporates reviewer request: the work on this feature kicks off with a Story that authors the behave `.feature` files for unified mode BEFORE the feature is implemented. The intent is to keep test-shape authorship free of implementation bias and to surface any architectural gaps early. Adds two JIRAs to the spike doc's proposed-JIRAs list, bringing the total from 7 to 9: 1. LCORE-???? (Story, inserted first) — E2E feature files for unified mode (no step implementation). Authors Gherkin scenarios against the spec doc's R1..R11 requirements. Explicitly forbids reading the implementation JIRAs or the synthesizer code while authoring. behave marks resulting steps as undefined; test-e2e still green (undefined scenarios are reported, not failed). 2. LCORE-???? (Task, inserted after the migrate-e2e-configs Story) — Implement behave step definitions for the kickoff feature files. Takes the Gherkin as-is (does not water down the tests to fit implementation). Blocked by the kickoff ticket plus the feature- implementation tickets (schema + synthesizer, migration tool, LS container entrypoint). Filing both tickets together (rather than filing only the kickoff and "letting the step-def ticket appear later") makes the dependency chain explicit from the start and ensures the step-def work is not forgotten. No other JIRAs change scope. The PR template is updated to reflect the new count and to widen the "Full JIRA list" link range to cover both new sections. --- .../llama-stack-config-merge-spike.md | 125 +++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md b/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md index c8db06ff4..83451b4ff 100644 --- a/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md +++ b/docs/design/llama-stack-config-merge/llama-stack-config-merge-spike.md @@ -240,7 +240,80 @@ available. Not a decision; a fact to note in the spec doc. ## Proposed JIRAs Each JIRA's agentic-tool instruction points to the spec doc -(`llama-stack-config-merge.md`), the permanent reference. +(`llama-stack-config-merge.md`), the permanent reference. The first JIRA +(authoring e2e feature files) is the intentional kickoff — it happens +before feature implementation so the test shape is not influenced by +implementation choices. + + + +### LCORE-???? E2E feature files for unified mode (no step implementation) + +**User story**: As a Lightspeed Core e2e engineer, I want the behave +feature files for unified-mode scenarios written before the feature +implementation lands, so that the test shape reflects the feature's +intended behavior rather than the chosen implementation, and any +architectural gaps surface early. 
+
+**Description**: Author behave `.feature` files under `tests/e2e/features/`
+that describe the behaviors required of unified mode. Step definitions
+(Python glue) are explicitly **not** part of this ticket — they are
+covered by a later sibling ticket (LCORE-???? — Implement step
+definitions). The feature files can be reviewed and merged before
+implementation of the feature itself begins.
+
+**Scope**:
+- `.feature` files covering, at minimum, these R1..R11 surfaces from the
+  spec doc:
+  - Boot LCORE with unified `lightspeed-stack.yaml` (no external
+    `run.yaml`); `/liveness`, `/readiness`, and `/v1/query` succeed.
+  - Boot LCORE with legacy config
+    (`library_client_config_path` + external `run.yaml`); same result.
+  - Setting both `llama_stack.config` and
+    `llama_stack.library_client_config_path` fails at config-load time
+    with a clear error that mentions `--migrate-config`.
+  - Migration tool: `lightspeed-stack --migrate-config ...` produces a
+    unified file that drives equivalent Llama Stack behavior.
+  - `native_override` deep-merges onto the baseline with list
+    replacement (tested on a scalar key and a list key).
+  - `profile:` path (absolute and relative-to-config-dir) loads the
+    referenced baseline.
+  - Secrets appear as `${env.FOO}` references in the synthesized
+    `run.yaml` on disk; they are never resolved to raw values.
+  - Legacy mode emits a one-line deprecation WARN at startup; unified
+    mode does not.
+- Additions to `tests/e2e/test_list.txt` so behave discovers the new
+  files.
+- Gherkin scenarios authored from the spec doc (`R1..R11`) only; the
+  author must avoid reading the implementation JIRAs' scope sections
+  while drafting scenarios.
+
+**Acceptance criteria**:
+- behave parses every new `.feature` file without syntax errors.
+- behave marks all new scenario steps as `undefined` (step definitions
+  land in LCORE-????).
+- `uv run make test-e2e` remains green (new scenarios are skipped or
+  reported undefined, not failing).
+- Any ambiguity or architectural tension uncovered while authoring is
+  captured either as a comment in the spec doc or as a new sub-JIRA.
+
+**Blocks**: LCORE-???? (Implement behave step definitions for unified
+mode).
+
+**Agentic tool instruction**:
+```text
+Read "Requirements" (R1..R11) and "Use Cases" in
+docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Do NOT read the other JIRAs' scope sections or the synthesizer/schema
+implementation code while authoring; the point of this ticket is to
+produce feature files uncontaminated by implementation detail.
+Key files to create: tests/e2e/features/unified-mode-*.feature plus
+additions to tests/e2e/test_list.txt. Do NOT create step definitions in
+tests/e2e/features/steps/.
+To verify: `uv run behave --dry-run tests/e2e/features/unified-mode-*.feature`
+parses successfully; `uv run make test-e2e` still green with the new
+scenarios reported as undefined.
+```
@@ -380,6 +453,56 @@ Key files: tests/e2e/configs/, tests/e2e/configuration/, tests/e2e-prow/rhoai/.
 To verify: `uv run make test-e2e` green.
 ```
 
+### LCORE-???? Implement behave step definitions for unified-mode feature files
+
+**Description**: Implement the Python step definitions
+(`@given`/`@when`/`@then` functions) under `tests/e2e/features/steps/`
+for the `.feature` files authored in LCORE-???? (E2E feature files
+kickoff). After this ticket lands, the scenarios transition from
+`undefined` to fully executing.
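+
+For illustration only, the expected shape of such step definitions (the
+step text below is hypothetical; the real patterns must bind to the
+kickoff Gherkin verbatim, and the base URL is an assumption taken from
+the PoC config's `service.port`):
+
+```python
+# Sketch of step definitions for tests/e2e/features/steps/ (illustrative;
+# step text and constants are not prescribed by this ticket).
+import requests
+from behave import given, then, when
+
+BASE_URL = "http://localhost:8080"  # assumption: service.port from the e2e config
+
+
+@given("LCORE is running with a unified lightspeed-stack.yaml")
+def step_given_running(context):
+    # Assumption: the e2e harness already started LCORE with the unified
+    # config; this step only verifies the service is up.
+    response = requests.get(BASE_URL + "/liveness", timeout=30)
+    assert response.status_code == 200, response.text
+
+
+@when('I GET "{endpoint}"')
+def step_get(context, endpoint):
+    context.response = requests.get(BASE_URL + endpoint, timeout=30)
+
+
+@then("the response status is {status:d}")
+def step_status(context, status):
+    assert context.response.status_code == status, context.response.text
+```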
+
+The feature files are taken as-is — do not modify the Gherkin to make
+implementation easier. If a scenario cannot be implemented faithfully,
+raise it against the spec doc (and possibly back to LCORE-???? kickoff)
+rather than quietly weakening the test.
+
+**Scope**:
+- Step definitions for every step pattern in the new `.feature` files.
+- Fixtures or helpers under `tests/e2e/features/steps/` as needed
+  (e.g., temp-dir config authoring, subprocess start/stop for LCORE,
+  HTTP client helpers reusing existing `tests/e2e/` patterns).
+- CI wiring so the new scenarios run as part of `uv run make test-e2e`.
+
+**Acceptance criteria**:
+- behave reports zero `undefined` steps across the new `.feature`
+  files.
+- `uv run make test-e2e` runs the new scenarios and they pass.
+- No Gherkin edit was made to accommodate implementation constraints
+  (or if any edit was made, it is documented in a PR comment with
+  explicit rationale).
+
+**Blocked by**:
+- LCORE-???? (E2E feature files for unified mode — the `.feature`
+  files being implemented against).
+- LCORE-???? (Unified schema + synthesizer), LCORE-????
+  (Migration tool), LCORE-???? (LS container entrypoint + deployment)
+  — the feature under test must exist.
+
+**Agentic tool instruction**:
+```text
+Read "Architecture" and "Requirements" in
+docs/design/llama-stack-config-merge/llama-stack-config-merge.md.
+Key files to create: tests/e2e/features/steps/unified-mode*.py (or
+extend existing step-definition modules if patterns reuse cleanly).
+Do not modify tests/e2e/features/unified-mode-*.feature — take the
+Gherkin as-is. If a scenario genuinely cannot be implemented faithfully,
+file a sub-ticket rather than changing the Gherkin quietly.
+To verify: `uv run make test-e2e` runs every new scenario green and
+behave reports zero undefined steps.
+```
+
 
 ### LCORE-???? Docs migration to unified mode as primary