From 323fd30ba845de0e8342a19ee73844ae95eb7108 Mon Sep 17 00:00:00 2001 From: Tristen Pierson Date: Fri, 1 May 2026 07:42:47 -0400 Subject: [PATCH 1/3] feat(cli): BYOE endpoints store + 'specsmith endpoints' group (REQ-142) Phase 1 of the Bring-Your-Own-Endpoint sprint. Adds a generic OpenAI-v1-compatible endpoint registry so users can register self-hosted vLLM, llama.cpp server, LM Studio, and TGI backends and pick between them. - src/specsmith/agent/endpoints.py: Endpoint / EndpointAuth / EndpointStore / EndpointHealth dataclasses, schema_version=1, JSON persistence at ~/.specsmith/endpoints.json (chmod 600), token resolution dispatch (none / bearer-inline / bearer-env / bearer-keyring), /v1/models health probe with TLS verify toggle. - src/specsmith/cli.py: 'specsmith endpoints' group with add / list / remove / default / test / models subcommands. Inline-token redaction in --json output, optional bearer-keyring storage with hidden-input prompt, --purge-keyring on remove, --set-default on add. - tests/test_endpoints_store.py + tests/test_endpoints_cli.py: 38 new tests covering validation, round-trip, redaction, token resolution dispatch, and /v1/models health against an in-process fake server. - tests/fixtures/api_surface.json: registered 'endpoints' as a top-level command for REQ-140 stability. - docs/site/endpoints.md: BYOE walkthrough, auth strategy table, security notes, CLI reference. Validation: ruff lint clean, ruff format clean, mypy strict clean for the new module, pytest 66/66 passing across the new suites + the existing api-surface stability test. 
Co-Authored-By: Oz --- docs/site/endpoints.md | 117 ++++++++ src/specsmith/agent/endpoints.py | 493 +++++++++++++++++++++++++++++++ src/specsmith/cli.py | 326 ++++++++++++++++++++ tests/fixtures/api_surface.json | 1 + tests/test_endpoints_cli.py | 244 +++++++++++++++ tests/test_endpoints_store.py | 350 ++++++++++++++++++++++ 6 files changed, 1531 insertions(+) create mode 100644 docs/site/endpoints.md create mode 100644 src/specsmith/agent/endpoints.py create mode 100644 tests/test_endpoints_cli.py create mode 100644 tests/test_endpoints_store.py diff --git a/docs/site/endpoints.md b/docs/site/endpoints.md new file mode 100644 index 0000000..25aa0f6 --- /dev/null +++ b/docs/site/endpoints.md @@ -0,0 +1,117 @@ +# Bring-Your-Own-Endpoint (BYOE) + +Specsmith ships first-class support for self-hosted OpenAI-v1-compatible +LLM servers (vLLM, llama.cpp `server`, LM Studio, TGI, +text-generation-webui, …). Every endpoint you register can be selected +per session via `--endpoint ` on `specsmith run`, `chat`, and +`serve` (PR-2). + +## Quick start + +Register a vLLM running on your LAN: + +```sh +specsmith endpoints add \ + --id home-vllm \ + --name "Home vLLM" \ + --base-url http://10.0.0.4:8000/v1 \ + --default-model Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8 \ + --auth none \ + --set-default + +specsmith endpoints test home-vllm +``` + +Once the test reports `ok`, run an agent against it: + +```sh +specsmith run --endpoint home-vllm "summarise the last commit" +``` + +## Storage layout + +All endpoints live in `~/.specsmith/endpoints.json` (override with +`SPECSMITH_HOME`). 
The on-disk schema is versioned: + +```json +{ + "schema_version": 1, + "default_endpoint_id": "home-vllm", + "endpoints": [ + { + "id": "home-vllm", + "name": "Home vLLM", + "base_url": "http://10.0.0.4:8000/v1", + "auth": {"kind": "bearer-keyring", + "keyring_service": "specsmith", + "keyring_user": "endpoint:home-vllm"}, + "default_model": "Qwen/Qwen2.5-Coder-32B", + "verify_tls": true, + "tags": ["local", "coder"], + "created_at": "2026-05-01T11:30:17Z" + } + ] +} +``` + +The file is written `chmod 600` on POSIX. Token bytes for the inline +strategy are the only secret material that ever lands in this file — +the keyring and env-var strategies leave it secret-free. + +## Auth strategies + +| Kind | Where the token lives | When to use | +|------------------|----------------------------------------------------|-------------| +| `none` | nowhere — request is unauthenticated | trusted LAN, open vLLM dev box | +| `bearer-inline` | `endpoints.json` (plaintext, `chmod 600`) | quick scratch setups where keyring is unavailable | +| `bearer-env` | the env var name you specify (`--token-env FOO`) | CI / containers / 12-factor deploys | +| `bearer-keyring` | OS keyring, indexed by `(service, user)` (default) | desktop / laptop installs (default) | + +The `list --json` output redacts inline tokens to `"***"`. The CLI +never logs token bytes to terminal output. + +## Health checks + +```sh +specsmith endpoints test home-vllm --json +specsmith endpoints models home-vllm --json +``` + +`test` calls `/models` with the resolved bearer token, prints +the latency in milliseconds, and reports up to 5 model ids. `models` +returns the full list. + +If the endpoint does not expose `/v1/models`, `test` will still return a +clear error message — set `default_model` manually and rely on the +session-level model dropdown instead. + +## CLI reference + +| Command | Notes | +|---------|-------| +| `specsmith endpoints add` | Register a new endpoint. 
`--auth bearer-keyring` (default) prompts for the secret without echo. | +| `specsmith endpoints list [--json]` | Tabular by default, JSON for IDE consumers. Tokens are redacted. | +| `specsmith endpoints remove [--purge-keyring]` | Remove the entry; pass `--purge-keyring` to also delete the saved token. | +| `specsmith endpoints default ` | Promote an existing endpoint to the default. | +| `specsmith endpoints test [] [--timeout 5]` | Probe `/v1/models`. Exits 1 on failure. | +| `specsmith endpoints models []` | List every model the endpoint advertises. | + +## Security notes + +* The store path is `chmod 600` on POSIX where supported. +* `verify_tls: false` is opt-in (`--no-verify-tls`); otherwise the CLI + verifies the certificate chain. Disabling it for an https endpoint is + documented per-endpoint in the on-disk JSON so a drift audit can spot + insecure configurations. +* `auth.kind == bearer-inline` is functional but not recommended. + Prefer `bearer-keyring` when the OS keyring is available; otherwise + use `bearer-env` and inject the secret through your shell or + container environment. + +## Roadmap + +* **PR-2 (this milestone):** wires `--endpoint ` into `run`, + `chat`, and `serve`, plus a new `_run_openai_compat` provider driver. +* **PR-3:** Endpoints tab and a per-session dropdown in the + `specsmith-vscode` extension. +* **PR-4:** 0.8.0 release notes + tag. diff --git a/src/specsmith/agent/endpoints.py b/src/specsmith/agent/endpoints.py new file mode 100644 index 0000000..b6b4a68 --- /dev/null +++ b/src/specsmith/agent/endpoints.py @@ -0,0 +1,493 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Bring-Your-Own-Endpoint (BYOE) data model and persistence (REQ-142). + +Specsmith historically hard-coded a closed provider list (``ollama`` / +``anthropic`` / ``openai`` / ``gemini`` / ``mistral``). 
This module +introduces a generic OpenAI-v1-compatible endpoint store so users can +register self-hosted vLLM, llama.cpp ``server``, LM Studio, TGI, or any +other ``/v1/chat/completions``-shaped backend and pick between several +side-by-side. + +Storage layout (``~/.specsmith/endpoints.json``): + +.. code-block:: json + + { + "schema_version": 1, + "default_endpoint_id": "home-vllm", + "endpoints": [ + { + "id": "home-vllm", + "name": "Home vLLM", + "base_url": "http://10.0.0.4:8000/v1", + "auth": {"kind": "bearer-keyring", + "keyring_service": "specsmith", + "keyring_user": "endpoint:home-vllm"}, + "default_model": "Qwen/Qwen2.5-Coder-32B", + "verify_tls": true, + "tags": ["local", "coder"], + "created_at": "2026-05-01T11:30:17Z" + } + ] + } + +Tokens are NEVER printed verbatim by anything in this module; ``list_all`` +serialisation routes through :func:`Endpoint.to_public_dict` which +redacts inline tokens to ``"***"``. +""" + +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = 1 +DEFAULT_KEYRING_SERVICE = "specsmith" + +VALID_AUTH_KINDS = ("none", "bearer-inline", "bearer-env", "bearer-keyring") + + +class EndpointError(RuntimeError): + """Raised for user-facing endpoint errors (validation, missing token, ...).""" + + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- + + +@dataclass +class EndpointAuth: + """Discriminated-union auth metadata. + + ``kind`` is one of: + + * ``none`` — no Authorization header (e.g. open vLLM on a trusted LAN). + * ``bearer-inline`` — token stored verbatim in ``endpoints.json``. + Only used when the user explicitly opts in; the on-disk plaintext + is documented as insecure. + * ``bearer-env`` — token resolved from ``token_env`` at call time. 
+ * ``bearer-keyring`` — token stored in the OS keyring under + ``(keyring_service, keyring_user)``. + """ + + kind: str = "none" + token: str = "" # only set when kind == "bearer-inline" + token_env: str = "" # only set when kind == "bearer-env" + keyring_service: str = DEFAULT_KEYRING_SERVICE + keyring_user: str = "" + + def to_dict(self) -> dict[str, Any]: + """On-disk shape (token included for ``bearer-inline``).""" + out: dict[str, Any] = {"kind": self.kind} + if self.kind == "bearer-inline": + out["token"] = self.token + elif self.kind == "bearer-env": + out["token_env"] = self.token_env + elif self.kind == "bearer-keyring": + out["keyring_service"] = self.keyring_service + out["keyring_user"] = self.keyring_user + return out + + def to_public_dict(self) -> dict[str, Any]: + """Redacted shape — never returns inline token bytes.""" + out: dict[str, Any] = {"kind": self.kind} + if self.kind == "bearer-inline": + out["token"] = "***" + elif self.kind == "bearer-env": + out["token_env"] = self.token_env + elif self.kind == "bearer-keyring": + out["keyring_service"] = self.keyring_service + out["keyring_user"] = self.keyring_user + return out + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> EndpointAuth: + kind = str(raw.get("kind") or "none").strip() + if kind not in VALID_AUTH_KINDS: + raise EndpointError(f"invalid auth kind {kind!r}; expected one of {VALID_AUTH_KINDS}") + return cls( + kind=kind, + token=str(raw.get("token") or ""), + token_env=str(raw.get("token_env") or ""), + keyring_service=str(raw.get("keyring_service") or DEFAULT_KEYRING_SERVICE), + keyring_user=str(raw.get("keyring_user") or ""), + ) + + +@dataclass +class Endpoint: + """A single OpenAI-v1-compatible endpoint registered for use with specsmith.""" + + id: str + name: str + base_url: str + auth: EndpointAuth = field(default_factory=EndpointAuth) + default_model: str = "" + verify_tls: bool = True + tags: list[str] = field(default_factory=list) + created_at: str = "" + + # ── 
Validation ───────────────────────────────────────────────────────── + + def validate(self) -> None: + """Raise :class:`EndpointError` on structural problems.""" + if not self.id or not self.id.strip(): + raise EndpointError("endpoint id must be non-empty") + if any(c.isspace() for c in self.id): + raise EndpointError(f"endpoint id {self.id!r} must not contain whitespace") + if not self.base_url.startswith(("http://", "https://")): + raise EndpointError( + f"endpoint base_url {self.base_url!r} must start with http:// or https://" + ) + if self.auth.kind == "bearer-env" and not self.auth.token_env: + raise EndpointError("auth.kind == 'bearer-env' requires a non-empty token_env") + if self.auth.kind == "bearer-keyring" and not self.auth.keyring_user: + raise EndpointError( + "auth.kind == 'bearer-keyring' requires a keyring_user (defaults to endpoint:)" + ) + + # ── Token resolution ─────────────────────────────────────────────────── + + def resolve_token(self) -> str | None: + """Return the bearer token for this endpoint, or ``None`` for unauthenticated. + + Order of resolution mirrors :data:`EndpointAuth.kind`. Errors are + converted to :class:`EndpointError` so callers can surface a clean + message instead of a stack trace. 
+ """ + kind = self.auth.kind + if kind == "none": + return None + if kind == "bearer-inline": + return self.auth.token or None + if kind == "bearer-env": + value = os.environ.get(self.auth.token_env, "").strip() + if not value: + raise EndpointError( + f"endpoint {self.id!r} expects token in env var " + f"{self.auth.token_env!r}, but it is unset" + ) + return value + if kind == "bearer-keyring": + try: + import keyring + except Exception as exc: # noqa: BLE001 + raise EndpointError( + "keyring is not available — install python-keyring or " + "switch the endpoint to --auth bearer-env" + ) from exc + try: + value = keyring.get_password(self.auth.keyring_service, self.auth.keyring_user) + except Exception as exc: # noqa: BLE001 + raise EndpointError(f"keyring lookup failed: {exc}") from exc + if not value: + raise EndpointError( + f"endpoint {self.id!r} has no token stored in keyring " + f"({self.auth.keyring_service}/{self.auth.keyring_user})" + ) + return str(value) + raise EndpointError(f"unknown auth kind {kind!r}") + + # ── Health / discovery ───────────────────────────────────────────────── + + def health(self, *, timeout: float = 5.0) -> EndpointHealth: + """Probe ``/models`` and return a structured result. + + Network and HTTP errors are caught — the returned record always has + ``ok`` populated. ``models`` is empty when the endpoint does not + expose ``/models``; that is not an error in itself. 
+ """ + import urllib.error + import urllib.request + + url = self.base_url.rstrip("/") + "/models" + req = urllib.request.Request(url) # noqa: S310 - user-supplied + try: + token = self.resolve_token() + except EndpointError as exc: + return EndpointHealth( + ok=False, latency_ms=0.0, models=[], error=str(exc), status_code=None + ) + if token: + req.add_header("Authorization", f"Bearer {token}") + start = time.perf_counter() + try: + ctx = None + if not self.verify_tls and url.startswith("https://"): + import ssl + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + with urllib.request.urlopen( # noqa: S310 - user-supplied + req, timeout=timeout, context=ctx + ) as resp: + latency_ms = (time.perf_counter() - start) * 1000.0 + payload = json.loads(resp.read().decode("utf-8")) + models = _extract_model_ids(payload) + return EndpointHealth( + ok=True, + latency_ms=latency_ms, + models=models, + error="", + status_code=int(resp.status), + ) + except urllib.error.HTTPError as exc: + return EndpointHealth( + ok=False, + latency_ms=(time.perf_counter() - start) * 1000.0, + models=[], + error=f"HTTP {exc.code}", + status_code=int(exc.code), + ) + except Exception as exc: # noqa: BLE001 + return EndpointHealth( + ok=False, + latency_ms=(time.perf_counter() - start) * 1000.0, + models=[], + error=str(exc), + status_code=None, + ) + + # ── Serialisation ────────────────────────────────────────────────────── + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "name": self.name, + "base_url": self.base_url, + "auth": self.auth.to_dict(), + "default_model": self.default_model, + "verify_tls": bool(self.verify_tls), + "tags": list(self.tags), + "created_at": self.created_at, + } + + def to_public_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "name": self.name, + "base_url": self.base_url, + "auth": self.auth.to_public_dict(), + "default_model": self.default_model, + "verify_tls": 
bool(self.verify_tls), + "tags": list(self.tags), + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> Endpoint: + return cls( + id=str(raw.get("id") or "").strip(), + name=str(raw.get("name") or "").strip(), + base_url=str(raw.get("base_url") or "").strip(), + auth=EndpointAuth.from_dict(raw.get("auth") or {}), + default_model=str(raw.get("default_model") or "").strip(), + verify_tls=bool(raw.get("verify_tls", True)), + tags=[str(t) for t in (raw.get("tags") or [])], + created_at=str(raw.get("created_at") or ""), + ) + + +@dataclass +class EndpointHealth: + """Structured result of :meth:`Endpoint.health`.""" + + ok: bool + latency_ms: float + models: list[str] + error: str = "" + status_code: int | None = None + + def to_dict(self) -> dict[str, Any]: + return { + "ok": self.ok, + "latency_ms": round(self.latency_ms, 2), + "models": list(self.models), + "error": self.error, + "status_code": self.status_code, + } + + +def _extract_model_ids(payload: Any) -> list[str]: + """Pull a list of model id strings out of an OpenAI ``/v1/models`` body. + + Tolerates the two common shapes (``{"data": [{"id": ...}]}`` from real + OpenAI / vLLM and ``{"models": [...]}`` used by some proxies). 
+ """ + out: list[str] = [] + if isinstance(payload, dict): + candidates = payload.get("data") or payload.get("models") or [] + if isinstance(candidates, list): + for item in candidates: + if isinstance(item, dict) and "id" in item: + out.append(str(item["id"])) + elif isinstance(item, str): + out.append(item) + return out + + +# --------------------------------------------------------------------------- +# Store +# --------------------------------------------------------------------------- + + +def default_store_path() -> Path: + """Resolve ``~/.specsmith/endpoints.json``, honouring ``SPECSMITH_HOME``.""" + base = os.environ.get("SPECSMITH_HOME", "").strip() + home = Path(base) if base else Path.home() / ".specsmith" + return home / "endpoints.json" + + +@dataclass +class EndpointStore: + """Read/write wrapper around ``~/.specsmith/endpoints.json``. + + Tokens are never logged. Inline tokens (``auth.kind == "bearer-inline"``) + land in the JSON unchanged, but :meth:`list_public` redacts them. The + keyring-backed and env-backed paths never store secrets in the JSON at + all. + """ + + path: Path + schema_version: int = SCHEMA_VERSION + default_endpoint_id: str = "" + endpoints: list[Endpoint] = field(default_factory=list) + + # ── I/O ──────────────────────────────────────────────────────────────── + + @classmethod + def load(cls, path: Path | None = None) -> EndpointStore: + target = path or default_store_path() + if not target.exists(): + return cls(path=target) + try: + raw = json.loads(target.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise EndpointError( + f"endpoints store at {target} is corrupted: {exc}. " + "Move it aside or fix the JSON to continue." 
+ ) from exc + if not isinstance(raw, dict): + raise EndpointError(f"endpoints store at {target} must be a JSON object") + version = int(raw.get("schema_version") or 0) + if version != SCHEMA_VERSION: + raise EndpointError( + f"endpoints store at {target} uses schema_version={version}; " + f"this build of specsmith only understands {SCHEMA_VERSION}." + ) + endpoints_raw = raw.get("endpoints") or [] + if not isinstance(endpoints_raw, list): + raise EndpointError("endpoints store: 'endpoints' must be a list") + endpoints = [Endpoint.from_dict(item) for item in endpoints_raw] + return cls( + path=target, + schema_version=version, + default_endpoint_id=str(raw.get("default_endpoint_id") or ""), + endpoints=endpoints, + ) + + def save(self) -> None: + self.path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "schema_version": self.schema_version, + "default_endpoint_id": self.default_endpoint_id, + "endpoints": [e.to_dict() for e in self.endpoints], + } + self.path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + # Best-effort lock-down on POSIX + import contextlib + + with contextlib.suppress(Exception): + self.path.chmod(0o600) + + # ── CRUD ─────────────────────────────────────────────────────────────── + + def add(self, endpoint: Endpoint, *, replace: bool = False) -> None: + endpoint.validate() + if not endpoint.created_at: + endpoint.created_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + existing = self._index(endpoint.id) + if existing is not None: + if not replace: + raise EndpointError( + f"endpoint {endpoint.id!r} already exists. Use --replace to overwrite." 
+ ) + self.endpoints[existing] = endpoint + else: + self.endpoints.append(endpoint) + if not self.default_endpoint_id: + self.default_endpoint_id = endpoint.id + + def remove(self, endpoint_id: str) -> bool: + idx = self._index(endpoint_id) + if idx is None: + return False + self.endpoints.pop(idx) + if self.default_endpoint_id == endpoint_id: + self.default_endpoint_id = self.endpoints[0].id if self.endpoints else "" + return True + + def get(self, endpoint_id: str) -> Endpoint: + idx = self._index(endpoint_id) + if idx is None: + raise EndpointError(f"unknown endpoint id {endpoint_id!r}") + return self.endpoints[idx] + + def get_default(self) -> Endpoint | None: + if not self.default_endpoint_id: + return None + idx = self._index(self.default_endpoint_id) + if idx is None: + return None + return self.endpoints[idx] + + def set_default(self, endpoint_id: str) -> None: + if self._index(endpoint_id) is None: + raise EndpointError(f"unknown endpoint id {endpoint_id!r}") + self.default_endpoint_id = endpoint_id + + def list_all(self) -> list[Endpoint]: + return list(self.endpoints) + + def list_public(self) -> list[dict[str, Any]]: + return [e.to_public_dict() for e in self.endpoints] + + def resolve(self, endpoint_id: str | None) -> Endpoint: + """Return the named endpoint, or the default if ``endpoint_id`` is empty.""" + if endpoint_id: + return self.get(endpoint_id) + default = self.get_default() + if default is None: + raise EndpointError( + "no endpoint specified and no default is set. " + "Run `specsmith endpoints add ...` to register one." 
+ ) + return default + + # ── Internals ────────────────────────────────────────────────────────── + + def _index(self, endpoint_id: str) -> int | None: + for i, e in enumerate(self.endpoints): + if e.id == endpoint_id: + return i + return None + + +__all__ = [ + "DEFAULT_KEYRING_SERVICE", + "Endpoint", + "EndpointAuth", + "EndpointError", + "EndpointHealth", + "EndpointStore", + "SCHEMA_VERSION", + "VALID_AUTH_KINDS", + "default_store_path", +] diff --git a/src/specsmith/cli.py b/src/specsmith/cli.py index 6c54f4f..cceefc6 100644 --- a/src/specsmith/cli.py +++ b/src/specsmith/cli.py @@ -4561,6 +4561,332 @@ def voice_status_cmd() -> None: raise SystemExit(2) +# --------------------------------------------------------------------------- +# specsmith endpoints — Bring-Your-Own-Endpoint store (REQ-142) +# --------------------------------------------------------------------------- + + +@main.group(name="endpoints") +def endpoints_group() -> None: + """Manage OpenAI-v1-compatible LLM endpoints (REQ-142). + + Lets you register one or more self-hosted backends (vLLM, llama.cpp + server, LM Studio, TGI, ...) and pick between them per session via + ``--endpoint `` on ``specsmith run`` / ``chat`` / ``serve``. + Stored at ``~/.specsmith/endpoints.json``; tokens default to the OS + keyring. + """ + + +def _resolve_keyring_user(endpoint_id: str, override: str) -> str: + return override.strip() or f"endpoint:{endpoint_id}" + + +@endpoints_group.command(name="add") +@click.option("--id", "endpoint_id", required=True, help="Stable identifier (no whitespace).") +@click.option("--name", default="", help="Human-readable display name (defaults to id).") +@click.option( + "--base-url", "base_url", required=True, help="OpenAI-v1 base URL, e.g. 
http://10.0.0.4:8000/v1" +) +@click.option("--default-model", default="", help="Optional default model id.") +@click.option( + "--auth", + "auth_kind", + type=click.Choice( + list( + __import__("specsmith.agent.endpoints", fromlist=["VALID_AUTH_KINDS"]).VALID_AUTH_KINDS + ) + ), + default="none", + show_default=True, + help="Auth strategy: none / bearer-inline / bearer-env / bearer-keyring.", +) +@click.option("--token", default="", help="Inline bearer token (only with --auth bearer-inline).") +@click.option("--token-env", default="", help="Env var name (only with --auth bearer-env).") +@click.option( + "--keyring-service", default="", help="Override the keyring service (default: 'specsmith')." +) +@click.option( + "--keyring-user", default="", help="Override the keyring user (default: 'endpoint:')." +) +@click.option( + "--no-verify-tls", + is_flag=True, + default=False, + help="Disable TLS certificate verification for this endpoint (insecure).", +) +@click.option("--tag", "tags", multiple=True, help="Optional free-form tag (repeatable).") +@click.option( + "--replace", + is_flag=True, + default=False, + help="Overwrite an existing endpoint with the same id.", +) +@click.option( + "--set-default", + is_flag=True, + default=False, + help="After saving, mark this endpoint as the default.", +) +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_add( + endpoint_id: str, + name: str, + base_url: str, + default_model: str, + auth_kind: str, + token: str, + token_env: str, + keyring_service: str, + keyring_user: str, + no_verify_tls: bool, + tags: tuple[str, ...], + replace: bool, + set_default: bool, + as_json: bool, +) -> None: + """Register a new endpoint in ``~/.specsmith/endpoints.json``. + + For ``--auth bearer-keyring`` the token is prompted for (no echo) and + stored in the OS keyring via the existing :mod:`keyring` integration; + nothing secret lands in the JSON itself. 
+ """ + import json as _json + + from specsmith.agent.endpoints import ( + DEFAULT_KEYRING_SERVICE, + Endpoint, + EndpointAuth, + EndpointError, + EndpointStore, + ) + + auth_token = token + if auth_kind == "bearer-keyring" and not token: + try: + auth_token = click.prompt( + f"Token for endpoint {endpoint_id!r} (will be stored in OS keyring)", + hide_input=True, + confirmation_prompt=False, + default="", + show_default=False, + ) + except click.Abort as exc: # pragma: no cover - interactive abort + raise SystemExit(2) from exc + if not auth_token: + console.print("[red]Refusing to store an empty keyring token.[/red]") + raise SystemExit(2) + + auth = EndpointAuth( + kind=auth_kind, + token=auth_token if auth_kind == "bearer-inline" else "", + token_env=token_env, + keyring_service=keyring_service or DEFAULT_KEYRING_SERVICE, + keyring_user=_resolve_keyring_user(endpoint_id, keyring_user) + if auth_kind == "bearer-keyring" + else keyring_user, + ) + endpoint = Endpoint( + id=endpoint_id.strip(), + name=name.strip() or endpoint_id.strip(), + base_url=base_url.strip(), + auth=auth, + default_model=default_model.strip(), + verify_tls=not no_verify_tls, + tags=list(tags), + ) + + store = EndpointStore.load() + try: + store.add(endpoint, replace=replace) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(2) from exc + + if auth_kind == "bearer-keyring": + try: + import keyring # type: ignore[import-not-found] + + keyring.set_password(auth.keyring_service, auth.keyring_user, auth_token) + except Exception as exc: # noqa: BLE001 + console.print( + f"[yellow]Warning:[/yellow] keyring write failed ({exc}). " + "Endpoint metadata saved, but the token was not stored." 
+ ) + + if set_default: + store.set_default(endpoint.id) + store.save() + + public = endpoint.to_public_dict() + if as_json: + click.echo( + _json.dumps( + {"endpoint": public, "default": store.default_endpoint_id}, + indent=2, + ) + ) + return + console.print( + f"[green]\u2713[/green] saved endpoint [bold]{endpoint.id}[/bold] " + f"({endpoint.base_url}, auth={auth_kind})" + ) + if store.default_endpoint_id == endpoint.id: + console.print(" [dim]marked as default.[/dim]") + + +@endpoints_group.command(name="list") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_list(as_json: bool) -> None: + """List every registered endpoint (tokens are redacted).""" + import json as _json + + from specsmith.agent.endpoints import EndpointStore + + store = EndpointStore.load() + items = store.list_public() + payload = {"default_endpoint_id": store.default_endpoint_id, "endpoints": items} + if as_json: + click.echo(_json.dumps(payload, indent=2)) + return + if not items: + console.print("[dim]No endpoints registered. Run `specsmith endpoints add ...`.[/dim]") + return + for item in items: + marker = "*" if item["id"] == store.default_endpoint_id else " " + console.print( + f"{marker} [bold]{item['id']}[/bold] {item['base_url']} " + f"[dim]auth={item['auth']['kind']}, model={item['default_model'] or '-'}[/dim]" + ) + + +@endpoints_group.command(name="remove") +@click.argument("endpoint_id") +@click.option( + "--purge-keyring", + is_flag=True, + default=False, + help="Also delete the bearer-keyring entry for this endpoint.", +) +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_remove(endpoint_id: str, purge_keyring: bool, as_json: bool) -> None: + """Remove an endpoint by id. 
Exits 1 if the id is unknown.""" + import json as _json + + from specsmith.agent.endpoints import EndpointStore + + store = EndpointStore.load() + target = store.get(endpoint_id) if store._index(endpoint_id) is not None else None + removed = store.remove(endpoint_id) + if not removed: + console.print(f"[red]unknown endpoint id {endpoint_id!r}[/red]") + raise SystemExit(1) + if purge_keyring and target is not None and target.auth.kind == "bearer-keyring": + try: + import keyring # type: ignore[import-not-found] + + keyring.delete_password(target.auth.keyring_service, target.auth.keyring_user) + except Exception: # noqa: BLE001 + pass + store.save() + if as_json: + click.echo( + _json.dumps( + {"removed": endpoint_id, "default_endpoint_id": store.default_endpoint_id}, + indent=2, + ) + ) + return + console.print(f"[green]\u2713[/green] removed endpoint {endpoint_id!r}") + + +@endpoints_group.command(name="default") +@click.argument("endpoint_id") +def endpoints_default(endpoint_id: str) -> None: + """Mark an existing endpoint as the default for unqualified runs.""" + from specsmith.agent.endpoints import EndpointError, EndpointStore + + store = EndpointStore.load() + try: + store.set_default(endpoint_id) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + store.save() + console.print(f"[green]\u2713[/green] default endpoint = {endpoint_id!r}") + + +@endpoints_group.command(name="test") +@click.argument("endpoint_id", required=False, default="") +@click.option("--timeout", type=float, default=5.0, help="Request timeout in seconds.") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_test(endpoint_id: str, timeout: float, as_json: bool) -> None: + """Probe ENDPOINT_ID's /models route. 
Defaults to the default endpoint.""" + import json as _json + + from specsmith.agent.endpoints import EndpointError, EndpointStore + + store = EndpointStore.load() + try: + endpoint = store.resolve(endpoint_id or None) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + health = endpoint.health(timeout=timeout) + if as_json: + click.echo(_json.dumps({"id": endpoint.id, **health.to_dict()}, indent=2)) + else: + if health.ok: + console.print( + f"[green]\u2713[/green] {endpoint.id} ok in " + f"{int(health.latency_ms)} ms ({len(health.models)} models)" + ) + for model in health.models[:5]: + console.print(f" [dim]\u2022 {model}[/dim]") + if len(health.models) > 5: + console.print(f" [dim]... +{len(health.models) - 5} more[/dim]") + else: + console.print(f"[red]\u2717[/red] {endpoint.id} failed: {health.error}") + if not health.ok: + raise SystemExit(1) + + +@endpoints_group.command(name="models") +@click.argument("endpoint_id", required=False, default="") +@click.option("--timeout", type=float, default=5.0, help="Request timeout in seconds.") +@click.option("--json", "as_json", is_flag=True, default=False) +def endpoints_models(endpoint_id: str, timeout: float, as_json: bool) -> None: + """List every model the endpoint advertises via /v1/models.""" + import json as _json + + from specsmith.agent.endpoints import EndpointError, EndpointStore + + store = EndpointStore.load() + try: + endpoint = store.resolve(endpoint_id or None) + except EndpointError as exc: + console.print(f"[red]{exc}[/red]") + raise SystemExit(1) from exc + health = endpoint.health(timeout=timeout) + if not health.ok: + if as_json: + click.echo(_json.dumps({"id": endpoint.id, "error": health.error}, indent=2)) + else: + console.print(f"[red]\u2717[/red] {endpoint.id} failed: {health.error}") + raise SystemExit(1) + if as_json: + click.echo(_json.dumps({"id": endpoint.id, "models": health.models}, indent=2)) + return + if not health.models: + 
console.print(f"[yellow]\u2014[/yellow] {endpoint.id} returned no models.") + return + for model in health.models: + console.print(model) + + +main.add_command(endpoints_group) + + # --------------------------------------------------------------------------- # specsmith cloud spawn — client side of the receiver (REQ-136) # --------------------------------------------------------------------------- diff --git a/tests/fixtures/api_surface.json b/tests/fixtures/api_surface.json index 449b77f..6ba881c 100644 --- a/tests/fixtures/api_surface.json +++ b/tests/fixtures/api_surface.json @@ -20,6 +20,7 @@ "diff", "doctor", "drive", + "endpoints", "epistemic-audit", "exec", "export", diff --git a/tests/test_endpoints_cli.py b/tests/test_endpoints_cli.py new file mode 100644 index 0000000..5013579 --- /dev/null +++ b/tests/test_endpoints_cli.py @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""CLI integration tests for `specsmith endpoints` (REQ-142, PR-1).""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from specsmith.cli import main + + +@pytest.fixture(autouse=True) +def _no_auto_update(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("SPECSMITH_NO_AUTO_UPDATE", "1") + monkeypatch.setenv("SPECSMITH_PYPI_CHECKED", "1") + + +@pytest.fixture(autouse=True) +def _isolated_specsmith_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """Redirect ``~/.specsmith`` so CLI invocations never touch the real one.""" + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + + +def _runner_invoke(*args: str) -> object: + return CliRunner().invoke(main, list(args)) + + +def _free_port() -> int: + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +class _FakeModelsHandler(http.server.BaseHTTPRequestHandler): + def 
log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + return + + def do_GET(self) -> None: # noqa: N802 + if self.path == "/v1/models": + body = json.dumps( + {"object": "list", "data": [{"id": "fake-1"}, {"id": "fake-2"}]} + ).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + else: + self.send_response(404) + self.end_headers() + + +@pytest.fixture +def fake_endpoint_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeModelsHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + + +def test_endpoints_help_lists_subcommands() -> None: + res = _runner_invoke("endpoints", "--help") + assert res.exit_code == 0 + for sub in ("add", "list", "remove", "default", "test", "models"): + assert sub in res.output + + +def test_endpoints_add_and_list_round_trip(tmp_path: Path) -> None: + res = _runner_invoke( + "endpoints", + "add", + "--id", + "home-vllm", + "--name", + "Home vLLM", + "--base-url", + "http://10.0.0.4:8000/v1", + "--default-model", + "qwen-coder", + "--auth", + "none", + "--json", + ) + assert res.exit_code == 0, res.output + payload = json.loads(res.output) + assert payload["endpoint"]["id"] == "home-vllm" + assert payload["default"] == "home-vllm" + + list_res = _runner_invoke("endpoints", "list", "--json") + assert list_res.exit_code == 0 + listed = json.loads(list_res.output) + assert listed["default_endpoint_id"] == "home-vllm" + assert listed["endpoints"][0]["id"] == "home-vllm" + # token must never leak even when no token was provided + assert "token" not in json.dumps(listed) or listed["endpoints"][0]["auth"]["kind"] == "none" + + +def test_endpoints_add_inline_token_redacts_in_list_output() -> None: + add_res = 
_runner_invoke( + "endpoints", + "add", + "--id", + "secured", + "--name", + "Secured", + "--base-url", + "https://lan.example.com/v1", + "--auth", + "bearer-inline", + "--token", + "sk-supersecret", + "--json", + ) + assert add_res.exit_code == 0, add_res.output + + list_res = _runner_invoke("endpoints", "list", "--json") + assert list_res.exit_code == 0 + body = list_res.output + assert "sk-supersecret" not in body + parsed = json.loads(body) + assert parsed["endpoints"][0]["auth"]["token"] == "***" + + +def test_endpoints_add_duplicate_id_exits_2() -> None: + base_args = [ + "endpoints", + "add", + "--id", + "dup", + "--name", + "d", + "--base-url", + "http://e/v1", + ] + first = _runner_invoke(*base_args) + assert first.exit_code == 0 + second = _runner_invoke(*base_args) + assert second.exit_code == 2 + assert "already exists" in second.output + + +def test_endpoints_add_invalid_url_exits_2() -> None: + res = _runner_invoke( + "endpoints", + "add", + "--id", + "bad", + "--name", + "bad", + "--base-url", + "ftp://nope/v1", + ) + assert res.exit_code == 2 + assert "http://" in res.output + + +def test_endpoints_remove_unknown_exits_1() -> None: + res = _runner_invoke("endpoints", "remove", "ghost") + assert res.exit_code == 1 + assert "unknown endpoint" in res.output + + +def test_endpoints_default_unknown_exits_1() -> None: + res = _runner_invoke("endpoints", "default", "ghost") + assert res.exit_code == 1 + assert "unknown endpoint" in res.output + + +def test_endpoints_default_promotes_existing() -> None: + _runner_invoke("endpoints", "add", "--id", "a", "--name", "a", "--base-url", "http://e/v1") + _runner_invoke("endpoints", "add", "--id", "b", "--name", "b", "--base-url", "http://e/v1") + res = _runner_invoke("endpoints", "default", "b") + assert res.exit_code == 0 + listed = json.loads(_runner_invoke("endpoints", "list", "--json").output) + assert listed["default_endpoint_id"] == "b" + + +def test_endpoints_test_against_fake_server(fake_endpoint_server: 
int) -> None: + port = fake_endpoint_server + add = _runner_invoke( + "endpoints", + "add", + "--id", + "fake", + "--name", + "fake", + "--base-url", + f"http://127.0.0.1:{port}/v1", + ) + assert add.exit_code == 0, add.output + + res = _runner_invoke("endpoints", "test", "fake", "--json", "--timeout", "2") + assert res.exit_code == 0, res.output + payload = json.loads(res.output) + assert payload["ok"] is True + assert "fake-1" in payload["models"] + + +def test_endpoints_models_against_fake_server(fake_endpoint_server: int) -> None: + port = fake_endpoint_server + _runner_invoke( + "endpoints", + "add", + "--id", + "fake", + "--name", + "fake", + "--base-url", + f"http://127.0.0.1:{port}/v1", + ) + res = _runner_invoke("endpoints", "models", "fake", "--json") + assert res.exit_code == 0 + payload = json.loads(res.output) + assert payload["models"] == ["fake-1", "fake-2"] + + +def test_endpoints_test_unreachable_exits_1() -> None: + _runner_invoke( + "endpoints", + "add", + "--id", + "ghost", + "--name", + "ghost", + "--base-url", + "http://127.0.0.1:1/v1", + ) + res = _runner_invoke("endpoints", "test", "ghost", "--json", "--timeout", "0.5") + assert res.exit_code == 1 + payload = json.loads(res.output) + assert payload["ok"] is False + assert payload["error"] diff --git a/tests/test_endpoints_store.py b/tests/test_endpoints_store.py new file mode 100644 index 0000000..b9f9668 --- /dev/null +++ b/tests/test_endpoints_store.py @@ -0,0 +1,350 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. +"""Unit tests for ``specsmith.agent.endpoints`` (REQ-142, PR-1). + +Covers the pure data layer: validation, JSON persistence, redaction, token +resolution dispatch, and the ``/models`` health probe parser. The CLI +group is exercised in ``tests/test_endpoints_cli.py``. 
+""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest + +from specsmith.agent.endpoints import ( + SCHEMA_VERSION, + Endpoint, + EndpointAuth, + EndpointError, + EndpointHealth, + EndpointStore, + _extract_model_ids, + default_store_path, +) + +# --------------------------------------------------------------------------- +# Validation +# --------------------------------------------------------------------------- + + +def test_validate_rejects_empty_id() -> None: + e = Endpoint(id="", name="x", base_url="http://example.com/v1") + with pytest.raises(EndpointError, match="non-empty"): + e.validate() + + +def test_validate_rejects_whitespace_id() -> None: + e = Endpoint(id="my endpoint", name="x", base_url="http://example.com/v1") + with pytest.raises(EndpointError, match="whitespace"): + e.validate() + + +def test_validate_rejects_non_http_scheme() -> None: + e = Endpoint(id="x", name="x", base_url="ftp://example.com/v1") + with pytest.raises(EndpointError, match="http://"): + e.validate() + + +def test_validate_requires_token_env_for_bearer_env() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env=""), + ) + with pytest.raises(EndpointError, match="token_env"): + e.validate() + + +def test_validate_requires_keyring_user_for_bearer_keyring() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-keyring", keyring_user=""), + ) + with pytest.raises(EndpointError, match="keyring_user"): + e.validate() + + +# --------------------------------------------------------------------------- +# Round-trip + redaction +# --------------------------------------------------------------------------- + + +def test_to_public_dict_redacts_inline_token() -> None: + e = Endpoint( + id="vllm", + name="vllm", + base_url="http://10.0.0.4:8000/v1", + 
auth=EndpointAuth(kind="bearer-inline", token="sk-supersecret"), + ) + public = e.to_public_dict() + assert public["auth"]["kind"] == "bearer-inline" + assert public["auth"]["token"] == "***" + assert "sk-supersecret" not in json.dumps(public) + + +def test_store_round_trip(tmp_path: Path) -> None: + path = tmp_path / "endpoints.json" + store = EndpointStore(path=path) + store.add( + Endpoint( + id="home-vllm", + name="Home vLLM", + base_url="http://10.0.0.4:8000/v1", + default_model="qwen-coder", + tags=["local", "coder"], + ) + ) + store.save() + + reloaded = EndpointStore.load(path) + assert reloaded.schema_version == SCHEMA_VERSION + assert reloaded.default_endpoint_id == "home-vllm" + assert len(reloaded.endpoints) == 1 + e = reloaded.endpoints[0] + assert e.base_url == "http://10.0.0.4:8000/v1" + assert e.default_model == "qwen-coder" + assert e.tags == ["local", "coder"] + assert e.created_at # auto-stamped + + +def test_store_load_returns_empty_when_missing(tmp_path: Path) -> None: + path = tmp_path / "absent.json" + store = EndpointStore.load(path) + assert store.endpoints == [] + assert store.default_endpoint_id == "" + + +def test_store_load_rejects_corrupt_json(tmp_path: Path) -> None: + path = tmp_path / "bad.json" + path.write_text("{not json", encoding="utf-8") + with pytest.raises(EndpointError, match="corrupted"): + EndpointStore.load(path) + + +def test_store_load_rejects_wrong_schema(tmp_path: Path) -> None: + path = tmp_path / "v999.json" + path.write_text(json.dumps({"schema_version": 999, "endpoints": []}), encoding="utf-8") + with pytest.raises(EndpointError, match="schema_version=999"): + EndpointStore.load(path) + + +def test_store_add_blocks_duplicates_without_replace(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="dup", name="d", base_url="http://e/v1")) + with pytest.raises(EndpointError, match="already exists"): + store.add(Endpoint(id="dup", name="d", base_url="http://e/v1")) + + +def 
test_store_add_replace_overwrites(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="dup", name="orig", base_url="http://e/v1")) + store.add( + Endpoint(id="dup", name="new", base_url="http://e/v1", default_model="m"), + replace=True, + ) + assert store.get("dup").name == "new" + assert store.get("dup").default_model == "m" + + +def test_remove_clears_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="a", name="a", base_url="http://e/v1")) + store.add(Endpoint(id="b", name="b", base_url="http://e/v1")) + assert store.default_endpoint_id == "a" + store.remove("a") + # Falls back to the next endpoint in the list, not empty. + assert store.default_endpoint_id == "b" + store.remove("b") + assert store.default_endpoint_id == "" + + +def test_resolve_uses_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + store.add(Endpoint(id="a", name="a", base_url="http://e/v1")) + assert store.resolve(None).id == "a" + + +def test_resolve_raises_when_no_default(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + with pytest.raises(EndpointError, match="no endpoint specified"): + store.resolve(None) + + +def test_set_default_rejects_unknown(tmp_path: Path) -> None: + store = EndpointStore(path=tmp_path / "x.json") + with pytest.raises(EndpointError, match="unknown endpoint"): + store.set_default("ghost") + + +def test_default_store_path_honours_specsmith_home( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + assert default_store_path() == tmp_path / "endpoints.json" + + +# --------------------------------------------------------------------------- +# Token resolution +# --------------------------------------------------------------------------- + + +def test_resolve_token_none_returns_none() -> None: + e = Endpoint(id="x", name="x", 
base_url="http://e/v1") + assert e.resolve_token() is None + + +def test_resolve_token_bearer_inline_returns_value() -> None: + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-inline", token="sk-abc"), + ) + assert e.resolve_token() == "sk-abc" + + +def test_resolve_token_bearer_env_reads_environment( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("MY_VLLM_TOKEN", "lan-token") + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env="MY_VLLM_TOKEN"), + ) + assert e.resolve_token() == "lan-token" + + +def test_resolve_token_bearer_env_raises_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("ABSENT_TOKEN", raising=False) + e = Endpoint( + id="x", + name="x", + base_url="http://e/v1", + auth=EndpointAuth(kind="bearer-env", token_env="ABSENT_TOKEN"), + ) + with pytest.raises(EndpointError, match="ABSENT_TOKEN"): + e.resolve_token() + + +# --------------------------------------------------------------------------- +# /models parser +# --------------------------------------------------------------------------- + + +def test_extract_model_ids_handles_openai_shape() -> None: + payload = {"object": "list", "data": [{"id": "m1"}, {"id": "m2"}]} + assert _extract_model_ids(payload) == ["m1", "m2"] + + +def test_extract_model_ids_handles_models_array_shape() -> None: + payload = {"models": ["a", "b"]} + assert _extract_model_ids(payload) == ["a", "b"] + + +def test_extract_model_ids_returns_empty_for_unrecognised_payload() -> None: + assert _extract_model_ids({"unexpected": True}) == [] + assert _extract_model_ids("not a dict") == [] + + +# --------------------------------------------------------------------------- +# health() against an in-process fake /v1/models server +# --------------------------------------------------------------------------- + + +def _free_port() -> int: + with socket.socket() as s: + 
s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +class _FakeModelsHandler(http.server.BaseHTTPRequestHandler): + """Serves OpenAI-shape /v1/models payloads for health() tests.""" + + expected_token: str | None = None # set per-test via class attribute + + def log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + # Quiet the test runner. + return + + def do_GET(self) -> None: # noqa: N802 + if self.path != "/v1/models": + self.send_response(404) + self.end_headers() + return + if self.expected_token is not None: + got = self.headers.get("Authorization", "") + if got != f"Bearer {self.expected_token}": + self.send_response(401) + self.end_headers() + self.wfile.write(b'{"error": "unauthorized"}') + return + body = json.dumps( + {"object": "list", "data": [{"id": "fake-model-1"}, {"id": "fake-model-2"}]} + ).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + +@pytest.fixture +def fake_models_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeModelsHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + + +def test_health_against_fake_server_lists_models(fake_models_server: int) -> None: + port = fake_models_server + e = Endpoint(id="fake", name="fake", base_url=f"http://127.0.0.1:{port}/v1") + health = e.health(timeout=2.0) + assert isinstance(health, EndpointHealth) + assert health.ok + assert health.status_code == 200 + assert "fake-model-1" in health.models + + +def test_health_against_unauthenticated_request_returns_401( + fake_models_server: int, +) -> None: + port = fake_models_server + _FakeModelsHandler.expected_token = "right-token" + try: + e = Endpoint( + id="fake", + name="fake", + 
base_url=f"http://127.0.0.1:{port}/v1", + auth=EndpointAuth(kind="bearer-inline", token="wrong-token"), + ) + health = e.health(timeout=2.0) + assert not health.ok + assert health.status_code == 401 + finally: + _FakeModelsHandler.expected_token = None + + +def test_health_against_unreachable_endpoint_returns_error() -> None: + e = Endpoint(id="ghost", name="ghost", base_url="http://127.0.0.1:1/v1") + health = e.health(timeout=0.5) + assert not health.ok + assert health.error From 9ecd39e37ab51c60318328119149b107b5719072 Mon Sep 17 00:00:00 2001 From: Tristen Pierson Date: Fri, 1 May 2026 07:49:12 -0400 Subject: [PATCH 2/3] feat(cli): --endpoint flag + openai-compat provider driver (REQ-142) Phase 2 of the Bring-Your-Own-Endpoint sprint. Wires the registry from PR-1 into the chat surface and the persistent serve loop. - src/specsmith/agent/chat_runner.py: new _run_openai_compat driver streams from a registered Endpoint via raw stdlib HTTP / SSE (no openai SDK dependency). run_chat() takes an optional endpoint_id; when set, the BYOE store is consulted and the resolved endpoint short-circuits the auto-detect provider chain. Failure modes (unreachable, 401, missing default model) fall back gracefully. - src/specsmith/cli.py: 'specsmith chat --endpoint ' threads through to run_chat. 'specsmith serve --endpoint ' resolves the endpoint at startup, derives provider+model, and exports SPECSMITH_ACTIVE_ENDPOINT for downstream consumers. - tests/test_chat_runner_openai_compat.py: 4 new pytest cases against an in-process fake /v1/chat/completions SSE server. Covers happy-path streaming, missing default-model fallback, 401-on-bad-token fallback, and the run_chat entry point with endpoint_id resolution. Validation: ruff lint + format clean, 82/82 passing across the new + existing endpoint and warp parity suites. 
Co-Authored-By: Oz --- src/specsmith/agent/chat_runner.py | 99 +++++++++++- src/specsmith/cli.py | 49 +++++- tests/test_chat_runner_openai_compat.py | 195 ++++++++++++++++++++++++ 3 files changed, 340 insertions(+), 3 deletions(-) create mode 100644 tests/test_chat_runner_openai_compat.py diff --git a/src/specsmith/agent/chat_runner.py b/src/specsmith/agent/chat_runner.py index 24b62cf..7f55acc 100644 --- a/src/specsmith/agent/chat_runner.py +++ b/src/specsmith/agent/chat_runner.py @@ -80,11 +80,35 @@ def run_chat( history: list[dict[str, Any]] | None = None, confidence_target: float = 0.7, rules_prefix: str = "", + endpoint_id: str | None = None, ) -> ChatRunResult | None: - """Drive a real LLM turn. Return ``None`` if no provider is reachable.""" + """Drive a real LLM turn. Return ``None`` if no provider is reachable. + + When ``endpoint_id`` is set, the BYOE store (REQ-142) is consulted and + the resolved :class:`Endpoint` short-circuits the provider chain via + the new :func:`_run_openai_compat` driver. Any error during endpoint + resolution falls back to the legacy auto-detect chain so an offline + misconfigured endpoint never breaks `specsmith chat`. + """ history = history or [] messages = _build_messages(utterance, history, rules_prefix) + # REQ-142: explicit endpoint override. + if endpoint_id: + try: + from specsmith.agent.endpoints import EndpointStore + + endpoint = EndpointStore.load().resolve(endpoint_id) + except Exception: # noqa: BLE001 - any failure → fall back to auto-detect + endpoint = None + if endpoint is not None: + try: + full_text = _run_openai_compat(messages, emitter, msg_block, endpoint=endpoint) + except Exception: # noqa: BLE001 - degrade to auto-detect + full_text = None + if full_text is not None: + return _finalize(full_text, "openai_compat", project_dir, confidence_target) + # Order matters: Ollama first because it's local-first and free. 
for provider in (_run_ollama, _run_anthropic, _run_openai, _run_gemini): try: @@ -228,6 +252,79 @@ def _run_openai( return "".join(pieces) if pieces else None +def _run_openai_compat( + messages: list[dict[str, str]], + emitter: EventEmitter, + block_id: str, + *, + endpoint: Any, +) -> str | None: + """Stream from a user-registered OpenAI-v1-compatible endpoint (REQ-142). + + Uses raw stdlib HTTP so the openai SDK is not a hard dependency for + BYOE. Sends a streaming ``/chat/completions`` request, decodes the + Server-Sent-Events ``data:`` lines, and forwards each ``content`` + delta as a ``token`` event on ``block_id``. + """ + base_url = endpoint.base_url.rstrip("/") + url = f"{base_url}/chat/completions" + model = endpoint.default_model or os.environ.get("SPECSMITH_OPENAI_COMPAT_MODEL", "") + if not model: + # The endpoint did not pin a default model and the env override is + # absent. We cannot fabricate one; fall back to the auto-detect chain. + return None + + headers: dict[str, str] = { + "Content-Type": "application/json", + "Accept": "text/event-stream", + } + try: + token = endpoint.resolve_token() + except Exception: # noqa: BLE001 - fall back to auto-detect chain + return None + if token: + headers["Authorization"] = f"Bearer {token}" + + body = json.dumps({"model": model, "messages": messages, "stream": True}).encode("utf-8") + req = Request(url, data=body, headers=headers, method="POST") # noqa: S310 - user-supplied + + ctx = None + if not endpoint.verify_tls and url.startswith("https://"): + import ssl + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + pieces: list[str] = [] + try: + with urlopen(req, timeout=120, context=ctx) as resp: # noqa: S310 - user-supplied + for raw_line in resp: + line = raw_line.decode("utf-8", errors="replace").rstrip("\n\r") + if not line.startswith("data:"): + continue + payload = line[len("data:") :].strip() + if not payload or payload == "[DONE]": + if payload == 
"[DONE]": + break + continue + try: + obj = json.loads(payload) + except ValueError: + continue + choices = obj.get("choices") or [] + if not choices: + continue + delta = (choices[0] or {}).get("delta") or {} + chunk = str(delta.get("content") or "") + if chunk: + emitter.token(block_id, chunk) + pieces.append(chunk) + except (URLError, TimeoutError, OSError): + return None + return "".join(pieces) if pieces else None + + def _run_gemini( messages: list[dict[str, str]], emitter: EventEmitter, diff --git a/src/specsmith/cli.py b/src/specsmith/cli.py index cceefc6..89e3dbf 100644 --- a/src/specsmith/cli.py +++ b/src/specsmith/cli.py @@ -2806,6 +2806,16 @@ def run_cmd( "liveness probes still work." ), ) +@click.option( + "--endpoint", + "endpoint_id", + default="", + help=( + "Route turns through a registered BYOE endpoint (REQ-142). When set, " + "the resolved endpoint's base_url, default model, and bearer token " + "override --provider / --model for OpenAI-v1-compatible backends." + ), +) def serve_cmd( project_dir: str, provider: str, @@ -2813,6 +2823,7 @@ def serve_cmd( port: int, host: str, auth_token: str, + endpoint_id: str, ) -> None: """Start a persistent HTTP server for agent sessions. @@ -2824,12 +2835,34 @@ def serve_cmd( specsmith serve --port 8421 --provider ollama --model qwen2.5:14b \ --auth-token $(specsmith auth get serve) """ + import os + from specsmith.serve import run_server + # REQ-142: when --endpoint is given, derive provider+model from the + # endpoint registry so the serve loop can hand off to the OpenAI-compat + # driver in chat_runner. The bridge surfaces the original --provider + # value as a fallback when the endpoint can't be resolved. 
+ effective_provider = provider + effective_model = model + if endpoint_id: + try: + from specsmith.agent.endpoints import EndpointStore + + resolved = EndpointStore.load().resolve(endpoint_id) + effective_provider = "openai-compat" + effective_model = resolved.default_model or model + os.environ["SPECSMITH_ACTIVE_ENDPOINT"] = resolved.id + except Exception as exc: # noqa: BLE001 + console.print( + f"[yellow]Warning:[/yellow] could not resolve endpoint " + f"{endpoint_id!r}: {exc}. Falling back to --provider {provider}." + ) + run_server( project_dir=project_dir, - provider=provider, - model=model, + provider=effective_provider, + model=effective_model, port=port, host=host, auth_token=auth_token, @@ -5881,6 +5914,16 @@ def index_search_cmd(query: str, project_dir: str, limit: int) -> None: default=120.0, help="Seconds to wait for a stdin decision before falling back to deny.", ) +@click.option( + "--endpoint", + "endpoint_id", + default="", + help=( + "Route the LLM turn to a registered BYOE endpoint (REQ-142). " + "See `specsmith endpoints add ...`. When empty, falls back to the " + "auto-detect provider chain (Ollama / Anthropic / OpenAI / Gemini)." + ), +) def chat_cmd( utterance: str, project_dir: str, @@ -5891,6 +5934,7 @@ def chat_cmd( json_events: bool, interactive: bool, decision_timeout: float, + endpoint_id: str, ) -> None: """Run a single chat turn, streaming JSONL block events to stdout. @@ -6043,6 +6087,7 @@ def chat_cmd( msg_block=msg_block, history=history, rules_prefix=rules_prefix, + endpoint_id=endpoint_id or None, ) except Exception: # noqa: BLE001 - real chat is best-effort real_result = None diff --git a/tests/test_chat_runner_openai_compat.py b/tests/test_chat_runner_openai_compat.py new file mode 100644 index 0000000..9900d6c --- /dev/null +++ b/tests/test_chat_runner_openai_compat.py @@ -0,0 +1,195 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 BitConcepts, LLC. All rights reserved. 
+"""End-to-end test for the BYOE openai-compat driver (REQ-142, PR-2). + +Runs an in-process fake ``/chat/completions`` SSE server, wires up an +:class:`Endpoint` pointing at it, and asserts that +:func:`chat_runner.run_chat` streams tokens through the new +``_run_openai_compat`` driver when ``endpoint_id`` is set. +""" + +from __future__ import annotations + +import http.server +import json +import socket +import threading +from pathlib import Path + +import pytest + +from specsmith.agent.chat_runner import _run_openai_compat, run_chat +from specsmith.agent.endpoints import Endpoint, EndpointAuth, EndpointStore +from specsmith.agent.events import EventEmitter + + +def _free_port() -> int: + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +_REPLY = "Plan:\n- ok\nFiles changed:\n- a.py\nTest results:\nNext action:\n" + + +class _FakeChatHandler(http.server.BaseHTTPRequestHandler): + """Streams a canned SSE chat-completions response.""" + + expected_token: str | None = None + last_request_body: dict | None = None + + def log_message(self, *args: object, **kwargs: object) -> None: # noqa: D401 + return + + def do_POST(self) -> None: # noqa: N802 + if self.path != "/v1/chat/completions": + self.send_response(404) + self.end_headers() + return + if self.expected_token is not None: + got = self.headers.get("Authorization", "") + if got != f"Bearer {self.expected_token}": + self.send_response(401) + self.end_headers() + return + length = int(self.headers.get("Content-Length", "0") or 0) + body = self.rfile.read(length).decode("utf-8") + try: + _FakeChatHandler.last_request_body = json.loads(body) + except json.JSONDecodeError: + _FakeChatHandler.last_request_body = None + + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.end_headers() + + chunks = [_REPLY[i : i + 16] for i in range(0, len(_REPLY), 16)] + for chunk in chunks: + payload = json.dumps( + { + "id": "chatcmpl-fake", + "object": 
"chat.completion.chunk", + "choices": [{"delta": {"content": chunk}, "index": 0}], + } + ) + self.wfile.write(f"data: {payload}\n\n".encode()) + self.wfile.flush() + self.wfile.write(b"data: [DONE]\n\n") + self.wfile.flush() + + +@pytest.fixture +def fake_chat_server() -> object: + port = _free_port() + server = http.server.HTTPServer(("127.0.0.1", port), _FakeChatHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield port + finally: + server.shutdown() + server.server_close() + _FakeChatHandler.expected_token = None + _FakeChatHandler.last_request_body = None + + +# --------------------------------------------------------------------------- +# _run_openai_compat — direct +# --------------------------------------------------------------------------- + + +def test_openai_compat_streams_tokens(fake_chat_server: int) -> None: + port = fake_chat_server + emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + ) + text = _run_openai_compat( + [{"role": "user", "content": "hello"}], emitter, "block-1", endpoint=endpoint + ) + assert text is not None + assert "Files changed" in text + assert _FakeChatHandler.last_request_body is not None + assert _FakeChatHandler.last_request_body["model"] == "fake-model" + assert _FakeChatHandler.last_request_body["stream"] is True + + +def test_openai_compat_returns_none_without_default_model(fake_chat_server: int) -> None: + port = fake_chat_server + emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="", + ) + text = _run_openai_compat( + [{"role": "user", "content": "hi"}], emitter, "block-1", endpoint=endpoint + ) + assert text is None + + +def test_openai_compat_returns_none_when_unauthorised(fake_chat_server: int) -> None: + port = fake_chat_server + _FakeChatHandler.expected_token = "right-token" + 
emitter = EventEmitter() + endpoint = Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + auth=EndpointAuth(kind="bearer-inline", token="wrong-token"), + ) + text = _run_openai_compat( + [{"role": "user", "content": "hi"}], emitter, "block-1", endpoint=endpoint + ) + assert text is None + + +# --------------------------------------------------------------------------- +# run_chat with endpoint_id (PR-2 entry point) +# --------------------------------------------------------------------------- + + +def test_run_chat_with_endpoint_id_routes_to_openai_compat( + fake_chat_server: int, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + port = fake_chat_server + monkeypatch.setenv("SPECSMITH_HOME", str(tmp_path)) + # No ANTHROPIC_API_KEY / OPENAI_API_KEY / GOOGLE_API_KEY → would otherwise + # fall back to the auto-detect chain (Ollama may or may not be running). + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("GOOGLE_API_KEY", raising=False) + + store = EndpointStore.load() + store.add( + Endpoint( + id="fake", + name="fake", + base_url=f"http://127.0.0.1:{port}/v1", + default_model="fake-model", + ) + ) + store.save() + + emitter = EventEmitter() + result = run_chat( + "do something", + project_dir=tmp_path, + profile="standard", + session_id="sess", + emitter=emitter, + msg_block="block-1", + endpoint_id="fake", + ) + assert result is not None + assert result.provider == "openai_compat" + assert "Files changed" in result.raw_text + assert _FakeChatHandler.last_request_body is not None + assert _FakeChatHandler.last_request_body["model"] == "fake-model" From f155fa4d1f4044e2869a5b3a921ba9332a765ffb Mon Sep 17 00:00:00 2001 From: Tristen Pierson Date: Fri, 1 May 2026 07:55:48 -0400 Subject: [PATCH 3/3] release: v0.8.0 (BYOE) Bump pyproject.toml to 0.8.0 to ship the Bring-Your-Own-Endpoint feature (REQ-142): the 
new endpoints store + 'specsmith endpoints' CLI group (PR-1) and the
openai-compat provider driver wired through 'specsmith chat / serve
--endpoint <id>' (PR-2).

Co-Authored-By: Oz
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 89ab47c..3a6d63c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "specsmith"
-version = "0.7.0"
+version = "0.8.0"
 description = "Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands."
 readme = "README.md"
 license = "MIT"