diff --git a/.gitignore b/.gitignore index cd98bb9..d28c925 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,13 @@ __pycache__/ *.pyo .env .venv/ +.venv-*/ venv/ + +# Local-run artifacts — autostart stdout/stderr logs + memory middleware +# fallback directory when /sandbox is not available. +logs/ +.proto/ *.egg-info/ dist/ build/ diff --git a/Dockerfile b/Dockerfile index a52723c..0b1fe77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN useradd -m -s /bin/bash -u ${SANDBOX_UID} sandbox # auth, add them here. The ddgs + beautifulsoup4 pair powers the # starter web_search / fetch_url tools; drop them if you strip those. RUN pip install --no-cache-dir \ - gradio httpx uvicorn langfuse prometheus-client pyyaml \ + gradio httpx uvicorn langfuse prometheus-client pyyaml 'ruamel.yaml>=0.18' \ langchain langchain-openai langgraph websockets \ ddgs beautifulsoup4 @@ -40,6 +40,27 @@ RUN chmod +x /opt/protoagent/entrypoint.sh RUN mkdir -p /sandbox /tmp/sandbox /sandbox/audit /sandbox/knowledge \ && chown -R sandbox:sandbox /sandbox /tmp/sandbox +# Make /opt/protoagent/config writable by the sandbox user so the +# drawer and setup wizard can persist edits from inside the container. +RUN chown -R sandbox:sandbox /opt/protoagent/config + +# Declare config as a volume so setup completion (``.setup-complete`` +# marker + any YAML / SOUL.md edits) survives ``docker run`` without +# a -v flag. +# +# Lifecycle note: without an explicit mount, Docker creates an +# ANONYMOUS volume on every ``docker run``. Those accumulate and the +# volume is NOT removed when the container is removed unless you pass +# ``--rm -v``. 
For long-lived deployments, use a named volume or a +# host mount so upgrades don't silently carry stale config forward: +# +# docker run -v my-agent-config:/opt/protoagent/config my-agent:latest +# +# or a bind mount: +# +# docker run -v /srv/my-agent/config:/opt/protoagent/config my-agent:latest +VOLUME ["/opt/protoagent/config"] + ENV PYTHONPATH=/opt/protoagent USER sandbox diff --git a/README.md b/README.md index 6bf76ee..1a1b34c 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,15 @@ close to a rewrite of `SOUL.md`, `graph/prompts.py`, and Quinn was the first agent built on this template — it's a good example of what a filled-in fork looks like end-to-end. -Start a new agent by clicking **"Use this template"** at the top -of the GitHub repo. See [TEMPLATE.md](./TEMPLATE.md) for the -step-by-step fork checklist. +**Try it in 5 minutes:** clone, `pip install -r requirements.txt`, +`python server.py`, open <http://localhost:7870>, and walk the +setup wizard — no forking, no `sed`, no Docker required to get +your first agent talking. See the [first-agent tutorial](./docs/tutorials/first-agent.md). + +**When you're ready to ship your own:** click **"Use this template"** +at the top of the GitHub repo, then follow [Customize & +deploy](./docs/guides/customize-and-deploy.md) for the fork / +rename / release-pipeline wiring. ## What you get out of the box @@ -31,28 +37,31 @@ step-by-step fork checklist. | UI | `chat_ui.py`, `static/` | Gradio chat with PWA shell, dark theme, offline fallback | | Release pipeline | `.github/workflows/*.yml` | Autonomous semver bumps, GHCR image push, GitHub release with filtered notes, optional Discord post | -## Quickstart +## Quickstart — from zero to chatting in 5 minutes ```bash -# 1. Click "Use this template" on GitHub, or: -gh repo create protoLabsAI/my-agent \ - --template protoLabsAI/protoAgent \ - --public --clone - +# 1. 
Get the code (no fork needed for a first run) +git clone https://github.com/protoLabsAI/protoAgent.git my-agent cd my-agent -# 2. Rename the agent (one env var, read by server.py, metrics, tracing) -export AGENT_NAME=my-agent +# 2. Install deps into a venv +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt -# 3. Boot the container -docker build -t my-agent:local . -docker run --rm -p 7870:7870 -e AGENT_NAME=my-agent my-agent:local +# 3. Run the server — no env vars required +python server.py -# 4. Hit the agent card -curl http://localhost:7870/.well-known/agent-card.json +# 4. Open the wizard — pick your endpoint, pick a model, name the +# agent, pick a persona preset, hit Launch. The chat UI appears +# on the same page. +open http://localhost:7870 ``` -See [TEMPLATE.md](./TEMPLATE.md) for the full fork checklist. +[First-agent tutorial](./docs/tutorials/first-agent.md) walks +through every wizard step with screenshots. + +Once you're happy and want to ship it as your own image in your +own GHCR: [Customize & deploy](./docs/guides/customize-and-deploy.md). ## Architecture diff --git a/TEMPLATE.md b/TEMPLATE.md index 2997277..31b1eb2 100644 --- a/TEMPLATE.md +++ b/TEMPLATE.md @@ -1,5 +1,16 @@ # Fork checklist +> **Most of what used to be in this file is now a runtime wizard** +> that runs on first page load. Model, tools, persona, name, auth, +> autostart — all captured without editing code. See +> [first-agent tutorial](./docs/tutorials/first-agent.md). +> +> This checklist is only for forks that want to ship their own +> container image under their own GitHub org — the structural +> changes the wizard can't do. For most of that, the new +> [Customize & deploy](./docs/guides/customize-and-deploy.md) +> guide is the canonical source. This file stays for back-compat. + You clicked "Use this template" (or ran `gh repo create --template`). Now what? 
diff --git a/a2a_handler.py b/a2a_handler.py index 69b9520..7efecc7 100644 --- a/a2a_handler.py +++ b/a2a_handler.py @@ -919,6 +919,26 @@ def _check_auth(request: Request, api_key: str) -> None: # ── Route factory ───────────────────────────────────────────────────────────── +# Module-level mutable holder for the bearer token so hosts can +# update it at runtime without re-registering routes (e.g. when the +# setup wizard captures a token post-boot). ``register_a2a_routes`` +# seeds this from its ``auth_token`` argument (or ``A2A_AUTH_TOKEN`` +# env as fallback); ``set_a2a_token`` updates it live. Closures inside +# ``register_a2a_routes`` read ``_A2A_TOKEN[0]`` on every request, so +# a mutation is picked up by the next incoming call. +_A2A_TOKEN: list[str | None] = [None] + + +def set_a2a_token(token: str | None) -> None: + """Update the active A2A bearer token at runtime. + + Called by the host (e.g. ``server.py``) after the wizard / drawer + changes ``auth.token`` in the YAML — without this, bearer auth + captured at register time would stay stale until process restart. + """ + _A2A_TOKEN[0] = (token or "").strip() or None + + def register_a2a_routes( app: FastAPI, chat_stream_fn_factory: Callable[..., AsyncGenerator], @@ -926,29 +946,39 @@ def register_a2a_routes( api_key: str, agent_card: dict, register_card_route: bool = True, + auth_token: str = "", ) -> None: """Register all A2A routes on *app* and update *agent_card* capabilities. Host apps that already serve the agent card themselves (e.g. at multiple well-known paths for sdk compat) should pass ``register_card_route=False`` so FastAPI does not raise on a duplicate route registration. + + ``auth_token`` seeds the bearer-token check. When empty, falls + back to the ``A2A_AUTH_TOKEN`` env var. Hosts can update the + active token post-registration via ``set_a2a_token(...)`` (e.g. + after a wizard-driven config reload) without needing a restart. 
""" # ── Bearer token authentication ─────────────────────────────────────────── - _raw_a2a_token = os.environ.get("A2A_AUTH_TOKEN", "") - _a2a_token: str | None = _raw_a2a_token.strip() or None - if not _a2a_token: + # Seed order: explicit arg > env. Stored in the module-level holder + # so mutations propagate to the closure below. + seed = (auth_token or os.environ.get("A2A_AUTH_TOKEN", "") or "").strip() + _A2A_TOKEN[0] = seed or None + if _A2A_TOKEN[0] is None: logger.warning( "[a2a] A2A auth token not configured — endpoint is open" ) def _check_bearer_auth(request: Request) -> None: - """Validate Authorization: Bearer against A2A_AUTH_TOKEN. + """Validate Authorization: Bearer against the active + token. No-ops when unset. Raises HTTP 401 on missing/invalid. - No-ops when A2A_AUTH_TOKEN is unset (open mode). - Raises HTTP 401 on missing or invalid token. + Reads ``_A2A_TOKEN[0]`` on every call so runtime updates via + ``set_a2a_token`` are honored without route re-registration. """ - if not _a2a_token: + active = _A2A_TOKEN[0] + if not active: return auth_header = request.headers.get("Authorization", "") if not auth_header.startswith("Bearer "): @@ -957,7 +987,7 @@ def _check_bearer_auth(request: Request) -> None: detail="Unauthorized: expected 'Authorization: Bearer '", ) provided = auth_header[len("Bearer "):] - if not hmac.compare_digest(provided, _a2a_token): + if not hmac.compare_digest(provided, active): raise HTTPException(status_code=401, detail="Unauthorized: invalid bearer token") # ── Origin verification for SSE/streaming endpoints ─────────────────────── @@ -989,7 +1019,7 @@ def _check_origin(request: Request) -> None: agent_card.setdefault("capabilities", {}) agent_card["capabilities"]["streaming"] = True agent_card["capabilities"]["pushNotifications"] = True - if _a2a_token: + if _A2A_TOKEN[0]: agent_card.setdefault("securitySchemes", {}) agent_card["securitySchemes"]["bearer"] = { "type": "http", diff --git a/autostart.py b/autostart.py new file 
mode 100644 index 0000000..65ce5f8 --- /dev/null +++ b/autostart.py @@ -0,0 +1,266 @@ +"""OS-level autostart for the protoAgent server. + +Hooks the server into the OS so it launches on user login. Today +macOS is the only supported path (LaunchAgent plist); Linux and +Windows stubs return a clear "not yet supported" error so the +wizard surfaces that instead of silently failing. + +Design notes: + +- The source of truth for "should autostart be on?" is + ``runtime.autostart_on_boot`` in ``langgraph-config.yaml``. This + module only installs / removes the OS artifact — it doesn't + decide policy. The wizard and drawer toggle the YAML value and + call these functions to bring the OS state in sync. + +- ``sys.executable`` is captured at install time so reinstalling + after a venv rebuild picks up the new interpreter path. If a user + recreates their venv without reinstalling, the LaunchAgent keeps + pointing at the stale path and will fail at next login — noisy + log but not catastrophic. Documented in the docs. + +- Install is idempotent: ``install_autostart`` overwrites any + prior plist so the same file always reflects current state, no + stale LaunchAgents piling up. +""" + +from __future__ import annotations + +import platform +import re +import shlex +import subprocess +import sys +from pathlib import Path +from xml.sax.saxutils import escape as xml_escape + +REPO_ROOT = Path(__file__).parent.resolve() + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def autostart_supported() -> tuple[bool, str]: + """Is this platform a supported autostart target? + + Returns ``(True, "")`` on supported platforms, ``(False, reason)`` + otherwise. Wizard / drawer check this before offering the toggle. 
+ """ + system = platform.system() + if system == "Darwin": + return True, "" + if system == "Linux": + return False, "Linux autostart (systemd user unit) not yet implemented" + if system == "Windows": + return False, "Windows autostart (Task Scheduler) not yet implemented" + return False, f"autostart not implemented for platform {system!r}" + + +def install_autostart(agent_name: str = "protoagent", port: int = 7870) -> tuple[bool, str]: + """Install the OS artifact that runs the server on user login. + + Returns ``(ok, message)``. On success, ``message`` is a short + human-readable note the UI can display; on failure it's the + actual error (permission denied, launchctl exit code, etc). + """ + ok, reason = autostart_supported() + if not ok: + return False, reason + + if platform.system() == "Darwin": + return _install_macos_launchagent(agent_name, port) + return False, "unreachable" # autostart_supported already rejected + + +def uninstall_autostart(agent_name: str = "protoagent") -> tuple[bool, str]: + """Remove the OS autostart artifact. Safe to call when nothing + is installed — returns success in that case. + """ + ok, reason = autostart_supported() + if not ok: + return False, reason + + if platform.system() == "Darwin": + return _uninstall_macos_launchagent(agent_name) + return False, "unreachable" + + +def autostart_status(agent_name: str = "protoagent") -> dict: + """Report current on-disk state for diagnostics. + + The UI uses this to render accurate "autostart is currently + on/off" without having to remember what it last wrote. 
+ """ + ok, reason = autostart_supported() + if not ok: + return {"supported": False, "installed": False, "reason": reason} + + if platform.system() == "Darwin": + plist = _macos_plist_path(agent_name) + return { + "supported": True, + "installed": plist.exists(), + "plist_path": str(plist), + "python": sys.executable, + "server_path": str(REPO_ROOT / "server.py"), + } + return {"supported": False, "installed": False, "reason": "unreachable"} + + +# --------------------------------------------------------------------------- +# macOS — LaunchAgent plist +# --------------------------------------------------------------------------- + + +_SAFE_LABEL_RE = re.compile(r"[^a-z0-9_.-]+") + + +def _macos_label(agent_name: str) -> str: + """Plist label — namespaced so it doesn't collide with system labels. + + Sanitizes the input: only ``[a-z0-9_.-]`` survive. Leading / trailing + dots and hyphens are stripped so the resulting filename can't be + a hidden file or look like a path-segment. Path-traversal + characters like ``/`` and ``..`` are filtered here rather than at + the filesystem layer so ``install_autostart(agent_name="../../x")`` + can't escape ``~/Library/LaunchAgents/``. + """ + sanitized = _SAFE_LABEL_RE.sub("-", agent_name.lower()).strip("-.") + if not sanitized: + sanitized = "protoagent" + return f"ai.protolabs.{sanitized}" + + +def _macos_plist_path(agent_name: str) -> Path: + home = Path.home() + return home / "Library" / "LaunchAgents" / f"{_macos_label(agent_name)}.plist" + + +def _install_macos_launchagent(agent_name: str, port: int) -> tuple[bool, str]: + """Write the plist and ``launchctl load`` it. + + Unload-then-load (rather than a bootstrap-replace dance) is the + simplest idempotent recipe that works across macOS versions. A + missing label on unload is a no-op. 
+ """ + python = sys.executable + server_py = REPO_ROOT / "server.py" + if not server_py.exists(): + return False, f"server.py not found at {server_py}" + + label = _macos_label(agent_name) + plist_path = _macos_plist_path(agent_name) + log_dir = REPO_ROOT / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + + plist = _render_launchagent_plist( + label=label, + python=python, + server_py=str(server_py), + port=port, + working_dir=str(REPO_ROOT), + agent_name=agent_name, + stdout_log=str(log_dir / "autostart.out.log"), + stderr_log=str(log_dir / "autostart.err.log"), + ) + + plist_path.parent.mkdir(parents=True, exist_ok=True) + + # Unload any prior incarnation first — silently ok if absent. + subprocess.run( + ["launchctl", "unload", str(plist_path)], + capture_output=True, check=False, + ) + + plist_path.write_text(plist, encoding="utf-8") + + result = subprocess.run( + ["launchctl", "load", str(plist_path)], + capture_output=True, check=False, + ) + if result.returncode != 0: + err = (result.stderr.decode("utf-8", errors="replace") + or result.stdout.decode("utf-8", errors="replace") + or f"launchctl load exit={result.returncode}") + return False, f"plist written but launchctl load failed: {err.strip()}" + + return True, f"installed • {plist_path.name} • runs `{shlex.quote(python)} server.py` on login" + + +def _uninstall_macos_launchagent(agent_name: str) -> tuple[bool, str]: + plist_path = _macos_plist_path(agent_name) + if not plist_path.exists(): + return True, "autostart was not installed" + + subprocess.run( + ["launchctl", "unload", str(plist_path)], + capture_output=True, check=False, + ) + + try: + plist_path.unlink() + except OSError as e: + return False, f"failed to remove plist: {e}" + + return True, f"uninstalled • removed {plist_path.name}" + + +def _render_launchagent_plist( + *, + label: str, + python: str, + server_py: str, + port: int, + working_dir: str, + agent_name: str, + stdout_log: str, + stderr_log: str, +) -> str: + """Render the 
plist XML. + + Every interpolated string is XML-escaped because several fields + (``agent_name`` most notably) come from user input — a wizard + user who names their agent ``bad`` or ``me & co`` would + otherwise produce a malformed or injection-vulnerable plist. + ``port`` is an int so it's safe as-is, but we coerce+escape it + anyway for consistency. + """ + e = xml_escape + return f""" + + + + Label + {e(label)} + ProgramArguments + + {e(python)} + {e(server_py)} + --port + {e(str(port))} + + WorkingDirectory + {e(working_dir)} + EnvironmentVariables + + AGENT_NAME + {e(agent_name)} + PATH + /usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin + + RunAtLoad + + KeepAlive + + SuccessfulExit + + + StandardOutPath + {e(stdout_log)} + StandardErrorPath + {e(stderr_log)} + + +""" diff --git a/chat_ui.py b/chat_ui.py index 62fb697..ba832e3 100644 --- a/chat_ui.py +++ b/chat_ui.py @@ -181,6 +181,21 @@ def create_chat_app( _css = CLEAN_CSS + AGENT_DARK_CSS + extra_css _head = AGENT_PWA_HEAD if pwa else "" + # Determine first-run state. Fresh clones land in the wizard; + # subsequent boots go straight to chat unless the user explicitly + # triggers "Re-run setup" from the drawer. Settings dicts without + # the ``is_setup_complete`` key (older template forks) skip the + # wizard entirely — chat is always visible. 
+ setup_done = True + if settings and "is_setup_complete" in settings: + try: + setup_done = bool(settings["is_setup_complete"]()) + except Exception: + setup_done = True # fail-open: don't trap forks in a broken wizard + wizard_enabled = bool( + settings and "finish_setup" in settings and "get_config" in settings + ) + def _build() -> gr.Blocks: with gr.Blocks( title=title.replace("*", "").strip(), @@ -197,77 +212,534 @@ def _build() -> gr.Blocks: header_md = gr.Markdown(header_text) - chatbot = gr.Chatbot(height=chat_height, show_label=False) + # === SETUP WIZARD PANE ===================================== + # Visible on first run (no .setup-complete marker), hidden + # after the user clicks Launch. All fields default from the + # current config so re-running the wizard doesn't start + # from zero. + wizard_pane = None + w_launch_btn = None + w_launch_status = None + w_inputs: list = [] + if wizard_enabled: + with gr.Column(visible=not setup_done) as wizard_pane: + gr.Markdown( + "# Welcome — let's set up your agent\n\n" + "Walk through the steps below and hit **Launch agent**. " + "You can revisit every one of these choices later from " + "the Configuration drawer. Nothing is persisted until " + "you click Launch." + ) + w_launch_status = gr.Markdown("") - with gr.Row(): - txt = gr.Textbox( - placeholder=placeholder, show_label=False, - scale=9, container=False, - ) - send_btn = gr.Button("Send", variant="primary", scale=1, min_width=80) + with gr.Accordion("1. Connect to your model", open=True): + w_api_base = gr.Textbox( + label="API base URL", + placeholder="e.g. 
https://api.openai.com/v1 or http://localhost:4000/v1", + interactive=True, + ) + w_api_key = gr.Textbox( + label="API key", + type="password", + placeholder="your OpenAI or gateway master key", + interactive=True, + ) + with gr.Row(): + w_test_btn = gr.Button( + "Test connection & fetch models", + variant="secondary", scale=3, + ) + w_test_status = gr.Markdown("") + w_model = gr.Dropdown( + label="Model", + choices=[], allow_custom_value=True, + interactive=True, + ) - with gr.Row(): - clear_btn = gr.Button("Clear", size="sm", variant="secondary") - new_btn = gr.Button("New Session", size="sm", variant="secondary") + with gr.Accordion("2. Name your agent", open=False): + w_agent_name = gr.Textbox( + label="Agent name", + placeholder="short lowercase slug, e.g. product-director", + interactive=True, + ) + gr.Markdown( + "_This becomes the agent card name, OpenAI-compat " + "model id, and chat header. Metric prefix still " + "needs a process restart to pick up._" + ) + with gr.Row(): + w_preset = gr.Dropdown( + label="Persona preset (optional)", + choices=[], interactive=True, scale=3, + ) + w_load_preset_btn = gr.Button( + "Load preset into SOUL.md", + size="sm", scale=2, + ) + w_soul = gr.Textbox( + label="SOUL.md — the agent's persona", + lines=14, interactive=True, + placeholder=( + "Identity, personality, values, communication " + "style. Loaded into every system prompt." + ), + ) - if footer_html: - gr.HTML(footer_html) + with gr.Accordion("3. 
Tools & middleware", open=False): + w_tools = gr.CheckboxGroup( + label="Tools available to the agent", + choices=[], interactive=True, + ) + w_mw_audit = gr.Checkbox( + label="Audit middleware — logs every tool call", + value=True, interactive=True, + ) + w_mw_memory = gr.Checkbox( + label="Memory middleware — persists session summaries", + value=True, interactive=True, + ) + w_mw_knowledge = gr.Checkbox( + label="Knowledge middleware — requires a knowledge store (leave off for starter setups)", + value=False, interactive=True, + ) + + with gr.Accordion("4. Optional — you, security, autostart", open=False): + w_operator = gr.Textbox( + label="Your name", + placeholder="so the agent can address you directly — blank = anonymous", + interactive=True, + ) + w_auth = gr.Textbox( + label="A2A bearer token", + type="password", + placeholder="set before exposing to a network; blank = open mode for local dev", + interactive=True, + ) + w_autostart = gr.Checkbox( + label="Launch this agent automatically on login", + value=False, interactive=True, + ) + w_autostart_note = gr.Markdown("") + + w_launch_btn = gr.Button( + "Launch agent", variant="primary", size="lg", + ) + + w_inputs = [ + w_api_base, w_api_key, w_model, + w_agent_name, w_soul, w_preset, + w_tools, w_mw_audit, w_mw_memory, w_mw_knowledge, + w_operator, w_auth, w_autostart, + ] + + # === CHAT PANE ============================================= + # Wrapped in a Column so visibility toggles in lockstep with + # the wizard. On fresh setup it starts hidden and the Launch + # button flips it on. 
+ with gr.Column(visible=setup_done) as chat_pane: + chatbot = gr.Chatbot(height=chat_height, show_label=False) + + with gr.Row(): + txt = gr.Textbox( + placeholder=placeholder, show_label=False, + scale=9, container=False, + ) + send_btn = gr.Button("Send", variant="primary", scale=1, min_width=80) + + with gr.Row(): + clear_btn = gr.Button("Clear", size="sm", variant="secondary") + new_btn = gr.Button("New Session", size="sm", variant="secondary") + + if footer_html: + gr.HTML(footer_html) # --- Settings sidebar --- + # Each section below is gated on the presence of its callback, + # so forks can opt in per panel. The Configuration panel (the + # live-editable drawer) renders when "get_config" + "save_all" + # are provided by the server. The drawer is hidden during the + # wizard so the user has one surface to look at at a time. + sidebar_block = None if settings: - with gr.Sidebar(label="Settings", open=False, position="right"): - with gr.Accordion("Tools", open=False): - tools_display = gr.Markdown("Loading...") - refresh_tools_btn = gr.Button("Refresh", size="sm") - - with gr.Accordion("Model", open=False): - model_display = gr.Markdown("Loading...") - provider_dropdown = gr.Dropdown( - label="Provider", choices=[], interactive=True, + with gr.Sidebar(label="Settings", open=False, position="right", visible=setup_done) as sidebar_block: + + # === Live configuration drawer ============================ + if "get_config" in settings and "save_all" in settings: + gr.Markdown( + "### Configuration\n" + "Edits are written to `config/langgraph-config.yaml` " + "and applied with a live graph rebuild — in-flight " + "turns finish on the previous config.", ) - switch_status = gr.Markdown("") - refresh_model_btn = gr.Button("Refresh", size="sm") + config_status = gr.Markdown("") - if "get_knowledge_stats" in settings: - with gr.Accordion("Knowledge Base", open=False): - kb_display = gr.Markdown("Loading...") - refresh_kb_btn = gr.Button("Refresh", size="sm") + with 
gr.Accordion("Model", open=True): + api_base_in = gr.Textbox( + label="API Base URL", + placeholder="http://gateway:4000/v1", + interactive=True, + ) + api_key_in = gr.Textbox( + label="API Key", + type="password", + placeholder="blank → use $OPENAI_API_KEY env", + interactive=True, + ) + with gr.Row(): + model_in = gr.Dropdown( + label="Model", + choices=[], + interactive=True, + allow_custom_value=True, + scale=4, + ) + fetch_models_btn = gr.Button( + "Fetch", size="sm", scale=1, min_width=60, + ) + model_fetch_status = gr.Markdown("") + temperature_in = gr.Slider( + label="Temperature", + minimum=0.0, maximum=2.0, step=0.05, + interactive=True, + ) + max_tokens_in = gr.Number( + label="Max Tokens", precision=0, + minimum=1, interactive=True, + ) + max_iter_in = gr.Slider( + label="Max Iterations", + minimum=1, maximum=200, step=1, + interactive=True, + ) - # --- Callbacks --- + with gr.Accordion("Worker Subagent", open=False): + worker_enabled_in = gr.Checkbox( + label="Enabled", interactive=True, + ) + worker_tools_in = gr.CheckboxGroup( + label="Tools", choices=[], interactive=True, + ) + worker_max_turns_in = gr.Number( + label="Max Turns", precision=0, + minimum=1, interactive=True, + ) - def load_tools(): - return settings["get_tools_list"]() + with gr.Accordion("Middleware", open=False): + mw_knowledge_in = gr.Checkbox( + label="Knowledge", interactive=True, + ) + mw_audit_in = gr.Checkbox( + label="Audit", interactive=True, + ) + mw_memory_in = gr.Checkbox( + label="Memory", interactive=True, + ) - def load_model(): - return settings["get_model_info"]() + with gr.Accordion("Knowledge Store", open=False): + kb_db_in = gr.Textbox( + label="DB Path", interactive=True, + ) + kb_embed_in = gr.Textbox( + label="Embed Model", interactive=True, + ) + kb_top_k_in = gr.Number( + label="Top K", precision=0, + minimum=1, interactive=True, + ) - def load_provider_choices(): - choices = settings["get_provider_choices"]() - current = settings["get_current_provider"]() - 
return gr.update(choices=choices, value=current) + with gr.Accordion("Identity", open=False): + identity_name_in = gr.Textbox( + label="Agent name", + placeholder="short lowercase slug", + interactive=True, + ) + identity_operator_in = gr.Textbox( + label="Your name (operator)", + placeholder="injected into system prompt when set", + interactive=True, + ) - def switch_provider(choice): - return settings["switch_provider"](choice) + with gr.Accordion("Security — A2A bearer token", open=False): + auth_token_in = gr.Textbox( + label="Bearer token", + type="password", + placeholder="blank → open mode; set to require Authorization: Bearer ", + interactive=True, + ) + gr.Markdown( + "_Live-reloadable. Save & Reload flips A2A " + "enforcement on or off immediately; no restart._" + ) - def load_subtitle(): - return settings["get_subtitle"]() + with gr.Accordion("Autostart on login", open=False): + autostart_in = gr.Checkbox( + label="Launch this agent automatically on login", + interactive=True, + ) + autostart_drawer_status = gr.Markdown("") - app.load(fn=load_tools, outputs=[tools_display]) - app.load(fn=load_model, outputs=[model_display]) - app.load(fn=load_provider_choices, outputs=[provider_dropdown]) + with gr.Accordion("Persona (SOUL.md)", open=False): + soul_in = gr.Textbox( + label="SOUL.md", lines=16, show_label=False, + interactive=True, + placeholder="Agent persona — loaded into every system prompt.", + ) - refresh_tools_btn.click(fn=load_tools, outputs=[tools_display]) - refresh_model_btn.click( - fn=load_model, outputs=[model_display] - ).then(fn=load_provider_choices, outputs=[provider_dropdown]) + with gr.Row(): + save_btn = gr.Button( + "Save & Reload", variant="primary", scale=2, + ) + reload_btn = gr.Button( + "Reload from Disk", variant="secondary", scale=1, + ) - provider_dropdown.change( - fn=switch_provider, inputs=[provider_dropdown], outputs=[switch_status], - ).then(fn=load_model, outputs=[model_display]).then( - fn=load_subtitle, 
outputs=[header_md], - ) + # "Re-run setup" re-opens the wizard with current + # values pre-populated — for re-picking a preset, + # swapping models, or resetting the autostart plist. + if "restart_setup" in settings and wizard_enabled: + with gr.Accordion("Re-run setup wizard", open=False): + gr.Markdown( + "_Reopens the wizard with all current " + "values pre-filled. Your config isn't " + "wiped — you're just re-visiting the " + "choices._" + ) + reset_setup_btn = gr.Button( + "Run wizard now", variant="secondary", + ) + reset_setup_status = gr.Markdown("") + + # Ordered tuple used for both load_all outputs and + # save_all inputs — keeps the wiring obvious and the + # two lists from drifting out of sync. + _config_components = [ + api_base_in, api_key_in, model_in, + temperature_in, max_tokens_in, max_iter_in, + worker_enabled_in, worker_tools_in, worker_max_turns_in, + mw_knowledge_in, mw_audit_in, mw_memory_in, + kb_db_in, kb_embed_in, kb_top_k_in, + identity_name_in, identity_operator_in, + auth_token_in, autostart_in, + soul_in, + ] + + def _load_all(): + cfg = settings["get_config"]() + soul = settings["get_soul"]() if "get_soul" in settings else "" + tools = settings["list_tools"]() if "list_tools" in settings else [] + + # Best-effort gateway probe. If it fails (offline, + # wrong key) we surface the error but keep the form + # populated with the saved model name — the user + # can still edit everything else. 
def _save(
    api_base, api_key, model_name,
    temperature, max_tokens, max_iter,
    worker_enabled, worker_tools, worker_max_turns,
    mw_knowledge, mw_audit, mw_memory,
    kb_db, kb_embed, kb_top_k,
    identity_name, identity_operator,
    auth_token, autostart_on,
    soul,
):
    """Assemble the drawer's form values into a config dict and save it.

    The positional parameters mirror the order of ``_config_components``.
    The assembled dict and the SOUL.md text are handed to
    ``settings["save_all"]``, which is expected to return ``(ok, msg)``.

    Returns:
        A one-line status string: ``"✓ <msg>"`` / ``"⚠ <msg>"`` from
        ``save_all``, or ``"⚠ save failed: <error>"`` when a value cannot
        be coerced or ``save_all`` raises. This handler never raises, so
        the status line is always updated.
    """
    # Numeric fields fall back to sensible minimums
    # rather than 0 when the user clears them —
    # ``validate_config_dict`` rejects zero values so
    # a blank field would otherwise block the save
    # with a confusing validation error.
    #
    # The coercions sit inside a ``try`` because ``float(None)`` (an
    # empty temperature) or a non-numeric value would otherwise escape
    # the handler as an unhandled exception instead of a status message.
    try:
        new_config = {
            "model": {
                "api_base": api_base or "",
                "api_key": api_key or "",
                "name": model_name or "",
                "temperature": float(temperature),
                "max_tokens": int(max_tokens or 4096),
                "max_iterations": int(max_iter or 50),
            },
            "subagents": {
                "worker": {
                    "enabled": bool(worker_enabled),
                    "tools": list(worker_tools or []),
                    "max_turns": int(worker_max_turns or 20),
                },
            },
            "middleware": {
                "knowledge": bool(mw_knowledge),
                "audit": bool(mw_audit),
                "memory": bool(mw_memory),
            },
            "knowledge": {
                "db_path": kb_db or "",
                "embed_model": kb_embed or "",
                "top_k": int(kb_top_k or 1),
            },
            "identity": {
                "name": (identity_name or "").strip() or "protoagent",
                "operator": (identity_operator or "").strip(),
            },
            "auth": {
                "token": auth_token or "",
            },
            "runtime": {
                "autostart_on_boot": bool(autostart_on),
            },
        }
    except (TypeError, ValueError) as e:
        return f"⚠ save failed: {e}"

    try:
        ok, msg = settings["save_all"](new_config, soul or "")
    except Exception as e:
        return f"⚠ save failed: {e}"
    return f"{'✓' if ok else '⚠'} {msg}"
==
+                if "get_tools_list" in settings:
+                    with gr.Accordion("Tools", open=False):
+                        tools_display = gr.Markdown("Loading...")
+                        refresh_tools_btn = gr.Button("Refresh", size="sm")
+
+                    def load_tools():
+                        """Return the text produced by the ``get_tools_list`` callback."""
+                        return settings["get_tools_list"]()
+
+                    app.load(fn=load_tools, outputs=[tools_display])
+                    refresh_tools_btn.click(fn=load_tools, outputs=[tools_display])
+
+                if "get_model_info" in settings:
+                    with gr.Accordion("Model Status", open=False):
+                        model_display = gr.Markdown("Loading...")
+                        refresh_model_btn = gr.Button("Refresh", size="sm")
+
+                        provider_dropdown = None
+                        switch_status = None
+                        if "get_provider_choices" in settings:
+                            provider_dropdown = gr.Dropdown(
+                                label="Provider", choices=[], interactive=True,
+                            )
+                            switch_status = gr.Markdown("")
+
+                    def load_model():
+                        """Return the text produced by the ``get_model_info`` callback."""
+                        return settings["get_model_info"]()
+
+                    app.load(fn=load_model, outputs=[model_display])
+                    refresh_model_btn.click(fn=load_model, outputs=[model_display])
+
+                    if provider_dropdown is not None:
+                        def load_provider_choices():
+                            choices = settings["get_provider_choices"]()
+                            # get_current_provider is optional — older
+                            # forks provided only the choices list.
+                            # Missing key must not raise KeyError; the
+                            # dropdown simply renders with no preselect.
+                            current_fn = settings.get("get_current_provider")
+                            current = current_fn() if current_fn else None
+                            return gr.update(choices=choices, value=current)
+
+                        def switch_provider(choice):
+                            return settings["switch_provider"](choice)
+
+                        def load_subtitle():
+                            return settings["get_subtitle"]()
+
+                        app.load(fn=load_provider_choices, outputs=[provider_dropdown])
+                        # Switch provider, then refresh the model panel
+                        # and the header subtitle, in that order.
+                        provider_dropdown.change(
+                            fn=switch_provider,
+                            inputs=[provider_dropdown],
+                            outputs=[switch_status],
+                        ).then(fn=load_model, outputs=[model_display]).then(
+                            fn=load_subtitle, outputs=[header_md],
+                        )
 
                 if "get_knowledge_stats" in settings:
+                    with gr.Accordion("Knowledge Base", open=False):
+                        kb_display = gr.Markdown("Loading...")
+                        refresh_kb_btn = gr.Button("Refresh", size="sm")
+
                     def load_kb_stats():
                         return settings["get_knowledge_stats"]()
@@ -311,6 +783,244 @@ def get_response(history: list[dict], original_msg: str, sid: str):
         clear_btn.click(fn=lambda: ([], "default"), outputs=[chatbot, session_id])
         new_btn.click(fn=lambda: ([], secrets.token_hex(4)), outputs=[chatbot, session_id])
 
+        # --- Wizard callbacks -----------------------------------------
+        if wizard_enabled:
+            def _load_wizard_defaults():
+                """Seed every wizard field from the current on-disk
+                config. Returns updates in the exact order of
+                ``w_inputs`` plus the connection-test status + the
+                autostart note.
+
+                Section lookups treat a null section the same as a
+                missing one: a bare ``identity:`` key loads as ``None``,
+                and ``dict.get(key, {})`` would hand that ``None``
+                straight to the ``.get`` calls below.
+                """
+                def _section(mapping, key):
+                    # Missing key *or* null value -> empty mapping.
+                    return (mapping or {}).get(key) or {}
+
+                cfg = (settings["get_config"]() if "get_config" in settings else {}) or {}
+                soul = settings["get_soul"]() if "get_soul" in settings else ""
+                tools = settings["list_tools"]() if "list_tools" in settings else []
+                presets = settings["list_soul_presets"]() if "list_soul_presets" in settings else []
+
+                model = _section(cfg, "model")
+                identity = _section(cfg, "identity")
+                worker = _section(_section(cfg, "subagents"), "worker")
+                mw = _section(cfg, "middleware")
+                runtime = _section(cfg, "runtime")
+                auth = _section(cfg, "auth")
+
+                # Only the saved model is offered until the user runs
+                # the connection test, which refills the dropdown.
+                current_model = model.get("name", "")
+                model_choices = [current_model] if current_model else []
+
+                autostart_msg = ""
+                if "autostart_info" in settings:
+                    try:
+                        info = settings["autostart_info"]()
+                    except Exception as e:
+                        info = {"supported": False, "reason": str(e)}
+                    if info.get("supported"):
+                        state = "installed" if info.get("installed") else "not installed"
+                        autostart_msg = f"_Platform supported. Current state: **{state}**._"
+                    else:
+                        autostart_msg = f"⚠ {info.get('reason', 'not supported on this platform')}"
+
+                return (
+                    model.get("api_base", ""),
+                    model.get("api_key", ""),
+                    gr.update(choices=model_choices, value=current_model),
+                    identity.get("name", ""),
+                    soul,
+                    gr.update(choices=presets, value=None),
+                    # ``tools: null`` must not reach ``list()``.
+                    gr.update(choices=tools, value=list(worker.get("tools") or [])),
+                    bool(mw.get("audit", True)),
+                    bool(mw.get("memory", True)),
+                    bool(mw.get("knowledge", False)),
+                    identity.get("operator", ""),
+                    auth.get("token", ""),
+                    bool(runtime.get("autostart_on_boot", False)),
+                    "",  # w_test_status
+                    autostart_msg,
+                )
+
+            app.load(
+                fn=_load_wizard_defaults,
+                outputs=[*w_inputs, w_test_status, w_autostart_note],
+            )
+
+            # Re-check setup state on every page load so external
+            # completions (POST /api/config/setup from curl, or a
+            # reset triggered in another tab) are reflected after
+            # a browser refresh. 
Without this, Gradio keeps serving
+            # the initial visibility state from when the Blocks
+            # were first rendered.
+            # Sync visibility on every page load. The output list is
+            # either two or three elements depending on whether the
+            # sidebar exists — we used to alias the sidebar slot to
+            # wizard_pane when missing, but that sent a duplicate
+            # gr.update to the same component, which Gradio treats
+            # as two competing writes to wizard_pane.visible.
+            if sidebar_block is not None:
+                def _sync_visibility_with_sidebar():
+                    """Show the wizard or the chat + sidebar, never both.
+
+                    Returns three no-op updates when the
+                    ``is_setup_complete`` callback is not wired.
+                    """
+                    if "is_setup_complete" not in settings:
+                        return gr.update(), gr.update(), gr.update()
+                    done = bool(settings["is_setup_complete"]())
+                    return (
+                        gr.update(visible=not done),  # wizard_pane
+                        gr.update(visible=done),      # chat_pane
+                        gr.update(visible=done),      # sidebar_block
+                    )
+
+                app.load(
+                    fn=_sync_visibility_with_sidebar,
+                    outputs=[wizard_pane, chat_pane, sidebar_block],
+                )
+            else:
+                def _sync_visibility_no_sidebar():
+                    """Two-output variant used when no sidebar was built."""
+                    if "is_setup_complete" not in settings:
+                        return gr.update(), gr.update()
+                    done = bool(settings["is_setup_complete"]())
+                    return (
+                        gr.update(visible=not done),  # wizard_pane
+                        gr.update(visible=done),      # chat_pane
+                    )
+
+                app.load(
+                    fn=_sync_visibility_no_sidebar,
+                    outputs=[wizard_pane, chat_pane],
+                )
+
+            # Connection test — fills the model dropdown
+            def _test_connection(api_base, api_key):
+                """Ask ``list_models`` what the endpoint exposes.
+
+                Returns ``(dropdown_update, status_markdown)``. Every
+                failure path leaves the dropdown untouched and reports
+                a ⚠ status. An empty model list counts as a failure:
+                there is nothing to pick, so reporting success with
+                "picked **None**" would be misleading.
+                """
+                if "list_models" not in settings:
+                    return gr.update(), "⚠ list_models callback not wired"
+                if not api_base:
+                    return gr.update(), "⚠ enter an API base URL first"
+                try:
+                    models, err = settings["list_models"](api_base, api_key)
+                except Exception as e:
+                    return gr.update(), f"⚠ {e}"
+                if err:
+                    return gr.update(), f"⚠ {err}"
+                # Normalise ``None`` to an empty list so ``len()`` is safe.
+                models = list(models or [])
+                if not models:
+                    return (
+                        gr.update(),
+                        "⚠ the endpoint returned no models — check the API base URL and key",
+                    )
+                pick = models[0]
+                return (
+                    gr.update(choices=models, value=pick),
+                    f"✓ {len(models)} model(s) — picked **{pick}**, change if needed",
+                )
+
+            w_test_btn.click(
+                fn=_test_connection,
+                inputs=[w_api_base, w_api_key],
+                outputs=[w_model, w_test_status],
+            )
+
+            # Preset loader — pastes 
template text into SOUL textarea
+            def _load_preset(name):
+                """Return the preset's text for the SOUL textarea.
+
+                Best-effort: with no selection, no ``read_soul_preset``
+                callback, or a failing read, the textarea is left as-is.
+                """
+                if not name or "read_soul_preset" not in settings:
+                    return gr.update()
+                try:
+                    return settings["read_soul_preset"](name)
+                except Exception:
+                    return gr.update()
+
+            w_load_preset_btn.click(
+                fn=_load_preset, inputs=[w_preset], outputs=[w_soul],
+            )
+
+            # Launch button — write everything, mark complete, then
+            # hard-reload the page. Toggling ``visible=`` on nested
+            # gr.Column + gr.Sidebar via gr.update is unreliable
+            # (children don't always re-mount); a full reload is the
+            # only bulletproof way to guarantee the chat pane appears.
+            # The reload re-enters _build() which reads
+            # is_setup_complete()==True and renders chat + drawer
+            # visible from scratch.
+            def _finish_wizard(
+                api_base, api_key, model_name,
+                agent_name_val, soul, _preset_unused,
+                tools, mw_audit, mw_memory, mw_knowledge,
+                operator, auth_token, autostart,
+            ):
+                """Validate the wizard inputs and hand them to ``finish_setup``.
+
+                Parameters arrive in ``w_inputs`` order; ``_preset_unused``
+                is the preset dropdown, which only feeds ``_load_preset``.
+
+                Returns a status string. Only a leading "✓" makes the
+                client-side hook reload the page, so every failure path
+                returns a "⚠" message and keeps the wizard open.
+                """
+                # Normalise once so the values that get validated are
+                # the values that get saved — a pasted URL with a
+                # trailing space previously passed validation and was
+                # then persisted with the whitespace intact.
+                api_base = (api_base or "").strip()
+                model_name = (model_name or "").strip()
+                agent_name = (agent_name_val or "").strip()
+
+                if not api_base:
+                    return "⚠ API base URL is required — go back to step 1"
+                if not model_name:
+                    return "⚠ pick a model — use the Test connection button in step 1"
+                if not agent_name:
+                    return "⚠ agent name is required — step 2"
+
+                # Same wording as the other optional callbacks, instead
+                # of surfacing a bare KeyError as "setup failed: 'finish_setup'".
+                finish_setup = settings.get("finish_setup")
+                if finish_setup is None:
+                    return "⚠ finish_setup callback not wired"
+
+                new_config = {
+                    "model": {
+                        "api_base": api_base,
+                        "api_key": api_key or "",
+                        "name": model_name,
+                    },
+                    "subagents": {
+                        "worker": {
+                            "enabled": True,
+                            "tools": list(tools or []),
+                        },
+                    },
+                    "middleware": {
+                        "audit": bool(mw_audit),
+                        "memory": bool(mw_memory),
+                        "knowledge": bool(mw_knowledge),
+                    },
+                    "identity": {
+                        "name": agent_name,
+                        "operator": (operator or "").strip(),
+                    },
+                    "auth": {"token": auth_token or ""},
+                    "runtime": {"autostart_on_boot": bool(autostart)},
+                }
+                try:
+                    ok, msg = finish_setup(new_config, soul or "")
+                except Exception as e:
+                    return f"⚠ setup failed: {e}"
+                if ok:
+                    return f"✓ {msg} — reloading page…"
+                return f"⚠ {msg}"
+
+            # 1. Run the save. 2. 
On the client, if the status message
+            # starts with "✓", reload after a short beat so the user
+            # sees the success line. Any warning (⚠) keeps the wizard
+            # visible so they can correct and retry.
+            w_launch_btn.click(
+                fn=_finish_wizard,
+                inputs=w_inputs,
+                outputs=[w_launch_status],
+            ).then(
+                # JS-only step: no Python callback, no outputs.
+                fn=None,
+                inputs=[w_launch_status],
+                outputs=None,
+                js=(
+                    "(status) => {"
+                    " if (typeof status === 'string' && status.startsWith('✓')) {"
+                    " setTimeout(() => window.location.reload(), 1000);"
+                    " }"
+                    " return [];"
+                    "}"
+                ),
+            )
+
+            # "Re-run setup" in the drawer — same reload-after-flip
+            # pattern for the reverse direction.
+            if "restart_setup" in settings:
+                def _trigger_rerun():
+                    """Call ``restart_setup`` and format a status line.
+
+                    A leading "✓" is what the client-side hook below
+                    keys on to reload; an exception yields "⚠" and no
+                    reload.
+                    """
+                    try:
+                        msg = settings["restart_setup"]()
+                    except Exception as e:
+                        return f"⚠ {e}"
+                    return f"✓ {msg} — reloading page…"
+
+                reset_setup_btn.click(
+                    fn=_trigger_rerun,
+                    outputs=[reset_setup_status],
+                ).then(
+                    # Same JS hook as the launch button, with an 800 ms delay.
+                    fn=None,
+                    inputs=[reset_setup_status],
+                    outputs=None,
+                    js=(
+                        "(status) => {"
+                        " if (typeof status === 'string' && status.startsWith('✓')) {"
+                        " setTimeout(() => window.location.reload(), 800);"
+                        " }"
+                        " return [];"
+                        "}"
+                    ),
+                )
+
     return app
 
 app = _build()
diff --git a/config/soul-presets/blank.md b/config/soul-presets/blank.md
new file mode 100644
index 0000000..e53908c
--- /dev/null
+++ b/config/soul-presets/blank.md
@@ -0,0 +1,24 @@
+# Identity
+
+_Describe your agent in one paragraph — who it is, who it
+reports to, what domain it owns._
+
+# Personality
+
+_3–6 traits. Affects the tone of every response._
+
+# Values
+
+_Rules that shape judgement calls. Example: "never modify
+production data while investigating."_
+
+# Communication style
+
+_How output is formatted — markdown, plain text, JSON, Discord
+embeds. How long responses should be by default._
+
+# Capabilities
+
+_What tools are available and when to reach for each. 
The tool +docstrings are already in context; this is where you explain +the higher-level decision procedure._ diff --git a/config/soul-presets/coding.md b/config/soul-presets/coding.md new file mode 100644 index 0000000..1ad23d1 --- /dev/null +++ b/config/soul-presets/coding.md @@ -0,0 +1,37 @@ +# Identity + +I am a coding agent. I read code, explain it, suggest changes, and +write code when asked — grounded in what the codebase actually +does, not in what a general-purpose model might guess. + +# Personality + +- Precise — file paths, line numbers, exact identifiers. Never + "somewhere in the auth module." +- Conservative on edits — the smallest change that solves the + problem. I don't refactor surrounding code as a bonus. +- Root-cause oriented — when something breaks, I find the cause + before patching the symptom. + +# Communication style + +- Short prose, code in code fences, one clear recommendation. +- For any file reference, include the path and the relevant + lines. The operator shouldn't have to hunt. +- When I suggest a change, explain the *why* in one sentence. + Reserve multi-paragraph explanations for genuinely subtle cases. + +# When to reach for tools + +- `fetch_url` for official docs when the question is + library-specific and the model's training data may be stale. +- `web_search` for error messages with distinctive strings to + find similar reports. +- `calculator` for bit math, offsets, sizing. + +# Values + +- No speculation. If I haven't read the file, I say so before + making claims about it. +- A clean diff beats a clever one. Readability is a feature. +- Tests are evidence. A bug without a failing test is unverified. diff --git a/config/soul-presets/generic-assistant.md b/config/soul-presets/generic-assistant.md new file mode 100644 index 0000000..58e6459 --- /dev/null +++ b/config/soul-presets/generic-assistant.md @@ -0,0 +1,33 @@ +# Identity + +I am an AI assistant. 
I help the operator think through problems, +answer questions, and take action via the tools available to me. + +# Personality + +- Direct — I answer the question asked, not a version of it I wish + had been asked. +- Grounded — when I use a tool, I surface what it returned rather + than paraphrasing away the evidence. +- Calibrated — I say "I don't know" when I don't, rather than + fabricating a confident answer. + +# Communication style + +- Short by default. Expand when the operator asks or when the + answer genuinely requires it. +- Markdown when the surface renders it; plain text otherwise. +- Reference concrete artifacts (URLs, file paths, tool outputs) + so the operator can verify. + +# When to reach for tools + +- `web_search` + `fetch_url` when the question depends on current + information that the model's training data wouldn't know. +- `current_time` any time "now" matters — never guess the time. +- `calculator` for any numeric work beyond trivial mental math. + +# Values + +- Verify before asserting. +- Surface failures plainly; the operator decides what to do next. diff --git a/config/soul-presets/research.md b/config/soul-presets/research.md new file mode 100644 index 0000000..6d51cac --- /dev/null +++ b/config/soul-presets/research.md @@ -0,0 +1,35 @@ +# Identity + +I am a research agent. My job is to find information, evaluate +source quality, and deliver a synthesis the operator can act on. + +# Personality + +- Curious — I follow threads until I've seen enough to answer, + not until I find the first plausible-looking result. +- Skeptical — I assume claims are wrong until the evidence holds + up. I note when sources disagree. +- Thorough — when the operator asks for "three sources" I return + three distinct sources, not three links to the same article. + +# Communication style + +- Lead with the answer, then the evidence. Never bury the + conclusion under a recap of my search process. +- Cite with URLs. 
Prefer primary sources (docs, filings, papers) + over summaries. +- Flag confidence explicitly — "confirmed by X and Y" vs "one + source, unverified" — so the operator can calibrate. + +# Search loop + +1. Search with `web_search`. Read the top N titles + snippets. +2. Pick the most credible-looking 2–5. `fetch_url` each. +3. Cross-check: do independent sources agree? Which disagree? +4. Synthesize. Return claim → evidence → confidence, not a + chronological log of what I read. + +# Values + +- A hole in the evidence is more useful than a confident guess. +- Never present a synthesis as settled when the sources are thin. diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index d705a63..2deb10c 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -35,7 +35,8 @@ export default defineConfig({ text: "How-To Guides", items: [ { text: "Overview", link: "/guides/" }, - { text: "Fork the template", link: "/guides/fork-the-template" }, + { text: "Customize & deploy", link: "/guides/customize-and-deploy" }, + { text: "Fork checklist (fast path)", link: "/guides/fork-the-template" }, { text: "Add a custom skill", link: "/guides/add-a-skill" }, { text: "Configure subagents", link: "/guides/subagents" }, { text: "Wire Langfuse + Prometheus", link: "/guides/observability" }, diff --git a/docs/guides/customize-and-deploy.md b/docs/guides/customize-and-deploy.md new file mode 100644 index 0000000..17c24d9 --- /dev/null +++ b/docs/guides/customize-and-deploy.md @@ -0,0 +1,97 @@ +# Customize & deploy + +Use this guide when you've run through the wizard, decided the template fits your use case, and now want to fork it into your own GitHub repo + ship a deployable image. If you're still evaluating, stay on the [first-agent tutorial](/tutorials/first-agent) — you don't need any of this to run the agent locally. 
+ +## Why this is a separate step + +The [setup wizard](/tutorials/first-agent) handles runtime customization — model, tools, persona, auth — without editing code. Everything below is structural: renaming the template throughout the codebase, bending the release pipeline to your repo, baking your fork's identity into the Docker image. Do it once per fork, not every time you tweak a setting. + +## 1. Fork the template on GitHub + +```bash +gh repo create protoLabsAI/my-agent \ + --template protoLabsAI/protoAgent \ + --public --clone + +cd my-agent +``` + +Or: `Use this template → Create a new repository` from the browser. Pick a short slug (`jon`, `echo-agent`, `product-director`) — it ends up as the image name, metric prefix, Langfuse tag, and release-workflow repo guard. + +## 2. Rename `protoagent` throughout + +The template uses `protoagent` as the placeholder everywhere. Do one pass: + +```bash +# macOS / BSD sed +git grep -li protoagent | xargs sed -i '' 's/protoagent/my-agent/g' +git grep -li protoAgent | xargs sed -i '' 's/protoAgent/MyAgent/g' + +# Linux / GNU sed — drop the empty-string backup suffix +git grep -li protoagent | xargs sed -i 's/protoagent/my-agent/g' +git grep -li protoAgent | xargs sed -i 's/protoAgent/MyAgent/g' +``` + +Review the diff. Key hits: + +- `Dockerfile` — the `/opt/protoagent/` paths become `/opt/my-agent/`. +- `entrypoint.sh` — same. +- `server.py` — `AGENT_NAME_ENV` fallback becomes `my-agent`. +- `chat_ui.py` — branding strings (service worker label, apple-mobile-web-app-title). +- Workflow files — the repo guards check `protoLabsAI/my-agent` instead. + +The runtime name (`identity.name` in `config/langgraph-config.yaml`, set by the wizard) is separate — keep both in sync unless you have a reason not to. + +## 3. 
Un-freeze the release pipeline + +The release workflows gate on the template's repo path so third-party clones don't accidentally cut releases: + +- `.github/workflows/prepare-release.yml` +- `.github/workflows/release.yml` +- `.github/workflows/docker-publish.yml` + +Each has an `if: github.repository == 'protoLabsAI/protoAgent'` (or similar) check. Swap `protoLabsAI/protoAgent` for `<owner>/<repo>` in all three, or the pipeline won't fire on merges. + +## 4. Rewrite the agent card + +`server.py::_build_agent_card` ships with placeholder skills: + +```python +"skills": [ + {"id": "chat", "name": "Chat", "description": "General-purpose...", ...}, +], +``` + +Replace with the skills your agent actually advertises over A2A. The `name` and `url` fields already pick up `identity.name` from YAML, so the wizard-set name lands on the card without code changes. + +## 5. (Optional) Add domain tools + +`tools/lg_tools.py` ships with `echo`, `current_time`, `calculator`, `web_search`, `fetch_url`. Keep the ones you want, drop the rest, add your own. Update `get_all_tools()` at the bottom. Any tool returned from there becomes a checkbox in the wizard and drawer automatically. + +## 6. (Optional) Configure subagents + +`graph/subagents/config.py` ships with one `worker`. Register more `SubagentConfig` instances in `SUBAGENT_REGISTRY` and add matching fields in `graph/config.py::LangGraphConfig`. The lead agent delegates via the `task` tool; the subagent delegation rules are built from the registry. + +## 7. Build and ship the image + +```bash +docker build -t ghcr.io/my-org/my-agent:local . + +# local test — mount the config volume so wizard completions persist +docker run --rm -p 7870:7870 \ + -e OPENAI_API_KEY="$OPENAI_API_KEY" \ + -v my-agent-config:/opt/my-agent/config \ + ghcr.io/my-org/my-agent:local +``` + +The Dockerfile declares `VOLUME /opt/<agent-name>/config`, so even without `-v` the wizard writes land in an anonymous volume. That volume survives restarts of the same container, but every fresh `docker run` creates a new one (and `--rm` deletes it on exit). 
For production, use a named volume or host mount so you can back it up. + +Once the local build is happy, merge a PR to trigger the release pipeline ([Deploy via GHCR](/guides/deploy)). + +## 8. Delete `TEMPLATE.md` + +Once the checklist is done, `rm TEMPLATE.md` and rewrite `README.md` to describe your specific agent — its purpose, its skills, its operators. + +## Canonical reference implementation + +[protoLabsAI/quinn](https://github.com/protoLabsAI/quinn) is the first agent built on this template, now running in production. When this guide doesn't cover a specific decision, Quinn is the filled-in example — worth a skim before you invent something new. diff --git a/docs/guides/index.md b/docs/guides/index.md index 843adee..3e49012 100644 --- a/docs/guides/index.md +++ b/docs/guides/index.md @@ -1,10 +1,11 @@ # How-To Guides -Task-oriented procedures. Assumes you already have a forked, running agent (see [Tutorials](/tutorials/) if not). +Task-oriented procedures. Assumes you already have a running agent (see [Tutorials](/tutorials/) if not — the wizard runs with zero setup). 
| Guide | When to read | |---|---| -| [Fork the template](/guides/fork-the-template) | Fast-path checklist for experienced forkers | +| [Customize & deploy](/guides/customize-and-deploy) | You've evaluated via the wizard and now want to fork, rename, and ship your own image | +| [Fork checklist (fast path)](/guides/fork-the-template) | Terser version of the above for experienced forkers | | [Add a custom skill](/guides/add-a-skill) | Your agent does new things and callers need to dispatch to them | | [Configure subagents](/guides/subagents) | You want specialized delegates beyond the placeholder `worker` | | [Wire Langfuse + Prometheus](/guides/observability) | You need traces and metrics in production | diff --git a/docs/index.md b/docs/index.md index d66540d..aaf8c0e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,14 +3,14 @@ layout: home hero: name: protoAgent text: LangGraph + A2A template for protoLabs agents - tagline: Fork this repo. Rewrite SOUL.md, prompts, and tools. Ship. + tagline: Clone. Run. Walk the wizard. Chat. Fork when you're ready to ship. actions: - theme: brand text: Spin up your first agent link: /tutorials/first-agent - theme: alt - text: Reference - link: /reference/ + text: Customize & deploy + link: /guides/customize-and-deploy features: - icon: 🔌 diff --git a/docs/tutorials/first-agent.md b/docs/tutorials/first-agent.md index 4fc9a16..6082f66 100644 --- a/docs/tutorials/first-agent.md +++ b/docs/tutorials/first-agent.md @@ -1,99 +1,80 @@ # Spin up your first agent -This walks you from "I clicked Use this template" to "I have a running agent answering a web-search query". About 15 minutes, assuming Docker and a LiteLLM gateway are already set up. +About 5 minutes. You need Python 3.11+ and an OpenAI-compatible API key (OpenAI direct, LiteLLM gateway, Anthropic-via-gateway, Ollama, anything that speaks the OpenAI REST shape). -## What you'll need +No forking, no `sed`, no Docker for your first run. 
That's all in [Customize & deploy](/guides/customize-and-deploy) once you've decided this template works for you. -- A GitHub account with access to `protoLabsAI` (or your own org — the workflows gate on the repo owner; see step 7) -- Docker -- A LiteLLM gateway running somewhere reachable (the template points at `http://gateway:4000/v1`) -- A model alias in that gateway. The template's default is `protolabs/agent` — either add that alias or retarget `model.name` in step 4 - -## 1. Use the template - -From GitHub, click **Use this template → Create a new repository** on [protoLabsAI/protoAgent](https://github.com/protoLabsAI/protoAgent). Pick a short slug like `jon` or `echo-agent` — it will end up as the image name, metric prefix, Langfuse tag, and more. - -Or from the CLI: +## 1. Get the code ```bash -gh repo create protoLabsAI/my-agent \ - --template protoLabsAI/protoAgent \ - --public --clone - +git clone https://github.com/protoLabsAI/protoAgent.git my-agent cd my-agent ``` -## 2. Rename the agent - -The template uses `protoagent` as the placeholder throughout. Do a pass: +## 2. Install dependencies ```bash -git grep -li protoagent | xargs sed -i 's/protoagent/my-agent/g' -git grep -li protoAgent | xargs sed -i 's/protoAgent/MyAgent/g' +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt ``` -Review the diff before committing — the replacement hits Dockerfile paths (`/opt/protoagent` → `/opt/my-agent`), the GHCR image path, workflow repo guards, and the Gradio UI branding. All of those want the new name. +## 3. Run the server -## 3. Rewrite identity - -Three files carry the agent's identity. Edit each one: - -- `config/SOUL.md` — the persona doc loaded at session start. See the placeholder file itself for guidance. -- `graph/prompts.py` — the system prompt for the lead agent + subagents. -- `server.py::_build_agent_card` — the agent card served at `/.well-known/agent-card.json`. 
At minimum, fix `name` and `description`; revisit `skills` once you have real tools. - -## 4. Point at a model +```bash +python server.py +``` -Edit `config/langgraph-config.yaml`: +You should see: -```yaml -model: - name: protolabs/my-agent # or openai/gpt-4o, anthropic/claude-opus-4-6, etc. - api_base: http://gateway:4000/v1 +``` +LangGraph agent initialized (setup wizard not complete — graph not compiled. Open the UI to finish setup.) +Starting protoagent on http://0.0.0.0:7870 ``` -If you're using a gateway alias (recommended), make sure the alias is registered there before booting — swapping models later becomes a gateway edit instead of a code change. +## 4. Open the setup wizard -## 5. Build and run +Visit <http://localhost:7870> in a browser. Because `config/.setup-complete` doesn't exist yet, you'll land in the wizard instead of the chat UI. -```bash -docker build -t my-agent:local . -docker run --rm -p 7870:7870 \ - -e AGENT_NAME=my-agent \ - -e OPENAI_API_KEY="$LITELLM_MASTER_KEY" \ - my-agent:local -``` +Walk through the four steps: -## 6. Verify the agent is up +1. **Connect to your model.** Paste your API base URL (`https://api.openai.com/v1` for OpenAI direct, `http://localhost:4000/v1` for a local LiteLLM gateway) and API key. Click **Test connection & fetch models** — the dropdown fills with whatever the endpoint actually exposes. Pick one. +2. **Name your agent.** Short lowercase slug (e.g. `product-director`). Pick a persona preset — **Generic Assistant** is the safe default; **Research** / **Coding** / **Blank** are the alternatives — and click **Load preset into SOUL.md**. Edit the loaded text if you want to make it specific to your agent. +3. **Tools & middleware.** All five starter tools (`echo`, `current_time`, `calculator`, `web_search`, `fetch_url`) are enabled by default. Leave **Audit** and **Memory** middleware on. Leave **Knowledge** off — that needs an index the template doesn't ship with. +4. 
**Optional — you, security, autostart.** Your name makes the agent address you directly. Leave the A2A auth token blank for local dev; set it before you expose the port. "Launch this agent automatically on login" installs a macOS LaunchAgent so the server is up after every reboot without remembering to `python server.py`. -In another terminal: +Hit **Launch agent**. The wizard closes, the chat UI appears, and the Configuration drawer on the right is now populated with your choices. -```bash -curl http://localhost:7870/.well-known/agent-card.json | jq .name -# → "my-agent" +## 5. Try it -curl http://localhost:7870/metrics | grep my_agent_active_sessions -# → my_agent_active_sessions 0 -``` - -Hit `http://localhost:7870` in a browser to get the Gradio chat UI. Ask it: +In the chat box: > What time is it in Tokyo? -If the starter tools are wired correctly, it should call `current_time`, return an ISO-8601 timestamp with the timezone offset, and explain what it found. +The agent calls `current_time`, returns an ISO-8601 timestamp, and explains what it found. Then: > Find three recent articles about the A2A protocol and summarize them. -The agent will call `web_search`, then `fetch_url` for each of the top results, and return a summary. That round-trip exercises the full tool loop + LLM call + streaming response path. +The agent calls `web_search`, then `fetch_url` on the top results, and hands back a synthesis. That round-trip exercises the full tool loop + LLM call + streaming response path. + +## What just happened + +- Your answers were written to `config/langgraph-config.yaml` (human-readable — peek at it). +- The persona preset was written to `config/SOUL.md`. +- A `config/.setup-complete` marker was created so the next boot goes straight to chat. +- The agent card at <http://localhost:7870/.well-known/agent-card.json> now reflects your agent name. +- If you checked autostart, `~/Library/LaunchAgents/ai.protolabs.<agent-name>.plist` was installed and `launchctl load`-ed. -## 7. 
Un-freeze the release pipeline +## Changing your mind -The three release workflows (`docker-publish.yml`, `prepare-release.yml`, `release.yml`) all gate on `github.repository == 'protoLabsAI/protoAgent'`. Change that check in each file to match your repo's owner/repo before merging anything to `main`, or the release automation won't fire. +- **Any field** — open the Configuration drawer on the right side of the chat UI. Every wizard field is there, plus a few advanced ones (temperature, max_tokens, max_iterations, knowledge store settings). +- **The whole wizard** — expand the drawer's "Re-run setup wizard" accordion and click **Run wizard now**. Your current values pre-fill every step. +- **Autostart** — toggle it off in the wizard or the drawer; the LaunchAgent is removed and the plist file deleted. ## Where to go next - [Write your first tool](/tutorials/first-tool) — wire a custom LangChain tool into the loop +- [Customize & deploy](/guides/customize-and-deploy) — fork the template, rename throughout, ship a GHCR image - [Add a custom skill](/guides/add-a-skill) — expose the new behaviour on the A2A agent card -- [Deploy via GHCR](/guides/deploy) — get Watchtower auto-deploying your merges diff --git a/graph/config.py b/graph/config.py index c8c2601..00ae1a8 100644 --- a/graph/config.py +++ b/graph/config.py @@ -51,6 +51,26 @@ class LangGraphConfig: embed_model: str = "qwen3-embedding" knowledge_top_k: int = 5 + # Identity — captured by the setup wizard, editable via the drawer. + # ``identity_name`` falls back to the AGENT_NAME env var at runtime; + # the YAML value wins when both are set so per-fork customization + # survives image rebuilds. ``operator`` is the human the agent thinks + # it's talking to — injected into the system prompt when non-empty. + identity_name: str = "protoagent" + identity_operator: str = "" + + # A2A bearer token — blank = open mode (local dev). 
Writing a token + # here makes the A2A handler require ``Authorization: Bearer `` + # on every request and advertises the bearer scheme on the agent card. + # Kept in YAML rather than env so the drawer can manage it. + auth_token: str = "" + + # OS-level autostart — ``True`` means the server launches on user + # login (macOS LaunchAgent today; Linux/Windows TBD). Managed by + # ``autostart.py``; the field here is the source of truth for + # whether the plist should exist. + autostart_on_boot: bool = False + @classmethod def from_yaml(cls, path: str | Path) -> "LangGraphConfig": """Load config from YAML file. Falls back to defaults if absent.""" @@ -65,6 +85,9 @@ def from_yaml(cls, path: str | Path) -> "LangGraphConfig": subagents = data.get("subagents", {}) middleware = data.get("middleware", {}) knowledge = data.get("knowledge", {}) + identity = data.get("identity", {}) + auth = data.get("auth", {}) + runtime = data.get("runtime", {}) config = cls( model_provider=model.get("provider", cls.model_provider), @@ -80,6 +103,10 @@ def from_yaml(cls, path: str | Path) -> "LangGraphConfig": knowledge_db_path=knowledge.get("db_path", cls.knowledge_db_path), embed_model=knowledge.get("embed_model", cls.embed_model), knowledge_top_k=knowledge.get("top_k", cls.knowledge_top_k), + identity_name=identity.get("name", cls.identity_name), + identity_operator=identity.get("operator", cls.identity_operator), + auth_token=auth.get("token", cls.auth_token), + autostart_on_boot=runtime.get("autostart_on_boot", cls.autostart_on_boot), ) for name in ("worker",): diff --git a/graph/config_io.py b/graph/config_io.py new file mode 100644 index 0000000..24a44ca --- /dev/null +++ b/graph/config_io.py @@ -0,0 +1,396 @@ +"""Config I/O for the live-edit drawer in chat_ui.py. + +Three jobs: + +1. **YAML round-trip** that preserves comments and unknown keys in + ``config/langgraph-config.yaml``. 
``LangGraphConfig.from_yaml`` + silently drops anything it doesn't know about, so writing back via + a freshly-constructed dataclass would wipe fork-added sections + (e.g. the ``memory`` / ``skills`` blocks the template already + ships). We use ruamel.yaml when available for comment preservation; + PyYAML is the fallback. + +2. **Two-location SOUL.md handling.** The runtime reads + ``/sandbox/SOUL.md`` (populated by ``entrypoint.sh`` at container + start). The source-of-truth lives at ``config/SOUL.md`` in the + repo. Drawer edits write to both so container restarts preserve + the change and local-dev runs without a ``/sandbox`` directory + still pick up the edit. + +3. **Gateway introspection.** ``list_gateway_models`` hits + ``{api_base}/models`` so the drawer's model dropdown reflects + whatever the connected LiteLLM gateway (or OpenAI-compat endpoint) + actually exposes — no hardcoded list to drift out of sync. +""" + +from __future__ import annotations + +import logging +import os +from io import StringIO +from pathlib import Path +from typing import Any + +from graph.config import LangGraphConfig + +log = logging.getLogger("protoagent.config_io") + +REPO_ROOT = Path(__file__).parent.parent +CONFIG_YAML_PATH = REPO_ROOT / "config" / "langgraph-config.yaml" +SOUL_SOURCE_PATH = REPO_ROOT / "config" / "SOUL.md" +SOUL_RUNTIME_PATH = Path("/sandbox/SOUL.md") + +# Setup wizard state. +# Presence of this (empty) marker file = wizard has been run and the +# server should boot straight into the chat UI. Absence = show the +# wizard on first page load. Lives in ``config/`` so a Docker volume +# mount at /opt//config persists setup across container runs. +SETUP_MARKER_PATH = REPO_ROOT / "config" / ".setup-complete" + +# SOUL.md starter templates. The wizard offers these as presets the +# user can pick then edit before saving. Adding a new file here +# automatically makes it a choice — no registry to update. 
def load_yaml_doc(path: Path = CONFIG_YAML_PATH) -> Any:
    """Load the config YAML as a mutable document.

    With ruamel: returns a CommentedMap that preserves comments +
    key order on subsequent dump. Without: returns a plain dict and
    comments are lost on next save (``save_yaml_doc`` logs a warning
    on every save so the operator knows).

    A missing or empty file yields an empty mapping rather than
    raising, so callers can always merge updates into the result.

    The file is read as UTF-8 explicitly, matching the SOUL.md helpers
    in this module, so non-ASCII comments or values do not depend on
    the platform's default locale encoding.
    """
    if not path.exists():
        return {} if not _HAS_RUAMEL else _ruamel.load("{}\n")

    with open(path, encoding="utf-8") as f:
        if _HAS_RUAMEL:
            # An empty file loads as None; substitute an empty
            # round-trip mapping so later __setitem__ calls work.
            return _ruamel.load(f) or _ruamel.load("{}\n")
        import yaml
        return yaml.safe_load(f) or {}


def save_yaml_doc(doc: Any, path: Path = CONFIG_YAML_PATH) -> None:
    """Persist the document. Creates parent dirs if needed.

    Uses ruamel's round-trip dumper when available (comments and key
    order survive); otherwise falls back to PyYAML and logs a warning
    because comments are dropped. The file is written as UTF-8
    explicitly so it reads back identically regardless of locale.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    if _HAS_RUAMEL:
        with open(path, "w", encoding="utf-8") as f:
            _ruamel.dump(doc, f)
        return

    log.warning(
        "ruamel.yaml not installed — YAML comments in %s will not be "
        "preserved on save. Add `ruamel.yaml>=0.18` to requirements.txt "
        "to fix.", path,
    )
    import yaml
    with open(path, "w", encoding="utf-8") as f:
        yaml.safe_dump(doc, f, sort_keys=False, default_flow_style=False)
def validate_config_dict(updates: dict[str, Any]) -> tuple[bool, str]:
    """Validate without persisting. Returns (ok, error-message).

    Catches type mismatches and obvious range errors before we touch
    disk or rebuild the graph. On success the message is the empty
    string.

    Sections that must be mappings (``model``, ``subagents``, and a
    non-empty ``subagents.worker`` / ``knowledge``) are checked
    explicitly: a scalar, list or ``None`` there is reported as a
    validation failure instead of escaping as ``AttributeError`` from
    a ``.get`` call. This function never raises for malformed input.
    """

    def _mapping(value: Any, label: str) -> dict[str, Any]:
        # Raise TypeError so the shared handler below turns it into a
        # (False, message) result.
        if not isinstance(value, dict):
            raise TypeError(
                f"{label} must be a mapping, got {type(value).__name__}"
            )
        return value

    try:
        updates = _mapping(updates, "config")

        model = _mapping(updates.get("model", {}), "model")
        temp = float(model.get("temperature", 0.2))
        if not 0.0 <= temp <= 2.0:
            return False, f"temperature must be 0.0-2.0, got {temp}"
        max_tokens = int(model.get("max_tokens", 4096))
        if max_tokens < 1:
            return False, f"max_tokens must be >= 1, got {max_tokens}"
        max_iter = int(model.get("max_iterations", 50))
        if max_iter < 1:
            return False, f"max_iterations must be >= 1, got {max_iter}"

        subagents = _mapping(updates.get("subagents", {}), "subagents")
        worker = subagents.get("worker", {})
        # Falsy worker (absent / empty / None) means "nothing to check".
        if worker:
            worker = _mapping(worker, "subagents.worker")
            max_turns = int(worker.get("max_turns", 20))
            if max_turns < 1:
                return False, f"worker.max_turns must be >= 1, got {max_turns}"
            tools = worker.get("tools", [])
            if not isinstance(tools, list):
                return False, "worker.tools must be a list"

        knowledge = updates.get("knowledge", {})
        if knowledge:
            knowledge = _mapping(knowledge, "knowledge")
            top_k = int(knowledge.get("top_k", 5))
            if top_k < 1:
                return False, f"knowledge.top_k must be >= 1, got {top_k}"
    except (TypeError, ValueError) as e:
        return False, f"config validation: {e}"
    return True, ""
def list_gateway_models(
    api_base: str,
    api_key: str = "",
    timeout: float = 10.0,
) -> tuple[list[str], str]:
    """Fetch the model list from ``{api_base}/models``.

    Works against any OpenAI-compatible endpoint — LiteLLM gateway,
    OpenAI proper, vLLM, Ollama with the OpenAI adapter. Returns
    ``(model_ids, error_message)``. On success ``error_message`` is
    empty; on failure model_ids is empty and the message is human-
    readable.

    ``api_key`` falls back to the ``OPENAI_API_KEY`` environment
    variable when blank; no ``Authorization`` header is sent if both
    are empty. Surrounding whitespace in ``api_base`` (common when the
    URL is pasted into the UI) is ignored.
    """
    api_base = (api_base or "").strip()
    if not api_base:
        return [], "api_base is empty"

    # Imported after the cheap guard so the empty-input path has no
    # dependency on httpx being importable.
    import httpx

    key = api_key or os.environ.get("OPENAI_API_KEY", "")
    url = api_base.rstrip("/") + "/models"
    headers = {}
    if key:
        headers["Authorization"] = f"Bearer {key}"

    try:
        with httpx.Client(timeout=timeout) as client:
            resp = client.get(url, headers=headers)
    except (httpx.HTTPError, httpx.InvalidURL) as e:
        # InvalidURL is not an HTTPError subclass; without it a
        # malformed user-typed URL would raise instead of being
        # reported through the error string.
        return [], f"connection failed: {e}"

    if resp.status_code >= 400:
        detail = resp.text[:200] if resp.text else ""
        return [], f"HTTP {resp.status_code} from {url}: {detail}"

    try:
        data = resp.json()
    except ValueError:
        return [], f"non-JSON response from {url}"

    items = data.get("data") if isinstance(data, dict) else None
    if not isinstance(items, list):
        return [], f"unexpected shape from {url} — no 'data' array"

    # Accept either "id" or "name" per entry; skip anything that is
    # not a dict carrying a string identifier.
    ids: list[str] = []
    for item in items:
        if isinstance(item, dict):
            model_id = item.get("id") or item.get("name")
            if isinstance(model_id, str):
                ids.append(model_id)
    ids.sort()
    return ids, ""
def read_soul_preset(name: str) -> str:
    """Return the preset's content.

    Returns empty string for an unknown name rather than raising —
    the wizard treats that as "no preset selected, blank canvas".

    Path-traversal guarded: the resolved target must live inside
    ``PRESETS_DIR``. A name like ``"../secret"`` would otherwise
    escape the presets directory and read arbitrary ``.md`` files
    anywhere the process can reach.

    Names the filesystem layer rejects outright (for example one
    containing a NUL byte) are also treated as unknown: ``name`` can
    come straight from a URL path segment, so path resolution errors
    must not escape as exceptions.
    """
    try:
        presets_root = PRESETS_DIR.resolve()
        candidate = (PRESETS_DIR / f"{name}.md").resolve()
        if presets_root not in candidate.parents or not candidate.is_file():
            return ""
    except (OSError, ValueError):
        # ValueError: embedded NUL byte; OSError: e.g. symlink loop or
        # an over-long path component.
        return ""
    return candidate.read_text(encoding="utf-8")
soul = _read_file(f"{workspace}/SOUL.md") + if not soul: + soul = _read_file(Path(__file__).parent.parent / "config" / "SOUL.md") if soul: parts.append(soul) else: diff --git a/requirements.txt b/requirements.txt index 9cb6ff6..30ef46d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ uvicorn>=0.30 langfuse>=3.0 prometheus-client>=0.20 pyyaml>=6.0 +ruamel.yaml>=0.18 # round-trip YAML that preserves comments in config/langgraph-config.yaml when the drawer writes back edits websockets>=12.0 # LangGraph agent backend diff --git a/server.py b/server.py index 2221b11..75f9692 100644 --- a/server.py +++ b/server.py @@ -55,23 +55,366 @@ _graph = None # LangGraph compiled graph _graph_config = None # LangGraphConfig _checkpointer = None # MemorySaver for session persistence +_active_port = 7870 # populated by _main() — the port this process is actually bound to. + # Read by the autostart installer so the LaunchAgent reboots + # on the same port the operator launched with, not the default. def _init_langgraph_agent(): - """Initialize the LangGraph agent backend.""" + """Initialize the LangGraph backend — setup-aware. + + Always loads the config + checkpointer so the wizard and drawer + can introspect what's on disk. The compiled graph is only built + when the setup wizard has been completed (``.setup-complete`` + marker present). This lets the server boot cleanly on a fresh + clone with no model credentials — the wizard drives the user to + provide them, then triggers a reload. 
def _reload_langgraph_agent() -> tuple[bool, str]:
    """Rebuild the compiled graph from the latest config YAML.

    Called by the drawer's Save & Reload action and the
    ``/api/config/reload`` endpoint. Preserves the existing
    ``_checkpointer`` so active session threads stay addressable
    — a fresh MemorySaver would orphan every in-flight thread.

    Rebinding ``_graph`` is atomic in CPython; in-flight
    ``astream_events`` iterators hold their own reference to the
    prior graph and finish cleanly on the old instance.

    If the setup marker is absent this returns early without
    compiling — the wizard is still in front of the user, so there
    is nothing to hot-swap yet.

    Returns ``(ok, message)``. On failure nothing is committed: the
    previous ``_graph`` / ``_graph_config`` stay in place.
    """
    global _graph, _graph_config

    from graph.config import LangGraphConfig
    from graph.config_io import is_setup_complete

    config_path = Path(__file__).parent / "config" / "langgraph-config.yaml"
    try:
        new_config = LangGraphConfig.from_yaml(config_path)
    except Exception as e:
        log.exception("[reload] config load failed")
        return False, f"config load failed: {e}"

    # Build the graph FIRST (when setup is complete) — only commit
    # runtime state after the rebuild succeeds. Doing the swap first
    # would leave the process serving the prior compiled _graph under
    # fresh _graph_config + rotated bearer auth on failure — the
    # metrics / card / auth all de-sync from what's actually running.
    new_graph = None
    if is_setup_complete():
        try:
            # Deferred like in _init_langgraph_agent: graph.agent is
            # only needed once setup is complete, and importing it
            # inside the try reports an import failure through the
            # (ok, message) result instead of raising to the caller.
            from graph.agent import create_agent_graph

            new_graph = create_agent_graph(new_config)
        except Exception as e:
            log.exception("[reload] graph rebuild failed")
            return False, f"graph rebuild failed: {e}"

    # Commit: config → A2A bearer → graph. All three reference the
    # same ``new_config`` so they stay consistent.
    _graph_config = new_config
    try:
        from a2a_handler import set_a2a_token

        set_a2a_token(new_config.auth_token or None)
    except ImportError:
        # a2a_handler not yet imported (e.g. during early-boot reload
        # before _main wires routes) — harmless.
        pass
    _graph = new_graph

    if new_graph is None:
        log.info("[reload] setup not complete — config reloaded, graph not compiled")
        return True, "config reloaded • setup not complete"

    log.info("LangGraph agent reloaded (model: %s)", _graph_config.model_name)
    return True, f"reloaded • model={_graph_config.model_name}"
+ """ + if not (config and "runtime" in config): + return None + want = bool(config.get("runtime", {}).get("autostart_on_boot", False)) + + try: + from autostart import install_autostart, uninstall_autostart + + as_name = ( + config.get("identity", {}).get("name") + or (_graph_config.identity_name if _graph_config else "") + or "protoagent" + ) + if want: + ok, msg = install_autostart(agent_name=as_name, port=_active_port) + else: + ok, msg = uninstall_autostart(agent_name=as_name) + except Exception as e: + log.exception("[autostart] sync raised") + return f"autostart failed: {e}" + + if not ok: + log.warning("[autostart] sync failed: %s", msg) + return f"autostart: {msg}" + + +def _apply_settings_changes( + config: dict | None = None, + soul: str | None = None, +) -> tuple[bool, list[str]]: + """Persist config YAML + SOUL.md then reload the graph once. + + Passing ``None`` for either argument skips that write — a bare + call with both None acts as a pure reload (useful for picking up + external file edits). + """ + from graph.config_io import ( + apply_updates_to_yaml, + load_yaml_doc, + save_yaml_doc, + validate_config_dict, + write_soul, + ) + + messages: list[str] = [] + + if config is not None: + ok, err = validate_config_dict(config) + if not ok: + return False, [f"validation: {err}"] + try: + doc = load_yaml_doc() + apply_updates_to_yaml(doc, config) + save_yaml_doc(doc) + messages.append("config saved") + except Exception as e: + log.exception("[config] YAML write failed") + return False, [f"config write: {e}"] + + if soul is not None: + try: + paths = write_soul(soul) + messages.append(f"SOUL saved ({len(paths)} path{'s' if len(paths) != 1 else ''})") + except Exception as e: + log.exception("[config] SOUL write failed") + return False, [f"soul write: {e}"] + + # Drawer toggles of runtime.autostart_on_boot ride this path, + # not the wizard's finish_setup, so the LaunchAgent plist has + # to be installed/removed here too. 
+ as_msg = _sync_autostart_with_config(config) + if as_msg: + messages.append(as_msg) + + ok, reload_msg = _reload_langgraph_agent() + messages.append(reload_msg) + return ok, messages + + +def _build_settings_callbacks() -> dict[str, Any]: + """Callbacks consumed by the Gradio Configuration drawer + wizard.""" + from graph.config_io import ( + config_to_dict, + is_setup_complete, + list_available_tools, + list_gateway_models, + list_soul_presets, + mark_setup_complete, + read_soul, + read_soul_preset, + reset_setup, + ) + + def get_config() -> dict[str, Any]: + return config_to_dict(_graph_config) + + def list_models(api_base: str = "", api_key: str = "") -> tuple[list[str], str]: + """UI-friendly model lookup. + + Uses the form-local api_base/api_key when the user is trying a + different endpoint before saving; falls back to the currently + loaded graph config so the initial render works without + arguments. + """ + base = api_base or (_graph_config.api_base if _graph_config else "") + key = api_key or (_graph_config.api_key if _graph_config else "") + return list_gateway_models(base, key) + + def save_all(config: dict | None, soul: str | None) -> tuple[bool, str]: + ok, messages = _apply_settings_changes(config=config, soul=soul) + return ok, " • ".join(messages) + + def finish_setup(config: dict | None, soul: str | None) -> tuple[bool, str]: + """Wizard terminal action — write everything, mark complete, reload. + + Ordering matters: + + 1. Write config YAML + SOUL.md (no reload yet). + 2. ``mark_setup_complete()`` — flip the marker BEFORE the + reload so ``_reload_langgraph_agent`` actually compiles + the graph. Doing it after means the reload sees + setup-incomplete and stays ``_graph = None``. + 3. Sync autostart (LaunchAgent plist is independent of the + graph, so it can happen any time after the config is + written). + 4. Reload — marker present, graph compiles, chat works. + + Returns a single status string joining per-step messages. 
+ """ + from graph.config_io import ( + apply_updates_to_yaml, + load_yaml_doc, + save_yaml_doc, + validate_config_dict, + write_soul, + ) + + messages: list[str] = [] + + # 1. Persist + if config is not None: + ok, err = validate_config_dict(config) + if not ok: + return False, f"validation: {err}" + try: + doc = load_yaml_doc() + apply_updates_to_yaml(doc, config) + save_yaml_doc(doc) + messages.append("config saved") + except Exception as e: + log.exception("[setup] YAML write failed: %s", e) + return False, f"config write: {e}" + + if soul is not None: + try: + paths = write_soul(soul) + messages.append(f"SOUL saved ({len(paths)} path{'s' if len(paths) != 1 else ''})") + except Exception as e: + log.exception("[setup] SOUL write failed: %s", e) + return False, f"soul write: {e}" + + # 2. Flip the marker — MUST be before reload so the graph builds + mark_setup_complete() + messages.append("setup marked complete") + + # 3. Autostart sync (shared helper — drawer path runs the same) + as_msg = _sync_autostart_with_config(config) + if as_msg: + messages.append(as_msg) + + # 4. Reload — now picks up setup_complete=True and compiles. + # On failure, roll back the marker so the next page load + # drops the user back into the wizard instead of landing + # them in the chat UI with the "setup required" fallback + # and no obvious way to retry. + ok, reload_msg = _reload_langgraph_agent() + messages.append(reload_msg) + if not ok: + reset_setup() + messages.append("setup marker rolled back — re-run the wizard after fixing the error above") + + return ok, " • ".join(messages) + + def restart_setup() -> str: + """Drawer action — delete the marker so the wizard runs again.""" + reset_setup() + log.info("[setup] marker removed — wizard will run on next page load") + return "setup marker removed • reload the page to run the wizard" + + def autostart_info() -> dict[str, Any]: + """Report platform support + current on-disk state. 
The drawer + uses this to render the toggle correctly and to print the + plist path for debugging.""" + try: + from autostart import autostart_status + + name = (_graph_config.identity_name if _graph_config else "") or "protoagent" + return autostart_status(name) + except Exception as e: + return {"supported": False, "installed": False, "reason": str(e)} + + def toggle_autostart(enabled: bool) -> tuple[bool, str]: + """Install or uninstall the OS autostart artifact, mirroring + the YAML field. Called from the drawer's checkbox handler so + toggling takes effect immediately without waiting for Save.""" + try: + from autostart import install_autostart, uninstall_autostart + + name = (_graph_config.identity_name if _graph_config else "") or "protoagent" + if enabled: + return install_autostart(agent_name=name, port=_active_port) + return uninstall_autostart(agent_name=name) + except Exception as e: + return False, str(e) + + return { + "get_config": get_config, + "get_soul": read_soul, + "list_models": list_models, + "list_tools": list_available_tools, + "list_soul_presets": list_soul_presets, + "read_soul_preset": read_soul_preset, + "save_all": save_all, + "finish_setup": finish_setup, + "restart_setup": restart_setup, + "is_setup_complete": is_setup_complete, + "autostart_info": autostart_info, + "toggle_autostart": toggle_autostart, + } + + +def _setup_required_message() -> list[dict[str, Any]]: + """Returned by chat endpoints when the wizard hasn't been run. + + The Gradio UI hides the chat pane until setup completes, but the + HTTP /api/chat, OpenAI-compat, and A2A endpoints don't know the + UI state — so they emit a plain-text "finish setup first" + message instead of 500ing on ``_graph is None``. + """ + return [{ + "role": "assistant", + "content": ( + "**Setup required.** The setup wizard has not been completed. " + "Open the UI and finish the wizard, or POST the completed config " + "to `/api/config/setup` before calling chat endpoints." 
def agent_name() -> str:
    """Resolve the active agent name.

    Lookup order:

    1. ``identity_name`` on the loaded config, when a config is loaded
       and the name is non-empty and not the ``"protoagent"``
       placeholder.
    2. Otherwise ``AGENT_NAME_ENV`` (the ``AGENT_NAME`` env var captured
       at import, itself defaulting to ``"protoagent"``).

    Evaluated on every call, so a rename that lands in ``_graph_config``
    is picked up without a restart.
    """
    cfg = _graph_config
    if not cfg:
        return AGENT_NAME_ENV

    configured = cfg.identity_name
    if not configured or configured == "protoagent":
        # Empty or still the template placeholder: defer to the env.
        return AGENT_NAME_ENV
    return configured
----------------------------------------------------------- class ChatRequest(PydanticBaseModel): @@ -369,6 +737,80 @@ async def _api_chat(req: ChatRequest): parts = [m["content"] for m in result if m.get("role") == "assistant" and m.get("content")] return {"response": "\n\n".join(parts), "messages": result} + # --- Live config / SOUL editing ---------------------------------------- + # GET returns the current config + persona so external clients (the + # Gradio drawer is one; curl is another) can mirror what's running. + # POST accepts partial edits — pass only the sections you want to + # change. Reload is automatic. + class ConfigReloadRequest(PydanticBaseModel): + config: dict | None = None + soul: str | None = None + + @fastapi_app.get("/api/config") + async def _api_get_config(): + from graph.config_io import config_to_dict, read_soul + return { + "config": config_to_dict(_graph_config), + "soul": read_soul(), + } + + @fastapi_app.post("/api/config") + async def _api_post_config(req: ConfigReloadRequest): + ok, messages = _apply_settings_changes(config=req.config, soul=req.soul) + return {"ok": ok, "messages": messages} + + class ModelsProbeRequest(PydanticBaseModel): + api_base: str = "" + api_key: str = "" + + @fastapi_app.post("/api/config/models") + async def _api_list_models(req: ModelsProbeRequest | None = None): + """Fetch the gateway's model list. + + POST (body) not GET (query) so the caller's API key doesn't + end up in browser history, reverse-proxy access logs, or the + uvicorn request log. A blank body falls back to whatever key + and base are stored in the current config — useful for the + drawer's initial render where there's nothing to POST yet. 
+ """ + from graph.config_io import list_gateway_models + + body = req or ModelsProbeRequest() + base = body.api_base or (_graph_config.api_base if _graph_config else "") + key = body.api_key or (_graph_config.api_key if _graph_config else "") + models, error = list_gateway_models(base, key) + return {"models": models, "error": error} + + # --- Setup wizard state ------------------------------------------------- + @fastapi_app.get("/api/config/setup-status") + async def _api_setup_status(): + from graph.config_io import is_setup_complete, list_soul_presets + return { + "setup_complete": is_setup_complete(), + "presets": list_soul_presets(), + } + + @fastapi_app.post("/api/config/setup") + async def _api_finish_setup(req: ConfigReloadRequest): + """Terminal wizard action over HTTP. Same semantics as the + drawer's ``finish_setup`` callback — writes everything, marks + setup complete, optionally installs autostart, then reloads. + """ + callbacks = _build_settings_callbacks() + ok, msg = callbacks["finish_setup"](req.config, req.soul) + return {"ok": ok, "message": msg} + + @fastapi_app.post("/api/config/reset-setup") + async def _api_reset_setup(): + from graph.config_io import reset_setup + reset_setup() + return {"ok": True, "message": "setup marker removed"} + + @fastapi_app.get("/api/config/presets/{name}") + async def _api_read_preset(name: str): + from graph.config_io import read_soul_preset + return {"name": name, "content": read_soul_preset(name)} + # --- OpenAI-compatible chat completions -------------------------------- # Lets this agent be registered as a model in the LiteLLM gateway / # OpenWebUI without any protocol adapter. 
@@ -386,19 +828,19 @@ async def _openai_chat_completions(req: dict): parts = [m["content"] for m in result if m.get("role") == "assistant" and m.get("content")] content = "\n\n".join(parts) created = int(time.time()) - completion_id = f"{AGENT_NAME}-{session_id}" + completion_id = f"{agent_name()}-{session_id}" if stream: async def _stream(): chunk = { "id": completion_id, "object": "chat.completion.chunk", - "created": created, "model": AGENT_NAME, + "created": created, "model": agent_name(), "choices": [{"index": 0, "delta": {"role": "assistant", "content": content}, "finish_reason": None}], } yield f"data: {json.dumps(chunk)}\n\n" done_chunk = { "id": completion_id, "object": "chat.completion.chunk", - "created": created, "model": AGENT_NAME, + "created": created, "model": agent_name(), "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], } yield f"data: {json.dumps(done_chunk)}\n\n" @@ -407,7 +849,7 @@ async def _stream(): return { "id": completion_id, "object": "chat.completion", - "created": created, "model": AGENT_NAME, + "created": created, "model": agent_name(), "choices": [{ "index": 0, "message": {"role": "assistant", "content": content}, @@ -420,14 +862,14 @@ async def _stream(): async def _openai_models(): return { "object": "list", - "data": [{"id": AGENT_NAME, "object": "model", "created": 1774600000, "owned_by": "protolabs"}], + "data": [{"id": agent_name(), "object": "model", "created": 1774600000, "owned_by": "protolabs"}], } # --- A2A agent card ----------------------------------------------------- @fastapi_app.get("/.well-known/agent.json", include_in_schema=False) @fastapi_app.get("/.well-known/agent-card.json", include_in_schema=False) async def _a2a_agent_card(request: Request): - host = request.headers.get("host", f"{AGENT_NAME}:7870") + host = request.headers.get("host", f"{agent_name()}:7870") return JSONResponse( content=_build_agent_card(host), headers={"Cache-Control": "public, max-age=60"}, @@ -437,12 +879,24 @@ async def 
_a2a_agent_card(request: Request): # JSON-RPC + REST, streaming, polling, cancel, push webhooks. from a2a_handler import register_a2a_routes - auth_env = f"{AGENT_NAME.upper()}_API_KEY" + # Two independent A2A auth surfaces: + # + # 1. **Bearer** (modern) — ``auth.token`` in YAML, captured by the + # wizard as "A2A bearer token". Passed via the ``auth_token`` + # argument, with ``A2A_AUTH_TOKEN`` env as fallback. Updates + # from a wizard/drawer-driven reload propagate live through + # ``a2a_handler.set_a2a_token`` — no restart needed. + # 2. **X-API-Key** (legacy) — ``<AGENT_NAME>_API_KEY`` env var, threaded + # through the ``api_key`` argument. Kept env-driven; forks that + # want it YAML-configurable can add a field later. + yaml_bearer = _graph_config.auth_token if _graph_config else "" + auth_env = f"{AGENT_NAME_ENV.upper()}_API_KEY" register_a2a_routes( app=fastapi_app, chat_stream_fn_factory=_chat_langgraph_stream, chat_fn=chat, api_key=os.environ.get(auth_env, ""), + auth_token=yaml_bearer, agent_card={}, register_card_route=False, # card is already served above ) @@ -486,7 +940,7 @@ async def _serve_sw() -> FileResponse: favicon_path=str(static_dir / "favicon.svg") if (static_dir / "favicon.svg").exists() else None, ) - log.info("Starting %s on http://0.0.0.0:%d", AGENT_NAME, args.port) + log.info("Starting %s on http://0.0.0.0:%d", agent_name(), args.port) uvicorn.run(app, host="0.0.0.0", port=args.port) diff --git a/tests/test_config_io.py b/tests/test_config_io.py new file mode 100644 index 0000000..25a7472 --- /dev/null +++ b/tests/test_config_io.py @@ -0,0 +1,440 @@ +"""Tests for graph/config_io.py — the plumbing behind the live-edit drawer. + +Critical invariants: + +- YAML round-trip preserves unknown top-level sections (forks add + these; silently dropping them on save would be a footgun). +- ``apply_updates_to_yaml`` mutates only the keys you pass and leaves + siblings alone. +- ``validate_config_dict`` catches range / type errors before disk + writes.
+- ``read_soul`` / ``write_soul`` handles the dual-location contract + (/sandbox/SOUL.md as runtime, config/SOUL.md as source). +- ``list_gateway_models`` returns a readable error message rather + than raising — the UI shows this string directly. +""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import httpx +import pytest + + +# ── YAML round-trip ────────────────────────────────────────────────────────── + + +def test_yaml_round_trip_preserves_unknown_keys(tmp_path: Path) -> None: + """Forks add custom top-level sections (the shipped YAML already + has ``memory`` and ``skills`` that the dataclass doesn't model). + Round-tripping through load_yaml_doc + save_yaml_doc must leave + them intact.""" + from graph import config_io + + yaml_path = tmp_path / "langgraph-config.yaml" + yaml_path.write_text( + "model:\n" + " name: test-model\n" + " temperature: 0.5\n" + "memory:\n" + " path: /custom/memory\n" + " max_sessions: 42\n" + "custom_section:\n" + " arbitrary_key: arbitrary_value\n" + ) + + doc = config_io.load_yaml_doc(yaml_path) + config_io.save_yaml_doc(doc, yaml_path) + + reloaded = config_io.load_yaml_doc(yaml_path) + assert reloaded["memory"]["path"] == "/custom/memory" + assert reloaded["memory"]["max_sessions"] == 42 + assert reloaded["custom_section"]["arbitrary_key"] == "arbitrary_value" + + +def test_apply_updates_merges_shallowly(tmp_path: Path) -> None: + """Updating model.temperature must NOT clobber model.name or + other model.* fields.""" + from graph import config_io + + yaml_path = tmp_path / "c.yaml" + yaml_path.write_text( + "model:\n" + " name: original-model\n" + " temperature: 0.1\n" + " api_base: http://original\n" + ) + + doc = config_io.load_yaml_doc(yaml_path) + config_io.apply_updates_to_yaml(doc, {"model": {"temperature": 0.9}}) + config_io.save_yaml_doc(doc, yaml_path) + + reloaded = config_io.load_yaml_doc(yaml_path) + assert reloaded["model"]["name"] == 
"original-model" + assert reloaded["model"]["api_base"] == "http://original" + assert reloaded["model"]["temperature"] == 0.9 + + +def test_apply_updates_adds_missing_sections(tmp_path: Path) -> None: + from graph import config_io + + yaml_path = tmp_path / "c.yaml" + yaml_path.write_text("model:\n name: x\n") + doc = config_io.load_yaml_doc(yaml_path) + + config_io.apply_updates_to_yaml( + doc, + {"middleware": {"audit": True, "memory": False}}, + ) + + assert doc["middleware"]["audit"] is True + assert doc["middleware"]["memory"] is False + assert doc["model"]["name"] == "x" + + +def test_apply_updates_nested_worker(tmp_path: Path) -> None: + """subagents.worker.tools is a list, subagents.worker.enabled + is a bool — both must land in the right nested slot.""" + from graph import config_io + + yaml_path = tmp_path / "c.yaml" + yaml_path.write_text("subagents:\n worker:\n enabled: false\n") + doc = config_io.load_yaml_doc(yaml_path) + + config_io.apply_updates_to_yaml( + doc, + {"subagents": {"worker": {"enabled": True, "tools": ["echo", "calculator"]}}}, + ) + + assert doc["subagents"]["worker"]["enabled"] is True + assert list(doc["subagents"]["worker"]["tools"]) == ["echo", "calculator"] + + +# ── config_to_dict ─────────────────────────────────────────────────────────── + + +def test_config_to_dict_mirrors_yaml_shape() -> None: + """The UI works with the dict shape; the YAML schema uses the + same paths. Keep them in lockstep so round-tripping through + apply_updates_to_yaml works without path rewrites.""" + from graph.config import LangGraphConfig + from graph.config_io import config_to_dict + + cfg = LangGraphConfig() + d = config_to_dict(cfg) + + # Top-level schema surface — all the sections the YAML exposes. + # Adding a new section here without updating config_to_dict would + # strand fork-added fields outside the drawer's round-trip. 
+ assert set(d.keys()) == { + "model", "subagents", "middleware", "knowledge", + "identity", "auth", "runtime", + } + assert d["model"]["name"] == cfg.model_name + assert d["model"]["temperature"] == cfg.temperature + assert d["subagents"]["worker"]["tools"] == list(cfg.worker.tools) + assert d["middleware"]["audit"] == cfg.audit_middleware + assert d["knowledge"]["top_k"] == cfg.knowledge_top_k + assert d["identity"]["name"] == cfg.identity_name + assert d["auth"]["token"] == cfg.auth_token + assert d["runtime"]["autostart_on_boot"] == cfg.autostart_on_boot + + +# ── validate_config_dict ───────────────────────────────────────────────────── + + +@pytest.mark.parametrize("bad_value,expected_error_fragment", [ + ({"model": {"temperature": 3.0}}, "temperature"), + ({"model": {"temperature": -0.1}}, "temperature"), + ({"model": {"max_tokens": 0}}, "max_tokens"), + ({"model": {"max_iterations": 0}}, "max_iterations"), + ({"subagents": {"worker": {"max_turns": 0}}}, "max_turns"), + ({"subagents": {"worker": {"tools": "not-a-list"}}}, "list"), + ({"knowledge": {"top_k": 0}}, "top_k"), +]) +def test_validate_rejects_bad_values(bad_value, expected_error_fragment): + from graph.config_io import validate_config_dict + ok, err = validate_config_dict(bad_value) + assert not ok + assert expected_error_fragment in err + + +def test_validate_accepts_happy_path(): + from graph.config_io import config_to_dict, validate_config_dict + from graph.config import LangGraphConfig + + ok, err = validate_config_dict(config_to_dict(LangGraphConfig())) + assert ok, err + + +# ── SOUL.md dual-path ──────────────────────────────────────────────────────── + + +def test_read_soul_falls_back_to_source(monkeypatch, tmp_path: Path) -> None: + """When /sandbox/SOUL.md doesn't exist (local dev), fall through + to the repo config dir so drawer edits are still visible.""" + from graph import config_io + + # Point the runtime path at an unreachable location so the source + # fallback is exercised. 
+ fake_runtime = tmp_path / "nonexistent" / "SOUL.md" + fake_source = tmp_path / "SOUL-source.md" + fake_source.write_text("from source", encoding="utf-8") + + monkeypatch.setattr(config_io, "SOUL_RUNTIME_PATH", fake_runtime) + monkeypatch.setattr(config_io, "SOUL_SOURCE_PATH", fake_source) + + assert config_io.read_soul() == "from source" + + +def test_read_soul_prefers_runtime(monkeypatch, tmp_path: Path) -> None: + from graph import config_io + + runtime = tmp_path / "runtime" / "SOUL.md" + runtime.parent.mkdir() + runtime.write_text("runtime wins", encoding="utf-8") + source = tmp_path / "SOUL-source.md" + source.write_text("source loses", encoding="utf-8") + + monkeypatch.setattr(config_io, "SOUL_RUNTIME_PATH", runtime) + monkeypatch.setattr(config_io, "SOUL_SOURCE_PATH", source) + + assert config_io.read_soul() == "runtime wins" + + +def test_write_soul_writes_source_always(monkeypatch, tmp_path: Path) -> None: + """The source-of-truth write (config/SOUL.md) must always succeed; + the runtime write is best-effort (skipped when /sandbox missing).""" + from graph import config_io + + # Runtime points at a path whose parent doesn't exist — should skip + # gracefully. 
+ runtime = tmp_path / "no-sandbox-here" / "SOUL.md" + source = tmp_path / "src" / "SOUL.md" + + monkeypatch.setattr(config_io, "SOUL_RUNTIME_PATH", runtime) + monkeypatch.setattr(config_io, "SOUL_SOURCE_PATH", source) + + written = config_io.write_soul("hello world") + assert source in written + assert runtime not in written + assert source.read_text() == "hello world" + + +def test_write_soul_writes_both_when_runtime_parent_exists( + monkeypatch, tmp_path: Path, +) -> None: + from graph import config_io + + runtime_dir = tmp_path / "sandbox" + runtime_dir.mkdir() + runtime = runtime_dir / "SOUL.md" + source = tmp_path / "src" / "SOUL.md" + + monkeypatch.setattr(config_io, "SOUL_RUNTIME_PATH", runtime) + monkeypatch.setattr(config_io, "SOUL_SOURCE_PATH", source) + + written = config_io.write_soul("dual write") + assert runtime in written + assert source in written + assert runtime.read_text() == "dual write" + assert source.read_text() == "dual write" + + +# ── Gateway model listing ──────────────────────────────────────────────────── + + +def test_list_gateway_models_success(monkeypatch): + from graph import config_io + + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = { + "data": [ + {"id": "model-b"}, + {"id": "model-a"}, + {"id": "model-c"}, + ], + } + + fake_client = MagicMock() + fake_client.__enter__ = lambda self: fake_client + fake_client.__exit__ = lambda *args: None + fake_client.get.return_value = fake_response + + monkeypatch.setattr("httpx.Client", lambda **kw: fake_client) + + models, err = config_io.list_gateway_models("http://gateway:4000/v1", "test-key") + assert err == "" + assert models == ["model-a", "model-b", "model-c"] # sorted + called_url = fake_client.get.call_args[0][0] + assert called_url == "http://gateway:4000/v1/models" + + +def test_list_gateway_models_empty_base_returns_error(): + from graph.config_io import list_gateway_models + + models, err = list_gateway_models("", "key") + 
assert models == [] + assert "api_base" in err + + +def test_list_gateway_models_http_error(monkeypatch): + from graph import config_io + + fake_client = MagicMock() + fake_client.__enter__ = lambda self: fake_client + fake_client.__exit__ = lambda *args: None + fake_client.get.side_effect = httpx.ConnectError("no route to host") + + monkeypatch.setattr("httpx.Client", lambda **kw: fake_client) + + models, err = config_io.list_gateway_models("http://bad-host/v1") + assert models == [] + assert "connection failed" in err + + +def test_list_gateway_models_bad_status(monkeypatch): + from graph import config_io + + fake_response = MagicMock() + fake_response.status_code = 401 + fake_response.text = "unauthorized" + + fake_client = MagicMock() + fake_client.__enter__ = lambda self: fake_client + fake_client.__exit__ = lambda *args: None + fake_client.get.return_value = fake_response + + monkeypatch.setattr("httpx.Client", lambda **kw: fake_client) + + models, err = config_io.list_gateway_models("http://x/v1", "bad-key") + assert models == [] + assert "401" in err + + +# ── list_available_tools ───────────────────────────────────────────────────── + + +def test_list_available_tools_returns_starter_set(): + from graph.config_io import list_available_tools + + names = list_available_tools() + # Lock in the template's starter set — forks replace these but + # the drawer's CheckboxGroup populates from this call, so the + # contract is "return tool names in a stable list". + assert "echo" in names + assert "calculator" in names + assert "current_time" in names + assert all(isinstance(n, str) for n in names) + + +# ── Setup wizard marker ───────────────────────────────────────────────────── + + +def test_setup_marker_lifecycle(monkeypatch, tmp_path): + """Marker presence = wizard skipped. Mark → present. Reset → gone. 
+ Reset on a missing marker is a no-op, not an error.""" + from graph import config_io + + marker = tmp_path / ".setup-complete" + monkeypatch.setattr(config_io, "SETUP_MARKER_PATH", marker) + + assert config_io.is_setup_complete() is False + + config_io.mark_setup_complete() + assert config_io.is_setup_complete() is True + assert marker.exists() + + config_io.mark_setup_complete() # idempotent + assert config_io.is_setup_complete() is True + + config_io.reset_setup() + assert config_io.is_setup_complete() is False + + config_io.reset_setup() # no-op on missing marker — doesn't raise + + +def test_mark_setup_complete_creates_parent_dir(monkeypatch, tmp_path): + """If config/ doesn't exist yet, mark_setup_complete must create + it — otherwise a fresh clone with a pristine filesystem fails + on first wizard run.""" + from graph import config_io + + marker = tmp_path / "fresh" / "config" / ".setup-complete" + monkeypatch.setattr(config_io, "SETUP_MARKER_PATH", marker) + + config_io.mark_setup_complete() + assert marker.exists() + + +# ── SOUL.md presets ───────────────────────────────────────────────────────── + + +def test_list_soul_presets_returns_shipped_starters(): + """The template must ship four starter presets so the wizard's + dropdown is useful on day one. 
Add a file to config/soul-presets/ + and it should appear here automatically — no registry.""" + from graph.config_io import list_soul_presets + + presets = list_soul_presets() + assert "generic-assistant" in presets + assert "research" in presets + assert "coding" in presets + assert "blank" in presets + + +def test_list_soul_presets_sorted(): + from graph.config_io import list_soul_presets + + presets = list_soul_presets() + assert presets == sorted(presets) + + +def test_read_soul_preset_returns_content(): + from graph.config_io import read_soul_preset + + content = read_soul_preset("research") + assert "research" in content.lower() + assert content.strip().startswith("#") # markdown h1 + + +def test_read_soul_preset_unknown_returns_empty(): + """Unknown preset names must return '' not raise — the wizard + treats empty as 'user didn't pick a preset, keep textarea as-is'.""" + from graph.config_io import read_soul_preset + + assert read_soul_preset("not-a-real-preset") == "" + assert read_soul_preset("") == "" + + +@pytest.mark.parametrize("malicious", [ + "../secret", + "../../etc/passwd", + "../../../etc/passwd", + "subdir/../../../outside", + "/etc/hosts", + "..", + "../../graph/config", # try to read a real repo file via ../../ +]) +def test_read_soul_preset_rejects_path_traversal(malicious): + """CRITICAL: the preset name must not let a caller escape + ``config/soul-presets/``. Every ``..`` or absolute path + should return empty string, not read an arbitrary .md file + elsewhere on disk.""" + from graph.config_io import read_soul_preset + + assert read_soul_preset(malicious) == "" + + +def test_list_soul_presets_missing_dir_returns_empty(monkeypatch, tmp_path): + """If a fork accidentally deletes the presets dir, the wizard + should render an empty dropdown, not crash.""" + from graph import config_io + + fake = tmp_path / "does-not-exist" + monkeypatch.setattr(config_io, "PRESETS_DIR", fake) + + assert config_io.list_soul_presets() == []