From 1899f9910965da79362216dc57b55b3cc62ef82c Mon Sep 17 00:00:00 2001 From: Nebhay Date: Fri, 20 Feb 2026 22:01:07 +0000 Subject: [PATCH 01/11] Fix Fireworks - **Added Fireworks base URL constant** (`src/config.rs`): - Added `FIREWORKS_PROVIDER_BASE_URL` constant set to `https://api.fireworks.ai/v1` - **Added Fireworks provider registration** (`src/config.rs`): - Registered Fireworks provider in `load_from_env()` function (environment variable loading) - Registered Fireworks provider in `from_toml()` function (TOML config file loading) - Both use `ApiType::OpenAiCompletions` API type and the Fireworks base URL --- src/config.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/config.rs b/src/config.rs index 5ab073171..49b469478 100644 --- a/src/config.rs +++ b/src/config.rs @@ -187,6 +187,7 @@ const MOONSHOT_PROVIDER_BASE_URL: &str = "https://api.moonshot.ai"; const ZHIPU_PROVIDER_BASE_URL: &str = "https://api.z.ai/api/paas/v4"; const ZAI_CODING_PLAN_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; const NVIDIA_PROVIDER_BASE_URL: &str = "https://integrate.api.nvidia.com"; +const FIREWORKS_PROVIDER_BASE_URL: &str = "https://api.fireworks.ai/v1"; /// Defaults inherited by all agents. Individual agents can override any field. #[derive(Debug, Clone)] @@ -1745,6 +1746,17 @@ impl Config { }); } + if let Some(fireworks_key) = llm.fireworks_key.clone() { + llm.providers + .entry("fireworks".to_string()) + .or_insert_with(|| ProviderConfig { + api_type: ApiType::OpenAiCompletions, + base_url: FIREWORKS_PROVIDER_BASE_URL.to_string(), + api_key: fireworks_key, + name: None, + }); + } + // Note: We allow boot without provider keys now. System starts in setup mode. // Agents are initialized later when keys are added via API. @@ -2057,6 +2069,17 @@ impl Config { }); } + if let Some(fireworks_key) = llm.fireworks_key.clone() { + llm.providers + .entry("fireworks".to_string()) + .or_insert_with(|| ProviderConfig { + api_type: ApiType::OpenAiCompletions, + base_url: FIREWORKS_PROVIDER_BASE_URL.to_string(), + api_key: fireworks_key, + name: None, + }); + } + // Note: We allow boot without provider keys now. System starts in setup mode. // Agents are initialized later when keys are added via API. From 72fa375ebf19717099d0681bc592f7787d6a57db Mon Sep 17 00:00:00 2001 From: Nebhay Date: Sat, 21 Feb 2026 06:40:18 +0000 Subject: [PATCH 02/11] Update src/config.rs Co-authored-by: Sam <78718829+the-snesler@users.noreply.github.com> --- src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index 49b469478..842c09dd0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -187,7 +187,7 @@ const MOONSHOT_PROVIDER_BASE_URL: &str = "https://api.moonshot.ai"; const ZHIPU_PROVIDER_BASE_URL: &str = "https://api.z.ai/api/paas/v4"; const ZAI_CODING_PLAN_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; const NVIDIA_PROVIDER_BASE_URL: &str = "https://integrate.api.nvidia.com"; -const FIREWORKS_PROVIDER_BASE_URL: &str = "https://api.fireworks.ai/v1"; +const FIREWORKS_PROVIDER_BASE_URL: &str = "https://api.fireworks.ai/inference"; /// Defaults inherited by all agents. Individual agents can override any field. #[derive(Debug, Clone)] From 52bcdf3bb3f21a2ab6ba4ace0b42ef11898e41e2 Mon Sep 17 00:00:00 2001 From: Billy Riaz Date: Sun, 22 Feb 2026 04:10:11 +0000 Subject: [PATCH 03/11] fix(build): restore main compile after security middleware update --- src/api/server.rs | 14 +++++++++++--- src/tools/browser.rs | 10 +++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/api/server.rs b/src/api/server.rs index 39dcc9d64..ca4c59972 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -7,8 +7,8 @@ use super::{ }; use axum::Json; -use axum::extract::Request; use axum::Router; +use axum::extract::{Request, State}; use axum::http::{StatusCode, Uri, header}; use axum::middleware::{self, Next}; use axum::response::{Html, IntoResponse, Response}; @@ -177,7 +177,11 @@ pub async fn start_http_server( Ok(handle) } -async fn api_auth_middleware(state: Arc, request: Request, next: Next) -> Response { +async fn api_auth_middleware( + State(state): State>, + request: Request, + next: Next, +) -> Response { let Some(expected_token) = state.auth_token.as_deref() else { return next.run(request).await; }; @@ -197,7 +201,11 @@ async fn api_auth_middleware(state: Arc, request: Request, next: Next) if is_authorized { next.run(request).await } else { - (StatusCode::UNAUTHORIZED, Json(json!({"error": "unauthorized"}))).into_response() + ( + StatusCode::UNAUTHORIZED, + Json(json!({"error": "unauthorized"})), + ) + .into_response() } } diff --git a/src/tools/browser.rs b/src/tools/browser.rs index e650a5ac3..7f9b5f3e9 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -4,7 +4,6 @@ //! via headless Chrome using chromiumoxide. Uses an accessibility-tree based //! ref system for LLM-friendly element addressing. -use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use crate::config::BrowserConfig; use chromiumoxide::browser::{Browser, BrowserConfig as ChromeConfig}; @@ -17,11 +16,13 @@ use chromiumoxide_cdp::cdp::browser_protocol::input::{ }; use chromiumoxide_cdp::cdp::browser_protocol::page::CaptureScreenshotFormat; use futures::StreamExt as _; +use reqwest::Url; use rig::completion::ToolDefinition; use rig::tool::Tool; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::path::PathBuf; use std::sync::Arc; use tokio::sync::Mutex; @@ -31,9 +32,8 @@ use tokio::task::JoinHandle; /// Blocks private/loopback IPs, link-local addresses, and cloud metadata endpoints /// to prevent server-side request forgery. fn validate_url(url: &str) -> Result<(), BrowserError> { - let parsed = url::Url::parse(url).map_err(|error| { - BrowserError::new(format!("invalid URL '{url}': {error}")) - })?; + let parsed = Url::parse(url) + .map_err(|error| BrowserError::new(format!("invalid URL '{url}': {error}")))?; match parsed.scheme() { "http" | "https" => {} @@ -91,7 +91,7 @@ fn is_blocked_ip(ip: IpAddr) -> bool { || v4.is_link_local() // 169.254.0.0/16 || v4.is_broadcast() // 255.255.255.255 || v4.is_unspecified() // 0.0.0.0 - || is_v4_cgnat(v4) // 100.64.0.0/10 + || is_v4_cgnat(v4) // 100.64.0.0/10 } IpAddr::V6(v6) => { v6.is_loopback() // ::1 From 7c14b4c690fccb406e8adaabe2ff4d1da375bc54 Mon Sep 17 00:00:00 2001 From: Billy Riaz Date: Sun, 22 Feb 2026 03:44:45 +0000 Subject: [PATCH 04/11] feat(telegram): format messages with Telegram HTML parse mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert markdown output to Telegram-compatible HTML before sending, so bold, italic, code, links, headers, blockquotes and code blocks render properly instead of showing raw markdown characters. Every send path (text, rich messages, thread replies, streaming edits, captions, ephemeral, scheduled, broadcast) now goes through `send_formatted` which sets `ParseMode::Html` and automatically falls back to plain text if the API rejects the HTML. Only the Telegram adapter is touched — Discord and Slack are unaffected. --- src/messaging/telegram.rs | 426 ++++++++++++++++++++++++++++++++------ 1 file changed, 368 insertions(+), 58 deletions(-) diff --git a/src/messaging/telegram.rs b/src/messaging/telegram.rs index cd4bdb1f2..f949cae1a 100644 --- a/src/messaging/telegram.rs +++ b/src/messaging/telegram.rs @@ -6,16 +6,17 @@ use crate::{Attachment, InboundMessage, MessageContent, OutboundResponse, Status use anyhow::Context as _; use arc_swap::ArcSwap; +use regex::Regex; use teloxide::Bot; use teloxide::payloads::setters::*; use teloxide::requests::{Request, Requester}; use teloxide::types::{ ChatAction, ChatId, FileId, InputFile, InputPollOption, MediaKind, MessageId, MessageKind, - ReactionType, ReplyParameters, UpdateKind, UserId, + ParseMode, ReactionType, ReplyParameters, UpdateKind, UserId, }; use std::collections::{HashMap, VecDeque}; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::time::Instant; use tokio::sync::{RwLock, mpsc}; use tokio::task::JoinHandle; @@ -285,25 +286,11 @@ impl Messaging for TelegramAdapter { match response { OutboundResponse::Text(text) => { self.stop_typing(&message.conversation_id).await; - - for chunk in split_message(&text, MAX_MESSAGE_LENGTH) { - self.bot - .send_message(chat_id, &chunk) - .send() - .await - .context("failed to send telegram message")?; - } + send_formatted(&self.bot, chat_id, &text, None).await?; } OutboundResponse::RichMessage { text, poll, .. } => { self.stop_typing(&message.conversation_id).await; - - for chunk in split_message(&text, MAX_MESSAGE_LENGTH) { - self.bot - .send_message(chat_id, &chunk) - .send() - .await - .context("failed to send telegram message")?; - } + send_formatted(&self.bot, chat_id, &text, None).await?; if let Some(poll_data) = poll { send_poll(&self.bot, chat_id, &poll_data).await?; @@ -317,17 +304,7 @@ impl Messaging for TelegramAdapter { // Telegram doesn't have named threads. Reply to the source message instead. let reply_to = self.extract_message_id(message).ok(); - - for chunk in split_message(&text, MAX_MESSAGE_LENGTH) { - let mut request = self.bot.send_message(chat_id, &chunk); - if let Some(reply_id) = reply_to { - request = request.reply_parameters(ReplyParameters::new(reply_id)); - } - request - .send() - .await - .context("failed to send telegram thread reply")?; - } + send_formatted(&self.bot, chat_id, &text, reply_to).await?; } OutboundResponse::File { filename, @@ -340,7 +317,8 @@ impl Messaging for TelegramAdapter { let input_file = InputFile::memory(data).file_name(filename); let mut request = self.bot.send_document(chat_id, input_file); if let Some(caption_text) = caption { - request = request.caption(caption_text); + let html_caption = markdown_to_telegram_html(&caption_text); + request = request.caption(html_caption).parse_mode(ParseMode::Html); } request .send() @@ -391,7 +369,6 @@ impl Messaging for TelegramAdapter { OutboundResponse::StreamChunk(text) => { let mut active = self.active_messages.write().await; if let Some(stream) = active.get_mut(&message.conversation_id) { - // Rate-limit edits to avoid Telegram API throttling if stream.last_edit.elapsed() < STREAM_EDIT_INTERVAL { return Ok(()); } @@ -403,13 +380,27 @@ impl Messaging for TelegramAdapter { text }; - if let Err(error) = self + let html = markdown_to_telegram_html(&display_text); + if let Err(html_error) = self .bot - .edit_message_text(stream.chat_id, stream.message_id, display_text) + .edit_message_text(stream.chat_id, stream.message_id, &html) + .parse_mode(ParseMode::Html) .send() .await { - tracing::debug!(%error, "failed to edit streaming message"); + tracing::debug!(%html_error, "HTML edit failed, retrying as plain text"); + if let Err(error) = self + .bot + .edit_message_text( + stream.chat_id, + stream.message_id, + &display_text, + ) + .send() + .await + { + tracing::debug!(%error, "failed to edit streaming message"); + } } stream.last_edit = Instant::now(); } @@ -427,19 +418,11 @@ impl Messaging for TelegramAdapter { OutboundResponse::RemoveReaction(_) => {} // no-op OutboundResponse::Ephemeral { text, .. } => { // Telegram has no ephemeral messages — send as regular text - let chat_id = self.extract_chat_id(message)?; - self.bot - .send_message(chat_id, text) - .await - .context("failed to send ephemeral fallback on telegram")?; + send_formatted(&self.bot, chat_id, &text, None).await?; } OutboundResponse::ScheduledMessage { text, .. } => { // Telegram has no scheduled messages — send immediately - let chat_id = self.extract_chat_id(message)?; - self.bot - .send_message(chat_id, text) - .await - .context("failed to send scheduled message fallback on telegram")?; + send_formatted(&self.bot, chat_id, &text, None).await?; } } @@ -494,21 +477,9 @@ impl Messaging for TelegramAdapter { ); if let OutboundResponse::Text(text) = response { - for chunk in split_message(&text, MAX_MESSAGE_LENGTH) { - self.bot - .send_message(chat_id, &chunk) - .send() - .await - .context("failed to broadcast telegram message")?; - } + send_formatted(&self.bot, chat_id, &text, None).await?; } else if let OutboundResponse::RichMessage { text, poll, .. } = response { - for chunk in split_message(&text, MAX_MESSAGE_LENGTH) { - self.bot - .send_message(chat_id, &chunk) - .send() - .await - .context("failed to broadcast telegram message")?; - } + send_formatted(&self.bot, chat_id, &text, None).await?; if let Some(poll_data) = poll { send_poll(&self.bot, chat_id, &poll_data).await?; @@ -909,3 +880,342 @@ fn split_message(text: &str, max_len: usize) -> Vec { chunks } + +// -- Markdown-to-Telegram-HTML formatting -- + +static BOLD_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").expect("hardcoded regex")); +static ITALIC_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"\*(.+?)\*").expect("hardcoded regex")); +static STRIKETHROUGH_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"~~(.+?)~~").expect("hardcoded regex")); +static LINK_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("hardcoded regex")); + +/// Escape characters that have special meaning in Telegram's HTML parse mode. +fn escape_html(text: &str) -> String { + text.replace('&', "&") + .replace('<', "<") + .replace('>', ">") +} + +/// Strip HTML tags and unescape entities, producing plain text for fallback. +fn strip_html_tags(html: &str) -> String { + static TAG_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"<[^>]+>").expect("hardcoded regex")); + TAG_PATTERN + .replace_all(html, "") + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") +} + +/// Convert markdown to Telegram-compatible HTML. +/// +/// Handles fenced code blocks, inline code, bold, italic, strikethrough, +/// links, headers (rendered as bold), and blockquotes. +fn markdown_to_telegram_html(markdown: &str) -> String { + let mut result = String::with_capacity(markdown.len()); + let mut in_code_block = false; + let mut code_language = String::new(); + let mut code_lines: Vec<&str> = Vec::new(); + let mut blockquote_lines: Vec = Vec::new(); + + for line in markdown.lines() { + if let Some(rest) = line.strip_prefix("```") { + flush_blockquote(&mut result, &mut blockquote_lines); + + if in_code_block { + let content = escape_html(&code_lines.join("\n")); + if code_language.is_empty() { + result.push_str("
");
+                    result.push_str(&content);
+                    result.push_str("
\n"); + } else { + result.push_str("
");
+                    result.push_str(&content);
+                    result.push_str("
\n"); + } + in_code_block = false; + code_language.clear(); + code_lines.clear(); + } else { + in_code_block = true; + code_language = rest.trim().to_string(); + } + continue; + } + + if in_code_block { + code_lines.push(line); + continue; + } + + if let Some(quote_text) = line.strip_prefix("> ") { + blockquote_lines.push(format_inline(quote_text)); + continue; + } else if line == ">" { + blockquote_lines.push(String::new()); + continue; + } + + flush_blockquote(&mut result, &mut blockquote_lines); + + if let Some(header_text) = line + .strip_prefix("### ") + .or_else(|| line.strip_prefix("## ")) + .or_else(|| line.strip_prefix("# ")) + { + result.push_str(""); + result.push_str(&format_inline(header_text)); + result.push_str("\n"); + continue; + } + + result.push_str(&format_inline(line)); + result.push('\n'); + } + + if in_code_block { + result.push_str("
");
+        result.push_str(&escape_html(&code_lines.join("\n")));
+        result.push_str("
\n"); + } + + flush_blockquote(&mut result, &mut blockquote_lines); + + while result.ends_with('\n') { + result.pop(); + } + + result +} + +/// Append buffered blockquote lines to the result and clear the buffer. +fn flush_blockquote(result: &mut String, lines: &mut Vec) { + if lines.is_empty() { + return; + } + result.push_str("
"); + result.push_str(&lines.join("\n")); + result.push_str("
\n"); + lines.clear(); +} + +/// Convert inline markdown elements to HTML within a single line. +/// +/// Splits on backticks to isolate inline code spans, then converts bold, +/// italic, strikethrough and links in the remaining text. Content inside +/// backticks is HTML-escaped but not processed for markdown. +fn format_inline(line: &str) -> String { + let segments: Vec<&str> = line.split('`').collect(); + let mut result = String::new(); + + for (index, segment) in segments.iter().enumerate() { + if index % 2 == 1 && index < segments.len() - 1 { + result.push_str(""); + result.push_str(&escape_html(segment)); + result.push_str(""); + } else if index % 2 == 0 { + result.push_str(&format_markdown_spans(&escape_html(segment))); + } else { + // Unmatched trailing backtick — treat as literal + result.push('`'); + result.push_str(&format_markdown_spans(&escape_html(segment))); + } + } + + result +} + +/// Replace markdown span markers with HTML tags in already-escaped text. +/// +/// Bold (`**`) is processed before italic (`*`) so double-star patterns +/// are consumed first and single stars only match true italic spans. +fn format_markdown_spans(text: &str) -> String { + let text = BOLD_PATTERN.replace_all(text, "$1"); + let text = ITALIC_PATTERN.replace_all(&text, "$1"); + let text = STRIKETHROUGH_PATTERN.replace_all(&text, "$1"); + let text = LINK_PATTERN.replace_all(&text, r#"$1"#); + text.into_owned() +} + +/// Send a message with Telegram HTML formatting, splitting at the message +/// length limit. Falls back to plain text if the API rejects the HTML. +async fn send_formatted( + bot: &Bot, + chat_id: ChatId, + text: &str, + reply_to: Option, +) -> anyhow::Result<()> { + let html = markdown_to_telegram_html(text); + for chunk in split_message(&html, MAX_MESSAGE_LENGTH) { + let mut request = bot + .send_message(chat_id, &chunk) + .parse_mode(ParseMode::Html); + if let Some(reply_id) = reply_to { + request = request.reply_parameters(ReplyParameters::new(reply_id)); + } + if let Err(error) = request.send().await { + tracing::debug!(%error, "HTML send failed, retrying as plain text"); + let plain = strip_html_tags(&chunk); + let mut fallback = bot.send_message(chat_id, &plain); + if let Some(reply_id) = reply_to { + fallback = fallback.reply_parameters(ReplyParameters::new(reply_id)); + } + fallback + .send() + .await + .context("failed to send telegram message")?; + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bold() { + assert_eq!( + markdown_to_telegram_html("**bold text**"), + "bold text" + ); + } + + #[test] + fn italic() { + assert_eq!( + markdown_to_telegram_html("*italic text*"), + "italic text" + ); + } + + #[test] + fn bold_and_italic_nested() { + assert_eq!( + markdown_to_telegram_html("***both***"), + "both" + ); + } + + #[test] + fn inline_code() { + assert_eq!( + markdown_to_telegram_html("use `println!` here"), + "use println! here" + ); + } + + #[test] + fn code_block_with_language() { + let input = "```rust\nfn main() {}\n```"; + let expected = "
fn main() {}
"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn code_block_without_language() { + let input = "```\nhello world\n```"; + let expected = "
hello world
"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn code_block_escapes_html() { + let input = "```\n\n```"; + let expected = "
<script>alert(1)</script>
"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn link() { + assert_eq!( + markdown_to_telegram_html("[click](https://example.com)"), + r#"click"# + ); + } + + #[test] + fn strikethrough() { + assert_eq!(markdown_to_telegram_html("~~deleted~~"), "deleted"); + } + + #[test] + fn headers_render_as_bold() { + assert_eq!(markdown_to_telegram_html("# Title"), "Title"); + assert_eq!(markdown_to_telegram_html("## Sub"), "Sub"); + assert_eq!(markdown_to_telegram_html("### Section"), "Section"); + } + + #[test] + fn blockquote() { + assert_eq!( + markdown_to_telegram_html("> quoted text"), + "
quoted text
" + ); + } + + #[test] + fn multiline_blockquote() { + let input = "> line one\n> line two"; + let expected = "
line one\nline two
"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn html_entities_escaped_in_text() { + assert_eq!( + markdown_to_telegram_html("x < y & a > b"), + "x < y & a > b" + ); + } + + #[test] + fn inline_code_escapes_html() { + assert_eq!( + markdown_to_telegram_html("`not bold`"), + "<b>not bold</b>" + ); + } + + #[test] + fn mixed_formatting() { + let input = "Hello **world**, this is *important* and `code`"; + let expected = "Hello world, this is important and code"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn plain_text_unchanged() { + assert_eq!( + markdown_to_telegram_html("just plain text"), + "just plain text" + ); + } + + #[test] + fn unclosed_code_block_handled() { + let input = "```python\nprint('hi')"; + let expected = "
print('hi')
"; + assert_eq!(markdown_to_telegram_html(input), expected); + } + + #[test] + fn strip_html_tags_and_unescape() { + assert_eq!( + strip_html_tags("bold & italic"), + "bold & italic" + ); + } + + #[test] + fn list_items_pass_through() { + let input = "- item one\n- item two\n- item three"; + let expected = "- item one\n- item two\n- item three"; + assert_eq!(markdown_to_telegram_html(input), expected); + } +} From 0735a2a3f3dbb21c6ebdb1cfbc2c23c496f7a749 Mon Sep 17 00:00:00 2001 From: Billy Riaz Date: Sun, 22 Feb 2026 03:53:26 +0000 Subject: [PATCH 05/11] fix(telegram): harden html chunking fallback --- src/messaging/telegram.rs | 83 +++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/src/messaging/telegram.rs b/src/messaging/telegram.rs index f949cae1a..b2cf7e66e 100644 --- a/src/messaging/telegram.rs +++ b/src/messaging/telegram.rs @@ -48,6 +48,9 @@ struct ActiveStream { /// Telegram's per-message character limit. const MAX_MESSAGE_LENGTH: usize = 4096; +/// Smaller source-chunk target for markdown that expands heavily when HTML-escaped. +const FORMATTED_SPLIT_LENGTH: usize = MAX_MESSAGE_LENGTH / 2; + /// Minimum interval between streaming edits to avoid rate limits. const STREAM_EDIT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000); @@ -391,11 +394,7 @@ impl Messaging for TelegramAdapter { tracing::debug!(%html_error, "HTML edit failed, retrying as plain text"); if let Err(error) = self .bot - .edit_message_text( - stream.chat_id, - stream.message_id, - &display_text, - ) + .edit_message_text(stream.chat_id, stream.message_id, &display_text) .send() .await { @@ -885,8 +884,12 @@ fn split_message(text: &str, max_len: usize) -> Vec { static BOLD_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").expect("hardcoded regex")); +static BOLD_UNDERSCORE_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"__(.+?)__").expect("hardcoded regex")); static ITALIC_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"\*(.+?)\*").expect("hardcoded regex")); +static ITALIC_UNDERSCORE_PATTERN: LazyLock = + LazyLock::new(|| Regex::new(r"_(.+?)_").expect("hardcoded regex")); static STRIKETHROUGH_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"~~(.+?)~~").expect("hardcoded regex")); static LINK_PATTERN: LazyLock = @@ -1036,12 +1039,32 @@ fn format_inline(line: &str) -> String { /// are consumed first and single stars only match true italic spans. fn format_markdown_spans(text: &str) -> String { let text = BOLD_PATTERN.replace_all(text, "$1"); + let text = BOLD_UNDERSCORE_PATTERN.replace_all(&text, "$1"); let text = ITALIC_PATTERN.replace_all(&text, "$1"); + let text = ITALIC_UNDERSCORE_PATTERN.replace_all(&text, "$1"); let text = STRIKETHROUGH_PATTERN.replace_all(&text, "$1"); let text = LINK_PATTERN.replace_all(&text, r#"$1"#); text.into_owned() } +/// Send a plain text Telegram message for formatting fallback paths. +async fn send_plain_text( + bot: &Bot, + chat_id: ChatId, + text: &str, + reply_to: Option, +) -> anyhow::Result<()> { + let mut request = bot.send_message(chat_id, text); + if let Some(reply_id) = reply_to { + request = request.reply_parameters(ReplyParameters::new(reply_id)); + } + request + .send() + .await + .context("failed to send telegram message")?; + Ok(()) +} + /// Send a message with Telegram HTML formatting, splitting at the message /// length limit. Falls back to plain text if the API rejects the HTML. async fn send_formatted( @@ -1050,25 +1073,35 @@ async fn send_formatted( text: &str, reply_to: Option, ) -> anyhow::Result<()> { - let html = markdown_to_telegram_html(text); - for chunk in split_message(&html, MAX_MESSAGE_LENGTH) { + let mut pending_chunks: VecDeque = + VecDeque::from(split_message(text, MAX_MESSAGE_LENGTH)); + while let Some(markdown_chunk) = pending_chunks.pop_front() { + let html_chunk = markdown_to_telegram_html(&markdown_chunk); + + if html_chunk.len() > MAX_MESSAGE_LENGTH { + let smaller_chunks = split_message(&markdown_chunk, FORMATTED_SPLIT_LENGTH); + if smaller_chunks.len() > 1 { + for chunk in smaller_chunks.into_iter().rev() { + pending_chunks.push_front(chunk); + } + continue; + } + + let plain_chunk = strip_html_tags(&html_chunk); + send_plain_text(bot, chat_id, &plain_chunk, reply_to).await?; + continue; + } + let mut request = bot - .send_message(chat_id, &chunk) + .send_message(chat_id, &html_chunk) .parse_mode(ParseMode::Html); if let Some(reply_id) = reply_to { request = request.reply_parameters(ReplyParameters::new(reply_id)); } if let Err(error) = request.send().await { tracing::debug!(%error, "HTML send failed, retrying as plain text"); - let plain = strip_html_tags(&chunk); - let mut fallback = bot.send_message(chat_id, &plain); - if let Some(reply_id) = reply_to { - fallback = fallback.reply_parameters(ReplyParameters::new(reply_id)); - } - fallback - .send() - .await - .context("failed to send telegram message")?; + let plain_chunk = strip_html_tags(&html_chunk); + send_plain_text(bot, chat_id, &plain_chunk, reply_to).await?; } } Ok(()) @@ -1094,6 +1127,22 @@ mod tests { ); } + #[test] + fn bold_with_underscores() { + assert_eq!( + markdown_to_telegram_html("__bold text__"), + "bold text" + ); + } + + #[test] + fn italic_with_underscores() { + assert_eq!( + markdown_to_telegram_html("_italic text_"), + "italic text" + ); + } + #[test] fn bold_and_italic_nested() { assert_eq!( From ac26cc2645db455ed4b633c987754e4a8228bff6 Mon Sep 17 00:00:00 2001 From: Billy Riaz Date: Sun, 22 Feb 2026 03:53:56 +0000 Subject: [PATCH 06/11] fix(telegram): add plain-text fallback for HTML-formatted file captions If Telegram rejects the HTML caption the file was silently lost. Now retry with the raw caption text so the document is always delivered. --- src/messaging/telegram.rs | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/messaging/telegram.rs b/src/messaging/telegram.rs index b2cf7e66e..a88816415 100644 --- a/src/messaging/telegram.rs +++ b/src/messaging/telegram.rs @@ -317,16 +317,31 @@ impl Messaging for TelegramAdapter { } => { self.stop_typing(&message.conversation_id).await; - let input_file = InputFile::memory(data).file_name(filename); - let mut request = self.bot.send_document(chat_id, input_file); - if let Some(caption_text) = caption { - let html_caption = markdown_to_telegram_html(&caption_text); - request = request.caption(html_caption).parse_mode(ParseMode::Html); + let input_file = InputFile::memory(data.clone()).file_name(filename.clone()); + let sent = if let Some(ref caption_text) = caption { + let html_caption = markdown_to_telegram_html(caption_text); + self.bot + .send_document(chat_id, input_file) + .caption(&html_caption) + .parse_mode(ParseMode::Html) + .send() + .await + } else { + self.bot.send_document(chat_id, input_file).send().await + }; + + if let Err(error) = sent { + tracing::debug!(%error, "HTML caption send failed, retrying as plain text"); + let fallback_file = InputFile::memory(data).file_name(filename); + let mut request = self.bot.send_document(chat_id, fallback_file); + if let Some(caption_text) = caption { + request = request.caption(caption_text); + } + request + .send() + .await + .context("failed to send telegram file")?; } - request - .send() - .await - .context("failed to send telegram file")?; } OutboundResponse::Reaction(emoji) => { let message_id = self.extract_message_id(message)?; From 3780eb41cf49cd7b672396aa87b2ec0de017e165 Mon Sep 17 00:00:00 2001 From: Billy Riaz Date: Sun, 22 Feb 2026 03:58:52 +0000 Subject: [PATCH 07/11] fix(telegram): retry plain captions only on parse errors --- src/messaging/telegram.rs | 44 ++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/messaging/telegram.rs b/src/messaging/telegram.rs index a88816415..47c1bda48 100644 --- a/src/messaging/telegram.rs +++ b/src/messaging/telegram.rs @@ -7,13 +7,13 @@ use crate::{Attachment, InboundMessage, MessageContent, OutboundResponse, Status use anyhow::Context as _; use arc_swap::ArcSwap; use regex::Regex; -use teloxide::Bot; use teloxide::payloads::setters::*; use teloxide::requests::{Request, Requester}; use teloxide::types::{ ChatAction, ChatId, FileId, InputFile, InputPollOption, MediaKind, MessageId, MessageKind, ParseMode, ReactionType, ReplyParameters, UpdateKind, UserId, }; +use teloxide::{ApiError, Bot, RequestError}; use std::collections::{HashMap, VecDeque}; use std::sync::{Arc, LazyLock}; @@ -331,16 +331,24 @@ impl Messaging for TelegramAdapter { }; if let Err(error) = sent { - tracing::debug!(%error, "HTML caption send failed, retrying as plain text"); - let fallback_file = InputFile::memory(data).file_name(filename); - let mut request = self.bot.send_document(chat_id, fallback_file); - if let Some(caption_text) = caption { - request = request.caption(caption_text); + if should_retry_plain_caption(&error) { + tracing::debug!( + %error, + "HTML caption parse failed, retrying telegram file with plain caption" + ); + let fallback_file = InputFile::memory(data).file_name(filename); + let mut request = self.bot.send_document(chat_id, fallback_file); + if let Some(caption_text) = caption { + request = request.caption(caption_text); + } + request + .send() + .await + .context("failed to send telegram file")?; + } else { + return Err(error) + .context("failed to send telegram file with HTML caption")?; } - request - .send() - .await - .context("failed to send telegram file")?; } } OutboundResponse::Reaction(emoji) => { @@ -895,6 +903,11 @@ fn split_message(text: &str, max_len: usize) -> Vec { chunks } +/// Return true when Telegram rejected rich text entities and a plain-caption retry is safe. +fn should_retry_plain_caption(error: &RequestError) -> bool { + matches!(error, RequestError::Api(ApiError::CantParseEntities(_))) +} + // -- Markdown-to-Telegram-HTML formatting -- static BOLD_PATTERN: LazyLock = @@ -1282,4 +1295,15 @@ mod tests { let expected = "- item one\n- item two\n- item three"; assert_eq!(markdown_to_telegram_html(input), expected); } + + #[test] + fn retries_plain_caption_only_for_parse_entity_errors() { + let parse_error = RequestError::Api(ApiError::CantParseEntities( + "Bad Request: can't parse entities".into(), + )); + let non_parse_error = RequestError::Api(ApiError::BotBlocked); + + assert!(should_retry_plain_caption(&parse_error)); + assert!(!should_retry_plain_caption(&non_parse_error)); + } } From 75502c166c30994cc77bf8bbafb13215e5f53412 Mon Sep 17 00:00:00 2001 From: Nebhay Date: Sun, 22 Feb 2026 08:55:28 +0000 Subject: [PATCH 08/11] add missing name None --- src/config.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index cabf1d6e6..3ddb349b9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1932,7 +1932,8 @@ impl Config { api_type: ApiType::OpenAiCompletions, base_url: FIREWORKS_PROVIDER_BASE_URL.to_string(), api_key: fireworks_key, - }); + name: None, + }); } if let Some(gemini_key) = llm.gemini_key.clone() { @@ -2278,6 +2279,7 @@ impl Config { api_type: ApiType::OpenAiCompletions, base_url: FIREWORKS_PROVIDER_BASE_URL.to_string(), api_key: fireworks_key, + name: None, }); } From 1c3e5be25864addc44ad9a6bab7e3b1a6d69de1e Mon Sep 17 00:00:00 2001 From: Kai Meder Date: Sun, 22 Feb 2026 19:05:56 +0100 Subject: [PATCH 09/11] remove obsolete plan document from #58 https://github.com/spacedriveapp/spacebot/pull/58 --- docs/PRD-slack-enhancements.md | 275 --------------------------------- 1 file changed, 275 deletions(-) delete mode 100644 docs/PRD-slack-enhancements.md diff --git a/docs/PRD-slack-enhancements.md b/docs/PRD-slack-enhancements.md deleted file mode 100644 index 80373c5fd..000000000 --- a/docs/PRD-slack-enhancements.md +++ /dev/null @@ -1,275 +0,0 @@ -# PRD: Slack Connector Enhancements - -**Status:** Draft -**Author:** sookochoff -**Repo:** spacedriveapp/spacebot -**Branch target:** main - ---- - -## Background - -The Slack connector in `src/messaging/slack.rs` is functional but uses a narrow slice of what the Slack platform and the existing `slack-morphism` library offer. For a company running Spacebot as a team agent, the current adapter covers basic conversation but leaves significant productivity and workflow value on the table. - -This document enumerates the gaps, scores them by value and effort, and proposes a phased delivery plan. - ---- - -## Current State Audit - -### What the connector already does - -| Feature | Status | -|---|---| -| Receives plain text messages via Socket Mode | ✅ | -| Receives file attachments | ✅ | -| Sends plain text replies (single channel or thread) | ✅ | -| Message splitting at 4,000 chars | ✅ | -| Emoji reactions (add) | ✅ | -| Streaming via message edit (`chat.update`) | ✅ | -| File upload (v2 flow: get URL → upload → complete) | ✅ | -| DM broadcasting via `conversations.open` | ✅ | -| History backfill (`conversations.history` + `conversations.replies`) | ✅ | -| User display name resolution on message receipt | ✅ | -| Permission filtering (workspace, channel, DM allowlist) | ✅ | -| Health check (`api.test`) | ✅ | - -### What the connector explicitly skips - -| Feature | Notes in code | -|---|---| -| Typing indicator | Comment: `// no-op, Slack has no native typing indicator API` — **incorrect**, `assistant.threads.setStatus` is the modern equivalent | -| Message subtypes (edits, deletes) | Silently dropped | -| `app_mention` events | Not wired in `handle_push_event` — agent won't respond to `@bot` in channels where it isn't already present | -| Slash commands | `command_callback` in slack-morphism is available but unused | -| Block Kit (rich messages) | `SlackBlock` et al. exist in the library, never used in outbound | -| Interactive components (button clicks, select menus) | `SlackInteractionEvent` / `SlackInteractionBlockActionsEvent` available, unused | -| Ephemeral messages | `chat_post_ephemeral` in library, unused | -| Scheduled messages | `chat_schedule_message` in library, unused | -| `app_home` tab | `SlackAppHomeOpenedEvent` in library, unused | -| Thread awareness in history | Thread replies fetched separately from channel history — not unified | -| Reaction-received events | `SlackReactionAddedEvent` available, unused | -| Unfurl / link previews | `chat_unfurl` in library, unused | -| User presence | `users_get_presence` in library, unused | -| Message pinning by agent | `pins_add` in library, unused | -| User groups (`@here`, `@channel`, group handles) | `usergroups_list` in library, unused | -| `SlackApiAssistantThreadsSetStatus` | Available — correct typing-indicator mechanism, unused | - ---- - -## Opportunity Analysis - -The gaps above are not all equal. Evaluated from the perspective of **a company team using Slack day-to-day with a Spacebot agent:** - -### High value, low effort - -1. **`app_mention` support** — Users in a channel naturally @mention the bot. Currently those events are ignored. One-line fix in the push event handler. - -2. **Typing indicator via `assistant.threads.setStatus`** — The agent goes dark while thinking. On Discord the bot shows a typing indicator; on Slack it's a dead interface. The Slack Assistants API provides exactly this. Low code change; meaningful UX. - -3. **Block Kit for structured outbound messages** — A new `OutboundResponse::Blocks` variant (or `RichMessage`) lets the agent send formatted cards, section headers, dividers, and inline code. Slack's renderer makes these significantly more readable than walls of markdown. Library support is full; gap is only in the `OutboundResponse` enum and adapter glue. - -4. **Ephemeral messages** — Agent can whisper a confirmation or warning to only the requesting user. Useful for admin-type responses in shared channels. - -### High value, medium effort - -5. **Slash commands** — `/ask`, `/summarize`, `/task` etc. Slack's command UX is familiar to every team user. The `command_callback` is available in `SlackSocketModeListenerCallbacks` and just needs wiring up + a config schema for command→agent routing. - -6. **Interactive components (buttons/select menus)** — The agent posts a decision card; a user clicks "Approve" or "Reject". The `SlackInteractionBlockActionsEvent` comes back through `interaction_callback`. Needs: (a) `OutboundResponse` variant to express buttons, (b) inbound `MessageContent` variant for interaction events, (c) interaction callback wired in the socket mode listener. - -7. **Scheduled messages** — The agent can post to a channel at a specific future time (`chat.schedule_message`). This is a natural output for cron-triggered workflows, e.g. "post standup prompt at 9am Monday". Low Rust effort; zero Spacebot architecture change needed — it's purely an outbound response variant. - -### Medium value, medium effort - -8. **Message edit/delete awareness** — Currently silently dropped. If a user corrects a message that the agent already acted on, the agent should be aware. Needs a new `MessageContent` variant and some channel-level state to correlate `message_changed` subtype events back to prior turns. - -9. **Reaction-received events** — User reacts with ✅ or ❌ to acknowledge or reject a proposal. Arriving as `SlackReactionAddedEvent`. Useful for lightweight approvals without typing. - -10. **`app_home` tab** — The agent's home tab in the Slack sidebar can surface a custom view (memory summary, recent tasks, status). Entirely cosmetic from the agent's perspective but gives it a professional presence. - -### Lower priority / out of scope for now - -- User presence (`users.getPresence`) — niche -- Link unfurling — requires Events API subscription changes -- Message pinning — useful but not urgent -- User groups resolution — useful for `@channel` type broadcasts but edge case - ---- - -## Proposed Phases - -### Phase 1: Foundational UX (Recommended first PR) - -**Scope:** Four targeted changes that require no new architecture. - -| Item | Change required | -|---|---| -| **`app_mention` events** | Add `AppMention` arm to `handle_push_event` match | -| **Typing indicator** | Implement `send_status()` using `SlackApiAssistantThreadsSetStatusRequest` | -| **Ephemeral messages** | Add `OutboundResponse::Ephemeral { text, user_id }` variant; handle in Slack adapter with `chat_post_ephemeral` | -| **Reaction removal** | Add `OutboundResponse::RemoveReaction(String)` for completeness (currently only add) | - -**Effort:** Small — 1–2 days of focused Rust work -**Risk:** Low — isolated changes, no new dependencies -**PRs:** Likely one PR with four commits - ---- - -### Phase 2: Block Kit + Interactive Components - -**Scope:** Rich outbound messages and inbound interaction events. - -#### 2a — Block Kit outbound - -Add a new `OutboundResponse` variant: - -```rust -RichMessage { - /// Plain text fallback (always required — used by non-Slack adapters and notifications). - text: String, - /// Block Kit blocks. Slack-only; other adapters fall back to `text`. - blocks: Vec, -} -``` - -- Slack adapter: build `SlackMessageContent` with `blocks` -- Discord adapter: falls back to `text` -- Webhook/Telegram: falls back to `text` - -This is platform-agnostic at the type level. The LLM would request a structured response via a new `reply_with_blocks` tool or through a structured tool output schema. - -#### 2b — Interactive components inbound - -Add a new `MessageContent` variant: - -```rust -Interaction { - /// action_id of the block element that was acted on. - action_id: String, - /// block_id for context. - block_id: Option, - /// Selected value(s), if applicable (button value or select menu option). - value: Option, - /// Human-readable label of the selected option. - label: Option, - /// The original message ts so the agent can correlate back. - message_ts: Option, -} -``` - -Wire `interaction_callback` in the socket mode setup to receive `SlackInteractionBlockActionsEvent` and convert to an `InboundMessage` with this content. - -**Effort:** Medium — 3–4 days -**Risk:** Medium — touches `OutboundResponse` and `MessageContent` enums (shared types), all adapters need an audit pass to ensure the new variants are handled (even if as no-ops) -**PRs:** 2a and 2b can be separate PRs - ---- - -### Phase 3: Slash Commands - -**Scope:** Allow users to invoke the agent via `/command` in any channel. - -Config extension: - -```toml -[messaging.slack.commands] -"/ask" = { agent_id = "main", description = "Ask the agent a question" } -"/task" = { agent_id = "main", description = "Kick off a background task" } -``` - -Implementation: -- Wire `command_callback` in socket mode listener setup -- Parse `SlackCommandEvent` into an `InboundMessage` (using the command text as content) -- Route via the existing binding resolution -- Respond to the command's `response_url` or via `chat.post_message` to the channel - -Slash commands have a 3-second acknowledge requirement from Slack. The adapter must acknowledge immediately (200 OK with empty body or with a deferral message) and post the real reply asynchronously. - -**Effort:** Medium — 2–3 days -**Risk:** Medium — requires config schema extension and a deferred response pattern -**PRs:** Single focused PR - ---- - -### Phase 4: Scheduled Messages - -**Scope:** Let the agent post messages at a future time. - -New `OutboundResponse` variant: - -```rust -ScheduledMessage { - text: String, - /// Unix timestamp when the message should be delivered. - post_at: i64, - /// Optional Block Kit content. Falls back to `text` on non-Slack adapters. - blocks: Option>, -} -``` - -This maps cleanly to `chat_schedule_message` in slack-morphism and requires no new infrastructure. Practically, it makes cron workflows more polished: instead of sending a message immediately on job completion, the agent can time-shift delivery to normal working hours. - -**Effort:** Low — 1 day -**Risk:** Low — self-contained outbound variant -**PRs:** Single small PR, could be bundled with Phase 1 - ---- - -## What Does NOT Need to Change - -- **Socket Mode** — already the right transport for a self-hosted bot. No switch to Events API needed. -- **slack-morphism version** — 2.17 already has full Block Kit and interaction model support. No dependency bump needed. -- **Agent architecture** — Channels, branches, workers, cortex are all unaffected. This is entirely connector-layer work. -- **Existing permissions model** — `SlackPermissions` is sufficient for all phases. - ---- - -## Recommended Delivery Order - -| Phase | Deliverable | Effort | Impact | -|---|---|---|---| -| 1 | `app_mention`, typing indicator, ephemeral messages | Small | High | -| 2a | Block Kit outbound (`RichMessage`) | Medium | High | -| 4 | Scheduled messages | Small | Medium | -| 2b | Interactive components inbound | Medium | High | -| 3 | Slash commands | Medium | Medium | - -Phases 1 and 4 are natural first PRs — they're self-contained, low risk, and address the most visible UX gaps (agent ignores @mentions, no typing indicator, no rich formatting). Phase 2 is where the real workflow power is. - ---- - -## Open Questions - -1. **Tool schema for Block Kit**: Should the LLM specify blocks explicitly (raw Block Kit JSON in tool args), or should there be a higher-level tool interface (e.g., `reply_with_sections([{header, body}])`) that the adapter renders into blocks? The latter is safer and more portable but requires maintaining a thin DSL. - -2. **Interaction routing**: When a button click arrives, should it be routed to the same channel that originally sent the message with the buttons, or treated as a new conversation turn? The former is simpler; the latter is more correct for multi-step flows. - -3. **Slash command permissions**: Should slash commands go through the same binding/channel filter as regular messages, or have their own allowlist? Consider that `/ask` from an unbound workspace should probably be rejected. - -4. **Typing indicator scope**: `assistant.threads.setStatus` works in Slack Assistant threads (AI-mode threads). For regular channel messages, the canonical signal is `typing.setStatus` from the RTM API, which Socket Mode doesn't expose. We may need to accept that typing indicators only work in Slack Assistant thread contexts and document this clearly. - ---- - -## Files Affected (by phase) - -**Phase 1:** -- `src/messaging/slack.rs` — `handle_push_event`, new `send_status` impl, new response arms -- `src/lib.rs` — add `Ephemeral` to `OutboundResponse`, `RemoveReaction` optional -- Other adapters — handle new variants as no-ops - -**Phase 2:** -- `src/messaging/slack.rs` — interaction callback, `RichMessage` response arm -- `src/lib.rs` — `RichMessage` to `OutboundResponse`, `Interaction` to `MessageContent` -- `src/messaging/discord.rs` — handle `RichMessage` (fallback to text) -- `src/messaging/telegram.rs` — handle `RichMessage` (fallback to text) -- `src/messaging/webhook.rs` — handle `RichMessage` (passthrough or fallback) - -**Phase 3:** -- `src/config.rs` — `SlackCommandConfig` struct, extend `SlackConfig` -- `src/messaging/slack.rs` — command callback, deferred response pattern -- `src/lib.rs` — no change (command maps to existing `InboundMessage`) - -**Phase 4:** -- `src/messaging/slack.rs` — `ScheduledMessage` response arm -- `src/lib.rs` — add `ScheduledMessage` to `OutboundResponse` -- Other adapters — no-op or error for the new variant From 329b3a6e22d603d4d2225ff79be6320be1ecae4e Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Sun, 22 Feb 2026 13:19:49 -0800 Subject: [PATCH 10/11] Add design documents for Cron Timezone and Reliability, and Multi-Agent Communication Graph - Introduced a comprehensive plan to enhance cron reliability by addressing timezone configuration, deletion behavior, and visibility of cron states. - Established a communication graph model for agent interactions, allowing for structured messaging and delegation between agents, enhancing coordination and context sharing. --- .../cron-timezone-and-reliability.md | 172 ++++++ .../multi-agent-communication-graph.md | 490 ++++++++++++++++++ 2 files changed, 662 insertions(+) create mode 100644 docs/design-docs/cron-timezone-and-reliability.md create mode 100644 docs/design-docs/multi-agent-communication-graph.md diff --git a/docs/design-docs/cron-timezone-and-reliability.md b/docs/design-docs/cron-timezone-and-reliability.md new file mode 100644 index 000000000..85fee5941 --- /dev/null +++ b/docs/design-docs/cron-timezone-and-reliability.md @@ -0,0 +1,172 @@ +# Cron Timezone and Reliability Plan + +Fix cron behavior so users can trust schedule execution, deletion, and status reporting. + +## Problem Statement + +Users are reporting that cron behavior feels inconsistent: + +- Deleting a cron sometimes still results in future executions. +- Daily jobs expected in the morning do not fire. +- The assistant can claim a deleted cron still exists. +- Timezone semantics are unclear and not user-visible. + +Current behavior has multiple state surfaces that can drift from each other (in-memory scheduler, DB rows, memory bulletin), and active-hour evaluation is tied to server local time without explicit configuration. + +## Goals + +1. Make cron timezone explicit and configurable. +2. Support global env override for cron timezone. +3. Support per-agent timezone overrides. +4. Keep default behavior safe: server timezone when no explicit setting exists. +5. Remove ghost executions after deletion. +6. Make timezone semantics visible in tool/API responses and docs. + +## Non-Goals + +1. Full cron-expression scheduling in this change. +2. Auto-detecting user timezone from messaging adapters. +3. Converting interval scheduling to wall-clock calendar scheduling. +4. Retrofitting memory reconciliation for every historical cron operation. + +## Existing Issues in Code + +1. Tool deletion does not stop timers: + - `src/tools/cron.rs` delete path removes DB row only. + - API delete path does call `scheduler.unregister` in `src/api/cron.rs`. +2. Active hours use server local timezone directly: + - `src/cron/scheduler.rs` uses `chrono::Local` for hour checks. +3. Schedule expectations mismatch: + - Jobs are interval-based; active hours are a gating window on ticks. +4. Source-of-truth ambiguity in conversational responses: + - Memory context can mention stale cron state unless the model calls live list tools. + +## Configuration Design + +### New Config Fields + +- `defaults.cron_timezone` (optional string) +- `agents[].cron_timezone` (optional string override) + +### New Environment Variable + +- `SPACEBOT_CRON_TIMEZONE` (optional string) + +### Accepted Values + +- IANA timezone names (examples: `UTC`, `America/Los_Angeles`, `Europe/Berlin`). + +### Resolution Order + +Per agent: + +1. `agents[].cron_timezone` +2. `defaults.cron_timezone` +3. `SPACEBOT_CRON_TIMEZONE` +4. server local timezone + +If the configured timezone is invalid: + +- log a warning with agent id and invalid value +- fall back to server local timezone + +## Runtime Design + +### Resolved Config + +Add a resolved `cron_timezone` field to `ResolvedAgentConfig` and carry it into `RuntimeConfig` so scheduler logic can read the live value. + +### Scheduler Behavior + +Replace active-hour evaluation from hardcoded local time to resolved cron timezone. Keep current interval cadence and active-hour gating semantics unchanged. + +### Deletion Semantics + +Unify delete behavior across entry points: + +- Tool delete should call `scheduler.unregister(id)` before/alongside DB deletion. +- API delete remains the same. + +Outcome: no timer should continue firing after successful delete regardless of whether delete originated via tool call or API endpoint. + +## API and Tool UX Changes + +### Tool (`cron`) responses + +- Include timezone context in create/list messages. +- Example: "Active hours evaluated in America/Los_Angeles." + +### API responses + +- Add resolved timezone to cron list payload. +- Optionally include timezone source (`agent`, `defaults`, `env`, `system`) for debugging. + +## Documentation Changes + +Update existing docs in the same change: + +1. `docs/content/docs/(configuration)/config.mdx` + - Add `cron_timezone` in defaults and per-agent sections. + - Document `SPACEBOT_CRON_TIMEZONE`. + - Document precedence and fallback behavior. +2. `docs/content/docs/(features)/cron.mdx` + - Clarify active-hour timezone resolution. + - Clarify interval-based semantics vs wall-clock expectations. +3. `README.md` + - Add concise timezone example for cron config. + +## Testing Plan + +Add focused tests for cron reliability and timezone behavior: + +1. Resolution precedence: agent > defaults > env > system. +2. Invalid timezone fallback to system local. +3. Active-hour pass/fail in named timezones. +4. Midnight-wrap windows in named timezones. +5. Tool delete unregister behavior (no post-delete execution). + +## Implementation Phases + +### Phase 1: Config and resolution plumbing + +1. Extend TOML structs for defaults and agent timezone fields. +2. Extend runtime/resolved config structs. +3. Implement precedence and validation logic. +4. Wire into hot reload so runtime timezone updates are applied. + +### Phase 2: Scheduler timezone integration + +1. Replace `chrono::Local` active-hour checks with resolved timezone. +2. Keep current interval/tick behavior unchanged. +3. Add logs that include timezone context for cron evaluation decisions. + +### Phase 3: Delete path reliability + +1. Update tool delete path to unregister scheduler timer. +2. Keep API delete path aligned. +3. Ensure idempotent behavior when timer handle does not exist. + +### Phase 4: API/tool transparency + +1. Include timezone in list/create responses. +2. Return timezone in API cron list payloads. + +### Phase 5: Docs and tests + +1. Update config and cron feature docs. +2. Add regression tests for timezone and deletion behavior. +3. Validate expected behavior manually with one daily cron and one active-window cron. + +## Acceptance Criteria + +1. Deleting a cron via tool or API prevents future firings. +2. Active hours are evaluated in resolved agent timezone. +3. Env and config precedence is deterministic and documented. +4. Cron list/create outputs clearly state timezone used. +5. Docs reflect actual runtime behavior. + +## Follow-Up Work + +1. Add wall-clock scheduling primitives (`daily_at`, cron expressions). +2. Force live cron-list tool calls when users ask "what crons are active". +3. Add optional memory sync actions for cron create/delete/toggle lifecycle. diff --git a/docs/design-docs/multi-agent-communication-graph.md b/docs/design-docs/multi-agent-communication-graph.md new file mode 100644 index 000000000..9789dbacb --- /dev/null +++ b/docs/design-docs/multi-agent-communication-graph.md @@ -0,0 +1,490 @@ +# Multi-Agent Communication Graph + +Agents on a single Spacebot instance are completely isolated. They share an LLM provider pool and a messaging pipeline, but have no way to talk to each other. The `send_message_to_another_channel` tool sends messages to platform channels (Discord, Slack, etc.), not to other agents. Two agents watching the same Discord server can't coordinate, delegate, escalate, or share context. + +The fix: an explicit communication graph between agents. Directed edges define who can message whom, with policy flags controlling the relationship. Messages flow through a shared internal channel visible to both agents and to humans in the dashboard. The graph models organizational hierarchy — superiors, subordinates, peers — so agents can be wired into company-like structures with clear chains of delegation and reporting. + +The existing `send_message_to_another_channel` tool is unrelated — it's scoped to a single agent's known platform channels and routes through `MessagingManager::broadcast()`. Agent-to-agent communication is a new mechanism, but not a new transport. Messages are constructed as `InboundMessage` with `source: "internal"` and injected into the existing `MessagingManager` fan-in via `inject_message()`. The main loop already routes by `agent_id` and `conversation_id`, so internal messages flow through the same pipeline as platform messages — with link policy enforcement, shared conversation history, and UI visibility. + +## What Exists Today + +**Agent isolation:** Each agent has its own SQLite database, memory store, LanceDB instance, and set of channels. Agents share only the `LlmManager`, `MessagingManager`, and instance-level config. There is no data path between agents. + +**Cross-channel messaging:** The `send_message_to_another_channel` tool lets a channel send a message to another platform channel via `MessagingManager::broadcast()`. It resolves targets through `ChannelStore::find_by_name()`, which searches the `channels` table for display name matches. This is platform-level routing — it delivers to a Discord channel or Telegram chat, not to another agent's processing pipeline. + +**Available channels context:** The `available_channels.md.j2` prompt fragment lists channels the agent knows about. This gives the LLM awareness of where it can send messages, but the list is platform channels, not agent peers. + +**Bindings:** `config.rs` defines `Binding` structs that route inbound platform messages to agents. Bindings are one-directional (platform → agent) and have no concept of agent-to-agent routing. + +**Event bus:** `ProcessEvent` is a broadcast channel per agent. Events are typed (branch started, worker complete, tool started, etc.) and feed the API's SSE pipeline. There is no cross-agent event bus. + +## The Communication Graph + +The graph is a set of directed edges between agents. Each edge is a **link** — a persistent, policy-governed communication channel. When agent A has a link to agent B, agent A can send messages to agent B. The link carries policy flags that define the nature of the relationship. + +### Why Agent-Level, Not Channel-Level + +Links connect agents, not channels. An agent may have dozens of active channels (one per Discord thread, Telegram chat, etc.), and those channels come and go. The communication graph operates at the organizational level — "the support agent can escalate to the engineering agent" — not at the conversation level. + +When agent A sends a message to agent B through a link, the message lands in a dedicated internal channel between them. This channel persists across platform channel lifecycles. + +### Link Model + +```rust +pub struct AgentLink { + pub id: String, // UUID + pub from_agent_id: String, // source agent + pub to_agent_id: String, // target agent + pub direction: LinkDirection, // one_way, two_way + pub relationship: LinkRelationship, // peer, superior, subordinate + pub enabled: bool, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +pub enum LinkDirection { + /// from_agent can message to_agent, but not vice versa. + OneWay, + /// Both agents can message each other through this link. + TwoWay, +} + +pub enum LinkRelationship { + /// Equal peers — neither agent has authority over the other. + Peer, + /// from_agent is superior to to_agent. Can delegate tasks, + /// request status, and override decisions. + Superior, + /// from_agent is subordinate to to_agent. Reports status, + /// escalates issues, requests approval. + Subordinate, +} +``` + +A two-way link creates a single internal channel shared by both agents. A one-way link creates the same channel but only the `from_agent` can initiate messages — the `to_agent` can read and respond within an existing thread but cannot start new conversations. + +### Internal Channel + +When a link is created, a dedicated internal channel is spawned for that link. The channel ID follows the pattern `link:{link_id}`. This channel: + +- Has its own conversation history in `conversation_messages`, just like platform channels +- Appears in both agents' channel lists +- Is visible in the dashboard under a dedicated "Agent Links" section +- Supports the same coalescing, branching, and worker spawning as platform channels + +Messages in this channel carry metadata identifying the sending agent: + +```rust +InboundMessage { + id: uuid::Uuid::new_v4().to_string(), + source: "internal".into(), + conversation_id: format!("link:{link_id}"), + sender_id: sending_agent_id.to_string(), + agent_id: Some(receiving_agent_id.clone()), + content: MessageContent::Text(message), + timestamp: Utc::now(), + metadata: HashMap::from([ + ("link_id".into(), json!(link_id)), + ("from_agent_id".into(), json!(sending_agent_id)), + ("relationship".into(), json!("peer")), // or "superior" / "subordinate" + ]), + formatted_author: Some(format!("[{}]", sending_agent_name)), +} +``` + +### Relationship Semantics + +The `LinkRelationship` affects the receiving agent's system prompt context and available actions: + +**Peer:** Both agents are equals. Messages are informational — "here's what I found", "can you check this", "FYI the deploy failed". Neither agent has authority to assign tasks to the other. + +**Superior → Subordinate:** The superior can delegate tasks (which spawn workers on the subordinate), request status reports, and send directives. The subordinate's prompt context includes awareness that messages from this agent carry authority. The subordinate can escalate back — "I need help with X" or "this is beyond my scope." + +**Subordinate → Superior:** The subordinate can report status, escalate issues, and request decisions. The superior's prompt context frames these as reports from a direct report. The superior can respond with instructions. + +The relationship doesn't restrict message delivery — it frames context. A subordinate can still message its superior freely. The relationship metadata shapes how the LLM interprets and responds to messages. + +## Schema + +New migration: + +```sql +CREATE TABLE IF NOT EXISTS agent_links ( + id TEXT PRIMARY KEY, + from_agent_id TEXT NOT NULL, + to_agent_id TEXT NOT NULL, + direction TEXT NOT NULL DEFAULT 'two_way', -- 'one_way' or 'two_way' + relationship TEXT NOT NULL DEFAULT 'peer', -- 'peer', 'superior', 'subordinate' + enabled INTEGER NOT NULL DEFAULT 1, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(from_agent_id, to_agent_id) +); + +CREATE INDEX IF NOT EXISTS idx_agent_links_from ON agent_links(from_agent_id); +CREATE INDEX IF NOT EXISTS idx_agent_links_to ON agent_links(to_agent_id); +``` + +This table lives in a shared instance-level SQLite database, not per-agent. Agent links span agents, so they can't live in either agent's isolated database. The instance already has a config loading path — this adds a small shared database alongside it. + +The `UNIQUE(from_agent_id, to_agent_id)` constraint means at most one link per direction between two agents. A two-way link between A and B is a single row with `direction = 'two_way'`. For asymmetric relationships (A is superior to B), there's one row with `from_agent_id = A, to_agent_id = B, relationship = 'superior'`. + +### Instance Database + +New shared SQLite database at `{instance_dir}/instance.db`. Contains only cross-agent data: + +``` +{instance_dir}/ + instance.db ← new: agent_links, shared_notes (future) + agents/ + agent-a/ + data/spacebot.db ← existing per-agent database + agent-b/ + data/spacebot.db +``` + +This keeps per-agent data isolated while giving links a home that both agents can reference. + +Instance-level migrations live in a separate `migrations_instance/` directory. The existing `sqlx::migrate!("./migrations")` is compiled into the binary targeting per-agent databases — instance.db needs its own `sqlx::migrate!("./migrations_instance/")` call during startup. + +## LinkStore + +```rust +pub struct LinkStore { + pool: SqlitePool, // instance.db pool +} + +impl LinkStore { + /// Get all links involving this agent (as source or target). + pub async fn get_links_for_agent(&self, agent_id: &str) -> Result>; + + /// Get a specific link by ID. + pub async fn get(&self, link_id: &str) -> Result>; + + /// Get the link between two specific agents (if any). + pub async fn get_between( + &self, + from_agent_id: &str, + to_agent_id: &str, + ) -> Result>; + + /// Create a new link. Returns error if a link already exists between these agents. + pub async fn create(&self, link: &AgentLink) -> Result<()>; + + /// Update link properties (direction, relationship, enabled). + pub async fn update(&self, link: &AgentLink) -> Result<()>; + + /// Delete a link and its associated internal channel history. + pub async fn delete(&self, link_id: &str) -> Result<()>; +} +``` + +## Message Routing + +### Sending + +New tool: `send_agent_message`. Available to channels that have at least one active link. + +```rust +pub struct SendAgentMessageArgs { + /// Target agent ID or name. + pub target: String, + /// The message content. + pub message: String, +} +``` + +The tool: + +1. Resolves the target agent by ID or name +2. Looks up the link between the sending agent and the target agent via `LinkStore` +3. Validates the link exists, is enabled, and permits messaging in this direction +4. Constructs an `InboundMessage` with `source: "internal"`, the target `agent_id`, and `conversation_id: "link:{link_id}"` +5. Calls `MessagingManager::inject_message()` to push it into the existing fan-in + +No new transport is needed. `inject_message()` already exists on `MessagingManager` — it pushes an `InboundMessage` into the same `mpsc` channel that platform adapters use. The main loop already routes by `agent_id` and `conversation_id`, so internal messages get routed to the correct agent and land in the correct link channel automatically. + +### Receiving + +The receiving agent processes internal messages the same way it processes platform messages. The message arrives through the existing `InboundMessage` pipeline, gets assigned to the `link:{link_id}` channel, and triggers the standard channel runtime (coalescing, system prompt build, LLM call, branching if needed). + +The channel's system prompt includes context about who the message is from: + +```jinja2 +{%- if link_context %} +## Agent Communication + +This is an internal channel with **{{ link_context.agent_name }}** ({{ link_context.relationship }}). +{% if link_context.relationship == "superior" -%} +Messages from this agent carry organizational authority. Treat directives as assignments. +{%- elif link_context.relationship == "subordinate" -%} +This is a report from a subordinate agent. They may be escalating, reporting status, or requesting guidance. +{%- else -%} +This is a peer agent. Communication is collaborative and informational. +{%- endif %} +{%- endif %} +``` + +## Prompt Integration + +### ROLE.md + +New identity file alongside `SOUL.md`, `IDENTITY.md`, and `USER.md`. Defines what the agent is supposed to *do* — responsibilities, scope, what to handle vs what to escalate, what success looks like. + +`SOUL.md` is personality. `IDENTITY.md` is who the agent is. `USER.md` is context about the human. `ROLE.md` is the job: "you handle tier 1 support tickets, escalate billing issues to the finance agent, never touch production infrastructure." + +In single-agent setups, `ROLE.md` separates identity from operational responsibilities. In multi-agent setups, it's what differentiates agents operationally — each agent sees its position in the hierarchy via org context, and `ROLE.md` tells it what to actually do in that position. Structure vs scope. + +Loaded the same way as the other identity files — from the agent's workspace directory, injected into the system prompt by `identity/files.rs`. + +### Organizational Awareness + +The core prompt addition is not a list of sendable targets — it's structural awareness. The agent needs to understand where it sits in the org, who's above it, who's below it, and who its peers are. Without this, link channels are just more inboxes. With it, the agent can reason about delegation, escalation, and collaboration. + +New prompt fragment `fragments/org_context.md.j2`: + +```jinja2 +{%- if org_context %} +## Organization + +You are part of a multi-agent system. Here is your position: + +{% if org_context.superiors -%} +**Reports to:** +{% for agent in org_context.superiors -%} +- **{{ agent.name }}** — your superior. Messages from this agent carry organizational authority. +{% endfor %} +{%- endif %} + +{% if org_context.subordinates -%} +**Direct reports:** +{% for agent in org_context.subordinates -%} +- **{{ agent.name }}** — reports to you. You can delegate tasks, request status, and send directives. +{% endfor %} +{%- endif %} + +{% if org_context.peers -%} +**Peers:** +{% for agent in org_context.peers -%} +- **{{ agent.name }}** — equal peer. Communication is collaborative and informational. +{% endfor %} +{%- endif %} + +Use the `send_agent_message` tool to communicate with these agents. Each link has a dedicated internal channel with full conversation history. +{%- endif %} +``` + +This is structured by relationship, not as a flat list. The agent sees the hierarchy, not just who it can message. The template groups agents by superiors/subordinates/peers so the LLM can reason about appropriate behavior — escalate up, delegate down, collaborate across. + +This fragment is reusable across process types. Channels get it on every turn. The cortex gets it when running autonomous behaviors (future work — the template is ready, injection into cortex prompts is a separate PR). The data source is the same: `LinkStore::get_links_for_agent()` resolved against agent configs for display names. + +Platform channel awareness (`available_channels.md.j2`) remains separate — it lists Discord/Slack/Telegram channels for `send_message_to_another_channel`. Org context lists agents for `send_agent_message`. Different tools, different context. + +### Link Channel Prompt + +When a channel is a link channel (`link:{link_id}`), the system prompt includes the `link_context` section described in the Receiving section above. This is injected during `build_system_prompt()` by checking if the channel ID starts with `link:` and looking up the link metadata. This is in addition to the org context — the agent knows both its overall position and who it's currently talking to in this specific channel. + +## API Surface + +### Link CRUD + +``` +GET /api/links — list all links +GET /api/links/:id — get link details +POST /api/links — create a link +PUT /api/links/:id — update link properties +DELETE /api/links/:id — delete a link + +GET /api/agents/:id/links — get links for a specific agent +``` + +### Link Messages + +``` +GET /api/links/:id/messages — get conversation history for a link channel +``` + +This reuses the existing conversation history infrastructure — the link channel ID is just another channel ID in the `conversation_messages` table. + +### Topology Snapshot + +``` +GET /api/topology — full agent graph for UI rendering +``` + +Returns: + +```json +{ + "agents": [ + { "id": "support", "name": "Support Agent" }, + { "id": "engineering", "name": "Engineering Agent" } + ], + "links": [ + { + "id": "uuid", + "from": "support", + "to": "engineering", + "direction": "two_way", + "relationship": "subordinate", + "enabled": true + } + ] +} +``` + +This is the payload the React Flow graph editor consumes to render the topology. + +## Config Integration + +Links can be defined in TOML config alongside agents: + +```toml +[[links]] +from = "support" +to = "engineering" +direction = "two_way" +relationship = "subordinate" + +[[links]] +from = "manager" +to = "support" +direction = "two_way" +relationship = "superior" + +[[links]] +from = "manager" +to = "engineering" +direction = "two_way" +relationship = "superior" +``` + +Config-defined links are synced to the database on startup. The API can also create links at runtime. Config-defined links take precedence — if a link exists in both config and DB with different properties, the config version wins on next reload. + +## Event Pipeline + +Link messages emit `ProcessEvent` variants so the dashboard can track inter-agent communication: + +```rust +ProcessEvent::AgentMessageSent { + from_agent_id: AgentId, + to_agent_id: AgentId, + link_id: String, + channel_id: ChannelId, +} + +ProcessEvent::AgentMessageReceived { + from_agent_id: AgentId, + to_agent_id: AgentId, + link_id: String, + channel_id: ChannelId, +} +``` + +These feed into the existing SSE pipeline. The `ProcessEvent` → `ApiEvent` forwarding in `api/state.rs` has a catch-all that drops unknown variants, so these need explicit `ApiEvent` counterparts and match arms to reach the dashboard. The dashboard can then render a live activity view of inter-agent communication overlaid on the topology graph. + +## Shared Notes (v2) + +Deferred to a second phase. The concept: named knowledge nodes that multiple agents can read from and write to, with per-agent permissions (read-only, read-write). + +```sql +CREATE TABLE IF NOT EXISTS shared_notes ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + content TEXT NOT NULL DEFAULT '', + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS shared_note_permissions ( + note_id TEXT NOT NULL REFERENCES shared_notes(id) ON DELETE CASCADE, + agent_id TEXT NOT NULL, + access TEXT NOT NULL DEFAULT 'read', -- 'read' or 'read_write' + PRIMARY KEY (note_id, agent_id) +); +``` + +Shared notes would give agents a persistent scratchpad — the engineering agent writes deployment status, the support agent reads it to answer customer questions. Tools: `read_shared_note`, `write_shared_note`. But this is a separate design problem with its own conflict resolution and versioning concerns. + +## Files Changed + +| File | Change | +|------|--------| +| `migrations_instance/` (new dir) | Separate migration directory for instance.db | +| New migration | `agent_links` table in instance.db | +| `src/links.rs` (new) | Module root, re-exports | +| `src/links/store.rs` (new) | `LinkStore` — CRUD for agent links | +| `src/links/types.rs` (new) | `AgentLink`, `LinkDirection`, `LinkRelationship` | +| `src/tools/send_agent_message.rs` (new) | `SendAgentMessageTool` — agent-to-agent messaging | +| `src/tools.rs` | Register `send_agent_message` tool for linked agents | +| `src/config.rs` | Add `links: Vec` to `Config`, TOML parsing | +| `src/lib.rs` | Add `mod links`, new `ProcessEvent` variants | +| `src/main.rs` | Initialize instance.db, `LinkStore`, sync config links | +| `src/identity/files.rs` | Load `ROLE.md` alongside SOUL/IDENTITY/USER | +| `src/agent/channel.rs` | Inject link context + org context into system prompt, handle internal source messages | +| `src/api/server.rs` | Mount link CRUD and topology routes | +| `src/api/links.rs` (new) | API handlers for link CRUD + topology | +| `src/api/state.rs` | Add `LinkStore` to `ApiState` | +| `prompts/en/fragments/org_context.md.j2` (new) | Organizational hierarchy prompt section | +| `prompts/en/fragments/link_context.md.j2` (new) | Link channel context section | +| `prompts/en/tools/send_agent_message_description.md.j2` (new) | Tool description | +| `src/prompts/engine.rs` | Register new templates | +| `src/prompts/text.rs` | Register new text templates | +| `src/db.rs` | Add instance.db connection setup with separate migration path | + +## Phases + +### Phase 1: Instance Database + Link Model + +- Set up `instance.db` at `{instance_dir}/instance.db` with separate migration directory +- Migration for `agent_links` table +- `LinkStore` with full CRUD +- `AgentLink`, `LinkDirection`, `LinkRelationship` types +- Config parsing for `[[links]]` sections +- Sync config links to database on startup + +### Phase 2: Send Tool + Prompt Context + +- `ROLE.md` identity file — loaded by `identity/files.rs`, injected into system prompt +- `SendAgentMessageTool` — resolve target, validate link, construct `InboundMessage`, deliver via `inject_message()` +- `org_context.md.j2` prompt fragment — organizational hierarchy, injected into channel system prompt when agent has links +- `link_context.md.j2` prompt fragment — inject when channel is a link channel +- Tool description prompt for `send_agent_message` +- Register tool conditionally (only when agent has active links) + +### Phase 3: Channel Runtime Integration + +- Handle `source: "internal"` messages in the channel runtime +- Link channels get their own conversation history (same `conversation_messages` table) +- Coalescing, branching, and workers work on link channels the same as platform channels +- `ProcessEvent::AgentMessageSent` and `AgentMessageReceived` events +- Corresponding `ApiEvent` variants in `api/state.rs` for SSE forwarding + +### Phase 4: API + UI Foundation + +- Link CRUD API endpoints +- Topology snapshot endpoint +- Wire `LinkStore` into `ApiState` +- SSE events for inter-agent message activity + +Phase 1 is the data foundation. Phase 2 gives agents the ability to send messages — no new transport, just a tool that constructs an `InboundMessage` and pushes it through the existing `MessagingManager::inject_message()` fan-in. Phase 3 makes the receiving side work end-to-end. Phase 4 is the API layer that the dashboard will consume for the React Flow editor. + +### Future: React Flow Topology Editor + +Not part of this design but the intended consumer of the topology API. The Overview page in the embedded dashboard would be replaced (or extended) with a React Flow graph showing agents as nodes and links as directed edges. Users drag to create links, click edges to configure direction and relationship. Live activity indicators show messages flowing between agents in real time. + +### Future: Shared Notes (v2) + +Persistent knowledge nodes with per-agent read/write permissions. Separate design doc when the link system is stable. + +## What This Enables + +**Organizational hierarchy.** Wire agents into manager/report structures. A manager agent delegates to specialists, specialists report back. The communication is explicit, auditable, and visible to humans. + +**Cross-agent coordination.** A support agent detects a bug and escalates to the engineering agent with full context. The engineering agent investigates, spawns workers, and reports findings back through the link channel. Humans can observe the entire exchange in the dashboard. + +**Separation of concerns.** Instead of one omniscient agent, split responsibilities across specialized agents that communicate through defined interfaces. A sales agent handles leads, a support agent handles tickets, an engineering agent handles technical work. Each has its own memory, identity, and personality. + +**Auditable communication.** Every inter-agent message is persisted in `conversation_messages` with full metadata. The dashboard shows the communication graph with live activity. There are no hidden side channels — everything flows through the link system. + +**Foundation for agent teams.** Once links and the topology API exist, the React Flow editor turns agent wiring into a visual, drag-and-drop experience. Non-technical users can design agent organizations without editing config files. From 49213f1edd87e318a720195347317c8d50f22d4b Mon Sep 17 00:00:00 2001 From: Kai Meder Date: Sun, 22 Feb 2026 21:23:04 +0100 Subject: [PATCH 11/11] feat: add external browser sandbox support via connect_url --- docs/docker.md | 136 ++++++++++++++++++++++++++++++++++++++++++- src/config.rs | 43 ++++++++++++++ src/tools/browser.rs | 95 ++++++++++++++++++++++++++---- 3 files changed, 261 insertions(+), 13 deletions(-) diff --git a/docs/docker.md b/docs/docker.md index 88cc3a8f8..25e07c973 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -78,8 +78,9 @@ Available environment variables: | `SLACK_BOT_TOKEN` | Slack bot token | | `SLACK_APP_TOKEN` | Slack app token | | `BRAVE_SEARCH_API_KEY` | Brave Search API key | -| `SPACEBOT_CHANNEL_MODEL` | Override channel model | -| `SPACEBOT_WORKER_MODEL` | Override worker model | +| `SPACEBOT_CHANNEL_MODEL` | Override channel model | +| `SPACEBOT_WORKER_MODEL` | Override worker model | +| `SPACEBOT_BROWSER_CONNECT_URL` | CDP URL of an external browser (`http://host:9222`) | ### Config File @@ -149,6 +150,137 @@ volumes: The `shm_size` and `seccomp` settings are needed for Chromium to run properly in a container. +### External Browser + +Run `chromedp/headless-shell` as a separate container and point Spacebot at it via +`connect_url`. This decouples the browser lifecycle from the main process and avoids +bundling Chromium into the Spacebot image. + +Workers spawned by the same agent share one Chrome process (each gets its own tab). A +Chrome crash kills all tabs for that agent. + +#### Spacebot on host, browser in Docker + +When Spacebot runs as a binary directly on the host, expose port 9222 so the host process +can reach the container: + +```yaml +# docker-compose.yml +services: + browser: + image: chromedp/headless-shell:latest + ports: + - "127.0.0.1:9222:9222" + shm_size: 1gb + restart: unless-stopped +``` + +Then configure Spacebot via env var or config: + +```bash +export SPACEBOT_BROWSER_CONNECT_URL=http://localhost:9222 +``` + +```toml +# or in config.toml +[defaults.browser] +connect_url = "http://localhost:9222" +``` + +#### Both in Docker + +When both Spacebot and the browser run in containers, use a Docker network instead of +exposing ports: + +```yaml +services: + spacebot: + image: ghcr.io/spacedriveapp/spacebot:slim + ports: + - "19898:19898" + volumes: + - spacebot-data:/data + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - SPACEBOT_BROWSER_CONNECT_URL=http://browser:9222 + networks: + - spacebot-net + + browser: + image: chromedp/headless-shell:latest + networks: + - spacebot-net + shm_size: 1gb + restart: unless-stopped + +networks: + spacebot-net: + +volumes: + spacebot-data: +``` + +#### Per-agent dedicated sandboxes + +Use a `config.toml` to route each agent to its own container: + +```toml +[defaults.browser] +connect_url = "http://browser-main:9222" + +[[agents]] +id = "research" +[agents.browser] +connect_url = "http://browser-research:9222" + +[[agents]] +id = "internal" +[agents.browser] +enabled = false +``` + +```yaml +services: + spacebot: + image: ghcr.io/spacedriveapp/spacebot:slim + volumes: + - spacebot-data:/data + - ./config.toml:/data/config.toml:ro + networks: + - spacebot-net + + browser-main: + image: chromedp/headless-shell:latest + networks: + - spacebot-net + shm_size: 512mb + restart: unless-stopped + + browser-research: + image: chromedp/headless-shell:latest + networks: + - spacebot-net + shm_size: 1gb + restart: unless-stopped + +networks: + spacebot-net: + +volumes: + spacebot-data: +``` + +#### `connect_url` + +Accepted formats: +- `http://host:9222` — auto-discovers the WebSocket URL via `/json/version` (preferred) +- `ws://host:9222/devtools/browser/` — direct WebSocket URL + +An empty string is treated as unset and falls back to the embedded launch path. + +`SPACEBOT_BROWSER_CONNECT_URL` overrides `[defaults.browser].connect_url` from config but +does not override per-agent `connect_url`. + ## Building the Image From the spacebot repo root: diff --git a/src/config.rs b/src/config.rs index 3ddb349b9..aaa7fb729 100644 --- a/src/config.rs +++ b/src/config.rs @@ -364,6 +364,8 @@ pub struct BrowserConfig { pub executable_path: Option, /// Directory for storing screenshots and other browser artifacts. pub screenshot_dir: Option, + /// CDP URL of an external browser to connect to instead of launching one locally. + pub connect_url: Option, } impl Default for BrowserConfig { @@ -374,6 +376,7 @@ impl Default for BrowserConfig { evaluate_enabled: false, executable_path: None, screenshot_dir: None, + connect_url: None, } } } @@ -1379,6 +1382,7 @@ struct TomlBrowserConfig { evaluate_enabled: Option, executable_path: Option, screenshot_dir: Option, + connect_url: Option, } #[derive(Deserialize)] @@ -2430,6 +2434,7 @@ impl Config { .screenshot_dir .map(PathBuf::from) .or_else(|| base.screenshot_dir.clone()), + connect_url: b.connect_url.or_else(|| base.connect_url.clone()), } }) .unwrap_or_else(|| base_defaults.browser.clone()), @@ -2481,6 +2486,12 @@ impl Config { .unwrap_or(base_defaults.worker_log_mode), }; + // Apply env var overrides to defaults. These override the config file but do not + // override per-agent settings (per-agent configs inherit from defaults at merge time). + if let Ok(url) = std::env::var("SPACEBOT_BROWSER_CONNECT_URL") { + defaults.browser.connect_url = Some(url); + } + let mut agents: Vec = toml .agents .into_iter() @@ -2599,6 +2610,9 @@ impl Config { .screenshot_dir .map(PathBuf::from) .or_else(|| defaults.browser.screenshot_dir.clone()), + connect_url: b + .connect_url + .or_else(|| defaults.browser.connect_url.clone()), }), mcp: match a.mcp { Some(mcp_servers) => Some( @@ -2775,6 +2789,13 @@ impl Config { } }; + warn_browser_config("defaults", &defaults.browser); + for agent in &agents { + if let Some(browser) = &agent.browser { + warn_browser_config(&agent.id, browser); + } + } + Ok(Config { instance_dir, llm, @@ -2983,6 +3004,28 @@ impl std::fmt::Debug for RuntimeConfig { } } +/// Warn at config load time about `BrowserConfig` fields that have no effect when +/// `connect_url` is set. +fn warn_browser_config(context: &str, config: &BrowserConfig) { + let Some(url) = config.connect_url.as_deref().filter(|u| !u.is_empty()) else { + return; + }; + if config.executable_path.is_some() { + tracing::warn!( + context, + connect_url = url, + "connect_url is set; executable_path has no effect" + ); + } + if !config.headless { + tracing::warn!( + context, + connect_url = url, + "connect_url is set; headless flag has no effect" + ); + } +} + /// Watches config, prompt, identity, and skill files for changes and triggers /// hot reload on the corresponding RuntimeConfig. /// diff --git a/src/tools/browser.rs b/src/tools/browser.rs index 7f9b5f3e9..e7418ec99 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -5,6 +5,7 @@ //! ref system for LLM-friendly element addressing. use crate::config::BrowserConfig; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use chromiumoxide::browser::{Browser, BrowserConfig as ChromeConfig}; use chromiumoxide::page::ScreenshotParams; @@ -32,7 +33,7 @@ use tokio::task::JoinHandle; /// Blocks private/loopback IPs, link-local addresses, and cloud metadata endpoints /// to prevent server-side request forgery. fn validate_url(url: &str) -> Result<(), BrowserError> { - let parsed = Url::parse(url) + let parsed = url::Url::parse(url) .map_err(|error| BrowserError::new(format!("invalid URL '{url}': {error}")))?; match parsed.scheme() { @@ -147,12 +148,15 @@ struct BrowserState { element_refs: HashMap, /// Counter for generating element refs. next_ref: usize, + /// True when connected to an external browser process rather than a locally launched one. + external: bool, } impl std::fmt::Debug for BrowserState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("BrowserState") .field("has_browser", &self.browser.is_some()) + .field("external", &self.external) .field("pages", &self.pages.len()) .field("active_target", &self.active_target) .field("element_refs", &self.element_refs.len()) @@ -182,6 +186,7 @@ impl BrowserTool { active_target: None, element_refs: HashMap::new(), next_ref: 0, + external: false, })), config, screenshot_dir, @@ -474,12 +479,54 @@ impl BrowserTool { return Ok(BrowserOutput::success("Browser already running")); } + let use_external = self + .config + .connect_url + .as_deref() + .is_some_and(|url| !url.is_empty()); + + if use_external { + self.launch_external(&mut state).await + } else { + self.launch_embedded(&mut state).await + } + } + + async fn launch_external( + &self, + state: &mut BrowserState, + ) -> Result { + let connect_url = self.config.connect_url.as_deref().unwrap(); + + tracing::info!(connect_url, "connecting to external browser"); + + let (browser, mut handler) = Browser::connect(connect_url).await.map_err(|error| { + BrowserError::new(format!( + "failed to connect to browser at {connect_url}: {error}" + )) + })?; + + let handler_task = tokio::spawn(async move { while handler.next().await.is_some() {} }); + + state.browser = Some(browser); + state._handler_task = Some(handler_task); + state.external = true; + + tracing::info!(connect_url, "connected to external browser"); + Ok(BrowserOutput::success(format!( + "Connected to external browser at {connect_url}" + ))) + } + + async fn launch_embedded( + &self, + state: &mut BrowserState, + ) -> Result { let mut builder = ChromeConfig::builder().no_sandbox(); if !self.config.headless { builder = builder.with_head().window_size(1280, 900); } - if let Some(path) = &self.config.executable_path { builder = builder.chrome_executable(path); } @@ -491,7 +538,7 @@ impl BrowserTool { tracing::info!( headless = self.config.headless, executable = ?self.config.executable_path, - "launching chrome" + "launching embedded browser" ); let (browser, mut handler) = Browser::launch(chrome_config) @@ -502,8 +549,9 @@ impl BrowserTool { state.browser = Some(browser); state._handler_task = Some(handler_task); + state.external = false; - tracing::info!("browser launched"); + tracing::info!("embedded browser launched"); Ok(BrowserOutput::success("Browser launched successfully")) } @@ -957,20 +1005,45 @@ impl BrowserTool { async fn handle_close(&self) -> Result { let mut state = self.state.lock().await; - if let Some(mut browser) = state.browser.take() - && let Err(error) = browser.close().await - { - tracing::warn!(%error, "browser close returned error"); + if state.external { + self.close_external(&mut state).await + } else { + self.close_embedded(&mut state).await + } + } + + async fn close_external( + &self, + state: &mut BrowserState, + ) -> Result { + // Drop without Browser.close — that CDP command would terminate the external process. + state.browser.take(); + self.reset_state(state); + tracing::info!("external browser disconnected"); + Ok(BrowserOutput::success("Browser closed")) + } + + async fn close_embedded( + &self, + state: &mut BrowserState, + ) -> Result { + if let Some(mut browser) = state.browser.take() { + if let Err(error) = browser.close().await { + tracing::warn!(%error, "embedded browser close returned error"); + } } + self.reset_state(state); + tracing::info!("embedded browser closed"); + Ok(BrowserOutput::success("Browser closed")) + } + fn reset_state(&self, state: &mut BrowserState) { state.pages.clear(); state.active_target = None; state.element_refs.clear(); state.next_ref = 0; state._handler_task = None; - - tracing::info!("browser closed"); - Ok(BrowserOutput::success("Browser closed")) + state.external = false; } /// Get the active page, or create a first one if the browser has no pages yet.