diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index bc8fc2e1..6c32f6ea 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -20,3 +20,15 @@ body: description: What did you expect to happen? validations: required: true + - type: textarea + attributes: + label: Environment + description: "OS, browser, Node version, or any relevant environment info" + validations: + required: false + - type: textarea + attributes: + label: Screenshots / Logs + description: Any screenshots or error logs that might help + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml new file mode 100644 index 00000000..5ff3dc1b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.yml @@ -0,0 +1,16 @@ +name: Documentation +description: Report a documentation issue or request new documentation +labels: [documentation, needs-triage] +body: + - type: textarea + attributes: + label: Description + description: What's wrong or missing in the documentation? + validations: + required: true + - type: textarea + attributes: + label: Suggested Change + description: How do you think it should be improved? + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml index 2b873e1a..275f45ee 100644 --- a/.github/ISSUE_TEMPLATE/feature.yml +++ b/.github/ISSUE_TEMPLATE/feature.yml @@ -14,3 +14,9 @@ body: description: Why do you need this? validations: required: true + - type: textarea + attributes: + label: Proposed Solution + description: Do you have any ideas on how this could be implemented? 
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/guidance.yml b/.github/ISSUE_TEMPLATE/guidance.yml index 4ba921fd..29a9f5d1 100644 --- a/.github/ISSUE_TEMPLATE/guidance.yml +++ b/.github/ISSUE_TEMPLATE/guidance.yml @@ -1,5 +1,5 @@ name: Guidance -description: Ask a question or request guidance +description: Ask a question, request guidance, or report something you're unsure about labels: [guidance, needs-triage] body: - type: textarea diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 51301bfd..c2dcf678 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -69,6 +69,7 @@ jobs: - { suffix: "-codex", dockerfile: "Dockerfile.codex", artifact: "codex" } - { suffix: "-claude", dockerfile: "Dockerfile.claude", artifact: "claude" } - { suffix: "-gemini", dockerfile: "Dockerfile.gemini", artifact: "gemini" } + - { suffix: "-copilot", dockerfile: "Dockerfile.copilot", artifact: "copilot" } platform: - { os: linux/amd64, runner: ubuntu-latest } - { os: linux/arm64, runner: ubuntu-24.04-arm } @@ -129,6 +130,7 @@ jobs: - { suffix: "-codex", artifact: "codex" } - { suffix: "-claude", artifact: "claude" } - { suffix: "-gemini", artifact: "gemini" } + - { suffix: "-copilot", artifact: "copilot" } runs-on: ubuntu-latest permissions: contents: read @@ -176,6 +178,7 @@ jobs: - { suffix: "-codex" } - { suffix: "-claude" } - { suffix: "-gemini" } + - { suffix: "-copilot" } runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/issue-check.yml b/.github/workflows/issue-check.yml new file mode 100644 index 00000000..ead16df5 --- /dev/null +++ b/.github/workflows/issue-check.yml @@ -0,0 +1,122 @@ +name: Issue Completeness Check + +on: + issues: + types: [opened, edited] + +concurrency: + group: issue-check-${{ github.event.issue.number }} + cancel-in-progress: true + +jobs: + check: + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - uses: actions/github-script@v7 + with: + 
script: | + const issue = context.payload.issue; + const body = issue.body || ''; + const labels = issue.labels.map(l => l.name); + const marker = '<!-- issue-completeness-check -->'; + + const rules = { + bug: ['Description', 'Steps to Reproduce', 'Expected Behavior'], + feature: ['Description', 'Use Case'], + documentation: ['Description'], + guidance: ['Question'] + }; + + const type = Object.keys(rules).find(k => labels.includes(k)); + + // Find old check comment + const comments = await github.rest.issues.listComments({ + ...context.repo, + issue_number: issue.number + }); + const old = comments.data.find(c => c.body.includes(marker)); + + if (!type) { + // No template label — skip if already flagged + if (old && labels.includes('incomplete')) return; + + const msg = `${marker}\n⚠️ Could not detect an issue template. Please make sure this issue was created using one of the available templates and that the corresponding label (\`bug\`, \`feature\`, \`documentation\`, \`guidance\`) is present.\n\nAvailable templates: Bug Report, Feature Request, Documentation, Guidance`; + + if (old) { + await github.rest.issues.deleteComment({ + ...context.repo, + comment_id: old.id + }); + } + await github.rest.issues.createComment({ + ...context.repo, + issue_number: issue.number, + body: msg + }); + + if (!labels.includes('incomplete')) { + await github.rest.issues.addLabels({ + ...context.repo, + issue_number: issue.number, + labels: ['incomplete', 'needs-triage'] + }); + } + return; + } + + const missing = rules[type].filter(field => { + const regex = new RegExp( + `###\\s*${field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*\\n+([\\s\\S]*?)(?=\\n*###|$)` + ); + const match = body.match(regex); + if (!match) return true; + const content = match[1].trim(); + return !content || content === '_No response_'; + }); + + if (missing.length > 0) { + const list = missing.map(f => `- [ ] **${f}**`).join('\n'); + const msg = `${marker}\n⚠️ This issue is missing the following required information and will not be 
processed until completed:\n\n${list}\n\nPlease edit this issue to fill in the missing fields. Thank you!\n\n> **Note:** Please do not modify the section headings (\`### ...\`) as they are used for automated validation.`; + + // Delete old, create new (so user gets notified) + if (old) { + await github.rest.issues.deleteComment({ + ...context.repo, + comment_id: old.id + }); + } + await github.rest.issues.createComment({ + ...context.repo, + issue_number: issue.number, + body: msg + }); + + if (!labels.includes('incomplete')) { + await github.rest.issues.addLabels({ + ...context.repo, + issue_number: issue.number, + labels: ['incomplete', 'needs-triage'] + }); + } + } else { + // All filled: remove comment + label + if (old) { + await github.rest.issues.deleteComment({ + ...context.repo, + comment_id: old.id + }); + } + if (labels.includes('incomplete')) { + try { + await github.rest.issues.removeLabel({ + ...context.repo, + issue_number: issue.number, + name: 'incomplete' + }); + } catch (e) { + if (e.status !== 404) throw e; + } + } + } diff --git a/.github/workflows/release-pr.yml b/.github/workflows/release-pr.yml index b87d63f4..ee8ede38 100644 --- a/.github/workflows/release-pr.yml +++ b/.github/workflows/release-pr.yml @@ -44,13 +44,18 @@ jobs: else CURRENT=$(grep '^version:' charts/openab/Chart.yaml | awk '{print $2}') BASE="${CURRENT%%-*}" - IFS='.' read -r major minor patch <<< "$BASE" - case "${{ inputs.bump }}" in - major) major=$((major + 1)); minor=0; patch=0 ;; - minor) minor=$((minor + 1)); patch=0 ;; - patch) patch=$((patch + 1)) ;; - esac - VERSION="${major}.${minor}.${patch}-beta.1" + if [[ "$CURRENT" == *-beta.* ]]; then + BETA_NUM="${CURRENT##*-beta.}" + VERSION="${BASE}-beta.$((BETA_NUM + 1))" + else + IFS='.' 
read -r major minor patch <<< "$BASE" + case "${{ inputs.bump }}" in + major) major=$((major + 1)); minor=0; patch=0 ;; + minor) minor=$((minor + 1)); patch=0 ;; + patch) patch=$((patch + 1)) ;; + esac + VERSION="${major}.${minor}.${patch}-beta.1" + fi fi echo "version=${VERSION}" >> "$GITHUB_OUTPUT" echo "::notice::Release version: ${VERSION}" diff --git a/Cargo.lock b/Cargo.lock index d123d5e6..6b016571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,6 +49,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -76,12 +82,24 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.11.1" @@ -110,6 +128,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -205,6 +229,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -362,6 +395,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "gif" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -597,6 +640,34 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "gif", + "image-webp", + "moxcms", + "num-traits", + "png", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error", +] + [[package]] name = "indexmap" version = "2.14.0" @@ -740,6 +811,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -755,6 +836,15 @@ version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -763,10 +853,12 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "openab" -version = "0.6.4" +version = "0.7.3" dependencies = [ "anyhow", "base64", + "image", + "libc", "rand 0.8.5", "regex", "reqwest", @@ -815,6 +907,19 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "potential_utf" version = "0.1.5" @@ -858,6 +963,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pxfm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = "quinn" version = "0.11.9" @@ -2026,6 +2143,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + [[package]] name = "windows-link" version 
= "0.2.1" @@ -2399,3 +2522,18 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index 2b56c028..829d7bd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "openab" -version = "0.6.4" +version = "0.7.3" edition = "2021" [dependencies] @@ -15,5 +15,7 @@ uuid = { version = "1", features = ["v4"] } regex = "1" anyhow = "1" rand = "0.8" -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "multipart", "json"] } base64 = "0.22" +image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } +libc = "0.2" diff --git a/Dockerfile b/Dockerfile index fdb14e2a..600b4680 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,9 +11,10 @@ FROM debian:bookworm-slim RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl unzip && rm -rf /var/lib/apt/lists/* # Install kiro-cli (auto-detect arch, copy binary directly) +ARG KIRO_CLI_VERSION=2.0.0 RUN ARCH=$(dpkg --print-architecture) && \ - if [ "$ARCH" = "arm64" ]; then URL="https://desktop-release.q.us-east-1.amazonaws.com/latest/kirocli-aarch64-linux.zip"; \ - else URL="https://desktop-release.q.us-east-1.amazonaws.com/latest/kirocli-x86_64-linux.zip"; fi && \ + if [ "$ARCH" = "arm64" ]; then 
URL="https://prod.download.cli.kiro.dev/stable/${KIRO_CLI_VERSION}/kirocli-aarch64-linux.zip"; \ + else URL="https://prod.download.cli.kiro.dev/stable/${KIRO_CLI_VERSION}/kirocli-x86_64-linux.zip"; fi && \ curl --proto '=https' --tlsv1.2 -sSf --retry 3 --retry-delay 5 "$URL" -o /tmp/kirocli.zip && \ unzip /tmp/kirocli.zip -d /tmp && \ cp /tmp/kirocli/bin/* /usr/local/bin/ && \ diff --git a/Dockerfile.claude b/Dockerfile.claude index 2c8b90ab..da12d8bb 100644 --- a/Dockerfile.claude +++ b/Dockerfile.claude @@ -11,7 +11,8 @@ FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/* # Install claude-agent-acp adapter and Claude Code CLI -RUN npm install -g @agentclientprotocol/claude-agent-acp@0.25.0 @anthropic-ai/claude-code --retry 3 +ARG CLAUDE_CODE_VERSION=2.1.107 +RUN npm install -g @agentclientprotocol/claude-agent-acp@0.25.0 @anthropic-ai/claude-code@${CLAUDE_CODE_VERSION} --retry 3 # Install gh CLI RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ diff --git a/Dockerfile.codex b/Dockerfile.codex index b7ab4921..198b8cb0 100644 --- a/Dockerfile.codex +++ b/Dockerfile.codex @@ -11,7 +11,8 @@ FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/* # Pre-install codex-acp and codex CLI globally -RUN npm install -g @zed-industries/codex-acp@0.9.5 @openai/codex --retry 3 +ARG CODEX_VERSION=0.120.0 +RUN npm install -g @zed-industries/codex-acp@0.9.5 @openai/codex@${CODEX_VERSION} --retry 3 # Install gh CLI RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ diff --git a/Dockerfile.copilot b/Dockerfile.copilot new file mode 100644 index 00000000..c164a429 --- /dev/null +++ b/Dockerfile.copilot @@ -0,0 +1,34 @@ +# --- Build stage --- +FROM rust:1-bookworm AS builder +WORKDIR /build +COPY Cargo.toml Cargo.lock ./ +RUN mkdir src && echo 'fn 
main() {}' > src/main.rs && cargo build --release && rm -rf src +COPY src/ src/ +RUN touch src/main.rs && cargo build --release + +# --- Runtime stage --- +FROM node:22-bookworm-slim +RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/* + +# Install GitHub Copilot CLI via npm (pinned version) +ARG COPILOT_VERSION=1.0.25 +RUN npm install -g @github/copilot@${COPILOT_VERSION} --retry 3 + +# Install gh CLI (for auth and token management) +RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list && \ + apt-get update && apt-get install -y --no-install-recommends gh && \ + rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/node +WORKDIR /home/node + +COPY --from=builder --chown=node:node /build/target/release/openab /usr/local/bin/openab + +USER node +HEALTHCHECK --interval=30s --timeout=5s --retries=3 \ + CMD pgrep -x openab || exit 1 +ENTRYPOINT ["openab"] +CMD ["/etc/openab/config.toml"] diff --git a/Dockerfile.gemini b/Dockerfile.gemini index a5ce9201..d2230547 100644 --- a/Dockerfile.gemini +++ b/Dockerfile.gemini @@ -11,7 +11,8 @@ FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/* # Install Gemini CLI (native ACP support via --acp) -RUN npm install -g @google/gemini-cli --retry 3 +ARG GEMINI_CLI_VERSION=0.37.2 +RUN npm install -g @google/gemini-cli@${GEMINI_CLI_VERSION} --retry 3 # Install gh CLI RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ diff --git a/README.md b/README.md index 4cb03ab2..6ad1dbcd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # OpenAB — Open Agent Broker -A lightweight, secure, 
cloud-native ACP harness that bridges Discord and any [Agent Client Protocol](https://github.com/anthropics/agent-protocol)-compatible coding CLI (Kiro CLI, Claude Code, Codex, Gemini, etc.) over stdio JSON-RPC — delivering the next-generation development experience. +A lightweight, secure, cloud-native ACP harness that bridges Discord and any [Agent Client Protocol](https://github.com/anthropics/agent-protocol)-compatible coding CLI (Kiro CLI, Claude Code, Codex, Gemini, Copilot CLI, etc.) over stdio JSON-RPC — delivering the next-generation development experience. 🪼 **Join our community!** Come say hi on Discord — we'd love to have you: **[🪼 OpenAB — Official](https://discord.gg/YNksK9M6)** 🎉 @@ -17,7 +17,7 @@ A lightweight, secure, cloud-native ACP harness that bridges Discord and any [Ag ## Features -- **Pluggable agent backend** — swap between Kiro CLI, Claude Code, Codex, Gemini via config +- **Pluggable agent backend** — swap between Kiro CLI, Claude Code, Codex, Gemini, Copilot CLI via config - **@mention trigger** — mention the bot in an allowed channel to start a conversation - **Thread-based multi-turn** — auto-creates threads; no @mention needed for follow-ups - **Edit-streaming** — live-updates the Discord message every 1.5s as tokens arrive @@ -25,6 +25,7 @@ A lightweight, secure, cloud-native ACP harness that bridges Discord and any [Ag - **Session pool** — one CLI process per thread, auto-managed lifecycle - **ACP protocol** — JSON-RPC over stdio with tool call, thinking, and permission auto-reply support - **Kubernetes-ready** — Dockerfile + k8s manifests with PVC for auth persistence +- **Voice message STT** — auto-transcribes Discord voice messages via Groq, OpenAI, or local Whisper server ([docs/stt.md](docs/stt.md)) ## Quick Start @@ -32,131 +33,53 @@ A lightweight, secure, cloud-native ACP harness that bridges Discord and any [Ag See [docs/discord-bot-howto.md](docs/discord-bot-howto.md) for a detailed step-by-step guide. -In short: - -1. 
Go to https://discord.com/developers/applications and create an application -2. Bot tab → enable **Message Content Intent** -3. OAuth2 → URL Generator → scope: `bot` → permissions: Send Messages, Send Messages in Threads, Create Public Threads, Read Message History, Add Reactions, Manage Messages -4. Invite the bot to your server using the generated URL - -### 2. Configure +### 2. Install with Helm (Kiro CLI — default) ```bash -cp config.toml.example config.toml -``` - -Edit `config.toml`: -```toml -[discord] -bot_token = "${DISCORD_BOT_TOKEN}" -allowed_channels = ["YOUR_CHANNEL_ID"] -# allowed_users = ["YOUR_USER_ID"] # optional: restrict who can use the bot +helm repo add openab https://openabdev.github.io/openab +helm repo update -[agent] -command = "kiro-cli" -args = ["acp", "--trust-all-tools"] -working_dir = "/tmp" +helm install openab openab/openab \ + --set agents.kiro.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.kiro.discord.allowedChannels[0]=YOUR_CHANNEL_ID' ``` -### 3. Build & Run +### 3. Authenticate (first time only) ```bash -export DISCORD_BOT_TOKEN="your-token" - -# Development -cargo run - -# Production -cargo build --release -./target/release/openab config.toml +kubectl exec -it deployment/openab-kiro -- kiro-cli login --use-device-flow +kubectl rollout restart deployment/openab-kiro ``` -If no config path is given, it defaults to `config.toml` in the current directory. - ### 4. Use In your Discord channel: ``` -@AgentBroker explain this code +@YourBot explain this code ``` The bot creates a thread. After that, just type in the thread — no @mention needed. -## Pluggable Agent Backends - -Supports Kiro CLI, Claude Code, Codex, Gemini, and any ACP-compatible CLI. 
+## Other Agents -| Agent key | CLI | ACP Adapter | Auth | -|-----------|-----|-------------|------| -| `kiro` (default) | Kiro CLI | Native `kiro-cli acp` | `kiro-cli login --use-device-flow` | -| `codex` | Codex | [@zed-industries/codex-acp](https://github.com/zed-industries/codex-acp) | `codex login --device-auth` | -| `claude` | Claude Code | [@agentclientprotocol/claude-agent-acp](https://github.com/agentclientprotocol/claude-agent-acp) | `claude setup-token` | -| `gemini` | Gemini CLI | Native `gemini --acp` | Google OAuth or `GEMINI_API_KEY` | +| Agent | CLI | ACP Adapter | Guide | +|-------|-----|-------------|-------| +| Kiro (default) | `kiro-cli acp` | Native | [docs/kiro.md](docs/kiro.md) | +| Claude Code | `claude-agent-acp` | [@agentclientprotocol/claude-agent-acp](https://github.com/agentclientprotocol/claude-agent-acp) | [docs/claude-code.md](docs/claude-code.md) | +| Codex | `codex-acp` | [@zed-industries/codex-acp](https://github.com/zed-industries/codex-acp) | [docs/codex.md](docs/codex.md) | +| Gemini | `gemini --acp` | Native | [docs/gemini.md](docs/gemini.md) | +| Copilot CLI ⚠️ | `copilot --acp --stdio` | Native | [docs/copilot.md](docs/copilot.md) | -### Helm Install (recommended) +> 🔧 Running multiple agents? See [docs/multi-agent.md](docs/multi-agent.md) -See the **[Helm chart docs](https://openabdev.github.io/openab)** for full installation instructions, values reference, and multi-agent examples. 
+## Local Development ```bash -helm repo add openab https://openabdev.github.io/openab -helm repo update - -# Kiro CLI only (default) -helm install openab openab/openab \ - --set agents.kiro.discord.botToken="$DISCORD_BOT_TOKEN" \ - --set-string 'agents.kiro.discord.allowedChannels[0]=YOUR_CHANNEL_ID' - -# Claude Code only (disable default kiro) -helm install openab openab/openab \ - --set agents.kiro.enabled=false \ - --set agents.claude.discord.botToken="$DISCORD_BOT_TOKEN" \ - --set-string 'agents.claude.discord.allowedChannels[0]=YOUR_CHANNEL_ID' \ - --set agents.claude.image=ghcr.io/openabdev/openab-claude:78f8d2c \ - --set agents.claude.command=claude-agent-acp \ - --set agents.claude.workingDir=/home/node - -# Multi-agent (kiro + claude in one release) -helm install openab openab/openab \ - --set agents.kiro.discord.botToken="$KIRO_BOT_TOKEN" \ - --set-string 'agents.kiro.discord.allowedChannels[0]=KIRO_CHANNEL_ID' \ - --set agents.claude.discord.botToken="$CLAUDE_BOT_TOKEN" \ - --set-string 'agents.claude.discord.allowedChannels[0]=CLAUDE_CHANNEL_ID' \ - --set agents.claude.image=ghcr.io/openabdev/openab-claude:78f8d2c \ - --set agents.claude.command=claude-agent-acp \ - --set agents.claude.workingDir=/home/node -``` - -Each agent key in `agents` map creates its own Deployment, ConfigMap, Secret, and PVC. Set `agents..enabled: false` to skip creating resources for an agent. 
- -### Manual config.toml - -For non-Helm deployments, configure the `[agent]` block per CLI: - -```toml -# Kiro CLI (default) -[agent] -command = "kiro-cli" -args = ["acp", "--trust-all-tools"] -working_dir = "/home/agent" - -# Codex (requires codex-acp in PATH) -[agent] -command = "codex-acp" -args = [] -working_dir = "/home/node" - -# Claude Code (requires claude-agent-acp in PATH) -[agent] -command = "claude-agent-acp" -args = [] -working_dir = "/home/node" +cp config.toml.example config.toml +# Edit config.toml with your bot token and channel ID -# Gemini -[agent] -command = "gemini" -args = ["--acp"] -working_dir = "/home/node" -env = { GEMINI_API_KEY = "${GEMINI_API_KEY}" } +export DISCORD_BOT_TOKEN="your-token" +cargo run ``` ## Configuration Reference @@ -180,7 +103,12 @@ session_ttl_hours = 24 # idle session TTL [reactions] enabled = true # enable emoji status reactions remove_after_reply = false # remove reactions after reply +``` +
+Full reactions config + +```toml [reactions.emojis] queued = "👀" thinking = "🤔" @@ -191,68 +119,32 @@ done = "🆗" error = "😱" [reactions.timing] -debounce_ms = 700 # intermediate state debounce -stall_soft_ms = 10000 # 10s idle → 🥱 -stall_hard_ms = 30000 # 30s idle → 😨 -done_hold_ms = 1500 # keep done emoji for 1.5s -error_hold_ms = 2500 # keep error emoji for 2.5s +debounce_ms = 700 +stall_soft_ms = 10000 +stall_hard_ms = 30000 +done_hold_ms = 1500 +error_hold_ms = 2500 ``` -## Kubernetes Deployment +
-The Docker image bundles both `openab` and `kiro-cli` in a single container (openab spawns kiro-cli as a child process). +## Kubernetes Deployment -### Pod Architecture +The Docker image bundles both `openab` and `kiro-cli` in a single container. ``` -┌─ Kubernetes Pod ─────────────────────────────────────────────────┐ -│ │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ openab (main process, PID 1) │ │ -│ │ │ │ -│ │ ┌──────────────┐ ┌──────────────┐ ┌───────────┐ │ │ -│ │ │ Discord │ │ Session Pool │ │ Reaction │ │ │ -│ │ │ Gateway WS │ │ (per thread) │ │ Controller│ │ │ -│ │ └──────┬───────┘ └──────┬───────┘ └───────────┘ │ │ -│ │ │ │ │ │ -│ └─────────┼──────────────────┼────────────────────────────┘ │ -│ │ │ │ -│ │ @mention / │ spawn + stdio │ -│ │ thread msg │ JSON-RPC (ACP) │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────────┐ │ -│ │ kiro-cli acp --trust-all-tools (child process) │ │ -│ │ │ │ -│ │ stdin ◄── JSON-RPC requests (session/new, prompt) │ │ -│ │ stdout ──► JSON-RPC responses (text, tool_call, done) │ │ -│ │ stderr ──► (ignored) │ │ -│ └──────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ PVC Mount (/data) ──────────────────────────────────────┐ │ -│ │ ~/.kiro/ ← settings, skills, sessions │ │ -│ │ ~/.local/share/kiro-cli/ ← OAuth tokens (data.sqlite3) │ │ -│ └──────────────────────────────────────────────────────────┘ │ -│ │ -└──────────────────────────────────────────────────────────────────┘ - │ - │ WebSocket (wss://gateway.discord.gg) - ▼ -┌──────────────────┐ ┌──────────────┐ -│ Discord API │ ◄─────► │ Discord │ -│ Gateway │ │ Users │ -└──────────────────┘ └──────────────┘ +┌─ Kubernetes Pod ──────────────────────────────────────┐ +│ openab (PID 1) │ +│ └─ kiro-cli acp --trust-all-tools (child process) │ +│ ├─ stdin ◄── JSON-RPC requests │ +│ └─ stdout ──► JSON-RPC responses │ +│ │ +│ PVC (/data) │ +│ ├─ ~/.kiro/ (settings, sessions) │ +│ └─ ~/.local/share/kiro-cli/ (OAuth 
tokens) │ +└───────────────────────────────────────────────────────┘ ``` -- **Single container** — openab is PID 1, spawns kiro-cli as a child process -- **stdio JSON-RPC** — ACP communication over stdin/stdout, no network ports needed -- **Session pool** — one kiro-cli process per Discord thread, up to `max_sessions` -- **PVC** — persists OAuth tokens and settings across pod restarts - -### Install with Your Coding CLI - -See the **[Helm chart docs](https://openabdev.github.io/openab)** for per-agent install commands (Kiro CLI, Claude Code, Codex, Gemini) and values reference. - ### Build & Push ```bash @@ -261,56 +153,30 @@ docker tag openab:latest /openab:latest docker push /openab:latest ``` -### Deploy +### Deploy without Helm ```bash -# Create the secret with your bot token kubectl create secret generic openab-secret \ --from-literal=discord-bot-token="your-token" -# Edit k8s/configmap.yaml with your channel IDs kubectl apply -f k8s/configmap.yaml kubectl apply -f k8s/pvc.yaml kubectl apply -f k8s/deployment.yaml ``` -### Authenticate kiro-cli (first time only) - -kiro-cli requires a one-time OAuth login. The PVC persists the tokens across pod restarts. 
- -```bash -kubectl exec -it deployment/openab-kiro -- kiro-cli login --use-device-flow -``` - -Follow the device code flow in your browser, then restart the pod: - -```bash -kubectl rollout restart deployment/openab-kiro -``` - -### Manifests - -| File | Purpose | -|------|---------| +| Manifest | Purpose | +|----------|---------| | `k8s/deployment.yaml` | Single-container pod with config + data volume mounts | | `k8s/configmap.yaml` | `config.toml` mounted at `/etc/openab/` | | `k8s/secret.yaml` | `DISCORD_BOT_TOKEN` injected as env var | | `k8s/pvc.yaml` | Persistent storage for auth + settings | -The PVC persists two paths via `subPath`: -- `~/.kiro` — settings, skills, sessions -- `~/.local/share/kiro-cli` — OAuth tokens (`data.sqlite3` → `auth_kv` table), conversation history - ## Project Structure ``` ├── Dockerfile # multi-stage: rust build + debian-slim runtime with kiro-cli ├── config.toml.example # example config with all agent backends ├── k8s/ # Kubernetes manifests -│ ├── deployment.yaml -│ ├── configmap.yaml -│ ├── secret.yaml -│ └── pvc.yaml └── src/ ├── main.rs # entrypoint: tokio + serenity + cleanup + shutdown ├── config.rs # TOML config + ${ENV_VAR} expansion diff --git a/charts/openab/Chart.yaml b/charts/openab/Chart.yaml index b8405f68..2afbd370 100644 --- a/charts/openab/Chart.yaml +++ b/charts/openab/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: openab description: Discord ↔ ACP coding CLI bridge (Kiro CLI, Claude Code, Codex, Gemini) type: application -version: 0.6.4 -appVersion: "0.6.4" +version: 0.7.3 +appVersion: "0.7.3" diff --git a/charts/openab/templates/configmap.yaml b/charts/openab/templates/configmap.yaml index 95bd68ef..194d8c25 100644 --- a/charts/openab/templates/configmap.yaml +++ b/charts/openab/templates/configmap.yaml @@ -40,6 +40,17 @@ data: [reactions] enabled = {{ ($cfg.reactions).enabled | default true }} remove_after_reply = {{ ($cfg.reactions).removeAfterReply | default false }} + {{- if ($cfg.stt).enabled }} + {{- 
if not ($cfg.stt).apiKey }} + {{ fail (printf "agents.%s.stt.apiKey is required when stt.enabled=true" $name) }} + {{- end }} + + [stt] + enabled = true + api_key = "${STT_API_KEY}" + model = "{{ ($cfg.stt).model | default "whisper-large-v3-turbo" }}" + base_url = "{{ ($cfg.stt).baseUrl | default "https://api.groq.com/openai/v1" }}" + {{- end }} {{- if $cfg.agentsMd }} AGENTS.md: | {{- $cfg.agentsMd | nindent 4 }} diff --git a/charts/openab/templates/deployment.yaml b/charts/openab/templates/deployment.yaml index f1ab9b0b..0d45041d 100644 --- a/charts/openab/templates/deployment.yaml +++ b/charts/openab/templates/deployment.yaml @@ -45,6 +45,13 @@ spec: name: {{ include "openab.agentFullname" $d }} key: discord-bot-token {{- end }} + {{- if and ($cfg.stt).enabled ($cfg.stt).apiKey }} + - name: STT_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "openab.agentFullname" $d }} + key: stt-api-key + {{- end }} - name: HOME value: {{ $cfg.workingDir | default "/home/agent" }} {{- range $k, $v := $cfg.env }} diff --git a/charts/openab/templates/secret.yaml b/charts/openab/templates/secret.yaml index fd090208..2cdd27c8 100644 --- a/charts/openab/templates/secret.yaml +++ b/charts/openab/templates/secret.yaml @@ -14,6 +14,9 @@ metadata: type: Opaque data: discord-bot-token: {{ $cfg.discord.botToken | b64enc | quote }} + {{- if and ($cfg.stt).enabled ($cfg.stt).apiKey }} + stt-api-key: {{ $cfg.stt.apiKey | b64enc | quote }} + {{- end }} {{- end }} {{- end }} {{- end }} diff --git a/charts/openab/values.yaml b/charts/openab/values.yaml index 22b7a255..956374cb 100644 --- a/charts/openab/values.yaml +++ b/charts/openab/values.yaml @@ -1,7 +1,7 @@ image: repository: ghcr.io/openabdev/openab # tag defaults to .Chart.AppVersion - tag: "94253a5" + tag: "" pullPolicy: IfNotPresent podSecurityContext: @@ -69,6 +69,11 @@ agents: reactions: enabled: true removeAfterReply: false + stt: + enabled: false + apiKey: "" + model: "whisper-large-v3-turbo" + baseUrl: 
"https://api.groq.com/openai/v1" persistence: enabled: true storageClass: "" diff --git a/config.toml.example b/config.toml.example index 598c3017..60db88f8 100644 --- a/config.toml.example +++ b/config.toml.example @@ -2,6 +2,9 @@ bot_token = "${DISCORD_BOT_TOKEN}" allowed_channels = ["1234567890"] # allowed_users = [""] # empty or omitted = allow all users +# allow_bot_messages = "off" # "off" (default) | "mentions" | "all" + # "mentions" is recommended for multi-agent collaboration +# trusted_bot_ids = [] # empty = any bot (mode permitting); set to restrict [agent] command = "kiro-cli" @@ -26,6 +29,12 @@ working_dir = "/home/agent" # working_dir = "/home/agent" # env = { GEMINI_API_KEY = "${GEMINI_API_KEY}" } +# [agent] +# command = "copilot" +# args = ["--acp", "--stdio"] +# working_dir = "/home/agent" +# env = {} # Auth via: kubectl exec -it -- gh auth login -p https -w + [pool] max_sessions = 10 session_ttl_hours = 24 diff --git a/docs/claude-code.md b/docs/claude-code.md new file mode 100644 index 00000000..a8f4479f --- /dev/null +++ b/docs/claude-code.md @@ -0,0 +1,40 @@ +# Claude Code + +Claude Code uses the [@agentclientprotocol/claude-agent-acp](https://github.com/agentclientprotocol/claude-agent-acp) adapter for ACP support. + +## Docker Image + +```bash +docker build -f Dockerfile.claude -t openab-claude:latest . +``` + +The image installs `@agentclientprotocol/claude-agent-acp` and `@anthropic-ai/claude-code` globally via npm. + +## Helm Install + +```bash +helm install openab openab/openab \ + --set agents.kiro.enabled=false \ + --set agents.claude.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.claude.discord.allowedChannels[0]=YOUR_CHANNEL_ID' \ + --set agents.claude.image=ghcr.io/openabdev/openab-claude:latest \ + --set agents.claude.command=claude-agent-acp \ + --set agents.claude.workingDir=/home/node +``` + +> Set `agents.kiro.enabled=false` to disable the default Kiro agent. 
+ +## Manual config.toml + +```toml +[agent] +command = "claude-agent-acp" +args = [] +working_dir = "/home/node" +``` + +## Authentication + +```bash +kubectl exec -it deployment/openab-claude -- claude setup-token +``` diff --git a/docs/codex.md b/docs/codex.md new file mode 100644 index 00000000..5fc72119 --- /dev/null +++ b/docs/codex.md @@ -0,0 +1,40 @@ +# Codex + +Codex uses the [@zed-industries/codex-acp](https://github.com/zed-industries/codex-acp) adapter for ACP support. + +## Docker Image + +```bash +docker build -f Dockerfile.codex -t openab-codex:latest . +``` + +The image installs `@zed-industries/codex-acp` and `@openai/codex` globally via npm. + +## Helm Install + +```bash +helm install openab openab/openab \ + --set agents.kiro.enabled=false \ + --set agents.codex.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.codex.discord.allowedChannels[0]=YOUR_CHANNEL_ID' \ + --set agents.codex.image=ghcr.io/openabdev/openab-codex:latest \ + --set agents.codex.command=codex-acp \ + --set agents.codex.workingDir=/home/node +``` + +> Set `agents.kiro.enabled=false` to disable the default Kiro agent. + +## Manual config.toml + +```toml +[agent] +command = "codex-acp" +args = [] +working_dir = "/home/node" +``` + +## Authentication + +```bash +kubectl exec -it deployment/openab-codex -- codex login --device-auth +``` diff --git a/docs/copilot.md b/docs/copilot.md new file mode 100644 index 00000000..9f4132d9 --- /dev/null +++ b/docs/copilot.md @@ -0,0 +1,94 @@ +# GitHub Copilot CLI — Agent Backend Guide + +How to run OpenAB with [GitHub Copilot CLI](https://github.com/github/copilot-cli) as the agent backend. 
+ +## Prerequisites + +- A paid [GitHub Copilot](https://github.com/features/copilot/plans) subscription (**Pro, Pro+, Business, or Enterprise** — Free tier does not include CLI/ACP access) +- Copilot CLI ACP support is in [public preview](https://github.blog/changelog/2026-01-28-acp-support-in-copilot-cli-is-now-in-public-preview/) since Jan 28, 2026 + +## Architecture + +``` +┌──────────────┐ Gateway WS ┌──────────────┐ ACP stdio ┌──────────────────────┐ +│ Discord │◄─────────────►│ openab │──────────────►│ copilot --acp --stdio │ +│ User │ │ (Rust) │◄── JSON-RPC ──│ (Copilot CLI) │ +└──────────────┘ └──────────────┘ └──────────────────────┘ +``` + +OpenAB spawns `copilot --acp --stdio` as a child process and communicates via stdio JSON-RPC. No intermediate layers. + +## Configuration + +```toml +[agent] +command = "copilot" +args = ["--acp", "--stdio"] +working_dir = "/home/agent" +# Auth via: kubectl exec -it -- gh auth login -p https -w +``` + +## Docker + +Build with the Copilot-specific Dockerfile: + +```bash +docker build -f Dockerfile.copilot -t openab-copilot . +``` + +## Authentication + +Copilot CLI uses GitHub OAuth (same as `gh` CLI). In a headless container, use device flow: + +```bash +# 1. Exec into the running pod/container +kubectl exec -it deployment/openab-copilot -- bash + +# 2. Authenticate via device flow +gh auth login --hostname github.com --git-protocol https -p https -w + +# 3. Follow the device code flow in your browser + +# 4. Verify +gh auth status + +# 5. Restart the pod (token is persisted via PVC) +kubectl rollout restart deployment/openab-copilot +``` + +The OAuth token is stored under `~/.config/gh/` and persisted across pod restarts via PVC. + +> **Note**: See [docs/gh-auth-device-flow.md](gh-auth-device-flow.md) for details on device flow in headless environments. + +## Helm Install + +> **Note**: The `ghcr.io/openabdev/openab-copilot` image is not published yet. 
You must build it locally first with `docker build -f Dockerfile.copilot -t openab-copilot .` and push to your own registry, or use a local image. + +```bash +helm install openab openab/openab \ + --set agents.kiro.enabled=false \ + --set agents.copilot.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.copilot.discord.allowedChannels[0]=YOUR_CHANNEL_ID' \ + --set agents.copilot.image=ghcr.io/openabdev/openab-copilot:latest \ + --set agents.copilot.command=copilot \ + --set 'agents.copilot.args={--acp,--stdio}' \ + --set agents.copilot.persistence.enabled=true \ + --set agents.copilot.workingDir=/home/node +``` + +## Model Selection + +Copilot CLI defaults to Claude Sonnet 4.6. Other available models include: + +- Claude Opus 4.6, Claude Haiku 4.5 (Anthropic) +- GPT-5.3-Codex (OpenAI) +- Gemini 3 Pro (Google) + +Model selection is controlled by Copilot CLI itself (via `/model` in interactive mode). In ACP mode, the default model is used. + +## Known Limitations + +- ⚠️ ACP support is in **public preview** — behavior may change +- ⚠️ Headless auth with `GITHUB_TOKEN` env var has not been fully validated; device flow via `gh auth login` is the recommended path +- Copilot CLI requires an active Copilot subscription per user/org +- For Copilot Business/Enterprise, an admin must enable Copilot CLI from the Policies page diff --git a/docs/gemini.md b/docs/gemini.md new file mode 100644 index 00000000..797cba99 --- /dev/null +++ b/docs/gemini.md @@ -0,0 +1,43 @@ +# Gemini CLI + +Gemini CLI supports ACP natively via the `--acp` flag — no adapter needed. + +## Docker Image + +```bash +docker build -f Dockerfile.gemini -t openab-gemini:latest . +``` + +The image installs `@google/gemini-cli` globally via npm. 
+ +## Helm Install + +```bash +helm install openab openab/openab \ + --set agents.kiro.enabled=false \ + --set agents.gemini.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.gemini.discord.allowedChannels[0]=YOUR_CHANNEL_ID' \ + --set agents.gemini.image=ghcr.io/openabdev/openab-gemini:latest \ + --set agents.gemini.command=gemini \ + --set agents.gemini.args='{--acp}' \ + --set agents.gemini.workingDir=/home/node +``` + +> Set `agents.kiro.enabled=false` to disable the default Kiro agent. + +## Manual config.toml + +```toml +[agent] +command = "gemini" +args = ["--acp"] +working_dir = "/home/node" +env = { GEMINI_API_KEY = "${GEMINI_API_KEY}" } +``` + +## Authentication + +Gemini supports Google OAuth or an API key: + +- **API key**: Set `GEMINI_API_KEY` environment variable +- **OAuth**: Run Google OAuth flow inside the pod diff --git a/docs/kiro.md b/docs/kiro.md new file mode 100644 index 00000000..dbf56970 --- /dev/null +++ b/docs/kiro.md @@ -0,0 +1,52 @@ +# Kiro CLI (Default Agent) + +Kiro CLI is the default agent backend for OpenAB. It supports ACP natively — no adapter needed. + +## Docker Image + +The default `Dockerfile` bundles both `openab` and `kiro-cli`: + +```bash +docker build -t openab:latest . +``` + +## Helm Install + +```bash +helm repo add openab https://openabdev.github.io/openab +helm repo update + +helm install openab openab/openab \ + --set agents.kiro.discord.botToken="$DISCORD_BOT_TOKEN" \ + --set-string 'agents.kiro.discord.allowedChannels[0]=YOUR_CHANNEL_ID' +``` + +## Manual config.toml + +```toml +[agent] +command = "kiro-cli" +args = ["acp", "--trust-all-tools"] +working_dir = "/home/agent" +``` + +## Authentication + +Kiro CLI requires a one-time OAuth login. The PVC persists tokens across pod restarts. 
+ +```bash +kubectl exec -it deployment/openab-kiro -- kiro-cli login --use-device-flow +``` + +Follow the device code flow in your browser, then restart the pod: + +```bash +kubectl rollout restart deployment/openab-kiro +``` + +### Persisted Paths (PVC) + +| Path | Contents | +|------|----------| +| `~/.kiro/` | Settings, skills, sessions | +| `~/.local/share/kiro-cli/` | OAuth tokens (`data.sqlite3` → `auth_kv` table), conversation history | diff --git a/docs/multi-agent.md b/docs/multi-agent.md new file mode 100644 index 00000000..f228c346 --- /dev/null +++ b/docs/multi-agent.md @@ -0,0 +1,127 @@ +# Multi-Agent Setup + +You can run multiple agents in a single Helm release. Each agent key in the `agents` map creates its own Deployment, ConfigMap, Secret, and PVC. + +## Example: Kiro + Claude Code + +```bash +helm install openab openab/openab \ + --set agents.kiro.discord.botToken="$KIRO_BOT_TOKEN" \ + --set-string 'agents.kiro.discord.allowedChannels[0]=KIRO_CHANNEL_ID' \ + --set agents.claude.discord.botToken="$CLAUDE_BOT_TOKEN" \ + --set-string 'agents.claude.discord.allowedChannels[0]=CLAUDE_CHANNEL_ID' \ + --set agents.claude.image=ghcr.io/openabdev/openab-claude:latest \ + --set agents.claude.command=claude-agent-acp \ + --set agents.claude.workingDir=/home/node +``` + +## How It Works + +- Each `agents.` entry creates an independent set of Kubernetes resources (Deployment, ConfigMap, Secret, PVC) +- Each agent gets its own Discord bot token and allowed channels +- Agents run in separate pods and don't share state +- Set `agents..enabled: false` to skip creating resources for an agent + +## Example: All Four Agents + +```bash +helm install openab openab/openab \ + --set agents.kiro.discord.botToken="$KIRO_BOT_TOKEN" \ + --set-string 'agents.kiro.discord.allowedChannels[0]=KIRO_CHANNEL_ID' \ + --set agents.claude.discord.botToken="$CLAUDE_BOT_TOKEN" \ + --set-string 'agents.claude.discord.allowedChannels[0]=CLAUDE_CHANNEL_ID' \ + --set 
agents.claude.image=ghcr.io/openabdev/openab-claude:latest \ + --set agents.claude.command=claude-agent-acp \ + --set agents.claude.workingDir=/home/node \ + --set agents.codex.discord.botToken="$CODEX_BOT_TOKEN" \ + --set-string 'agents.codex.discord.allowedChannels[0]=CODEX_CHANNEL_ID' \ + --set agents.codex.image=ghcr.io/openabdev/openab-codex:latest \ + --set agents.codex.command=codex-acp \ + --set agents.codex.workingDir=/home/node \ + --set agents.gemini.discord.botToken="$GEMINI_BOT_TOKEN" \ + --set-string 'agents.gemini.discord.allowedChannels[0]=GEMINI_CHANNEL_ID' \ + --set agents.gemini.image=ghcr.io/openabdev/openab-gemini:latest \ + --set agents.gemini.command=gemini \ + --set agents.gemini.args='{--acp}' \ + --set agents.gemini.workingDir=/home/node +``` + +See individual agent docs for authentication steps: +- [Kiro CLI](kiro.md) +- [Claude Code](claude-code.md) +- [Codex](codex.md) +- [Gemini](gemini.md) + +## Bot-to-Bot Communication + +By default, each agent ignores messages from other bots. To enable multi-agent collaboration in the same channel (e.g. a code review bot handing off to a deploy bot), configure `allow_bot_messages` in each agent's `config.toml`: + +```toml +[discord] +allow_bot_messages = "mentions" # recommended +``` + +### Modes + +| Value | Behavior | Loop risk | +|---|---|---| +| `"off"` (default) | Ignore all bot messages | None | +| `"mentions"` | Only respond to bot messages that @mention this bot | Very low — bots must explicitly @mention each other | +| `"all"` | Respond to all bot messages | Mitigated by turn cap (10 consecutive bot messages) | + +### Which mode should I use? + +**`"mentions"` is recommended for most setups.** It enables collaboration while acting as a natural loop breaker — Bot A only processes Bot B's message if Bot B explicitly @mentions Bot A. Two bots won't accidentally ping-pong. + +Use `"all"` only when bots need to react to each other's messages without explicit mentions (e.g. monitoring bots). 
A hard cap of 10 consecutive bot-to-bot turns prevents infinite loops. + +### Example: Code Review → Deploy handoff + +``` +┌──────────────────────────────────────────────────────────┐ +│ Discord Channel #dev │ +│ │ +│ 👤 User: "Review this PR and deploy if it looks good" │ +│ │ │ +│ ▼ │ +│ 🤖 Kiro (allow_bot_messages = "off"): │ +│ "LGTM — tests pass, no security issues. │ +│ @DeployBot please deploy to staging." │ +│ │ │ +│ ▼ │ +│ 🤖 Deploy Bot (allow_bot_messages = "mentions"): │ +│ "Deploying to staging... ✅ Done." │ +└──────────────────────────────────────────────────────────┘ +``` + +Note: the review bot doesn't need `allow_bot_messages` enabled — only the bot that needs to *receive* bot messages does. + +### Helm values + +```bash +helm install openab openab/openab \ + --set agents.kiro.discord.botToken="$KIRO_BOT_TOKEN" \ + --set agents.kiro.discord.allowBotMessages="off" \ + --set agents.deploy.discord.botToken="$DEPLOY_BOT_TOKEN" \ + --set agents.deploy.discord.allowBotMessages="mentions" +``` + +### Safety + +- The bot's own messages are **always** ignored, regardless of setting +- `"mentions"` mode is a natural loop breaker — no rate limiter needed +- `"all"` mode has a hard cap of 10 consecutive bot-to-bot turns per channel +- Channel and user allowlists still apply to bot messages +- `trusted_bot_ids` further restricts which bots are allowed through + +### Restricting to specific bots + +If you only want to accept messages from specific bots (e.g. your own deploy bot), add their Discord user IDs: + +```toml +[discord] +allow_bot_messages = "mentions" +trusted_bot_ids = ["123456789012345678"] # only this bot's messages pass through +``` + +When `trusted_bot_ids` is empty (default), any bot can pass through (subject to the mode check). When set, only listed bots are accepted — all others are silently ignored. 
diff --git a/docs/stt.md b/docs/stt.md new file mode 100644 index 00000000..157b6f66 --- /dev/null +++ b/docs/stt.md @@ -0,0 +1,164 @@ +# Speech-to-Text (STT) for Voice Messages + +openab can automatically transcribe Discord voice message attachments and forward the transcript to your ACP agent as text. + +## Quick Start + +Add an `[stt]` section to your `config.toml`: + +```toml +[stt] +enabled = true +``` + +If `GROQ_API_KEY` is set in your environment, that's all you need — openab will auto-detect it and use Groq's free tier. You can also set the key explicitly: + +```toml +[stt] +enabled = true +api_key = "${GROQ_API_KEY}" +``` + +## How It Works + +``` +Discord voice message (.ogg) + │ + ▼ + openab downloads the audio file + │ + ▼ + POST /audio/transcriptions → STT provider + │ + ▼ + transcript injected as: + "[Voice message transcript]: " + │ + ▼ + ACP agent receives plain text +``` + +The transcript is prepended to the prompt as a `ContentBlock::Text`, so the downstream agent (Kiro CLI, Claude Code, etc.) sees it as regular text input. + +## Configuration Reference + +```toml +[stt] +enabled = true # default: false +api_key = "${GROQ_API_KEY}" # required for cloud providers +model = "whisper-large-v3-turbo" # default +base_url = "https://api.groq.com/openai/v1" # default +``` + +| Field | Required | Default | Description | +|---|---|---|---| +| `enabled` | no | `false` | Enable/disable STT. When disabled, audio attachments are silently skipped. | +| `api_key` | no* | — | API key for the STT provider. *Auto-detected from `GROQ_API_KEY` env var if not set. For local servers, use any non-empty string (e.g. `"not-needed"`). | +| `model` | no | `whisper-large-v3-turbo` | Whisper model name. Varies by provider. | +| `base_url` | no | `https://api.groq.com/openai/v1` | OpenAI-compatible API base URL. | + +## Deployment Options + +openab uses the standard OpenAI-compatible `/audio/transcriptions` endpoint. 
Any provider that implements this API works — just change `base_url`. + +### Option 1: Groq Cloud (recommended, free tier) + +```toml +[stt] +enabled = true +api_key = "${GROQ_API_KEY}" +``` + +- Free tier with rate limits +- Model: `whisper-large-v3-turbo` (default) +- Sign up at https://console.groq.com + +### Option 2: OpenAI + +```toml +[stt] +enabled = true +api_key = "${OPENAI_API_KEY}" +model = "whisper-1" +base_url = "https://api.openai.com/v1" +``` + +- ~$0.006 per minute of audio +- Model: `whisper-1` + +### Option 3: Local Whisper Server + +For users running openab on a Mac Mini, home lab, or any machine with a local whisper server: + +```toml +[stt] +enabled = true +api_key = "not-needed" +model = "large-v3-turbo" +base_url = "http://localhost:8080/v1" +``` + +- Audio stays local — never leaves your machine +- No API key or cloud account needed +- Apple Silicon users get hardware acceleration + +Compatible local whisper servers: + +| Server | Install | Apple Silicon | +|---|---|---| +| [faster-whisper-server](https://github.com/fedirz/faster-whisper-server) | `pip install faster-whisper-server` | ✅ CoreML | +| [whisper.cpp server](https://github.com/ggerganov/whisper.cpp) | `brew install whisper-cpp` | ✅ Metal | +| [LocalAI](https://github.com/mudler/LocalAI) | Docker or binary | ✅ | + +### Option 4: LAN / Sidecar Server + +Point to a whisper server running on another machine in your network: + +```toml +[stt] +enabled = true +api_key = "not-needed" +base_url = "http://192.168.1.100:8080/v1" +``` + +### Not Supported + +- **Ollama** — does not expose an `/audio/transcriptions` endpoint. 
+ +## Helm Chart (Kubernetes) + +When deploying via the openab Helm chart, STT is a first-class config block — no manual configmap patching needed: + +```bash +helm upgrade openab openab/openab \ + --set agents.kiro.stt.enabled=true \ + --set agents.kiro.stt.apiKey=gsk_xxx +``` + +The API key is stored in a K8s Secret and injected as an env var (never in plaintext in the configmap). You can also customize model and endpoint: + +```bash +helm upgrade openab openab/openab \ + --set agents.kiro.stt.enabled=true \ + --set agents.kiro.stt.apiKey=gsk_xxx \ + --set agents.kiro.stt.model=whisper-large-v3-turbo \ + --set agents.kiro.stt.baseUrl=https://api.groq.com/openai/v1 +``` + +## Disabling STT + +Omit the `[stt]` section entirely, or set: + +```toml +[stt] +enabled = false +``` + +When disabled, audio attachments are silently skipped with no impact on existing functionality. + +## Technical Notes + +- openab sends `response_format=json` in the transcription request to ensure the response is always parseable JSON. Some local whisper servers default to plain text output without this parameter. +- The actual MIME type from the Discord attachment is passed through to the STT API (e.g. `audio/ogg`, `audio/mp4`, `audio/wav`). +- Environment variables in config values are expanded via `${VAR}` syntax (e.g. `api_key = "${GROQ_API_KEY}"`). +- The `api_key` field is auto-detected from the `GROQ_API_KEY` environment variable when using the default Groq endpoint. If you set a custom `base_url` (e.g. local server), auto-detect is disabled to avoid leaking the Groq key to unrelated endpoints — you must set `api_key` explicitly. diff --git a/src/acp/connection.rs b/src/acp/connection.rs index 53770509..03f55712 100644 --- a/src/acp/connection.rs +++ b/src/acp/connection.rs @@ -10,6 +10,68 @@ use tokio::sync::{mpsc, oneshot, Mutex}; use tokio::task::JoinHandle; use tracing::{debug, error, info}; +/// Pick the most permissive selectable permission option from ACP options. 
+fn pick_best_option(options: &[Value]) -> Option<String> {
+    let mut fallback: Option<&Value> = None;
+
+    for kind in ["allow_always", "allow_once"] {
+        if let Some(option) = options
+            .iter()
+            .find(|option| option.get("kind").and_then(|k| k.as_str()) == Some(kind))
+        {
+            return option
+                .get("optionId")
+                .and_then(|id| id.as_str())
+                .map(str::to_owned);
+        }
+    }
+
+    for option in options {
+        let kind = option.get("kind").and_then(|k| k.as_str());
+        if kind == Some("reject_once") || kind == Some("reject_always") {
+            continue;
+        }
+        fallback = Some(option);
+        break;
+    }
+
+    fallback
+        .and_then(|option| option.get("optionId"))
+        .and_then(|id| id.as_str())
+        .map(str::to_owned)
+}
+
+/// Build a spec-compliant permission response with backward-compatible fallback.
+fn build_permission_response(params: Option<&Value>) -> Value {
+    match params
+        .and_then(|p| p.get("options"))
+        .and_then(|options| options.as_array())
+    {
+        None => json!({
+            "outcome": {
+                "outcome": "selected",
+                "optionId": "allow_always"
+            }
+        }),
+        Some(options) => {
+            if let Some(option_id) = pick_best_option(options) {
+                json!({
+                    "outcome": {
+                        "outcome": "selected",
+                        "optionId": option_id
+                    }
+                })
+            } else {
+                json!({
+                    "outcome": {
+                        "outcome": "cancelled"
+                    }
+                })
+            }
+        }
+    }
+}
+
 fn expand_env(val: &str) -> String {
     if val.starts_with("${") && val.ends_with('}') {
         let key = &val[2..val.len() - 1];
@@ -45,11 +107,14 @@ impl ContentBlock {
 
 pub struct AcpConnection {
     _proc: Child,
+    /// PID of the direct child, used as the process group ID for cleanup.
+ child_pgid: Option, stdin: Arc>, next_id: AtomicU64, pending: Arc>>>, notify_tx: Arc>>>, pub acp_session_id: Option, + pub supports_load_session: bool, pub last_active: Instant, pub session_reset: bool, _reader_handle: JoinHandle<()>, @@ -69,14 +134,27 @@ impl AcpConnection { .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::null()) - .current_dir(working_dir) - .kill_on_drop(true); + .current_dir(working_dir); + // Create a new process group so we can kill the entire tree. + // SAFETY: setpgid is async-signal-safe (POSIX.1-2008) and called + // before exec. Return value checked — failure means the child won't + // have its own process group, so kill(-pgid) would be unsafe. + unsafe { + cmd.pre_exec(|| { + if libc::setpgid(0, 0) != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } for (k, v) in env { cmd.env(k, expand_env(v)); } let mut proc = cmd .spawn() .map_err(|e| anyhow!("failed to spawn {command}: {e}"))?; + let child_pgid = proc.id() + .and_then(|pid| i32::try_from(pid).ok()); let stdout = proc.stdout.take().ok_or_else(|| anyhow!("no stdout"))?; let stdin = proc.stdin.take().ok_or_else(|| anyhow!("no stdin"))?; @@ -113,13 +191,17 @@ impl AcpConnection { // Auto-reply session/request_permission if msg.method.as_deref() == Some("session/request_permission") { if let Some(id) = msg.id { - let title = msg.params.as_ref() + let title = msg + .params + .as_ref() .and_then(|p| p.get("toolCall")) .and_then(|t| t.get("title")) .and_then(|t| t.as_str()) .unwrap_or("?"); - info!(title, "auto-allow permission"); - let reply = JsonRpcResponse::new(id, json!({"optionId": "allow_always"})); + + let outcome = build_permission_response(msg.params.as_ref()); + info!(title, %outcome, "auto-respond permission"); + let reply = JsonRpcResponse::new(id, outcome); if let Ok(data) = serde_json::to_string(&reply) { let mut w = stdin_clone.lock().await; let _ = 
w.write_all(format!("{data}\n").as_bytes()).await; @@ -179,11 +261,13 @@ impl AcpConnection { Ok(Self { _proc: proc, + child_pgid, stdin, next_id: AtomicU64::new(1), pending, notify_tx, acp_session_id: None, + supports_load_session: false, last_active: Instant::now(), session_reset: false, _reader_handle: reader_handle, @@ -237,12 +321,18 @@ impl AcpConnection { ) .await?; - let agent_name = resp.result.as_ref() + let result = resp.result.as_ref(); + let agent_name = result .and_then(|r| r.get("agentInfo")) .and_then(|a| a.get("name")) .and_then(|n| n.as_str()) .unwrap_or("unknown"); - info!(agent = agent_name, "initialized"); + self.supports_load_session = result + .and_then(|r| r.get("agentCapabilities")) + .and_then(|c| c.get("loadSession")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + info!(agent = agent_name, load_session = self.supports_load_session, "initialized"); Ok(()) } @@ -316,4 +406,142 @@ impl AcpConnection { pub fn alive(&self) -> bool { !self._reader_handle.is_finished() } + + /// Resume a previous session by ID. Returns Ok(()) if the agent accepted + /// the load, or an error if it failed (caller should fall back to session/new). + pub async fn session_load(&mut self, session_id: &str, cwd: &str) -> Result<()> { + let resp = self + .send_request( + "session/load", + Some(json!({"sessionId": session_id, "cwd": cwd, "mcpServers": []})), + ) + .await?; + // Accept any non-error response as success + if resp.error.is_some() { + return Err(anyhow!("session/load rejected")); + } + info!(session_id, "session loaded"); + self.acp_session_id = Some(session_id.to_string()); + Ok(()) + } + + /// Kill the entire process group: SIGTERM → SIGKILL. + /// Uses std::thread (not tokio::spawn) so SIGKILL fires even during + /// runtime shutdown or panic unwinding. 
+ fn kill_process_group(&mut self) { + let pgid = match self.child_pgid { + Some(pid) if pid > 0 => pid, + _ => return, + }; + // Stage 1: SIGTERM the process group + unsafe { libc::kill(-pgid, libc::SIGTERM); } + // Stage 2: SIGKILL after brief grace (std::thread survives runtime shutdown) + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(1500)); + unsafe { libc::kill(-pgid, libc::SIGKILL); } + }); + } +} + +impl Drop for AcpConnection { + fn drop(&mut self) { + self.kill_process_group(); + } +} + +#[cfg(test)] +mod tests { + use super::{build_permission_response, pick_best_option}; + use serde_json::json; + + #[test] + fn picks_allow_always_over_other_options() { + let options = vec![ + json!({"kind": "allow_once", "optionId": "once"}), + json!({"kind": "allow_always", "optionId": "always"}), + json!({"kind": "reject_once", "optionId": "reject"}), + ]; + + assert_eq!(pick_best_option(&options), Some("always".to_string())); + } + + #[test] + fn falls_back_to_first_unknown_non_reject_kind() { + let options = vec![ + json!({"kind": "reject_once", "optionId": "reject"}), + json!({"kind": "workspace_write", "optionId": "workspace-write"}), + ]; + + assert_eq!( + pick_best_option(&options), + Some("workspace-write".to_string()) + ); + } + + #[test] + fn selects_bypass_permissions_for_exit_plan_mode() { + let options = vec![ + json!({"optionId": "bypassPermissions", "kind": "allow_always"}), + json!({"optionId": "acceptEdits", "kind": "allow_always"}), + json!({"optionId": "default", "kind": "allow_once"}), + json!({"optionId": "plan", "kind": "reject_once"}), + ]; + + assert_eq!( + pick_best_option(&options), + Some("bypassPermissions".to_string()) + ); + } + + #[test] + fn returns_none_when_only_reject_options_exist() { + let options = vec![ + json!({"kind": "reject_once", "optionId": "reject-once"}), + json!({"kind": "reject_always", "optionId": "reject-always"}), + ]; + + assert_eq!(pick_best_option(&options), None); + } + + 
#[test] + fn builds_cancelled_outcome_when_no_selectable_option_exists() { + let response = build_permission_response(Some(&json!({ + "options": [ + {"kind": "reject_once", "optionId": "reject-once"} + ] + }))); + + assert_eq!(response, json!({"outcome": {"outcome": "cancelled"}})); + } + + #[test] + fn builds_cancelled_when_options_array_is_empty() { + let response = build_permission_response(Some(&json!({ + "options": [] + }))); + + assert_eq!(response, json!({"outcome": {"outcome": "cancelled"}})); + } + + #[test] + fn falls_back_to_allow_always_when_options_are_missing() { + let response = build_permission_response(Some(&json!({ + "toolCall": {"title": "legacy"} + }))); + + assert_eq!( + response, + json!({"outcome": {"outcome": "selected", "optionId": "allow_always"}}) + ); + } + + #[test] + fn falls_back_to_allow_always_when_params_is_none() { + let response = build_permission_response(None); + + assert_eq!( + response, + json!({"outcome": {"outcome": "selected", "optionId": "allow_always"}}) + ); + } } diff --git a/src/acp/pool.rs b/src/acp/pool.rs index a2c8a06c..cff159b1 100644 --- a/src/acp/pool.rs +++ b/src/acp/pool.rs @@ -6,8 +6,18 @@ use tokio::sync::RwLock; use tokio::time::Instant; use tracing::{info, warn}; +/// Combined state protected by a single lock to prevent deadlocks. +/// Lock ordering: always acquire `state` before any operation on either map. +struct PoolState { + /// Active connections: thread_key → AcpConnection. + active: HashMap, + /// Suspended sessions: thread_key → ACP sessionId. + /// Saved on eviction so sessions can be resumed via `session/load`. 
+ suspended: HashMap, +} + pub struct SessionPool { - connections: RwLock>, + state: RwLock, config: AgentConfig, max_sessions: usize, } @@ -15,7 +25,10 @@ pub struct SessionPool { impl SessionPool { pub fn new(config: AgentConfig, max_sessions: usize) -> Self { Self { - connections: RwLock::new(HashMap::new()), + state: RwLock::new(PoolState { + active: HashMap::new(), + suspended: HashMap::new(), + }), config, max_sessions, } @@ -24,8 +37,8 @@ impl SessionPool { pub async fn get_or_create(&self, thread_id: &str) -> Result<()> { // Check if alive connection exists { - let conns = self.connections.read().await; - if let Some(conn) = conns.get(thread_id) { + let state = self.state.read().await; + if let Some(conn) = state.active.get(thread_id) { if conn.alive() { return Ok(()); } @@ -33,19 +46,29 @@ impl SessionPool { } // Need to create or rebuild - let mut conns = self.connections.write().await; + let mut state = self.state.write().await; // Double-check after acquiring write lock - if let Some(conn) = conns.get(thread_id) { + if let Some(conn) = state.active.get(thread_id) { if conn.alive() { return Ok(()); } warn!(thread_id, "stale connection, rebuilding"); - conns.remove(thread_id); + suspend_entry(&mut state, thread_id); } - if conns.len() >= self.max_sessions { - return Err(anyhow!("pool exhausted ({} sessions)", self.max_sessions)); + if state.active.len() >= self.max_sessions { + // LRU evict: suspend the oldest idle session to make room + let oldest = state.active + .iter() + .min_by_key(|(_, c)| c.last_active) + .map(|(k, _)| k.clone()); + if let Some(key) = oldest { + info!(evicted = %key, "pool full, suspending oldest idle session"); + suspend_entry(&mut state, &key); + } else { + return Err(anyhow!("pool exhausted ({} sessions)", self.max_sessions)); + } } let mut conn = AcpConnection::spawn( @@ -57,14 +80,32 @@ impl SessionPool { .await?; conn.initialize().await?; - conn.session_new(&self.config.working_dir).await?; - let is_rebuild = 
conns.contains_key(thread_id); - if is_rebuild { - conn.session_reset = true; + // Try to resume a suspended session via session/load + let saved_session_id = state.suspended.remove(thread_id); + let mut resumed = false; + if let Some(ref sid) = saved_session_id { + if conn.supports_load_session { + match conn.session_load(sid, &self.config.working_dir).await { + Ok(()) => { + info!(thread_id, session_id = %sid, "session resumed via session/load"); + resumed = true; + } + Err(e) => { + warn!(thread_id, session_id = %sid, error = %e, "session/load failed, creating new session"); + } + } + } + } + + if !resumed { + conn.session_new(&self.config.working_dir).await?; + if saved_session_id.is_some() { + conn.session_reset = true; + } } - conns.insert(thread_id.to_string(), conn); + state.active.insert(thread_id.to_string(), conn); Ok(()) } @@ -73,8 +114,8 @@ impl SessionPool { where F: FnOnce(&mut AcpConnection) -> std::pin::Pin> + Send + '_>>, { - let mut conns = self.connections.write().await; - let conn = conns + let mut state = self.state.write().await; + let conn = state.active .get_mut(thread_id) .ok_or_else(|| anyhow!("no connection for thread {thread_id}"))?; f(conn).await @@ -82,23 +123,34 @@ impl SessionPool { pub async fn cleanup_idle(&self, ttl_secs: u64) { let cutoff = Instant::now() - std::time::Duration::from_secs(ttl_secs); - let mut conns = self.connections.write().await; - let stale: Vec = conns + let mut state = self.state.write().await; + let stale: Vec = state.active .iter() .filter(|(_, c)| c.last_active < cutoff || !c.alive()) .map(|(k, _)| k.clone()) .collect(); for key in stale { info!(thread_id = %key, "cleaning up idle session"); - conns.remove(&key); - // Child process killed via kill_on_drop when AcpConnection drops + suspend_entry(&mut state, &key); } } pub async fn shutdown(&self) { - let mut conns = self.connections.write().await; - let count = conns.len(); - conns.clear(); // kill_on_drop handles process cleanup + let mut state = 
self.state.write().await; + let count = state.active.len(); + state.active.clear(); // Drop impl kills process groups info!(count, "pool shutdown complete"); } } + +/// Suspend a connection: save its sessionId to the suspended map and remove +/// from active. The connection is dropped, triggering process group kill. +fn suspend_entry(state: &mut PoolState, thread_id: &str) { + if let Some(conn) = state.active.remove(thread_id) { + if let Some(sid) = &conn.acp_session_id { + info!(thread_id, session_id = %sid, "suspending session"); + state.suspended.insert(thread_id.to_string(), sid.clone()); + } + // conn dropped here → Drop impl kills process group + } +} diff --git a/src/acp/protocol.rs b/src/acp/protocol.rs index d3e96ed5..82f00eb8 100644 --- a/src/acp/protocol.rs +++ b/src/acp/protocol.rs @@ -60,8 +60,8 @@ impl std::fmt::Display for JsonRpcError { pub enum AcpEvent { Text(String), Thinking, - ToolStart { title: String }, - ToolDone { title: String, status: String }, + ToolStart { id: String, title: String }, + ToolDone { id: String, title: String, status: String }, Status, } @@ -70,6 +70,19 @@ pub fn classify_notification(msg: &JsonRpcMessage) -> Option { let update = params.get("update")?; let session_update = update.get("sessionUpdate")?.as_str()?; + // toolCallId is the stable identity across tool_call → tool_call_update + // events for the same tool invocation. claude-agent-acp emits the first + // event before the input fields are streamed in (so the title falls back + // to "Terminal" / "Edit" / etc.) and refines them in a later + // tool_call_update; without the id we can't tell those events belong to + // the same call and end up rendering placeholder + refined as two + // separate lines. 
+ let tool_id = update + .get("toolCallId") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + match session_update { "agent_message_chunk" => { let text = update.get("content")?.get("text")?.as_str()?; @@ -80,15 +93,15 @@ pub fn classify_notification(msg: &JsonRpcMessage) -> Option { } "tool_call" => { let title = update.get("title").and_then(|v| v.as_str()).unwrap_or("").to_string(); - Some(AcpEvent::ToolStart { title }) + Some(AcpEvent::ToolStart { id: tool_id, title }) } "tool_call_update" => { let title = update.get("title").and_then(|v| v.as_str()).unwrap_or("").to_string(); let status = update.get("status").and_then(|v| v.as_str()).unwrap_or("").to_string(); if status == "completed" || status == "failed" { - Some(AcpEvent::ToolDone { title, status }) + Some(AcpEvent::ToolDone { id: tool_id, title, status }) } else { - Some(AcpEvent::ToolStart { title }) + Some(AcpEvent::ToolStart { id: tool_id, title }) } } "plan" => Some(AcpEvent::Status), diff --git a/src/config.rs b/src/config.rs index 6d341e27..658e00b9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,6 +3,34 @@ use serde::Deserialize; use std::collections::HashMap; use std::path::Path; +/// Controls whether the bot processes messages from other Discord bots. +/// +/// Inspired by Hermes Agent's `DISCORD_ALLOW_BOTS` 3-value design: +/// - `Off` (default): ignore all bot messages (safe default, no behavior change) +/// - `Mentions`: only process bot messages that @mention this bot (natural loop breaker) +/// - `All`: process all bot messages (capped at `MAX_CONSECUTIVE_BOT_TURNS`) +/// +/// The bot's own messages are always ignored regardless of this setting. 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum AllowBots { + #[default] + Off, + Mentions, + All, +} + +impl<'de> Deserialize<'de> for AllowBots { + fn deserialize>(deserializer: D) -> Result { + let s = String::deserialize(deserializer)?; + match s.to_lowercase().as_str() { + "off" | "none" | "false" => Ok(Self::Off), + "mentions" => Ok(Self::Mentions), + "all" | "true" => Ok(Self::All), + other => Err(serde::de::Error::unknown_variant(other, &["off", "mentions", "all"])), + } + } +} + #[derive(Debug, Deserialize)] pub struct Config { pub discord: DiscordConfig, @@ -11,8 +39,36 @@ pub struct Config { pub pool: PoolConfig, #[serde(default)] pub reactions: ReactionsConfig, + #[serde(default)] + pub stt: SttConfig, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct SttConfig { + #[serde(default)] + pub enabled: bool, + #[serde(default)] + pub api_key: String, + #[serde(default = "default_stt_model")] + pub model: String, + #[serde(default = "default_stt_base_url")] + pub base_url: String, } +impl Default for SttConfig { + fn default() -> Self { + Self { + enabled: false, + api_key: String::new(), + model: default_stt_model(), + base_url: default_stt_base_url(), + } + } +} + +fn default_stt_model() -> String { "whisper-large-v3-turbo".into() } +fn default_stt_base_url() -> String { "https://api.groq.com/openai/v1".into() } + #[derive(Debug, Deserialize)] pub struct DiscordConfig { pub bot_token: String, @@ -20,6 +76,15 @@ pub struct DiscordConfig { pub allowed_channels: Vec, #[serde(default)] pub allowed_users: Vec, + #[serde(default)] + pub allow_bot_messages: AllowBots, + /// When non-empty, only bot messages from these IDs pass the bot gate. + /// Combines with `allow_bot_messages`: the mode check runs first, then + /// the allowlist filters further. Empty = allow any bot (mode permitting). + /// Only relevant when `allow_bot_messages` is `"mentions"` or `"all"`; + /// ignored when `"off"` since all bot messages are rejected before this check. 
+ #[serde(default)] + pub trusted_bot_ids: Vec, } #[derive(Debug, Deserialize)] @@ -89,7 +154,7 @@ pub struct ReactionTiming { fn default_working_dir() -> String { "/tmp".into() } fn default_max_sessions() -> usize { 10 } -fn default_ttl_hours() -> u64 { 24 } +fn default_ttl_hours() -> u64 { 4 } fn default_true() -> bool { true } fn emoji_queued() -> String { "👀".into() } diff --git a/src/discord.rs b/src/discord.rs index 77539173..7677bc47 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -1,10 +1,12 @@ use crate::acp::{classify_notification, AcpEvent, ContentBlock, SessionPool}; -use crate::config::ReactionsConfig; +use crate::config::{AllowBots, ReactionsConfig, SttConfig}; use crate::error_display::{format_coded_error, format_user_error}; use crate::format; use crate::reactions::StatusReactionController; use base64::engine::general_purpose::STANDARD as BASE64; use base64::Engine; +use image::ImageReader; +use std::io::Cursor; use std::sync::LazyLock; use serenity::async_trait; use serenity::model::channel::{Message, ReactionType}; @@ -16,6 +18,15 @@ use std::sync::Arc; use tokio::sync::watch; use tracing::{debug, error, info}; +/// Hard cap on consecutive bot messages (from any other bot) in a +/// channel or thread. When this many recent messages are all from +/// bots other than ourselves, we stop responding to prevent runaway +/// loops between multiple bots in "all" mode. +/// +/// Note: must be ≤ 255 because Serenity's `GetMessages::limit()` takes `u8`. +/// Inspired by OpenClaw's `session.agentToAgent.maxPingPongTurns`. +const MAX_CONSECUTIVE_BOT_TURNS: u8 = 10; + /// Reusable HTTP client for downloading Discord attachments. /// Built once with a 30s timeout and rustls TLS (no native-tls deps). 
static HTTP_CLIENT: LazyLock = LazyLock::new(|| { @@ -30,17 +41,21 @@ pub struct Handler { pub allowed_channels: HashSet, pub allowed_users: HashSet, pub reactions_config: ReactionsConfig, + pub stt_config: SttConfig, + pub allow_bot_messages: AllowBots, + pub trusted_bot_ids: HashSet, } #[async_trait] impl EventHandler for Handler { async fn message(&self, ctx: Context, msg: Message) { - if msg.author.bot { + let bot_id = ctx.cache.current_user().id; + + // Always ignore own messages + if msg.author.id == bot_id { return; } - let bot_id = ctx.cache.current_user().id; - let channel_id = msg.channel_id.get(); let in_allowed_channel = self.allowed_channels.is_empty() || self.allowed_channels.contains(&channel_id); @@ -49,6 +64,71 @@ impl EventHandler for Handler { || msg.content.contains(&format!("<@{}>", bot_id)) || msg.mention_roles.iter().any(|r| msg.content.contains(&format!("<@&{}>", r))); + // Bot message gating — runs after self-ignore but before channel/user + // allowlist checks. This ordering is intentional: channel checks below + // apply uniformly to both human and bot messages, so a bot mention in + // a non-allowed channel is still rejected by the channel check. + if msg.author.bot { + match self.allow_bot_messages { + AllowBots::Off => return, + AllowBots::Mentions => if !is_mentioned { return; }, + AllowBots::All => { + // Safety net: count consecutive messages from any bot + // (excluding ourselves) in recent history. If all recent + // messages are from other bots, we've likely entered a + // loop. This counts *all* other-bot messages, not just + // one specific bot — so 3 bots taking turns still hits + // the cap (which is intentionally conservative). + // + // Try cache first to avoid an API call on every bot + // message. Fall back to API on cache miss. If both fail, + // reject the message (fail-closed) to avoid unbounded + // loops during Discord API outages. 
+ let cap = MAX_CONSECUTIVE_BOT_TURNS as usize; + let history = ctx.cache.channel_messages(msg.channel_id) + .map(|msgs| { + let mut recent: Vec<_> = msgs.iter() + .filter(|(mid, _)| **mid < msg.id) + .map(|(_, m)| m.clone()) + .collect(); + recent.sort_unstable_by(|a, b| b.id.cmp(&a.id)); // newest first + recent.truncate(cap); + recent + }) + .filter(|msgs| !msgs.is_empty()); + + let recent = if let Some(cached) = history { + cached + } else { + match msg.channel_id + .messages(&ctx.http, serenity::builder::GetMessages::new().before(msg.id).limit(MAX_CONSECUTIVE_BOT_TURNS)) + .await + { + Ok(msgs) => msgs, + Err(e) => { + tracing::warn!(channel_id = %msg.channel_id, error = %e, "failed to fetch history for bot turn cap, rejecting (fail-closed)"); + return; + } + } + }; + + let consecutive_bot = recent.iter() + .take_while(|m| m.author.bot && m.author.id != bot_id) + .count(); + if consecutive_bot >= cap { + tracing::warn!(channel_id = %msg.channel_id, cap, "bot turn cap reached, ignoring"); + return; + } + }, + } + + // If trusted_bot_ids is set, only allow bots on the list + if !self.trusted_bot_ids.is_empty() && !self.trusted_bot_ids.contains(&msg.author.id.get()) { + tracing::debug!(bot_id = %msg.author.id, "bot not in trusted_bot_ids, ignoring"); + return; + } + } + let in_thread = if !in_allowed_channel { match msg.channel_id.to_channel(&ctx.http).await { Ok(serenity::model::channel::Channel::Guild(gc)) => { @@ -124,18 +204,23 @@ impl EventHandler for Handler { text: prompt_with_sender.clone(), }); - // Add image attachments + // Process attachments: route by content type (audio → STT, image → encode) if !msg.attachments.is_empty() { for attachment in &msg.attachments { - if let Some(content_block) = download_and_encode_image(attachment).await { + if is_audio_attachment(attachment) { + if self.stt_config.enabled { + if let Some(transcript) = download_and_transcribe(attachment, &self.stt_config).await { + debug!(filename = %attachment.filename, chars = 
transcript.len(), "voice transcript injected"); + content_blocks.insert(0, ContentBlock::Text { + text: format!("[Voice message transcript]: {transcript}"), + }); + } + } else { + debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)"); + } + } else if let Some(content_block) = download_and_encode_image(attachment).await { debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); content_blocks.push(content_block); - } else { - error!( - url = %attachment.url, - filename = %attachment.filename, - "failed to download image attachment" - ); } } } @@ -233,14 +318,51 @@ impl EventHandler for Handler { } } -/// Download a Discord image attachment and encode it as an ACP image content block. -/// -/// Discord attachment URLs are temporary and expire, so we must download -/// and encode the image data immediately. The ACP ImageContent schema -/// requires `{ data: base64_string, mimeType: "image/..." }`. +/// Check if an attachment is an audio file (voice messages are typically audio/ogg). +fn is_audio_attachment(attachment: &serenity::model::channel::Attachment) -> bool { + let mime = attachment.content_type.as_deref().unwrap_or(""); + mime.starts_with("audio/") +} + +/// Download an audio attachment and transcribe it via the configured STT provider. 
+async fn download_and_transcribe( + attachment: &serenity::model::channel::Attachment, + stt_config: &SttConfig, +) -> Option { + const MAX_SIZE: u64 = 25 * 1024 * 1024; // 25 MB (Whisper API limit) + + if u64::from(attachment.size) > MAX_SIZE { + error!(filename = %attachment.filename, size = attachment.size, "audio exceeds 25MB limit"); + return None; + } + + let resp = HTTP_CLIENT.get(&attachment.url).send().await.ok()?; + if !resp.status().is_success() { + error!(url = %attachment.url, status = %resp.status(), "audio download failed"); + return None; + } + let bytes = resp.bytes().await.ok()?.to_vec(); + + let mime_type = attachment.content_type.as_deref().unwrap_or("audio/ogg"); + let mime_type = mime_type.split(';').next().unwrap_or(mime_type).trim(); + + crate::stt::transcribe(&HTTP_CLIENT, stt_config, bytes, attachment.filename.clone(), mime_type).await +} + +/// Maximum dimension (width or height) for resized images. +/// Matches OpenClaw's DEFAULT_IMAGE_MAX_DIMENSION_PX. +const IMAGE_MAX_DIMENSION_PX: u32 = 1200; + +/// JPEG quality for compressed output (OpenClaw uses progressive 85→35; +/// we start at 75 which is a good balance of quality vs size). +const IMAGE_JPEG_QUALITY: u8 = 75; + +/// Download a Discord image attachment, resize/compress it, then base64-encode +/// as an ACP image content block. /// -/// Security: rejects non-image attachments (by content-type or extension) -/// and files larger than 10MB to prevent OOM/abuse. +/// Large images are resized so the longest side is at most 1200px and +/// re-encoded as JPEG at quality 75. This keeps the base64 payload well +/// under typical JSON-RPC transport limits (~200-400KB after encoding). 
async fn download_and_encode_image(attachment: &serenity::model::channel::Attachment) -> Option { const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB @@ -267,69 +389,104 @@ async fn download_and_encode_image(attachment: &serenity::model::channel::Attach }) }); - // Validate that it's actually an image let Some(mime) = media_type else { - debug!(filename = %attachment.filename, "skipping non-image attachment (no matching content-type or extension)"); + debug!(filename = %attachment.filename, "skipping non-image attachment"); return None; }; - // Strip MIME type parameters (e.g. "image/jpeg; charset=utf-8" → "image/jpeg") - // Downstream LLM APIs (Claude, OpenAI, Gemini) reject MIME types with parameters let mime = mime.split(';').next().unwrap_or(mime).trim(); if !mime.starts_with("image/") { debug!(filename = %attachment.filename, mime = %mime, "skipping non-image attachment"); return None; } - // Size check before downloading if u64::from(attachment.size) > MAX_SIZE { - error!( - filename = %attachment.filename, - size = attachment.size, - max = MAX_SIZE, - "image attachment exceeds 10MB limit" - ); + error!(filename = %attachment.filename, size = attachment.size, "image exceeds 10MB limit"); return None; } - // Download using the static reusable client let response = match HTTP_CLIENT.get(url).send().await { Ok(resp) => resp, - Err(e) => { - error!("failed to download image {}: {}", url, e); - return None; - } + Err(e) => { error!(url = %url, error = %e, "download failed"); return None; } }; - if !response.status().is_success() { - error!("HTTP error downloading image {}: {}", url, response.status()); + error!(url = %url, status = %response.status(), "HTTP error downloading image"); return None; } - let bytes = match response.bytes().await { Ok(b) => b, - Err(e) => { - error!("failed to read image bytes from {}: {}", url, e); - return None; - } + Err(e) => { error!(url = %url, error = %e, "read failed"); return None; } }; - // Final size check after download 
(defense in depth) + // Defense-in-depth: verify actual download size if bytes.len() as u64 > MAX_SIZE { - error!( - filename = %attachment.filename, - size = bytes.len(), - "downloaded image exceeds 10MB limit after decode" - ); + error!(filename = %attachment.filename, size = bytes.len(), "downloaded image exceeds limit"); return None; } - let encoded = BASE64.encode(bytes.as_ref()); + // Resize and compress + let (output_bytes, output_mime) = match resize_and_compress(&bytes) { + Ok(result) => result, + Err(e) => { + // Fallback: use original bytes but reject if too large for transport + if bytes.len() > 1024 * 1024 { + error!(filename = %attachment.filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping"); + return None; + } + debug!(filename = %attachment.filename, error = %e, "resize failed, using original"); + (bytes.to_vec(), mime.to_string()) + } + }; + + debug!( + filename = %attachment.filename, + original_size = bytes.len(), + compressed_size = output_bytes.len(), + "image processed" + ); + + let encoded = BASE64.encode(&output_bytes); Some(ContentBlock::Image { - media_type: mime.to_string(), + media_type: output_mime, data: encoded, }) } +/// Resize image so longest side ≤ IMAGE_MAX_DIMENSION_PX, then encode as JPEG. +/// Returns (compressed_bytes, mime_type). GIFs are passed through unchanged +/// to preserve animation. 
+fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::ImageError> { + let reader = ImageReader::new(Cursor::new(raw)) + .with_guessed_format()?; + + let format = reader.format(); + + // Pass through GIFs unchanged to preserve animation + if format == Some(image::ImageFormat::Gif) { + return Ok((raw.to_vec(), "image/gif".to_string())); + } + + let img = reader.decode()?; + let (w, h) = (img.width(), img.height()); + + // Resize preserving aspect ratio: scale so longest side = 1200px + let img = if w > IMAGE_MAX_DIMENSION_PX || h > IMAGE_MAX_DIMENSION_PX { + let max_side = std::cmp::max(w, h); + let ratio = f64::from(IMAGE_MAX_DIMENSION_PX) / f64::from(max_side); + let new_w = (f64::from(w) * ratio) as u32; + let new_h = (f64::from(h) * ratio) as u32; + img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3) + } else { + img + }; + + // Encode as JPEG + let mut buf = Cursor::new(Vec::new()); + let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, IMAGE_JPEG_QUALITY); + img.write_with_encoder(encoder)?; + + Ok((buf.into_inner(), "image/jpeg".to_string())) +} + async fn edit(ctx: &Context, ch: ChannelId, msg_id: MessageId, content: &str) -> serenity::Result { ch.edit_message(&ctx.http, msg_id, serenity::builder::EditMessage::new().content(content)).await } @@ -364,38 +521,39 @@ async fn stream_prompt( let (buf_tx, buf_rx) = watch::channel(initial); let mut text_buf = String::new(); - let mut tool_lines: Vec = Vec::new(); + // Tool calls indexed by toolCallId. Vec preserves first-seen + // order. We store id + title + state separately so a ToolDone + // event that arrives without a refreshed title (claude-agent-acp's + // update events don't always re-send the title field) can still + // reuse the title we already learned from a prior + // tool_call_update — only the icon flips 🔧 → ✅ / ❌. Rendering + // happens on the fly in compose_display(). 
+ let mut tool_lines: Vec = Vec::new(); let current_msg_id = msg_id; if reset { text_buf.push_str("⚠️ _Session expired, starting fresh..._\n\n"); } - // Spawn edit-streaming task + // Spawn edit-streaming task — only edits the single message, never sends new ones. + // Long content is truncated during streaming; final multi-message split happens after. let edit_handle = { let ctx = ctx.clone(); let mut buf_rx = buf_rx.clone(); tokio::spawn(async move { let mut last_content = String::new(); - let mut current_edit_msg = msg_id; loop { tokio::time::sleep(std::time::Duration::from_millis(1500)).await; if buf_rx.has_changed().unwrap_or(false) { let content = buf_rx.borrow_and_update().clone(); if content != last_content { - if content.len() > 1900 { - let chunks = format::split_message(&content, 1900); - if let Some(first) = chunks.first() { - let _ = edit(&ctx, channel, current_edit_msg, first).await; - } - for chunk in chunks.iter().skip(1) { - if let Ok(new_msg) = channel.say(&ctx.http, chunk).await { - current_edit_msg = new_msg.id; - } - } + let display = if content.chars().count() > 1900 { + let truncated = format::truncate_chars(&content, 1900); + format!("{truncated}…") } else { - let _ = edit(&ctx, channel, current_edit_msg, &content).await; - } + content.clone() + }; + let _ = edit(&ctx, channel, msg_id, &display).await; last_content = content; } } @@ -431,16 +589,53 @@ async fn stream_prompt( AcpEvent::Thinking => { reactions.set_thinking().await; } - AcpEvent::ToolStart { title, .. } if !title.is_empty() => { + AcpEvent::ToolStart { id, title } if !title.is_empty() => { reactions.set_tool(&title).await; - tool_lines.push(format!("🔧 `{title}`...")); + let title = sanitize_title(&title); + // Dedupe by toolCallId: replace if we've already + // seen this id, otherwise append a new entry. + // claude-agent-acp emits a placeholder title + // ("Terminal", "Edit", etc.) 
on the first event + // and refines it via tool_call_update; without + // dedup the placeholder and refined version + // appear as two separate orphaned lines. + if let Some(slot) = tool_lines.iter_mut().find(|e| e.id == id) { + slot.title = title; + slot.state = ToolState::Running; + } else { + tool_lines.push(ToolEntry { + id, + title, + state: ToolState::Running, + }); + } let _ = buf_tx.send(compose_display(&tool_lines, &text_buf)); } - AcpEvent::ToolDone { title, status, .. } => { + AcpEvent::ToolDone { id, title, status } => { reactions.set_thinking().await; - let icon = if status == "completed" { "✅" } else { "❌" }; - if let Some(line) = tool_lines.iter_mut().rev().find(|l| l.contains(&title)) { - *line = format!("{icon} `{title}`"); + let new_state = if status == "completed" { + ToolState::Completed + } else { + ToolState::Failed + }; + // Find by id (the title is unreliable — substring + // match against the placeholder "Terminal" would + // never find the refined entry). Preserve the + // existing title if the Done event omits it. + if let Some(slot) = tool_lines.iter_mut().find(|e| e.id == id) { + if !title.is_empty() { + slot.title = sanitize_title(&title); + } + slot.state = new_state; + } else if !title.is_empty() { + // Done arrived without a prior Start (rare + // race) — record it so we still show + // something. + tool_lines.push(ToolEntry { + id, + title: sanitize_title(&title), + state: new_state, + }); } let _ = buf_tx.send(compose_display(&tool_lines, &text_buf)); } @@ -486,11 +681,47 @@ async fn stream_prompt( .await } -fn compose_display(tool_lines: &[String], text: &str) -> String { +/// Flatten a tool-call title into a single line that's safe to render +/// inside Discord inline-code spans. 
Discord renders single-backtick +/// code on a single line only, so multi-line shell commands (heredocs, +/// `&&`-chained commands split across lines) appear truncated; we +/// collapse newlines to ` ; ` and rewrite embedded backticks so they +/// don't break the wrapping span. +fn sanitize_title(title: &str) -> String { + title.replace('\r', "").replace('\n', " ; ").replace('`', "'") +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ToolState { + Running, + Completed, + Failed, +} + +#[derive(Debug, Clone)] +struct ToolEntry { + id: String, + title: String, + state: ToolState, +} + +impl ToolEntry { + fn render(&self) -> String { + let icon = match self.state { + ToolState::Running => "🔧", + ToolState::Completed => "✅", + ToolState::Failed => "❌", + }; + let suffix = if self.state == ToolState::Running { "..." } else { "" }; + format!("{icon} `{}`{}", self.title, suffix) + } +} + +fn compose_display(tool_lines: &[ToolEntry], text: &str) -> String { let mut out = String::new(); if !tool_lines.is_empty() { - for line in tool_lines { - out.push_str(line); + for entry in tool_lines { + out.push_str(&entry.render()); out.push('\n'); } out.push('\n'); @@ -542,3 +773,87 @@ async fn get_or_create_thread(ctx: &Context, msg: &Message, prompt: &str) -> any Ok(thread.id.get()) } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_png(width: u32, height: u32) -> Vec { + let img = image::RgbImage::new(width, height); + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Png).unwrap(); + buf.into_inner() + } + + #[test] + fn large_image_resized_to_max_dimension() { + let png = make_png(3000, 2000); + let (compressed, mime) = resize_and_compress(&png).unwrap(); + + assert_eq!(mime, "image/jpeg"); + let result = image::load_from_memory(&compressed).unwrap(); + assert!(result.width() <= IMAGE_MAX_DIMENSION_PX); + assert!(result.height() <= IMAGE_MAX_DIMENSION_PX); + } + + #[test] + fn small_image_keeps_original_dimensions() { + let png = 
make_png(800, 600); + let (compressed, mime) = resize_and_compress(&png).unwrap(); + + assert_eq!(mime, "image/jpeg"); + let result = image::load_from_memory(&compressed).unwrap(); + assert_eq!(result.width(), 800); + assert_eq!(result.height(), 600); + } + + #[test] + fn landscape_image_respects_aspect_ratio() { + let png = make_png(4000, 2000); + let (compressed, _) = resize_and_compress(&png).unwrap(); + + let result = image::load_from_memory(&compressed).unwrap(); + assert_eq!(result.width(), 1200); + assert_eq!(result.height(), 600); + } + + #[test] + fn portrait_image_respects_aspect_ratio() { + let png = make_png(2000, 4000); + let (compressed, _) = resize_and_compress(&png).unwrap(); + + let result = image::load_from_memory(&compressed).unwrap(); + assert_eq!(result.width(), 600); + assert_eq!(result.height(), 1200); + } + + #[test] + fn compressed_output_is_smaller_than_original() { + let png = make_png(3000, 2000); + let (compressed, _) = resize_and_compress(&png).unwrap(); + + assert!(compressed.len() < png.len(), "compressed {} should be < original {}", compressed.len(), png.len()); + } + + #[test] + fn gif_passes_through_unchanged() { + // Minimal valid GIF89a (1x1 pixel) + let gif: Vec = vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // GIF89a + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, // logical screen descriptor + 0x2C, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, // image descriptor + 0x02, 0x02, 0x44, 0x01, 0x00, // image data + 0x3B, // trailer + ]; + let (output, mime) = resize_and_compress(&gif).unwrap(); + + assert_eq!(mime, "image/gif"); + assert_eq!(output, gif); + } + + #[test] + fn invalid_data_returns_error() { + let garbage = vec![0x00, 0x01, 0x02, 0x03]; + assert!(resize_and_compress(&garbage).is_err()); + } +} diff --git a/src/format.rs b/src/format.rs index a0026ebb..841cf559 100644 --- a/src/format.rs +++ b/src/format.rs @@ -1,31 +1,40 @@ -/// Split text into chunks at line boundaries, each <= limit chars. 
+/// Split text into chunks at line boundaries, each <= limit Unicode characters (UTF-8 safe). +/// Discord's message limit counts Unicode characters, not bytes. pub fn split_message(text: &str, limit: usize) -> Vec { - if text.len() <= limit { + if text.chars().count() <= limit { return vec![text.to_string()]; } let mut chunks = Vec::new(); let mut current = String::new(); + let mut current_len: usize = 0; for line in text.split('\n') { + let line_chars = line.chars().count(); // +1 for the newline - if !current.is_empty() && current.len() + line.len() + 1 > limit { + if !current.is_empty() && current_len + line_chars + 1 > limit { chunks.push(current); current = String::new(); + current_len = 0; } if !current.is_empty() { current.push('\n'); + current_len += 1; } - // If a single line exceeds limit, hard-split it - if line.len() > limit { - for chunk in line.as_bytes().chunks(limit) { - if !current.is_empty() { + // If a single line exceeds limit, hard-split on char boundaries + if line_chars > limit { + for ch in line.chars() { + if current_len + 1 > limit { chunks.push(current); + current = String::new(); + current_len = 0; } - current = String::from_utf8_lossy(chunk).to_string(); + current.push(ch); + current_len += 1; } } else { current.push_str(line); + current_len += line_chars; } } if !current.is_empty() { @@ -33,3 +42,12 @@ pub fn split_message(text: &str, limit: usize) -> Vec { } chunks } + +/// Truncate a string to at most `limit` Unicode characters. +/// Discord's message limit counts Unicode characters, not bytes. 
+pub fn truncate_chars(s: &str, limit: usize) -> &str { + match s.char_indices().nth(limit) { + Some((idx, _)) => &s[..idx], + None => s, + } +} diff --git a/src/main.rs b/src/main.rs index 39817342..fd63b89a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod discord; mod error_display; mod format; mod reactions; +mod stt; use serenity::prelude::*; use std::collections::HashSet; @@ -25,13 +26,14 @@ async fn main() -> anyhow::Result<()> { .map(PathBuf::from) .unwrap_or_else(|| PathBuf::from("config.toml")); - let cfg = config::load_config(&config_path)?; + let mut cfg = config::load_config(&config_path)?; info!( agent_cmd = %cfg.agent.command, pool_max = cfg.pool.max_sessions, channels = ?cfg.discord.allowed_channels, users = ?cfg.discord.allowed_users, reactions = cfg.reactions.enabled, + allow_bot_messages = ?cfg.discord.allow_bot_messages, "config loaded" ); @@ -40,13 +42,33 @@ async fn main() -> anyhow::Result<()> { let allowed_channels = parse_id_set(&cfg.discord.allowed_channels, "allowed_channels")?; let allowed_users = parse_id_set(&cfg.discord.allowed_users, "allowed_users")?; - info!(channels = allowed_channels.len(), users = allowed_users.len(), "parsed allowlists"); + let trusted_bot_ids = parse_id_set(&cfg.discord.trusted_bot_ids, "trusted_bot_ids")?; + info!(channels = allowed_channels.len(), users = allowed_users.len(), trusted_bots = ?trusted_bot_ids, "parsed allowlists"); + + // Resolve STT config before constructing handler (auto-detect mutates cfg.stt) + if cfg.stt.enabled { + if cfg.stt.api_key.is_empty() && cfg.stt.base_url.contains("groq.com") { + if let Ok(key) = std::env::var("GROQ_API_KEY") { + if !key.is_empty() { + info!("stt.api_key not set, using GROQ_API_KEY from environment"); + cfg.stt.api_key = key; + } + } + } + if cfg.stt.api_key.is_empty() { + anyhow::bail!("stt.enabled = true but no API key found — set stt.api_key in config or export GROQ_API_KEY"); + } + info!(model = %cfg.stt.model, base_url = %cfg.stt.base_url, 
"STT enabled"); + } let handler = discord::Handler { pool: pool.clone(), allowed_channels, allowed_users, reactions_config: cfg.reactions, + stt_config: cfg.stt.clone(), + allow_bot_messages: cfg.discord.allow_bot_messages, + trusted_bot_ids, }; let intents = GatewayIntents::GUILD_MESSAGES diff --git a/src/stt.rs b/src/stt.rs new file mode 100644 index 00000000..122db9b6 --- /dev/null +++ b/src/stt.rs @@ -0,0 +1,61 @@ +use crate::config::SttConfig; +use reqwest::multipart; +use tracing::{debug, error}; + +/// Transcribe audio bytes via an OpenAI-compatible `/audio/transcriptions` endpoint. +pub async fn transcribe( + client: &reqwest::Client, + cfg: &SttConfig, + audio_bytes: Vec, + filename: String, + mime_type: &str, +) -> Option { + let url = format!("{}/audio/transcriptions", cfg.base_url.trim_end_matches('/')); + + let file_part = multipart::Part::bytes(audio_bytes) + .file_name(filename) + .mime_str(mime_type) + .ok()?; + + let form = multipart::Form::new() + .part("file", file_part) + .text("model", cfg.model.clone()) + .text("response_format", "json"); + + let resp = match client + .post(&url) + .bearer_auth(&cfg.api_key) + .multipart(form) + .send() + .await + { + Ok(r) => r, + Err(e) => { + error!(error = %e, "STT request failed"); + return None; + } + }; + + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + error!(status = %status, body = %body, "STT API error"); + return None; + } + + let json: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(e) => { + error!(error = %e, "STT response parse failed"); + return None; + } + }; + + let text = json.get("text")?.as_str()?.trim().to_string(); + if text.is_empty() { + return None; + } + + debug!(chars = text.len(), "STT transcription complete"); + Some(text) +}