diff --git a/.github/workflows/ci.yml.disabled b/.github/workflows/ci.yml.disabled new file mode 100644 index 00000000..7f61cae4 --- /dev/null +++ b/.github/workflows/ci.yml.disabled @@ -0,0 +1,49 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + - dev + - "V*" + +jobs: + lint: + name: Lint (ruff) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: pip + + - name: Install ruff + run: pip install ruff + + - name: Check formatting + run: ruff format --check . + + - name: Run ruff check + run: ruff check . + + smoke: + name: Smoke (syntax + imports) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: pip + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Byte-compile source tree + run: python -m compileall -q app agent_core agents decorators skills diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6414918e..97fa3d00 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -100,11 +100,31 @@ jobs: with: python-version: "3.10" + - name: Install Linux webview system packages + if: matrix.os_label == 'linux' + shell: bash + run: | + # PyGObject builds against these headers. WebKitGTK + the + # webkit2-4.0 GObject introspection bindings are what pywebview's + # GTK backend talks to at runtime. End users need the runtime + # halves of these too (libwebkit2gtk-4.0-37, gir1.2-webkit2-4.0). + sudo apt-get update + sudo apt-get install -y \ + libwebkit2gtk-4.0-37 \ + gir1.2-webkit2-4.0 \ + libgirepository1.0-dev \ + libcairo2-dev \ + python3-gi \ + python3-gi-cairo + - name: Install dependencies shell: bash run: | python -m pip install --upgrade pip pip install -r requirements.txt + # Installer-only deps (pywebview + per-OS backend bindings). 
Kept + # in a separate file so the agent's requirements.txt stays lean. + pip install -r packaging/requirements-installer.txt pip install pyinstaller - name: Set up Node.js @@ -126,61 +146,55 @@ jobs: run: | echo "{\"use_conda\": false, \"gui_mode_enabled\": false}" > config.json - - name: Build with PyInstaller + - name: Write VERSION file from git tag + shell: bash + run: | + # github.ref_name is "v1.3.0" for tag pushes; strip the leading 'v'. + # The installer's _read_bundled_version() reads this to pin the + # download URL to the matching agent release. + REF="${{ github.ref_name }}" + VERSION="${REF#v}" + echo "$VERSION" > VERSION + echo "VERSION file content: $(cat VERSION)" + + - name: Build agent (CraftBotAgent) with PyInstaller + shell: bash + run: pyinstaller --noconfirm --clean packaging/CraftBotAgent.spec + + - name: Zip agent payload shell: bash run: | - pyinstaller \ - --noconfirm \ - --clean \ - --onefile \ - --name CraftBot \ - --collect-submodules app \ - --collect-submodules agent_core \ - --collect-submodules agents \ - --collect-submodules decorators \ - --collect-submodules chromadb \ - --hidden-import onnxruntime \ - --hidden-import tokenizers \ - --collect-data tiktoken_ext \ - --collect-submodules tiktoken \ - --exclude-module torch \ - --exclude-module torchvision \ - --exclude-module torchaudio \ - --exclude-module triton \ - --exclude-module nvidia \ - --exclude-module transformers \ - --exclude-module cv2 \ - --exclude-module matplotlib \ - --exclude-module tensorflow \ - --additional-hooks-dir hooks \ - --runtime-hook rthooks/rthook-rich-unicode.py \ - --add-data "assets${{ matrix.data_sep }}assets" \ - --add-data "main.py${{ matrix.data_sep }}." \ - --add-data "config.json${{ matrix.data_sep }}." \ - --add-data ".env.example${{ matrix.data_sep }}." \ - --add-data "requirements.txt${{ matrix.data_sep }}." \ - --add-data "environment.yml${{ matrix.data_sep }}." 
\ - --add-data "app/config${{ matrix.data_sep }}app/config" \ - --add-data "app/data${{ matrix.data_sep }}app/data" \ - --add-data "app/ui_layer/browser/frontend/dist${{ matrix.data_sep }}app/ui_layer/browser/frontend/dist" \ - --add-data "app/gui/docker-compose.yaml${{ matrix.data_sep }}app/gui" \ - --add-data "app/gui/Dockerfile${{ matrix.data_sep }}app/gui" \ - --add-data "app/gui/custom-cont-init.d${{ matrix.data_sep }}app/gui/custom-cont-init.d" \ - --add-data "agents${{ matrix.data_sep }}agents" \ - --add-data "skills${{ matrix.data_sep }}skills" \ - run.py - - - name: Rename artifact + # CraftBotAgent.spec produces dist/CraftBotAgent/ (folder). + # Zip it into the asset name the installer downloads at runtime. + cd dist + if [ "${{ matrix.os_label }}" = "windows" ]; then + 7z a -tzip "CraftBot-agent-${{ matrix.os_label }}.zip" CraftBotAgent + else + zip -r "CraftBot-agent-${{ matrix.os_label }}.zip" CraftBotAgent + fi + ls -lh "CraftBot-agent-${{ matrix.os_label }}.zip" + + - name: Build installer (CraftBotInstaller) with PyInstaller + shell: bash + run: pyinstaller --noconfirm --clean packaging/CraftBotInstaller.spec + + - name: Rename installer artifact shell: bash run: | - mv "dist/CraftBot${{ matrix.ext }}" \ - "dist/CraftBot-${{ matrix.os_label }}${{ matrix.ext }}" + mv "dist/CraftBotInstaller${{ matrix.ext }}" \ + "dist/CraftBotInstaller-${{ matrix.os_label }}${{ matrix.ext }}" + + - name: Upload installer artifact + uses: actions/upload-artifact@v4 + with: + name: release-installer-${{ matrix.os_label }} + path: dist/CraftBotInstaller-${{ matrix.os_label }}${{ matrix.ext }} - - name: Upload artifact + - name: Upload agent zip artifact uses: actions/upload-artifact@v4 with: - name: release-${{ matrix.os_label }} - path: dist/CraftBot-${{ matrix.os_label }}${{ matrix.ext }} + name: release-agent-${{ matrix.os_label }} + path: dist/CraftBot-agent-${{ matrix.os_label }}.zip # ────────────────────────────────────────────── # Create GitHub Release with all 
artifacts @@ -202,21 +216,14 @@ jobs: files: release/** generate_release_notes: true body: | - ## Installation - - ### Docker Compose (Recommended) - Includes the agent, OmniParser, and GUI desktop VM — all in one. - ```bash - git clone https://github.com/zfoong/CraftBot.git - cd CraftBot - cp .env.example .env # Edit with your API keys - docker compose up - ``` - For GPU acceleration on OmniParser: - ```bash - docker compose -f docker-compose.yml -f docker-compose.gpu.yml up - ``` - - ### Standalone Binary - Download the binary for your platform from the assets below. - Requires Docker for GUI mode and Omniparser. Run with `--no-omniparser` to skip OmniParser setup. Run with `--no-conda` to skip conda setup and use system Python instead. + ### Installer (Recommended) + Download `CraftBotInstaller-<os>` from the assets below and + run it. The installer wizard will let you choose an install + location and will download the matching agent payload + (`CraftBot-agent-<os>.zip`) from this same release. + + ### Manual install + If you'd rather skip the wizard, download both + `CraftBotInstaller-<os>` and `CraftBot-agent-<os>.zip`, + place them in the same folder, and run the installer — it'll find + the local zip instead of fetching from GitHub. \ No newline at end of file diff --git a/.github/workflows/staging-lint.yml b/.github/workflows/staging-lint.yml new file mode 100644 index 00000000..c6dcad3d --- /dev/null +++ b/.github/workflows/staging-lint.yml @@ -0,0 +1,42 @@ +name: Staging Lint + +on: + push: + branches: [staging] + pull_request: + branches: [staging] + +jobs: + ruff: + name: Ruff (format + check) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: pip + + - name: Install ruff + run: pip install ruff + + - name: Check formatting + run: ruff format --check . + + - name: Run ruff check + run: ruff check . 
+ + smoke: + name: Smoke (syntax + imports) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: pip + + - name: Byte-compile source tree + run: python -m compileall -q app agent_core agents decorators skills diff --git a/.gitignore b/.gitignore index c9359d64..1eb0d79c 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,14 @@ build/ debug_images .vscode/ .idea/ +# PyInstaller auto-generates a .spec when run without one — those are +# build artifacts. Our hand-written CraftBotInstaller.spec and +# CraftBotAgent.spec live under packaging/ and are source-of-truth for the +# release workflow, so they MUST be tracked. Keep the broad ignore but +# allow-list the two we own. *.spec +!packaging/CraftBotInstaller.spec +!packaging/CraftBotAgent.spec **/.whatsapp_web_sessions/ **/build **/build_* @@ -46,4 +53,6 @@ app/config/settings.json **/USER.md **/onboarding_config.json **/config.json -!build_template.py \ No newline at end of file +!build_template.py +docs/LIVING_UI_DEVELOPER_GUIDE.md +agent_file_system/ACTIONS.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd958734..b45392e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ To ensure contributor feels welcome, we have this guide to help you get started ## 🌟 Links - [Discord Community](https://discord.gg/W8jdMKdE) -- [Issue Tracker](https://github.com/zfoong/CraftBot/issues) +- [Issue Tracker](https://github.com/CraftOS-dev/CraftBot/issues) ## 1. 🚀 Ways to Contribute @@ -24,7 +24,7 @@ Here are all the things you to contribute to the community. 
## 📫 There are several ways to collaborate with the team and community: ### GitHub Collaboration -- [Open an issue](https://github.com/zfoong/CraftBot/issues) for bug reports, feature requests, or discussions +- [Open an issue](https://github.com/CraftOS-dev/CraftBot/issues) for bug reports, feature requests, or discussions - Submit pull requests to contribute code or documentation - Join ongoing discussions in existing issues and PRs @@ -45,7 +45,7 @@ For faster responses, consider using our Discord channel where the whole communi ### Fork and Clone -1. Fork the [**CraftBot**](https://github.com/zfoong/CraftBot) repository +1. Fork the [**CraftBot**](https://github.com/CraftOS-dev/CraftBot) repository 2. Clone your fork: ```shell git clone https://github.com/<your-username>/CraftBot.git ``` @@ -85,7 +85,7 @@ git push origin your-branch-name ``` 2. Create a Pull Request: - - Go to the [**CraftBot** repository](https://github.com/zfoong/CraftBot) + - Go to the [**CraftBot** repository](https://github.com/CraftOS-dev/CraftBot) - Click "Compare & Pull Request" and open a PR against dev branch - Fill in the PR template with details about your changes @@ -99,7 +99,7 @@ git push origin your-branch-name ## 6. 📫 To Get Help -- Open an [issue](https://github.com/zfoong/CraftBot) +- Open an [issue](https://github.com/CraftOS-dev/CraftBot/issues) - Join our Discord community Thank you for contributing to **CraftBot**! 
🌟 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 57ace6ef..8078bb42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 + PYTHONUNBUFFERED=1 \ + PIP_TIMEOUT=600 \ + PIP_PROGRESS_BAR=off RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -38,7 +40,7 @@ WORKDIR /app COPY requirements.txt ./requirements.txt RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir -r requirements.txt + && pip install --no-cache-dir --timeout 600 -r requirements.txt COPY . . diff --git a/README.cn.md b/README.cn.md index 96630f15..91443f1f 100644 --- a/README.cn.md +++ b/README.cn.md @@ -6,13 +6,14 @@

- English README | 日本語版はこちら + English | 日本語 | 繁體中文 | 한국어 | Español | Português | Français | Deutsch

## 🚀 概览 @@ -49,13 +52,14 @@ CraftBot 静候你的指令,现在就部署属于你的 CraftBot 吧。 - **自带密钥 (BYOK)** — 灵活的 LLM 提供商系统,支持 OpenAI、Google Gemini、Anthropic Claude、BytePlus 和本地 Ollama 模型。可轻松切换提供商。 - **记忆系统** — 在午夜整理并汇总一天中发生的事件。 - **主动式代理** — 学习你的偏好、习惯和人生目标,然后进行规划并启动任务(当然需要你的批准)来帮助你改善生活。 +- **Living UI** — 在 CraftBot 中构建、导入或演进自定义应用。代理始终感知 UI 的状态,并可直接读取、写入和操作其数据。 - **外部工具集成** — 连接 Google Workspace、Slack、Notion、Zoom、LinkedIn、Discord 和 Telegram(更多即将推出!),支持嵌入式凭据和 OAuth。 - **MCP** — 模型上下文协议(Model Context Protocol)集成,通过外部工具和服务扩展代理能力。 - **技能系统** — 可扩展的技能框架,内置任务规划、研究、代码审查、Git 操作等技能。 -- **跨平台** — 完整支持 Windows 和 Linux,具有平台特定代码变体和 Docker 容器化。 +- **跨平台** — 完整支持 Windows、macOS 和 Linux,具有平台特定代码变体和 Docker 容器化。 > [!IMPORTANT] -> **关于 GUI 模式的说明:** GUI 模式仍处于实验阶段。代理切换到 GUI 模式时可能会遇到一些问题。我们正在积极改进此功能。 +> **GUI 模式已弃用。** CraftBot 不再支持 GUI(桌面自动化)模式。请改用 Browser、TUI 或 CLI 模式。
CraftBot Banner @@ -74,23 +78,100 @@ CraftBot 静候你的指令,现在就部署属于你的 CraftBot 吧。 - `Node.js` **18+**(可选 - 仅浏览器界面需要) - `conda`(可选 - 如未找到,安装器会提供自动安装 Miniconda 的选项) -### 快速安装 +### 我该选哪种方式? + +> **不确定?选方案一。** 它会帮你搞定所有事。 + +| | 方案一 — 服务安装 | 方案二 — Conda 安装 | 方案三 — 手动安装 | +|---|---|---|---| +| **适合谁** | 大多数用户、新手、测试 | 想要独立环境的 Conda 用户 | 进阶用户、自定义 Python、完全控制 | +| **自动管理 Python 环境?** | ✅ 自动 | ✅ 自动 | ❌ 你自己管理 | +| **后台运行?** | ✅ 是,作为服务 | ❌ 否 | ❌ 否 | +| **启动方式** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ 方案一 — 服务安装(推荐) + +**适合你,如果:** 你希望 CraftBot 开箱即用——后台服务、开机自启、桌面快捷方式,无需手动操作。 + +`craftbot.py` 全程自动处理:Python 环境、依赖安装、后台进程管理和自启注册。 + +```bash +# 1. 克隆仓库 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 安装、注册自启并启动 CraftBot +python craftbot.py install +``` + +就这样。终端自动关闭,CraftBot 在后台运行,浏览器自动打开。同时会创建**桌面快捷方式**,随时可重新打开浏览器。 + +**安装后的服务管理命令:** + +```bash +python craftbot.py start # 在后台启动 CraftBot +python craftbot.py stop # 停止 CraftBot +python craftbot.py restart # 重启 CraftBot +python craftbot.py status # 检查是否运行,自启是否已启用 +python craftbot.py logs # 查看最近日志 +python craftbot.py uninstall # 停止、移除自启并卸载包 +``` + +> [!TIP] +> 执行 `install` 或 `start` 后,系统会自动创建 **CraftBot 桌面快捷方式**。如果关闭了浏览器,双击快捷方式即可重新打开。 + +--- + +### 方案二 — Conda 安装 + +**适合你,如果:** 你已经在使用 conda,希望 CraftBot 运行在独立的 conda 环境中。 + +`install.py --conda` 会创建专用的 `craftbot` conda 环境。若系统中未找到 Miniconda,会自动安装。 ```bash -# 克隆仓库 -git clone https://github.com/zfoong/CraftBot.git +# 1. 克隆仓库 +git clone https://github.com/CraftOS-dev/CraftBot.git cd CraftBot -# 安装依赖 +# 2. 安装到 conda 环境 +python install.py --conda + +# 3. 
运行 CraftBot +conda run -n craftbot python run.py + +# 如果 conda 不在 PATH 中(仅 Windows): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> 每次运行 CraftBot 时,请使用 `conda run -n craftbot python run.py`。此方式没有后台服务——由你手动启停。 + +--- + +### 方案三 — 手动安装(pip) + +**适合你,如果:** 你希望完全掌控 Python 环境,不需要任何自动服务或后台进程,自己管理 CraftBot。 + +`install.py`(不带参数)会对当前激活的 Python 环境执行标准 pip 安装。通过 `run.py` 手动启停 CraftBot。 + +```bash +# 1. 克隆仓库 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 在当前 Python 环境中安装依赖 python install.py -# 运行代理 +# 3. 运行 CraftBot python run.py ``` -就这样!首次运行会引导你设置 API Key。 +首次运行会引导你完成 API Key 设置和偏好配置。 -**注意:** 如果你没有安装 Node.js,安装器会提供详细的安装指引。你也可以跳过浏览器模式,改用 TUI(见下方模式说明)。 +> [!NOTE] +> 如果未安装 Node.js,安装器会提供详细指引。你也可以完全跳过浏览器模式,直接使用 TUI 模式——无需 Node.js:`python run.py --tui` ### 安装完成后你可以做什么? - 用自然语言与代理交流 @@ -111,12 +192,57 @@ CraftBot 支持多种 UI 模式。根据你的偏好选择: | **浏览器** | `python run.py` | Node.js 18+ | 现代 Web 界面,最易使用 | | **TUI** | `python run.py --tui` | 无 | 终端 UI,无需额外依赖 | | **CLI** | `python run.py --cli` | 无 | 命令行,轻量级 | -| **GUI** | `python run.py --gui` | `install.py --gui` | 带视觉反馈的桌面自动化 | **浏览器模式**是默认的推荐模式。如果你没有 Node.js,安装器会提供安装指引,或者你可以使用 **TUI 模式**。 --- +## 🧬 Living UI + +**Living UI 是随你需求而进化的系统/应用/仪表盘。** + +需要一个内置 AI 副驾的看板?量身定制符合你工作流程的 CRM? +一个 CraftBot 能读取并驱动的公司仪表盘? +将它作为 Living UI 启动——它与 CraftBot 并行运行,并随着你的需求变化而成长。 + +
+ Living UI example +
+ +### 创建 Living UI 的三种方式 + +1. **从零开始构建。** 用自然语言描述你想要的。CraftBot 会搭建 + 数据模型、后端 API 和 React UI,并通过结构化的设计流程 + 与你一起迭代。 + +
+ Building a Living UI from scratch +
+ +2. **从市场安装。** 从 [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace) 浏览社区构建的 Living UI。 + +
+ Living UI marketplace +
+ +3. **导入已有项目。** 将 CraftBot 指向 Go、Node.js、Python、Rust 或静态源代码 + 或 GitHub 仓库。它会检测运行时、配置健康检查,并将其封装为 Living UI。 + +
+ Importing an existing project as a Living UI +
+ +### 让 CraftBot 融入其中,持续进化 + +Living UI 永远不会"完成"。随着你的需求增长,你可以让代理添加功能、 +重新设计视图或将其接入新的数据源。 + +CraftBot 嵌入在每个 Living UI 中,并**感知其状态**: +它可以读取当前的 DOM 和表单值,通过 REST API 查询应用数据, +并代表你触发操作。 + +--- + ## 🧩 架构概览 | 组件 | 说明 | @@ -133,7 +259,6 @@ CraftBot 支持多种 UI 模式。根据你的偏好选择: | **技能管理器** | 加载并将可插拔技能注入代理上下文。 | | **MCP 适配器** | 模型上下文协议集成,将 MCP 工具转换为原生动作。 | | **TUI 界面** | 基于 Textual 框架构建的终端用户界面,用于交互式命令行操作。 | -| **GUI 模块** | 实验性 GUI 自动化,使用 Docker 容器、OmniParser 进行 UI 元素检测,以及 Gradio 客户端。 | --- @@ -143,27 +268,7 @@ CraftBot 支持多种 UI 模式。根据你的偏好选择: - [ ] **外部工具集成** — 持续添加中! - [X] **MCP 层** — 已完成。 - [X] **技能层** — 已完成。 -- [X] **主动式行为** — 待定 - ---- - -## 🖥️ GUI 模式(可选) - -GUI 模式支持屏幕自动化 - 代理可以看到并与桌面环境交互。这是可选的,需要额外设置。 - -```bash -# 安装 GUI 支持(使用 pip,不需要 conda) -python install.py --gui - -# 使用 GUI 支持和 conda 安装 -python install.py --gui --conda - -# 以 GUI 模式运行 -python run.py --gui -``` - -> [!NOTE] -> GUI 模式是实验性功能,需要额外的依赖(约 4GB 模型权重)。如果你不需要桌面自动化,请跳过此项,使用 Browser/TUI 模式即可,无需额外依赖。 +- [ ] **主动式行为** — 待定 --- @@ -173,9 +278,7 @@ python run.py --gui | 参数 | 说明 | |------|-------------| -| `--gui` | 安装 GUI 组件(OmniParser) | | `--conda` | 使用 conda 环境(可选) | -| `--cpu-only` | 安装仅 CPU 版本的 PyTorch(与 --gui 一起使用) | ### run.py @@ -184,27 +287,14 @@ python run.py --gui | (无) | 以**浏览器**模式运行(推荐,需要 Node.js) | | `--tui` | 以**终端 UI** 模式运行(无需额外依赖) | | `--cli` | 以 **CLI** 模式运行(轻量级) | -| `--gui` | 启用 GUI 自动化模式(需先运行 `install.py --gui`) | **安装示例:** ```bash # 简单 pip 安装(不使用 conda) python install.py -# 带 GUI 支持(使用 pip,不需要 conda) -python install.py --gui - -# 仅 CPU 系统上的 GUI(使用 pip,不需要 conda) -python install.py --gui --cpu-only - # 使用 conda 环境(推荐 conda 用户使用) python install.py --conda - -# GUI 支持和 conda -python install.py --gui --conda - -# 使用 conda 的仅 CPU 系统上的 GUI -python install.py --gui --conda --cpu-only ``` **运行 CraftBot:** @@ -219,9 +309,6 @@ python run.py --tui # CLI 模式(轻量级) python run.py --cli -# GPU/GUI 模式 -python run.py --gui - # 使用 conda 环境 conda run -n craftbot python run.py @@ -240,13 +327,43 @@ python run.py --tui # CLI 
模式(轻量级) python run.py --cli -# GPU/GUI 模式 -python run.py --gui - # 使用 conda 环境 conda run -n craftbot python run.py ``` +### 🔧 后台服务(推荐) + +将 CraftBot 作为后台服务运行,关闭终端后仍可继续运行。系统会自动创建桌面快捷方式,随时可重新打开浏览器。 + +```bash +# 安装依赖、注册开机自启、启动 CraftBot +python craftbot.py install +``` + +就这样。终端会自动关闭,CraftBot 在后台运行,浏览器自动打开。 + +```bash +# 其他服务命令: +python craftbot.py start # 在后台启动 CraftBot +python craftbot.py status # 检查是否正在运行 +python craftbot.py stop # 停止 CraftBot +python craftbot.py restart # 重启 CraftBot +python craftbot.py logs # 查看最近日志输出 +``` + +| 命令 | 说明 | +|---------|-------------| +| `python craftbot.py install` | 安装依赖、注册开机自启、启动 CraftBot、打开浏览器,并自动关闭终端 | +| `python craftbot.py start` | 在后台启动 CraftBot(若已运行则自动重启,终端自动关闭) | +| `python craftbot.py stop` | 停止 CraftBot | +| `python craftbot.py restart` | 停止并重启 CraftBot | +| `python craftbot.py status` | 检查 CraftBot 是否在运行,以及自动启动是否已启用 | +| `python craftbot.py logs` | 显示最近日志(使用 `-n 100` 查看更多行) | +| `python craftbot.py uninstall` | 停止 CraftBot、注销自启、卸载 pip 包并清理 pip 缓存 | + +> [!TIP] +> 执行 `craftbot.py start` 或 `craftbot.py install` 后,系统会自动创建 **CraftBot 桌面快捷方式**。如果不小心关闭了浏览器,双击快捷方式即可重新打开。 + > [!NOTE] > **安装:** 安装器会在缺少依赖时提供清晰的指引。如果未找到 Node.js,会提示你安装或切换到 TUI 模式。安装会自动检测 GPU 可用性,必要时回退到仅 CPU 模式。 @@ -283,15 +400,12 @@ Playwright chromium 安装是可选的。如果失败: - 可跳过或稍后安装:`playwright install chromium` - 仅 WhatsApp Web 集成需要 -### GPU/CUDA 问题 -安装器会自动检测 GPU 可用性: -- 如果 CUDA 安装失败,会自动回退到 CPU 模式 -- 手动 CPU 设置:`python install.py --gui --cpu-only` - 详细故障排除,请参阅 [INSTALLATION_FIX.md](INSTALLATION_FIX.md)。 --- +## 🔌 外部服务集成 + 代理可以使用 OAuth 连接各种服务。正式版本附带嵌入式凭据,但你也可以使用自己的凭据。 ### 快速开始 @@ -366,7 +480,7 @@ LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret 4. 
复制 Client ID 和 Client Secret --- -## 使用容器运行 +## 🐳 使用容器运行 仓库根目录包含 Docker 配置:使用 Python 3.10、关键系统依赖(包含用于 OCR 的 Tesseract),以及在 `environment.yml`/`requirements.txt` 中定义的所有 Python 库,从而让代理在隔离环境中保持一致运行。 @@ -394,29 +508,7 @@ docker run --rm -it craftbot docker run --rm -it --env-file .env craftbot ``` -使用 `-v` 挂载需要在容器外持久化的目录(例如数据或缓存文件夹),并根据部署需要调整端口或额外参数。该容器内置 OCR(`tesseract`)、屏幕自动化(`pyautogui`、`mss`、X11 工具与虚拟帧缓冲)以及常见 HTTP 客户端等系统依赖,使代理能够在容器中处理文件、网络 API 与 GUI 自动化。 - -### 启用 GUI/屏幕自动化 - -GUI 操作(鼠标/键盘事件、截图)需要 X11 服务器。你可以连接宿主机显示,或使用 `xvfb` 无头运行: - -* 使用宿主机显示(需要带 X11 的 Linux): - - ```bash - docker run --rm -it - -e DISPLAY=$DISPLAY \ - -v /tmp/.X11-unix:/tmp/.X11-unix \ - -v $(pwd)/data:/app/app/data \ - craftbot - ``` - - 如需让代理读写更多目录,可添加额外的 `-v` 挂载。 - -* 使用虚拟显示进行无头运行: - - ```bash - docker run --rm -it --env-file .env craftbot bash -lc "Xvfb :99 -screen 0 1920x1080x24 & export DISPLAY=:99 && exec python -m app.main" - ``` +使用 `-v` 挂载需要在容器外持久化的目录(例如数据或缓存文件夹),并根据部署需要调整端口或额外参数。该容器内置 OCR(`tesseract`)以及常见 HTTP 客户端等系统依赖,使代理能够在容器中处理文件与网络 API。 默认情况下镜像会使用 Python 3.10,并打包了 `environment.yml`/`requirements.txt` 中的 Python 依赖,因此 `python -m app.main` 可开箱即用。 @@ -424,7 +516,7 @@ GUI 操作(鼠标/键盘事件、截图)需要 X11 服务器。你可以连 ## 🤝 如何贡献 -欢迎各种建议与反馈!你可以联系 [@zfoong](https://github.com/zfoong),邮箱为 thamyikfoong(at)craftos.net。我们目前尚未配置检查流程,因此无法接受直接提交贡献,但非常感谢你的建议与反馈。 +欢迎提交 PR!请参阅 [CONTRIBUTING.md](CONTRIBUTING.md) 了解工作流程(fork → 从 `dev` 分支新建分支 → 提交 PR)。所有 Pull Request 都会自动运行 lint + 烟雾测试 CI。如需快速沟通,可加入我们的 [Discord](https://discord.gg/ZN9YHc37HG) 或发送邮件至 thamyikfoong(at)craftos.net。 ## 🧾 许可证 @@ -436,3 +528,15 @@ GUI 操作(鼠标/键盘事件、截图)需要 X11 服务器。你可以连 由 [CraftOS](https://craftos.net/) 与贡献者 [@zfoong](https://github.com/zfoong) 及 [@ahmad-ajmal](https://github.com/ahmad-ajmal) 开发与维护。 如果你觉得 **CraftBot** 有用,请给仓库点一个 ⭐ 并分享给更多人! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.de.md b/README.de.md new file mode 100644 index 00000000..75e3a546 --- /dev/null +++ b/README.de.md @@ -0,0 +1,558 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 繁體中文 | 한국어 | Español | Português | Français +

+ +## 🚀 Überblick +

+CraftBot ist dein persönlicher KI-Assistent, der auf deinem Rechner lebt und rund um die Uhr für dich arbeitet. +

+ +Er interpretiert Aufgaben autonom, plant Aktionen und führt sie aus, um deine Ziele zu erreichen. +Er lernt deine Vorlieben und Ziele kennen und hilft dir proaktiv dabei, Aufgaben zu planen und anzustoßen, damit du deine Lebensziele erreichst. +MCPs, Skills und Integrationen mit externen Apps werden unterstützt. + +CraftBot wartet auf deine Befehle. Richte jetzt deinen eigenen CraftBot ein. + +
+ CraftBot Overview +
+ +--- + +## ✨ Funktionen + +- **Bring Your Own Key (BYOK)** — Flexibles LLM-Provider-System mit Unterstützung für OpenAI, Google Gemini, Anthropic Claude, BytePlus und lokale Ollama-Modelle. Wechsle Anbieter mühelos. +- **Speichersystem** — Destilliert und konsolidiert um Mitternacht die Ereignisse des Tages. +- **Proaktiver Agent** — Lernt deine Vorlieben, Gewohnheiten und Lebensziele kennen. Anschließend plant er und startet (selbstverständlich nach Freigabe) Aufgaben, die dir beim Fortschritt helfen. +- **Living UI** — Baue, importiere oder entwickle eigene Apps weiter, die in CraftBot leben. Der Agent behält den UI-Zustand stets im Blick und kann deren Daten direkt lesen, schreiben und verarbeiten. +- **Externe Tool-Integration** — Verbinde dich mit Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord und Telegram (weitere folgen!) mit eingebetteten Zugangsdaten und OAuth-Unterstützung. +- **MCP** — Integration des Model Context Protocol, um die Fähigkeiten des Agents um externe Tools und Dienste zu erweitern. +- **Skills** — Erweiterbares Skill-Framework mit eingebauten Skills für Aufgabenplanung, Recherche, Code-Reviews, Git-Operationen und mehr. +- **Plattformübergreifend** — Vollständige Unterstützung für Windows, macOS und Linux mit plattformspezifischen Code-Varianten und Docker-Containerisierung. + +> [!IMPORTANT] +> **Der GUI-Modus ist veraltet.** CraftBot unterstützt den GUI-Modus (Desktop-Automatisierung) nicht mehr. Bitte verwende stattdessen den Browser-, TUI- oder CLI-Modus. + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 Erste Schritte + +### Voraussetzungen +- Python **3.10+** +- `git` (erforderlich zum Klonen des Repositorys) +- Ein API-Schlüssel für den gewählten LLM-Anbieter (OpenAI, Gemini oder Anthropic) +- `Node.js` **18+** (optional – nur für die Browser-Oberfläche erforderlich) +- `conda` (optional – wenn nicht vorhanden, bietet das Installationsprogramm an, Miniconda automatisch zu installieren) + +### Welches Setup soll ich verwenden? + +> **Nicht sicher? Nimm Option 1.** Sie erledigt alles für dich. + +| | Option 1 — Service | Option 2 — Conda | Option 3 — Manuell | +|---|---|---|---| +| **Für wen** | Die meisten Nutzer, Einsteiger, Tests | Conda-Nutzer, die isolierte Umgebungen wollen | Fortgeschrittene Nutzer, benutzerdefiniertes Python, volle Kontrolle | +| **Python/Umgebung automatisch verwalten?** | ✅ Automatisch | ✅ Automatisch | ❌ Du verwaltest es | +| **Läuft im Hintergrund?** | ✅ Ja, als Dienst | ❌ Nein | ❌ Nein | +| **Wie starten** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ Option 1 — Service-Installation (Empfohlen) + +**Wähle dies, wenn:** du möchtest, dass CraftBot einfach funktioniert — Hintergrunddienst, automatischer Start beim Login, Desktop-Verknüpfung, keine manuellen Schritte. + +`craftbot.py` übernimmt alles: Python-Umgebung, Abhängigkeiten, Hintergrundprozess-Verwaltung und Autostart-Registrierung. + +```bash +# 1. Repository klonen +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Installieren, Autostart registrieren und CraftBot starten +python craftbot.py install +``` + +Das war's. Das Terminal schließt sich von selbst, CraftBot läuft im Hintergrund und der Browser öffnet sich automatisch. Eine **Desktop-Verknüpfung** wird erstellt, damit du den Browser jederzeit wieder öffnen kannst. 
+ +**Dienstverwaltung nach der Installation:** + +```bash +python craftbot.py start # CraftBot im Hintergrund starten +python craftbot.py stop # CraftBot stoppen +python craftbot.py restart # CraftBot neu starten +python craftbot.py status # Prüfen ob er läuft und ob Autostart aktiviert ist +python craftbot.py logs # Aktuelle Logs ansehen +python craftbot.py uninstall # Stoppen, Autostart entfernen, Pakete deinstallieren +``` + +> [!TIP] +> Nach `install` oder `start` wird automatisch eine **CraftBot-Desktop-Verknüpfung** erstellt. Hast du den Browser versehentlich geschlossen, doppelklicke die Verknüpfung, um ihn wieder zu öffnen. + +--- + +### Option 2 — Conda-Installation + +**Wähle dies, wenn:** du bereits conda verwendest und CraftBot in einer isolierten conda-Umgebung betreiben möchtest. + +`install.py --conda` richtet eine dedizierte `craftbot`-conda-Umgebung ein. Falls Miniconda auf deinem System nicht gefunden wird, wird es automatisch installiert. + +```bash +# 1. Repository klonen +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. In einer conda-Umgebung installieren +python install.py --conda + +# 3. CraftBot ausführen +conda run -n craftbot python run.py + +# Falls conda nicht im PATH ist (nur Windows): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> Jedes Mal wenn du CraftBot starten möchtest, verwende `conda run -n craftbot python run.py`. Es gibt keinen Hintergrunddienst — du startest und stoppst ihn selbst. + +--- + +### Option 3 — Manuelle Installation (pip) + +**Wähle dies, wenn:** du volle Kontrolle über deine Python-Umgebung möchtest und CraftBot lieber selbst verwaltest, ohne automatischen Dienst oder Hintergrundprozess. + +`install.py` (ohne Optionen) führt eine Standard-pip-Installation in der aktuell aktiven Python-Umgebung durch. Du startest und stoppst CraftBot manuell mit `run.py`. + +```bash +# 1. 
Repository klonen +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Abhängigkeiten in der aktiven Python-Umgebung installieren +python install.py + +# 3. CraftBot starten +python run.py +``` + +Beim ersten Start wirst du durch die Einrichtung deiner API-Schlüssel und Einstellungen geführt. + +> [!NOTE] +> Wenn Node.js nicht installiert ist, führt dich das Installationsprogramm Schritt für Schritt durch die Installation. Du kannst den Browser-Modus auch vollständig überspringen und den TUI-Modus verwenden — kein Node.js nötig: `python run.py --tui` + +### Was kannst du direkt danach tun? +- Natürlich mit dem Agent sprechen +- Ihn komplexe, mehrstufige Aufgaben ausführen lassen +- `/help` eingeben, um verfügbare Befehle zu sehen +- Dich mit Google, Slack, Notion und mehr verbinden + +### 🖥️ Schnittstellenmodi + +
+ CraftOS Banner +
+ +CraftBot unterstützt mehrere UI-Modi. Wähle nach deinen Vorlieben: + +| Modus | Befehl | Voraussetzungen | Empfohlen für | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | Moderne Web-Oberfläche, am einfachsten | +| **TUI** | `python run.py --tui` | Keine | Terminal-UI, ohne Abhängigkeiten | +| **CLI** | `python run.py --cli` | Keine | Kommandozeile, leichtgewichtig | + +Der **Browser-Modus** ist Standard und wird empfohlen. Ohne Node.js gibt dir das Installationsprogramm eine Anleitung – alternativ kannst du den **TUI-Modus** nutzen. + +--- + +## 🧬 Living UI + +**Living UI ist ein System/App/Dashboard, das mit deinen Anforderungen wächst.** + +Brauchst du ein Kanban-Board mit eingebautem KI-Copiloten? Ein individuelles CRM, +das exakt zu deinem Workflow passt? Ein Unternehmens-Dashboard, das CraftBot +lesen und für dich steuern kann? Bring es als Living UI an den Start — es läuft +neben CraftBot und wächst mit deinen Anforderungen. + +
+ Living UI example +
+ +### Drei Wege, eine Living UI zu erstellen + +1. **Von Grund auf bauen.** Beschreibe in natürlicher Sprache, was du möchtest. + CraftBot legt Datenmodell, Backend-API und React-UI an und iteriert mit dir + in einem strukturierten Designprozess. + +
+ Building a Living UI from scratch +
+ +2. **Aus dem Marketplace installieren.** Stöbere durch Community-erstellte Living UIs auf [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace). + +
+ Living UI marketplace +
+ +3. **Bestehendes Projekt importieren.** Verweise CraftBot auf Quellcode oder ein + GitHub-Repository in Go, Node.js, Python, Rust oder statischem Web. Die Runtime + wird erkannt, Health-Checks werden eingerichtet, und das Projekt wird als + Living UI eingebunden. + +
+ Importing an existing project as a Living UI +
+ +### Entwickelt sich weiter — mit CraftBot im Loop + +Eine Living UI ist nie „fertig". Bitte den Agent, Funktionen hinzuzufügen, eine +Ansicht neu zu gestalten oder sie an neue Daten anzubinden, sobald deine +Anforderungen wachsen. + +CraftBot ist in jeder Living UI eingebettet und **kennt deren Zustand**: +Er kann das aktuelle DOM und Formularwerte lesen, App-Daten über die +REST-API abfragen und in deinem Namen Aktionen auslösen. + +--- + +## 🧩 Architekturüberblick + +| Komponente | Beschreibung | +|-----------|-------------| +| **Agent Base** | Zentrale Orchestrierungsschicht, die den Task-Lifecycle verwaltet, zwischen Komponenten koordiniert und die Haupt-Agenten-Schleife steuert. | +| **LLM Interface** | Einheitliche Schnittstelle mit Unterstützung mehrerer LLM-Anbieter (OpenAI, Gemini, Anthropic, BytePlus, Ollama). | +| **Context Engine** | Erzeugt optimierte Prompts mit KV-Cache-Unterstützung. | +| **Action Manager** | Ruft Aktionen aus der Bibliothek ab und führt sie aus. Eigene Aktionen lassen sich leicht erweitern. | +| **Action Router** | Wählt intelligent die am besten passende Aktion auf Basis der Task-Anforderungen und löst Eingabeparameter bei Bedarf über das LLM auf. | +| **Event Stream** | Echtzeit-Event-Publishing-System für Fortschrittsverfolgung, UI-Updates und Ausführungs-Monitoring. | +| **Memory Manager** | RAG-basiertes semantisches Gedächtnis mit ChromaDB. Übernimmt Memory-Chunking, Embedding, Retrieval und inkrementelle Updates. | +| **State Manager** | Globales State-Management zur Verfolgung von Ausführungskontext, Gesprächshistorie und Laufzeitkonfiguration. | +| **Task Manager** | Verwaltet Task-Definitionen, ermöglicht einfache und komplexe Task-Modi, erstellt To-dos und verfolgt mehrstufige Workflows. | +| **Skill Manager** | Lädt einsteckbare Skills und injiziert sie in den Agent-Kontext. | +| **MCP Adapter** | Model Context Protocol Integration, die MCP-Tools in native Aktionen umwandelt. 
| +| **TUI Interface** | Textual-basierte Terminal-Benutzeroberfläche für interaktive Kommandozeilennutzung. | + +--- + +## 🔜 Roadmap + +- [X] **Memory-Modul** — Fertig. +- [ ] **Externe Tool-Integration** — Wir fügen noch weitere hinzu! +- [X] **MCP-Schicht** — Fertig. +- [X] **Skill-Schicht** — Fertig. +- [ ] **Proaktives Verhalten** — In Arbeit + +--- + +## 📋 Befehlsreferenz + +### install.py + +| Flag | Beschreibung | +|------|-------------| +| `--conda` | conda-Umgebung nutzen (optional) | + +### run.py + +| Flag | Beschreibung | +|------|-------------| +| (keines) | Im **Browser**-Modus ausführen (empfohlen, Node.js erforderlich) | +| `--tui` | Im **Terminal-UI**-Modus ausführen (keine Abhängigkeiten nötig) | +| `--cli` | Im **CLI**-Modus ausführen (leichtgewichtig) | + +### craftbot.py + +| Befehl | Beschreibung | +|---------|-------------| +| `install` | Abhängigkeiten installieren, Autostart registrieren und CraftBot starten | +| `start` | CraftBot im Hintergrund starten | +| `stop` | CraftBot stoppen | +| `restart` | Stoppen und neu starten | +| `status` | Laufstatus und Autostart-Status anzeigen | +| `logs [-n N]` | Die letzten N Log-Zeilen anzeigen (Standard: 50) | +| `uninstall` | Autostart-Registrierung entfernen | + +**Installationsbeispiele:** +```bash +# Einfache pip-Installation (ohne conda) +python install.py + +# Mit conda-Umgebung (empfohlen für conda-Nutzer) +python install.py --conda +``` + +**CraftBot ausführen:** + +```powershell +# Browser-Modus (Standard, Node.js erforderlich) +python run.py + +# TUI-Modus (kein Node.js nötig) +python run.py --tui + +# CLI-Modus (leichtgewichtig) +python run.py --cli + +# Mit conda-Umgebung +conda run -n craftbot python run.py + +# Oder mit vollständigem Pfad, falls conda nicht im PATH ist +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +**Linux/macOS (Bash):** +```bash +# Browser-Modus (Standard, Node.js erforderlich) +python run.py + +# TUI-Modus (kein Node.js 
nötig) +python run.py --tui + +# CLI-Modus (leichtgewichtig) +python run.py --cli + +# Mit conda-Umgebung +conda run -n craftbot python run.py +``` + +### 🔧 Hintergrunddienst (empfohlen) + +Betreibe CraftBot als Hintergrunddienst, sodass er auch nach dem Schließen des Terminals weiterläuft. Eine Desktop-Verknüpfung wird automatisch erstellt, damit du den Browser jederzeit wieder öffnen kannst. + +```bash +# Abhängigkeiten installieren, Autostart bei Anmeldung registrieren und CraftBot starten +python craftbot.py install +``` + +Das war's. Das Terminal schließt sich von selbst, CraftBot läuft im Hintergrund und der Browser öffnet sich automatisch. + +```bash +# Weitere Dienstbefehle: +python craftbot.py start # CraftBot im Hintergrund starten +python craftbot.py status # Prüfen, ob er läuft +python craftbot.py stop # CraftBot stoppen +python craftbot.py restart # CraftBot neu starten +python craftbot.py logs # Aktuelle Log-Ausgabe ansehen +``` + +| Befehl | Beschreibung | +|---------|-------------| +| `python craftbot.py install` | Abhängigkeiten installieren, Autostart bei Anmeldung registrieren, CraftBot starten, Browser öffnen und Terminal automatisch schließen | +| `python craftbot.py start` | CraftBot im Hintergrund starten – startet automatisch neu, wenn er bereits läuft (Terminal schließt sich selbst) | +| `python craftbot.py stop` | CraftBot stoppen | +| `python craftbot.py restart` | CraftBot stoppen und starten | +| `python craftbot.py status` | Prüfen, ob CraftBot läuft und ob Autostart aktiviert ist | +| `python craftbot.py logs` | Aktuelle Log-Ausgabe anzeigen (`-n 100` für mehr Zeilen) | +| `python craftbot.py uninstall` | CraftBot stoppen, Autostart entfernen, pip-Pakete deinstallieren und pip-Cache leeren | + +> [!TIP] +> Nach `craftbot.py start` oder `craftbot.py install` wird automatisch eine **CraftBot-Desktop-Verknüpfung** erstellt. Hast du den Browser versehentlich geschlossen, doppelklicke die Verknüpfung, um ihn wieder zu öffnen. 
+ +> [!NOTE] +> **Installation:** Das Installationsprogramm gibt nun klare Hinweise, falls Abhängigkeiten fehlen. Wird Node.js nicht gefunden, wirst du zur Installation aufgefordert oder kannst in den TUI-Modus wechseln. Die Installation erkennt die GPU-Verfügbarkeit automatisch und fällt bei Bedarf auf den CPU-Modus zurück. + +> [!TIP] +> **Ersteinrichtung:** CraftBot führt dich durch einen Onboarding-Ablauf, um API-Schlüssel, den Agentennamen, MCPs und Skills zu konfigurieren. + +> [!NOTE] +> **Playwright Chromium:** Optional für die WhatsApp-Web-Integration. Schlägt die Installation fehl, funktioniert der Agent weiterhin für andere Aufgaben. Manuell nachinstallieren mit: `playwright install chromium` + +--- + +## 🔧 Fehlerbehebung und häufige Probleme + +### Fehlendes Node.js (für den Browser-Modus) +Erscheint **"npm not found in PATH"** beim Ausführen von `python run.py`: +1. Von [nodejs.org](https://nodejs.org/) herunterladen (LTS-Version wählen) +2. Installieren und das Terminal neu starten +3. `python run.py` erneut ausführen + +**Alternative:** TUI-Modus verwenden (kein Node.js nötig): +```bash +python run.py --tui +``` + +### Installation schlägt bei Abhängigkeiten fehl +Das Installationsprogramm liefert jetzt detaillierte Fehlermeldungen mit Lösungen. Wenn die Installation fehlschlägt: +- **Python-Version prüfen:** Stelle sicher, dass du Python 3.10+ hast (`python --version`) +- **Internet prüfen:** Abhängigkeiten werden während der Installation heruntergeladen +- **pip-Cache leeren:** `pip install --upgrade pip` ausführen und erneut versuchen + +### Probleme bei der Playwright-Installation +Die Playwright-Chromium-Installation ist optional. 
Bei einem Fehlschlag: +- Der Agent **funktioniert weiterhin** für andere Aufgaben +- Du kannst ihn überspringen oder später installieren: `playwright install chromium` +- Nur für die WhatsApp-Web-Integration erforderlich + +Ausführliche Hinweise zur Fehlerbehebung findest du in [INSTALLATION_FIX.md](INSTALLATION_FIX.md). + +--- + +## 🔌 Externe Dienste integrieren + +Der Agent kann sich über OAuth mit verschiedenen Diensten verbinden. Release-Builds enthalten eingebettete Zugangsdaten, du kannst aber auch deine eigenen verwenden. + +### Schnellstart + +Für Release-Builds mit eingebetteten Zugangsdaten: +``` +/google login # Google Workspace verbinden +/zoom login # Zoom verbinden +/slack invite # Slack verbinden +/notion invite # Notion verbinden +/linkedin login # LinkedIn verbinden +``` + +### Dienst-Details + +| Dienst | Auth-Typ | Befehl | Secret nötig? | +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | Nein (PKCE) | +| Zoom | PKCE | `/zoom login` | Nein (PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | Ja | +| Notion | OAuth 2.0 | `/notion invite` | Ja | +| LinkedIn | OAuth 2.0 | `/linkedin login` | Ja | + +### Eigene Zugangsdaten verwenden + +Möchtest du deine eigenen OAuth-Zugangsdaten verwenden, trage sie in deine `.env`-Datei ein: + +#### Google (PKCE – nur Client ID nötig) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. Gehe zur [Google Cloud Console](https://console.cloud.google.com/) +2. Aktiviere die APIs für Gmail, Calendar, Drive und People +3. Erstelle OAuth-Zugangsdaten vom Typ **Desktop app** +4. Kopiere die Client ID (für PKCE ist kein Secret nötig) + +#### Zoom (PKCE – nur Client ID nötig) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. Gehe zum [Zoom Marketplace](https://marketplace.zoom.us/) +2. Erstelle eine OAuth-App +3. 
Kopiere die Client ID + +#### Slack (beides erforderlich) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. Gehe zur [Slack API](https://api.slack.com/apps) +2. Erstelle eine neue App +3. Füge OAuth-Scopes hinzu: `chat:write`, `channels:read`, `users:read` usw. +4. Kopiere Client ID und Client Secret + +#### Notion (beides erforderlich) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. Gehe zu [Notion Developers](https://developers.notion.com/) +2. Erstelle eine neue Integration (Public integration) +3. Kopiere OAuth Client ID und Secret + +#### LinkedIn (beides erforderlich) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. Gehe zu [LinkedIn Developers](https://developer.linkedin.com/) +2. Erstelle eine App +3. Füge OAuth-2.0-Scopes hinzu +4. Kopiere Client ID und Client Secret + +--- +## 🐳 Mit Container ausführen + +Das Repository-Root enthält eine Docker-Konfiguration mit Python 3.10, wichtigen Systempaketen (inklusive Tesseract für OCR) und allen in `environment.yml`/`requirements.txt` definierten Python-Abhängigkeiten, damit der Agent konsistent in isolierten Umgebungen läuft. + +Nachfolgend die Einrichtungsanleitung, um unseren Agent mit Container auszuführen. + +### Image bauen + +Im Repository-Root: + +```bash +docker build -t craftbot . +``` + +### Container ausführen + +Das Image ist so konfiguriert, dass der Agent standardmäßig mit `python -m app.main` gestartet wird. Für eine interaktive Ausführung: + +```bash +docker run --rm -it craftbot +``` + +Wenn du Umgebungsvariablen bereitstellen musst, übergib eine env-Datei (z. B. 
basierend auf `.env.example`): + +```bash +docker run --rm -it --env-file .env craftbot +``` + +Mounte alle Verzeichnisse, die außerhalb des Containers persistent sein sollen (etwa Daten- oder Cache-Ordner), mit `-v`, und passe Ports oder weitere Flags nach Bedarf an dein Deployment an. Das Image enthält Systemabhängigkeiten für OCR (`tesseract`) sowie gängige HTTP-Clients, damit der Agent im Container mit Dateien und Netzwerk-APIs arbeiten kann. + +Standardmäßig nutzt das Image Python 3.10 und bündelt die Python-Abhängigkeiten aus `environment.yml`/`requirements.txt`, sodass `python -m app.main` sofort funktioniert. + +--- + +## 🤝 Mitwirken + +PRs sind willkommen! Siehe [CONTRIBUTING.md](CONTRIBUTING.md) für den Workflow (Fork → Branch von `dev` → PR). Alle Pull Requests durchlaufen automatisch Lint- und Smoke-Test-CI. Für Fragen oder schnelleren Austausch komm auf unseren [Discord](https://discord.gg/ZN9YHc37HG) oder schreib an thamyikfoong(at)craftos.net. + +## 🧾 Lizenz + +Dieses Projekt steht unter der [MIT-Lizenz](LICENSE). Du darfst das Projekt frei nutzen, hosten und monetarisieren (bei Weiterverbreitung und Monetarisierung muss dieses Projekt genannt werden). + +--- + +## ⭐ Danksagung + +Entwickelt und gepflegt von [CraftOS](https://craftos.net/) sowie den Contributors [@zfoong](https://github.com/zfoong) und [@ahmad-ajmal](https://github.com/ahmad-ajmal). +Wenn dir **CraftBot** nützlich ist, gib dem Repository bitte einen ⭐ und teile es mit anderen! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.es.md b/README.es.md new file mode 100644 index 00000000..adde9286 --- /dev/null +++ b/README.es.md @@ -0,0 +1,556 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 繁體中文 | 한국어 | Português | Français | Deutsch +

+ +## 🚀 Descripción general +

+CraftBot es tu Asistente de IA Personal que vive dentro de tu máquina y trabaja 24/7 para ti. +

+ +Interpreta tareas de forma autónoma, planifica acciones y las ejecuta para alcanzar tus objetivos. +Aprende tus preferencias y metas, y te ayuda de manera proactiva a planificar e iniciar tareas para cumplir tus objetivos de vida. +Soporta MCP, Skills e integraciones con apps externas. + +CraftBot espera tus órdenes. Configura tu propio CraftBot ahora. + +
+ CraftBot Overview +
+ +--- + +## ✨ Características + +- **Bring Your Own Key (BYOK)** — Sistema flexible de proveedores de LLM con soporte para OpenAI, Google Gemini, Anthropic Claude, BytePlus y modelos locales de Ollama. Cambia entre proveedores fácilmente. +- **Sistema de Memoria** — Destila y consolida los eventos del día cada medianoche. +- **Agente Proactivo** — Aprende tus preferencias, hábitos y metas de vida. Luego planifica e inicia tareas (con tu aprobación, por supuesto) para ayudarte a mejorar en la vida. +- **Living UI** — Crea, importa o evoluciona aplicaciones personalizadas que viven dentro de CraftBot. El agente es consciente del estado de la UI y puede leer, escribir y actuar sobre sus datos directamente. +- **Integración con herramientas externas** — Conéctate a Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord y Telegram (¡vendrán más!) con credenciales integradas y soporte OAuth. +- **MCP** — Integración con Model Context Protocol para ampliar las capacidades del agente con herramientas y servicios externos. +- **Skills** — Framework de skills extensible con skills integradas para planificación de tareas, investigación, revisión de código, operaciones de git y más. +- **Multiplataforma** — Soporte completo para Windows, macOS y Linux con variantes de código específicas por plataforma y contenedorización con Docker. + +> [!IMPORTANT] +> **El modo GUI está obsoleto.** CraftBot ya no admite el modo GUI (automatización de escritorio). Usa en su lugar el modo Browser, TUI o CLI. + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 Primeros pasos + +### Requisitos previos +- Python **3.10+** +- `git` (necesario para clonar el repositorio) +- Una clave API del proveedor de LLM que elijas (OpenAI, Gemini o Anthropic) +- `Node.js` **18+** (opcional — solo necesario para la interfaz del navegador) +- `conda` (opcional — si no se encuentra, el instalador ofrece instalar Miniconda automáticamente) + +### ¿Qué opción debo elegir? + +> **¿No estás seguro? Usa la Opción 1.** Se encarga de todo por ti. + +| | Opción 1 — Servicio | Opción 2 — Conda | Opción 3 — Manual | +|---|---|---|---| +| **Para quién** | La mayoría de usuarios, principiantes, pruebas | Usuarios de Conda que quieren entornos aislados | Usuarios avanzados, Python personalizado, control total | +| **¿Gestiona Python/entorno automáticamente?** | ✅ Automático | ✅ Automático | ❌ Lo gestionas tú | +| **¿Corre en segundo plano?** | ✅ Sí, como servicio | ❌ No | ❌ No | +| **Cómo empezar** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ Opción 1 — Instalación como servicio (Recomendada) + +**Elige esta si:** quieres que CraftBot simplemente funcione — servicio en segundo plano, inicio automático al arrancar, acceso directo en el escritorio, sin pasos manuales. + +`craftbot.py` se encarga de todo: entorno Python, dependencias, gestión de procesos en segundo plano y registro de inicio automático. + +```bash +# 1. Clona el repositorio +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Instala, registra el inicio automático y lanza CraftBot +python craftbot.py install +``` + +Eso es todo. El terminal se cierra solo, CraftBot se ejecuta en segundo plano y el navegador se abre automáticamente. Se crea un **acceso directo en el escritorio** para que puedas abrir el navegador en cualquier momento. 
+ +**Gestión del servicio tras la instalación:** + +```bash +python craftbot.py start # Iniciar CraftBot en segundo plano +python craftbot.py stop # Detener CraftBot +python craftbot.py restart # Reiniciar CraftBot +python craftbot.py status # Comprobar si está en ejecución y si el inicio automático está activado +python craftbot.py logs # Ver el registro reciente +python craftbot.py uninstall # Detener, quitar inicio automático y desinstalar paquetes +``` + +> [!TIP] +> Tras `install` o `start`, se crea automáticamente un **acceso directo de CraftBot en el escritorio**. Si cierras el navegador, haz doble clic en el acceso directo para reabrirlo. + +--- + +### Opción 2 — Instalación con Conda + +**Elige esta si:** ya usas conda y quieres CraftBot en un entorno conda aislado. + +`install.py --conda` configura un entorno conda dedicado `craftbot`. Si no se encuentra Miniconda en tu sistema, se instalará automáticamente. + +```bash +# 1. Clona el repositorio +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Instala en un entorno conda +python install.py --conda + +# 3. Ejecuta CraftBot +conda run -n craftbot python run.py + +# Si conda no está en PATH (solo Windows): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> Cada vez que quieras ejecutar CraftBot, usa `conda run -n craftbot python run.py`. No hay servicio en segundo plano — lo inicias y detienes tú mismo. + +--- + +### Opción 3 — Instalación manual (pip) + +**Elige esta si:** quieres control total sobre tu entorno Python y prefieres gestionar CraftBot tú mismo, sin servicio automático ni proceso en segundo plano. + +`install.py` (sin opciones) hace una instalación pip estándar en el entorno Python activo. Inicias y detienes CraftBot manualmente con `run.py`. + +```bash +# 1. Clona el repositorio +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 
Instala las dependencias en tu entorno Python activo +python install.py + +# 3. Ejecuta CraftBot +python run.py +``` + +La primera ejecución te guiará para configurar tus claves API y preferencias. + +> [!NOTE] +> Si Node.js no está instalado, el instalador te ofrecerá instrucciones paso a paso. También puedes omitir completamente el modo navegador y usar el modo TUI — sin Node.js: `python run.py --tui` + +### ¿Qué puedes hacer justo después? +- Hablar con el agente de forma natural +- Pedirle que realice tareas complejas de varios pasos +- Escribir `/help` para ver los comandos disponibles +- Conectarte a Google, Slack, Notion y más + +### 🖥️ Modos de interfaz + +
+ CraftOS Banner +
+ +CraftBot soporta varios modos de UI. Elige según tu preferencia: + +| Modo | Comando | Requisitos | Recomendado para | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | Interfaz web moderna, la más sencilla de usar | +| **TUI** | `python run.py --tui` | Ninguno | UI en terminal, sin dependencias adicionales | +| **CLI** | `python run.py --cli` | Ninguno | Línea de comandos, ligero | + +El **modo navegador** es el predeterminado y recomendado. Si no tienes Node.js, el instalador te ofrecerá instrucciones de instalación o puedes usar el **modo TUI** en su lugar. + +--- + +## 🧬 Living UI + +**Living UI es un sistema/app/panel que evoluciona con tus necesidades.** + +¿Necesitas un tablero kanban con un copiloto de IA integrado? ¿Un CRM personalizado que se +ajuste exactamente a tu flujo de trabajo? ¿Un panel de empresa que CraftBot pueda leer y +manejar por ti? Pon uno en marcha como Living UI: se ejecuta junto a CraftBot y crece a +medida que cambian tus necesidades. + +
+ Living UI example +
+ +### Tres formas de crear una Living UI + +1. **Construir desde cero.** Describe lo que quieres en lenguaje natural. CraftBot + monta el modelo de datos, la API del backend y la interfaz React, e itera contigo + a través de un proceso de diseño estructurado. + +
+ Building a Living UI from scratch +
+ +2. **Instalar desde el marketplace.** Explora Living UIs creadas por la comunidad en [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace). + +
+ Living UI marketplace +
+ +3. **Importar un proyecto existente.** Apunta CraftBot a un código fuente o repositorio + de GitHub de Go, Node.js, Python, Rust o estático. Detecta el runtime, configura los + chequeos de salud y lo envuelve como una Living UI. + +
+ Importing an existing project as a Living UI +
+
+### Sigue evolucionando con CraftBot dentro del bucle
+
+Una Living UI nunca está "terminada". Pide al agente que añada funciones, rediseñe
+una vista o la conecte con nuevos datos a medida que crecen tus necesidades.
+
+CraftBot está integrado en cada Living UI y es **consciente de su estado**:
+puede leer el DOM actual y los valores de los formularios, consultar los datos
+de la app mediante la API REST, y disparar acciones en tu nombre.
+
+---
+
+## 🧩 Visión general de la arquitectura
+
+| Componente | Descripción |
+|-----------|-------------|
+| **Agent Base** | Capa de orquestación central que gestiona el ciclo de vida de las tareas, coordina los componentes y maneja el bucle agente principal. |
+| **LLM Interface** | Interfaz unificada que soporta múltiples proveedores LLM (OpenAI, Gemini, Anthropic, BytePlus, Ollama). |
+| **Context Engine** | Genera prompts optimizados con soporte de KV-cache. |
+| **Action Manager** | Recupera y ejecuta acciones desde la biblioteca. Las acciones personalizadas son fáciles de extender. |
+| **Action Router** | Selecciona inteligentemente la acción que mejor se ajusta a los requisitos de la tarea y resuelve los parámetros de entrada mediante el LLM cuando es necesario. |
+| **Event Stream** | Sistema de publicación de eventos en tiempo real para seguimiento del progreso de tareas, actualizaciones de UI y monitoreo de ejecución. |
+| **Memory Manager** | Memoria semántica basada en RAG con ChromaDB. Gestiona fragmentación de memoria, embeddings, recuperación y actualizaciones incrementales. |
+| **State Manager** | Gestión global del estado para rastrear el contexto de ejecución del agente, el historial de conversación y la configuración en tiempo de ejecución. |
+| **Task Manager** | Administra definiciones de tareas, habilita modos de tareas simples y complejas, crea to-dos y hace seguimiento a flujos de trabajo multietapa. |
+| **Skill Manager** | Carga e inyecta skills intercambiables en el contexto del agente. 
| +| **MCP Adapter** | Integración con Model Context Protocol que convierte herramientas MCP en acciones nativas. | +| **TUI Interface** | Interfaz de usuario de terminal construida con el framework Textual para operación interactiva por línea de comandos. | + +--- + +## 🔜 Hoja de ruta + +- [X] **Módulo de memoria** — Listo. +- [ ] **Integración con herramientas externas** — ¡Seguimos añadiendo más! +- [X] **Capa MCP** — Listo. +- [X] **Capa de Skills** — Listo. +- [ ] **Comportamiento proactivo** — En curso + +--- + +## 📋 Referencia de comandos + +### install.py + +| Flag | Descripción | +|------|-------------| +| `--conda` | Usa entorno conda (opcional) | + +### run.py + +| Flag | Descripción | +|------|-------------| +| (ninguno) | Ejecutar en modo **Browser** (recomendado, requiere Node.js) | +| `--tui` | Ejecutar en modo **Terminal UI** (no requiere dependencias) | +| `--cli` | Ejecutar en modo **CLI** (ligero) | + +### craftbot.py + +| Comando | Descripción | +|---------|-------------| +| `install` | Instala dependencias, registra el autoarranque e inicia CraftBot | +| `start` | Inicia CraftBot en segundo plano | +| `stop` | Detiene CraftBot | +| `restart` | Detener y luego iniciar | +| `status` | Muestra el estado de ejecución y del autoarranque | +| `logs [-n N]` | Muestra las últimas N líneas de log (por defecto: 50) | +| `uninstall` | Elimina el registro de autoarranque | + +**Ejemplos de instalación:** +```bash +# Instalación simple con pip (sin conda) +python install.py + +# Con entorno conda (recomendado para usuarios de conda) +python install.py --conda +``` + +**Ejecución de CraftBot:** + +```powershell +# Modo navegador (por defecto, requiere Node.js) +python run.py + +# Modo TUI (no requiere Node.js) +python run.py --tui + +# Modo CLI (ligero) +python run.py --cli + +# Con entorno conda +conda run -n craftbot python run.py + +# O usando la ruta completa si conda no está en PATH +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot 
python run.py +``` + +**Linux/macOS (Bash):** +```bash +# Modo navegador (por defecto, requiere Node.js) +python run.py + +# Modo TUI (no requiere Node.js) +python run.py --tui + +# Modo CLI (ligero) +python run.py --cli + +# Con entorno conda +conda run -n craftbot python run.py +``` + +### 🔧 Servicio en segundo plano (recomendado) + +Ejecuta CraftBot como un servicio en segundo plano para que siga funcionando incluso después de cerrar la terminal. Se crea automáticamente un acceso directo en el escritorio para reabrir el navegador cuando quieras. + +```bash +# Instala dependencias, registra autoarranque al iniciar sesión e inicia CraftBot +python craftbot.py install +``` + +Eso es todo. La terminal se cierra sola, CraftBot se ejecuta en segundo plano y el navegador se abre automáticamente. + +```bash +# Otros comandos del servicio: +python craftbot.py start # Inicia CraftBot en segundo plano +python craftbot.py status # Comprueba si está en ejecución +python craftbot.py stop # Detiene CraftBot +python craftbot.py restart # Reinicia CraftBot +python craftbot.py logs # Ver el log reciente +``` + +| Comando | Descripción | +|---------|-------------| +| `python craftbot.py install` | Instala dependencias, registra autoarranque al iniciar sesión, inicia CraftBot, abre el navegador y cierra la terminal automáticamente | +| `python craftbot.py start` | Inicia CraftBot en segundo plano — se reinicia automáticamente si ya está en ejecución (la terminal se cierra sola) | +| `python craftbot.py stop` | Detiene CraftBot | +| `python craftbot.py restart` | Detiene e inicia CraftBot | +| `python craftbot.py status` | Comprueba si CraftBot está en ejecución y si el autoarranque está habilitado | +| `python craftbot.py logs` | Muestra la salida reciente del log (`-n 100` para más líneas) | +| `python craftbot.py uninstall` | Detiene CraftBot, elimina el registro de autoarranque, desinstala paquetes pip y purga la caché de pip | + +> [!TIP] +> Tras `craftbot.py start` o 
`craftbot.py install`, se crea automáticamente un **acceso directo de CraftBot en el escritorio**. Si cierras el navegador por error, haz doble clic en el acceso directo para reabrirlo. + +> [!NOTE] +> **Instalación:** El instalador ahora ofrece orientación clara si faltan dependencias. Si no se encuentra Node.js, se te pedirá instalarlo o podrás cambiar al modo TUI. La instalación detecta automáticamente la disponibilidad de GPU y recurre al modo solo CPU si es necesario. + +> [!TIP] +> **Configuración inicial:** CraftBot te guiará por una secuencia de onboarding para configurar claves API, el nombre del agente, MCPs y Skills. + +> [!NOTE] +> **Playwright Chromium:** Opcional para la integración con WhatsApp Web. Si falla la instalación, el agente seguirá funcionando para otras tareas. Puedes instalarlo manualmente más tarde con: `playwright install chromium` + +--- + +## 🔧 Solución de problemas y preguntas frecuentes + +### Falta Node.js (para el modo navegador) +Si ves **"npm not found in PATH"** al ejecutar `python run.py`: +1. Descárgalo desde [nodejs.org](https://nodejs.org/) (elige la versión LTS) +2. Instálalo y reinicia tu terminal +3. Ejecuta `python run.py` de nuevo + +**Alternativa:** Usa el modo TUI (no necesita Node.js): +```bash +python run.py --tui +``` + +### La instalación falla por dependencias +Ahora el instalador ofrece mensajes de error detallados con soluciones. Si la instalación falla: +- **Revisa la versión de Python:** asegúrate de tener Python 3.10+ (`python --version`) +- **Revisa tu conexión a Internet:** las dependencias se descargan durante la instalación +- **Limpia la caché de pip:** ejecuta `pip install --upgrade pip` e inténtalo de nuevo + +### Problemas de instalación de Playwright +La instalación de Playwright Chromium es opcional. 
Si falla: +- El agente **seguirá funcionando** para otras tareas +- Puedes omitirla o instalarla más tarde: `playwright install chromium` +- Solo es necesaria para la integración con WhatsApp Web + +Para una solución de problemas más detallada, consulta [INSTALLATION_FIX.md](INSTALLATION_FIX.md). + +--- + +## 🔌 Integración de servicios externos + +El agente puede conectarse a varios servicios usando OAuth. Las builds de release incluyen credenciales integradas, pero también puedes usar las tuyas. + +### Inicio rápido + +Para builds de release con credenciales integradas: +``` +/google login # Conectar Google Workspace +/zoom login # Conectar Zoom +/slack invite # Conectar Slack +/notion invite # Conectar Notion +/linkedin login # Conectar LinkedIn +``` + +### Detalles de los servicios + +| Servicio | Tipo de auth | Comando | ¿Requiere secreto? | +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | No (PKCE) | +| Zoom | PKCE | `/zoom login` | No (PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | Sí | +| Notion | OAuth 2.0 | `/notion invite` | Sí | +| LinkedIn | OAuth 2.0 | `/linkedin login` | Sí | + +### Uso de tus propias credenciales + +Si prefieres usar tus propias credenciales OAuth, añádelas a tu archivo `.env`: + +#### Google (PKCE — solo se necesita el Client ID) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. Ve a [Google Cloud Console](https://console.cloud.google.com/) +2. Habilita las APIs de Gmail, Calendar, Drive y People +3. Crea credenciales OAuth de tipo **Desktop app** +4. Copia el Client ID (el secreto no es necesario con PKCE) + +#### Zoom (PKCE — solo se necesita el Client ID) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. Ve a [Zoom Marketplace](https://marketplace.zoom.us/) +2. Crea una app OAuth +3. Copia el Client ID + +#### Slack (requiere ambos) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. 
Ve a [Slack API](https://api.slack.com/apps) +2. Crea una nueva app +3. Añade los scopes OAuth: `chat:write`, `channels:read`, `users:read`, etc. +4. Copia el Client ID y el Client Secret + +#### Notion (requiere ambos) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. Ve a [Notion Developers](https://developers.notion.com/) +2. Crea una nueva integración (Public integration) +3. Copia el OAuth Client ID y el Secret + +#### LinkedIn (requiere ambos) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. Ve a [LinkedIn Developers](https://developer.linkedin.com/) +2. Crea una app +3. Añade los scopes OAuth 2.0 +4. Copia el Client ID y el Client Secret + +--- +## 🐳 Ejecutar con contenedor + +La raíz del repositorio incluye una configuración Docker con Python 3.10, paquetes clave del sistema (incluido Tesseract para OCR) y todas las dependencias de Python definidas en `environment.yml`/`requirements.txt`, de modo que el agente pueda ejecutarse de forma consistente en entornos aislados. + +A continuación las instrucciones para ejecutar nuestro agente con contenedor. + +### Construir la imagen + +Desde la raíz del repositorio: + +```bash +docker build -t craftbot . +``` + +### Ejecutar el contenedor + +La imagen está configurada para lanzar el agente con `python -m app.main` por defecto. Para ejecutarlo de forma interactiva: + +```bash +docker run --rm -it craftbot +``` + +Si necesitas suministrar variables de entorno, pasa un archivo env (por ejemplo, basado en `.env.example`): + +```bash +docker run --rm -it --env-file .env craftbot +``` + +Monta cualquier directorio que deba persistir fuera del contenedor (como carpetas de datos o caché) usando `-v`, y ajusta los puertos u otras opciones según lo necesite tu despliegue. 
La imagen trae dependencias del sistema para OCR (`tesseract`) y clientes HTTP comunes, de modo que el agente pueda trabajar con archivos y APIs de red dentro del contenedor. + +Por defecto, la imagen usa Python 3.10 y empaqueta las dependencias de Python de `environment.yml`/`requirements.txt`, así que `python -m app.main` funciona de entrada. + +--- + +## 🤝 Cómo contribuir + +¡Las PRs son bienvenidas! Consulta [CONTRIBUTING.md](CONTRIBUTING.md) para el flujo de trabajo (fork → rama desde `dev` → PR). Todas las pull requests pasan automáticamente por CI de lint + smoke-test. Si tienes preguntas o quieres una conversación más rápida, únete a nuestro [Discord](https://discord.gg/ZN9YHc37HG) o escríbenos a thamyikfoong(at)craftos.net. + +## 🧾 Licencia + +Este proyecto está licenciado bajo la [Licencia MIT](LICENSE). Eres libre de usar, alojar y monetizar este proyecto (debes dar crédito a este proyecto en caso de distribución y monetización). + +--- + +## ⭐ Agradecimientos + +Desarrollado y mantenido por [CraftOS](https://craftos.net/) y los contribuyentes [@zfoong](https://github.com/zfoong) y [@ahmad-ajmal](https://github.com/ahmad-ajmal). +Si **CraftBot** te resulta útil, ¡pon una ⭐ al repositorio y compártelo con otras personas! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.fr.md b/README.fr.md new file mode 100644 index 00000000..19c8d56e --- /dev/null +++ b/README.fr.md @@ -0,0 +1,557 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 繁體中文 | 한국어 | Español | Português | Deutsch +

+ +## 🚀 Aperçu +

+CraftBot est votre Assistant IA Personnel qui vit à l'intérieur de votre machine et travaille 24h/24 pour vous. +

+ +Il interprète les tâches de manière autonome, planifie les actions et les exécute pour atteindre vos objectifs. +Il apprend vos préférences et objectifs, et vous aide de façon proactive à planifier et lancer des tâches pour atteindre vos buts de vie. +Les MCP, les Skills et les intégrations d'applications externes sont pris en charge. + +CraftBot attend vos ordres. Configurez dès maintenant votre propre CraftBot. + +
+ CraftBot Overview +
+ +--- + +## ✨ Fonctionnalités + +- **Bring Your Own Key (BYOK)** — Système flexible de fournisseurs LLM prenant en charge OpenAI, Google Gemini, Anthropic Claude, BytePlus et les modèles locaux Ollama. Basculez facilement entre fournisseurs. +- **Système de mémoire** — Distille et consolide les événements de la journée à minuit. +- **Agent proactif** — Apprend vos préférences, habitudes et objectifs de vie. Puis planifie et lance des tâches (avec votre accord, bien sûr) pour vous aider à progresser. +- **Living UI** — Créez, importez ou faites évoluer des applications personnalisées qui vivent au sein de CraftBot. L'agent reste conscient de l'état de l'UI et peut lire, écrire et agir directement sur ses données. +- **Intégration d'outils externes** — Connectez-vous à Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord et Telegram (d'autres à venir !) avec des identifiants intégrés et le support OAuth. +- **MCP** — Intégration du Model Context Protocol pour étendre les capacités de l'agent avec des outils et services externes. +- **Skills** — Framework de skills extensible avec des skills intégrées pour la planification de tâches, la recherche, la revue de code, les opérations git, etc. +- **Multiplateforme** — Prise en charge complète de Windows, macOS et Linux avec des variantes de code spécifiques à chaque plateforme et la conteneurisation Docker. + +> [!IMPORTANT] +> **Le mode GUI est déprécié.** CraftBot ne prend plus en charge le mode GUI (automatisation de bureau). Utilisez plutôt le mode Browser, TUI ou CLI. + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 Pour commencer + +### Prérequis +- Python **3.10+** +- `git` (nécessaire pour cloner le dépôt) +- Une clé API pour le fournisseur LLM de votre choix (OpenAI, Gemini ou Anthropic) +- `Node.js` **18+** (optionnel — requis uniquement pour l'interface navigateur) +- `conda` (optionnel — s'il est introuvable, l'installateur propose d'installer Miniconda automatiquement) + +### Quelle option choisir ? + +> **Vous hésitez ? Optez pour l'Option 1.** Elle gère tout pour vous. + +| | Option 1 — Service | Option 2 — Conda | Option 3 — Manuel | +|---|---|---|---| +| **Pour qui** | La plupart des utilisateurs, débutants, tests | Utilisateurs Conda souhaitant des environnements isolés | Utilisateurs avancés, Python personnalisé, contrôle total | +| **Gère Python/l'environnement automatiquement ?** | ✅ Automatique | ✅ Automatique | ❌ Vous le gérez | +| **Tourne en arrière-plan ?** | ✅ Oui, en tant que service | ❌ Non | ❌ Non | +| **Comment démarrer** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ Option 1 — Installation en service (Recommandée) + +**Choisissez cette option si :** vous voulez que CraftBot fonctionne directement — service en arrière-plan, démarrage automatique à la connexion, raccourci sur le bureau, aucune étape manuelle. + +`craftbot.py` gère tout : environnement Python, dépendances, gestion du processus en arrière-plan et enregistrement du démarrage automatique. + +```bash +# 1. Clonez le dépôt +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Installez, enregistrez le démarrage automatique et lancez CraftBot +python craftbot.py install +``` + +C'est tout. Le terminal se ferme tout seul, CraftBot tourne en arrière-plan et le navigateur s'ouvre automatiquement. Un **raccourci sur le bureau** est créé pour rouvrir le navigateur à tout moment. 
+ +**Gestion du service après installation :** + +```bash +python craftbot.py start # Démarrer CraftBot en arrière-plan +python craftbot.py stop # Arrêter CraftBot +python craftbot.py restart # Redémarrer CraftBot +python craftbot.py status # Vérifier s'il tourne et si le démarrage automatique est activé +python craftbot.py logs # Voir les logs récents +python craftbot.py uninstall # Arrêter, supprimer le démarrage auto et désinstaller les paquets +``` + +> [!TIP] +> Après `install` ou `start`, un **raccourci CraftBot sur le bureau** est créé automatiquement. Si vous fermez le navigateur, double-cliquez sur le raccourci pour le rouvrir. + +--- + +### Option 2 — Installation Conda + +**Choisissez cette option si :** vous utilisez déjà conda et souhaitez CraftBot dans un environnement conda isolé. + +`install.py --conda` configure un environnement conda dédié `craftbot`. Si Miniconda n'est pas trouvé sur votre système, il sera installé automatiquement. + +```bash +# 1. Clonez le dépôt +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Installez dans un environnement conda +python install.py --conda + +# 3. Lancez CraftBot +conda run -n craftbot python run.py + +# Si conda n'est pas dans le PATH (Windows uniquement) : +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> Chaque fois que vous voulez lancer CraftBot, utilisez `conda run -n craftbot python run.py`. Il n'y a pas de service en arrière-plan — vous le démarrez et l'arrêtez vous-même. + +--- + +### Option 3 — Installation manuelle (pip) + +**Choisissez cette option si :** vous souhaitez un contrôle total sur votre environnement Python et préférez gérer CraftBot vous-même, sans service automatique ni processus en arrière-plan. + +`install.py` (sans options) effectue une installation pip standard dans l'environnement Python actif. Vous démarrez et arrêtez CraftBot manuellement avec `run.py`. + +```bash +# 1. 
Clonez le dépôt +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Installez les dépendances dans votre environnement Python actif +python install.py + +# 3. Lancez CraftBot +python run.py +``` + +La première exécution vous guidera dans la configuration de vos clés API et préférences. + +> [!NOTE] +> Si Node.js n'est pas installé, l'installateur fournira des instructions étape par étape. Vous pouvez aussi ignorer complètement le mode navigateur et utiliser le mode TUI — sans Node.js : `python run.py --tui` + +### Que pouvez-vous faire tout de suite ? +- Discuter avec l'agent naturellement +- Lui demander d'exécuter des tâches complexes en plusieurs étapes +- Taper `/help` pour voir les commandes disponibles +- Vous connecter à Google, Slack, Notion et plus + +### 🖥️ Modes d'interface + +
+ CraftOS Banner +
+ +CraftBot propose plusieurs modes d'UI. Choisissez selon vos préférences : + +| Mode | Commande | Prérequis | Idéal pour | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | Interface web moderne, la plus simple à utiliser | +| **TUI** | `python run.py --tui` | Aucun | UI en terminal, aucune dépendance requise | +| **CLI** | `python run.py --cli` | Aucun | Ligne de commande, léger | + +Le **mode navigateur** est le mode par défaut et recommandé. Si vous n'avez pas Node.js, l'installateur vous guidera pour l'installer, ou vous pouvez utiliser le **mode TUI**. + +--- + +## 🧬 Living UI + +**Living UI est un système/une application/un tableau de bord qui évolue avec vos besoins.** + +Besoin d'un tableau kanban avec un copilote IA intégré ? D'un CRM sur mesure taillé +exactement pour votre flux de travail ? D'un tableau de bord d'entreprise que CraftBot +peut lire et piloter pour vous ? Lancez-le comme une Living UI — elle tourne aux côtés +de CraftBot et grandit au rythme de vos besoins. + +
+ Living UI example +
+ +### Trois façons de créer une Living UI + +1. **Construire à partir de zéro.** Décrivez ce que vous voulez en langage naturel. + CraftBot met en place le modèle de données, l'API backend et l'interface React, + et itère avec vous à travers un processus de conception structuré. + +
+ Building a Living UI from scratch +
+ +2. **Installer depuis la marketplace.** Parcourez les Living UIs créées par la communauté sur [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace). + +
+ Living UI marketplace +
+ +3. **Importer un projet existant.** Pointez CraftBot vers un code source ou un dépôt + GitHub en Go, Node.js, Python, Rust ou statique. Il détecte le runtime, configure + les vérifications de santé et l'encapsule en Living UI. + +
+ Importing an existing project as a Living UI +
### Elle continue d'évoluer avec CraftBot dans la boucle
| +| **MCP Adapter** | Intégration Model Context Protocol qui convertit les outils MCP en actions natives. | +| **TUI Interface** | Interface utilisateur en terminal construite avec le framework Textual pour une utilisation interactive en ligne de commande. | + +--- + +## 🔜 Roadmap + +- [X] **Module de mémoire** — Terminé. +- [ ] **Intégration d'outils externes** — En cours d'ajout ! +- [X] **Couche MCP** — Terminée. +- [X] **Couche Skills** — Terminée. +- [ ] **Comportement proactif** — En cours + +--- + +## 📋 Référence des commandes + +### install.py + +| Flag | Description | +|------|-------------| +| `--conda` | Utiliser un environnement conda (optionnel) | + +### run.py + +| Flag | Description | +|------|-------------| +| (aucun) | Lancer en mode **Browser** (recommandé, nécessite Node.js) | +| `--tui` | Lancer en mode **Terminal UI** (aucune dépendance) | +| `--cli` | Lancer en mode **CLI** (léger) | + +### craftbot.py + +| Commande | Description | +|---------|-------------| +| `install` | Installe les deps, enregistre le démarrage automatique et lance CraftBot | +| `start` | Démarre CraftBot en arrière-plan | +| `stop` | Arrête CraftBot | +| `restart` | Arrête puis redémarre | +| `status` | Affiche l'état d'exécution et celui du démarrage automatique | +| `logs [-n N]` | Affiche les N dernières lignes de log (par défaut : 50) | +| `uninstall` | Supprime l'enregistrement du démarrage automatique | + +**Exemples d'installation :** +```bash +# Installation simple via pip (sans conda) +python install.py + +# Avec environnement conda (recommandé pour les utilisateurs de conda) +python install.py --conda +``` + +**Exécuter CraftBot :** + +```powershell +# Mode Browser (par défaut, nécessite Node.js) +python run.py + +# Mode TUI (pas de Node.js nécessaire) +python run.py --tui + +# Mode CLI (léger) +python run.py --cli + +# Avec environnement conda +conda run -n craftbot python run.py + +# Ou en utilisant le chemin complet si conda n'est pas dans le PATH 
+&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +**Linux/macOS (Bash) :** +```bash +# Mode Browser (par défaut, nécessite Node.js) +python run.py + +# Mode TUI (pas de Node.js nécessaire) +python run.py --tui + +# Mode CLI (léger) +python run.py --cli + +# Avec environnement conda +conda run -n craftbot python run.py +``` + +### 🔧 Service en arrière-plan (recommandé) + +Exécutez CraftBot en tant que service en arrière-plan pour qu'il continue de fonctionner même après la fermeture du terminal. Un raccourci de bureau est créé automatiquement pour rouvrir le navigateur à tout moment. + +```bash +# Installer les dépendances, enregistrer le démarrage automatique à la connexion et lancer CraftBot +python craftbot.py install +``` + +C'est tout. Le terminal se ferme tout seul, CraftBot tourne en arrière-plan et le navigateur s'ouvre automatiquement. + +```bash +# Autres commandes du service : +python craftbot.py start # Démarre CraftBot en arrière-plan +python craftbot.py status # Vérifie s'il tourne +python craftbot.py stop # Arrête CraftBot +python craftbot.py restart # Redémarre CraftBot +python craftbot.py logs # Affiche les logs récents +``` + +| Commande | Description | +|---------|-------------| +| `python craftbot.py install` | Installe les dépendances, enregistre le démarrage automatique à la connexion, lance CraftBot, ouvre le navigateur et ferme le terminal automatiquement | +| `python craftbot.py start` | Démarre CraftBot en arrière-plan — redémarre automatiquement s'il est déjà lancé (le terminal se ferme tout seul) | +| `python craftbot.py stop` | Arrête CraftBot | +| `python craftbot.py restart` | Arrête puis démarre CraftBot | +| `python craftbot.py status` | Vérifie si CraftBot tourne et si le démarrage automatique est activé | +| `python craftbot.py logs` | Affiche les logs récents (`-n 100` pour plus de lignes) | +| `python craftbot.py uninstall` | Arrête CraftBot, supprime le démarrage automatique, désinstalle les 
paquets pip et purge le cache pip | + +> [!TIP] +> Après `craftbot.py start` ou `craftbot.py install`, un **raccourci CraftBot sur le bureau** est créé automatiquement. Si vous fermez le navigateur par accident, double-cliquez sur le raccourci pour le rouvrir. + +> [!NOTE] +> **Installation :** L'installateur fournit maintenant des indications claires si des dépendances manquent. Si Node.js est introuvable, on vous proposera de l'installer ou de basculer en mode TUI. L'installation détecte automatiquement la disponibilité du GPU et bascule en mode CPU si nécessaire. + +> [!TIP] +> **Première configuration :** CraftBot vous guidera dans une séquence d'onboarding pour configurer les clés API, le nom de l'agent, les MCP et les Skills. + +> [!NOTE] +> **Playwright Chromium :** Optionnel pour l'intégration WhatsApp Web. Si l'installation échoue, l'agent fonctionnera toujours pour les autres tâches. Installez-le manuellement plus tard avec : `playwright install chromium` + +--- + +## 🔧 Dépannage et problèmes courants + +### Node.js manquant (pour le mode navigateur) +Si vous voyez **"npm not found in PATH"** en lançant `python run.py` : +1. Téléchargez depuis [nodejs.org](https://nodejs.org/) (choisissez la version LTS) +2. Installez et redémarrez votre terminal +3. Relancez `python run.py` + +**Alternative :** Utilisez le mode TUI (Node.js non requis) : +```bash +python run.py --tui +``` + +### L'installation échoue sur les dépendances +L'installateur fournit désormais des messages d'erreur détaillés avec des solutions. Si l'installation échoue : +- **Vérifiez la version de Python :** assurez-vous d'avoir Python 3.10+ (`python --version`) +- **Vérifiez votre connexion :** les dépendances sont téléchargées pendant l'installation +- **Videz le cache pip :** `pip install --upgrade pip` puis réessayez + +### Problèmes d'installation de Playwright +L'installation de Playwright Chromium est optionnelle. 
En cas d'échec : +- L'agent **continuera de fonctionner** pour les autres tâches +- Vous pouvez l'ignorer ou l'installer plus tard : `playwright install chromium` +- Nécessaire uniquement pour l'intégration WhatsApp Web + +Pour un dépannage détaillé, consultez [INSTALLATION_FIX.md](INSTALLATION_FIX.md). + +--- + +## 🔌 Intégration des services externes + +L'agent peut se connecter à divers services via OAuth. Les builds de release incluent des identifiants intégrés, mais vous pouvez aussi utiliser les vôtres. + +### Démarrage rapide + +Pour les builds de release avec identifiants intégrés : +``` +/google login # Connecter Google Workspace +/zoom login # Connecter Zoom +/slack invite # Connecter Slack +/notion invite # Connecter Notion +/linkedin login # Connecter LinkedIn +``` + +### Détails des services + +| Service | Type d'auth | Commande | Secret requis ? | +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | Non (PKCE) | +| Zoom | PKCE | `/zoom login` | Non (PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | Oui | +| Notion | OAuth 2.0 | `/notion invite` | Oui | +| LinkedIn | OAuth 2.0 | `/linkedin login` | Oui | + +### Utiliser vos propres identifiants + +Si vous préférez utiliser vos propres identifiants OAuth, ajoutez-les à votre fichier `.env` : + +#### Google (PKCE — uniquement le Client ID) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. Allez sur la [Google Cloud Console](https://console.cloud.google.com/) +2. Activez les API Gmail, Calendar, Drive et People +3. Créez des identifiants OAuth de type **Desktop app** +4. Copiez le Client ID (le secret n'est pas requis en PKCE) + +#### Zoom (PKCE — uniquement le Client ID) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. Allez sur le [Zoom Marketplace](https://marketplace.zoom.us/) +2. Créez une application OAuth +3. 
Copiez le Client ID + +#### Slack (les deux requis) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. Allez sur [Slack API](https://api.slack.com/apps) +2. Créez une nouvelle application +3. Ajoutez les scopes OAuth : `chat:write`, `channels:read`, `users:read`, etc. +4. Copiez le Client ID et le Client Secret + +#### Notion (les deux requis) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. Allez sur [Notion Developers](https://developers.notion.com/) +2. Créez une nouvelle intégration (Public integration) +3. Copiez l'OAuth Client ID et le Secret + +#### LinkedIn (les deux requis) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. Allez sur [LinkedIn Developers](https://developer.linkedin.com/) +2. Créez une application +3. Ajoutez les scopes OAuth 2.0 +4. Copiez le Client ID et le Client Secret + +--- +## 🐳 Exécuter avec un conteneur + +La racine du dépôt contient une configuration Docker avec Python 3.10, des paquets système clés (dont Tesseract pour l'OCR) et toutes les dépendances Python définies dans `environment.yml`/`requirements.txt`, pour que l'agent s'exécute de façon cohérente dans des environnements isolés. + +Ci-dessous les instructions pour exécuter notre agent en conteneur. + +### Construire l'image + +Depuis la racine du dépôt : + +```bash +docker build -t craftbot . +``` + +### Exécuter le conteneur + +L'image est configurée pour lancer l'agent avec `python -m app.main` par défaut. 
Pour l'exécuter en mode interactif : + +```bash +docker run --rm -it craftbot +``` + +Si vous devez fournir des variables d'environnement, passez un fichier env (par exemple basé sur `.env.example`) : + +```bash +docker run --rm -it --env-file .env craftbot +``` + +Montez tous les répertoires qui doivent persister en dehors du conteneur (comme les dossiers de données ou cache) via `-v`, et ajustez les ports ou autres flags selon votre déploiement. L'image embarque les dépendances système pour l'OCR (`tesseract`) et les clients HTTP courants, afin que l'agent puisse travailler avec les fichiers et les API réseau dans le conteneur. + +Par défaut, l'image utilise Python 3.10 et embarque les dépendances Python de `environment.yml`/`requirements.txt`, donc `python -m app.main` fonctionne immédiatement. + +--- + +## 🤝 Comment contribuer + +Les PR sont les bienvenues ! Voir [CONTRIBUTING.md](CONTRIBUTING.md) pour le workflow (fork → branche depuis `dev` → PR). Toutes les pull requests passent automatiquement par un CI lint + smoke-test. Pour toute question ou une discussion plus rapide, rejoignez-nous sur [Discord](https://discord.gg/ZN9YHc37HG) ou envoyez un email à thamyikfoong(at)craftos.net. + +## 🧾 Licence + +Ce projet est sous [licence MIT](LICENSE). Vous êtes libre d'utiliser, d'héberger et de monétiser ce projet (vous devez créditer ce projet en cas de distribution et de monétisation). + +--- + +## ⭐ Remerciements + +Développé et maintenu par [CraftOS](https://craftos.net/) et les contributeurs [@zfoong](https://github.com/zfoong) et [@ahmad-ajmal](https://github.com/ahmad-ajmal). +Si **CraftBot** vous est utile, mettez une ⭐ au dépôt et partagez-le avec d'autres ! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.ja.md b/README.ja.md index c5f3cd10..0f593e2a 100644 --- a/README.ja.md +++ b/README.ja.md @@ -6,13 +6,14 @@

- English version here | 中文版README + English | 简体中文 | 繁體中文 | 한국어 | Español | Português | Français | Deutsch

## 🚀 概要 @@ -49,13 +52,14 @@ CraftBotはあなたの命令を待っています。今すぐあなた専用の - **Bring Your Own Key (BYOK)** — OpenAI、Google Gemini、Anthropic Claude、BytePlus、ローカルOllamaモデルをサポートする柔軟なLLMプロバイダーシステム。プロバイダー間の切り替えが簡単です。 - **メモリシステム** — 一日を通して起きたイベントを深夜に整理・統合します。 - **プロアクティブエージェント** — あなたの好み、習慣、人生の目標を学習し、計画を立て、タスクを開始して(もちろん承認付きで)あなたの生活をより良くします。 +- **Living UI** — CraftBotの中で動作するカスタムアプリを構築、インポート、または進化させます。エージェントはUIの状態を常に把握し、そのデータを直接読み取り、書き込み、操作できます。 - **外部ツール統合** — 埋め込みクレデンシャルとOAuthサポートにより、Google Workspace、Slack、Notion、Zoom、LinkedIn、Discord、Telegramに接続(今後さらに追加予定!)。 - **MCP** — 外部ツールやサービスでエージェント機能を拡張するためのModel Context Protocol統合。 - **スキル** — タスク計画、リサーチ、コードレビュー、Git操作などの組み込みスキルを含む拡張可能なスキルフレームワーク。 -- **クロスプラットフォーム** — プラットフォーム固有のコードバリアントとDockerコンテナ化によるWindowsとLinuxの完全サポート。 +- **クロスプラットフォーム** — プラットフォーム固有のコードバリアントとDockerコンテナ化によるWindows、macOS、Linuxの完全サポート。 > [!IMPORTANT] -> **GUIモードに関する注意:** GUIモードはまだ実験段階です。エージェントがGUIモードに切り替える際に問題が発生する可能性があります。この機能の改善に積極的に取り組んでいます。 +> **GUIモードは非推奨になりました。** CraftBotはGUI(デスクトップ自動化)モードをサポートしなくなりました。代わりにBrowser、TUI、またはCLIモードをご利用ください。
CraftBot Banner @@ -74,23 +78,100 @@ CraftBotはあなたの命令を待っています。今すぐあなた専用の - `Node.js` **18+**(オプション - ブラウザインターフェースにのみ必要) - `conda`(オプション - 見つからない場合、インストーラーがMinicondaの自動インストールを提案します) -### クイックインストール +### どのセットアップを使えばいいですか? + +> **迷ったらオプション1を選んでください。** すべて自動で処理されます。 + +| | オプション1 — サービスインストール | オプション2 — Condaインストール | オプション3 — 手動インストール | +|---|---|---|---| +| **対象ユーザー** | ほとんどのユーザー、初心者、テスト目的 | 独立環境が必要なCondaユーザー | 上級ユーザー、Pythonのカスタマイズ、完全制御 | +| **Python/環境を自動管理?** | ✅ 自動 | ✅ 自動 | ❌ 自分で管理 | +| **バックグラウンドで動作?** | ✅ はい、サービスとして | ❌ いいえ | ❌ いいえ | +| **起動方法** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ オプション1 — サービスインストール(推奨) + +**これを選ぶなら:** CraftBotをすぐに使いたい場合 — バックグラウンドサービス、ログイン時の自動起動、デスクトップショートカット、手動操作不要。 + +`craftbot.py` がすべて処理します:Python環境、依存関係、バックグラウンドプロセス管理、自動起動の登録。 + +```bash +# 1. リポジトリをクローン +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. インストール、自動起動の登録、CraftBotの起動 +python craftbot.py install +``` + +以上です。ターミナルは自動で閉じ、CraftBotはバックグラウンドで動作し、ブラウザが自動で開きます。**デスクトップショートカット**が作成されるので、ブラウザをいつでも再度開けます。 + +**インストール後のサービス管理:** + +```bash +python craftbot.py start # CraftBotをバックグラウンドで起動 +python craftbot.py stop # CraftBotを停止 +python craftbot.py restart # CraftBotを再起動 +python craftbot.py status # 実行中か確認、自動起動が有効かも確認 +python craftbot.py logs # 最近のログを確認 +python craftbot.py uninstall # 停止、自動起動削除、パッケージをアンインストール +``` + +> [!TIP] +> `install` または `start` の後、**CraftBotデスクトップショートカット**が自動作成されます。ブラウザを誤って閉じた場合は、ショートカットをダブルクリックして再度開けます。 + +--- + +### オプション2 — Condaインストール + +**これを選ぶなら:** すでにcondaを使用していて、独立したconda環境でCraftBotを動かしたい場合。 + +`install.py --conda` は専用の `craftbot` conda環境を設定します。Minicondaが見つからない場合は自動的にインストールされます。 ```bash -# リポジトリをクローン -git clone https://github.com/zfoong/CraftBot.git +# 1. リポジトリをクローン +git clone https://github.com/CraftOS-dev/CraftBot.git cd CraftBot -# 依存関係をインストール +# 2. conda環境にインストール +python install.py --conda + +# 3. 
CraftBotを実行 +conda run -n craftbot python run.py + +# condaがPATHにない場合(Windowsのみ): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> CraftBotを実行するたびに `conda run -n craftbot python run.py` を使用してください。バックグラウンドサービスはありません — 自分で起動と停止を行います。 + +--- + +### オプション3 — 手動インストール(pip) + +**これを選ぶなら:** Python環境を完全に自分で管理したい場合、自動サービスやバックグラウンドプロセスは不要な場合。 + +`install.py`(フラグなし)は現在アクティブなPython環境に標準pip installを実行します。`run.py` を使って手動でCraftBotを起動・停止します。 + +```bash +# 1. リポジトリをクローン +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. アクティブなPython環境に依存関係をインストール python install.py -# エージェントを実行 +# 3. CraftBotを実行 python run.py ``` -以上です!初回実行時にAPIキーの設定がガイドされます。 +初回実行時にAPIキーと設定のセットアップがガイドされます。 -**注意:** Node.jsがインストールされていない場合、インストーラーがステップバイステップの手順をガイドします。ブラウザモードをスキップしてTUIを使用することもできます(以下のモードを参照)。 +> [!NOTE] +> Node.jsがインストールされていない場合、インストーラーがステップバイステップの手順を提供します。ブラウザモードを完全にスキップしてTUIモードを使用することもできます — Node.js不要:`python run.py --tui` ### インストール後にできること - エージェントと自然言語で会話 @@ -111,12 +192,58 @@ CraftBotは複数のUIモードをサポートしています。お好みに応 | **ブラウザ** | `python run.py` | Node.js 18+ | モダンなWebインターフェース、最も使いやすい | | **TUI** | `python run.py --tui` | なし | ターミナルUI、追加の依存関係なし | | **CLI** | `python run.py --cli` | なし | コマンドライン、軽量 | -| **GUI** | `python run.py --gui` | `install.py --gui` | 視覚的フィードバック付きのデスクトップ自動化 | **ブラウザモード**がデフォルトで推奨されます。Node.jsがない場合は、インストーラーがインストール手順を提供するか、代わりに**TUIモード**を使用できます。 --- +## 🧬 Living UI + +**Living UIは、あなたのニーズに合わせて進化するシステム/アプリ/ダッシュボードです。** + +AIコパイロットが組み込まれたカンバンボードが必要ですか?あなたのワークフローに合わせて形作られたカスタムCRMは? +CraftBotが読み取って操作できる会社のダッシュボードは? +Living UIとして立ち上げれば、CraftBotと並んで動作し、あなたのニーズの変化に合わせて成長します。 + +
+ Living UI example +
+ +### Living UIを作成する3つの方法 + +1. **ゼロから構築する。** 望むものを自然な言葉で説明してください。CraftBotが + データモデル、バックエンドAPI、React UIをスキャフォールドし、構造化された + 設計プロセスを通じてあなたと共に反復します。 + +
+ Building a Living UI from scratch +
+ +2. **マーケットプレイスからインストールする。** [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace)からコミュニティが作成したLiving UIを閲覧できます。 + +
+ Living UI marketplace +
+ +3. **既存のプロジェクトをインポートする。** Go、Node.js、Python、Rust、または静的なソースコード + やGitHubリポジトリをCraftBotに指定してください。ランタイムを検出し、ヘルスチェックを構成し、 + Living UIとしてラップします。 + +
+ Importing an existing project as a Living UI +
+ +### CraftBotをループの中に据えて進化し続ける + +Living UIは決して「完成」することはありません。あなたのニーズが成長するにつれて、 +機能の追加、ビューの再設計、新しいデータへの接続をエージェントに依頼できます。 + +CraftBotはすべてのLiving UIに埋め込まれ、**その状態を常に認識しています**。 +現在のDOMとフォームの値を読み取り、REST APIを介してアプリデータを照会し、 +あなたの代わりにアクションをトリガーできます。 + +--- + ## 🧩 アーキテクチャの概要 | コンポーネント | 説明 | @@ -133,7 +260,6 @@ CraftBotは複数のUIモードをサポートしています。お好みに応 | **スキルマネージャー** | エージェントコンテキストにプラグイン可能なスキルをロードして注入。 | | **MCPアダプター** | MCPツールをネイティブアクションに変換するModel Context Protocol統合。 | | **TUIインターフェース** | 対話的なコマンドライン操作のためにTextualフレームワークで構築されたターミナルユーザーインターフェース。 | -| **GUIモジュール** | Dockerコンテナ、UI要素検出用のOmniParser、Gradioクライアントを使用した実験的なGUI自動化。 | --- @@ -143,27 +269,7 @@ CraftBotは複数のUIモードをサポートしています。お好みに応 - [ ] **外部ツール統合** — さらに追加中! - [X] **MCPレイヤー** — 完了。 - [X] **スキルレイヤー** — 完了。 -- [X] **プロアクティブな動作** — 実装予定 - ---- - -## 🖥️ GUIモード(オプション) - -GUIモードは画面自動化を有効にします - エージェントがデスクトップ環境を見て操作できるようになります。これはオプションで、追加のセットアップが必要です。 - -```bash -# GUIサポートをインストール(pip使用、conda不要) -python install.py --gui - -# GUIサポートとcondaでインストール -python install.py --gui --conda - -# GUIモードで実行 -python run.py --gui -``` - -> [!NOTE] -> GUIモードは実験的機能であり、追加の依存関係が必要です(モデルウェイトで約4GB)。デスクトップ自動化が不要な場合は、これをスキップして代わりにBrowser/TUIモードを使用してください。追加の依存関係は必要ありません。 +- [ ] **プロアクティブな動作** — 実装予定 --- @@ -173,9 +279,7 @@ python run.py --gui | フラグ | 説明 | |------|-------------| -| `--gui` | GUIコンポーネント(OmniParser)をインストール | | `--conda` | conda環境を使用(オプション) | -| `--cpu-only` | CPU専用のPyTorchをインストール(--guiと併用) | ### run.py @@ -184,27 +288,14 @@ python run.py --gui | (なし) | **ブラウザ**モードで実行(推奨、Node.jsが必要) | | `--tui` | **ターミナルUI**モードで実行(追加の依存関係なし) | | `--cli` | **CLI**モードで実行(軽量) | -| `--gui` | GUI自動化モードを有効化(先に `install.py --gui` が必要) | **インストール例:** ```bash # シンプルなpipインストール(condaなし) python install.py -# GUIサポート付き(pip使用、condaなし) -python install.py --gui - -# CPU専用システムでのGUI(pip使用、condaなし) -python install.py --gui --cpu-only - # conda環境を使用(condaユーザー向け推奨) python install.py --conda - -# GUIサポートとconda -python install.py --gui --conda - -# condaを使用したCPU専用システムでのGUI -python 
install.py --gui --conda --cpu-only ``` **CraftBotの実行:** @@ -219,9 +310,6 @@ python run.py --tui # CLIモード(軽量) python run.py --cli -# GPU/GUIモード -python run.py --gui - # conda環境で conda run -n craftbot python run.py @@ -240,13 +328,43 @@ python run.py --tui # CLIモード(軽量) python run.py --cli -# GPU/GUIモード -python run.py --gui - # conda環境で conda run -n craftbot python run.py ``` +### 🔧 バックグラウンドサービス(推奨) + +ターミナルを閉じても CraftBot が動き続けるようにバックグラウンドサービスとして実行します。デスクトップショートカットが自動作成されるので、いつでもブラウザを再度開けます。 + +```bash +# 依存関係インストール、ログイン時自動起動の登録、CraftBot の起動 +python craftbot.py install +``` + +以上です。ターミナルは自動で閉じ、CraftBot はバックグラウンドで動作し、ブラウザが自動で開きます。 + +```bash +# その他のサービスコマンド: +python craftbot.py start # CraftBot をバックグラウンドで起動 +python craftbot.py status # 実行中かどうか確認 +python craftbot.py stop # CraftBot を停止 +python craftbot.py restart # CraftBot を再起動 +python craftbot.py logs # 最近のログ出力を確認 +``` + +| コマンド | 説明 | +|---------|-------------| +| `python craftbot.py install` | 依存関係インストール、ログイン時自動起動の登録、CraftBot 起動、ブラウザを開き、ターミナルを自動で閉じる | +| `python craftbot.py start` | CraftBot をバックグラウンドで起動(すでに実行中の場合は自動再起動、ターミナルは自動で閉じる) | +| `python craftbot.py stop` | CraftBot を停止 | +| `python craftbot.py restart` | CraftBot を停止して再起動 | +| `python craftbot.py status` | CraftBot が実行中か、自動起動が有効かを確認 | +| `python craftbot.py logs` | 最近のログ出力を表示(`-n 100` でより多く表示) | +| `python craftbot.py uninstall` | CraftBot を停止、自動起動の登録解除、pip パッケージのアンインストール、pip キャッシュの削除 | + +> [!TIP] +> `craftbot.py start` または `craftbot.py install` の後、**CraftBot デスクトップショートカット**が自動作成されます。ブラウザを誤って閉じた場合は、ショートカットをダブルクリックして再度開けます。 + > [!NOTE] > **インストール:** インストーラーは依存関係が不足している場合、明確なガイダンスを提供します。Node.jsが見つからない場合は、インストールを促すか、TUIモードに切り替えることができます。インストールはGPUの可用性を自動検出し、必要に応じてCPU専用モードにフォールバックします。 @@ -283,15 +401,12 @@ Playwright chromiumのインストールはオプションです。失敗した - スキップするか後でインストール: `playwright install chromium` - WhatsApp Web連携にのみ必要 -### GPU/CUDAの問題 -インストーラーはGPUの可用性を自動検出します: -- CUDAインストールが失敗した場合、自動的にCPUモードにフォールバックします -- 手動でCPUセットアップ: `python install.py --gui --cpu-only` - 
詳細なトラブルシューティングについては、[INSTALLATION_FIX.md](INSTALLATION_FIX.md)を参照してください。 --- +## 🔌 外部サービス連携 + エージェントはOAuthを使用してさまざまなサービスに接続できます。リリースビルドには埋め込みクレデンシャルが付属していますが、独自のクレデンシャルを使用することもできます。 ### クイックスタート @@ -366,7 +481,7 @@ LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret 4. クライアントIDとクライアントシークレットをコピー --- -## コンテナで実行 +## 🐳 コンテナで実行 リポジトリのルートには、Python 3.10、主要なシステムパッケージ(OCR用のTesseractを含む)、および`environment.yml`/`requirements.txt`で定義されたすべてのPython依存関係を含むDocker構成が含まれており、エージェントは隔離された環境で一貫して実行できます。 @@ -394,29 +509,7 @@ docker run --rm -it craftbot docker run --rm -it --env-file .env craftbot ``` -コンテナの外部で永続化する必要があるディレクトリ(データやキャッシュフォルダなど)は`-v`を使用してマウントし、デプロイに必要に応じてポートや追加のフラグを調整してください。コンテナには、OCR(`tesseract`)、画面自動化(`pyautogui`、`mss`、X11ユーティリティ、仮想フレームバッファ)、および一般的なHTTPクライアント用のシステム依存関係が含まれているため、エージェントはコンテナ内でファイル、ネットワークAPI、GUI自動化を扱うことができます。 - -### GUI/画面自動化の有効化 - -GUIアクション(マウス/キーボードイベント、スクリーンショット)にはX11サーバーが必要です。ホストディスプレイにアタッチするか、`xvfb`でヘッドレスで実行できます: - -* ホストディスプレイを使用(X11を使用するLinuxが必要): - - ```bash - docker run --rm -it - -e DISPLAY=$DISPLAY \ - -v /tmp/.X11-unix:/tmp/.X11-unix \ - -v $(pwd)/data:/app/app/data \ - craftbot - ``` - - エージェントが読み書きする必要があるフォルダには、追加の`-v`マウントを追加してください。 - -* 仮想ディスプレイでヘッドレス実行: - - ```bash - docker run --rm -it --env-file .env craftbot bash -lc "Xvfb :99 -screen 0 1920x1080x24 & export DISPLAY=:99 && exec python -m app.main" - ``` +コンテナの外部で永続化する必要があるディレクトリ(データやキャッシュフォルダなど)は`-v`を使用してマウントし、デプロイに必要に応じてポートや追加のフラグを調整してください。コンテナには、OCR(`tesseract`)や一般的なHTTPクライアント用のシステム依存関係が含まれているため、エージェントはコンテナ内でファイルやネットワークAPIを扱うことができます。 デフォルトでは、イメージはPython 3.10を使用し、`environment.yml`/`requirements.txt`からのPython依存関係をバンドルしているため、`python -m app.main`はそのまま動作します。 @@ -424,7 +517,7 @@ GUIアクション(マウス/キーボードイベント、スクリーンシ ## 🤝 貢献方法 -貢献と提案を歓迎します![@zfoong](https://github.com/zfoong) @ thamyikfoong(at)craftos.net までご連絡ください。現在、チェック機能を設定していないため、直接的な貢献は受け付けられませんが、提案やフィードバックは大変ありがたく思います。 +プルリクエストを歓迎します!ワークフロー(fork → `dev` ブランチから分岐 → PR)については [CONTRIBUTING.md](CONTRIBUTING.md) をご覧ください。すべてのプルリクエストは lint + スモークテスト CI 
で自動的に検証されます。質問や素早いやり取りをご希望の場合は、[Discord](https://discord.gg/ZN9YHc37HG) に参加するか、thamyikfoong(at)craftos.net までメールしてください。 ## 🧾 ライセンス @@ -436,3 +529,15 @@ GUIアクション(マウス/キーボードイベント、スクリーンシ [CraftOS](https://craftos.net/)および貢献者[@zfoong](https://github.com/zfoong)と[@ahmad-ajmal](https://github.com/ahmad-ajmal)によって開発・維持されています。 **CraftBot**が役に立つと思われた場合は、リポジトリに⭐をつけて、他の人と共有してくださると嬉しいです! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.ko.md b/README.ko.md new file mode 100644 index 00000000..aa884339 --- /dev/null +++ b/README.ko.md @@ -0,0 +1,555 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 繁體中文 | Español | Português | Français | Deutsch +

+ +## 🚀 개요 +

+CraftBot은 당신의 기기 안에 상주하며 24시간 내내 당신을 위해 일하는 개인 AI 어시스턴트입니다. +

+ +CraftBot은 작업을 자율적으로 해석하고, 행동을 계획하며, 당신의 목표를 달성하기 위해 이를 실행합니다. +사용자의 선호도와 목표를 학습하여, 삶의 목표를 이루도록 작업을 계획하고 능동적으로 시작하는 것을 도와줍니다. +MCP, 스킬, 그리고 외부 앱 통합을 지원합니다. + +CraftBot이 당신의 명령을 기다리고 있습니다. 지금 나만의 CraftBot을 설정해 보세요. + +
+ CraftBot Overview +
+ +--- + +## ✨ 주요 기능 + +- **Bring Your Own Key (BYOK)** — OpenAI, Google Gemini, Anthropic Claude, BytePlus, 로컬 Ollama 모델을 지원하는 유연한 LLM 제공자 시스템. 제공자 간 손쉬운 전환이 가능합니다. +- **메모리 시스템** — 하루 동안 발생한 사건들을 자정에 정제하고 통합합니다. +- **능동형 에이전트(Proactive Agent)** — 사용자의 선호도, 습관, 인생 목표를 학습합니다. 그리고 (물론 승인을 받은 뒤) 계획을 수행하고 작업을 시작하여 삶을 개선하도록 도와줍니다. +- **Living UI** — CraftBot 안에서 동작하는 커스텀 앱을 만들고, 가져오고, 진화시킵니다. 에이전트는 UI의 상태를 항상 인식하며, 그 데이터를 직접 읽고, 쓰고, 조작할 수 있습니다. +- **외부 도구 통합** — Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord, Telegram과 연결됩니다(계속 추가 예정!). 내장된 자격 증명 및 OAuth가 지원됩니다. +- **MCP** — 외부 도구 및 서비스로 에이전트 기능을 확장하기 위한 Model Context Protocol 통합. +- **스킬(Skills)** — 작업 계획, 리서치, 코드 리뷰, Git 작업 등 내장 스킬을 갖춘 확장형 스킬 프레임워크. +- **크로스 플랫폼** — 플랫폼별 코드 변형 및 Docker 컨테이너화를 통해 Windows, macOS, Linux를 완벽하게 지원합니다. + +> [!IMPORTANT] +> **GUI 모드는 더 이상 지원되지 않습니다.** CraftBot은 GUI(데스크톱 자동화) 모드를 더 이상 지원하지 않습니다. 대신 Browser, TUI 또는 CLI 모드를 사용하세요. + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 시작하기 + +### 필수 요구 사항 +- Python **3.10+** +- `git` (리포지토리 클론 시 필요) +- 사용할 LLM 제공자의 API 키(OpenAI, Gemini 또는 Anthropic) +- `Node.js` **18+** (선택 사항 - 브라우저 인터페이스 사용 시에만 필요) +- `conda` (선택 사항 - 없을 경우 설치 프로그램이 Miniconda 자동 설치를 제안합니다) + +### 어떤 설치 방법을 선택해야 하나요? + +> **잘 모르겠다면 옵션 1을 선택하세요.** 모든 것을 자동으로 처리해 줍니다. + +| | 옵션 1 — 서비스 설치 | 옵션 2 — Conda 설치 | 옵션 3 — 수동 설치 | +|---|---|---|---| +| **대상** | 대부분의 사용자, 처음 사용자, 테스트 | 독립 환경을 원하는 Conda 사용자 | 고급 사용자, 커스텀 Python, 완전한 제어 | +| **Python/환경 자동 관리?** | ✅ 자동 | ✅ 자동 | ❌ 직접 관리 | +| **백그라운드 실행?** | ✅ 예, 서비스로 | ❌ 아니오 | ❌ 아니오 | +| **시작 방법** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ 옵션 1 — 서비스 설치 (권장) + +**이것을 선택하세요:** CraftBot을 바로 사용하고 싶은 경우 — 백그라운드 서비스, 로그인 시 자동 시작, 바탕화면 바로가기, 수동 작업 없음. + +`craftbot.py`가 모든 것을 처리합니다: Python 환경, 의존성 설치, 백그라운드 프로세스 관리, 자동 시작 등록. + +```bash +# 1. 리포지토리 클론 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 설치, 자동 시작 등록, CraftBot 실행 +python craftbot.py install +``` + +이게 전부입니다. 터미널은 자동으로 닫히고, CraftBot은 백그라운드에서 실행되며, 브라우저가 자동으로 열립니다. **바탕화면 바로가기**가 생성되어 언제든지 브라우저를 다시 열 수 있습니다. + +**설치 후 서비스 관리:** + +```bash +python craftbot.py start # CraftBot을 백그라운드에서 시작 +python craftbot.py stop # CraftBot 중지 +python craftbot.py restart # CraftBot 재시작 +python craftbot.py status # 실행 여부 확인, 자동 시작 활성화 여부 확인 +python craftbot.py logs # 최근 로그 확인 +python craftbot.py uninstall # 중지, 자동 시작 제거, 패키지 제거 +``` + +> [!TIP] +> `install` 또는 `start` 실행 후 **CraftBot 바탕화면 바로가기**가 자동으로 생성됩니다. 브라우저를 실수로 닫았다면 바로가기를 더블클릭해 다시 열 수 있습니다. + +--- + +### 옵션 2 — Conda 설치 + +**이것을 선택하세요:** 이미 conda를 사용 중이고 독립된 conda 환경에서 CraftBot을 실행하고 싶은 경우. + +`install.py --conda`는 전용 `craftbot` conda 환경을 설정합니다. 시스템에 Miniconda가 없으면 자동으로 설치됩니다. + +```bash +# 1. 리포지토리 클론 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. conda 환경에 설치 +python install.py --conda + +# 3. 
CraftBot 실행 +conda run -n craftbot python run.py + +# conda가 PATH에 없는 경우 (Windows 전용): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> CraftBot을 실행할 때마다 `conda run -n craftbot python run.py`를 사용하세요. 백그라운드 서비스가 없으므로 직접 시작하고 중지해야 합니다. + +--- + +### 옵션 3 — 수동 설치 (pip) + +**이것을 선택하세요:** Python 환경을 완전히 직접 관리하고 싶고, 자동 서비스나 백그라운드 프로세스 없이 CraftBot을 관리하고 싶은 경우. + +`install.py`(플래그 없음)는 현재 활성화된 Python 환경에 표준 pip 설치를 수행합니다. `run.py`로 CraftBot을 수동으로 시작하고 중지합니다. + +```bash +# 1. 리포지토리 클론 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 활성 Python 환경에 의존성 설치 +python install.py + +# 3. CraftBot 실행 +python run.py +``` + +첫 실행 시 API 키 설정 과정을 안내해 줍니다. + +> [!NOTE] +> Node.js가 설치되어 있지 않다면 설치 프로그램이 단계별로 안내해 줍니다. TUI 모드를 사용하면 브라우저 모드를 완전히 건너뛸 수도 있습니다 — Node.js 불필요: `python run.py --tui` + +### 바로 할 수 있는 일 +- 에이전트와 자연스럽게 대화 +- 복잡한 다단계 작업 요청 +- `/help`를 입력해 사용 가능한 명령 확인 +- Google, Slack, Notion 등과 연결 + +### 🖥️ 인터페이스 모드 + +
+ CraftOS Banner +
+ +CraftBot은 여러 UI 모드를 지원합니다. 선호에 따라 선택하세요. + +| 모드 | 명령어 | 요구 사항 | 적합한 용도 | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | 최신 웹 인터페이스, 가장 사용하기 쉬움 | +| **TUI** | `python run.py --tui` | 없음 | 터미널 UI, 별도 의존성 불필요 | +| **CLI** | `python run.py --cli` | 없음 | 커맨드라인, 경량 | + +**브라우저 모드**가 기본이자 권장 모드입니다. Node.js가 없는 경우 설치 프로그램이 설치 안내를 제공하거나, 대신 **TUI 모드**를 사용할 수 있습니다. + +--- + +## 🧬 Living UI + +**Living UI는 당신의 필요에 따라 진화하는 시스템/앱/대시보드입니다.** + +AI 코파일럿이 내장된 칸반 보드가 필요한가요? 당신의 워크플로에 딱 맞게 만든 맞춤형 CRM은요? +CraftBot이 읽고 조작할 수 있는 회사 대시보드는요? +Living UI로 실행하세요 — CraftBot과 함께 작동하며, 당신의 필요가 변할수록 함께 성장합니다. + +
+ Living UI example +
+ +### Living UI를 만드는 세 가지 방법 + +1. **처음부터 만들기.** 원하는 것을 일상 언어로 설명하세요. CraftBot이 + 데이터 모델, 백엔드 API, React UI를 구성하고, 구조화된 설계 과정을 + 통해 당신과 함께 반복 개선합니다. + +
+ Building a Living UI from scratch +
+ +2. **마켓플레이스에서 설치하기.** [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace)에서 커뮤니티가 만든 Living UI를 둘러보세요. + +
+ Living UI marketplace +
+ +3. **기존 프로젝트 가져오기.** CraftBot에게 Go, Node.js, Python, Rust 또는 정적 소스 코드나 + GitHub 저장소를 지정하세요. 런타임을 감지하고, 헬스 체크를 구성한 후, + Living UI로 감쌉니다. + +
+ Importing an existing project as a Living UI +
+ +### 루프 안의 CraftBot과 함께 계속 진화합니다 + +Living UI는 결코 "완성"되지 않습니다. 필요가 커질수록 에이전트에게 기능 추가, +뷰 재설계, 새로운 데이터 연결을 요청할 수 있습니다. + +CraftBot은 모든 Living UI에 내장되어 있으며, **그 상태를 항상 인식합니다**. +현재 DOM과 폼 값을 읽고, REST API를 통해 앱 데이터를 조회하며, +당신을 대신해 작업을 실행할 수 있습니다. + +--- + +## 🧩 아키텍처 개요 + +| 구성 요소 | 설명 | +|-----------|-------------| +| **Agent Base** | 작업 라이프사이클을 관리하고 구성 요소 간 조정을 담당하며 주요 에이전틱 루프를 처리하는 핵심 오케스트레이션 계층. | +| **LLM Interface** | 여러 LLM 제공자(OpenAI, Gemini, Anthropic, BytePlus, Ollama)를 지원하는 통합 인터페이스. | +| **Context Engine** | KV 캐시를 지원하는 최적화된 프롬프트를 생성합니다. | +| **Action Manager** | 라이브러리에서 액션을 가져와 실행합니다. 커스텀 액션을 쉽게 확장할 수 있습니다. | +| **Action Router** | 작업 요구 사항에 가장 잘 맞는 액션을 지능적으로 선택하고, 필요 시 LLM을 통해 입력 매개변수를 해결합니다. | +| **Event Stream** | 작업 진행 추적, UI 업데이트, 실행 모니터링을 위한 실시간 이벤트 게시 시스템. | +| **Memory Manager** | ChromaDB 기반의 RAG 시맨틱 메모리. 메모리 청킹, 임베딩, 검색, 점진적 업데이트를 처리합니다. | +| **State Manager** | 에이전트 실행 컨텍스트, 대화 이력, 런타임 구성을 추적하는 전역 상태 관리. | +| **Task Manager** | 작업 정의를 관리하며 단순/복잡 작업 모드, 할 일 생성, 다단계 워크플로우 추적을 가능하게 합니다. | +| **Skill Manager** | 플러그형 스킬을 로드하여 에이전트 컨텍스트에 주입합니다. | +| **MCP Adapter** | MCP 도구를 네이티브 액션으로 변환하는 Model Context Protocol 통합. | +| **TUI Interface** | 대화형 커맨드라인 조작을 위해 Textual 프레임워크로 구축된 터미널 사용자 인터페이스. | + +--- + +## 🔜 로드맵 + +- [X] **메모리 모듈** — 완료. +- [ ] **외부 도구 통합** — 계속 추가 중! +- [X] **MCP 레이어** — 완료. +- [X] **스킬 레이어** — 완료. 
+- [ ] **능동형 동작(Proactive Behaviour)** — 진행 중
+
+---
+
+## 📋 명령어 레퍼런스
+
+### install.py
+
+| 플래그 | 설명 |
+|------|-------------|
+| `--conda` | conda 환경 사용 (선택 사항) |
+
+### run.py
+
+| 플래그 | 설명 |
+|------|-------------|
+| (없음) | **Browser** 모드로 실행 (권장, Node.js 필요) |
+| `--tui` | **터미널 UI** 모드로 실행 (의존성 불필요) |
+| `--cli` | **CLI** 모드로 실행 (경량) |
+
+### craftbot.py
+
+| 명령 | 설명 |
+|---------|-------------|
+| `install` | 의존성 설치, 자동 시작 등록, CraftBot 실행 |
+| `start` | CraftBot을 백그라운드에서 실행 |
+| `stop` | CraftBot 중지 |
+| `restart` | 중지 후 다시 시작 |
+| `status` | 실행 상태 및 자동 시작 상태 표시 |
+| `logs [-n N]` | 마지막 N개의 로그 라인 표시 (기본값: 50) |
+| `uninstall` | CraftBot 중지, 자동 시작 등록 해제, 패키지 제거 |
+
+**설치 예시:**
+```bash
+# 간단한 pip 설치 (conda 미사용)
+python install.py
+
+# conda 환경 사용 (conda 사용자에게 권장)
+python install.py --conda
+```
+
+**CraftBot 실행:**
+
+```powershell
+# Browser 모드 (기본, Node.js 필요)
+python run.py
+
+# TUI 모드 (Node.js 불필요)
+python run.py --tui
+
+# CLI 모드 (경량)
+python run.py --cli
+
+# conda 환경에서 실행
+conda run -n craftbot python run.py
+
+# conda가 PATH에 없는 경우 전체 경로 사용
+&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py
+```
+
+**Linux/macOS (Bash):**
+```bash
+# Browser 모드 (기본, Node.js 필요)
+python run.py
+
+# TUI 모드 (Node.js 불필요)
+python run.py --tui
+
+# CLI 모드 (경량)
+python run.py --cli
+
+# conda 환경에서 실행
+conda run -n craftbot python run.py
+```
+
+### 🔧 백그라운드 서비스 (권장)
+
+터미널을 닫아도 CraftBot이 계속 실행되도록 백그라운드 서비스로 실행합니다. 데스크톱 바로가기가 자동으로 생성되므로 언제든지 브라우저를 다시 열 수 있습니다.
+
+```bash
+# 의존성 설치, 로그인 시 자동 시작 등록, CraftBot 실행
+python craftbot.py install
+```
+
+이게 전부입니다. 터미널은 자동으로 닫히고, CraftBot은 백그라운드에서 실행되며, 브라우저가 자동으로 열립니다. 
+ +```bash +# 기타 서비스 명령: +python craftbot.py start # CraftBot을 백그라운드에서 시작 +python craftbot.py status # 실행 여부 확인 +python craftbot.py stop # CraftBot 중지 +python craftbot.py restart # CraftBot 재시작 +python craftbot.py logs # 최근 로그 출력 확인 +``` + +| 명령 | 설명 | +|---------|-------------| +| `python craftbot.py install` | 의존성 설치, 로그인 시 자동 시작 등록, CraftBot 실행, 브라우저 열기 후 터미널 자동 종료 | +| `python craftbot.py start` | CraftBot을 백그라운드에서 시작 — 이미 실행 중이면 자동 재시작 (터미널 자동 종료) | +| `python craftbot.py stop` | CraftBot 중지 | +| `python craftbot.py restart` | CraftBot 중지 후 재시작 | +| `python craftbot.py status` | CraftBot 실행 여부와 자동 시작 활성화 여부 확인 | +| `python craftbot.py logs` | 최근 로그 출력 표시 (`-n 100`으로 더 많은 줄 표시) | +| `python craftbot.py uninstall` | CraftBot 중지, 자동 시작 등록 해제, pip 패키지 제거 및 pip 캐시 정리 | + +> [!TIP] +> `craftbot.py start` 또는 `craftbot.py install` 실행 후 **CraftBot 데스크톱 바로가기**가 자동으로 생성됩니다. 브라우저를 실수로 닫았다면 바로가기를 더블클릭해 다시 열 수 있습니다. + +> [!NOTE] +> **설치:** 의존성이 누락된 경우 설치 프로그램이 명확한 안내를 제공합니다. Node.js가 없으면 설치 여부를 묻거나 TUI 모드로 전환할 수 있습니다. GPU 가용성을 자동으로 감지하고 필요한 경우 CPU 전용 모드로 대체합니다. + +> [!TIP] +> **첫 실행 설정:** CraftBot은 API 키, 에이전트 이름, MCP, 스킬 설정을 위한 온보딩 과정을 안내합니다. + +> [!NOTE] +> **Playwright Chromium:** WhatsApp Web 통합에 필요한 선택 사항입니다. 설치에 실패해도 다른 작업에서는 에이전트가 정상 작동합니다. 나중에 `playwright install chromium`으로 수동 설치할 수 있습니다. + +--- + +## 🔧 문제 해결 및 자주 발생하는 이슈 + +### Node.js 누락 (브라우저 모드용) +`python run.py` 실행 시 **"npm not found in PATH"** 오류가 보인다면: +1. [nodejs.org](https://nodejs.org/)에서 다운로드 (LTS 버전 권장) +2. 설치 후 터미널 재시작 +3. `python run.py`를 다시 실행 + +**대안:** TUI 모드를 사용하세요 (Node.js 불필요): +```bash +python run.py --tui +``` + +### 의존성 설치 실패 +설치 프로그램은 이제 해결 방법이 포함된 자세한 오류 메시지를 제공합니다. 설치가 실패한다면: +- **Python 버전 확인:** Python 3.10+인지 확인 (`python --version`) +- **인터넷 연결 확인:** 설치 중 의존성이 다운로드됩니다 +- **pip 캐시 초기화:** `pip install --upgrade pip` 후 다시 시도 + +### Playwright 설치 문제 +Playwright chromium 설치는 선택 사항입니다. 
실패 시: +- 에이전트는 다른 작업에서 **정상 작동**합니다 +- 건너뛰거나 나중에 설치 가능: `playwright install chromium` +- WhatsApp Web 통합에만 필요합니다 + +자세한 문제 해결은 [INSTALLATION_FIX.md](INSTALLATION_FIX.md)를 참고하세요. + +--- + +## 🔌 외부 서비스 연동 + +에이전트는 OAuth를 사용해 다양한 서비스에 연결할 수 있습니다. 릴리스 빌드에는 자격 증명이 내장되어 있지만, 자신의 자격 증명을 사용할 수도 있습니다. + +### 빠른 시작 + +자격 증명이 내장된 릴리스 빌드의 경우: +``` +/google login # Google Workspace 연결 +/zoom login # Zoom 연결 +/slack invite # Slack 연결 +/notion invite # Notion 연결 +/linkedin login # LinkedIn 연결 +``` + +### 서비스 세부 정보 + +| 서비스 | 인증 유형 | 명령 | 시크릿 필요? | +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | 불필요 (PKCE) | +| Zoom | PKCE | `/zoom login` | 불필요 (PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | 필요 | +| Notion | OAuth 2.0 | `/notion invite` | 필요 | +| LinkedIn | OAuth 2.0 | `/linkedin login` | 필요 | + +### 자신의 자격 증명 사용하기 + +자체 OAuth 자격 증명을 사용하려면 `.env` 파일에 추가하세요. + +#### Google (PKCE - Client ID만 필요) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. [Google Cloud Console](https://console.cloud.google.com/) 접속 +2. Gmail, Calendar, Drive, People API 활성화 +3. **데스크톱 앱** 유형으로 OAuth 자격 증명 생성 +4. Client ID 복사 (PKCE에서는 시크릿 불필요) + +#### Zoom (PKCE - Client ID만 필요) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. [Zoom Marketplace](https://marketplace.zoom.us/) 접속 +2. OAuth 앱 생성 +3. Client ID 복사 + +#### Slack (둘 다 필요) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. [Slack API](https://api.slack.com/apps) 접속 +2. 새 앱 생성 +3. OAuth 스코프 추가: `chat:write`, `channels:read`, `users:read` 등 +4. Client ID와 Client Secret 복사 + +#### Notion (둘 다 필요) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. [Notion Developers](https://developers.notion.com/) 접속 +2. 새 통합 생성 (Public integration) +3. 
OAuth Client ID와 Secret 복사 + +#### LinkedIn (둘 다 필요) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. [LinkedIn Developers](https://developer.linkedin.com/) 접속 +2. 앱 생성 +3. OAuth 2.0 스코프 추가 +4. Client ID와 Client Secret 복사 + +--- +## 🐳 컨테이너로 실행하기 + +리포지토리 루트에는 Python 3.10, OCR을 위한 Tesseract를 포함한 주요 시스템 패키지, 그리고 `environment.yml`/`requirements.txt`에 정의된 모든 Python 의존성을 갖춘 Docker 구성이 포함되어 있습니다. 이를 통해 격리된 환경에서 에이전트를 일관되게 실행할 수 있습니다. + +아래는 컨테이너로 에이전트를 실행하는 설정 방법입니다. + +### 이미지 빌드 + +리포지토리 루트에서: + +```bash +docker build -t craftbot . +``` + +### 컨테이너 실행 + +이 이미지는 기본적으로 `python -m app.main`으로 에이전트를 실행하도록 구성되어 있습니다. 대화형으로 실행하려면: + +```bash +docker run --rm -it craftbot +``` + +환경 변수를 제공하려면 env 파일을 전달하세요 (예: `.env.example` 기반): + +```bash +docker run --rm -it --env-file .env craftbot +``` + +컨테이너 외부에 유지해야 하는 디렉터리(데이터, 캐시 폴더 등)는 `-v`를 사용해 마운트하고, 배포 환경에 맞게 포트나 추가 플래그를 조정하세요. 이미지에는 OCR(`tesseract`)과 일반 HTTP 클라이언트 등의 시스템 의존성이 포함되어 있어 컨테이너 내에서 파일과 네트워크 API를 처리할 수 있습니다. + +기본적으로 이미지는 Python 3.10을 사용하고 `environment.yml`/`requirements.txt`의 Python 의존성을 번들로 포함하므로 `python -m app.main`이 바로 동작합니다. + +--- + +## 🤝 기여 방법 + +PR을 환영합니다! 워크플로우(포크 → `dev`에서 브랜치 생성 → PR)는 [CONTRIBUTING.md](CONTRIBUTING.md)를 참고하세요. 모든 풀 리퀘스트는 린트 + 스모크 테스트 CI를 자동으로 거칩니다. 질문이 있거나 더 빠른 대화를 원하시면 [Discord](https://discord.gg/ZN9YHc37HG)에 참여하거나 thamyikfoong(at)craftos.net로 이메일을 보내주세요. + +## 🧾 라이선스 + +이 프로젝트는 [MIT 라이선스](LICENSE)로 배포됩니다. 이 프로젝트를 자유롭게 사용, 호스팅, 수익화할 수 있습니다(배포 및 수익화 시 이 프로젝트를 크레딧으로 명시해야 합니다). + +--- + +## ⭐ 감사의 말 + +[CraftOS](https://craftos.net/)와 기여자 [@zfoong](https://github.com/zfoong), [@ahmad-ajmal](https://github.com/ahmad-ajmal)이 개발 및 유지 관리하고 있습니다. +**CraftBot**이 유용하다고 느끼신다면 리포지토리에 ⭐를 눌러주시고 다른 분들에게도 공유해 주세요! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.md b/README.md index 38b74939..c8d886c8 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,14 @@

- 日本語版はこちら | 中文版README + 日本語 | 简体中文 | 繁體中文 | 한국어 | Español | Português | Français | Deutsch

## 🚀 Overview @@ -49,13 +52,14 @@ CraftBot awaits your orders. Set up your own CraftBot now. - **Bring Your Own Key (BYOK)** — Flexible LLM provider system supporting OpenAI, Google Gemini, Anthropic Claude, BytePlus, and local Ollama models. Easily switch between providers. - **Memory System** — Distill and consolidate events that happened through the day at midnight. - **Proactive Agent** — Learn your preferences, habits, and life goals. Then, perform planning and initiate tasks (with approval, of course) to help you improve in life. +- **Living UI** — Build, import, or evolve custom apps that live inside CraftBot. The agent stays aware of the UI's state and can read, write, and act on its data directly. - **External Tools Integration** — Connect to Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord, and Telegram (more to come!) with embedded credentials and OAuth support. - **MCP** — Model Context Protocol integration for extending agent capabilities with external tools and services. - **Skills** — Extensible skill framework with built-in skills for task planning, research, code review, git operations, and more. -- **Cross-Platform** — Full support for Windows and Linux with platform-specific code variants and Docker containerization. +- **Cross-Platform** — Full support for Windows, macOS, and Linux with platform-specific code variants and Docker containerization. > [!IMPORTANT] -> **Note for GUI mode:** The GUI mode is still in experimental phase. This means you may encounter issues when the agent switches to GUI mode. We are actively improving this feature. +> **GUI mode is deprecated.** CraftBot no longer supports GUI (desktop automation) mode. Please use Browser, TUI, or CLI mode instead.
CraftBot Banner @@ -71,26 +75,107 @@ CraftBot awaits your orders. Set up your own CraftBot now. - Python **3.10+** - `git` (required to clone the repository) - An API key for your chosen LLM provider (OpenAI, Gemini, or Anthropic) -- `Node.js` **18+** (optional - only required for browser interface) -- `conda` (optional - if not found, installer offers to auto-install Miniconda) +- `Node.js` **18+** (optional — only required for browser interface) +- `conda` (optional — only required for the conda setup path) + +--- + +### Which setup should I use? + +> **Not sure? Use Option 1.** It handles everything for you. + +| | Option 1 — Service | Option 2 — Conda | Option 3 — Manual | +|---|---|---|---| +| **Who it's for** | Most users, first-timers, testing | Conda users who want isolated envs | Power users, custom Python, full control | +| **Manages Python & env?** | ✅ Automatic | ✅ Automatic | ❌ You manage it | +| **Runs in background?** | ✅ Yes, as a service | ❌ No | ❌ No | +| **How to start** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ Option 1 — Service Install (Recommended) + +**Use this if:** you want CraftBot to just work — background service, auto-start on login, desktop shortcut, no manual steps. + +`craftbot.py` handles everything: Python environment, dependencies, background process management, and auto-start registration. + +```bash +# 1. Clone the repository +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Install, register auto-start, and launch CraftBot +python craftbot.py install +``` + +That's it. The terminal closes itself, CraftBot runs in the background, and the browser opens automatically. A **desktop shortcut** is created so you can reopen the browser anytime. 
-### Quick Install +**Managing the service after install:** ```bash -# Clone the repository -git clone https://github.com/zfoong/CraftBot.git +python craftbot.py start # Start CraftBot in the background +python craftbot.py stop # Stop CraftBot +python craftbot.py restart # Restart CraftBot +python craftbot.py status # Check if it's running and if auto-start is enabled +python craftbot.py logs # See recent log output +python craftbot.py uninstall # Stop, remove auto-start, and uninstall packages +``` + +> [!TIP] +> After `install` or `start`, a **CraftBot desktop shortcut** is created automatically. If you close the browser, just double-click the shortcut to reopen it. + +--- + +### Option 2 — Conda Install + +**Use this if:** you already use conda and want CraftBot in an isolated conda environment. + +`install.py --conda` sets up a dedicated `craftbot` conda environment. If Miniconda is not found on your system, it will be installed automatically. + +```bash +# 1. Clone the repository +git clone https://github.com/CraftOS-dev/CraftBot.git cd CraftBot -# Install dependencies +# 2. Install into a conda environment +python install.py --conda + +# 3. Run CraftBot +conda run -n craftbot python run.py + +# If conda is not in PATH (Windows only): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> Each time you want to run CraftBot, use `conda run -n craftbot python run.py`. There is no background service — you start and stop it yourself. + +--- + +### Option 3 — Manual Install (pip) + +**Use this if:** you want full control over your Python environment and prefer managing CraftBot yourself with no automatic service or background process. + +`install.py` (no flags) does a standard pip install into whichever Python environment is currently active. You start and stop CraftBot manually using `run.py`. + +```bash +# 1. Clone the repository +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 
Install dependencies into your active Python environment python install.py -# Run the agent +# 3. Run CraftBot python run.py ``` -That's it! The first run will guide you through setting up your API keys. +The first run will guide you through setting up your API keys and preferences. -**Note:** If you don't have Node.js installed, the installer will guide you with step-by-step instructions. You can also skip browser mode and use TUI instead (see modes below). +> [!NOTE] +> If Node.js is not installed, the installer will provide step-by-step instructions. You can also skip browser mode entirely and use TUI mode — no Node.js required: `python run.py --tui` + +--- ### What you can do right after? - Talk to the agent naturally @@ -111,12 +196,57 @@ CraftBot supports multiple UI modes. Choose based on your preference: | **Browser** | `python run.py` | Node.js 18+ | Modern web interface, easiest to use | | **TUI** | `python run.py --tui` | None | Terminal UI, no dependencies needed | | **CLI** | `python run.py --cli` | None | Command-line, lightweight | -| **GUI** | `python run.py --gui` | `install.py --gui` | Desktop automation with visual feedback | **Browser mode** is the default and recommended. If you don't have Node.js, the installer will provide installation instructions or you can use **TUI mode** instead. --- +## 🧬 Living UI + +**Living UI is a system/app/dashboard that evolve with your needs.** + +Need a kanban board with an AI co-pilot built in? A custom CRM shaped exactly +like your workflow? A company dashboard that CraftBot can read and drive on +your behalf? Spin it up as a Living UI that runs alongside CraftBot and grows as your needs change. + +
+ Living UI example +
+ +### Three ways to create a Living UI + +1. **Build from scratch.** Describe what you want in plain language. CraftBot + scaffolds the data model, backend API, and React UI, then iterates with + you through a structured design process. + +
+ Building a Living UI from scratch +
+ +2. **Install from the marketplace.** Browse community-built Living UIs from [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace). + +
+ Living UI marketplace +
+ +3. **Import an existing project.** Point CraftBot at a Go, Node.js, Python, + Rust, or static source code or github repo. It detects the runtime, configures health checks, and wraps it as a Living UI. + +
+ Importing an existing project as a Living UI +
+ +### Keeps evolving with CraftBot inside the loop + +A Living UI is never "finished." Ask the agent to add features, redesign +a view, or hook it into new data as your needs grow. + +CraftBot is embedded in every Living UI and **context-aware of its state**: +it can read the current DOM and form values, query app data through the +REST API, and trigger actions on your behalf. + +--- + ## 🧩 Architecture Overview | Component | Description | @@ -133,7 +263,6 @@ CraftBot supports multiple UI modes. Choose based on your preference: | **Skill Manager** | Loads and injects pluggable skills into the agent context. | | **MCP Adapter** | Model Context Protocol integration that converts MCP tools into native actions. | | **TUI Interface** | Terminal user interface built with Textual framework for interactive command-line operation. | -| **GUI Module** | Experimental GUI automation using Docker containers, OmniParser for UI element detection, and Gradio client. | --- @@ -143,84 +272,54 @@ CraftBot supports multiple UI modes. Choose based on your preference: - [ ] **External Tool integration** — Still adding more! - [X] **MCP Layer** — Done. - [X] **Skill Layer** — Done. -- [X] **Proactive Behaviour** — Pending +- [ ] **Proactive Behaviour** — Pending --- -## 🖥️ GUI Mode (Optional) - -GUI mode enables screen automation - the agent can see and interact with a desktop environment. This is optional and requires additional setup. - -```bash -# Install with GUI support (using pip, no conda required) -python install.py --gui - -# Install with GUI support and conda -python install.py --gui --conda +## 📋 Command Reference -# Run with GUI mode -python run.py --gui -``` +### craftbot.py — Automatic Setup (Recommended) -> [!NOTE] -> GUI mode is experimental and requires additional dependencies (~4GB for model weights). If you don't need desktop automation, skip this and use Browser/TUI mode instead which has no additional dependencies. 
+| Command | Description | +|---------|-------------| +| `python craftbot.py install` | Install dependencies, register auto-start on login, start CraftBot, open browser, and close the terminal automatically | +| `python craftbot.py start` | Start CraftBot in the background — auto-restarts if already running (terminal closes automatically) | +| `python craftbot.py stop` | Stop CraftBot | +| `python craftbot.py restart` | Stop and start CraftBot | +| `python craftbot.py status` | Check if CraftBot is running and if auto-start is enabled | +| `python craftbot.py logs` | Show recent log output (`-n 100` for more lines) | +| `python craftbot.py uninstall` | Stop CraftBot, remove auto-start registration, uninstall pip packages, and purge pip cache | --- -## 📋 Command Reference - -### install.py +### install.py — Manual Setup | Flag | Description | |------|-------------| -| `--gui` | Install GUI components (OmniParser) | -| `--conda` | Use conda environment (optional) | -| `--cpu-only` | Install CPU-only PyTorch (with --gui) | - -### run.py - -| Flag | Description | -|------|-------------| -| (none) | Run in **Browser** mode (recommended, requires Node.js) | -| `--tui` | Run in **Terminal UI** mode (no dependencies needed) | -| `--cli` | Run in **CLI** mode (lightweight) | -| `--gui` | Enable GUI automation mode (requires `install.py --gui` first) | - -### service.py +| (none) | Standard pip install — uses your active Python environment | +| `--conda` | Install into a conda environment (auto-installs Miniconda if not found) | -| Command | Description | -|---------|-------------| -| `install` | Install deps, register auto-start, and start CraftBot | -| `start` | Start CraftBot in the background | -| `stop` | Stop CraftBot | -| `restart` | Stop then start | -| `status` | Show running status and auto-start state | -| `logs [-n N]` | Show last N log lines (default: 50) | -| `uninstall` | Remove auto-start registration | - -**Installation Examples:** ```bash -# Simple pip 
installation (no conda) +# Standard pip install python install.py -# With GUI support (using pip, no conda) -python install.py --gui - -# With GUI on CPU-only systems (using pip, no conda) -python install.py --gui --cpu-only - -# With conda environment (recommended for conda users) +# With conda environment python install.py --conda +``` + +--- -# With GUI support and conda -python install.py --gui --conda +### run.py — Running CraftBot (Manual Setup Only) -# With GUI on CPU-only systems with conda -python install.py --gui --conda --cpu-only -``` +> If you used `craftbot.py install`, CraftBot starts automatically. Use `run.py` only when running manually. -**Running CraftBot:** +| Flag | Description | +|------|-------------| +| (none) | Run in **Browser** mode (recommended, requires Node.js) | +| `--tui` | Run in **Terminal UI** mode (no dependencies needed) | +| `--cli` | Run in **CLI** mode (lightweight) | +**Windows (PowerShell):** ```powershell # Browser mode (default, requires Node.js) python run.py @@ -231,9 +330,6 @@ python run.py --tui # CLI mode (lightweight) python run.py --cli -# With GPU/GUI mode -python run.py --gui - # With conda environment conda run -n craftbot python run.py @@ -243,55 +339,14 @@ conda run -n craftbot python run.py **Linux/macOS (Bash):** ```bash -# Browser mode (default, requires Node.js) -python run.py - -# TUI mode (no Node.js required) -python run.py --tui - -# CLI mode (lightweight) -python run.py --cli - -# With GPU/GUI mode -python run.py --gui +python run.py # Browser mode +python run.py --tui # TUI mode +python run.py --cli # CLI mode # With conda environment conda run -n craftbot python run.py ``` -### 🔧 Background Service (Recommended) - -Run CraftBot as a background service so it stays running even after you close the terminal. A desktop shortcut is created automatically so you can reopen the browser anytime. 
- -```bash -# Install dependencies, register auto-start on login, and start CraftBot -python service.py install -``` - -That's it. The terminal closes itself, CraftBot runs in the background, and the browser opens automatically. - -```bash -# Other service commands: -python service.py start # Start CraftBot in background -python service.py status # Check if it's running -python service.py stop # Stop CraftBot -python service.py restart # Restart CraftBot -python service.py logs # See recent log output -``` - -| Command | Description | -|---------|-------------| -| `python service.py install` | Install dependencies, register auto-start on login, start CraftBot, open browser, and close the terminal automatically | -| `python service.py start` | Start CraftBot in the background — auto-restarts if already running (terminal closes automatically) | -| `python service.py stop` | Stop CraftBot | -| `python service.py restart` | Stop and start CraftBot | -| `python service.py status` | Check if CraftBot is running and if auto-start is enabled | -| `python service.py logs` | Show recent log output (`-n 100` for more lines) | -| `python service.py uninstall` | Stop CraftBot, remove auto-start registration, uninstall pip packages, and purge pip cache | - -> [!TIP] -> After `service.py start` or `service.py install`, a **CraftBot desktop shortcut** is created automatically. If you accidentally close the browser, just double-click the shortcut to reopen it. - > [!NOTE] > **Installation:** The installer now provides clear guidance if dependencies are missing. If Node.js is not found, you'll be prompted to install it or can switch to TUI mode. Installation automatically detects GPU availability and falls back to CPU-only mode if needed. 
@@ -303,7 +358,7 @@ python service.py logs # See recent log output --- -## � Troubleshooting & Common Issues +## 🔧 Troubleshooting & Common Issues ### Missing Node.js (for Browser Mode) If you see **"npm not found in PATH"** when running `python run.py`: @@ -328,15 +383,12 @@ Playwright chromium installation is optional. If it fails: - You can skip it or install later: `playwright install chromium` - Only needed for WhatsApp Web integration -### GPU/CUDA Issues -The installer automatically detects GPU availability: -- If CUDA installation fails, it falls back to CPU mode automatically -- For manual CPU setup: `python install.py --gui --cpu-only` - For detailed troubleshooting, see [INSTALLATION_FIX.md](INSTALLATION_FIX.md). --- +## 🔌 External Service Integration + The agent can connect to various services using OAuth. Release builds come with embedded credentials, but you can also use your own. ### Quick Start @@ -411,7 +463,7 @@ LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret 4. Copy Client ID and Client Secret --- -## Run with container +## 🐳 Run with Container The repository root included a Docker configuration with Python 3.10, key system packages (including Tesseract for OCR), and all Python dependencies defined in `environment.yml`/`requirements.txt` so the agent can run consistently in isolated environments. @@ -439,29 +491,7 @@ If you need to supply environment variables, pass an env file (for example, base docker run --rm -it --env-file .env craftbot ``` -Mount any directories that should persist outside the container (such as data or cache folders) using `-v`, and adjust ports or additional flags as needed for your deployment. The container ships with system dependencies for OCR (`tesseract`), screen automation (`pyautogui`, `mss`, X11 utilities, and a virtual framebuffer), and common HTTP clients so the agent can work with files, network APIs, and GUI automation inside the container. 
- -### Enabling GUI/screen automation - -GUI actions (mouse/keyboard events, screenshots) require an X11 server. You can either attach to your host display or run headless with `xvfb`: - -* Use the host display (requires Linux with X11): - - ```bash - docker run --rm -it - -e DISPLAY=$DISPLAY \ - -v /tmp/.X11-unix:/tmp/.X11-unix \ - -v $(pwd)/data:/app/app/data \ - craftbot - ``` - - Add extra `-v` mounts for any folders the agent should read/write. - -* Run headlessly with a virtual display: - - ```bash - docker run --rm -it --env-file .env craftbot bash -lc "Xvfb :99 -screen 0 1920x1080x24 & export DISPLAY=:99 && exec python -m app.main" - ``` +Mount any directories that should persist outside the container (such as data or cache folders) using `-v`, and adjust ports or additional flags as needed for your deployment. The container ships with system dependencies for OCR (`tesseract`) and common HTTP clients so the agent can work with files and network APIs inside the container. By default the image uses Python 3.10 and bundles the Python dependencies from `environment.yml`/`requirements.txt`, so `python -m app.main` works out of the box. @@ -469,7 +499,7 @@ By default the image uses Python 3.10 and bundles the Python dependencies from ` ## 🤝 How to Contribute -Contributions and suggestions are welcome! You can contact [@zfoong](https://github.com/zfoong) @ thamyikfoong(at)craftos.net. We currently don't have checks set up, so we can't allow direct contributions but we appreciate any suggestions and feedback. +PRs are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for the workflow (fork → branch from `dev` → PR). All pull requests run through lint + smoke-test CI automatically. For questions or a faster conversation, join us on [Discord](https://discord.gg/ZN9YHc37HG) or email thamyikfoong(at)craftos.net. ## 🧾 License @@ -481,3 +511,15 @@ This project is licensed under the [MIT License](LICENSE). 
You are free to use, Developed and maintained by [CraftOS](https://craftos.net/) and contributors [@zfoong](https://github.com/zfoong) and [@ahmad-ajmal](https://github.com/ahmad-ajmal). If you find **CraftBot** useful, please ⭐ the repository and share it with others! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.pt-BR.md b/README.pt-BR.md new file mode 100644 index 00000000..877c5ef8 --- /dev/null +++ b/README.pt-BR.md @@ -0,0 +1,557 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 繁體中文 | 한국어 | Español | Français | Deutsch +

+ +## 🚀 Visão geral +

+O CraftBot é o seu Assistente de IA Pessoal, que vive dentro da sua máquina e trabalha 24/7 para você. +

+ +Ele interpreta tarefas de forma autônoma, planeja ações e as executa para alcançar seus objetivos. +Aprende suas preferências e metas, ajudando-o proativamente a planejar e iniciar tarefas para atingir seus objetivos de vida. +Suporta MCPs, Skills e integrações com aplicativos externos. + +O CraftBot aguarda suas ordens. Configure o seu agora mesmo. + +
+ CraftBot Overview +
+ +--- + +## ✨ Recursos + +- **Bring Your Own Key (BYOK)** — Sistema flexível de provedores de LLM com suporte a OpenAI, Google Gemini, Anthropic Claude, BytePlus e modelos locais do Ollama. Troque de provedor com facilidade. +- **Sistema de Memória** — Destila e consolida os eventos ocorridos durante o dia à meia-noite. +- **Agente Proativo** — Aprende suas preferências, hábitos e metas de vida. Depois, planeja e inicia tarefas (com sua aprovação, claro) para ajudá-lo a evoluir. +- **Living UI** — Construa, importe ou evolua aplicativos personalizados que vivem dentro do CraftBot. O agente permanece ciente do estado da UI e pode ler, escrever e agir sobre seus dados diretamente. +- **Integração com ferramentas externas** — Conecte-se a Google Workspace, Slack, Notion, Zoom, LinkedIn, Discord e Telegram (mais a caminho!) com credenciais embutidas e suporte a OAuth. +- **MCP** — Integração com o Model Context Protocol para ampliar as capacidades do agente com ferramentas e serviços externos. +- **Skills** — Framework de skills extensível com skills embutidas para planejamento de tarefas, pesquisa, revisão de código, operações de git e muito mais. +- **Multiplataforma** — Suporte completo para Windows, macOS e Linux, com variantes de código específicas por plataforma e conteinerização via Docker. + +> [!IMPORTANT] +> **O modo GUI foi descontinuado.** O CraftBot não oferece mais suporte ao modo GUI (automação de desktop). Use os modos Browser, TUI ou CLI em vez disso. + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 Começando + +### Pré-requisitos +- Python **3.10+** +- `git` (necessário para clonar o repositório) +- Uma chave de API do provedor LLM escolhido (OpenAI, Gemini ou Anthropic) +- `Node.js` **18+** (opcional — necessário apenas para a interface no navegador) +- `conda` (opcional — se não for encontrado, o instalador pode instalar o Miniconda automaticamente) + +### Qual opção devo usar? + +> **Não sabe qual escolher? Use a Opção 1.** Ela cuida de tudo por você. + +| | Opção 1 — Serviço | Opção 2 — Conda | Opção 3 — Manual | +|---|---|---|---| +| **Para quem** | A maioria dos usuários, iniciantes, testes | Usuários de Conda que querem ambientes isolados | Usuários avançados, Python personalizado, controle total | +| **Gerencia Python/ambiente automaticamente?** | ✅ Automático | ✅ Automático | ❌ Você gerencia | +| **Roda em segundo plano?** | ✅ Sim, como serviço | ❌ Não | ❌ Não | +| **Como começar** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ Opção 1 — Instalação como serviço (Recomendada) + +**Use esta se:** você quer que o CraftBot simplesmente funcione — serviço em segundo plano, início automático no login, atalho na área de trabalho, sem passos manuais. + +O `craftbot.py` cuida de tudo: ambiente Python, dependências, gerenciamento de processos em segundo plano e registro de início automático. + +```bash +# 1. Clone o repositório +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Instale, registre o início automático e inicie o CraftBot +python craftbot.py install +``` + +É isso. O terminal se fecha sozinho, o CraftBot roda em segundo plano e o navegador abre automaticamente. Um **atalho na área de trabalho** é criado para que você possa reabrir o navegador a qualquer momento. 
+ +**Gerenciando o serviço após a instalação:** + +```bash +python craftbot.py start # Iniciar o CraftBot em segundo plano +python craftbot.py stop # Parar o CraftBot +python craftbot.py restart # Reiniciar o CraftBot +python craftbot.py status # Verificar se está rodando e se o início automático está ativo +python craftbot.py logs # Ver o log recente +python craftbot.py uninstall # Parar, remover início automático e desinstalar pacotes +``` + +> [!TIP] +> Após `install` ou `start`, um **atalho do CraftBot na área de trabalho** é criado automaticamente. Se você fechar o navegador, basta dar um duplo clique no atalho para reabri-lo. + +--- + +### Opção 2 — Instalação com Conda + +**Use esta se:** você já usa conda e quer o CraftBot em um ambiente conda isolado. + +O `install.py --conda` configura um ambiente conda dedicado `craftbot`. Se o Miniconda não for encontrado no seu sistema, ele será instalado automaticamente. + +```bash +# 1. Clone o repositório +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. Instale em um ambiente conda +python install.py --conda + +# 3. Execute o CraftBot +conda run -n craftbot python run.py + +# Se o conda não estiver no PATH (somente Windows): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> Sempre que quiser rodar o CraftBot, use `conda run -n craftbot python run.py`. Não há serviço em segundo plano — você inicia e para manualmente. + +--- + +### Opção 3 — Instalação manual (pip) + +**Use esta se:** você quer controle total sobre seu ambiente Python e prefere gerenciar o CraftBot por conta própria, sem serviço automático ou processo em segundo plano. + +O `install.py` (sem flags) faz uma instalação pip padrão no ambiente Python ativo. Você inicia e para o CraftBot manualmente com `run.py`. + +```bash +# 1. Clone o repositório +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 
Instale as dependências no seu ambiente Python ativo +python install.py + +# 3. Execute o CraftBot +python run.py +``` + +Na primeira execução, você será guiado para configurar suas chaves de API e preferências. + +> [!NOTE] +> Se o Node.js não estiver instalado, o instalador fornecerá instruções passo a passo. Você também pode pular completamente o modo navegador e usar o modo TUI — sem Node.js: `python run.py --tui` + +### O que você pode fazer logo de cara? +- Conversar com o agente de forma natural +- Pedir que ele execute tarefas complexas de várias etapas +- Digitar `/help` para ver os comandos disponíveis +- Conectar-se ao Google, Slack, Notion e muito mais + +### 🖥️ Modos de interface + +
+ CraftOS Banner +
+ +O CraftBot oferece vários modos de UI. Escolha conforme sua preferência: + +| Modo | Comando | Requisitos | Indicado para | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | Interface web moderna, a mais fácil de usar | +| **TUI** | `python run.py --tui` | Nenhum | UI em terminal, sem dependências | +| **CLI** | `python run.py --cli` | Nenhum | Linha de comando, leve | + +O **modo Browser** é o padrão e recomendado. Se não tiver o Node.js, o instalador fornecerá instruções de instalação, ou você pode usar o **modo TUI**. + +--- + +## 🧬 Living UI + +**Living UI é um sistema/app/dashboard que evolui com suas necessidades.** + +Precisa de um quadro kanban com um copiloto de IA embutido? Um CRM personalizado +moldado exatamente para o seu fluxo de trabalho? Um dashboard corporativo que o +CraftBot possa ler e operar por você? Coloque-o no ar como uma Living UI — ela +roda junto ao CraftBot e cresce conforme suas necessidades mudam. + +
+ Living UI example +
+ +### Três formas de criar uma Living UI + +1. **Construir do zero.** Descreva o que você quer em linguagem natural. O CraftBot + monta o modelo de dados, a API do backend e a UI em React, e itera com você + por meio de um processo de design estruturado. + +
+ Building a Living UI from scratch +
+ +2. **Instalar do marketplace.** Explore Living UIs criadas pela comunidade em [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace). + +
+ Living UI marketplace +
+ +3. **Importar um projeto existente.** Aponte o CraftBot para um código-fonte ou + repositório do GitHub em Go, Node.js, Python, Rust ou estático. Ele detecta o + runtime, configura os health checks e o empacota como uma Living UI. + +
+ Importing an existing project as a Living UI +
+ +### Continua evoluindo com o CraftBot dentro do loop + +Uma Living UI nunca está "pronta". Peça ao agente para adicionar funcionalidades, +redesenhar uma visualização ou conectá-la a novos dados conforme suas necessidades +crescem. + +O CraftBot está integrado a cada Living UI e é **consciente do seu estado**: +ele pode ler o DOM atual e os valores de formulário, consultar dados do app +pela API REST, e disparar ações em seu nome. + +--- + +## 🧩 Visão geral da arquitetura + +| Componente | Descrição | +|-----------|-------------| +| **Agent Base** | Camada central de orquestração que gerencia o ciclo de vida das tarefas, coordena os componentes e cuida do loop principal do agente. | +| **LLM Interface** | Interface unificada com suporte a vários provedores de LLM (OpenAI, Gemini, Anthropic, BytePlus, Ollama). | +| **Context Engine** | Gera prompts otimizados com suporte a KV-cache. | +| **Action Manager** | Recupera e executa ações da biblioteca. Ações personalizadas são fáceis de estender. | +| **Action Router** | Seleciona de forma inteligente a ação que melhor corresponde aos requisitos da tarefa e resolve parâmetros de entrada via LLM quando necessário. | +| **Event Stream** | Sistema de publicação de eventos em tempo real para acompanhar o progresso das tarefas, atualizar a UI e monitorar a execução. | +| **Memory Manager** | Memória semântica baseada em RAG usando o ChromaDB. Lida com chunking, embeddings, recuperação e atualizações incrementais. | +| **State Manager** | Gerenciamento global de estado para rastrear contexto de execução do agente, histórico de conversas e configurações de runtime. | +| **Task Manager** | Gerencia definições de tarefas, habilita modos simples e complexos, cria to-dos e rastreia workflows multi-etapa. | +| **Skill Manager** | Carrega e injeta skills plugáveis no contexto do agente. | +| **MCP Adapter** | Integração com o Model Context Protocol que converte ferramentas MCP em ações nativas. 
| +| **TUI Interface** | Interface de usuário no terminal construída com o framework Textual para operação interativa por linha de comando. | + +--- + +## 🔜 Roadmap + +- [X] **Módulo de memória** — Concluído. +- [ ] **Integração com ferramentas externas** — Ainda adicionando mais! +- [X] **Camada MCP** — Concluída. +- [X] **Camada de Skills** — Concluída. +- [ ] **Comportamento proativo** — Em andamento + +--- + +## 📋 Referência de comandos + +### install.py + +| Flag | Descrição | +|------|-------------| +| `--conda` | Usa ambiente conda (opcional) | + +### run.py + +| Flag | Descrição | +|------|-------------| +| (nenhum) | Executa no modo **Browser** (recomendado, requer Node.js) | +| `--tui` | Executa no modo **Terminal UI** (sem dependências) | +| `--cli` | Executa no modo **CLI** (leve) | + +### craftbot.py + +| Comando | Descrição | +|---------|-------------| +| `install` | Instala deps, registra auto-start e inicia o CraftBot | +| `start` | Inicia o CraftBot em segundo plano | +| `stop` | Para o CraftBot | +| `restart` | Para e inicia novamente | +| `status` | Mostra o status de execução e do auto-start | +| `logs [-n N]` | Mostra as últimas N linhas do log (padrão: 50) | +| `uninstall` | Remove o registro do auto-start | + +**Exemplos de instalação:** +```bash +# Instalação simples via pip (sem conda) +python install.py + +# Com ambiente conda (recomendado para usuários de conda) +python install.py --conda +``` + +**Executando o CraftBot:** + +```powershell +# Modo Browser (padrão, requer Node.js) +python run.py + +# Modo TUI (não requer Node.js) +python run.py --tui + +# Modo CLI (leve) +python run.py --cli + +# Com ambiente conda +conda run -n craftbot python run.py + +# Ou usando caminho completo se o conda não estiver no PATH +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +**Linux/macOS (Bash):** +```bash +# Modo Browser (padrão, requer Node.js) +python run.py + +# Modo TUI (não requer Node.js) +python run.py 
--tui + +# Modo CLI (leve) +python run.py --cli + +# Com ambiente conda +conda run -n craftbot python run.py +``` + +### 🔧 Serviço em segundo plano (recomendado) + +Execute o CraftBot como um serviço em segundo plano para que ele continue rodando mesmo após fechar o terminal. Um atalho na área de trabalho é criado automaticamente, permitindo reabrir o navegador a qualquer momento. + +```bash +# Instala dependências, registra auto-start no login e inicia o CraftBot +python craftbot.py install +``` + +É isso. O terminal se fecha sozinho, o CraftBot roda em segundo plano e o navegador abre automaticamente. + +```bash +# Outros comandos do serviço: +python craftbot.py start # Inicia o CraftBot em segundo plano +python craftbot.py status # Verifica se está em execução +python craftbot.py stop # Para o CraftBot +python craftbot.py restart # Reinicia o CraftBot +python craftbot.py logs # Mostra logs recentes +``` + +| Comando | Descrição | +|---------|-------------| +| `python craftbot.py install` | Instala dependências, registra auto-start no login, inicia o CraftBot, abre o navegador e fecha o terminal automaticamente | +| `python craftbot.py start` | Inicia o CraftBot em segundo plano — reinicia automaticamente se já estiver rodando (o terminal se fecha sozinho) | +| `python craftbot.py stop` | Para o CraftBot | +| `python craftbot.py restart` | Para e inicia o CraftBot | +| `python craftbot.py status` | Verifica se o CraftBot está rodando e se o auto-start está habilitado | +| `python craftbot.py logs` | Mostra a saída recente do log (`-n 100` para mais linhas) | +| `python craftbot.py uninstall` | Para o CraftBot, remove o registro de auto-start, desinstala pacotes pip e limpa o cache do pip | + +> [!TIP] +> Após `craftbot.py start` ou `craftbot.py install`, um **atalho do CraftBot na área de trabalho** é criado automaticamente. Se você fechar o navegador por acidente, basta clicar duas vezes no atalho para reabri-lo. 
+ +> [!NOTE] +> **Instalação:** O instalador agora fornece orientações claras se faltarem dependências. Se o Node.js não for encontrado, você será orientado a instalá-lo ou poderá alternar para o modo TUI. A instalação detecta automaticamente a disponibilidade de GPU e recorre ao modo somente CPU quando necessário. + +> [!TIP] +> **Configuração inicial:** O CraftBot vai guiá-lo por um onboarding para configurar chaves de API, o nome do agente, MCPs e Skills. + +> [!NOTE] +> **Playwright Chromium:** Opcional para a integração com o WhatsApp Web. Se a instalação falhar, o agente continuará funcionando normalmente para outras tarefas. Instale manualmente depois com: `playwright install chromium` + +--- + +## 🔧 Solução de problemas e dúvidas comuns + +### Node.js ausente (para o modo navegador) +Se aparecer **"npm not found in PATH"** ao executar `python run.py`: +1. Baixe em [nodejs.org](https://nodejs.org/) (escolha a versão LTS) +2. Instale e reinicie o terminal +3. Execute `python run.py` novamente + +**Alternativa:** Use o modo TUI (sem necessidade de Node.js): +```bash +python run.py --tui +``` + +### A instalação falha nas dependências +O instalador agora fornece mensagens de erro detalhadas com soluções. Se a instalação falhar: +- **Verifique a versão do Python:** tenha o Python 3.10+ (`python --version`) +- **Verifique sua internet:** as dependências são baixadas durante a instalação +- **Limpe o cache do pip:** `pip install --upgrade pip` e tente de novo + +### Problemas com a instalação do Playwright +A instalação do Playwright Chromium é opcional. Se falhar: +- O agente **continuará funcionando** para outras tarefas +- Você pode pular ou instalar depois: `playwright install chromium` +- Só é necessário para a integração com o WhatsApp Web + +Para uma solução detalhada, veja [INSTALLATION_FIX.md](INSTALLATION_FIX.md). + +--- + +## 🔌 Integração com serviços externos + +O agente pode se conectar a diversos serviços via OAuth. 
As builds de release vêm com credenciais embutidas, mas você também pode usar as suas. + +### Início rápido + +Para builds de release com credenciais embutidas: +``` +/google login # Conectar ao Google Workspace +/zoom login # Conectar ao Zoom +/slack invite # Conectar ao Slack +/notion invite # Conectar ao Notion +/linkedin login # Conectar ao LinkedIn +``` + +### Detalhes do serviço + +| Serviço | Tipo de auth | Comando | Requer segredo? | +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | Não (PKCE) | +| Zoom | PKCE | `/zoom login` | Não (PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | Sim | +| Notion | OAuth 2.0 | `/notion invite` | Sim | +| LinkedIn | OAuth 2.0 | `/linkedin login` | Sim | + +### Usando suas próprias credenciais + +Se preferir usar suas próprias credenciais OAuth, adicione-as ao arquivo `.env`: + +#### Google (PKCE — apenas Client ID) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. Acesse o [Google Cloud Console](https://console.cloud.google.com/) +2. Habilite as APIs de Gmail, Calendar, Drive e People +3. Crie credenciais OAuth do tipo **Desktop app** +4. Copie o Client ID (o secret não é necessário com PKCE) + +#### Zoom (PKCE — apenas Client ID) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. Acesse o [Zoom Marketplace](https://marketplace.zoom.us/) +2. Crie um app OAuth +3. Copie o Client ID + +#### Slack (requer ambos) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. Acesse o [Slack API](https://api.slack.com/apps) +2. Crie um novo app +3. Adicione os escopos OAuth: `chat:write`, `channels:read`, `users:read` etc. +4. Copie o Client ID e o Client Secret + +#### Notion (requer ambos) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. Acesse o [Notion Developers](https://developers.notion.com/) +2. 
Crie uma nova integração (Public integration) +3. Copie o OAuth Client ID e o Secret + +#### LinkedIn (requer ambos) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. Acesse o [LinkedIn Developers](https://developer.linkedin.com/) +2. Crie um app +3. Adicione os escopos OAuth 2.0 +4. Copie o Client ID e o Client Secret + +--- +## 🐳 Executar com contêiner + +A raiz do repositório inclui uma configuração Docker com Python 3.10, pacotes de sistema essenciais (incluindo Tesseract para OCR) e todas as dependências Python definidas em `environment.yml`/`requirements.txt`, para que o agente execute de forma consistente em ambientes isolados. + +Abaixo estão as instruções de configuração para rodar nosso agente em contêiner. + +### Construir a imagem + +Na raiz do repositório: + +```bash +docker build -t craftbot . +``` + +### Executar o contêiner + +A imagem está configurada para iniciar o agente com `python -m app.main` por padrão. Para executar interativamente: + +```bash +docker run --rm -it craftbot +``` + +Se precisar fornecer variáveis de ambiente, passe um arquivo env (por exemplo, baseado em `.env.example`): + +```bash +docker run --rm -it --env-file .env craftbot +``` + +Monte quaisquer diretórios que devam persistir fora do contêiner (como pastas de dados ou cache) usando `-v`, e ajuste portas e outras flags conforme necessário para sua implantação. A imagem traz dependências de sistema para OCR (`tesseract`) e clientes HTTP comuns, para que o agente trabalhe com arquivos e APIs de rede dentro do contêiner. + +Por padrão, a imagem usa Python 3.10 e empacota as dependências Python de `environment.yml`/`requirements.txt`, portanto `python -m app.main` funciona de imediato. + +--- + +## 🤝 Como contribuir + +PRs são bem-vindos! Consulte [CONTRIBUTING.md](CONTRIBUTING.md) para o fluxo (fork → branch a partir de `dev` → PR). Todos os pull requests passam automaticamente por lint + smoke-test no CI. 
Para dúvidas ou uma conversa mais rápida, entre no nosso [Discord](https://discord.gg/ZN9YHc37HG) ou envie e-mail para thamyikfoong(at)craftos.net. + +## 🧾 Licença + +Este projeto está licenciado sob a [Licença MIT](LICENSE). Você é livre para usar, hospedar e monetizar este projeto (é necessário dar crédito ao projeto em caso de distribuição e monetização). + +--- + +## ⭐ Agradecimentos + +Desenvolvido e mantido por [CraftOS](https://craftos.net/) e pelos contribuidores [@zfoong](https://github.com/zfoong) e [@ahmad-ajmal](https://github.com/ahmad-ajmal). +Se o **CraftBot** é útil para você, por favor dê uma ⭐ no repositório e compartilhe com outras pessoas! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/README.zh-TW.md b/README.zh-TW.md new file mode 100644 index 00000000..f769ad7d --- /dev/null +++ b/README.zh-TW.md @@ -0,0 +1,554 @@ + +
+ CraftBot Banner +
+
+ +
+ Windows + macOS + Linux + + + GitHub Repo stars + + + License + + + Discord + +
+
+ +[![SPONSORED BY E2B FOR STARTUPS](https://img.shields.io/badge/SPONSORED%20BY-E2B%20FOR%20STARTUPS-ff8800?style=for-the-badge)](https://e2b.dev/startups) + +CraftBot - Self-hosted proactive AI assistant that lives locally | Product Hunt +
+ +

+ English | 日本語 | 简体中文 | 한국어 | Español | Português | Français | Deutsch +

+ +## 🚀 概覽 +

+CraftBot 是你的個人 AI 助理,它駐留在你的裝置中,全天候為你服務。 +

+ +它會自主解讀任務、規劃行動並執行它們,協助你達成目標。 +它會學習你的偏好與目標,主動協助你規劃並展開任務,幫助你實現人生目標。 +支援 MCP、技能(Skills)以及外部應用整合。 + +CraftBot 正在等待你的指令,立刻建立屬於你自己的 CraftBot 吧。 + +
+ CraftBot Overview +
+ +--- + +## ✨ 功能特色 + +- **自帶金鑰(BYOK)** — 靈活的 LLM 供應商系統,支援 OpenAI、Google Gemini、Anthropic Claude、BytePlus 及本地 Ollama 模型,可輕鬆切換。 +- **記憶系統** — 每天午夜時分提煉並整合當日所發生的事件。 +- **主動式代理人** — 學習你的偏好、習慣與人生目標,接著進行規劃並(在取得同意後)主動啟動任務,協助你在生活中不斷進步。 +- **Living UI** — 在 CraftBot 中建立、匯入或演進自訂應用。代理人持續感知 UI 的狀態,並可直接讀取、寫入和操作其資料。 +- **外部工具整合** — 連接 Google Workspace、Slack、Notion、Zoom、LinkedIn、Discord 及 Telegram(更多服務陸續推出!),內建憑證與 OAuth 支援。 +- **MCP** — 整合 Model Context Protocol,以外部工具與服務擴充代理人的能力。 +- **技能(Skills)** — 可擴充的技能框架,內建任務規劃、研究、程式碼審查、Git 操作等多種技能。 +- **跨平台** — 完整支援 Windows、macOS 與 Linux,並提供對應的平台程式碼與 Docker 容器化。 + +> [!IMPORTANT] +> **GUI 模式已停用。** CraftBot 不再支援 GUI(桌面自動化)模式。請改用 Browser、TUI 或 CLI 模式。 + +
+ CraftBot Banner + CraftBot Banner +
+ +--- + + +## 🧰 快速開始 + +### 先決條件 +- Python **3.10+** +- `git`(複製儲存庫時需要) +- 所選 LLM 供應商的 API 金鑰(OpenAI、Gemini 或 Anthropic) +- `Node.js` **18+**(選用——僅於使用瀏覽器介面時需要) +- `conda`(選用——若未安裝,安裝程式可代為安裝 Miniconda) + +### 我該選哪種方式? + +> **不確定?選方案一。** 它會幫你搞定所有事。 + +| | 方案一 — 服務安裝 | 方案二 — Conda 安裝 | 方案三 — 手動安裝 | +|---|---|---|---| +| **適合誰** | 大多數使用者、新手、測試 | 想要獨立環境的 Conda 使用者 | 進階使用者、自訂 Python、完全控制 | +| **自動管理 Python 環境?** | ✅ 自動 | ✅ 自動 | ❌ 自己管理 | +| **背景執行?** | ✅ 是,作為服務 | ❌ 否 | ❌ 否 | +| **啟動方式** | `python craftbot.py install` | `python install.py --conda` | `python install.py` | + +--- + +### ⭐ 方案一 — 服務安裝(推薦) + +**適合你,如果:** 你希望 CraftBot 開箱即用——背景服務、開機自啟、桌面捷徑,無需手動操作。 + +`craftbot.py` 全程自動處理:Python 環境、相依套件安裝、背景程序管理和自啟註冊。 + +```bash +# 1. 複製儲存庫 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 安裝、註冊自啟並啟動 CraftBot +python craftbot.py install +``` + +這樣就完成了。終端機自動關閉,CraftBot 在背景執行,瀏覽器自動開啟。同時會建立**桌面捷徑**,隨時可重新開啟瀏覽器。 + +**安裝後的服務管理指令:** + +```bash +python craftbot.py start # 在背景啟動 CraftBot +python craftbot.py stop # 停止 CraftBot +python craftbot.py restart # 重啟 CraftBot +python craftbot.py status # 檢查是否執行中,自啟是否已啟用 +python craftbot.py logs # 查看最近記錄 +python craftbot.py uninstall # 停止、移除自啟並卸載套件 +``` + +> [!TIP] +> 執行 `install` 或 `start` 後,系統會自動建立 **CraftBot 桌面捷徑**。如果關閉了瀏覽器,雙擊捷徑即可重新開啟。 + +--- + +### 方案二 — Conda 安裝 + +**適合你,如果:** 你已在使用 conda,希望 CraftBot 在獨立的 conda 環境中執行。 + +`install.py --conda` 會建立專用的 `craftbot` conda 環境。若系統中未找到 Miniconda,會自動安裝。 + +```bash +# 1. 複製儲存庫 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 安裝到 conda 環境 +python install.py --conda + +# 3. 
執行 CraftBot +conda run -n craftbot python run.py + +# 若 conda 不在 PATH(僅 Windows): +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +> [!NOTE] +> 每次要執行 CraftBot 時,請使用 `conda run -n craftbot python run.py`。此方式沒有背景服務——由你手動啟停。 + +--- + +### 方案三 — 手動安裝(pip) + +**適合你,如果:** 你想完全掌控 Python 環境,不需要任何自動服務或背景程序,自己管理 CraftBot。 + +`install.py`(不帶旗標)會對目前啟用的 Python 環境執行標準 pip 安裝。透過 `run.py` 手動啟停 CraftBot。 + +```bash +# 1. 複製儲存庫 +git clone https://github.com/CraftOS-dev/CraftBot.git +cd CraftBot + +# 2. 在目前 Python 環境中安裝相依套件 +python install.py + +# 3. 執行 CraftBot +python run.py +``` + +首次執行時會引導你完成 API 金鑰設定與偏好設定。 + +> [!NOTE] +> 若尚未安裝 Node.js,安裝程式會提供逐步指引。你也可以完全略過瀏覽器模式,直接使用 TUI 模式——無需 Node.js:`python run.py --tui` + +### 立即能做什麼? +- 用自然語言與代理人對話 +- 請它執行複雜的多步驟任務 +- 輸入 `/help` 查看可用指令 +- 連接 Google、Slack、Notion 等服務 + +### 🖥️ 介面模式 + +
+ CraftOS Banner +
+ +CraftBot 支援多種 UI 模式,可依個人偏好選擇: + +| 模式 | 指令 | 需求 | 適用情境 | +|------|---------|--------------|----------| +| **Browser** | `python run.py` | Node.js 18+ | 現代化網頁介面,最易使用 | +| **TUI** | `python run.py --tui` | 無 | 終端機 UI,無須額外相依套件 | +| **CLI** | `python run.py --cli` | 無 | 命令列,輕量化 | + +**Browser 模式**為預設與建議選項。若沒有 Node.js,安裝程式會提供安裝指引,或你可改用 **TUI 模式**。 + +--- + +## 🧬 Living UI + +**Living UI 是隨你需求演進的系統/應用/儀表板。** + +需要一個內建 AI 副駕駛的看板?量身打造符合你工作流程的 CRM? +一個 CraftBot 能讀取並驅動的公司儀表板? +將它作為 Living UI 啟動——它與 CraftBot 並行運作,並隨著你的需求變化而成長。 + +
+ Living UI example +
+ +### 建立 Living UI 的三種方式 + +1. **從零開始建立。** 用自然語言描述你想要的。CraftBot 會搭建 + 資料模型、後端 API 與 React UI,並透過結構化的設計流程 + 與你一起迭代。 + +
+ Building a Living UI from scratch +
+ +2. **從市集安裝。** 從 [living-ui-marketplace](https://github.com/CraftOS-dev/living-ui-marketplace) 瀏覽社群打造的 Living UI。 + +
+ Living UI marketplace +
+ +3. **匯入既有專案。** 將 CraftBot 指向 Go、Node.js、Python、Rust 或靜態原始碼 + 或 GitHub 儲存庫。它會偵測執行環境、設定健康檢查,並將其包裝為 Living UI。 + +
+ Importing an existing project as a Living UI +
+ +### 與 CraftBot 身處其中,持續演進 + +Living UI 永遠不會「完成」。隨著你的需求成長,你可以請代理人加入功能、 +重新設計畫面,或將它接上新的資料來源。 + +CraftBot 嵌入在每個 Living UI 中,並**感知其狀態**: +它可以讀取當前的 DOM 與表單值,透過 REST API 查詢應用資料, +並代表你觸發操作。 + +--- + +## 🧩 架構概覽 + +| 元件 | 說明 | +|-----------|-------------| +| **Agent Base** | 負責管理任務生命週期、協調各元件並處理主要代理人迴圈的核心編排層。 | +| **LLM Interface** | 支援多家 LLM 供應商(OpenAI、Gemini、Anthropic、BytePlus、Ollama)的統一介面。 | +| **Context Engine** | 產生最佳化的 Prompt,支援 KV-Cache。 | +| **Action Manager** | 從動作庫中擷取並執行動作,方便擴充自訂動作。 | +| **Action Router** | 依任務需求智慧挑選最合適的動作,並在需要時透過 LLM 解析輸入參數。 | +| **Event Stream** | 即時事件發佈系統,用於任務進度追蹤、UI 更新與執行監控。 | +| **Memory Manager** | 以 ChromaDB 為基礎的 RAG 語意記憶,處理記憶分塊、嵌入、檢索與增量更新。 | +| **State Manager** | 全域狀態管理,追蹤代理人執行脈絡、對話歷史與執行期設定。 | +| **Task Manager** | 管理任務定義,支援簡單與複雜任務模式、待辦清單建立,以及多步驟流程追蹤。 | +| **Skill Manager** | 載入並將可插拔技能注入到代理人情境中。 | +| **MCP Adapter** | Model Context Protocol 整合,將 MCP 工具轉換為原生動作。 | +| **TUI Interface** | 以 Textual 框架打造的終端機使用者介面,提供互動式命令列操作。 | + +--- + +## 🔜 藍圖 + +- [X] **記憶模組** — 完成。 +- [ ] **外部工具整合** — 仍在持續新增! 
+- [X] **MCP 層** — 完成。 +- [X] **技能層** — 完成。 +- [ ] **主動式行為** — 進行中 + +--- + +## 📋 指令參考 + +### install.py + +| 旗標 | 說明 | +|------|-------------| +| `--conda` | 使用 conda 環境(選用) | + +### run.py + +| 旗標 | 說明 | +|------|-------------| +| (無) | 以 **Browser** 模式執行(建議,需 Node.js) | +| `--tui` | 以 **Terminal UI** 模式執行(無需額外相依) | +| `--cli` | 以 **CLI** 模式執行(輕量) | + +### craftbot.py + +| 指令 | 說明 | +|---------|-------------| +| `install` | 安裝相依套件、註冊開機自動啟動,並啟動 CraftBot | +| `start` | 在背景啟動 CraftBot | +| `stop` | 停止 CraftBot | +| `restart` | 停止後重新啟動 | +| `status` | 顯示執行狀態與自動啟動狀態 | +| `logs [-n N]` | 顯示最後 N 行記錄(預設 50) | +| `uninstall` | 移除自動啟動註冊 | + +**安裝範例:** +```bash +# 單純使用 pip 安裝(不使用 conda) +python install.py + +# 使用 conda 環境(建議給 conda 使用者) +python install.py --conda +``` + +**執行 CraftBot:** + +```powershell +# Browser 模式(預設,需 Node.js) +python run.py + +# TUI 模式(無需 Node.js) +python run.py --tui + +# CLI 模式(輕量) +python run.py --cli + +# 使用 conda 環境 +conda run -n craftbot python run.py + +# 若 conda 不在 PATH,使用完整路徑 +&"$env:USERPROFILE\miniconda3\Scripts\conda.exe" run -n craftbot python run.py +``` + +**Linux/macOS(Bash):** +```bash +# Browser 模式(預設,需 Node.js) +python run.py + +# TUI 模式(無需 Node.js) +python run.py --tui + +# CLI 模式(輕量) +python run.py --cli + +# 使用 conda 環境 +conda run -n craftbot python run.py +``` + +### 🔧 背景服務(建議) + +將 CraftBot 當成背景服務執行,即使關閉終端機仍能持續運作。系統會自動建立桌面捷徑,讓你隨時可重新開啟瀏覽器。 + +```bash +# 安裝相依套件、註冊登入時自動啟動並啟動 CraftBot +python craftbot.py install +``` + +這樣就完成了。終端機會自動關閉,CraftBot 在背景執行,瀏覽器也會自動開啟。 + +```bash +# 其他服務指令: +python craftbot.py start # 在背景啟動 CraftBot +python craftbot.py status # 檢查是否正在執行 +python craftbot.py stop # 停止 CraftBot +python craftbot.py restart # 重新啟動 CraftBot +python craftbot.py logs # 檢視最近的記錄 +``` + +| 指令 | 說明 | +|---------|-------------| +| `python craftbot.py install` | 安裝相依套件、註冊登入時自動啟動、啟動 CraftBot、開啟瀏覽器並自動關閉終端機 | +| `python craftbot.py start` | 在背景啟動 CraftBot——若已在執行,會自動重啟(終端機自動關閉) | +| `python craftbot.py stop` | 停止 CraftBot | +| `python 
craftbot.py restart` | 停止並重新啟動 CraftBot | +| `python craftbot.py status` | 檢查 CraftBot 是否執行中,以及自動啟動是否啟用 | +| `python craftbot.py logs` | 顯示最近的記錄(使用 `-n 100` 顯示更多行) | +| `python craftbot.py uninstall` | 停止 CraftBot、移除自動啟動註冊、解除 pip 套件並清除 pip 快取 | + +> [!TIP] +> 執行 `craftbot.py start` 或 `craftbot.py install` 後,會自動建立 **CraftBot 桌面捷徑**。若不小心關閉了瀏覽器,雙擊捷徑即可重新開啟。 + +> [!NOTE] +> **安裝:** 若相依套件缺失,安裝程式會提供清楚的指引。若找不到 Node.js,會提示你安裝或切換至 TUI 模式。安裝程式會自動偵測 GPU 是否可用,必要時會自動回退至 CPU 模式。 + +> [!TIP] +> **首次設定:** CraftBot 會引導你完成初始化流程,包含設定 API 金鑰、代理人名稱、MCP 與技能。 + +> [!NOTE] +> **Playwright Chromium:** 整合 WhatsApp Web 時選用。若安裝失敗,代理人仍可正常執行其他任務。稍後可以手動安裝:`playwright install chromium`。 + +--- + +## 🔧 疑難排解與常見問題 + +### 缺少 Node.js(Browser 模式) +若執行 `python run.py` 時看到 **"npm not found in PATH"**: +1. 從 [nodejs.org](https://nodejs.org/) 下載(建議 LTS 版本) +2. 安裝完成後重新啟動終端機 +3. 再次執行 `python run.py` + +**替代方案:** 改用 TUI 模式(不需 Node.js): +```bash +python run.py --tui +``` + +### 相依套件安裝失敗 +安裝程式現在會提供詳細錯誤訊息及解決方案。若安裝失敗: +- **確認 Python 版本:** 確保安裝 Python 3.10+(`python --version`) +- **檢查網路連線:** 安裝過程需下載相依套件 +- **清除 pip 快取:** 執行 `pip install --upgrade pip` 後再試 + +### Playwright 安裝問題 +Playwright chromium 為選用安裝,若失敗: +- 代理人的其他功能**仍可正常運作** +- 可先跳過,日後再安裝:`playwright install chromium` +- 僅於整合 WhatsApp Web 時需要 + +更多疑難排解請參閱 [INSTALLATION_FIX.md](INSTALLATION_FIX.md)。 + +--- + +## 🔌 外部服務整合 + +代理人可透過 OAuth 連接多種服務。Release 版本內建憑證,但你也可以使用自己的憑證。 + +### 快速上手 + +若使用內建憑證的 Release 版本: +``` +/google login # 連接 Google Workspace +/zoom login # 連接 Zoom +/slack invite # 連接 Slack +/notion invite # 連接 Notion +/linkedin login # 連接 LinkedIn +``` + +### 服務細節 + +| 服務 | 驗證方式 | 指令 | 是否需要密鑰? 
| +|---------|-----------|---------|------------------| +| Google | PKCE | `/google login` | 否(PKCE) | +| Zoom | PKCE | `/zoom login` | 否(PKCE) | +| Slack | OAuth 2.0 | `/slack invite` | 是 | +| Notion | OAuth 2.0 | `/notion invite` | 是 | +| LinkedIn | OAuth 2.0 | `/linkedin login` | 是 | + +### 使用自己的憑證 + +若希望使用自己的 OAuth 憑證,請將其加入 `.env` 檔: + +#### Google(PKCE,只需 Client ID) +```bash +GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com +``` +1. 前往 [Google Cloud Console](https://console.cloud.google.com/) +2. 啟用 Gmail、Calendar、Drive 與 People API +3. 建立 OAuth 憑證,類型選 **Desktop app** +4. 複製 Client ID(PKCE 不需 secret) + +#### Zoom(PKCE,只需 Client ID) +```bash +ZOOM_CLIENT_ID=your-zoom-client-id +``` +1. 前往 [Zoom Marketplace](https://marketplace.zoom.us/) +2. 建立 OAuth 應用程式 +3. 複製 Client ID + +#### Slack(兩者皆需) +```bash +SLACK_SHARED_CLIENT_ID=your-slack-client-id +SLACK_SHARED_CLIENT_SECRET=your-slack-client-secret +``` +1. 前往 [Slack API](https://api.slack.com/apps) +2. 建立新應用程式 +3. 新增 OAuth 範圍:`chat:write`、`channels:read`、`users:read` 等 +4. 複製 Client ID 與 Client Secret + +#### Notion(兩者皆需) +```bash +NOTION_SHARED_CLIENT_ID=your-notion-client-id +NOTION_SHARED_CLIENT_SECRET=your-notion-client-secret +``` +1. 前往 [Notion Developers](https://developers.notion.com/) +2. 建立新的整合(Public integration) +3. 複製 OAuth Client ID 與 Secret + +#### LinkedIn(兩者皆需) +```bash +LINKEDIN_CLIENT_ID=your-linkedin-client-id +LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret +``` +1. 前往 [LinkedIn Developers](https://developer.linkedin.com/) +2. 建立應用程式 +3. 新增 OAuth 2.0 範圍 +4. 複製 Client ID 與 Client Secret + +--- +## 🐳 使用容器執行 + +儲存庫根目錄提供 Docker 設定,內含 Python 3.10、OCR 用的 Tesseract 等關鍵系統套件,以及 `environment.yml`/`requirements.txt` 中定義的所有 Python 相依套件,讓代理人可在隔離環境中穩定執行。 + +以下是透過容器執行代理人的設定說明。 + +### 建置映像檔 + +於儲存庫根目錄執行: + +```bash +docker build -t craftbot . 
+``` + +### 執行容器 + +映像檔預設會以 `python -m app.main` 啟動代理人。若要以互動方式執行: + +```bash +docker run --rm -it craftbot +``` + +若需傳入環境變數,可透過 env 檔(例如以 `.env.example` 為基礎): + +```bash +docker run --rm -it --env-file .env craftbot +``` + +可使用 `-v` 掛載需要保存在容器外的目錄(如資料或快取資料夾),並依部署需求調整連接埠或其他旗標。映像檔內建 OCR(`tesseract`)以及常見 HTTP 用戶端等系統相依,能讓代理人在容器中處理檔案與網路 API。 + +映像檔預設使用 Python 3.10,並內建 `environment.yml`/`requirements.txt` 中的 Python 相依套件,因此 `python -m app.main` 可直接運作。 + +--- + +## 🤝 如何貢獻 + +歡迎提交 PR!詳細流程(fork → 由 `dev` 建分支 → 提 PR)請見 [CONTRIBUTING.md](CONTRIBUTING.md)。所有 Pull Request 都會自動執行 lint 與 smoke-test CI。如果你有任何疑問,或想更快速地溝通,歡迎加入 [Discord](https://discord.gg/ZN9YHc37HG) 或寄信至 thamyikfoong(at)craftos.net。 + +## 🧾 授權條款 + +本專案採用 [MIT 授權條款](LICENSE)。你可以自由使用、部署並商業化本專案(如需散佈或商業化,請註明出處)。 + +--- + +## ⭐ 致謝 + +本專案由 [CraftOS](https://craftos.net/) 與貢獻者 [@zfoong](https://github.com/zfoong)、[@ahmad-ajmal](https://github.com/ahmad-ajmal) 共同開發與維護。 +如果你覺得 **CraftBot** 好用,歡迎為儲存庫按下 ⭐ 並分享給更多人! + +--- + +## Star History + + + + + + Star History Chart + + diff --git a/agent_core/__init__.py b/agent_core/__init__.py index d0757090..b7badbdc 100644 --- a/agent_core/__init__.py +++ b/agent_core/__init__.py @@ -161,6 +161,7 @@ ) from agent_core.core.impl.llm import LLMCallType from agent_core.core.impl.trigger import TriggerQueue +from agent_core.core.impl.workflow_lock import WorkflowLockManager from agent_core.core.impl.event_stream import ( EventStream, EventStreamManager, @@ -369,6 +370,7 @@ "create_memory_processing_task", "LLMCallType", "TriggerQueue", + "WorkflowLockManager", "EventStream", "EventStreamManager", # Prompts - Registry diff --git a/agent_core/core/impl/action/manager.py b/agent_core/core/impl/action/manager.py index 3693982a..d17ad889 100644 --- a/agent_core/core/impl/action/manager.py +++ b/agent_core/core/impl/action/manager.py @@ -30,6 +30,49 @@ from agent_core.core.impl.action.executor import ActionExecutor from agent_core.utils.logger import logger +# 
============================================================================ +# Python 3.14 + nest_asyncio 1.6.0 compatibility shim for asyncio.wait_for. +# On 3.11+, asyncio.wait_for uses `async with asyncio.timeout(...)`, which +# calls asyncio.current_task() in __aenter__. nest_asyncio.apply() (below) +# patches the event loop's _run_once but does not propagate the task context +# variable when re-entering the loop, so current_task() returns None and +# wait_for raises "RuntimeError: Timeout should be used inside a task". +# Replace wait_for with an asyncio.wait-based equivalent that doesn't depend +# on current_task(). Installed just before nest_asyncio.apply() so every +# subsequent asyncio.wait_for caller (MCP stdio, action executor, etc.) picks +# it up. Safe to remove once nest_asyncio ships a 3.14-compatible release. +try: + import sys as _compat_sys + if _compat_sys.version_info >= (3, 11): + import asyncio.tasks as _compat_asyncio_tasks + + async def _compat_wait_for(fut, timeout): + if timeout is None: + return await fut + task = asyncio.ensure_future(fut) + _done, pending = await asyncio.wait({task}, timeout=timeout) + if task in pending: + task.cancel() + try: + await task + except BaseException: + pass + raise asyncio.TimeoutError() + return task.result() + + asyncio.wait_for = _compat_wait_for + _compat_asyncio_tasks.wait_for = _compat_wait_for + try: + _compat_sys.stderr.write( + "[compat-shim] asyncio.wait_for replaced (action/manager)\n" + ) + _compat_sys.stderr.flush() + except Exception: + pass +except Exception as _compat_exc: + logger.warning(f"[compat-shim] failed to install asyncio.wait_for replacement: {_compat_exc!r}") +# ============================================================================ + nest_asyncio.apply() @@ -221,7 +264,13 @@ async def execute_action( event=f"Running action {action.name} with input: {pretty_input}.", display_message=f"Running {action.display_name}", action_name=action.name, - session_id=session_id if 
is_running_task else None, + # Always pass session_id when present so the event_stream_manager can route + # to the correct task stream OR fall back to main_stream for transient + # sessions (e.g. third-party email notification). Previously this gated on + # is_running_task, which meant conversation-mode actions logged with + # task_id=None and got attributed to whatever task_stream get_stream() + # returned via STATE.current_task_id — leaking events into unrelated tasks. + session_id=session_id, ) logger.debug(f"Starting execution of action {action.name}...") @@ -277,7 +326,13 @@ async def execute_action( logger.debug(f"[OUTPUT DATA] Final outputs for action {action.name}: {outputs}") if status != "error": - status = "success" + # If the action returned an error dict (either via exception path in + # execute_atomic_action or an explicit failure from the action body), + # treat the run as an error so on_action_end and runtime logs reflect it. + if outputs and outputs.get("status") == "error": + status = "error" + else: + status = "success" except asyncio.CancelledError: status = "error" @@ -297,7 +352,7 @@ async def execute_action( # 3. Persist final state # ──────────────────────────────────────────────────────────────── - logger.debug(f"Action {action.name} completed with status: {status}.") + logger.info(f"Action {action.name} completed with status: {status}.") # Log to event stream # Only pass session_id when is_running_task=True (task stream exists) @@ -310,7 +365,13 @@ async def execute_action( event=f"Action {action.name} completed with output: {pretty_output}.", display_message=f"{action.display_name} → {display_status}", action_name=action.name, - session_id=session_id if is_running_task else None, + # Always pass session_id when present so the event_stream_manager can route + # to the correct task stream OR fall back to main_stream for transient + # sessions (e.g. third-party email notification). 
Previously this gated on + # is_running_task, which meant conversation-mode actions logged with + # task_id=None and got attributed to whatever task_stream get_stream() + # returned via STATE.current_task_id — leaking events into unrelated tasks. + session_id=session_id, ) # Emit waiting_for_user event if requested @@ -321,7 +382,7 @@ async def execute_action( event="Agent is waiting for user response.", display_message=None, action_name=action.name, - session_id=session_id if is_running_task else None, + session_id=session_id, ) logger.debug(f"Persisting final state for action {action.name}...") @@ -514,7 +575,10 @@ async def execute_atomic_action(self, action: Action, input_data: Dict) -> Dict: except Exception as e: logger.exception("Error occurred while executing atomic action") - return {"error": f"Execution failed: {str(e)}"} + # Mark status=error so the caller (execute_action) propagates "error" to + # the action_end event, the UI display_status, and the on_action_end hook. + # Without this, the action is reported as "completed/success" despite raising. 
+ return {"status": "error", "error": f"Execution failed: {str(e)}"} @staticmethod def _parse_action_output(raw_output: str) -> Any: diff --git a/agent_core/core/impl/action/router.py b/agent_core/core/impl/action/router.py index 210c2458..a9c291d5 100644 --- a/agent_core/core/impl/action/router.py +++ b/agent_core/core/impl/action/router.py @@ -98,15 +98,6 @@ async def select_action( # Base conversation mode actions base_actions = ["send_message", "task_start", "ignore"] - # Integration management actions (always available so the agent can - # help users connect / disconnect external apps via conversation) - integration_actions = [ - "list_available_integrations", - "connect_integration", - "disconnect_integration", - "check_integration_status", - ] - # Dynamically add messaging actions for connected platforms try: from app.external_comms.integration_discovery import ( @@ -115,10 +106,10 @@ async def select_action( ) connected_platforms = get_connected_messaging_platforms() messaging_actions = get_messaging_actions_for_platforms(connected_platforms) - conversation_mode_actions = base_actions + integration_actions + messaging_actions + conversation_mode_actions = base_actions + messaging_actions except Exception as e: logger.debug(f"[ACTION] Could not discover messaging actions: {e}") - conversation_mode_actions = base_actions + integration_actions + conversation_mode_actions = base_actions action_candidates = [] diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py index 853c284b..05d89e74 100644 --- a/agent_core/core/impl/context/engine.py +++ b/agent_core/core/impl/context/engine.py @@ -198,7 +198,6 @@ def create_system_environmental_context(self) -> str: vm_operating_system="Linux", vm_os_version="6.12.13", vm_os_platform="Linux a5e39e32118c 6.12.13 #1 SMP Thu Mar 13 11:34:50 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux", - vm_resolution="1064 x 1064" ) def create_system_file_system_context(self) -> str: diff --git 
a/agent_core/core/impl/event_stream/event_stream.py b/agent_core/core/impl/event_stream/event_stream.py index b9e00d17..d2e1a3fe 100644 --- a/agent_core/core/impl/event_stream/event_stream.py +++ b/agent_core/core/impl/event_stream/event_stream.py @@ -31,6 +31,11 @@ SEVERITIES = ("DEBUG", "INFO", "WARN", "ERROR") MAX_EVENT_INLINE_CHARS = 200000 +# Always preserve at least this many most-recent events in tail_events when summarizing. +# Guards against a single oversized event (e.g. a large read_pdf result) being purged in the +# same tick it arrives — the UI consumer polls tail_events and would otherwise miss it, +# leaving the action displayed as "running" forever. +MIN_KEEP_RECENT_EVENTS = 2 def get_cached_token_count(rec: "EventRecord") -> int: @@ -199,18 +204,21 @@ def _find_token_cutoff(self, events: List[EventRecord], keep_tokens: int) -> int if not events: return 0 - # Calculate tokens from the end, accumulating until we reach keep_tokens + # Calculate tokens from the end, accumulating until we reach keep_tokens. + # MIN_KEEP_RECENT_EVENTS overrides the token budget so the most recent events + # always survive a summarization pass — needed because the UI polls tail_events + # and would never see an event that's purged in the same tick it arrived. 
tokens_from_end = 0 keep_count = 0 for rec in reversed(events): event_tokens = get_cached_token_count(rec) - if tokens_from_end + event_tokens > keep_tokens: + if tokens_from_end + event_tokens > keep_tokens and keep_count >= MIN_KEEP_RECENT_EVENTS: break tokens_from_end += event_tokens keep_count += 1 # Return how many events to summarize (from the beginning) - cutoff = len(events) - keep_count + cutoff = max(0, len(events) - keep_count) duration_ms = (time.perf_counter() - start) * 1000 profiler.record( "find_token_cutoff", diff --git a/agent_core/core/impl/event_stream/manager.py b/agent_core/core/impl/event_stream/manager.py index 69e334ca..a7a068a9 100644 --- a/agent_core/core/impl/event_stream/manager.py +++ b/agent_core/core/impl/event_stream/manager.py @@ -22,6 +22,7 @@ from agent_core.core.event_stream.event import Event from agent_core.core.protocols.llm import LLMInterfaceProtocol from agent_core.utils.logger import logger +from agent_core.utils.file_utils import rotate_md_file_if_needed from agent_core.core.state.base import get_state_or_none # Import memory mode check (deferred to avoid circular imports) @@ -298,6 +299,7 @@ def _log_to_files(self, kind: str, message: str) -> None: # Always write to EVENT.md (create if doesn't exist) try: event_file = self._agent_file_system_path / "EVENT.md" + rotate_md_file_if_needed(event_file) with open(event_file, "a", encoding="utf-8") as f: f.write(event_line) except Exception as e: @@ -309,6 +311,7 @@ def _log_to_files(self, kind: str, message: str) -> None: if not self._should_skip_unprocessed() and not self._should_skip_event_type(kind): try: unprocessed_file = self._agent_file_system_path / "EVENT_UNPROCESSED.md" + rotate_md_file_if_needed(unprocessed_file) with open(unprocessed_file, "a", encoding="utf-8") as f: f.write(event_line) except Exception as e: @@ -353,13 +356,16 @@ def log( if task_id is not None and task_id in self._task_streams: stream = self._task_streams[task_id] elif task_id is not None and 
task_id not in self._task_streams: - # Task ID provided but stream not found - fall back to global stream - # Only warn if other streams exist (indicates a bug/race condition). - # If no streams exist, this is expected (conversation mode, before task creation). + # Task ID provided but stream not found — fall back to the MAIN stream, + # not get_stream(). get_stream() resolves via global STATE.current_task_id + # which is the *currently running* task; that path leaks events from a + # parallel conversation reaction (e.g. third-party email notification in + # session 0489cf) into whatever task happens to be active (e.g. translate + # task 15a11d). Only warn if other streams exist (indicates a bug/race). if self._task_streams: - logger.warning(f"[EVENT_STREAM] Task stream not found for task_id={task_id!r}, falling back to global stream. " + logger.warning(f"[EVENT_STREAM] Task stream not found for task_id={task_id!r}, falling back to main stream. " f"Available streams: {list(self._task_streams.keys())}") - stream = self.get_stream() + stream = self._main_stream else: stream = self.get_stream() idx = stream.log( diff --git a/agent_core/core/impl/llm/cache/config.py b/agent_core/core/impl/llm/cache/config.py index f958738c..aacc411e 100644 --- a/agent_core/core/impl/llm/cache/config.py +++ b/agent_core/core/impl/llm/cache/config.py @@ -7,6 +7,7 @@ from __future__ import annotations + import os from dataclasses import dataclass from typing import Optional diff --git a/agent_core/core/impl/llm/cache/metrics.py b/agent_core/core/impl/llm/cache/metrics.py index 8f390825..0e1bbc6b 100644 --- a/agent_core/core/impl/llm/cache/metrics.py +++ b/agent_core/core/impl/llm/cache/metrics.py @@ -7,6 +7,7 @@ from __future__ import annotations + import logging from dataclasses import dataclass from typing import Dict, Optional diff --git a/agent_core/core/impl/llm/errors.py b/agent_core/core/impl/llm/errors.py index e310f686..052e2611 100644 --- a/agent_core/core/impl/llm/errors.py +++ 
b/agent_core/core/impl/llm/errors.py @@ -8,6 +8,7 @@ from __future__ import annotations + from typing import Optional # Import provider exception types diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index ca010f94..913453ca 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -1762,7 +1762,6 @@ def _generate_anthropic( for block in response.content: if block.type == "text": content += block.text - content = content.strip() # Token usage from Anthropic response diff --git a/agent_core/core/impl/llm/types.py b/agent_core/core/impl/llm/types.py index 1b942525..4f51eabe 100644 --- a/agent_core/core/impl/llm/types.py +++ b/agent_core/core/impl/llm/types.py @@ -5,6 +5,7 @@ from __future__ import annotations + from enum import Enum diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py index 5b491864..ff103391 100644 --- a/agent_core/core/impl/memory/manager.py +++ b/agent_core/core/impl/memory/manager.py @@ -874,7 +874,11 @@ def _compute_content_hash(content: str) -> str: # ───────────────────────────── Task Creation Helper ───────────────────────────── -def create_memory_processing_task(task_manager) -> str: +def create_memory_processing_task( + task_manager, + needs_pruning: bool = False, + prune_target: int = 100, +) -> str: """ Create a task to process unprocessed events into distilled memories. @@ -884,26 +888,46 @@ def create_memory_processing_task(task_manager) -> str: - Check for duplicate memories - Write to MEMORY.md in strict format - Clear processed events + - Optionally prune MEMORY.md when it has grown past the configured cap Args: task_manager: The TaskManager instance to create the task with + needs_pruning: True when MEMORY.md has reached the max-items threshold + and the task should also run the pruning phase after distillation. + prune_target: Approximate number of oldest items the pruning phase + should consolidate or drop. 
Returns: The task ID of the created task """ + instruction = ( + "SILENT BACKGROUND TASK - NEVER use send_message or run_python. " + "Read agent_file_system/EVENT_UNPROCESSED.md. " + "DISTILL (rewrite, don't copy) into agent_file_system/MEMORY.md. " + "Format: [YYYY-MM-DD HH:MM:SS] [category] Subject predicate object. " + "DISCARD 95%+ events. Agent messages and greetings are ALWAYS discarded. " + "Each memory item must be <= 150 words. " + "Use stream_edit only. Never write code." + ) + + if needs_pruning: + instruction += ( + f" MEMORY.md has reached the item-count cap. After processing events, " + f"run the Pruning phase: remove the FIRST (oldest) ~{prune_target} items " + f"from the items section — they appear at the top, immediately after the header block. " + f"Merge related items about the same subject before dropping, then drop duplicates " + f"and low-utility items. Preserve high-utility items regardless of age. " + f"The header block must NOT be modified. Keep only the newest items (bottom of file). " + f"Target: remove at least {prune_target} items so only the latest 1/3 remain." + ) + return task_manager.create_task( task_name="Process Memory Events", - task_instruction=( - "SILENT BACKGROUND TASK - NEVER use send_message or run_python. " - "Read agent_file_system/EVENT_UNPROCESSED.md. " - "DISTILL (rewrite, don't copy) into agent_file_system/MEMORY.md. " - "Format: [YYYY-MM-DD HH:MM:SS] [category] Subject predicate object. " - "DISCARD 95%+ events. Agent messages and greetings are ALWAYS discarded. " - "Use stream_edit only. Never write code." 
- ), + task_instruction=instruction, mode="complex", action_sets=["file_operations"], - selected_skills=["memory-processor"] + selected_skills=["memory-processor"], + workflow_id="memory_processing", ) diff --git a/agent_core/core/impl/task/manager.py b/agent_core/core/impl/task/manager.py index 0e388374..571478f1 100644 --- a/agent_core/core/impl/task/manager.py +++ b/agent_core/core/impl/task/manager.py @@ -38,9 +38,11 @@ if TYPE_CHECKING: from agent_core.core.state.base import StateManagerBase + from agent_core.core.impl.workflow_lock import WorkflowLockManager # Set up logger - use shared agent_core logger for consistency from agent_core.utils.logger import logger +from agent_core.utils.file_utils import rotate_md_file_if_needed # ============================================================================= @@ -106,6 +108,8 @@ def __init__( on_todo_transition: Optional[OnTodoTransitionHook] = None, on_task_ended_chatserver: Optional[OnTaskEndedChatserverHook] = None, finalize_todos_chatserver: Optional[FinalizeTodosChatserverHook] = None, + # Workflow-lock registry for auto-release on task end + workflow_lock_manager: Optional["WorkflowLockManager"] = None, ): """ Initialize the task manager. @@ -177,6 +181,9 @@ def __init__( self._on_task_ended_chatserver = on_task_ended_chatserver self._finalize_todos_chatserver = finalize_todos_chatserver + # Workflow-lock registry (optional) + self.workflow_lock_manager = workflow_lock_manager + @property def active(self) -> Optional[Task]: """Current session's task. @@ -228,6 +235,7 @@ def create_task( session_id: Optional[str] = None, original_query: Optional[str] = None, original_platform: Optional[str] = None, + workflow_id: Optional[str] = None, ) -> str: """ Create a new task without LLM planning. 
@@ -283,6 +291,7 @@ def create_task( selected_skills=selected_skills or [], conversation_id=conversation_id, source_platform=original_platform, + workflow_id=workflow_id, ) self.tasks[task_id] = task @@ -627,6 +636,18 @@ async def _end_task( if self.state_manager: self.state_manager.on_task_ended(task, status, summary) + # Release any workflow lock this task was holding. Runs regardless of + # terminal status (completed / error / cancelled) so a crashed task + # never leaves its workflow wedged. + if self.workflow_lock_manager and task.workflow_id: + try: + await self.workflow_lock_manager.release(task.workflow_id) + except Exception as e: + logger.warning( + f"[TaskManager] Failed to release workflow lock " + f"'{task.workflow_id}' for task {task.id}: {e}" + ) + # Remove task from dict and clean up event stream self.tasks.pop(task.id, None) if self._current_session_id == task.id: @@ -732,6 +753,7 @@ def _log_to_task_history(self, task: Task, note: Optional[str] = None) -> None: entry_lines.append("") + rotate_md_file_if_needed(task_history_path) with open(task_history_path, "a", encoding="utf-8") as f: f.write("\n".join(entry_lines) + "\n") diff --git a/agent_core/core/impl/trigger/queue.py b/agent_core/core/impl/trigger/queue.py index 817399aa..1a5aa656 100644 --- a/agent_core/core/impl/trigger/queue.py +++ b/agent_core/core/impl/trigger/queue.py @@ -300,6 +300,7 @@ async def put(self, trig: Trigger, skip_merge: bool = False) -> None: conversation_id=trig.payload.get("conversation_id", "N/A"), existing_sessions=existing_sessions, recent_conversation=recent_conversation, + current_living_ui_id=trig.payload.get("living_ui_id") or "(not on a Living UI page)", ) logger.debug(f"[UNIFIED ROUTING PROMPT]:\n{usr_msg}") @@ -454,6 +455,7 @@ async def fire( *, message: str | None = None, platform: str | None = None, + living_ui_id: str | None = None, ) -> bool: """ Mark a trigger for a given session as ready to fire immediately. 
@@ -469,6 +471,7 @@ async def fire( description so the reasoning step sees it. platform: Optional platform identifier (e.g., "Telegram", "WhatsApp") to preserve message source information. + living_ui_id: Optional Living UI project ID if user is on a Living UI page. Returns: ``True`` if a trigger was found (queued or active), otherwise ``False``. @@ -485,6 +488,8 @@ async def fire( t.payload["pending_user_message"] = message if platform: t.payload["pending_platform"] = platform + if living_ui_id: + t.payload["living_ui_id"] = living_ui_id found = True if found: @@ -500,6 +505,8 @@ async def fire( t.payload["pending_user_message"] = message if platform: t.payload["pending_platform"] = platform + if living_ui_id: + t.payload["living_ui_id"] = living_ui_id logger.debug(f"[FIRE] Attached message to active trigger for session {session_id}") return True diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py index 2ebb88da..dc86d82b 100644 --- a/agent_core/core/impl/vlm/interface.py +++ b/agent_core/core/impl/vlm/interface.py @@ -217,6 +217,7 @@ def describe_image_bytes( system_prompt: str | None = None, user_prompt: str | None = "Describe this image in detail.", log_response: bool = True, + json_mode: bool = True, ) -> str: """Describe an image from raw bytes using the VLM. @@ -236,7 +237,7 @@ def describe_image_bytes( if self.provider == "deepseek": raise RuntimeError("DeepSeek does not support vision/VLM. 
Use a different provider for image description.") elif self.provider in ("openai", "minimax", "moonshot", "grok"): - response = self._openai_describe_bytes(image_bytes, system_prompt, user_prompt) + response = self._openai_describe_bytes(image_bytes, system_prompt, user_prompt, json_mode=json_mode) elif self.provider == "remote": response = self._ollama_describe_bytes(image_bytes, system_prompt, user_prompt) elif self.provider == "gemini": @@ -288,6 +289,101 @@ async def generate_response_async( log_response, ) + def describe_image_ocr( + self, + image_path: str, + user_prompt: str | None = None, + ) -> str: + """ + Run OCR on an image. Returns raw extracted text, not a description. + Uses a structured extraction system prompt regardless of provider. + """ + if not os.path.isfile(image_path): + raise FileNotFoundError(f"Image file not found: {image_path}") + + with open(image_path, "rb") as f: + image_bytes = f.read() + + system_prompt = ( + "You are a precise OCR engine. Extract ALL text from this image exactly as it appears. " + "Preserve line breaks, indentation, and formatting. " + "Do NOT add commentary, interpretation, or markdown. " + "Output only the raw extracted text. If no text is present, output an empty string." + ) + effective_user = user_prompt or "Extract all text from this image." + + logger.info(f"[LLM SEND] OCR request | path={image_path}") + + cleaned = self.describe_image_bytes( + image_bytes, + system_prompt=system_prompt, + user_prompt=effective_user, + log_response=False, # Logged below + json_mode=False, + ) + + logger.info(f"[LLM RECV OCR] {cleaned[:120]}...") + return cleaned + + def describe_video_frames( + self, + video_path: str, + query: str | None = None, + max_frames: int = 8, + ) -> str: + """ + Analyse video by extracting evenly-spaced keyframes and sending to VLM. + Falls back to graceful error if OpenCV is unavailable. 
+ """ + try: + import cv2 + except ImportError: + raise RuntimeError( + "opencv-python-headless is required for video analysis. " + "Install with: pip install opencv-python-headless" + ) + + if not os.path.isfile(video_path): + raise FileNotFoundError(f"Video file not found: {video_path}") + + cap = cv2.VideoCapture(video_path) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + if total_frames == 0: + cap.release() + raise ValueError("Video has 0 frames or could not be read.") + + indices = [int(i * total_frames / max_frames) for i in range(max_frames)] + frame_bytes_list: list[bytes] = [] + + for idx in indices: + cap.set(cv2.CAP_PROP_POS_FRAMES, idx) + ret, frame = cap.read() + if ret: + success, buf = cv2.imencode(".jpg", frame) + if success: + frame_bytes_list.append(buf.tobytes()) + cap.release() + + if not frame_bytes_list: + raise ValueError("Could not extract any frames from the video.") + + system_prompt = ( + f"You are analysing a video represented by {len(frame_bytes_list)} evenly-spaced keyframes. " + "Provide: 1) An overall narrative summary of what is happening, " + "2) Any visible text or titles, " + "3) Key objects, people, or scenes, " + "4) Notable transitions between frames." + ) + effective_user = query or "Summarise the content of this video." 
+ + # For multi-frame, send frames sequentially (all providers support single-image per call) + # Gemini 1.5 Pro supports native multi-image; others receive concatenated descriptions + if self.provider == "gemini" and len(frame_bytes_list) > 1: + return self._gemini_describe_video_frames(frame_bytes_list, system_prompt, effective_user) + else: + # Universal fallback: describe each frame, then synthesise + return self._multi_frame_describe_fallback(frame_bytes_list, system_prompt, effective_user) + # ───────────────────── Provider Helpers ───────────────────── @staticmethod @@ -330,7 +426,51 @@ def _report_usage_async( except Exception as e: logger.warning(f"[VLM] Failed to report usage: {e}") - def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) -> Dict[str, Any]: + + def _gemini_describe_video_frames( + self, frame_bytes_list: list[bytes], sys: str | None, usr: str + ) -> str: + """Gemini-specific multi-image frame analysis in a single API call.""" + result = self._gemini_client.generate_multimodal( + self.model, + text=usr, + image_bytes_list=frame_bytes_list, + system_prompt=sys, + temperature=self.temperature, + json_mode=False, + ) + tokens_used = result.get("tokens_used", 0) + if tokens_used: + self._set_token_count(self._get_token_count() + tokens_used) + return re.sub(self._CODE_BLOCK_RE, "", result.get("content", "").strip()) + + def _multi_frame_describe_fallback( + self, frame_bytes_list: list[bytes], system_prompt: str, user_prompt: str + ) -> str: + """Describe each frame individually, then synthesise into a narrative.""" + frame_descriptions = [] + for i, fb in enumerate(frame_bytes_list): + desc = self.describe_image_bytes( + fb, + system_prompt=f"Frame {i+1} of {len(frame_bytes_list)}: Describe what you see.", + user_prompt=user_prompt, + log_response=False, + ) + frame_descriptions.append(f"[Frame {i+1}]: {desc}") + + synthesis_prompt = ( + "You received descriptions of video keyframes. 
Write a coherent video summary:\n\n" + + "\n".join(frame_descriptions) + ) + synthesis = self.describe_image_bytes( + frame_bytes_list[-1], # anchor with last frame for context + system_prompt=system_prompt, + user_prompt=synthesis_prompt, + log_response=True, + ) + return synthesis + + def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str, json_mode: bool = True) -> Dict[str, Any]: """OpenAI/Grok vision request with automatic prompt caching metrics.""" img_b64 = base64.b64encode(image_bytes).decode() mime_type = self._detect_mime_type(image_bytes) @@ -348,14 +488,13 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) ) # Newer OpenAI models (o1, o3, o4, gpt-5, etc.) require # 'max_completion_tokens' instead of the legacy 'max_tokens' parameter. - # Note: response_format=json_object is intentionally NOT set here because - # describe_image returns plain text descriptions, not JSON. Enabling JSON - # mode would also require the prompt to contain the word "json". 
request_kwargs: Dict[str, Any] = { "model": self.model, "messages": messages, "temperature": self.temperature, } + if json_mode: + request_kwargs["response_format"] = {"type": "json_object"} model_lower = (self.model or "").lower() uses_max_completion_tokens = ( model_lower.startswith("o1") @@ -435,7 +574,7 @@ def _gemini_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) result = self._gemini_client.generate_multimodal( self.model, text=usr, - image_bytes=image_bytes, + image_bytes_list=[image_bytes], system_prompt=sys, temperature=self.temperature, json_mode=False, diff --git a/agent_core/core/impl/workflow_lock/__init__.py b/agent_core/core/impl/workflow_lock/__init__.py new file mode 100644 index 00000000..62bcb647 --- /dev/null +++ b/agent_core/core/impl/workflow_lock/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +"""Workflow lock registry — prevents overlapping execution of named workflows.""" + +from agent_core.core.impl.workflow_lock.manager import WorkflowLockManager + +__all__ = ["WorkflowLockManager"] diff --git a/agent_core/core/impl/workflow_lock/manager.py b/agent_core/core/impl/workflow_lock/manager.py new file mode 100644 index 00000000..e7229cfe --- /dev/null +++ b/agent_core/core/impl/workflow_lock/manager.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +"""WorkflowLockManager — exclusive locks for named background workflows. + +A *workflow* is any recurring background activity that must not run concurrently +with another instance of itself (e.g. memory processing, proactive cycles). +Each workflow is identified by a stable string. At most one task may own a +given workflow lock at a time. + +Typical usage: + + if not await locks.try_acquire("memory_processing"): + logger.info("workflow already active; skipping") + return + + try: + task_id = task_manager.create_task(..., workflow_id="memory_processing") + # TaskManager auto-releases the lock in its _end_task funnel when the + # task terminates (completed / error / cancelled). 
+ except Exception: + # Release on any failure before the task takes ownership. + await locks.release("memory_processing") + raise + +The manager is safe for concurrent callers inside a single asyncio event loop +because every mutation is guarded by an internal ``asyncio.Lock``. +""" + +from __future__ import annotations + +import asyncio +from typing import FrozenSet, Set + + +class WorkflowLockManager: + """Registry of exclusive locks for named background workflows.""" + + def __init__(self) -> None: + self._held: Set[str] = set() + self._mutex = asyncio.Lock() + + async def try_acquire(self, workflow_id: str) -> bool: + """Attempt to acquire the lock for ``workflow_id``. + + Returns True on success, False if another holder already owns it. + """ + if not workflow_id: + raise ValueError("workflow_id must be a non-empty string") + async with self._mutex: + if workflow_id in self._held: + return False + self._held.add(workflow_id) + return True + + async def release(self, workflow_id: str) -> None: + """Release the lock for ``workflow_id``. 
Idempotent.""" + if not workflow_id: + return + async with self._mutex: + self._held.discard(workflow_id) + + def is_locked(self, workflow_id: str) -> bool: + """Non-blocking check — True iff a holder currently owns ``workflow_id``.""" + return workflow_id in self._held + + def active_workflows(self) -> FrozenSet[str]: + """Snapshot of all currently-held workflow ids.""" + return frozenset(self._held) diff --git a/agent_core/core/llm/google_gemini_client.py b/agent_core/core/llm/google_gemini_client.py index f6d1688b..114734fb 100644 --- a/agent_core/core/llm/google_gemini_client.py +++ b/agent_core/core/llm/google_gemini_client.py @@ -9,6 +9,7 @@ """ from __future__ import annotations + import base64 import logging import os @@ -168,12 +169,15 @@ def generate_multimodal( model: str, *, text: str, - image_bytes: bytes, + image_bytes_list: List[bytes], system_prompt: Optional[str] = None, temperature: Optional[float] = None, json_mode: bool = False, ) -> Dict[str, Any]: - """Generate text from a prompt that also contains an inline image. + """Generate text from a prompt that contains one or more inline images. + + Normalises both single-image and multi-image inputs into a consistent + request format for the Gemini API. 
Returns a dict containing: - tokens_used: Total tokens consumed @@ -185,7 +189,8 @@ def generate_multimodal( Args: model: Model identifier text: The text prompt - image_bytes: PNG image data + + image_bytes_list: List of image data (PNG/JPEG) system_prompt: Optional system instruction temperature: Sampling temperature json_mode: If True, enforce JSON output format @@ -193,12 +198,16 @@ def generate_multimodal( Returns: Dict with generation results and token counts """ - inline_data = { - "mimeType": "image/png", - "data": base64.b64encode(image_bytes).decode("utf-8"), - } + parts: List[Dict[str, Any]] = [{"text": text}] + for img in image_bytes_list: + mime = "image/jpeg" + parts.append({ + "inlineData": { + "mimeType": mime, + "data": base64.b64encode(img).decode("utf-8"), + } + }) - parts: List[Dict[str, Any]] = [{"text": text}, {"inlineData": inline_data}] contents = [{"role": "user", "parts": parts}] payload: Dict[str, Any] = {"contents": contents} @@ -236,6 +245,8 @@ def generate_multimodal( "cached_tokens": cached_tokens, } + + def embed_text(self, model: str, *, text: str) -> List[float]: """Fetch an embedding vector for the supplied text. diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index 3d141edc..f63c365c 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -20,7 +20,7 @@ InterfaceType.EMBEDDING: None, # Anthropic does not provide native embedding models }, "byteplus": { - InterfaceType.LLM: "kimi-k2-250905", + InterfaceType.LLM: "seed-1-6-250915", InterfaceType.VLM: "seed-1-6-250915", InterfaceType.EMBEDDING: "skylark-embedding-vision-250615", }, diff --git a/agent_core/core/prompts/application.py b/agent_core/core/prompts/application.py new file mode 100644 index 00000000..c9dbe930 --- /dev/null +++ b/agent_core/core/prompts/application.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" +Application-specific prompt templates. 
+ +Contains prompt templates for Living UI and other application features. +""" + +LIVING_UI_TASK_INSTRUCTION = """Create a Living UI application. + +Project ID: {project_id} +Project Name: {project_name} +Description: {description} +Features: {features} +Theme: {theme} +Project Path: {project_path} + +Follow the living-ui-creator skill instructions. Here's the workflow: + +1. Read agent_file_system/GLOBAL_LIVING_UI.md — apply its colors, fonts, and rules +2. Phase 0: Ask the user 2+ batches of questions about data, features, design, and layout +3. Document requirements in LIVING_UI.md +4. Break the app into features, then for each feature: + - Re-read LIVING_UI.md (check what's left) and GLOBAL_LIVING_UI.md (refresh design rules) + - Write backend tests first (backend/tests/) + - Create model + routes to pass tests + - Run pytest to verify + - Create frontend types + components + - Update LIVING_UI.md — mark this feature as done, add models/routes/components you created + Do NOT skip features listed in LIVING_UI.md. A working app with all planned features is the goal. +5. Update LIVING_UI.md with implementation details +6. 
Call living_ui_notify_ready(project_id="{project_id}") + +What a GOOD Living UI looks like: +- Professional web app layout — proper spacing, visual hierarchy, sections, headers +- Uses preset components (Button, Card, Input, Modal, Table from './components/ui') — never raw HTML +- Thoughtful layout: sidebar or top nav, content area with grid/list views, detail panels or modals +- Colors from GLOBAL_LIVING_UI.md applied consistently +- Empty state when no data — the app launches with an empty database, users create their own content +- "Add" actions open forms/modals with proper input fields — never auto-create with placeholder text +- Every item is viewable, editable, and deletable through the UI +- Error handling with toast notifications on API failures +- Responsive design that works on different screen sizes + +When pytest fails: +- Read ALL errors carefully before fixing — fix ALL issues in one go, not one at a time +- If you see an import error, check ALL files for the same pattern and fix them all +- Maximum 3 pytest attempts per feature. If still failing after 3, review your approach +- Common fix: relative imports (from . 
import X) → absolute imports (from X import Y) + +External integrations (Gmail, YouTube, Discord, Slack, etc.): +- CraftBot has connected external services — use the integration bridge, NOT custom OAuth +- Import: from services.integration_client import integration +- Call: result = await integration.request("google_workspace", "GET", url) +- NEVER build OAuth flows, ask for API keys, or store credentials +- See the "External Integrations" section in SKILL.md for details and examples + +What to AVOID: +- Flat list of items with no visual structure +- Custom CSS when preset components exist +- Hardcoded test data left in the database +- Buttons that create items without user input +- Everything crammed into one component file +- Relative imports in backend code +- Running uvicorn/npm manually — the launch pipeline handles this +- Editing main.py, main.tsx, manifest.json, or tests/conftest.py — system managed +- Rewriting conftest.py — it has the correct imports and test DB setup already + +Your todo list should follow this EXACT pattern — do NOT add extra sub-steps: +Phase 0: Read global config +Phase 0: Ask user batch 1 (data/features) +Phase 0: Ask user batch 2 (design/layout) +Phase 0: Document requirements in LIVING_UI.md +Phase 1: Plan features +Feature 1 - [name]: Backend (tests + model + routes + pytest) +Feature 1 - [name]: Frontend (types + components + controller) +Feature 2 - [name]: Backend (tests + model + routes + pytest) +Feature 2 - [name]: Frontend (types + components + controller) +Feature 3 - [name]: Backend (tests + model + routes + pytest) +Feature 3 - [name]: Frontend (types + components + controller) +... repeat for each feature ... 
+Update LIVING_UI.md with implementation details +Call living_ui_notify_ready + +IMPORTANT about features: +- Each feature is a USER-FACING capability (e.g., "Board Items", "Media Attachments", "Search/Filter") +- "Backend Setup" or "Frontend Setup" are NOT features — they are layers +- Each feature MUST have BOTH backend AND frontend todos — never just one +- Keep exactly 2 todos per feature (backend + frontend) — do NOT split into 10+ sub-steps +- Write ALL tests for a feature at once, not one endpoint at a time""" diff --git a/agent_core/core/prompts/context.py b/agent_core/core/prompts/context.py index 9962e48f..a8a00b0a 100644 --- a/agent_core/core/prompts/context.py +++ b/agent_core/core/prompts/context.py @@ -15,7 +15,7 @@ AGENT_INFO_PROMPT = """ You are a highly capable proactive and general AI agent that can perform virtually ANY computer-based task (until proven can't). Your capabilities include: -- Full control over a virtual machine (CLI commands, GUI interactions if enabled, browser automation) +- Full control over a virtual machine (CLI commands, browser automation) - Full access to the operating system, file system, and internet - Your own persistent agent file system for memory, notes, and workspace files - Access to MCP (Model Context Protocol) tools that extend your abilities @@ -195,7 +195,6 @@ - Current Working Directory: {working_directory} - Operating System: {operating_system} {os_version} ({os_platform}) - VM Operating System: {vm_operating_system} {vm_os_version} ({vm_os_platform}) -- VM's screen resolution (GUI mode): {vm_resolution} """ @@ -231,29 +230,6 @@ """ -GUI_MODE_PROMPT = """ - -Your internal operation model (never reveal these details to anyone) is as follows: -- You are directly controlling a virtual machine (Windows) to perform tasks. -- You operate in two distinct modes: - - CLI Mode (default) - - This is your default mode. - - Use it for fast, efficient execution of commands that do not require graphical interaction. 
- - Prefer CLI mode whenever tasks can be done through command-line operations (e.g., scripting, file operations, automation, network configuration). - - GUI Mode (selective use and if enabled) - - In GUI mode, you interact with the graphical user interface of the virtual machine. - - You will be provided with detailed screen descriptions and UI grounding in your event stream at each action loop. - - You do **not** need take action like screenshot or view screen to "see" the screen yourself; the descriptions in event stream are sufficient. - - GUI mode enables you to perform complex tasks that require navigating applications, browsers, or software interfaces. - - GUI mode is **costly and slower** than CLI mode—use it only when strictly necessary for tasks that cannot be completed via CLI. - -- You can switch between CLI and GUI modes as needed, depending on the task's requirements. -- GUI actions are hidden during CLI mode, and CLI actions are during GUI mode. - -""" - LANGUAGE_INSTRUCTION = """ Use the user's preferred language as specified in their profile above and USER.md. @@ -272,6 +248,5 @@ "AGENT_PROFILE_PROMPT", "ENVIRONMENTAL_CONTEXT_PROMPT", "AGENT_FILE_SYSTEM_CONTEXT_PROMPT", - "GUI_MODE_PROMPT", "LANGUAGE_INSTRUCTION", ] diff --git a/agent_core/core/prompts/routing.py b/agent_core/core/prompts/routing.py index b9bf1e11..ac18b7f4 100644 --- a/agent_core/core/prompts/routing.py +++ b/agent_core/core/prompts/routing.py @@ -6,17 +6,25 @@ """ # --- Unified Session Routing --- -# This prompt handles BOTH incoming messages AND triggers in a single LLM call. -# Provides rich context including task details, progress, and platform info. +# This prompt is the LAST-RESORT routing decision. The chat handler short-circuits +# the easy cases deterministically (explicit UI reply target, third-party +# notifications, single waiting task, reply markers) BEFORE this prompt runs. +# By the time the LLM sees the message, those cases are already handled. 
+# +# The prompt's job: decide if a message in main chat with active task(s) is +# CLEARLY a continuation/modification of one of those tasks, or a new request. +# Default to NEW SESSION when in doubt. ROUTE_TO_SESSION_PROMPT = """ -You are a session routing system. Determine which task session an incoming message belongs to. +You are a session router. Decide whether an incoming message is a clear continuation +of an existing task, or a new request that should open a new session. Type: {item_type} Content: {item_content} Source Platform: {source_platform} +User's current Living UI page: {current_living_ui_id} @@ -24,31 +32,62 @@ +Recent messages across all sessions (oldest first, may include completed tasks +that are no longer in ): {recent_conversation} -1. ROUTE TO EXISTING SESSION when: - - The message is a response to a question the agent asked (check Recent Activity) - - Short replies like "yes", "no", "ok", numbers → route to related session waiting for reply - - The message is related to an existing task's topic or instruction - - The message references files, outputs, or artifacts created by an existing task (check Recent Activity for file paths) - -2. SINGLE ACTIVE SESSION BIAS: - - When there is ONLY ONE active session, strongly prefer routing to it unless the message is clearly about a completely different topic - - This is because follow-up requests often relate to the current task's outputs (e.g., "convert to PDF" after a report was generated) - -3. 
CREATE NEW SESSION when: - - The message is a NEW topic clearly unrelated to any existing task - - The message doesn't match any existing task's context AND there are multiple active sessions - - The message appears to be a follow-up to a COMPLETED task visible in recent conversation history but NOT in existing sessions - -IMPORTANT NOTES: -- If the message has no context, it is very LIKELY it is meant for another task, DO NOT CREATE a new session -- If there is on-going task waiting for user reply, it is very LIKELY the incoming item is meant for the session -- However, if recent conversation history shows a completed task matching the message topic, prefer creating a new session over routing to an unrelated active task -- When the incoming message is ambiguous and could match any session, slightly prefer the most recent conversation topic (latest messages in recent conversation history) -- People naturally respond to the most recent thing discussed, so an out-of-context reply like "is it good?" most likely refers to the latest topic, not an older one +DEFAULT: create a new session. When in doubt, choose "new". + +Route to an existing session ONLY IF the message clearly fits ONE of these: + - References a specific file, output, or artifact created by that task + (e.g. "the PDF you made", "the translated report", a filename produced by that task) + - Is a clear modification of that task's original instruction + (e.g. "translate to Spanish instead", "also include X", "skip page 5", "make it shorter") + - Cancels or pauses that task explicitly + (e.g. "stop the translation", "pause the report", "cancel that task") + - Is a context-dependent message ("fix this", "it's broken", "add a feature") + AND there is an active task whose Living UI ID matches the user's current + Living UI page (see above) + - Explicitly names a Living UI app/project that matches one of the active + tasks' Living UI bindings — even if the user is currently viewing a + different Living UI page. 
Chat is global; the user can talk about any + Living UI from anywhere. + +DO NOT route based on: + - "There's only one active task" — single active task is NOT a reason to route + a generic message to it. This bias previously caused multiple wrong-routing bugs. + - Generic acknowledgments ("thanks", "ok", "got it", "yes", "no") — these are + conversational. Create a new session. + - Topic resemblance alone — "I want to translate something" while a translate + task is running is a NEW request, not a modification of the active task, + unless the user explicitly says so. + - "[REPLYING TO PREVIOUS AGENT MESSAGE]:" markers — those are handled before + this prompt runs and won't reach you. + +Living UI specifics: + - The user's current Living UI page is a CONTEXT hint, not a hard binding. + - For context-dependent messages with no explicit reference, prefer the task + bound to the user's current Living UI. + - For messages that explicitly name a different Living UI (by app name, project + path, or feature description that clearly belongs to that other Living UI), + route to THAT Living UI's task instead. + - If no active task matches the referenced Living UI, choose new session. + +Using : + - It tells you what was just discussed across the whole agent (not just one + task). Use it to disambiguate context-dependent messages — e.g., "and + Spanish" makes sense if the previous message was about translation. + - If the recent conversation shows a task topic that has already COMPLETED + (no longer in ), prefer creating a new session over + routing to an unrelated active task. The completed task can't be resumed. + - If the recent conversation contains nothing relevant, treat the message + purely on its own merits per the rules above. + +The "agent asked a question, user is answering" case is handled +deterministically before this prompt runs (via the waiting_for_user_reply flag). +You do NOT need to consider it. 
diff --git a/agent_core/core/prompts/skill.py b/agent_core/core/prompts/skill.py index 070917d2..3300f53e 100644 --- a/agent_core/core/prompts/skill.py +++ b/agent_core/core/prompts/skill.py @@ -43,7 +43,6 @@ - File work → 'file_operations' - Web browsing/searching → 'web_research' - PDFs/documents → 'document_processing' - - GUI automation → 'gui_interaction' - Running commands → 'shell' - Select ONLY the sets needed (fewer is better for performance)- - If the source platform is an external messaging service, you MUST include that platform's action set, for example: @@ -122,7 +121,6 @@ - If the task involves files, include 'file_operations' - If the task involves web browsing or searching, include 'web_research' - If the task involves PDFs or documents, include 'document_processing' - - If the task involves GUI automation, include 'gui_interaction' - If the task involves running commands or scripts, include 'shell' diff --git a/agent_core/core/task/task.py b/agent_core/core/task/task.py index 3051823e..c90c3a21 100644 --- a/agent_core/core/task/task.py +++ b/agent_core/core/task/task.py @@ -71,6 +71,9 @@ class Task: waiting_for_user_reply: bool = False # Platform that started (or most recently resumed) this task — outbound messages route here source_platform: Optional[str] = None + # Named background workflow this task runs on behalf of (e.g. "memory_processing"). + # When set, the TaskManager auto-releases the corresponding lock on task end. 
+ workflow_id: Optional[str] = None def get_current_todo(self) -> Optional[TodoItem]: """ @@ -117,6 +120,7 @@ def to_dict(self) -> Dict[str, Any]: "chatserver_action_id": self.chatserver_action_id, "waiting_for_user_reply": self.waiting_for_user_reply, "source_platform": self.source_platform, + "workflow_id": self.workflow_id, } @classmethod @@ -144,4 +148,5 @@ def from_dict(cls, data: Dict[str, Any]) -> "Task": chatserver_action_id=data.get("chatserver_action_id"), waiting_for_user_reply=data.get("waiting_for_user_reply", False), source_platform=data.get("source_platform"), + workflow_id=data.get("workflow_id"), ) diff --git a/agent_core/utils/file_utils.py b/agent_core/utils/file_utils.py new file mode 100644 index 00000000..6cbbdca3 --- /dev/null +++ b/agent_core/utils/file_utils.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +"""File utility helpers for agent-core.""" + +from pathlib import Path + +# Maximum size (bytes) for append-only MD logs before trimming (default: 10 MB) +MAX_MD_FILE_BYTES = 10 * 1024 * 1024 + + +def rotate_md_file_if_needed(file_path: Path, max_bytes: int = MAX_MD_FILE_BYTES) -> None: + """Drop the oldest 1/3 of lines from *file_path* when it exceeds *max_bytes*. + + The file is trimmed in-place: the most recent 2/3 of lines are kept so the + agent never loses recent context and no extra archive files are created. 
+ """ + try: + if not file_path.exists() or file_path.stat().st_size < max_bytes: + return + lines = file_path.read_text(encoding="utf-8").splitlines(keepends=True) + keep_from = len(lines) // 3 # drop oldest 1/3, keep newest 2/3 + file_path.write_text("".join(lines[keep_from:]), encoding="utf-8") + except Exception: + pass # Never block a write due to trim failure diff --git a/agent_file_system/GLOBAL_LIVING_UI.md b/agent_file_system/GLOBAL_LIVING_UI.md new file mode 100644 index 00000000..a5a7060f --- /dev/null +++ b/agent_file_system/GLOBAL_LIVING_UI.md @@ -0,0 +1,55 @@ +# Global Living UI Configuration + +Global design preferences and rules applied to ALL Living UI projects. +Per-project settings from Phase 0 Q&A override these when they conflict. + +## Design Preferences + +- **Primary Color:** #FF4F18 +- **Secondary Color:** #262626 +- **Accent Color:** #E64515 +- **Background Style:** Default (use CraftBot design tokens) +- **Theme Mode:** Follow system (dark/light) +- **Font Family:** System default (Segoe UI, sans-serif) +- **Border Radius:** Rounded (var(--radius-md)) +- **Spacing:** Comfortable + +## Always Enforced + +- Must use preset UI components (Button, Card, Input, Modal, Table, etc.) 
+- Must use design tokens from global.css (no arbitrary colors) +- All API calls must handle errors with user-visible feedback +- No inline styles for standard UI elements +- Use react-toastify for notifications (already installed) +- Backend routes must use absolute imports (not relative) +- Images must always render with visible thumbnails +- Videos must show preview thumbnails +- Links should show preview cards when possible +- Empty states must have helpful messages with action buttons +- Loading spinners required for all async operations +- Use toast notifications for all CRUD feedback (success, error) +- Show confirmation dialogs for destructive actions (delete, reset) +- Forms must have inline validation with error messages +- Mobile responsive design required +- Hover states on all clickable elements +- Text must have sufficient contrast against background (dark text on light backgrounds, light text on dark backgrounds) +- Never use light text on light backgrounds or dark text on dark backgrounds + +## Optional Rules + +- [x] Enable drag-and-drop for reordering items +- [x] Add keyboard shortcuts for common actions +- [x] Show item count badges on categories/sections +- [x] Add search/filter bar to all list views +- [x] Support bulk selection and batch operations +- [ ] Enable dark mode only (ignore system preference) +- [ ] Add animations and transitions to UI interactions +- [ ] Show timestamps on all items (created/updated) +- [ ] Enable infinite scroll instead of pagination +- [ ] Add undo/redo support for user actions +- [ ] Show breadcrumb navigation for nested views + +## Custom Rules + + + diff --git a/agent_file_system/MEMORY.md b/agent_file_system/MEMORY.md index 96be4143..e5e37224 100644 --- a/agent_file_system/MEMORY.md +++ b/agent_file_system/MEMORY.md @@ -9,3 +9,6 @@ DO NOT copy and paste events here: This memory file only stores distilled memory ## Memory +[2026-03-26 13:20:40] [system_warning] Token usage reached 80% of maximum limit 
(6000000 tokens) +[2026-03-26 13:22:25] [system_limit] Task cancelled due to reaching maximum token limit (6000000 tokens) + diff --git a/agent_file_system/PROACTIVE.md b/agent_file_system/PROACTIVE.md index d7238f8b..c3706ac2 100644 --- a/agent_file_system/PROACTIVE.md +++ b/agent_file_system/PROACTIVE.md @@ -1,6 +1,6 @@ --- version: "1.0" -last_updated: null # Auto-updated by system (format: YYYY-MM-DDTHH:MM:SSZ) +last_updated: 2026-03-27T22:00:00Z # Auto-updated by system (format: YYYY-MM-DDTHH:MM:SSZ) --- # Proactive Tasks @@ -178,15 +178,20 @@ No long-term goals defined yet. ### Current Focus -No current focus defined. +Supporting Living UI development projects and maintaining development workflow efficiency. ### Recent Accomplishments -None yet. +- ✅ Successfully completed Living UI Todo Manager project with full kanban board functionality +- ✅ Implemented persistent data storage with SQLite backend +- ✅ Created responsive UI with drag-and-drop capabilities and modern design ### Upcoming Priorities -None defined. +- Monitor Living UI project health and performance metrics +- Proactively identify opportunities for new Living UI applications +- Maintain development environment and dependencies +- Track project completion rates and user satisfaction --- diff --git a/app/action/action_set.py b/app/action/action_set.py index 60adc8e3..aa8c02fd 100644 --- a/app/action/action_set.py +++ b/app/action/action_set.py @@ -20,8 +20,8 @@ "file_operations": "File and folder manipulation (read, write, search, edit)", "web_research": "Internet search and browsing (web search, fetch URLs)", "document_processing": "PDF and document handling (read, create, convert)", - # [V1.2.2] GUI mode temporarily disabled. Uncomment to re-enable. 
- # "gui_interaction": "Mouse, keyboard, and screen operations", + "image": "Image viewing, analysis, and OCR (screenshots, photos, diagrams)", + "video": "Video analysis and understanding — describe, summarise, or answer questions about video files (MP4, AVI, MOV)", "clipboard": "Clipboard read/write operations", "shell": "Command line and Python execution", } diff --git a/app/agent_base.py b/app/agent_base.py index aa1b85de..3ffbfe4f 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -10,7 +10,6 @@ CraftBot is an open-source, light version of AI agent developed by CraftOS. Here are the core features: - Todo-based task tracking -- Can switch between CLI/GUI mode Main agent cycle: - Receive query from user @@ -55,7 +54,13 @@ from app.vlm_interface import VLMInterface from app.database_interface import DatabaseInterface from app.logger import logger -from agent_core import MemoryManager, MemoryPointer, MemoryFileWatcher, create_memory_processing_task +from agent_core import ( + MemoryManager, + MemoryPointer, + MemoryFileWatcher, + create_memory_processing_task, + WorkflowLockManager, +) from app.context_engine import ContextEngine from app.state.state_manager import StateManager from app.state.agent_state import STATE @@ -69,7 +74,12 @@ from app.gui.handler import GUIHandler from app.scheduler import SchedulerManager from app.proactive import initialize_proactive_manager, get_proactive_manager -from app.ui_layer.settings.memory_settings import is_memory_enabled +from app.ui_layer.settings.memory_settings import ( + is_memory_enabled, + _parse_memory_items, + get_memory_max_items, + get_memory_prune_target, +) from agent_core import profile, profile_loop, OperationCategory from agent_core import ( # Registries for dependency injection @@ -106,6 +116,7 @@ class TriggerData: contact_id: str | None = None # Sender/chat ID from external platform channel_id: str | None = None # Channel/group ID from external platform payload: dict | None = None # Full trigger 
payload for passing extra data + living_ui_id: str | None = None # Living UI project ID if user is on a Living UI page class AgentBase: """ @@ -143,7 +154,7 @@ def __init__( llm_api_key: API key for the LLM provider. llm_base_url: Base URL for the LLM provider (optional). llm_model: Model name override (None = use registry default). - vlm_provider: Provider name for VLM (defaults to llm_provider). + vlm_provider: Provider name for VLM (defaults to llm_provider if None). vlm_model: VLM model name override (None = use registry default). deferred_init: If True, allow LLM/VLM initialization to be deferred until API key is configured (useful for first-time setup). @@ -154,6 +165,9 @@ def __init__( data_dir = data_dir, chroma_path=chroma_path ) + # Stores original task instructions keyed by session_id for LLM retry after failure + self._llm_retry_instructions: dict[str, str] = {} + # LLM + prompt plumbing (may be deferred if API key not yet configured) self.llm = LLMInterface( provider=llm_provider, @@ -162,11 +176,11 @@ def __init__( base_url=llm_base_url, deferred=deferred_init, ) - # VLM uses its own provider/model settings, falling back to LLM values - _vlm_provider = vlm_provider or llm_provider - _vlm_api_key = get_api_key(_vlm_provider) if vlm_provider else llm_api_key - _vlm_base_url = get_base_url(_vlm_provider) if vlm_provider else llm_base_url + _vlm_provider = vlm_provider or llm_provider + _vlm_api_key = get_api_key(_vlm_provider) if vlm_provider else llm_api_key + _vlm_base_url = get_base_url(_vlm_provider) if vlm_provider else llm_base_url + self.vlm = VLMInterface( provider=_vlm_provider, model=vlm_model, @@ -200,6 +214,11 @@ def __init__( ) self.action_router = ActionRouter(self.action_library, self.llm, self.context_engine) + # Workflow lock registry — prevents overlapping runs of named background + # workflows (e.g. memory processing, proactive cycle). Locks are released + # automatically when the owning task ends. 
+ self.workflow_lock_manager = WorkflowLockManager() + self.task_manager = TaskManager( db_interface=self.db_interface, event_stream_manager=self.event_stream_manager, @@ -207,6 +226,7 @@ def __init__( llm_interface=self.llm, context_engine=self.context_engine, on_task_end_callback=self._cleanup_session_triggers, + workflow_lock_manager=self.workflow_lock_manager, ) # Bind task_manager so state_manager can look up tasks by session_id @@ -441,7 +461,11 @@ async def react(self, trigger: Trigger) -> None: # Memory Processing # ===================================== - def create_process_memory_task(self) -> Optional[str]: + def create_process_memory_task( + self, + needs_pruning: bool = False, + prune_target: int = 0, + ) -> Optional[str]: """ Create a task to process unprocessed events and move them to memory. @@ -452,6 +476,7 @@ def create_process_memory_task(self) -> Optional[str]: 3. Check for duplicate memories using memory_search 4. Write important, unique events to MEMORY.md 5. Clear processed events from EVENT_UNPROCESSED.md + 6. If needs_pruning, run the pruning phase on MEMORY.md afterwards Returns: The task ID of the created task, or None if memory is disabled. 
@@ -461,7 +486,10 @@ def create_process_memory_task(self) -> Optional[str]: logger.info("[MEMORY] Memory is disabled, skipping process memory task") return None - logger.info("[MEMORY] Creating process memory task") + logger.info( + "[MEMORY] Creating process memory task" + + (" with pruning phase" if needs_pruning else "") + ) # Enable skip_unprocessed_logging to prevent infinite loops # (events generated during memory processing won't be added to EVENT_UNPROCESSED.md) @@ -469,7 +497,11 @@ def create_process_memory_task(self) -> Optional[str]: self.event_stream_manager.set_skip_unprocessed_logging(True) # Create task using the memory-processor skill - task_id = create_memory_processing_task(self.task_manager) + task_id = create_memory_processing_task( + self.task_manager, + needs_pruning=needs_pruning, + prune_target=prune_target, + ) logger.info(f"[MEMORY] Process memory task created: {task_id}") return task_id @@ -546,41 +578,85 @@ async def _handle_memory_processing_trigger(self) -> bool: logger.info("[MEMORY] Memory is disabled, skipping memory processing trigger") return False - task_created = False + # Early-exit if there's nothing to process (avoid touching the lock for a no-op). 
+ unprocessed_file = AGENT_FILE_SYSTEM_PATH / "EVENT_UNPROCESSED.md" + if not unprocessed_file.exists(): + logger.debug("[MEMORY] EVENT_UNPROCESSED.md not found") + return False try: - # Check if there are events to process - unprocessed_file = AGENT_FILE_SYSTEM_PATH / "EVENT_UNPROCESSED.md" - if unprocessed_file.exists(): - content = unprocessed_file.read_text(encoding="utf-8") - lines = content.strip().split("\n") - event_lines = [l for l in lines if l.strip() and l.strip().startswith("[")] - - if event_lines: - logger.info(f"[MEMORY] Processing {len(event_lines)} unprocessed events") - task_id = self.create_process_memory_task() - - if task_id: - # Queue trigger to start the task - trigger = Trigger( - fire_at=time.time(), - priority=60, - next_action_description="Process unprocessed events into long-term memory", - session_id=task_id, - payload={}, + content = unprocessed_file.read_text(encoding="utf-8") + except Exception as e: + logger.warning(f"[MEMORY] Failed to read EVENT_UNPROCESSED.md: {e}") + return False + + event_lines = [ + l for l in content.strip().split("\n") + if l.strip() and l.strip().startswith("[") + ] + if not event_lines: + logger.info("[MEMORY] No unprocessed events to process") + return False + + # Acquire the exclusive workflow lock. If another memory-processing task + # is still running (e.g. a slow prior run when 3am fires), skip this + # trigger — the lock is released automatically by TaskManager._end_task. + if not await self.workflow_lock_manager.try_acquire("memory_processing"): + logger.info( + "[MEMORY] memory_processing workflow already active; skipping trigger" + ) + return False + + try: + # Count items in MEMORY.md to decide whether the pruning phase + # should run alongside event processing. 
+ max_items = get_memory_max_items() + needs_pruning = False + memory_file = AGENT_FILE_SYSTEM_PATH / "MEMORY.md" + if memory_file.exists(): + try: + memory_items = _parse_memory_items( + memory_file.read_text(encoding="utf-8") + ) + if len(memory_items) >= max_items: + needs_pruning = True + logger.info( + f"[MEMORY] MEMORY.md has {len(memory_items)} items " + f"(>= {max_items}); pruning phase will run" ) - await self.triggers.put(trigger) - logger.info(f"[MEMORY] Queued trigger for memory processing task: {task_id}") - task_created = True - else: - logger.info("[MEMORY] No unprocessed events to process") - else: - logger.debug("[MEMORY] EVENT_UNPROCESSED.md not found") + except Exception as e: + logger.warning(f"[MEMORY] Failed to count MEMORY.md items: {e}") + + logger.info(f"[MEMORY] Processing {len(event_lines)} unprocessed events") + task_id = self.create_process_memory_task( + needs_pruning=needs_pruning, + prune_target=get_memory_prune_target(), + ) + + if not task_id: + # Task was not created (e.g. memory disabled mid-trigger). Release + # the lock so the next trigger can try again. + await self.workflow_lock_manager.release("memory_processing") + return False + + # Queue trigger to start the task. Lock is now owned by the task and + # will be released by TaskManager when the task ends. + trigger = Trigger( + fire_at=time.time(), + priority=60, + next_action_description="Process unprocessed events into long-term memory", + session_id=task_id, + payload={}, + ) + await self.triggers.put(trigger) + logger.info(f"[MEMORY] Queued trigger for memory processing task: {task_id}") + return True except Exception as e: + # Anything went wrong before the task took ownership — release the lock. 
logger.warning(f"[MEMORY] Failed to process memory: {e}") - - return task_created + await self.workflow_lock_manager.release("memory_processing") + return False # ===================================== # Workflow Routing @@ -605,6 +681,7 @@ def _extract_trigger_data(self, trigger: Trigger) -> TriggerData: contact_id=payload.get("contact_id", ""), channel_id=payload.get("channel_id", ""), payload=payload, + living_ui_id=payload.get("living_ui_id"), ) def _extract_user_message_from_trigger(self, trigger: Trigger) -> Optional[str]: @@ -1250,10 +1327,23 @@ async def _handle_react_error( f"[REACT ERROR] LLMConsecutiveFailureError detected - cancelling task {session_to_use} " "to prevent infinite retry loop." ) + # Cache instruction BEFORE cancellation removes task from tasks dict + failed_task = self.task_manager.tasks.get(session_to_use) if self.task_manager else None + if failed_task: + self._llm_retry_instructions[session_to_use] = failed_task.instruction if self.task_manager: await self.task_manager.mark_task_cancel( reason="LLM calls failed too many consecutive times. Task aborted." ) + if self.ui_controller: + from app.ui_layer.events import UIEvent, UIEventType + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.LLM_FATAL_ERROR, + data={"session_id": session_to_use}, + task_id=session_to_use, + ) + ) else: await self._create_new_trigger(session_to_use, action_output, STATE) except Exception as e: @@ -1508,6 +1598,21 @@ async def handle_limit_abort(self, session_id: str) -> None: task_id=session_id, ) + async def handle_llm_retry(self, session_id: str) -> None: + """Retry the original task after a fatal LLM failure. 
Resets the failure counter and re-submits.""" + instruction = self._llm_retry_instructions.pop(session_id, None) + if not instruction: + logger.warning(f"[LLM_RETRY] Cannot retry: no cached instruction for session {session_id}") + return + + try: + self.llm.reset_failure_counter() + except Exception as e: + logger.debug(f"[LLM_RETRY] Could not reset failure counter: {e}") + + if self.ui_controller: + await self.ui_controller.submit_message(instruction) + # ----- Trigger Management ----- async def _cleanup_session_triggers(self, session_id: str) -> None: @@ -1691,6 +1796,25 @@ def _format_sessions_for_routing( lines.append(f"Platform: {platform}") + # Add Living UI context if the user is on a Living UI page + living_ui_id = trigger.payload.get("living_ui_id") if trigger else None + if living_ui_id: + lines.append(f"Living UI ID: {living_ui_id}") + try: + from app.living_ui import get_living_ui_manager + mgr = get_living_ui_manager() + if mgr: + proj = mgr.get_project(living_ui_id) + if proj: + lines.append(f"Living UI Name: {proj.name}") + lines.append(f"Living UI Path: {proj.path}") + lines.append(f" Read {proj.path}/LIVING_UI.md for app context") + lines.append(f" If debugging issues, FIRST read these logs:") + lines.append(f" - {proj.path}/backend/logs/subprocess_output.log (crashes, stack traces)") + lines.append(f" - {proj.path}/backend/logs/frontend_console.log (frontend errors, network failures)") + except Exception: + pass + sections.append("\n".join(lines)) return "\n\n".join(sections) @@ -1764,7 +1888,8 @@ async def _route_to_session( item_content: str, existing_sessions: str, source_platform: str = "default", - recent_conversation: str = "No recent conversation history.", + current_living_ui_id: Optional[str] = None, + recent_conversation: str = "(no recent conversation)", ) -> Dict[str, Any]: """Route incoming item to appropriate session using unified prompt. 
@@ -1773,7 +1898,13 @@ async def _route_to_session( item_content: The content of the message or trigger description existing_sessions: Formatted string of existing sessions source_platform: The platform the message came from (e.g., "cli", "gui") - recent_conversation: Formatted string of recent conversation messages + current_living_ui_id: The Living UI page the user is currently viewing, + if any. Used by the prompt to default context-dependent messages + ("fix this", "it's broken") to that Living UI's task while still + allowing explicit cross-Living-UI references to override. + recent_conversation: Formatted recent messages across sessions for + cross-session context (helps disambiguate "and Spanish" style + continuations and references to completed tasks). Returns: Dict with routing decision containing: @@ -1786,6 +1917,7 @@ async def _route_to_session( item_content=item_content, source_platform=source_platform, existing_sessions=existing_sessions, + current_living_ui_id=current_living_ui_id or "(not on a Living UI page)", recent_conversation=recent_conversation, ) @@ -1806,229 +1938,288 @@ async def _route_to_session( logger.error("[ROUTING] Failed to parse routing response JSON") return {"action": "new", "session_id": "new", "reason": "Failed to parse routing response"} + # ───────────────────────────────────────────────────────────────────── + # Chat routing helpers + # ───────────────────────────────────────────────────────────────────── + + @staticmethod + def _build_living_ui_prefix(living_ui_id: str) -> str: + """Build the Living UI context prefix string prepended to a new session's + first message. 
Falls back to a minimal `[Living UI: {id}]` tag if the + Living UI manager / project lookup is unavailable.""" + try: + from app.living_ui import get_living_ui_manager + mgr = get_living_ui_manager() + if mgr: + proj = mgr.get_project(living_ui_id) + if proj: + return ( + f"[Living UI: {proj.name} ({living_ui_id}) | " + f"Path: {proj.path} | " + f"Read {proj.path}/LIVING_UI.md for app context]" + f" If debugging issues, FIRST read these logs:" + f" - {proj.path}/backend/logs/subprocess_output.log (crashes, stack traces)" + f" - {proj.path}/backend/logs/frontend_console.log (frontend errors, network failures)" + ) + except Exception: + pass + return f"[Living UI: {living_ui_id}]" + + def _post_third_party_notification(self, payload: Dict, platform: str) -> None: + """Post a deterministic notification about a third-party external message + to the main event stream. No session, no trigger, no LLM.""" + source = payload.get("source") or platform + contact_name = payload.get("contact_name") or payload.get("contact_id") or "unknown sender" + message_body = payload.get("message_body") or "" + preview = message_body.strip() + if len(preview) > 500: + preview = preview[:500] + "…" + notification = ( + f"📧 New {source} message from {contact_name}" + f"{(': ' + preview) if preview else ''}\n\n" + f"Reply here if you'd like me to do anything with it." + ) + self.event_stream_manager.get_main_stream().log( + "agent message to platform: CraftBot Interface", + notification, + display_message=notification, + ) + self.state_manager._append_to_conversation_history("agent", notification) + self.state_manager.bump_event_stream() + + async def _fire_session( + self, + session_id: str, + chat_content: str, + platform: str, + living_ui_id: Optional[str], + ) -> bool: + """Fire a trigger on an existing session and update task/UI state. + + Returns True if the trigger was found and fired, False otherwise. 
+ """ + fired = await self.triggers.fire( + session_id, message=chat_content, platform=platform, + living_ui_id=living_ui_id, + ) + if not fired: + return False + + # Reset waiting-for-reply flag and update source platform + if self.task_manager: + task = self.task_manager.tasks.get(session_id) + if task: + if task.waiting_for_user_reply: + task.waiting_for_user_reply = False + logger.info(f"[TASK] Task {session_id} no longer waiting for user reply") + if platform and task.source_platform != platform: + logger.info( + f"[TASK] Task {session_id} source_platform switched " + f"from {task.source_platform!r} to {platform!r}" + ) + task.source_platform = platform + + # UI status: this task back to running, agent state to working if + # nothing else is waiting. + if self.ui_controller: + from app.ui_layer.events import UIEvent, UIEventType + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.TASK_UPDATE, + data={"task_id": session_id, "status": "running"}, + ) + ) + triggers = await self.triggers.list_triggers() + has_waiting_tasks = any( + getattr(t, "waiting_for_reply", False) + for t in triggers + if t.session_id != session_id + ) + if not has_waiting_tasks: + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.AGENT_STATE_CHANGED, + data={ + "state": "working", + "status_message": "Agent is working...", + }, + ) + ) + return True + + async def _create_new_session_trigger( + self, + chat_content: str, + payload: Dict, + platform: str, + gui_mode: Optional[bool], + ) -> None: + """Start a new session and queue a trigger to handle this message.""" + await self.state_manager.start_session(gui_mode) + + # Prepend Living UI context to the message if the user is on a Living UI page. + living_ui_id = payload.get("living_ui_id") + if living_ui_id: + chat_content = f"{self._build_living_ui_prefix(living_ui_id)}\n{chat_content}" + + # Log the user message to MAIN stream (not the active task's stream) and skip + # record_conversation_message. 
state_manager.record_user_message would fall + # back to self.task.id (the currently-running task) when no session_id is + # passed and would also push the message into the global _conversation_history, + # which gets re-injected into every active task's + # prompt block — causing the active task to see and act on a message that + # was meant for a brand-new session. The trigger description below already + # carries the message into the new session, so nothing is lost. + event_label = f"user message from platform: {platform}" if platform else "user message" + self.event_stream_manager.get_main_stream().log( + event_label, chat_content, display_message=chat_content, + ) + self.state_manager._append_to_conversation_history("user", chat_content) + self.state_manager.bump_event_stream() + + trigger_payload = { + "gui_mode": gui_mode, + "platform": platform, + "user_message": chat_content, + } + if payload.get("living_ui_id"): + trigger_payload["living_ui_id"] = payload["living_ui_id"] + if payload.get("external_event"): + trigger_payload["is_self_message"] = payload.get("is_self_message", False) + trigger_payload["contact_id"] = payload.get("contact_id", "") + trigger_payload["channel_id"] = payload.get("channel_id", "") + if payload.get("pre_selected_skills"): + trigger_payload["pre_selected_skills"] = payload["pre_selected_skills"] + + # Steer the action-selection LLM to use the right platform-specific + # send action when replying. 
+ platform_hint = "" + if platform and platform.lower() != "craftbot interface": + platform_hint = f" from {platform} (reply on {platform}, NOT send_message)" + + await self.triggers.put( + Trigger( + fire_at=time.time(), + priority=3, + next_action_description=( + "Please perform action that best suit this user chat " + f"you just received{platform_hint}: {chat_content}" + ), + session_id=await self._generate_unique_session_id(), + payload=trigger_payload, + ), + skip_merge=True, + ) + + # ───────────────────────────────────────────────────────────────────── + # Chat message entry point + # ───────────────────────────────────────────────────────────────────── + async def _handle_chat_message(self, payload: Dict): + """Decide where an incoming chat message goes. + + Layered routing rules (deterministic first, LLM only as last resort): + 1. Third-party external (no is_self_message): post notification, done. + 2. UI reply with valid target_session_id: fire that session. + 3. UI reply marker without valid target: new session, reply context + stays embedded in the message text. + 4. Exactly one task is waiting_for_user_reply: fire that one. + 5. Active tasks exist and message is genuinely ambiguous: routing LLM + with conservative prompt (defaults to new session). Handles Living + UI cross-references — chat is global, so a message about Living UI + B while viewing Living UI A should still route to B's task. + 6. Default: new session. + + Routing only decides *where* the message goes. The new session's first + action-selection LLM still picks send_message / task_start(simple) / + task_start(complex) as appropriate. 
+ """ try: - user_input: str = payload.get("text", "") - if not user_input: + chat_content = payload.get("text", "") + if not chat_content: logger.warning("Received empty message.") return - chat_content = user_input logger.info(f"[CHAT RECEIVED] {chat_content}") - # clear any stuck consecutive-failure state from a prior aborted task so the next - # LLM call actually hits the provider instead of short-circuiting. + # Clear any stuck consecutive-failure state from a prior aborted task. try: self.llm.reset_failure_counter() except Exception as e: logger.debug(f"[CHAT] Could not reset LLM failure counter: {e}") gui_mode = payload.get("gui_mode") - - # Determine platform - use payload's platform if available, otherwise default - # External messages (WhatsApp, Telegram, etc.) have platform set by _handle_external_event - # Interface/CLI messages don't have platform in payload, so use "CraftBot Interface" - if payload.get("platform"): - # External message - capitalize for display (e.g., "whatsapp" -> "Whatsapp") - platform = payload["platform"].capitalize() - else: - # Local Interface/CLI message - platform = "CraftBot Interface" - - # Direct reply bypass - skip routing LLM when target_session_id is provided + platform = payload["platform"].capitalize() if payload.get("platform") else "CraftBot Interface" target_session_id = payload.get("target_session_id") - if target_session_id: - logger.info(f"[CHAT] Direct reply to session {target_session_id}") - - # Fire the target trigger directly, bypassing routing LLM - fired = await self.triggers.fire( - target_session_id, message=chat_content, platform=platform - ) - - if fired: - logger.info(f"[CHAT] Successfully resumed session {target_session_id}") + living_ui_id = payload.get("living_ui_id") - # Reset task's waiting_for_user_reply flag - if self.task_manager: - task = self.task_manager.tasks.get(target_session_id) - if task and task.waiting_for_user_reply: - task.waiting_for_user_reply = False - logger.info(f"[TASK] Task 
{target_session_id} no longer waiting for user reply") + # ── Rule 1: Third-party external message → notification only. + if payload.get("external_event") is True and not payload.get("is_self_message", False): + logger.info(f"[CHAT] Third-party external from {platform} — posting notification, no session") + self._post_third_party_notification(payload, platform) + return - # Reset task status from "waiting" to "running" when user replies - if self.ui_controller: - from app.ui_layer.events import UIEvent, UIEventType - - self.ui_controller.event_bus.emit( - UIEvent( - type=UIEventType.TASK_UPDATE, - data={ - "task_id": target_session_id, - "status": "running", - }, - ) - ) + active_task_ids = self.state_manager.get_main_state().active_task_ids - # Check if there are still other tasks waiting - triggers = await self.triggers.list_triggers() - has_waiting_tasks = any( - getattr(t, 'waiting_for_reply', False) - for t in triggers - if t.session_id != target_session_id - ) - if not has_waiting_tasks: - self.ui_controller.event_bus.emit( - UIEvent( - type=UIEventType.AGENT_STATE_CHANGED, - data={ - "state": "working", - "status_message": "Agent is working...", - }, - ) - ) - - return # Task will resume with user message in event stream - - # If fire() returns False, no waiting trigger found for this session - # Fall through to normal routing (conversation mode) + # ── Rule 2: Explicit UI reply with valid target_session_id. 
+ if target_session_id: + logger.info(f"[CHAT] UI reply targeting session {target_session_id}") + if await self._fire_session(target_session_id, chat_content, platform, living_ui_id): + return logger.warning( - f"[CHAT] Session {target_session_id} not found or expired, falling through to normal routing" + f"[CHAT] target_session_id {target_session_id} not found — falling through to next rule" ) - # Check active tasks — route message to matching session if possible - # Use active_task_ids from state_manager (not just triggers in queue) to ensure - # all running tasks are visible for routing, not just those waiting in queue - active_task_ids = self.state_manager.get_main_state().active_task_ids - triggers = await self.triggers.list_triggers() # Still get triggers for waiting_for_reply status + # ── Rule 3: UI reply marker present but no valid target → new session. + # User replied to a main-stream message (notification, conversation reply, etc). + # The reply context stays embedded in chat_content via the marker block. + if "[REPLYING TO PREVIOUS AGENT MESSAGE]:" in chat_content: + logger.info("[CHAT] UI reply marker without valid target — creating new session") + await self._create_new_session_trigger(chat_content, payload, platform, gui_mode) + return + + # ── Rule 4: Exactly one task is waiting_for_user_reply. + waiting_session_ids = [] + if self.task_manager: + for tid in active_task_ids: + task = self.task_manager.tasks.get(tid) + if task and getattr(task, "waiting_for_user_reply", False): + waiting_session_ids.append(tid) + if len(waiting_session_ids) == 1: + sid = waiting_session_ids[0] + logger.info(f"[CHAT] Routing to single waiting session {sid}") + if await self._fire_session(sid, chat_content, platform, living_ui_id): + return + # ── Rule 5: Active tasks exist and signal is ambiguous → conservative routing LLM. 
+ # Also handles Living UI cross-references: chat is global, so a message + # explicitly about Living UI B while viewing Living UI A should still + # route to B's task. The LLM sees each session's Living UI binding and + # the user's current Living UI to decide. if active_task_ids: - # Use unified routing prompt with rich task context - existing_sessions = self._format_sessions_for_routing(active_task_ids, triggers) + active_triggers = await self.triggers.list_triggers() + existing_sessions = self._format_sessions_for_routing(active_task_ids, active_triggers) recent_conversation = self._format_recent_conversation(limit=10) routing_result = await self._route_to_session( item_type="message", item_content=chat_content, existing_sessions=existing_sessions, source_platform=platform, + current_living_ui_id=living_ui_id, recent_conversation=recent_conversation, ) - - action = routing_result.get("action", "new") - - if action == "route": - matched_session_id = routing_result.get("session_id", "new") - if matched_session_id != "new": - # Fire the matched trigger so it gets priority, - # and attach the new user message so react() sees it. - # This also works for active triggers (being processed). - fired = await self.triggers.fire( - matched_session_id, message=chat_content, platform=platform - ) + if routing_result.get("action") == "route": + matched = routing_result.get("session_id", "new") + if matched != "new": logger.info( - f"[CHAT] Routed message to existing session {matched_session_id} " - f"(fired={fired}, reason: {routing_result.get('reason', 'N/A')})" + f"[CHAT] LLM routed to {matched}: {routing_result.get('reason', 'N/A')}" ) + if await self._fire_session(matched, chat_content, platform, living_ui_id): + return + logger.warning(f"[CHAT] LLM routed to {matched} but trigger not found — creating new session") - # Reset task's waiting_for_user_reply flag and switch source_platform - # so subsequent outbound messages route to the platform the user is now on. 
- if self.task_manager: - task = self.task_manager.tasks.get(matched_session_id) - if task: - if task.waiting_for_user_reply: - task.waiting_for_user_reply = False - logger.info(f"[TASK] Task {matched_session_id} no longer waiting for user reply") - if platform and task.source_platform != platform: - logger.info( - f"[TASK] Task {matched_session_id} source_platform switched " - f"from {task.source_platform!r} to {platform!r}" - ) - task.source_platform = platform - - # Reset task status from "waiting" to "running" when user replies - # Update UI regardless of fire() result - user has replied so we should - # acknowledge it. If fire() failed, the task may be stale but we still - # want to reset the waiting indicator. - if self.ui_controller: - from app.ui_layer.events import UIEvent, UIEventType - - self.ui_controller.event_bus.emit( - UIEvent( - type=UIEventType.TASK_UPDATE, - data={ - "task_id": matched_session_id, - "status": "running", - }, - ) - ) - - # Check if there are still other tasks waiting - # If not, update global agent state back to working - triggers = await self.triggers.list_triggers() - has_waiting_tasks = any( - getattr(t, 'waiting_for_reply', False) - for t in triggers - if t.session_id != matched_session_id - ) - if not has_waiting_tasks: - self.ui_controller.event_bus.emit( - UIEvent( - type=UIEventType.AGENT_STATE_CHANGED, - data={ - "state": "working", - "status_message": "Agent is working...", - }, - ) - ) - - if not fired: - logger.warning( - f"[CHAT] Trigger not found for session {matched_session_id} - " - "message may not be delivered to task" - ) - - # Always trust routing decision - don't create new session - return - - # No existing triggers matched or action == "new" — create a fresh session - await self.state_manager.start_session(gui_mode) - self.state_manager.record_user_message(chat_content, platform=platform) - - # skip_merge=True because we already did routing above - trigger_payload = { - "gui_mode": gui_mode, - "platform": 
platform, - "user_message": chat_content, # Original user message for task event stream - } - # Carry external message context for platform-aware routing - if payload.get("external_event"): - trigger_payload["is_self_message"] = payload.get("is_self_message", False) - trigger_payload["contact_id"] = payload.get("contact_id", "") - trigger_payload["channel_id"] = payload.get("channel_id", "") - - # Carry pre-selected skills from skill slash commands (e.g., /pdf, /docx) - if payload.get("pre_selected_skills"): - trigger_payload["pre_selected_skills"] = payload["pre_selected_skills"] - - # Include platform in the action description so the LLM picks - # the correct platform-specific send action for replies. - # Must be directive (not just informational) for weaker LLMs. - platform_hint = "" - if platform and platform.lower() != "craftbot interface": - platform_hint = f" from {platform} (reply on {platform}, NOT send_message)" - - await self.triggers.put( - Trigger( - fire_at=time.time(), - priority=3, - next_action_description=( - "Please perform action that best suit this user chat " - f"you just received{platform_hint}: {chat_content}" - ), - session_id=await self._generate_unique_session_id(), - payload=trigger_payload, - ), - skip_merge=True, - ) + # ── Rule 6: Default — create a new session. + await self._create_new_session_trigger(chat_content, payload, platform, gui_mode) except Exception as e: logger.error(f"Error handling incoming message: {e}", exc_info=True) @@ -2140,6 +2331,10 @@ async def _handle_external_event(self, payload: Dict) -> None: "channel_id": channel_id, "channel_name": channel_name, "message_context": message_context, + # Raw fields for the third-party direct-notification path so it can + # build a clean user-facing message without parsing the LLM wrapper. 
+ "source": source, + "message_body": message_body, }) except Exception as e: @@ -2177,7 +2372,7 @@ def _generate_role_info_prompt(self) -> str: Note: Call `self._get_interface_capabilities_prompt()` and append it to include interface-specific capabilities (e.g., file attachment support in browser mode). """ - base_prompt = "You are a general computer-use AI agent that can switch between CLI/GUI mode." + base_prompt = "You are a general computer-use AI agent." return base_prompt + self._get_interface_capabilities_prompt() def _build_db_interface(self, *, data_dir: str, chroma_path: str): @@ -3035,6 +3230,14 @@ def print_startup_step(step: int, total: int, message: str): # Shutdown scheduler (handles all periodic tasks including memory processing) self.is_running = False await self.scheduler.shutdown() + # Stop all Living UI projects (kill backend/frontend processes) + try: + from app.living_ui import get_living_ui_manager + lui_mgr = get_living_ui_manager() + if lui_mgr: + await lui_mgr.stop_all_projects() + except Exception as e: + logger.warning(f"[SHUTDOWN] Living UI cleanup error: {e}") # Gracefully shutdown MCP connections await self._shutdown_mcp() # Stop external communications diff --git a/app/config.py b/app/config.py index b02818cb..4f791e57 100644 --- a/app/config.py +++ b/app/config.py @@ -6,16 +6,49 @@ """ import json +import os import sys from pathlib import Path from typing import Any, Dict, Optional +def _frozen_user_data_root() -> Path: + """Return the per-user data directory for the frozen agent. + + When packaged as a PyInstaller binary the agent must NOT write + runtime files (agent_file_system, chroma_db_memory, logs, dbs) + into: + - sys._MEIPASS — wiped when the process exits + - the install directory (Program Files / %LOCALAPPDATA%\\Programs) + — install dirs by Windows convention are read-only-from-the-user's + perspective, and writing user data there mixes binaries with state. 
+ + Mirrors craftbot.py's _user_data_dir() so the installer wizard and the + agent agree on where things live (e.g. logs). + """ + if sys.platform == "win32": + root = os.environ.get("LOCALAPPDATA") or os.path.expanduser(r"~\AppData\Local") + path = Path(root) / "CraftBot" + elif sys.platform == "darwin": + path = Path(os.path.expanduser("~/Library/Application Support/CraftBot")) + else: + root = os.environ.get("XDG_DATA_HOME") or os.path.expanduser("~/.local/share") + path = Path(root) / "craftbot" + path.mkdir(parents=True, exist_ok=True) + return path + + def get_project_root() -> Path: - """Get the project root directory""" + """Get the project root directory. + + Source mode: / — relative to this file. + Frozen mode: the per-user data dir (%LOCALAPPDATA%\\CraftBot on Windows, + ~/Library/Application Support/CraftBot on macOS, ${XDG_DATA_HOME}/craftbot + on Linux). Runtime state (agent_file_system, chroma_db_memory, dbs, logs) + lives there so the install dir stays clean and uninstalls don't lose data. 
+ """ if getattr(sys, 'frozen', False): - # Frozen exe: use CWD so logs/workspace persist (not the temp _MEIPASS dir) - return Path.cwd() + return _frozen_user_data_root() return Path(__file__).resolve().parent.parent PROJECT_ROOT = get_project_root() diff --git a/app/config/settings.json b/app/config/settings.json index 4d5efca0..12c00359 100644 --- a/app/config/settings.json +++ b/app/config/settings.json @@ -1,5 +1,5 @@ { - "version": "1.2.3", + "version": "1.3.0", "general": { "agent_name": "CraftBot", "os_language": "en" @@ -8,7 +8,10 @@ "enabled": true }, "memory": { - "enabled": true + "enabled": true, + "max_items": 200, + "prune_target": 135, + "item_word_limit": 150 }, "model": { "llm_provider": "byteplus", @@ -22,8 +25,7 @@ "openai": "", "anthropic": "", "google": "", - "byteplus": "", - "deepseek": "" + "byteplus": "" }, "endpoints": { "remote_model_url": "", @@ -76,4 +78,4 @@ "google": true, "byteplus": true } -} +} \ No newline at end of file diff --git a/app/config/skills_config.json b/app/config/skills_config.json index a1917de2..5f963ad8 100644 --- a/app/config/skills_config.json +++ b/app/config/skills_config.json @@ -6,7 +6,10 @@ "pdf", "playwright-mcp", "pptx", - "xlsx" + "xlsx", + "living-ui-creator", + "living-ui-manager", + "living-ui-modify" ], "disabled_skills": [ "cli-anything", diff --git a/app/credentials/handlers.py b/app/credentials/handlers.py index e4c2c40e..830c2b07 100644 --- a/app/credentials/handlers.py +++ b/app/credentials/handlers.py @@ -1,6 +1,7 @@ """All integration credential handlers + registry.""" from __future__ import annotations + import base64 import hashlib import logging @@ -52,7 +53,7 @@ async def handle(self, sub: str, args: list[str]) -> Tuple[bool, str]: # ═══════════════════════════════════════════════════════════════════ class GoogleHandler(IntegrationHandler): - SCOPES = "https://www.googleapis.com/auth/gmail.modify https://www.googleapis.com/auth/calendar https://www.googleapis.com/auth/drive 
https://www.googleapis.com/auth/contacts.readonly https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/userinfo.profile" + SCOPES = "https://www.googleapis.com/auth/gmail.modify https://www.googleapis.com/auth/calendar https://www.googleapis.com/auth/drive https://www.googleapis.com/auth/contacts.readonly https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/youtube.readonly https://www.googleapis.com/auth/youtube.force-ssl" async def login(self, args): from app.config import GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET diff --git a/app/data/action/CUSTOM_ACTION_GUIDE.md b/app/data/action/CUSTOM_ACTION_GUIDE.md index ec498f63..f086bf0a 100644 --- a/app/data/action/CUSTOM_ACTION_GUIDE.md +++ b/app/data/action/CUSTOM_ACTION_GUIDE.md @@ -299,7 +299,6 @@ Action sets group related actions together. During task execution, only actions | `"file_operations"` | File and folder manipulation | | `"web_research"` | Internet search and browsing | | `"document_processing"` | PDF and document handling | -| `"gui_interaction"` | Mouse, keyboard, screen operations | | `"clipboard"` | Clipboard operations | | `"shell"` | Command line and Python execution | diff --git a/app/data/action/action_set_management.py b/app/data/action/action_set_management.py index 216bef34..8eb840dd 100644 --- a/app/data/action/action_set_management.py +++ b/app/data/action/action_set_management.py @@ -24,7 +24,7 @@ "action_sets": { "type": "array", "items": {"type": "string"}, - "example": ["gui_interaction", "shell"], + "example": ["file_operations", "shell"], "description": ( "List of action set names to add. " "Use 'list_action_sets' to see available options." @@ -101,7 +101,7 @@ def add_action_sets(input_data: dict) -> dict: "action_sets": { "type": "array", "items": {"type": "string"}, - "example": ["gui_interaction"], + "example": ["file_operations"], "description": "List of action set names to remove. 
Cannot remove 'core' set.", }, }, @@ -124,7 +124,7 @@ def add_action_sets(input_data: dict) -> dict: }, }, test_payload={ - "action_sets": ["gui_interaction"], + "action_sets": ["file_operations"], "simulated_mode": True, }, ) @@ -204,7 +204,6 @@ def list_action_sets(input_data: dict) -> dict: "file_operations": "File and folder manipulation", "web_research": "Web search and browsing", "document_processing": "PDF and document handling", - "gui_interaction": "Mouse, keyboard, screen", "clipboard": "Clipboard operations", "shell": "Command line execution", }, diff --git a/app/data/action/describe_image.py b/app/data/action/describe_image.py index 67e58e20..6ab2cade 100644 --- a/app/data/action/describe_image.py +++ b/app/data/action/describe_image.py @@ -48,9 +48,36 @@ def view_image(input_data: dict) -> dict: prompt = str(input_data.get('prompt', '')).strip() or "Describe the content of this image in detail." if simulated_mode: - # Return mock result for testing return {'status': 'success', 'description': 'A simulated image description showing various objects and colors.', 'message': ''} + # ── VLM availability guard ────────────────────────────────────────── + import app.internal_action_interface as iai + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_vlm_provider + + vlm = iai.InternalActionInterface.vlm_interface + current_provider = get_vlm_provider() + registry_vlm = MODEL_REGISTRY.get(current_provider, {}).get(InterfaceType.VLM) + + if vlm is None or not registry_vlm: + return { + 'status': 'error', + 'description': '', + 'message': ( + f"The current VLM provider '{current_provider}' does not support vision/image analysis. 
" + "Please inform the user and suggest switching to a provider that supports VLM.\n\n" + "Providers with VLM support: openai, anthropic, gemini, byteplus.\n\n" + "To switch provider, edit 'app/config/settings.json' and update:\n" + ' "vlm_provider": "" (e.g. "anthropic")\n' + ' "vlm_model": "" (e.g. "claude-sonnet-4-6" for anthropic)\n\n' + "Make sure the corresponding API key is configured under 'api_keys' in the same file. " + "If no API key is set, ask the user to provide one. " + "The system will automatically detect the config change and reload." + ), + } + # ─────────────────────────────────────────────────────────────────── + if not image_path: return {'status': 'error', 'description': '', 'message': 'image_path is required.'} diff --git a/app/data/action/generate_image.py b/app/data/action/generate_image.py index e692db32..f2aa6987 100644 --- a/app/data/action/generate_image.py +++ b/app/data/action/generate_image.py @@ -77,7 +77,7 @@ "description": "Status message or error message." 
} }, - requirement=["google-generativeai", "Pillow"], + requirement=["google-genai", "Pillow"], test_payload={ "prompt": "A cute cartoon cat sitting on a rainbow", "resolution": "1K", @@ -241,7 +241,7 @@ def _ensure_package(pkg_name): subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg_name, '--quiet']) try: - _ensure_package('google-generativeai') + _ensure_package('google-genai') _ensure_package('Pillow') except Exception as e: return { @@ -253,7 +253,7 @@ def _ensure_package(pkg_name): } try: - import google.generativeai as genai + from google import genai from PIL import Image import io import base64 diff --git a/app/data/action/http_request.py b/app/data/action/http_request.py index 643299e9..970c5d51 100644 --- a/app/data/action/http_request.py +++ b/app/data/action/http_request.py @@ -170,23 +170,47 @@ def send_http_requests(input_data: dict) -> dict: if not url or not (url.startswith('http://') or url.startswith('https://')): return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Invalid or missing URL.'} - # SSRF protection: block requests to private/internal networks and cloud metadata + # SSRF protection: block requests to private/internal networks and cloud metadata. + # Loopback is allowed only when the port belongs to a registered Living UI project, + # so the agent can talk to its own apps without exposing arbitrary localhost services. 
try: from urllib.parse import urlparse as _urlparse import ipaddress as _ipaddress import socket as _socket _parsed = _urlparse(url) _hostname = _parsed.hostname or '' + _port = _parsed.port # Block cloud metadata endpoints _BLOCKED_HOSTS = {'169.254.169.254', 'metadata.google.internal', 'metadata.internal'} if _hostname in _BLOCKED_HOSTS: return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Blocked: requests to cloud metadata endpoints are not allowed.'} + + def _living_ui_ports() -> set: + try: + from app.living_ui import get_living_ui_manager + _mgr = get_living_ui_manager() + if not _mgr: + return set() + _ports = set() + for _p in _mgr.projects.values(): + if _p.port: + _ports.add(int(_p.port)) + if _p.backend_port: + _ports.add(int(_p.backend_port)) + return _ports + except Exception: + return set() + # Resolve hostname and check for private IPs try: _resolved = _socket.getaddrinfo(_hostname, None) for _family, _type, _proto, _canonname, _sockaddr in _resolved: _ip = _ipaddress.ip_address(_sockaddr[0]) - if _ip.is_private or _ip.is_loopback or _ip.is_link_local: + if _ip.is_loopback: + if _port and _port in _living_ui_ports(): + continue # Allowed: targeting a known Living UI port + return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':f'Blocked: requests to loopback addresses ({_hostname}) are only allowed for registered Living UI ports. 
Use the living_ui_http action with project_id to talk to your Living UI.'} + if _ip.is_private or _ip.is_link_local: return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':f'Blocked: requests to private/internal addresses ({_hostname}) are not allowed.'} except (socket.gaierror, ValueError): pass # Let the request library handle DNS resolution errors diff --git a/app/data/action/keyboard_hotkey.py b/app/data/action/keyboard_hotkey.py deleted file mode 100644 index c9259e1f..00000000 --- a/app/data/action/keyboard_hotkey.py +++ /dev/null @@ -1,69 +0,0 @@ -from agent_core import action - -@action( - name="keyboard_hotkey", - description="Sends arbitrary keystrokes or key-combination shortcuts to the currently focused window (e.g., 'ctrl+c', ['alt+tab', 'f5']).", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "keys": { - "type": [ - "string", - "array" - ], - "example": [ - "ctrl+c", - "alt+tab" - ], - "description": "A single key/combo string (\"enter\", \"ctrl+shift+t\") or a list of such strings executed in order (required)." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if all inputs were sent, 'error' otherwise." - }, - "message": { - "type": "string", - "example": "Invalid key string.", - "description": "Optional error message on failure." 
- } - }, - requirement=["pyautogui"], - test_payload={ - "keys": [ - "ctrl+c", - "alt+tab" - ], - "simulated_mode": False - } -) -def keyboard_input(input_data: dict) -> dict: - import json, sys, subprocess, importlib - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - import pyautogui - raw_keys = input_data.get('keys') - if raw_keys is None or (isinstance(raw_keys, str) and not raw_keys.strip()) or (isinstance(raw_keys, list) and not raw_keys): - return {'status': 'error', 'message': 'keys is required.'} - exit() - keys_seq = raw_keys if isinstance(raw_keys, list) else [raw_keys] - try: - for entry in keys_seq: - if not isinstance(entry, str) or not entry.strip(): - raise ValueError('Invalid key string.') - combo = [k.strip() for k in entry.lower().split('+') if k.strip()] - if len(combo) == 0: - raise ValueError('Invalid key string.') - if len(combo) == 1: - pyautogui.press(combo[0]) - else: - pyautogui.hotkey(*combo) - return {'status': 'success', 'message': ''} - except Exception as e: - return {'status': 'error', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/keyboard_type.py b/app/data/action/keyboard_type.py deleted file mode 100644 index 9981c2d0..00000000 --- a/app/data/action/keyboard_type.py +++ /dev/null @@ -1,56 +0,0 @@ -from agent_core import action - -@action( - name="keyboard_type", - description="Types the given text at the current keyboard focus in any active application window.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "text": { - "type": "string", - "example": "Hello, world!", - "description": "The exact text to type (required)." - }, - "interval": { - "type": "number", - "example": 0.05, - "description": "Optional delay in seconds between each keystroke. Defaults to 0 (instant)." 
- } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if typing completed, 'error' otherwise." - }, - "message": { - "type": "string", - "example": "No text provided.", - "description": "Optional error message." - } - }, - requirement=["pyautogui"], - test_payload={ - "text": "Hello, world!", - "interval": 0.05, - "simulated_mode": False - } -) -def keyboard_typing(input_data: dict) -> dict: - import json, sys, subprocess, importlib - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - import pyautogui - text = input_data.get('text', '') - interval = float(input_data.get('interval', 0)) - if not text: - return {'status': 'error', 'message': 'No text provided.'} - try: - pyautogui.write(text, interval=interval) - return {'status': 'success', 'message': ''} - except Exception as e: - return {'status': 'error', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/living_ui_actions.py b/app/data/action/living_ui_actions.py new file mode 100644 index 00000000..1c3c28e5 --- /dev/null +++ b/app/data/action/living_ui_actions.py @@ -0,0 +1,548 @@ +"""Living UI actions for agent to notify UI status and progress.""" + +from agent_core import action + + +@action( + name="living_ui_notify_ready", + description=( + "Launch, verify, and serve a Living UI project. " + "Call this after building the Living UI code. " + "This action installs dependencies, runs tests, starts the backend and frontend, " + "and notifies the browser. Returns test errors if anything fails." 
+ ), + default=False, + mode="CLI", + action_sets=["living_ui"], + parallelizable=False, + input_schema={ + "project_id": { + "type": "string", + "example": "abc12345", + "description": "The Living UI project ID (provided in task instruction).", + }, + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "Result: 'success' or 'error'.", + }, + "message": { + "type": "string", + "example": "Living UI abc12345 is now ready at http://localhost:3100", + "description": "Status message.", + }, + "test_errors": { + "type": "array", + "example": ["[import] Failed to import routes: ..."], + "description": "List of test errors if launch failed. Fix these and call again.", + }, + }, + test_payload={ + "project_id": "test123", + "simulated_mode": True, + }, +) +async def living_ui_notify_ready(input_data: dict) -> dict: + """Launch, verify, and notify browser that a Living UI is ready.""" + project_id = input_data.get("project_id", "") + simulated_mode = input_data.get("simulated_mode", False) + + if not project_id: + return {"status": "error", "message": "project_id is required"} + + if simulated_mode: + return {"status": "success", "message": f"Living UI {project_id} is now ready at http://localhost:3100"} + + try: + from app.living_ui import get_living_ui_manager, broadcast_living_ui_ready + + manager = get_living_ui_manager() + if not manager: + return {"status": "error", "message": "Living UI manager not initialized. 
Browser adapter may not be running."} + + # Run the full pipeline: install → test → launch → verify + result = await manager.launch_and_verify(project_id) + + if result["status"] == "success": + # Notify browser that the UI is ready + url = result.get("url", "") + port = result.get("port", 0) + await broadcast_living_ui_ready(project_id, url, port) + return { + "status": "success", + "message": f"Living UI {project_id} is now ready at {url}", + } + else: + # Return errors directly so the agent can fix them + errors = result.get("errors", []) + errors_str = "\n".join(errors[:10]) + return { + "status": "error", + "message": f"Launch failed at step: {result.get('step', 'unknown')}", + "test_errors": errors[:10], + "details": f"Fix these errors and call living_ui_notify_ready again:\n{errors_str}", + } + except Exception as e: + return {"status": "error", "message": f"Failed to launch: {str(e)}"} + + +@action( + name="living_ui_restart", + description=( + "Restart a Living UI project (backend + frontend). " + "Use this after modifying backend or frontend code so changes take effect. " + "Runs the full launch pipeline: install, test, build, start. Returns errors if any step fails." + ), + default=False, + mode="CLI", + action_sets=["living_ui"], + parallelizable=False, + input_schema={ + "project_id": { + "type": "string", + "example": "5a58a160", + "description": "The Living UI project ID (from living_ui_projects.json).", + }, + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "Result: 'success' or 'error'.", + }, + "message": { + "type": "string", + "example": "Living UI '5a58a160' restarted", + "description": "Status message.", + }, + "test_errors": { + "type": "array", + "example": ["[import] Failed to import routes: ..."], + "description": "List of errors if restart failed. 
Fix these and call again.", + }, + }, + test_payload={ + "project_id": "test123", + "simulated_mode": True, + }, +) +async def living_ui_restart(input_data: dict) -> dict: + """Restart a running Living UI project.""" + project_id = input_data.get("project_id", "") + simulated_mode = input_data.get("simulated_mode", False) + + if not project_id: + return { + "status": "error", + "message": "project_id is required", + } + + if simulated_mode: + return { + "status": "success", + "message": f"Living UI '{project_id}' restarted", + "url": "http://localhost:3100", + "backend_url": "http://localhost:3101", + } + + try: + from app.living_ui import restart_living_ui + + result = await restart_living_ui(project_id) + return result + except Exception as e: + return { + "status": "error", + "message": f"Failed to restart: {str(e)}", + } + + +@action( + name="living_ui_report_progress", + description=( + "Report progress ONLY during Living UI creation (initial build). " + "Use this to keep the user informed about scaffolding, coding, testing, building, and launching phases. " + "Do NOT use this for runtime work on a project that is already running — it will be ignored " + "(use send_message for runtime narration, or living_ui_http to read/write data)." 
+ ), + default=False, + mode="CLI", + action_sets=["living_ui"], + parallelizable=True, + input_schema={ + "project_id": { + "type": "string", + "example": "abc12345", + "description": "The Living UI project ID.", + }, + "phase": { + "type": "string", + "enum": ["initializing", "scaffolding", "coding", "testing", "building", "launching"], + "example": "coding", + "description": "Current development phase.", + }, + "progress": { + "type": "integer", + "minimum": 0, + "maximum": 100, + "example": 50, + "description": "Progress percentage (0-100).", + }, + "message": { + "type": "string", + "example": "Implementing view components...", + "description": "Human-readable status message.", + }, + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "Result of the progress report.", + }, + }, + test_payload={ + "project_id": "test123", + "phase": "coding", + "progress": 50, + "message": "Test progress message", + "simulated_mode": True, + }, +) +async def living_ui_report_progress(input_data: dict) -> dict: + """Report Living UI creation progress to browser.""" + project_id = input_data.get("project_id", "") + phase = input_data.get("phase", "") + progress = input_data.get("progress", 0) + message = input_data.get("message", "") + simulated_mode = input_data.get("simulated_mode", False) + + if not project_id: + return { + "status": "error", + "message": "project_id is required", + } + + if simulated_mode: + return {"status": "success"} + + try: + from app.living_ui import broadcast_living_ui_progress, get_living_ui_manager + + # Progress reports are a creation-phase concept. If the project is already running, + # broadcasting one would flip the iframe out for the creation-progress screen, so + # skip it. For runtime narration the agent should use send_message instead. 
+ manager = get_living_ui_manager() + project = manager.get_project(project_id) if manager else None + if project and project.status == "running": + return { + "status": "noop", + "message": ( + f"Project '{project_id}' is already running; progress reports are only for " + "the creation phase. Use send_message to narrate runtime work, or living_ui_http " + "to read/write data." + ), + } + + success = await broadcast_living_ui_progress( + project_id, phase, progress, message + ) + + if success: + return {"status": "success"} + else: + return { + "status": "error", + "message": "Broadcast callback not registered. Browser adapter may not be initialized.", + } + except Exception as e: + return { + "status": "error", + "message": f"Failed to report progress: {str(e)}", + } + + +@action( + name="living_ui_import_external", + description=( + "Import an external app as a Living UI project. " + "Use this when the user wants to add an existing app (Go, Node.js, Python, Rust, static site) " + "to their Living UI dashboard. The agent should first analyze the app source code to determine " + "the runtime, build/install command, start command, and health check strategy, then call this action." + ), + action_sets=["living_ui"], + input_schema={ + "name": {"type": "string", "description": "Display name for the project.", "example": "Glance Dashboard"}, + "description": {"type": "string", "description": "Brief app description.", "example": "Self-hosted dashboard"}, + "source_path": {"type": "string", "description": "Absolute path to the app source code.", "example": "/path/to/app"}, + "app_runtime": {"type": "string", "description": "Runtime: node, python, go, rust, docker, static, or unknown.", "example": "go"}, + "install_command": {"type": "string", "description": "Command to install/build the app (empty if none needed).", "example": "go build -o app ."}, + "start_command": {"type": "string", "description": "Command to start the app. 
Use {{PORT}} placeholder for port.", "example": "./app --port {{PORT}}"}, + "health_strategy": {"type": "string", "description": "Health check: http_get, tcp, or process_alive.", "example": "http_get"}, + "health_url": {"type": "string", "description": "Health check URL (for http_get). Use {{PORT}} placeholder.", "example": "http://localhost:{{PORT}}/health"}, + "port_env_var": {"type": "string", "description": "Env var name for port injection (e.g., PORT). Empty if app uses command-line flag.", "example": "PORT"}, + }, + output_schema={ + "status": {"type": "string", "example": "success"}, + "project": {"type": "object", "description": "Project info dict."}, + }, +) +async def living_ui_import_external(input_data: dict) -> dict: + """Import an external app as a Living UI project.""" + try: + from app.living_ui import get_living_ui_manager + manager = get_living_ui_manager() + if not manager: + return {"status": "error", "message": "Living UI manager not available."} + + result = await manager.import_external_app( + name=input_data.get("name", "External App"), + description=input_data.get("description", ""), + source_path=input_data["source_path"], + app_runtime=input_data.get("app_runtime", "unknown"), + install_command=input_data.get("install_command", ""), + start_command=input_data.get("start_command", ""), + health_strategy=input_data.get("health_strategy", "tcp"), + health_url=input_data.get("health_url", ""), + port_env_var=input_data.get("port_env_var", "PORT"), + ) + return result + except Exception as e: + return {"status": "error", "message": f"Import failed: {str(e)}"} + + +@action( + name="living_ui_import_zip", + description=( + "Import a Living UI project from a ZIP file. " + "The ZIP should contain a previously exported Living UI project. " + "A new project ID and ports are allocated automatically. " + "After importing, launch the project with living_ui_notify_ready." 
+ ), + action_sets=["living_ui"], + input_schema={ + "zip_path": {"type": "string", "description": "Absolute path to the ZIP file.", "example": "/path/to/project.zip"}, + "name": {"type": "string", "description": "Display name for the imported project (optional, auto-detected from manifest).", "example": "My App"}, + }, + output_schema={ + "status": {"type": "string", "example": "success"}, + "project_id": {"type": "string", "example": "a1b2c3d4"}, + "message": {"type": "string"}, + }, +) +async def living_ui_import_zip(input_data: dict) -> dict: + """Import a Living UI project from a ZIP file.""" + try: + from app.living_ui import get_living_ui_manager + manager = get_living_ui_manager() + if not manager: + return {"status": "error", "message": "Living UI manager not available."} + + zip_path = input_data.get("zip_path", "") + name = input_data.get("name", "") + + if not zip_path: + return {"status": "error", "message": "zip_path is required."} + + project = await manager.import_project_zip(zip_path, name) + + # Clean up the ZIP file after successful import + import os + try: + os.unlink(zip_path) + except Exception: + pass + + return { + "status": "success", + "project_id": project.id, + "message": f"Imported '{project.name}' ({project.id}). Call living_ui_notify_ready to launch it.", + "project": project.to_dict(), + } + except Exception as e: + return {"status": "error", "message": f"ZIP import failed: {str(e)}"} + + +@action( + name="living_ui_http", + description=( + "Send an HTTP request to a running Living UI project's backend. " + "Use this to read or modify data in your Living UI (e.g., add a card to a kanban, fetch a list). " + "Pass the project_id and the API path (e.g., '/api/boards/2/cards'); the URL is resolved from the " + "project's registered backend. This bypasses the loopback SSRF restriction safely because the " + "target is a known Living UI process." 
+ ), + default=False, + mode="CLI", + action_sets=["living_ui"], + parallelizable=True, + input_schema={ + "project_id": { + "type": "string", + "example": "84d93cca", + "description": "The Living UI project ID.", + }, + "method": { + "type": "string", + "enum": ["GET", "POST", "PUT", "PATCH", "DELETE"], + "example": "POST", + "description": "HTTP method to use.", + }, + "path": { + "type": "string", + "example": "/api/boards/2/cards", + "description": "API path on the Living UI backend, starting with '/'. Do NOT include scheme or host.", + }, + "headers": { + "type": "object", + "example": {"Accept": "application/json"}, + "description": "Optional headers to send.", + }, + "params": { + "type": "object", + "example": {"limit": "10"}, + "description": "Optional query parameters.", + }, + "json": { + "type": "object", + "example": {"title": "Call John at 5pm", "column": "todo"}, + "description": "JSON body to send. Mutually exclusive with 'data'.", + }, + "data": { + "type": "string", + "example": "field=value", + "description": "Raw request body. Mutually exclusive with 'json'.", + }, + "timeout": { + "type": "number", + "example": 30, + "description": "Timeout in seconds. Defaults to 30.", + }, + "target": { + "type": "string", + "enum": ["backend", "frontend"], + "example": "backend", + "description": "Which server to hit. Defaults to 'backend'. 
Use 'frontend' only if the project serves data from its frontend port.", + }, + }, + output_schema={ + "status": {"type": "string", "example": "success"}, + "status_code": {"type": "integer", "example": 200}, + "response_headers": {"type": "object", "example": {"Content-Type": "application/json"}}, + "body": {"type": "string", "example": '{"ok":true}'}, + "response_json": {"type": "object", "example": {"ok": True}}, + "final_url": {"type": "string", "example": "http://localhost:3101/api/boards/2/cards"}, + "elapsed_ms": {"type": "number", "example": 123}, + "message": {"type": "string", "example": ""}, + }, + requirement=["requests"], + test_payload={ + "project_id": "test123", + "method": "GET", + "path": "/api/health", + "simulated_mode": True, + }, +) +def living_ui_http(input_data: dict) -> dict: + """HTTP request scoped to a registered Living UI project's backend.""" + import sys, subprocess, importlib, time + + simulated_mode = input_data.get("simulated_mode", False) + if simulated_mode: + return { + "status": "success", + "status_code": 200, + "response_headers": {"Content-Type": "application/json"}, + "body": '{"ok": true}', + "final_url": "http://localhost:3100/api/health", + "elapsed_ms": 5, + "message": "", + } + + project_id = str(input_data.get("project_id", "")).strip() + method = str(input_data.get("method", "GET")).upper() + path = str(input_data.get("path", "")).strip() + target = str(input_data.get("target", "backend")).lower() + headers = input_data.get("headers") or {} + params = input_data.get("params") or {} + json_body = input_data.get("json") if "json" in input_data else None + data_body = input_data.get("data") if "data" in input_data else None + timeout = float(input_data.get("timeout", 30)) + + if not project_id: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "project_id is required."} + if method not in {"GET", "POST", "PUT", "PATCH", "DELETE"}: + return 
{"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "Unsupported method."} + if not path or not path.startswith("/"): + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "path must start with '/' (e.g., '/api/items'). Do not include scheme or host."} + if json_body is not None and data_body is not None: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "Provide either json or data, not both."} + if not isinstance(headers, dict) or not isinstance(params, dict): + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "headers and params must be objects."} + + try: + from app.living_ui import get_living_ui_manager + except Exception as e: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": f"Living UI manager unavailable: {e}"} + + manager = get_living_ui_manager() + if not manager: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": "Living UI manager not initialized."} + + project = manager.get_project(project_id) if hasattr(manager, "get_project") else manager.projects.get(project_id) + if not project: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": f"Project '{project_id}' not found."} + if project.status != "running": + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": f"Project '{project_id}' is not running (status: {project.status}). 
Launch it first."} + + base_url = project.backend_url if target == "backend" else project.url + if not base_url: + # Fall back to constructing from port if URL field is missing + port = project.backend_port if target == "backend" else project.port + if port: + base_url = f"http://localhost:{port}" + if not base_url: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": "", "elapsed_ms": 0, "message": f"Project '{project_id}' has no {target} URL/port."} + + url = base_url.rstrip("/") + path + + try: + importlib.import_module("requests") + except ImportError: + subprocess.check_call([sys.executable, "-m", "pip", "install", "requests", "--quiet"]) + import requests + + headers = {str(k): str(v) for k, v in headers.items()} + params = {str(k): str(v) for k, v in params.items()} + kwargs = {"headers": headers, "params": params, "timeout": timeout, "allow_redirects": True} + if json_body is not None: + kwargs["json"] = json_body + elif data_body is not None: + kwargs["data"] = data_body + + try: + t0 = time.time() + resp = requests.request(method, url, **kwargs) + elapsed_ms = int((time.time() - t0) * 1000) + resp_headers = {k: v for k, v in resp.headers.items()} + parsed_json = None + try: + parsed_json = resp.json() + except Exception: + parsed_json = None + out = { + "status": "success" if resp.ok else "error", + "status_code": resp.status_code, + "response_headers": resp_headers, + "body": resp.text, + "final_url": resp.url, + "elapsed_ms": elapsed_ms, + "message": "" if resp.ok else f"HTTP {resp.status_code}", + } + if parsed_json is not None: + out["response_json"] = parsed_json + return out + except Exception as e: + return {"status": "error", "status_code": 0, "response_headers": {}, "body": "", "final_url": url, "elapsed_ms": 0, "message": str(e)} diff --git a/app/data/action/mouse_click.py b/app/data/action/mouse_click.py deleted file mode 100644 index 3d056756..00000000 --- a/app/data/action/mouse_click.py +++ 
/dev/null @@ -1,113 +0,0 @@ -from agent_core import action - -@action( - name="mouse_click", - description="Performs a mouse click at the specified screen coordinates (or at the current cursor position if no coordinates are provided). Supports left, right, and middle buttons, as well as single and double clicks.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "x": { - "type": "integer", - "example": 640, - "description": "X-coordinate in pixels. If omitted, the current cursor X is used." - }, - "y": { - "type": "integer", - "example": 360, - "description": "Y-coordinate in pixels. If omitted, the current cursor Y is used." - }, - "button": { - "type": "string", - "example": "left", - "description": "Mouse button to click: 'left', 'right', or 'middle'. Defaults to 'left'." - }, - "click_type": { - "type": "string", - "example": "single", - "description": "Click type: 'single' or 'double'. Defaults to 'single'." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the click succeeded, 'error' otherwise." - }, - "position": { - "type": "object", - "properties": { - "x": {"type": "integer"}, - "y": {"type": "integer"} - }, - "example": {"x": 640, "y": 360}, - "description": "The screen coordinates where the click was executed." - }, - "message": { - "type": "string", - "example": "File not found.", - "description": "Optional error message." 
- } - }, - requirement=["pyautogui"], - test_payload={ - "x": 640, - "y": 360, - "button": "left", - "click_type": "single", - "simulated_mode": False - } -) -def mouse_click(input_data: dict) -> dict: - import sys, subprocess, importlib, time - - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - - import pyautogui - - x = input_data.get('x') - y = input_data.get('y') - button = input_data.get('button', 'left').lower() - click_type = input_data.get('click_type', 'single').lower() - - # Validate button - if button not in ('left', 'right', 'middle'): - return {'status': 'error', 'position': {}, 'message': f"Invalid button '{button}'. Must be 'left', 'right', or 'middle'."} - - # Validate click_type - if click_type not in ('single', 'double'): - return {'status': 'error', 'position': {}, 'message': f"Invalid click_type '{click_type}'. Must be 'single' or 'double'."} - - try: - # Disable fail-safe for VM environments where cursor position detection can be unreliable - pyautogui.FAILSAFE = False - - # Get screen size for boundary checking - screen_width, screen_height = pyautogui.size() - - # Get position (use current if not specified) - pos_x, pos_y = (x, y) if x is not None and y is not None else pyautogui.position() - pos_x, pos_y = int(pos_x), int(pos_y) - - # Clamp coordinates to screen bounds with a small margin to avoid edge issues - margin = 1 - pos_x = max(margin, min(pos_x, screen_width - margin)) - pos_y = max(margin, min(pos_y, screen_height - margin)) - - # Now move to target position with visible duration - pyautogui.moveTo(pos_x, pos_y, duration=0.1) - time.sleep(0.1) - - # Perform click at the specified coordinates directly (don't rely on current position) - if click_type == 'double': - pyautogui.doubleClick(x=pos_x, y=pos_y, button=button) - else: - pyautogui.click(x=pos_x, y=pos_y, button=button) - - return {'status': 'success', 'position': 
{'x': pos_x, 'y': pos_y}, 'message': ''} - except Exception as e: - return {'status': 'error', 'position': {}, 'message': str(e)} diff --git a/app/data/action/mouse_drag.py b/app/data/action/mouse_drag.py deleted file mode 100644 index 33e8374d..00000000 --- a/app/data/action/mouse_drag.py +++ /dev/null @@ -1,81 +0,0 @@ -from agent_core import action - -@action( - name="mouse_drag", - description="Performs a left-button drag from a start coordinate to an end coordinate.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "start_x": { - "type": "integer", - "example": 400, - "description": "Starting X-coordinate in pixels (required)." - }, - "start_y": { - "type": "integer", - "example": 300, - "description": "Starting Y-coordinate in pixels (required)." - }, - "end_x": { - "type": "integer", - "example": 800, - "description": "Ending X-coordinate in pixels (required)." - }, - "end_y": { - "type": "integer", - "example": 600, - "description": "Ending Y-coordinate in pixels (required)." - }, - "duration": { - "type": "number", - "example": 0.5, - "description": "Optional duration (seconds) for a smooth drag." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the drag completed, 'error' otherwise." - }, - "message": { - "type": "string", - "example": "Missing coordinates.", - "description": "Optional error message." 
- } - }, - requirement=["pyautogui"], - test_payload={ - "start_x": 400, - "start_y": 300, - "end_x": 800, - "end_y": 600, - "duration": 0.5, - "simulated_mode": False - } -) -def mouse_drag(input_data: dict) -> dict: - import json, sys, subprocess, importlib - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - import pyautogui - sx = input_data.get('start_x') - sy = input_data.get('start_y') - ex = input_data.get('end_x') - ey = input_data.get('end_y') - duration = float(input_data.get('duration', 0)) - if None in (sx, sy, ex, ey): - return {'status': 'error', 'message': 'All coordinates are required.'} - exit() - try: - pyautogui.moveTo(int(sx), int(sy)) - pyautogui.mouseDown(button='left') - pyautogui.dragTo(int(ex), int(ey), duration=duration, button='left') - pyautogui.mouseUp(button='left') - return {'status': 'success', 'message': ''} - except Exception as e: - return {'status': 'error', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/mouse_move.py b/app/data/action/mouse_move.py deleted file mode 100644 index e3336257..00000000 --- a/app/data/action/mouse_move.py +++ /dev/null @@ -1,112 +0,0 @@ -from agent_core import action - -@action( - name="mouse_move", - description="Moves the mouse cursor to a specific screen coordinate.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "x": { - "type": "integer", - "example": 640, - "description": "Target X-coordinate in pixels (required)." - }, - "y": { - "type": "integer", - "example": 360, - "description": "Target Y-coordinate in pixels (required)." - }, - "duration": { - "type": "number", - "example": 0.25, - "description": "Optional duration in seconds for a smooth move. Defaults to instant." 
- } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the cursor moved, 'error' otherwise." - }, - "position": { - "type": "object", - "properties": { - "x": {"type": "integer"}, - "y": {"type": "integer"} - }, - "description": "The final cursor coordinates." - }, - "message": { - "type": "string", - "description": "Error message if operation failed." - } - }, - # We assume these are pre-installed in the Docker image now - requirement=["pyautogui"], - test_payload={ - "x": 640, - "y": 360, - "duration": 0.25, - } -) -def mouse_move(input_data: dict) -> dict: - import sys - import os - - # 1. Basic Input Validation - x = input_data.get('x') - y = input_data.get('y') - duration = float(input_data.get('duration', 0)) - - if x is None or y is None: - return {'status': 'error', 'position': {}, 'message': 'Both x and y coordinates are required.'} - - # 2. Environment Check (Crucial for Linux Docker) - # PyAutoGUI needs a DISPLAY environment variable to know where to send events. - if sys.platform == 'linux' and 'DISPLAY' not in os.environ: - return { - 'status': 'error', - 'position': {}, - 'message': 'Linux environment detected but DISPLAY environment variable is not set. GUI actions require a display (e.g., Xvfb).' - } - - try: - # 3. Import PyAutoGUI correctly - # We assume it's pre-installed via Dockerfile. Removing the runtime pip install - # makes the action faster and more reliable. - import pyautogui - - # Fail fast if safety feature gets in the way (optional, but good practice for bots) - pyautogui.FAILSAFE = False - - # 4. Attempt the move - pyautogui.moveTo(int(x), int(y), duration=duration) - - # 5. Return success - return { - 'status': 'success', - # Note: pyautogui.position() gets actual current pos, better than just returning input x,y - 'position': {'x': pyautogui.position()[0], 'y': pyautogui.position()[1]}, - 'message': 'Cursor moved successfully.' 
- } - - except AttributeError as e: - # This catches the specific error: "module 'pyautogui' has no attribute 'moveTo'" - if "'moveTo'" in str(e): - msg = ("PyAutoGUI failed to initialize properly. This usually means system-level " - "X11 dependencies are missing in the Linux Docker container (e.g., libX11, libXtst). " - "Please update your Dockerfile to install these packages.") - else: - msg = f"PyAutoGUI attribute error: {e}" - return {'status': 'error', 'position': {}, 'message': msg} - - except ImportError: - return { - 'status': 'error', - 'position': {}, - 'message': "The 'pyautogui' Python package is not installed in the container. Please add 'pip install pyautogui' to your Dockerfile." - } - except Exception as e: - # Catch-all for other issues (like coordinates out of screen bounds) - return {'status': 'error', 'position': {}, 'message': f"An unexpected error occurred: {str(e)}"} \ No newline at end of file diff --git a/app/data/action/mouse_trace.py b/app/data/action/mouse_trace.py deleted file mode 100644 index c0241c7c..00000000 --- a/app/data/action/mouse_trace.py +++ /dev/null @@ -1,175 +0,0 @@ -from agent_core import action - -@action( - name="mouse_trace", - description="Moves the mouse cursor along a sequence of points, optionally with easing, per-segment duration, and pauses between segments.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "points": { - "type": "array", - "description": "Ordered list of waypoints to move through.", - "items": { - "type": "object", - "properties": { - "x": { - "type": "integer", - "description": "X-coordinate in pixels (absolute unless 'relative' is true)." - }, - "y": { - "type": "integer", - "description": "Y-coordinate in pixels (absolute unless 'relative' is true)." - }, - "duration": { - "type": "number", - "description": "Optional duration in seconds for this segment." 
- } - }, - "required": [ - "x", - "y" - ] - }, - "example": [ - { - "x": 400, - "y": 300, - "duration": 0.2 - }, - { - "x": 800, - "y": 300, - "duration": 0.15 - }, - { - "x": 800, - "y": 600, - "duration": 0.25 - } - ] - }, - "relative": { - "type": "boolean", - "example": False, - "description": "If true, each point is treated as an offset from the current cursor position (and then from each subsequent point)." - }, - "default_duration": { - "type": "number", - "example": 0.2, - "description": "Fallback duration (seconds) for any point that omits 'duration'. Defaults to 0." - }, - "easing": { - "type": "string", - "enum": [ - "linear", - "easeInQuad", - "easeOutQuad", - "easeInOutQuad", - "easeInCubic", - "easeOutCubic", - "easeInOutCubic" - ], - "example": "easeInOutQuad", - "description": "Easing function applied to each segment." - }, - "pause": { - "type": "number", - "example": 0.05, - "description": "Pause in seconds between segments. Defaults to 0." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the path was fully traced, 'error' otherwise." - }, - "segments_executed": { - "type": "integer", - "example": 3, - "description": "How many segments were completed." - }, - "message": { - "type": "string", - "example": "Coordinate out of bounds.", - "description": "Optional error message if the operation failed or was partial." 
- } - }, - requirement=["pyautogui"], - test_payload={ - "points": [ - { - "x": 400, - "y": 300, - "duration": 0.2 - }, - { - "x": 800, - "y": 300, - "duration": 0.15 - }, - { - "x": 800, - "y": 600, - "duration": 0.25 - } - ], - "relative": False, - "default_duration": 0.2, - "easing": "easeInOutQuad", - "pause": 0.05, - "simulated_mode": False - } -) -def trace_mouse(input_data: dict) -> dict: - import json, sys, subprocess, importlib, time - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - import pyautogui - points = input_data.get('points') - relative = bool(input_data.get('relative', False)) - easing = str(input_data.get('easing', 'linear')).strip() - pause = float(input_data.get('pause', 0)) - default_duration = float(input_data.get('default_duration', 0)) - if not isinstance(points, list) or not points: - return {'status': 'error', 'segments_executed': 0, 'message': 'points must be a non-empty array.'} - exit() - def _linear(n): - return n - ease_map = { - 'linear': getattr(pyautogui, 'linear', _linear), - 'easeInQuad': getattr(pyautogui, 'easeInQuad', _linear), - 'easeOutQuad': getattr(pyautogui, 'easeOutQuad', _linear), - 'easeInOutQuad': getattr(pyautogui, 'easeInOutQuad', _linear), - 'easeInCubic': getattr(pyautogui, 'easeInCubic', _linear), - 'easeOutCubic': getattr(pyautogui, 'easeOutCubic', _linear), - 'easeInOutCubic': getattr(pyautogui, 'easeInOutCubic', _linear), - } - tween = ease_map.get(easing, ease_map['linear']) - width, height = pyautogui.size() - cx, cy = pyautogui.position() - segments_executed = 0 - try: - curx, cury = (cx, cy) - for p in points: - if not isinstance(p, dict) or 'x' not in p or 'y' not in p: - raise ValueError('Each point must be an object with x and y.') - px = int(p.get('x')) - py = int(p.get('y')) - dur = float(p.get('duration', default_duration)) - tx = px + curx if relative else px - ty = py + cury if 
relative else py - if tx < 0 or ty < 0 or tx >= width or ty >= height: - raise ValueError('Coordinate out of bounds.') - pyautogui.moveTo(tx, ty, duration=dur, tween=tween) - curx, cury = (tx, ty) - segments_executed += 1 - if pause > 0: - time.sleep(pause) - return {'status': 'success', 'segments_executed': segments_executed, 'message': ''} - except Exception as e: - return {'status': 'error', 'segments_executed': segments_executed, 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/open_application.py b/app/data/action/open_application.py deleted file mode 100644 index b3642ab5..00000000 --- a/app/data/action/open_application.py +++ /dev/null @@ -1,73 +0,0 @@ -from agent_core import action - -@action( - name="open_application", - description="Launches a Windows application (executable) with optional command-line arguments.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "exe_path": { - "type": "string", - "example": "C:\\\\Program Files\\\\VideoLAN\\\\VLC\\\\vlc.exe", - "description": "Absolute path to the .exe file to launch (required)." - }, - "args": { - "type": "array", - "items": { - "type": "string" - }, - "example": [ - "--fullscreen" - ], - "description": "Optional list of command-line arguments." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the application started, 'error' otherwise." - }, - "pid": { - "type": "integer", - "example": 12345, - "description": "Process ID of the launched application (present on success)." - }, - "message": { - "type": "string", - "example": "File not found.", - "description": "Optional error message." 
- } - }, - test_payload={ - "exe_path": "C:\\\\Program Files\\\\VideoLAN\\\\VLC\\\\vlc.exe", - "args": [ - "--fullscreen" - ], - "simulated_mode": False - } -) -def open_application(input_data: dict) -> dict: - import json, os, subprocess, sys - - exe_path = str(input_data.get('exe_path', '')).strip() - args = input_data.get('args') or [] - - if not exe_path: - return {'status': 'error', 'pid': -1, 'message': 'exe_path is required.'} - sys.exit() - - if not os.path.isfile(exe_path): - return {'status': 'error', 'pid': -1, 'message': 'File not found.'} - sys.exit() - - if not isinstance(args, list): - return {'status': 'error', 'pid': -1, 'message': 'args must be an array if provided.'} - sys.exit() - - try: - proc = subprocess.Popen([exe_path, *args], shell=False, cwd=os.path.dirname(exe_path), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - return {'status': 'success', 'pid': proc.pid, 'message': ''} - except Exception as e: - return {'status': 'error', 'pid': -1, 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/open_browser.py b/app/data/action/open_browser.py deleted file mode 100644 index d79ebcca..00000000 --- a/app/data/action/open_browser.py +++ /dev/null @@ -1,254 +0,0 @@ -from agent_core import action - -@action( - name="open_browser", - description="Opens a web browser (Chrome, Edge, Firefox, Safari, or system default) across platforms. Optionally opens a specified URL.", - platforms=["windows"], - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "url": { - "type": "string", - "example": "https://www.example.com", - "description": "Optional URL to open in the browser." 
- } - }, - output_schema={ - "status": {"type": "string", "example": "success", "description": "'success' if a browser launched, 'error' otherwise."}, - "process_id": {"type": "integer", "example": 12345, "description": "Process ID of the launched browser instance when successful; -1 when opened via system default browser."}, - "browser": {"type": "string", "example": "chrome", "description": "Name of the browser that was launched."}, - "executable_path": {"type": "string", "example": "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", "description": "Absolute path to the browser executable used, if applicable."}, - "message": {"type": "string", "example": "Launched successfully.", "description": "Error or informational message."} - }, -) -def open_browser_windows(input_data: dict) -> dict: - import os - import subprocess - import shutil - import webbrowser - import tempfile - - # Helper to get a temporary directory path for browser profiles. - # Ensures every launch is a fresh instance and avoids profile locking issues. 
- def _get_temp_profile_dir(prefix): - try: - return tempfile.mkdtemp(prefix=f"{prefix}_profile_") - except Exception: - return None - - url = str(input_data.get('url', '')).strip() - - candidates = [ - r"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", - r"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe", - os.path.expandvars(r"%LOCALAPPDATA%\\Google\\Chrome\\Application\\chrome.exe"), - r"C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe", - r"C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe", - r"C:\\Program Files\\Mozilla Firefox\\firefox.exe" - ] - - try: - browser_path = next((p for p in candidates if os.path.isfile(p)), None) - if browser_path: - cmd = [browser_path] - - # Force Temp Profile for Chromium browsers on Windows - if "chrome" in browser_path.lower() or "edge" in browser_path.lower(): - temp_dir = _get_temp_profile_dir("win_browser") - if temp_dir: - cmd.append(f'--user-data-dir={temp_dir}') - - if url: - cmd.append(url) - - # CREATE_NEW_CONSOLE is excellent for Windows detachment. - creation_flags = getattr(subprocess, 'CREATE_NEW_CONSOLE', 0) - - proc = subprocess.Popen( - cmd, - stdin=subprocess.DEVNULL, # Isolate input - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - creationflags=creation_flags - ) - - msg = 'Launched specific browser successfully.' - if "user-data-dir" in " ".join(cmd): - msg += " (Using temporary profile)." - - return { - 'status': 'success', - 'process_id': proc.pid, - 'browser': os.path.basename(browser_path).split('.')[0], - 'executable_path': browser_path, - 'message': msg - } - else: - # Fallback - if url: - webbrowser.open(url) - return { - 'status': 'success', - 'process_id': -1, - 'browser': 'default', - 'executable_path': '', - 'message': 'Opened URL using system default browser.' 
- } - except Exception as e: - return { - 'status': 'error', 'process_id': -1, 'browser': '', 'executable_path': '', 'message': str(e) - } - -@action( - name="open_browser", - description="Opens a web browser (Chrome, Edge, Firefox, Safari, or system default) across platforms. Optionally opens a specified URL.", - platforms=["darwin"], - mode="GUI", - action_sets=["gui_interaction"], - input_schema={ - "url": {"type": "string", "example": "https://www.example.com", "description": "Optional URL to open in the browser."} - }, - output_schema={ - "status": {"type": "string"}, "process_id": {"type": "integer"}, "browser": {"type": "string"}, "executable_path": {"type": "string"}, "message": {"type": "string"} - }, -) -def open_browser_darwin(input_data: dict) -> dict: - import os - import subprocess - import shutil - import webbrowser - import tempfile - - # Helper (duplicated here for self-containment) - def _get_temp_profile_dir(prefix): - try: - return tempfile.mkdtemp(prefix=f"{prefix}_profile_") - except Exception: - return None - - url = str(input_data.get('url', '')).strip() - - # Launching binaries directly inside .app bundles allows passing custom arguments reliable. 
- candidates = [ - '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', - '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge', - '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser', - '/Applications/Firefox.app/Contents/MacOS/firefox', - ] - - try: - browser_path = next((p for p in candidates if os.path.isfile(p)), None) - if browser_path: - cmd = [browser_path] - - # Force Temp Profile on macOS to avoid locking issues - if "Chrome" in browser_path or "Edge" in browser_path or "Brave" in browser_path: - temp_dir = _get_temp_profile_dir("mac_browser") - if temp_dir: - cmd.append(f'--user-data-dir={temp_dir}') - - if url: - cmd.append(url) - - # Process Detachment for macOS using start_new_session - proc = subprocess.Popen( - cmd, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - start_new_session=True - ) - - msg = 'Launched specific browser successfully.' - if "user-data-dir" in " ".join(cmd): - msg += " (Using temporary profile)." - - return { - 'status': 'success', - 'process_id': proc.pid, - 'browser': os.path.basename(browser_path).split('.')[0], - 'executable_path': browser_path, - 'message': msg - } - else: - # Fallback - if url: - webbrowser.open(url) - return { - 'status': 'success', - 'process_id': -1, - 'browser': 'default', - 'executable_path': '', - 'message': 'Opened URL using system default browser (Safari likely).' - } - except Exception as e: - return { - 'status': 'error', 'process_id': -1, 'browser': '', 'executable_path': '', 'message': str(e) - } - -@action( - name="open_browser", - description="Opens a web browser (Chrome, Edge, Firefox, Safari, or system default) across platforms. 
Optionally opens a specified URL.", - platforms=["linux"], - mode="GUI", - action_sets=["gui_interaction"], - input_schema={ "url": {"type": "string"} }, - output_schema={ "status": {"type": "string"}, "process_id": {"type": "integer"}, "browser": {"type": "string"}, "executable_path": {"type": "string"}, "message": {"type": "string"} }, -) -def open_browser_linux(input_data: dict) -> dict: - import os - import subprocess - import shutil - import webbrowser - - url = str(input_data.get('url', '')).strip() - - candidates = [ - shutil.which('google-chrome'), - shutil.which('google-chrome-stable'), - shutil.which('chromium'), - shutil.which('chromium-browser'), - shutil.which('brave-browser'), - shutil.which('firefox'), - shutil.which('microsoft-edge') - ] - - try: - browser_path = next((p for p in candidates if p and os.path.isfile(p)), None) - if browser_path: - cmd = [browser_path, '--no-sandbox', '--temp-profile'] - if url: - cmd.append(url) - - proc = subprocess.Popen( - cmd, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - env=os.environ.copy(), - close_fds=True - ) - - return { - 'status': 'success', - 'process_id': proc.pid, - 'browser': os.path.basename(browser_path), - 'executable_path': browser_path, - 'message': f'Launched {os.path.basename(browser_path)} with temp profile.' - } - else: - if url: - webbrowser.open(url) - return { - 'status': 'success', - 'process_id': -1, - 'browser': 'default system', - 'executable_path': '', - 'message': 'Attempted to open URL using system default mechanism.' 
- } - - except Exception as e: - return { - 'status': 'error', 'process_id': -1, 'browser': '', 'executable_path': '', 'message': str(e) - } \ No newline at end of file diff --git a/app/data/action/perform_ocr.py b/app/data/action/perform_ocr.py new file mode 100644 index 00000000..ba83d2fb --- /dev/null +++ b/app/data/action/perform_ocr.py @@ -0,0 +1,80 @@ +from agent_core import action + +@action( + name="perform_ocr", + description="Extracts all text from an image using OCR via a Vision Language Model. Use this when the user wants to read text from a screenshot, scanned document, photo of a receipt, whiteboard, sign, or any image containing text. Returns extracted text saved to a file in workspace.", + mode="CLI", + action_sets=["document_processing", "image", "video"], + input_schema={ + "image_path": { + "type": "string", + "example": "C:\\Users\\user\\Pictures\\receipt.jpg", + "description": "Absolute path to the image file containing text to extract." + }, + "user_prompt": { + "type": "string", + "example": "Extract all text including prices and product names.", + "description": "Optional: extra instruction to guide the OCR (e.g. focus on specific regions or text types)." + } + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "'success' if OCR completed, 'error' otherwise." + }, + "summary": { + "type": "string", + "example": "OCR complete: 42 lines, 1250 characters extracted.", + "description": "Brief summary of extraction results." + }, + "file_path": { + "type": "string", + "example": "/workspace/ocr_result_20260414_153000.txt", + "description": "Absolute path to the .txt file containing full extracted text." + }, + "file_saved": { + "type": "boolean", + "example": True, + "description": "True if the extracted text was saved to disk." + }, + "message": { + "type": "string", + "example": "File not found.", + "description": "Error message if applicable." 
+ } + }, + test_payload={ + "image_path": "C:\\Users\\user\\Pictures\\sample.jpg", + "user_prompt": "Extract all visible text.", + "simulated_mode": True + } +) +def perform_ocr(input_data: dict) -> dict: + import os + + image_path = str(input_data.get('image_path', '')).strip() + user_prompt = str(input_data.get('user_prompt', '')).strip() or None + simulated_mode = input_data.get('simulated_mode', False) + + if simulated_mode: + return { + 'status': 'success', + 'summary': 'OCR complete: 5 lines, 120 characters extracted.', + 'file_path': '/workspace/ocr_result_simulated.txt', + 'file_saved': True, + 'message': '' + } + + if not image_path: + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': 'image_path is required.'} + + if not os.path.isfile(image_path): + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': 'File not found.'} + + try: + import app.internal_action_interface as iai + result = iai.InternalActionInterface.perform_ocr(image_path, user_prompt=user_prompt) + return {**result, 'message': ''} + except Exception as e: + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': str(e)} diff --git a/app/data/action/scroll.py b/app/data/action/scroll.py deleted file mode 100644 index 01f54daf..00000000 --- a/app/data/action/scroll.py +++ /dev/null @@ -1,49 +0,0 @@ -from agent_core import action - -@action( - name="scroll", - description="Scrolls the active window one viewport up or down (≈90 % of the screen height, leaving ~10 % overlap).", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "direction": { - "type": "string", - "enum": [ - "up", - "down" - ], - "example": "down", - "description": "Scroll direction." - } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if scrolling succeeded, 'error' otherwise." 
- }, - "message": { - "type": "string", - "example": "Invalid direction.", - "description": "Optional error message if the operation failed." - } - }, - requirement=["pyautogui"], - test_payload={ - "direction": "down", - "simulated_mode": False - } -) -def scroll(input_data: dict) -> dict: - import json, sys, subprocess, importlib - pkg = 'pyautogui' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - import pyautogui - - direction = str(input_data.get('direction', '')).lower() - if direction not in {'up', 'down'}: - return {'status': 'error', 'message': 'direction must be "up" or "down".'} \ No newline at end of file diff --git a/app/data/action/set_mode.py b/app/data/action/set_mode.py deleted file mode 100644 index d8d3060f..00000000 --- a/app/data/action/set_mode.py +++ /dev/null @@ -1,99 +0,0 @@ -from agent_core import action - -# [V1.2.2] GUI mode is temporarily disabled. Uncomment the decorator below to re-enable. -# @action( -# name="set_mode", -# description="Switch the agent between CLI and GUI modes. CLI mode operates without screen control; GUI mode enables screen interaction capabilities.", -# mode="ALL", -# default=True, -# action_sets=["core"], -# parallelizable=False, -# input_schema={ -# "target_mode": { -# "type": "string", -# "example": "cli", -# "description": "Target mode to switch to: 'cli' or 'gui'." -# } -# }, -# output_schema={ -# "status": { -# "type": "string", -# "example": "ok", -# "description": "Result status: 'ok' or 'error'." -# }, -# "gui_mode": { -# "type": "boolean", -# "example": False, -# "description": "Current GUI mode after the operation (True = GUI, False = CLI)." -# }, -# "message": { -# "type": "string", -# "example": "Successfully switched to CLI mode.", -# "description": "Status message." 
-# }, -# "error": { -# "type": "string", -# "example": "StateSession not initialized", -# "description": "Error message (present when status == 'error')." -# } -# }, -# test_payload={ -# "target_mode": "cli", -# "simulated_mode": False -# } -# ) -def set_mode(input_data: dict) -> dict: - import os - import app.internal_action_interface as iai - from app.state.agent_state import STATE - - target_mode = str(input_data.get('target_mode', '')).strip().lower() - simulated_mode = input_data.get('simulated_mode', False) - - if target_mode not in ('cli', 'gui'): - return { - "status": "error", - "error": f"Invalid target_mode '{target_mode}'. Must be 'cli' or 'gui'.", - "gui_mode": STATE.gui_mode - } - - try: - target_gui_mode = (target_mode == 'gui') - - # Check if GUI mode is globally disabled - if target_gui_mode: - gui_globally_enabled = os.getenv("GUI_MODE_ENABLED", "True") == "True" - if not gui_globally_enabled: - return { - "status": "error", - "error": "GUI mode is disabled. The application was started with --no-gui flag. " - "To enable GUI mode, restart with --enable-gui flag.", - "gui_mode": STATE.gui_mode, - "message": "Cannot switch to GUI mode - it has been disabled at startup." - } - - # Check if already in target mode - if STATE.gui_mode == target_gui_mode: - mode_name = "GUI" if target_gui_mode else "CLI" - return { - "status": "ok", - "gui_mode": target_gui_mode, - "message": f"Already in {mode_name} mode. No change needed." - } - - # Perform the switch - if not simulated_mode: - if target_gui_mode: - iai.InternalActionInterface.switch_to_GUI_mode() - else: - iai.InternalActionInterface.switch_to_CLI_mode() - - mode_name = "GUI" if target_gui_mode else "CLI" - return { - "status": "ok", - "gui_mode": target_gui_mode, - "message": f"Successfully switched to {mode_name} mode." 
- } - - except Exception as e: - return {"status": "error", "error": str(e), "gui_mode": STATE.gui_mode} diff --git a/app/data/action/understand_video.py b/app/data/action/understand_video.py new file mode 100644 index 00000000..10f5cc71 --- /dev/null +++ b/app/data/action/understand_video.py @@ -0,0 +1,152 @@ +from agent_core import action + +@action( + name="understand_video", + description="Uses the configured VLM model (default: Gemini 1.5 Pro) for native video understanding when a Google API key is configured. Falls back to keyframe extraction via OpenCV if no Google API key is available.", + mode="CLI", + action_sets=["document_processing", "image", "video"], + requirement=["google-generativeai"], + input_schema={ + "video_path": { + "type": "string", + "example": "C:\\Users\\user\\Videos\\meeting.mp4", + "description": "Absolute path to the video file (MP4, AVI, MOV supported)." + }, + "query": { + "type": "string", + "example": "What is being presented on the slides?", + "description": "Optional: specific question to answer about the video." + }, + "max_frames": { + "type": "integer", + "example": 8, + "description": "Number of evenly-spaced keyframes to sample (default: 8, max recommended: 16)." + } + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "'success' if analysis completed, 'error' otherwise." + }, + "summary": { + "type": "string", + "example": "The video shows a person presenting slides about quarterly sales...", + "description": "First 500 characters of the video summary. Full summary saved to file." + }, + "file_path": { + "type": "string", + "example": "/workspace/video_summary_20260414_153000.txt", + "description": "Absolute path to the .txt file containing the full video summary." + }, + "file_saved": { + "type": "boolean", + "example": True, + "description": "True if the full summary was saved to disk." 
+ }, + "message": { + "type": "string", + "example": "File not found.", + "description": "Error message if applicable." + } + }, + test_payload={ + "video_path": "C:\\Users\\user\\Videos\\sample.mp4", + "query": "Summarise the video content.", + "max_frames": 8, + "simulated_mode": True + } +) +def understand_video(input_data: dict) -> dict: + import os + + video_path = str(input_data.get('video_path', '')).strip() + query = str(input_data.get('query', '')).strip() or None + max_frames = int(input_data.get('max_frames', 8)) + simulated_mode = input_data.get('simulated_mode', False) + + if simulated_mode: + return { + 'status': 'success', + 'summary': 'The video shows a simulated presentation with 3 speakers.', + 'file_path': '/workspace/video_summary_simulated.txt', + 'file_saved': True, + 'message': '' + } + + if not video_path: + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': 'video_path is required.'} + + if not os.path.isfile(video_path): + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': 'File not found.'} + + from app.config import get_api_key, get_vlm_model + api_key = get_api_key('gemini') + +# --- Dual-path execution --- +# This is the only video action that contains its own dispatch logic rather than +# delegating entirely to InternalActionInterface. The reason is architectural: +# +# PATH 1 — Gemini Native (below, runs when api_key is present): +# Uses the Gemini Files API (genai.upload_file) for true native video +# understanding. The full video is uploaded and processed by the model with +# temporal context — no frame sampling needed. The uploaded file is deleted +# from Gemini servers after the call. The full summary is saved to disk. +# This path is preferred: more accurate, handles long videos, no OpenCV dep. +# +# PATH 2 — OpenCV Keyframe Fallback (bottom of function): +# Used when no Gemini API key is configured, or if PATH 1 raises any exception. 
+# Delegates to InternalActionInterface.understand_video(), which extracts +# evenly-spaced keyframes using OpenCV and sends them to whatever VLM provider +# is currently configured. Results are returned directly without saving to disk. +# +# The Gemini Files API is not accessible through VLMInterface, which is why +# this action cannot follow the standard single-delegation pattern. + + if api_key: + try: + import google.generativeai as genai + genai.configure(api_key=api_key) + import time + from datetime import datetime + from app.config import AGENT_WORKSPACE_ROOT + + video_file = genai.upload_file(path=video_path) + + while video_file.state.name == "PROCESSING": + time.sleep(2) + video_file = genai.get_file(video_file.name) + + vlm_model = get_vlm_model() or "gemini-1.5-pro" + model = genai.GenerativeModel(vlm_model) + prompt = query if query else "Understand and describe the contents of this video." + response = model.generate_content([video_file, prompt]) + + genai.delete_file(video_file.name) + + full_text = response.text + ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + out_path = os.path.join(AGENT_WORKSPACE_ROOT, f"video_summary_{ts}.txt") + with open(out_path, "w", encoding="utf-8") as f: + f.write(full_text) + + return { + 'status': 'success', + 'summary': full_text[:500] + ("..." 
if len(full_text) > 500 else ""), + 'file_path': out_path, + 'file_saved': True, + 'message': '' + } + except Exception as e: + # Fall through to fallback path if Gemini native path fails + pass + + try: + import app.internal_action_interface as iai + result = iai.InternalActionInterface.understand_video(video_path, query=query, max_frames=max_frames) + return {**result, 'message': ''} + except RuntimeError as e: + # Catches missing opencv gracefully + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': str(e)} + except Exception as e: + return {'status': 'error', 'summary': '', 'file_path': '', 'file_saved': False, 'message': str(e)} diff --git a/app/data/action/window_control.py b/app/data/action/window_control.py deleted file mode 100644 index 37afcc89..00000000 --- a/app/data/action/window_control.py +++ /dev/null @@ -1,115 +0,0 @@ -from agent_core import action - -@action( - name="window_control", - description="Controls an application window. Supports focus, close, maximize, and minimize operations. If a title is provided, the matching window is targeted; otherwise the currently active window is used.", - mode="GUI", - action_sets=["gui_interaction"], - parallelizable=False, - input_schema={ - "operation": { - "type": "string", - "example": "focus", - "description": "Operation to perform: 'focus', 'close', 'maximize', or 'minimize'." - }, - "title": { - "type": "string", - "example": "Notepad", - "description": "Substring (case-insensitive) of the window title to match. If omitted, the active window is used (except for 'focus' which requires a title)." - }, - "exact": { - "type": "boolean", - "example": False, - "description": "If true, match the title exactly; otherwise use substring matching (default: false)." - }, - "index": { - "type": "integer", - "example": 0, - "description": "If multiple windows match, select by zero-based index (default: 0)." 
- } - }, - output_schema={ - "status": { - "type": "string", - "example": "success", - "description": "'success' if the operation succeeded, 'error' otherwise." - }, - "matched_title": { - "type": "string", - "example": "Untitled - Notepad", - "description": "The exact title of the window that was operated on (present on success)." - }, - "message": { - "type": "string", - "example": "No matching window found.", - "description": "Optional error message." - } - }, - requirement=["pygetwindow"], - test_payload={ - "operation": "focus", - "title": "Notepad", - "exact": False, - "index": 0, - "simulated_mode": False - } -) -def window_control(input_data: dict) -> dict: - import sys, subprocess, importlib - - pkg = 'pygetwindow' - try: - importlib.import_module(pkg) - except ImportError: - subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '--quiet']) - - import pygetwindow as gw - - operation = str(input_data.get('operation', '')).strip().lower() - title = str(input_data.get('title', '')).strip() - exact = bool(input_data.get('exact', False)) - index = int(input_data.get('index', 0)) - - valid_operations = ('focus', 'close', 'maximize', 'minimize') - if operation not in valid_operations: - return {'status': 'error', 'matched_title': '', 'message': f"Invalid operation '{operation}'. 
Must be one of: {', '.join(valid_operations)}."} - - # Focus requires a title to be specified - if operation == 'focus' and not title: - return {'status': 'error', 'matched_title': '', 'message': 'title is required for focus operation.'} - - try: - if title: - # Match by title - if exact: - windows = [w for w in gw.getAllWindows() if w.title == title] - else: - windows = gw.getWindowsWithTitle(title) - - if not windows: - return {'status': 'error', 'matched_title': '', 'message': 'No matching window found.'} - - if index < 0 or index >= len(windows): - return {'status': 'error', 'matched_title': '', 'message': f'index {index} out of range (found {len(windows)} windows).'} - - win = windows[index] - else: - # Use active window - win = gw.getActiveWindow() - if win is None: - return {'status': 'error', 'matched_title': '', 'message': f'No active window to {operation}.'} - - # Perform the operation - if operation == 'focus': - win.activate() - elif operation == 'close': - win.close() - elif operation == 'maximize': - win.maximize() - elif operation == 'minimize': - win.minimize() - - return {'status': 'success', 'matched_title': win.title, 'message': ''} - - except Exception as e: - return {'status': 'error', 'matched_title': '', 'message': str(e)} diff --git a/app/data/agent_file_system_template/GLOBAL_LIVING_UI.md b/app/data/agent_file_system_template/GLOBAL_LIVING_UI.md new file mode 100644 index 00000000..a5a7060f --- /dev/null +++ b/app/data/agent_file_system_template/GLOBAL_LIVING_UI.md @@ -0,0 +1,55 @@ +# Global Living UI Configuration + +Global design preferences and rules applied to ALL Living UI projects. +Per-project settings from Phase 0 Q&A override these when they conflict. 
+ +## Design Preferences + +- **Primary Color:** #FF4F18 +- **Secondary Color:** #262626 +- **Accent Color:** #E64515 +- **Background Style:** Default (use CraftBot design tokens) +- **Theme Mode:** Follow system (dark/light) +- **Font Family:** System default (Segoe UI, sans-serif) +- **Border Radius:** Rounded (var(--radius-md)) +- **Spacing:** Comfortable + +## Always Enforced + +- Must use preset UI components (Button, Card, Input, Modal, Table, etc.) +- Must use design tokens from global.css (no arbitrary colors) +- All API calls must handle errors with user-visible feedback +- No inline styles for standard UI elements +- Use react-toastify for notifications (already installed) +- Backend routes must use absolute imports (not relative) +- Images must always render with visible thumbnails +- Videos must show preview thumbnails +- Links should show preview cards when possible +- Empty states must have helpful messages with action buttons +- Loading spinners required for all async operations +- Use toast notifications for all CRUD feedback (success, error) +- Show confirmation dialogs for destructive actions (delete, reset) +- Forms must have inline validation with error messages +- Mobile responsive design required +- Hover states on all clickable elements +- Text must have sufficient contrast against background (dark text on light backgrounds, light text on dark backgrounds) +- Never use light text on light backgrounds or dark text on dark backgrounds + +## Optional Rules + +- [x] Enable drag-and-drop for reordering items +- [x] Add keyboard shortcuts for common actions +- [x] Show item count badges on categories/sections +- [x] Add search/filter bar to all list views +- [x] Support bulk selection and batch operations +- [ ] Enable dark mode only (ignore system preference) +- [ ] Add animations and transitions to UI interactions +- [ ] Show timestamps on all items (created/updated) +- [ ] Enable infinite scroll instead of pagination +- [ ] Add undo/redo support 
for user actions +- [ ] Show breadcrumb navigation for nested views + +## Custom Rules + + + diff --git a/app/data/living_ui_modules/auth/AuthService.ts b/app/data/living_ui_modules/auth/AuthService.ts new file mode 100644 index 00000000..7d8ca015 --- /dev/null +++ b/app/data/living_ui_modules/auth/AuthService.ts @@ -0,0 +1,187 @@ +/** + * Auth Service — handles login, registration, token storage, and authenticated requests. + * + * Copy this file into your project's frontend/services/ directory. + * + * Usage: + * import { authService } from './services/AuthService' + * await authService.login('email@example.com', 'password') + * const user = await authService.getMe() + * authService.logout() + */ + +import type { AuthUser, LoginResponse, MembershipInfo, InviteInfo } from '../auth_types' + +const TOKEN_KEY = 'auth_token' + +class AuthService { + private backendUrl: string + + constructor() { + this.backendUrl = (window as any).__CRAFTBOT_BACKEND_URL__ || 'http://localhost:3101' + } + + getToken(): string | null { + return localStorage.getItem(TOKEN_KEY) + } + + private setToken(token: string): void { + localStorage.setItem(TOKEN_KEY, token) + } + + private clearToken(): void { + localStorage.removeItem(TOKEN_KEY) + } + + isAuthenticated(): boolean { + return !!this.getToken() + } + + /** + * Make an authenticated fetch request. Automatically adds the Bearer token. 
+ */ + async authFetch(url: string, options: RequestInit = {}): Promise { + const token = this.getToken() + const headers: Record = { + 'Content-Type': 'application/json', + ...(options.headers as Record || {}), + } + if (token) { + headers['Authorization'] = `Bearer ${token}` + } + return fetch(url, { ...options, headers }) + } + + async register(email: string, username: string, password: string): Promise { + const resp = await fetch(`${this.backendUrl}/api/auth/register`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ email, username, password }), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Registration failed' })) + throw new Error(err.detail || 'Registration failed') + } + const data: LoginResponse = await resp.json() + this.setToken(data.token) + return data + } + + async login(email: string, password: string): Promise { + const resp = await fetch(`${this.backendUrl}/api/auth/login`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ email, password }), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Login failed' })) + throw new Error(err.detail || 'Invalid email or password') + } + const data: LoginResponse = await resp.json() + this.setToken(data.token) + return data + } + + async getMe(): Promise { + const token = this.getToken() + if (!token) return null + try { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/me`) + if (!resp.ok) { + this.clearToken() + return null + } + const data = await resp.json() + return data.user + } catch { + this.clearToken() + return null + } + } + + logout(): void { + this.clearToken() + } + + // ── Profile ────────────────────────────────────────────────── + + async updateProfile(updates: { username?: string; email?: string }): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/me`, { + method: 'PUT', + body: 
JSON.stringify(updates), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Update failed' })) + throw new Error(err.detail || 'Update failed') + } + return (await resp.json()).user + } + + async changePassword(currentPassword: string, newPassword: string): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/me/password`, { + method: 'PUT', + body: JSON.stringify({ current_password: currentPassword, new_password: newPassword }), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Password change failed' })) + throw new Error(err.detail || 'Password change failed') + } + } + + // ── Membership ─────────────────────────────────────────────── + + async getMembers(resourceType: string, resourceId: number): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/members/${resourceType}/${resourceId}`) + if (!resp.ok) return [] + return (await resp.json()).members || [] + } + + async addMember(resourceType: string, resourceId: number, userId: number, role = 'member'): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/members/${resourceType}/${resourceId}`, { + method: 'POST', + body: JSON.stringify({ user_id: userId, role }), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Failed to add member' })) + throw new Error(err.detail || 'Failed to add member') + } + return (await resp.json()).membership + } + + async removeMember(resourceType: string, resourceId: number, userId: number): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/members/${resourceType}/${resourceId}/${userId}`, { + method: 'DELETE', + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Failed to remove member' })) + throw new Error(err.detail || 'Failed to remove member') + } + } + + // ── Invites ────────────────────────────────────────────────── + + async createInvite(resourceType: string, resourceId: 
number, defaultRole = 'member', maxUses?: number): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/invites`, { + method: 'POST', + body: JSON.stringify({ resource_type: resourceType, resource_id: resourceId, default_role: defaultRole, max_uses: maxUses }), + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Failed to create invite' })) + throw new Error(err.detail || 'Failed to create invite') + } + return (await resp.json()).invite + } + + async acceptInvite(code: string): Promise { + const resp = await this.authFetch(`${this.backendUrl}/api/auth/invites/${code}/accept`, { + method: 'POST', + }) + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: 'Failed to accept invite' })) + throw new Error(err.detail || 'Failed to accept invite') + } + return (await resp.json()).membership + } +} + +export const authService = new AuthService() diff --git a/app/data/living_ui_modules/auth/README.md b/app/data/living_ui_modules/auth/README.md new file mode 100644 index 00000000..8a77482b --- /dev/null +++ b/app/data/living_ui_modules/auth/README.md @@ -0,0 +1,230 @@ +# Auth Module — Multi-User Support for Living UI + +Self-contained authentication with SQLite + bcrypt + JWT. No external services needed. + +## Features +- User registration and login (email + password) +- First user automatically becomes admin +- JWT token auth (24h expiry, stored in localStorage) +- Role-based access (admin, member) +- Pre-built React components (LoginPage, RegisterPage, UserMenu) + +## Integration Steps + +### Backend + +1. Copy these files into `backend/`: + - `auth_models.py` — User model + - `auth_service.py` — password hashing + JWT + - `auth_middleware.py` — FastAPI dependencies (get_current_user, require_admin) + - `auth_routes.py` — /auth/register, /auth/login, /auth/me, /auth/users + +2. Append to `backend/requirements.txt`: + ``` + bcrypt>=4.0.0 + PyJWT>=2.8.0 + ``` + +3. 
In `backend/routes.py`, import and include the auth router: + ```python + from auth_routes import router as auth_router + router.include_router(auth_router) + ``` + +4. Import `User` in `models.py` so the table is created: + ```python + from auth_models import User # noqa: F401 + ``` + +5. Add `user_id` to your data models: + ```python + user_id = Column(Integer, ForeignKey("users.id"), nullable=False) + ``` + +6. Protect routes with auth dependency: + ```python + from auth_middleware import get_current_user + + @router.get("/my-items") + def get_my_items(user = Depends(get_current_user), db = Depends(get_db)): + return db.query(Item).filter(Item.user_id == user.id).all() + ``` + +### Frontend + +1. Copy `auth_types.ts` into `frontend/` +2. Copy `AuthService.ts` into `frontend/services/` +3. Copy `AuthProvider.tsx`, `LoginPage.tsx`, `RegisterPage.tsx`, `UserMenu.tsx` into `frontend/components/auth/` + +4. Wrap your app in AuthProvider (in App.tsx): + ```tsx + import { AuthProvider, useAuth } from './components/auth/AuthProvider' + import { LoginPage } from './components/auth/LoginPage' + import { RegisterPage } from './components/auth/RegisterPage' + + function App() { + return ( + + + + ) + } + + function AuthGate() { + const { isAuthenticated, loading } = useAuth() + const [page, setPage] = useState<'login' | 'register'>('login') + + if (loading) return
Loading...
+ if (!isAuthenticated) { + return page === 'login' + ? setPage('register')} /> + : setPage('login')} /> + } + return + } + ``` + +5. Add UserMenu to your header: + ```tsx + import { UserMenu } from './components/auth/UserMenu' + +
+

My App

+ +
+ ``` + +6. Use `authService.authFetch()` instead of `fetch()` for authenticated API calls: + ```typescript + import { authService } from './services/AuthService' + const resp = await authService.authFetch(`${BACKEND_URL}/api/my-items`) + ``` + +### Tests + +Copy `tests/test_auth.py` into `backend/tests/`. Run: +``` +cd backend && python -m pytest tests/test_auth.py -v +``` + +## Membership — Connecting Users to Resources + +The auth module includes a generic **Membership** system for linking users to app resources +(projects, boards, teams, etc.) and an **Invite** system for shareable join links. + +### How it works + +When a user creates a resource (e.g., a project), also create a Membership: +```python +from auth_models import Membership + +@router.post("/projects") +def create_project(data: ..., user = Depends(get_current_user), db = Depends(get_db)): + project = Project(name=data.name, created_by=user.id) + db.add(project) + db.flush() # Get project.id + + # Make creator the owner + membership = Membership(user_id=user.id, resource_type="project", + resource_id=project.id, role="owner") + db.add(membership) + db.commit() + return project.to_dict() +``` + +### Filtering by membership + +Only show resources the user is a member of: +```python +@router.get("/projects") +def get_my_projects(user = Depends(get_current_user), db = Depends(get_db)): + project_ids = [m.resource_id for m in db.query(Membership).filter_by( + user_id=user.id, resource_type="project" + ).all()] + return db.query(Project).filter(Project.id.in_(project_ids)).all() +``` + +### Protecting routes by membership + +Use `require_membership` to ensure the user belongs to the resource: +```python +from auth_middleware import require_membership + +@router.get("/projects/{project_id}/tasks") +def get_tasks(project_id: int, + member = Depends(require_membership("project")), + db = Depends(get_db)): + # Only runs if user is a member of this project + return 
db.query(Task).filter_by(project_id=project_id).all() +``` + +### Invite links + +Users can generate invite codes to share: +``` +POST /api/auth/invites → creates invite code for a resource +POST /api/auth/invites/{code}/accept → joins the resource +``` + +## Frontend Components for Membership + +### MemberList — show who's in a resource + +```tsx +import { MemberList } from './components/auth/MemberList' + +// In your project settings or sidebar: + +``` + +### InviteModal — create & accept invite codes + +```tsx +import { InviteModal } from './components/auth/InviteModal' + + setShowInvite(false)} +/> +``` + +The modal has two sections: +- **Create invite** — generates a code the owner can share +- **Join with code** — paste an invite code to join + +### ProfilePage — edit account & change password + +```tsx +import { ProfilePage } from './components/auth/ProfilePage' + +// As a page or modal content: +{showProfile && setShowProfile(false)} />} +``` + +### UserMenu — already includes link to profile + +The `UserMenu` component shows the user dropdown with sign-out. The agent should add +a "Profile" option that opens `ProfilePage`. 
+ +## API Endpoints + +| Method | Path | Auth | Description | +|--------|------|------|-------------| +| POST | /api/auth/register | No | Create account (first user = admin) | +| POST | /api/auth/login | No | Login, returns JWT | +| GET | /api/auth/me | Yes | Get current user | +| PUT | /api/auth/me | Yes | Update profile (username, email) | +| PUT | /api/auth/me/password | Yes | Change password | +| POST | /api/auth/logout | No | Client-side logout | +| GET | /api/auth/users | Admin | List all users | +| GET | /api/auth/members/{type}/{id} | Member | List members of a resource | +| POST | /api/auth/members/{type}/{id} | Owner | Add a member to a resource | +| DELETE | /api/auth/members/{type}/{id}/{uid} | Owner | Remove a member | +| POST | /api/auth/invites | Owner | Create an invite link | +| POST | /api/auth/invites/{code}/accept | Yes | Accept invite and join | diff --git a/app/data/living_ui_modules/auth/auth_types.ts b/app/data/living_ui_modules/auth/auth_types.ts new file mode 100644 index 00000000..42ad071b --- /dev/null +++ b/app/data/living_ui_modules/auth/auth_types.ts @@ -0,0 +1,48 @@ +/** + * Auth TypeScript interfaces. + * + * Copy this file into your project's frontend/ directory. 
+ */ + +export interface AuthUser { + id: number + email: string + username: string + role: 'admin' | 'member' + isActive: boolean + createdAt: string +} + +export interface AuthState { + user: AuthUser | null + token: string | null + isAuthenticated: boolean + loading: boolean +} + +export interface LoginResponse { + user: AuthUser + token: string +} + +export interface MembershipInfo { + id: number + userId: number + resourceType: string + resourceId: number + role: string + joinedAt: string + user: AuthUser | null +} + +export interface InviteInfo { + id: number + code: string + resourceType: string + resourceId: number + defaultRole: string + isActive: boolean + maxUses: number | null + useCount: number + createdAt: string +} diff --git a/app/data/living_ui_modules/auth/backend/auth_middleware.py b/app/data/living_ui_modules/auth/backend/auth_middleware.py new file mode 100644 index 00000000..efecd8ce --- /dev/null +++ b/app/data/living_ui_modules/auth/backend/auth_middleware.py @@ -0,0 +1,105 @@ +""" +Auth Middleware — FastAPI dependencies for protecting routes. + +Copy this file into your project's backend/ directory. 
+ +Usage in routes: + from auth_middleware import get_current_user, require_admin + + @router.get("/my-items") + def get_my_items(user: User = Depends(get_current_user), db: Session = Depends(get_db)): + return db.query(Item).filter(Item.user_id == user.id).all() + + @router.get("/admin/users") + def list_users(user: User = Depends(require_admin), db: Session = Depends(get_db)): + return [u.to_dict() for u in db.query(User).all()] +""" + +from fastapi import Depends, Header, HTTPException +from sqlalchemy.orm import Session + +from auth_models import User, Membership +from auth_service import verify_token +from database import get_db + + +def get_current_user( + authorization: str = Header(None), + db: Session = Depends(get_db), +) -> User: + """FastAPI dependency that extracts and validates the Bearer token.""" + if not authorization or not authorization.startswith("Bearer "): + raise HTTPException(status_code=401, detail="Not authenticated") + + token = authorization.split(" ", 1)[1] + try: + payload = verify_token(token) + except Exception: + raise HTTPException(status_code=401, detail="Invalid or expired token") + + user_id = int(payload.get("sub", 0)) + user = db.query(User).filter(User.id == user_id, User.is_active == True).first() + if not user: + raise HTTPException(status_code=401, detail="User not found") + + return user + + +def require_admin(user: User = Depends(get_current_user)) -> User: + """FastAPI dependency that requires the current user to be an admin.""" + if user.role != "admin": + raise HTTPException(status_code=403, detail="Admin access required") + return user + + +def require_membership(resource_type: str): + """ + Factory that returns a FastAPI dependency requiring membership in a resource. + + The route must have a path parameter matching the resource_id. 
+ + Usage: + @router.get("/projects/{project_id}/tasks") + def get_tasks( + project_id: int, + user: User = Depends(get_current_user), + member: Membership = Depends(require_membership("project")), + db: Session = Depends(get_db), + ): + return db.query(Task).filter_by(project_id=project_id).all() + """ + from fastapi import Request + + def dependency( + request: Request, + user: User = Depends(get_current_user), + db: Session = Depends(get_db), + ) -> Membership: + # Extract resource_id from path params — try common patterns + resource_id = ( + request.path_params.get(f"{resource_type}_id") + or request.path_params.get("resource_id") + or request.path_params.get("id") + ) + if not resource_id: + raise HTTPException(status_code=400, detail=f"Missing {resource_type}_id in path") + + # Global admins bypass membership check + if user.role == "admin": + membership = db.query(Membership).filter_by( + user_id=user.id, resource_type=resource_type, resource_id=int(resource_id) + ).first() + if membership: + return membership + # Admin without membership — create a synthetic one for compatibility + return Membership(user_id=user.id, resource_type=resource_type, + resource_id=int(resource_id), role="admin") + + membership = db.query(Membership).filter_by( + user_id=user.id, resource_type=resource_type, resource_id=int(resource_id) + ).first() + if not membership: + raise HTTPException(status_code=403, detail=f"Not a member of this {resource_type}") + return membership + + return dependency diff --git a/app/data/living_ui_modules/auth/backend/auth_models.py b/app/data/living_ui_modules/auth/backend/auth_models.py new file mode 100644 index 00000000..a680a305 --- /dev/null +++ b/app/data/living_ui_modules/auth/backend/auth_models.py @@ -0,0 +1,140 @@ +""" +Auth Models — User accounts and resource membership for multi-user Living UI apps. + +Copy this file into your project's backend/ directory. 
+Import in your models.py: + from auth_models import User, Membership # noqa: F401 +""" + +import secrets +from datetime import datetime +from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, UniqueConstraint +from sqlalchemy.orm import relationship +from models import Base + + +class User(Base): + __tablename__ = "users" + + id = Column(Integer, primary_key=True) + email = Column(String(255), unique=True, nullable=False, index=True) + username = Column(String(100), unique=True, nullable=False) + password_hash = Column(String(255), nullable=False) + role = Column(String(50), default="member") # "admin" or "member" + is_active = Column(Boolean, default=True) + created_at = Column(DateTime, default=datetime.utcnow) + + memberships = relationship("Membership", back_populates="user", cascade="all, delete-orphan") + + def to_dict(self): + return { + "id": self.id, + "email": self.email, + "username": self.username, + "role": self.role, + "isActive": self.is_active, + "createdAt": self.created_at.isoformat() if self.created_at else None, + } + + +class Membership(Base): + """ + Generic membership — links a user to any app resource (project, board, team, etc.). 
+ + Usage: + # Add user to a project as editor + m = Membership(user_id=1, resource_type="project", resource_id=5, role="editor") + db.add(m) + + # Get all members of a project + members = db.query(Membership).filter_by(resource_type="project", resource_id=5).all() + + # Get all projects a user belongs to + project_ids = db.query(Membership.resource_id).filter_by( + user_id=1, resource_type="project" + ).all() + + # Check if user is a member + is_member = db.query(Membership).filter_by( + user_id=1, resource_type="project", resource_id=5 + ).first() is not None + """ + __tablename__ = "memberships" + __table_args__ = ( + UniqueConstraint("user_id", "resource_type", "resource_id", name="uq_membership"), + ) + + id = Column(Integer, primary_key=True) + user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True) + resource_type = Column(String(50), nullable=False) # "project", "board", "team", etc. + resource_id = Column(Integer, nullable=False, index=True) + role = Column(String(50), default="member") # "owner", "admin", "editor", "viewer", "member" + invite_code = Column(String(64), nullable=True) # For pending invites + joined_at = Column(DateTime, default=datetime.utcnow) + + user = relationship("User", back_populates="memberships") + + def to_dict(self): + return { + "id": self.id, + "userId": self.user_id, + "resourceType": self.resource_type, + "resourceId": self.resource_id, + "role": self.role, + "joinedAt": self.joined_at.isoformat() if self.joined_at else None, + "user": self.user.to_dict() if self.user else None, + } + + +class Invite(Base): + """ + Invite links — generate a code that anyone can use to join a resource. 
+ + Usage: + # Create invite link for a project + invite = Invite.create(resource_type="project", resource_id=5, created_by=1) + db.add(invite) + # Share the code: invite.code + + # Accept invite + invite = db.query(Invite).filter_by(code="abc123", is_active=True).first() + membership = Membership(user_id=2, resource_type=invite.resource_type, + resource_id=invite.resource_id, role=invite.default_role) + """ + __tablename__ = "invites" + + id = Column(Integer, primary_key=True) + code = Column(String(64), unique=True, nullable=False, index=True) + resource_type = Column(String(50), nullable=False) + resource_id = Column(Integer, nullable=False) + default_role = Column(String(50), default="member") + created_by = Column(Integer, ForeignKey("users.id"), nullable=False) + is_active = Column(Boolean, default=True) + max_uses = Column(Integer, nullable=True) # None = unlimited + use_count = Column(Integer, default=0) + created_at = Column(DateTime, default=datetime.utcnow) + + @classmethod + def create(cls, resource_type: str, resource_id: int, created_by: int, + default_role: str = "member", max_uses: int = None): + return cls( + code=secrets.token_urlsafe(16), + resource_type=resource_type, + resource_id=resource_id, + created_by=created_by, + default_role=default_role, + max_uses=max_uses, + ) + + def to_dict(self): + return { + "id": self.id, + "code": self.code, + "resourceType": self.resource_type, + "resourceId": self.resource_id, + "defaultRole": self.default_role, + "isActive": self.is_active, + "maxUses": self.max_uses, + "useCount": self.use_count, + "createdAt": self.created_at.isoformat() if self.created_at else None, + } diff --git a/app/data/living_ui_modules/auth/backend/auth_routes.py b/app/data/living_ui_modules/auth/backend/auth_routes.py new file mode 100644 index 00000000..688ebea2 --- /dev/null +++ b/app/data/living_ui_modules/auth/backend/auth_routes.py @@ -0,0 +1,306 @@ +""" +Auth Routes — registration, login, user management endpoints. 
+
+Copy this file into your project's backend/ directory.
+Then import and include the router in routes.py:
+
+    from auth_routes import router as auth_router
+    # ... at the bottom of routes.py:
+    router.include_router(auth_router)
+"""
+
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from auth_models import User, Membership, Invite
+from auth_middleware import get_current_user, require_admin
+from auth_service import hash_password, verify_password, create_token
+from database import get_db
+
+router = APIRouter(prefix="/auth", tags=["auth"])
+
+
+class RegisterRequest(BaseModel):
+    email: str
+    username: str
+    password: str
+
+
+class LoginRequest(BaseModel):
+    email: str
+    password: str
+
+
+@router.post("/register")
+def register(data: RegisterRequest, db: Session = Depends(get_db)):
+    """Register a new user. First user automatically becomes admin."""
+    # Check for existing user
+    if db.query(User).filter(User.email == data.email).first():
+        raise HTTPException(status_code=400, detail="Email already registered")
+    if db.query(User).filter(User.username == data.username).first():
+        raise HTTPException(status_code=400, detail="Username already taken")
+
+    # First user is admin
+    is_first_user = db.query(User).count() == 0
+    role = "admin" if is_first_user else "member"
+
+    user = User(
+        email=data.email,
+        username=data.username,
+        password_hash=hash_password(data.password),
+        role=role,
+    )
+    db.add(user)
+    db.commit()
+    db.refresh(user)
+
+    token = create_token(user.id)
+    return {"user": user.to_dict(), "token": token}
+
+
+@router.post("/login")
+def login(data: LoginRequest, db: Session = Depends(get_db)):
+    """Login with email and password."""
+    user = db.query(User).filter(User.email == data.email).first()
+    if not user or not verify_password(data.password, user.password_hash):
+        raise HTTPException(status_code=401, detail="Invalid email or password")
+    if not 
user.is_active:
+        raise HTTPException(status_code=403, detail="Account is deactivated")
+
+    token = create_token(user.id)
+    return {"user": user.to_dict(), "token": token}
+
+
+@router.get("/me")
+def get_me(user: User = Depends(get_current_user)):
+    """Get the current authenticated user."""
+    return {"user": user.to_dict()}
+
+
+@router.post("/logout")
+def logout():
+    """Logout — client should delete the stored token."""
+    return {"message": "Logged out"}
+
+
+@router.get("/users")
+def list_users(
+    user: User = Depends(require_admin),
+    db: Session = Depends(get_db),
+):
+    """List all users (admin only)."""
+    users = db.query(User).order_by(User.created_at.desc()).all()
+    return {"users": [u.to_dict() for u in users]}
+
+
+# ============================================================================
+# Profile — update own account
+# ============================================================================
+
+class UpdateProfileRequest(BaseModel):
+    username: str | None = None
+    email: str | None = None
+
+
+@router.put("/me")
+def update_profile(
+    data: UpdateProfileRequest,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Update current user's profile."""
+    if data.email and data.email != user.email:
+        if db.query(User).filter(User.email == data.email, User.id != user.id).first():
+            raise HTTPException(status_code=400, detail="Email already in use")
+        user.email = data.email
+    if data.username and data.username != user.username:
+        if db.query(User).filter(User.username == data.username, User.id != user.id).first():
+            raise HTTPException(status_code=400, detail="Username already taken")
+        user.username = data.username
+    db.commit()
+    db.refresh(user)
+    return {"user": user.to_dict()}
+
+
+class ChangePasswordRequest(BaseModel):
+    current_password: str
+    new_password: str
+
+
+@router.put("/me/password")
+def change_password(
+    data: ChangePasswordRequest,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    
"""Change current user's password.""" + if not verify_password(data.current_password, user.password_hash): + raise HTTPException(status_code=400, detail="Current password is incorrect") + if len(data.new_password) < 6: + raise HTTPException(status_code=400, detail="Password must be at least 6 characters") + user.password_hash = hash_password(data.new_password) + db.commit() + return {"message": "Password updated"} + + +# ============================================================================ +# Membership — link users to resources (projects, boards, teams, etc.) +# ============================================================================ + +def _check_membership(db: Session, user: User, resource_type: str, resource_id: int, + required_roles: tuple = None) -> None: + """Verify user has access to a resource. Raises 403 if not. + + Args: + required_roles: If set, user must have one of these roles (e.g., ("owner", "admin")). + If None, any membership is sufficient. + """ + if user.role == "admin": + return # Global admins bypass all checks + membership = db.query(Membership).filter_by( + user_id=user.id, resource_type=resource_type, resource_id=resource_id + ).first() + if not membership: + raise HTTPException(status_code=403, detail="Not a member of this resource") + if required_roles and membership.role not in required_roles: + raise HTTPException(status_code=403, detail=f"Requires role: {' or '.join(required_roles)}") + + +@router.get("/members/{resource_type}/{resource_id}") +def get_members( + resource_type: str, + resource_id: int, + user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Get all members of a resource. 
Caller must be a member.""" + _check_membership(db, user, resource_type, resource_id) + members = db.query(Membership).filter_by( + resource_type=resource_type, resource_id=resource_id + ).all() + return {"members": [m.to_dict() for m in members]} + + +class AddMemberRequest(BaseModel): + user_id: int + role: str = "member" + + +@router.post("/members/{resource_type}/{resource_id}") +def add_member( + resource_type: str, + resource_id: int, + data: AddMemberRequest, + user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Add a user to a resource. Caller must be owner/admin of the resource.""" + _check_membership(db, user, resource_type, resource_id, ("owner", "admin")) + + existing = db.query(Membership).filter_by( + user_id=data.user_id, resource_type=resource_type, resource_id=resource_id + ).first() + if existing: + raise HTTPException(status_code=400, detail="User is already a member") + + membership = Membership( + user_id=data.user_id, + resource_type=resource_type, + resource_id=resource_id, + role=data.role, + ) + db.add(membership) + db.commit() + db.refresh(membership) + return {"membership": membership.to_dict()} + + +@router.delete("/members/{resource_type}/{resource_id}/{user_id}") +def remove_member( + resource_type: str, + resource_id: int, + user_id: int, + user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Remove a user from a resource. 
Caller must be owner/admin or removing themselves."""
+    if user.id != user_id:
+        _check_membership(db, user, resource_type, resource_id, ("owner", "admin"))
+
+    membership = db.query(Membership).filter_by(
+        user_id=user_id, resource_type=resource_type, resource_id=resource_id
+    ).first()
+    if not membership:
+        raise HTTPException(status_code=404, detail="Membership not found")
+
+    db.delete(membership)
+    db.commit()
+    return {"message": "Member removed"}
+
+
+# ============================================================================
+# Invites — shareable links to join a resource
+# ============================================================================
+
+class CreateInviteRequest(BaseModel):
+    resource_type: str
+    resource_id: int
+    default_role: str = "member"
+    max_uses: int | None = None
+
+
+@router.post("/invites")
+def create_invite(
+    data: CreateInviteRequest,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Create an invite link for a resource. 
Caller must be owner/admin.""" + _check_membership(db, user, data.resource_type, data.resource_id, ("owner", "admin")) + + invite = Invite.create( + resource_type=data.resource_type, + resource_id=data.resource_id, + created_by=user.id, + default_role=data.default_role, + max_uses=data.max_uses, + ) + db.add(invite) + db.commit() + db.refresh(invite) + return {"invite": invite.to_dict()} + + +@router.post("/invites/{code}/accept") +def accept_invite( + code: str, + user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Accept an invite and join the resource.""" + invite = db.query(Invite).filter_by(code=code, is_active=True).first() + if not invite: + raise HTTPException(status_code=404, detail="Invite not found or expired") + + if invite.max_uses and invite.use_count >= invite.max_uses: + raise HTTPException(status_code=410, detail="Invite has reached maximum uses") + + # Check if already a member + existing = db.query(Membership).filter_by( + user_id=user.id, resource_type=invite.resource_type, resource_id=invite.resource_id + ).first() + if existing: + return {"membership": existing.to_dict(), "message": "Already a member"} + + membership = Membership( + user_id=user.id, + resource_type=invite.resource_type, + resource_id=invite.resource_id, + role=invite.default_role, + ) + invite.use_count += 1 + db.add(membership) + db.commit() + db.refresh(membership) + return {"membership": membership.to_dict()} diff --git a/app/data/living_ui_modules/auth/backend/auth_service.py b/app/data/living_ui_modules/auth/backend/auth_service.py new file mode 100644 index 00000000..a6639737 --- /dev/null +++ b/app/data/living_ui_modules/auth/backend/auth_service.py @@ -0,0 +1,53 @@ +""" +Auth Service — password hashing and JWT token management. + +Copy this file into your project's backend/ directory. 
+""" + +import secrets +from datetime import datetime, timedelta +from pathlib import Path + +import bcrypt +import jwt + +# JWT secret stored in a file so it survives restarts but isn't committed +_SECRET_PATH = Path(__file__).parent / ".jwt_secret" +_JWT_ALGORITHM = "HS256" +_TOKEN_EXPIRY_HOURS = 24 + + +def get_or_create_secret() -> str: + """Read JWT secret from file, or generate and save a new one.""" + if _SECRET_PATH.exists(): + return _SECRET_PATH.read_text(encoding="utf-8").strip() + secret = secrets.token_hex(32) + _SECRET_PATH.write_text(secret, encoding="utf-8") + return secret + + +def hash_password(password: str) -> str: + """Hash a password with bcrypt.""" + return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8") + + +def verify_password(password: str, password_hash: str) -> bool: + """Verify a password against a bcrypt hash.""" + return bcrypt.checkpw(password.encode("utf-8"), password_hash.encode("utf-8")) + + +def create_token(user_id: int, expires_hours: int = _TOKEN_EXPIRY_HOURS) -> str: + """Create a JWT token for a user.""" + secret = get_or_create_secret() + payload = { + "sub": str(user_id), + "exp": datetime.utcnow() + timedelta(hours=expires_hours), + "iat": datetime.utcnow(), + } + return jwt.encode(payload, secret, algorithm=_JWT_ALGORITHM) + + +def verify_token(token: str) -> dict: + """Verify a JWT token. Returns the payload or raises jwt.InvalidTokenError.""" + secret = get_or_create_secret() + return jwt.decode(token, secret, algorithms=[_JWT_ALGORITHM]) diff --git a/app/data/living_ui_modules/auth/backend/tests/test_auth.py b/app/data/living_ui_modules/auth/backend/tests/test_auth.py new file mode 100644 index 00000000..ecb8a7d8 --- /dev/null +++ b/app/data/living_ui_modules/auth/backend/tests/test_auth.py @@ -0,0 +1,167 @@ +""" +Auth Module Tests — validates registration, login, token auth, and admin access. + +Copy this file into your project's backend/tests/ directory. 
+Run: cd backend && python -m pytest tests/test_auth.py -v +""" + +import pytest +from fastapi.testclient import TestClient +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import StaticPool + +from models import Base +from main import app +from database import get_db + + +# Test database — in-memory SQLite +test_engine = create_engine( + "sqlite://", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, +) +TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine) + + +def override_get_db(): + db = TestSession() + try: + yield db + finally: + db.close() + + +@pytest.fixture(autouse=True) +def setup_db(): + """Create fresh tables for each test.""" + # Import auth models so they're registered with Base + import auth_models # noqa: F401 + Base.metadata.create_all(bind=test_engine) + yield + Base.metadata.drop_all(bind=test_engine) + + +@pytest.fixture +def client(): + app.dependency_overrides[get_db] = override_get_db + with TestClient(app) as c: + yield c + app.dependency_overrides.clear() + + +class TestRegistration: + def test_register_first_user_is_admin(self, client): + resp = client.post("/api/auth/register", json={ + "email": "admin@example.com", + "username": "admin", + "password": "secure123", + }) + assert resp.status_code == 200 + data = resp.json() + assert data["user"]["role"] == "admin" + assert "token" in data + + def test_register_second_user_is_member(self, client): + client.post("/api/auth/register", json={ + "email": "admin@example.com", "username": "admin", "password": "secure123", + }) + resp = client.post("/api/auth/register", json={ + "email": "user@example.com", "username": "user1", "password": "secure123", + }) + assert resp.status_code == 200 + assert resp.json()["user"]["role"] == "member" + + def test_register_duplicate_email(self, client): + client.post("/api/auth/register", json={ + "email": "test@example.com", "username": "user1", "password": 
"pass123", + }) + resp = client.post("/api/auth/register", json={ + "email": "test@example.com", "username": "user2", "password": "pass123", + }) + assert resp.status_code == 400 + assert "already registered" in resp.json()["detail"] + + def test_register_duplicate_username(self, client): + client.post("/api/auth/register", json={ + "email": "a@example.com", "username": "sameuser", "password": "pass123", + }) + resp = client.post("/api/auth/register", json={ + "email": "b@example.com", "username": "sameuser", "password": "pass123", + }) + assert resp.status_code == 400 + assert "already taken" in resp.json()["detail"] + + +class TestLogin: + def test_login_success(self, client): + client.post("/api/auth/register", json={ + "email": "test@example.com", "username": "testuser", "password": "mypassword", + }) + resp = client.post("/api/auth/login", json={ + "email": "test@example.com", "password": "mypassword", + }) + assert resp.status_code == 200 + assert "token" in resp.json() + + def test_login_wrong_password(self, client): + client.post("/api/auth/register", json={ + "email": "test@example.com", "username": "testuser", "password": "correct", + }) + resp = client.post("/api/auth/login", json={ + "email": "test@example.com", "password": "wrong", + }) + assert resp.status_code == 401 + + def test_login_nonexistent_user(self, client): + resp = client.post("/api/auth/login", json={ + "email": "nobody@example.com", "password": "pass", + }) + assert resp.status_code == 401 + + +class TestAuthenticatedAccess: + def _register_and_get_token(self, client, email="test@example.com"): + resp = client.post("/api/auth/register", json={ + "email": email, "username": email.split("@")[0], "password": "pass123", + }) + return resp.json()["token"] + + def test_get_me(self, client): + token = self._register_and_get_token(client) + resp = client.get("/api/auth/me", headers={"Authorization": f"Bearer {token}"}) + assert resp.status_code == 200 + assert resp.json()["user"]["email"] == 
"test@example.com" + + def test_get_me_no_token(self, client): + resp = client.get("/api/auth/me") + assert resp.status_code == 401 + + def test_get_me_invalid_token(self, client): + resp = client.get("/api/auth/me", headers={"Authorization": "Bearer invalid"}) + assert resp.status_code == 401 + + +class TestAdminAccess: + def test_admin_can_list_users(self, client): + resp = client.post("/api/auth/register", json={ + "email": "admin@example.com", "username": "admin", "password": "pass123", + }) + token = resp.json()["token"] + resp = client.get("/api/auth/users", headers={"Authorization": f"Bearer {token}"}) + assert resp.status_code == 200 + assert len(resp.json()["users"]) == 1 + + def test_member_cannot_list_users(self, client): + # First user is admin + client.post("/api/auth/register", json={ + "email": "admin@example.com", "username": "admin", "password": "pass123", + }) + # Second user is member + resp = client.post("/api/auth/register", json={ + "email": "member@example.com", "username": "member", "password": "pass123", + }) + token = resp.json()["token"] + resp = client.get("/api/auth/users", headers={"Authorization": f"Bearer {token}"}) + assert resp.status_code == 403 diff --git a/app/data/living_ui_modules/auth/frontend/AuthLayout.tsx b/app/data/living_ui_modules/auth/frontend/AuthLayout.tsx new file mode 100644 index 00000000..9d0414f5 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/AuthLayout.tsx @@ -0,0 +1,102 @@ +/** + * Auth Layout — shared wrapper for login, register, and profile pages. + * Also exports FormField for consistent label + input pairs. + * + * Copy this file into your project's frontend/components/auth/ directory. 
+ */ + +import { ReactNode } from 'react' +import { Card, Input, Alert } from '../ui' + +// ── Centered card layout for auth pages ──────────────────────── + +interface AuthLayoutProps { + title: string + children: ReactNode + error?: string + footer?: ReactNode +} + +export function AuthLayout({ title, children, error, footer }: AuthLayoutProps) { + return ( +
+ +

+ {title} +

+ {error && {error}} + {children} + {footer} +
+
+ ) +} + +// ── Label + Input pair ───────────────────────────────────────── + +interface FormFieldProps { + label: string + type?: string + value: string + onChange: (value: string) => void + placeholder?: string + required?: boolean + readOnly?: boolean +} + +const labelStyle: React.CSSProperties = { + display: 'block', fontSize: 'var(--text-sm)', + fontWeight: 'var(--font-weight-medium)' as any, + marginBottom: 'var(--space-1)', color: 'var(--text-secondary)', +} + +export function FormField({ label, type = 'text', value, onChange, placeholder, required, readOnly }: FormFieldProps) { + return ( +
+ + onChange(e.target.value)} + placeholder={placeholder} + required={required} + readOnly={readOnly} + /> +
+ ) +} + +// ── Switch link ("Don't have an account? Sign up") ───────────── + +interface AuthSwitchLinkProps { + text: string + linkText: string + onClick: () => void +} + +export function AuthSwitchLink({ text, linkText, onClick }: AuthSwitchLinkProps) { + return ( +

+ {text}{' '} + +

+ ) +} diff --git a/app/data/living_ui_modules/auth/frontend/AuthProvider.tsx b/app/data/living_ui_modules/auth/frontend/AuthProvider.tsx new file mode 100644 index 00000000..64a624d1 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/AuthProvider.tsx @@ -0,0 +1,84 @@ +/** + * Auth Provider — React context for authentication state. + * + * Copy this file into your project's frontend/components/auth/ directory. + * + * Usage in App.tsx: + * import { AuthProvider, useAuth } from './components/auth/AuthProvider' + * + * function App() { + * return ( + * + * + * + * ) + * } + * + * function AppContent() { + * const { user, isAuthenticated, logout } = useAuth() + * if (!isAuthenticated) return + * return + * } + */ + +import { createContext, useContext, useState, useEffect, useCallback, ReactNode } from 'react' +import type { AuthUser, AuthState } from '../../auth_types' +import { authService } from '../../services/AuthService' + +interface AuthContextValue extends AuthState { + login: (email: string, password: string) => Promise + register: (email: string, username: string, password: string) => Promise + logout: () => void +} + +const AuthContext = createContext(null) + +export function useAuth(): AuthContextValue { + const ctx = useContext(AuthContext) + if (!ctx) throw new Error('useAuth must be used within ') + return ctx +} + +export function AuthProvider({ children }: { children: ReactNode }) { + const [state, setState] = useState({ + user: null, + token: authService.getToken(), + isAuthenticated: false, + loading: true, + }) + + // Validate existing token on mount + useEffect(() => { + const validate = async () => { + const user = await authService.getMe() + setState({ + user, + token: authService.getToken(), + isAuthenticated: !!user, + loading: false, + }) + } + validate() + }, []) + + const login = useCallback(async (email: string, password: string) => { + const { user, token } = await authService.login(email, password) + setState({ user, token, 
isAuthenticated: true, loading: false }) + }, []) + + const register = useCallback(async (email: string, username: string, password: string) => { + const { user, token } = await authService.register(email, username, password) + setState({ user, token, isAuthenticated: true, loading: false }) + }, []) + + const logout = useCallback(() => { + authService.logout() + setState({ user: null, token: null, isAuthenticated: false, loading: false }) + }, []) + + return ( + + {children} + + ) +} diff --git a/app/data/living_ui_modules/auth/frontend/InviteModal.tsx b/app/data/living_ui_modules/auth/frontend/InviteModal.tsx new file mode 100644 index 00000000..15d17a01 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/InviteModal.tsx @@ -0,0 +1,141 @@ +/** + * Invite Modal — create and share invite links for a resource. + * + * Copy this file into your project's frontend/components/auth/ directory. + * + * Usage: + * import { InviteModal } from './components/auth/InviteModal' + * setShowInvite(false)} + * /> + */ + +import { useState } from 'react' +import { Button, Input, Alert, Modal } from '../ui' +import { authService } from '../../services/AuthService' + +interface InviteModalProps { + resourceType: string + resourceId: number + isOpen: boolean + onClose: () => void +} + +export function InviteModal({ resourceType, resourceId, isOpen, onClose }: InviteModalProps) { + const [inviteCode, setInviteCode] = useState('') + const [loading, setLoading] = useState(false) + const [error, setError] = useState('') + const [copied, setCopied] = useState(false) + + // Accept invite state + const [joinCode, setJoinCode] = useState('') + const [joining, setJoining] = useState(false) + const [joinSuccess, setJoinSuccess] = useState(false) + + const handleCreateInvite = async () => { + setLoading(true) + setError('') + try { + const invite = await authService.createInvite(resourceType, resourceId) + setInviteCode(invite.code) + } catch (err) { + setError(err instanceof Error ? 
err.message : 'Failed to create invite') + } finally { + setLoading(false) + } + } + + const handleCopy = () => { + navigator.clipboard.writeText(inviteCode) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } + + const handleJoin = async () => { + if (!joinCode.trim()) return + setJoining(true) + setError('') + try { + await authService.acceptInvite(joinCode.trim()) + setJoinSuccess(true) + setTimeout(() => { onClose(); setJoinSuccess(false); setJoinCode('') }, 1500) + } catch (err) { + setError(err instanceof Error ? err.message : 'Invalid invite code') + } finally { + setJoining(false) + } + } + + const handleClose = () => { + setInviteCode('') + setError('') + setCopied(false) + setJoinCode('') + setJoinSuccess(false) + onClose() + } + + if (!isOpen) return null + + return ( + +
+ {error && {error}} + + {/* Create Invite Section */} +
+

+ Create Invite Link +

+ {inviteCode ? ( +
+ + +
+ ) : ( + + )} +

+ Share this code with others so they can join. +

+
+ + {/* Divider */} +
+
+ or +
+
+ + {/* Join Section */} +
+

+ Join with Code +

+ {joinSuccess ? ( + Joined successfully! + ) : ( +
+ setJoinCode(e.target.value)} + placeholder="Paste invite code" + style={{ flex: 1 }} + /> + +
+ )} +
+
+ + ) +} diff --git a/app/data/living_ui_modules/auth/frontend/LoginPage.tsx b/app/data/living_ui_modules/auth/frontend/LoginPage.tsx new file mode 100644 index 00000000..7eabd526 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/LoginPage.tsx @@ -0,0 +1,49 @@ +/** + * Login Page — email + password form using preset UI components. + * + * Copy this file into your project's frontend/components/auth/ directory. + */ + +import { useState } from 'react' +import { Button } from '../ui' +import { useAuth } from './AuthProvider' +import { AuthLayout, FormField, AuthSwitchLink } from './AuthLayout' + +interface LoginPageProps { + onSwitchToRegister: () => void +} + +export function LoginPage({ onSwitchToRegister }: LoginPageProps) { + const { login } = useAuth() + const [email, setEmail] = useState('') + const [password, setPassword] = useState('') + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + setError('') + setLoading(true) + try { + await login(email, password) + } catch (err) { + setError(err instanceof Error ? err.message : 'Login failed') + } finally { + setLoading(false) + } + } + + return ( + } + > +
+ + + + +
+ ) +} diff --git a/app/data/living_ui_modules/auth/frontend/MemberList.tsx b/app/data/living_ui_modules/auth/frontend/MemberList.tsx new file mode 100644 index 00000000..64328ac3 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/MemberList.tsx @@ -0,0 +1,116 @@ +/** + * Member List — shows members of a resource with role badges and remove button. + * + * Copy this file into your project's frontend/components/auth/ directory. + * + * Usage: + * import { MemberList } from './components/auth/MemberList' + * + */ + +import { useState, useEffect, useCallback } from 'react' +import { Button, Badge, Alert } from '../ui' +import { useAuth } from './AuthProvider' +import { authService } from '../../services/AuthService' +import type { MembershipInfo } from '../../auth_types' + +interface MemberListProps { + resourceType: string + resourceId: number + currentUserRole?: string // caller's role in this resource (for showing remove buttons) +} + +export function MemberList({ resourceType, resourceId, currentUserRole }: MemberListProps) { + const { user } = useAuth() + const [members, setMembers] = useState([]) + const [error, setError] = useState('') + const [removing, setRemoving] = useState(null) + + const canManage = currentUserRole === 'owner' || currentUserRole === 'admin' || user?.role === 'admin' + + const loadMembers = useCallback(async () => { + try { + const data = await authService.getMembers(resourceType, resourceId) + setMembers(data) + } catch { + setError('Failed to load members') + } + }, [resourceType, resourceId]) + + useEffect(() => { loadMembers() }, [loadMembers]) + + const handleRemove = async (userId: number) => { + setRemoving(userId) + try { + await authService.removeMember(resourceType, resourceId, userId) + setMembers(prev => prev.filter(m => m.userId !== userId)) + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to remove member') + } finally { + setRemoving(null) + } + } + + if (error) return {error} + + return ( +
+ {members.length === 0 ? ( +

No members yet

+ ) : ( + members.map(member => ( +
+ {/* Avatar */} +
+ {member.user?.username?.charAt(0).toUpperCase() || '?'} +
+ + {/* Info */} +
+
+ {member.user?.username || `User #${member.userId}`} + {member.userId === user?.id && ( + (you) + )} +
+
+ {member.user?.email} +
+
+ + {/* Role badge */} + + {member.role} + + + {/* Remove button */} + {canManage && member.role !== 'owner' && member.userId !== user?.id && ( + + )} +
+ )) + )} +
+ ) +} diff --git a/app/data/living_ui_modules/auth/frontend/ProfilePage.tsx b/app/data/living_ui_modules/auth/frontend/ProfilePage.tsx new file mode 100644 index 00000000..6d5a6dab --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/ProfilePage.tsx @@ -0,0 +1,117 @@ +/** + * Profile Page — edit username, email, and change password. + * + * Copy this file into your project's frontend/components/auth/ directory. + * + * Usage: + * import { ProfilePage } from './components/auth/ProfilePage' + * {showProfile && setShowProfile(false)} />} + */ + +import { useState } from 'react' +import { Button, Card, Alert } from '../ui' +import { useAuth } from './AuthProvider' +import { FormField } from './AuthLayout' +import { authService } from '../../services/AuthService' + +interface ProfilePageProps { + onClose?: () => void +} + +export function ProfilePage({ onClose }: ProfilePageProps) { + const { user, logout } = useAuth() + + const [username, setUsername] = useState(user?.username || '') + const [email, setEmail] = useState(user?.email || '') + const [profileMsg, setProfileMsg] = useState('') + const [profileErr, setProfileErr] = useState('') + const [profileLoading, setProfileLoading] = useState(false) + + const [currentPassword, setCurrentPassword] = useState('') + const [newPassword, setNewPassword] = useState('') + const [confirmPassword, setConfirmPassword] = useState('') + const [passwordMsg, setPasswordMsg] = useState('') + const [passwordErr, setPasswordErr] = useState('') + const [passwordLoading, setPasswordLoading] = useState(false) + + const handleUpdateProfile = async (e: React.FormEvent) => { + e.preventDefault() + setProfileMsg(''); setProfileErr('') + setProfileLoading(true) + try { + await authService.updateProfile({ username, email }) + setProfileMsg('Profile updated') + } catch (err) { + setProfileErr(err instanceof Error ? 
err.message : 'Update failed') + } finally { + setProfileLoading(false) + } + } + + const handleChangePassword = async (e: React.FormEvent) => { + e.preventDefault() + setPasswordMsg(''); setPasswordErr('') + if (newPassword !== confirmPassword) { setPasswordErr('Passwords do not match'); return } + if (newPassword.length < 6) { setPasswordErr('Password must be at least 6 characters'); return } + setPasswordLoading(true) + try { + await authService.changePassword(currentPassword, newPassword) + setPasswordMsg('Password changed') + setCurrentPassword(''); setNewPassword(''); setConfirmPassword('') + } catch (err) { + setPasswordErr(err instanceof Error ? err.message : 'Password change failed') + } finally { + setPasswordLoading(false) + } + } + + if (!user) return null + + return ( +
+ {onClose && ( +
+

Profile

+ +
+ )} + + +

+ Account Info +

+ {profileMsg && {profileMsg}} + {profileErr && {profileErr}} +
+ + + + +
+ + +

+ Change Password +

+ {passwordMsg && {passwordMsg}} + {passwordErr && {passwordErr}} +
+ + + + + +
+ + +

+ Sign Out +

+

+ You will need to sign in again to access your account. +

+ +
+
+ ) +} diff --git a/app/data/living_ui_modules/auth/frontend/RegisterPage.tsx b/app/data/living_ui_modules/auth/frontend/RegisterPage.tsx new file mode 100644 index 00000000..e6e35096 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/RegisterPage.tsx @@ -0,0 +1,63 @@ +/** + * Register Page — email, username, password form using preset UI components. + * + * Copy this file into your project's frontend/components/auth/ directory. + */ + +import { useState } from 'react' +import { Button } from '../ui' +import { useAuth } from './AuthProvider' +import { AuthLayout, FormField, AuthSwitchLink } from './AuthLayout' + +interface RegisterPageProps { + onSwitchToLogin: () => void +} + +export function RegisterPage({ onSwitchToLogin }: RegisterPageProps) { + const { register } = useAuth() + const [email, setEmail] = useState('') + const [username, setUsername] = useState('') + const [password, setPassword] = useState('') + const [confirmPassword, setConfirmPassword] = useState('') + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + setError('') + + if (password !== confirmPassword) { + setError('Passwords do not match') + return + } + if (password.length < 6) { + setError('Password must be at least 6 characters') + return + } + + setLoading(true) + try { + await register(email, username, password) + } catch (err) { + setError(err instanceof Error ? err.message : 'Registration failed') + } finally { + setLoading(false) + } + } + + return ( + } + > +
+ + + + + + +
+ ) +} diff --git a/app/data/living_ui_modules/auth/frontend/UserMenu.tsx b/app/data/living_ui_modules/auth/frontend/UserMenu.tsx new file mode 100644 index 00000000..3726ca84 --- /dev/null +++ b/app/data/living_ui_modules/auth/frontend/UserMenu.tsx @@ -0,0 +1,97 @@ +/** + * User Menu — dropdown showing current user with logout option. + * + * Copy this file into your project's frontend/components/auth/ directory. + * Place in your app's header/nav bar. + * + * Usage: + * import { UserMenu } from './components/auth/UserMenu' + *
+ *

My App

+ * + *
+ */ + +import { useState, useRef, useEffect } from 'react' +import { useAuth } from './AuthProvider' +import { Badge } from '../ui' + +export function UserMenu() { + const { user, logout } = useAuth() + const [open, setOpen] = useState(false) + const ref = useRef(null) + + // Close on outside click + useEffect(() => { + const handler = (e: MouseEvent) => { + if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false) + } + document.addEventListener('mousedown', handler) + return () => document.removeEventListener('mousedown', handler) + }, []) + + if (!user) return null + + return ( +
+ + + {open && ( +
+
+
+ {user.username} +
+
+ {user.email} +
+ + {user.role} + +
+ +
+ )} +
+ ) +} diff --git a/app/data/living_ui_modules/auth/requirements.txt b/app/data/living_ui_modules/auth/requirements.txt new file mode 100644 index 00000000..c9f6a53d --- /dev/null +++ b/app/data/living_ui_modules/auth/requirements.txt @@ -0,0 +1,2 @@ +bcrypt>=4.0.0 +PyJWT>=2.8.0 diff --git a/app/data/living_ui_sidecar/proxy.py b/app/data/living_ui_sidecar/proxy.py new file mode 100644 index 00000000..eb868997 --- /dev/null +++ b/app/data/living_ui_sidecar/proxy.py @@ -0,0 +1,214 @@ +""" +Living UI Sidecar Proxy + +A lightweight reverse proxy that sits in front of external apps, +injecting Living UI features (console capture, health checks, logging) +without modifying the original app. + +Usage: + python proxy.py --app-port 3109 --proxy-port 3108 + +Architecture: + Browser → This proxy (port 3108) → External app (port 3109) + ↓ + - Injects console/network capture into HTML responses + - Provides /health, /api/logs endpoints + - Captures frontend logs to logs/frontend_console.log + - Forwards everything else transparently +""" + +import argparse +import logging +import sys +from datetime import datetime +from pathlib import Path +from typing import List, Optional + +import httpx +from fastapi import FastAPI, Request, Response +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse +from pydantic import BaseModel + +# Setup logging +LOG_DIR = Path(__file__).parent.parent / "logs" if (Path(__file__).parent.parent / "logs").exists() else Path("logs") +LOG_DIR.mkdir(parents=True, exist_ok=True) + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s | %(levelname)-8s | %(message)s", + handlers=[ + logging.FileHandler(LOG_DIR / "sidecar.log", encoding="utf-8"), + logging.StreamHandler(sys.stderr), + ], +) +logger = logging.getLogger("sidecar") + +# Parse args +parser = argparse.ArgumentParser() +parser.add_argument("--app-port", type=int, required=True, help="Port of the actual app") +parser.add_argument("--proxy-port", 
type=int, required=True, help="Port for this proxy") +args, _ = parser.parse_known_args() + +APP_URL = f"http://localhost:{args.app_port}" +FRONTEND_LOG_PATH = LOG_DIR / "frontend_console.log" + +# Console capture script to inject into HTML responses +CAPTURE_SCRIPT = """ + +""" + +# FastAPI app +app = FastAPI(title="Living UI Sidecar Proxy") +app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]) + +http_client = httpx.AsyncClient(base_url=APP_URL, timeout=30, follow_redirects=True) + + +# ── Living UI endpoints (handled by sidecar, not forwarded) ────────── + +@app.get("/health") +async def health(): + """Health check — verifies both sidecar and app are running.""" + try: + resp = await http_client.get("/", timeout=5) + app_ok = resp.status_code < 500 + except Exception: + app_ok = False + return {"status": "healthy" if app_ok else "degraded", "sidecar": "ok", "app": "ok" if app_ok else "down"} + + +class LogEntry(BaseModel): + level: str + message: str + timestamp: Optional[str] = None + + +class LogBatch(BaseModel): + entries: List[LogEntry] + + +@app.post("/api/logs") +async def capture_logs(data: LogBatch): + """Receive frontend console logs from the injected capture script.""" + with open(FRONTEND_LOG_PATH, "a", encoding="utf-8") as f: + for entry in data.entries: + ts = entry.timestamp or datetime.utcnow().isoformat() + f.write(f"{ts} | {entry.level.upper():<7} | {entry.message}\n") + return {"status": "ok", "count": len(data.entries)} + + +# ── Reverse proxy (forwards everything else to the app) ────────────── + +@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"]) +async def proxy(request: Request, path: str): + """Forward all requests to the actual app, inject capture script into HTML responses.""" + # Build the proxied URL + url = f"/{path}" + if request.url.query: + url += f"?{request.url.query}" + + # Forward headers (skip host) + headers = dict(request.headers) 
+ headers.pop("host", None) + + try: + body = await request.body() + resp = await http_client.request( + method=request.method, + url=url, + headers=headers, + content=body if body else None, + ) + except httpx.ConnectError: + return JSONResponse({"error": "App not responding"}, status_code=502) + except Exception as e: + return JSONResponse({"error": str(e)}, status_code=502) + + # Check if response is HTML — inject capture script + content_type = resp.headers.get("content-type", "") + response_body = resp.content + + if "text/html" in content_type: + html = response_body.decode("utf-8", errors="replace") + # Inject capture script before or at end + if "" in html.lower(): + idx = html.lower().rfind("") + html = html[:idx] + CAPTURE_SCRIPT + html[idx:] + else: + html += CAPTURE_SCRIPT + response_body = html.encode("utf-8") + + # Build response with original headers + response_headers = dict(resp.headers) + response_headers.pop("content-length", None) # Will be recalculated + response_headers.pop("content-encoding", None) # We may have modified the content + response_headers.pop("transfer-encoding", None) + + return Response( + content=response_body, + status_code=resp.status_code, + headers=response_headers, + ) + + +if __name__ == "__main__": + import uvicorn + logger.info(f"Starting sidecar proxy: localhost:{args.proxy_port} → localhost:{args.app_port}") + uvicorn.run(app, host="0.0.0.0", port=args.proxy_port, log_level="warning") diff --git a/app/data/living_ui_sidecar/requirements.txt b/app/data/living_ui_sidecar/requirements.txt new file mode 100644 index 00000000..609f6748 --- /dev/null +++ b/app/data/living_ui_sidecar/requirements.txt @@ -0,0 +1,3 @@ +fastapi>=0.104.0 +uvicorn>=0.24.0 +httpx>=0.24.0 diff --git a/app/data/living_ui_template/.env.example b/app/data/living_ui_template/.env.example new file mode 100644 index 00000000..3bf1d1ec --- /dev/null +++ b/app/data/living_ui_template/.env.example @@ -0,0 +1,10 @@ +# Living UI Environment Variables + +# 
CraftBot WebSocket URL for agent communication +VITE_CRAFTBOT_WS_URL=ws://localhost:7926 + +# Backend API URL (if using Python backend) +VITE_API_URL=http://localhost:{{BACKEND_PORT}} + +# Add your API keys and secrets below +# VITE_API_KEY=your_api_key_here diff --git a/app/data/living_ui_template/LIVING_UI.md b/app/data/living_ui_template/LIVING_UI.md new file mode 100644 index 00000000..3ef7acb5 --- /dev/null +++ b/app/data/living_ui_template/LIVING_UI.md @@ -0,0 +1,80 @@ +# {{PROJECT_NAME}} + +{{PROJECT_DESCRIPTION}} + +## Overview + + + +## Requirements + + + +### Entities & Data Model + + +### Layout & Design + + +### Features + + +### Assumptions + + +## Data Model + +### Backend Models (backend/models.py) + + + +| Model | Purpose | Key Fields | +|-------|---------|------------| +| Example | Description | field1, field2 | + +## API Endpoints + +### Custom Routes (backend/routes.py) + + + +| Method | Path | Description | +|--------|------|-------------| +| GET | /example | Description | +| POST | /example | Description | + +## Frontend Components + +### Components (frontend/components/) + + + +| Component | Purpose | +|-----------|---------| +| MainView.tsx | Main UI layout | + +## Key Files + +| File | Purpose | +|------|---------| +| backend/models.py | Database models | +| backend/routes.py | API endpoints | +| frontend/types.ts | TypeScript interfaces | +| frontend/AppController.ts | State management | +| frontend/components/MainView.tsx | Main UI | + +## State Flow + +``` +User Action → Frontend Component → AppController → Backend API → SQLite DB + ↓ + Update UI State +``` + +## Testing + + + +1. Create a new item +2. Refresh the page +3. 
Verify item persists diff --git a/app/data/living_ui_template/backend/database.py b/app/data/living_ui_template/backend/database.py new file mode 100644 index 00000000..06b608f1 --- /dev/null +++ b/app/data/living_ui_template/backend/database.py @@ -0,0 +1,72 @@ +""" +Living UI Database Configuration + +SQLite database setup for persistent state storage. +Uses synchronous SQLite with SQLAlchemy for simplicity and reliability. +""" + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, Session +from models import Base +from pathlib import Path +import logging + +logger = logging.getLogger(__name__) + +# Database file stored in the project directory +DATABASE_PATH = Path(__file__).parent / "living_ui.db" +DATABASE_URL = f"sqlite:///{DATABASE_PATH}" + +# Create engine with check_same_thread=False for FastAPI compatibility +engine = create_engine( + DATABASE_URL, + connect_args={"check_same_thread": False}, + echo=False, # Set to True for SQL debugging +) + +# Enable WAL mode for better concurrent read/write performance (multi-user) +from sqlalchemy import event + +@event.listens_for(engine, "connect") +def _set_sqlite_pragma(dbapi_connection, connection_record): + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA journal_mode=WAL") + cursor.close() + +# Session factory +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +async def init_db(): + """Initialize database tables.""" + logger.info(f"[Database] Creating tables at {DATABASE_PATH}") + Base.metadata.create_all(bind=engine) + + # Ensure default app state exists + from models import AppState + db = SessionLocal() + try: + state = db.query(AppState).first() + if not state: + state = AppState() + db.add(state) + db.commit() + logger.info("[Database] Created default app state") + finally: + db.close() + + +def get_db(): + """ + Dependency to get database session. 
+ + Usage in routes: + @router.get("/items") + def get_items(db: Session = Depends(get_db)): + return db.query(Item).all() + """ + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/app/data/living_ui_template/backend/health_checker.py b/app/data/living_ui_template/backend/health_checker.py new file mode 100644 index 00000000..ba7dac20 --- /dev/null +++ b/app/data/living_ui_template/backend/health_checker.py @@ -0,0 +1,159 @@ +""" +Living UI Backend Health Checker + +Background thread that periodically verifies the backend is healthy. +Checks both the HTTP health endpoint and database connectivity. +Writes status to logs/health_status.json for the manager watchdog to read. +Self-terminates if too many consecutive failures occur. +""" + +import json +import logging +import os +import threading +import time +import urllib.request +from datetime import datetime +from pathlib import Path + +logger = logging.getLogger(__name__) + +LOG_DIR = Path(__file__).parent / "logs" + +_checker_thread: threading.Thread | None = None +_stop_event = threading.Event() + +# Number of consecutive failures before self-terminating +MAX_CONSECUTIVE_FAILURES = 5 +CHECK_INTERVAL_SECONDS = 60 +HEALTH_STATUS_FILE = LOG_DIR / "health_status.json" + + +def _write_status( + health_ok: bool, + db_ok: bool, + consecutive_failures: int, + error: str | None = None, +): + """Write current health status to JSON file for external monitoring.""" + LOG_DIR.mkdir(parents=True, exist_ok=True) + status = { + "last_check": datetime.now().isoformat(), + "health_endpoint": "ok" if health_ok else "fail", + "db_connectivity": "ok" if db_ok else "fail", + "consecutive_failures": consecutive_failures, + "error": error, + } + try: + HEALTH_STATUS_FILE.write_text( + json.dumps(status, indent=2), encoding="utf-8" + ) + except Exception as e: + logger.warning(f"[HealthChecker] Failed to write status file: {e}") + + +def _check_health_endpoint(port: int) -> bool: + """Hit the local /health 
endpoint.""" + try: + url = f"http://localhost:{port}/health" + resp = urllib.request.urlopen(url, timeout=5) + return resp.status == 200 + except Exception: + return False + + +def _check_db() -> bool: + """Verify database connectivity with a simple query.""" + try: + from sqlalchemy import text + from database import engine + + with engine.connect() as conn: + conn.execute(text("SELECT 1")) + return True + except Exception: + return False + + +def _run_checker(port: int): + """Main checker loop running in a background thread.""" + consecutive_failures = 0 + + # Wait a bit before first check to let the server fully start + if _stop_event.wait(timeout=15): + return + + logger.info( + f"[HealthChecker] Started - checking every {CHECK_INTERVAL_SECONDS}s " + f"(max {MAX_CONSECUTIVE_FAILURES} consecutive failures before exit)" + ) + + while not _stop_event.is_set(): + health_ok = _check_health_endpoint(port) + db_ok = _check_db() + + if health_ok and db_ok: + if consecutive_failures > 0: + logger.info( + f"[HealthChecker] Recovered after {consecutive_failures} failure(s)" + ) + consecutive_failures = 0 + _write_status(health_ok, db_ok, consecutive_failures) + else: + consecutive_failures += 1 + error_parts = [] + if not health_ok: + error_parts.append("health endpoint not responding") + if not db_ok: + error_parts.append("database connectivity failed") + error_msg = "; ".join(error_parts) + + logger.warning( + f"[HealthChecker] Check failed ({consecutive_failures}/{MAX_CONSECUTIVE_FAILURES}): {error_msg}" + ) + _write_status(health_ok, db_ok, consecutive_failures, error=error_msg) + + if consecutive_failures >= MAX_CONSECUTIVE_FAILURES: + logger.critical( + f"[HealthChecker] {MAX_CONSECUTIVE_FAILURES} consecutive failures - " + f"self-terminating. 
Last error: {error_msg}" + ) + _write_status( + health_ok, + db_ok, + consecutive_failures, + error=f"SELF-TERMINATED: {error_msg}", + ) + # Hard exit so the manager watchdog detects the crash and can restart + os._exit(1) + + _stop_event.wait(timeout=CHECK_INTERVAL_SECONDS) + + +def start_health_checker(port: int): + """Start the background health checker thread.""" + global _checker_thread + + if _checker_thread is not None and _checker_thread.is_alive(): + logger.warning("[HealthChecker] Already running") + return + + _stop_event.clear() + _checker_thread = threading.Thread( + target=_run_checker, args=(port,), daemon=True, name="health-checker" + ) + _checker_thread.start() + logger.info(f"[HealthChecker] Starting for port {port}") + + +def stop_health_checker(): + """Stop the background health checker thread.""" + global _checker_thread + + if _checker_thread is None: + return + + _stop_event.set() + _checker_thread.join(timeout=5) + _checker_thread = None + logger.info("[HealthChecker] Stopped") diff --git a/app/data/living_ui_template/backend/logger.py b/app/data/living_ui_template/backend/logger.py new file mode 100644 index 00000000..cd6608c2 --- /dev/null +++ b/app/data/living_ui_template/backend/logger.py @@ -0,0 +1,76 @@ +""" +Living UI Backend Logger + +Persistent file-based logging for Living UI backend. +Logs are written to the project's logs/ directory with automatic rotation. +Each session (server start) creates a new log file, old logs are retained. +""" + +import logging +import os +import sys +from datetime import datetime +from pathlib import Path + +# Log directory lives inside the project's backend folder +LOG_DIR = Path(__file__).parent / "logs" +LOG_DIR.mkdir(parents=True, exist_ok=True) + + +def setup_logging() -> logging.Logger: + """ + Configure persistent file-based logging for the backend. + + Creates a timestamped log file per session so each server run + is independently traceable. Also logs to stderr for subprocess capture. 
+ + Returns: + The root logger, configured with file + stream handlers. + """ + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = LOG_DIR / f"backend_{timestamp}.log" + + formatter = logging.Formatter( + "%(asctime)s | %(levelname)-8s | %(name)s:%(funcName)s:%(lineno)d - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + # File handler - captures everything (DEBUG+) + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + # Stream handler - INFO+ to stderr (captured by manager subprocess pipes) + stream_handler = logging.StreamHandler(sys.stderr) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + # Configure root logger + root_logger = logging.getLogger() + root_logger.setLevel(logging.DEBUG) + root_logger.addHandler(file_handler) + root_logger.addHandler(stream_handler) + + # Also capture uvicorn logs into the same file + for uvi_logger_name in ("uvicorn", "uvicorn.access", "uvicorn.error"): + uvi_logger = logging.getLogger(uvi_logger_name) + uvi_logger.handlers = [] # Remove default handlers + uvi_logger.addHandler(file_handler) + uvi_logger.addHandler(stream_handler) + uvi_logger.propagate = False + + root_logger.info(f"[Logger] Session log started: {log_file}") + root_logger.info(f"[Logger] Python {sys.version}") + root_logger.info(f"[Logger] CWD: {os.getcwd()}") + + return root_logger + + +def cleanup_old_logs(keep: int = 20): + """Remove old log files, keeping the most recent `keep` files.""" + log_files = sorted(LOG_DIR.glob("backend_*.log"), reverse=True) + for old_log in log_files[keep:]: + try: + old_log.unlink() + except Exception: + pass diff --git a/app/data/living_ui_template/backend/main.py b/app/data/living_ui_template/backend/main.py new file mode 100644 index 00000000..14981971 --- /dev/null +++ b/app/data/living_ui_template/backend/main.py @@ -0,0 +1,134 @@ +""" +Living UI Python Backend + +FastAPI backend 
for Living UI projects. +Provides REST API for state management and data persistence. + +To run manually: + uvicorn main:app --port {{BACKEND_PORT}} --reload +""" + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from contextlib import asynccontextmanager +from routes import router +from database import init_db +from logger import setup_logging, cleanup_old_logs +from pathlib import Path +import logging + +# Initialize persistent file-based logging before anything else +setup_logging() +cleanup_old_logs(keep=20) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Initialize database on startup.""" + logger.info("[Backend] Initializing database...") + await init_db() + logger.info("[Backend] Database initialized") + yield + logger.info("[Backend] Shutting down...") + + +app = FastAPI( + title="{{PROJECT_NAME}} API", + description="Backend API for {{PROJECT_NAME}} Living UI", + version="1.0.0", + lifespan=lifespan, +) + +# CORS configuration for frontend +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include routes +app.include_router(router, prefix="/api") + +# Auto-include additional routers from routes/ directory (if any) +import importlib, pkgutil +_routes_dir = Path(__file__).parent / "routes" +if _routes_dir.exists() and (_routes_dir / "__init__.py").exists(): + for _imp, _mod, _pkg in pkgutil.iter_modules([str(_routes_dir)]): + _m = importlib.import_module(f"routes.{_mod}") + if hasattr(_m, 'router'): + app.include_router(_m.router, prefix="/api") + + +@app.get("/health") +async def health_check(): + """Health check endpoint for process management.""" + return {"status": "healthy", "project": "{{PROJECT_ID}}"} + + +# ============================================================================ +# Frontend Console Log Capture (registered on app directly, not on router, +# so it 
survives agent rewrites of routes.py) +# ============================================================================ +from pydantic import BaseModel +from typing import List, Optional +from datetime import datetime + +_FRONTEND_LOG_PATH = Path(__file__).parent / "logs" / "frontend_console.log" + + +class _FrontendLogEntry(BaseModel): + level: str + message: str + timestamp: Optional[str] = None + + +class _FrontendLogBatch(BaseModel): + entries: List[_FrontendLogEntry] + + +@app.post("/api/logs") +async def capture_frontend_logs(data: _FrontendLogBatch): + """Capture frontend console logs for agent debugging.""" + _FRONTEND_LOG_PATH.parent.mkdir(parents=True, exist_ok=True) + with open(_FRONTEND_LOG_PATH, "a", encoding="utf-8") as f: + for entry in data.entries: + ts = entry.timestamp or datetime.utcnow().isoformat() + f.write(f"{ts} | {entry.level.upper():<5} | {entry.message}\n") + return {"status": "ok", "count": len(data.entries)} + + +# ============================================================================ +# Serve frontend static files (built by Vite) — enables single-port access +# for LAN/tunnel sharing. Must be registered LAST (catch-all). 
+# ============================================================================ +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse + +_DIST_DIR = Path(__file__).parent.parent / "dist" +_DIST_ASSETS = _DIST_DIR / "assets" +if _DIST_DIR.exists() and _DIST_ASSETS.exists(): + _CONFIG_DIR = Path(__file__).parent.parent / "config" + + @app.get("/config/manifest.json") + async def serve_manifest(): + manifest = _CONFIG_DIR / "manifest.json" + if manifest.exists(): + return FileResponse(manifest) + return {"error": "manifest not found"} + + app.mount("/assets", StaticFiles(directory=str(_DIST_ASSETS)), name="assets") + + @app.get("/{path:path}") + async def spa_fallback(path: str): + file_path = _DIST_DIR / path + if file_path.is_file(): + return FileResponse(file_path) + return FileResponse(_DIST_DIR / "index.html") + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port={{BACKEND_PORT}}) diff --git a/app/data/living_ui_template/backend/models.py b/app/data/living_ui_template/backend/models.py new file mode 100644 index 00000000..a62c581c --- /dev/null +++ b/app/data/living_ui_template/backend/models.py @@ -0,0 +1,135 @@ +""" +Living UI Data Models + +SQLAlchemy models for data persistence. +Includes a flexible AppState model for storing arbitrary JSON state, +plus example Item model for reference. +""" + +from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, JSON +from sqlalchemy.ext.declarative import declarative_base +from datetime import datetime +from typing import Dict, Any + +Base = declarative_base() + + +class AppState(Base): + """ + Flexible application state storage. + + Stores the entire app state as JSON, allowing any structure. + This is the primary model used by the default state management. + + The agent should extend this with custom models for complex data needs. 
+ """ + __tablename__ = "app_state" + + id = Column(Integer, primary_key=True, default=1) + data = Column(JSON, default=dict) # Stores arbitrary state as JSON + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for API response.""" + return { + "id": self.id, + "data": self.data or {}, + "createdAt": self.created_at.isoformat() if self.created_at else None, + "updatedAt": self.updated_at.isoformat() if self.updated_at else None, + } + + def update_data(self, updates: Dict[str, Any]) -> None: + """Merge updates into existing data.""" + current = self.data or {} + current.update(updates) + self.data = current + self.updated_at = datetime.utcnow() + + +# ============================================================================ +# Example models for reference - Agent should customize these +# ============================================================================ + +class UISnapshot(Base): + """ + UI state snapshot for agent observation. + + Frontend periodically posts UI state here. + Agent can GET this to observe the UI without WebSocket. 
+ """ + __tablename__ = "ui_snapshot" + + id = Column(Integer, primary_key=True, default=1) + html_structure = Column(Text, nullable=True) # Simplified DOM structure + visible_text = Column(JSON, default=list) # Array of visible text content + input_values = Column(JSON, default=dict) # Form field values + component_state = Column(JSON, default=dict) # Registered component states + current_view = Column(String(255), nullable=True) # Current route/view + viewport = Column(JSON, default=dict) # Window dimensions, scroll position + timestamp = Column(DateTime, default=datetime.utcnow) + + def to_dict(self) -> Dict[str, Any]: + return { + "htmlStructure": self.html_structure, + "visibleText": self.visible_text or [], + "inputValues": self.input_values or {}, + "componentState": self.component_state or {}, + "currentView": self.current_view, + "viewport": self.viewport or {}, + "timestamp": self.timestamp.isoformat() if self.timestamp else None, + } + + +class UIScreenshot(Base): + """ + UI screenshot for agent visual observation. + + Frontend captures and posts screenshot here. + Agent can GET this to see the UI visually. + """ + __tablename__ = "ui_screenshot" + + id = Column(Integer, primary_key=True, default=1) + image_data = Column(Text, nullable=True) # Base64 encoded PNG + width = Column(Integer, nullable=True) + height = Column(Integer, nullable=True) + timestamp = Column(DateTime, default=datetime.utcnow) + + def to_dict(self) -> Dict[str, Any]: + return { + "imageData": self.image_data, + "width": self.width, + "height": self.height, + "timestamp": self.timestamp.isoformat() if self.timestamp else None, + } + + +class Item(Base): + """ + Example model for list-based data (todos, notes, etc.) + + Customize or replace this model based on your Living UI needs. 
+ """ + __tablename__ = "items" + + id = Column(Integer, primary_key=True, index=True) + title = Column(String(255), nullable=False) + description = Column(Text, nullable=True) + completed = Column(Boolean, default=False) + order = Column(Integer, default=0) + extra_data = Column(JSON, default=dict) # Flexible extra data (avoid 'metadata' - reserved in SQLAlchemy) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "title": self.title, + "description": self.description, + "completed": self.completed, + "order": self.order, + "extraData": self.extra_data or {}, + "createdAt": self.created_at.isoformat() if self.created_at else None, + "updatedAt": self.updated_at.isoformat() if self.updated_at else None, + } diff --git a/app/data/living_ui_template/backend/requirements.txt b/app/data/living_ui_template/backend/requirements.txt new file mode 100644 index 00000000..a850540e --- /dev/null +++ b/app/data/living_ui_template/backend/requirements.txt @@ -0,0 +1,7 @@ +# Living UI Backend Dependencies +fastapi>=0.104.0 +uvicorn>=0.24.0 +sqlalchemy>=2.0.0 +pydantic>=2.0.0 +pytest>=7.0.0 +httpx>=0.24.0 diff --git a/app/data/living_ui_template/backend/routes.py b/app/data/living_ui_template/backend/routes.py new file mode 100644 index 00000000..7bd9ecdb --- /dev/null +++ b/app/data/living_ui_template/backend/routes.py @@ -0,0 +1,401 @@ +""" +Living UI API Routes + +REST API endpoints for state management and data operations. +Provides both generic state storage and example CRUD operations. 
+""" + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.orm import Session +from pydantic import BaseModel +from typing import Dict, Any, List, Optional +from database import get_db +from models import AppState, Item, UISnapshot, UIScreenshot +from datetime import datetime +import logging +import base64 + +logger = logging.getLogger(__name__) +router = APIRouter() + + +# ============================================================================ +# Pydantic Schemas +# ============================================================================ + +class StateUpdate(BaseModel): + """Schema for updating app state.""" + data: Dict[str, Any] + + +class ActionRequest(BaseModel): + """Schema for executing an action.""" + action: str + payload: Optional[Dict[str, Any]] = None + + +class ItemCreate(BaseModel): + """Schema for creating an item.""" + title: str + description: Optional[str] = None + extra_data: Optional[Dict[str, Any]] = None + + +class ItemUpdate(BaseModel): + """Schema for updating an item.""" + title: Optional[str] = None + description: Optional[str] = None + completed: Optional[bool] = None + order: Optional[int] = None + extra_data: Optional[Dict[str, Any]] = None + + +class UISnapshotUpdate(BaseModel): + """Schema for updating UI snapshot.""" + htmlStructure: Optional[str] = None + visibleText: Optional[List[str]] = None + inputValues: Optional[Dict[str, Any]] = None + componentState: Optional[Dict[str, Any]] = None + currentView: Optional[str] = None + viewport: Optional[Dict[str, Any]] = None + + +class UIScreenshotUpdate(BaseModel): + """Schema for updating UI screenshot.""" + imageData: str # Base64 encoded PNG + width: Optional[int] = None + height: Optional[int] = None + + +# ============================================================================ +# State Management Routes (Primary API) +# ============================================================================ + +@router.get("/state") +def get_state(db: Session = 
Depends(get_db)) -> Dict[str, Any]: + """ + Get the current application state. + + Returns the stored state data, or empty dict if no state exists. + Frontend calls this on mount to restore state. + """ + state = db.query(AppState).first() + if not state: + state = AppState(data={}) + db.add(state) + db.commit() + db.refresh(state) + return state.data or {} + + +@router.put("/state") +def update_state(update: StateUpdate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Update the application state. + + Merges the provided data with existing state. + Returns the complete updated state. + """ + state = db.query(AppState).first() + if not state: + state = AppState(data=update.data) + db.add(state) + else: + state.update_data(update.data) + db.commit() + db.refresh(state) + logger.info(f"[Routes] State updated: {list(update.data.keys())}") + return state.data or {} + + +@router.post("/state/replace") +def replace_state(update: StateUpdate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Replace the entire application state. + + Unlike PUT /state which merges, this completely replaces the state. + Use with caution. + """ + state = db.query(AppState).first() + if not state: + state = AppState(data=update.data) + db.add(state) + else: + state.data = update.data + db.commit() + db.refresh(state) + logger.info("[Routes] State replaced") + return state.data or {} + + +@router.delete("/state") +def clear_state(db: Session = Depends(get_db)) -> Dict[str, str]: + """ + Clear all application state. + + Resets state to empty dict. + """ + state = db.query(AppState).first() + if state: + state.data = {} + db.commit() + logger.info("[Routes] State cleared") + return {"status": "cleared"} + + +@router.post("/action") +def execute_action(request: ActionRequest, db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Execute a named action. + + This is a generic endpoint for custom actions. + The agent should customize this based on the Living UI's needs. 
+ + Example actions: + - {"action": "reset"} - Reset to initial state + - {"action": "increment", "payload": {"key": "counter"}} + """ + action = request.action + payload = request.payload or {} + + logger.info(f"[Routes] Executing action: {action}") + + # Get current state + state = db.query(AppState).first() + if not state: + state = AppState(data={}) + db.add(state) + + current_data = state.data or {} + + # Handle built-in actions + if action == "reset": + state.data = {} + db.commit() + return {"status": "reset", "data": {}} + + elif action == "increment": + key = payload.get("key", "counter") + current_data[key] = current_data.get(key, 0) + 1 + state.data = current_data + db.commit() + return {"status": "incremented", "data": current_data} + + elif action == "decrement": + key = payload.get("key", "counter") + current_data[key] = current_data.get(key, 0) - 1 + state.data = current_data + db.commit() + return {"status": "decremented", "data": current_data} + + # Custom actions should be added here by the agent + # Example: + # elif action == "feed_pet": + # current_data["pet"]["hunger"] = min(100, current_data.get("pet", {}).get("hunger", 50) + 25) + # state.data = current_data + # db.commit() + # return {"status": "fed", "data": current_data} + + else: + # Unknown action - return current state without changes + logger.warning(f"[Routes] Unknown action: {action}") + return {"status": "unknown_action", "action": action, "data": current_data} + + +# ============================================================================ +# Item CRUD Routes (Example for list-based data) +# ============================================================================ + +@router.get("/items") +def list_items(db: Session = Depends(get_db)) -> List[Dict[str, Any]]: + """Get all items, ordered by their order field.""" + items = db.query(Item).order_by(Item.order, Item.id).all() + return [item.to_dict() for item in items] + + +@router.post("/items") +def create_item(data: 
ItemCreate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """Create a new item.""" + # Get max order to put new item at end + max_order = db.query(Item).count() + item = Item( + title=data.title, + description=data.description, + extra_data=data.extra_data or {}, + order=max_order, + ) + db.add(item) + db.commit() + db.refresh(item) + logger.info(f"[Routes] Created item: {item.id}") + return item.to_dict() + + +@router.get("/items/{item_id}") +def get_item(item_id: int, db: Session = Depends(get_db)) -> Dict[str, Any]: + """Get a specific item by ID.""" + item = db.query(Item).filter(Item.id == item_id).first() + if not item: + raise HTTPException(status_code=404, detail="Item not found") + return item.to_dict() + + +@router.put("/items/{item_id}") +def update_item(item_id: int, data: ItemUpdate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """Update an existing item.""" + item = db.query(Item).filter(Item.id == item_id).first() + if not item: + raise HTTPException(status_code=404, detail="Item not found") + + if data.title is not None: + item.title = data.title + if data.description is not None: + item.description = data.description + if data.completed is not None: + item.completed = data.completed + if data.order is not None: + item.order = data.order + if data.extra_data is not None: + item.extra_data = data.extra_data + + db.commit() + db.refresh(item) + logger.info(f"[Routes] Updated item: {item_id}") + return item.to_dict() + + +@router.delete("/items/{item_id}") +def delete_item(item_id: int, db: Session = Depends(get_db)) -> Dict[str, str]: + """Delete an item.""" + item = db.query(Item).filter(Item.id == item_id).first() + if not item: + raise HTTPException(status_code=404, detail="Item not found") + + db.delete(item) + db.commit() + logger.info(f"[Routes] Deleted item: {item_id}") + return {"status": "deleted", "id": str(item_id)} + + +# ============================================================================ +# UI Observation Routes 
(Agent API) +# ============================================================================ + +@router.get("/ui-snapshot") +def get_ui_snapshot(db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Get the current UI snapshot. + + Returns the latest UI state captured by the frontend. + Agent uses this to observe the UI without WebSocket. + + Response includes: + - htmlStructure: Simplified DOM structure + - visibleText: Array of visible text on screen + - inputValues: Current form field values + - componentState: State of registered components + - currentView: Current route/view + - viewport: Window dimensions and scroll position + - timestamp: When the snapshot was captured + """ + snapshot = db.query(UISnapshot).first() + if not snapshot: + return { + "htmlStructure": None, + "visibleText": [], + "inputValues": {}, + "componentState": {}, + "currentView": None, + "viewport": {}, + "timestamp": None, + "status": "no_snapshot" + } + return snapshot.to_dict() + + +@router.post("/ui-snapshot") +def update_ui_snapshot(data: UISnapshotUpdate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Update the UI snapshot. + + Frontend calls this periodically to report UI state. + This replaces WebSocket-based state reporting. 
+ """ + snapshot = db.query(UISnapshot).first() + if not snapshot: + snapshot = UISnapshot() + db.add(snapshot) + + if data.htmlStructure is not None: + snapshot.html_structure = data.htmlStructure + if data.visibleText is not None: + snapshot.visible_text = data.visibleText + if data.inputValues is not None: + snapshot.input_values = data.inputValues + if data.componentState is not None: + snapshot.component_state = data.componentState + if data.currentView is not None: + snapshot.current_view = data.currentView + if data.viewport is not None: + snapshot.viewport = data.viewport + + snapshot.timestamp = datetime.utcnow() + + db.commit() + db.refresh(snapshot) + logger.info("[Routes] UI snapshot updated") + return snapshot.to_dict() + + +@router.get("/ui-screenshot") +def get_ui_screenshot(db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Get the current UI screenshot. + + Returns the latest screenshot captured by the frontend as base64 PNG. + Agent uses this for visual observation of the UI. + + Response includes: + - imageData: Base64 encoded PNG image + - width: Image width in pixels + - height: Image height in pixels + - timestamp: When the screenshot was captured + + To use the image: + - Decode base64: base64.b64decode(imageData) + - Or display in HTML: + """ + screenshot = db.query(UIScreenshot).first() + if not screenshot or not screenshot.image_data: + return { + "imageData": None, + "width": None, + "height": None, + "timestamp": None, + "status": "no_screenshot" + } + return screenshot.to_dict() + + +@router.post("/ui-screenshot") +def update_ui_screenshot(data: UIScreenshotUpdate, db: Session = Depends(get_db)) -> Dict[str, Any]: + """ + Update the UI screenshot. + + Frontend calls this to post a screenshot of the current UI. + Screenshot should be a base64 encoded PNG. 
+ """ + screenshot = db.query(UIScreenshot).first() + if not screenshot: + screenshot = UIScreenshot() + db.add(screenshot) + + screenshot.image_data = data.imageData + screenshot.width = data.width + screenshot.height = data.height + screenshot.timestamp = datetime.utcnow() + + db.commit() + db.refresh(screenshot) + logger.info(f"[Routes] UI screenshot updated ({data.width}x{data.height})") + return {"status": "updated", "timestamp": screenshot.timestamp.isoformat()} diff --git a/app/data/living_ui_template/backend/services/integration_client.py b/app/data/living_ui_template/backend/services/integration_client.py new file mode 100644 index 00000000..dee26124 --- /dev/null +++ b/app/data/living_ui_template/backend/services/integration_client.py @@ -0,0 +1,126 @@ +""" +CraftBot Integration Client — call external APIs through CraftBot. + +Living UIs are shareable, so they never store credentials. Instead, +requests go through CraftBot which injects auth headers server-side. + +Usage: + from services.integration_client import integration + + # Check what's available + integrations = await integration.get_integrations() + + # Make an authenticated API call + result = await integration.request( + integration="google_workspace", + method="GET", + url="https://www.googleapis.com/youtube/v3/channels?part=snippet&mine=true", + ) + if result["status"] == 200: + channels = result["data"] +""" + +import os +import httpx +from typing import Any, Dict, List, Optional + +BRIDGE_URL = os.environ.get("CRAFTBOT_BRIDGE_URL", "") +BRIDGE_TOKEN = os.environ.get("CRAFTBOT_BRIDGE_TOKEN", "") + + +class IntegrationClient: + """Proxy client for calling external APIs through CraftBot.""" + + def __init__(self): + self._client: Optional[httpx.AsyncClient] = None + + def _ensure_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient(timeout=30) + return self._client + + @property + def available(self) -> bool: + """Whether the CraftBot integration 
bridge is available.""" + return bool(BRIDGE_URL and BRIDGE_TOKEN) + + def _auth_headers(self) -> Dict[str, str]: + return {"Authorization": f"Bearer {BRIDGE_TOKEN}"} + + async def get_integrations(self) -> List[Dict[str, Any]]: + """ + List available integrations and their connection status. + + Returns a list like: + [ + {"id": "google_workspace", "connected": true, "granted": true}, + {"id": "slack", "connected": true, "granted": false}, + {"id": "discord", "connected": false, "granted": false}, + ] + """ + if not self.available: + return [] + try: + client = self._ensure_client() + r = await client.get( + f"{BRIDGE_URL}/api/integrations/available", + headers=self._auth_headers(), + ) + if r.status_code == 200: + return r.json().get("integrations", []) + return [] + except Exception: + return [] + + async def request( + self, + integration: str, + method: str, + url: str, + headers: Optional[Dict[str, str]] = None, + body: Any = None, + ) -> Dict[str, Any]: + """ + Make an authenticated request to an external API via CraftBot proxy. 
+ + Args: + integration: Platform ID (e.g., "google_workspace", "slack", "discord") + method: HTTP method (GET, POST, PUT, DELETE) + url: Full URL to the external API endpoint + headers: Optional extra headers (e.g., custom Accept header) + body: Optional request body (dict for JSON) + + Returns: + {"status": 200, "data": {...}} on success + {"status": 4xx/5xx, "data": "error message"} on failure + {"error": "..."} if bridge itself fails + """ + if not self.available: + return {"error": "Integration bridge not available"} + + try: + client = self._ensure_client() + r = await client.post( + f"{BRIDGE_URL}/api/integrations/proxy", + headers=self._auth_headers(), + json={ + "integration": integration, + "method": method, + "url": url, + "headers": headers or {}, + "body": body, + }, + ) + return r.json() + except Exception as e: + return {"error": str(e)} + + async def close(self): + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None + + +# Singleton — import and use directly +integration = IntegrationClient() diff --git a/app/data/living_ui_template/backend/test_runner.py b/app/data/living_ui_template/backend/test_runner.py new file mode 100644 index 00000000..69cc15e7 --- /dev/null +++ b/app/data/living_ui_template/backend/test_runner.py @@ -0,0 +1,1006 @@ +""" +Living UI Backend Test Runner + +Auto-discovers and tests backend routes without agent involvement. 
+Four modes: + --internal : Pre-server validation (imports, models, route registration) + --unit : Auto-generated CRUD unit tests against temp DB + --compatibility : Frontend-backend route compatibility check + --external : Post-server HTTP smoke tests (requires running server) + +Usage: + python test_runner.py --internal + python test_runner.py --unit + python test_runner.py --compatibility + python test_runner.py --external --port 3101 +""" + +import argparse +import json +import logging +import re +import sys +import traceback +import urllib.request +import urllib.error +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple + +LOG_DIR = Path(__file__).parent / "logs" +LOG_DIR.mkdir(parents=True, exist_ok=True) + +logger = logging.getLogger("test_runner") + +# Routes to skip during smoke tests (framework/template-provided, not agent code) +SKIP_PATHS = {"/health", "/docs", "/redoc", "/openapi.json"} +# Template-provided UI observation routes — complex payloads (base64 images, DOM), skip in smoke tests +SKIP_API_PREFIXES = ( + "/api/ui-snapshot", + "/api/ui-screenshot", +) + + +# ============================================================================ +# Auto-payload generation from OpenAPI schemas +# ============================================================================ + +def generate_payload_from_schema(schema: Dict[str, Any], definitions: Dict[str, Any]) -> Dict[str, Any]: + """ + Generate a minimal valid payload from an OpenAPI/JSON Schema definition. + + Handles $ref resolution and generates test values for common types. + Only includes required fields. 
+ """ + if "$ref" in schema: + ref_name = schema["$ref"].split("/")[-1] + schema = definitions.get(ref_name, {}) + + if schema.get("type") != "object": + return {} + + properties = schema.get("properties", {}) + required = set(schema.get("required", [])) + + # If no required fields specified, include all properties + if not required: + required = set(properties.keys()) + + payload = {} + for field_name, field_schema in properties.items(): + if field_name not in required: + continue + if field_name.startswith("_"): + continue + payload[field_name] = _generate_value(field_schema, definitions) + + return payload + + +def _generate_value(schema: Dict[str, Any], definitions: Dict[str, Any]) -> Any: + """Generate a test value for a single field based on its schema.""" + if "$ref" in schema: + ref_name = schema["$ref"].split("/")[-1] + ref_schema = definitions.get(ref_name, {}) + return generate_payload_from_schema(ref_schema, definitions) + + field_type = schema.get("type", "string") + + if field_type == "string": + if "enum" in schema: + return schema["enum"][0] + # Use format hints for better test values + fmt = schema.get("format", "") + if fmt == "date-time": + return "2026-01-01T00:00:00" + elif fmt == "date": + return "2026-01-01" + elif fmt == "email": + return "test@test.com" + elif fmt == "uri" or fmt == "url": + return "http://test.com" + return "test" + elif field_type == "integer": + return schema.get("minimum", 1) + elif field_type == "number": + return schema.get("minimum", 1.0) + elif field_type == "boolean": + return True + elif field_type == "array": + # Generate an array with one item of the correct type + items_schema = schema.get("items", {}) + if items_schema: + return [_generate_value(items_schema, definitions)] + return [] + elif field_type == "object": + # Check if it has properties (structured) or is a free-form dict + if schema.get("properties"): + return generate_payload_from_schema(schema, definitions) + # Free-form object (e.g., Dict[str, 
Any]) + return {} + elif field_type == "null": + return None + + # anyOf / oneOf — pick the first non-null type + for key in ("anyOf", "oneOf"): + if key in schema: + for variant in schema[key]: + if variant.get("type") != "null": + return _generate_value(variant, definitions) + + return "test" + + +# ============================================================================ +# Internal Tests (pre-server) +# ============================================================================ + +def run_internal_tests() -> Dict[str, Any]: + """ + Run pre-server validation tests. + + - Import validation for main, routes, models, database + - Route discovery from FastAPI app + - Model verification (SQLAlchemy tables) + + Returns dict with status, errors, and discovered routes. + """ + result = { + "status": "pass", + "errors": [], + "routes": [], + "timestamp": datetime.now().isoformat(), + "mode": "internal", + } + + # Test 1: Import validation + modules_to_test = ["database", "models", "routes", "main"] + for module_name in modules_to_test: + try: + __import__(module_name) + logger.info(f"[IMPORT] {module_name} — OK") + except Exception as e: + error_msg = f"Failed to import {module_name}: {e}" + logger.error(f"[IMPORT] {error_msg}") + result["errors"].append({"test": "import", "module": module_name, "error": str(e), "traceback": traceback.format_exc()}) + result["status"] = "fail" + + if result["status"] == "fail": + # No point continuing if imports fail + _write_result(result, "test_discovery.json") + return result + + # Test 2: Route discovery + try: + from main import app + + openapi_schema = app.openapi() + definitions = openapi_schema.get("components", {}).get("schemas", {}) + paths = openapi_schema.get("paths", {}) + + for path, methods in paths.items(): + for method, details in methods.items(): + if method.upper() in ("GET", "POST", "PUT", "DELETE", "PATCH"): + # Check for request body schema + body_schema = None + has_request_body = False + request_body = 
details.get("requestBody", {}) + if request_body: + has_request_body = True + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + body_schema = json_content.get("schema") + + # Check for path parameters + path_params = [] + for param in details.get("parameters", []): + if param.get("in") == "path": + path_params.append(param["name"]) + + route_info = { + "method": method.upper(), + "path": path, + "has_request_body": has_request_body, + "body_schema": body_schema, + "path_params": path_params, + "level": "light", + } + result["routes"].append(route_info) + logger.info(f"[ROUTE] {method.upper()} {path}") + + if not any(r["path"].startswith("/api") for r in result["routes"]): + result["errors"].append({ + "test": "route_discovery", + "error": "No /api/* routes found — backend has no application routes registered", + }) + result["status"] = "fail" + else: + api_count = sum(1 for r in result["routes"] if r["path"].startswith("/api")) + logger.info(f"[ROUTES] Discovered {api_count} API route(s)") + + except Exception as e: + result["errors"].append({"test": "route_discovery", "error": str(e), "traceback": traceback.format_exc()}) + result["status"] = "fail" + + # Test 3: Model/table verification + try: + from database import engine + from models import Base + + # Verify tables can be created (uses in-memory check, doesn't modify real DB) + table_names = list(Base.metadata.tables.keys()) + logger.info(f"[MODELS] Found {len(table_names)} table(s): {table_names}") + + if not table_names: + result["errors"].append({"test": "models", "error": "No SQLAlchemy models/tables defined"}) + result["status"] = "fail" + + except Exception as e: + result["errors"].append({"test": "models", "error": str(e), "traceback": traceback.format_exc()}) + result["status"] = "fail" + + # Test 4: System file integrity — verify critical system features weren't removed + system_checks = _check_system_files() + for check in system_checks: + if 
check["status"] == "fail": + result["errors"].append({"test": "system_integrity", "error": check["error"]}) + result["status"] = "fail" + logger.error(f"[SYSTEM] {check['error']}") + else: + logger.info(f"[SYSTEM] {check['name']} — OK") + + _write_result(result, "test_discovery.json") + return result + + +def _check_system_files() -> List[Dict[str, Any]]: + """Check that critical system features haven't been removed from template files.""" + checks = [] + backend_dir = Path(__file__).parent.parent / "backend" if (Path(__file__).parent.parent / "backend").exists() else Path(__file__).parent + project_root = Path(__file__).parent.parent + + # Check main.py has /health endpoint + main_py = backend_dir / "main.py" + if main_py.exists(): + content = main_py.read_text(encoding="utf-8") + if "/health" not in content: + checks.append({ + "name": "health_endpoint", + "status": "fail", + "error": "main.py is missing /health endpoint. Add: @app.get('/health') async def health_check(): return {'status': 'healthy'}", + }) + else: + checks.append({"name": "health_endpoint", "status": "pass"}) + + if "/api/logs" not in content: + checks.append({ + "name": "logs_endpoint", + "status": "fail", + "error": "main.py is missing POST /api/logs endpoint for frontend console capture. Restore it from the template or add: @app.post('/api/logs') that accepts {entries: [{level, message, timestamp}]} and writes to logs/frontend_console.log", + }) + else: + checks.append({"name": "logs_endpoint", "status": "pass"}) + + if "setup_logging" not in content: + checks.append({ + "name": "logging_setup", + "status": "fail", + "error": "main.py is missing setup_logging() call. 
Add: from logger import setup_logging, cleanup_old_logs; setup_logging(); cleanup_old_logs(keep=20)", + }) + else: + checks.append({"name": "logging_setup", "status": "pass"}) + + # Health checker is handled by the manager watchdog — no longer required in main.py + checks.append({"name": "health_checker", "status": "pass"}) + else: + checks.append({"name": "main_py", "status": "fail", "error": "main.py not found"}) + + # Check index.html has console capture script + index_html = project_root / "index.html" + if index_html.exists(): + content = index_html.read_text(encoding="utf-8") + if "ConsoleCapture" not in content and "/api/logs" not in content: + checks.append({ + "name": "console_capture", + "status": "fail", + "error": "index.html is missing the ConsoleCapture script. Restore it from the template — it should be an inline + + + + + + + + + + diff --git a/app/data/living_ui_template/package.json b/app/data/living_ui_template/package.json new file mode 100644 index 00000000..903a9ae1 --- /dev/null +++ b/app/data/living_ui_template/package.json @@ -0,0 +1,26 @@ +{ + "name": "{{PROJECT_NAME}}", + "version": "1.0.0", + "description": "{{PROJECT_DESCRIPTION}}", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "preview": "vite preview", + "lint": "eslint . 
--ext ts,tsx --report-unused-disable-directives --max-warnings 0" + }, + "dependencies": { + "html2canvas": "^1.4.1", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "lucide-react": "^0.460.0", + "react-toastify": "^10.0.0" + }, + "devDependencies": { + "@types/react": "^18.2.0", + "@types/react-dom": "^18.2.0", + "@vitejs/plugin-react": "^4.0.0", + "typescript": "^5.0.0", + "vite": "^5.0.0" + } +} diff --git a/app/data/living_ui_template/requirements.txt b/app/data/living_ui_template/requirements.txt new file mode 100644 index 00000000..fbbd4fe5 --- /dev/null +++ b/app/data/living_ui_template/requirements.txt @@ -0,0 +1,9 @@ +# Python backend dependencies for Living UI +# Uncomment if backend functionality is needed + +# fastapi>=0.100.0 +# uvicorn>=0.23.0 +# sqlalchemy>=2.0.0 +# aiosqlite>=0.19.0 +# pydantic>=2.0.0 +# httpx>=0.24.0 diff --git a/app/data/living_ui_template/tsconfig.json b/app/data/living_ui_template/tsconfig.json new file mode 100644 index 00000000..cda9bcf8 --- /dev/null +++ b/app/data/living_ui_template/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", "DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "react-jsx", + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true + }, + "include": ["frontend"], + "references": [{ "path": "./tsconfig.node.json" }] +} diff --git a/app/data/living_ui_template/tsconfig.node.json b/app/data/living_ui_template/tsconfig.node.json new file mode 100644 index 00000000..42872c59 --- /dev/null +++ b/app/data/living_ui_template/tsconfig.node.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "bundler", + 
"allowSyntheticDefaultImports": true + }, + "include": ["vite.config.ts"] +} diff --git a/app/data/living_ui_template/vite.config.ts b/app/data/living_ui_template/vite.config.ts new file mode 100644 index 00000000..a30ac34c --- /dev/null +++ b/app/data/living_ui_template/vite.config.ts @@ -0,0 +1,25 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [react()], + server: { + port: {{PORT}}, + host: true, + proxy: { + '/api': 'http://localhost:{{BACKEND_PORT}}', + }, + }, + preview: { + port: {{PORT}}, + host: true, + proxy: { + '/api': 'http://localhost:{{BACKEND_PORT}}', + }, + }, + build: { + outDir: 'dist', + sourcemap: true, + }, +}) diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index bf23e657..208e035c 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -100,6 +100,69 @@ def describe_image(cls, image_path: str, prompt: Optional[str] = None) -> str: raise RuntimeError("InternalActionInterface not initialized with VLMInterface.") return cls.vlm_interface.describe_image(image_path, user_prompt=prompt) + @classmethod + def perform_ocr(cls, image_path: str, user_prompt: Optional[str] = None) -> dict: + """ + Run OCR on an image and persist the extracted text to workspace. + Returns a concise status dict + saved file path to avoid TUI flooding. 
+ """ + if cls.vlm_interface is None: + raise RuntimeError("InternalActionInterface not initialized with VLMInterface.") + + import os + from datetime import datetime + + raw_text = cls.vlm_interface.describe_image_ocr(image_path, user_prompt=user_prompt) + + # Persist to workspace to prevent token ballooning in the agent context + ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + out_path = os.path.join(AGENT_WORKSPACE_ROOT, f"ocr_result_{ts}.txt") + with open(out_path, "w", encoding="utf-8") as f: + f.write(raw_text) + + line_count = raw_text.count("\n") + 1 + char_count = len(raw_text) + return { + "status": "success", + "summary": f"OCR complete: {line_count} lines, {char_count} characters extracted.", + "text": raw_text, + "file_path": out_path, + "file_saved": True, + } + + @classmethod + def understand_video( + cls, + video_path: str, + query: Optional[str] = None, + max_frames: int = 8, + ) -> dict: + """ + Analyse a video by extracting keyframes and querying the VLM. + Persists the summary to workspace to avoid TUI/context flooding. + """ + if cls.vlm_interface is None: + raise RuntimeError("InternalActionInterface not initialized with VLMInterface.") + + import os + from datetime import datetime + + summary = cls.vlm_interface.describe_video_frames( + video_path, query=query, max_frames=max_frames + ) + + ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + out_path = os.path.join(AGENT_WORKSPACE_ROOT, f"video_summary_{ts}.txt") + with open(out_path, "w", encoding="utf-8") as f: + f.write(summary) + + return { + "status": "success", + "summary": summary[:500] + ("..." 
if len(summary) > 500 else ""), + "file_path": out_path, + "file_saved": True, + } + # ───────────────── Memory Search ───────────────── @classmethod diff --git a/app/living_ui/__init__.py b/app/living_ui/__init__.py new file mode 100644 index 00000000..fe6c59ec --- /dev/null +++ b/app/living_ui/__init__.py @@ -0,0 +1,37 @@ +"""Living UI module for managing dynamic agent-aware user interfaces. + +Public surface (import from `app.living_ui`): + +- LivingUIManager, LivingUIProject — data + lifecycle (see manager.py) +- get_living_ui_manager, set_living_ui_manager — module singleton accessor +- register_broadcast_callbacks — wire up browser adapter callbacks +- broadcast_living_ui_ready — async broadcast (agent actions) +- broadcast_living_ui_progress — async broadcast (agent actions) +- make_todo_broadcast_hook — factory for TaskManager hook +- restart_living_ui — async restart operation + +Internal (do not import from here): todo dispatch machinery lives in +`broadcast.py` behind `make_todo_broadcast_hook`. +""" + +from .manager import LivingUIManager, LivingUIProject +from ._state import get_living_ui_manager, set_living_ui_manager +from .broadcast import ( + register_broadcast_callbacks, + broadcast_living_ui_ready, + broadcast_living_ui_progress, + make_todo_broadcast_hook, +) +from .actions import restart_living_ui + +__all__ = [ + 'LivingUIManager', + 'LivingUIProject', + 'get_living_ui_manager', + 'set_living_ui_manager', + 'register_broadcast_callbacks', + 'broadcast_living_ui_ready', + 'broadcast_living_ui_progress', + 'make_todo_broadcast_hook', + 'restart_living_ui', +] diff --git a/app/living_ui/_state.py b/app/living_ui/_state.py new file mode 100644 index 00000000..a0dba0ed --- /dev/null +++ b/app/living_ui/_state.py @@ -0,0 +1,22 @@ +"""Module-level singleton for the LivingUIManager. + +Lives in its own file so that `broadcast.py` and `actions.py` can import the +accessor without triggering circular imports through `__init__.py`. 
+""" + +from typing import Optional + +from .manager import LivingUIManager + +_manager: Optional[LivingUIManager] = None + + +def get_living_ui_manager() -> Optional[LivingUIManager]: + """Get the global LivingUIManager instance.""" + return _manager + + +def set_living_ui_manager(manager: LivingUIManager) -> None: + """Set the global LivingUIManager instance (called by browser_adapter).""" + global _manager + _manager = manager diff --git a/app/living_ui/actions.py b/app/living_ui/actions.py new file mode 100644 index 00000000..72c36d1a --- /dev/null +++ b/app/living_ui/actions.py @@ -0,0 +1,41 @@ +"""Async operations on Living UI projects exposed to agent actions.""" + +from ._state import get_living_ui_manager + + +async def restart_living_ui(project_id: str) -> dict: + """Restart a running Living UI project (backend + frontend). + + Stops the entire project and relaunches via the pipeline. + Returns detailed errors if any step fails. + """ + manager = get_living_ui_manager() + if manager is None: + return {"status": "error", "message": "Living UI manager not initialized"} + + project = manager.get_project(project_id) + if project is None: + return {"status": "error", "message": f"Project '{project_id}' not found"} + + # Stop the entire project (backend + frontend) + await manager.stop_project(project_id) + + # Relaunch via the full pipeline + result = await manager.launch_and_verify(project_id) + + if result["status"] == "success": + return { + "status": "success", + "message": f"Living UI '{project_id}' restarted", + "url": result.get("url"), + "backend_url": result.get("backend_url"), + } + + errors = result.get("errors", []) + errors_str = "\n".join(errors[:10]) + return { + "status": "error", + "message": f"Restart failed at step: {result.get('step', 'unknown')}", + "test_errors": errors[:10], + "details": f"Fix these errors and call living_ui_restart again:\n{errors_str}", + } diff --git a/app/living_ui/broadcast.py b/app/living_ui/broadcast.py new file 
mode 100644 index 00000000..c8304a71 --- /dev/null +++ b/app/living_ui/broadcast.py @@ -0,0 +1,131 @@ +"""Broadcast callback registry and dispatchers for Living UI events. + +The browser adapter registers async callbacks at startup. Agent actions +(running in the main loop) call the broadcast_living_ui_ready / _progress +wrappers directly. TaskManager hooks (running on a worker thread pool) go +through make_todo_broadcast_hook, which schedules the async broadcast onto +the main loop in a thread-safe way. +""" + +import asyncio +from typing import Any, Awaitable, Callable, Dict, List, Optional + +try: + from loguru import logger +except ImportError: + import logging + logger = logging.getLogger(__name__) + +from ._state import get_living_ui_manager + +# Registered async callbacks into the browser adapter. +_broadcast_ready_callback: Optional[Callable[[str, str, int], Awaitable[bool]]] = None +_broadcast_progress_callback: Optional[Callable[[str, str, int, str], Awaitable[None]]] = None +_broadcast_todos_callback: Optional[Callable[[str, List[Dict[str, Any]]], Awaitable[None]]] = None + +# Captured at register time so cross-thread dispatchers (action handlers +# running on a worker thread pool) can schedule coroutines onto the main loop. +_main_loop: Optional[asyncio.AbstractEventLoop] = None + + +def register_broadcast_callbacks( + broadcast_ready: Callable[[str, str, int], Awaitable[bool]], + broadcast_progress: Callable[[str, str, int, str], Awaitable[None]], + broadcast_todos: Optional[Callable[[str, List[Dict[str, Any]]], Awaitable[None]]] = None, +) -> None: + """Register broadcast callbacks for Living UI actions to use. + + Called by the browser_adapter when it initializes. 
+ """ + global _broadcast_ready_callback, _broadcast_progress_callback, _broadcast_todos_callback, _main_loop + _broadcast_ready_callback = broadcast_ready + _broadcast_progress_callback = broadcast_progress + _broadcast_todos_callback = broadcast_todos + try: + _main_loop = asyncio.get_running_loop() + except RuntimeError: + _main_loop = None + logger.warning("[LIVING_UI] No running loop at callback registration — cross-thread broadcasts will fail") + logger.info("[LIVING_UI] Broadcast callbacks registered") + + +async def broadcast_living_ui_ready(project_id: str, url: str, port: int) -> bool: + """Broadcast that a Living UI is ready. Returns True on success.""" + if _broadcast_ready_callback: + return await _broadcast_ready_callback(project_id, url, port) + logger.warning( + f"[LIVING_UI] broadcast_living_ui_ready called but callback is None " + f"(manager={get_living_ui_manager() is not None})" + ) + return False + + +async def broadcast_living_ui_progress( + project_id: str, phase: str, progress: int, message: str +) -> bool: + """Broadcast Living UI creation progress. Returns True on success.""" + if _broadcast_progress_callback: + await _broadcast_progress_callback(project_id, phase, progress, message) + return True + return False + + +async def _broadcast_todos_async( + project_id: str, todos: List[Dict[str, Any]] +) -> bool: + """Internal async broadcaster used by the sync dispatcher below.""" + if _broadcast_todos_callback: + await _broadcast_todos_callback(project_id, todos) + return True + return False + + +def _dispatch_todos(project_id: str, todos: List[Dict[str, Any]]) -> bool: + """Thread-safe todo broadcast. + + Handles both calling contexts: + - Main asyncio loop: schedules via loop.create_task + - Worker thread: uses asyncio.run_coroutine_threadsafe against _main_loop + + Returns True if the broadcast was scheduled, False otherwise. 
+ """ + if not _broadcast_todos_callback: + return False + + coro = _broadcast_todos_async(project_id, todos) + + try: + running = asyncio.get_running_loop() + running.create_task(coro) + return True + except RuntimeError: + pass + + if _main_loop is not None and _main_loop.is_running(): + asyncio.run_coroutine_threadsafe(coro, _main_loop) + return True + + coro.close() + logger.warning("[LIVING_UI] No main loop available; todo broadcast skipped") + return False + + +def make_todo_broadcast_hook() -> Callable[[Any, List[Dict[str, Any]]], None]: + """Build a post-update-todos hook that broadcasts todos for Living UI tasks. + + The returned callable matches TaskManager's PostUpdateTodosHook signature: + (active_task, updated_todos_as_dicts) -> None + + It filters non-Living-UI tasks by checking whether the task id maps to + a project, so registering it globally is safe. + """ + def hook(task: Any, todos: List[Dict[str, Any]]) -> None: + manager = get_living_ui_manager() + if manager is None: + return + project = manager.get_project_by_task_id(task.id) + if project is None: + return # non-Living-UI task — silently skip + logger.debug(f"[LIVING_UI] Broadcasting {len(todos)} todos to project {project.id}") + _dispatch_todos(project.id, todos) + return hook diff --git a/app/living_ui/integration_bridge.py b/app/living_ui/integration_bridge.py new file mode 100644 index 00000000..d6312dd3 --- /dev/null +++ b/app/living_ui/integration_bridge.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +""" +Integration Bridge — proxy for Living UI ↔ External API calls. + +Living UI backends call CraftBot via this bridge to make authenticated +requests to external APIs (YouTube, Discord, Slack, etc.). Credentials +never leave CraftBot — the bridge injects auth headers server-side. + +Routes are registered on the browser adapter's aiohttp app. 
+""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Optional + +from aiohttp import web +import httpx + +if TYPE_CHECKING: + from app.living_ui.manager import LivingUIManager + +try: + from app.logger import logger +except Exception: + logger = logging.getLogger(__name__) + + +class IntegrationBridge: + """ + HTTP proxy that lets Living UI backends make authenticated API calls + to external services through CraftBot. + + Flow: + Living UI Backend → POST /api/integrations/proxy → CraftBot + CraftBot validates token, injects auth, forwards to external API. + """ + + def __init__(self, manager: "LivingUIManager"): + self._manager = manager + self._http_client = httpx.AsyncClient(timeout=30, follow_redirects=True) + + def register_routes(self, app: web.Application) -> None: + """Register integration bridge routes on the aiohttp app.""" + app.router.add_get("/api/integrations/available", self._handle_available) + app.router.add_post("/api/integrations/proxy", self._handle_proxy) + app.router.add_post("/api/bridge/llm", self._handle_llm) + app.router.add_post("/api/bridge/vlm", self._handle_vlm) + logger.info("[INTEGRATION_BRIDGE] Routes registered") + + async def cleanup(self) -> None: + """Close the HTTP client.""" + await self._http_client.aclose() + + # ------------------------------------------------------------------ + # Route handlers + # ------------------------------------------------------------------ + + async def _handle_available(self, request: web.Request) -> web.Response: + """List available integrations and their connection/grant status.""" + project_id = self._validate_token(request) + if not project_id: + return web.json_response({"error": "Unauthorized"}, status=401) + + from app.external_comms.registry import get_registered_platforms, get_client + + integrations = [] + for platform_id in get_registered_platforms(): + client = get_client(platform_id) + connected = client.has_credentials() if client else False 
+ integrations.append({ + "id": platform_id, + "connected": connected, + }) + + return web.json_response({"integrations": integrations}) + + async def _handle_proxy(self, request: web.Request) -> web.Response: + """ + Proxy an API request to an external service with injected auth. + + Expected JSON body: + { + "integration": "google_workspace", + "method": "GET", + "url": "https://www.googleapis.com/youtube/v3/channels?part=snippet&mine=true", + "headers": {}, // optional extra headers + "body": null // optional request body + } + """ + project_id = self._validate_token(request) + if not project_id: + return web.json_response({"error": "Unauthorized"}, status=401) + + try: + data = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + + integration = data.get("integration", "") + method = data.get("method", "GET").upper() + url = data.get("url", "") + extra_headers = data.get("headers") or {} + body = data.get("body") + + if not integration or not url: + return web.json_response( + {"error": "Missing required fields: integration, url"}, status=400 + ) + + # Get auth headers from platform client + auth_headers = self._get_auth_headers(integration) + if auth_headers is None: + return web.json_response( + {"error": f"Integration '{integration}' not connected (no credentials)"}, + status=424, + ) + + # Merge headers: auth + extra (extra can override Content-Type etc.) 
+ merged_headers = {**auth_headers, **extra_headers} + + # Forward request to external API + try: + response = await self._http_client.request( + method=method, + url=url, + headers=merged_headers, + json=body if body and method in ("POST", "PUT", "PATCH") else None, + params=body if body and method == "GET" else None, + ) + + # Return proxied response + try: + response_body = response.json() + except Exception: + response_body = response.text + + return web.json_response( + { + "status": response.status_code, + "data": response_body, + }, + status=200, + ) + + except httpx.TimeoutException: + return web.json_response({"error": "External API timeout"}, status=504) + except Exception as e: + logger.error(f"[INTEGRATION_BRIDGE] Proxy error: {e}") + return web.json_response({"error": f"Proxy error: {str(e)}"}, status=502) + + async def _handle_llm(self, request: web.Request) -> web.Response: + """Proxy LLM completion request through CraftBot's configured LLM.""" + project_id = self._validate_token(request) + if not project_id: + return web.json_response({"error": "Unauthorized"}, status=401) + + try: + data = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + + prompt = data.get("prompt", "") + system_message = data.get("system_message") + + if not prompt: + return web.json_response({"error": "Missing required field: prompt"}, status=400) + + try: + import app.internal_action_interface as iai + result = await iai.InternalActionInterface.use_llm(prompt, system_message) + llm_response = result.get("llm_response", "") + if isinstance(llm_response, dict): + response_text = llm_response.get("content", "") + else: + response_text = str(llm_response) + return web.json_response({"content": response_text}) + except Exception as e: + logger.error(f"[INTEGRATION_BRIDGE] LLM error: {e}") + return web.json_response({"error": f"LLM error: {str(e)}"}, status=502) + + async def _handle_vlm(self, request: web.Request) -> 
web.Response: + """Proxy VLM image description through CraftBot's configured VLM.""" + project_id = self._validate_token(request) + if not project_id: + return web.json_response({"error": "Unauthorized"}, status=401) + + try: + data = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + + image_url = data.get("image_url", "") + prompt = data.get("prompt", "Describe this image.") + + if not image_url: + return web.json_response({"error": "Missing required field: image_url"}, status=400) + + try: + # Download image to temp file + import tempfile + import os + response = await self._http_client.get(image_url) + if response.status_code != 200: + return web.json_response({"error": f"Failed to download image: HTTP {response.status_code}"}, status=424) + + # Save to temp file for VLM + suffix = ".jpg" + if "png" in response.headers.get("content-type", ""): + suffix = ".png" + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix) + tmp.write(response.content) + tmp.close() + + try: + import app.internal_action_interface as iai + description = iai.InternalActionInterface.describe_image(tmp.name, prompt) + return web.json_response({"description": description}) + finally: + os.unlink(tmp.name) + except Exception as e: + logger.error(f"[INTEGRATION_BRIDGE] VLM error: {e}") + return web.json_response({"error": f"VLM error: {str(e)}"}, status=502) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _validate_token(self, request: web.Request) -> Optional[str]: + """ + Validate the bridge token from the Authorization header. + + Returns: + project_id if valid, None if invalid. 
+ """ + auth = request.headers.get("Authorization", "") + if not auth.startswith("Bearer "): + return None + + token = auth[7:] + return self._manager.validate_bridge_token(token) + + def _get_auth_headers(self, platform_id: str) -> Optional[dict]: + """ + Get authentication headers from a platform client. + + Returns: + Dict of auth headers, or None if credentials unavailable. + """ + from app.external_comms.registry import get_client + + client = get_client(platform_id) + if not client or not client.has_credentials(): + return None + + # Most clients expose _headers() — use it + if hasattr(client, "_headers"): + try: + headers = client._headers() + if callable(headers): + headers = headers() + return headers + except Exception as e: + logger.warning(f"[INTEGRATION_BRIDGE] Failed to get headers for {platform_id}: {e}") + return None + + # Discord uses _bot_headers() + if hasattr(client, "_bot_headers"): + try: + return client._bot_headers() + except Exception as e: + logger.warning(f"[INTEGRATION_BRIDGE] Failed to get bot headers for {platform_id}: {e}") + return None + + logger.warning(f"[INTEGRATION_BRIDGE] No auth header method found for {platform_id}") + return None diff --git a/app/living_ui/manager.py b/app/living_ui/manager.py new file mode 100644 index 00000000..7400d39d --- /dev/null +++ b/app/living_ui/manager.py @@ -0,0 +1,2969 @@ +""" +Living UI Manager + +Manages the lifecycle of Living UI projects: +- Project creation from template +- Project launching and stopping +- Port allocation +- State tracking +- Startup auto-launch +- Task creation with trigger firing +""" + +import asyncio +import json +import os +import re +import shutil +import socket +import subprocess +import sys +import tempfile +import time +import uuid +import zipfile +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Any, Set, Tuple, TYPE_CHECKING +try: + from loguru import logger +except 
ImportError: + import logging + logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from app.task.task_manager import TaskManager + from app.trigger import TriggerQueue + + +@dataclass +class LivingUIProject: + """Represents a Living UI project.""" + id: str + name: str + description: str + path: str + status: str = 'created' # created, creating, ready, running, stopped, error + port: Optional[int] = None # Frontend port + backend_port: Optional[int] = None # Backend API port + url: Optional[str] = None # Frontend URL + backend_url: Optional[str] = None # Backend API URL + created_at: float = field(default_factory=lambda: datetime.now().timestamp()) + features: List[str] = field(default_factory=list) + theme: str = 'system' + error: Optional[str] = None + task_id: Optional[str] = None + auto_launch: bool = False # Auto-launch on CraftBot startup + log_cleanup: bool = True # Clean logs on restart + project_type: str = 'native' # 'native' or 'external' + app_runtime: Optional[str] = None # 'go', 'node', 'python', 'rust', 'docker', 'static' + bridge_token: str = "" # Ephemeral token for integration bridge (NOT serialized) + tunnel_url: Optional[str] = None # Public tunnel URL (NOT serialized) + tunnel_process: Optional[subprocess.Popen] = None # Tunnel process (NOT serialized) + process: Optional[subprocess.Popen] = None # Frontend process + backend_process: Optional[subprocess.Popen] = None # Backend process + app_process: Optional[subprocess.Popen] = None # Single process for external apps + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for serialization.""" + return { + 'id': self.id, + 'name': self.name, + 'description': self.description, + 'path': self.path, + 'status': self.status, + 'port': self.port, + 'backendPort': self.backend_port, + 'url': self.url, + 'backendUrl': self.backend_url, + 'createdAt': int(self.created_at * 1000), # Convert to JS timestamp + 'features': self.features, + 'theme': self.theme, + 'error': self.error, + 
'autoLaunch': self.auto_launch, + 'logCleanup': self.log_cleanup, + 'projectType': self.project_type, + 'appRuntime': self.app_runtime, + 'tunnelUrl': self.tunnel_url, + } + + +class LivingUIManager: + """Manages Living UI project lifecycle.""" + + def __init__(self, workspace_root: Path, template_path: Path): + """ + Initialize the Living UI Manager. + + Args: + workspace_root: Root directory for Living UI projects + template_path: Path to the Living UI template + """ + self.workspace_root = Path(workspace_root) + self.template_path = Path(template_path) + self.projects: Dict[str, LivingUIProject] = {} + self._next_port = 3100 + self._port_range = (3100, 3199) + self._used_ports: set = set() + self._projects_file = self.workspace_root / 'living_ui_projects.json' + + # Task and trigger management (set via bind_task_manager) + self._task_manager: Optional["TaskManager"] = None + self._trigger_queue: Optional["TriggerQueue"] = None + + # Watchdog state + self._watchdog_task: Optional[asyncio.Task] = None + self._watchdog_running: bool = False + + # Ensure workspace directory exists + self.living_ui_dir = self.workspace_root / 'living_ui' + self.living_ui_dir.mkdir(parents=True, exist_ok=True) + + # Load existing projects + self._load_projects() + + def bind_task_manager( + self, + task_manager: "TaskManager", + trigger_queue: "TriggerQueue" + ) -> None: + """ + Bind the task manager and trigger queue for creating development tasks. 
+ + Args: + task_manager: TaskManager instance for creating tasks + trigger_queue: TriggerQueue instance for firing triggers + """ + self._task_manager = task_manager + self._trigger_queue = trigger_queue + logger.info("[LIVING_UI] Task manager and trigger queue bound") + + # ======================================================================== + # Watchdog - monitors running projects and restarts crashed processes + # ======================================================================== + + WATCHDOG_INTERVAL = 30 # seconds between checks + WATCHDOG_RETRY_DELAYS = [5, 15, 30] # seconds to wait between restart attempts + + def start_watchdog(self) -> None: + """Start the background watchdog that monitors running projects.""" + if self._watchdog_running: + logger.warning("[LIVING_UI:WATCHDOG] Already running") + return + + self._watchdog_running = True + self._watchdog_task = asyncio.create_task(self._watchdog_loop()) + logger.info("[LIVING_UI:WATCHDOG] Started") + + async def stop_watchdog(self) -> None: + """Stop the background watchdog.""" + if not self._watchdog_running: + return + + self._watchdog_running = False + if self._watchdog_task: + self._watchdog_task.cancel() + try: + await self._watchdog_task + except asyncio.CancelledError: + pass + self._watchdog_task = None + logger.info("[LIVING_UI:WATCHDOG] Stopped") + + async def _watchdog_loop(self) -> None: + """ + Background loop that checks all running projects for dead processes. + + On detecting a crash: + 1. Attempts silent restart (up to 3 retries with increasing delays) + 2. 
If all retries fail, sets status to 'error' and creates an agent + task to investigate and fix the issue + """ + retry_counts: Dict[str, int] = {} # project_id -> consecutive failures + + # Initial delay to let everything settle after startup + await asyncio.sleep(10) + + while self._watchdog_running: + try: + await asyncio.sleep(self.WATCHDOG_INTERVAL) + + for project_id, project in list(self.projects.items()): + if project.status != 'running': + # Clear retry count if project is no longer running + retry_counts.pop(project_id, None) + continue + + backend_dead = ( + project.backend_process is not None + and project.backend_process.poll() is not None + ) + frontend_dead = ( + project.process is not None + and project.process.poll() is not None + ) + + # Also check via port if process handles are None + # (can happen if manager was reloaded but processes survived) + if not backend_dead and project.backend_port: + if project.backend_process is None and not self._is_port_in_use(project.backend_port): + backend_dead = True + if not frontend_dead and project.port: + if project.process is None and not self._is_port_in_use(project.port): + frontend_dead = True + + if not backend_dead and not frontend_dead: + # Everything healthy, reset retry counter + if project_id in retry_counts: + logger.info(f"[LIVING_UI:WATCHDOG] {project.name} ({project_id}) recovered") + retry_counts.pop(project_id) + continue + + # Something is dead + retries = retry_counts.get(project_id, 0) + crash_target = [] + if backend_dead: + crash_target.append("backend") + if frontend_dead: + crash_target.append("frontend") + crash_str = " + ".join(crash_target) + + if retries >= len(self.WATCHDOG_RETRY_DELAYS): + # Exhausted retries — escalate to agent + logger.error( + f"[LIVING_UI:WATCHDOG] {project.name} ({project_id}) " + f"{crash_str} crashed, all {retries} restart attempts failed. Escalating to agent." 
+ ) + await self._escalate_crash(project_id, crash_target) + retry_counts.pop(project_id, None) + continue + + delay = self.WATCHDOG_RETRY_DELAYS[retries] + retry_counts[project_id] = retries + 1 + logger.warning( + f"[LIVING_UI:WATCHDOG] {project.name} ({project_id}) " + f"{crash_str} crashed. Restart attempt {retries + 1}/{len(self.WATCHDOG_RETRY_DELAYS)} " + f"in {delay}s..." + ) + + await asyncio.sleep(delay) + + # Attempt restart + restart_ok = True + if backend_dead: + project.backend_process = None + success = await self.launch_backend(project_id) + if not success: + logger.error(f"[LIVING_UI:WATCHDOG] Backend restart failed for {project_id}") + restart_ok = False + + if frontend_dead: + project.process = None + success = await self._relaunch_frontend(project_id) + if not success: + logger.error(f"[LIVING_UI:WATCHDOG] Frontend restart failed for {project_id}") + restart_ok = False + + if restart_ok: + logger.info(f"[LIVING_UI:WATCHDOG] {project.name} ({project_id}) restarted successfully") + retry_counts.pop(project_id, None) + self._save_projects() + + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"[LIVING_UI:WATCHDOG] Unexpected error: {e}") + await asyncio.sleep(self.WATCHDOG_INTERVAL) + + async def _relaunch_frontend(self, project_id: str) -> bool: + """ + Relaunch just the frontend process for a project. + + Lightweight alternative to launch_project — reuses existing port, + skips npm install, doesn't touch backend. 
+ """ + project = self.projects.get(project_id) + if not project: + return False + + project_path = Path(project.path) + port = project.port + if not port: + return False + + # Kill anything on the port first + if self._is_port_in_use(port): + self._kill_process_on_port(port) + await asyncio.sleep(1) + + try: + # Open timestamped log file for subprocess output + frontend_log = self._create_frontend_log(project_path) + frontend_log_handle = open(frontend_log, 'a', encoding='utf-8') + frontend_log_handle.write( + f"\n{'='*60}\n[{datetime.now().isoformat()}] " + f"Relaunching frontend on port {port}\n{'='*60}\n" + ) + frontend_log_handle.flush() + + process = subprocess.Popen( + ['npm', 'run', 'preview', '--', '--port', str(port)], + cwd=str(project_path), + stdout=frontend_log_handle, + stderr=frontend_log_handle, + shell=True if os.name == 'nt' else False, + ) + + project.process = process + + server_ready = await self._wait_for_server(port, timeout=15) + if not server_ready: + frontend_log_handle.flush() + try: + recent = frontend_log.read_text(encoding='utf-8')[-500:] + except Exception: + recent = '' + logger.error(f"[LIVING_UI] Frontend relaunch failed for {project_id}. Log tail:\n{recent}") + if process.poll() is None: + process.terminate() + project.process = None + frontend_log_handle.close() + return False + + project.url = f"http://localhost:{port}" + logger.info(f"[LIVING_UI] Frontend relaunched for {project_id} on port {port}") + return True + + except Exception as e: + logger.error(f"[LIVING_UI] Frontend relaunch error for {project_id}: {e}") + return False + + async def _escalate_crash(self, project_id: str, crash_targets: List[str]) -> None: + """ + Escalate a crash to the agent by creating a fix task. + + Called after all silent restart attempts have failed. + Reads crash logs and creates an agent task with full context. 
+ """ + project = self.projects.get(project_id) + if not project: + return + + # Collect crash log tails + project_path = Path(project.path) + log_snippets = [] + + # Backend logs + backend_subprocess_log = project_path / 'backend' / 'logs' / 'subprocess_output.log' + if backend_subprocess_log.exists(): + try: + content = backend_subprocess_log.read_text(encoding='utf-8') + log_snippets.append(f"=== Backend subprocess log (last 1000 chars) ===\n{content[-1000:]}") + except Exception: + pass + + # Backend app-level logs (most recent session) + backend_logs_dir = project_path / 'backend' / 'logs' + if backend_logs_dir.exists(): + session_logs = sorted(backend_logs_dir.glob("backend_*.log"), reverse=True) + if session_logs: + try: + content = session_logs[0].read_text(encoding='utf-8') + log_snippets.append(f"=== Backend session log (last 1000 chars) ===\n{content[-1000:]}") + except Exception: + pass + + # Health status + health_status_file = project_path / 'backend' / 'logs' / 'health_status.json' + if health_status_file.exists(): + try: + log_snippets.append(f"=== Health status ===\n{health_status_file.read_text(encoding='utf-8')}") + except Exception: + pass + + # Frontend logs (most recent session) + frontend_logs_dir = project_path / 'logs' + if frontend_logs_dir.exists(): + frontend_logs = sorted(frontend_logs_dir.glob("frontend_*.log"), reverse=True) + if frontend_logs: + try: + content = frontend_logs[0].read_text(encoding='utf-8') + log_snippets.append(f"=== Frontend log (last 1000 chars) ===\n{content[-1000:]}") + except Exception: + pass + + crash_str = " and ".join(crash_targets) + all_logs = "\n\n".join(log_snippets) if log_snippets else "(no logs found)" + + # Update project status + project.status = 'error' + project.error = f'{crash_str} crashed after {len(self.WATCHDOG_RETRY_DELAYS)} restart attempts' + project.process = None + project.backend_process = None + self._save_projects() + + # Create agent task to investigate and fix + if not 
self._task_manager or not self._trigger_queue: + logger.error("[LIVING_UI:WATCHDOG] Cannot escalate — task manager or trigger queue not bound") + return + + from app.trigger import Trigger + + task_instruction = f"""Fix a crashed Living UI application. + +Project ID: {project.id} +Project Name: {project.name} +Project Path: {project.path} +Crashed components: {crash_str} + +The Living UI {crash_str} process(es) crashed and {len(self.WATCHDOG_RETRY_DELAYS)} automatic restart attempts all failed. +This means the code likely has a bug that prevents the server from running. + +CRASH LOGS: +{all_logs} + +STEPS: +1. Read the crash logs above to identify the root cause +2. Navigate to the project path and fix the code +3. Use living_ui_restart with project_id="{project.id}" to restart the project +4. Verify the project is running by checking that the restart succeeded + +Follow the living-ui-creator skill instructions for the project structure. +The backend is a FastAPI app at {project.path}/backend/main.py +The frontend is a Vite+React app at {project.path}/frontend/""" + + try: + task_id = self._task_manager.create_task( + task_name=f"Fix crashed Living UI: {project.name}", + task_instruction=task_instruction, + mode="complex", + action_sets=["file_operations", "code_execution", "living_ui", "core"], + selected_skills=["living-ui-creator"], + ) + + trigger = Trigger( + fire_at=time.time(), + priority=30, # Higher priority than normal creation tasks + next_action_description=f"[Living UI] Fix crash: {project.name}", + session_id=task_id, + payload={ + "type": "living_ui_crash_fix", + "project_id": project_id, + }, + ) + await self._trigger_queue.put(trigger) + + project.task_id = task_id + self._save_projects() + logger.info( + f"[LIVING_UI:WATCHDOG] Created fix task {task_id} for {project.name} ({project_id})" + ) + except Exception as e: + logger.error(f"[LIVING_UI:WATCHDOG] Failed to create fix task: {e}") + + def _load_projects(self) -> None: + """Load projects from 
persistent storage.""" + if self._projects_file.exists(): + try: + with open(self._projects_file, 'r') as f: + data = json.load(f) + for project_data in data.get('projects', []): + project = LivingUIProject( + id=project_data['id'], + name=project_data['name'], + description=project_data.get('description', ''), + path=project_data['path'], + status=project_data.get('status', 'stopped'), + port=project_data.get('port'), + backend_port=project_data.get('backendPort'), + created_at=project_data.get('createdAt', datetime.now().timestamp()) / 1000, + features=project_data.get('features', []), + theme=project_data.get('theme', 'system'), + auto_launch=project_data.get('autoLaunch', False), + log_cleanup=project_data.get('logCleanup', True), + project_type=project_data.get('projectType', 'native'), + app_runtime=project_data.get('appRuntime'), + ) + # Check if saved tunnel URL is still reachable + saved_tunnel = project_data.get('tunnelUrl') + if saved_tunnel: + try: + import urllib.request + req = urllib.request.Request(saved_tunnel, method='HEAD') + urllib.request.urlopen(req, timeout=3) + project.tunnel_url = saved_tunnel + logger.info(f"[LIVING_UI] Tunnel still active for '{project.name}': {saved_tunnel}") + except Exception: + logger.info(f"[LIVING_UI] Tunnel expired for '{project.name}', clearing") + project.tunnel_url = None + # Reset status to stopped for all loaded projects + project.status = 'stopped' if project.status == 'running' else project.status + self.projects[project.id] = project + # Track both frontend and backend ports + if project.port: + self._used_ports.add(project.port) + if project.backend_port: + self._used_ports.add(project.backend_port) + logger.info(f"[LIVING_UI] Loaded {len(self.projects)} projects") + except Exception as e: + logger.error(f"[LIVING_UI] Failed to load projects: {e}") + + def _save_projects(self) -> None: + """Save projects to persistent storage.""" + try: + data = { + 'projects': [p.to_dict() for p in self.projects.values()] 
+ } + with open(self._projects_file, 'w') as f: + json.dump(data, f, indent=2) + except Exception as e: + logger.error(f"[LIVING_UI] Failed to save projects: {e}") + + def _allocate_port(self) -> int: + """Allocate a free port for a Living UI project. + + Checks both the internal tracking set AND actual system port usage + to avoid conflicts with orphan processes. + """ + for port in range(self._port_range[0], self._port_range[1] + 1): + # Skip if tracked as used + if port in self._used_ports: + continue + # Skip if actually in use on the system + if self._is_port_in_use(port): + logger.warning(f"[LIVING_UI] Port {port} in use by external process, skipping") + continue + self._used_ports.add(port) + return port + raise RuntimeError("No available ports in the Living UI port range") + + def _release_port(self, port: int) -> None: + """Release a port back to the pool.""" + self._used_ports.discard(port) + + def _is_port_in_use(self, port: int) -> bool: + """Check if a port is actually in use on the system.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(0.5) + return s.connect_ex(('localhost', port)) == 0 + + def _get_pids_on_ports(self, ports_to_check: Optional[Set[int]] = None) -> Dict[int, str]: + """ + Get PIDs of processes listening on ports in the Living UI range. + Uses a single system call for efficiency. + + Args: + ports_to_check: Optional set of specific ports to check. + If None, checks all ports in the Living UI range. 
+ + Returns: + Dict mapping port numbers to PIDs + """ + port_pids = {} + + if os.name == 'nt': + # Windows: run netstat once and parse all results + try: + result = subprocess.run( + ['netstat', '-ano'], + capture_output=True, + text=True, + shell=True, + timeout=5 + ) + for line in result.stdout.split('\n'): + if 'LISTENING' in line: + parts = line.split() + if len(parts) >= 5: + addr = parts[1] + pid = parts[-1] + if ':' in addr: + try: + port = int(addr.split(':')[-1]) + # Check if port is in range and optionally in the filter set + if self._port_range[0] <= port <= self._port_range[1]: + if ports_to_check is None or port in ports_to_check: + port_pids[port] = pid + except ValueError: + pass + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to get ports via netstat: {e}") + else: + # Linux/Mac: use lsof + try: + result = subprocess.run( + ['lsof', '-i', '-P', '-n'], + capture_output=True, + text=True, + timeout=5 + ) + for line in result.stdout.split('\n'): + if 'LISTEN' in line: + parts = line.split() + if len(parts) >= 2: + # PID is typically the second column + pid = parts[1] + # Find the port in the line + for part in parts: + if ':' in part: + try: + port = int(part.split(':')[-1]) + if self._port_range[0] <= port <= self._port_range[1]: + if ports_to_check is None or port in ports_to_check: + port_pids[port] = pid + break + except ValueError: + pass + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to get ports via lsof: {e}") + + return port_pids + + def _kill_process_by_pid(self, pid: str) -> bool: + """ + Kill a process by its PID. 
+ + Args: + pid: Process ID to kill + + Returns: + True if process was killed, False otherwise + """ + try: + if os.name == 'nt': + subprocess.run( + ['taskkill', '/F', '/PID', pid], + capture_output=True, + shell=True + ) + else: + subprocess.run(['kill', '-9', pid], capture_output=True) + return True + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to kill process {pid}: {e}") + return False + + async def _wait_for_server(self, port: int, timeout: int = 10) -> bool: + """ + Wait for a server to start listening on a port. + + Args: + port: The port to check + timeout: Maximum seconds to wait + + Returns: + True if server is responding, False if timeout + """ + for _ in range(timeout * 2): + if self._is_port_in_use(port): + return True + await asyncio.sleep(0.5) + return False + + async def _wait_for_health_check(self, url: str, timeout: int = 15) -> bool: + """ + Wait for a server's health endpoint to respond. + + Args: + url: The health check URL (e.g., http://localhost:3101/health) + timeout: Maximum seconds to wait + + Returns: + True if health check passes, False if timeout + """ + import urllib.request + import urllib.error + + for _ in range(timeout * 2): + try: + req = urllib.request.Request(url, method='GET') + with urllib.request.urlopen(req, timeout=2) as response: + if response.status == 200: + return True + except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError): + pass + await asyncio.sleep(0.5) + return False + + async def _run_backend_tests(self, project_id: str, mode: str, port: int = 0) -> bool: + """ + Run backend tests using test_runner.py. 
+ + Args: + project_id: Project ID to test + mode: "internal" (pre-server) or "external" (post-server HTTP tests) + port: Backend port (required for external mode) + + Returns: + True if all tests pass, False otherwise + """ + project = self.projects.get(project_id) + if not project: + return False + + backend_path = Path(project.path) / 'backend' + test_runner = backend_path / 'test_runner.py' + if not test_runner.exists(): + logger.warning(f"[LIVING_UI] No test_runner.py for {project_id}, skipping {mode} tests") + return True # No tests = pass (backwards compat with older projects) + + logger.info(f"[LIVING_UI] Running {mode} tests for {project.name} ({project_id})...") + + cmd = [sys.executable, str(test_runner), f'--{mode}'] + if mode == 'external' and port: + cmd.extend(['--port', str(port)]) + + try: + proc = await asyncio.create_subprocess_exec( + *cmd, + cwd=str(backend_path), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=60) + + stdout_str = stdout.decode('utf-8', errors='replace').strip() + stderr_str = stderr.decode('utf-8', errors='replace').strip() + + if stderr_str: + # stderr contains the test runner's logging output + for line in stderr_str.split('\n')[-20:]: # Last 20 lines + logger.debug(f"[LIVING_UI:TEST] {line}") + + if proc.returncode == 0: + logger.info(f"[LIVING_UI] {mode.capitalize()} tests passed for {project_id}") + return True + else: + # Read the detailed results file + if mode == 'internal': + results_file = backend_path / 'logs' / 'test_discovery.json' + else: + results_file = backend_path / 'logs' / 'test_results.json' + + error_details = '' + if results_file.exists(): + try: + results = json.loads(results_file.read_text(encoding='utf-8')) + errors = results.get('errors', []) + error_details = '; '.join( + f"[{e.get('test', '?')}] {e.get('error', '?')}" for e in errors[:5] + ) + except Exception: + pass + + logger.error( + f"[LIVING_UI] 
{mode.capitalize()} tests failed for {project_id}: {error_details or stderr_str[-500:]}" + ) + return False + + except asyncio.TimeoutError: + logger.error(f"[LIVING_UI] {mode.capitalize()} tests timed out for {project_id}") + return False + except Exception as e: + logger.error(f"[LIVING_UI] Failed to run {mode} tests for {project_id}: {e}") + return False + + # ======================================================================== + # Manifest-driven launch pipeline + # ======================================================================== + + async def launch_and_verify(self, project_id: str) -> dict: + """ + Launch and verify a Living UI project using its manifest pipeline. + + Runs backend and frontend tracks in parallel to collect all errors at once. + Only starts servers if all pre-start checks pass. + + Dependency graph: + pip install ──→ internal tests ──→ unit + compatibility tests (parallel) + npm install ──→ npm run build + Both tracks run in parallel. If ANY errors, return all without starting servers. + If clean: start backend → health check → external tests → start frontend. 
+ + Returns: + {"status": "success", "url": "...", "backend_url": "...", "port": N} + {"status": "error", "step": "validation", "errors": [...all errors...]} + """ + project = self.projects.get(project_id) + if not project: + return {"status": "error", "step": "setup", "errors": [f"Project not found: {project_id}"]} + + project_path = Path(project.path) + if not project_path.exists(): + return {"status": "error", "step": "setup", "errors": [f"Project path not found: {project.path}"]} + + # Load manifest + manifest_path = project_path / 'config' / 'manifest.json' + if not manifest_path.exists(): + return {"status": "error", "step": "setup", "errors": ["config/manifest.json not found"]} + + try: + # Ensure ports are allocated and available + if not project.port: + project.port = self._allocate_port() + if not project.backend_port: + project.backend_port = self._allocate_port() + + # Read manifest and resolve ports — always use project's current ports + # regardless of what's hardcoded in the manifest file + manifest_raw = manifest_path.read_text(encoding='utf-8') + + # Extract old ports from manifest to do replacement + manifest_tmp = json.loads(manifest_raw) + old_ports = manifest_tmp.get('ports', {}) + old_frontend = str(old_ports.get('frontend', old_ports.get('app', ''))) + old_backend = str(old_ports.get('backend', '')) + + # Replace old ports with current allocated ports in manifest and source files + if old_frontend and old_frontend != str(project.port): + manifest_raw = manifest_raw.replace(old_frontend, str(project.port)) + if old_backend and old_backend != str(project.backend_port): + manifest_raw = manifest_raw.replace(old_backend, str(project.backend_port)) + + manifest = json.loads(manifest_raw) + + # Write updated manifest back to disk so frontend can read correct ports + if old_frontend != str(project.port) or old_backend != str(project.backend_port): + manifest_path.write_text(json.dumps(manifest, indent=2), encoding='utf-8') + 
logger.info(f"[LIVING_UI:PIPELINE] Updated manifest ports: frontend={project.port}, backend={project.backend_port}") + except Exception as e: + return {"status": "error", "step": "setup", "errors": [f"Failed to parse manifest: {e}"]} + + pipeline = manifest.get('pipeline', {}) + if not pipeline: + return {"status": "error", "step": "setup", "errors": ["No pipeline defined in manifest"]} + + logger.info(f"[LIVING_UI:PIPELINE] Starting launch pipeline for {project.name} ({project_id})") + + # Ensure index.html has the CraftBot theme sync listener (self-healing for older installs) + self._patch_theme_listener(project_path) + + # Check for single-process mode (external apps) + app_cfg = pipeline.get('app') + if app_cfg: + return await self._launch_single_process(project_id, project, project_path, app_cfg) + + # Stop any existing processes from previous launch attempts + # This prevents orphan uvicorn/vite processes accumulating on repeated calls + if project.backend_process and project.backend_process.poll() is None: + logger.info(f"[LIVING_UI:PIPELINE] Killing existing backend process before relaunch") + project.backend_process.terminate() + project.backend_process = None + if project.process and project.process.poll() is None: + logger.info(f"[LIVING_UI:PIPELINE] Killing existing frontend process before relaunch") + project.process.terminate() + project.process = None + + # Check if source files changed since last successful launch + files_changed = self._has_files_changed(project_path) + + if not files_changed: + logger.info(f"[LIVING_UI:PIPELINE] No source changes detected — skipping tests/build, starting servers directly") + # Fast path — just start servers + return await self._launch_servers_only(project_id, project, project_path, pipeline) + + # Clean up old log files so each launch starts fresh (if enabled) + if project.log_cleanup: + self._cleanup_project_logs(project_path) + + # ================================================================ + # PHASE 1: 
Parallel validation (collect ALL errors before starting) + # ================================================================ + + backend_cfg = pipeline.get('backend') + frontend_cfg = pipeline.get('frontend') + + # Run backend and frontend validation tracks in parallel + backend_task = None + frontend_task = None + + if backend_cfg: + backend_cwd = project_path / backend_cfg.get('cwd', 'backend') + backend_task = asyncio.create_task( + self._validate_backend_track(project_id, project_path, backend_cfg, backend_cwd) + ) + + if frontend_cfg: + frontend_cwd = project_path / frontend_cfg.get('cwd', '.') + if str(frontend_cwd) == '.': + frontend_cwd = project_path + frontend_task = asyncio.create_task( + self._validate_frontend_track(project_id, frontend_cfg, frontend_cwd) + ) + + # Wait for both tracks to complete + all_errors: List[str] = [] + + if backend_task: + backend_errors = await backend_task + all_errors.extend(backend_errors) + + if frontend_task: + frontend_errors = await frontend_task + all_errors.extend(frontend_errors) + + # If ANY errors from either track, return them all at once + if all_errors: + logger.error(f"[LIVING_UI:PIPELINE] Validation failed with {len(all_errors)} error(s)") + for err in all_errors[:10]: + logger.error(f"[LIVING_UI:PIPELINE] {err}") + project.status = 'error' + project.error = f'{len(all_errors)} validation error(s)' + self._save_projects() + return {"status": "error", "step": "validation", "errors": all_errors} + + logger.info(f"[LIVING_UI:PIPELINE] All validation passed, starting servers...") + + # ================================================================ + # PHASE 2: Start servers (sequential — needs running processes) + # ================================================================ + + # --- Start backend --- + if backend_cfg: + backend_cwd = project_path / backend_cfg.get('cwd', 'backend') + backend_port = project.backend_port + if not backend_port: + backend_port = self._allocate_port() + project.backend_port 
= backend_port + + if not await self._ensure_port_available(backend_port): + return {"status": "error", "step": "backend.port", "errors": [f"Port {backend_port} is occupied and could not be freed"]} + + start_cmd = backend_cfg.get('start', '') + if not start_cmd: + return {"status": "error", "step": "backend.start", "errors": ["No start command in manifest"]} + + logs_dir = backend_cwd / 'logs' + logs_dir.mkdir(parents=True, exist_ok=True) + log_file = logs_dir / 'subprocess_output.log' + + # Generate bridge token for integration proxy + from uuid import uuid4 + project.bridge_token = str(uuid4()) + + backend_process = self._start_process(backend_cwd, start_cmd, log_file, port=backend_port, project=project) + project.backend_process = backend_process + logger.info(f"[LIVING_UI:PIPELINE] Backend starting on port {backend_port}") + + # Health check + health_url = backend_cfg.get('health') + if health_url: + healthy = await self._wait_for_health_check(health_url, timeout=20) + if not healthy: + log_tail = self._read_log_tail(log_file, 1000) + if backend_process.poll() is not None: + err = f"Backend process exited with code {backend_process.returncode}" + else: + err = f"Backend not responding at {health_url}" + backend_process.terminate() + project.backend_process = None + return {"status": "error", "step": "backend.health", "errors": [err, log_tail]} + + project.backend_url = f"http://localhost:{backend_port}" + logger.info(f"[LIVING_UI:PIPELINE] Backend healthy on port {backend_port}") + + # Post-start tests (external smoke tests) + for test in backend_cfg.get('post_start_tests', []): + result = await self._run_pipeline_command( + backend_cwd, test['command'], step_name=f"backend.post_start.{test['name']}" + ) + if result["status"] == "error" and test.get('required', True): + errors = self._collect_test_errors(project_path, test['name']) or result["errors"] + await self.stop_backend(project_id) + return {"status": "error", "step": 
f"backend.post_start.{test['name']}", "errors": errors} + + # --- Start frontend --- + if frontend_cfg: + frontend_cwd = project_path / frontend_cfg.get('cwd', '.') + if str(frontend_cwd) == '.': + frontend_cwd = project_path + + frontend_port = project.port + if not frontend_port: + frontend_port = self._allocate_port() + project.port = frontend_port + + if not await self._ensure_port_available(frontend_port): + await self.stop_backend(project_id) + return {"status": "error", "step": "frontend.port", "errors": [f"Port {frontend_port} is occupied and could not be freed"]} + + start_cmd = frontend_cfg.get('start', '') + if not start_cmd: + await self.stop_backend(project_id) + return {"status": "error", "step": "frontend.start", "errors": ["No start command in manifest"]} + + frontend_log = self._create_frontend_log(project_path) + + frontend_process = self._start_process(frontend_cwd, start_cmd, frontend_log, port=frontend_port) + project.process = frontend_process + project.port = frontend_port + logger.info(f"[LIVING_UI:PIPELINE] Frontend starting on port {frontend_port}") + + server_ready = await self._wait_for_server(frontend_port, timeout=15) + if not server_ready: + log_tail = self._read_log_tail(frontend_log, 1000) + if frontend_process.poll() is not None: + err = f"Frontend process exited with code {frontend_process.returncode}" + else: + err = f"Frontend not responding on port {frontend_port}" + frontend_process.terminate() + project.process = None + await self.stop_backend(project_id) + return {"status": "error", "step": "frontend.health", "errors": [err, log_tail]} + + project.url = f"http://localhost:{frontend_port}" + logger.info(f"[LIVING_UI:PIPELINE] Frontend ready on port {frontend_port}") + + # === SUCCESS === + project.status = 'running' + project.error = None + self._save_projects() + self._save_launch_timestamp(project_path) + + logger.info(f"[LIVING_UI:PIPELINE] Launch complete for {project.name} ({project_id})") + if project.url: + 
logger.info(f"[LIVING_UI:PIPELINE] Frontend: {project.url}") + if project.backend_url: + logger.info(f"[LIVING_UI:PIPELINE] Backend: {project.backend_url}") + + return { + "status": "success", + "url": project.url, + "backend_url": project.backend_url, + "port": project.port, + } + + async def _launch_servers_only( + self, project_id: str, project: 'LivingUIProject', project_path: Path, pipeline: dict + ) -> dict: + """Fast path: start servers without running tests/build (no source changes detected).""" + backend_cfg = pipeline.get('backend') + frontend_cfg = pipeline.get('frontend') + + # Start backend + if backend_cfg: + backend_cwd = project_path / backend_cfg.get('cwd', 'backend') + backend_port = project.backend_port + if not backend_port: + backend_port = self._allocate_port() + project.backend_port = backend_port + + if not await self._ensure_port_available(backend_port): + return {"status": "error", "step": "backend.port", "errors": [f"Port {backend_port} occupied"]} + + start_cmd = backend_cfg.get('start', '') + if start_cmd: + logs_dir = backend_cwd / 'logs' + logs_dir.mkdir(parents=True, exist_ok=True) + log_file = logs_dir / 'subprocess_output.log' + + # Generate bridge token for integration proxy + from uuid import uuid4 + project.bridge_token = str(uuid4()) + + backend_process = self._start_process(backend_cwd, start_cmd, log_file, port=backend_port, project=project) + project.backend_process = backend_process + logger.info(f"[LIVING_UI:PIPELINE] Backend starting on port {backend_port} (fast)") + + health_url = backend_cfg.get('health') + if health_url: + healthy = await self._wait_for_health_check(health_url, timeout=20) + if not healthy: + log_tail = self._read_log_tail(log_file, 1000) + if backend_process.poll() is not None: + err = f"Backend process exited with code {backend_process.returncode}" + else: + err = f"Backend not responding at {health_url}" + backend_process.terminate() + project.backend_process = None + return {"status": "error", 
"step": "backend.health", "errors": [err, log_tail]} + + project.backend_url = f"http://localhost:{backend_port}" + logger.info(f"[LIVING_UI:PIPELINE] Backend healthy on port {backend_port}") + + # Start frontend + if frontend_cfg: + frontend_cwd = project_path / frontend_cfg.get('cwd', '.') + if str(frontend_cwd) == '.': + frontend_cwd = project_path + + frontend_port = project.port + if not frontend_port: + frontend_port = self._allocate_port() + project.port = frontend_port + + if not await self._ensure_port_available(frontend_port): + await self.stop_backend(project_id) + return {"status": "error", "step": "frontend.port", "errors": [f"Port {frontend_port} occupied"]} + + start_cmd = frontend_cfg.get('start', '') + if start_cmd: + frontend_log = self._create_frontend_log(project_path) + frontend_process = self._start_process(frontend_cwd, start_cmd, frontend_log, port=frontend_port) + project.process = frontend_process + project.port = frontend_port + logger.info(f"[LIVING_UI:PIPELINE] Frontend starting on port {frontend_port} (fast)") + + server_ready = await self._wait_for_server(frontend_port, timeout=15) + if not server_ready: + log_tail = self._read_log_tail(frontend_log, 1000) + if frontend_process.poll() is not None: + err = f"Frontend process exited with code {frontend_process.returncode}" + else: + err = f"Frontend not responding on port {frontend_port}" + frontend_process.terminate() + project.process = None + await self.stop_backend(project_id) + return {"status": "error", "step": "frontend.health", "errors": [err, log_tail]} + + project.url = f"http://localhost:{frontend_port}" + logger.info(f"[LIVING_UI:PIPELINE] Frontend ready on port {frontend_port}") + + project.status = 'running' + project.error = None + self._save_projects() + self._save_launch_timestamp(project_path) + + logger.info(f"[LIVING_UI:PIPELINE] Fast launch complete for {project.name} ({project_id})") + return {"status": "success", "url": project.url, "backend_url": 
project.backend_url, "port": project.port} + + async def _validate_backend_track( + self, project_id: str, project_path: Path, backend_cfg: dict, backend_cwd: Path + ) -> List[str]: + """ + Run backend validation: install → internal tests → unit + compatibility tests (parallel). + Returns list of error strings (empty = all passed). + """ + errors: List[str] = [] + + # 1. Install + install_cmd = backend_cfg.get('install') + if install_cmd and backend_cwd.exists(): + result = await self._run_pipeline_command(backend_cwd, install_cmd, step_name="backend.install") + if result["status"] == "error": + errors.append(f"[backend.install] {result['errors'][0] if result.get('errors') else 'install failed'}") + return errors # Can't test without dependencies + + # 2. Internal tests (must run first — generates test_discovery.json) + tests = backend_cfg.get('tests', []) + internal_tests = [t for t in tests if t['name'] == 'internal'] + other_tests = [t for t in tests if t['name'] != 'internal'] + + for test in internal_tests: + result = await self._run_pipeline_command( + backend_cwd, test['command'], step_name=f"backend.tests.{test['name']}" + ) + if result["status"] == "error" and test.get('required', True): + detailed = self._collect_test_errors(project_path, test['name']) + errors.extend(detailed or result.get("errors", [])) + + # 3. 
Remaining tests in parallel (unit + compatibility) + if other_tests: + parallel_tasks = [] + for test in other_tests: + parallel_tasks.append( + self._run_pipeline_command( + backend_cwd, test['command'], step_name=f"backend.tests.{test['name']}" + ) + ) + results = await asyncio.gather(*parallel_tasks) + + for test, result in zip(other_tests, results): + if result["status"] == "error" and test.get('required', True): + detailed = self._collect_test_errors(project_path, test['name']) + errors.extend(detailed or result.get("errors", [])) + + return errors + + async def _validate_frontend_track( + self, project_id: str, frontend_cfg: dict, frontend_cwd: Path + ) -> List[str]: + """ + Run frontend validation: install → build. + Returns list of error strings (empty = all passed). + """ + errors: List[str] = [] + + # 1. Install + install_cmd = frontend_cfg.get('install') + if install_cmd: + needs_install = not (frontend_cwd / 'node_modules').exists() + if needs_install: + result = await self._run_pipeline_command(frontend_cwd, install_cmd, step_name="frontend.install") + if result["status"] == "error": + errors.append(f"[frontend.install] {result['errors'][0] if result.get('errors') else 'install failed'}") + return errors # Can't build without dependencies + + # 2. Build + build_cmd = frontend_cfg.get('build') + if build_cmd: + result = await self._run_pipeline_command(frontend_cwd, build_cmd, step_name="frontend.build", timeout=240) + if result["status"] == "error": + build_errors = result.get("errors", ["build failed"]) + for err in build_errors: + errors.append(f"[frontend.build] {err}") + + return errors + + async def _run_pipeline_command( + self, cwd: Path, command: str, step_name: str, timeout: int = 1200 + ) -> dict: + """Run a single pipeline command. Returns {"status": "success"} or {"status": "error", ...}.""" + # Replace bare `pip`/`python`/`python3` with the current interpreter so + # they work on Windows where these names may be absent from PATH. 
+ if command.startswith("pip "): + command = f"{sys.executable} -m pip {command[4:]}" + elif command.startswith("python3 "): + command = f"{sys.executable} {command[8:]}" + elif command.startswith("python "): + command = f"{sys.executable} {command[7:]}" + + logger.info(f"[LIVING_UI:PIPELINE] [{step_name}] Running: {command}") + + try: + proc = await asyncio.create_subprocess_shell( + command, + cwd=str(cwd), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout) + stdout_str = stdout.decode('utf-8', errors='replace').strip() + stderr_str = stderr.decode('utf-8', errors='replace').strip() + + if proc.returncode == 0: + logger.info(f"[LIVING_UI:PIPELINE] [{step_name}] OK") + return {"status": "success"} + else: + # Combine stdout and stderr for error context + output = (stderr_str or stdout_str)[-1000:] + logger.error(f"[LIVING_UI:PIPELINE] [{step_name}] FAILED (exit code {proc.returncode})") + return { + "status": "error", + "step": step_name, + "errors": [output] if output else [f"Command failed with exit code {proc.returncode}"], + } + except asyncio.TimeoutError: + logger.error(f"[LIVING_UI:PIPELINE] [{step_name}] TIMEOUT ({timeout}s)") + return {"status": "error", "step": step_name, "errors": [f"Command timed out after {timeout}s"]} + except Exception as e: + logger.error(f"[LIVING_UI:PIPELINE] [{step_name}] ERROR: {e}") + return {"status": "error", "step": step_name, "errors": [str(e)]} + + async def _ensure_port_available(self, port: int) -> bool: + """Ensure a port is available, killing orphan processes if needed.""" + if not self._is_port_in_use(port): + return True + + logger.warning(f"[LIVING_UI:PIPELINE] Port {port} in use, attempting to free") + self._kill_process_on_port(port) + await asyncio.sleep(1) + + if self._is_port_in_use(port): + logger.error(f"[LIVING_UI:PIPELINE] Could not free port {port}") + return False + return True + + def _start_process( + 
self, cwd: Path, command: str, log_file: Path, port: int = 0, + project: "LivingUIProject" = None, extra_env: dict = None, + ) -> subprocess.Popen: + """Start a background process with output redirected to a log file.""" + # Replace bare `pip`/`python`/`python3` with the current interpreter so + # they work on Windows where these names may be absent from PATH. + if command.startswith("pip "): + command = f"{sys.executable} -m pip {command[4:]}" + elif command.startswith("python3 "): + command = f"{sys.executable} {command[8:]}" + elif command.startswith("python "): + command = f"{sys.executable} {command[7:]}" + + log_file.parent.mkdir(parents=True, exist_ok=True) + log_handle = open(log_file, 'a', encoding='utf-8') + log_handle.write(f"\n{'='*60}\n[{datetime.now().isoformat()}] Starting: {command}\n{'='*60}\n") + log_handle.flush() + + # Build env with integration bridge vars if project provided + env = os.environ.copy() + if extra_env: + env.update(extra_env) + if project and project.bridge_token: + bridge_port = int(os.environ.get("BROWSER_PORT", "7926")) + env["CRAFTBOT_BRIDGE_URL"] = f"http://localhost:{bridge_port}" + env["CRAFTBOT_BRIDGE_TOKEN"] = project.bridge_token + logger.info(f"[LIVING_UI] Bridge env injected: URL=http://localhost:{bridge_port}, token={project.bridge_token[:8]}...") + else: + logger.warning(f"[LIVING_UI] No bridge token for process: project={'yes' if project else 'no'}, token={'yes' if project and project.bridge_token else 'no'}") + + if os.name == 'nt': + process = subprocess.Popen( + command, + cwd=str(cwd), + env=env, + stdout=log_handle, + stderr=log_handle, + shell=True, + creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, 'CREATE_NO_WINDOW') else 0, + ) + else: + process = subprocess.Popen( + command, + cwd=str(cwd), + env=env, + stdout=log_handle, + stderr=log_handle, + shell=True, + ) + return process + + def _collect_test_errors(self, project_path: Path, test_name: str) -> List[str]: + """Read test result JSON 
files and extract error messages.""" + errors = [] + # Map test names to result files + file_map = { + "internal": "test_discovery.json", + "unit": "test_unit.json", + "compatibility": "test_compatibility.json", + "external": "test_results.json", + } + result_file = project_path / 'backend' / 'logs' / file_map.get(test_name, f"test_{test_name}.json") + if result_file.exists(): + try: + data = json.loads(result_file.read_text(encoding='utf-8')) + for err in data.get('errors', []): + errors.append(f"[{err.get('test', '?')}] {err.get('error', '?')}") + except Exception: + pass + return errors + + @staticmethod + def _cleanup_project_logs(project_path: Path) -> None: + """Clean up old log files so each launch/restart starts fresh.""" + log_files_to_clean = [ + project_path / 'backend' / 'logs' / 'subprocess_output.log', + project_path / 'backend' / 'logs' / 'frontend_console.log', + project_path / 'backend' / 'logs' / 'test_discovery.json', + project_path / 'backend' / 'logs' / 'test_unit.json', + project_path / 'backend' / 'logs' / 'test_compatibility.json', + project_path / 'backend' / 'logs' / 'test_results.json', + project_path / 'backend' / 'logs' / 'health_status.json', + project_path / 'logs' / 'frontend_output.log', # Legacy non-timestamped + project_path / 'backend' / 'logs' / 'latest.log', # Legacy pointer file + ] + for log_file in log_files_to_clean: + try: + if log_file.exists(): + log_file.unlink() + except Exception: + pass + # Clean up old session logs — keep only the 5 most recent of each type + backend_logs_dir = project_path / 'backend' / 'logs' + if backend_logs_dir.exists(): + session_logs = sorted(backend_logs_dir.glob("backend_*.log"), reverse=True) + for old_log in session_logs[5:]: + try: + old_log.unlink() + except Exception: + pass + frontend_logs_dir = project_path / 'logs' + if frontend_logs_dir.exists(): + session_logs = sorted(frontend_logs_dir.glob("frontend_*.log"), reverse=True) + for old_log in session_logs[5:]: + try: + 
old_log.unlink() + except Exception: + pass + + logger.debug(f"[LIVING_UI:PIPELINE] Cleaned up old log files") + + @staticmethod + def _create_frontend_log(project_path: Path) -> Path: + """Create a timestamped frontend log file path.""" + logs_dir = project_path / 'logs' + logs_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + return logs_dir / f"frontend_{timestamp}.log" + + @staticmethod + def _has_files_changed(project_path: Path) -> bool: + """Check if any source files changed since last successful launch.""" + last_launch_file = project_path / '.last_launch' + if not last_launch_file.exists(): + return True # No record = assume changed + + try: + last_launch_time = last_launch_file.stat().st_mtime + except Exception: + return True + + source_extensions = {'.py', '.ts', '.tsx', '.js', '.jsx', '.json', '.html', '.css', '.md'} + skip_dirs = {'node_modules', '__pycache__', 'dist', 'logs', '.git'} + + for filepath in project_path.rglob('*'): + if filepath.is_file() and filepath.suffix in source_extensions: + if any(skip in filepath.parts for skip in skip_dirs): + continue + if filepath.stat().st_mtime > last_launch_time: + return True + return False + + @staticmethod + def _patch_theme_listener(project_path: Path) -> None: + """Inject CraftBot theme-sync listener into index.html if not already present.""" + index_html = project_path / 'index.html' + if not index_html.exists(): + return + try: + content = index_html.read_text(encoding='utf-8') + if 'craftbot-theme-request' in content: + return # Already patched + snippet = ( + '\n \n' + ' \n' + ) + patched = content.replace('', snippet + '', 1) + index_html.write_text(patched, encoding='utf-8') + logger.info(f"[LIVING_UI] Patched theme listener into {index_html}") + except Exception as e: + logger.warning(f"[LIVING_UI] Could not patch index.html: {e}") + + @staticmethod + def _save_launch_timestamp(project_path: Path) -> None: + """Save current time as last successful 
launch timestamp.""" + last_launch_file = project_path / '.last_launch' + try: + last_launch_file.write_text(datetime.now().isoformat(), encoding='utf-8') + except Exception: + pass + + @staticmethod + def _read_log_tail(log_file: Path, chars: int = 1000) -> str: + """Read the last N characters of a log file.""" + try: + content = log_file.read_text(encoding='utf-8') + return content[-chars:] if len(content) > chars else content + except Exception: + return '(could not read log)' + + async def launch_backend(self, project_id: str) -> bool: + """ + Launch the backend (FastAPI) server for a Living UI project. + + The backend holds all state and persists to SQLite. + It should be launched before the frontend. + + Args: + project_id: Project ID to launch backend for + + Returns: + True if backend launch was successful + """ + project = self.projects.get(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found: {project_id}") + return False + + project_path = Path(project.path) + backend_path = project_path / 'backend' + + if not backend_path.exists(): + logger.warning(f"[LIVING_UI] No backend directory for {project_id}") + return True # Not an error, just no backend + + # If backend port is occupied, allocate a new one instead of killing + backend_port = project.backend_port + if backend_port and self._is_port_in_use(backend_port): + logger.info(f"[LIVING_UI] Port {backend_port} occupied, allocating a new port...") + self._release_port(backend_port) + backend_port = self._allocate_port() + project.backend_port = backend_port + logger.info(f"[LIVING_UI] Allocated new backend port: {backend_port}") + + # Allocate port if needed + if not backend_port: + backend_port = self._allocate_port() + project.backend_port = backend_port + + try: + # Start the FastAPI backend using uvicorn + logger.info(f"[LIVING_UI] Starting backend for {project_id} on port {backend_port}") + + # Backend has its own file-based logger (logger.py in template), + # but also capture 
subprocess stdout/stderr to a fallback log file + # so we can diagnose startup crashes before the app logger initializes + logs_dir = backend_path / 'logs' + logs_dir.mkdir(parents=True, exist_ok=True) + subprocess_log = logs_dir / 'subprocess_output.log' + subprocess_log_handle = open(subprocess_log, 'a', encoding='utf-8') + subprocess_log_handle.write(f"\n{'='*60}\n[{datetime.now().isoformat()}] Starting uvicorn on port {backend_port}\n{'='*60}\n") + subprocess_log_handle.flush() + + # Generate bridge token for integration proxy + from uuid import uuid4 + bridge_token = str(uuid4()) + project.bridge_token = bridge_token + + # Build env with integration bridge vars + bridge_port = int(os.environ.get("BROWSER_PORT", "7926")) + backend_env = os.environ.copy() + backend_env["CRAFTBOT_BRIDGE_URL"] = f"http://localhost:{bridge_port}" + backend_env["CRAFTBOT_BRIDGE_TOKEN"] = bridge_token + + # Use python -m uvicorn to run the backend + if os.name == 'nt': + # Windows + backend_process = subprocess.Popen( + [sys.executable, '-m', 'uvicorn', 'main:app', '--host', '0.0.0.0', '--port', str(backend_port)], + cwd=str(backend_path), + env=backend_env, + stdout=subprocess_log_handle, + stderr=subprocess_log_handle, + shell=True, + creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, 'CREATE_NO_WINDOW') else 0, + ) + else: + # Linux/Mac + backend_process = subprocess.Popen( + [sys.executable, '-m', 'uvicorn', 'main:app', '--host', '0.0.0.0', '--port', str(backend_port)], + cwd=str(backend_path), + env=backend_env, + stdout=subprocess_log_handle, + stderr=subprocess_log_handle, + ) + + project.backend_process = backend_process + + # Wait for health check to pass + health_url = f"http://localhost:{backend_port}/health" + logger.info(f"[LIVING_UI] Waiting for backend health check at {health_url}...") + backend_ready = await self._wait_for_health_check(health_url, timeout=20) + + if not backend_ready: + # Backend didn't start - read the subprocess log for diagnostics + 
subprocess_log_handle.flush() + try: + recent_output = subprocess_log.read_text(encoding='utf-8')[-1000:] + except Exception: + recent_output = '(could not read subprocess log)' + if backend_process.poll() is not None: + logger.error(f"[LIVING_UI] Backend process exited with code {backend_process.returncode}. Log tail:\n{recent_output}") + else: + logger.error(f"[LIVING_UI] Backend not responding on port {backend_port}. Log tail:\n{recent_output}") + backend_process.terminate() + project.backend_process = None + subprocess_log_handle.close() + return False + + project.backend_url = f"http://localhost:{backend_port}" + logger.info(f"[LIVING_UI] Backend started successfully on port {backend_port}") + return True + + except Exception as e: + logger.error(f"[LIVING_UI] Failed to launch backend: {e}") + return False + + async def stop_backend(self, project_id: str) -> bool: + """ + Stop the backend server for a Living UI project. + + Args: + project_id: Project ID to stop backend for + + Returns: + True if stop was successful + """ + project = self.projects.get(project_id) + if not project: + return False + + if project.backend_process: + self._terminate_process(project.backend_process) + project.backend_process = None + + # Also try to kill by port in case process reference is stale + if project.backend_port and self._is_port_in_use(project.backend_port): + self._kill_process_on_port(project.backend_port) + + project.backend_url = None + logger.info(f"[LIVING_UI] Stopped backend for {project_id}") + return True + + def _terminate_process(self, process: subprocess.Popen) -> None: + """Terminate a subprocess, killing the entire process tree on Windows.""" + try: + if os.name == 'nt': + # On Windows with shell=True, terminate() only kills cmd.exe, + # not the child python/uvicorn. Kill the whole tree via taskkill. 
+ subprocess.run( + ['taskkill', '/T', '/F', '/PID', str(process.pid)], + capture_output=True, shell=True + ) + else: + process.terminate() + process.wait(timeout=5) + except (subprocess.TimeoutExpired, Exception): + try: + process.kill() + except Exception: + pass + + def _kill_process_on_port(self, port: int) -> bool: + """ + Kill any process listening on the specified port (Windows-specific). + + Args: + port: The port to free + + Returns: + True if a process was killed, False otherwise + """ + if os.name != 'nt': + # Linux/Mac: use lsof and kill + try: + result = subprocess.run( + ['lsof', '-ti', f':{port}'], + capture_output=True, + text=True + ) + if result.stdout.strip(): + pids = result.stdout.strip().split('\n') + for pid in pids: + subprocess.run(['kill', '-9', pid], capture_output=True) + logger.info(f"[LIVING_UI] Killed process(es) on port {port}") + return True + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to kill process on port {port}: {e}") + return False + else: + # Windows: use netstat and taskkill + try: + result = subprocess.run( + ['netstat', '-ano'], + capture_output=True, + text=True, + shell=True + ) + killed = False + for line in result.stdout.split('\n'): + if f':{port}' in line and 'LISTENING' in line: + parts = line.split() + if len(parts) >= 5: + pid = parts[-1] + # /T kills entire process tree (shell + child processes) + subprocess.run( + ['taskkill', '/T', '/F', '/PID', pid], + capture_output=True, + shell=True + ) + logger.info(f"[LIVING_UI] Killed process tree {pid} on port {port}") + killed = True + if killed: + return True + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to kill process on port {port}: {e}") + return False + + def cleanup_on_startup(self) -> None: + """ + Clean up orphan processes and folders on startup. + + This should be called after loading projects to: + 1. Kill any orphan Living UI server processes on tracked ports (frontend + backend) + 2. 
Delete project folders not tracked in the registry + 3. Reset all project statuses to 'stopped' + + Optimized to: + - Only check ports that are tracked in projects (not all 100 ports) + - Use a single netstat call to get all port info at once + """ + logger.info("[LIVING_UI] Running startup cleanup...") + + # 1. Kill orphan processes - on both frontend and backend ports + killed_count = 0 + tracked_ports = set() + for p in self.projects.values(): + if p.port: + tracked_ports.add(p.port) + if p.backend_port: + tracked_ports.add(p.backend_port) + + if tracked_ports: + # Get all port -> PID mappings with a single system call + port_pids = self._get_pids_on_ports(tracked_ports) + + # Kill processes on tracked ports + for port, pid in port_pids.items(): + if self._kill_process_by_pid(pid): + killed_count += 1 + logger.info(f"[LIVING_UI] Killed process {pid} on port {port}") + + if killed_count > 0: + logger.info(f"[LIVING_UI] Killed {killed_count} orphan process(es)") + + # 2. Clean up orphan project folders + orphan_count = self._cleanup_orphan_folders() + if orphan_count > 0: + logger.info(f"[LIVING_UI] Removed {orphan_count} orphan folder(s)") + + # 3. Reset all project statuses to 'stopped' and clear process references + for project in self.projects.values(): + if project.status == 'running': + project.status = 'stopped' + project.process = None + project.backend_process = None + project.url = None + project.backend_url = None + self._save_projects() + + logger.info("[LIVING_UI] Startup cleanup complete") + + def _cleanup_orphan_folders(self) -> int: + """ + Delete project folders that are not tracked in the registry. 
+ + Returns: + Number of orphan folders deleted + """ + if not self.living_ui_dir.exists(): + return 0 + + tracked_paths = {Path(p.path) for p in self.projects.values()} + orphan_count = 0 + + for folder in self.living_ui_dir.iterdir(): + if folder.is_dir() and folder not in tracked_paths: + try: + shutil.rmtree(folder) + logger.info(f"[LIVING_UI] Deleted orphan folder: {folder.name}") + orphan_count += 1 + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to delete orphan folder {folder}: {e}") + + return orphan_count + + def _generate_id(self) -> str: + """Generate a unique project ID.""" + return str(uuid.uuid4())[:8] + + def _sanitize_name(self, name: str) -> str: + """Sanitize project name for use in file paths.""" + # Replace spaces and special characters + sanitized = ''.join(c if c.isalnum() or c in '-_' else '_' for c in name) + return sanitized.lower() + + async def create_project( + self, + name: str, + description: str, + features: List[str] = None, + data_source: Optional[str] = None, + theme: str = 'system' + ) -> LivingUIProject: + """ + Create a new Living UI project from template. 
+ + Args: + name: Project name + description: Project description + features: List of requested features + data_source: Optional API URL or data source description + theme: UI theme (light, dark, system) + + Returns: + Created LivingUIProject instance + """ + project_id = self._generate_id() + sanitized_name = self._sanitize_name(name) + project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" + + # Allocate ports + frontend_port = self._allocate_port() + backend_port = self._allocate_port() + + # Copy template + try: + shutil.copytree(self.template_path, project_path) + logger.info(f"[LIVING_UI] Copied template to {project_path}") + except Exception as e: + self._release_port(frontend_port) + self._release_port(backend_port) + raise RuntimeError(f"Failed to copy template: {e}") + + # Replace template placeholders (including ports for source code) + self._replace_placeholders(project_path, { + '{{PROJECT_ID}}': project_id, + '{{PROJECT_NAME}}': name, + '{{PROJECT_DESCRIPTION}}': description, + '{{PORT}}': str(frontend_port), + '{{BACKEND_PORT}}': str(backend_port), + '{{THEME}}': theme, + '{{CREATED_AT}}': datetime.now().isoformat(), + '{{FEATURES}}': ', '.join(features or []), + }) + + # Create project instance + project = LivingUIProject( + id=project_id, + name=name, + description=description, + path=str(project_path), + status='created', + port=frontend_port, + backend_port=backend_port, + features=features or [], + theme=theme, + ) + + self.projects[project_id] = project + self._save_projects() + + logger.info(f"[LIVING_UI] Created project: {name} ({project_id})") + return project + + def _replace_placeholders(self, directory: Path, replacements: Dict[str, str]) -> None: + """Replace placeholders in all text files in directory.""" + text_extensions = {'.ts', '.tsx', '.js', '.jsx', '.json', '.html', '.css', '.md', '.py', '.txt', '.env'} + + for filepath in directory.rglob('*'): + if filepath.is_file() and filepath.suffix in text_extensions: + try: + 
content = filepath.read_text(encoding='utf-8') + modified = False + for placeholder, value in replacements.items(): + if placeholder in content: + content = content.replace(placeholder, value) + modified = True + if modified: + filepath.write_text(content, encoding='utf-8') + except Exception as e: + logger.warning(f"[LIVING_UI] Failed to process {filepath}: {e}") + + async def install_from_marketplace( + self, + app_id: str, + app_name: str, + app_description: str, + custom_fields: Optional[Dict[str, str]] = None, + repo_url: str = "https://github.com/CraftOS-dev/living-ui-marketplace", + ) -> Dict[str, Any]: + """ + Install a pre-built Living UI app from the marketplace. + + Downloads the app from a GitHub repo, sets up the project, + and runs the launch pipeline. + + Args: + app_id: The app folder name in the marketplace repo + custom_fields: Optional dict of custom placeholder replacements (e.g., {"APP_TITLE": "My Board"}) + app_name: Display name for the project + app_description: App description + repo_url: GitHub repo URL + + Returns: + Dict with status, project info, or error + """ + import urllib.request + import zipfile + import io + + project_id = self._generate_id() + sanitized_name = self._sanitize_name(app_name) + project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" + + try: + # Download the repo as a zip + # GitHub API: /{owner}/{repo}/zipball/main + parts = repo_url.rstrip('/').split('/') + owner = parts[-2] + repo = parts[-1] + zip_url = f"https://github.com/{owner}/{repo}/archive/refs/heads/main.zip" + + logger.info(f"[LIVING_UI:MARKETPLACE] Downloading {app_id} from {zip_url}") + + import ssl, certifi + ssl_ctx = ssl.create_default_context(cafile=certifi.where()) + req = urllib.request.Request(zip_url, headers={'User-Agent': 'CraftBot'}) + response = urllib.request.urlopen(req, timeout=60, context=ssl_ctx) + zip_data = response.read() + + # Extract just the app folder from the zip + with zipfile.ZipFile(io.BytesIO(zip_data)) as 
zf: + # GitHub zips have a root folder like "repo-main/" + root_prefix = None + app_prefix = None + + for name in zf.namelist(): + if root_prefix is None: + root_prefix = name.split('/')[0] + '/' + # Look for the app folder: root/{app_id}/ + if f'/{app_id}/' in name: + if app_prefix is None: + # Find the prefix up to and including the app folder + idx = name.index(f'{app_id}/') + app_prefix = name[:idx + len(app_id) + 1] + break + + if not app_prefix: + return {"status": "error", "error": f"App '{app_id}' not found in marketplace repo"} + + # Extract app files to project path + project_path.mkdir(parents=True, exist_ok=True) + for member in zf.namelist(): + if member.startswith(app_prefix) and not member.endswith('/'): + # Get the relative path within the app folder + rel_path = member[len(app_prefix):] + if rel_path: + target = project_path / rel_path + target.parent.mkdir(parents=True, exist_ok=True) + with zf.open(member) as src, open(target, 'wb') as dst: + dst.write(src.read()) + + logger.info(f"[LIVING_UI:MARKETPLACE] Extracted {app_id} to {project_path}") + + # Allocate ports + frontend_port = self._allocate_port() + backend_port = self._allocate_port() + + # Replace placeholders (marketplace apps use the same template placeholders) + # Build replacements — system placeholders + custom fields + replacements = { + '{{PROJECT_ID}}': project_id, + '{{PROJECT_NAME}}': app_name, + '{{PROJECT_DESCRIPTION}}': app_description, + '{{PORT}}': str(frontend_port), + '{{BACKEND_PORT}}': str(backend_port), + '{{THEME}}': 'system', + '{{CREATED_AT}}': datetime.now().isoformat(), + '{{FEATURES}}': '', + } + # Add custom fields from marketplace template (e.g., APP_TITLE) + if custom_fields: + for key, value in custom_fields.items(): + replacements[f'{{{{{key}}}}}'] = value + + self._replace_placeholders(project_path, replacements) + + # Create project instance + project = LivingUIProject( + id=project_id, + name=app_name, + description=app_description, + 
path=str(project_path), + status='created', + port=frontend_port, + backend_port=backend_port, + ) + + self.projects[project_id] = project + self._save_projects() + + logger.info(f"[LIVING_UI:MARKETPLACE] Created project: {app_name} ({project_id})") + + # Run the launch pipeline + result = await self.launch_and_verify(project_id) + + if result["status"] == "success": + return { + "status": "success", + "project": project.to_dict(), + "url": result.get("url"), + "backend_url": result.get("backend_url"), + } + else: + return { + "status": "error", + "error": f"Launch failed at {result.get('step', 'unknown')}: {'; '.join(result.get('errors', [])[:3])}", + "project": project.to_dict(), + } + + except urllib.error.URLError as e: + logger.error(f"[LIVING_UI:MARKETPLACE] Download failed: {e}") + return {"status": "error", "error": f"Failed to download from marketplace: {e}"} + except Exception as e: + logger.error(f"[LIVING_UI:MARKETPLACE] Install failed: {e}") + # Clean up on failure + if project_path.exists(): + try: + shutil.rmtree(project_path) + except Exception: + pass + return {"status": "error", "error": f"Installation failed: {e}"} + + def update_project_status(self, project_id: str, status: str, error: Optional[str] = None) -> None: + """Update project status.""" + if project_id in self.projects: + self.projects[project_id].status = status + if error: + self.projects[project_id].error = error + self._save_projects() + + def set_project_task(self, project_id: str, task_id: str) -> None: + """Associate a task ID with a project.""" + if project_id in self.projects: + self.projects[project_id].task_id = task_id + + def get_project_by_task_id(self, task_id: str) -> Optional["LivingUIProject"]: + """Return the Living UI project linked to a given task_id, or None.""" + if not task_id: + return None + for project in self.projects.values(): + if project.task_id == task_id: + return project + return None + + async def create_development_task(self, project_id: str) -> 
Optional[str]: + """ + Create a task for the agent to develop a Living UI and fire the trigger. + + This creates the task and immediately fires a trigger to start execution. + The pattern follows how memory processing and scheduled tasks work. + + Args: + project_id: The Living UI project ID to develop + + Returns: + The task ID if successful, None otherwise + """ + from app.trigger import Trigger + + project = self.projects.get(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found: {project_id}") + return None + + if not self._task_manager: + logger.error("[LIVING_UI] Task manager not bound") + return None + + if not self._trigger_queue: + logger.error("[LIVING_UI] Trigger queue not bound") + return None + + # Build the task instruction + features_str = ', '.join(project.features) if project.features else 'None specified' + from agent_core.core.prompts.application import LIVING_UI_TASK_INSTRUCTION + task_instruction = LIVING_UI_TASK_INSTRUCTION.format( + project_id=project.id, + project_name=project.name, + description=project.description, + features=features_str, + theme=project.theme, + project_path=project.path, + ) + + try: + # Create the task (synchronous method) + # Include living_ui action set so agent can call living_ui_notify_ready + task_id = self._task_manager.create_task( + task_name=f"Create Living UI: {project.name}", + task_instruction=task_instruction, + mode="complex", + action_sets=["file_operations", "code_execution", "living_ui", "core"], + selected_skills=["living-ui-creator"], + ) + + # Associate task with project + self.set_project_task(project_id, task_id) + + # Update project status + self.update_project_status(project_id, "creating") + + # Create and fire the trigger to start execution + trigger = Trigger( + fire_at=time.time(), + priority=50, + next_action_description=f"[Living UI] Create: {project.name}", + session_id=task_id, + payload={ + "type": "living_ui_development", + "project_id": project_id, + }, + ) + 
await self._trigger_queue.put(trigger) + + logger.info(f"[LIVING_UI] Created task {task_id} and fired trigger for project {project_id}") + return task_id + + except Exception as e: + logger.error(f"[LIVING_UI] Failed to create development task: {e}") + self.update_project_status(project_id, "error", str(e)) + return None + + async def launch_project(self, project_id: str) -> bool: + """ + Launch a Living UI project. + + Thin wrapper around launch_and_verify() that returns bool for + backwards compatibility (watchdog, auto_launch_projects, restart). + Includes stale status detection. + """ + project = self.projects.get(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found: {project_id}") + return False + + if project.status == 'running': + # Verify processes are actually alive before trusting the stored status + actually_alive = True + + if project.process is not None and project.process.poll() is not None: + logger.warning(f"[LIVING_UI] Frontend process dead for {project_id} (stale status)") + project.process = None + actually_alive = False + + if project.backend_process is not None and project.backend_process.poll() is not None: + logger.warning(f"[LIVING_UI] Backend process dead for {project_id} (stale status)") + project.backend_process = None + actually_alive = False + + if actually_alive and project.port and not self._is_port_in_use(project.port): + logger.warning(f"[LIVING_UI] Frontend port {project.port} not responding for {project_id}") + actually_alive = False + + if actually_alive: + logger.info(f"[LIVING_UI] Project already running: {project_id}") + return True + + # Status was stale — reset and fall through to full launch + logger.info(f"[LIVING_UI] Project {project_id} status was stale, relaunching...") + project.status = 'stopped' + project.url = None + project.backend_url = None + + result = await self.launch_and_verify(project_id) + return result["status"] == "success" + + # 
------------------------------------------------------------------ + # Integration bridge helpers + # ------------------------------------------------------------------ + + # ------------------------------------------------------------------ + # External app support + # ------------------------------------------------------------------ + + async def _launch_single_process( + self, project_id: str, project: 'LivingUIProject', project_path: Path, app_cfg: dict + ) -> dict: + """Launch a single-process app with sidecar proxy for logging/health.""" + # Allocate two ports: proxy (user-facing) and app (internal) + proxy_port = project.port + if not proxy_port: + proxy_port = self._allocate_port() + project.port = proxy_port + + app_port = project.backend_port + if not app_port: + app_port = self._allocate_port() + project.backend_port = app_port + + if not await self._ensure_port_available(proxy_port): + return {"status": "error", "step": "app.port", "errors": [f"Port {proxy_port} occupied"]} + if not await self._ensure_port_available(app_port): + return {"status": "error", "step": "app.port", "errors": [f"Port {app_port} occupied"]} + + cwd = project_path / app_cfg.get('cwd', '.') + + # Install step (optional) + install_cmd = app_cfg.get('install', '') + if install_cmd: + logger.info(f"[LIVING_UI:PIPELINE] [app.install] Running: {install_cmd}") + result = await self._run_pipeline_command(cwd, install_cmd, "app.install") + if result["status"] == "error": + return result + + # Start the app on the internal port + start_cmd = app_cfg.get('start', '') + if not start_cmd: + return {"status": "error", "step": "app.start", "errors": ["No start command in manifest"]} + + logs_dir = project_path / 'logs' + logs_dir.mkdir(parents=True, exist_ok=True) + log_file = logs_dir / 'app_output.log' + + # Build extra env vars — use app_port for the app itself + extra_env = {} + for k, v in app_cfg.get('env', {}).items(): + extra_env[k] = str(v).replace('{{PORT}}', 
str(app_port)).replace('{{BACKEND_PORT}}', str(app_port)) + # Always override PORT with the internal app port — manifest may have a stale hardcoded value + extra_env['PORT'] = str(app_port) + + # Replace port placeholders in start command with internal app port + start_cmd = start_cmd.replace('{{PORT}}', str(app_port)).replace('{{BACKEND_PORT}}', str(app_port)) + + # Generate bridge token + from uuid import uuid4 + project.bridge_token = str(uuid4()) + + app_process = self._start_process(cwd, start_cmd, log_file, port=app_port, project=project, extra_env=extra_env) + project.app_process = app_process + logger.info(f"[LIVING_UI:PIPELINE] App starting on internal port {app_port}") + + # Health check on the app's internal port + health_cfg = app_cfg.get('health', {}) + # Replace port placeholders in health URL with app_port + if isinstance(health_cfg, dict) and 'url' in health_cfg: + health_cfg = dict(health_cfg) + health_cfg['url'] = health_cfg['url'].replace('{{PORT}}', str(app_port)).replace('{{BACKEND_PORT}}', str(app_port)) + elif isinstance(health_cfg, str): + health_cfg = health_cfg.replace('{{PORT}}', str(app_port)).replace('{{BACKEND_PORT}}', str(app_port)) + + healthy = await self._check_health_with_strategy(health_cfg, app_port, app_process) + if not healthy: + log_tail = self._read_log_tail(log_file, 1000) + if app_process.poll() is not None: + err = f"App process exited with code {app_process.returncode}" + else: + err = f"App not responding on port {app_port}" + app_process.terminate() + project.app_process = None + return {"status": "error", "step": "app.health", "errors": [err, log_tail]} + + logger.info(f"[LIVING_UI:PIPELINE] App healthy on internal port {app_port}") + + # Start the sidecar proxy on the user-facing port + sidecar_path = Path(__file__).parent.parent / 'data' / 'living_ui_sidecar' / 'proxy.py' + if sidecar_path.exists(): + sidecar_cmd = f"python \"{sidecar_path}\" --app-port {app_port} --proxy-port {proxy_port}" + sidecar_log = logs_dir 
/ 'sidecar_output.log' + sidecar_process = self._start_process(project_path, sidecar_cmd, sidecar_log, port=proxy_port, project=project) + project.process = sidecar_process # Store sidecar as frontend process (gets stopped with stop_project) + logger.info(f"[LIVING_UI:PIPELINE] Sidecar proxy starting: port {proxy_port} → app port {app_port}") + + # Wait for sidecar to be ready + sidecar_healthy = await self._wait_for_health_check(f"http://localhost:{proxy_port}/health", timeout=15) + if not sidecar_healthy: + logger.warning(f"[LIVING_UI:PIPELINE] Sidecar not responding, app still accessible directly on port {app_port}") + project.url = f"http://localhost:{app_port}" + else: + project.url = f"http://localhost:{proxy_port}" + logger.info(f"[LIVING_UI:PIPELINE] Sidecar ready on port {proxy_port}") + else: + logger.warning("[LIVING_UI:PIPELINE] Sidecar proxy not found, running app without proxy") + project.url = f"http://localhost:{app_port}" + + project.backend_url = f"http://localhost:{app_port}" + project.status = 'running' + self._save_projects() + + logger.info(f"[LIVING_UI:PIPELINE] App ready: {project.url}") + return { + "status": "success", + "url": project.url, + "port": proxy_port, + } + + @staticmethod + def _append_node_args(command: str, extra_args: str) -> str: + """Append CLI args to an npm/pnpm/yarn run command using `--`, or to a direct binary call.""" + if re.match(r'^\s*(?:npm|pnpm|yarn)\s+run\s+\S+', command): + return f"{command} {extra_args}" if ' -- ' in command else f"{command} -- {extra_args}" + return f"{command} {extra_args}" + + def _normalize_node_start_command( + self, project_path: Path, start_command: str, env: Dict[str, str] + ) -> Tuple[str, Dict[str, str]]: + """ + Adjust an imported Node.js project's start command + env so it embeds cleanly + in CraftBot's iframe: + - bind to the allocated PORT (config-file ports often override env vars) + - suppress system-browser auto-open (Vite/CRA's default behavior) + + Returns (start_command, 
env) — possibly modified. Falls back to the inputs + on any parse error. + """ + new_env = dict(env) if env else {} + new_start = start_command + + pkg_json_path = project_path / 'package.json' + if not pkg_json_path.exists(): + return new_start, new_env + + try: + pkg = json.loads(pkg_json_path.read_text(encoding='utf-8')) + except Exception as e: + logger.warning(f"[LIVING_UI] Could not parse {pkg_json_path}, skipping start-command normalization: {e}") + return new_start, new_env + + deps = {**pkg.get('dependencies', {}), **pkg.get('devDependencies', {})} + scripts = pkg.get('scripts', {}) + + # If start_command is `npm/pnpm/yarn run X`, look up what X actually invokes + underlying = start_command + run_match = re.match(r'^\s*(?:npm|pnpm|yarn)\s+run\s+(\S+)', start_command) + if run_match: + underlying = scripts.get(run_match.group(1), '') + + def uses(name: str) -> bool: + return name in deps or bool(re.search(rf'\b{re.escape(name)}\b', underlying)) + + already_has_port = bool(re.search(r'(--port|-p\s|--hostname|-H\s)', new_start)) + + if uses('vite'): + # Vite: CLI --port overrides server.port; BROWSER=none suppresses server.open auto-open + new_env.setdefault('BROWSER', 'none') + if not already_has_port: + new_start = self._append_node_args( + new_start, '--port {{PORT}} --host 127.0.0.1 --strictPort' + ) + elif uses('next'): + # Next.js: -p PORT, -H HOST. Doesn't auto-open by default. 
+ if not already_has_port: + new_start = self._append_node_args(new_start, '-p {{PORT}} -H 127.0.0.1') + elif uses('react-scripts') or uses('webpack-dev-server'): + # CRA / webpack-dev-server: respect PORT env, BROWSER=none disables auto-open + new_env.setdefault('BROWSER', 'none') + elif uses('@vue/cli-service') or uses('vue-cli-service'): + new_env.setdefault('BROWSER', 'none') + if not already_has_port: + new_start = self._append_node_args( + new_start, '--port {{PORT}} --host 127.0.0.1' + ) + else: + # Generic Node app — defensively suppress browser auto-open + new_env.setdefault('BROWSER', 'none') + + if new_start != start_command or new_env != env: + logger.info( + f"[LIVING_UI] Normalized Node start command: '{start_command}' -> '{new_start}' " + f"(env additions: {set(new_env) - set(env or {})})" + ) + + return new_start, new_env + + async def import_external_app( + self, + name: str, + description: str, + source_path: str, + app_runtime: str = 'unknown', + install_command: str = '', + start_command: str = '', + health_strategy: str = 'tcp', + health_url: str = '', + port_env_var: str = 'PORT', + ) -> Dict[str, Any]: + """Import an external app as a Living UI project.""" + project_id = self._generate_id() + sanitized_name = self._sanitize_name(name) + project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" + + try: + # Copy source to workspace + shutil.copytree(source_path, project_path) + logger.info(f"[LIVING_UI] Copied external app to {project_path}") + except Exception as e: + return {"status": "error", "error": f"Failed to copy app: {e}"} + + # Allocate two ports: proxy (user-facing) and app (internal) + proxy_port = self._allocate_port() + app_port = self._allocate_port() + + # Create config directory and manifest + config_dir = project_path / 'config' + config_dir.mkdir(exist_ok=True) + logs_dir = project_path / 'logs' + logs_dir.mkdir(exist_ok=True) + + # Build health config — uses app_port (internal) + health_cfg: Any = {"strategy": 
health_strategy} + if health_strategy == 'http_get': + health_cfg["url"] = health_url or f"http://localhost:{{{{PORT}}}}" + health_cfg["timeout"] = 30 + + env_dict: Dict[str, str] = {port_env_var: "{{PORT}}"} if port_env_var else {} + + # Auto-normalize Node.js dev-server start commands so the app binds to + # CraftBot's allocated port and doesn't pop a system browser tab. + if app_runtime == 'node': + start_command, env_dict = self._normalize_node_start_command( + project_path, start_command, env_dict + ) + + # Generate manifest + manifest = { + "id": project_id, + "name": name, + "version": "1.0.0", + "description": description, + "projectType": "external", + "appRuntime": app_runtime, + "livingUIVersion": "1.0", + "ports": {"frontend": proxy_port, "backend": app_port}, + "pipeline": { + "app": { + "cwd": ".", + "install": install_command, + "start": start_command, + "env": env_dict, + "health": health_cfg, + } + }, + "agentAwareness": {"enabled": False, "observationMode": "external"}, + } + + manifest_path = config_dir / 'manifest.json' + manifest_path.write_text(json.dumps(manifest, indent=2)) + + project = LivingUIProject( + id=project_id, + name=name, + description=description, + path=str(project_path), + status='created', + port=proxy_port, + backend_port=app_port, + project_type='external', + app_runtime=app_runtime, + ) + + self.projects[project_id] = project + self._save_projects() + + logger.info(f"[LIVING_UI] Imported external app: {name} ({project_id})") + return { + "status": "success", + "project": project.to_dict(), + } + + async def _check_health_with_strategy(self, health_cfg, port: int, process, timeout: int = 30) -> bool: + """Check health using configured strategy (http_get, tcp, process_alive, or URL string).""" + if isinstance(health_cfg, str): + # Backward compat: plain URL string + return await self._wait_for_health_check(health_cfg, timeout=timeout) + + if not isinstance(health_cfg, dict): + # No health config — just check if port is 
listening + return await self._wait_for_server(port, timeout=timeout) + + strategy = health_cfg.get('strategy', 'tcp') + timeout = health_cfg.get('timeout', timeout) + + if strategy == 'http_get': + url = health_cfg.get('url', f'http://localhost:{port}') + url = url.replace('{{PORT}}', str(port)) + return await self._wait_for_health_check(url, timeout=timeout) + elif strategy == 'tcp': + return await self._wait_for_server(port, timeout=timeout) + elif strategy == 'process_alive': + await asyncio.sleep(2) + return process.poll() is None + + return await self._wait_for_server(port, timeout=timeout) + + def validate_bridge_token(self, token: str) -> Optional[str]: + """ + Validate a bridge token and return the associated project ID. + + Returns: + project_id if token is valid, None otherwise. + """ + for project_id, project in self.projects.items(): + if project.bridge_token and project.bridge_token == token: + return project_id + return None + + async def stop_all_projects(self) -> None: + """Stop all running Living UI projects. Called during agent shutdown.""" + running = [pid for pid, p in self.projects.items() if p.status == 'running'] + if not running: + return + logger.info(f"[LIVING_UI] Shutting down {len(running)} running project(s)...") + for project_id in running: + try: + await self.stop_project(project_id) + except Exception as e: + logger.warning(f"[LIVING_UI] Error stopping {project_id} during shutdown: {e}") + logger.info("[LIVING_UI] All projects stopped") + + async def stop_project(self, project_id: str, stop_backend: bool = True) -> bool: + """ + Stop a running Living UI project (frontend and optionally backend). 
+ + Args: + project_id: Project ID to stop + stop_backend: Whether to also stop the backend (default: True) + + Returns: + True if stop was successful + """ + project = self.projects.get(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found: {project_id}") + return False + + # Stop app process (external/single-process apps) + if project.app_process: + self._terminate_process(project.app_process) + project.app_process = None + + # Stop frontend process + if project.process: + self._terminate_process(project.process) + project.process = None + + # Also kill by port in case process reference is stale + if project.port and self._is_port_in_use(project.port): + self._kill_process_on_port(project.port) + + project.url = None + + # Stop backend if requested + if stop_backend: + await self.stop_backend(project_id) + + project.status = 'stopped' + self._save_projects() + + logger.info(f"[LIVING_UI] Stopped project: {project_id}") + return True + + async def delete_project(self, project_id: str) -> bool: + """ + Delete a Living UI project. 
+ + Args: + project_id: Project ID to delete + + Returns: + True if deletion was successful + """ + project = self.projects.get(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found: {project_id}") + return False + + # Stop tunnel if active + await self.stop_tunnel(project_id) + + # Stop if running + if project.status == 'running': + await self.stop_project(project_id) + + # Release ports + if project.port: + self._release_port(project.port) + if project.backend_port: + self._release_port(project.backend_port) + + # Delete project directory + project_path = Path(project.path) + if project_path.exists(): + try: + shutil.rmtree(project_path) + except Exception as e: + logger.error(f"[LIVING_UI] Failed to delete project directory: {e}") + + # Remove from registry + del self.projects[project_id] + self._save_projects() + + logger.info(f"[LIVING_UI] Deleted project: {project_id}") + return True + + def get_project(self, project_id: str) -> Optional[LivingUIProject]: + """Get a project by ID.""" + return self.projects.get(project_id) + + def list_projects(self) -> List[LivingUIProject]: + """List all projects.""" + return list(self.projects.values()) + + def export_project_zip(self, project_id: str) -> Path: + """Export a Living UI project as a ZIP file. + + Returns the path to the temporary ZIP file. Caller is responsible + for cleanup after serving the file. 
+ """ + project = self.projects.get(project_id) + if not project: + raise ValueError(f"Project {project_id} not found") + + project_path = Path(project.path) + if not project_path.exists(): + raise FileNotFoundError(f"Project directory not found: {project_path}") + + # Create a temp ZIP + tmp = tempfile.NamedTemporaryFile( + suffix='.zip', prefix=f'livingui_{self._sanitize_name(project.name)}_', + delete=False, + ) + tmp.close() + zip_path = Path(tmp.name) + + skip_dirs = {'node_modules', '__pycache__', '.git', 'dist', 'build', 'logs', '.venv', 'venv'} + skip_suffixes = {'.pyc', '.pyo', '.log', '.db', '.sqlite', '.sqlite3'} + skip_names = {'.env', '.env.local', '.env.production', '.last_launch', + 'credentials.json', 'token.json', '.jwt_secret'} + + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf: + for root, dirs, files in os.walk(project_path): + dirs[:] = [d for d in dirs if d not in skip_dirs] + for f in files: + file_path = Path(root) / f + if file_path.suffix in skip_suffixes or file_path.name in skip_names: + continue + zf.write(file_path, file_path.relative_to(project_path)) + + logger.info(f"[LIVING_UI] Exported project '{project.name}' to {zip_path}") + return zip_path + + async def import_project_zip(self, zip_path: str, name: str = '') -> 'LivingUIProject': + """Import a Living UI project from a ZIP file. + + The ZIP should contain a project directory structure with at least + a config/manifest.json. A new project ID and ports are allocated. 
+ """ + zip_file = Path(zip_path) + if not zip_file.exists(): + raise FileNotFoundError(f"ZIP file not found: {zip_path}") + + # Extract to a temp directory first to inspect contents + with tempfile.TemporaryDirectory() as tmp_dir: + with zipfile.ZipFile(zip_file, 'r') as zf: + zf.extractall(tmp_dir) + + tmp_path = Path(tmp_dir) + + # Check if files are nested inside a single directory + entries = list(tmp_path.iterdir()) + if len(entries) == 1 and entries[0].is_dir(): + extracted_root = entries[0] + else: + extracted_root = tmp_path + + # Read manifest if it exists + manifest_path = extracted_root / 'config' / 'manifest.json' + manifest = {} + if manifest_path.exists(): + try: + manifest = json.loads(manifest_path.read_text(encoding='utf-8')) + except Exception: + pass + + # Determine project name + if not name: + name = manifest.get('name', zip_file.stem.replace('livingui_', '').rsplit('_', 1)[0]) + if not name: + name = 'imported_project' + + # Generate new ID and project path + project_id = self._generate_id() + sanitized_name = self._sanitize_name(name) + project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" + + # Copy to Living UI workspace + shutil.copytree(extracted_root, project_path) + + # Allocate new ports + frontend_port = self._allocate_port() + backend_port = self._allocate_port() + + # Update manifest with new ID and ports + manifest_path = project_path / 'config' / 'manifest.json' + if manifest_path.exists(): + try: + manifest = json.loads(manifest_path.read_text(encoding='utf-8')) + old_id = manifest.get('id', '') + old_port = str(manifest.get('ports', {}).get('frontend', manifest.get('ports', {}).get('app', ''))) + old_backend = str(manifest.get('ports', {}).get('backend', '')) + + manifest_raw = manifest_path.read_text(encoding='utf-8') + if old_id: + manifest_raw = manifest_raw.replace(old_id, project_id) + if old_port and old_port != str(frontend_port): + manifest_raw = manifest_raw.replace(old_port, str(frontend_port)) + if 
old_backend and old_backend != str(backend_port): + manifest_raw = manifest_raw.replace(old_backend, str(backend_port)) + + manifest_path.write_text(manifest_raw, encoding='utf-8') + manifest = json.loads(manifest_raw) + except Exception as e: + logger.warning(f"[LIVING_UI] Could not update imported manifest: {e}") + + # Determine project type from manifest + project_type = manifest.get('projectType', 'native') + app_runtime = manifest.get('appRuntime') + description = manifest.get('description', '') + + project = LivingUIProject( + id=project_id, + name=name, + description=description, + path=str(project_path), + status='ready', + port=frontend_port, + backend_port=backend_port, + project_type=project_type, + app_runtime=app_runtime, + ) + + self.projects[project_id] = project + self._save_projects() + + logger.info(f"[LIVING_UI] Imported project '{name}' ({project_id}) from ZIP") + return project + + def get_project_url(self, project_id: str) -> Optional[str]: + """Get the URL for a running project.""" + project = self.projects.get(project_id) + if project and project.status == 'running': + return project.url + return None + + # ------------------------------------------------------------------ + # LAN & Tunnel sharing + # ------------------------------------------------------------------ + + @staticmethod + def get_lan_ip() -> Optional[str]: + """Get the machine's LAN IP address.""" + try: + # Connect to a public IP to determine the right interface + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.settimeout(1) + s.connect(('8.8.8.8', 80)) + ip = s.getsockname()[0] + s.close() + return ip + except Exception: + try: + return socket.gethostbyname(socket.gethostname()) + except Exception: + return None + + def get_lan_url(self, project_id: str) -> Optional[str]: + """Get the LAN-accessible URL for a running project. + + Uses the backend port since the backend also serves the frontend + static files — single port for everything. 
+ """ + project = self.projects.get(project_id) + if not project or project.status != 'running': + return None + # Prefer backend port (serves both API + frontend static files) + port = project.backend_port or project.port + if not port: + return None + ip = self.get_lan_ip() + if not ip or ip.startswith('127.'): + return None + return f"http://{ip}:{port}" + + # Cloudflared binary download URLs per platform + _CLOUDFLARED_URLS = { + 'win32': 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe', + 'darwin': 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-darwin-amd64.tgz', + 'linux': 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64', + } + + def _get_cloudflared_path(self) -> Optional[str]: + """Find cloudflared — check PATH first, then our local bin directory.""" + system_path = shutil.which('cloudflared') + if system_path: + return system_path + # Check our local bin + import sys + ext = '.exe' if sys.platform == 'win32' else '' + local_bin = Path(__file__).parent.parent / 'bin' / f'cloudflared{ext}' + if local_bin.exists(): + return str(local_bin) + return None + + async def _ensure_cloudflared(self) -> Optional[str]: + """Find cloudflared or auto-install it. 
Returns the binary path or None.""" + path = self._get_cloudflared_path() + if path: + return path + + logger.info("[LIVING_UI] cloudflared not found, auto-installing...") + import sys + import urllib.request + + platform_key = sys.platform + if platform_key not in self._CLOUDFLARED_URLS: + logger.error(f"[LIVING_UI] Unsupported platform: {platform_key}") + return None + + bin_dir = Path(__file__).parent.parent / 'bin' + bin_dir.mkdir(parents=True, exist_ok=True) + ext = '.exe' if platform_key == 'win32' else '' + target = bin_dir / f'cloudflared{ext}' + + try: + url = self._CLOUDFLARED_URLS[platform_key] + req = urllib.request.Request(url, headers={'User-Agent': 'CraftBot'}) + resp = urllib.request.urlopen(req, timeout=60) + + if platform_key == 'darwin': + import tarfile, io + with tarfile.open(fileobj=io.BytesIO(resp.read()), mode='r:gz') as tar: + for member in tar.getmembers(): + if 'cloudflared' in member.name: + f = tar.extractfile(member) + if f: + target.write_bytes(f.read()) + break + else: + target.write_bytes(resp.read()) + + if platform_key != 'win32': + target.chmod(0o755) + + logger.info(f"[LIVING_UI] cloudflared installed at {target}") + return str(target) + except Exception as e: + logger.error(f"[LIVING_UI] Failed to download cloudflared: {e}") + if target.exists(): + target.unlink() + return None + + async def start_tunnel(self, project_id: str, provider: str = 'cloudflared') -> Optional[str]: + """Start a cloudflare tunnel for remote access. 
Returns the public URL.""" + logger.info(f"[LIVING_UI] start_tunnel called for {project_id}") + project = self.projects.get(project_id) + if not project or project.status != 'running': + logger.warning(f"[LIVING_UI] Cannot start tunnel: project={project is not None}, status={project.status if project else 'N/A'}") + return None + + logger.info(f"[LIVING_UI] Stopping any existing tunnel...") + await self.stop_tunnel(project_id) + + # Only kill orphans on first tunnel start (no other tunnels active) + other_tunnels = any( + p.tunnel_process is not None and p.id != project_id + for p in self.projects.values() + ) + if not other_tunnels: + logger.info("[LIVING_UI] No other tunnels active, cleaning orphan cloudflared processes...") + try: + if os.name == 'nt': + subprocess.run( + ['powershell', '-Command', 'Stop-Process -Name cloudflared -Force -ErrorAction SilentlyContinue'], + capture_output=True, timeout=5 + ) + else: + subprocess.run(['pkill', '-f', 'cloudflared'], capture_output=True) + await asyncio.sleep(1) + except Exception: + pass + + port = project.backend_port or project.port + if not port: + return None + + cloudflared = await self._ensure_cloudflared() + if not cloudflared: + logger.error("[LIVING_UI] cloudflared binary not found") + return None + + logger.info(f"[LIVING_UI] Starting cloudflared: {cloudflared} tunnel --url http://localhost:{port}") + proc = subprocess.Popen( + [cloudflared, 'tunnel', '--url', f'http://localhost:{port}'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + creationflags=subprocess.CREATE_NO_WINDOW if os.name == 'nt' and hasattr(subprocess, 'CREATE_NO_WINDOW') else 0, + ) + logger.info(f"[LIVING_UI] cloudflared started, PID={proc.pid}, parsing URL...") + url = await self._parse_cloudflare_url(proc) + logger.info(f"[LIVING_UI] cloudflared URL parse result: {url}") + + if url: + project.tunnel_process = proc + project.tunnel_url = url + self._save_projects() + logger.info(f"[LIVING_UI] Tunnel started for {project.name}: 
{url}") + return url + else: + self._terminate_process(proc) + logger.error(f"[LIVING_UI] Failed to get tunnel URL") + return None + + async def stop_tunnel(self, project_id: str) -> None: + """Stop the tunnel for a project.""" + project = self.projects.get(project_id) + if not project: + return + if project.tunnel_process: + self._terminate_process(project.tunnel_process) + project.tunnel_process = None + project.tunnel_url = None + self._save_projects() + logger.info(f"[LIVING_UI] Tunnel stopped for {project.name}") + + async def _parse_cloudflare_url(self, proc: subprocess.Popen, timeout: int = 30) -> Optional[str]: + """Parse the public URL from cloudflared output.""" + import re + import threading + + url_result = [None] + pattern = re.compile(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com') + + def _read_stream(stream): + try: + for line_bytes in stream: + text = line_bytes.decode('utf-8', errors='replace') + match = pattern.search(text) + if match: + url_result[0] = match.group(0) + return + except Exception: + pass + + # Read both stdout and stderr in parallel threads + t1 = threading.Thread(target=_read_stream, args=(proc.stdout,), daemon=True) + t2 = threading.Thread(target=_read_stream, args=(proc.stderr,), daemon=True) + t1.start() + t2.start() + + # Wait for either thread to find the URL + deadline = time.time() + timeout + while time.time() < deadline and url_result[0] is None: + if proc.poll() is not None and url_result[0] is None: + break + await asyncio.sleep(0.5) + + if url_result[0]: + logger.info(f"[LIVING_UI] Parsed cloudflare URL: {url_result[0]}") + else: + logger.error("[LIVING_UI] Failed to parse cloudflare URL within timeout") + + return url_result[0] + + + async def auto_launch_projects(self, project_ids: List[str] = None) -> None: + """Auto-launch projects on startup. + + If project_ids provided, launches those. Otherwise launches all + projects with auto_launch=True. 
+ """ + if project_ids is None: + # Launch all projects with auto_launch enabled + project_ids = [p.id for p in self.projects.values() if p.auto_launch] + + for project_id in project_ids: + project = self.projects.get(project_id) + if project and project.status != 'error': + logger.info(f"[LIVING_UI] Auto-launching: {project.name} ({project_id})") + project.status = 'launching' + self._save_projects() + await self.launch_project(project_id) diff --git a/app/main.py b/app/main.py index 50f2c83b..ddb90cc8 100644 --- a/app/main.py +++ b/app/main.py @@ -56,18 +56,16 @@ def _suppress_console_logging_early() -> None: import argparse import asyncio import sys -import pathlib # Register agent_core state provider and config before importing AgentBase # This ensures shared code can access state via get_state() from agent_core import StateRegistry, ConfigRegistry from app.state.agent_state import STATE +from app.config import get_project_root # CraftBot uses global STATE singleton - always available StateRegistry.register(lambda: STATE) -ConfigRegistry.register_workspace_root( - str(pathlib.Path(__file__).parent.parent.resolve()) -) +ConfigRegistry.register_workspace_root(str(get_project_root())) # Import settings reader (reads directly from settings.json) from app.config import get_llm_provider, get_vlm_provider, get_api_key, get_base_url, get_llm_model, get_vlm_model @@ -117,8 +115,8 @@ def _initial_settings() -> tuple: """Determine initial provider, API key, and base URL from settings.json. Returns: - Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) where has_valid_key - indicates if a working API key was found. + Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) + where has_valid_key indicates if a working API key was found. 
""" # Read directly from settings.json provider = get_llm_provider() @@ -131,6 +129,7 @@ def _initial_settings() -> tuple: # Remote (Ollama) doesn't require API key has_key = bool(api_key) or provider == "remote" + return provider, api_key, base_url, model, vlm_prov, vlm_mod, has_key diff --git a/app/state/agent_state.py b/app/state/agent_state.py index 2264e3a9..bb34686d 100644 --- a/app/state/agent_state.py +++ b/app/state/agent_state.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- """Global runtime state for a single-user, single-agent process.""" -from dataclasses import dataclass -from typing import Optional +import json +import time +from dataclasses import dataclass, field +from typing import Any, Dict, Optional from app.state.types import AgentProperties from app.task import Task diff --git a/app/state/state_manager.py b/app/state/state_manager.py index e122a1c0..895613dd 100644 --- a/app/state/state_manager.py +++ b/app/state/state_manager.py @@ -3,6 +3,7 @@ from pathlib import Path from agent_core.core.state.types import MainState from agent_core.core.state.session import StateSession +from agent_core.utils.file_utils import rotate_md_file_if_needed from app.state.types import AgentProperties from app.state.agent_state import STATE from app.event_stream import EventStreamManager @@ -197,6 +198,7 @@ def _append_to_conversation_history(self, sender: str, content: str) -> None: """ try: conversation_file = Path(AGENT_FILE_SYSTEM_PATH) / "CONVERSATION_HISTORY.md" + rotate_md_file_if_needed(conversation_file) timestamp = datetime.now().strftime("%Y/%m/%d %H:%M:%S") entry = f"[{timestamp}] [{sender}]: {content}\n" @@ -288,12 +290,23 @@ def record_agent_message( display_message=content, ) - # Record to conversation history for context injection into future tasks - self.event_stream_manager.record_conversation_message( - event_label, - content, - display_message=content, + # Skip _conversation_history (the global list re-injected into every active + # task's prompt 
via ) when this message is from a + # transient session that has no real task — e.g. the third-party email + # notification session. Otherwise the notification reply leaks into the + # currently-running task's next prompt. + is_transient_session = bool( + session_id + and self._task_manager + and self._task_manager.get_task_by_id(session_id) is None ) + if not is_transient_session: + # Record to conversation history for context injection into future tasks + self.event_stream_manager.record_conversation_message( + event_label, + content, + display_message=content, + ) self.bump_event_stream() self._append_to_conversation_history("agent", content) diff --git a/app/task/task_manager.py b/app/task/task_manager.py index cb338ea5..c99478ef 100644 --- a/app/task/task_manager.py +++ b/app/task/task_manager.py @@ -6,9 +6,15 @@ STATE singleton for state access and per-task event streams for multi-tasking. """ -from typing import Awaitable, Callable, List, Optional, TYPE_CHECKING +from typing import Any, Awaitable, Callable, Dict, List, Optional, TYPE_CHECKING from pathlib import Path +try: + from loguru import logger +except ImportError: + import logging + logger = logging.getLogger(__name__) + from agent_core.core.impl.task import TaskManager as _TaskManager from agent_core.core.task import Task from app.database_interface import DatabaseInterface @@ -21,6 +27,13 @@ if TYPE_CHECKING: from app.llm import LLMInterface from app.context_engine import ContextEngine + from agent_core.core.impl.workflow_lock import WorkflowLockManager + + +# Hook signature: (active_task, updated_todos_as_dicts) -> None. +# Fires after every update_todos call, regardless of transitions, so +# subscribers see the initial all-pending plan as well as later updates. 
+PostUpdateTodosHook = Callable[[Task, List[Dict[str, Any]]], None] def _get_gui_mode() -> bool: @@ -91,7 +104,10 @@ def __init__( llm_interface: Optional["LLMInterface"] = None, context_engine: Optional["ContextEngine"] = None, on_task_end_callback: Optional[Callable[[str], Awaitable[None]]] = None, + workflow_lock_manager: Optional["WorkflowLockManager"] = None, ): + self._post_update_todos_hooks: List[PostUpdateTodosHook] = [] + super().__init__( db_interface=db_interface, event_stream_manager=event_stream_manager, @@ -114,11 +130,40 @@ def __init__( on_task_persist=_on_task_persist, on_task_remove_persist=_on_task_remove_persist, # No chatserver hooks for CraftBot (local only) + # No chatserver hooks for CraftBot (local only). on_task_created_chatserver=None, on_todo_transition=None, on_task_ended_chatserver=None, finalize_todos_chatserver=None, + # Workflow lock registry for auto-release on task end + workflow_lock_manager=workflow_lock_manager, ) - -__all__ = ["TaskManager"] + def add_post_update_todos_hook(self, hook: PostUpdateTodosHook) -> None: + """Register a hook that fires after every update_todos call. + + Use this to observe todo changes without coupling domain-specific + logic into TaskManager. Each hook receives the active Task and the + updated todo list (as dicts). + """ + self._post_update_todos_hooks.append(hook) + + def update_todos(self, todos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Update todos, then notify registered post-update hooks. + + We override (rather than using the parent's on_todo_transition) because + that hook fires only on status changes — the initial plan where every + item is 'pending' produces zero transitions and subscribers would miss + the first snapshot. 
+ """ + result = super().update_todos(todos) + if self.active and self._post_update_todos_hooks: + for hook in self._post_update_todos_hooks: + try: + hook(self.active, result) + except Exception as e: + logger.warning(f"[TaskManager] post_update_todos hook failed: {e}") + return result + + +__all__ = ["TaskManager", "PostUpdateTodosHook"] diff --git a/app/ui_layer/adapters/base.py b/app/ui_layer/adapters/base.py index 13dfdefc..23a03123 100644 --- a/app/ui_layer/adapters/base.py +++ b/app/ui_layer/adapters/base.py @@ -205,6 +205,9 @@ def _subscribe_events(self) -> None: self._unsubscribers.append( bus.subscribe(UIEventType.ERROR_MESSAGE, self._handle_error_message) ) + self._unsubscribers.append( + bus.subscribe(UIEventType.LLM_FATAL_ERROR, self._handle_llm_fatal_error) + ) self._unsubscribers.append( bus.subscribe(UIEventType.INFO_MESSAGE, self._handle_info_message) ) @@ -263,7 +266,12 @@ def _subscribe_events(self) -> None: def _handle_user_message(self, event: UIEvent) -> None: """Handle user message event.""" asyncio.create_task( - self._display_chat_message("You", event.data.get("message", ""), "user") + self._display_chat_message( + "You", + event.data.get("message", ""), + "user", + client_id=event.data.get("client_id"), + ) ) def _handle_agent_message(self, event: UIEvent) -> None: @@ -307,6 +315,24 @@ def _handle_error_message(self, event: UIEvent) -> None: self._display_chat_message("Error", event.data.get("message", ""), "error") ) + def _handle_llm_fatal_error(self, event: UIEvent) -> None: + """Handle fatal LLM consecutive failure — show retry/change-model options.""" + from app.ui_layer.components.types import ChatMessageOption + session_id = event.data.get("session_id") + options = [ + ChatMessageOption(label="Retry", value="llm_retry", style="primary"), + ChatMessageOption(label="Change Model", value="llm_change_model", style="default"), + ] + asyncio.create_task( + self._display_chat_message( + "System", + "What would you like to do?", + "system", 
+ task_session_id=session_id, + options=options, + ) + ) + def _handle_info_message(self, event: UIEvent) -> None: """Handle info message event.""" asyncio.create_task( @@ -456,6 +482,7 @@ async def _display_chat_message( style: str, task_session_id: Optional[str] = None, options: Optional[List[ChatMessageOption]] = None, + client_id: Optional[str] = None, ) -> None: """ Display a chat message. @@ -466,6 +493,7 @@ async def _display_chat_message( style: Style identifier task_session_id: Optional task session ID for reply feature options: Optional list of interactive options/buttons + client_id: Optional client-generated UUID for reconciling with optimistic UI """ import time @@ -477,6 +505,7 @@ async def _display_chat_message( timestamp=time.time(), task_session_id=task_session_id, options=options, + client_id=client_id, ) ) diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 1a19aa3f..68014828 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -98,6 +98,13 @@ from app.ui_layer.events import UIEvent, UIEventType from app.ui_layer.onboarding import OnboardingFlowController from app.ui_layer.metrics import MetricsCollector +from app.living_ui import ( + LivingUIManager, + LivingUIProject, + set_living_ui_manager, + register_broadcast_callbacks, + make_todo_broadcast_hook, +) if TYPE_CHECKING: from app.ui_layer.controller.ui_controller import UIController @@ -275,6 +282,10 @@ async def append_message(self, message: ChatMessage) -> None: "messageId": message.message_id, } + # Include client_id so the browser can reconcile its optimistic pending bubble + if message.client_id: + message_data["clientId"] = message.client_id + # Include attachments if present if message.attachments: message_data["attachments"] = [ @@ -770,6 +781,8 @@ def __init__( self._ws_clients: Set = set() self._metrics_subscribers: Set = set() self._runner: Optional["web.AppRunner"] = None + 
self._started_at: float = 0.0 + self._ws_prepare_failures: int = 0 # Dashboard metrics collector self._metrics_collector = MetricsCollector(controller.agent) @@ -778,6 +791,37 @@ def __init__( # Track active OAuth tasks for cancellation support self._oauth_tasks: Dict[str, asyncio.Task] = {} + # Living UI manager + template_path = Path(__file__).parent.parent.parent / "data" / "living_ui_template" + self._living_ui_manager = LivingUIManager( + workspace_root=AGENT_WORKSPACE_ROOT, + template_path=template_path + ) + # Bind task_manager and trigger_queue for task creation + agent = self._controller.agent + self._living_ui_manager.bind_task_manager(agent.task_manager, agent.triggers) + + # Clean up orphan processes and folders from previous sessions + self._living_ui_manager.cleanup_on_startup() + + # Start watchdog to monitor running Living UI processes + self._living_ui_manager.start_watchdog() + + # Auto-launch projects that have auto_launch enabled + asyncio.create_task(self._living_ui_manager.auto_launch_projects()) + + # Register global accessor and callbacks for Living UI actions + set_living_ui_manager(self._living_ui_manager) + register_broadcast_callbacks( + broadcast_ready=self.broadcast_living_ui_ready, + broadcast_progress=self.broadcast_living_ui_progress, + broadcast_todos=self.broadcast_living_ui_todos, + ) + + # Subscribe the Living UI module to TaskManager todo updates so that + # the agent's task breakdown streams to the browser automatically. 
+ agent.task_manager.add_post_update_todos_hook(make_todo_broadcast_hook()) + @property def theme_adapter(self) -> ThemeAdapter: return self._theme_adapter @@ -806,7 +850,9 @@ def metrics_collector(self) -> MetricsCollector: async def submit_message( self, message: str, - reply_context: Optional[Dict[str, Any]] = None + reply_context: Optional[Dict[str, Any]] = None, + living_ui_id: Optional[str] = None, + client_id: Optional[str] = None, ) -> None: """ Submit a message from the user with optional reply context. @@ -817,6 +863,8 @@ async def submit_message( Args: message: The user's input message reply_context: Optional dict with {sessionId?: str, originalMessage: str} + living_ui_id: Optional Living UI project ID if user is on a Living UI page + client_id: Optional client-generated UUID for reconciling optimistic UI """ agent_context = message @@ -830,7 +878,9 @@ async def submit_message( await self._controller.submit_message( agent_context, self._adapter_id, - target_session_id=target_session_id + target_session_id=target_session_id, + client_id=client_id, + living_ui_id=living_ui_id ) def _handle_task_start(self, event: UIEvent) -> None: @@ -910,6 +960,15 @@ async def _on_start(self) -> None: self._app.router.add_get("/api/workspace/{path:.*}", self._workspace_file_handler) self._app.router.add_get("/api/agent-profile-picture", self._agent_profile_picture_handler) + # Living UI export/import routes + self._app.router.add_get("/api/living-ui/{project_id}/export", self._living_ui_export_handler) + self._app.router.add_post("/api/living-ui/import", self._living_ui_import_handler) + + # Integration bridge routes (Living UI → external APIs) + from app.living_ui.integration_bridge import IntegrationBridge + self._integration_bridge = IntegrationBridge(self._living_ui_manager) + self._integration_bridge.register_routes(self._app) + # Serve Vite-built frontend (production) frontend_dist = Path(__file__).parent.parent / "browser" / "frontend" / "dist" if 
frontend_dist.exists(): @@ -947,6 +1006,7 @@ async def _static_or_spa(request: web.Request) -> web.StreamResponse: await self._runner.setup() site = web.TCPSite(self._runner, self._host, self._port) await site.start() + self._started_at = time.monotonic() # Only print URL info if not using browser startup UI (run.py handles it) import os @@ -975,6 +1035,14 @@ async def _static_or_spa(request: web.Request) -> web.StreamResponse: async def _on_stop(self) -> None: """Stop the browser interface.""" + # Stop all running Living UI projects + if self._living_ui_manager: + await self._living_ui_manager.stop_all_projects() + + # Close integration bridge HTTP client + if hasattr(self, '_integration_bridge'): + await self._integration_bridge.cleanup() + # Cancel metrics broadcasting task if self._metrics_task: self._metrics_task.cancel() @@ -1005,8 +1073,29 @@ async def _websocket_handler(self, request: "web.Request") -> "web.WebSocketResp try: await ws.prepare(request) + except ClientConnectionResetError: + # Benign: the client (browser) aborted the TCP connection before the WebSocket + # handshake could complete. Happens routinely in dev with React.StrictMode / + # Vite HMR double-mounting WS providers, and on page navigations. Nothing to do. 
+ self._ws_prepare_failures += 1 + return ws except Exception as e: - print(f"[BROWSER ADAPTER] Failed to prepare WebSocket: {e}") + import traceback as _tb + self._ws_prepare_failures += 1 + try: + peer = request.transport.get_extra_info("peername") if request.transport else None + except Exception: + peer = None + user_agent = request.headers.get("User-Agent", "") + attempt_id = request.query.get("attempt", "") + uptime_s = (time.monotonic() - self._started_at) if self._started_at else -1.0 + print( + "[BROWSER ADAPTER] Failed to prepare WebSocket: " + f"err={type(e).__name__}: {e} | peer={peer} | attempt_id={attempt_id} " + f"| clients={len(self._ws_clients)} | uptime_s={uptime_s:.1f} " + f"| failures={self._ws_prepare_failures} | ua={user_agent!r}\n" + f"{_tb.format_exc()}" + ) return ws is_first_client = len(self._ws_clients) == 0 @@ -1082,13 +1171,25 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: content = data.get("content", "") attachments = data.get("attachments", []) reply_context = data.get("replyContext") # {sessionId?: str, originalMessage: str} - + living_ui_id = data.get("livingUIId") # Set when user is on a Living UI page + client_id = data.get("clientId") + if living_ui_id: + logger.info(f"[BROWSER ADAPTER] Message from Living UI page: {living_ui_id}") + + # Dispatch chat submission as a background task so the WS message loop + # can immediately read the next frame. Otherwise rapid-fire sends are + # serialised behind each message's routing-LLM call (~1s each), which + # makes optimistic bubbles un-gray one-by-one instead of all at once. 
if attachments: - # Message with attachments - use custom handler - await self._handle_chat_message_with_attachments(content, attachments, reply_context) + asyncio.create_task( + self._handle_chat_message_with_attachments( + content, attachments, reply_context, living_ui_id, client_id + ) + ) elif content: - # Regular message without attachments - use normal flow - await self.submit_message(content, reply_context) + asyncio.create_task( + self.submit_message(content, reply_context, living_ui_id, client_id) + ) elif msg_type == "chat_attachment_upload": # Upload attachment for chat message @@ -1440,6 +1541,37 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: watch_repos = data.get("watch_repos") await self._handle_github_update_settings(watch_tag=watch_tag, watch_repos=watch_repos) + # Living UI settings handlers + elif msg_type == "living_ui_settings_get": + await self._handle_living_ui_settings_get() + + elif msg_type == "living_ui_project_action": + project_id = data.get("projectId", "") + action = data.get("action", "") + await self._handle_living_ui_project_action(project_id, action) + + elif msg_type == "living_ui_project_setting_update": + project_id = data.get("projectId", "") + setting = data.get("setting", "") + value = data.get("value") + await self._handle_living_ui_project_setting_update(project_id, setting, value) + + elif msg_type == "living_ui_marketplace_list": + await self._handle_marketplace_list() + + elif msg_type == "living_ui_marketplace_install": + app_id = data.get("appId", "") + app_name = data.get("appName", "") + app_description = data.get("appDescription", "") + custom_fields = data.get("customFields", {}) + # Run as background task so the WS loop stays unblocked for concurrent installs + asyncio.create_task(self._handle_marketplace_install(app_id, app_name, app_description, custom_fields)) + + elif msg_type == "living_ui_import": + source = data.get("source", "") + name = data.get("name", "External App") + 
asyncio.create_task(self._handle_living_ui_import(source, name)) + # WhatsApp QR code flow handlers elif msg_type == "whatsapp_start_qr": await self._handle_whatsapp_start_qr() @@ -1494,6 +1626,40 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: model = data.get("model", "") base_url = data.get("baseUrl") await self._handle_local_llm_pull_model(model, base_url) + # Living UI handlers + elif msg_type == "living_ui_create": + await self._handle_living_ui_create(data) + + elif msg_type == "living_ui_list": + await self._handle_living_ui_list() + + elif msg_type == "living_ui_launch": + project_id = data.get("projectId", "") + await self._handle_living_ui_launch(project_id) + + elif msg_type == "living_ui_stop": + project_id = data.get("projectId", "") + await self._handle_living_ui_stop(project_id) + + elif msg_type == "living_ui_delete": + project_id = data.get("projectId", "") + await self._handle_living_ui_delete(project_id) + + elif msg_type == "living_ui_state_update": + await self._handle_living_ui_state_update(data) + + elif msg_type == "living_ui_tunnel_start": + project_id = data.get("projectId", "") + provider = data.get("provider", "cloudflared") + await self._handle_living_ui_tunnel_start(project_id, provider) + + elif msg_type == "living_ui_tunnel_stop": + project_id = data.get("projectId", "") + await self._handle_living_ui_tunnel_stop(project_id) + + elif msg_type == "living_ui_sharing_info": + project_id = data.get("projectId", "") + await self._handle_living_ui_sharing_info(project_id) # Update operations elif msg_type == "check_update": @@ -2037,6 +2203,417 @@ async def progress_callback(data: dict) -> None: "type": "local_llm_pull_model", "data": {"success": False, "error": str(e)}, }) + # ------------------------------------------------------------------------- + # Living UI Handlers + # ------------------------------------------------------------------------- + + async def _handle_living_ui_create(self, data: Dict[str, 
Any]) -> None: + """Create a new Living UI project.""" + try: + name = data.get("name", "") + description = data.get("description", "") + features = data.get("features", []) + data_source = data.get("dataSource") + theme = data.get("theme", "system") + + if not name or not description: + await self._broadcast({ + "type": "living_ui_error", + "data": { + "projectId": "", + "error": "Name and description are required", + }, + }) + return + + # Create the project (directory/template) + project = await self._living_ui_manager.create_project( + name=name, + description=description, + features=features, + data_source=data_source, + theme=theme, + ) + + # Broadcast project created + await self._broadcast({ + "type": "living_ui_create", + "data": { + "success": True, + "projectId": project.id, + "project": project.to_dict(), + }, + }) + + # Broadcast initial status update + await self._broadcast({ + "type": "living_ui_status", + "data": { + "projectId": project.id, + "phase": "initializing", + "progress": 10, + "message": "Project created, starting development...", + }, + }) + + # Create task and fire trigger via manager + # The manager handles: task creation, status update, trigger firing + task_id = await self._living_ui_manager.create_development_task(project.id) + + if task_id: + logger.info(f"[LIVING_UI] Created and triggered task {task_id} for project {project.id}") + else: + logger.error(f"[LIVING_UI] Failed to create task for project {project.id}") + await self._broadcast({ + "type": "living_ui_error", + "data": { + "projectId": project.id, + "error": "Failed to create development task", + }, + }) + + except Exception as e: + logger.error(f"[LIVING_UI] Error creating project: {e}") + await self._broadcast({ + "type": "living_ui_error", + "data": { + "projectId": "", + "error": str(e), + }, + }) + + async def _handle_living_ui_list(self) -> None: + """Get list of all Living UI projects.""" + try: + projects = self._living_ui_manager.list_projects() + await 
self._broadcast({ + "type": "living_ui_list", + "data": { + "success": True, + "projects": [p.to_dict() for p in projects], + }, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Error listing projects: {e}") + await self._broadcast({ + "type": "living_ui_list", + "data": { + "success": False, + "error": str(e), + }, + }) + + async def _handle_living_ui_launch(self, project_id: str) -> None: + """Launch a Living UI project.""" + try: + success = await self._living_ui_manager.launch_project(project_id) + project = self._living_ui_manager.get_project(project_id) + + if success and project: + await self._broadcast({ + "type": "living_ui_launch", + "data": { + "success": True, + "projectId": project_id, + "url": project.url, + "port": project.port, + }, + }) + else: + await self._broadcast({ + "type": "living_ui_launch", + "data": { + "success": False, + "projectId": project_id, + "error": project.error if project else "Project not found", + }, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Error launching project: {e}") + await self._broadcast({ + "type": "living_ui_launch", + "data": { + "success": False, + "projectId": project_id, + "error": str(e), + }, + }) + + async def _handle_living_ui_stop(self, project_id: str) -> None: + """Stop a running Living UI project.""" + try: + success = await self._living_ui_manager.stop_project(project_id) + await self._broadcast({ + "type": "living_ui_stop", + "data": { + "success": success, + "projectId": project_id, + }, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Error stopping project: {e}") + await self._broadcast({ + "type": "living_ui_stop", + "data": { + "success": False, + "projectId": project_id, + "error": str(e), + }, + }) + + async def _handle_living_ui_delete(self, project_id: str) -> None: + """Delete a Living UI project.""" + try: + success = await self._living_ui_manager.delete_project(project_id) + await self._broadcast({ + "type": "living_ui_delete", + "data": { + "success": 
success,
+                    "projectId": project_id,
+                },
+            })
+        except Exception as e:
+            logger.error(f"[LIVING_UI] Error deleting project: {e}")
+            await self._broadcast({
+                "type": "living_ui_delete",
+                "data": {
+                    "success": False,
+                    "projectId": project_id,
+                    "error": str(e),
+                },
+            })
+
+    async def _living_ui_export_handler(self, request: 'web.Request') -> 'web.Response':
+        """HTTP handler: download a Living UI project as a ZIP file."""
+        from aiohttp import web
+        project_id = request.match_info['project_id']
+        try:
+            zip_path = self._living_ui_manager.export_project_zip(project_id)
+            project = self._living_ui_manager.get_project(project_id)
+            filename = f"{project.name.replace(' ', '_')}.zip" if project else f"{project_id}.zip"
+
+            response = web.FileResponse(
+                zip_path,
+                headers={
+                    'Content-Disposition': f'attachment; filename="{filename}"',
+                    'Content-Type': 'application/zip',
+                },
+            )
+            # Schedule cleanup after response is sent
+            response._zip_cleanup_path = zip_path
+            return response
+        except (ValueError, FileNotFoundError) as e:
+            return web.json_response({"error": str(e)}, status=404)
+        except Exception as e:
+            logger.error(f"[LIVING_UI] Export error: {e}")
+            return web.json_response({"error": str(e)}, status=500)
+
+    async def _living_ui_import_handler(self, request: 'web.Request') -> 'web.Response':
+        """HTTP handler: stage a ZIP file upload and return the temp path.
+
+        The frontend then sends a living_ui_import WebSocket message with
+        the path so the agent handles extraction via the importer skill.
+ """ + from aiohttp import web + try: + import tempfile + reader = await request.multipart() + zip_path = None + name = '' + + async for part in reader: + if part.name == 'name': + name = (await part.read()).decode('utf-8') + elif part.name == 'file': + # Save uploaded file to a staging location + staging_dir = Path(self._living_ui_manager.living_ui_dir) / '_staging' + staging_dir.mkdir(parents=True, exist_ok=True) + tmp = tempfile.NamedTemporaryFile( + suffix='.zip', prefix='import_', dir=str(staging_dir), delete=False + ) + while True: + chunk = await part.read_chunk() + if not chunk: + break + tmp.write(chunk) + tmp.close() + zip_path = tmp.name + + if not zip_path: + return web.json_response({"error": "No ZIP file uploaded"}, status=400) + + return web.json_response({ + "success": True, + "path": zip_path, + "name": name, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Upload staging error: {e}") + return web.json_response({"error": str(e)}, status=500) + + async def _handle_living_ui_state_update(self, data: Dict[str, Any]) -> None: + """Handle state update from a Living UI for agent awareness.""" + try: + project_id = data.get("projectId", "") + state = data.get("state", {}) + + # Store the state for agent context + from app.state import STATE + if hasattr(STATE, 'update_living_ui_state'): + STATE.update_living_ui_state(project_id, state) + + # Also forward to any listening clients (for debugging/monitoring) + await self._broadcast({ + "type": "living_ui_state_update", + "data": { + "projectId": project_id, + "state": state, + }, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Error handling state update: {e}") + + async def _handle_living_ui_sharing_info(self, project_id: str) -> None: + """Return sharing info (LAN URL, tunnel URL).""" + lan_url = self._living_ui_manager.get_lan_url(project_id) + project = self._living_ui_manager.get_project(project_id) + await self._broadcast({ + "type": "living_ui_sharing_info", + "data": { + 
"projectId": project_id, + "lanUrl": lan_url, + "tunnelUrl": project.tunnel_url if project else None, + }, + }) + + async def _handle_living_ui_tunnel_start(self, project_id: str, provider: str) -> None: + """Start a tunnel for a Living UI project.""" + logger.info(f"[LIVING_UI] Tunnel start requested: project={project_id}, provider={provider}") + try: + url = await self._living_ui_manager.start_tunnel(project_id, provider) + await self._broadcast({ + "type": "living_ui_tunnel_status", + "data": { + "projectId": project_id, + "tunnelUrl": url, + "success": url is not None, + "error": None if url else f"Failed to start {provider} tunnel", + }, + }) + except Exception as e: + logger.error(f"[LIVING_UI] Tunnel start error: {e}", exc_info=True) + await self._broadcast({ + "type": "living_ui_tunnel_status", + "data": { + "projectId": project_id, + "tunnelUrl": None, + "success": False, + "error": str(e), + }, + }) + + async def _handle_living_ui_tunnel_stop(self, project_id: str) -> None: + """Stop a tunnel for a Living UI project.""" + await self._living_ui_manager.stop_tunnel(project_id) + await self._broadcast({ + "type": "living_ui_tunnel_status", + "data": { + "projectId": project_id, + "tunnelUrl": None, + "success": True, + }, + }) + + async def broadcast_living_ui_ready(self, project_id: str, url: str, port: int) -> bool: + """ + Broadcast that a Living UI is ready (called from agent action). + + This method launches the Living UI server via the manager and notifies + the browser. The agent should NOT start the server itself - just build + and call this action. 
+ + Returns: + True if project was found and launched successfully, False otherwise + """ + project = self._living_ui_manager.get_project(project_id) + if not project: + logger.error(f"[LIVING_UI] Project not found for ready notification: {project_id}") + # Broadcast error to browser so it can display the error state + await self._broadcast({ + "type": "living_ui_error", + "data": { + "projectId": project_id, + "error": f"Project '{project_id}' not found. Check that the project_id matches the one from the task instruction.", + }, + }) + return False + + # Update project status to "ready" (build complete, about to launch) + self._living_ui_manager.update_project_status(project_id, "ready") + + # Launch the project server via manager (centralizes process management) + success = await self._living_ui_manager.launch_project(project_id) + + if success: + # Get updated project info with URL + project = self._living_ui_manager.get_project(project_id) + await self._broadcast({ + "type": "living_ui_ready", + "data": { + "projectId": project_id, + "url": project.url if project else url, + "port": project.port if project else port, + }, + }) + logger.info(f"[LIVING_UI] Project {project_id} launched and ready") + return True + else: + # Launch failed + await self._broadcast({ + "type": "living_ui_error", + "data": { + "projectId": project_id, + "error": "Failed to launch Living UI server", + }, + }) + logger.error(f"[LIVING_UI] Failed to launch project {project_id}") + return False + + async def broadcast_living_ui_progress( + self, + project_id: str, + phase: str, + progress: int, + message: str + ) -> None: + """Broadcast Living UI creation progress (called from agent action).""" + await self._broadcast({ + "type": "living_ui_status", + "data": { + "projectId": project_id, + "phase": phase, + "progress": progress, + "message": message, + }, + }) + + async def broadcast_living_ui_todos( + self, + project_id: str, + todos: list, + ) -> None: + """Broadcast the agent's current 
todo list for a Living UI task. + + Fired from the task manager's on_todo_transition hook whenever the + agent updates its todos during a Living UI creation task. + """ + await self._broadcast({ + "type": "living_ui_todos", + "data": { + "projectId": project_id, + "todos": todos, + }, + }) async def _handle_task_cancel(self, task_id: str) -> None: """Cancel a running task.""" @@ -3858,6 +4435,149 @@ async def _handle_github_update_settings(self, watch_tag=None, watch_repos=None) except Exception as e: await self._broadcast({"type": "github_settings_result", "data": {"success": False, "error": str(e)}}) + # ========================== + # Living UI Settings Handlers + # ========================== + + async def _handle_living_ui_settings_get(self) -> None: + """Get all Living UI projects with their settings.""" + from app.ui_layer.settings.living_ui_settings import get_living_ui_projects + result = get_living_ui_projects() + await self._broadcast({"type": "living_ui_settings_get", "data": result}) + + async def _handle_living_ui_project_action(self, project_id: str, action: str) -> None: + """Execute a project action (launch/stop/delete).""" + from app.ui_layer.settings.living_ui_settings import living_ui_project_action + result = await living_ui_project_action(project_id, action) + await self._broadcast({"type": "living_ui_project_action", "data": result}) + + async def _handle_living_ui_project_setting_update(self, project_id: str, setting: str, value) -> None: + """Update a per-project setting.""" + from app.ui_layer.settings.living_ui_settings import update_project_setting + result = update_project_setting(project_id, setting, value) + await self._broadcast({"type": "living_ui_project_setting_update", "data": result}) + + # ===================== + # Marketplace Handlers + # ===================== + + async def _handle_marketplace_list(self) -> None: + """Fetch marketplace catalogue from GitHub.""" + import urllib.request + import json as _json + import re as _re + 
+ CATALOGUE_URL = "https://raw.githubusercontent.com/CraftOS-dev/living-ui-marketplace/main/catalogue.json" + + try: + import ssl, certifi + ssl_ctx = ssl.create_default_context(cafile=certifi.where()) + req = urllib.request.Request(CATALOGUE_URL, headers={'User-Agent': 'CraftBot'}) + response = urllib.request.urlopen(req, timeout=15, context=ssl_ctx) + raw = response.read().decode() + # Strip trailing commas before ] or } (tolerant of hand-edited JSON) + raw = _re.sub(r',\s*([}\]])', r'\1', raw) + catalogue = _json.loads(raw) + await self._broadcast({ + "type": "living_ui_marketplace_list", + "data": {"success": True, "apps": catalogue.get("apps", [])}, + }) + except Exception as e: + await self._broadcast({ + "type": "living_ui_marketplace_list", + "data": {"success": False, "error": str(e), "apps": []}, + }) + + async def _handle_marketplace_install(self, app_id: str, app_name: str, app_description: str, custom_fields: dict = None) -> None: + """Install a marketplace app.""" + if not app_id or not app_name: + await self._broadcast({ + "type": "living_ui_marketplace_install", + "data": {"success": False, "error": "App ID and name are required", "appId": app_id}, + }) + return + + result = await self._living_ui_manager.install_from_marketplace( + app_id=app_id, + app_name=app_name, + app_description=app_description, + custom_fields=custom_fields, + ) + + if result.get("status") == "success": + # Also broadcast as living_ui_create so the sidebar updates + await self._broadcast({ + "type": "living_ui_create", + "data": { + "success": True, + "projectId": result["project"]["id"], + "project": result["project"], + }, + }) + + await self._broadcast({ + "type": "living_ui_marketplace_install", + "data": {**result, "appId": app_id}, + }) + + async def _handle_living_ui_import(self, source: str, name: str) -> None: + """Handle import of an external app or ZIP — creates a task with the importer skill.""" + if not source: + return + + is_zip = 
source.lower().endswith('.zip') + + if is_zip: + task_instruction = ( + f"Import this Living UI project from a ZIP file:\n" + f"ZIP path: {source}\n" + f"Name: {name}\n\n" + f"Steps:\n" + f"1. Call living_ui_import_zip to extract and register the project\n" + f"2. Review the project structure and manifest\n" + f"3. Install dependencies if needed\n" + f"4. Launch the app and verify it works\n" + f"5. Clean up the ZIP file after successful import" + ) + else: + task_instruction = ( + f"Import this external app as a Living UI:\n" + f"Source: {source}\n" + f"Name: {name}\n\n" + f"Follow the living-ui-importer skill instructions:\n" + f"1. Clone/copy the source code\n" + f"2. Detect the app type (Go, Node, Python, etc.) — NEVER use Docker if native build is possible\n" + f"3. Determine build/install command, start command, port config, and health check\n" + f"4. Call living_ui_import_external with the detected configuration\n" + f"5. Launch the app and verify it works\n" + f"6. Create LIVING_UI.md documenting the app" + ) + + task_id = self._controller.agent.task_manager.create_task( + task_name=f"Import Living UI: {name}", + task_instruction=task_instruction, + mode="complex", + action_sets=["file_operations", "code_execution", "living_ui", "core"], + selected_skills=["living-ui-importer"], + ) + + if task_id: + from app.trigger import Trigger + import time + trigger = Trigger( + fire_at=time.time(), + priority=50, + next_action_description=f"[Living UI] Import: {name}", + session_id=task_id, + payload={"type": "living_ui_import", "source": source}, + ) + await self._controller.agent.triggers.put(trigger) + + await self._broadcast({ + "type": "living_ui_import", + "data": {"status": "started", "name": name, "source": source}, + }) + # ===================== # WhatsApp QR Code Flow # ===================== @@ -4539,7 +5259,9 @@ async def _handle_chat_message_with_attachments( self, content: str, attachments: List[Dict[str, Any]], - reply_context: Optional[Dict[str, Any]] 
= None + reply_context: Optional[Dict[str, Any]] = None, + living_ui_id: Optional[str] = None, + client_id: Optional[str] = None, ) -> None: """Handle user chat message with attachments and optional reply context.""" import uuid @@ -4599,6 +5321,7 @@ async def _handle_chat_message_with_attachments( style="user", timestamp=time.time(), attachments=processed_attachments if processed_attachments else None, + client_id=client_id, ) await self._chat.append_message(user_message) @@ -4639,6 +5362,8 @@ async def _handle_chat_message_with_attachments( # Include target session ID if replying to a specific session if reply_context and reply_context.get("sessionId"): payload["target_session_id"] = reply_context["sessionId"] + if living_ui_id: + payload["living_ui_id"] = living_ui_id await self._controller._agent._handle_chat_message(payload) diff --git a/app/ui_layer/browser/frontend/src/App.tsx b/app/ui_layer/browser/frontend/src/App.tsx index a861f2af..137b1e82 100644 --- a/app/ui_layer/browser/frontend/src/App.tsx +++ b/app/ui_layer/browser/frontend/src/App.tsx @@ -8,12 +8,49 @@ import { ScreenPage } from './pages/Screen' import { WorkspacePage } from './pages/Workspace' import { SettingsPage } from './pages/Settings' import { OnboardingPage } from './pages/Onboarding' +import { LivingUIPage } from './pages/LivingUI' import { useWebSocket } from './contexts/WebSocketContext' function App() { - const { needsHardOnboarding } = useWebSocket() + const { initReceived, needsHardOnboarding } = useWebSocket() + + // Block rendering until the backend sends the initial state. + // Without this guard, needsHardOnboarding defaults to false and the chat + // flashes briefly before the onboarding page appears on first install. + if (!initReceived) { + return ( +
+ + + CraftBot + +
+
+
+
+
+
+ ) + } - // Show onboarding page if hard onboarding is needed if (needsHardOnboarding) { return } @@ -27,6 +64,7 @@ function App() { } /> } /> } /> + } /> } /> diff --git a/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css b/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css new file mode 100644 index 00000000..ea188fd2 --- /dev/null +++ b/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css @@ -0,0 +1,547 @@ +/* Self-contained Chat Component Styles */ + +.chat { + display: flex; + flex-direction: column; + height: 100%; + min-width: 0; +} + +.messagesContainer { + flex: 1; + overflow-y: auto; + padding: var(--space-4); + display: flex; + flex-direction: column; + gap: var(--space-3); +} + +.emptyState { + flex: 1; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: var(--space-3); + color: var(--text-secondary); + text-align: center; +} + +.emptyState h3 { + color: var(--text-primary); + font-size: var(--text-lg); +} + +.emptyState p { + font-size: var(--text-sm); +} + +.emptyIcon { + margin-bottom: var(--space-2); +} + +/* Status Bar */ +.statusBar { + display: flex; + align-items: center; + gap: var(--space-2); + padding: var(--space-2) var(--space-4); + background: var(--bg-secondary); + border-top: 1px solid var(--border-primary); + font-size: var(--text-xs); + color: var(--text-secondary); +} + +/* Input Area */ +.inputArea { + display: flex; + align-items: flex-end; + gap: var(--space-2); + padding: var(--space-3); + border-top: 1px solid var(--border-primary); + background: var(--bg-secondary); +} + +/* Send button (last direct child) — match textarea min-height so the bottom + row stays visually aligned with the input. 
*/ +.inputArea > button:last-child { + height: 36px; +} + +.input { + display: block; + width: 100%; + box-sizing: border-box; + resize: none; + min-height: 36px; + max-height: 116px; + overflow-y: auto; + padding: var(--space-2) var(--space-3); + border: 1px solid var(--border-primary); + border-radius: var(--radius-lg); + background: var(--bg-primary); + color: var(--text-primary); + font-size: var(--text-sm); + font-family: inherit; + line-height: var(--leading-normal); +} + +.input:focus { + outline: none; + border-color: var(--color-primary); +} + +.input::placeholder { + color: var(--text-muted); +} + +/* Hidden file input */ +.hiddenFileInput { + display: none; +} + +/* Input wrapper for textarea and pending attachments */ +.inputWrapper { + flex: 1; + display: flex; + flex-direction: column; + gap: var(--space-2); + min-width: 0; + border-radius: var(--radius-md); + transition: outline var(--transition-fast), background var(--transition-fast); +} + +.inputWrapperDragOver { + outline: 2px dashed var(--color-primary); + background: var(--color-primary-subtle); +} + +/* Pending attachments container */ +.pendingAttachments { + display: flex; + flex-wrap: wrap; + gap: var(--space-1); +} + +.pendingAttachment { + display: flex; + align-items: center; + gap: 4px; + padding: 4px 8px; + background: var(--color-primary-subtle); + border: 1px solid var(--color-primary-light); + border-radius: var(--radius-sm); + font-size: var(--text-xs); + color: var(--text-primary); +} + +.pendingImageThumb { + width: 20px; + height: 20px; + object-fit: cover; + border-radius: 2px; + flex-shrink: 0; +} + +.pendingFileName { + max-width: 120px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.pendingFileSize { + color: var(--text-muted); +} + +.pendingAttachmentBody { + display: flex; + align-items: center; + gap: 4px; + background: none; + border: none; + padding: 0; + cursor: pointer; + color: inherit; + font-size: inherit; + min-width: 0; +} + 
+.pendingAttachmentBody:hover .pendingFileName { + text-decoration: underline; +} + +.removeAttachment { + display: flex; + align-items: center; + justify-content: center; + background: none; + border: none; + padding: 0; + margin-left: 4px; + cursor: pointer; + color: var(--text-muted); + transition: color var(--transition-fast); +} + +.removeAttachment:hover { + color: var(--color-error); +} + +/* Attachment error message */ +.attachmentError { + display: flex; + align-items: flex-start; + gap: var(--space-2); + padding: var(--space-2) var(--space-3); + background: var(--color-error-light); + border: 1px solid var(--color-error); + border-radius: var(--radius-sm); + font-size: var(--text-xs); + color: var(--color-error); +} + +.attachmentError span { + flex: 1; + line-height: 1.4; +} + +.attachmentError svg { + flex-shrink: 0; + margin-top: 1px; +} + +.dismissError { + display: flex; + align-items: center; + justify-content: center; + background: none; + border: none; + padding: 2px; + cursor: pointer; + color: var(--color-error); + opacity: 0.7; + transition: opacity var(--transition-fast); + flex-shrink: 0; +} + +.dismissError:hover { + opacity: 1; +} + +/* Reply bar above input */ +.replyBar { + display: flex; + align-items: center; + gap: var(--space-2); + padding: var(--space-2) var(--space-3); + background: var(--bg-tertiary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-sm); + font-size: var(--text-xs); + color: var(--text-primary); +} + +.replyText { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.replyCancel { + display: flex; + align-items: center; + justify-content: center; + background: none; + border: none; + padding: 2px; + cursor: pointer; + color: var(--text-muted); + transition: color var(--transition-fast); + flex-shrink: 0; +} + +.replyCancel:hover { + color: var(--color-error); +} + +.inputListening { + border-color: var(--color-primary); + box-shadow: 0 0 0 2px 
var(--color-primary-subtle); +} + +/* Mic button + language selector grouped together */ +.micGroup { + display: flex; + align-items: center; + gap: 2px; + position: relative; +} + +.langBtn { + background: transparent; + border: none; + color: var(--text-primary); + font-size: 10px; + font-family: inherit; + font-weight: 600; + cursor: pointer; + padding: 2px 3px; + border-radius: var(--radius-sm); + line-height: 1; + outline: none; + white-space: nowrap; +} + +.langBtn:hover:not(:disabled) { + background: var(--bg-tertiary); +} + +.langBtn:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.langDropdown { + position: absolute; + bottom: calc(100% + 6px); + left: 0; + background: var(--bg-secondary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-md); + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); + overflow: hidden; + z-index: 999; + min-width: 130px; +} + +.langOption { + display: flex; + align-items: center; + gap: 8px; + width: 100%; + background: transparent; + border: none; + color: var(--text-secondary); + font-family: inherit; + font-size: var(--text-sm); + padding: 7px 12px; + cursor: pointer; + text-align: left; +} + +.langOption:hover { + background: var(--bg-tertiary); + color: var(--text-primary); +} + +.langOptionActive { + color: var(--color-primary); +} + +.langCode { + font-weight: 600; + font-size: 11px; + width: 36px; + flex-shrink: 0; +} + +.langFull { + font-size: 11px; + opacity: 0.8; +} + +/* 3 bouncing dots shown while listening */ +.listeningDots { + display: flex; + align-items: center; + gap: 4px; + padding: 4px var(--space-3) 0; +} + +.listeningDots span { + display: block; + width: 6px; + height: 6px; + border-radius: 50%; + background: var(--color-primary); + animation: dotBounce 1.2s ease-in-out infinite; +} + +.listeningDots span:nth-child(1) { animation-delay: 0s; } +.listeningDots span:nth-child(2) { animation-delay: 0.2s; } +.listeningDots span:nth-child(3) { animation-delay: 0.4s; } + +@keyframes 
dotBounce { + 0%, 60%, 100% { transform: translateY(0); opacity: 0.4; } + 30% { transform: translateY(-5px); opacity: 1; } +} + +/* Mic button pulse animation when recording */ +.micListening { + animation: micPulse 1.2s ease-in-out infinite; +} + +@keyframes micPulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.4; } +} + +/* Attachment preview modal */ +.previewOverlay { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.55); + backdrop-filter: blur(8px); + display: flex; + align-items: center; + justify-content: center; + z-index: 9999; + padding: 32px; + animation: previewFadeIn 0.12s ease-out; +} + +@keyframes previewFadeIn { + from { opacity: 0; } + to { opacity: 1; } +} + +.previewModal { + background: var(--bg-secondary); + border: 1px solid var(--border-secondary); + border-radius: var(--radius-xl); + width: fit-content; + min-width: 320px; + max-width: min(92vw, 1100px); + max-height: 92vh; + display: flex; + flex-direction: column; + overflow: hidden; + box-shadow: 0 24px 60px rgba(0, 0, 0, 0.5); + animation: previewSlideUp 0.12s ease-out; +} + +@keyframes previewSlideUp { + from { opacity: 0; transform: translateY(8px); } + to { opacity: 1; transform: translateY(0); } +} + +.previewHeader { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 12px; + padding: 16px 20px; + border-bottom: 1px solid var(--border-primary); + min-width: 0; +} + +.previewHeaderLeft { + display: flex; + flex-direction: column; + gap: 4px; + min-width: 0; + flex: 1; +} + +.previewFileName { + font-size: var(--text-lg); + font-weight: var(--font-semibold); + color: var(--text-primary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.previewMeta { + font-size: var(--text-xs); + color: var(--text-secondary); +} + +.previewClose { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + background: none; + border: none; + cursor: pointer; + color: var(--text-muted); + 
border-radius: var(--radius-md); + flex-shrink: 0; + transition: background var(--transition-fast), color var(--transition-fast); +} + +.previewClose:hover { + background: var(--bg-hover); + color: var(--text-primary); +} + +.previewImage { + display: block; + max-width: min(88vw, 1060px); + max-height: calc(92vh - 80px); + width: auto; + height: auto; + object-fit: contain; +} + +.previewPdf { + width: min(860px, 88vw); + height: calc(92vh - 80px); + border: none; + background: var(--bg-primary); + display: block; +} + +.previewTextContent { + width: min(760px, 88vw); + max-height: calc(92vh - 80px); + overflow: auto; + margin: 0; + padding: 16px 20px; + font-family: var(--font-mono); + font-size: var(--text-xs); + line-height: 1.6; + color: var(--text-primary); + background: var(--bg-primary); + white-space: pre; + min-height: 120px; + box-sizing: border-box; +} + +.previewFileInfo { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 12px; + padding: 36px 48px; + background: var(--bg-primary); + width: min(480px, 88vw); +} + +.previewUnavailableText { + font-size: var(--text-sm); + color: var(--text-secondary); + text-align: center; + line-height: var(--leading-relaxed); + margin: 0; +} + +/* Mobile */ +@media (max-width: 768px) { + .messagesContainer { + padding: var(--space-3); + } + + .inputArea { + padding: var(--space-2); + } + + .statusBar { + padding: var(--space-2) var(--space-3); + } +} diff --git a/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx new file mode 100644 index 00000000..7dc810ab --- /dev/null +++ b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx @@ -0,0 +1,731 @@ +import React, { useState, useRef, useEffect, useLayoutEffect, KeyboardEvent, useCallback, ChangeEvent, useMemo } from 'react' +import ReactDOM from 'react-dom' +import { Send, Paperclip, X, Loader2, File, AlertCircle, Reply, Mic, MicOff } from 
'lucide-react' +import { useVirtualizer } from '@tanstack/react-virtual' +import { useWebSocket } from '../../contexts/WebSocketContext' +import { useToast } from '../../contexts/ToastContext' +import { Button, IconButton, StatusIndicator } from '../ui' +import { useDerivedAgentStatus } from '../../hooks' +import { ChatMessageItem } from '../../pages/Chat/ChatMessage' +import styles from './Chat.module.css' + +// Pending attachment type +interface PendingAttachment { + name: string + type: string + size: number + content: string // base64 +} + +interface ChatProps { + /** Optional Living UI project ID — auto-included in messages sent from this chat */ + livingUIId?: string + /** Optional placeholder text for the input */ + placeholder?: string + /** Optional empty state message */ + emptyMessage?: string +} + +const MIC_LANGUAGES = [ + { code: 'en-US', label: 'EN', full: 'English' }, + { code: 'ja-JP', label: 'JA', full: '日本語' }, + { code: 'zh-CN', label: 'ZH', full: '中文 (简体)' }, + { code: 'zh-TW', label: 'ZH-TW', full: '中文 (繁體)' }, + { code: 'ko-KR', label: 'KO', full: '한국어' }, + { code: 'ar-SA', label: 'AR', full: 'العربية' }, + { code: 'es-ES', label: 'ES', full: 'Español' }, + { code: 'fr-FR', label: 'FR', full: 'Français' }, + { code: 'de-DE', label: 'DE', full: 'Deutsch' }, + { code: 'pt-BR', label: 'PT', full: 'Português' }, + { code: 'hi-IN', label: 'HI', full: 'हिन्दी' }, + { code: 'ru-RU', label: 'RU', full: 'Русский' }, + { code: 'it-IT', label: 'IT', full: 'Italiano' }, +] + +// Attachment limits +const MAX_ATTACHMENT_COUNT = 10 +const MAX_TOTAL_SIZE_BYTES = 70 * 1024 * 1024 // 70MB + +const formatFileSize = (bytes: number): string => { + if (bytes === 0) return '0 B' + const k = 1024 + const sizes = ['B', 'KB', 'MB', 'GB', 'TB'] + const i = Math.floor(Math.log(bytes) / Math.log(k)) + return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i] +} + +export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) { + const { + 
messages, + actions, + connected, + sendMessage, + sendOptionClick, + openFile, + openFolder, + lastSeenMessageId, + markMessagesAsSeen, + replyTarget, + setReplyTarget, + clearReplyTarget, + loadOlderMessages, + hasMoreMessages, + loadingOlderMessages, + } = useWebSocket() + + const status = useDerivedAgentStatus({ actions, messages, connected }) + const { showToast } = useToast() + + // Render messages in server-canonical timestamp order so that the order + // users see live matches the order they see after a refresh (where history + // is loaded sorted by timestamp). Pending bubbles use client time, so they + // land at the end; when the server echo arrives with its real timestamp, + // the item may shift a position or two — a CSS transform transition on the + // virtualized row animates that shift as a smooth slide. + const orderedMessages = useMemo(() => { + return messages.slice().sort((a, b) => a.timestamp - b.timestamp) + }, [messages]) + + const [input, setInput] = useState('') + const [pendingAttachments, setPendingAttachments] = useState([]) + const [attachmentError, setAttachmentError] = useState(null) + const [isDragOver, setIsDragOver] = useState(false) + const [previewAttachment, setPreviewAttachment] = useState(null) + const inputRef = useRef(null) + const fileInputRef = useRef(null) + + // Voice input state + const [isListening, setIsListening] = useState(false) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const recognitionRef = useRef(null) + const [micLang, setMicLang] = useState(() => { + const browserLang = navigator.language || 'en-US' + return MIC_LANGUAGES.some(l => l.code === browserLang) ? 
browserLang : 'en-US' + }) + const [langOpen, setLangOpen] = useState(false) + const langDropdownRef = useRef(null) + + // Input history (terminal-style up/down arrow navigation) + const inputHistoryRef = useRef([]) + const historyIndexRef = useRef(-1) + const parentRef = useRef(null) + const wasNearBottomRef = useRef(true) + const prevMessageCountRef = useRef(0) + const hasInitialScrolled = useRef(false) + + const attachmentValidation = useMemo(() => { + const totalSize = pendingAttachments.reduce((sum, att) => sum + att.size, 0) + const count = pendingAttachments.length + if (count > MAX_ATTACHMENT_COUNT) { + return { valid: false, error: `Maximum ${MAX_ATTACHMENT_COUNT} files allowed. You have ${count} files.` } + } + if (totalSize > MAX_TOTAL_SIZE_BYTES) { + return { valid: false, error: `Total size (${formatFileSize(totalSize)}) exceeds 70MB limit.` } + } + return { valid: true, error: null } + }, [pendingAttachments]) + + const virtualizer = useVirtualizer({ + count: orderedMessages.length, + getScrollElement: () => parentRef.current, + estimateSize: () => 100, + overscan: 5, + }) + + const getFirstUnreadIndex = useCallback(() => { + if (!lastSeenMessageId) return -1 + const lastSeenIdx = orderedMessages.findIndex(m => m.messageId === lastSeenMessageId) + if (lastSeenIdx === -1) return 0 + if (lastSeenIdx === orderedMessages.length - 1) return -1 + return lastSeenIdx + 1 + }, [orderedMessages, lastSeenMessageId]) + + const isNearBottom = useCallback(() => { + const container = parentRef.current + if (!container) return true + return container.scrollHeight - container.scrollTop - container.clientHeight < 100 + }, []) + + // Close language dropdown when clicking outside + useEffect(() => { + if (!langOpen) return + const handler = (e: MouseEvent) => { + if (langDropdownRef.current && !langDropdownRef.current.contains(e.target as Node)) { + setLangOpen(false) + } + } + document.addEventListener('mousedown', handler) + return () => 
document.removeEventListener('mousedown', handler) + }, [langOpen]) + + // Close preview on Escape + useEffect(() => { + if (!previewAttachment) return + const handler = (e: globalThis.KeyboardEvent) => { if (e.key === 'Escape') setPreviewAttachment(null) } + document.addEventListener('keydown', handler) + return () => document.removeEventListener('keydown', handler) + }, [previewAttachment]) + + // Track scroll position + load older messages on scroll-to-top + useEffect(() => { + const container = parentRef.current + if (!container) return + const handleScroll = () => { + wasNearBottomRef.current = isNearBottom() + if (container.scrollTop < 100 && hasMoreMessages && !loadingOlderMessages) { + loadOlderMessages() + } + } + container.addEventListener('scroll', handleScroll) + return () => container.removeEventListener('scroll', handleScroll) + }, [isNearBottom, hasMoreMessages, loadingOlderMessages, loadOlderMessages]) + + // Scroll to unread on mount, auto-scroll on new messages if near bottom + useEffect(() => { + if (orderedMessages.length === 0) return + + const isNewMessage = orderedMessages.length > prevMessageCountRef.current + prevMessageCountRef.current = orderedMessages.length + + if (!hasInitialScrolled.current) { + hasInitialScrolled.current = true + const firstUnreadIdx = getFirstUnreadIndex() + setTimeout(() => { + if (firstUnreadIdx !== -1) { + virtualizer.scrollToIndex(firstUnreadIdx, { align: 'start', behavior: 'auto' }) + } else { + virtualizer.scrollToIndex(orderedMessages.length - 1, { align: 'end', behavior: 'auto' }) + } + markMessagesAsSeen() + }, 50) + } else if (isNewMessage && wasNearBottomRef.current) { + virtualizer.scrollToIndex(orderedMessages.length - 1, { align: 'end', behavior: 'smooth' }) + markMessagesAsSeen() + } + }, [orderedMessages.length, virtualizer, getFirstUnreadIndex, markMessagesAsSeen]) + + const adjustTextareaHeight = useCallback(() => { + const textarea = inputRef.current + if (!textarea) return + // When the textarea 
is empty, let CSS min-height control the height. + // Reading scrollHeight on an empty textarea in a narrow container can + // include wrapped placeholder text, which would balloon the input to + // multiple visual rows. + if (!textarea.value) { + textarea.style.height = '' + return + } + textarea.style.height = 'auto' + textarea.style.height = `${textarea.scrollHeight}px` + }, []) + + useLayoutEffect(() => { + adjustTextareaHeight() + }, [input, adjustTextareaHeight]) + + const handleChatReply = useCallback(( + sessionId: string | undefined, + displayName: string, + fullContent: string + ) => { + setReplyTarget({ + type: 'chat', + sessionId, + displayName, + originalContent: fullContent, + }) + inputRef.current?.focus() + }, [setReplyTarget]) + + const toggleListening = useCallback(() => { + if (isListening) { + recognitionRef.current?.stop() + setIsListening(false) + return + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const w = window as any + const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition + + if (!SpeechRecognitionAPI) { + alert('Speech recognition is not supported in this browser.') + return + } + + const recognition = new SpeechRecognitionAPI() + recognition.continuous = true + recognition.interimResults = true + recognition.lang = micLang + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let finalTranscript = '' + for (let i = event.resultIndex; i < event.results.length; i++) { + if (event.results[i].isFinal) { + finalTranscript += event.results[i][0].transcript + } + } + if (finalTranscript) { + setInput(prev => prev + (prev.endsWith(' ') || prev === '' ? 
'' : ' ') + finalTranscript) + if (inputRef.current) { + inputRef.current.style.height = 'auto' + inputRef.current.style.height = inputRef.current.scrollHeight + 'px' + } + } + } + + recognition.onerror = (event: SpeechRecognitionErrorEvent) => { + setIsListening(false) + if (event.error === 'not-allowed' || event.error === 'service-not-allowed') { + alert('Microphone access denied. Please allow microphone permission in your browser settings.') + } + } + recognition.onend = () => setIsListening(false) + + recognitionRef.current = recognition + recognition.start() + setIsListening(true) + inputRef.current?.focus() + }, [isListening, micLang]) + + // Stop mic if component unmounts while listening + useEffect(() => { + return () => { recognitionRef.current?.abort() } + }, []) + + const handleSend = () => { + if (!attachmentValidation.valid) return + if (input.trim() || pendingAttachments.length > 0) { + // Save to input history + if (input.trim()) { + inputHistoryRef.current.push(input.trim()) + } + historyIndexRef.current = -1 + + const replyContext = replyTarget ? { + sessionId: replyTarget.sessionId, + originalMessage: replyTarget.originalContent, + } : undefined + + // Stop mic if still listening when message is sent + if (isListening) { + recognitionRef.current?.stop() + setIsListening(false) + } + + sendMessage( + input.trim(), + pendingAttachments.length > 0 ? 
pendingAttachments : undefined, + replyContext, + livingUIId + ) + if (!connected) { + showToast('info', 'Reconnecting — your message will send when the connection is restored.') + } + setInput('') + setPendingAttachments([]) + setAttachmentError(null) + clearReplyTarget() + if (inputRef.current) { + inputRef.current.style.height = 'auto' + } + inputRef.current?.focus() + } + } + + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + handleSend() + } else if (e.key === 'ArrowUp' || e.key === 'ArrowDown') { + const history = inputHistoryRef.current + if (history.length === 0) return + if (historyIndexRef.current === -1 && input.trim() !== '') return + + if (e.key === 'ArrowUp') { + e.preventDefault() + if (historyIndexRef.current === -1) { + historyIndexRef.current = history.length - 1 + } else if (historyIndexRef.current > 0) { + historyIndexRef.current-- + } + setInput(history[historyIndexRef.current]) + } else if (e.key === 'ArrowDown') { + e.preventDefault() + if (historyIndexRef.current === -1) return + if (historyIndexRef.current < history.length - 1) { + historyIndexRef.current++ + setInput(history[historyIndexRef.current]) + } else { + historyIndexRef.current = -1 + setInput('') + } + } + } + } + + const handleAttachClick = () => { + fileInputRef.current?.click() + } + + const processFiles = async (files: globalThis.File[]) => { + if (files.length === 0) return + + const totalFileCount = pendingAttachments.length + files.length + if (totalFileCount > MAX_ATTACHMENT_COUNT) { + setAttachmentError(`Maximum ${MAX_ATTACHMENT_COUNT} files allowed.`) + return + } + + const newAttachments: PendingAttachment[] = [] + let newTotalSize = pendingAttachments.reduce((sum, att) => sum + att.size, 0) + + for (const file of files) { + if (file.size > MAX_TOTAL_SIZE_BYTES) { + setAttachmentError(`File "${file.name}" (${formatFileSize(file.size)}) exceeds the 70MB limit.`) + return + } + if (newTotalSize + file.size > 
MAX_TOTAL_SIZE_BYTES) { + setAttachmentError(`Adding "${file.name}" would exceed the 70MB total size limit.`) + return + } + try { + const content = await readFileAsBase64(file) + newAttachments.push({ name: file.name, type: file.type || 'application/octet-stream', size: file.size, content }) + newTotalSize += file.size + } catch { + setAttachmentError(`Failed to read file "${file.name}".`) + return + } + } + + setAttachmentError(null) + setPendingAttachments(prev => [...prev, ...newAttachments]) + } + + const handleFileSelect = async (e: ChangeEvent) => { + const files = e.target.files + if (!files || files.length === 0) return + await processFiles(Array.from(files)) + e.target.value = '' + } + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault() + setIsDragOver(true) + } + + const handleDragLeave = (e: React.DragEvent) => { + if (!e.currentTarget.contains(e.relatedTarget as Node)) { + setIsDragOver(false) + } + } + + const handleDrop = async (e: React.DragEvent) => { + e.preventDefault() + setIsDragOver(false) + const files = Array.from(e.dataTransfer.files) + await processFiles(files) + } + + const handlePaste = async (e: React.ClipboardEvent) => { + const files = Array.from(e.clipboardData.files) + if (files.length === 0) return + e.preventDefault() + await processFiles(files) + } + + const removeAttachment = (index: number) => { + setPendingAttachments(prev => prev.filter((_, i) => i !== index)) + setAttachmentError(null) + } + + const openPreview = (att: PendingAttachment) => { + setPreviewAttachment(att) + } + + const readFileAsBase64 = (file: globalThis.File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.onload = () => { + const result = reader.result as string + resolve(result.split(',')[1]) + } + reader.onerror = reject + reader.readAsDataURL(file) + }) + } + + const pdfBlobUrl = useMemo(() => { + if (!previewAttachment) return null + const isPdf = previewAttachment.type === 
'application/pdf' || previewAttachment.name.toLowerCase().endsWith('.pdf') + if (!isPdf) return null + try { + const bytes = Uint8Array.from(atob(previewAttachment.content), c => c.charCodeAt(0)) + const blob = new Blob([bytes], { type: 'application/pdf' }) + return URL.createObjectURL(blob) + } catch { return null } + }, [previewAttachment]) + + useEffect(() => { + return () => { if (pdfBlobUrl) URL.revokeObjectURL(pdfBlobUrl) } + }, [pdfBlobUrl]) + + return ( +
+
+ {orderedMessages.length === 0 ? ( +
+
+ + + + +
+

{emptyMessage || 'Start a conversation'}

+

{livingUIId ? 'Ask the agent about this UI' : 'Send a message to begin interacting with CraftBot'}

+
+ ) : ( +
+ {loadingOlderMessages && ( +
+ Loading older messages... +
+ )} + {virtualizer.getVirtualItems().map((virtualItem) => { + const message = orderedMessages[virtualItem.index] + // Prefer clientId as the React key so that when a pending optimistic + // message is reconciled with the server echo (messageId changes from + // `pending:` to the real id), React reuses the same DOM node — + // letting the CSS transform transition animate the slide into + // its server-canonical sorted position. + const rowKey = message.clientId || message.messageId || virtualItem.index + return ( +
+ +
+ ) + })} +
+ )} +
+ + {/* Status bar */} +
+ + {status.message} +
+ + {/* Input area */} +
+ + } variant="ghost" tooltip="Attach file" onClick={handleAttachClick} /> + +
+ : } + variant="ghost" + active={isListening} + tooltip={isListening ? 'Stop listening' : 'Voice input'} + onClick={toggleListening} + className={isListening ? styles.micListening : undefined} + /> + + {langOpen && ( +
+ {MIC_LANGUAGES.map(lang => ( + + ))} +
+ )} +
+ +
+ {(attachmentError || !attachmentValidation.valid) && ( +
+ + {attachmentError || attachmentValidation.error} + +
+ )} + + {replyTarget && ( +
+ + Replying to: {replyTarget.displayName} + +
+ )} + + {pendingAttachments.length > 0 && ( +
+ {pendingAttachments.map((att, idx) => ( +
+ + +
+ ))} +
+ )} + + {isListening && ( +
+ +
+ )} + +