From 43fbe6f0e99dfb63b0d7351077afcf2c6bce6610 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 10:48:22 +0800 Subject: [PATCH 01/25] feat(arxiv): add paper extract and search tools --- ARCHITECTURE.md | 8 +- README.md | 1 + config.toml.example | 25 ++ docs/configuration.md | 19 ++ docs/development.md | 1 + src/Undefined/README.md | 1 + src/Undefined/arxiv/__init__.py | 19 ++ src/Undefined/arxiv/client.py | 183 +++++++++++++ src/Undefined/arxiv/downloader.py | 105 ++++++++ src/Undefined/arxiv/models.py | 16 ++ src/Undefined/arxiv/parser.py | 143 ++++++++++ src/Undefined/arxiv/sender.py | 244 ++++++++++++++++++ src/Undefined/config/loader.py | 80 ++++++ src/Undefined/handlers.py | 136 +++++++++- src/Undefined/skills/agents/README.md | 6 +- .../skills/agents/info_agent/README.md | 2 +- .../skills/agents/info_agent/config.json | 4 +- .../skills/agents/info_agent/intro.md | 3 +- .../skills/agents/info_agent/prompt.md | 1 + .../info_agent/tools/arxiv_search/config.json | 25 ++ .../info_agent/tools/arxiv_search/handler.py | 81 ++++++ .../skills/tools/arxiv_paper/README.md | 22 ++ .../skills/tools/arxiv_paper/config.json | 26 ++ .../skills/tools/arxiv_paper/handler.py | 78 ++++++ .../messages/send_url_file/handler.py | 98 +++---- src/Undefined/utils/http_download.py | 125 +++++++++ tests/test_arxiv_config.py | 89 +++++++ tests/test_arxiv_parser.py | 37 +++ tests/test_arxiv_sender.py | 167 ++++++++++++ tests/test_arxiv_tools.py | 94 +++++++ tests/test_handlers_arxiv_auto_extract.py | 67 +++++ 31 files changed, 1823 insertions(+), 83 deletions(-) create mode 100644 src/Undefined/arxiv/__init__.py create mode 100644 src/Undefined/arxiv/client.py create mode 100644 src/Undefined/arxiv/downloader.py create mode 100644 src/Undefined/arxiv/models.py create mode 100644 src/Undefined/arxiv/parser.py create mode 100644 src/Undefined/arxiv/sender.py create mode 100644 src/Undefined/skills/agents/info_agent/tools/arxiv_search/config.json create mode 
100644 src/Undefined/skills/agents/info_agent/tools/arxiv_search/handler.py create mode 100644 src/Undefined/skills/tools/arxiv_paper/README.md create mode 100644 src/Undefined/skills/tools/arxiv_paper/config.json create mode 100644 src/Undefined/skills/tools/arxiv_paper/handler.py create mode 100644 src/Undefined/utils/http_download.py create mode 100644 tests/test_arxiv_config.py create mode 100644 tests/test_arxiv_parser.py create mode 100644 tests/test_arxiv_sender.py create mode 100644 tests/test_arxiv_tools.py create mode 100644 tests/test_handlers_arxiv_auto_extract.py diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index d0b03286..218699c5 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -86,6 +86,7 @@ graph TB T_Time["get_current_time
获取当前时间"] T_GetPicture["get_picture
获取图片"] T_GetUserInfo["get_user_info
获取用户信息"] + T_ArxivPaper["arxiv_paper
arXiv 论文发送"] T_BilibiliVideo["bilibili_video
B站视频下载发送"] end @@ -102,7 +103,7 @@ graph TB end subgraph IntelligentAgents["智能体 Agents (skills/agents/, 6个)"] - A_Info["info_agent
信息查询助手
(17个工具)
• weather_query
• *hot 热搜
• bilibili_*
• whois"] + A_Info["info_agent
信息查询助手
(18个工具)
• weather_query
• *hot 热搜
• bilibili_*
• arxiv_search
• whois"] A_Web["web_agent
网络搜索助手
(3个工具 + MCP)
• web_search
• crawl_webpage
• Playwright MCP"] A_File["file_analysis_agent
文件分析助手
(14个工具)
• extract_* (PDF/Word/Excel/PPT)
• analyze_code
• analyze_multimodal"] A_Naga["naga_code_analysis_agent
NagaAgent 代码分析
(7个工具)
• read_file / glob
• search_file_content"] @@ -802,6 +803,7 @@ description: 从 PDF 文件中提取文本和表格,填写表单。当用户 | **存储配置** | `token_usage.*` | Token 归档和清理策略 | | **认知记忆** | `cognitive.enabled`, `cognitive.query.*`, `models.embedding.*` | 事件检索、时间衰减加权、侧写与后台史官 | | **Bilibili** | `bilibili.auto_extract_enabled`, `bilibili.cookie`, `bilibili.prefer_quality` | B站视频自动提取与下载 | +| **arXiv** | `arxiv.auto_extract_enabled`, `arxiv.max_file_size`, `arxiv.auto_extract_max_items` | arXiv 论文自动提取、搜索与 PDF 发送 | | **思考链** | `*.thinking_enabled` | 思维链支持 | | **思维链兼容** | `*.thinking_tool_call_compat` | 思维链 + 工具调用兼容 | | **WebUI** | `webui.url`, `webui.port`, `webui.password` | 配置控制台 | @@ -832,7 +834,7 @@ description: 从 PDF 文件中提取文本和表格,填写表单。当用户 | Agent | 功能定位 | 工具数量 | 核心能力 | |-------|---------|---------|---------| -| **info_agent** | 信息查询助手 | 17个 | 天气查询、热搜榜单、网络检测、B站信息查询等 | +| **info_agent** | 信息查询助手 | 18个 | 天气查询、热搜榜单、网络检测、B站信息查询、arXiv 搜索等 | | **web_agent** | 网络搜索助手 | 3个 + MCP | 网页搜索、爬虫、Playwright MCP | | **file_analysis_agent** | 文件分析助手 | 14个 | PDF/Word/Excel/PPT解析、代码分析、多模态分析 | | **naga_code_analysis_agent** | NagaAgent 代码分析 | 7个 | 代码库浏览、文件搜索、目录遍历 | @@ -841,7 +843,7 @@ description: 从 PDF 文件中提取文本和表格,填写表单。当用户 ### Skills 插件系统 -- **Tools (基础工具)**:原子化的功能单元,如 `send_message`, `get_history`, `bilibili_video`。 +- **Tools (基础工具)**:原子化的功能单元,如 `send_message`, `get_history`, `bilibili_video`, `arxiv_paper`。 - **Toolsets (复合工具集)**:9大类工具集 (group, messages, memory, contacts, group_analysis, notices, render, scheduler, cognitive)。 - **延迟加载 + 热重载**:`handler.py` 仅在首次调用时导入;当 `skills/` 下的 `config.json`/`handler.py` 发生变更时会自动重新加载。 - **Agent 自我介绍自动生成**:启动时按 Agent 代码/配置 hash 生成 `intro.generated.md` 并与 `intro.md` 合并。 diff --git a/README.md b/README.md index 12be7a70..2befc1e0 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ - **Agent 私有 MCP**:可为单个 agent 提供独立 MCP 配置,按调用即时加载并释放,工具仅对该 agent 可见。 - **Anthropic Skills**:支持 Anthropic Agent Skills(SKILL.md 格式),遵循 agentskills.io 开放标准,提供领域知识注入能力。 - **Bilibili 视频提取**:自动检测消息中的 B 站视频链接/BV 号/小程序分享,下载 
1080p 视频并通过 QQ 发送;同时提供 AI 工具调用入口。 +- **arXiv 论文提取与搜索**:自动检测消息中的 arXiv 链接/标识并发送论文信息与 PDF;同时提供 `arxiv_paper` 发送工具和 `arxiv_search` 检索工具。 - **思维链支持**:支持开启思维链,提升复杂逻辑推理能力。 - **高并发架构**:基于 `asyncio` 全异步设计,支持多队列消息处理与工具并发执行,轻松应对高并发场景。 - **异步安全 I/O**:统一 IO 层通过线程池 + 跨平台文件锁(Linux/macOS `flock`,Windows `msvcrt`)+ 原子写入(`os.replace`)保证并发写入不损坏、且不阻塞主事件循环。 diff --git a/config.toml.example b/config.toml.example index 62b95723..972a350b 100644 --- a/config.toml.example +++ b/config.toml.example @@ -720,6 +720,31 @@ auto_extract_group_ids = [] # en: Private chat allowlist for auto-extraction (empty = follow global access.allowed_private_ids). auto_extract_private_ids = [] +# zh: arXiv 论文自动提取配置。 +# en: arXiv paper auto-extraction settings. +[arxiv] +# zh: 是否启用自动提取(检测到 arXiv 链接 / arXiv:ID / 带 arxiv 关键词的新式编号时自动发送论文信息并尽力附带 PDF)。 +# en: Enable auto-extraction (auto-send paper info and best-effort PDF when arXiv links / arXiv:ID / keyword-scoped new-style IDs are detected). +auto_extract_enabled = false +# zh: 最大 PDF 文件大小(MB),超过则不上传 PDF,仅发送论文信息。0=不限。 +# en: Max PDF file size (MB); exceeding this skips PDF upload and sends paper info only. 0=unlimited. +max_file_size = 100 +# zh: 自动提取功能的群聊白名单(空=跟随全局 access.allowed_group_ids)。 +# en: Group allowlist for auto-extraction (empty = follow global access.allowed_group_ids). +auto_extract_group_ids = [] +# zh: 自动提取功能的私聊白名单(空=跟随全局 access.allowed_private_ids)。 +# en: Private chat allowlist for auto-extraction (empty = follow global access.allowed_private_ids). +auto_extract_private_ids = [] +# zh: 单条消息最多自动处理几篇 arXiv 论文。<=0 回退 5。 +# en: Max number of arXiv papers to auto-process from one message. <=0 falls back to 5. +auto_extract_max_items = 5 +# zh: 论文信息里最多展示多少位作者。<=0 回退 20。 +# en: Max number of authors shown in paper info. <=0 falls back to 20. +author_preview_limit = 20 +# zh: 论文信息里摘要预览的最大字符数。<=0 回退 1000。 +# en: Max summary preview characters in paper info. <=0 falls back to 1000. 
+summary_preview_chars = 1000 + # zh: Code Delivery Agent 配置(代码交付 Agent,在 Docker 容器中编写代码并打包上传)。 # en: Code Delivery Agent settings (writes code in Docker containers and delivers packaged results). [code_delivery] diff --git a/docs/configuration.md b/docs/configuration.md index 391109ea..ebb4b5be 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -536,6 +536,25 @@ model_name = "gpt-4o-mini" --- +### 4.20.1 `[arxiv]` 自动提取 + +| 字段 | 默认值 | 说明 | 约束/回退 | +|---|---:|---|---| +| `auto_extract_enabled` | `false` | 是否自动提取 arXiv 论文 | | +| `max_file_size` | `100` | 最大 PDF 体积(MB),`0` 不限 | `<0` 回退 `100` | +| `auto_extract_group_ids` | `[]` | 功能级群白名单 | 空时跟随全局 access | +| `auto_extract_private_ids` | `[]` | 功能级私聊白名单 | 空时跟随全局 access | +| `auto_extract_max_items` | `5` | 单条消息最多自动处理几篇论文 | `<=0` 回退 `5`,`>20` 截断到 `20` | +| `author_preview_limit` | `20` | 信息消息中作者预览上限 | `<=0` 回退 `20`,`>100` 截断到 `100` | +| `summary_preview_chars` | `1000` | 信息消息中摘要预览字符数上限 | `<=0` 回退 `1000`,`>8000` 截断到 `8000` | + +触发规则: +- 命中 `arxiv.org/abs/...`、`arxiv.org/pdf/...` 或 `arXiv:` 时直接触发。 +- 裸新式编号仅在消息中同时出现 `arxiv` 关键词时触发,避免误判普通数字串。 +- PDF 下载或上传失败时不会额外发送失败提示,只保留论文信息消息。 + +--- + ### 4.21 `[code_delivery]` 代码交付 Agent | 字段 | 默认值 | 说明 | 约束/回退 | diff --git a/docs/development.md b/docs/development.md index 54a1d2d0..66afd43e 100644 --- a/docs/development.md +++ b/docs/development.md @@ -11,6 +11,7 @@ Undefined 欢迎开发者参与共建和进行二次开发! 
```text src/Undefined/ ├── ai/ # AI 运行时核心组件 (client, prompt, tooling 工具组装, summary 短期摘要, multimodal 多模态) +├── arxiv/ # arXiv 论文解析、元信息获取、PDF 下载与发送 ├── bilibili/ # B站视频流解析、分段下载与异步发送 ├── cognitive/ # 认知记忆系统底座 (向量存储, 史官合并/改写, 侧写生成, 任务队列) ├── skills/ # 技能插件核心目录 (存放所有的工具与智能体) diff --git a/src/Undefined/README.md b/src/Undefined/README.md index addd956a..534c11ab 100644 --- a/src/Undefined/README.md +++ b/src/Undefined/README.md @@ -4,6 +4,7 @@ 核心模块: - `ai/`:模型请求、提示词构建、工具调度、多模态与总结能力 +- `arxiv/`:arXiv 论文解析、元信息获取、PDF 下载与发送(自动触发 + AI 工具) - `bilibili/`:B 站视频解析、下载与发送(自动触发 + AI 工具) - `services/`:安全、命令、队列与协调服务 - `skills/`:工具与智能体插件系统 diff --git a/src/Undefined/arxiv/__init__.py b/src/Undefined/arxiv/__init__.py new file mode 100644 index 00000000..f557af20 --- /dev/null +++ b/src/Undefined/arxiv/__init__.py @@ -0,0 +1,19 @@ +from Undefined.arxiv.client import SearchResponse, get_paper_info, search_papers +from Undefined.arxiv.models import PaperInfo +from Undefined.arxiv.parser import ( + extract_arxiv_ids, + extract_from_json_message, + normalize_arxiv_id, +) +from Undefined.arxiv.sender import send_arxiv_paper + +__all__ = [ + "PaperInfo", + "SearchResponse", + "extract_arxiv_ids", + "extract_from_json_message", + "get_paper_info", + "normalize_arxiv_id", + "search_papers", + "send_arxiv_paper", +] diff --git a/src/Undefined/arxiv/client.py b/src/Undefined/arxiv/client.py new file mode 100644 index 00000000..bbe2940d --- /dev/null +++ b/src/Undefined/arxiv/client.py @@ -0,0 +1,183 @@ +"""arXiv 官方 API 客户端。""" + +from __future__ import annotations + +from collections.abc import Iterable +from dataclasses import dataclass +import re +from typing import cast + +from lxml import etree + +from Undefined.arxiv.models import PaperInfo +from Undefined.arxiv.parser import normalize_arxiv_id +from Undefined.skills.http_client import request_with_retry + +_ARXIV_API_ENDPOINT = "https://export.arxiv.org/api/query" +_HEADERS = { + "User-Agent": "Undefined-bot/3.x 
(https://github.com/69gg/Undefined)", +} +_NS = { + "atom": "http://www.w3.org/2005/Atom", + "arxiv": "http://arxiv.org/schemas/atom", + "opensearch": "http://a9.com/-/spec/opensearch/1.1/", +} +_ADVANCED_QUERY_REGEX = re.compile(r"\b(?:all|ti|au|abs|cat|jr|rn):", re.I) + + +@dataclass(frozen=True) +class SearchResponse: + items: tuple[PaperInfo, ...] + total_results: int | None + start_index: int | None + + +def _normalize_space(value: str | None) -> str: + return " ".join((value or "").split()).strip() + + +def _build_abs_url(paper_id: str) -> str: + return f"https://arxiv.org/abs/{paper_id}" + + +def _build_pdf_url(paper_id: str) -> str: + return f"https://arxiv.org/pdf/{paper_id}.pdf" + + +def _xml_text(node: etree._Element | None, xpath: str) -> str: + if node is None: + return "" + result = node.findtext(xpath, default="", namespaces=_NS) + return _normalize_space(result) + + +def _link_url(entry: etree._Element, *, rel: str, title: str | None = None) -> str: + for link in entry.findall("atom:link", namespaces=_NS): + rel_value = _normalize_space(link.get("rel")) + title_value = _normalize_space(link.get("title")) + if rel_value != rel: + continue + if title is not None and title_value != title: + continue + href = _normalize_space(link.get("href")) + if href: + return href + return "" + + +def _parse_entry(entry: etree._Element) -> PaperInfo: + entry_id = _xml_text(entry, "atom:id") + paper_id = normalize_arxiv_id(entry_id) or "" + if not paper_id: + alternate_url = _link_url(entry, rel="alternate") + paper_id = normalize_arxiv_id(alternate_url) or "" + if not paper_id: + raise ValueError("无法从 arXiv API 结果中解析论文 ID") + + authors = tuple( + _normalize_space(author.findtext("atom:name", default="", namespaces=_NS)) + for author in entry.findall("atom:author", namespaces=_NS) + if _normalize_space(author.findtext("atom:name", default="", namespaces=_NS)) + ) + abs_url = _link_url(entry, rel="alternate") or _build_abs_url(paper_id) + pdf_url = _link_url(entry, 
rel="related", title="pdf") or _build_pdf_url(paper_id) + primary_category = "" + primary_node = entry.find("arxiv:primary_category", namespaces=_NS) + if primary_node is not None: + primary_category = _normalize_space(primary_node.get("term")) + + return PaperInfo( + paper_id=paper_id, + title=_xml_text(entry, "atom:title"), + authors=authors, + summary=_xml_text(entry, "atom:summary"), + published=_xml_text(entry, "atom:published"), + updated=_xml_text(entry, "atom:updated"), + primary_category=primary_category, + abs_url=abs_url, + pdf_url=pdf_url, + ) + + +def _parse_feed(xml_payload: bytes) -> etree._Element: + try: + return etree.fromstring(xml_payload) + except etree.XMLSyntaxError as exc: + raise ValueError("arXiv API 返回了无法解析的 XML") from exc + + +def _find_entries(feed: etree._Element) -> Iterable[etree._Element]: + return cast(list[etree._Element], feed.findall("atom:entry", namespaces=_NS)) + + +async def get_paper_info( + paper_id: str, + *, + context: dict[str, object] | None = None, +) -> PaperInfo: + normalized = normalize_arxiv_id(paper_id) + if normalized is None: + raise ValueError(f"无法解析 arXiv 标识: {paper_id}") + + response = await request_with_retry( + "GET", + _ARXIV_API_ENDPOINT, + params={"id_list": normalized}, + headers=_HEADERS, + default_timeout=30.0, + follow_redirects=True, + context=context, + ) + feed = _parse_feed(response.content) + entries = list(_find_entries(feed)) + if not entries: + raise ValueError(f"未找到 arXiv 论文: {normalized}") + return _parse_entry(entries[0]) + + +def _build_search_query(query: str) -> str: + stripped = _normalize_space(query) + if not stripped: + raise ValueError("请提供搜索内容。") + if _ADVANCED_QUERY_REGEX.search(stripped): + return stripped + keywords = [part for part in stripped.split(" ") if part] + return " AND ".join(f"all:{keyword}" for keyword in keywords) + + +async def search_papers( + query: str, + *, + start: int = 0, + max_results: int = 5, + context: dict[str, object] | None = None, +) -> 
SearchResponse: + safe_start = max(0, int(start)) + safe_max_results = max(1, min(int(max_results), 20)) + search_query = _build_search_query(query) + + response = await request_with_retry( + "GET", + _ARXIV_API_ENDPOINT, + params={ + "search_query": search_query, + "start": safe_start, + "max_results": safe_max_results, + }, + headers=_HEADERS, + default_timeout=30.0, + follow_redirects=True, + context=context, + ) + feed = _parse_feed(response.content) + items = tuple(_parse_entry(entry) for entry in _find_entries(feed)) + + total_results_text = _xml_text(feed, "opensearch:totalResults") + start_index_text = _xml_text(feed, "opensearch:startIndex") + total_results = int(total_results_text) if total_results_text.isdigit() else None + start_index = int(start_index_text) if start_index_text.isdigit() else None + return SearchResponse( + items=items, + total_results=total_results, + start_index=start_index, + ) diff --git a/src/Undefined/arxiv/downloader.py b/src/Undefined/arxiv/downloader.py new file mode 100644 index 00000000..c18a3601 --- /dev/null +++ b/src/Undefined/arxiv/downloader.py @@ -0,0 +1,105 @@ +"""arXiv PDF 下载。""" + +from __future__ import annotations + +from dataclasses import dataclass +import logging +from pathlib import Path +import uuid + +from Undefined.arxiv.models import PaperInfo +from Undefined.skills.http_config import get_request_timeout +from Undefined.utils.http_download import ( + cleanup_download_dir, + download_remote_file, + probe_remote_file, +) +from Undefined.utils.paths import DOWNLOAD_CACHE_DIR, ensure_dir + +logger = logging.getLogger(__name__) + +_ARXIV_DOWNLOAD_DIR = DOWNLOAD_CACHE_DIR / "arxiv" + + +@dataclass(frozen=True) +class PaperDownloadResult: + path: Path | None + size_bytes: int | None + status: str + + +def build_download_filename(paper_id: str) -> str: + safe_paper_id = paper_id.replace("/", "_") + return f"arXiv-{safe_paper_id}.pdf" + + +async def download_paper_pdf( + paper: PaperInfo, + *, + max_file_size_mb: 
int, + context: dict[str, object] | None = None, +) -> tuple[PaperDownloadResult, Path]: + timeout_seconds = max(get_request_timeout(480.0), 15.0) + max_file_size_bytes = ( + max_file_size_mb * 1024 * 1024 if max_file_size_mb > 0 else 2**63 - 1 + ) + task_dir = ensure_dir(_ARXIV_DOWNLOAD_DIR / uuid.uuid4().hex) + file_path = task_dir / build_download_filename(paper.paper_id) + + final_url = paper.pdf_url + expected_size: int | None = None + + try: + probe = await probe_remote_file( + paper.pdf_url, + timeout_seconds=min(timeout_seconds, 60.0), + follow_redirects=True, + context=context, + ) + final_url = probe.final_url + expected_size = probe.content_length + if expected_size is not None and expected_size > max_file_size_bytes: + logger.info( + "[arXiv] PDF 超过限制,跳过下载: paper=%s size=%sB limit=%sMB", + paper.paper_id, + expected_size, + max_file_size_mb, + ) + return PaperDownloadResult(None, expected_size, "too_large"), task_dir + except Exception as exc: + logger.warning( + "[arXiv] PDF 预检失败,转为直接流式下载: paper=%s err=%s", + paper.paper_id, + exc, + ) + + try: + _, downloaded_size = await download_remote_file( + final_url, + file_path, + max_file_size_bytes=max_file_size_bytes, + timeout_seconds=timeout_seconds, + expected_size=expected_size, + follow_redirects=True, + ) + logger.info( + "[arXiv] PDF 下载完成: paper=%s size=%sB path=%s", + paper.paper_id, + downloaded_size, + file_path, + ) + return PaperDownloadResult(file_path, downloaded_size, "downloaded"), task_dir + except ValueError as exc: + logger.info( + "[arXiv] PDF 下载跳过: paper=%s reason=%s", + paper.paper_id, + exc, + ) + return PaperDownloadResult(None, None, "skipped"), task_dir + except Exception: + logger.exception("[arXiv] PDF 下载失败: paper=%s", paper.paper_id) + return PaperDownloadResult(None, None, "failed"), task_dir + + +async def cleanup_download_path(task_dir: Path) -> None: + await cleanup_download_dir(task_dir) diff --git a/src/Undefined/arxiv/models.py b/src/Undefined/arxiv/models.py new file mode 
100644 index 00000000..232c5fcb --- /dev/null +++ b/src/Undefined/arxiv/models.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class PaperInfo: + paper_id: str + title: str + authors: tuple[str, ...] + summary: str + published: str + updated: str + primary_category: str + abs_url: str + pdf_url: str diff --git a/src/Undefined/arxiv/parser.py b/src/Undefined/arxiv/parser.py new file mode 100644 index 00000000..ee0317d5 --- /dev/null +++ b/src/Undefined/arxiv/parser.py @@ -0,0 +1,143 @@ +"""arXiv 标识解析。""" + +from __future__ import annotations + +import html +import json +import logging +import re +from typing import Any +from urllib.parse import unquote, urlparse + +logger = logging.getLogger(__name__) + +_URL_HOSTS = {"arxiv.org", "www.arxiv.org", "export.arxiv.org"} +_URL_REGEX = re.compile(r"https?://(?:www\.|export\.)?arxiv\.org/[^\s<>()]+", re.I) +_ARXIV_PREFIX_REGEX = re.compile( + r"\barxiv\s*:\s*([A-Za-z0-9.\-\/]+(?:v\d+)?)", + re.I, +) +_NEW_ID_REGEX = re.compile(r"\b\d{4}\.\d{4,5}(?:v\d+)?\b") +_OLD_ID_REGEX = re.compile(r"\b[a-z][a-z.\-]+/\d{7}(?:v\d+)?\b", re.I) +_ARXIV_KEYWORD_REGEX = re.compile(r"\barxiv\b", re.I) + + +def _strip_wrapper_chars(value: str) -> str: + stripped = value.strip() + while stripped and stripped[-1] in ".,;:!?)>]}'\"": + stripped = stripped[:-1].rstrip() + while stripped and stripped[0] in "(<[{'\"": + stripped = stripped[1:].lstrip() + return stripped + + +def _normalize_candidate(candidate: str) -> str | None: + normalized = _strip_wrapper_chars(html.unescape(candidate).strip()) + if not normalized: + return None + if _NEW_ID_REGEX.fullmatch(normalized): + return normalized + if _OLD_ID_REGEX.fullmatch(normalized): + return normalized + return None + + +def normalize_arxiv_id(identifier: str) -> str | None: + """将 URL 或文本中的 arXiv 标识标准化。""" + raw = html.unescape(identifier).strip() + if not raw: + return None + + prefix_match = 
_ARXIV_PREFIX_REGEX.search(raw) + if prefix_match: + return _normalize_candidate(prefix_match.group(1)) + + parsed = urlparse(raw) + hostname = parsed.hostname.lower() if parsed.hostname else "" + if hostname in _URL_HOSTS: + path = unquote(parsed.path or "").strip() + if path.startswith("/abs/"): + return _normalize_candidate(path.removeprefix("/abs/")) + if path.startswith("/pdf/"): + candidate = path.removeprefix("/pdf/") + if candidate.lower().endswith(".pdf"): + candidate = candidate[:-4] + return _normalize_candidate(candidate) + + return _normalize_candidate(raw) + + +def _append_candidate( + candidate: str, + *, + results: list[str], + seen: set[str], +) -> None: + normalized = normalize_arxiv_id(candidate) + if normalized is None or normalized in seen: + return + seen.add(normalized) + results.append(normalized) + + +def extract_arxiv_ids(text: str) -> list[str]: + """从纯文本中提取 arXiv 标识。""" + results: list[str] = [] + seen: set[str] = set() + + for match in _URL_REGEX.finditer(text): + _append_candidate(match.group(0), results=results, seen=seen) + + for match in _ARXIV_PREFIX_REGEX.finditer(text): + _append_candidate(match.group(1), results=results, seen=seen) + + if _ARXIV_KEYWORD_REGEX.search(text): + for match in _NEW_ID_REGEX.finditer(text): + _append_candidate(match.group(0), results=results, seen=seen) + + return results + + +def _collect_json_strings(value: Any) -> list[str]: + if isinstance(value, str): + return [value] + if isinstance(value, list): + strings: list[str] = [] + for item in value: + strings.extend(_collect_json_strings(item)) + return strings + if isinstance(value, dict): + strings = [] + for item in value.values(): + strings.extend(_collect_json_strings(item)) + return strings + return [] + + +def extract_from_json_message(segments: list[dict[str, Any]]) -> list[str]: + """从 QQ JSON 消息段中提取 arXiv 标识。""" + results: list[str] = [] + seen: set[str] = set() + + for segment in segments: + if segment.get("type") != "json": + continue + + 
raw_data = segment.get("data", {}).get("data", "") + if not raw_data: + continue + + try: + payload = json.loads(html.unescape(raw_data)) + except (TypeError, json.JSONDecodeError): + logger.debug("[arXiv] JSON 消息解析失败,跳过", exc_info=True) + continue + + for item in _collect_json_strings(payload): + for paper_id in extract_arxiv_ids(item): + if paper_id in seen: + continue + seen.add(paper_id) + results.append(paper_id) + + return results diff --git a/src/Undefined/arxiv/sender.py b/src/Undefined/arxiv/sender.py new file mode 100644 index 00000000..214ccebb --- /dev/null +++ b/src/Undefined/arxiv/sender.py @@ -0,0 +1,244 @@ +"""arXiv 论文发送。""" + +from __future__ import annotations + +import asyncio +import logging +from typing import TYPE_CHECKING, Literal + +from Undefined.arxiv.client import get_paper_info +from Undefined.arxiv.downloader import cleanup_download_path, download_paper_pdf +from Undefined.arxiv.models import PaperInfo +from Undefined.arxiv.parser import normalize_arxiv_id + +if TYPE_CHECKING: + from Undefined.utils.sender import MessageSender + +logger = logging.getLogger(__name__) + +_INFLIGHT_LOCK = asyncio.Lock() +_INFLIGHT_SENDS: dict[tuple[str, int, str], asyncio.Future[str]] = {} + + +def _build_abs_url(paper_id: str) -> str: + return f"https://arxiv.org/abs/{paper_id}" + + +def _build_pdf_url(paper_id: str) -> str: + return f"https://arxiv.org/pdf/{paper_id}.pdf" + + +def _minimal_paper_info(paper_id: str) -> PaperInfo: + return PaperInfo( + paper_id=paper_id, + title=f"arXiv:{paper_id}", + authors=(), + summary="", + published="", + updated="", + primary_category="", + abs_url=_build_abs_url(paper_id), + pdf_url=_build_pdf_url(paper_id), + ) + + +def _preview_authors(authors: tuple[str, ...], limit: int) -> str: + if not authors: + return "" + if len(authors) <= limit: + return "、".join(authors) + return f"{'、'.join(authors[:limit])} 等{len(authors)}位作者" + + +def _preview_summary(summary: str, limit: int) -> str: + normalized = " 
".join(summary.split()).strip() + if not normalized: + return "" + if len(normalized) <= limit: + return normalized + return normalized[:limit].rstrip() + "..." + + +def _display_date(info: PaperInfo) -> str: + source = info.published or info.updated + if not source: + return "" + return source[:10] + + +def _build_info_message( + info: PaperInfo, + *, + author_preview_limit: int, + summary_preview_chars: int, +) -> str: + lines: list[str] = [f"「{info.title or f'arXiv:{info.paper_id}'}」"] + + authors = _preview_authors(info.authors, author_preview_limit) + if authors: + lines.append(f"作者: {authors}") + if info.primary_category: + lines.append(f"分类: {info.primary_category}") + display_date = _display_date(info) + if display_date: + lines.append(f"日期: {display_date}") + + summary_preview = _preview_summary(info.summary, summary_preview_chars) + if summary_preview: + lines.append("---") + lines.append(summary_preview) + + lines.append("---") + lines.append(info.abs_url) + return "\n".join(lines) + + +async def _send_text_message( + sender: "MessageSender", + target_type: Literal["group", "private"], + target_id: int, + message: str, +) -> None: + if target_type == "group": + await sender.send_group_message(target_id, message, auto_history=False) + else: + await sender.send_private_message(target_id, message, auto_history=False) + + +async def _send_file_message( + sender: "MessageSender", + target_type: Literal["group", "private"], + target_id: int, + file_path: str, + file_name: str, +) -> None: + if target_type == "group": + await sender.send_group_file( + target_id, file_path, file_name, auto_history=False + ) + else: + await sender.send_private_file( + target_id, file_path, file_name, auto_history=False + ) + + +async def _send_arxiv_paper_once( + *, + paper_id: str, + sender: "MessageSender", + target_type: Literal["group", "private"], + target_id: int, + max_file_size: int, + author_preview_limit: int, + summary_preview_chars: int, + context: dict[str, object] | 
None = None, +) -> str: + info: PaperInfo + metadata_ready = True + try: + info = await get_paper_info(paper_id, context=context) + except Exception: + metadata_ready = False + info = _minimal_paper_info(paper_id) + logger.exception("[arXiv] 获取论文元信息失败: paper=%s", paper_id) + + info_message = _build_info_message( + info, + author_preview_limit=author_preview_limit, + summary_preview_chars=summary_preview_chars, + ) + await _send_text_message(sender, target_type, target_id, info_message) + + download_result, task_dir = await download_paper_pdf( + info, + max_file_size_mb=max_file_size, + context=context, + ) + try: + if download_result.path is None: + if metadata_ready: + return f"已发送论文信息:{info.paper_id}(未附带 PDF)" + return f"已发送论文最小信息:{info.paper_id}(未附带 PDF)" + + try: + await _send_file_message( + sender, + target_type, + target_id, + str(download_result.path.resolve()), + download_result.path.name, + ) + return f"已发送论文信息与 PDF:{info.paper_id}" + except Exception: + logger.exception( + "[arXiv] PDF 上传失败,已跳过: paper=%s target=%s:%s", + info.paper_id, + target_type, + target_id, + ) + if metadata_ready: + return f"已发送论文信息:{info.paper_id}(PDF 上传失败已跳过)" + return f"已发送论文最小信息:{info.paper_id}(PDF 上传失败已跳过)" + finally: + await cleanup_download_path(task_dir) + + +async def send_arxiv_paper( + *, + paper_id: str, + sender: "MessageSender", + target_type: Literal["group", "private"], + target_id: int, + max_file_size: int, + author_preview_limit: int, + summary_preview_chars: int, + context: dict[str, object] | None = None, +) -> str: + """发送 arXiv 论文信息并尽力附带 PDF。""" + normalized = normalize_arxiv_id(paper_id) + if normalized is None: + return f"无法解析 arXiv 标识: {paper_id}" + + key = (target_type, int(target_id), normalized) + created = False + + async with _INFLIGHT_LOCK: + future = _INFLIGHT_SENDS.get(key) + if future is None: + future = asyncio.get_running_loop().create_future() + _INFLIGHT_SENDS[key] = future + created = True + + if not created: + logger.info( + "[arXiv] 
复用在途发送任务: paper=%s target=%s:%s", + normalized, + target_type, + target_id, + ) + return await asyncio.shield(future) + + try: + result = await _send_arxiv_paper_once( + paper_id=normalized, + sender=sender, + target_type=target_type, + target_id=target_id, + max_file_size=max_file_size, + author_preview_limit=author_preview_limit, + summary_preview_chars=summary_preview_chars, + context=context, + ) + except Exception as exc: + if not future.done(): + future.set_exception(exc) + raise + else: + if not future.done(): + future.set_result(result) + return result + finally: + async with _INFLIGHT_LOCK: + current = _INFLIGHT_SENDS.get(key) + if current is future: + _INFLIGHT_SENDS.pop(key, None) diff --git a/src/Undefined/config/loader.py b/src/Undefined/config/loader.py index af269d9d..8862292f 100644 --- a/src/Undefined/config/loader.py +++ b/src/Undefined/config/loader.py @@ -564,6 +564,14 @@ class Config: bilibili_oversize_strategy: str bilibili_auto_extract_group_ids: list[int] bilibili_auto_extract_private_ids: list[int] + # arXiv 论文提取 + arxiv_auto_extract_enabled: bool + arxiv_max_file_size: int + arxiv_auto_extract_group_ids: list[int] + arxiv_auto_extract_private_ids: list[int] + arxiv_auto_extract_max_items: int + arxiv_author_preview_limit: int + arxiv_summary_preview_chars: int # 认知记忆 cognitive: CognitiveConfig # Naga 集成 @@ -598,6 +606,16 @@ class Config: init=False, repr=False, ) + _arxiv_group_ids_set: set[int] = dataclass_field( + default_factory=set, + init=False, + repr=False, + ) + _arxiv_private_ids_set: set[int] = dataclass_field( + default_factory=set, + init=False, + repr=False, + ) def __post_init__(self) -> None: # 访问控制属于高频热路径,启动后缓存为 set 降低重复构建开销。 @@ -615,6 +633,12 @@ def __post_init__(self) -> None: self._bilibili_private_ids_set = { int(item) for item in self.bilibili_auto_extract_private_ids } + self._arxiv_group_ids_set = { + int(item) for item in self.arxiv_auto_extract_group_ids + } + self._arxiv_private_ids_set = { + int(item) for item in 
self.arxiv_auto_extract_private_ids + } @classmethod def load(cls, config_path: Optional[Path] = None, strict: bool = True) -> "Config": @@ -1154,6 +1178,43 @@ def load(cls, config_path: Optional[Path] = None, strict: bool = True) -> "Confi _get_value(data, ("bilibili", "auto_extract_private_ids"), None) ) + # arXiv 配置 + arxiv_auto_extract_enabled = _coerce_bool( + _get_value(data, ("arxiv", "auto_extract_enabled"), None), False + ) + arxiv_max_file_size = _coerce_int( + _get_value(data, ("arxiv", "max_file_size"), None), 100 + ) + if arxiv_max_file_size < 0: + arxiv_max_file_size = 100 + arxiv_auto_extract_group_ids = _coerce_int_list( + _get_value(data, ("arxiv", "auto_extract_group_ids"), None) + ) + arxiv_auto_extract_private_ids = _coerce_int_list( + _get_value(data, ("arxiv", "auto_extract_private_ids"), None) + ) + arxiv_auto_extract_max_items = _coerce_int( + _get_value(data, ("arxiv", "auto_extract_max_items"), None), 5 + ) + if arxiv_auto_extract_max_items <= 0: + arxiv_auto_extract_max_items = 5 + if arxiv_auto_extract_max_items > 20: + arxiv_auto_extract_max_items = 20 + arxiv_author_preview_limit = _coerce_int( + _get_value(data, ("arxiv", "author_preview_limit"), None), 20 + ) + if arxiv_author_preview_limit <= 0: + arxiv_author_preview_limit = 20 + if arxiv_author_preview_limit > 100: + arxiv_author_preview_limit = 100 + arxiv_summary_preview_chars = _coerce_int( + _get_value(data, ("arxiv", "summary_preview_chars"), None), 1000 + ) + if arxiv_summary_preview_chars <= 0: + arxiv_summary_preview_chars = 1000 + if arxiv_summary_preview_chars > 8000: + arxiv_summary_preview_chars = 8000 + # Code Delivery Agent 配置 code_delivery_enabled = _coerce_bool( _get_value(data, ("code_delivery", "enabled"), None), True @@ -1372,6 +1433,13 @@ def load(cls, config_path: Optional[Path] = None, strict: bool = True) -> "Confi bilibili_oversize_strategy=bilibili_oversize_strategy, bilibili_auto_extract_group_ids=bilibili_auto_extract_group_ids, 
def is_arxiv_auto_extract_allowed_group(self, group_id: int) -> bool:
    """Return whether arXiv auto-extraction may run for the given group chat.

    When the arXiv-specific group allowlist is non-empty it alone decides;
    otherwise the decision is delegated to ``self.is_group_allowed``.
    """
    allowlist = self._arxiv_group_ids_set
    if not allowlist:
        # Empty feature allowlist: follow the global access check.
        return self.is_group_allowed(group_id)
    return int(group_id) in allowlist
def _extract_arxiv_ids(
    self, text: str, message_content: list[dict[str, Any]]
) -> list[str]:
    """Collect arXiv paper IDs from the text and from the message segments.

    IDs found in ``text`` come first, followed by IDs found in
    ``message_content``; duplicates are dropped, keeping the first occurrence.
    """
    from Undefined.arxiv.parser import extract_arxiv_ids, extract_from_json_message

    # A dict preserves insertion order, so it doubles as an ordered set.
    ordered: dict[str, None] = {}
    for found in extract_arxiv_ids(text):
        ordered.setdefault(found, None)
    for found in extract_from_json_message(message_content):
        ordered.setdefault(found, None)
    return list(ordered)
+ async def _handle_bilibili_extract( self, target_id: int, @@ -669,8 +720,83 @@ async def _handle_bilibili_extract( except Exception: pass + async def _handle_arxiv_extract( + self, + target_id: int, + paper_ids: list[str], + target_type: str, + ) -> None: + """处理 arXiv 论文自动提取和发送。""" + from Undefined.arxiv.sender import send_arxiv_paper + + max_items = max(1, int(self.config.arxiv_auto_extract_max_items)) + + for paper_id in paper_ids[:max_items]: + try: + result = await send_arxiv_paper( + paper_id=paper_id, + sender=self.sender, + target_type=target_type, # type: ignore[arg-type] + target_id=target_id, + max_file_size=self.config.arxiv_max_file_size, + author_preview_limit=self.config.arxiv_author_preview_limit, + summary_preview_chars=self.config.arxiv_summary_preview_chars, + context={ + "request_id": ( + f"arxiv_auto_extract:{target_type}:{target_id}:{paper_id}" + ) + }, + ) + logger.info( + "[arXiv] 自动提取完成 %s → %s:%s: %s", + paper_id, + target_type, + target_id, + result, + ) + except Exception: + logger.exception( + "[arXiv] 自动提取失败 %s → %s:%s", + paper_id, + target_type, + target_id, + ) + + def _spawn_background_task( + self, + name: str, + coroutine: Coroutine[Any, Any, None], + ) -> None: + task = asyncio.create_task(coroutine, name=name) + self._background_tasks.add(task) + + def _finalize(done_task: asyncio.Task[None]) -> None: + self._background_tasks.discard(done_task) + try: + exc = done_task.exception() + except asyncio.CancelledError: + logger.debug("[后台任务] 已取消: %s", name) + return + if exc is not None: + logger.exception( + "[后台任务] 执行失败: name=%s", + name, + exc_info=(type(exc), exc, exc.__traceback__), + ) + + task.add_done_callback(_finalize) + async def close(self) -> None: """关闭消息处理器""" logger.info("正在关闭消息处理器...") + if self._background_tasks: + logger.info( + "[后台任务] 等待自动提取任务收敛: count=%s", + len(self._background_tasks), + ) + await asyncio.gather( + *list(self._background_tasks), + return_exceptions=True, + ) await 
self.ai_coordinator.queue_manager.stop() logger.info("消息处理器已关闭") diff --git a/src/Undefined/skills/agents/README.md b/src/Undefined/skills/agents/README.md index 14023e0c..3f136d60 100644 --- a/src/Undefined/skills/agents/README.md +++ b/src/Undefined/skills/agents/README.md @@ -274,9 +274,9 @@ mv skills/tools/my_tool skills/agents/my_agent/tools/ - **子工具**:`read_file`, `search_code`, `analyze_structure` ### info_agent(信息查询助手) -- **功能**:查询天气、热搜、历史、WHOIS、B 站信息等 -- **适用场景**:天气查询、热点榜单、域名查询、B 站视频和 UP 主信息查询 -- **子工具**:`weather_query`, `*hot`, `whois`, `bilibili_search`, `bilibili_user_info` +- **功能**:查询天气、热搜、历史、WHOIS、B 站信息、arXiv 检索等 +- **适用场景**:天气查询、热点榜单、域名查询、B 站视频和 UP 主信息查询、论文搜索 +- **子工具**:`weather_query`, `*hot`, `whois`, `bilibili_search`, `bilibili_user_info`, `arxiv_search` ### entertainment_agent(娱乐助手) - **功能**:运势、小说、创意内容与随机视频推荐等娱乐功能 diff --git a/src/Undefined/skills/agents/info_agent/README.md b/src/Undefined/skills/agents/info_agent/README.md index 1ee4c8cc..16847cce 100644 --- a/src/Undefined/skills/agents/info_agent/README.md +++ b/src/Undefined/skills/agents/info_agent/README.md @@ -1,6 +1,6 @@ # info_agent 智能体 -用于信息查询类任务,如天气、热搜、价格、B 站信息查询等。 +用于信息查询类任务,如天气、热搜、价格、B 站信息查询、arXiv 论文搜索等。 目录结构: - `config.json`:智能体定义 diff --git a/src/Undefined/skills/agents/info_agent/config.json b/src/Undefined/skills/agents/info_agent/config.json index 34c0f162..f5d4d9fc 100644 --- a/src/Undefined/skills/agents/info_agent/config.json +++ b/src/Undefined/skills/agents/info_agent/config.json @@ -2,13 +2,13 @@ "type": "function", "function": { "name": "info_agent", - "description": "信息查询助手,提供天气、热搜、新闻、历史、网络诊断、B站查询、编码工具等各类实用信息查询功能。", + "description": "信息查询助手,提供天气、热搜、新闻、历史、网络诊断、B站查询、arXiv 搜索、编码工具等各类实用信息查询功能。", "parameters": { "type": "object", "properties": { "prompt": { "type": "string", - "description": "用户的查询需求,例如:'北京天气'、'今日热搜'、'搜索 B 站原神视频'、'查询 qq123456 的等级'" + "description": "用户的查询需求,例如:'北京天气'、'今日热搜'、'搜索 B 站原神视频'、'搜索 arXiv diffusion policy 论文'、'查询 qq123456 的等级'" } }, "required": 
["prompt"] diff --git a/src/Undefined/skills/agents/info_agent/intro.md b/src/Undefined/skills/agents/info_agent/intro.md index 6fc4306b..7ea8aa9a 100644 --- a/src/Undefined/skills/agents/info_agent/intro.md +++ b/src/Undefined/skills/agents/info_agent/intro.md @@ -8,6 +8,7 @@ - 热点榜单:百度/微博/抖音热搜、腾讯新闻摘要 - 文化历史:历史上的今天 - 网络工具:连通性检测、测速、Whois、编码/哈希 +- 学术检索:arXiv 论文搜索 - B 站查询:视频检索、UP 主信息 - 其他查询:QQ 等级 @@ -16,5 +17,5 @@ - **不做**通用聊天/写作/复杂研究(交给主 AI 或其他 Agent) ## 输入偏好 -- 城市、域名/IP、QQ 号、B 站关键词/UID、待编码/哈希文本等明确参数 +- 城市、域名/IP、QQ 号、B 站关键词/UID、arXiv 关键词、待编码/哈希文本等明确参数 - 若需求含糊,可先向用户追问澄清 diff --git a/src/Undefined/skills/agents/info_agent/prompt.md b/src/Undefined/skills/agents/info_agent/prompt.md index b5f9c7a3..2d218c6d 100644 --- a/src/Undefined/skills/agents/info_agent/prompt.md +++ b/src/Undefined/skills/agents/info_agent/prompt.md @@ -4,6 +4,7 @@ - 先理解用户是“要数据”还是“要解释”,必要时追问关键参数。 - 能用工具就用工具,不要凭空猜测。 - 涉及 B 站检索或用户信息时,优先调用对应的 B 站工具。 +- 涉及 arXiv 论文检索时,优先调用 `arxiv_search`;需要把具体论文发到会话时,再交给主 AI 调用 `arxiv_paper`。 - 结果尽量简洁,必要时给出下一步建议或可选筛选项。 边界提醒: diff --git a/src/Undefined/skills/agents/info_agent/tools/arxiv_search/config.json b/src/Undefined/skills/agents/info_agent/tools/arxiv_search/config.json new file mode 100644 index 00000000..505fb8c1 --- /dev/null +++ b/src/Undefined/skills/agents/info_agent/tools/arxiv_search/config.json @@ -0,0 +1,25 @@ +{ + "type": "function", + "function": { + "name": "arxiv_search", + "description": "搜索 arXiv 论文,返回标题、作者、分类、日期和 arXiv 标识。", + "parameters": { + "type": "object", + "properties": { + "msg": { + "type": "string", + "description": "搜索内容" + }, + "n": { + "type": "integer", + "description": "返回数据数量(默认 5,最大 20)" + }, + "start": { + "type": "integer", + "description": "起始偏移(默认 0)" + } + }, + "required": ["msg"] + } + } +} diff --git a/src/Undefined/skills/agents/info_agent/tools/arxiv_search/handler.py b/src/Undefined/skills/agents/info_agent/tools/arxiv_search/handler.py new file mode 100644 index 00000000..b554f33e --- /dev/null +++ 
b/src/Undefined/skills/agents/info_agent/tools/arxiv_search/handler.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import logging +from typing import Any + +from Undefined.arxiv.client import search_papers + +logger = logging.getLogger(__name__) + + +def _normalize_space(value: Any) -> str: + return " ".join(str(value or "").split()).strip() + + +def _to_non_negative_int(value: Any, default: int) -> int: + try: + parsed = int(value) + except (TypeError, ValueError): + return default + if parsed < 0: + return default + return parsed + + +def _preview_authors(authors: tuple[str, ...], limit: int) -> str: + if not authors: + return "" + if len(authors) <= limit: + return "、".join(authors) + return f"{'、'.join(authors[:limit])} 等{len(authors)}位作者" + + +async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: + query = _normalize_space(args.get("msg")) + if not query: + return "请提供搜索内容。" + + limit = _to_non_negative_int(args.get("n", 5), 5) + limit = max(1, min(limit, 20)) + start = _to_non_negative_int(args.get("start", 0), 0) + + runtime_config = context.get("runtime_config") + author_preview_limit = 20 + if runtime_config is not None: + author_preview_limit = getattr(runtime_config, "arxiv_author_preview_limit", 20) + + try: + response = await search_papers( + query, + start=start, + max_results=limit, + context={"request_id": context.get("request_id", "-")}, + ) + except Exception as exc: + logger.exception("[arxiv_search] 搜索失败: %s", exc) + return "arXiv 搜索失败,请稍后重试" + + if not response.items: + return f"未找到与“{query}”相关的 arXiv 论文。" + + header = "🔍 arXiv 搜索结果" + if response.total_results is not None: + header += f"(total={response.total_results}" + if response.start_index is not None: + header += f", start={response.start_index}" + header += ")" + + lines = [header] + for idx, item in enumerate(response.items, start=1): + lines.append(f"{idx}. 
{item.title or f'arXiv:{item.paper_id}'}") + lines.append(f" ID: {item.paper_id}") + authors = _preview_authors(item.authors, author_preview_limit) + if authors: + lines.append(f" 作者: {authors}") + if item.primary_category: + lines.append(f" 分类: {item.primary_category}") + if item.published: + lines.append(f" 日期: {item.published[:10]}") + lines.append(f" 链接: {item.abs_url}") + + return "\n".join(lines) diff --git a/src/Undefined/skills/tools/arxiv_paper/README.md b/src/Undefined/skills/tools/arxiv_paper/README.md new file mode 100644 index 00000000..c7d3c653 --- /dev/null +++ b/src/Undefined/skills/tools/arxiv_paper/README.md @@ -0,0 +1,22 @@ +# arxiv_paper 工具 + +下载并发送 arXiv 论文到群聊或私聊。支持 arXiv ID、`arXiv:` 前缀和 arXiv 页面链接。 + +常用参数: +- `paper_id`:论文标识(如 `2501.01234`、`arXiv:2501.01234v2`、`https://arxiv.org/abs/2501.01234`) +- `target_type`:可选,目标会话类型(`group`/`private`) +- `target_id`:可选,目标会话 ID + +运行流程: +1. 解析 `paper_id` 为标准 arXiv 标识 +2. 调用 arXiv 官方 API 获取论文元信息 +3. 先发送标题/作者/摘要/链接信息 +4. 尝试下载并上传 PDF +5. 
下载超限或 PDF 失败时仅保留信息消息 + +配置依赖: +- `config.toml` 中的 `[arxiv]` 段控制 PDF 大小上限、作者预览和摘要预览等 + +目录结构: +- `config.json`:工具定义 +- `handler.py`:执行逻辑 diff --git a/src/Undefined/skills/tools/arxiv_paper/config.json b/src/Undefined/skills/tools/arxiv_paper/config.json new file mode 100644 index 00000000..470afdba --- /dev/null +++ b/src/Undefined/skills/tools/arxiv_paper/config.json @@ -0,0 +1,26 @@ +{ + "type": "function", + "function": { + "name": "arxiv_paper", + "description": "下载并发送 arXiv 论文到群聊或私聊。支持 arXiv ID、arXiv: 前缀或 arXiv 链接。", + "parameters": { + "type": "object", + "properties": { + "paper_id": { + "type": "string", + "description": "arXiv 论文标识:如 2501.01234、arXiv:2501.01234v2 或 https://arxiv.org/abs/2501.01234" + }, + "target_type": { + "type": "string", + "enum": ["group", "private"], + "description": "可选。目标会话类型" + }, + "target_id": { + "type": "integer", + "description": "可选。目标会话 ID" + } + }, + "required": ["paper_id"] + } + } +} diff --git a/src/Undefined/skills/tools/arxiv_paper/handler.py b/src/Undefined/skills/tools/arxiv_paper/handler.py new file mode 100644 index 00000000..8a7fad5e --- /dev/null +++ b/src/Undefined/skills/tools/arxiv_paper/handler.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import logging +from typing import Any, Literal + +from Undefined.arxiv.sender import send_arxiv_paper + +logger = logging.getLogger(__name__) + + +def _resolve_target( + args: dict[str, Any], context: dict[str, Any] +) -> tuple[tuple[Literal["group", "private"], int] | None, str | None]: + target_type_raw = args.get("target_type") + target_id_raw = args.get("target_id") + + if target_type_raw is not None and target_id_raw is not None: + target_type = str(target_type_raw).strip().lower() + if target_type not in ("group", "private"): + return None, "target_type 只能是 group 或 private" + try: + target_id = int(target_id_raw) + except (TypeError, ValueError): + return None, "target_id 必须是整数" + return (target_type, target_id), None # type: ignore[return-value] + + 
request_type = context.get("request_type") + if request_type == "group" and context.get("group_id"): + return ("group", int(context["group_id"])), None + if request_type == "private" and context.get("user_id"): + return ("private", int(context["user_id"])), None + + if context.get("group_id"): + return ("group", int(context["group_id"])), None + if context.get("user_id"): + return ("private", int(context["user_id"])), None + return None, "无法确定目标会话,请提供 target_type 与 target_id" + + +async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: + paper_id = str(args.get("paper_id", "")).strip() + if not paper_id: + return "paper_id 不能为空" + + target, error = _resolve_target(args, context) + if error or target is None: + return f"目标解析失败: {error or '参数错误'}" + target_type, target_id = target + + sender = context.get("sender") + if sender is None: + return "缺少必要的运行时组件(sender)" + + runtime_config = context.get("runtime_config") + max_file_size = 100 + author_preview_limit = 20 + summary_preview_chars = 1000 + if runtime_config is not None: + max_file_size = getattr(runtime_config, "arxiv_max_file_size", 100) + author_preview_limit = getattr(runtime_config, "arxiv_author_preview_limit", 20) + summary_preview_chars = getattr( + runtime_config, "arxiv_summary_preview_chars", 1000 + ) + + try: + return await send_arxiv_paper( + paper_id=paper_id, + sender=sender, + target_type=target_type, + target_id=target_id, + max_file_size=max_file_size, + author_preview_limit=author_preview_limit, + summary_preview_chars=summary_preview_chars, + context={"request_id": context.get("request_id", "-")}, + ) + except Exception as exc: + logger.exception("[arxiv_paper] 执行失败: %s", exc) + return f"论文处理失败: {exc}" diff --git a/src/Undefined/skills/toolsets/messages/send_url_file/handler.py b/src/Undefined/skills/toolsets/messages/send_url_file/handler.py index 4ce13c0c..c57db73d 100644 --- a/src/Undefined/skills/toolsets/messages/send_url_file/handler.py +++ 
b/src/Undefined/skills/toolsets/messages/send_url_file/handler.py @@ -1,20 +1,21 @@ from __future__ import annotations -import asyncio import logging -import os -import shutil import uuid from collections.abc import Awaitable, Callable from pathlib import Path from typing import Any, Dict, Literal, cast from urllib.parse import unquote, urlparse -import aiofiles import httpx -from Undefined.skills.http_client import request_with_retry from Undefined.skills.http_config import get_request_timeout +from Undefined.utils.http_download import ( + cleanup_download_dir, + download_remote_file, + parse_content_length, + probe_remote_file, +) logger = logging.getLogger(__name__) @@ -328,14 +329,15 @@ def _resolve_content_length( ) -> tuple[int | None, str | None]: if content_length_raw is None: return None, "无法获取文件大小(缺少 Content-Length)" - try: - content_length = int(content_length_raw) + raw_value = int(content_length_raw) except (TypeError, ValueError): return None, "无法获取文件大小(Content-Length 非法)" - - if content_length <= 0: + if raw_value <= 0: return None, "无法获取文件大小(Content-Length 非正数)" + content_length = parse_content_length(content_length_raw) + if content_length is None: + return None, "无法获取文件大小(Content-Length 非法)" return content_length, None @@ -347,61 +349,19 @@ async def _download_to_local_file( max_file_size_bytes: int, timeout_seconds: float, ) -> tuple[str, int]: - part_path = target_path.with_suffix(f"{target_path.suffix}.part") - - try: - target_path.parent.mkdir(parents=True, exist_ok=True) - downloaded_size = 0 - - async with httpx.AsyncClient( - timeout=httpx.Timeout(timeout_seconds), - follow_redirects=True, - ) as client: - async with client.stream("GET", url) as response: - response.raise_for_status() - - get_size_raw = response.headers.get("content-length") - if get_size_raw is not None: - get_size, get_size_error = _resolve_content_length(get_size_raw) - if get_size_error or get_size is None: - raise ValueError(get_size_error or "下载响应大小非法") - if get_size 
!= expected_size: - raise ValueError("文件大小与预检不一致,已取消发送") - - async with aiofiles.open(part_path, "wb") as f: - async for chunk in response.aiter_bytes( - chunk_size=DOWNLOAD_CHUNK_SIZE - ): - if not chunk: - continue - downloaded_size += len(chunk) - if downloaded_size > max_file_size_bytes: - raise ValueError("下载中发现文件超过大小限制,已取消发送") - if downloaded_size > expected_size: - raise ValueError("下载中发现文件超出预检大小,已取消发送") - await f.write(chunk) - await f.flush() - - if downloaded_size != expected_size: - raise ValueError("下载完成后大小与预检不一致,已取消发送") - - await asyncio.to_thread(os.replace, part_path, target_path) - abs_path = str(target_path.resolve()) - return abs_path, downloaded_size - finally: - if part_path.exists(): - try: - part_path.unlink() - except OSError: - pass + return await download_remote_file( + url, + target_path, + max_file_size_bytes=max_file_size_bytes, + timeout_seconds=timeout_seconds, + expected_size=expected_size, + follow_redirects=True, + chunk_size=DOWNLOAD_CHUNK_SIZE, + ) async def _cleanup_directory(path: Path) -> None: - def sync_cleanup() -> None: - if path.exists(): - shutil.rmtree(path, ignore_errors=True) - - await asyncio.to_thread(sync_cleanup) + await cleanup_download_dir(path) def _group_access_error(runtime_config: Any, group_id: int) -> str: @@ -461,10 +421,9 @@ async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str: head_timeout_seconds = min(timeout_seconds, 60.0) try: - head_response = await request_with_retry( - "HEAD", + probe = await probe_remote_file( url, - timeout=head_timeout_seconds, + timeout_seconds=head_timeout_seconds, follow_redirects=True, context=context, ) @@ -477,8 +436,11 @@ async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str: ) return "发送失败:无法获取文件大小,已取消发送" - content_length, content_length_error = _resolve_content_length( - head_response.headers.get("content-length") + content_length = probe.content_length + content_length_error = ( + None + if content_length is not None + else 
def parse_content_length(value: str | None) -> int | None:
    """Parse a ``Content-Length`` header value into a positive byte count.

    Args:
        value: Raw header value, or ``None`` when the header is absent.

    Returns:
        The size as a positive int, or ``None`` when the header is missing,
        malformed, or not strictly positive.
    """
    if value is None:
        return None
    if isinstance(value, str):
        text = value.strip()
        # int() alone also accepts "1_000", "+5" and non-ASCII digits, none of
        # which is a valid Content-Length (RFC 9110: 1*DIGIT).
        if not (text.isascii() and text.isdigit()):
            return None
        value = text
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    if parsed <= 0:
        return None
    return parsed
int = DEFAULT_DOWNLOAD_CHUNK_SIZE, +) -> tuple[str, int]: + part_path = target_path.with_suffix(f"{target_path.suffix}.part") + + try: + target_path.parent.mkdir(parents=True, exist_ok=True) + downloaded_size = 0 + + async with httpx.AsyncClient( + timeout=httpx.Timeout(timeout_seconds), + follow_redirects=follow_redirects, + ) as client: + async with client.stream("GET", url) as response: + response.raise_for_status() + + response_size = parse_content_length( + response.headers.get("content-length") + ) + if response_size is not None and response_size > max_file_size_bytes: + raise ValueError("远程文件超过大小限制,已取消下载") + if ( + expected_size is not None + and response_size is not None + and response_size != expected_size + ): + raise ValueError("远程文件大小与预检不一致,已取消下载") + + async with aiofiles.open(part_path, "wb") as f: + async for chunk in response.aiter_bytes(chunk_size=chunk_size): + if not chunk: + continue + downloaded_size += len(chunk) + if downloaded_size > max_file_size_bytes: + raise ValueError("下载中发现文件超过大小限制,已取消下载") + if ( + expected_size is not None + and downloaded_size > expected_size + ): + raise ValueError("下载中发现文件超出预检大小,已取消下载") + await f.write(chunk) + await f.flush() + + if expected_size is not None and downloaded_size != expected_size: + raise ValueError("下载完成后大小与预检不一致,已取消下载") + + await asyncio.to_thread(os.replace, part_path, target_path) + return str(target_path.resolve()), downloaded_size + finally: + if part_path.exists(): + try: + part_path.unlink() + except OSError: + pass + + +async def cleanup_download_dir(path: Path) -> None: + def _cleanup() -> None: + if path.exists(): + shutil.rmtree(path, ignore_errors=True) + + await asyncio.to_thread(_cleanup) diff --git a/tests/test_arxiv_config.py b/tests/test_arxiv_config.py new file mode 100644 index 00000000..c470b082 --- /dev/null +++ b/tests/test_arxiv_config.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from pathlib import Path + +from Undefined.config.loader import Config + + +def 
_load_config(tmp_path: Path, extra_toml: str) -> Config: + config_path = tmp_path / "config.toml" + config_path.write_text( + ( + "[core]\n" + "bot_qq = 10001\n" + "superadmin_qq = 20002\n\n" + "[onebot]\n" + 'ws_url = "ws://127.0.0.1:3001"\n\n' + f"{extra_toml}\n" + ), + encoding="utf-8", + ) + return Config.load(config_path=config_path, strict=False) + + +def test_arxiv_config_clamps_invalid_values(tmp_path: Path) -> None: + config = _load_config( + tmp_path, + ( + "[arxiv]\n" + "auto_extract_enabled = true\n" + "max_file_size = -1\n" + "auto_extract_group_ids = [123456]\n" + "auto_extract_private_ids = [20003]\n" + "auto_extract_max_items = 99\n" + "author_preview_limit = 0\n" + "summary_preview_chars = -10\n" + ), + ) + + assert config.arxiv_auto_extract_enabled is True + assert config.arxiv_max_file_size == 100 + assert config.arxiv_auto_extract_group_ids == [123456] + assert config.arxiv_auto_extract_private_ids == [20003] + assert config.arxiv_auto_extract_max_items == 20 + assert config.arxiv_author_preview_limit == 20 + assert config.arxiv_summary_preview_chars == 1000 + + +def test_arxiv_auto_extract_allowlist_follows_global_access_when_empty( + tmp_path: Path, +) -> None: + config = _load_config( + tmp_path, + ( + "[access]\n" + 'mode = "allowlist"\n' + "allowed_group_ids = [123456]\n" + "allowed_private_ids = [20003]\n\n" + "[arxiv]\n" + "auto_extract_enabled = true\n" + ), + ) + + assert config.is_arxiv_auto_extract_allowed_group(123456) is True + assert config.is_arxiv_auto_extract_allowed_group(654321) is False + assert config.is_arxiv_auto_extract_allowed_private(20003) is True + assert config.is_arxiv_auto_extract_allowed_private(30004) is False + + +def test_arxiv_auto_extract_allowlist_overrides_global_access_when_non_empty( + tmp_path: Path, +) -> None: + config = _load_config( + tmp_path, + ( + "[access]\n" + 'mode = "allowlist"\n' + "allowed_group_ids = [123456]\n" + "allowed_private_ids = [20003]\n\n" + "[arxiv]\n" + "auto_extract_enabled = 
true\n" + "auto_extract_group_ids = [654321]\n" + "auto_extract_private_ids = [30004]\n" + ), + ) + + assert config.is_arxiv_auto_extract_allowed_group(123456) is False + assert config.is_arxiv_auto_extract_allowed_group(654321) is True + assert config.is_arxiv_auto_extract_allowed_private(20003) is False + assert config.is_arxiv_auto_extract_allowed_private(30004) is True diff --git a/tests/test_arxiv_parser.py b/tests/test_arxiv_parser.py new file mode 100644 index 00000000..a436667d --- /dev/null +++ b/tests/test_arxiv_parser.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from Undefined.arxiv.parser import ( + extract_arxiv_ids, + extract_from_json_message, + normalize_arxiv_id, +) + + +def test_normalize_arxiv_id_accepts_abs_pdf_and_prefix() -> None: + assert normalize_arxiv_id("https://arxiv.org/abs/2501.01234v2") == "2501.01234v2" + assert normalize_arxiv_id("https://arxiv.org/pdf/2501.01234.pdf") == "2501.01234" + assert normalize_arxiv_id("arXiv:hep-th/9901001v3") == "hep-th/9901001v3" + + +def test_extract_arxiv_ids_requires_keyword_for_bare_new_style_id() -> None: + assert extract_arxiv_ids("2501.01234") == [] + assert extract_arxiv_ids("看看 arxiv 2501.01234 和 arXiv:2501.01235") == [ + "2501.01235", + "2501.01234", + ] + + +def test_extract_from_json_message_recursively_scans_strings() -> None: + segments = [ + { + "type": "json", + "data": { + "data": ( + '{"meta":{"detail_1":{"desc":"' + '论文链接 https://arxiv.org/abs/2501.01234v2"},"items":["arxiv:2501.01235"]}}' + ) + }, + } + ] + + assert extract_from_json_message(segments) == ["2501.01234v2", "2501.01235"] diff --git a/tests/test_arxiv_sender.py b/tests/test_arxiv_sender.py new file mode 100644 index 00000000..120ab0af --- /dev/null +++ b/tests/test_arxiv_sender.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import asyncio +from pathlib import Path +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock + +import pytest + +from 
Undefined.arxiv.downloader import PaperDownloadResult +from Undefined.arxiv.models import PaperInfo +from Undefined.arxiv.sender import send_arxiv_paper +import Undefined.arxiv.sender as arxiv_sender + + +@pytest.fixture(autouse=True) +def _clear_inflight() -> None: + arxiv_sender._INFLIGHT_SENDS.clear() + + +def _paper_info() -> PaperInfo: + return PaperInfo( + paper_id="2501.01234", + title="Diffusion Policy for Robots", + authors=("Alice", "Bob", "Carol"), + summary="A concise summary of the paper.", + published="2025-01-02T03:04:05Z", + updated="2025-01-03T03:04:05Z", + primary_category="cs.RO", + abs_url="https://arxiv.org/abs/2501.01234", + pdf_url="https://arxiv.org/pdf/2501.01234.pdf", + ) + + +def _sender() -> Any: + return SimpleNamespace( + send_group_message=AsyncMock(), + send_private_message=AsyncMock(), + send_group_file=AsyncMock(), + send_private_file=AsyncMock(), + ) + + +@pytest.mark.asyncio +async def test_send_arxiv_paper_sends_info_and_pdf( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + sender = _sender() + pdf_path = tmp_path / "paper.pdf" + pdf_path.write_bytes(b"%PDF-1.4") + + monkeypatch.setattr( + arxiv_sender, "get_paper_info", AsyncMock(return_value=_paper_info()) + ) + monkeypatch.setattr( + arxiv_sender, + "download_paper_pdf", + AsyncMock( + return_value=( + PaperDownloadResult(pdf_path, pdf_path.stat().st_size, "downloaded"), + tmp_path, + ) + ), + ) + cleanup_mock = AsyncMock() + monkeypatch.setattr(arxiv_sender, "cleanup_download_path", cleanup_mock) + + result = await send_arxiv_paper( + paper_id="2501.01234", + sender=sender, + target_type="group", + target_id=123456, + max_file_size=100, + author_preview_limit=2, + summary_preview_chars=1000, + ) + + sender.send_group_message.assert_awaited_once() + sender.send_group_file.assert_awaited_once() + cleanup_mock.assert_awaited_once_with(tmp_path) + assert "PDF" in result + + +@pytest.mark.asyncio +async def 
test_send_arxiv_paper_skips_pdf_failure_without_extra_prompt( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + sender = _sender() + + monkeypatch.setattr( + arxiv_sender, "get_paper_info", AsyncMock(return_value=_paper_info()) + ) + monkeypatch.setattr( + arxiv_sender, + "download_paper_pdf", + AsyncMock(return_value=(PaperDownloadResult(None, None, "failed"), tmp_path)), + ) + cleanup_mock = AsyncMock() + monkeypatch.setattr(arxiv_sender, "cleanup_download_path", cleanup_mock) + + result = await send_arxiv_paper( + paper_id="2501.01234", + sender=sender, + target_type="group", + target_id=123456, + max_file_size=100, + author_preview_limit=20, + summary_preview_chars=1000, + ) + + sender.send_group_message.assert_awaited_once() + sender.send_group_file.assert_not_called() + cleanup_mock.assert_awaited_once_with(tmp_path) + assert "未附带 PDF" in result + + +@pytest.mark.asyncio +async def test_send_arxiv_paper_deduplicates_inflight_requests( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _sender() + started = asyncio.Event() + release = asyncio.Event() + called = 0 + + async def _fake_once(**_: object) -> str: + nonlocal called + called += 1 + started.set() + await release.wait() + return "ok" + + monkeypatch.setattr(arxiv_sender, "_send_arxiv_paper_once", _fake_once) + + first = asyncio.create_task( + send_arxiv_paper( + paper_id="2501.01234", + sender=sender, + target_type="group", + target_id=123456, + max_file_size=100, + author_preview_limit=20, + summary_preview_chars=1000, + ) + ) + await started.wait() + second = asyncio.create_task( + send_arxiv_paper( + paper_id="2501.01234", + sender=sender, + target_type="group", + target_id=123456, + max_file_size=100, + author_preview_limit=20, + summary_preview_chars=1000, + ) + ) + + release.set() + first_result, second_result = await asyncio.gather(first, second) + + assert first_result == "ok" + assert second_result == "ok" + assert called == 1 diff --git a/tests/test_arxiv_tools.py 
b/tests/test_arxiv_tools.py new file mode 100644 index 00000000..fbcfb04f --- /dev/null +++ b/tests/test_arxiv_tools.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from Undefined.arxiv.client import SearchResponse +from Undefined.arxiv.models import PaperInfo +from Undefined.skills.agents.info_agent.tools.arxiv_search import ( + handler as arxiv_search, +) +from Undefined.skills.tools.arxiv_paper import handler as arxiv_paper + + +@pytest.mark.asyncio +async def test_arxiv_paper_tool_uses_runtime_config( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + async def _fake_send_arxiv_paper(**kwargs: object) -> str: + captured.update(kwargs) + return "ok" + + monkeypatch.setattr(arxiv_paper, "send_arxiv_paper", _fake_send_arxiv_paper) + + context = { + "request_type": "group", + "group_id": 123456, + "sender": object(), + "request_id": "req-1", + "runtime_config": SimpleNamespace( + arxiv_max_file_size=42, + arxiv_author_preview_limit=7, + arxiv_summary_preview_chars=2048, + ), + } + result = await arxiv_paper.execute({"paper_id": "2501.01234"}, context) + + assert result == "ok" + assert captured["paper_id"] == "2501.01234" + assert captured["target_type"] == "group" + assert captured["target_id"] == 123456 + assert captured["max_file_size"] == 42 + assert captured["author_preview_limit"] == 7 + assert captured["summary_preview_chars"] == 2048 + + +@pytest.mark.asyncio +async def test_arxiv_search_tool_formats_results( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def _fake_search_papers( + _query: str, + *, + start: int, + max_results: int, + context: dict[str, object] | None = None, + ) -> SearchResponse: + assert start == 2 + assert max_results == 3 + assert context == {"request_id": "req-2"} + return SearchResponse( + items=( + PaperInfo( + paper_id="2501.01234", + title="First Paper", + authors=("Alice", "Bob", "Carol"), + summary="", + 
published="2025-01-02T03:04:05Z", + updated="", + primary_category="cs.RO", + abs_url="https://arxiv.org/abs/2501.01234", + pdf_url="https://arxiv.org/pdf/2501.01234.pdf", + ), + ), + total_results=123, + start_index=2, + ) + + monkeypatch.setattr(arxiv_search, "search_papers", _fake_search_papers) + + result = await arxiv_search.execute( + {"msg": "diffusion policy", "n": 3, "start": 2}, + { + "request_id": "req-2", + "runtime_config": SimpleNamespace(arxiv_author_preview_limit=2), + }, + ) + + assert "First Paper" in result + assert "ID: 2501.01234" in result + assert "Alice、Bob 等3位作者" in result + assert "total=123" in result diff --git a/tests/test_handlers_arxiv_auto_extract.py b/tests/test_handlers_arxiv_auto_extract.py new file mode 100644 index 00000000..2beb2ada --- /dev/null +++ b/tests/test_handlers_arxiv_auto_extract.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +import Undefined.handlers as handlers_module +from Undefined.handlers import MessageHandler + + +@pytest.mark.asyncio +async def test_private_message_schedules_arxiv_auto_extract( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + handlers_module, + "parse_message_content_for_history", + AsyncMock(return_value="arxiv 2501.01234"), + ) + + handler: Any = MessageHandler.__new__(MessageHandler) + handler.config = SimpleNamespace( + bot_qq=10000, + is_private_allowed=lambda _uid: True, + access_control_enabled=lambda: False, + should_process_private_message=lambda: True, + bilibili_auto_extract_enabled=False, + arxiv_auto_extract_enabled=True, + is_arxiv_auto_extract_allowed_private=lambda _uid: True, + ) + handler.onebot = SimpleNamespace( + get_stranger_info=AsyncMock(return_value={"nickname": "测试用户"}), + get_msg=AsyncMock(), + get_forward_msg=AsyncMock(), + ) + handler.history_manager = SimpleNamespace(add_private_message=AsyncMock()) + 
handler.ai_coordinator = SimpleNamespace( + model_pool=SimpleNamespace( + handle_private_message=AsyncMock(return_value=False) + ), + handle_private_reply=AsyncMock(), + ) + handler.command_dispatcher = SimpleNamespace(parse_command=lambda _text: None) + handler._background_tasks = set() + handler._extract_arxiv_ids = MagicMock(return_value=["2501.01234"]) + + def _fake_spawn_background_task(_name: str, coroutine: Any) -> None: + coroutine.close() + + handler._spawn_background_task = MagicMock(side_effect=_fake_spawn_background_task) + + event = { + "post_type": "message", + "message_type": "private", + "user_id": 20001, + "message_id": 30001, + "message": [{"type": "text", "data": {"text": "arxiv 2501.01234"}}], + "sender": {"user_id": 20001, "nickname": "测试用户"}, + } + + await handler.handle_message(event) + + handler._extract_arxiv_ids.assert_called_once() + handler._spawn_background_task.assert_called_once() + handler.ai_coordinator.handle_private_reply.assert_not_called() From 550084729c7ff260e1580a55c54a7fcf7c322876 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 11:28:37 +0800 Subject: [PATCH 02/25] refactor(agent): pass prompts through verbatim --- src/Undefined/skills/agents/README.md | 2 +- .../agents/code_delivery_agent/handler.py | 19 +++- .../agents/entertainment_agent/handler.py | 3 +- .../agents/file_analysis_agent/handler.py | 13 ++- .../skills/agents/info_agent/handler.py | 3 +- .../naga_code_analysis_agent/handler.py | 3 +- src/Undefined/skills/agents/runner.py | 3 + .../skills/agents/web_agent/handler.py | 3 +- tests/test_agent_prompt_passthrough.py | 94 +++++++++++++++++++ ..._code_delivery_agent_prompt_passthrough.py | 54 +++++++++++ 10 files changed, 182 insertions(+), 15 deletions(-) create mode 100644 tests/test_agent_prompt_passthrough.py create mode 100644 tests/test_code_delivery_agent_prompt_passthrough.py diff --git a/src/Undefined/skills/agents/README.md b/src/Undefined/skills/agents/README.md index 
3f136d60..fbd91fb7 100644 --- a/src/Undefined/skills/agents/README.md +++ b/src/Undefined/skills/agents/README.md @@ -210,7 +210,7 @@ async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str: messages = [{"role": "system", "content": system_prompt}] if agent_history: messages.extend(agent_history) # 注入历史 - messages.append({"role": "user", "content": f"用户需求:{user_prompt}"}) + messages.append({"role": "user", "content": user_prompt}) # 3. 使用统一接口请求模型 result = await ai_client.submit_queued_llm_call( diff --git a/src/Undefined/skills/agents/code_delivery_agent/handler.py b/src/Undefined/skills/agents/code_delivery_agent/handler.py index 586ac6dd..9627be45 100644 --- a/src/Undefined/skills/agents/code_delivery_agent/handler.py +++ b/src/Undefined/skills/agents/code_delivery_agent/handler.py @@ -51,9 +51,22 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: cleanup_on_finish = context["code_delivery_config"]["cleanup_on_finish"] try: - user_content = f"用户需求:{user_prompt}\n\n请开始工作。" + context_messages = [ + { + "role": "system", + "content": f"当前初始化来源:{context['init_args']['source_type']}", + } + ] + git_url = str(context["init_args"].get("git_url", "")).strip() + git_ref = str(context["init_args"].get("git_ref", "")).strip() + if git_url: + git_message = f"当前 Git 仓库:{git_url}" + if git_ref: + git_message += f" @ {git_ref}" + context_messages.append({"role": "system", "content": git_message}) result = await _run_agent_with_retry( - user_content=user_content, + user_content=user_prompt, + context_messages=context_messages, context=context, agent_dir=Path(__file__).parent, ) @@ -81,6 +94,7 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: async def _run_agent_with_retry( *, user_content: str, + context_messages: list[dict[str, str]] | None, context: dict[str, Any], agent_dir: Path, ) -> str: @@ -90,6 +104,7 @@ async def _run_agent_with_retry( return await run_agent_with_tools( agent_name="code_delivery_agent", 
user_content=user_content, + context_messages=context_messages, empty_user_content_message="请提供任务目标描述", default_prompt="你是一个专业的代码交付助手。", context=context, diff --git a/src/Undefined/skills/agents/entertainment_agent/handler.py b/src/Undefined/skills/agents/entertainment_agent/handler.py index 94f622c9..3408af04 100644 --- a/src/Undefined/skills/agents/entertainment_agent/handler.py +++ b/src/Undefined/skills/agents/entertainment_agent/handler.py @@ -13,10 +13,9 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: """执行 entertainment_agent。""" user_prompt = str(args.get("prompt", "")).strip() - user_content = f"用户需求:{user_prompt}" if user_prompt else "" return await run_agent_with_tools( agent_name="entertainment_agent", - user_content=user_content, + user_content=user_prompt, empty_user_content_message="请提供您的娱乐需求", default_prompt="你是一个娱乐助手...", context=context, diff --git a/src/Undefined/skills/agents/file_analysis_agent/handler.py b/src/Undefined/skills/agents/file_analysis_agent/handler.py index 12d4e8ac..3f8bb909 100644 --- a/src/Undefined/skills/agents/file_analysis_agent/handler.py +++ b/src/Undefined/skills/agents/file_analysis_agent/handler.py @@ -18,14 +18,19 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: if not file_source: return "请提供文件 URL 或 file_id" - if user_prompt: - user_content = f"文件源:{file_source}\n\n用户需求:{user_prompt}" - else: - user_content = f"请分析这个文件:{file_source}" + context["file_source"] = file_source + context_messages = [ + { + "role": "system", + "content": f"当前任务附带文件源:{file_source}", + } + ] + user_content = user_prompt if user_prompt else "请分析这个文件。" return await run_agent_with_tools( agent_name="file_analysis_agent", user_content=user_content, + context_messages=context_messages, empty_user_content_message="请提供文件 URL 或 file_id", default_prompt="你是一个专业的文件分析助手...", context=context, diff --git a/src/Undefined/skills/agents/info_agent/handler.py 
b/src/Undefined/skills/agents/info_agent/handler.py index d5f3503e..910971e6 100644 --- a/src/Undefined/skills/agents/info_agent/handler.py +++ b/src/Undefined/skills/agents/info_agent/handler.py @@ -13,10 +13,9 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: """执行 info_agent。""" user_prompt = str(args.get("prompt", "")).strip() - user_content = f"用户需求:{user_prompt}" if user_prompt else "" return await run_agent_with_tools( agent_name="info_agent", - user_content=user_content, + user_content=user_prompt, empty_user_content_message="请提供您的查询需求", default_prompt="你是一个信息查询助手...", context=context, diff --git a/src/Undefined/skills/agents/naga_code_analysis_agent/handler.py b/src/Undefined/skills/agents/naga_code_analysis_agent/handler.py index 7dfe8992..e9dca11f 100644 --- a/src/Undefined/skills/agents/naga_code_analysis_agent/handler.py +++ b/src/Undefined/skills/agents/naga_code_analysis_agent/handler.py @@ -13,10 +13,9 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: """执行 naga_code_analysis_agent。""" user_prompt = str(args.get("prompt", "")).strip() - user_content = f"用户需求:{user_prompt}" if user_prompt else "" return await run_agent_with_tools( agent_name="naga_code_analysis_agent", - user_content=user_content, + user_content=user_prompt, empty_user_content_message="请提供您的分析需求", default_prompt="你是一个专业的代码分析助手...", context=context, diff --git a/src/Undefined/skills/agents/runner.py b/src/Undefined/skills/agents/runner.py index 274c7408..2373128d 100644 --- a/src/Undefined/skills/agents/runner.py +++ b/src/Undefined/skills/agents/runner.py @@ -27,6 +27,7 @@ async def run_agent_with_tools( *, agent_name: str, user_content: str, + context_messages: list[dict[str, str]] | None = None, empty_user_content_message: str, default_prompt: str, context: dict[str, Any], @@ -102,6 +103,8 @@ async def run_agent_with_tools( messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}] if agent_history: 
messages.extend(agent_history) + if context_messages: + messages.extend(context_messages) messages.append({"role": "user", "content": user_content}) transport_state: dict[str, Any] | None = None queue_lane = context.get("queue_lane") diff --git a/src/Undefined/skills/agents/web_agent/handler.py b/src/Undefined/skills/agents/web_agent/handler.py index 6db4a47d..3595c77e 100644 --- a/src/Undefined/skills/agents/web_agent/handler.py +++ b/src/Undefined/skills/agents/web_agent/handler.py @@ -14,10 +14,9 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: user_prompt = str(args.get("prompt", "")).strip() agent_dir = Path(__file__).parent - user_content = f"用户需求:{user_prompt}" if user_prompt else "" return await run_agent_with_tools( agent_name="web_agent", - user_content=user_content, + user_content=user_prompt, empty_user_content_message="请提供您的搜索需求", default_prompt="你是一个网络搜索助手...", context=context, diff --git a/tests/test_agent_prompt_passthrough.py b/tests/test_agent_prompt_passthrough.py new file mode 100644 index 00000000..182b8afc --- /dev/null +++ b/tests/test_agent_prompt_passthrough.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +from Undefined.skills.agents.entertainment_agent import handler as entertainment_handler +from Undefined.skills.agents.file_analysis_agent import handler as file_handler +from Undefined.skills.agents.info_agent import handler as info_handler +from Undefined.skills.agents.naga_code_analysis_agent import ( + handler as naga_code_handler, +) +from Undefined.skills.agents.web_agent import handler as web_handler + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("handler_module", "agent_name"), + [ + (web_handler, "web_agent"), + (info_handler, "info_agent"), + (entertainment_handler, "entertainment_agent"), + (naga_code_handler, "naga_code_analysis_agent"), + ], +) +async def test_simple_agents_pass_prompt_through_verbatim( + monkeypatch: pytest.MonkeyPatch, 
+ handler_module: Any, + agent_name: str, +) -> None: + captured: dict[str, Any] = {} + + async def _fake_run_agent_with_tools(**kwargs: Any) -> str: + captured.update(kwargs) + return "raw answer" + + monkeypatch.setattr(handler_module, "run_agent_with_tools", _fake_run_agent_with_tools) + + result = await handler_module.execute({"prompt": " keep my original prompt "}, {}) + + assert result == "raw answer" + assert captured["agent_name"] == agent_name + assert captured["user_content"] == "keep my original prompt" + assert "context_messages" not in captured or captured["context_messages"] is None + + +@pytest.mark.asyncio +async def test_file_analysis_agent_keeps_prompt_raw_and_moves_file_to_context( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, Any] = {} + + async def _fake_run_agent_with_tools(**kwargs: Any) -> str: + captured.update(kwargs) + return "analysis" + + monkeypatch.setattr(file_handler, "run_agent_with_tools", _fake_run_agent_with_tools) + + context: dict[str, Any] = {} + result = await file_handler.execute( + {"file_source": "https://example.com/demo.pdf", "prompt": "提取结论"}, + context, + ) + + assert result == "analysis" + assert captured["user_content"] == "提取结论" + assert captured["context_messages"] == [ + { + "role": "system", + "content": "当前任务附带文件源:https://example.com/demo.pdf", + } + ] + assert context["file_source"] == "https://example.com/demo.pdf" + + +@pytest.mark.asyncio +async def test_file_analysis_agent_uses_generic_prompt_when_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, Any] = {} + + async def _fake_run_agent_with_tools(**kwargs: Any) -> str: + captured.update(kwargs) + return "analysis" + + monkeypatch.setattr(file_handler, "run_agent_with_tools", _fake_run_agent_with_tools) + + result = await file_handler.execute({"file_source": "file-123"}, {}) + + assert result == "analysis" + assert captured["user_content"] == "请分析这个文件。" + assert captured["context_messages"] == [ + {"role": 
"system", "content": "当前任务附带文件源:file-123"} + ] diff --git a/tests/test_code_delivery_agent_prompt_passthrough.py b/tests/test_code_delivery_agent_prompt_passthrough.py new file mode 100644 index 00000000..24f491c1 --- /dev/null +++ b/tests/test_code_delivery_agent_prompt_passthrough.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +from Undefined.skills.agents.code_delivery_agent import handler as code_delivery_handler + + +@pytest.mark.asyncio +async def test_code_delivery_agent_keeps_prompt_raw_and_separates_context( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, Any] = {} + + async def _fake_run_agent_with_retry(**kwargs: Any) -> str: + captured.update(kwargs) + return "delivery" + + monkeypatch.setattr( + code_delivery_handler, + "_run_agent_with_retry", + _fake_run_agent_with_retry, + ) + + context: dict[str, Any] = {"config": None} + result = await code_delivery_handler.execute( + { + "prompt": "修复这个项目的启动脚本", + "source_type": "git", + "git_url": "https://example.com/repo.git", + "git_ref": "main", + "target_type": "group", + "target_id": 123456, + }, + context, + ) + + assert result == "delivery" + assert captured["user_content"] == "修复这个项目的启动脚本" + assert captured["context_messages"] == [ + {"role": "system", "content": "当前初始化来源:git"}, + { + "role": "system", + "content": "当前 Git 仓库:https://example.com/repo.git @ main", + }, + ] + assert context["target_type"] == "group" + assert context["target_id"] == 123456 + assert context["init_args"] == { + "source_type": "git", + "git_url": "https://example.com/repo.git", + "git_ref": "main", + } From 8660481ea03e1569521aff89369b77298e726c27 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 11:29:28 +0800 Subject: [PATCH 03/25] style(test): format agent passthrough tests --- tests/test_agent_prompt_passthrough.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git 
a/tests/test_agent_prompt_passthrough.py b/tests/test_agent_prompt_passthrough.py index 182b8afc..bcecdbb8 100644 --- a/tests/test_agent_prompt_passthrough.py +++ b/tests/test_agent_prompt_passthrough.py @@ -34,7 +34,9 @@ async def _fake_run_agent_with_tools(**kwargs: Any) -> str: captured.update(kwargs) return "raw answer" - monkeypatch.setattr(handler_module, "run_agent_with_tools", _fake_run_agent_with_tools) + monkeypatch.setattr( + handler_module, "run_agent_with_tools", _fake_run_agent_with_tools + ) result = await handler_module.execute({"prompt": " keep my original prompt "}, {}) @@ -54,7 +56,9 @@ async def _fake_run_agent_with_tools(**kwargs: Any) -> str: captured.update(kwargs) return "analysis" - monkeypatch.setattr(file_handler, "run_agent_with_tools", _fake_run_agent_with_tools) + monkeypatch.setattr( + file_handler, "run_agent_with_tools", _fake_run_agent_with_tools + ) context: dict[str, Any] = {} result = await file_handler.execute( @@ -83,7 +87,9 @@ async def _fake_run_agent_with_tools(**kwargs: Any) -> str: captured.update(kwargs) return "analysis" - monkeypatch.setattr(file_handler, "run_agent_with_tools", _fake_run_agent_with_tools) + monkeypatch.setattr( + file_handler, "run_agent_with_tools", _fake_run_agent_with_tools + ) result = await file_handler.execute({"file_source": "file-123"}, {}) From 0affa467e7d2bedb5aa03296aed2ad06229d1c5a Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 12:48:57 +0800 Subject: [PATCH 04/25] perf(cognitive): reuse query embeddings in retrieval --- config.toml.example | 4 +- docs/cognitive-memory.md | 3 + docs/configuration.md | 4 +- docs/knowledge.md | 6 +- src/Undefined/cognitive/historian.py | 30 +++++ src/Undefined/cognitive/service.py | 37 ++++- src/Undefined/cognitive/vector_store.py | 126 +++++++++++++++++- src/Undefined/config/loader.py | 10 +- src/Undefined/config/models.py | 4 +- tests/test_cognitive_historian.py | 6 + tests/test_cognitive_service.py | 76 +++++++++++ 
tests/test_cognitive_vector_store_metadata.py | 37 +++++ tests/test_queue_intervals.py | 26 +++- 13 files changed, 349 insertions(+), 20 deletions(-) diff --git a/config.toml.example b/config.toml.example index 972a350b..3819bc88 100644 --- a/config.toml.example +++ b/config.toml.example @@ -431,7 +431,7 @@ api_key = "" model_name = "" # zh: 队列发车间隔(秒,0 表示立即发车)。 # en: Queue interval (seconds; 0 dispatches immediately). -queue_interval_seconds = 1.0 +queue_interval_seconds = 0.0 # zh: 向量维度(可选)。0 或留空表示使用模型默认维度。 # en: Embedding dimensions (optional). Use 0/empty to use model defaults. dimensions = 0 @@ -460,7 +460,7 @@ api_key = "" model_name = "" # zh: 队列发车间隔(秒,0 表示立即发车)。 # en: Queue interval (seconds; 0 dispatches immediately). -queue_interval_seconds = 1.0 +queue_interval_seconds = 0.0 # zh: 查询端指令前缀(可选,部分 rerank 模型需要,如 "Instruct: ...\\nQuery: ")。 # en: Query instruction prefix (optional, required by some rerank models, e.g. "Instruct: ...\\nQuery: "). query_instruction = "" diff --git a/docs/cognitive-memory.md b/docs/cognitive-memory.md index 6f88369d..7e19c656 100644 --- a/docs/cognitive-memory.md +++ b/docs/cognitive-memory.md @@ -34,6 +34,7 @@ enabled = true api_url = "https://api.openai.com/v1" api_key = "sk-xxx" model_name = "text-embedding-3-small" +queue_interval_seconds = 0.0 ``` > `models.embedding` 是必要前提。未配置时,即使 `cognitive.enabled = true`,启动时也会自动降级并打印警告。 @@ -185,6 +186,7 @@ MMR_score = λ × relevance(doc, query) − (1 − λ) × max_similarity(doc, se 说明: - 该规则影响自动注入路径下的语义召回与 rerank(两者使用同一 query)。 +- 同一轮自动检索会复用同一个 query embedding;短时间内的相同 query 还会命中本地短 TTL 缓存,避免 group/private 多作用域场景重复向量化。 - 手动工具 `cognitive.search_events` / `cognitive.search_profiles` 仍使用调用方显式传入的 `query`。 ### 自动注入场景的跨会话检索与加权 @@ -332,6 +334,7 @@ data/cognitive/ | `api_url` | OpenAI 兼容 base URL | | `api_key` | API 密钥 | | `model_name` | 模型名称(推荐 `text-embedding-3-small`) | +| `queue_interval_seconds` | 发车间隔(默认 `0.0`,立即发车;`<0` 回退 `0.0`) | | `dimensions` | 向量维度(可选,模型默认值) | ### 热更新说明 diff --git 
a/docs/configuration.md b/docs/configuration.md index ebb4b5be..3b3a443e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -297,7 +297,7 @@ model_name = "gpt-4o-mini" | `api_url` | `""` | 嵌入 API 地址 | | `api_key` | `""` | API Key | | `model_name` | `""` | 模型名 | -| `queue_interval_seconds` | `1.0` | 发车间隔(`0` 立即发车,`<0` 回退 `1.0`) | +| `queue_interval_seconds` | `0.0` | 发车间隔(`0` 立即发车,`<0` 回退 `0.0`) | | `dimensions` | `0` | 向量维度;`0`/空视为 `None`(模型默认) | | `query_instruction` | `""` | 查询前缀 | | `document_instruction` | `""` | 文档前缀 | @@ -310,7 +310,7 @@ model_name = "gpt-4o-mini" | `api_url` | `""` | rerank API 地址 | | `api_key` | `""` | API Key | | `model_name` | `""` | 模型名 | -| `queue_interval_seconds` | `1.0` | `0` 立即发车,`<0` 回退 `1.0` | +| `queue_interval_seconds` | `0.0` | `0` 立即发车,`<0` 回退 `0.0` | | `query_instruction` | `""` | 查询前缀 | | `request_params` | `{}` | 额外请求体参数;保留字段如 `model`/`query`/`documents`/`top_n` 会忽略 | diff --git a/docs/knowledge.md b/docs/knowledge.md index 800b5570..5b75e01e 100644 --- a/docs/knowledge.md +++ b/docs/knowledge.md @@ -32,7 +32,7 @@ knowledge/ # 项目根目录,仅存数据 api_url = "https://api.openai.com/v1" api_key = "sk-xxx" model_name = "text-embedding-3-small" -queue_interval_seconds = 1.0 # 发车间隔(秒,0 表示立即发车) +queue_interval_seconds = 0.0 # 发车间隔(秒,0 表示立即发车) dimensions = 512 # 向量维度(可选,0或不填则使用模型默认值) query_instruction = "" # 查询端指令前缀(Qwen3-Embedding 等模型需要) document_instruction = "" # 文档端指令前缀(E5 系列需要 "passage: ") @@ -41,7 +41,7 @@ document_instruction = "" # 文档端指令前缀(E5 系列需要 "pas api_url = "https://api.openai.com/v1" api_key = "sk-xxx" model_name = "text-rerank-001" -queue_interval_seconds = 1.0 # 发车间隔(秒,0 表示立即发车) +queue_interval_seconds = 0.0 # 发车间隔(秒,0 表示立即发车) query_instruction = "" # 查询端指令前缀(部分重排模型需要) [knowledge] @@ -186,7 +186,7 @@ chunk_size=4, chunk_overlap=1 → step=3 ### 站台/发车队列 -嵌入与重排请求都通过内置队列串行发送,按各自 `queue_interval_seconds` 控制发车间隔;其中 `0` 表示立即发车,负数回退到默认值。多行文本按 `embed_batch_size` 分批,每批一次 API 调用。 +嵌入与重排请求都通过内置队列串行发送,按各自 
`queue_interval_seconds` 控制发车间隔;embedding/rerank 默认值均为 `0.0`,表示立即发车,负数回退到 `0.0`。多行文本按 `embed_batch_size` 分批,每批一次 API 调用。 ``` texts → split_lines → [batch 1, batch 2, ...] → Queue → API (间隔发车) diff --git a/src/Undefined/cognitive/historian.py b/src/Undefined/cognitive/historian.py index f494a5fd..fde5d24a 100644 --- a/src/Undefined/cognitive/historian.py +++ b/src/Undefined/cognitive/historian.py @@ -187,6 +187,27 @@ def __init__( self._task: asyncio.Task[None] | None = None self._inflight_tasks: set[asyncio.Task[None]] = set() + async def _prepare_query_embedding(self, query_text: str) -> list[float] | None: + embed_query = getattr(self._vector_store, "embed_query", None) + if not callable(embed_query): + return None + try: + result = await embed_query(query_text) + except Exception as exc: + logger.warning("[史官] 预生成查询向量失败,回退即时计算: error=%s", exc) + return None + if not isinstance(result, list): + logger.warning("[史官] 预生成查询向量返回值非法,回退即时计算") + return None + normalized: list[float] = [] + for item in result: + try: + normalized.append(float(item)) + except (TypeError, ValueError): + logger.warning("[史官] 预生成查询向量包含非法元素,回退即时计算") + return None + return normalized + async def start(self) -> None: logger.info("[史官] Worker 启动中") self._task = asyncio.create_task(self._poll_loop()) @@ -673,6 +694,7 @@ async def _query_user_history_events_for_profile_merge( query_text: str, entity_id: str, top_k: int, + query_embedding: list[float] | None = None, ) -> list[dict[str, Any]]: """用户历史检索兼容路径:分别按 sender_id/user_id 查询并合并去重。 @@ -680,17 +702,22 @@ async def _query_user_history_events_for_profile_merge( query sender_id/user_id separately, then merge and dedupe. 
""" safe_top_k = max(1, int(top_k)) + query_embedding_value = query_embedding + if query_embedding_value is None: + query_embedding_value = await self._prepare_query_embedding(query_text) sender_query = self._vector_store.query_events( query_text, top_k=safe_top_k, where={"sender_id": entity_id}, apply_mmr=True, + query_embedding=query_embedding_value, ) user_query = self._vector_store.query_events( query_text, top_k=safe_top_k, where={"user_id": entity_id}, apply_mmr=True, + query_embedding=query_embedding_value, ) sender_events_raw, user_events_raw = await asyncio.gather( sender_query, user_query @@ -748,18 +775,21 @@ async def _merge_profile_target( if isinstance(observations_raw, list) else str(observations_raw) ) + query_embedding = await self._prepare_query_embedding(observations_text) if entity_type == "group": historical_events = await self._vector_store.query_events( observations_text, top_k=8, where={"group_id": entity_id}, apply_mmr=True, + query_embedding=query_embedding, ) else: historical_events = await self._query_user_history_events_for_profile_merge( query_text=observations_text, entity_id=entity_id, top_k=8, + query_embedding=query_embedding, ) historical_lines = ( "\n".join( diff --git a/src/Undefined/cognitive/service.py b/src/Undefined/cognitive/service.py index 1c7dc3c9..c79a1aa7 100644 --- a/src/Undefined/cognitive/service.py +++ b/src/Undefined/cognitive/service.py @@ -4,6 +4,7 @@ import asyncio import logging +import time from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Callable, cast @@ -143,6 +144,27 @@ def _current_reranker(self) -> Any: return None return self._base_reranker() + async def _prepare_query_embedding(self, query: str) -> list[float] | None: + embed_query = getattr(self._vector_store, "embed_query", None) + if not callable(embed_query): + return None + try: + result = await embed_query(query) + except Exception as exc: + logger.warning("[认知服务] 预生成查询向量失败,回退即时计算: error=%s", exc) + return None + if 
not isinstance(result, list): + logger.warning("[认知服务] 预生成查询向量返回值非法,回退即时计算") + return None + normalized: list[float] = [] + for item in result: + try: + normalized.append(float(item)) + except (TypeError, ValueError): + logger.warning("[认知服务] 预生成查询向量包含非法元素,回退即时计算") + return None + return normalized + @property def enabled(self) -> bool: return bool(self._config_getter().enabled) @@ -247,6 +269,7 @@ async def _query_events_for_auto_context( ) if current_private_boost <= 0: current_private_boost = 1.25 + query_embedding = await self._prepare_query_embedding(query) common_kwargs: dict[str, Any] = { "reranker": self._current_reranker(), "candidate_multiplier": config.rerank_candidate_multiplier, @@ -260,6 +283,8 @@ async def _query_events_for_auto_context( ), "apply_mmr": True, } + if query_embedding is not None: + common_kwargs["query_embedding"] = query_embedding uid_values = self._uid_candidates(user_id, sender_id) if request_type == "group": @@ -269,19 +294,22 @@ async def _query_events_for_auto_context( where={"request_type": "group"}, **common_kwargs, ) + merge_started = time.perf_counter() merged = self._merge_weighted_events( [(group_events, 1.0)], top_k=safe_top_k, current_group_id=group_id, current_group_boost=current_group_boost, ) + merge_duration = time.perf_counter() - merge_started logger.info( - "[认知服务] 自动检索(群聊): group_candidates=%s merged=%s top_k=%s scope_multiplier=%s current_group_boost=%.2f", + "[认知服务] 自动检索(群聊): group_candidates=%s merged=%s top_k=%s scope_multiplier=%s current_group_boost=%.2f merge=%.3fs", len(group_events), len(merged), safe_top_k, scope_candidate_multiplier, current_group_boost, + merge_duration, ) return merged @@ -316,6 +344,7 @@ async def _query_events_for_auto_context( else: group_events = cast(list[dict[str, Any]], await group_task) private_events = [] + merge_started = time.perf_counter() merged = self._merge_weighted_events( [ (group_events, 1.0), @@ -323,8 +352,9 @@ async def _query_events_for_auto_context( ], 
top_k=safe_top_k, ) + merge_duration = time.perf_counter() - merge_started logger.info( - "[认知服务] 自动检索(私聊): group_candidates=%s private_candidates=%s merged=%s top_k=%s scope_multiplier=%s private_boost=%.2f uid_candidates=%s", + "[认知服务] 自动检索(私聊): group_candidates=%s private_candidates=%s merged=%s top_k=%s scope_multiplier=%s private_boost=%.2f uid_candidates=%s merge=%.3fs", len(group_events), len(private_events), len(merged), @@ -332,6 +362,7 @@ async def _query_events_for_auto_context( scope_candidate_multiplier, current_private_boost, uid_values, + merge_duration, ) return merged @@ -670,6 +701,7 @@ async def search_events(self, query: str, **kwargs: Any) -> list[dict[str, Any]] getattr(config, "time_decay_min_similarity", 0.35) ), apply_mmr=True, + query_embedding=await self._prepare_query_embedding(query), ) logger.info("[认知服务] 搜索事件完成: count=%s", len(results)) return results @@ -720,6 +752,7 @@ async def search_profiles(self, query: str, **kwargs: Any) -> list[dict[str, Any where=where, reranker=self._current_reranker(), candidate_multiplier=config.rerank_candidate_multiplier, + query_embedding=await self._prepare_query_embedding(query), ) logger.info("[认知服务] 搜索侧写完成: count=%s", len(results)) return results diff --git a/src/Undefined/cognitive/vector_store.py b/src/Undefined/cognitive/vector_store.py index f6d990ce..fadada48 100644 --- a/src/Undefined/cognitive/vector_store.py +++ b/src/Undefined/cognitive/vector_store.py @@ -4,6 +4,8 @@ import asyncio import logging +import time +from collections import OrderedDict from datetime import datetime, timezone from pathlib import Path from typing import Any @@ -15,6 +17,9 @@ logger = logging.getLogger(__name__) +_QUERY_EMBEDDING_CACHE_TTL_SECONDS = 60.0 +_QUERY_EMBEDDING_CACHE_MAX_SIZE = 256 + def _clamp(value: float, lower: float, upper: float) -> float: if value < lower: @@ -183,11 +188,17 @@ def __init__(self, path: str | Path, embedder: Any) -> None: "cognitive_profiles", metadata={"hnsw:space": "cosine"} ) 
self._embedder = embedder + self._query_embedding_cache: OrderedDict[ + tuple[str, str, str, str], tuple[float, list[float]] + ] = OrderedDict() + self._query_embedding_cache_lock = asyncio.Lock() logger.info( - "[认知向量库] 初始化完成: path=%s events=%s profiles=%s", + "[认知向量库] 初始化完成: path=%s events=%s profiles=%s query_cache_ttl=%ss query_cache_size=%s", str(path), getattr(self._events, "name", "cognitive_events"), getattr(self._profiles, "name", "cognitive_profiles"), + _QUERY_EMBEDDING_CACHE_TTL_SECONDS, + _QUERY_EMBEDDING_CACHE_MAX_SIZE, ) async def _embed(self, text: str) -> list[float]: @@ -200,6 +211,81 @@ async def _embed(self, text: str) -> list[float]: ) return vector + def _query_embedding_cache_key(self, query_text: str) -> tuple[str, str, str, str]: + model_config = getattr(self._embedder, "_embedding_model", None) + model_name = str( + getattr(model_config, "model_name", "") + or getattr(self._embedder, "model_name", "") + or "" + ) + dimensions = str(getattr(model_config, "dimensions", "") or "") + query_instruction = str(getattr(self._embedder, "query_instruction", "") or "") + normalized_query = str(query_text or "").strip() + return (model_name, dimensions, query_instruction, normalized_query) + + async def _get_or_create_query_embedding( + self, + query_text: str, + ) -> tuple[list[float], str]: + cache_key = self._query_embedding_cache_key(query_text) + now = time.monotonic() + async with self._query_embedding_cache_lock: + cached = self._query_embedding_cache.get(cache_key) + if cached is not None: + cached_at, cached_embedding = cached + if now - cached_at < _QUERY_EMBEDDING_CACHE_TTL_SECONDS: + self._query_embedding_cache.move_to_end(cache_key) + logger.debug( + "[认知向量库] 查询向量缓存命中: model=%s dims=%s query_len=%s", + cache_key[0], + cache_key[1] or "default", + len(cache_key[3]), + ) + return list(cached_embedding), "cache_hit" + self._query_embedding_cache.pop(cache_key, None) + + embedding = await self._embed(query_text) + now = time.monotonic() + 
async with self._query_embedding_cache_lock: + cached = self._query_embedding_cache.get(cache_key) + if cached is not None: + cached_at, cached_embedding = cached + if now - cached_at < _QUERY_EMBEDDING_CACHE_TTL_SECONDS: + self._query_embedding_cache.move_to_end(cache_key) + logger.debug( + "[认知向量库] 查询向量缓存并发命中: model=%s dims=%s query_len=%s", + cache_key[0], + cache_key[1] or "default", + len(cache_key[3]), + ) + return list(cached_embedding), "cache_hit" + self._query_embedding_cache.pop(cache_key, None) + + self._query_embedding_cache[cache_key] = (now, list(embedding)) + self._query_embedding_cache.move_to_end(cache_key) + while len(self._query_embedding_cache) > _QUERY_EMBEDDING_CACHE_MAX_SIZE: + self._query_embedding_cache.popitem(last=False) + logger.debug( + "[认知向量库] 查询向量缓存写入: model=%s dims=%s query_len=%s", + cache_key[0], + cache_key[1] or "default", + len(cache_key[3]), + ) + return list(embedding), "cache_miss" + + async def embed_query(self, query_text: str) -> list[float]: + embedding, _ = await self._get_or_create_query_embedding(query_text) + return embedding + + async def _resolve_query_embedding( + self, + query_text: str, + query_embedding: list[float] | None = None, + ) -> tuple[list[float], str]: + if query_embedding is not None: + return list(query_embedding), "provided" + return await self._get_or_create_query_embedding(query_text) + async def upsert_event( self, event_id: str, document: str, metadata: dict[str, Any] ) -> None: @@ -234,6 +320,7 @@ async def query_events( time_decay_boost: float = 0.2, time_decay_min_similarity: float = 0.35, apply_mmr: bool = False, + query_embedding: list[float] | None = None, ) -> list[dict[str, Any]]: logger.info( "[认知向量库] 查询事件: query_len=%s top_k=%s where=%s reranker=%s multiplier=%s decay_enabled=%s half_life_days=%s boost=%s min_sim=%s mmr=%s", @@ -260,6 +347,7 @@ async def query_events( time_decay_boost=time_decay_boost, time_decay_min_similarity=time_decay_min_similarity, apply_mmr=apply_mmr, + 
query_embedding=query_embedding, ) async def upsert_profile( @@ -291,6 +379,7 @@ async def query_profiles( where: dict[str, Any] | None = None, reranker: Any = None, candidate_multiplier: int = 3, + query_embedding: list[float] | None = None, ) -> list[dict[str, Any]]: logger.info( "[认知向量库] 查询侧写: query_len=%s top_k=%s where=%s reranker=%s multiplier=%s", @@ -301,7 +390,13 @@ async def query_profiles( candidate_multiplier, ) return await self._query( - self._profiles, query_text, top_k, where, reranker, candidate_multiplier + self._profiles, + query_text, + top_k, + where, + reranker, + candidate_multiplier, + query_embedding=query_embedding, ) async def _query( @@ -318,10 +413,12 @@ async def _query( time_decay_boost: float = 0.2, time_decay_min_similarity: float = 0.35, apply_mmr: bool = False, + query_embedding: list[float] | None = None, ) -> list[dict[str, Any]]: col_name = getattr(col, "name", "unknown") safe_top_k = _safe_positive_int(top_k, default=1) safe_multiplier = _safe_positive_int(candidate_multiplier, default=1) + total_started = time.perf_counter() logger.debug( "[认知向量库] 开始查询 collection=%s top_k=%s where=%s decay=%s mmr=%s", col_name, @@ -330,7 +427,12 @@ async def _query( apply_time_decay, apply_mmr, ) - emb = await self._embed(query_text) + embed_started = time.perf_counter() + emb, embedding_source = await self._resolve_query_embedding( + query_text, + query_embedding=query_embedding, + ) + embed_duration = time.perf_counter() - embed_started # 重排要求候选数 > 最终返回数,否则重排无意义 use_reranker = bool(reranker) and safe_multiplier >= 2 if reranker and safe_multiplier < 2: @@ -356,7 +458,9 @@ async def _query( def _q() -> Any: return col.query(**kwargs) + chroma_started = time.perf_counter() raw = await asyncio.to_thread(_q) + chroma_duration = time.perf_counter() - chroma_started docs: list[str] = (raw.get("documents") or [[]])[0] metas: list[dict[str, Any]] = (raw.get("metadatas") or [[]])[0] dists: list[float] = (raw.get("distances") or [[]])[0] @@ -376,6 
+480,7 @@ def _q() -> Any: len(results), ) + rerank_duration = 0.0 if use_reranker and results: logger.info( "[认知向量库] 开始重排: collection=%s candidates=%s top_k=%s", @@ -384,6 +489,7 @@ def _q() -> Any: safe_top_k, ) rerank_top_n = fetch_k if (apply_time_decay or apply_mmr) else safe_top_k + rerank_started = time.perf_counter() try: reranked = await reranker.rerank( query_text, [r["document"] for r in results], top_n=rerank_top_n @@ -427,7 +533,9 @@ def _q() -> Any: safe_top_k, len(results), ) + rerank_duration = time.perf_counter() - rerank_started + post_rank_started = time.perf_counter() if apply_time_decay and results: decay_top_k = fetch_k if apply_mmr else safe_top_k final = self._apply_time_decay_ranking( @@ -450,12 +558,24 @@ def _q() -> Any: final = self._apply_mmr(final, emb, safe_top_k) for item in final: item.pop("embedding", None) + post_rank_duration = time.perf_counter() - post_rank_started + total_duration = time.perf_counter() - total_started logger.info( "[认知向量库] 返回查询结果: collection=%s final_count=%s", col_name, len(final), ) + logger.info( + "[认知向量库] 查询阶段耗时: collection=%s embed=%.3fs chroma_query=%.3fs rerank=%.3fs post_rank=%.3fs total=%.3fs embedding_source=%s", + col_name, + embed_duration, + chroma_duration, + rerank_duration, + post_rank_duration, + total_duration, + embedding_source, + ) return final def _apply_time_decay_ranking( diff --git a/src/Undefined/config/loader.py b/src/Undefined/config/loader.py index 8862292f..794127a9 100644 --- a/src/Undefined/config/loader.py +++ b/src/Undefined/config/loader.py @@ -1759,8 +1759,9 @@ def _parse_embedding_model_config(data: dict[str, Any]) -> EmbeddingModelConfig: _get_value( data, ("models", "embedding", "queue_interval_seconds"), None ), - 1.0, - ) + 0.0, + ), + 0.0, ), dimensions=_coerce_int( _get_value(data, ("models", "embedding", "dimensions"), None), 0 @@ -1781,8 +1782,9 @@ def _parse_rerank_model_config(data: dict[str, Any]) -> RerankModelConfig: queue_interval_seconds = 
_normalize_queue_interval( _coerce_float( _get_value(data, ("models", "rerank", "queue_interval_seconds"), None), - 1.0, - ) + 0.0, + ), + 0.0, ) return RerankModelConfig( api_url=_coerce_str( diff --git a/src/Undefined/config/models.py b/src/Undefined/config/models.py index daea4380..2b844c45 100644 --- a/src/Undefined/config/models.py +++ b/src/Undefined/config/models.py @@ -141,7 +141,7 @@ class EmbeddingModelConfig: api_url: str api_key: str model_name: str - queue_interval_seconds: float = 1.0 + queue_interval_seconds: float = 0.0 dimensions: int | None = None query_instruction: str = "" # 查询端指令前缀(如 Qwen3-Embedding 需要) document_instruction: str = "" # 文档端指令前缀(如 E5 系列需要 "passage: ") @@ -155,7 +155,7 @@ class RerankModelConfig: api_url: str api_key: str model_name: str - queue_interval_seconds: float = 1.0 + queue_interval_seconds: float = 0.0 query_instruction: str = "" # 查询端指令前缀(如部分 rerank 模型需要) request_params: dict[str, Any] = field(default_factory=dict) diff --git a/tests/test_cognitive_historian.py b/tests/test_cognitive_historian.py index 9eb1e2f7..c86841aa 100644 --- a/tests/test_cognitive_historian.py +++ b/tests/test_cognitive_historian.py @@ -116,6 +116,11 @@ async def test_merge_profile_target_user_queries_history_with_sender_or_user_id( class _FakeVectorStore: def __init__(self) -> None: self.where_calls: list[dict[str, Any]] = [] + self.embed_query_calls = 0 + + async def embed_query(self, _query: str) -> list[float]: + self.embed_query_calls += 1 + return [0.56, 0.78] async def query_events( self, _query: str, **kwargs: Any @@ -173,6 +178,7 @@ async def submit_background_llm_call(self, **kwargs: Any) -> dict[str, Any]: ) assert result is False + assert vector_store.embed_query_calls == 1 assert {"sender_id": "123456"} in vector_store.where_calls assert {"user_id": "123456"} in vector_store.where_calls diff --git a/tests/test_cognitive_service.py b/tests/test_cognitive_service.py index 49cc4162..724f2651 100644 --- a/tests/test_cognitive_service.py 
+++ b/tests/test_cognitive_service.py @@ -422,6 +422,82 @@ def _resolve_events(kwargs: dict[str, Any]) -> list[dict[str, Any]]: assert context.index("当前私聊上下文") < context.index("群聊公共经验") +@pytest.mark.asyncio +async def test_build_context_private_mode_reuses_single_query_embedding() -> None: + class _EmbeddingAwareVectorStore(_FakeVectorStore): + def __init__(self) -> None: + super().__init__() + self.embed_query_calls = 0 + + async def embed_query(self, _query: str) -> list[float]: + self.embed_query_calls += 1 + return [0.12, 0.34] + + vector_store = _EmbeddingAwareVectorStore() + + def _resolve_events(kwargs: dict[str, Any]) -> list[dict[str, Any]]: + where = kwargs.get("where") + if where == {"request_type": "group"}: + return [ + { + "document": "群聊公共经验", + "metadata": { + "timestamp_local": "2026-02-24 11:00:00", + "group_id": "9001", + "request_type": "group", + }, + "distance": 0.28, + } + ] + if isinstance(where, dict) and "$and" in where: + return [ + { + "document": "当前私聊上下文", + "metadata": { + "timestamp_local": "2026-02-24 12:00:00", + "group_id": "", + "request_type": "private", + "user_id": "u1", + "sender_id": "u2", + }, + "distance": 0.40, + } + ] + return [] + + vector_store.event_resolver = _resolve_events + service = CognitiveService( + config_getter=lambda: SimpleNamespace( + enabled=True, + enable_rerank=False, + auto_top_k=2, + auto_scope_candidate_multiplier=2, + auto_current_private_boost=1.25, + rerank_candidate_multiplier=3, + time_decay_enabled=True, + time_decay_half_life_days_auto=14.0, + time_decay_boost=0.2, + time_decay_min_similarity=0.35, + ), + vector_store=vector_store, + job_queue=_FakeJobQueue(), + profile_storage=_FakeProfileStorage(), + reranker=None, + ) + + await service.build_context( + query="我这个报错之前怎么处理过", + user_id="u1", + sender_id="u2", + request_type="private", + ) + + assert vector_store.embed_query_calls == 1 + assert len(vector_store.event_calls) == 2 + assert vector_store.event_calls[0].get("query_embedding") == 
[0.12, 0.34] + assert vector_store.event_calls[1].get("query_embedding") == [0.12, 0.34] + + def test_merge_weighted_events_preserves_scope_rank_order() -> None: # scoped_events 已经是 query_events 的最终排序(含 time_decay/mmr/rerank), # merge 过程不应再按 base_score 重新洗牌。 diff --git a/tests/test_cognitive_vector_store_metadata.py b/tests/test_cognitive_vector_store_metadata.py index cb7e00b6..c6c12c5a 100644 --- a/tests/test_cognitive_vector_store_metadata.py +++ b/tests/test_cognitive_vector_store_metadata.py @@ -1,6 +1,13 @@ from __future__ import annotations +import asyncio +from collections import OrderedDict +from types import SimpleNamespace + +import pytest + from Undefined.cognitive.vector_store import _sanitize_metadata +from Undefined.cognitive.vector_store import CognitiveVectorStore def test_sanitize_metadata_drops_empty_message_ids_list() -> None: @@ -27,3 +34,33 @@ def test_sanitize_metadata_keeps_non_empty_message_ids_list() -> None: assert result["user_id"] == "42" assert result["message_ids"] == ["10001", 10002] + + +@pytest.mark.asyncio +async def test_embed_query_cache_reuses_recent_embedding() -> None: + class _FakeEmbedder: + query_instruction = "query: " + + def __init__(self) -> None: + self.calls = 0 + self._embedding_model = SimpleNamespace( + model_name="text-embedding-test", + dimensions=3, + ) + + async def embed(self, texts: list[str]) -> list[list[float]]: + self.calls += 1 + _ = texts + return [[0.11, 0.22, 0.33]] + + store = CognitiveVectorStore.__new__(CognitiveVectorStore) + store._embedder = _FakeEmbedder() + store._query_embedding_cache = OrderedDict() + store._query_embedding_cache_lock = asyncio.Lock() + + first = await store.embed_query(" hello world ") + second = await store.embed_query("hello world") + + assert first == [0.11, 0.22, 0.33] + assert second == [0.11, 0.22, 0.33] + assert store._embedder.calls == 1 diff --git a/tests/test_queue_intervals.py b/tests/test_queue_intervals.py index 4f50f652..fe34b1c7 100644 --- 
a/tests/test_queue_intervals.py +++ b/tests/test_queue_intervals.py @@ -154,8 +154,30 @@ def test_negative_queue_intervals_still_fall_back_to_defaults(tmp_path: Path) -> assert cfg.vision_model.queue_interval_seconds == 1.0 assert cfg.agent_model.queue_interval_seconds == 0.5 assert cfg.historian_model.queue_interval_seconds == 0.5 - assert cfg.embedding_model.queue_interval_seconds == 1.0 - assert cfg.rerank_model.queue_interval_seconds == 1.0 + assert cfg.embedding_model.queue_interval_seconds == 0.0 + assert cfg.rerank_model.queue_interval_seconds == 0.0 + + +def test_embedding_and_rerank_default_to_immediate_dispatch_when_unset( + tmp_path: Path, +) -> None: + cfg = _load_config( + tmp_path / "config.toml", + """ +[models.embedding] +api_url = "https://api.openai.com/v1" +api_key = "sk-embed" +model_name = "text-embedding-3-small" + +[models.rerank] +api_url = "https://api.openai.com/v1" +api_key = "sk-rerank" +model_name = "text-rerank-001" +""", + ) + + assert cfg.embedding_model.queue_interval_seconds == 0.0 + assert cfg.rerank_model.queue_interval_seconds == 0.0 def test_queue_manager_allows_zero_default_interval() -> None: From 919ec06a7d5703a8cc0862431c8d414d720d0c62 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 14:33:09 +0800 Subject: [PATCH 05/25] feat(changelog): add runtime changelog command and query tool --- CHANGELOG.md | 647 ++++++++++++++++++ README.md | 2 + docs/development.md | 7 + docs/slash-commands.md | 34 + docs/usage.md | 14 +- pyproject.toml | 4 + src/Undefined/changelog.py | 209 ++++++ .../skills/commands/changelog/README.md | 20 + .../skills/commands/changelog/config.json | 16 + .../skills/commands/changelog/handler.py | 78 +++ .../skills/tools/changelog_query/README.md | 21 + .../skills/tools/changelog_query/config.json | 39 ++ .../skills/tools/changelog_query/handler.py | 149 ++++ tests/test_changelog.py | 121 ++++ tests/test_changelog_command.py | 144 ++++ tests/test_changelog_tool.py | 108 +++ 16 
files changed, 1611 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 src/Undefined/changelog.py create mode 100644 src/Undefined/skills/commands/changelog/README.md create mode 100644 src/Undefined/skills/commands/changelog/config.json create mode 100644 src/Undefined/skills/commands/changelog/handler.py create mode 100644 src/Undefined/skills/tools/changelog_query/README.md create mode 100644 src/Undefined/skills/tools/changelog_query/config.json create mode 100644 src/Undefined/skills/tools/changelog_query/handler.py create mode 100644 tests/test_changelog.py create mode 100644 tests/test_changelog_command.py create mode 100644 tests/test_changelog_tool.py diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7fd07d20 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,647 @@ +## v3.2.6 Responses 重试与私聊发送修复 + +优化了消息投递系统的可靠性,涵盖 Responses 回放、队列重试及私聊发送链路。主要改进包括发送回退机制、零间隔调度支持,以及 Naga 投递追踪与运行时测试的完善。 + +- 过滤 replay-only 状态字段,防止 Responses 回放结果干扰后续请求。 +- 调整队列重试调度逻辑,支持零间隔立即投递,并细化重试时序。 +- 增加私聊消息发送失败时的临时会话回退机制,降低消息丢失率。 +- 限制 `/lsadmin` 命令的可见性,提升安全性。 +- 为 Naga 增加发送 UUID 幂等性校验、投递追踪及相关测试覆盖。 +- 修复 CI 与运行时测试,并补充 OpenAI reasoning 参数的对齐处理。 + +--- + +## v3.2.5 形象资源与调用暴露整理 + +调整了项目形象资源与部分调用权限配置。优化了外部形象资源的引用方式,并清理了 info_agent 的暴露行为,保持现有功能入口不变。 + +- 更新 README 中的项目形象资源与展示素材。 +- 移除不再需要的 info_agent callable override 配置。 +- 保持既有功能入口的稳定性,减少对使用方式的变更。 + +--- + +## v3.2.4 Naga 回调端口与联动修复 + +适配新版 NagaAgent 接口变化,校准了回调端口与联动流程。重点优化了回调地址解析、端口处理及绑定后的投递逻辑,增强了异常处理与测试覆盖。 + +- 适配新版 NagaAgent 的回调端口配置与请求路径。 +- 修复联动模式下部分绑定状态与回调逻辑不一致的问题。 +- 增强异常处理机制,减少联动过程中的静默失败。 +- 补充 Naga 回调流程相关的测试用例。 + +--- + +## v3.2.3 配置同步与推理参数重构 + +统一了配置同步体验与 AI 推理参数管理,优化了管理端与运行时的读取逻辑。同步更新了 Naga 子模块版本,增强了模板对齐与增量同步功能。 + +- 增强配置同步脚本,支持更精准的模板对齐与增量同步。 +- 重构 AI 推理相关配置项,统一参数表达与读取方式。 +- 更新 NagaAgent 子模块版本,保持联动能力一致性。 +- 规范配置文档与默认值,降低配置迁移成本。 + +--- + +## v3.2.2 IPv6 监听与代理 URL 修复 + +修复了 Runtime API 与 WebUI 在 IPv6 双栈环境下的地址构造问题。统一了 URL 构造逻辑,增强了监听地址、回环地址及代理 URL 在复杂网络环境中的兼容性与稳定性。 + +- 修复 Runtime API 代理 
URL 在 IPv6 监听地址下的解析错误。 +- 统一 URL 构造逻辑,防止多入口拼接导致的地址异常。 +- 增强 IPv4/IPv6 双栈监听的兼容性。 +- 提升管理端在复杂网络环境中的连接稳定性。 + +--- + +## v3.2.1 Tool Invoke API 与 Naga Scoped Token + +扩展了 Runtime API 的外部调用能力,新增 Tool Invoke API 并引入 Naga Scoped Token 鉴权机制。明确了接口权限边界,完善了 API 路由与错误处理。 + +- 新增 Tool Invoke API,支持通过运行时接口直接触发工具调用。 +- 引入 Naga Scoped Token 鉴权机制,细化联动权限控制。 +- 完善 API 路由定义、鉴权校验及错误返回格式。 +- 为管理端及外部集成提供更稳定的调用接口。 + +--- + +## v3.2.0 多平台客户端与 Responses 支持增强 + +新增多平台客户端发布链路(Windows/macOS/Linux/Android),并增强了 WebUI 管理体验。运行时新增引用回复支持,优化 Python 解释器与 Responses API 接入,提升了跨平台管理与交互能力。 + +- 建立覆盖 Windows、macOS、Linux 与 Android 的客户端发布流程。 +- 优化 WebUI 界面交互与管理流程,提升远程管理体验。 +- 支持消息引用回复、Python 解释器功能增强及 Responses API 接入。 +- 修复 macOS 与 Android 构建兼容性问题,优化 CI 缓存策略。 +- 统一移动端与桌面端的管理面运行基础。 + +--- + +## v3.1.1 记忆联动与后台调度增强 + +增强了记忆架构的实用性与稳定性。支持跨群记忆联动,优化了后台史官调度(改为轮询周期)及图片分析链路,提升了记忆自动检索的上下文质量。 + +- 增加记忆自动检索加权配置,优化上下文注入效果。 +- 支持跨群记忆联动,扩展认知记忆的共享范围。 +- 将 Historian Job 调整为轮询周期调度,缓解后台任务拥塞。 +- 优化图片分析与 B 站下载链路,减少对主流程的干扰。 +- 提升后台史官任务的稳定性与故障恢复能力。 + +--- + +## v3.1.0 后台史官 Agent Loop + +实现了后台史官的 Agentic Loop,支持持续的记忆整理与长期状态更新。新增侧写维护工具 (read/update profile),优化了记忆改写与回写流程,减少对前台对话的阻塞。 + +- 实现后台史官 Agentic Loop,支持持续处理记忆任务。 +- 新增 read/update profile 工具,完善认知侧写维护链路。 +- 优化后台记忆改写与回写流程,降低前台响应延迟。 +- 为长期记忆与用户画像的持续演进提供基础支持。 + +--- + +## v3.0.3 本地文件 URI 统一 + +统一本地文件发送格式为 `file://` URI,消除了不同入口对文件路径解析的差异,提升了图片与文件消息发送的一致性。 + +- 统一本地文件发送使用 `file://` URI 协议。 +- 消除不同消息入口对文件路径解释的差异。 +- 提升图片和文件消息发送链路的一致性。 + +--- + +## v3.0.2 图片发送链路修复 + +修复了图片发送失败的问题,调整了消息输出链路中的兼容性处理,降低了多模态结果回传的失败率。 + +- 修复图片无法发送的问题。 +- 调整图片消息输出链路中的兼容处理。 +- 降低多模态结果回传时的失败率。 + +--- + +## v3.0.1 WebUI 会话上下文补全 + +补全了 WebUI 会话中的 `request_type` 上下文,改善了 WebUI AI Chat 与运行时上下文的对齐程度,解决了工具调用时的上下文缺失问题。 + +- 为 WebUI 会话 `extra_context` 补充 `request_type` 字段。 +- 改善 WebUI AI Chat 与运行时上下文的对齐程度。 +- 修复管理端对话在工具调用时的上下文缺失问题。 + +--- + +## v3.0.0 新版认知记忆架构与 WebUI 大改进 + +重构了认知记忆架构与 WebUI 管理体验。推出了基于事件、侧写和长期记忆的新版架构,重构了后台史官流程,并显著增强了 WebUI 的可用性与运行态展示。 + +- 推出新版认知记忆架构,优化事件、侧写和长期记忆的组织方式。 +- 重构后台史官相关流程,建立 Agentic Loop 基础。 
+- 改进 WebUI 管理体验和运行态展示,增强管理端可用性。 +- 优化记忆注入、检索与改写链路,减少对前台响应的阻塞。 +- 为跨群记忆联动和认知查询能力提供底层支持。 + +--- + +## v2.15.0 知识库与访问控制落地 + +正式引入知识库功能,集成了文本检索、语义检索与重排链路。实现了基于模式的访问控制(黑白名单),并强化了防重复执行机制与斜杠命令识别。 + +- 新增基于文本检索、语义检索和重排的知识库功能。 +- 引入模式化访问控制,支持群聊与私聊黑白名单。 +- 修复带昵称 `@` 片段导致的斜杠命令识别失败问题。 +- 强化防多次执行提示词,降低重复调用工具的概率。 +- 为认知记忆和知识检索联动提供底层能力。 + +--- + +## v2.14.0 工具扩充与重复执行防护 + +扩充了实用工具集,并优化了防重复执行的提示词与决策逻辑。移除了旧版 inflight summary 方案,提升了多工具协作的稳定性与可控性。 + +- 新增更多实用工具,扩展日常查询和执行能力。 +- 重构防重复执行相关提示词与决策逻辑。 +- 移除旧的 inflight summary 方案,简化中间态处理。 +- 优化多工具协作时的稳定性和可控性。 + +--- + +## v2.13.1 私聊多模型池 + +在私聊场景引入多模型池支持,实现了更灵活的模型选择。同时修复了代码交付 Agent 的稳定性问题,优化了上下文切换体验。 + +- 为私聊场景增加多模型池支持和智能模型选择能力。 +- 修复代码交付 Agent 的多个稳定性问题。 +- 改善多模型切换时的上下文和调用体验。 +- 收敛 Agent 协作中的边界处理。 + +--- + +## v2.13.0 并发防重与工具增强 + +引入并发防重复执行机制,解决了并发场景下的工具重复触发问题。升级了时间工具、Bilibili 工具及文本文件投递能力,并收紧了 Agent 互调用权限。 + +- 新增并发防重复执行机制,降低进行中摘要和工具重复触发。 +- 升级 `get_current_time`,增加农历等信息支持。 +- 切换 Bilibili 工具到官方接口并增加 WBI 重试。 +- 新增 `send_text_file` 工具,增强文本文件投递能力。 +- 收紧主工具白名单与 Agent 互调用权限控制。 +- 增强统计功能并修复多项稳定性问题。 + +--- + +## v2.12.11 代码交付 Agent 与互调用 + +正式发布代码交付 Agent,支持代码生成、验证与交付。新增 Agent 互调用能力,支持多 Agent 协作完成复杂任务。 + +- 新增代码交付 Agent,支持生成、验证并交付代码结果。 +- 增加 Agent 互调用能力,支持更复杂的多 Agent 协作。 +- 调整相关提示词与执行链路,提升任务拆分能力。 +- 为编程、交付和复合任务场景提供新的执行入口。 + +--- + +## v2.12.10 工具系统稳定性修复 + +修复了工具调用体系中的兼容性与稳定性问题。重点优化了工具注册、解析和执行链路,确保工具调用的可靠性。 + +- 修复工具系统中的多个兼容性与稳定性问题。 +- 调整工具注册和调用链路,减少异常失败。 +- 改善工具错误处理与输出一致性。 +- 为 Agent 协作与复杂工具链提供稳定基础。 + +--- + +## v2.12.9 Agent 架构与多 Agent 稳定性增强 + +重构了 Agent 架构,梳理了主 AI 与子 Agent 的协作边界。新增好友与群聊查询能力,并修复了多项稳定性问题,提升了多 Agent 协作成功率。 + +- 重构 Agent 架构,明确主 AI 与子 Agent 的协作边界。 +- 新增好友与群聊查询相关能力,扩展联系人可见范围。 +- 修复多项稳定性问题,提升多 Agent 协作成功率。 +- 优化 `end_summary` 和记忆相关逻辑,提升长期上下文质量。 +- 补强部分工具链路的异常处理和容错能力。 + +--- + +## v2.12.8 @ 处理与群成员检索增强 + +优化了群聊中的 @ 处理逻辑与群成员检索功能。增强了复杂消息中的 @ 识别准确性,支持按昵称查找群成员。 + +- 增强 @ 相关处理逻辑,减少复杂消息中的识别偏差。 +- 新增群成员检索工具,支持按昵称查找群成员。 +- 提升消息链路与工具调用之间的衔接稳定性。 +- 为群管理分析和成员操作能力提供支持。 + +--- + +## v2.12.7 已知问题快速修补 + +修复了若干关键 Bug,重点解决了部分异常输入导致的失败问题,确保了前一版本新增功能的稳定性。 + 
+- 修复三个关键 Bug,降低常见故障触发率。 +- 调整相关边界处理,减少异常输入导致的失败。 +- 巩固前一版本新增功能的实际可用性。 + +--- + +## v2.12.6 Bilibili 视频解析发送 + +新增 Bilibili 视频解析、下载与发送功能,并补充了群管理分析工具集。打通了从视频内容识别到发送的完整链路。 + +- 新增 Bilibili 视频解析、下载与发送功能。 +- 增加群管理分析工具集,补充群聊观察能力。 +- 打通视频内容从识别到发送的完整链路。 +- 扩展多媒体工具和群分析工具的能力边界。 + +--- + +## v2.12.5 Anthropic Skills 与 Naga 工具更新 + +集成 Anthropic Skills 支持,允许以 `SKILL.md` 形式注入能力。更新了 NagaAgent 子模块及相关工具,增强了外部知识注入与联动的一致性。 + +- 新增 Anthropic Skills 支持,兼容 `SKILL.md` 形式的能力注入。 +- 更新 NagaAgent 子模块及相关工具,改善联动一致性。 +- 整理 AGENTS 文档和子模块说明,减少维护歧义。 +- 优化提示词与对接流程,提升多来源能力接入体验。 + +--- + +## v2.12.4 提示词回退与拒答修复 + +回退主提示词至更稳定的版本,降低了降智、拒绝回复及不稳定输出的概率。修复了提示词标签缺失问题,提供了更精简的 NagaAgent 提示词变体。 + +- 回退主提示词到更稳定的旧版本思路,降低异常行为概率。 +- 基于回退方案提供更精简的 NagaAgent 提示词变体。 +- 修复提示词中缺失标签的问题,避免格式错误影响模型理解。 +- 缓解因提示词变形引发的降智、拒绝回复和不稳定输出问题。 + +--- + +## v2.12.3 消息目标自动推断 + +优化了消息发送的目标推断逻辑,减少了手动填写目标参数的需求。修复了仅提供目标类型时 `send_message` 的失败问题,提升了群聊与私聊的目标解析准确性。 + +- 增加消息发送目标推断逻辑,简化参数填写。 +- 修复仅提供目标类型时 `send_message` 可能直接失败的问题。 +- 调整消息工具 Handler,提升群聊和私聊的目标解析稳定性。 +- 同步更新版本号与依赖锁文件。 + +--- + +## v2.12.2 WebUI 安全与自更新增强 + +增强了 WebUI 的安全性与自更新能力。引入强制初始密码修改流程与基于 Git 的自更新机制,改善了长期运行实例的维护体验。 + +- 增加强制修改初始密码流程,降低默认凭据风险。 +- 引入基于 Git 的自更新能力,改善维护与升级体验。 +- 改进 WebUI 路由、模板和前端交互,补齐安全提示与引导。 +- 调整主进程启动和配置读取细节,配合更新流程落地。 +- 重构 `self_update` 相关实现,为后续维护操作提供基础。 + +--- + +## v2.12.1 NagaAgent 模式开关 + +引入 NagaAgent 模式开关,支持显式控制联动能力的启用。优化了相关提示词资源与工具装配逻辑,使模式切换更加清晰直观。 + +- 添加 NagaAgent 模式开关,显式控制联动能力启用状态。 +- 新增或拆分 NagaAgent 专用提示词资源,减少与主提示词耦合。 +- 调整 AI 客户端与工具装配逻辑,使模式切换更清晰。 +- 更新配置示例和 WebUI 读取逻辑,确保管理端可配置该能力。 +- 优化联动模式下的默认 Prompt 表达。 + +--- + +## v2.12.0 配置增强与命令系统重构 + +升级了配置系统与命令体系。增强了运行参数配置与热重载支持,重构了命令注册表以支持插件化。同时调整了核心组件实现以适配新架构。 + +- 增强配置系统,补充更多运行参数,优化默认体验。 +- 新增配置热重载支持,使配置改动在运行中即时生效。 +- 重构命令系统与注册表,推动斜杠命令插件化。 +- 调整 AI、队列、上下文和安全服务的实现,配合新架构升级。 +- 更新 NagaAgent 子模块,为后续联动能力提供支持。 +- 修复关闭思考时仍发送思考参数的问题,并补充启动模式说明。 + +--- + +## v2.11.2 严格供应商兼容修复 + +优化了工具命名与兼容逻辑,适配了对函数调用约束更严格的供应商。调整了 AI 客户端与 Agent Handler,降低了因命名或适配差异导致的调用失败率。 + +- 重写工具名称生成与兼容逻辑,适配严格的函数调用约束。 +- 修复部分供应商因工具名不合法而拒绝请求的问题。 +- 
更新 AI 客户端与 LLM 层处理路径,统一命名规则。 +- 调整多个 Agent 的 Handler 与 Prompt,提升适配兼容性。 +- 更新配置示例和 WebUI 相关读取逻辑,保持前后端一致。 + +--- + +## v2.11.1 WebUI 自举与访问控制补强 + +完善了 WebUI 的自举启动流程与访问控制功能。新增群聊/私聊白名单模式,支持限制消息收发范围。增加了 Agent/Tool 调用播报功能,提升了运行时可观察性。 + +- 修复缺失 `config.toml` 时的启动与引导流程,支持配置自举。 +- 补强 Wheel 资源打包与跨平台文件锁,提升开箱可用性。 +- 新增群聊与私聊 Allowlist 模式,支持限制消息范围。 +- 增加 Agent/Tool 调用播报和彩蛋模式,提升可观察性。 +- 改进 WebUI 日志筛选、配置解析报错与构建细节。 + +--- + +## v2.11.0 WebUI 与 TOML 配置中枢 + +推出了 WebUI 管理界面与 `config.toml` 统一配置中心。实现了可视化配置编辑、日志查看与 Bot 控制,支持配置热重载与校验。 + +- 新增 `Undefined-webui` 入口,提供登录、配置编辑、日志查看和 Bot 控制。 +- 引入 `config.toml` 优先的配置加载与热重载机制。 +- 重构 WebUI 前后端结构,补齐模板、静态资源与国际化支持。 +- 废弃 `.env.example`,强化 TOML 配置使用方式。 +- 调整运行时组件以适配新配置对象,并提升 Python 版本要求至 3.11。 + +--- + +## v2.10.1 `/stats` 并发错配修复 + +修复了 `/stats` 命令在高并发场景下的分析结果错配问题。增加了 Matplotlib 缺失时的兜底处理,提升了统计功能的稳定性。 + +- 修复 `/stats` 并发执行时分析结果错配问题。 +- 改善统计分析结果与请求上下文的绑定关系。 +- 增加 Matplotlib 缺失或异常时的兜底处理。 +- 提升统计命令在高并发场景下的稳定性。 +- 补强统计链路的异常处理与可恢复性。 + +--- + +## v2.10.0 架构整理与 CoT 兼容增强 + +重构了系统架构,优化了配置管理与工具调用稳定性。增强了 DeepSeek CoT 等推理模型的兼容性,初步构建了 AI 统计分析能力。 + +- 重构系统架构,梳理核心模块边界。 +- 优化配置管理结构,降低维护复杂度。 +- 增强 DeepSeek CoT 兼容性,改善推理类模型接入体验。 +- 提升工具调用稳定性,减少异常调用失败。 +- 补强 AI 统计分析能力,为 `/stats` 功能提供基础。 + +--- + +## v2.9.0 OpenAI SDK 驱动的 AI 运行时 + +基于 OpenAI SDK 重构了 LLM 请求层,模块化了 AI 运行时。增强了多供应商接口兼容性,并引入了更完整的 Token 归档与安全处理链路。 + +- 使用 OpenAI SDK 重构统一的 LLM 请求层。 +- 模块化 AI 运行时,明确模型调用与执行边界。 +- 增强不同供应商接口的兼容性。 +- 引入更完整的 Token 归档与统计基础。 +- 加强安全处理链路,为功能扩展提供支持。 + +--- + +## v2.8.3 系统提示词优化 + +优化了系统提示词结构与表达方式,提升了模型对任务意图的理解一致性。降低了跑偏与冗余输出的概率,增强了对话稳定性。 + +- 优化系统提示词结构与表达方式。 +- 改善模型对任务意图的理解一致性。 +- 降低部分场景下的跑偏和冗余输出。 +- 提升整体对话稳定性与回复可用性。 +- 延续架构调整后的行为校正。 + +--- + +## v2.8.2 定时任务上下文修复 + +修复了定时任务执行时的上下文缺失问题,实现了运行时动态注入与持久化。提升了定时任务重启后的恢复能力与执行稳定性。 + +- 修复定时任务执行时上下文缺失的问题。 +- 为调度任务补充上下文持久化能力。 +- 增加运行时动态注入,改善任务恢复后的可执行性。 +- 提升定时任务重启后继续执行的稳定性。 +- 修正调度器相关的边界行为。 + +--- + +## v2.8.1 导入路径修复与结构清理 + +批量替换相对导入为绝对导入,解决了模块间导入不稳定的问题。清理了代码结构,并补充了文件分析 Agent 的相关文档。 + +- 将相对导入批量替换为绝对导入,消除路径歧义。 +- 修复重构后模块间导入不稳定的问题。 +- 提升代码结构清晰度和可维护性。 +- 
补充文件分析 Agent 的图片分析能力说明。 +- 降低 Skills 目录拆分的维护成本。 + +--- + +## v2.8.0 Skills 架构重写 + +重写了 Skills 加载架构,引入统一注册表模型。支持工具与 Agent 的懒加载、热重载及私有 MCP 接入,并自动生成 Agent 说明文档。 + +- 重写 Skills 加载架构,引入统一注册表逻辑。 +- 支持工具与 Agent 的懒加载和热重载。 +- 增加工具调用超时控制,改善运行时稳定性。 +- 支持 Agent 级独立 `mcp.json`,实现私有 MCP 工具接入。 +- 自动生成 Agent `intro.md`,提升能力描述一致性。 + +--- + +## v2.7.0 并发安全与异步 IO 重构 + +重构了并发基础设施,统一了异步安全文件操作与请求上下文隔离。修复了核心并发竞态条件,将阻塞型 IO 移出主事件循环,提升了高并发场景下的稳定性。 + +- 重构 IO 层,统一为异步安全文件操作。 +- 引入基于 `contextvars` 的请求上下文隔离。 +- 修复核心并发竞态条件,增强数据访问安全性。 +- 将阻塞型 IO 移出主事件循环,降低死锁风险。 +- 为高并发工具与 Agent 调度提供稳定基础。 + +--- + +## v2.6.0 非阻塞调度循环与联系人能力 + +将 AI 请求调度改为基于模型的非阻塞循环,显著提升了并发处理能力。新增 QQ 用户与群组管理工具集,扩展了联系人与成员信息查询能力。 + +- 新增 QQ 用户与群组管理工具集,扩展联系人查询能力。 +- 重构成员信息查询链路,统一工具组织方式。 +- 将队列调度改为基于模型的非阻塞循环,减少串行阻塞。 +- 提升高并发场景下的请求吞吐与调度稳定性。 +- 同步更新版本与配套文档说明。 + +--- + +## v2.5.1 Python 解释器工具接入 + +接入 Python 解释器工具,基于 Docker 提供隔离执行环境。支持在运行时安全执行 Python 代码,为代码交付与分析能力提供基础。 + +- 新增 Python 解释器工具。 +- 基于 Docker 提供隔离执行环境,提升安全性。 +- 支持在运行时执行 Python 代码并返回结果。 +- 为代码执行类能力提供独立工具入口。 + +--- + +## v2.5.0 AI 请求层重构与 `/stats` 上线 + +重构了 AI 请求层,理顺了调用与统计链路。上线 `/stats` 可视化统计命令,支持 Token 用量统计与图表展示。 + +- 重构 AI 请求层,理顺调用与统计链路。 +- 新增 Token 用量统计能力。 +- 上线 `/stats` 可视化统计与分析入口。 +- 为用量观测补充图表和聚合展示基础。 +- 改进 CI 中 Ruff 和 MyPy 的缓存配置。 + +--- + +## v2.4.0 核心架构解耦与记忆增强 + +模块化解耦了主 AI、安全检查与命令系统。增强了记忆系统与 OneBot 群组管理能力,优化了群聊上下文处理与日志系统。 + +- 重构核心架构,实现模块化解耦。 +- 增强记忆系统与 OneBot 群组管理相关能力。 +- 优化群聊上下文处理,改善运行时状态组织。 +- 增强日志系统,提供更丰富的观测信息与彩色输出。 +- 新增基于 `uv` 的自动化代码质量检查工作流。 +- 优化 GitHub Actions 缓存策略,降低 CI 成本。 + +--- + +## v2.3.0 MCP 接入与多工具调度 + +正式接入 MCP (Model Context Protocol),支持通过标准配置连接外部工具。定时任务新增多工具执行支持,允许串行或并行运行。 + +- 新增 MCP 支持,可通过标准 JSON 配置外部工具连接。 +- 基于 `fastmcp` 驱动 MCP 接入,提升扩展规范性。 +- 定时任务新增多工具执行,支持串行与并行模式。 +- 补充 Prompt 文档与 Agent 说明,明确工具执行模式。 + +--- + +## v2.2.6 非 URL 文件输入支持 + +`download_file` 工具新增对本地文件路径的支持,优化了目标解析逻辑。允许非 URL 文件直接进入下载与分析流程,提升了本地资源使用的便利性。 + +- `download_file` 工具新增对本地文件路径的支持。 +- 优化非 URL 文件的目标解析逻辑。 +- 移除文件分析前必须转换为网络地址的限制。 +- 提升本地资源接入文件工具链时的可用性。 + +--- + +## v2.2.5 文件消息类型补全处理 + 
+补全了对文件、视频、录音等消息类型的处理链路。修复了缺失类型时的兼容性问题,降低了多媒体消息解析失败率。 + +- 补充对文件消息的完整处理链路。 +- 修复缺失 `file`、`video`、`record`、`audio` 等消息类型时的兼容问题。 +- 降低多媒体消息进入主流程时的解析失败率。 +- 打通文件下载与分析的输入前置。 + +--- + +## v2.2.4 Toolsets 分层与安全回复优化 + +引入 `toolsets` 分层结构,按分类暴露工具前缀。重构 Skills 加载逻辑,新增工具加载统计日志。优化安全拦截回复提示词,减少无效输出。 + +- 引入 `toolsets` 工具层级,降低工具命名歧义。 +- 重构 Skills 加载逻辑,解耦依赖并统一发现流程。 +- 新增工具加载统计日志,提升技能注册可见性。 +- 将部分渲染与调度能力迁入新的工具集结构。 +- 优化注入防护回复提示词,采用更自然的风格。 +- 缩短安全拦截回复长度,减少干扰。 + +--- + +## v2.2.3 调度指令精度与发布检查增强 + +优化了 `scheduler_agent` 的 Prompt 与文档,提升定时调用意图识别准确度。在发布流程中集成 Lint 检查,提前拦截风格与静态错误。 + +- 优化 `scheduler_agent` 的 Prompt 和 Intro,提升意图对齐。 +- 为发布流程补充 Lint 集成,提前拦截代码问题。 +- 新增 `ci` 依赖组,统一 CI 环境下的质量检查入口。 +- 升级 Ruff 版本并同步版本号。 + +--- + +## v2.2.2 放宽 Python 版本要求 + +将 Python 版本要求放宽至 `>=3.10`,降低了部署与试用门槛,减少了因解释器版本不符导致的安装失败。 + +- 将 Python 版本支持范围扩展到 `>=3.10`。 +- 降低用户部署和试用的环境门槛。 +- 减少因解释器版本不符导致的安装失败。 +- 同步更新版本声明与发布配置。 + +--- + +## v2.2.1 README 资源兼容性修复 + +修复了 README 中本地图片路径在外部平台(如 PyPI)下的显示问题,统一使用 GitHub Raw URL 以保持显示一致性。 + +- 将 README 中的图片引用改为 GitHub Raw URL。 +- 提升 README 在 GitHub、PyPI 等平台中的显示一致性。 +- 解决相对路径图片在非仓库目录下失效的问题。 +- 保持文档结构不变,修复资源访问方式。 + +--- + +## v2.2.0 PyPI 发布与包名调整 + +正式面向 PyPI 发布,包名调整为 `Undefined-bot`。新增自动发布流程,并保留原有 CLI 启动方式,降低迁移成本。 + +- 新增基于 Trusted Publishing 的 PyPI 自动发布流程。 +- 将 PyPI 包名调整为 `Undefined-bot`。 +- 保持 CLI 启动命令不变,兼容既有使用方式。 +- 补充 PyPI 安装说明和发布文档。 +- 完善安装路径,支持源码与安装包分发。 + +--- + +## v2.1.1 AI 请求兼容性修复 + +修复了 OpenAI 请求链路中的兼容性问题,增强了日志与错误处理。优化了 Release 流程中的注释提取与说明生成逻辑。 + +- 修复工具响应未正确写回会话历史导致的 API 兼容性问题。 +- 增强 AI 请求日志与错误处理,便于定位失败原因。 +- 修复 Agent 调用链中可能触发的 HTTP 400 错误。 +- 调整 Release 注释提取逻辑,区分标题与正文。 +- 优化自动发布说明生成,提升文档质量。 + +--- + +## v2.1.0 日志可观测性与持久化增强 + +大幅增强了系统级日志的可观测性,完善了 AI 调用、工具执行与消息流转的记录。实现了定时任务与摘要结果的本地持久化,并建立了 GitHub Release 自动化工作流。 + +- 增强 AI 调用、工具执行和消息流转日志,提升排障能力。 +- 为定时任务增加本地持久化存储,降低数据丢失风险。 +- 为 AI 对话结束摘要增加持久化能力。 +- 为所有 Agent 补充 `get_current_time` 工具,增强时间感知。 +- 统一 Agent 介绍文档,清理过时工具。 +- 新增 GitHub Release 自动化工作流与变更日志生成。 + +--- + +## v2.0.0 Skills 架构与 CLI 入口 + +将原有工具体系重构为 Skills 
架构,区分基础工具与智能 Agent。新增 CLI 启动入口与定时任务能力,独立了队列管理服务。 + +- 重构工具体系为 Skills 架构,引入注册表机制。 +- 增加 CLI 启动入口,支持更便捷的运行方式。 +- 引入并行工具执行能力,提升效率。 +- 新增基于 Crontab 的定时任务能力及 `scheduler_agent`。 +- 拆分队列管理为独立的 `QueueManager` 服务。 +- 新增群成员信息查询工具,修复 OneBot 初始化问题。 + +--- + +## v1.0.0 初始发布与基础能力落地 + +首个公开版本,基于 `asyncio` 与 `aiohttp` 构建了异步机器人架构。接入 OneBot V11,集成了 AI 对话、网页抓取、多模态分析与基础记忆模块。 + +- 完成基于 `asyncio` 和 `aiohttp` 的异步机器人架构。 +- 接入 OneBot V11,支持群聊与私聊消息处理。 +- 集成 AI 对话、网页抓取、多模态分析和图像渲染能力。 +- 提供消息读取、文件读取、联网抓取和图片发送等基础工具。 +- 引入基础记忆、FAQ 与限流模块。 + +--- diff --git a/README.md b/README.md index 2befc1e0..33b75cf1 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ - **多模型池**:支持配置多个 AI 模型,可轮询、随机选择或用户指定;支持多模型并发比较,选择最佳结果继续对话。详见 [多模型功能文档](docs/multi-model.md)。 - **本地知识库**:将纯文本文件向量化存入 ChromaDB,AI 可通过关键词搜索或语义搜索查询领域知识;支持增量嵌入与自动扫描。详见 [知识库文档](docs/knowledge.md)。 - **访问控制(群/私聊)**:支持 `access.mode` 三种模式(`off` / `blacklist` / `allowlist`)和群/私聊黑白名单;可按策略限制收发范围,避免误触发与误投递。详见 [docs/access-control.md](docs/access-control.md)。 +- **版本变更可查询**:仓库根目录维护 `CHANGELOG.md`,并提供 `/changelog` 命令在运行时查看最近版本和单版本摘要。 - **并行工具执行**:无论是主 AI 还是子 Agent,均支持 `asyncio` 并发工具调用,大幅提升多任务处理速度(如同时读取多个文件或搜索多个关键词)。 - **智能 Agent 矩阵**:内置多个专业 Agent,分工协作处理复杂任务。 - **callable.json 共享机制**:通过简单的配置文件(`callable.json`)即可让 Agent 互相调用、将 `skills/tools/` 或 `skills/toolsets/` 下的工具按白名单暴露给 Agent,支持细粒度访问控制,实现复杂的多 Agent 协作场景。 @@ -76,6 +77,7 @@ Undefined 的功能极为丰富,为了让本页面不过于臃肿,我们将 - 🧭 **[Management API 与远程管理](docs/management-api.md)**:WebUI / App 共用的管理接口、认证、配置/日志/Bot 控制与引导探针说明。 - 🛠️ **[配置与热更新说明](docs/configuration.md)**:从模型切换到 MCP 库挂载,全方位掌握 `config.toml` 的高阶配置。 - 💡 **[交互与使用手册](docs/usage.md)**:包含实用的对话示例、多模态解析用法,以及群管家必备的管理员`/指令`。 +- 📝 **[版本变更记录](CHANGELOG.md)**:查看按版本整理的更新摘要,也可在运行时使用 `/changelog` 查询。 - 🛡️ **[访问控制说明](docs/access-control.md)**:教你如何精准配置黑白名单,让机器人的使用范围分毫不差。 - 🧠 **[认知记忆系统详解](docs/cognitive-memory.md)**:黑科技解密——“无阻塞后台史官”是如何将对话内化为向量记忆与用户侧写的。 - 📚 **[本地知识库接入方案](docs/knowledge.md)**:为 AI 挂载本地文本资产,轻松拥抱企业/个人专属 QA。 diff --git a/docs/development.md b/docs/development.md index 
66afd43e..bed5cb32 100644 --- a/docs/development.md +++ b/docs/development.md @@ -10,6 +10,7 @@ Undefined 欢迎开发者参与共建和进行二次开发! ```text src/Undefined/ +├── changelog.py # CHANGELOG.md 解析与版本查询公共层 ├── ai/ # AI 运行时核心组件 (client, prompt, tooling 工具组装, summary 短期摘要, multimodal 多模态) ├── arxiv/ # arXiv 论文解析、元信息获取、PDF 下载与发送 ├── bilibili/ # B站视频流解析、分段下载与异步发送 @@ -35,6 +36,12 @@ src/Undefined/ - **Toolsets 开发专版**:[toolsets/README.md](../../src/Undefined/skills/toolsets/README.md) - **Commands 开发专版**:[详细斜杠指令开发指南](slash-commands.md) +### CHANGELOG 维护约定 + +- 仓库根目录的 `CHANGELOG.md` 是正式版本历史的唯一事实来源。 +- `src/Undefined/changelog.py` 负责解析和校验这份文档,供 `/changelog` 命令和 `changelog_query` tool 共用。 +- 新增或调整版本条目时,不要只改 tag 注释;应同步维护 `CHANGELOG.md`,确保运行时查询和仓库文档一致。 + ### callable.json 共享授权机制 在开发过程中,如何让你的 Agent 具备特定工具的访问权限,或让多个 Agent 进行合作调用? diff --git a/docs/slash-commands.md b/docs/slash-commands.md index 5607a707..65c7ed40 100644 --- a/docs/slash-commands.md +++ b/docs/slash-commands.md @@ -44,6 +44,36 @@ Undefined 提供了一套强大的斜杠指令(Slash Commands)系统。管 /license ``` +- **/changelog [list [数量] | show <版本号> | latest]** + - **说明**: + - 查看仓库内 `CHANGELOG.md` 维护的版本历史。 + - 不带参数时,默认列最近 8 个版本,只展示版本号与标题,避免刷屏。 + - `show`/`latest` 会展示单个版本的标题、摘要和变更点。 + - **参数**: + + | 参数 | 是否必填 | 说明 | + |------|----------|------| + | `list` | 可选 | 列出最近多个版本;省略时默认等价于 `/changelog` | + | `数量` | 可选 | `list` 模式返回的版本数量,默认 8,最大 20 | + | `show` | 可选 | 查看指定版本详情 | + | `版本号` | `show` 时必填 | 目标版本号,支持 `v3.2.6` 或 `3.2.6` | + | `latest` | 可选 | 直接查看最新版本详情 | + + - **返回内容**: + - `list`:版本号 + 标题的紧凑列表,并提示使用 `/changelog show <version>` 查看详情。 + - `show` / `latest`:版本标题、摘要和 bullet 变更点。 + - **边界行为**: + - `list` 的数量超过 20 会自动截到 20。 + - 版本不存在、参数格式不合法或 `CHANGELOG.md` 格式异常时,会返回明确错误提示。 + - **示例**: + ``` + /changelog + /changelog list 12 + /changelog show v3.2.6 + /changelog show 3.2.6 + /changelog latest + ``` + #### 2. 
统计与分析服务 - **/stats [时间范围] [--ai]** - **说明**:生成过去一段时间内 Token 的使用统计数据、模型消耗排行、输入输出比例,并输出可视化图表。默认不启用 AI 分析,显式传 `--ai`(或 `-a`)才会触发。 @@ -215,12 +245,14 @@ Undefined 具有可插拔的指令解析层,所有的指令逻辑实现均放 ```text src/Undefined/ +├── changelog.py # CHANGELOG.md 解析与查询公共模块 ├── services/commands/ │ ├── __init__.py │ ├── context.py # 核心上下文 (CommandContext) 定义 │ └── registry.py # 命令注册表 (CommandRegistry) 和 Meta 定义 └── skills/commands/ # 具体的所有指令实现存放目录 ├── __init__.py + ├── changelog/ # 内置命令:版本历史查询 ├── help/ # 内置命令:基础帮助 ├── copyright/ # 内置命令:版权与免责声明 ├── faq/ # 内置命令:FAQ增删改查 @@ -231,6 +263,8 @@ src/Undefined/ 在 `skills/commands/` 目录下新建一个你的命令大类目录,例如 `skills/commands/hello_world/`,然后在里面创建 `config.json`、`handler.py` 和 `README.md`。 +如果命令需要读取仓库级文档或共享数据源,建议像 `/changelog` 一样先把解析逻辑抽到公共模块中,再让命令层只负责参数解析和文本格式化。这样后续即使再接一个 tool 或 API,也不用重复解析同一份文档。 + #### A. 配置声明 (`config.json`) ```json { diff --git a/docs/usage.md b/docs/usage.md index 68627463..7225fb30 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -31,14 +31,16 @@ --- -## 管理员命令 (斜杠指令) +## 斜杠指令 > 💡 **进阶玩法**:想了解每个命令的具体使用参数,或者学习如何通过写几行代码**自定义属于你的独家斜杠指令**?请前往 [命令系统与斜杠指令配置指南](slash-commands.md)。 -在群聊或私聊中使用以下指令(需要具备被设置的超级管理员或管理员权限): +在群聊或私聊中可使用以下指令。除明确说明外,管理类命令需要具备被设置的超级管理员或管理员权限: ```bash /help # 查看帮助菜单 +/changelog # 查看最近版本历史(公开命令) +/changelog show v3.2.6 # 查看指定版本详情(公开命令) /lsadmin # 查看当前所有的系统管理员列表 /addadmin # 添加新的普通管理员(仅限超级管理员使用) /rmadmin # 移除某位普通管理员 @@ -46,6 +48,14 @@ /stats [时间范围] [--ai] # 核心统计功能:获取 Token 使用统计 + 成本计算;加 --ai 才启用智能分析 ``` +### 关于 `/changelog` 的详细说明: + +- `/changelog` 默认列最近 8 个版本,按新到旧展示 `版本号 + 标题`。 +- `/changelog list 12` 可查看更多版本,最大 20 条。 +- `/changelog show <版本号>` 会展示单个版本的标题、摘要和变更点,版本号支持带或不带 `v`。 +- `/changelog latest` 会直接展示 `CHANGELOG.md` 中最新一条版本记录。 +- 版本内容直接来自仓库内维护的 `CHANGELOG.md`,不是运行时临时扫描 git tag。 + ### 关于 `/stats` 的详细说明: - 默认统计最近 7 天的数据,时间参数范围会自动被系统钳制在 1 天 - 365 天之间。 diff --git a/pyproject.toml b/pyproject.toml index 5c01a38d..479cb115 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,12 +89,16 @@ only-include = [ ] sources = ["src"] 
+[tool.hatch.build.targets.wheel.force-include] +"CHANGELOG.md" = "Undefined/CHANGELOG.md" + [tool.hatch.build.targets.sdist] include = [ "/src/**", "/res/**", "/img/**", "/config/**", + "/CHANGELOG.md", "/config.toml.example", "/README.md", "/ARCHITECTURE.md", diff --git a/src/Undefined/changelog.py b/src/Undefined/changelog.py new file mode 100644 index 00000000..0b4342e1 --- /dev/null +++ b/src/Undefined/changelog.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +import re + +from Undefined.utils.resources import resolve_resource_path + +CHANGELOG_FILENAME = "CHANGELOG.md" +_VERSION_RE = re.compile(r"^v\d+\.\d+\.\d+$") +_HEADING_RE = re.compile(r"^(#{2,6})\s+(v\d+\.\d+\.\d+)\s+(.+?)\s*$") +_SEPARATOR_RE = re.compile(r"(?m)^\s*---\s*$") +_BULLET_RE = re.compile(r"^\s*-\s+(.+?)\s*$") + + +class ChangelogError(ValueError): + """Base error for changelog loading failures.""" + + +class ChangelogFormatError(ChangelogError): + """Raised when CHANGELOG.md does not match the expected format.""" + + +@dataclass(frozen=True, slots=True) +class ChangelogEntry: + version: str + title: str + summary: str + changes: tuple[str, ...] 
+ heading_level: int = 2 + + +def normalize_version(version: str) -> str: + normalized = str(version or "").strip() + if not normalized: + raise ChangelogFormatError("版本号不能为空") + if not normalized.startswith("v"): + normalized = f"v{normalized}" + if not _VERSION_RE.fullmatch(normalized): + raise ChangelogFormatError(f"非法版本号格式: {version}") + return normalized + + +def resolve_changelog_path(path: str | Path | None = None) -> Path: + if path is not None: + return Path(path) + return resolve_resource_path(CHANGELOG_FILENAME) + + +def load_changelog(path: str | Path | None = None) -> tuple[ChangelogEntry, ...]: + changelog_path = resolve_changelog_path(path) + text = changelog_path.read_text(encoding="utf-8") + return parse_changelog_text(text) + + +def parse_changelog_text(text: str) -> tuple[ChangelogEntry, ...]: + normalized = str(text or "").strip() + if not normalized: + raise ChangelogFormatError("CHANGELOG 内容为空") + + entries: list[ChangelogEntry] = [] + seen_versions: set[str] = set() + for block in _SEPARATOR_RE.split(normalized): + stripped = block.strip() + if not stripped: + continue + entry = _parse_entry_block(stripped) + if entry.version in seen_versions: + raise ChangelogFormatError(f"发现重复版本: {entry.version}") + seen_versions.add(entry.version) + entries.append(entry) + + if not entries: + raise ChangelogFormatError("未解析到任何版本条目") + return tuple(entries) + + +def _parse_entry_block(block: str) -> ChangelogEntry: + lines = block.splitlines() + if not lines: + raise ChangelogFormatError("发现空版本块") + + heading_match = _HEADING_RE.fullmatch(lines[0].strip()) + if heading_match is None: + raise ChangelogFormatError(f"标题行格式错误: {lines[0]}") + + heading_hashes, version, raw_title = heading_match.groups() + title = raw_title.strip() + if not title: + raise ChangelogFormatError(f"{version} 缺少标题") + + summary_lines: list[str] = [] + changes: list[str] = [] + bullet_started = False + + for raw_line in lines[1:]: + bullet_match = _BULLET_RE.fullmatch(raw_line) + if 
bullet_match is not None: + bullet_started = True + changes.append(bullet_match.group(1).strip()) + continue + if bullet_started: + if raw_line.strip(): + raise ChangelogFormatError( + f"{version} 的变更列表后存在非法内容: {raw_line}" + ) + continue + summary_lines.append(raw_line.strip() if raw_line.strip() else "") + + summary = "\n".join(_normalize_summary_lines(summary_lines)).strip() + if not summary: + raise ChangelogFormatError(f"{version} 缺少摘要") + if not changes: + raise ChangelogFormatError(f"{version} 缺少变更点") + + return ChangelogEntry( + version=version, + title=title, + summary=summary, + changes=tuple(changes), + heading_level=len(heading_hashes), + ) + + +def _normalize_summary_lines(lines: list[str]) -> list[str]: + normalized: list[str] = [] + for line in lines: + if line.strip(): + normalized.append(line.strip()) + else: + normalized.append("") + return normalized + + +def get_latest_entry( + *, + entries: tuple[ChangelogEntry, ...] | None = None, + path: str | Path | None = None, +) -> ChangelogEntry: + resolved_entries = entries if entries is not None else load_changelog(path) + return resolved_entries[0] + + +def list_entries( + *, + limit: int | None = None, + entries: tuple[ChangelogEntry, ...] | None = None, + path: str | Path | None = None, +) -> tuple[ChangelogEntry, ...]: + resolved_entries = entries if entries is not None else load_changelog(path) + if limit is None: + return resolved_entries + if limit <= 0: + raise ChangelogFormatError("limit 必须大于 0") + return resolved_entries[:limit] + + +def get_entry( + version: str, + *, + entries: tuple[ChangelogEntry, ...] 
| None = None, + path: str | Path | None = None, +) -> ChangelogEntry: + normalized = normalize_version(version) + resolved_entries = entries if entries is not None else load_changelog(path) + for entry in resolved_entries: + if entry.version == normalized: + return entry + raise ChangelogError(f"未找到版本: {normalized}") + + +def entry_to_dict( + entry: ChangelogEntry, + *, + include_summary: bool = True, + include_changes: bool = True, + max_changes: int | None = None, +) -> dict[str, object]: + payload: dict[str, object] = { + "version": entry.version, + "title": entry.title, + } + if include_summary: + payload["summary"] = entry.summary + if include_changes: + changes = list(entry.changes) + if max_changes is not None: + if max_changes <= 0: + raise ChangelogFormatError("max_changes 必须大于 0") + changes = changes[:max_changes] + payload["changes"] = changes + payload["change_count"] = len(entry.changes) + return payload + + +__all__ = [ + "CHANGELOG_FILENAME", + "ChangelogEntry", + "ChangelogError", + "ChangelogFormatError", + "entry_to_dict", + "get_entry", + "get_latest_entry", + "list_entries", + "load_changelog", + "normalize_version", + "parse_changelog_text", + "resolve_changelog_path", +] diff --git a/src/Undefined/skills/commands/changelog/README.md b/src/Undefined/skills/commands/changelog/README.md new file mode 100644 index 00000000..9f5c3597 --- /dev/null +++ b/src/Undefined/skills/commands/changelog/README.md @@ -0,0 +1,20 @@ +# /changelog 命令说明 + +用于查看仓库内 `CHANGELOG.md` 中维护的版本历史。 + +## 用法 + +- `/changelog` +- `/changelog list` +- `/changelog list 12` +- `/changelog show v3.2.6` +- `/changelog show 3.2.6` +- `/changelog latest` + +## 说明 + +- `/changelog` 默认列最近 8 个版本。 +- `list [数量]` 按新到旧列版本与标题,最大 20。 +- `show <版本号>` 展示单个版本的标题、摘要和变更点。 +- `latest` 展示 `CHANGELOG.md` 中第一条版本详情。 +- 版本数据直接来自仓库维护的 `CHANGELOG.md`,不会运行时扫描 git tag。 diff --git a/src/Undefined/skills/commands/changelog/config.json b/src/Undefined/skills/commands/changelog/config.json new file mode 
100644 index 00000000..205af6a0 --- /dev/null +++ b/src/Undefined/skills/commands/changelog/config.json @@ -0,0 +1,16 @@ +{ + "name": "changelog", + "description": "查看版本历史与单版本变更摘要", + "usage": "/changelog [list [数量]|show <版本号>|latest]", + "example": "/changelog show v3.2.6", + "permission": "public", + "rate_limit": { + "user": 5, + "admin": 0, + "superadmin": 0 + }, + "show_in_help": true, + "order": 15, + "allow_in_private": true, + "aliases": [] +} diff --git a/src/Undefined/skills/commands/changelog/handler.py b/src/Undefined/skills/commands/changelog/handler.py new file mode 100644 index 00000000..f06beb87 --- /dev/null +++ b/src/Undefined/skills/commands/changelog/handler.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from Undefined.changelog import ( + ChangelogError, + ChangelogFormatError, + get_entry, + get_latest_entry, + list_entries, +) +from Undefined.services.commands.context import CommandContext + +_DEFAULT_LIST_LIMIT = 8 +_MAX_LIST_LIMIT = 20 + + +def _parse_list_limit(raw: str | None) -> int: + if raw is None or not str(raw).strip(): + return _DEFAULT_LIST_LIMIT + try: + parsed = int(str(raw).strip()) + except (TypeError, ValueError) as exc: + raise ChangelogFormatError("数量必须是整数") from exc + if parsed <= 0: + raise ChangelogFormatError("数量必须大于 0") + return min(parsed, _MAX_LIST_LIMIT) + + +def _format_entry_message(version: str | None = None) -> str: + entry = get_latest_entry() if version is None else get_entry(version) + lines = [ + f"{entry.version} {entry.title}", + "", + entry.summary, + "", + *[f"- {change}" for change in entry.changes], + ] + return "\n".join(lines) + + +def _format_list(limit: int) -> str: + entries = list_entries(limit=limit) + lines = [ + "Undefined CHANGELOG", + "", + *[f"- {entry.version} | {entry.title}" for entry in entries], + "", + "查看详情:/changelog show ", + ] + return "\n".join(lines) + + +async def execute(args: list[str], context: CommandContext) -> None: + try: + if not args: + message = 
_format_list(_DEFAULT_LIST_LIMIT) + else: + subcommand = str(args[0]).strip().lower() + if subcommand == "list": + if len(args) > 2: + raise ChangelogFormatError("用法:/changelog list [数量]") + limit = _parse_list_limit(args[1] if len(args) == 2 else None) + message = _format_list(limit) + elif subcommand == "show": + if len(args) != 2: + raise ChangelogFormatError("用法:/changelog show <版本号>") + message = _format_entry_message(args[1]) + elif subcommand == "latest": + if len(args) != 1: + raise ChangelogFormatError("用法:/changelog latest") + message = _format_entry_message() + else: + raise ChangelogFormatError( + "用法:/changelog [list [数量]|show <版本号>|latest]" + ) + except (FileNotFoundError, ChangelogError) as exc: + message = f"❌ {exc}" + + await context.sender.send_group_message(context.group_id, message) diff --git a/src/Undefined/skills/tools/changelog_query/README.md b/src/Undefined/skills/tools/changelog_query/README.md new file mode 100644 index 00000000..96ed0c92 --- /dev/null +++ b/src/Undefined/skills/tools/changelog_query/README.md @@ -0,0 +1,21 @@ +# changelog_query + +查询 Undefined 自身维护的 `CHANGELOG.md`。 + +## action + +- `latest`:读取最新版本 +- `list`:列出最近多个版本 +- `show`:读取指定版本 + +## 常用参数 + +- `version`:`show` 时指定版本号 +- `limit`:`list` 时限制条数,默认 5,最大 20 +- `include_summary`:控制是否返回摘要 +- `include_changes`:控制是否返回变更点 +- `max_changes`:限制返回的变更点数量 + +## 返回 + +返回 JSON 字符串,包含 `ok`、`action`、`items` 或 `entry` 等字段,便于 AI 按需读取和摘要。 diff --git a/src/Undefined/skills/tools/changelog_query/config.json b/src/Undefined/skills/tools/changelog_query/config.json new file mode 100644 index 00000000..41e6a5bc --- /dev/null +++ b/src/Undefined/skills/tools/changelog_query/config.json @@ -0,0 +1,39 @@ +{ + "type": "function", + "function": { + "name": "changelog_query", + "description": "查询 Undefined 自身的 CHANGELOG。支持查看最新版本、列出最近几个版本,或读取指定版本的标题、摘要和变更点。", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["latest", "list", "show"], + 
"description": "查询动作。latest 查看最新版本,list 查看最近多个版本,show 查看指定版本。默认 latest。", + "default": "latest" + }, + "version": { + "type": "string", + "description": "可选。show 动作时要查询的版本号,支持 v3.2.6 或 3.2.6。" + }, + "limit": { + "type": "integer", + "description": "可选。list 动作返回的最大版本数,默认 5,最大 20。", + "default": 5 + }, + "include_summary": { + "type": "boolean", + "description": "可选。是否包含摘要。list 默认 false,latest/show 默认 true。" + }, + "include_changes": { + "type": "boolean", + "description": "可选。是否包含变更点。list 默认 false,latest/show 默认 true。" + }, + "max_changes": { + "type": "integer", + "description": "可选。最多返回多少条变更点。latest/show 默认 6,list 仅在 include_changes=true 时生效。" + } + } + } + } +} diff --git a/src/Undefined/skills/tools/changelog_query/handler.py b/src/Undefined/skills/tools/changelog_query/handler.py new file mode 100644 index 00000000..2f8205f4 --- /dev/null +++ b/src/Undefined/skills/tools/changelog_query/handler.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from typing import Any +import json + +from Undefined.changelog import ( + ChangelogError, + ChangelogFormatError, + entry_to_dict, + get_entry, + get_latest_entry, + list_entries, +) + +_DEFAULT_LIST_LIMIT = 5 +_MAX_LIST_LIMIT = 20 +_DEFAULT_MAX_CHANGES = 6 + + +def _error_payload(action: str, message: str) -> str: + return json.dumps( + { + "ok": False, + "action": action, + "error": message, + }, + ensure_ascii=False, + ) + + +def _parse_optional_bool(value: Any) -> bool | None: + if value is None: + return None + if isinstance(value, bool): + return value + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in {"true", "1", "yes", "on"}: + return True + if lowered in {"false", "0", "no", "off"}: + return False + raise ChangelogFormatError("布尔参数必须是 true/false") + + +def _parse_limit(value: Any) -> int: + if value is None: + return _DEFAULT_LIST_LIMIT + try: + parsed = int(value) + except (TypeError, ValueError) as exc: + raise ChangelogFormatError("limit 必须是整数") from exc + if parsed <= 
0: + raise ChangelogFormatError("limit 必须大于 0") + return min(parsed, _MAX_LIST_LIMIT) + + +def _parse_max_changes(value: Any) -> int: + if value is None: + return _DEFAULT_MAX_CHANGES + try: + parsed = int(value) + except (TypeError, ValueError) as exc: + raise ChangelogFormatError("max_changes 必须是整数") from exc + if parsed <= 0: + raise ChangelogFormatError("max_changes 必须大于 0") + return parsed + + +def _resolve_include_flag( + *, + raw: Any, + action: str, + default_for_detail: bool, +) -> bool: + parsed = _parse_optional_bool(raw) + if parsed is not None: + return parsed + if action == "list": + return False + return default_for_detail + + +async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: + _ = context + action = str(args.get("action") or "latest").strip().lower() + if action not in {"latest", "list", "show"}: + return _error_payload(action, "action 只能是 latest、list 或 show") + + try: + include_summary = _resolve_include_flag( + raw=args.get("include_summary"), + action=action, + default_for_detail=True, + ) + include_changes = _resolve_include_flag( + raw=args.get("include_changes"), + action=action, + default_for_detail=True, + ) + max_changes = _parse_max_changes(args.get("max_changes")) + + if action == "latest": + entry = get_latest_entry() + payload = { + "ok": True, + "action": action, + "entry": entry_to_dict( + entry, + include_summary=include_summary, + include_changes=include_changes, + max_changes=max_changes if include_changes else None, + ), + } + elif action == "show": + version = str(args.get("version") or "").strip() + if not version: + raise ChangelogFormatError("show 动作必须提供 version") + entry = get_entry(version) + payload = { + "ok": True, + "action": action, + "entry": entry_to_dict( + entry, + include_summary=include_summary, + include_changes=include_changes, + max_changes=max_changes if include_changes else None, + ), + } + else: + limit = _parse_limit(args.get("limit")) + entries = list_entries(limit=limit) + payload 
= { + "ok": True, + "action": action, + "count": len(entries), + "items": [ + entry_to_dict( + entry, + include_summary=include_summary, + include_changes=include_changes, + max_changes=max_changes if include_changes else None, + ) + for entry in entries + ], + } + except (FileNotFoundError, ChangelogError) as exc: + return _error_payload(action, str(exc)) + + return json.dumps(payload, ensure_ascii=False) diff --git a/tests/test_changelog.py b/tests/test_changelog.py new file mode 100644 index 00000000..fad00b02 --- /dev/null +++ b/tests/test_changelog.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import pytest + +from Undefined.changelog import ( + ChangelogFormatError, + get_entry, + get_latest_entry, + list_entries, + parse_changelog_text, +) + + +def test_parse_changelog_text_parses_multiple_entries() -> None: + entries = parse_changelog_text( + """ + ## v3.2.6 Responses 重试修复 + + 修复 replay-only 状态字段导致的兼容性问题。 + + - 过滤 replay-only 状态字段 + - 补充相关测试 + + --- + + ## v3.2.5 形象资源更新 + + 调整项目的展示形象和相关素材引用。 + + - 更新形象素材 + - 清理旧引用 + - 同步版本文案 + """ + ) + + assert [entry.version for entry in entries] == ["v3.2.6", "v3.2.5"] + assert entries[0].title == "Responses 重试修复" + assert entries[0].summary == "修复 replay-only 状态字段导致的兼容性问题。" + assert entries[0].changes == ("过滤 replay-only 状态字段", "补充相关测试") + assert entries[1].heading_level == 2 + + +def test_get_entry_normalizes_version_without_v_prefix() -> None: + entries = parse_changelog_text( + """ + ## v1.0.0 初始发布 + + 第一个可用版本。 + + - 搭建基础架构 + - 接入 OneBot + - 提供基础工具 + """ + ) + + assert get_entry("1.0.0", entries=entries).version == "v1.0.0" + assert get_latest_entry(entries=entries).version == "v1.0.0" + assert list_entries(limit=1, entries=entries)[0].version == "v1.0.0" + + +def test_parse_changelog_text_preserves_multiline_summary() -> None: + entries = parse_changelog_text( + """ + ## v2.0.0 Skills 架构 + + 第一段摘要。 + + 第二段摘要。 + + - 引入 Skills + - 增加 CLI + - 调整队列服务 + """ + ) + + assert entries[0].summary == 
"第一段摘要。\n\n第二段摘要。" + + +@pytest.mark.parametrize( + "text, expected_message", + [ + ( + """ + ## v1.0.0 缺摘要 + - 只有 bullet + """, + "缺少摘要", + ), + ( + """ + ## v1.0.0 缺变更点 + + 这里只有摘要。 + """, + "缺少变更点", + ), + ( + """ + ## v1.0.0 标题一 + + 摘要一。 + + - 变更一 + + --- + + ## v1.0.0 标题二 + + 摘要二。 + + - 变更二 + """, + "重复版本", + ), + ], +) +def test_parse_changelog_text_rejects_invalid_blocks( + text: str, expected_message: str +) -> None: + with pytest.raises(ChangelogFormatError, match=expected_message): + parse_changelog_text(text) diff --git a/tests/test_changelog_command.py b/tests/test_changelog_command.py new file mode 100644 index 00000000..d93a48f6 --- /dev/null +++ b/tests/test_changelog_command.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import pytest + +from Undefined.changelog import ChangelogError, ChangelogEntry +from Undefined.services.command import CommandDispatcher +from Undefined.services.commands.context import CommandContext +from Undefined.services.commands.registry import CommandRegistry +from Undefined.skills.commands.changelog import handler as changelog_handler + + +class _DummySender: + def __init__(self) -> None: + self.messages: list[tuple[int, str, bool]] = [] + + async def send_group_message( + self, group_id: int, message: str, mark_sent: bool = False + ) -> None: + self.messages.append((group_id, message, mark_sent)) + + +def _build_context(sender: _DummySender) -> CommandContext: + stub = cast(Any, SimpleNamespace()) + return CommandContext( + group_id=10001, + sender_id=10002, + config=stub, + sender=cast(Any, sender), + ai=stub, + faq_storage=stub, + onebot=stub, + security=stub, + queue_manager=None, + rate_limiter=None, + dispatcher=stub, + registry=CommandRegistry(Path("/tmp/not-used")), + ) + + +def _entry(version: str, title: str) -> ChangelogEntry: + return ChangelogEntry( + version=version, + title=title, + summary=f"{title} 摘要", 
+ changes=(f"{title} 变更一", f"{title} 变更二", f"{title} 变更三"), + ) + + +@pytest.mark.asyncio +async def test_changelog_command_lists_recent_versions( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender) + monkeypatch.setattr( + changelog_handler, + "list_entries", + lambda limit: (_entry("v3.2.6", "标题甲"), _entry("v3.2.5", "标题乙"))[:limit], + ) + + await changelog_handler.execute([], context) + + output = sender.messages[-1][1] + assert "Undefined CHANGELOG" in output + assert "- v3.2.6 | 标题甲" in output + assert "- v3.2.5 | 标题乙" in output + assert "/changelog show " in output + + +@pytest.mark.asyncio +async def test_changelog_command_show_supports_version_argument( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender) + monkeypatch.setattr( + changelog_handler, + "get_entry", + lambda version: _entry("v3.2.6", "标题甲"), + ) + + await changelog_handler.execute(["show", "3.2.6"], context) + + output = sender.messages[-1][1] + assert output.startswith("v3.2.6 标题甲") + assert "标题甲 摘要" in output + assert "- 标题甲 变更一" in output + + +@pytest.mark.asyncio +async def test_changelog_command_latest_uses_first_entry( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender) + monkeypatch.setattr( + changelog_handler, + "get_latest_entry", + lambda: _entry("v3.2.6", "标题甲"), + ) + + await changelog_handler.execute(["latest"], context) + + assert sender.messages[-1][1].startswith("v3.2.6 标题甲") + + +@pytest.mark.asyncio +async def test_changelog_command_reports_lookup_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender) + + def _raise(_: str) -> ChangelogEntry: + raise ChangelogError("未找到版本: v9.9.9") + + monkeypatch.setattr(changelog_handler, "get_entry", _raise) + + await changelog_handler.execute(["show", "v9.9.9"], context) + + assert sender.messages[-1][1] == "❌ 
未找到版本: v9.9.9" + + +def test_changelog_command_is_registered_for_private_use() -> None: + dispatcher = CommandDispatcher( + config=cast( + Any, + SimpleNamespace(is_superadmin=lambda _x: False, is_admin=lambda _x: False), + ), + sender=cast(Any, _DummySender()), + ai=cast(Any, SimpleNamespace()), + faq_storage=cast(Any, SimpleNamespace()), + onebot=cast(Any, SimpleNamespace()), + security=cast(Any, SimpleNamespace(rate_limiter=None)), + ) + + meta = dispatcher.command_registry.resolve("changelog") + assert meta is not None + assert meta.allow_in_private is True + assert meta.rate_limit.user == 5 diff --git a/tests/test_changelog_tool.py b/tests/test_changelog_tool.py new file mode 100644 index 00000000..052d3412 --- /dev/null +++ b/tests/test_changelog_tool.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import json + +import pytest + +from Undefined.changelog import ChangelogEntry +from Undefined.skills.tools.changelog_query import handler as changelog_tool_handler + + +def _entry(version: str, title: str) -> ChangelogEntry: + return ChangelogEntry( + version=version, + title=title, + summary=f"{title} 摘要", + changes=(f"{title} 变更一", f"{title} 变更二", f"{title} 变更三"), + ) + + +@pytest.mark.asyncio +async def test_changelog_tool_latest_returns_structured_entry( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + changelog_tool_handler, + "get_latest_entry", + lambda: _entry("v3.2.6", "标题甲"), + ) + + result = await changelog_tool_handler.execute({}, {}) + payload = json.loads(result) + + assert payload["ok"] is True + assert payload["action"] == "latest" + assert payload["entry"]["version"] == "v3.2.6" + assert payload["entry"]["summary"] == "标题甲 摘要" + assert payload["entry"]["changes"] == [ + "标题甲 变更一", + "标题甲 变更二", + "标题甲 变更三", + ] + + +@pytest.mark.asyncio +async def test_changelog_tool_list_defaults_to_compact_items( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + changelog_tool_handler, + "list_entries", + 
lambda limit: (_entry("v3.2.6", "标题甲"), _entry("v3.2.5", "标题乙"))[:limit], + ) + + result = await changelog_tool_handler.execute({"action": "list", "limit": 1}, {}) + payload = json.loads(result) + + assert payload["ok"] is True + assert payload["count"] == 1 + assert payload["items"] == [{"version": "v3.2.6", "title": "标题甲"}] + + +@pytest.mark.asyncio +async def test_changelog_tool_show_honors_detail_flags( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + changelog_tool_handler, + "get_entry", + lambda version: _entry("v3.2.6", "标题甲"), + ) + + result = await changelog_tool_handler.execute( + { + "action": "show", + "version": "3.2.6", + "include_summary": False, + "include_changes": True, + "max_changes": 2, + }, + {}, + ) + payload = json.loads(result) + + assert payload["ok"] is True + assert "summary" not in payload["entry"] + assert payload["entry"]["changes"] == ["标题甲 变更一", "标题甲 变更二"] + assert payload["entry"]["change_count"] == 3 + + +@pytest.mark.asyncio +async def test_changelog_tool_rejects_invalid_action() -> None: + result = await changelog_tool_handler.execute({"action": "unknown"}, {}) + payload = json.loads(result) + + assert payload == { + "ok": False, + "action": "unknown", + "error": "action 只能是 latest、list 或 show", + } + + +@pytest.mark.asyncio +async def test_changelog_tool_requires_version_for_show() -> None: + result = await changelog_tool_handler.execute({"action": "show"}, {}) + payload = json.loads(result) + + assert payload["ok"] is False + assert payload["error"] == "show 动作必须提供 version" From 6e639007c78891e09b950eed55bdcf2546448cd6 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 15:04:08 +0800 Subject: [PATCH 06/25] fix(changelog): improve shortcuts and long list delivery --- src/Undefined/changelog.py | 5 +- .../skills/commands/changelog/README.md | 11 +- .../skills/commands/changelog/config.json | 6 +- .../skills/commands/changelog/handler.py | 109 +++++++++++-- 
tests/test_changelog.py | 1 + tests/test_changelog_command.py | 150 +++++++++++++++++- 6 files changed, 260 insertions(+), 22 deletions(-) diff --git a/src/Undefined/changelog.py b/src/Undefined/changelog.py index 0b4342e1..ed7f54b5 100644 --- a/src/Undefined/changelog.py +++ b/src/Undefined/changelog.py @@ -34,8 +34,9 @@ def normalize_version(version: str) -> str: normalized = str(version or "").strip() if not normalized: raise ChangelogFormatError("版本号不能为空") - if not normalized.startswith("v"): - normalized = f"v{normalized}" + if normalized[:1].lower() == "v": + normalized = normalized[1:] + normalized = f"v{normalized}" if not _VERSION_RE.fullmatch(normalized): raise ChangelogFormatError(f"非法版本号格式: {version}") return normalized diff --git a/src/Undefined/skills/commands/changelog/README.md b/src/Undefined/skills/commands/changelog/README.md index 9f5c3597..426330f8 100644 --- a/src/Undefined/skills/commands/changelog/README.md +++ b/src/Undefined/skills/commands/changelog/README.md @@ -5,8 +5,12 @@ ## 用法 - `/changelog` +- `/cl` +- `/changelog 12` - `/changelog list` - `/changelog list 12` +- `/changelog v3.2.6` +- `/changelog 3.2.6` - `/changelog show v3.2.6` - `/changelog show 3.2.6` - `/changelog latest` @@ -14,7 +18,12 @@ ## 说明 - `/changelog` 默认列最近 8 个版本。 -- `list [数量]` 按新到旧列版本与标题,最大 20。 +- `/cl` 是 `/changelog` 的短别名。 +- `/changelog <数量>` 会直接列出最近 N 个版本,不再限制最大 20 条。 +- `/changelog <版本号>` 会直接展示对应版本详情。 +- `list [数量]` 按新到旧列版本与标题,不再限制最大 20 条。 +- 群聊里当请求数量大于 20 时,会改用合并转发发送完整列表,避免普通消息里看不全。 +- 私聊里不会走合并转发,而是直接发普通文本消息。 - `show <版本号>` 展示单个版本的标题、摘要和变更点。 - `latest` 展示 `CHANGELOG.md` 中第一条版本详情。 - 版本数据直接来自仓库维护的 `CHANGELOG.md`,不会运行时扫描 git tag。 diff --git a/src/Undefined/skills/commands/changelog/config.json b/src/Undefined/skills/commands/changelog/config.json index 205af6a0..9477d560 100644 --- a/src/Undefined/skills/commands/changelog/config.json +++ b/src/Undefined/skills/commands/changelog/config.json @@ -1,8 +1,8 @@ { "name": "changelog", "description": "查看版本历史与单版本变更摘要", 
- "usage": "/changelog [list [数量]|show <版本号>|latest]", - "example": "/changelog show v3.2.6", + "usage": "/changelog [list [数量]|show <版本号>|latest|<版本号>|<数量>]", + "example": "/changelog 3.2.6", "permission": "public", "rate_limit": { "user": 5, @@ -12,5 +12,5 @@ "show_in_help": true, "order": 15, "allow_in_private": true, - "aliases": [] + "aliases": ["cl"] } diff --git a/src/Undefined/skills/commands/changelog/handler.py b/src/Undefined/skills/commands/changelog/handler.py index f06beb87..30118196 100644 --- a/src/Undefined/skills/commands/changelog/handler.py +++ b/src/Undefined/skills/commands/changelog/handler.py @@ -1,16 +1,22 @@ from __future__ import annotations +from typing import Any + from Undefined.changelog import ( + ChangelogEntry, ChangelogError, ChangelogFormatError, get_entry, get_latest_entry, list_entries, + normalize_version, ) from Undefined.services.commands.context import CommandContext _DEFAULT_LIST_LIMIT = 8 -_MAX_LIST_LIMIT = 20 +_FORWARD_LIST_THRESHOLD = 20 +_FORWARD_NODE_BATCH_SIZE = 50 +_USAGE_TEXT = "用法:/changelog [list [数量]|show <版本号>|latest|<版本号>|<数量>]" def _parse_list_limit(raw: str | None) -> int: @@ -22,7 +28,7 @@ def _parse_list_limit(raw: str | None) -> int: raise ChangelogFormatError("数量必须是整数") from exc if parsed <= 0: raise ChangelogFormatError("数量必须大于 0") - return min(parsed, _MAX_LIST_LIMIT) + return parsed def _format_entry_message(version: str | None = None) -> str: @@ -37,29 +43,105 @@ def _format_entry_message(version: str | None = None) -> str: return "\n".join(lines) -def _format_list(limit: int) -> str: - entries = list_entries(limit=limit) +def _format_list_entries(entries: tuple[ChangelogEntry, ...]) -> str: lines = [ "Undefined CHANGELOG", "", *[f"- {entry.version} | {entry.title}" for entry in entries], "", - "查看详情:/changelog show ", + "查看详情:/changelog 或 /changelog show ", ] return "\n".join(lines) +def _format_list(limit: int) -> str: + return _format_list_entries(list_entries(limit=limit)) + + +def 
_build_list_forward_nodes( + entries: tuple[ChangelogEntry, ...], *, bot_qq: int | str +) -> list[dict[str, Any]]: + nodes: list[dict[str, Any]] = [] + bot_uin = str(bot_qq) + + def _add_node(content: str) -> None: + nodes.append( + { + "type": "node", + "data": {"name": "Bot", "uin": bot_uin, "content": content}, + } + ) + + _add_node( + "\n".join( + [ + "Undefined CHANGELOG", + f"共 {len(entries)} 个版本", + "查看详情:/changelog 或 /changelog show ", + ] + ) + ) + + batch: list[str] = [] + for index, entry in enumerate(entries, start=1): + batch.append(f"{index}. {entry.version} | {entry.title}") + if len(batch) >= _FORWARD_NODE_BATCH_SIZE: + _add_node("\n".join(batch)) + batch = [] + + if batch: + _add_node("\n".join(batch)) + + return nodes + + +async def _send_list(limit: int, context: CommandContext) -> None: + entries = list_entries(limit=limit) + if ( + context.scope != "private" + and context.group_id > 0 + and limit > _FORWARD_LIST_THRESHOLD + ): + bot_qq = getattr(context.config, "bot_qq", 0) + forward_nodes = _build_list_forward_nodes(entries, bot_qq=bot_qq) + await context.onebot.send_forward_msg(context.group_id, forward_nodes) + return + await _send_message(_format_list_entries(entries), context) + + +async def _send_message(message: str, context: CommandContext) -> None: + if context.scope == "private" and context.user_id is not None: + await context.sender.send_private_message(context.user_id, message) + return + await context.sender.send_group_message(context.group_id, message) + + +def _infer_single_argument(arg: str) -> tuple[str, str] | None: + token = str(arg).strip() + if not token: + return None + if token.isdigit(): + return ("list", token) + try: + normalize_version(token) + except ChangelogFormatError: + return None + return ("show", token) + + async def execute(args: list[str], context: CommandContext) -> None: try: if not args: - message = _format_list(_DEFAULT_LIST_LIMIT) + await _send_list(_DEFAULT_LIST_LIMIT, context) + return else: 
subcommand = str(args[0]).strip().lower() if subcommand == "list": if len(args) > 2: raise ChangelogFormatError("用法:/changelog list [数量]") limit = _parse_list_limit(args[1] if len(args) == 2 else None) - message = _format_list(limit) + await _send_list(limit, context) + return elif subcommand == "show": if len(args) != 2: raise ChangelogFormatError("用法:/changelog show <版本号>") @@ -69,10 +151,15 @@ async def execute(args: list[str], context: CommandContext) -> None: raise ChangelogFormatError("用法:/changelog latest") message = _format_entry_message() else: - raise ChangelogFormatError( - "用法:/changelog [list [数量]|show <版本号>|latest]" - ) + inferred = _infer_single_argument(args[0]) if len(args) == 1 else None + if inferred is None: + raise ChangelogFormatError(_USAGE_TEXT) + action, value = inferred + if action == "list": + await _send_list(_parse_list_limit(value), context) + return + message = _format_entry_message(value) except (FileNotFoundError, ChangelogError) as exc: message = f"❌ {exc}" - await context.sender.send_group_message(context.group_id, message) + await _send_message(message, context) diff --git a/tests/test_changelog.py b/tests/test_changelog.py index fad00b02..886fc7cd 100644 --- a/tests/test_changelog.py +++ b/tests/test_changelog.py @@ -54,6 +54,7 @@ def test_get_entry_normalizes_version_without_v_prefix() -> None: ) assert get_entry("1.0.0", entries=entries).version == "v1.0.0" + assert get_entry("V1.0.0", entries=entries).version == "v1.0.0" assert get_latest_entry(entries=entries).version == "v1.0.0" assert list_entries(limit=1, entries=entries)[0].version == "v1.0.0" diff --git a/tests/test_changelog_command.py b/tests/test_changelog_command.py index d93a48f6..4b2b57e7 100644 --- a/tests/test_changelog_command.py +++ b/tests/test_changelog_command.py @@ -3,6 +3,7 @@ from pathlib import Path from types import SimpleNamespace from typing import Any, cast +from unittest.mock import AsyncMock import pytest @@ -16,28 +17,58 @@ class _DummySender: 
def __init__(self) -> None: self.messages: list[tuple[int, str, bool]] = [] + self.private_messages: list[tuple[int, str, bool]] = [] async def send_group_message( self, group_id: int, message: str, mark_sent: bool = False ) -> None: self.messages.append((group_id, message, mark_sent)) + async def send_private_message( + self, + user_id: int, + message: str, + auto_history: bool = True, + *, + mark_sent: bool = True, + ) -> None: + _ = auto_history + self.private_messages.append((user_id, message, mark_sent)) + -def _build_context(sender: _DummySender) -> CommandContext: +def _build_context( + sender: _DummySender, + *, + group_id: int = 10001, + scope: str = "group", + user_id: int | None = None, + onebot: Any | None = None, + config: Any | None = None, +) -> CommandContext: stub = cast(Any, SimpleNamespace()) + resolved_onebot = ( + onebot + if onebot is not None + else cast(Any, SimpleNamespace(send_forward_msg=None)) + ) + resolved_config = ( + config if config is not None else cast(Any, SimpleNamespace(bot_qq=10000)) + ) return CommandContext( - group_id=10001, + group_id=group_id, sender_id=10002, - config=stub, + config=resolved_config, sender=cast(Any, sender), ai=stub, faq_storage=stub, - onebot=stub, + onebot=resolved_onebot, security=stub, queue_manager=None, rate_limiter=None, dispatcher=stub, registry=CommandRegistry(Path("/tmp/not-used")), + scope=scope, + user_id=user_id, ) @@ -68,7 +99,7 @@ async def test_changelog_command_lists_recent_versions( assert "Undefined CHANGELOG" in output assert "- v3.2.6 | 标题甲" in output assert "- v3.2.5 | 标题乙" in output - assert "/changelog show " in output + assert "/changelog " in output @pytest.mark.asyncio @@ -91,6 +122,95 @@ async def test_changelog_command_show_supports_version_argument( assert "- 标题甲 变更一" in output +@pytest.mark.asyncio +async def test_changelog_command_inferrs_show_for_direct_version_argument( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = 
_build_context(sender) + monkeypatch.setattr( + changelog_handler, + "get_entry", + lambda version: _entry("v3.2.6", "标题甲"), + ) + + await changelog_handler.execute(["V3.2.6"], context) + + output = sender.messages[-1][1] + assert output.startswith("v3.2.6 标题甲") + assert "标题甲 摘要" in output + + +@pytest.mark.asyncio +async def test_changelog_command_inferrs_list_for_direct_limit_argument( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender) + captured: dict[str, int] = {} + + def _list_entries(*, limit: int) -> tuple[ChangelogEntry, ...]: + captured["limit"] = limit + return (_entry("v3.2.6", "标题甲"),) + + monkeypatch.setattr(changelog_handler, "list_entries", _list_entries) + + await changelog_handler.execute(["12"], context) + + assert captured["limit"] == 12 + assert "- v3.2.6 | 标题甲" in sender.messages[-1][1] + + +@pytest.mark.asyncio +async def test_changelog_command_large_list_uses_forward_in_group( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + onebot = cast(Any, SimpleNamespace(send_forward_msg=AsyncMock())) + context = _build_context(sender, onebot=onebot) + monkeypatch.setattr( + changelog_handler, + "list_entries", + lambda *, limit: tuple(_entry(f"v3.2.{idx}", f"标题{idx}") for idx in range(6)), + ) + + await changelog_handler.execute(["list", "25"], context) + + assert not sender.messages + onebot.send_forward_msg.assert_awaited_once() + group_id, nodes = onebot.send_forward_msg.await_args.args + assert group_id == 10001 + assert nodes[0]["data"]["content"].startswith("Undefined CHANGELOG") + assert "1. 
v3.2.0 | 标题0" in nodes[1]["data"]["content"] + + +@pytest.mark.asyncio +async def test_changelog_command_large_list_uses_private_sender_in_private_scope( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + onebot = cast(Any, SimpleNamespace(send_forward_msg=AsyncMock())) + context = _build_context( + sender, + group_id=0, + scope="private", + user_id=20001, + onebot=onebot, + ) + monkeypatch.setattr( + changelog_handler, + "list_entries", + lambda *, limit: (_entry("v3.2.6", "标题甲"), _entry("v3.2.5", "标题乙")), + ) + + await changelog_handler.execute(["25"], context) + + assert not sender.messages + assert sender.private_messages[-1][0] == 20001 + assert "Undefined CHANGELOG" in sender.private_messages[-1][1] + onebot.send_forward_msg.assert_not_awaited() + + @pytest.mark.asyncio async def test_changelog_command_latest_uses_first_entry( monkeypatch: pytest.MonkeyPatch, @@ -108,6 +228,25 @@ async def test_changelog_command_latest_uses_first_entry( assert sender.messages[-1][1].startswith("v3.2.6 标题甲") +@pytest.mark.asyncio +async def test_changelog_command_latest_uses_private_sender_in_private_scope( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sender = _DummySender() + context = _build_context(sender, group_id=0, scope="private", user_id=20001) + monkeypatch.setattr( + changelog_handler, + "get_latest_entry", + lambda: _entry("v3.2.6", "标题甲"), + ) + + await changelog_handler.execute(["latest"], context) + + assert not sender.messages + assert sender.private_messages[-1][0] == 20001 + assert sender.private_messages[-1][1].startswith("v3.2.6 标题甲") + + @pytest.mark.asyncio async def test_changelog_command_reports_lookup_errors( monkeypatch: pytest.MonkeyPatch, @@ -142,3 +281,4 @@ def test_changelog_command_is_registered_for_private_use() -> None: assert meta is not None assert meta.allow_in_private is True assert meta.rate_limit.user == 5 + assert dispatcher.command_registry.resolve("cl") is not None From 
d39df8d9572a4e25c7ec3e7adc83bc7bdb93ab36 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 15:12:04 +0800 Subject: [PATCH 07/25] =?UTF-8?q?docs(changelog):=20=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E5=91=BD=E4=BB=A4=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Undefined/skills/commands/changelog/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Undefined/skills/commands/changelog/README.md b/src/Undefined/skills/commands/changelog/README.md index 426330f8..ef8bcaf6 100644 --- a/src/Undefined/skills/commands/changelog/README.md +++ b/src/Undefined/skills/commands/changelog/README.md @@ -18,12 +18,9 @@ ## 说明 - `/changelog` 默认列最近 8 个版本。 -- `/cl` 是 `/changelog` 的短别名。 -- `/changelog <数量>` 会直接列出最近 N 个版本,不再限制最大 20 条。 +- `/changelog <数量>` 会直接列出最近 N 个版本。 - `/changelog <版本号>` 会直接展示对应版本详情。 -- `list [数量]` 按新到旧列版本与标题,不再限制最大 20 条。 +- `list [数量]` 按新到旧列版本与标题。 - 群聊里当请求数量大于 20 时,会改用合并转发发送完整列表,避免普通消息里看不全。 - 私聊里不会走合并转发,而是直接发普通文本消息。 - `show <版本号>` 展示单个版本的标题、摘要和变更点。 -- `latest` 展示 `CHANGELOG.md` 中第一条版本详情。 -- 版本数据直接来自仓库维护的 `CHANGELOG.md`,不会运行时扫描 git tag。 From 113ec221545c6f532f26e5439aa5f89478abaf72 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 15:18:14 +0800 Subject: [PATCH 08/25] =?UTF-8?q?fix:=20=E5=8E=BB=E9=99=A4=20WebUI=20?= =?UTF-8?q?=E5=86=97=E4=BD=99=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Undefined/webui/static/js/runtime.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Undefined/webui/static/js/runtime.js b/src/Undefined/webui/static/js/runtime.js index c1a1ef8d..807eb530 100644 --- a/src/Undefined/webui/static/js/runtime.js +++ b/src/Undefined/webui/static/js/runtime.js @@ -837,14 +837,14 @@ html += ``; const summary = []; - if (configExists) summary.push(t("probes.bootstrap_config_exists")); - if 
(!configExists) summary.push(t("probes.bootstrap_config_missing")); + //if (configExists) summary.push(t("probes.bootstrap_config_exists")); + //if (!configExists) summary.push(t("probes.bootstrap_config_missing")); if (configValid === false && data.validation_error) summary.push(String(data.validation_error)); if (usingDefaultPassword) summary.push(t("auth.change_required")); - if (runtimeEnabled && runtimeReachable === false) - summary.push(t("probes.bootstrap_runtime_pending")); - if (!summary.length) summary.push(t("probes.bootstrap_ready")); + //if (runtimeEnabled && runtimeReachable === false) + // summary.push(t("probes.bootstrap_runtime_pending")); + //if (!summary.length) summary.push(t("probes.bootstrap_ready")); html += `
${summary .concat(advice) From 83c317f718fad1d281d1f0ee3126bd3b017c49d8 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 15:27:01 +0800 Subject: [PATCH 09/25] fix(webui): remove bootstrap probe panel --- src/Undefined/webui/static/js/i18n.js | 26 ------ src/Undefined/webui/static/js/runtime.js | 111 ----------------------- src/Undefined/webui/templates/index.html | 4 - 3 files changed, 141 deletions(-) diff --git a/src/Undefined/webui/static/js/i18n.js b/src/Undefined/webui/static/js/i18n.js index b6027322..b8ee4ac9 100644 --- a/src/Undefined/webui/static/js/i18n.js +++ b/src/Undefined/webui/static/js/i18n.js @@ -140,7 +140,6 @@ const I18N = { "probes.section_queues": "请求队列", "probes.section_services": "服务状态", "probes.section_skills": "技能统计", - "probes.section_bootstrap": "引导与管理探针", "probes.version": "版本", "probes.platform": "平台", "probes.uptime": "运行时间", @@ -153,18 +152,6 @@ const I18N = { "probes.api_listen": "监听地址", "probes.tools": "基础工具", "probes.agents": "智能体", - "probes.bootstrap_config": "配置文件", - "probes.bootstrap_validation": "严格校验", - "probes.bootstrap_auth": "认证状态", - "probes.bootstrap_runtime": "运行态", - "probes.bootstrap_config_exists": - "已检测到 config.toml,可继续在控制台编辑。", - "probes.bootstrap_config_missing": - "尚未检测到 config.toml,控制台会先生成模板供你补齐。", - "probes.bootstrap_runtime_pending": - "管理层在线,但运行态尚未就绪;完善配置后可直接启动 Bot。", - "probes.bootstrap_ready": - "管理入口已就绪,可继续编辑配置、查看日志或启动实例。", "probes.all_ok": "所有外部端点均正常", "probes.some_failed": "部分外部端点异常", "memory.title": "记忆检索", @@ -373,7 +360,6 @@ const I18N = { "probes.section_queues": "Request Queues", "probes.section_services": "Service Status", "probes.section_skills": "Skill Statistics", - "probes.section_bootstrap": "Bootstrap & Management", "probes.version": "Version", "probes.platform": "Platform", "probes.uptime": "Uptime", @@ -386,18 +372,6 @@ const I18N = { "probes.api_listen": "Listen Addr", "probes.tools": "Tools", "probes.agents": "Agents", - "probes.bootstrap_config": "Config 
File", - "probes.bootstrap_validation": "Strict Validation", - "probes.bootstrap_auth": "Auth", - "probes.bootstrap_runtime": "Runtime", - "probes.bootstrap_config_exists": - "config.toml is present and can be edited in the console.", - "probes.bootstrap_config_missing": - "config.toml is missing; the console will bootstrap a template for recovery.", - "probes.bootstrap_runtime_pending": - "Management is online but runtime is not ready yet; finish config and start the bot.", - "probes.bootstrap_ready": - "Management entry is ready for config edits, logs, and bot startup.", "probes.all_ok": "All external endpoints are healthy", "probes.some_failed": "Some external endpoints are unhealthy", "memory.title": "Memory Hub", diff --git a/src/Undefined/webui/static/js/runtime.js b/src/Undefined/webui/static/js/runtime.js index 807eb530..f47c39d8 100644 --- a/src/Undefined/webui/static/js/runtime.js +++ b/src/Undefined/webui/static/js/runtime.js @@ -2,7 +2,6 @@ const runtimeState = { initialized: false, probesLoaded: false, - bootstrapLoaded: false, memoryLoaded: false, runtimeMetaLoaded: false, runtimeEnabled: true, @@ -780,96 +779,6 @@
`; } - function renderBootstrapProbe(data) { - const el = get("managementBootstrapProbe"); - if (!el) return; - if (!data || data.error) { - el.innerHTML = `
${escapeHtml(data?.error || "--")}
`; - return; - } - - const configExists = !!data.config_exists; - const configValid = - data.config_valid === undefined ? null : !!data.config_valid; - const usingDefaultPassword = !!data.using_default_password; - const runtimeEnabled = !!data.runtime_enabled; - const runtimeReachable = - data.runtime_reachable === undefined - ? null - : !!data.runtime_reachable; - const authMode = - data.auth_mode || (state.authAccessToken ? "token" : "cookie"); - const advice = Array.isArray(data.advice) ? data.advice : []; - - const configStatus = configExists ? "ok" : "error"; - const validationStatus = - configValid === null ? "skipped" : configValid ? "ok" : "error"; - const authStatus = usingDefaultPassword ? "error" : "ok"; - const runtimeStatus = - runtimeReachable === null - ? runtimeEnabled - ? "skipped" - : "error" - : runtimeReachable - ? "ok" - : runtimeEnabled - ? "error" - : "skipped"; - - let html = `
${t("probes.section_bootstrap")}
`; - html += `
`; - html += probeItem( - t("probes.bootstrap_config"), - probeStatusBadge(configStatus), - ); - html += probeItem( - t("probes.bootstrap_validation"), - probeStatusBadge(validationStatus), - ); - html += probeItem( - t("probes.bootstrap_auth"), - `${probeStatusBadge(authStatus)} ${escapeHtml(String(authMode))}`, - ); - html += probeItem( - t("probes.bootstrap_runtime"), - probeStatusBadge(runtimeStatus), - ); - html += `
`; - - const summary = []; - //if (configExists) summary.push(t("probes.bootstrap_config_exists")); - //if (!configExists) summary.push(t("probes.bootstrap_config_missing")); - if (configValid === false && data.validation_error) - summary.push(String(data.validation_error)); - if (usingDefaultPassword) summary.push(t("auth.change_required")); - //if (runtimeEnabled && runtimeReachable === false) - // summary.push(t("probes.bootstrap_runtime_pending")); - //if (!summary.length) summary.push(t("probes.bootstrap_ready")); - - html += `
${summary - .concat(advice) - .map( - (item) => - `
${escapeHtml(item)}
`, - ) - .join("")}
`; - html += `
`; - el.innerHTML = html; - } - - function buildBootstrapFallback(meta, errorMessage = "") { - return { - config_exists: !!state.configExists, - config_valid: null, - using_default_password: !!state.usingDefaultPassword, - auth_mode: state.authAccessToken ? "token" : "cookie", - runtime_enabled: !!(meta && meta.enabled), - runtime_reachable: false, - validation_error: "", - advice: errorMessage ? [String(errorMessage)] : [], - }; - } - function setProbeUnavailable(message) { const msg = String(message || RUNTIME_DISABLED_ERROR); renderInternalProbe({ error: msg }); @@ -917,23 +826,6 @@ renderExternalProbe(data); } - async function fetchBootstrapProbe() { - try { - const data = await fetchJsonOrThrow([ - "/api/v1/management/probes/bootstrap", - "/api/v1/management/probes/capabilities", - ]); - renderBootstrapProbe(data); - } catch (error) { - const meta = await fetchRuntimeMeta().catch(() => ({ - enabled: false, - })); - renderBootstrapProbe( - buildBootstrapFallback(meta, error.message || error), - ); - } - } - async function searchMemory() { if (!(await ensureRuntimeEnabled())) { setMemoryUnavailable(t("runtime.disabled")); @@ -1245,17 +1137,14 @@ async function refreshProbes() { try { - await fetchBootstrapProbe(); if (!(await ensureRuntimeEnabled())) { setProbeUnavailable(t("runtime.disabled")); runtimeState.probesLoaded = true; - runtimeState.bootstrapLoaded = true; return; } await Promise.all([fetchInternalProbe(), fetchExternalProbe()]); runtimeState.probesLoaded = true; - runtimeState.bootstrapLoaded = true; } catch (error) { showToast( `${t("runtime.failed")}: ${appendRuntimeApiHint(error.message || error)}`, diff --git a/src/Undefined/webui/templates/index.html b/src/Undefined/webui/templates/index.html index d2752cb7..98ad198b 100644 --- a/src/Undefined/webui/templates/index.html +++ b/src/Undefined/webui/templates/index.html @@ -366,10 +366,6 @@

探针

-
-
引导与管理探针
-
--
-
内部探针
--
From 38d2cca9e9a7c0c18148889ec63df82e87066e04 Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 21 Mar 2026 15:57:39 +0800 Subject: [PATCH 10/25] refactor(undefined-console): simplify launcher copy --- apps/undefined-console/src/main.ts | 22 +--------------------- apps/undefined-console/src/style.css | 22 +++++----------------- 2 files changed, 6 insertions(+), 38 deletions(-) diff --git a/apps/undefined-console/src/main.ts b/apps/undefined-console/src/main.ts index 07c643d9..165015f0 100644 --- a/apps/undefined-console/src/main.ts +++ b/apps/undefined-console/src/main.ts @@ -35,13 +35,9 @@ const PREFERENCE_STORAGE_KEY = "undefined.console.preferences"; const messages = { zh: { brand: "Undefined Console", - subtitle: "保存连接、测试端点,然后直接进入真正的 WebUI。", lang_toggle: "English", theme_light: "浅色", theme_dark: "深色", - hero_title: "连接后直接打开远程 WebUI", - hero_copy: - "Tauri 只负责保存连接与做基础探测;真正的管理界面直接使用现有 WebUI,因此样式和功能与浏览器版保持一致。", button_open: "打开 WebUI", button_test: "测试连接", button_seed: "填入本地示例", @@ -53,7 +49,6 @@ const messages = { saved_profiles: "已保存连接", saved_profiles_copy: "连接配置保存在当前设备本地。", editor_title: "连接编辑器", - editor_copy: "填写同一个 IP/域名,再分别填写 Management 与 Runtime 端口。", display_name: "显示名称", host: "IP / 域名", host_placeholder: "例如:192.168.2.1 或 example.com", @@ -63,8 +58,6 @@ const messages = { password_placeholder: "填写后打开 WebUI 时会自动尝试登录", notes: "备注", notes_placeholder: "例如:本机、预发布、Android 备用连接", - launcher_hint: - "Tauri 只负责连接管理;真正的后台功能与样式都以 WebUI 为准。", empty_profiles: "还没有保存的连接。", empty_probes: "点击“测试连接”后,这里会显示探针结果。", profile_default_name: "本地管理入口", @@ -98,13 +91,9 @@ const messages = { }, en: { brand: "Undefined Console", - subtitle: "Save connections, test endpoints, then open the real WebUI.", lang_toggle: "中文", theme_light: "Light", theme_dark: "Dark", - hero_title: "Open the real remote WebUI after choosing a connection", - hero_copy: - "Tauri only stores connections and runs basic probes. 
The actual management interface uses the existing WebUI so the look and features stay aligned with the browser version.", button_open: "Open WebUI", button_test: "Test connection", button_seed: "Seed local", @@ -116,8 +105,6 @@ const messages = { saved_profiles: "Saved profiles", saved_profiles_copy: "Profiles are stored locally on this device.", editor_title: "Profile editor", - editor_copy: - "Use one host/IP field, then provide dedicated Management and Runtime ports.", display_name: "Display name", host: "Host / IP", host_placeholder: "For example: 192.168.2.1 or example.com", @@ -127,8 +114,6 @@ const messages = { password_placeholder: "If filled, WebUI will try to sign in automatically", notes: "Notes", notes_placeholder: "For example: local, staging, Android fallback", - launcher_hint: - "Tauri only manages connections; the real console UI and features stay in WebUI.", empty_profiles: "No saved profiles yet.", empty_probes: "Probe results will appear here after you click test connection.", @@ -625,7 +610,6 @@ function render(): void {
${t("brand")}
-

${t("subtitle")}