diff --git a/.github/workflows/opencode.yml b/.github/workflows/opencode.yml new file mode 100644 index 0000000..b767381 --- /dev/null +++ b/.github/workflows/opencode.yml @@ -0,0 +1,33 @@ +name: opencode + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + +jobs: + opencode: + if: | + contains(github.event.comment.body, ' /oc') || + startsWith(github.event.comment.body, '/oc') || + contains(github.event.comment.body, ' /opencode') || + startsWith(github.event.comment.body, '/opencode') + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + pull-requests: read + issues: read + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Run opencode + uses: anomalyco/opencode/github@latest + env: + ALIBABA_CODING_PLAN_API_KEY: ${{ secrets.ALIBABA_CODING_PLAN_API_KEY }} + with: + model: alibaba-coding-plan-cn/qwen3.5-plus \ No newline at end of file diff --git a/.github/workflows/pr-review-gate.yml b/.github/workflows/pr-review-gate.yml new file mode 100644 index 0000000..32cd330 --- /dev/null +++ b/.github/workflows/pr-review-gate.yml @@ -0,0 +1,58 @@ +name: PR Review Gate + +on: + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: + inputs: + pr_number: + description: 'PR number' + required: false + type: string + +jobs: + pr-review: + runs-on: ubuntu-latest + permissions: + pull-requests: write + contents: read + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Get PR Number + id: pr + run: | + if [ -n "${{ github.event.inputs.pr_number }}" ]; then + PR_NUM="${{ github.event.inputs.pr_number }}" + else + PR_NUM="${{ github.event.number }}" + fi + echo "pr_number=$PR_NUM" >> $GITHUB_OUTPUT + + - name: Post OpenCode Review Request + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_NUM="${{ steps.pr.outputs.pr_number }}" + + BODY='## 🔍 OpenCode PR Review Required + + 这是一个受保护的分支,merge 前需要进行 code review。 + + **请运行以下命令进行 OpenCode review:** + ``` + /oc review https://github.com/${{ github.repository }}/pull/$PR_NUM + ``` + + 或者在 PR 页面评论 `/oc` 来触发 OpenCode review。 + + --- + + *This is an automated reminder from PR Review Gate.*' + + gh pr comment $PR_NUM --body "$BODY" + + - name: Set status check to success + run: | + echo "OpenCode review request posted." diff --git a/apps/api/deps.py b/apps/api/deps.py index b801c3a..bbe2604 100644 --- a/apps/api/deps.py +++ b/apps/api/deps.py @@ -70,6 +70,7 @@ def invalidate_prefix(self, prefix: str): for k in keys: del self._store[k] + cache = TTLCache() @@ -97,6 +98,7 @@ def iso_dt(dt: datetime | None) -> str | None: def brief_date() -> str: from packages.timezone import user_date_str + return user_date_str() diff --git a/apps/api/main.py b/apps/api/main.py index f6ee0a8..ea8abc5 100644 --- a/apps/api/main.py +++ b/apps/api/main.py @@ -101,6 +101,7 @@ async def dispatch(self, request: Request, call_next): request.state.user = payload return await call_next(request) + # ---------- 启动时检查认证配置 ---------- settings = get_settings() @@ -119,6 +120,7 @@ async def dispatch(self, request: Request, call_next): app.add_middleware(AuthMiddleware) app.add_middleware(GZipMiddleware, minimum_size=1000) # Starlette 内置跳过 text/event-stream + @app.exception_handler(AppError) async def app_error_handler(_request: Request, exc: AppError): """统一处理所有业务异常""" @@ -150,19 +152,23 @@ async def app_error_handler(_request: Request, exc: AppError): agent, auth, content, + cs_feeds, graph, jobs, papers, pipelines, - settings as settings_router, system, topics, writing, ) +from apps.api.routers import ( + settings as settings_router, +) app.include_router(system.router) app.include_router(papers.router) app.include_router(topics.router) +app.include_router(cs_feeds.router) app.include_router(graph.router) app.include_router(agent.router) app.include_router(content.router) diff --git a/apps/api/routers/agent.py b/apps/api/routers/agent.py index c4eb119..18f5a9f 100644 --- a/apps/api/routers/agent.py +++ b/apps/api/routers/agent.py @@ -111,25 +111,31 @@ def stream_with_save(): text_content += data.get("content", "") elif event_type == "tool_result": # 记录工具调用结果 - tool_calls_records.append({ - "name": data.get("name"), - "success": data.get("success"), - "summary": data.get("summary"), - "data": data.get("data"), - }) + tool_calls_records.append( + { + "name": data.get("name"), + "success": data.get("success"), + "summary": data.get("summary"), + "data": data.get("data"), + } + ) elif event_type == "action_result": # 记录用户确认的操作结果 - tool_calls_records.append({ - "action_id": data.get("id"), - "success": data.get("success"), - "summary": data.get("summary"), - "data": data.get("data"), - }) + tool_calls_records.append( + { + "action_id": data.get("id"), + "success": data.get("success"), + "summary": data.get("summary"), + "data": data.get("data"), + } + ) yield chunk # 流结束后保存助手响应 if text_content or tool_calls_records: - _save_assistant_response(text_content, tool_calls_records if tool_calls_records else None) + _save_assistant_response( + text_content, tool_calls_records if tool_calls_records else None + ) return StreamingResponse( stream_with_save(), @@ -186,7 +192,7 @@ def get_conversation_messages( ) -> dict: """获取指定会话的所有消息""" from packages.storage.db import session_scope - from packages.storage.repositories import AgentMessageRepository, AgentConversationRepository + from packages.storage.repositories import AgentConversationRepository, AgentMessageRepository with session_scope() as session: conv_repo = AgentConversationRepository(session) diff --git a/apps/api/routers/auth.py b/apps/api/routers/auth.py index a725b4b..e2383ba 100644 --- a/apps/api/routers/auth.py +++ b/apps/api/routers/auth.py @@ -32,15 +32,15 @@ async def login(request: LoginRequest): 成功返回 JWT token """ settings = get_settings() - + # 如果未配置密码,返回错误 if not settings.auth_password: raise HTTPException(status_code=403, detail="Authentication is disabled") - + # 验证密码 if not authenticate_user(request.password): raise HTTPException(status_code=401, detail="Incorrect password") - + # 生成 token access_token = create_access_token(data={"sub": "papermind-user"}) return LoginResponse(access_token=access_token) @@ -52,4 +52,4 @@ async def auth_status(): 检查认证是否启用 """ settings = get_settings() - return AuthStatusResponse(auth_enabled=bool(settings.auth_password)) \ No newline at end of file + return AuthStatusResponse(auth_enabled=bool(settings.auth_password)) diff --git a/apps/api/routers/content.py b/apps/api/routers/content.py index 8bfee1c..519f2a3 100644 --- a/apps/api/routers/content.py +++ b/apps/api/routers/content.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, HTTPException, Query -from apps.api.deps import brief_date, brief_service, cache, graph_service, iso_dt, settings +from apps.api.deps import brief_service, cache, graph_service, iso_dt from packages.domain.schemas import DailyBriefRequest from packages.domain.task_tracker import global_tracker from packages.storage.db import session_scope @@ -168,7 +168,16 @@ def daily_brief(req: DailyBriefRequest) -> dict: """生成每日简报(异步任务)""" from packages.domain.task_tracker import global_tracker - recipient = req.recipient or settings.notify_default_to + # 如果没有指定收件人,从数据库读取配置 + recipient = req.recipient + if not recipient: + from packages.storage.db import session_scope + from packages.storage.repositories import DailyReportConfigRepository + + with session_scope() as session: + config = DailyReportConfigRepository(session).get_config() + if config.send_email_report and config.recipient_emails: + recipient = config.recipient_emails.split(",")[0] def _generate_fn(progress_callback=None): # publish() 内部已写入 generated_content 表,无需重复 diff --git a/apps/api/routers/cs_feeds.py b/apps/api/routers/cs_feeds.py new file mode 100644 index 0000000..7fe94bd --- /dev/null +++ b/apps/api/routers/cs_feeds.py @@ -0,0 +1,178 @@ +"""CS 分类订阅 API +@author Color2333 +""" + +import logging + +from fastapi import APIRouter, Depends, Query, Request + +from packages.domain.task_tracker import global_tracker +from packages.storage.db import SessionLocal +from packages.storage.repositories import CSFeedRepository + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/cs", tags=["cs-feeds"]) + + +def get_repo(): + session = SessionLocal() + try: + yield CSFeedRepository(session) + finally: + session.close() + + +class CategoryInfo: + def __init__(self, code: str, name: str, description: str = ""): + self.code = code + self.name = name + self.description = description + + +class FeedInfo: + def __init__( + self, + category_code: str, + category_name: str, + daily_limit: int, + enabled: bool, + status: str, + last_run_at: str | None, + last_run_count: int, + ): + self.category_code = category_code + self.category_name = category_name + self.daily_limit = daily_limit + self.enabled = enabled + self.status = status + self.last_run_at = last_run_at + self.last_run_count = last_run_count + + +@router.get("/categories") +def list_categories(repo: CSFeedRepository = Depends(get_repo)): + categories = repo.get_categories() + return { + "categories": [ + {"code": c.code, "name": c.name, "description": c.description} for c in categories + ] + } + + +@router.get("/feeds") +def list_feeds(repo: CSFeedRepository = Depends(get_repo)): + feeds = repo.get_subscriptions() + categories = {c.code: c.name for c in repo.get_categories()} + return { + "feeds": [ + { + "category_code": f.category_code, + "category_name": categories.get(f.category_code, f.category_code), + "daily_limit": f.daily_limit, + "enabled": f.enabled, + "status": f.status, + "last_run_at": f.last_run_at.isoformat() if f.last_run_at else None, + "last_run_count": f.last_run_count, + } + for f in feeds + ] + } + + +@router.post("/feeds") +async def subscribe( + repo: CSFeedRepository = Depends(get_repo), + category_codes: list[str] | None = Query(default=None, alias="category_codes"), + daily_limit: int = Query(default=30, alias="daily_limit"), + enabled: bool = Query(default=True, alias="enabled"), + request: Request = None, +): + if category_codes is None and request is not None: + body = await request.json() + category_codes = body.get("category_codes", []) + daily_limit = body.get("daily_limit", 30) + enabled = body.get("enabled", True) + if not category_codes: + category_codes = [] + created = [] + for code in category_codes: + sub = repo.upsert_subscription(code, daily_limit, enabled) + created.append( + { + "category_code": sub.category_code, + "daily_limit": sub.daily_limit, + "enabled": sub.enabled, + } + ) + return {"created": len(created), "feeds": created} + + +@router.delete("/feeds/{category_code}") +def unsubscribe(category_code: str, repo: CSFeedRepository = Depends(get_repo)): + deleted = repo.delete_subscription(category_code) + return {"deleted": deleted} + + +@router.patch("/feeds/{category_code}") +def update_feed( + category_code: str, + daily_limit: int | None = Query(default=None, alias="daily_limit"), + enabled: bool | None = Query(default=None, alias="enabled"), + repo: CSFeedRepository = Depends(get_repo), +): + sub = repo.get_subscription(category_code) + if not sub: + return {"error": "订阅不存在"} + if daily_limit is not None: + sub.daily_limit = daily_limit + if enabled is not None: + sub.enabled = enabled + repo.session.commit() + return { + "category_code": sub.category_code, + "daily_limit": sub.daily_limit, + "enabled": sub.enabled, + } + + +@router.post("/feeds/{category_code}/fetch") +def fetch_category( + category_code: str, + repo: CSFeedRepository = Depends(get_repo), +): + """手动触发单个分类的论文抓取(后台任务)""" + sub = repo.get_subscription(category_code) + if not sub: + return {"error": "订阅不存在"} + + def _fetch_fn(progress_callback=None): + from packages.integrations.arxiv_client import ArxivClient + from packages.storage.db import session_scope + from packages.storage.repositories import PaperRepository + + client = ArxivClient() + papers = client.fetch_latest( + query=f"cat:{category_code}", + max_results=sub.daily_limit, + days_back=7, + ) + count = 0 + with session_scope() as session: + paper_repo = PaperRepository(session) + for p in papers: + paper_repo.upsert_paper(p) + count += 1 + repo.update_run_status(category_code, count) + return {"fetched": count} + + task_id = global_tracker.submit( + task_type="cs_feed_fetch", + title=f"📥 抓取分类: {category_code}", + fn=_fetch_fn, + ) + return { + "status": "started", + "task_id": task_id, + "message": f"「{category_code}」抓取已在后台启动", + } diff --git a/apps/api/routers/jobs.py b/apps/api/routers/jobs.py index b57fe18..5c1dd0e 100644 --- a/apps/api/routers/jobs.py +++ b/apps/api/routers/jobs.py @@ -7,7 +7,6 @@ from fastapi import APIRouter, BackgroundTasks, HTTPException, Query -from apps.api.deps import pipelines from packages.ai.daily_runner import run_daily_brief, run_daily_ingest, run_weekly_graph_maintenance from packages.domain.enums import ReadStatus from packages.domain.task_tracker import global_tracker @@ -56,7 +55,7 @@ def batch_process_unread( import uuid from concurrent.futures import ThreadPoolExecutor, as_completed - from packages.ai.daily_runner import _process_paper, PAPER_CONCURRENCY + from packages.ai.daily_runner import PAPER_CONCURRENCY, _process_paper # 先获取需要处理的论文数量 with session_scope() as session: @@ -200,6 +199,7 @@ def get_action_papers( async def run_daily_report_once(background_tasks: BackgroundTasks): """完整工作流(精读 + 生成 + 发邮件)— 后台执行""" import asyncio + from packages.ai.auto_read_service import AutoReadService def _run_workflow_bg(): diff --git a/apps/api/routers/papers.py b/apps/api/routers/papers.py index 47e0285..0dfddb8 100644 --- a/apps/api/routers/papers.py +++ b/apps/api/routers/papers.py @@ -42,6 +42,7 @@ def latest( search: str | None = Query(default=None), sort_by: str = Query(default="created_at"), sort_order: str = Query(default="desc"), + category: str | None = Query(default=None), ) -> dict: with session_scope() as session: repo = PaperRepository(session) @@ -53,8 +54,11 @@ def latest( status=status, date_str=date, search=search.strip() if search else None, - sort_by=sort_by if sort_by in ("created_at", "publication_date", "title") else "created_at", + sort_by=sort_by + if sort_by in ("created_at", "publication_date", "title") + else "created_at", sort_order=sort_order if sort_order in ("asc", "desc") else "desc", + category=category, ) resp = paper_list_response(papers, repo) resp["total"] = total @@ -121,9 +125,10 @@ def paper_detail(paper_id: UUID) -> dict: raise HTTPException(status_code=404, detail=str(exc)) from exc topic_map = repo.get_topic_names_for_papers([str(p.id)]) # 查询已有分析报告 - from packages.storage.models import AnalysisReport as AR from sqlalchemy import select as _sel + from packages.storage.models import AnalysisReport as AR + ar = session.execute(_sel(AR).where(AR.paper_id == str(p.id))).scalar_one_or_none() skim_data = None deep_data = None @@ -277,9 +282,10 @@ def get_paper_figures(paper_id: UUID) -> dict: @router.get("/papers/{paper_id}/figures/{figure_id}/image") def get_figure_image(paper_id: UUID, figure_id: str): """返回图表原始图片文件""" + from sqlalchemy import select + from packages.storage.db import session_scope from packages.storage.models import ImageAnalysis - from sqlalchemy import select with session_scope() as session: row = session.execute( @@ -305,7 +311,6 @@ def analyze_paper_figures( max_figures: int = Query(default=10, ge=1, le=30), ) -> dict: """提取并解读论文中的图表(异步任务)""" - from packages.domain.task_tracker import global_tracker # 先验证论文和 PDF with session_scope() as session: diff --git a/apps/api/routers/pipelines.py b/apps/api/routers/pipelines.py index 5fb47d8..62922ce 100644 --- a/apps/api/routers/pipelines.py +++ b/apps/api/routers/pipelines.py @@ -169,6 +169,6 @@ def get_task_result(task_id: str) -> dict: if not status: raise NotFoundError(f"Task {task_id} not found") if not status.get("finished"): - raise HTTPException(400, f"Task not finished yet") + raise HTTPException(400, "Task not finished yet") result = global_tracker.get_result(task_id) return result or {} diff --git a/apps/api/routers/settings.py b/apps/api/routers/settings.py index 400366d..320a8c3 100644 --- a/apps/api/routers/settings.py +++ b/apps/api/routers/settings.py @@ -4,7 +4,7 @@ from typing import Literal -from fastapi import APIRouter, HTTPException, Query +from fastapi import APIRouter, HTTPException from pydantic import BaseModel from apps.api.deps import iso_dt, settings @@ -45,14 +45,14 @@ class EmailConfigUpdate(BaseModel): class DailyReportConfigUpdate(BaseModel): - """更新每日报告配置请求""" + """每日报告配置更新请求""" enabled: bool | None = None auto_deep_read: bool | None = None deep_read_limit: int | None = None send_email_report: bool | None = None recipient_emails: str | None = None - report_time_utc: int | None = None + cron_expression: str | None = None # 新增:cron 表达式配置 include_paper_details: bool | None = None include_graph_insights: bool | None = None diff --git a/apps/api/routers/system.py b/apps/api/routers/system.py index 5ae520a..4f0da62 100644 --- a/apps/api/routers/system.py +++ b/apps/api/routers/system.py @@ -58,6 +58,6 @@ def system_status() -> dict: @router.get("/metrics/costs") -def cost_metrics(days: int = Query(default=7, ge=1, le=90)) -> dict: +def cost_metrics(days: int = Query(default=7, ge=0, le=3650)) -> dict: with session_scope() as session: return PromptTraceRepository(session).summarize_costs(days=days) diff --git a/apps/api/routers/topics.py b/apps/api/routers/topics.py index 383fd87..23c49de 100644 --- a/apps/api/routers/topics.py +++ b/apps/api/routers/topics.py @@ -38,7 +38,8 @@ def _topic_dict(t, session=None) -> dict: } if session is not None: from sqlalchemy import func, select - from packages.storage.models import PaperTopic, CollectionAction + + from packages.storage.models import CollectionAction, PaperTopic # 论文计数 cnt = session.scalar( @@ -237,3 +238,34 @@ def ingest_references_status(task_id: str) -> dict: if not task: raise HTTPException(404, "Task not found") return task + + +# ---------- 统计 ---------- + + +@router.get("/topics/stats") +def topic_stats() -> dict: + """主题维度统计(30s 缓存)""" + from apps.api.deps import cache + + cached = cache.get("topic_stats") + if cached is not None: + return cached + with session_scope() as session: + result = PaperRepository(session).topic_stats() + cache.set("topic_stats", result, ttl=30) + return result + + +@router.get("/topics/distribution") +def paper_distribution() -> dict: + """论文分布统计:年份分布 + 来源分布(30s 缓存)""" + from apps.api.deps import cache + + cached = cache.get("paper_distribution") + if cached is not None: + return cached + with session_scope() as session: + result = PaperRepository(session).paper_distribution_stats() + cache.set("paper_distribution", result, ttl=30) + return result diff --git a/apps/desktop/server.py b/apps/desktop/server.py index a9e73df..4a614c8 100644 --- a/apps/desktop/server.py +++ b/apps/desktop/server.py @@ -3,6 +3,7 @@ Tauri sidecar 调用此二进制,自动选端口 + 内嵌 scheduler。 @author Color2333 """ + from __future__ import annotations import json @@ -47,6 +48,7 @@ def _apply_env_overrides(data_dir: Path, env_file: Path | None) -> None: if env_file and env_file.is_file(): os.environ["PAPERMIND_ENV_FILE"] = str(env_file) from dotenv import load_dotenv + load_dotenv(env_file, override=True) logger.info("Loaded .env from %s", env_file) @@ -54,6 +56,7 @@ def _apply_env_overrides(data_dir: Path, env_file: Path | None) -> None: def _start_scheduler() -> None: """后台线程运行 APScheduler(复用 worker 逻辑)""" from apps.worker.main import run_worker + t = threading.Thread(target=run_worker, daemon=True, name="scheduler") t.start() logger.info("Embedded scheduler started on background thread") @@ -74,7 +77,9 @@ def main() -> None: os.environ["API_HOST"] = "127.0.0.1" os.environ["API_PORT"] = str(port) - os.environ["CORS_ALLOW_ORIGINS"] = f"tauri://localhost,https://tauri.localhost,http://127.0.0.1:{port}" + os.environ["CORS_ALLOW_ORIGINS"] = ( + f"tauri://localhost,https://tauri.localhost,http://127.0.0.1:{port}" + ) # Tauri 通过 stdout 读取端口号(协议:首行 JSON) sys.stdout.write(json.dumps({"port": port}) + "\n") @@ -86,6 +91,7 @@ def main() -> None: _start_scheduler() import uvicorn + from apps.api.main import app def _handle_signal(sig, _frame): diff --git a/apps/worker/main.py b/apps/worker/main.py index 4ec40e9..3f55023 100644 --- a/apps/worker/main.py +++ b/apps/worker/main.py @@ -9,13 +9,14 @@ import logging import signal import time -from datetime import datetime, timezone +from datetime import UTC, datetime from pathlib import Path from threading import Event from apscheduler.schedulers.blocking import BlockingScheduler from apscheduler.triggers.cron import CronTrigger +from packages.ai.cs_feed_orchestrator import CSFeedOrchestrator from packages.ai.daily_runner import ( run_daily_brief, run_topic_ingest, @@ -65,6 +66,8 @@ def _retry_with_backoff(fn, *args, max_retries: int = 3, base_delay: float = 5.0 _RETRY_MAX = settings.worker_retry_max _RETRY_DELAY = settings.worker_retry_base_delay +cs_orchestrator = CSFeedOrchestrator() + def _should_run(freq: str, time_utc: int, hour: int, weekday: int) -> bool: """判断当前 UTC 小时是否匹配主题的调度规则""" @@ -81,7 +84,7 @@ def _should_run(freq: str, time_utc: int, hour: int, weekday: int) -> bool: def topic_dispatch_job() -> None: """每小时执行:检查哪些主题需要在当前小时触发""" - now = datetime.now(timezone.utc) + now = datetime.now(UTC) hour = now.hour weekday = now.weekday() # 0=Monday @@ -159,6 +162,12 @@ def weekly_graph_job() -> None: _write_heartbeat() +def cs_feed_dispatch_job(): + """每小时同步分类 + 执行订阅抓取""" + cs_orchestrator.sync_categories() + cs_orchestrator.run() + + def run_worker() -> None: """ Worker 主函数 - UTC 时间智能调度 @@ -188,9 +197,28 @@ def run_worker() -> None: ) logger.info("✅ 已添加:主题分发任务(每小时整点,UTC)") - # 每日简报(UTC 4 点生成,4 点半发送) - # 默认配置:DAILY_CRON=0 4 * * * - daily_trigger = CronTrigger.from_crontab(getattr(settings, "daily_cron", "0 4 * * *")) + # CS 分类订阅调度(每小时整点) + scheduler.add_job( + cs_feed_dispatch_job, + trigger=CronTrigger(minute=0), + id="cs_feed_dispatch", + replace_existing=True, + ) + logger.info("✅ 已添加:CS分类订阅调度任务(每小时整点,UTC)") + + # 每日简报(从数据库读取 cron 表达式) + from packages.storage.db import session_scope + from packages.storage.repositories import DailyReportConfigRepository + + try: + with session_scope() as session: + config = DailyReportConfigRepository(session).get_config() + daily_cron = config.cron_expression or "0 4 * * *" + except Exception as e: + logger.warning(f"从数据库读取 cron 失败:{e},使用默认值") + daily_cron = "0 4 * * *" + + daily_trigger = CronTrigger.from_crontab(daily_cron) scheduler.add_job( brief_job, trigger=daily_trigger, @@ -198,9 +226,8 @@ def run_worker() -> None: replace_existing=True, ) logger.info( - "✅ 已添加:每日简报任务(UTC %s,北京时间%s)", - getattr(settings, "daily_cron", "0 4 * * *"), - "12:00" if getattr(settings, "daily_cron", "").startswith("0 4") else "计算中", + "✅ 已添加:每日简报任务(cron: %s)", + daily_cron, ) # 每周图谱维护(UTC 周日 22 点 = 北京时间周一 6 点) diff --git a/docs/plans/2026-03-19-cs-category-feed-design.md b/docs/plans/2026-03-19-cs-category-feed-design.md new file mode 100644 index 0000000..6543af9 --- /dev/null +++ b/docs/plans/2026-03-19-cs-category-feed-design.md @@ -0,0 +1,254 @@ +# CS 分类订阅功能设计 + +## 概述 + +新增独立的 CS 分类订阅系统,用户可直接订阅 arXiv CS 下的细分领域(如 cs.CV、cs.LG),系统定时抓取最新论文入库。与现有的关键词主题订阅完全独立,UI 整合在 Topics 页面内通过 Tab 切换。 + +--- + +## 数据模型 + +### CSCategory(arXiv 分类缓存) + +```python +class CSCategory(Base): + code: str # "cs.CV" + name: str # "Computer Vision and Pattern Recognition" + description: str # "Covers image processing, computer vision, pattern recognition etc." + cached_at: datetime # 缓存时间,30 天后失效需刷新 +``` + +来源:启动时从 arXiv API `https://arxiv.org/api/categories` 动态拉取,存入 DB,30 天更新一次。 + +### CSFeedSubscription(用户订阅记录) + +```python +class CSFeedSubscription(Base): + id: UUID + category_code: str # "cs.CV" + user_id: str | None # 预留,多用户时区分;当前单用户可写死 + daily_limit: int # 每日配额(篇数) + enabled: bool # 是否启用 + status: str # "active" | "cool_down" | "paused" + cool_down_until: datetime # 熔断冷却截止时间 + last_run_at: datetime | None + last_run_count: int # 上次入库数量 + created_at: datetime +``` + +与 `TopicSubscription` 完全独立,不共用表。 + +--- + +## 调度服务 + +### CSFeedOrchestrator + +协调所有分类订阅的定时抓取,复用现有 Worker 的 APScheduler 调度框架。 + +#### 调度策略 + +- 触发频率:每小时整点执行一次(与主题订阅调度共用 Worker) +- 全局请求控制:每分钟最多 20 个 arXiv API 请求(token bucket 模式) +- 请求间隔:每个分类请求之间至少间隔 3 秒 +- 每日配额:用户设的 daily_limit 精确控制,精确到每个分类的每日已入库计数 + +#### 处理流程 + +``` +每小时触发 CSFeedOrchestrator.run() + ↓ +1. 加载所有 enabled=True 的 CSFeedSubscription + ↓ +2. 遍历每个订阅(按 category_code 字母序): + ┌─────────────────────────────────────────┐ + │ 检查 status == "cool_down" │ + │ → 当前时间 < cool_down_until → 跳过 │ + ├─────────────────────────────────────────┤ + │ 计算今日已入库数量 │ + │ → 今日已入库 >= daily_limit → 跳过 │ + ├─────────────────────────────────────────┤ + │ 检查全局 token bucket │ + │ → 桶已满 → 等到下一分钟或下一小时 │ + ├─────────────────────────────────────────┤ + │ 调用 ArxivClient.fetch_latest( │ + │ query=f"cat:{category_code}", │ + │ max_results=剩余配额, │ + │ days_back=7 │ + │ ) │ + ├─────────────────────────────────────────┤ + │ 成功 → upsert → 记录 CollectionAction │ + │ 触发 429 → status="cool_down", │ + │ cool_down_until=now+30min │ + │ 其他错误 → 记录日志,跳过该分类 │ + └─────────────────────────────────────────┘ + ↓ +3. 全部处理完 → 记录本次执行摘要日志 +``` + +#### 熔断机制 + +- 触发条件:arXiv 返回 429 Too Many Requests +- 冷却时间:30 分钟(cool_down_until = now + 30min) +- 恢复条件:冷却时间结束后自动恢复为 "active" + +--- + +## API 设计 + +### GET /cs-categories + +返回 arXiv CS 全部分类列表(供 UI 勾选用)。 + +``` +Response: +{ + "categories": [ + { + "code": "cs.CV", + "name": "Computer Vision and Pattern Recognition", + "description": "Covers image processing, computer vision, pattern recognition..." + }, + ... + ], + "updated_at": "2026-03-19T10:00:00Z" +} +``` + +### GET /cs-feeds + +返回当前用户的分类订阅列表。 + +``` +Response: +{ + "feeds": [ + { + "id": "uuid", + "category_code": "cs.CV", + "category_name": "Computer Vision and Pattern Recognition", + "daily_limit": 30, + "enabled": true, + "status": "active", + "last_run_at": "2026-03-19T08:00:00Z", + "last_run_count": 25 + } + ] +} +``` + +### POST /cs-feeds + +批量订阅分类。 + +``` +Body: +{ + "category_codes": ["cs.CV", "cs.LG"], + "daily_limit": 50, + "enabled": true +} + +Response: +{ + "created": 2, + "feeds": [...] +} +``` + +### DELETE /cs-feeds/{id} + +取消订阅。 + +### POST /cs-feeds/{id}/trigger + +手动触发一次抓取(立即执行,跳过定时)。 + +--- + +## 前端 UI + +### 页面结构 + +在 `/topics` 页面内新增 Tab: + +``` +[主题订阅] [分类订阅] +``` + +### 分类订阅 Tab 布局 + +``` +┌─────────────────────────────────────────────────────────┐ +│ arXiv CS 分类订阅 │ +│ ───────────────────────────────────────────────────── │ +│ 全局每日配额:[ 50 ] 篇 │ +│ │ +│ ┌─────────────────┐ ┌──────────────────────────────┐ │ +│ │ 分类列表(左侧) │ │ 已订阅列表(右侧) │ │ +│ │ │ │ │ │ +│ │ 🔍 搜索分类 │ │ cs.CV 30篇/天 [x] │ │ +│ │ │ │ cs.LG 50篇/天 [x] │ │ +│ │ ☑ cs.CV (CV) │ │ cs.CL 20篇/天 [x] │ │ +│ │ ☑ cs.LG (LG) │ │ │ │ +│ │ ☐ cs.CL (CL) │ │ 状态: 运行中 / 3个分类 │ │ +│ │ ☐ cs.AI (AI) │ │ │ │ +│ │ ☐ cs.RO (RO) │ │ [保存配置] │ │ +│ │ ... │ │ │ │ +│ └─────────────────┘ └──────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +- 左侧:可搜索的分类列表,带复选框 +- 右侧:已勾选的分类 + 各自配额(可编辑)+ 删除按钮 +- 底部:保存配置按钮 + +### 分类来源 + +调用 `GET /cs-categories` 获取分类列表,缓存 30 天。 + +--- + +## 文件变更 + +### 后端新增 + +| 文件 | 说明 | +|------|------| +| `packages/storage/models.py` | 新增 `CSCategory`、`CSFeedSubscription` 模型 | +| `packages/storage/repositories.py` | 新增 `CSFeedRepository` | +| `packages/integrations/arxiv_client.py` | 新增 `fetch_categories()` 方法 | +| `packages/ai/cs_feed_orchestrator.py` | 新增 `CSFeedOrchestrator` 调度服务 | +| `apps/api/routers/cs_feeds.py` | 新增分类订阅 REST API | +| `apps/worker/main.py` | 注册 `CSFeedOrchestrator` 调度任务 | + +### 后端修改 + +| 文件 | 说明 | +|------|------| +| `apps/api/main.py` | 注册 `cs_feeds` router | +| `packages/storage/repositories.py` | `BaseQuery` 可复用 | + +### 前端新增 + +| 文件 | 说明 | +|------|------| +| `frontend/src/pages/CSFeeds.tsx` | 分类订阅 Tab 页面 | +| `frontend/src/services/api.ts` | 新增 `csFeedApi` | + +### 前端修改 + +| 文件 | 说明 | +|------|------| +| `frontend/src/pages/Topics.tsx` | 新增 Tab 切换 + `` 组件 | +| `frontend/src/components/Sidebar.tsx` | 如需要可添加分类订阅入口 | + +--- + +## 实现顺序 + +1. **后端基础** — 数据模型 + 分类获取 + Repository +2. **后端 API** — REST 接口 +3. **调度服务** — Orchestrator(含限流 + 熔断) +4. **前端 UI** — Tab + 分类列表 + 订阅管理 +5. **集成测试** — 手动触发 + 观察限流行为 diff --git a/docs/plans/2026-03-19-cs-category-feed-implementation.md b/docs/plans/2026-03-19-cs-category-feed-implementation.md new file mode 100644 index 0000000..3715915 --- /dev/null +++ b/docs/plans/2026-03-19-cs-category-feed-implementation.md @@ -0,0 +1,719 @@ +# CS 分类订阅功能实施计划 + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** 新增独立的 arXiv CS 分类订阅系统,用户可订阅分类并控制每日配额,系统自动调度抓取 + +**Architecture:** 新增 `CSCategory` + `CSFeedSubscription` 数据模型,独立于 TopicSubscription;新增 `CSFeedOrchestrator` 调度服务管理限流和熔断;REST API 挂载在 `/cs-categories` 和 `/cs-feeds`;前端在 Topics 页面内新增 Tab + +**Tech Stack:** FastAPI + SQLite + APScheduler + React + +--- + +## Phase 1: 数据模型 + +### Task 1: 新增 CSCategory 和 CSFeedSubscription 模型 + +**Files:** +- Modify: `packages/storage/models.py` +- Reference: `packages/storage/models.py:189-209` (TopicSubscription 作为参考) + +**Step 1: 添加 CSCategory 模型** + +在 `models.py` 末尾添加: + +```python +class CSCategory(Base): + __tablename__ = "cs_categories" + + code: str = Field(primary_key) # "cs.CV" + name: str = Field(nullable=False) + description: str = Field(default="") + cached_at: datetime = Field(default_factory=datetime.utcnow) + + +class CSFeedSubscription(Base): + __tablename__ = "cs_feed_subscriptions" + + id: UUID = Field(default_factory=uuid4, primary_key=True) + category_code: str = Field(nullable=False) + daily_limit: int = Field(default=30) + enabled: bool = Field(default=True) + status: str = Field(default="active") # active | cool_down | paused + cool_down_until: datetime | None = Field(default=None) + last_run_at: datetime | None = Field(default=None) + last_run_count: int = Field(default=0) + created_at: datetime = Field(default_factory=datetime.utcnow) +``` + +**Step 2: 验证模型可导入** + +Run: `cd /Users/haojiang/Documents/2026/PaperMind && python -c "from packages.storage.models import CSCategory, CSFeedSubscription; print('OK')"` + +--- + +### Task 2: 新增 CSFeedRepository + +**Files:** +- Modify: `packages/storage/repositories.py` +- Reference: `packages/storage/repositories.py:964-1062` (TopicRepository 作为参考) + +**Step 1: 在 repositories.py 添加 CSFeedRepository** + +在文件末尾添加: + +```python +class CSFeedRepository: + def __init__(self, session: Session): + self.session = session + + def get_categories(self) -> list[CSCategory]: + return list(self.session.execute(select(CSCategory)).scalars()) + + def upsert_category(self, code: str, name: str, description: str = "") -> CSCategory: + existing = self.session.execute(select(CSCategory).where(CSCategory.code == code)).scalar_one_or_none() + if existing: + existing.name = name + existing.description = description + existing.cached_at = datetime.utcnow() + return existing + cat = CSCategory(code=code, name=name, description=description) + self.session.add(cat) + self.session.commit() + return cat + + def get_subscriptions(self) -> list[CSFeedSubscription]: + return list(self.session.execute(select(CSFeedSubscription)).scalars()) + + def get_subscription(self, category_code: str) -> CSFeedSubscription | None: + return self.session.execute( + select(CSFeedSubscription).where(CSFeedSubscription.category_code == category_code) + ).scalar_one_or_none() + + def upsert_subscription(self, category_code: str, daily_limit: int, enabled: bool = True) -> CSFeedSubscription: + existing = self.get_subscription(category_code) + if existing: + existing.daily_limit = daily_limit + existing.enabled = enabled + self.session.commit() + return existing + sub = CSFeedSubscription(category_code=category_code, daily_limit=daily_limit, enabled=enabled) + self.session.add(sub) + self.session.commit() + return sub + + def delete_subscription(self, category_code: str) -> bool: + sub = self.get_subscription(category_code) + if sub: + self.session.delete(sub) + self.session.commit() + return True + return False + + def update_run_status(self, category_code: str, count: int): + sub = self.get_subscription(category_code) + if sub: + sub.last_run_at = datetime.utcnow() + sub.last_run_count = count + sub.status = "active" + self.session.commit() + + def set_cool_down(self, category_code: str, until: datetime): + sub = self.get_subscription(category_code) + if sub: + sub.status = "cool_down" + sub.cool_down_until = until + self.session.commit() + + def get_active_subscriptions(self) -> list[CSFeedSubscription]: + return list(self.session.execute( + select(CSFeedSubscription).where(CSFeedSubscription.enabled == True) + ).scalars()) +``` + +**Step 2: 验证 repository 可导入** + +Run: `cd /Users/haojiang/Documents/2026/PaperMind && python -c "from packages.storage.repositories import CSFeedRepository; print('OK')"` + +--- + +## Phase 2: arXiv 分类获取 + +### Task 3: arxiv_client 新增 fetch_categories + +**Files:** +- Modify: `packages/integrations/arxiv_client.py` +- Reference: `packages/integrations/arxiv_client.py:58-117` (fetch_latest 作为参考) + +**Step 1: 添加 fetch_categories 方法** + +在 `ArxivClient` 类中添加: + +```python +def fetch_categories(self) -> list[dict]: + """从 arXiv API 获取 CS 分类列表""" + url = "https://arxiv.org/api/categories" + acquire_api("arxiv", timeout=30) + response = self.client.get(url, timeout=30) + response.raise_for_status() + root = ElementTree.fromstring(response.text) + categories = [] + for cat in root.findall("category"): + code = cat.find("code").text or "" + if code.startswith("cs."): + categories.append({ + "code": code, + "name": cat.find("name").text or "", + "description": cat.find("description").text or "", + }) + return categories +``` + +**Step 2: 验证方法可调用** + +Run: `cd /Users/haojiang/Documents/2026/PaperMind && python -c "from packages.integrations.arxiv_client import ArxivClient; print(ArxivClient().fetch_categories()[:3])"` + +--- + +## Phase 3: REST API + +### Task 4: 新增 cs_feeds router + +**Files:** +- Create: `apps/api/routers/cs_feeds.py` +- Reference: `apps/api/routers/topics.py` (作为 API 风格参考) + +**Step 1: 编写 API 路由** + +```python +from fastapi import APIRouter, Depends +from pydantic import BaseModel +from packages.storage.repositories import CSFeedRepository +from packages.integrations.arxiv_client import ArxivClient + +router = APIRouter(prefix="/cs", tags=["cs-feeds"]) + + +class CategoryInfo(BaseModel): + code: str + name: str + description: str + + +class CSFeedItem(BaseModel): + category_code: str + category_name: str + daily_limit: int + enabled: bool + status: str + last_run_at: str | None + last_run_count: int + + +class SubscribeRequest(BaseModel): + category_codes: list[str] + daily_limit: int = 30 + enabled: bool = True + + +def get_repo(): + from packages.storage.database import SessionLocal + session = SessionLocal() + try: + yield CSFeedRepository(session) + finally: + session.close() + + +@router.get("/categories") +def list_categories(repo: CSFeedRepository = Depends(get_repo)): + categories = repo.get_categories() + return {"categories": [CategoryInfo.model_validate(c).__dict__ for c in categories]} + + +@router.get("/feeds") +def list_feeds(repo: CSFeedRepository = Depends(get_repo)): + feeds = repo.get_subscriptions() + categories = {c.code: c.name for c in repo.get_categories()} + return { + "feeds": [ + { + "category_code": f.category_code, + "category_name": categories.get(f.category_code, f.category_code), + "daily_limit": f.daily_limit, + "enabled": f.enabled, + "status": f.status, + "last_run_at": f.last_run_at.isoformat() if f.last_run_at else None, + "last_run_count": f.last_run_count, + } + for f in feeds + ] + } + + +@router.post("/feeds") +def subscribe(req: SubscribeRequest, repo: CSFeedRepository = Depends(get_repo)): + created = [] + for code in req.category_codes: + sub = repo.upsert_subscription(code, req.daily_limit, req.enabled) + created.append({ + "category_code": sub.category_code, + "daily_limit": sub.daily_limit, + "enabled": sub.enabled, + }) + return {"created": len(created), "feeds": created} + + +@router.delete("/feeds/{category_code}") +def unsubscribe(category_code: str, repo: CSFeedRepository = Depends(get_repo)): + deleted = repo.delete_subscription(category_code) + return {"deleted": deleted} +``` + +**Step 2: 注册 router** + +Modify `apps/api/main.py`,在 `app.include_router(topics.router)` 后添加: + +```python +from apps.api.routers import cs_feeds +app.include_router(cs_feeds.router) +``` + +--- + +## Phase 4: 调度服务 + +### Task 5: 新增 CSFeedOrchestrator + +**Files:** +- Create: `packages/ai/cs_feed_orchestrator.py` +- Reference: `packages/ai/daily_runner.py:119-287` (run_topic_ingest 作为参考) + +**Step 1: 编写 Orchestrator** + +```python +from datetime import datetime, timedelta +import threading +import time +import logging + +from packages.integrations.arxiv_client import ArxivClient +from packages.storage.database import SessionLocal +from packages.storage.repositories import CSFeedRepository + +logger = logging.getLogger(__name__) + +TOKEN_BUCKET_SIZE = 20 # 每分钟最多 20 请求 +TOKEN_FILL_RATE = 20 # 每分钟补充 20 个 token +REQUEST_INTERVAL = 3 # 每请求间隔 3 秒 +COOL_DOWN_MINUTES = 30 # 熔断冷却时间 + + +class TokenBucket: + def __init__(self, size: int, fill_rate: int): + self.size = size + self.tokens = size + self.fill_rate = fill_rate + self.last_refill = time.time() + self.lock = threading.Lock() + + def acquire(self, timeout: float = 60) -> bool: + while True: + with self.lock: + self._refill() + if self.tokens >= 1: + self.tokens -= 1 + return True + if time.time() - self.last_refill > timeout: + return False + time.sleep(1) + + def _refill(self): + now = time.time() + elapsed = now - self.last_refill + new_tokens = elapsed * (self.fill_rate / 60) + self.tokens = min(self.size, self.tokens + new_tokens) + self.last_refill = now + + +class CSFeedOrchestrator: + def __init__(self): + self.bucket = TokenBucket(TOKEN_BUCKET_SIZE, TOKEN_FILL_RATE) + + def sync_categories(self): + """从 arXiv 拉取分类并写入 DB""" + client = ArxivClient() + cats = client.fetch_categories() + repo = CSFeedRepository(SessionLocal()) + for c in cats: + repo.upsert_category(c["code"], c["name"], c.get("description", "")) + logger.info("[CSFeed] Synced %d categories", len(cats)) + + def run(self): + """每小时执行一次""" + repo = CSFeedRepository(SessionLocal()) + subs = repo.get_active_subscriptions() + + for sub in subs: + now = datetime.utcnow() + + # 冷却中检查 + if sub.status == "cool_down" and sub.cool_down_until: + if now < sub.cool_down_until: + logger.info("[CSFeed] Skipping %s (cool down until %s)", sub.category_code, sub.cool_down_until) + continue + + # 每日配额检查 + today_start = now.replace(hour=0, minute=0, second=0, microsecond=0) + if sub.last_run_at and sub.last_run_at >= today_start: + remaining = sub.daily_limit - sub.last_run_count + else: + remaining = sub.daily_limit + + if remaining <= 0: + logger.info("[CSFeed] Skipping %s (daily limit reached)", sub.category_code) + continue + + # 请求间隔 + if not self.bucket.acquire(timeout=30): + logger.warning("[CSFeed] Token bucket timeout, skipping %s", sub.category_code) + continue + time.sleep(REQUEST_INTERVAL) + + # 抓取 + try: + client = ArxivClient() + papers = client.fetch_latest( + query=f"cat:{sub.category_code}", + max_results=remaining, + days_back=7, + ) + # upsert papers (复用现有逻辑) + from packages.storage.repositories import PaperRepository + paper_repo = PaperRepository(SessionLocal()) + count = 0 + for p in papers: + paper_repo.upsert_paper(p) + count += 1 + + repo.update_run_status(sub.category_code, count) + logger.info("[CSFeed] %s: ingested %d papers", sub.category_code, count) + + except Exception as e: + err_str = str(e) + if "429" in err_str or "Too Many Requests" in err_str: + repo.set_cool_down(sub.category_code, now + timedelta(minutes=COOL_DOWN_MINUTES)) + logger.warning("[CSFeed] Rate limited %s, cool down 30min", sub.category_code) + else: + logger.error("[CSFeed] Error fetching %s: %s", sub.category_code, e) +``` + +**Step 2: 注册定时任务** + +Modify `apps/worker/main.py`,在 `topic_dispatch_job` 旁边添加: + +```python +from packages.ai.cs_feed_orchestrator import CSFeedOrchestrator + +cs_orchestrator = CSFeedOrchestrator() + +def cs_feed_dispatch_job(): + """每小时同步分类 + 执行订阅抓取""" + cs_orchestrator.sync_categories() + cs_orchestrator.run() +``` + +并在 APScheduler 注册: + +```python +scheduler.add_job(cs_feed_dispatch_job, "cron", minute=0, id="cs_feed_dispatch") +``` + +--- + +## Phase 5: 前端 UI + +### Task 6: 新增 CSFeeds 前端页面 + +**Files:** +- Create: `frontend/src/pages/CSFeeds.tsx` +- Reference: `frontend/src/pages/Topics.tsx` (作为 UI 风格参考) + +**Step 1: 编写 CSFeeds 页面组件** + +```tsx +import { useEffect, useState } from "react"; +import { Loader2, RefreshCw, CheckCircle2, XCircle, Search } from "lucide-react"; +import { topicApi } from "@/services/api"; + +interface CSCategory { + code: string; + name: string; + description: string; +} + +interface CSFeed { + category_code: string; + category_name: string; + daily_limit: number; + enabled: boolean; + status: string; + last_run_at: string | null; + last_run_count: number; +} + +const ARXIV_CS_PREFIXES = [ + "cs.CV", "cs.LG", "cs.CL", "cs.AI", "cs.NE", "cs.CR", "cs.DB", + "cs.DC", "cs.DL", "cs.DM", "cs.DS", "cs.ET", "cs.FL", "cs.GL", + "cs.GR", "cs.GT", "cs.HC", "cs.IR", "cs.IT", "cs.LO", "cs.MA", + "cs.MM", "cs.MS", "cs.NA", "cs.NI", "cs.OH", "cs.OS", "cs.PL", + "cs.RO", "cs.SC", "cs.SD", "cs.SE", "cs.SI", "cs.SY", +]; + +export default function CSFeeds() { + const [categories, setCategories] = useState([]); + const [feeds, setFeeds] = useState([]); + const [loading, setLoading] = useState(true); + const [search, setSearch] = useState(""); + const [globalLimit, setGlobalLimit] = useState(30); + const [saving, setSaving] = useState(false); + + async function loadData() { + setLoading(true); + try { + const [catRes, feedRes] = await Promise.all([ + topicApi.csCategories(), + topicApi.csFeeds(), + ]); + setCategories(catRes.categories || []); + setFeeds(feedRes.feeds || []); + } finally { + setLoading(false); + } + } + + useEffect(() => { loadData(); }, []); + + const subscribedCodes = new Set(feeds.map(f => f.category_code)); + const filtered = categories.filter(c => + c.code.toLowerCase().includes(search.toLowerCase()) || + c.name.toLowerCase().includes(search.toLowerCase()) + ); + + async function toggleCategory(code: string) { + if (subscribedCodes.has(code)) { + await topicApi.csFeedDelete(code); + } else { + await topicApi.csFeedCreate({ category_codes: [code], daily_limit: globalLimit }); + } + await loadData(); + } + + if (loading) { + return ( +
+ +
+ ); + } + + return ( +
+
+

arXiv CS 分类订阅

+

订阅 CS 细分领域,自动抓取最新论文

+
+ +
+ + setGlobalLimit(Number(e.target.value))} + className="w-20 rounded-lg border bg-background px-3 py-1.5 text-sm" + min={1} + max={200} + /> + 篇/分类/天 + +
+ +
+
+
+ + setSearch(e.target.value)} + className="flex-1 rounded-lg border bg-background px-3 py-1.5 text-sm" + /> +
+
+ {filtered.slice(0, 40).map(c => ( + + ))} +
+
+ +
+

已订阅 ({feeds.length} 个分类)

+ {feeds.length === 0 ? ( +

左侧勾选要订阅的分类

+ ) : ( +
+ {feeds.map(f => ( +
+
+
+ {f.category_code} + + {f.status === "active" ? "运行中" : f.status === "cool_down" ? "冷却中" : "已暂停"} + +
+
+ 配额 {f.daily_limit}/天 · 上次入库 {f.last_run_count} 篇 + {f.last_run_at && ` · ${new Date(f.last_run_at).toLocaleDateString()}`} +
+
+ +
+ ))} +
+ )} +
+
+
+ ); +} +``` + +**Step 2: 添加 API 方法** + +Modify `frontend/src/services/api.ts`,添加: + +```ts +csCategories(): Promise<{ categories: CSCategory[] }> +csFeeds(): Promise<{ feeds: CSFeed[] }> +csFeedCreate(req: { category_codes: string[]; daily_limit: number }): Promise +csFeedDelete(categoryCode: string): Promise +``` + +--- + +### Task 7: Topics 页面添加 Tab 切换 + +**Files:** +- Modify: `frontend/src/pages/Topics.tsx` + +**Step 1: 在 Topics.tsx 顶部添加 Tab 状态** + +```tsx +const [activeTab, setActiveTab] = useState<"topics" | "cs-feeds">("topics"); +``` + +**Step 2: 在 Topics 组件 return 的顶部添加 Tab 栏** + +在 `
` 之前添加: + +```tsx +
+ + +
+``` + +**Step 3: 根据 Tab 条件渲染** + +把 `return` 的主体用条件包裹: + +```tsx +if (activeTab === "cs-feeds") { + return ; +} + +return ( +
+ {/* 原有的 Topics 页面内容 */} +
+); +``` + +**Step 4: 导入 CSFeeds** + +在文件顶部添加: + +```tsx +import CSFeeds from "./CSFeeds"; +``` + +--- + +## Phase 6: 数据库迁移 + +### Task 8: 生成并执行数据库迁移 + +**Step 1: 生成迁移** + +Run: `cd /Users/haojiang/Documents/2026/PaperMind/infra && alembic revision --autogenerate -m "add cs_categories and cs_feed_subscriptions"` + +**Step 2: 执行迁移** + +Run: `cd /Users/haojiang/Documents/2026/PaperMind/infra && alembic upgrade head` + +--- + +## 验证清单 + +- [ ] `GET /cs/categories` 返回 CS 分类列表 +- [ ] `POST /cs/feeds` 可订阅分类 +- [ ] `GET /cs/feeds` 可查看已订阅列表 +- [ ] `DELETE /cs/feeds/{code}` 可取消订阅 +- [ ] Topics 页面 Tab 切换正常 +- [ ] 分类订阅 Tab 显示分类列表和已订阅状态 +- [ ] 勾选分类后 `topics.csFeedCreate` API 被调用 +- [ ] Worker 每小时整点触发 `cs_feed_dispatch_job` +- [ ] 触发 429 后分类进入 30 分钟冷却 +- [ ] 每日配额控制生效 diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 3f74040..d9bc2dd 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -25,6 +25,7 @@ const Pipelines = lazy(() => import("@/pages/Pipelines")); const Operations = lazy(() => import("@/pages/Operations")); const EmailSettings = lazy(() => import("@/pages/EmailSettings")); const Writing = lazy(() => import("@/pages/Writing")); +const Statistics = lazy(() => import("@/pages/Statistics")); import LoginPage from "@/pages/Login"; import { isAuthenticated as checkAuth, clearAuth } from "@/services/api"; @@ -110,6 +111,7 @@ export default function App() { }>} /> }>} /> }>} /> + }>} /> {/* 常见拼写重定向 */} } /> diff --git a/frontend/src/components/SettingsDialog.tsx b/frontend/src/components/SettingsDialog.tsx index c8a09dc..ea8d3da 100644 --- a/frontend/src/components/SettingsDialog.tsx +++ b/frontend/src/components/SettingsDialog.tsx @@ -1074,20 +1074,26 @@ function EmailTab() { disabled={submitting} className="w-full rounded border border-border bg-surface px-2 py-1.5 text-xs text-ink placeholder:text-ink-placeholder" /> -
- + {/* Cron 表达式配置 */} +
+ handleInputChange("report_time_utc", parseInt(e.target.value) || 21)} - onBlur={() => handleInputBlur("report_time_utc")} + type="text" + placeholder="0 4 * * *" + value={localConfig?.cron_expression ?? dailyReportConfig.cron_expression ?? "0 4 * * *"} + onChange={(e) => handleInputChange("cron_expression", e.target.value)} + onBlur={() => handleInputBlur("cron_expression")} disabled={submitting} - className="w-16 rounded border border-border bg-surface px-2 py-0.5 text-center text-xs text-ink" + className="w-full rounded border border-border bg-surface px-2 py-1.5 text-xs font-mono text-ink placeholder:text-ink-placeholder" /> - (北京时间 = UTC + 8) +

+ 默认:0 4 * * *(UTC 4 点 = 北京时间 12 点) +
+ 格式:分 时 日 月 周 +

+ {/* 旧的 report_time_utc 保留但隐藏,向后兼容 */} +
)}
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx index 58d0563..289d333 100644 --- a/frontend/src/components/Sidebar.tsx +++ b/frontend/src/components/Sidebar.tsx @@ -28,6 +28,7 @@ import { Search, Menu, X, + BarChart3, PenTool, Loader2, LogOut, @@ -43,6 +44,7 @@ const TOOLS = [ { to: "/wiki", icon: BookOpen, label: "Wiki", accent: false }, { to: "/brief", icon: Newspaper, label: "研究简报", accent: false }, { to: "/dashboard", icon: LayoutDashboard, label: "看板", accent: false }, + { to: "/statistics", icon: BarChart3, label: "主题统计", accent: false }, ]; function useDarkMode() { diff --git a/frontend/src/pages/Agent.tsx b/frontend/src/pages/Agent.tsx index 323b2e3..41479e5 100644 --- a/frontend/src/pages/Agent.tsx +++ b/frontend/src/pages/Agent.tsx @@ -53,20 +53,61 @@ interface Ability { const ABILITIES: Ability[] = [ { icon: Search, label: "搜索论文", prefix: "帮我搜索关于 ", placeholder: "输入搜索关键词..." }, - { icon: Download, label: "下载入库", prefix: "从 arXiv 下载关于 ", placeholder: "输入主题关键词..." }, + { + icon: Download, + label: "下载入库", + prefix: "从 arXiv 下载关于 ", + placeholder: "输入主题关键词...", + }, { icon: Brain, label: "知识问答", prefix: "基于知识库回答:", placeholder: "输入你的问题..." }, - { icon: FileText, label: "生成 Wiki", prefix: "帮我生成一篇关于 ", placeholder: "输入 Wiki 主题..." }, - { icon: Newspaper, label: "生成简报", prefix: "帮我生成今日的研究简报", placeholder: "", direct: true }, + { + icon: FileText, + label: "生成 Wiki", + prefix: "帮我生成一篇关于 ", + placeholder: "输入 Wiki 主题...", + }, + { + icon: Newspaper, + label: "生成简报", + prefix: "帮我生成今日的研究简报", + placeholder: "", + direct: true, + }, ]; /* ========== 快捷建议(空状态卡片) ========== */ const SUGGESTIONS = [ - { icon: Search, label: "搜索调研", desc: "搜索特定领域论文", prompt: "帮我搜索关于 3D Gaussian Splatting 的最新论文" }, - { icon: Download, label: "下载论文", desc: "从 arXiv 获取并分析", prompt: "从 arXiv 下载最新的大语言模型相关论文,然后帮我粗读分析" }, - { icon: BookOpen, label: "论文分析", desc: "粗读/精读已有论文", prompt: "帮我分析库中最近的论文,先粗读再挑选重要的精读" }, - { icon: Brain, label: "知识问答", desc: "基于知识库回答", prompt: "基于知识库回答:什么是 attention mechanism?有哪些变体?" }, - { icon: FileText, label: "生成 Wiki", desc: "生成主题综述", prompt: "帮我生成一篇关于 Neural Radiance Fields 的 Wiki 综述" }, + { + icon: Search, + label: "搜索调研", + desc: "搜索特定领域论文", + prompt: "帮我搜索关于 3D Gaussian Splatting 的最新论文", + }, + { + icon: Download, + label: "下载论文", + desc: "从 arXiv 获取并分析", + prompt: "从 arXiv 下载最新的大语言模型相关论文,然后帮我粗读分析", + }, + { + icon: BookOpen, + label: "论文分析", + desc: "粗读/精读已有论文", + prompt: "帮我分析库中最近的论文,先粗读再挑选重要的精读", + }, + { + icon: Brain, + label: "知识问答", + desc: "基于知识库回答", + prompt: "基于知识库回答:什么是 attention mechanism?有哪些变体?", + }, + { + icon: FileText, + label: "生成 Wiki", + desc: "生成主题综述", + prompt: "帮我生成一篇关于 Neural Radiance Fields 的 Wiki 综述", + }, { icon: Newspaper, label: "生成简报", desc: "生成研究日报", prompt: "帮我生成今日的研究简报" }, ]; @@ -100,8 +141,17 @@ function getToolMeta(name: string) { export default function Agent() { const navigate = useNavigate(); const { - items, loading, pendingActions, confirmingActions, canvas, - hasPendingConfirm, setCanvas, sendMessage, handleConfirm, handleReject, stopGeneration, + items, + loading, + pendingActions, + confirmingActions, + canvas, + hasPendingConfirm, + setCanvas, + sendMessage, + handleConfirm, + handleReject, + stopGeneration, } = useAgentSession(); const [input, setInput] = useState(""); @@ -146,33 +196,42 @@ export default function Agent() { const inputDisabled = loading || hasPendingConfirm; - const handleAbilityClick = useCallback((ability: Ability) => { - if (ability.direct) { + const handleAbilityClick = useCallback( + (ability: Ability) => { + if (ability.direct) { + isAtBottomRef.current = true; + sendMessage(ability.prefix).catch(() => {}); + return; + } + setActiveAbility(ability); + setInput(ability.prefix); + requestAnimationFrame(() => textareaRef.current?.focus()); + }, + [sendMessage] + ); + + const handleSend = useCallback( + async (text: string) => { + const savedInput = text; isAtBottomRef.current = true; - sendMessage(ability.prefix).catch(() => {}); - return; - } - setActiveAbility(ability); - setInput(ability.prefix); - requestAnimationFrame(() => textareaRef.current?.focus()); - }, [sendMessage]); - - const handleSend = useCallback(async (text: string) => { - const savedInput = text; - isAtBottomRef.current = true; - setInput(""); - setActiveAbility(null); - try { - await sendMessage(text); - } catch { - setInput(savedInput); - } - }, [sendMessage]); + setInput(""); + setActiveAbility(null); + try { + await sendMessage(text); + } catch { + setInput(savedInput); + } + }, + [sendMessage] + ); - const handleConfirmAction = useCallback((actionId: string) => { - isAtBottomRef.current = true; - handleConfirm(actionId); - }, [handleConfirm]); + const handleConfirmAction = useCallback( + (actionId: string) => { + isAtBottomRef.current = true; + handleConfirm(actionId); + }, + [handleConfirm] + ); const handleKeyDown = (e: React.KeyboardEvent) => { if (e.key === "Enter" && !e.shiftKey) { @@ -190,20 +249,27 @@ export default function Agent() {
{/* 主对话区域 */}
-
+
{items.length === 0 ? ( handleSend(p)} /> ) : (
{items.map((item, idx) => { - const retryFn = item.type === "error" ? (() => { - for (let i = idx - 1; i >= 0; i--) { - if (items[i].type === "user") { - handleSend(items[i].content); - return; - } - } - }) : undefined; + const retryFn = + item.type === "error" + ? () => { + for (let i = idx - 1; i >= 0; i--) { + if (items[i].type === "user") { + handleSend(items[i].content); + return; + } + } + } + : undefined; return ( setCanvas({ title, markdown: content, isHtml })} + onOpenArtifact={(title, content, isHtml) => + setCanvas({ title, markdown: content, isHtml }) + } onRetry={retryFn} /> ); })} {loading && items[items.length - 1]?.type !== "action_confirm" && ( -
+
- - - + + +
)} @@ -237,7 +305,7 @@ export default function Agent() { isAtBottomRef.current = true; endRef.current?.scrollIntoView({ behavior: "smooth" }); }} - className="absolute bottom-4 left-1/2 z-10 flex -translate-x-1/2 items-center gap-1.5 rounded-full border border-border bg-surface px-3 py-1.5 text-xs font-medium text-ink-secondary shadow-lg transition-all hover:bg-hover hover:text-ink" + className="border-border bg-surface text-ink-secondary hover:bg-hover hover:text-ink absolute bottom-4 left-1/2 z-10 flex -translate-x-1/2 items-center gap-1.5 rounded-full border px-3 py-1.5 text-xs font-medium shadow-lg transition-all" > 回到底部 @@ -246,10 +314,10 @@ export default function Agent() {
{/* 输入区域 */} -
+
{hasPendingConfirm && ( -
+
请先处理上方的确认请求,再继续对话
@@ -263,14 +331,16 @@ export default function Agent() { return (