diff --git a/.env.sample b/.env.sample new file mode 100644 index 0000000..399ba8f --- /dev/null +++ b/.env.sample @@ -0,0 +1,89 @@ +# CodeDog 环境变量示例文件 +# 复制此文件为 .env 并填入您的实际配置值 + +# ===== 平台配置 ===== +# 选择一个平台: GitHub 或 GitLab + +# GitHub 配置 +GITHUB_TOKEN="your_github_personal_access_token" + +# GitLab 配置 +# 如果使用 GitLab 而不是 GitHub +# GITLAB_TOKEN="your_gitlab_personal_access_token" +# 对于自托管实例,修改为您的 GitLab URL +# GITLAB_URL="https://gitlab.com" + +# ===== LLM 配置 ===== +# 选择一种配置方式: OpenAI, Azure OpenAI 或 DeepSeek + +# OpenAI 配置 +# 标准 OpenAI API +OPENAI_API_KEY="your_openai_api_key" + +# Azure OpenAI 配置 +# 如果使用 Azure 的 OpenAI 服务 +# AZURE_OPENAI="true" +# AZURE_OPENAI_API_KEY="your_azure_openai_api_key" +# AZURE_OPENAI_API_BASE="https://your-instance.openai.azure.com/" +# 可选,默认会使用一个较新的版本 +# AZURE_OPENAI_API_VERSION="2023-05-15" +# 用于代码摘要和评审的 GPT-3.5 部署 +# AZURE_OPENAI_DEPLOYMENT_ID="your_gpt35_deployment_name" +# 用于 PR 摘要的 GPT-4 部署 +# AZURE_OPENAI_GPT4_DEPLOYMENT_ID="your_gpt4_deployment_name" + +# DeepSeek 配置 +# 如果使用 DeepSeek 模型 +# DEEPSEEK_API_KEY="your_deepseek_api_key" +# DeepSeek 模型名称 +DEEPSEEK_MODEL="deepseek-chat" +# DeepSeek API 基础 URL +DEEPSEEK_API_BASE="https://api.deepseek.com" +# DeepSeek 温度参数 +DEEPSEEK_TEMPERATURE="0" +# DeepSeek 最大token数 +DEEPSEEK_MAX_TOKENS="4096" +# DeepSeek top_p参数 +DEEPSEEK_TOP_P="0.95" +# DeepSeek 超时时间(秒) +DEEPSEEK_TIMEOUT="60" +# DeepSeek R1 特定配置 +DEEPSEEK_R1_API_BASE="https://api.deepseek.com" +DEEPSEEK_R1_MODEL="deepseek-reasoner" + +# ===== 模型选择配置 ===== +# 可选值: "gpt-3.5", "gpt-4o", "deepseek" +CODE_SUMMARY_MODEL="gpt-3.5" +PR_SUMMARY_MODEL="gpt-3.5" +CODE_REVIEW_MODEL="gpt-3.5" + +# ===== 电子邮件通知配置 ===== +# 启用电子邮件通知 +EMAIL_ENABLED="false" +# 接收通知的邮箱,多个邮箱用逗号分隔 +NOTIFICATION_EMAILS="your_email@example.com" + +# SMTP 服务器配置 +# 用于发送电子邮件通知 +# Gmail SMTP 配置说明: +# 1. 必须在 Google 账户开启两步验证: https://myaccount.google.com/security +# 2. 创建应用专用密码: https://myaccount.google.com/apppasswords +# 3. 
使用应用专用密码而非您的常规Gmail密码 +# Gmail SMTP 服务器地址 +SMTP_SERVER="smtp.gmail.com" +# Gmail SMTP 服务器端口 +SMTP_PORT="587" +# 发送邮件的 Gmail 账户 +SMTP_USERNAME="your_email@gmail.com" +# SMTP_PASSWORD 应该是应用专用密码,不是您的 Gmail 登录密码 +SMTP_PASSWORD="your_app_specific_password" + +# ===== 开发者评价配置 ===== +# 默认包含的文件类型 +DEV_EVAL_DEFAULT_INCLUDE=".py,.js,.java,.ts,.tsx,.jsx,.c,.cpp,.h,.hpp" +# 默认排除的文件类型 +DEV_EVAL_DEFAULT_EXCLUDE=".md,.txt,.json,.lock,.gitignore" + +# ===== 其他可选配置 ===== +# 日志级别,可以是 DEBUG, INFO, WARNING, ERROR +LOG_LEVEL="INFO" diff --git a/README.md b/README.md index a302245..af0456e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Codedog leverages Large Language Models (LLMs) like GPT to automatically review * **Platform Support**: Works with GitHub and GitLab. * **Automated Code Review**: Uses LLMs to analyze code changes, provide feedback, and suggest improvements * **Scoring System**: Evaluates code across multiple dimensions, including correctness, readability, and maintainability -* **Multiple LLM Support**: Works with OpenAI (including GPT-4o), Azure OpenAI, DeepSeek, and MindConnect R1 models (see [Models Guide](docs/models.md)) +* **Multiple LLM Support**: Works with OpenAI (including GPT-4o), Azure OpenAI, DeepSeek, and DeepSeek R1 models (see [Models Guide](docs/models.md)) * **Email Notifications**: Sends code review reports via email (see [Email Setup Guide](docs/email_setup.md)) * **Commit-Triggered Reviews**: Automatically reviews code when commits are made (see [Commit Review Guide](docs/commit_review.md)) * **Developer Evaluation**: Evaluates a developer's code over a specific time period @@ -108,9 +108,6 @@ OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # DEEPSEEK_MODEL="deepseek-r1" # DEEPSEEK_R1_API_BASE="https://your-r1-endpoint" -# LLM (MindConnect R1 example) -# MINDCONNECT_API_KEY="your_mindconnect_api_key" - # Model selection (optional) CODE_SUMMARY_MODEL="gpt-3.5" PR_SUMMARY_MODEL="gpt-4" @@ -141,7 +138,14 @@ The `README.md` 
in the project root (and `codedog/__init__.py`) contains a quick 4. **Run the Script**: Execute the script within the Poetry environment: ```bash - poetry run python run_codedog.py + # For GitHub PR review + poetry run python run_codedog.py pr "owner/repo" 123 + + # For GitLab MR review + poetry run python run_codedog.py pr "owner/repo" 123 --platform gitlab + + # For GitLab MR review with custom GitLab instance + poetry run python run_codedog.py pr "owner/repo" 123 --platform gitlab --gitlab-url "https://your.gitlab.instance.com" ``` This will: @@ -151,6 +155,30 @@ This will: * Use the configured LLM to generate code review suggestions. * Print a formatted Markdown report to the console. +## GitLab Integration + +Codedog fully supports GitLab integration for reviewing merge requests. To use GitLab integration: + +1. **Set up GitLab Token**: Generate a personal access token with `api` scope from your GitLab account settings. + +2. **Configure Environment Variables**: Add the following to your `.env` file: + ``` + GITLAB_TOKEN="your_gitlab_personal_access_token" + GITLAB_URL="https://gitlab.com" # Or your self-hosted GitLab URL + ``` + +3. **Run GitLab MR Review**: Use the following command to review a GitLab merge request: + ```bash + python run_codedog.py pr "owner/repo" 123 --platform gitlab + ``` + + Replace `owner/repo` with your GitLab project path and `123` with your merge request IID. + +4. **Self-hosted GitLab**: If you're using a self-hosted GitLab instance, specify the URL: + ```bash + python run_codedog.py pr "owner/repo" 123 --platform gitlab --gitlab-url "https://your.gitlab.instance.com" + ``` + ## Running Tests To ensure the package is working correctly after setup or changes: diff --git a/UPDATES.md b/UPDATES.md index d88a94b..6ec690f 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -54,9 +54,16 @@ python run_codedog.py eval "开发者名称" --start-date YYYY-MM-DD --end-date YYYY-MM-DD ``` -2. **审查PR**: +2. 
**审查PR/MR**: ```bash + # GitHub PR审查 python run_codedog.py pr "仓库名称" PR编号 + + # GitLab MR审查 + python run_codedog.py pr "仓库名称" MR编号 --platform gitlab + + # 自托管GitLab实例 + python run_codedog.py pr "仓库名称" MR编号 --platform gitlab --gitlab-url "https://your.gitlab.instance.com" ``` 3. **设置Git钩子**: @@ -74,4 +81,4 @@ 1. 实现更好的文本分块和处理,以处理大型代码差异 2. 针对不同文件类型的更专业评分标准 3. 进一步改进报告呈现,添加可视化图表 -4. 与CI/CD系统的更深入集成 \ No newline at end of file +4. 与CI/CD系统的更深入集成 \ No newline at end of file diff --git a/codedog/templates/optimized_code_review_prompt.py b/codedog/templates/optimized_code_review_prompt.py new file mode 100644 index 0000000..29c5260 --- /dev/null +++ b/codedog/templates/optimized_code_review_prompt.py @@ -0,0 +1,310 @@ +""" +Optimized code review prompts based on high-star GitHub projects. +This file contains improved prompts for code review that follow best practices +from popular open source projects like code-review-gpt and sweep. +""" + +# System prompt for code review +SYSTEM_PROMPT = """You are CodeDog, an expert code reviewer with deep knowledge of software engineering principles, design patterns, and best practices across multiple programming languages. + +Your task is to provide a comprehensive, objective, and actionable code review that helps developers improve their code quality, maintainability, and performance. + +You have the following capabilities: +1. Deep understanding of multiple programming languages and their ecosystems +2. Recognition of code patterns, anti-patterns, and best practices +3. Security vulnerability detection and mitigation recommendations +4. Performance optimization identification +5. 
Code style and consistency checking + +You will analyze code changes and provide a detailed evaluation with specific scores based on the following dimensions: +- Readability: Code clarity, naming conventions, and overall comprehensibility +- Efficiency & Performance: Algorithm efficiency, resource utilization, and optimization opportunities +- Security: Vulnerability prevention, input validation, and secure coding practices +- Structure & Design: Architecture, modularity, and adherence to design principles +- Error Handling: Exception management, edge cases, and failure recovery +- Documentation & Comments: Code documentation quality and completeness +- Code Style: Adherence to language-specific conventions and formatting standards + +For each dimension, you will provide a score from 1 to 10, where: +- 1-3: Poor, significant issues present +- 4-6: Acceptable, but with notable improvement opportunities +- 7-10: Excellent, follows best practices + +You will also calculate an overall score as the weighted average of all dimensions. +""" + +# User prompt for code review +CODE_REVIEW_PROMPT = """# Code Review Request + +## File Information +- **File Name**: {file_name} +- **Language**: {language} + +## Code to Review +```{language} +{code_content} +``` + +## Instructions + +Please conduct a comprehensive code review following these steps: + +1. **Initial Analysis**: Begin with a brief overview of the code's purpose and functionality. + +2. **Detailed Evaluation**: Analyze the code across these key dimensions: + + a. **Readability** (1-10): + - Variable and function naming clarity + - Code organization and structure + - Consistent formatting and indentation + - Appropriate use of comments + + b. **Efficiency & Performance** (1-10): + - Algorithm efficiency and complexity + - Resource utilization (memory, CPU) + - Optimization opportunities + - Potential bottlenecks + + c. 
**Security** (1-10): + - Input validation and sanitization + - Authentication and authorization concerns + - Data protection and privacy + - Potential vulnerabilities + + d. **Structure & Design** (1-10): + - Modularity and separation of concerns + - Appropriate design patterns + - Code reusability + - Dependency management + + e. **Error Handling** (1-10): + - Exception handling completeness + - Edge case coverage + - Graceful failure mechanisms + - Informative error messages + + f. **Documentation & Comments** (1-10): + - Documentation completeness + - Comment quality and relevance + - API documentation + - Usage examples where appropriate + + g. **Code Style** (1-10): + - Adherence to language conventions + - Consistency with project style + - Readability enhancements + - Modern language feature usage + +3. **Specific Recommendations**: For each dimension with a score below 8, provide: + - Concrete examples of issues + - Specific, actionable improvement suggestions + - Code examples demonstrating better approaches + - References to relevant best practices or documentation + +4. **Positive Aspects**: Highlight 2-3 strengths of the code that should be maintained. + +5. **Summary**: Provide a concise overview of your findings and the most critical improvements needed. + +## Response Format + +Please structure your response as follows: + +1. **Code Overview**: Brief description of the code's purpose and functionality (2-3 sentences) + +2. **Detailed Analysis**: For each dimension, provide: + - Score (1-10) + - Brief justification for the score + - Specific issues identified + - Improvement recommendations with code examples + +3. **Strengths**: 2-3 positive aspects of the code + +4. **Priority Improvements**: Top 3-5 most important changes recommended + +5. 
**Score Summary**: Present all scores in a clearly formatted section: + +### SCORES: +- Readability: [score] /10 +- Efficiency & Performance: [score] /10 +- Security: [score] /10 +- Structure & Design: [score] /10 +- Error Handling: [score] /10 +- Documentation & Comments: [score] /10 +- Code Style: [score] /10 +- **Final Overall Score**: [calculated_overall_score] /10 + +Please ensure your review is constructive, specific, and actionable, focusing on helping the developer improve the code rather than just pointing out flaws. +""" + +# Prompt for PR summary +PR_SUMMARY_PROMPT = """# Pull Request Review Request + +## Pull Request Information +- **Title**: {pr_title} +- **Description**: {pr_description} + +## Changes Overview +{changes_summary} + +## Instructions + +Please provide a comprehensive review of this pull request following these steps: + +1. **PR Understanding**: Demonstrate your understanding of the PR's purpose and scope. + +2. **Change Analysis**: Analyze the key changes made across files, focusing on: + - Architectural changes + - New functionality added + - Bug fixes implemented + - Performance improvements + - Security enhancements + +3. **Risk Assessment**: Identify potential risks or concerns, including: + - Regression risks + - Security implications + - Performance impacts + - Maintainability concerns + - Testing gaps + +4. **Implementation Quality**: Evaluate the overall implementation quality: + - Code organization and structure + - Error handling and edge cases + - Documentation completeness + - Test coverage adequacy + +5. **Recommendations**: Provide specific, actionable recommendations for improvement. + +## Response Format + +Please structure your response as follows: + +1. **PR Summary**: Concise overview of the PR's purpose and main changes (3-5 sentences) + +2. **Key Changes**: Bulleted list of the most significant changes + +3. **Potential Issues**: Identified concerns or risks that should be addressed + +4. 
# Regular expressions for extracting scores from the review text produced in
# response to CODE_REVIEW_PROMPT (they are patterns, not prompts).

# Captures the body of the "SCORES:" (or "评分:") section: everything after the
# 1-3 '#' heading up to the next Markdown heading or the end of the text.
SCORE_EXTRACTION_REGEX = r'#{1,3}\s*(?:SCORES|评分):\s*([\s\S]*?)(?=#{1,3}|$)'

# Matches one bullet of the form "- <Dimension>: <score> /10".
#   group(1) -> dimension name, group(2) -> numeric score (int or decimal).
# Dimension names may consist of several words separated by spaces and/or '&'
# ("Error Handling", "Code Style", "Efficiency & Performance"); the previous
# pattern only accepted '&'-joined words and silently dropped the others.
# Separators are limited to spaces/tabs so a name can never span two lines.
# The negative lookahead keeps the overall-score bullet out of the
# per-dimension results; use OVERALL_SCORE_REGEX for that line.
INDIVIDUAL_SCORE_REGEX = (
    r'[-*]\s*(?!(?:Final\s+)?Overall\b)'
    r'(\w+(?:[ \t]*&[ \t]*\w+|[ \t]+\w+)*)'
    r':\s*(\d+(?:\.\d+)?)\s*/\s*10'
)

# Matches the overall-score bullet; group(1) -> numeric score.
# Optional Markdown bold markers are tolerated around the label and the colon
# because the response format requested by CODE_REVIEW_PROMPT renders the line
# as "- **Final Overall Score**: <score> /10", which the previous pattern
# could not match.
OVERALL_SCORE_REGEX = (
    r'[-*]\s*\**\s*(?:Final\s+)?Overall(?:\s+Score)?\s*\**\s*'
    r':\s*\**\s*(\d+(?:\.\d+)?)\s*/\s*10'
)
**Authentication & Authorization**: Verify proper access controls and permission checks +3. **Data Protection**: Check for proper handling of sensitive data +4. **Injection Prevention**: Look for SQL, command, XSS, and other injection vulnerabilities +5. **Secure Communications**: Verify secure communication protocols and practices +6. **Cryptographic Issues**: Identify improper use of cryptographic functions +7. **Error Handling**: Check for information leakage in error messages +8. **Dependency Security**: Note any potentially vulnerable dependencies + +While security is the primary focus, still evaluate all standard dimensions but with greater emphasis on security aspects. + +## Response Format + +Use the standard response format, but provide a more detailed security analysis section that covers each of the security focus areas listed above. +""" + +# Language-specific review considerations +LANGUAGE_SPECIFIC_CONSIDERATIONS = { + "python": """ +## Python-Specific Considerations + +When reviewing Python code, pay special attention to: + +1. **PEP 8 Compliance**: Adherence to Python's style guide +2. **Type Hints**: Proper use of type annotations +3. **Pythonic Patterns**: Use of language-specific idioms and patterns +4. **Package Management**: Proper dependency specification +5. **Exception Handling**: Appropriate use of try/except blocks +6. **Context Managers**: Proper resource management with 'with' statements +7. **Docstrings**: PEP 257 compliant documentation +8. **Import Organization**: Proper grouping and ordering of imports +9. **List Comprehensions**: Appropriate use vs. traditional loops +10. **Standard Library Usage**: Effective use of built-in functions and modules +""", + + "javascript": """ +## JavaScript-Specific Considerations + +When reviewing JavaScript code, pay special attention to: + +1. **ES6+ Features**: Appropriate use of modern JavaScript features +2. **Asynchronous Patterns**: Proper use of Promises, async/await +3. 
**DOM Manipulation**: Efficient and safe DOM operations +4. **Event Handling**: Proper event binding and cleanup +5. **Closure Usage**: Appropriate use of closures and scope +6. **Framework Patterns**: Adherence to framework-specific best practices +7. **Browser Compatibility**: Consideration of cross-browser issues +8. **Memory Management**: Prevention of memory leaks +9. **Error Handling**: Proper promise rejection and try/catch usage +10. **Module System**: Appropriate use of import/export +""", + + "java": """ +## Java-Specific Considerations + +When reviewing Java code, pay special attention to: + +1. **OOP Principles**: Proper application of encapsulation, inheritance, polymorphism +2. **Exception Handling**: Appropriate checked vs. unchecked exceptions +3. **Resource Management**: Proper use of try-with-resources +4. **Concurrency**: Thread safety and synchronization +5. **Collections Framework**: Appropriate collection type selection +6. **Stream API**: Effective use of functional programming features +7. **Design Patterns**: Appropriate application of common patterns +8. **Dependency Injection**: Proper management of dependencies +9. **Generics**: Effective use of type parameters +10. 
**JavaDoc**: Comprehensive API documentation +""" +} diff --git a/codedog/utils/code_evaluator.py b/codedog/utils/code_evaluator.py index ee61ae4..62ef1ae 100644 --- a/codedog/utils/code_evaluator.py +++ b/codedog/utils/code_evaluator.py @@ -17,6 +17,12 @@ # 导入 grimoire 模板 from codedog.templates.grimoire_en import CODE_SUGGESTION from codedog.templates.grimoire_cn import GrimoireCn +# 导入优化的代码评审prompt +from codedog.templates.optimized_code_review_prompt import ( + SYSTEM_PROMPT, + CODE_REVIEW_PROMPT, + LANGUAGE_SPECIFIC_CONSIDERATIONS +) # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -328,23 +334,11 @@ def __init__(self, model: BaseChatModel, tokens_per_minute: int = 9000, max_conc if self.save_diffs: os.makedirs("diffs", exist_ok=True) - # System prompt - self.system_prompt = """你是一个经验丰富的代码审阅者。 -请根据我提供的代码差异,进行代码评价,你将针对以下方面给出1-10分制的评分: - -1. 可读性 (Readability):代码的命名、格式和注释质量 -2. 效率与性能 (Efficiency):代码执行效率和资源利用情况 -3. 安全性 (Security):代码的安全实践和潜在漏洞防范 -4. 结构与设计 (Structure):代码组织、模块化和架构设计 -5. 错误处理 (Error Handling):对异常情况的处理方式 -6. 文档与注释 (Documentation):文档的完整性和注释的有效性 -7. 代码风格 (Code Style):符合语言规范和项目风格指南的程度 - -每个指标的评分标准: -- 1-3分:较差,存在明显问题 -- 4-6分:一般,基本可接受但有改进空间 -- 7-10分:优秀,符合最佳实践 + # System prompt - 使用优化的系统提示 + self.system_prompt = SYSTEM_PROMPT + # 添加JSON输出指令 + self.json_output_instruction = """ 请以JSON格式返回评价结果,包含7个评分字段和详细评价意见: ```json @@ -361,7 +355,7 @@ def __init__(self, model: BaseChatModel, tokens_per_minute: int = 9000, max_conc } ``` -总评分计算方式:所有7个指标的平均值(取一位小数)。 +总评分计算方式:所有7个指标的加权平均值(取一位小数)。 """ @retry( @@ -581,10 +575,36 @@ async def _evaluate_single_diff(self, diff_content: str) -> Dict[str, Any]: # 发送请求到模型 async with self.request_semaphore: - # 创建消息 + # 创建消息 - 使用优化的prompt + # 获取文件名和语言 + file_name = "unknown" + language = "unknown" + + # 尝试从diff内容中提取文件名 + file_name_match = re.search(r'diff --git a/(.*?) 
b/', diff_content) + if file_name_match: + file_name = file_name_match.group(1) + # 猜测语言 + language = self._guess_language(file_name) + + # 使用优化的代码评审prompt + review_prompt = CODE_REVIEW_PROMPT.format( + file_name=file_name, + language=language.lower(), + code_content=diff_content + ) + + # 添加语言特定的考虑因素 + language_key = language.lower() + if language_key in LANGUAGE_SPECIFIC_CONSIDERATIONS: + review_prompt += "\n\n" + LANGUAGE_SPECIFIC_CONSIDERATIONS[language_key] + + # 添加JSON输出指令 + review_prompt += "\n\n" + self.json_output_instruction + messages = [ SystemMessage(content=self.system_prompt), - HumanMessage(content=f"请评价以下代码差异:\n\n```\n{diff_content}\n```") + HumanMessage(content=review_prompt) ] # 调用模型 @@ -1109,10 +1129,24 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]: # 发送请求到模型 async with self.request_semaphore: - # 创建消息 - 使用简化的提示,以减少令牌消耗 + # 创建消息 - 使用优化的prompt + # 获取文件名和语言 + file_name = "unknown" + language = "unknown" + + # 尝试从diff内容中提取文件名 + file_name_match = re.search(r'diff --git a/(.*?) b/', chunk) + if file_name_match: + file_name = file_name_match.group(1) + # 猜测语言 + language = self._guess_language(file_name) + + # 使用简化的代码评审prompt,以减少令牌消耗 + review_prompt = f"请评价以下代码:\n\n文件名:{file_name}\n语言:{language}\n\n```{language.lower()}\n{chunk}\n```\n\n请给出1-10分的评分和简要评价。返回JSON格式的结果。" + messages = [ - SystemMessage(content="请对以下代码差异进行评价,给出1-10分的评分和简要评价。返回JSON格式的结果。"), - HumanMessage(content=f"请评价以下代码差异:\n\n```\n{chunk}\n```") + SystemMessage(content=self.system_prompt), + HumanMessage(content=review_prompt) ] # 调用模型 diff --git a/docs/commit_review.md b/docs/commit_review.md index 1663a35..3eb0b37 100644 --- a/docs/commit_review.md +++ b/docs/commit_review.md @@ -36,7 +36,7 @@ CodeDog can automatically review your code commits and send the review results v b) **Default Email**: - If you don't configure any email settings, the system will automatically send review results to `xiejun06@qq.com`. 
+ If you don't configure any email settings, the system will automatically send review results to `kratosxie@gmail.com`. 3. **Configure LLM Models** diff --git a/product.md b/product.md new file mode 100644 index 0000000..ace6e68 --- /dev/null +++ b/product.md @@ -0,0 +1,206 @@ +# CodeDog 产品文档 + +## 1. 产品概述 + +CodeDog 是一款基于大语言模型(LLM)的智能代码评审工具,旨在通过自动化代码分析提高开发团队的代码质量和开发效率。它能够自动分析代码提交,生成详细的评审报告,并通过电子邮件通知相关人员。 + +### 1.1 核心功能 + +- **自动代码评审**:在代码提交时自动触发评审流程,分析代码质量 +- **多维度评分**:从可读性、效率、安全性等多个维度评估代码 +- **详细报告生成**:生成结构化的 Markdown 格式评审报告 +- **邮件通知**:将评审结果通过邮件发送给相关人员 +- **多模型支持**:支持 OpenAI、Azure OpenAI 和 DeepSeek 等多种 LLM 模型 + +### 1.2 应用场景 + +- 个人开发者的代码自我评审 +- 团队协作中的代码质量控制 +- 拉取请求(PR)的自动评审 +- 开发者代码质量评估和绩效分析 + +## 2. 系统架构 + +CodeDog 采用模块化设计,主要包含以下组件: + +- **Git 钩子处理器**:捕获 Git 事件并触发评审流程 +- **代码分析引擎**:解析和分析代码结构和内容 +- **LLM 集成层**:与各种大语言模型 API 交互 +- **评审生成器**:基于 LLM 输出生成结构化评审 +- **报告格式化器**:将评审结果转换为可读性强的报告 +- **通知系统**:处理电子邮件发送和其他通知 + +## 3. 功能详解 + +### 3.1 自动代码评审 + +CodeDog 可以在代码提交时自动触发评审流程,通过 Git 钩子机制捕获提交事件,分析更改的代码,并生成评审报告。 + +**工作流程**: +1. 开发者提交代码到 Git 仓库 +2. Git 钩子脚本被触发(如 post-commit) +3. 系统获取提交信息和更改的文件 +4. LLM 生成代码评审和摘要 +5. 系统格式化评审结果为结构化报告 +6. 
通知系统将报告发送给相关人员 + +**安装 Git 钩子**: +```python +from codedog.utils.git_hooks import install_git_hooks +install_git_hooks("/path/to/your/repo") +``` + +### 3.2 多维度代码评估 + +系统从多个维度对代码进行全面评估,包括: + +- **可读性**:代码结构、命名规范、注释质量 +- **效率与性能**:算法效率、资源利用、潜在瓶颈 +- **安全性**:输入验证、错误处理、安全编码实践 +- **结构与设计**:模块化、整体架构、设计原则 +- **错误处理**:异常处理、边缘情况处理 +- **文档与注释**:文档完整性、注释清晰度 +- **代码风格**:符合语言特定编码标准 + +每个维度满分 10 分,最终总分为各维度的加权平均值。 + +### 3.3 报告生成与通知 + +CodeDog 生成结构化的 Markdown 格式评审报告,包含: + +- 提交摘要和概述 +- 文件级别的详细评审 +- 多维度评分表格 +- 具体改进建议 +- 代码量统计信息 + +评审报告可以通过电子邮件发送给相关人员,支持 HTML 格式的邮件内容,使用配置的 SMTP 服务器发送。 + +### 3.4 多模型支持 + +CodeDog 支持多种大语言模型,以满足不同的需求和预算: + +- **OpenAI GPT-3.5/GPT-4o**:通用模型,适合日常代码评审 +- **Azure OpenAI**:企业级安全性,适合需要数据合规的场景 +- **DeepSeek Chat/Reasoner**:专业模型,适合复杂代码分析 + +可以为不同任务配置不同模型: +``` +CODE_SUMMARY_MODEL="gpt-3.5" # 代码摘要 +PR_SUMMARY_MODEL="gpt-4o" # PR摘要 +CODE_REVIEW_MODEL="deepseek" # 代码评审 +``` + +## 4. 使用指南 + +### 4.1 环境要求 + +- Python 3.8+ +- Git +- 互联网连接(用于 API 调用) +- SMTP 服务器访问(用于邮件通知) + +### 4.2 安装与配置 + +1. **安装 CodeDog**: + ```bash + pip install codedog + ``` + +2. **配置环境变量**: + 创建 `.env` 文件,添加必要的配置: + ``` + # API密钥 + OPENAI_API_KEY=your_openai_api_key + + # 模型选择 + CODE_REVIEW_MODEL=gpt-3.5 + PR_SUMMARY_MODEL=gpt-4o + + # 邮件配置 + EMAIL_ENABLED=true + NOTIFICATION_EMAILS=your_email@example.com + SMTP_SERVER=smtp.gmail.com + SMTP_PORT=587 + SMTP_USERNAME=your_email@gmail.com + SMTP_PASSWORD=your_app_specific_password + ``` + +3. 
**安装 Git 钩子**: + ```python + from codedog.utils.git_hooks import install_git_hooks + install_git_hooks(".") + ``` + +### 4.3 基本使用 + +#### 评估单个提交 + +```bash +# 评审最新提交 +python run_codedog_commit.py --verbose + +# 评审特定提交 +python run_codedog_commit.py --commit 提交哈希 --verbose +``` + +#### 评估时间段内的提交 + +```bash +python run_codedog.py eval "开发者名称" --start-date YYYY-MM-DD --end-date YYYY-MM-DD --include .py +``` + +#### 评估 GitHub PR + +```bash +python run_codedog.py pr "owner/repo" PR编号 +``` + +### 4.4 配置选项 + +CodeDog 提供多种配置选项,可以通过环境变量或命令行参数设置: + +- **平台配置**:GitHub/GitLab 访问令牌 +- **LLM 配置**:API 密钥和端点设置 +- **模型选择**:用于不同任务的模型选择 +- **电子邮件配置**:SMTP 服务器和通知设置 +- **评审配置**:文件类型包含/排除规则 + +## 5. 最佳实践 + +### 5.1 个人开发者 + +- 在提交前评审代码,发现潜在问题 +- 使用 Git 钩子自动触发评审 +- 关注评审中反复出现的问题模式 +- 定期运行评估跟踪进步 + +### 5.2 团队协作 + +- 将 CodeDog 集成到 CI/CD 流程中 +- 为每个 PR 生成自动评审 +- 使用评审报告作为讨论的起点 +- 定期回顾团队评审趋势,识别系统性问题 + +## 6. 常见问题解答 + +**Q: 如何处理大文件或大量文件的评审?** +A: CodeDog 会自动处理文件分割和批处理,但对于特别大的文件,可能需要增加超时设置或选择更快的模型。 + +**Q: 如何解决 API 限制问题?** +A: 可以调整请求频率、使用缓存或升级 API 计划。对于 DeepSeek API 错误,系统会自动重试两次,如果仍然失败,则放弃评估并给出 0 分。 + +**Q: 如何配置 Gmail SMTP?** +A: 需要在 Google 账户开启两步验证,然后创建应用专用密码用于 SMTP 认证。详细步骤请参考文档。 + +## 7. 
技术规格 + +- **支持的语言**:Python、JavaScript、Java、TypeScript 等主流编程语言 +- **支持的模型**:GPT-3.5、GPT-4o、DeepSeek Chat、DeepSeek Reasoner、Azure OpenAI +- **支持的平台**:GitHub、GitLab、本地 Git 仓库 +- **报告格式**:Markdown、HTML 邮件 +- **评分维度**:7个维度(可读性、效率、安全性、结构、错误处理、文档、代码风格) + +--- + +*CodeDog - 智能代码评审,提升开发效率* diff --git a/run_codedog.py b/run_codedog.py index 3cdc894..2e6a086 100755 --- a/run_codedog.py +++ b/run_codedog.py @@ -11,11 +11,12 @@ load_dotenv() from github import Github +from gitlab import Gitlab from langchain_community.callbacks.manager import get_openai_callback from codedog.actors.reporters.pull_request import PullRequestReporter from codedog.chains import CodeReviewChain, PRSummaryChain -from codedog.retrievers import GithubRetriever +from codedog.retrievers import GithubRetriever, GitlabRetriever from codedog.utils.langchain_utils import load_model_by_name from codedog.utils.email_utils import send_report_email from codedog.utils.git_hooks import install_git_hooks @@ -26,20 +27,23 @@ def parse_args(): """Parse command line arguments.""" parser = argparse.ArgumentParser(description="CodeDog - AI-powered code review tool") - + # Main operation subparsers subparsers = parser.add_subparsers(dest="command", help="Command to run") - + # PR review command - pr_parser = subparsers.add_parser("pr", help="Review a GitHub pull request") + pr_parser = subparsers.add_parser("pr", help="Review a GitHub or GitLab pull request") pr_parser.add_argument("repository", help="Repository path (e.g. 
def parse_emails(emails_str: Optional[str]) -> List[str]:
    """Split a comma-separated address string into trimmed, non-empty entries.

    Args:
        emails_str: Raw comma-separated addresses, or None.

    Returns:
        The addresses in input order with surrounding whitespace removed;
        an empty list when the input is None or empty.
    """
    addresses: List[str] = []
    # "" splits to [""], whose only piece is blank, so None/"" yield [].
    for piece in (emails_str or "").split(","):
        cleaned = piece.strip()
        if cleaned:
            addresses.append(cleaned)
    return addresses


def parse_extensions(extensions_str: Optional[str]) -> Optional[List[str]]:
    """Split a comma-separated extension string into trimmed, non-empty entries.

    Args:
        extensions_str: Raw comma-separated extensions, or None.

    Returns:
        The extensions in input order with surrounding whitespace removed,
        or None (not an empty list) when the input is None or empty.
    """
    if extensions_str:
        trimmed = (token.strip() for token in extensions_str.split(","))
        return [token for token in trimmed if token]
    return None
f"codedog_eval_{author_slug}_{date_slug}.md" - + # Get model model = load_model_by_name(model_name) - + print(f"Evaluating {author}'s code commits from {start_date} to {end_date}...") - + # Get commits and diffs commits, commit_file_diffs = get_file_diffs_by_timeframe( - author, - start_date, - end_date, + author, + start_date, + end_date, repo_path, include_extensions, exclude_extensions ) - + if not commits: print(f"No commits found for {author} in the specified time period") return - + print(f"Found {len(commits)} commits with {sum(len(diffs) for diffs in commit_file_diffs.values())} modified files") - + # Initialize evaluator evaluator = DiffEvaluator(model) - + # Timing and statistics start_time = time.time() - + with get_openai_callback() as cb: # Perform evaluation print("Evaluating code commits...") evaluation_results = await evaluator.evaluate_commits(commits, commit_file_diffs) - + # Generate Markdown report report = generate_evaluation_markdown(evaluation_results) - + # Calculate cost and tokens total_cost = cb.total_cost total_tokens = cb.total_tokens - + # Add evaluation statistics elapsed_time = time.time() - start_time telemetry_info = ( @@ -153,72 +157,109 @@ async def evaluate_developer_code( f"- **Tokens Used**: {total_tokens}\n" f"- **Cost**: ${total_cost:.4f}\n" ) - + report += telemetry_info - + # Save report with open(output_file, "w", encoding="utf-8") as f: f.write(report) print(f"Report saved to {output_file}") - + # Send email report if addresses provided if email_addresses: subject = f"[CodeDog] Code Evaluation Report for {author} ({start_date} to {end_date})" - + sent = send_report_email( to_emails=email_addresses, subject=subject, markdown_content=report, ) - + if sent: print(f"Report sent to {', '.join(email_addresses)}") else: print("Failed to send email notification") - + return report -def generate_full_report(repository_name, pull_request_number, email_addresses=None): - """Generate a full report including PR summary and code 
review.""" +def generate_full_report(repository_name, pull_request_number, email_addresses=None, platform="github", gitlab_url=None): + """Generate a full report including PR summary and code review. + + Args: + repository_name (str): Repository path (e.g. owner/repo) + pull_request_number (int): Pull request number to review + email_addresses (list, optional): List of email addresses to send the report to + platform (str, optional): Platform to use (github or gitlab). Defaults to "github". + gitlab_url (str, optional): GitLab URL. Defaults to https://gitlab.com or GITLAB_URL env var. + """ start_time = time.time() - - # Initialize GitHub client and retriever - github_client = Github() # Will automatically load GITHUB_TOKEN from environment - print(f"Analyzing GitHub repository {repository_name} PR #{pull_request_number}") - - try: - retriever = GithubRetriever(github_client, repository_name, pull_request_number) - print(f"Successfully retrieved PR: {retriever.pull_request.title}") - except Exception as e: - error_msg = f"Failed to retrieve PR: {str(e)}" + + # Initialize client and retriever based on platform + if platform.lower() == "github": + # Initialize GitHub client and retriever + github_client = Github() # Will automatically load GITHUB_TOKEN from environment + print(f"Analyzing GitHub repository {repository_name} PR #{pull_request_number}") + + try: + retriever = GithubRetriever(github_client, repository_name, pull_request_number) + print(f"Successfully retrieved PR: {retriever.pull_request.title}") + except Exception as e: + error_msg = f"Failed to retrieve GitHub PR: {str(e)}" + print(error_msg) + return error_msg + + elif platform.lower() == "gitlab": + # Initialize GitLab client and retriever + gitlab_token = os.environ.get("GITLAB_TOKEN", "") + if not gitlab_token: + error_msg = "GITLAB_TOKEN environment variable is not set" + print(error_msg) + return error_msg + + # Use provided GitLab URL or fall back to environment variable or default + gitlab_url 
= gitlab_url or os.environ.get("GITLAB_URL", "https://gitlab.com") + + gitlab_client = Gitlab(url=gitlab_url, private_token=gitlab_token) + print(f"Analyzing GitLab repository {repository_name} MR #{pull_request_number}") + + try: + retriever = GitlabRetriever(gitlab_client, repository_name, pull_request_number) + print(f"Successfully retrieved MR: {retriever.pull_request.title}") + except Exception as e: + error_msg = f"Failed to retrieve GitLab MR: {str(e)}" + print(error_msg) + return error_msg + + else: + error_msg = f"Unsupported platform: {platform}. Use 'github' or 'gitlab'." print(error_msg) return error_msg - + # Load models based on environment variables code_summary_model = os.environ.get("CODE_SUMMARY_MODEL", "gpt-3.5") pr_summary_model = os.environ.get("PR_SUMMARY_MODEL", "gpt-4") code_review_model = os.environ.get("CODE_REVIEW_MODEL", "gpt-3.5") - + # Initialize chains with specified models summary_chain = PRSummaryChain.from_llm( code_summary_llm=load_model_by_name(code_summary_model), pr_summary_llm=load_model_by_name(pr_summary_model), verbose=True ) - + review_chain = CodeReviewChain.from_llm( llm=load_model_by_name(code_review_model), verbose=True ) - + with get_openai_callback() as cb: # Get PR summary print(f"Generating PR summary using {pr_summary_model}...") pr_summary_result = asyncio.run(pr_summary(retriever, summary_chain)) pr_summary_cost = cb.total_cost print(f"PR summary complete, cost: ${pr_summary_cost:.4f}") - + # Get code review print(f"Generating code review using {code_review_model}...") try: @@ -230,11 +271,11 @@ def generate_full_report(repository_name, pull_request_number, email_addresses=N print(traceback.format_exc()) # Use empty code review code_review_result = {"code_reviews": []} - + # Create report total_cost = cb.total_cost total_time = time.time() - start_time - + reporter = PullRequestReporter( pr_summary=pr_summary_result["pr_summary"], code_summaries=pr_summary_result["code_summaries"], @@ -247,15 +288,15 @@ def 
generate_full_report(repository_name, pull_request_number, email_addresses=N "tokens": cb.total_tokens, }, ) - + report = reporter.report() - + # Save report to file report_file = f"codedog_pr_{pull_request_number}.md" with open(report_file, "w", encoding="utf-8") as f: f.write(report) print(f"Report saved to {report_file}") - + # Send email notification if email addresses provided if email_addresses: subject = f"[CodeDog] Code Review for {repository_name} PR #{pull_request_number}: {retriever.pull_request.title}" @@ -268,23 +309,29 @@ def generate_full_report(repository_name, pull_request_number, email_addresses=N print(f"Report sent to {', '.join(email_addresses)}") else: print("Failed to send email notification") - + return report def main(): """Main function to parse arguments and run the appropriate command.""" args = parse_args() - + if args.command == "pr": - # Review a GitHub pull request + # Review a GitHub or GitLab pull request email_addresses = parse_emails(args.email or os.environ.get("NOTIFICATION_EMAILS", "")) - report = generate_full_report(args.repository, args.pr_number, email_addresses) - + report = generate_full_report( + repository_name=args.repository, + pull_request_number=args.pr_number, + email_addresses=email_addresses, + platform=args.platform, + gitlab_url=args.gitlab_url + ) + print("\n===================== Review Report =====================\n") print(report) print("\n===================== Report End =====================\n") - + elif args.command == "setup-hooks": # Set up git hooks for commit-triggered reviews repo_path = args.repo or os.getcwd() @@ -292,7 +339,7 @@ def main(): if success: print("Git hooks successfully installed.") print("CodeDog will now automatically review new commits.") - + # Check if notification emails are configured emails = os.environ.get("NOTIFICATION_EMAILS", "") if emails: @@ -301,35 +348,35 @@ def main(): print("No notification emails configured. 
Add NOTIFICATION_EMAILS to your .env file to receive email reports.") else: print("Failed to install git hooks.") - + elif args.command == "eval": # Evaluate developer's code commits # Process date parameters today = datetime.now().strftime("%Y-%m-%d") week_ago = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") - + start_date = args.start_date or week_ago end_date = args.end_date or today - + # Process file extension parameters include_extensions = None if args.include: include_extensions = parse_extensions(args.include) elif os.environ.get("DEV_EVAL_DEFAULT_INCLUDE"): include_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_INCLUDE")) - + exclude_extensions = None if args.exclude: exclude_extensions = parse_extensions(args.exclude) elif os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE"): exclude_extensions = parse_extensions(os.environ.get("DEV_EVAL_DEFAULT_EXCLUDE")) - + # Get model model_name = args.model or os.environ.get("CODE_REVIEW_MODEL", "gpt-3.5") - + # Get email addresses email_addresses = parse_emails(args.email or os.environ.get("NOTIFICATION_EMAILS", "")) - + # Run evaluation report = asyncio.run(evaluate_developer_code( author=args.author, @@ -342,18 +389,19 @@ def main(): output_file=args.output, email_addresses=email_addresses, )) - + if report: print("\n===================== Evaluation Report =====================\n") print("Report generated successfully. See output file for details.") print("\n===================== Report End =====================\n") - + else: # No command specified, show usage print("Please specify a command. 
Use --help for more information.") - print("Example: python run_codedog.py pr owner/repo 123") - print("Example: python run_codedog.py setup-hooks") - print("Example: python run_codedog.py eval username --start-date 2023-01-01 --end-date 2023-01-31") + print("Example: python run_codedog.py pr owner/repo 123 # GitHub PR review") + print("Example: python run_codedog.py pr owner/repo 123 --platform gitlab # GitLab MR review") + print("Example: python run_codedog.py setup-hooks # Set up git hooks") + print("Example: python run_codedog.py eval username --start-date 2023-01-01 --end-date 2023-01-31 # Evaluate code") if __name__ == "__main__": @@ -362,4 +410,4 @@ def main(): except Exception as e: print(f"Error: {str(e)}") print("\nDetailed error information:") - traceback.print_exc() \ No newline at end of file + traceback.print_exc() \ No newline at end of file diff --git a/run_codedog_commit.py b/run_codedog_commit.py index b45b686..5a13e20 100755 --- a/run_codedog_commit.py +++ b/run_codedog_commit.py @@ -10,6 +10,7 @@ from typing import List, Optional # Load environment variables from .env file +# This will load GitHub or GitLab tokens from the .env file load_dotenv() from langchain_community.callbacks.manager import get_openai_callback @@ -27,7 +28,7 @@ def parse_args(): """Parse command line arguments.""" - parser = argparse.ArgumentParser(description="CodeDog - Automatic commit code review") + parser = argparse.ArgumentParser(description="CodeDog - Automatic commit code review for GitHub and GitLab repositories") parser.add_argument("--commit", help="Commit hash to review (defaults to HEAD)") parser.add_argument("--repo", help="Path to git repository (defaults to current directory)") parser.add_argument("--email", help="Email addresses to send the report to (comma-separated)") @@ -181,7 +182,23 @@ def generate_commit_review(commit_hash: str, repo_path: Optional[str] = None, code_review_model: str = None, pr_summary_model: str = None, verbose: bool = False) -> str: 
- """Generate a code review for a commit.""" + """Generate a code review for a commit. + + This function works with both GitHub and GitLab repositories by analyzing local Git commits. + It doesn't require direct API access to GitHub or GitLab as it works with the local repository. + + Args: + commit_hash: The commit hash to review + repo_path: Path to git repository (defaults to current directory) + email_addresses: List of email addresses to send the report to + output_file: Output file path (defaults to codedog_commit_<hash>.md) + code_review_model: Model to use for code review + pr_summary_model: Model to use for PR summary + verbose: Enable verbose output + + Returns: + str: The generated review report in markdown format + """ start_time = time.time() # Set default models from environment variables @@ -283,7 +300,10 @@ def generate_commit_review(commit_hash: str, repo_path: Optional[str] = None, def main(): - """Main function to parse arguments and run the commit review.""" + """Main function to parse arguments and run the commit review. + + This works with both GitHub and GitLab repositories by analyzing local Git commits. + """ args = parse_args() # Get commit hash (default to HEAD if not provided)