diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..d04ff10 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,49 @@ +name: Docker Build + +on: + push: + branches: [main, develop] + paths: + - "backend/Dockerfile" + - "backend/docker-compose.yml" + - "backend/requirements.txt" + - ".github/workflows/docker.yml" + pull_request: + branches: [main] + +jobs: + build: + name: Build Docker Image + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install docker-compose + run: | + sudo apt-get update + sudo apt-get install -y docker-compose + + - name: Build backend image + run: | + cd backend + docker build --tag codeguard-backend:${{ github.sha }} . + + - name: Test Docker image + run: | + docker run --rm codeguard-backend:${{ github.sha }} python --version + + - name: Test Docker Compose (validation only) + run: | + cd backend + docker-compose config + + - name: Summary + if: success() + run: | + echo " Docker image built successfully!" 
+ echo " Image: codeguard-backend:${{ github.sha }}" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..c89c30c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,56 @@ +name: Lint & Format Check + +on: + push: + branches: [main, develop, "feature/**"] + paths: + - "backend/**/*.py" + - ".github/workflows/lint.yml" + pull_request: + branches: [main, develop] + paths: + - "backend/**/*.py" + +jobs: + lint: + name: Code Quality Check + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install linting dependencies + run: | + python -m pip install --upgrade pip + pip install black>=23.0.0 flake8>=7.0.0 isort>=5.13.0 + + - name: Check code formatting with Black + run: | + cd backend + black src/ --line-length=100 --check + + - name: Check import sorting with isort + run: | + cd backend + isort src/ --profile=black --line-length=100 --check-only + + - name: Lint with Flake8 + run: | + cd backend + flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 src/ --count --max-complexity=10 --max-line-length=100 --statistics + + - name: Summary + if: success() + run: | + echo "= All code quality checks passed!" 
+ echo "- Black formatting: ✓" + echo "- Import sorting (isort): ✓" + echo "- Linting (flake8): ✓" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a4d1940 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,65 @@ +name: Tests & Coverage + +on: + push: + branches: [main, develop, "feature/**"] + paths: + - "backend/**/*.py" + - "backend/tests/**" + - ".github/workflows/test.yml" + pull_request: + branches: [main, develop] + paths: + - "backend/**/*.py" + - "backend/tests/**" + +jobs: + test: + name: Run Tests & Coverage + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest>=8.0.0 pytest-asyncio>=0.23.0 pytest-cov>=4.1.0 + + - name: Run tests with coverage + run: | + cd backend + pytest tests/ --cov=src --cov-report=html --cov-report=term-missing --cov-report=xml --cov-fail-under=75 -v + continue-on-error: false + + - name: Upload coverage report + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: backend/htmlcov/ + retention-days: 30 + + - name: Upload coverage to Codecov (optional) + if: always() + uses: codecov/codecov-action@v4 + with: + file: backend/coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: false + + - name: Summary + if: success() + run: | + echo "All tests passed with >75% coverage!" + echo "Coverage report uploaded as artifact" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..93e4886 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,693 @@ +# 🤝 Guía de Contribución - CodeGuard AI + +¡Gracias por tu interés en contribuir a **CodeGuard AI**! 
Este documento te guía a través del flujo de trabajo para colaborar efectivamente en el proyecto. + +--- + +## 📋 Tabla de Contenidos + +- [Código de Conducta](#-código-de-conducta) +- [Cómo Contribuir](#-cómo-contribuir) +- [Flujo de Trabajo GitFlow](#-flujo-de-trabajo-gitflow) +- [Convenciones de Commits](#-convenciones-de-commits) +- [Estándares de Calidad](#-estándares-de-calidad) +- [Proceso de Pull Request](#-proceso-de-pull-request) +- [Configuración del Entorno](#-configuración-del-entorno) +- [Testing](#-testing) +- [Reportar Bugs](#-reportar-bugs) +- [Sugerir Mejoras](#-sugerir-mejoras) + +--- + +## 📜 Código de Conducta + +Este proyecto y todos los participantes están regidos por nuestro **Código de Conducta**. Por favor: + +- ✅ Sé respetuoso y empático con otros contribuidores +- ✅ Acepta críticas constructivas con gracia +- ✅ Enfócate en lo que es mejor para la comunidad +- ✅ Muestra cortesía hacia diferentes puntos de vista + +**Para reportar comportamientos inaceptables**, contáctanos en: +📧 `codeguard-ai@unal.edu.co` + +--- + +## 🚀 Cómo Contribuir + +### Tipos de Contribuciones + +1. **🐛 Reportar Bugs**: Identifica y documenta errores +2. **✨ Implementar Features**: Desarrolla nuevas funcionalidades +3. **📝 Mejorar Documentación**: Actualiza o crea documentación +4. **🧪 Escribir Tests**: Aumenta la cobertura de pruebas +5. **🎨 Refactorizar Código**: Mejora la estructura sin cambiar funcionalidad +6. **⚡ Optimizar Rendimiento**: Mejora velocidad o uso de recursos + +### Antes de Empezar + +1. ✅ **Revisa el backlog**: Ve a [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) +2. ✅ **Busca issue abierto**: Verifica que no esté duplicado +3. ✅ **Asigna el issue**: Comenta que deseas trabajar en él +4. ✅ **Lee la documentación**: Familiarízate con la arquitectura + +--- + +## 🔀 Flujo de Trabajo GitFlow + +CodeGuard AI utiliza **GitFlow** como estrategia de branching. Este modelo define ramas para diferentes propósitos. 
+ +### Estructura de Ramas + +``` +main (producción) + └─ Etiquetas: v1.0.0, v1.1.0 + ↑ (merges desde release/* y hotfix/*) + +develop (integración) + └─ Rama principal de desarrollo + ↑ (merges desde feature/*, bugfix/*, hotfix/*) + +feature/* (features nuevas) + ├─ feature/CGAI-12-base-agent + ├─ feature/CGAI-19-security-agent + └─ feature/CGAI-20-fastapi-endpoint + +bugfix/* (bugs en develop) + └─ bugfix/CGAI-99-fix-orchestrator-timeout + +hotfix/* (bugs críticos en main) + └─ hotfix/CGAI-98-security-patch + +release/* (preparación de releases - Sprint 2+) + └─ release/v1.1.0 +``` + +### Crear Feature Branch + +```bash +# 1. Asegúrate que develop esté actualizado +git checkout develop +git pull origin develop + +# 2. Crear feature branch (formato: feature/CGAI-XX-descripcion-corta) +git checkout -b feature/CGAI-19-security-agent + +# 3. Hacer cambios y commits +# ... trabajar en el código ... + +# 4. Mantener actualizado con develop +git fetch origin +git rebase origin/develop + +# 5. Push +git push -u origin feature/CGAI-19-security-agent + +# 6. Crear PR en GitHub +``` + +### Crear Bugfix Branch (bugs en develop) + +```bash +git checkout develop +git pull origin develop +git checkout -b bugfix/CGAI-99-fix-description +# ... hacer cambios ... 
+git push -u origin bugfix/CGAI-99-fix-description +``` + +### Crear Hotfix Branch (bugs críticos en main) + +```bash +# Los hotfix se ramifican desde main +git checkout main +git pull origin main +git checkout -b hotfix/CGAI-98-critical-fix + +# Hacer fix y commit +git commit -m "fix(agents): patch critical vulnerability + +[descripción del fix]" + +# Merge a main +git checkout main +git merge --no-ff hotfix/CGAI-98-critical-fix +git push origin main + +# Merge también a develop +git checkout develop +git merge --no-ff hotfix/CGAI-98-critical-fix +git push origin develop +``` + +### Release Branch (Sprint 2+) + +```bash +# Para preparar una versión +git checkout develop +git checkout -b release/v1.1.0 + +# En release solo se corrigen bugs, no se agregan features +git commit -m "bump version to 1.1.0" + +# Merge a main con tag +git checkout main +git merge --no-ff release/v1.1.0 +git tag -a v1.1.0 -m "Release version 1.1.0" +git push origin main --tags +``` + +--- + +## 📝 Convenciones de Commits + +CodeGuard AI sigue **Conventional Commits** para mantener un historial limpio y automatizable. 
+ +### Formato + +``` +(): + +[cuerpo opcional] + +[footer(s) opcional(es)] +``` + +### Tipos de Commits + +| Tipo | Descripción | Ejemplo | +|------|-------------|---------| +| `feat` | Nueva funcionalidad | `feat(security): add hardcoded credentials detection` | +| `fix` | Corrección de bug | `fix(api): handle null pointer in analyze endpoint` | +| `docs` | Cambios en documentación | `docs(readme): update installation steps` | +| `style` | Formato (sin cambio lógico) | `style(code): format with black` | +| `refactor` | Refactorización sin cambiar funcionalidad | `refactor(agents): extract logging method` | +| `test` | Agregar o modificar tests | `test(security): add unit tests for eval detection` | +| `chore` | Mantenimiento, dependencias | `chore(deps): update pytest to 8.0` | +| `perf` | Mejora de rendimiento | `perf(analysis): optimize AST parsing` | +| `ci` | Cambios en CI/CD | `ci(github): add coverage reporting` | + +### Scopes Comunes + +``` +agents, security, quality, performance, style, orchestrator +api, schemas, routers, services, core, database +auth, cache, events, config, dependencies +docker, ci, tests, docs +``` + +### Ejemplos Correctos + +```bash +# Feature simple +git commit -m "feat(security): add SQL injection detection" + +# Bug fix +git commit -m "fix(api): return 422 for invalid filename" + +# Con cuerpo +git commit -m "feat(agents): implement quality metrics calculation + +- Add cyclomatic complexity calculation +- Add code duplication detection +- Add test coverage computation +- Related to CGAI-20" + +# Breaking change +git commit -m "feat(api)!: change analyze response format + +BREAKING CHANGE: response now uses 'analysis_id' instead of 'id'" + +# Multiple scopes +git commit -m "refactor(core,services): improve dependency injection + +- Simplify container initialization +- Add lazy loading for services +- Update documentation" +``` + +### ❌ Ejemplos Incorrectos + +```bash +# Falta tipo +git commit -m "add new feature" + +# Tipo 
incorrecto +git commit -m "Feature: add new agent" + +# Descripción muy vaga +git commit -m "fix: fixes bug" + +# Mayúscula al inicio +git commit -m "feat: Add new endpoint" + +# Punto al final +git commit -m "feat(security): add detection." + +# Demasiado largo (>72 caracteres) +git commit -m "feat(api): implement a very comprehensive analysis system for detecting all types of vulnerabilities" +``` + +### Reglas de Formato + +| Regla | Detalle | +|-------|---------| +| **Primera línea** | Máximo 72 caracteres | +| **Cuerpo** | Máximo 100 caracteres por línea | +| **Tipo** | En minúscula | +| **Scope** | En minúscula (opcional) | +| **Descripción** | Comienza en minúscula, modo imperativo | +| **Punto final** | Sin punto en la primera línea | + +--- + +## ✅ Estándares de Calidad + +### 1. Linting (Pylint ≥ 8.5/10) + +```bash +cd backend + +# Ejecutar pylint +pylint src/ --rcfile=.pylintrc + +# Verificar score +pylint src/ --rcfile=.pylintrc | grep -E "rated at" +``` + +**Configuración** (`.pylintrc`): +```ini +[MASTER] +max-line-length=100 +disable=C0111,C0103,R0903 + +[MESSAGES CONTROL] +disable=missing-docstring,too-few-public-methods +``` + +### 2. Testing (Coverage ≥ 75%) + +```bash +cd backend + +# Ejecutar tests con cobertura +pytest tests/ \ + --cov=src \ + --cov-report=term-missing \ + --cov-report=html \ + --cov-fail-under=75 \ + -v +``` + +### 3. Formateo (Black + isort) + +```bash +cd backend + +# Formatear código +black src/ tests/ --line-length=100 +isort src/ tests/ --profile=black + +# Verificar sin modificar +black --check src/ tests/ +isort --check-only src/ tests/ +``` + +### 4. Type Hints (Obligatorio para métodos públicos) + +```python +# ✅ Correcto +def analyze(self, context: AnalysisContext) -> List[Finding]: + """Analyze code and return findings.""" + pass + +# ❌ Incorrecto +def analyze(self, context): + return [] +``` + +### 5. 
Docstrings (Obligatorio para clases y métodos públicos) + +```python +# ✅ Correcto +class SecurityAgent(BaseAgent): + """ + Agent for detecting security vulnerabilities. + + Analyzes Python code for: + - Dangerous functions (eval, exec) + - SQL injection patterns + - Hardcoded credentials + """ + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analyze code for security issues. + + Args: + context: Analysis context with code and metadata + + Returns: + List of security findings + """ + pass + +# ❌ Incorrecto +class SecurityAgent(BaseAgent): + def analyze(self, context): + pass +``` + +--- + +## 🔄 Proceso de Pull Request + +### Antes de Crear el PR + +```bash +cd backend + +# 1. Verificar linting +pylint src/ --rcfile=.pylintrc + +# 2. Ejecutar tests localmente +pytest tests/ --cov=src --cov-fail-under=75 + +# 3. Formatear código +black src/ tests/ --line-length=100 +isort src/ tests/ --profile=black + +# 4. Verificar commits +git log --oneline -5 +# Todos deben tener formato: tipo(scope): descripcion + +# 5. Rebase con develop (si es necesario) +git fetch origin +git rebase origin/develop +``` + +### Crear Pull Request + +1. **Push de la rama**: +```bash +git push -u origin feature/CGAI-19-security-agent +``` + +2. **Crear PR en GitHub**: + - Base: `develop` (o `main` para hotfixes) + - Compare: tu rama + +3. **Completar la plantilla del PR**: + +```markdown +## 📝 Descripción +Implementa detección de credenciales hardcodeadas en SecurityAgent para identificar contraseñas, API keys y tokens en código Python. 
+ +## 🎯 Historia de Usuario Relacionada +Closes #19 (CGAI-19: SecurityAgent v1) + +## 🧪 Cómo se Probó +- [x] Tests unitarios agregados (15 nuevos tests) +- [x] Tests de integración con AnalysisService +- [x] Probado manualmente con código malicioso +- [x] Cobertura: 88% (cumple umbral 75%) + +## ✅ Checklist Previo al Merge +- [x] Mi código sigue las convenciones del proyecto +- [x] He agregado tests que prueban mis cambios +- [x] Todos los tests pasan localmente (`pytest`) +- [x] He actualizado la documentación relevante +- [x] Mis commits siguen Conventional Commits +- [x] He hecho rebase con develop +- [x] He ejecutado linting localmente +- [x] He verificado coverage >75% + +## 📸 Screenshots (si aplica) +N/A + +## 📚 Notas Adicionales +- Implementa detección con regex patterns +- Detecta placeholders (YOUR_, REPLACE_) para evitar falsos positivos +- Integrado con EventBus para notificaciones en tiempo real +- Compatible con Python 3.11+ +``` + +### Revisión de Código + +**Requisitos para merge**: +1. ✅ **CI Passing**: Los 3 workflows en verde + - `lint.yml`: Pylint ≥ 8.5/10 + - `test.yml`: Tests passing + coverage ≥ 75% + - `docker.yml`: Build exitoso + +2. ✅ **1+ Aprobación**: Al menos un reviewer + +3. 
✅ **Conflicts Resolved**: Sin conflictos con base + +**Proceso**: +- Revisor deja comentarios en líneas específicas +- Autor responde y hace cambios +- Push de commits adicionales (NO force push) +- Revisor aprueba cuando cambios son satisfactorios + +### Merge del PR + +```bash +# Merge strategy: Squash (por defecto para features) +# Esto combina todos los commits en uno solo + +# Mensaje de merge sugerido: +feat(security): detect hardcoded credentials (#19) + +- Implement regex-based credential detection +- Add placeholders to avoid false positives +- Integrate with event system +- Add comprehensive unit tests (88% coverage) + +Closes CGAI-19 +``` + +**Después del merge**: +```bash +# Branch se elimina automáticamente en GitHub +# O manualmente: +git branch -d feature/CGAI-19-security-agent +git push origin --delete feature/CGAI-19-security-agent +``` + +--- + +## 🛠️ Configuración del Entorno + +### Requisitos Previos + +- Python 3.11+ +- Git +- Docker (opcional) +- VSCode o PyCharm + +### Instalación + +```bash +# 1. Fork y clonar +git clone https://github.com/YOUR_USERNAME/CodeGuard-Unal.git +cd CodeGuard-Unal/backend + +# 2. Agregar remote upstream +git remote add upstream https://github.com/YOUR_ORG/CodeGuard-Unal.git + +# 3. Entorno virtual +python3.11 -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate + +# 4. Instalar dependencias +pip install --upgrade pip +pip install -r requirements.txt +pip install -r requirements-dev.txt + +# 5. Pre-commit hooks +pip install pre-commit +pre-commit install + +# 6. Copiar .env +cp .env.example .env +``` + +### Pre-commit Hooks (Validación Automática) + +Los pre-commit hooks ejecutan validaciones automáticamente antes de cada commit. 
+ +**Archivo**: `.pre-commit-config.yaml` + +```yaml +repos: + - repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black + language_version: python3.11 + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + args: [--profile=black] + + - repo: https://github.com/PyCQA/pylint + rev: v3.0.0 + hooks: + - id: pylint + args: [--rcfile=.pylintrc, --fail-under=8.5] +``` + +--- + +## 🧪 Testing + +### Ejecutar Tests + +```bash +cd backend + +# Todos los tests +pytest tests/ -v + +# Solo tests unitarios +pytest tests/unit/ -v + +# Solo tests de integración +pytest tests/integration/ -v + +# Con cobertura detallada +pytest tests/ --cov=src --cov-report=term-missing + +# HTML report +pytest tests/ --cov=src --cov-report=html +open htmlcov/index.html +``` + +### Estructura de Tests + +``` +backend/tests/ +├── unit/ # Tests de componentes aislados +│ ├── test_base_agent.py +│ ├── test_security_agent.py +│ └── ... +├── integration/ # Tests de interacción entre componentes +│ ├── test_analysis_service.py +│ └── test_security_agent_integration.py +├── e2e/ # Tests end-to-end +│ └── test_complete_analysis.py +├── fixtures/ # Mock data +│ ├── mock_data.py +│ └── sample_code.py +└── conftest.py # Pytest fixtures +``` + +### Escribir Tests + +```python +import pytest +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext + +class TestSecurityAgent: + """Test suite for SecurityAgent""" + + @pytest.fixture + def agent(self): + """Create agent instance""" + return SecurityAgent() + + def test_detect_eval(self, agent): + """Test detection of eval() function""" + code = "result = eval(user_input)" + context = AnalysisContext( + code_content=code, + filename="test.py" + ) + + findings = agent.analyze(context) + + assert len(findings) >= 1 + assert any(f.issue_type == "dangerous_function" for f in findings) +``` + +--- + +## 🐛 Reportar Bugs + +### Antes de Reportar + +1. 
Busca issues existentes duplicados +2. Reproduce el bug consistentemente +3. Recopila información: OS, Python version, logs + +### Template de Issue para Bugs + +```markdown +## 🐛 Descripción del Bug +Descripción clara y concisa del problema. + +## 🔄 Pasos para Reproducir +1. Cargar archivo con 'eval' +2. Llamar POST /api/v1/analyze +3. Observar que no se detecta eval + +## ✅ Comportamiento Esperado +El SecurityAgent debería detectar eval con severity=critical + +## ❌ Comportamiento Actual +El análisis retorna 0 findings + +## 📋 Contexto +- OS: Ubuntu 22.04 +- Python: 3.11.5 +- Branch: develop + +## 📝 Logs +\`\`\` +[ERROR] AST parsing failed for test.py +Traceback... +\`\`\` +``` + +--- + +## ✨ Sugerir Mejoras + +### Template de Feature Request + +```markdown +## ✨ Descripción +Agregar soporte para detección de SSRF (Server-Side Request Forgery) + +## 🎯 Problema que Resuelve +SSRF está en OWASP Top 10 y no está detectado actualmente + +## 💡 Solución Propuesta +- Detectar urllib/requests sin validación +- Identificar patrones como requests.get(user_input) +- Sugerir listas blancas de dominios + +## 🔄 Alternativas Consideradas +- Integrar Bandit con regla B310 +- Custom regex patterns +``` + +--- + +## 💬 Preguntas? + +- **Slack**: [#codeguard-dev](https://codeguard-unal.slack.com) +- **Email**: codeguard-ai@unal.edu.co +- **Office Hours**: Martes y Jueves 2-4 PM (COT) +- **Issues**: [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) + +--- + +
+<div align="center">
+
+Gracias por contribuir a CodeGuard AI ❤️
+
+Juntos hacemos mejores desarrolladores y código más seguro
+
+</div>
diff --git a/README.md b/README.md index d0f700f..f350717 100644 Binary files a/README.md and b/README.md differ diff --git a/backend/.dockerignore b/backend/.dockerignore index d81bd43..4b432d9 100644 --- a/backend/.dockerignore +++ b/backend/.dockerignore @@ -1,22 +1,55 @@ -__pycache__ -*.pyc -*.pyo -*.pyd +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so .Python env/ venv/ +.venv/ +ENV/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Environment .env -.venv -*.egg-info/ -dist/ +.env.local + +# Logs +*.log +logs/ + +# Build build/ +dist/ +*.egg-info/ + +# Git .git/ .gitignore -.vscode/ -.idea/ + +# Documentation +docs/ *.md +!README.md + +# Tests tests/ -docs/ -.pytest_cache/ -.coverage -htmlcov/ + +# Alembic migrations (include only in production builds) +alembic/versions/*.py +!alembic/versions/__init__.py diff --git a/backend/.env.example b/backend/.env.example index 7b00ee9..ea2c467 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,3 +1,8 @@ +# ========================================== +# CodeGuard AI - Environment Variables +# Copy to .env and fill with your values +# ========================================== + # Application APP_NAME=CodeGuard AI APP_VERSION=1.0.0 @@ -9,20 +14,20 @@ API_HOST=0.0.0.0 API_PORT=8000 # Database (Supabase PostgreSQL) -DATABASE_URL=postgresql://user:password@localhost:5432/codeguard +DATABASE_URL=postgresql://codeguard:devpassword@localhost:5432/codeguard_dev SUPABASE_URL=https://your-project.supabase.co SUPABASE_KEY=your-supabase-anon-key # Redis Cache REDIS_URL=redis://localhost:6379/0 -REDIS_PASSWORD= +REDIS_PASSWORD=devpassword REDIS_TTL=86400 # Authentication (Clerk) CLERK_SECRET_KEY=sk_test_... CLERK_PUBLISHABLE_KEY=pk_test_... -# AI Services (Sprint 3) +# AI Services (Sprint 3 - Optional for Sprint 1) GOOGLE_AI_API_KEY=AIzaSy... 
GOOGLE_CLOUD_PROJECT=your-gcp-project VERTEX_AI_LOCATION=us-central1 diff --git a/backend/Dockerfile b/backend/Dockerfile index adb466b..723d1aa 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,22 +1,49 @@ +# ========================================== +# CodeGuard AI - Backend Dockerfile +# Python 3.11 + FastAPI +# ========================================== + FROM python:3.11-slim +# Metadata +LABEL maintainer="CodeGuard AI Team " +LABEL description="Multi-Agent Code Review System - Backend API" + +# Environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +# Set working directory WORKDIR /app # Install system dependencies -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ postgresql-client \ + curl \ && rm -rf /var/lib/apt/lists/* -# Copy requirements +# Copy requirements first (for layer caching) COPY requirements.txt . + +# Install Python dependencies RUN pip install --no-cache-dir -r requirements.txt -# Copy application +# Copy application code COPY . . +# Create non-root user +RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app +USER appuser + # Expose port EXPOSE 8000 +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + # Run application -CMD ["uvicorn", "src.core.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 46c3af8..a53b1d5 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,21 +1,53 @@ -version: '3.8' +version: '3.9' services: + # ========================================== + # Backend API (FastAPI) + # ========================================== backend: - build: . + build: + context: . 
+ dockerfile: Dockerfile container_name: codeguard-backend ports: - "8000:8000" - env_file: - - .env + environment: + # Application + APP_NAME: "CodeGuard AI" + DEBUG: "True" + ENVIRONMENT: "development" + + # Database + DATABASE_URL: "postgresql://codeguard:devpassword@db:5432/codeguard_dev" + SUPABASE_URL: "${SUPABASE_URL:-https://your-project.supabase.co}" + SUPABASE_KEY: "${SUPABASE_KEY:-your-key-here}" + + # Redis + REDIS_URL: "redis://:devpassword@redis:6379/0" + + # Auth (Clerk) + CLERK_SECRET_KEY: "${CLERK_SECRET_KEY:-sk_test_placeholder}" + CLERK_PUBLISHABLE_KEY: "${CLERK_PUBLISHABLE_KEY:-pk_test_placeholder}" + + # AI (Sprint 3 - optional for now) + GOOGLE_AI_API_KEY: "${GOOGLE_AI_API_KEY:-}" + + # CORS + ALLOWED_ORIGINS: "http://localhost:3000,http://localhost:5173" volumes: - - ./src:/app/src - - ./tests:/app/tests + - .:/app depends_on: - - db - - redis - command: uvicorn src.core.main:app --host 0.0.0.0 --port 8000 --reload + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - codeguard-network + restart: unless-stopped + # ========================================== + # PostgreSQL Database + # ========================================== db: image: postgres:15-alpine container_name: codeguard-db @@ -23,20 +55,46 @@ services: POSTGRES_USER: codeguard POSTGRES_PASSWORD: devpassword POSTGRES_DB: codeguard_dev + PGDATA: /var/lib/postgresql/data/pgdata ports: - - "5432:5432" + - "5433:5432" volumes: - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U codeguard -d codeguard_dev"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - codeguard-network + restart: unless-stopped + # ========================================== + # Redis Cache + # ========================================== redis: image: redis:7-alpine container_name: codeguard-redis + command: redis-server --appendonly yes --requirepass devpassword ports: - "6379:6379" - command: redis-server --appendonly yes volumes: 
- redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "-a", "devpassword", "ping"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - codeguard-network + restart: unless-stopped volumes: postgres_data: + driver: local redis_data: + driver: local + +networks: + codeguard-network: + driver: bridge diff --git a/backend/fix_code_quality.sh b/backend/fix_code_quality.sh new file mode 100644 index 0000000..3b03e77 --- /dev/null +++ b/backend/fix_code_quality.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -e +set -o pipefail + +echo "🔧 Fixing code quality issues..." + +echo "📝 Running black..." +if ! black src/agents/ src/schemas/ tests/ --line-length=100; then + echo "❌ Black formatting failed" + exit 1 +fi + +echo "📦 Running isort..." +if ! isort src/agents/ src/schemas/ tests/ --profile=black; then + echo "❌ isort failed" + exit 1 +fi + +echo "🔍 Running pylint..." +if ! pylint src/agents/ src/schemas/ --fail-under=8.5; then + echo "❌ Pylint score below 8.5" + exit 1 +fi + +echo "✅ All quality checks passed!" 
+echo "" +echo "Now run tests:" +echo "pytest tests/unit/ -v --cov=src --cov-report=term-missing" \ No newline at end of file diff --git a/backend/main.py b/backend/main.py deleted file mode 100644 index eb672e6..0000000 --- a/backend/main.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -CodeGuard AI - Backend Entry Point -Multi-Agent Code Review System -""" -import uvicorn -from src.config.settings import settings - -if __name__ == "__main__": - uvicorn.run( - "src.core.main:app", - host=settings.API_HOST, - port=settings.API_PORT, - reload=settings.DEBUG, - log_level=settings.LOG_LEVEL.lower() - ) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 2dfb213..ab95532 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,14 +1,20 @@ [tool.black] line-length = 100 -target-version = ['py311'] include = '\.pyi?$' extend-exclude = ''' /( - \.eggs + # directories + \.eggs | \.git + | \.hg + | \.mypy_cache + | \.tox | \.venv + | _build + | buck-out | build | dist + | alembic/versions )/ ''' @@ -17,16 +23,15 @@ profile = "black" line_length = 100 multi_line_output = 3 include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +skip_gitignore = true +skip = ["alembic/versions"] -[tool.mypy] -python_version = "3.11" -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -plugins = ["pydantic.mypy"] - -[tool.pylint.messages_control] -disable = "C0330, C0326" - -[tool.pylint.format] -max-line-length = "100" +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" +python_classes = "Test*" +python_functions = "test_*" +addopts = "-v --cov=src --cov-report=html --cov-report=term-missing --cov-fail-under=75" diff --git a/backend/requirements-dev.txt b/backend/requirements-dev.txt index b0426c2..a7cda32 100644 --- a/backend/requirements-dev.txt +++ b/backend/requirements-dev.txt @@ -1,17 +1,15 @@ --r requirements.txt +# 
========================================== +# CodeGuard AI - Development Dependencies +# ========================================== -# Development Tools -ipython==8.18.1 -ipdb==0.13.13 +# ===== DEVELOPMENT TOOLS ===== +black>=24.0.0 +isort>=5.13.0 +mypy>=1.8.0 -# Code Quality -pre-commit==3.5.0 -pylint==3.0.2 -flake8==6.1.0 -black==23.11.0 -isort==5.12.0 -mypy==1.7.1 - -# Documentation -mkdocs==1.5.3 -mkdocs-material==9.5.0 +# ===== TESTING (Sprint 1 Core) ===== +pytest>=8.0.0 +pytest-asyncio>=0.23.0 +pytest-cov>=4.1.0 +faker>=22.0.0 # Para datos de prueba +httpx>=0.27.0 # Para TestClient diff --git a/backend/requirements.txt b/backend/requirements.txt index 7e3a494..8d03844 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,55 +1,61 @@ -# Web Framework -fastapi==0.104.1 -uvicorn[standard]==0.24.0 -python-multipart==0.0.6 -websockets==12.0 - -# Database & ORM -sqlalchemy==2.0.23 -alembic==1.12.1 -psycopg2-binary==2.9.9 - -# Supabase Client -supabase==2.0.3 - -# Authentication -clerk-backend-api==1.0.0 -python-jose[cryptography]==3.3.0 -passlib[bcrypt]==1.7.4 - -# AI & MCP (Sprint 3) -google-generativeai==0.3.1 -google-cloud-aiplatform==1.38.0 -mcp==1.0.0 - -# Static Analysis Tools -bandit==1.7.5 -radon==6.0.1 -pylint==3.0.2 -flake8==6.1.0 - -# Cache -redis==5.0.1 -hiredis==2.2.3 - -# Utilities -pydantic==2.5.0 -pydantic-settings==2.1.0 -python-dotenv==1.0.0 -structlog==23.2.0 - -# PDF Generation -reportlab==4.0.7 -weasyprint==60.1 - -# Testing -pytest==7.4.3 -pytest-asyncio==0.21.1 -pytest-cov==4.1.0 -httpx==0.25.2 -faker==20.1.0 - -# Development -black==23.11.0 -isort==5.12.0 -mypy==1.7.1 +# ========================================== +# CodeGuard AI - Backend Dependencies +# ========================================== + +# ===== WEB FRAMEWORK ===== +fastapi>=0.109.0 # Última versión compatible con Pydantic 2.8+ +uvicorn[standard]>=0.27.0 +python-multipart>=0.0.6 + +# ===== DATABASE & ORM ===== +sqlalchemy>=2.0.25 +alembic>=1.13.0 
+psycopg2-binary>=2.9.9 + +# ===== AUTHENTICATION ===== +# Clerk (requiere Pydantic 2.8+, httpx 0.27+) +clerk-backend-api>=1.0.0 +python-jose[cryptography]>=3.3.0 + +# ===== SUPABASE (comentado - incompatible con Clerk httpx) ===== +# Usar PostgreSQL directo con SQLAlchemy en su lugar +# supabase>=2.3.0 + +# ===== STATIC ANALYSIS (Sprint 1 Core) ===== +bandit>=1.7.5 +radon>=6.0.1 +pylint>=3.0.3 +flake8>=7.0.0 + +# ===== CACHE ===== +redis>=5.0.1 +hiredis>=2.3.0 + +# ===== UTILITIES ===== +pydantic>=2.8.0 # Compatible con Clerk +pydantic-settings>=2.2.0 +python-dotenv>=1.0.0 +requests>=2.31.0 +httpx>=0.27.0 # Compatible con Clerk + +# ===== AI SERVICES (Sprint 3) ===== +google-generativeai>=0.3.2 # Gemini API +# google-cloud-aiplatform>=1.40.0 # Vertex AI (opcional) + +# ===== DEVELOPMENT TOOLS ===== (Moved to requirements-dev.txt) +# black>=24.0.0 +# isort>=5.13.0 +# mypy>=1.8.0 + +# ===== TESTING (Sprint 1 Core) ===== (Moved to requirements-dev.txt) +# pytest>=8.0.0 +# pytest-asyncio>=0.23.0 +# pytest-cov>=4.1.0 +# faker>=22.0.0 # Para datos de prueba + +# ===== PDF GENERATION (Sprint 2) ===== +reportlab>=4.0.9 + +# ===== WEBSOCKETS (Sprint 2) ===== +websockets>=12.0 +email-validator>=2.0.0 diff --git a/backend/src/agents/base_agent.py b/backend/src/agents/base_agent.py index e69de29..fc678d9 100644 --- a/backend/src/agents/base_agent.py +++ b/backend/src/agents/base_agent.py @@ -0,0 +1,199 @@ +""" +Clase base abstracta para todos los agentes de análisis +""" + +import logging +from abc import ABC, abstractmethod +from datetime import datetime +from typing import TYPE_CHECKING, List, Optional + +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding + +if TYPE_CHECKING: + from src.core.events.event_bus import EventBus + + +class BaseAgent(ABC): + """ + Clase base abstracta para todos los agentes de análisis. + + Todos los agentes especializados deben heredar de esta clase + e implementar el método analyze(). 
+ + Attributes: + name: Nombre identificador del agente + version: Versión del agente + category: Categoría (security, quality, performance, style) + enabled: Estado de habilitación del agente + event_bus: Sistema de eventos para comunicación (opcional) + + Example: + class SecurityAgent(BaseAgent): + def __init__(self): + super().__init__( + name="SecurityAgent", + version="1.0.0", + category="security" + ) + + def analyze(self, context: AnalysisContext) -> List[Finding]: + # Implementación específica + pass + """ + + def __init__( + self, + name: str, + version: str = "1.0.0", + category: str = "general", + enabled: bool = True, + event_bus: Optional["EventBus"] = None, + ) -> None: + # pylint: disable=too-many-arguments,too-many-positional-arguments + """ + Inicializa el agente base. + + Args: + name: Identificador único del agente + version: Versión del agente (semver) + category: Categoría de análisis + enabled: Si el agente está activo + event_bus: EventBus para emitir eventos (opcional) + + Raises: + ValueError: Si name está vacío + """ + if not name or not name.strip(): + raise ValueError("Agent name cannot be empty") + + self.name = name + self.version = version + self.category = category + self.enabled = enabled + self.event_bus = event_bus + self.logger = logging.getLogger(f"agents.{name}") + + self.logger.info("[%s] Agent initialized - version %s", self.name, self.version) + + @abstractmethod + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Método abstracto que debe ser implementado por todas las clases hijas. + + Analiza el código en el contexto y retorna una lista de hallazgos. 
+ + Args: + context: Contexto de análisis con código y metadata + + Returns: + Lista de Finding encontrados durante el análisis + + Raises: + NotImplementedError: Si no es implementado por la clase hija + """ + + def _emit_agent_started(self, context: AnalysisContext) -> None: + """Emite evento AGENT_STARTED.""" + if self.event_bus: + self.event_bus.publish( + { + "type": "AGENT_STARTED", + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "timestamp": datetime.utcnow().isoformat(), + } + ) + self.log_info("Analysis started") + + def _emit_agent_completed(self, context: AnalysisContext, findings: List[Finding]) -> None: + """Emite evento AGENT_COMPLETED.""" + if self.event_bus: + self.event_bus.publish( + { + "type": "AGENT_COMPLETED", + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "findings_count": len(findings), + "timestamp": datetime.utcnow().isoformat(), + } + ) + self.log_info(f"Analysis completed - {len(findings)} findings") + + def _emit_agent_failed(self, context: AnalysisContext, error: Exception) -> None: + """Emite evento AGENT_FAILED.""" + if self.event_bus: + self.event_bus.publish( + { + "type": "AGENT_FAILED", + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "error": str(error), + "timestamp": datetime.utcnow().isoformat(), + } + ) + self.log_error(f"Analysis failed: {error}") + + def is_enabled(self) -> bool: + """ + Verifica si el agente está habilitado. + + Returns: + True si el agente está habilitado, False en caso contrario + """ + return self.enabled + + def enable(self) -> None: + """Habilita el agente.""" + self.enabled = True + self.logger.info("[%s] Agent enabled", self.name) + + def disable(self) -> None: + """Deshabilita el agente.""" + self.enabled = False + self.logger.warning("[%s] Agent disabled", self.name) + + def get_info(self) -> dict: + """ + Retorna información metadata del agente. 
+ + Returns: + Diccionario con información del agente + """ + return { + "name": self.name, + "version": self.version, + "category": self.category, + "enabled": self.enabled, + } + + def log_info(self, message: str) -> None: + """Log a message at INFO level.""" + self.logger.info("[%s] %s", self.name, message) + + def log_warning(self, message: str) -> None: + """Log a nivel WARNING.""" + self.logger.warning("[%s] %s", self.name, message) + + def log_error(self, message: str) -> None: + """Log a nivel ERROR.""" + self.logger.error("[%s] %s", self.name, message) + + def log_debug(self, message: str) -> None: + """Log a nivel DEBUG.""" + self.logger.debug("[%s] %s", self.name, message) + + def __repr__(self) -> str: + """Representación string del agente.""" + return ( + f"<{self.__class__.__name__}(" + f"name={self.name}, " + f"version={self.version}, " + f"category={self.category}, " + f"enabled={self.enabled})>" + ) + + def __str__(self) -> str: + """String amigable del agente.""" + status = "enabled" if self.enabled else "disabled" + return f"{self.name} v{self.version} ({self.category}) - {status}" diff --git a/backend/src/agents/security_agent.py b/backend/src/agents/security_agent.py index e69de29..a644e8d 100644 --- a/backend/src/agents/security_agent.py +++ b/backend/src/agents/security_agent.py @@ -0,0 +1,648 @@ +""" +SecurityAgent - Agente especializado en detección de vulnerabilidades de seguridad. + +Este agente analiza código Python en busca de problemas de seguridad comunes incluyendo: +- Funciones peligrosas (eval, exec, pickle, etc.) 
+- Vulnerabilidades de inyección SQL +- Credenciales hardcodeadas (contraseñas, API keys, tokens) +- Algoritmos criptográficos débiles (MD5, SHA1, DES) +""" + +import ast +import re +from typing import Dict, List, Optional, Set + +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class SecurityAgent(BaseAgent): + """ + Agente especializado en detectar vulnerabilidades de seguridad en código Python. + + Analiza el código usando múltiples estrategias de detección: + 1. Análisis AST (Abstract Syntax Tree) para funciones peligrosas + 2. Coincidencia de patrones regex para inyección SQL + 3. Regex y detección de placeholders para credenciales hardcodeadas + 4. Análisis AST para algoritmos criptográficos débiles + + Atributos: + DANGEROUS_FUNCTIONS: Conjunto de nombres de funciones consideradas peligrosas + SQL_INJECTION_PATTERNS: Patrones regex para detección de inyección SQL + CREDENTIAL_PATTERNS: Patrones regex para detección de credenciales + WEAK_CRYPTO_ALGORITHMS: Conjunto de nombres de algoritmos criptográficos débiles + + Ejemplo: + >>> agent = SecurityAgent() + >>> context = AnalysisContext( + ... code_content="result = eval(user_input)", + ... filename="vulnerable.py" + ... 
) + >>> findings = agent.analyze(context) + >>> assert len(findings) >= 1 + >>> assert findings[0].severity == Severity.CRITICAL + """ + + # Funciones peligrosas que permiten ejecución arbitraria de código + DANGEROUS_FUNCTIONS: Set[str] = { + "eval", + "exec", + "compile", + "__import__", + "execfile", # Python 2 + } + + # Funciones peligrosas de pickle/serialización + PICKLE_FUNCTIONS: Set[str] = { + "pickle.loads", + "pickle.load", + "cPickle.loads", + "cPickle.load", + "yaml.load", # Sin argumento Loader + "marshal.loads", + } + + # Patrones de inyección SQL (regex) - CORREGIDOS + SQL_INJECTION_PATTERNS: List[str] = [ + r'execute\s*\(\s*["\'].*\+', # Concatenación con + + r'execute\s*\(\s*f["\']', # f-strings en execute directo + r'execute\s*\(\s*["\'].*%s', # %s formatting + r'execute\s*\(\s*["\'].*\.format', # .format() en execute + r'\.execute\s*\(\s*["\'].*\+\s*\w', # execute con concatenación y variable + ] + + # Patrones de credenciales (regex) + CREDENTIAL_PATTERNS: List[dict] = [ + { + "pattern": r'password\s*=\s*["\'][^"\']{8,}["\']', + "name": "password", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'api[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "api_key", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'secret[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "secret_key", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'token\s*=\s*["\'][^"\']{10,}["\']', + "name": "token", + "severity": Severity.HIGH, + }, + { + "pattern": r'access[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "access_key", + "severity": Severity.HIGH, + }, + ] + + # Placeholders a ignorar (no son credenciales reales) + PLACEHOLDER_PATTERNS: List[str] = [ + r"YOUR_", + r"REPLACE_", + r"CHANGE_", + r"TODO", + r"FIXME", + r"example", + r"test", + r"dummy", + r"<.*>", + r"\*+", + r"xxx+", + ] + + # Algoritmos criptográficos débiles + WEAK_CRYPTO_ALGORITHMS: Set[str] = { + "md5", + "sha1", + "DES", + "RC4", + "Blowfish", + } + + SQL_INJECTION_MESSAGE = 
( + "Posible vulnerabilidad de inyección SQL detectada - " + "entrada de usuario concatenada o formateada en consulta" + ) + SQL_INJECTION_SUGGESTION = ( + "Use parameterized queries or an ORM: " + "cursor.execute('SELECT * FROM users WHERE id=?', (user_id,))" + ) + + def __init__(self): + """Inicializa SecurityAgent con reglas de seguridad predefinidas.""" + super().__init__(name="SecurityAgent", version="1.0.0", category="security", enabled=True) + self.logger.info("SecurityAgent inicializado con 4 módulos de detección") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analiza código Python en busca de vulnerabilidades de seguridad. + + Ejecuta 4 tipos de análisis de seguridad: + 1. Detección de funciones peligrosas (eval, exec, etc.) + 2. Detección de patrones de inyección SQL + 3. Detección de credenciales hardcodeadas + 4. Detección de criptografía débil + + Args: + context: Contexto de análisis que contiene el código y metadata + + Returns: + Lista de hallazgos de seguridad, ordenados por severidad (CRITICAL primero) + + Raises: + SyntaxError: Si el código tiene sintaxis Python inválida (se registra, no se lanza) + + Ejemplo: + >>> agent = SecurityAgent() + >>> context = AnalysisContext( + ... code_content="password = 'MySecret123'", + ... filename="config.py" + ... 
) + >>> findings = agent.analyze(context) + >>> assert any(f.issue_type == "hardcoded_credentials" for f in findings) + """ + self.log_info(f"Iniciando análisis de seguridad para {context.filename}") + findings: List[Finding] = [] + + try: + # Módulo 1: Detectar funciones peligrosas + dangerous_findings = self._detect_dangerous_functions(context) + findings.extend(dangerous_findings) + self.log_debug(f"Funciones peligrosas: {len(dangerous_findings)} hallazgos") + + # Módulo 2: Detectar patrones de inyección SQL (regex + AST) + sql_findings = self._detect_sql_injection(context) + findings.extend(sql_findings) + self.log_debug(f"Inyección SQL: {len(sql_findings)} hallazgos") + + # Módulo 3: Detectar credenciales hardcodeadas + credential_findings = self._detect_hardcoded_credentials(context) + findings.extend(credential_findings) + self.log_debug(f"Credenciales hardcodeadas: {len(credential_findings)} hallazgos") + + # Módulo 4: Detectar criptografía débil + crypto_findings = self._detect_weak_crypto(context) + findings.extend(crypto_findings) + self.log_debug(f"Criptografía débil: {len(crypto_findings)} hallazgos") + + except SyntaxError as e: + self.log_error( + f"Error de sintaxis en {context.filename}: {e}. " + "Algunos módulos de análisis pueden tener resultados incompletos." + ) + # Continuar con hallazgos de módulos que no necesitan análisis AST + + # Ordenar hallazgos por severidad (CRITICAL primero) + findings.sort( + key=lambda f: (["critical", "high", "medium", "low", "info"].index(f.severity.value)) + ) + + self.log_info( + f"Análisis de seguridad completado: {len(findings)} hallazgos " + f"({sum(1 for f in findings if f.is_critical)} críticos)" + ) + + return findings + + def _detect_dangerous_functions(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta funciones peligrosas como eval(), exec() usando análisis AST. 
+ + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para uso de funciones peligrosas + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func_name = self._get_function_name(node) + + # Verificar funciones peligrosas directas + if func_name in self.DANGEROUS_FUNCTIONS: + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message=( + f"Uso de {func_name}() detectado - " + "permite ejecución arbitraria de código" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=self._get_dangerous_function_suggestion(func_name), + agent_name=self.name, + rule_id=f"SEC001_{func_name.upper()}", + ) + findings.append(finding) + + # Verificar funciones de pickle/serialización + elif func_name in self.PICKLE_FUNCTIONS: + finding = Finding( + severity=Severity.HIGH, + issue_type="unsafe_deserialization", + message=( + f"Uso de {func_name} detectado - " + "puede ejecutar código arbitrario durante " + "deserialización" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Use json.loads() for data deserialization or " + "validate pickle sources" + ), + agent_name=self.name, + rule_id="SEC001_PICKLE", + ) + findings.append(finding) + + except SyntaxError: + # El código fuente puede estar incompleto o contener errores de sintaxis. + # Ignoramos el error porque no se puede analizar AST en código inválido. + pass + + return findings + + def _detect_sql_injection(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta vulnerabilidades de inyección SQL usando patrones regex mejorados. 
+ + Detecta múltiples patrones comunes de SQL injection: + - Concatenación de strings con + + - Formateo con %s + - F-strings con {} + - .format() en queries + - Palabras clave SQL con variables + + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para vulnerabilidades de inyección SQL + """ + findings: List[Finding] = [] + found_sql_lines: Set[int] = set() + + findings.extend(self._detect_sql_injection_patterns(context, found_sql_lines)) + findings.extend(self._detect_sql_injection_ast(context, found_sql_lines)) + return findings + + def _detect_sql_injection_patterns( + self, context: AnalysisContext, found_sql_lines: Set[int] + ) -> List[Finding]: + """Analiza línea por línea usando regex para detectar SQL injection directa.""" + findings: List[Finding] = [] + lines = context.code_content.splitlines() + + for line_num, line in enumerate(lines, start=1): + stripped = line.strip() + if not stripped or stripped.startswith("#") or line_num in found_sql_lines: + continue + + for pattern in self.SQL_INJECTION_PATTERNS: + if re.search(pattern, line, re.IGNORECASE | re.MULTILINE): + findings.append( + Finding( + severity=Severity.HIGH, + issue_type="sql_injection", + message=self.SQL_INJECTION_MESSAGE, + line_number=line_num, + code_snippet=stripped, + suggestion=self.SQL_INJECTION_SUGGESTION, + agent_name=self.name, + rule_id="SEC002_SQL_INJECTION", + ) + ) + found_sql_lines.add(line_num) + break + + return findings + + def _detect_sql_injection_ast( + self, context: AnalysisContext, found_sql_lines: Set[int] + ) -> List[Finding]: + """Analiza el AST para detectar queries construidas antes de ejecutar.""" + findings: List[Finding] = [] + suspicious_vars = self._collect_suspicious_query_assignments(context) + if not suspicious_vars: + return findings + + execute_calls = self._find_execute_calls(context) + for line_num, argument in execute_calls: + if line_num not in found_sql_lines and self._is_suspicious_execute_arg( + 
argument, suspicious_vars + ): + findings.append( + Finding( + severity=Severity.HIGH, + issue_type="sql_injection", + message=self.SQL_INJECTION_MESSAGE, + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion=self.SQL_INJECTION_SUGGESTION, + agent_name=self.name, + rule_id="SEC002_SQL_INJECTION", + ) + ) + found_sql_lines.add(line_num) + + return findings + + @staticmethod + def _collect_suspicious_query_assignments( + context: AnalysisContext, + ) -> Dict[str, str]: + """Construye un mapa de variables que contienen posibles queries inseguras.""" + suspicious_vars: Dict[str, str] = {} + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return suspicious_vars + + for node in ast.walk(tree): + if isinstance(node, ast.Assign) and node.targets: + target = node.targets[0] + if isinstance(target, ast.Name): + assignment_type = SecurityAgent._classify_sql_assignment(node.value) + if assignment_type: + suspicious_vars[target.id] = assignment_type + return suspicious_vars + + @staticmethod + def _find_execute_calls(context: AnalysisContext) -> List[tuple[int, ast.AST]]: + """Obtiene las llamadas a execute() con su línea y primer argumento.""" + execute_calls: List[tuple[int, ast.AST]] = [] + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return execute_calls + + for node in ast.walk(tree): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Attribute) + and node.func.attr == "execute" + and node.args + ): + line_num = getattr(node, "lineno", 1) + execute_calls.append((line_num, node.args[0])) + return execute_calls + + @staticmethod + def _classify_sql_assignment(value: ast.AST) -> Optional[str]: + """Clasifica asignaciones sospechosas de queries.""" + if isinstance(value, ast.JoinedStr): + return "fstring" + if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Add): + return "concat" + if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Mod): + return "mod" 
+ if ( + isinstance(value, ast.Call) + and isinstance(value.func, ast.Attribute) + and value.func.attr == "format" + ): + return "format" + return None + + @staticmethod + def _is_suspicious_execute_arg(arg: ast.AST, suspicious_vars: Dict[str, str]) -> bool: + """Determina si el argumento pasado a execute es potencialmente inseguro.""" + if isinstance(arg, ast.JoinedStr): + return True + if isinstance(arg, ast.BinOp) and isinstance(arg.op, (ast.Add, ast.Mod)): + return True + if ( + isinstance(arg, ast.Call) + and isinstance(arg.func, ast.Attribute) + and arg.func.attr == "format" + ): + return True + if isinstance(arg, ast.Name) and arg.id in suspicious_vars: + return True + return False + + def _detect_hardcoded_credentials(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta credenciales hardcodeadas usando patrones regex y detección de placeholders. + + Busca patrones comunes como: + - password = "valor" + - api_key = "valor" + - secret_key = "valor" + - token = "valor" + + Filtra falsos positivos ignorando placeholders y valores cortos. 
+
+        Args:
+            context: Contexto de análisis con el código a analizar
+
+        Returns:
+            Lista de hallazgos para credenciales hardcodeadas
+        """
+        findings: List[Finding] = []
+        lines = context.code_content.splitlines()
+
+        for line_num, line in enumerate(lines, start=1):
+            # Saltar comentarios y líneas vacías
+            stripped = line.strip()
+            if not stripped or stripped.startswith("#"):
+                continue
+
+            for cred_config in self.CREDENTIAL_PATTERNS:
+                pattern = cred_config["pattern"]
+                cred_name = cred_config["name"]
+                severity = cred_config["severity"]
+
+                match = re.search(pattern, line, re.IGNORECASE)
+                if match:
+                    value = match.group(0).split("=", 1)[1].strip().strip("\"'")
+                    if self._is_placeholder(value) or len(value) < 8:
+                        continue
+
+                    env_var = cred_name.upper()
+                    finding = Finding(
+                        severity=severity,
+                        issue_type="hardcoded_credentials",
+                        message=(
+                            f"Hardcoded {cred_name} detected - secrets "
+                            "should not be in source code"
+                        ),
+                        line_number=line_num,
+                        code_snippet=line.strip(),
+                        suggestion=(
+                            f"Use environment variables: {env_var} = " f"os.getenv('{env_var}')"
+                        ),
+                        agent_name=self.name,
+                        rule_id=f"SEC003_{env_var}",
+                    )
+                    findings.append(finding)
+                    break  # Solo un hallazgo por línea
+
+        return findings
+
+    def _detect_weak_crypto(self, context: AnalysisContext) -> List[Finding]:
+        """
+        Detecta uso de algoritmos criptográficos débiles.
+ + Busca uso de: + - hashlib.md5() + - hashlib.sha1() + - Crypto.Cipher.DES + - RC4 + - Blowfish + + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para criptografía débil + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func_name = self._get_function_name(node) + if not func_name: + continue + + lower_name = func_name.lower() + + # Verificar funciones débiles de hash (md5 o sha1 en cualquiera de sus formas) + if "md5" in lower_name or "sha1" in lower_name: + algo = "MD5" if "md5" in lower_name else "SHA1" + finding = Finding( + severity=Severity.MEDIUM, + issue_type="weak_cryptography", + message=f"Uso de algoritmo de hash débil {algo} detectado", + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion="Usa SHA-256 o superior: hashlib.sha256()", + agent_name=self.name, + rule_id=f"SEC004_{algo}", + ) + findings.append(finding) + continue + + # Verificar algoritmos débiles de encriptación en librería Crypto + if any(weak in func_name for weak in ["DES", "RC4", "Blowfish"]): + finding = Finding( + severity=Severity.HIGH, + issue_type="weak_cryptography", + message=( + "Uso de algoritmo de encriptación débil " f"detectado: {func_name}" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion="Usa AES-256 con Crypto.Cipher.AES", + agent_name=self.name, + rule_id="SEC004_WEAK_ENCRYPTION", + ) + findings.append(finding) + + except SyntaxError: + # El código fuente puede estar incompleto o contener errores de sintaxis. + # Ignoramos el error porque no se puede analizar criptografía en código inválido. + pass + + return findings + + def _get_function_name(self, node: ast.Call) -> str: + """ + Extrae el nombre de la función de un nodo Call del AST. 
+ + Maneja tanto llamadas simples (func()) como llamadas de atributo (module.func()). + + Args: + node: Nodo Call del AST + + Returns: + Nombre de la función como string (ej: "eval" o "hashlib.md5") + """ + if isinstance(node.func, ast.Name): + return node.func.id + if isinstance(node.func, ast.Attribute): + if isinstance(node.func.value, ast.Name): + return f"{node.func.value.id}.{node.func.attr}" + return node.func.attr + return "" + + def _get_code_snippet( + self, context: AnalysisContext, line_number: int, context_lines: int = 0 + ) -> str: + """ + Extrae fragmento de código alrededor de la línea especificada. + + Args: + context: Contexto de análisis con el código + line_number: Número de línea (1-based) a extraer + context_lines: Número de líneas antes/después a incluir + + Returns: + Fragmento de código como string + """ + lines = context.code_content.splitlines() + + if 1 <= line_number <= len(lines): + start = max(0, line_number - 1 - context_lines) + end = min(len(lines), line_number + context_lines) + snippet_lines = lines[start:end] + return "\n".join(snippet_lines) + + return "" + + def _get_dangerous_function_suggestion(self, func_name: str) -> str: + """ + Obtiene sugerencia específica para el uso de función peligrosa. + + Args: + func_name: Nombre de la función peligrosa + + Returns: + String con sugerencia de alternativa segura + """ + suggestions = { + "eval": "Use ast.literal_eval() for safe evaluation of literals", + "exec": "Avoid exec() or validate input strictly with whitelisting", + "compile": "Avoid compile() or validate source code strictly", + "__import__": "Use importlib.import_module() with validation", + "execfile": "Use with open() and exec() with strict validation (Python 2 only)", + } + return suggestions.get(func_name, f"Avoid using {func_name}() or validate input strictly") + + def _is_placeholder(self, value: str) -> bool: + """ + Verifica si un valor de credencial es un placeholder (no un secreto real). 
+ + Ignora valores que contienen patrones comunes de placeholders como: + - YOUR_, REPLACE_, CHANGE_ + - TODO, FIXME + - example, test, dummy + + Args: + value: Valor de credencial a verificar + + Returns: + True si el valor es un placeholder, False en caso contrario + """ + value_lower = value.lower() + + for pattern in self.PLACEHOLDER_PATTERNS: + if re.search(pattern, value_lower, re.IGNORECASE): + return True + + return False diff --git a/backend/src/core/database.py b/backend/src/core/database.py index e69de29..99b1b09 100644 --- a/backend/src/core/database.py +++ b/backend/src/core/database.py @@ -0,0 +1,19 @@ +""" +Database configuration for CodeGuard AI +""" + +import os + +from dotenv import load_dotenv +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +load_dotenv() + +DATABASE_URL = os.getenv( + "DATABASE_URL", + "postgresql://postgres:postgres@localhost:5432/codeguard", +) + +engine = create_engine(DATABASE_URL, pool_pre_ping=True) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) diff --git a/backend/src/core/dependencies/auth.py b/backend/src/core/dependencies/auth.py index e69de29..c944bde 100644 --- a/backend/src/core/dependencies/auth.py +++ b/backend/src/core/dependencies/auth.py @@ -0,0 +1,57 @@ +""" +Dependencia de autenticación. + +Provee OAuth2PasswordBearer para Swagger UI y autenticación opcional en desarrollo. +""" + +import os + +from fastapi import Depends, HTTPException +from fastapi.security import OAuth2PasswordBearer + +from src.schemas.user import Role, User + +# OAuth2 scheme para Swagger UI - muestra botón "Authorize" +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/auth/token", auto_error=False) + + +async def get_current_user(token: str = Depends(oauth2_scheme)) -> User: + """ + Obtiene el usuario actual basado en el token. + + En desarrollo: retorna usuario mock. + En producción: valida token JWT (a implementar en Sprint 2). 
+ + Args: + token: Token JWT del header Authorization. + + Returns: + User: Usuario autenticado. + + Raises: + HTTPException: 401 si el token es inválido en producción. + """ + environment = os.getenv("ENVIRONMENT", "development") + + if environment == "production": + # En producción, validar token real + if not token: + raise HTTPException( + status_code=401, + detail="Token de autenticación requerido", + headers={"WWW-Authenticate": "Bearer"}, + ) + # TODO: Implementar validación real con Clerk en Sprint 2 + raise HTTPException( + status_code=401, + detail="Token inválido", + headers={"WWW-Authenticate": "Bearer"}, + ) + + # En desarrollo, retornar usuario mock + return User( + id="user_123", + email="dev@codeguard.ai", + name="Developer User", + role=Role.DEVELOPER, + ) diff --git a/backend/src/core/dependencies/get_db.py b/backend/src/core/dependencies/get_db.py index e69de29..8f6f97c 100644 --- a/backend/src/core/dependencies/get_db.py +++ b/backend/src/core/dependencies/get_db.py @@ -0,0 +1,29 @@ +""" +Dependencia para obtener sesión de base de datos. +""" + +from typing import Generator + +from sqlalchemy.orm import Session + +from src.core.database import SessionLocal + + +def get_db() -> Generator[Session, None, None]: + """ + Crea una sesión de base de datos por request y la cierra al finalizar. + + Yields: + Session: Sesión de SQLAlchemy para operaciones de base de datos. + + Example: + @router.post("/items") + def create_item(db: Session = Depends(get_db)): + # usar db aquí + pass + """ + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/src/core/events/analysis_events.py b/backend/src/core/events/analysis_events.py index e69de29..3d422e5 100644 --- a/backend/src/core/events/analysis_events.py +++ b/backend/src/core/events/analysis_events.py @@ -0,0 +1,21 @@ +""" +Definición de eventos del dominio de análisis. +Ubicación: Core/Events (Shared Kernel). 
+""" + +from enum import Enum + + +class AnalysisEventType(str, Enum): + """ + Enumeración de tipos de eventos para el ciclo de vida del análisis. + """ + + ANALYSIS_STARTED = "analysis_started" + ANALYSIS_COMPLETED = "analysis_completed" + ANALYSIS_FAILED = "analysis_failed" + + AGENT_STARTED = "agent_started" + AGENT_COMPLETED = "agent_completed" + AGENT_FAILED = "agent_failed" + AGENT_TIMEOUT = "agent_timeout" diff --git a/backend/src/core/events/event_bus.py b/backend/src/core/events/event_bus.py index e69de29..d45f8de 100644 --- a/backend/src/core/events/event_bus.py +++ b/backend/src/core/events/event_bus.py @@ -0,0 +1,84 @@ +""" +Event Bus para comunicación desacoplada entre componentes. +""" + +import logging +from enum import Enum +from typing import Any, Dict, List, Optional + +from src.core.events.observers import EventObserver + +logger = logging.getLogger(__name__) + + +class EventType(str, Enum): + """Tipos de eventos estándar del sistema.""" + + ANALYSIS_STARTED = "analysis_started" + ANALYSIS_COMPLETED = "analysis_completed" + ANALYSIS_FAILED = "analysis_failed" + AGENT_STARTED = "agent_started" + AGENT_COMPLETED = "agent_completed" + AGENT_FAILED = "agent_failed" + + +class EventBus: + """ + Bus de eventos centralizado para desacoplar componentes. + + Permite que el AnalysisService notifique progreso sin conocer + detalles de WebSockets o persistencia. + """ + + _instance: Optional["EventBus"] = None + _observers: List[EventObserver] = [] + + def __new__(cls) -> "EventBus": + """Implementación del patrón Singleton.""" + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._observers = [] + return cls._instance + + def subscribe(self, observer: EventObserver) -> None: + """ + Registra un observer para recibir eventos. + + Args: + observer: Observer que implementa EventObserver. 
+ """ + if observer not in self._observers: + self._observers.append(observer) + + def unsubscribe(self, observer: EventObserver) -> None: + """ + Elimina un observer del bus. + + Args: + observer: Observer a eliminar. + """ + if observer in self._observers: + self._observers.remove(observer) + + def publish(self, event_type: str, data: Dict[str, Any]) -> None: + """ + Publica un evento a todos los observers suscritos. + + Args: + event_type: Tipo de evento (str o EventType). + data: Datos del evento. + """ + # Convertir Enum a string si es necesario + if isinstance(event_type, Enum): + event_type = event_type.value + + for observer in self._observers: + try: + observer.on_event(event_type, data) + except Exception as e: + # Log error pero no interrumpir otros observers + logger.error(f"Error in observer {observer}: {e}") + + def clear(self) -> None: + """Elimina todos los observers.""" + self._observers.clear() diff --git a/backend/src/core/events/observers.py b/backend/src/core/events/observers.py index e69de29..5f538be 100644 --- a/backend/src/core/events/observers.py +++ b/backend/src/core/events/observers.py @@ -0,0 +1,22 @@ +""" +Definición de interfaces para el patrón Observer. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class EventObserver(ABC): + """ + Interfaz base para cualquier observador que desee suscribirse al EventBus. + """ + + @abstractmethod + async def on_event(self, event: Dict[str, Any]) -> None: + """ + Método invocado cuando ocurre un evento. + + Args: + event: Diccionario con los datos del evento (tipo, timestamp, payload). 
+ """ + pass diff --git a/backend/src/main.py b/backend/src/main.py index e69de29..cecca13 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -0,0 +1,45 @@ +""" +CodeGuard AI - Backend Entry Point +FastAPI Application +""" + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from src.routers.analysis import router as analysis_router + +# Create FastAPI app +app = FastAPI( + title="CodeGuard AI", + description="Multi-Agent Code Review System", + version="1.0.0", + docs_url="/docs", + redoc_url="/redoc", +) + +# CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000", "http://localhost:5173"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(analysis_router) + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy", "version": "1.0.0", "service": "CodeGuard AI Backend"} + + +@app.get("/") +async def root(): + """Root endpoint""" + return { + "message": "CodeGuard AI - Multi-Agent Code Review System", + "docs": "/docs", + "health": "/health", + } diff --git a/backend/src/models/base.py b/backend/src/models/base.py index e69de29..a4bfede 100644 --- a/backend/src/models/base.py +++ b/backend/src/models/base.py @@ -0,0 +1,22 @@ +""" +Configuración base para los modelos ORM de SQLAlchemy. + +Este módulo define la clase base declarativa de la cual deben heredar +todas las entidades de la base de datos para ser reconocidas por el ORM. +""" + +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + """ + Clase base declarativa para todos los modelos ORM del sistema. + + Utiliza el estilo moderno de SQLAlchemy 2.0 (`DeclarativeBase`), lo que + proporciona mejor soporte para tipado estático y autocompletado en IDEs + comparado con la función antigua `declarative_base()`. + + Todas las entidades (ej. `CodeReviewEntity`) deben heredar de esta clase. 
+ """ + + pass diff --git a/backend/src/models/code_review.py b/backend/src/models/code_review.py index e69de29..764c55b 100644 --- a/backend/src/models/code_review.py +++ b/backend/src/models/code_review.py @@ -0,0 +1,29 @@ +import uuid +from datetime import datetime + +from sqlalchemy import Column, DateTime, Enum, Integer, LargeBinary, String +from sqlalchemy.dialects.postgresql import UUID + +from src.models.base import Base +from src.models.enums.review_status import ReviewStatus + + +class CodeReviewEntity(Base): + """ + Entidad ORM que representa la tabla 'code_reviews' en la base de datos. + """ + + __tablename__ = "code_reviews" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + user_id = Column(String, nullable=False, index=True) + filename = Column(String, nullable=False) + + # RN16: code_content se almacena como bytes encriptados (BYTEA) + code_content = Column(LargeBinary, nullable=False) + + quality_score = Column(Integer, nullable=False) + status = Column(Enum(ReviewStatus), nullable=False, index=True) + total_findings = Column(Integer, default=0) + created_at = Column(DateTime, default=datetime.utcnow, index=True) + completed_at = Column(DateTime, nullable=True) diff --git a/backend/src/models/enums/review_status.py b/backend/src/models/enums/review_status.py index e69de29..2b990cd 100644 --- a/backend/src/models/enums/review_status.py +++ b/backend/src/models/enums/review_status.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class ReviewStatus(str, Enum): + PENDING = "PENDING" + PROCESSING = "PROCESSING" + COMPLETED = "COMPLETED" + FAILED = "FAILED" diff --git a/backend/src/repositories/code_review_repository.py b/backend/src/repositories/code_review_repository.py new file mode 100644 index 0000000..ceff874 --- /dev/null +++ b/backend/src/repositories/code_review_repository.py @@ -0,0 +1,113 @@ +from typing import Optional +from uuid import UUID + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import 
Session + +from src.models.code_review import CodeReviewEntity +from src.schemas.analysis import CodeReview +from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 +from src.utils.logger import logger + + +class CodeReviewRepository: + """ + Repositorio para manejar la persistencia de las revisiones de código. + + Implementa el patrón Repository para desacoplar la lógica de negocio (Domain) + de la implementación de base de datos (SQLAlchemy). Maneja automáticamente + la encriptación/desencriptación del código fuente. + """ + + def __init__(self, session: Session): + """ + Inicializa el repositorio con una sesión de base de datos. + + Args: + session: Sesión activa de SQLAlchemy. + """ + self.session = session + + def create(self, review: CodeReview) -> CodeReview: + """ + Persiste una nueva entidad CodeReview en la base de datos. + + Aplica encriptación AES-256 al contenido del código antes de guardar, + cumpliendo con la Regla de Negocio RN16. + + Args: + review: Objeto de dominio CodeReview con los datos a guardar. + + Returns: + CodeReview: El objeto de dominio confirmado y persistido. + + Raises: + SQLAlchemyError: Si ocurre un error a nivel de base de datos. + ValueError: Si el contenido del código es inválido para encriptar. 
+ """ + try: + # RN16: Encriptar contenido sensible antes de tocar la BD + encrypted_content = encrypt_aes256(review.code_content) + + entity = CodeReviewEntity( + id=review.id, + user_id=review.user_id, + filename=review.filename, + code_content=encrypted_content, + quality_score=review.quality_score, + status=review.status, + total_findings=review.total_findings, + created_at=review.created_at, + completed_at=review.completed_at, + ) + + self.session.add(entity) + self.session.commit() + + logger.info(f"CodeReview persistido exitosamente: {review.id}") + return review + + except SQLAlchemyError as e: + self.session.rollback() + logger.error(f"Error de base de datos al crear CodeReview {review.id}: {str(e)}") + raise e + except Exception as e: + self.session.rollback() + logger.error(f"Error inesperado en CodeReviewRepository.create: {str(e)}") + raise e + + def find_by_id(self, review_id: UUID) -> Optional[CodeReview]: + """ + Busca una revisión por su ID y desencripta el contenido automáticamente. + + Args: + review_id: Identificador único (UUID) de la revisión. + + Returns: + Optional[CodeReview]: Objeto de dominio reconstruido o None si no existe. + + Raises: + Exception: Si falla la desencriptación o la lectura de BD. 
+ """ + try: + entity = self.session.get(CodeReviewEntity, review_id) + + if not entity: + return None + + decrypted_content = decrypt_aes256(entity.code_content) + + return CodeReview( + id=entity.id, + user_id=entity.user_id, + filename=entity.filename, + code_content=decrypted_content, + quality_score=entity.quality_score, + status=entity.status, + total_findings=entity.total_findings, + created_at=entity.created_at, + completed_at=entity.completed_at, + ) + except Exception as e: + logger.error(f"Error recuperando CodeReview {review_id}: {str(e)}") + raise e diff --git a/backend/src/routers/analysis.py b/backend/src/routers/analysis.py index e69de29..72effe5 100644 --- a/backend/src/routers/analysis.py +++ b/backend/src/routers/analysis.py @@ -0,0 +1,66 @@ +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status +from sqlalchemy.orm import Session + +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.repositories.code_review_repository import CodeReviewRepository +from src.schemas.analysis import AnalysisResponse +from src.schemas.user import User +from src.services.analysis_service import AnalysisService +from src.utils.logger import logger + +router = APIRouter(prefix="/api/v1", tags=["analysis"]) + + +@router.post( + "/analyze", + response_model=AnalysisResponse, + status_code=status.HTTP_200_OK, + summary="Analizar código fuente Python", +) +async def analyze_code( + file: UploadFile = File(...), + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """ + Sube un archivo Python para análisis automatizado de seguridad y calidad. + + Reglas de Negocio: + - **RN1**: Requiere autenticación JWT. + - **RN3**: Verifica cuota diaria (Developers: 10/día). + - **RN4**: Valida extensión .py, tamaño <10MB y codificación UTF-8. + + Args: + file: Archivo .py a analizar. + current_user: Usuario autenticado (inyectado). 
+ db: Sesión de base de datos (inyectada). + + Returns: + AnalysisResponse: Objeto con ID de análisis, estado y resumen. + + Raises: + HTTPException: 500 si ocurre un error interno. + """ + + repo = CodeReviewRepository(db) + service = AnalysisService(repo) + + try: + result = await service.analyze_code(file, current_user.id) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error interno en análisis: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error interno del servidor" + ) from e + + return AnalysisResponse( + analysis_id=result.id, + filename=result.filename, + status=result.status, + quality_score=result.quality_score, + total_findings=result.total_findings, + created_at=result.created_at, + ) diff --git a/backend/src/schemas/analysis.py b/backend/src/schemas/analysis.py index e69de29..cd1a58d 100644 --- a/backend/src/schemas/analysis.py +++ b/backend/src/schemas/analysis.py @@ -0,0 +1,277 @@ +""" +Esquemas de análisis usando Pydantic v2 +""" + +import ast as python_ast +from datetime import datetime +from textwrap import dedent +from typing import Any, Dict, List, Optional +from uuid import UUID, uuid4 + +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, field_validator, model_validator + +from src.models.enums.review_status import ReviewStatus + + +class AnalysisContext(BaseModel): + """ + Contexto de análisis que encapsula toda la información de un análisis. + + Se pasa entre agentes para que cada uno realice su análisis específico. + + Attributes: + code_content: Código Python a analizar + filename: Nombre del archivo (debe terminar en .py) + language: Lenguaje de programación (default: python) + analysis_id: UUID único del análisis + metadata: Información adicional (usuario, timestamp, etc.) 
+ created_at: Timestamp UTC de creación + + Example: + context = AnalysisContext( + code_content="def hello():\n print('Hello')", + filename="app.py", + metadata={"user_id": "123"} + ) + """ + + code_content: str = Field(..., min_length=1, description="Código Python a analizar") + filename: str = Field( + ..., min_length=3, description="Nombre del archivo (debe terminar en .py)" + ) + language: str = Field(default="python", description="Lenguaje de programación") + analysis_id: UUID = Field(default_factory=uuid4, description="ID único del análisis") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Información adicional") + created_at: datetime = Field( + default_factory=datetime.utcnow, description="Timestamp UTC de creación" + ) + + # Se Usa PrivateAttr en Pydantic v2 por sugerencia + _ast_cache: Optional[python_ast.Module] = PrivateAttr(default=None) + _lines_cache: Optional[List[str]] = PrivateAttr(default=None) + + model_config = ConfigDict( + arbitrary_types_allowed=True, + json_schema_extra={ + "example": { + "code_content": "def hello():\n print('Hello World')", + "filename": "example.py", + "language": "python", + "metadata": {"user_id": "123", "project": "CodeGuard"}, + } + }, + ) + + @field_validator("code_content") + @classmethod + def validate_code_content(cls, v: str) -> str: + """Valida que el código no esté vacío.""" + if not v or not v.strip(): + raise ValueError("code_content cannot be empty or whitespace only") + return v + + @field_validator("filename") + @classmethod + def validate_filename(cls, v: str) -> str: + """Valida que sea archivo Python.""" + if not v.endswith(".py"): + raise ValueError("Only Python files (.py) are supported") + if not v or len(v) < 3: + raise ValueError("filename must be at least 3 characters") + return v + + @model_validator(mode="after") + def _normalize_code_content(self) -> "AnalysisContext": + """ + Normaliza el código eliminando la indentación común para evitar + SyntaxError cuando se parsean 
fixtures con sangría artificial. + """ + self.code_content = dedent(self.code_content) + return self + + @property + def line_count(self) -> int: + """Retorna el número de líneas del código.""" + return len(self.code_content.splitlines()) # pylint: disable=no-member + + @property + def char_count(self) -> int: + """Retorna el número de caracteres del código.""" + return len(self.code_content) + + def add_metadata(self, key: str, value: Any) -> None: + """ + Agrega una entrada a la metadata del contexto. + + Args: + key: Clave de la metadata + value: Valor de la metadata + """ + self.metadata[key] = value + + def get_ast(self) -> python_ast.Module: + """ + Retorna el AST parseado del código (lazy loading). + + Returns: + AST Module del código Python + + Raises: + SyntaxError: Si el código no es Python válido + """ + if self._ast_cache is None: + try: + self._ast_cache = python_ast.parse(self.code_content, filename=self.filename) + except SyntaxError as e: + raise SyntaxError(f"Invalid Python syntax in {self.filename}: {e}") from e + return self._ast_cache + + def get_lines(self) -> List[str]: + """ + Retorna el código como lista de líneas (lazy loading). + + Returns: + Lista de strings, una por línea + """ + if self._lines_cache is None: + self._lines_cache = self.code_content.splitlines() # pylint: disable=no-member + return self._lines_cache + + def get_line(self, line_number: int) -> Optional[str]: + """ + Retorna una línea específica del código (1-based indexing). + + Args: + line_number: Número de línea (1-based) + + Returns: + String con la línea o None si no existe + """ + lines = self.get_lines() + if 1 <= line_number <= len(lines): + return lines[line_number - 1] + return None + + def get_code_snippet(self, start_line: int, end_line: int) -> str: + """ + Retorna un fragmento de código entre líneas. 
+ + Args: + start_line: Línea inicial (1-based, inclusiva) + end_line: Línea final (1-based, inclusiva) + + Returns: + String con el fragmento de código + """ + lines = self.get_lines() + start_idx = max(0, start_line - 1) + end_idx = min(len(lines), end_line) + return "\n".join(lines[start_idx:end_idx]) + + +class AnalysisRequest(BaseModel): + """ + Request para iniciar un análisis de código. + + Attributes: + filename: Nombre del archivo + code_content: Código a analizar + agents_config: Configuración de qué agentes ejecutar + """ + + filename: str = Field(..., min_length=3, description="Nombre del archivo") + code_content: str = Field(..., min_length=1, description="Código a analizar") + agents_config: Optional[Dict[str, bool]] = Field( + default=None, description="Qué agentes ejecutar" + ) + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "filename": "app.py", + "code_content": "import os\n\ndef main():\n pass", + "agents_config": { + "security": True, + "quality": True, + "performance": False, + "style": True, + }, + } + } + ) + + +class AnalysisResponse(BaseModel): + """ + Response cuando se inicia un análisis. 
+ + Attributes: + analysis_id: UUID del análisis + filename: Nombre del archivo + status: Estado actual (pending, processing, completed, failed) + created_at: Timestamp de creación + """ + + analysis_id: UUID = Field(..., description="ID único del análisis") + filename: str = Field(..., description="Nombre del archivo") + status: str = Field(..., description="Estado del análisis") + quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad") + total_findings: int = Field(..., ge=0, description="Total de hallazgos") + created_at: datetime = Field(..., description="Timestamp de creación") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "analysis_id": "550e8400-e29b-41d4-a716-446655440000", + "filename": "app.py", + "status": "pending", + "created_at": "2025-11-06T21:00:00Z", + } + } + ) + + +class CodeReview(BaseModel): + """ + Modelo de dominio para una revisión de código completa. + + Este modelo representa la información que fluye entre la capa de persistencia + y la capa de servicio. Contiene el código desencriptado listo para ser usado. + + Attributes: + id: Identificador único de la revisión. + user_id: ID del usuario propietario. + filename: Nombre del archivo analizado. + code_content: Contenido del código fuente (texto plano). + quality_score: Puntaje de calidad calculado (0-100). + status: Estado actual del análisis. + total_findings: Cantidad total de hallazgos detectados. + created_at: Fecha de creación. + completed_at: Fecha de finalización (opcional). 
+ """ + + id: UUID = Field(..., description="ID único de la revisión") + user_id: str = Field(..., description="ID del usuario propietario (Clerk ID)") + filename: str = Field(..., description="Nombre del archivo analizado") + code_content: str = Field(..., description="Contenido del código fuente desencriptado") + quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad (0-100)") + status: ReviewStatus = Field(..., description="Estado actual del análisis") + total_findings: int = Field(default=0, ge=0, description="Total de hallazgos encontrados") + created_at: datetime = Field(..., description="Fecha de creación del análisis") + completed_at: Optional[datetime] = Field(default=None, description="Fecha de finalización") + + model_config = ConfigDict( + from_attributes=True, + json_schema_extra={ + "example": { + "id": "123e4567-e89b-12d3-a456-426614174000", + "user_id": "user_2819", + "filename": "main.py", + "code_content": "print('Hello World')", + "quality_score": 85, + "status": "completed", + "total_findings": 3, + "created_at": "2025-11-22T10:00:00Z", + } + }, + ) diff --git a/backend/src/schemas/finding.py b/backend/src/schemas/finding.py index e69de29..e75cb55 100644 --- a/backend/src/schemas/finding.py +++ b/backend/src/schemas/finding.py @@ -0,0 +1,168 @@ +""" +Esquemas para hallazgos encontrados en análisis +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from enum import Enum +from typing import Any, ClassVar, Dict, Optional, cast + +from pydantic import BaseModel, ConfigDict, Field + + +class Severity(str, Enum): + """ + Niveles de severidad de un hallazgo. 
+ + CRITICAL: Riesgo inmediato, debe corregirse + HIGH: Importante, debe corregirse pronto + MEDIUM: Moderado, se recomienda corrección + LOW: Menor, mejora opcional + INFO: Información, no es un problema + """ + + CRITICAL = "critical" + HIGH = "high" + MEDIUM = "medium" + LOW = "low" + INFO = "info" + + +class Finding(BaseModel): + """ + Hallazgo encontrado durante el análisis de código. + + Attributes: + severity: Nivel de severidad del hallazgo + issue_type: Tipo de problema (ej: dangerous_function, sql_injection) + message: Descripción del problema + line_number: Número de línea donde se encontró (1-based) + agent_name: Nombre del agente que detectó el hallazgo + code_snippet: Fragmento de código problemático (opcional) + suggestion: Sugerencia de cómo corregir (opcional) + rule_id: ID de la regla que se violó (opcional) + detected_at: Timestamp de detección + + Example: + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval() detected", + line_number=10, + agent_name="SecurityAgent", + code_snippet="result = eval(user_input)", + suggestion="Use ast.literal_eval() instead", + rule_id="SEC001_EVAL" + ) + """ + + severity: Severity = Field(..., description="Nivel de severidad") + issue_type: str = Field(..., min_length=1, description="Tipo de problema") + message: str = Field(..., min_length=5, description="Descripción del problema") + line_number: int = Field(..., ge=1, description="Número de línea (1-based)") + agent_name: str = Field(..., min_length=1, description="Nombre del agente") + code_snippet: Optional[str] = Field(default=None, description="Fragmento de código") + suggestion: Optional[str] = Field(default=None, description="Sugerencia de corrección") + rule_id: Optional[str] = Field(default=None, description="ID de la regla") + detected_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), description="Timestamp de detección" + ) + + model_config = ConfigDict( + 
json_schema_extra={ + "example": { + "severity": "critical", + "issue_type": "dangerous_function", + "message": "Use of eval() detected", + "line_number": 10, + "agent_name": "SecurityAgent", + "code_snippet": "result = eval(user_input)", + "suggestion": "Use ast.literal_eval() instead", + "rule_id": "SEC001_EVAL", + } + } + ) + + PENALTY_BY_SEVERITY: ClassVar[Dict[Severity, int]] = { + Severity.CRITICAL: 10, + Severity.HIGH: 5, + Severity.MEDIUM: 2, + Severity.LOW: 1, + Severity.INFO: 0, + } + + @property + def is_critical(self) -> bool: + """Retorna True si el hallazgo es crítico.""" + return self.severity == Severity.CRITICAL + + @property + def is_high_or_critical(self) -> bool: + """Retorna True si el hallazgo es HIGH o CRITICAL.""" + return self.severity in (Severity.CRITICAL, Severity.HIGH) + + @property + def is_actionable(self) -> bool: + """Retorna True si el hallazgo requiere acción (no INFO).""" + return self.severity != Severity.INFO + + @classmethod + def from_dict(cls, data: dict) -> "Finding": + """ + Crea un Finding desde un diccionario. + + Args: + data: Diccionario con datos del finding + + Returns: + Instancia de Finding + """ + detected_at_str = data.get("detected_at") + detected_at = ( + datetime.fromisoformat(detected_at_str) + if detected_at_str + else datetime.now(timezone.utc) + ) + return cls( + severity=Severity(data["severity"]), + issue_type=data["issue_type"], + message=data["message"], + line_number=data["line_number"], + agent_name=data["agent_name"], + code_snippet=data.get("code_snippet"), + suggestion=data.get("suggestion"), + rule_id=data.get("rule_id"), + detected_at=detected_at, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Convierte el Finding a diccionario para persistencia. 
+ + Returns: + Diccionario con todos los campos del finding + """ + severity_value = cast(Severity, self.severity).value + detected_at_value = cast(datetime, self.detected_at) + + return { + "severity": severity_value, + "issue_type": self.issue_type, + "message": self.message, + "line_number": self.line_number, + "agent_name": self.agent_name, + "code_snippet": self.code_snippet, + "suggestion": self.suggestion, + "rule_id": self.rule_id, + "detected_at": detected_at_value.isoformat(), + } + + def calculate_penalty(self) -> int: + """ + Calcula el penalty para el quality score según severidad. + + Returns: + Penalty points (CRITICAL=10, HIGH=5, MEDIUM=2, LOW=1, INFO=0) + """ + return self.PENALTY_BY_SEVERITY.get(self.severity, 0) diff --git a/backend/src/schemas/user.py b/backend/src/schemas/user.py index e69de29..887a9d4 100644 --- a/backend/src/schemas/user.py +++ b/backend/src/schemas/user.py @@ -0,0 +1,44 @@ +""" +Esquemas de usuario para CodeGuard AI +""" + +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, ConfigDict, EmailStr, Field + + +class Role(str, Enum): + """Roles de usuario.""" + + DEVELOPER = "developer" + ADMIN = "admin" + + +class User(BaseModel): + """ + Modelo de usuario autenticado. 
+ + Attributes: + id: Clerk user ID + email: Email del usuario + name: Nombre completo + role: Rol (developer o admin) + """ + + id: str = Field(..., description="Clerk user ID") + email: EmailStr = Field(..., description="Email del usuario") + name: Optional[str] = Field(default=None, description="Nombre completo") + role: Role = Field(default=Role.DEVELOPER, description="Rol del usuario") + + model_config = ConfigDict( + from_attributes=True, + json_schema_extra={ + "example": { + "id": "user_123", + "email": "dev@codeguard.ai", + "name": "Developer", + "role": "developer", + } + }, + ) diff --git a/backend/src/services/analysis_service.py b/backend/src/services/analysis_service.py index e69de29..1ee8396 100644 --- a/backend/src/services/analysis_service.py +++ b/backend/src/services/analysis_service.py @@ -0,0 +1,189 @@ +""" +Servicio de análisis de código para CodeGuard AI. +""" + +from datetime import datetime +from typing import List, Tuple +from uuid import uuid4 + +from fastapi import HTTPException, UploadFile + +from src.agents.security_agent import SecurityAgent +from src.core.events.analysis_events import AnalysisEventType +from src.core.events.event_bus import EventBus +from src.models.enums.review_status import ReviewStatus +from src.repositories.code_review_repository import CodeReviewRepository +from src.schemas.analysis import AnalysisContext, CodeReview +from src.schemas.finding import Finding, Severity +from src.utils.logger import logger + + +class AnalysisService: + """ + Servicio de aplicación para orquestar el análisis de código. + Coordina la validación, ejecución de agentes y persistencia. + """ + + def __init__(self, repo: CodeReviewRepository): + """ + Inicializa el servicio con sus dependencias. + + Args: + repo: Repositorio para persistencia de revisiones. 
+ """ + self.repo = repo + self.event_bus = EventBus() + + async def analyze_code(self, file: UploadFile, user_id: str) -> CodeReview: + """ + Procesa un archivo subido, ejecuta el análisis y guarda los resultados. + + Flujo (RN4, RN5, RN8): + 1. Validar archivo. + 2. Crear contexto de análisis. + 3. Ejecutar SecurityAgent. + 4. Calcular métricas. + 5. Persistir resultados. + + Args: + file: Archivo subido por el usuario. + user_id: ID del usuario autenticado. + + Returns: + CodeReview: Resultado del análisis persistido. + + Raises: + HTTPException: Si el archivo no es válido (422) o muy grande (413). + """ + logger.info(f"Iniciando análisis para usuario {user_id} archivo {file.filename}") + + # 1. Validación de Archivo (RN4) + content, filename = await self._validate_file(file) + + # 2. Preparar Contexto + analysis_id = uuid4() + context = AnalysisContext( + code_content=content, + filename=filename, + analysis_id=analysis_id, + metadata={"user_id": user_id}, + ) + + # Notificar inicio usando el Enum + self.event_bus.publish(AnalysisEventType.ANALYSIS_STARTED, {"id": str(analysis_id)}) + + # 3. Ejecutar Agentes (Solo SecurityAgent para Sprint 1) + findings: List[Finding] = [] + try: + agent = SecurityAgent() + findings = agent.analyze(context) + except Exception as e: + logger.error(f"Error ejecutando SecurityAgent: {e}") + + # 4. Calcular Quality Score (RN8) + quality_score = self._calculate_quality_score(findings) + + # 5. Construir Objeto de Dominio para persistencia + review = CodeReview( + id=analysis_id, + user_id=user_id, + filename=filename, + code_content=content, + quality_score=quality_score, + status=ReviewStatus.COMPLETED, + total_findings=len(findings), + created_at=datetime.utcnow(), + completed_at=datetime.utcnow(), + ) + + # 6. 
Persistir (RN14) + saved_review = self.repo.create(review) + + # Notificar fin usando el Enum + self.event_bus.publish( + AnalysisEventType.ANALYSIS_COMPLETED, + {"id": str(analysis_id), "score": quality_score}, + ) + + return saved_review + + async def _validate_file(self, file: UploadFile) -> Tuple[str, str]: + """ + Valida las restricciones del archivo (RN4). + + Args: + file: Archivo subido por el usuario. + + Returns: + Tuple[str, str]: (contenido, nombre_archivo) + + Raises: + HTTPException: Si el archivo no cumple las validaciones. + """ + # Validar que filename exista + if not file.filename: + raise HTTPException( + status_code=422, + detail="El nombre del archivo es requerido", + ) + + filename = file.filename + + if not filename.endswith(".py"): + raise HTTPException(status_code=422, detail="Solo se aceptan archivos .py") + + # Leer contenido + content_bytes = await file.read() + + # Validar tamaño (10MB = 10 * 1024 * 1024 bytes) + if len(content_bytes) > 10 * 1024 * 1024: + raise HTTPException( + status_code=413, + detail="El tamaño del archivo excede el límite de 10 MB", + ) + + try: + content = content_bytes.decode("utf-8") + except UnicodeDecodeError as exc: + raise HTTPException( + status_code=422, + detail="El archivo debe tener codificación UTF-8 válida", + ) from exc + + # Validar contenido vacío + lines = [line for line in content.splitlines() if line.strip()] + if len(lines) < 5: + raise HTTPException( + status_code=422, + detail="El archivo debe tener al menos 5 líneas de código", + ) + + # Resetear puntero del archivo + await file.seek(0) + + return content, filename + + def _calculate_quality_score(self, findings: List[Finding]) -> int: + """ + Calcula el puntaje de calidad basado en penalizaciones (RN8). + + Fórmula: score = max(0, 100 - penalizaciones) + + Args: + findings: Lista de hallazgos detectados. + + Returns: + int: Puntaje de calidad (0-100). 
+ """ + penalty = 0 + for finding in findings: + if finding.severity == Severity.CRITICAL: + penalty += 10 + elif finding.severity == Severity.HIGH: + penalty += 5 + elif finding.severity == Severity.MEDIUM: + penalty += 2 + elif finding.severity == Severity.LOW: + penalty += 1 + + return max(0, 100 - penalty) diff --git a/backend/src/utils/encryption/aes_encryptor.py b/backend/src/utils/encryption/aes_encryptor.py index e69de29..36166dc 100644 --- a/backend/src/utils/encryption/aes_encryptor.py +++ b/backend/src/utils/encryption/aes_encryptor.py @@ -0,0 +1,51 @@ +import os + +from cryptography.fernet import Fernet +from dotenv import load_dotenv + +# Cargar variables de entorno +load_dotenv() + +# Obtener clave de encriptación +# NOTA: En producción, esto DEBE venir de variables de entorno. +# Si no existe, generamos una temporal para desarrollo. +# (Esto evita que falle en local si no configuraste el .env) +_KEY = os.getenv("ENCRYPTION_SECRET_KEY", Fernet.generate_key().decode()) +_CIPHER = Fernet(_KEY.encode() if isinstance(_KEY, str) else _KEY) + + +def encrypt_aes256(content: str) -> bytes: + """ + Encripta una cadena de texto usando Fernet (AES-256). + + Cumple con la RN16: Encriptación de Código Fuente en reposo. + + Args: + content: El texto plano (código fuente) a encriptar. + + Returns: + bytes: El contenido encriptado listo para almacenar en BD. + + Raises: + ValueError: Si el contenido es nulo o vacío. + """ + if not content: + raise ValueError("El contenido a encriptar no puede estar vacío") + + return _CIPHER.encrypt(content.encode("utf-8")) + + +def decrypt_aes256(encrypted_content: bytes) -> str: + """ + Desencripta bytes almacenados para recuperar el texto original. + + Args: + encrypted_content: Los bytes encriptados recuperados de la BD. + + Returns: + str: El código fuente original en texto plano. 
+ """ + if not encrypted_content: + return "" + + return _CIPHER.decrypt(encrypted_content).decode("utf-8") diff --git a/backend/src/utils/logger.py b/backend/src/utils/logger.py index e69de29..966ca34 100644 --- a/backend/src/utils/logger.py +++ b/backend/src/utils/logger.py @@ -0,0 +1,11 @@ +import logging +import sys + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], +) + +logger = logging.getLogger("codeguard") diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py index e69de29..66173ae 100644 --- a/backend/tests/__init__.py +++ b/backend/tests/__init__.py @@ -0,0 +1 @@ +# Test package diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index e69de29..baf6f55 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -0,0 +1,27 @@ +""" +Pytest configuration and fixtures +""" + +import pytest +from fastapi.testclient import TestClient + +from src.main import app + + +@pytest.fixture +def client(): + """FastAPI test client""" + return TestClient(app) + + +@pytest.fixture +def sample_python_code(): + """Sample Python code for testing""" + return """ +def calculate_sum(a, b): + return a + b + +def main(): + result = calculate_sum(5, 3) + print(f"Result: {result}") +""" diff --git a/backend/tests/integration/test_api_endpoints.py b/backend/tests/integration/test_api_endpoints.py index e69de29..199cc7a 100644 --- a/backend/tests/integration/test_api_endpoints.py +++ b/backend/tests/integration/test_api_endpoints.py @@ -0,0 +1,300 @@ +""" +Integration tests for Analysis API Endpoint. + +Tests the /api/v1/analyze endpoint with realistic scenarios +covering file validation, security analysis, and response format. 
+""" + +from datetime import datetime +from io import BytesIO +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from fastapi import status +from fastapi.testclient import TestClient + +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.main import app +from src.schemas.user import Role, User + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def mock_user() -> User: + """Usuario autenticado de prueba.""" + return User( + id="user_test_123", + email="developer@codeguard.ai", + name="Test Developer", + role=Role.DEVELOPER, + ) + + +@pytest.fixture +def mock_db_session(): + """Sesión de base de datos mockeada.""" + session = MagicMock() + session.add = MagicMock() + session.commit = MagicMock() + session.refresh = MagicMock() + return session + + +@pytest.fixture +def client(mock_user: User, mock_db_session): + """Cliente de prueba con dependencias mockeadas.""" + + def override_get_current_user(): + return mock_user + + def override_get_db(): + yield mock_db_session + + app.dependency_overrides[get_current_user] = override_get_current_user + app.dependency_overrides[get_db] = override_get_db + + yield TestClient(app) + + app.dependency_overrides.clear() + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def create_python_file(content: str, filename: str = "test_code.py") -> tuple: + """Crea un archivo Python simulado para upload.""" + file_bytes = BytesIO(content.encode("utf-8")) + return ("file", (filename, file_bytes, "text/x-python")) + + +def create_valid_python_code() -> str: + """Genera código Python válido con al menos 5 líneas.""" + return '''"""Module docstring.""" +import os + 
+def hello_world(): + """Print hello world.""" + print("Hello, World!") + +if __name__ == "__main__": + hello_world() +''' + + +def create_vulnerable_code() -> str: + """Genera código Python con vulnerabilidades de seguridad.""" + return '''"""Vulnerable code for testing.""" +import os +import pickle + +def unsafe_eval(user_input): + """Dangerous eval usage.""" + return eval(user_input) + +def unsafe_query(user_id): + """SQL injection vulnerability.""" + query = "SELECT * FROM users WHERE id = " + user_id + return query + +PASSWORD = "super_secret_password_123" +API_KEY = "sk-1234567890abcdef" +''' + + +# ============================================================================= +# Test Classes +# ============================================================================= + + +class TestAnalyzeEndpointValidation: + """Tests para validación de archivos (RN4).""" + + def test_reject_non_python_file(self, client: TestClient): + """Rechaza archivos que no son .py.""" + file_data = create_python_file("print('hello')", "script.js") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "Solo se aceptan archivos .py" in response.json()["detail"] + + def test_reject_file_without_extension(self, client: TestClient): + """Rechaza archivos sin extensión.""" + file_data = create_python_file("print('hello')", "script") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + def test_reject_empty_file(self, client: TestClient): + """Rechaza archivos vacíos o con menos de 5 líneas.""" + file_data = create_python_file("# just a comment\n", "empty.py") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "al menos 5 líneas" in response.json()["detail"] + + def test_reject_file_too_large(self, client: TestClient): 
+ """Rechaza archivos mayores a 10MB.""" + large_content = "x = 1\n" * (10 * 1024 * 1024 // 6 + 1) + file_data = create_python_file(large_content, "large.py") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE + + def test_reject_invalid_utf8_encoding(self, client: TestClient): + """Rechaza archivos con codificación inválida.""" + invalid_bytes = b"\x80\x81\x82\x83\x84" + file_bytes = BytesIO(invalid_bytes) + file_data = ("file", ("invalid.py", file_bytes, "text/x-python")) + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "UTF-8" in response.json()["detail"] + + +class TestAnalyzeEndpointSuccess: + """Tests para análisis exitoso.""" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_analyze_valid_python_file(self, mock_analyze, client: TestClient): + """Analiza correctamente un archivo Python válido.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="test_code.py", + status="completed", + quality_score=95, + total_findings=2, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert "analysis_id" in data + assert data["status"] == "completed" + assert data["quality_score"] == 95 + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_analyze_vulnerable_code_returns_findings(self, mock_analyze, client: TestClient): + """Detecta vulnerabilidades y retorna findings.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="vulnerable.py", + status="completed", + quality_score=45, + total_findings=5, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_vulnerable_code(), 
"vulnerable.py") + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["total_findings"] >= 1 + assert data["quality_score"] < 100 + + +class TestAnalyzeEndpointResponseFormat: + """Tests para formato de respuesta (AnalysisResponse).""" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_response_contains_required_fields(self, mock_analyze, client: TestClient): + """La respuesta contiene todos los campos requeridos.""" + analysis_id = uuid4() + mock_analyze.return_value = MagicMock( + id=analysis_id, + filename="app.py", + status="completed", + quality_score=85, + total_findings=3, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code(), "app.py") + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + + required_fields = [ + "analysis_id", + "filename", + "status", + "quality_score", + "total_findings", + "created_at", + ] + for field in required_fields: + assert field in data, f"Missing required field: {field}" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_quality_score_within_bounds(self, mock_analyze, client: TestClient): + """El quality_score está entre 0 y 100.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="test.py", + status="completed", + quality_score=75, + total_findings=5, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + data = response.json() + assert 0 <= data["quality_score"] <= 100 + + +class TestAnalyzeEndpointAuthentication: + """Tests para autenticación.""" + + def test_reject_unauthenticated_request(self, monkeypatch): + """Rechaza requests sin autenticación en modo producción.""" + # Forzar modo producción donde 
auth es obligatorio + monkeypatch.setenv("ENVIRONMENT", "production") + app.dependency_overrides.clear() + + client = TestClient(app) + file_data = create_python_file(create_valid_python_code()) + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code in [ + status.HTTP_401_UNAUTHORIZED, + status.HTTP_403_FORBIDDEN, + ] + + +class TestAnalyzeEndpointErrorHandling: + """Tests para manejo de errores.""" + + def test_missing_file_returns_422(self, client: TestClient): + """Retorna 422 cuando no se envía archivo.""" + response = client.post("/api/v1/analyze") + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_internal_error_returns_500(self, mock_analyze, client: TestClient): + """Retorna 500 en errores internos.""" + mock_analyze.side_effect = Exception("Database connection failed") + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR diff --git a/backend/tests/integration/test_security_agent_integration.py b/backend/tests/integration/test_security_agent_integration.py new file mode 100644 index 0000000..3d735ef --- /dev/null +++ b/backend/tests/integration/test_security_agent_integration.py @@ -0,0 +1,225 @@ +""" +Integration tests for SecurityAgent. + +Tests SecurityAgent with realistic vulnerable code samples +and verifies end-to-end behavior. 
+""" + +import pytest + +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class TestSecurityAgentIntegration: + """Integration tests for SecurityAgent with realistic code.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + @pytest.fixture + def vulnerable_web_app_code(self): + """Realistic vulnerable web application code.""" + return """ +import hashlib +import pickle +from flask import Flask, request + +app = Flask(__name__) + +# Hardcoded credentials +DB_PASSWORD = "MyDatabasePass123" +API_KEY = "sk_live_abc123xyz789" + +@app.route('/login', methods=['POST']) +def login(): + username = request.form['username'] + password = request.form['password'] + + # SQL injection vulnerability + query = f"SELECT * FROM users WHERE username='{username}' AND password='{password}'" + cursor.execute(query) + user = cursor.fetchone() + + if user: + # Weak hashing + session_token = hashlib.md5(username.encode()).hexdigest() + return {'token': session_token} + + return {'error': 'Invalid credentials'}, 401 + +@app.route('/execute', methods=['POST']) +def execute_code(): + code = request.form['code'] + + # Dangerous function - arbitrary code execution + result = eval(code) + + return {'result': result} + +@app.route('/load_data', methods=['POST']) +def load_data(): + data = request.form['data'] + + # Unsafe deserialization + obj = pickle.loads(data.encode()) + + return {'loaded': str(obj)} +""" + + def test_comprehensive_vulnerability_detection(self, agent, vulnerable_web_app_code): + """Test detection of all vulnerability types in realistic code.""" + context = AnalysisContext(code_content=vulnerable_web_app_code, filename="app.py") + + findings = agent.analyze(context) + + # Should detect multiple vulnerabilities + assert len(findings) >= 5 + + # Verify each vulnerability type is detected + issue_types = {f.issue_type for f 
in findings} + assert "hardcoded_credentials" in issue_types + assert "sql_injection" in issue_types + assert "weak_cryptography" in issue_types + assert "dangerous_function" in issue_types + + # Verify severity distribution + critical_count = sum(1 for f in findings if f.is_critical) + high_count = sum(1 for f in findings if f.is_high_or_critical) + + assert critical_count >= 2 # Password, API key, eval + assert high_count >= 4 # Including SQL injection + + # Verify findings have suggestions + for finding in findings: + assert finding.suggestion is not None + assert len(finding.suggestion) > 10 + + # Verify findings are sorted by severity + severities = [f.severity.value for f in findings] + expected_order = ["critical", "high", "medium", "low", "info"] + + for i in range(len(severities) - 1): + assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) + + def test_secure_code_no_false_positives(self, agent): + """Test that secure code doesn't generate false positives.""" + secure_code = """ +import os +import hashlib +from sqlalchemy import create_engine, text + +# Secure credential handling +DB_PASSWORD = os.getenv('DB_PASSWORD') +API_KEY = os.getenv('API_KEY') + +def authenticate_user(username: str, password: str) -> bool: + # Parameterized query - safe from SQL injection + query = text('SELECT * FROM users WHERE username=:username') + result = db.execute(query, {'username': username}) + user = result.fetchone() + + if user: + # Strong hashing with salt + hashed = hashlib.sha256( + (password + user['salt']).encode() + ).hexdigest() + return hashed == user['password_hash'] + + return False + +def process_data(data: dict) -> dict: + # Safe data processing - no eval or exec + processed = { + 'id': data.get('id'), + 'name': data.get('name'), + 'value': data.get('value', 0) * 2 + } + return processed +""" + context = AnalysisContext(code_content=secure_code, filename="secure_app.py") + + findings = agent.analyze(context) + + # Should have 
0 findings for secure code + assert len(findings) == 0 + + def test_partial_vulnerability_file(self, agent): + """Test file with mix of secure and vulnerable code.""" + mixed_code = """ +import hashlib + +# Secure part +def hash_file(filepath: str) -> str: + with open(filepath, 'rb') as f: + return hashlib.sha256(f.read()).hexdigest() + +# Vulnerable part +def legacy_hash(data: str) -> str: + # Old code - needs refactoring + return hashlib.md5(data.encode()).hexdigest() + +# Secure part +class Config: + DATABASE_URL = os.getenv('DATABASE_URL') + SECRET_KEY = os.getenv('SECRET_KEY') +""" + context = AnalysisContext(code_content=mixed_code, filename="utils.py") + + findings = agent.analyze(context) + + # Should only detect MD5 usage + assert len(findings) == 1 + assert findings[0].issue_type == "weak_cryptography" + assert "md5" in findings[0].message.lower() + assert findings[0].severity == Severity.MEDIUM + + def test_analysis_context_metadata_preserved(self, agent): + """Test that analysis context metadata is preserved in findings.""" + code = "result = eval(user_input)" + context = AnalysisContext(code_content=code, filename="vulnerable_script.py") + context.add_metadata("user_id", "test_user_123") + context.add_metadata("project", "SecurityTest") + + findings = agent.analyze(context) + + assert len(findings) >= 1 + # Verify agent name is set correctly + for finding in findings: + assert finding.agent_name == "SecurityAgent" + assert finding.detected_at is not None + + def test_large_file_performance(self, agent): + """Test SecurityAgent performance with larger file.""" + # Generate code with 100 functions + large_code = """ +import hashlib + +""" + for i in range(100): + large_code += f""" +def function_{i}(data): + # Safe function + return hashlib.sha256(data.encode()).hexdigest() + +""" + + # Add one vulnerability at the end + large_code += """ +# Single vulnerability +password = "HardcodedPassword123" +""" + + context = 
AnalysisContext(code_content=large_code, filename="large_module.py") + + findings = agent.analyze(context) + + # Should detect the single vulnerability + assert len(findings) == 1 + assert findings[0].issue_type == "hardcoded_credentials" + + # Verify finding points to correct line + assert "password" in findings[0].message.lower() diff --git a/backend/tests/unit/agents/__init__.py b/backend/tests/unit/agents/__init__.py index e69de29..84cbda4 100644 --- a/backend/tests/unit/agents/__init__.py +++ b/backend/tests/unit/agents/__init__.py @@ -0,0 +1 @@ +"""Unit tests for agents module.""" diff --git a/backend/tests/unit/agents/test_base_agent.py b/backend/tests/unit/agents/test_base_agent.py index e69de29..dcf4965 100644 --- a/backend/tests/unit/agents/test_base_agent.py +++ b/backend/tests/unit/agents/test_base_agent.py @@ -0,0 +1,237 @@ +""" +Unit tests for BaseAgent abstract class +Tests para la clase base BaseAgent +""" + +from typing import List +from unittest.mock import Mock + +import pytest + +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class DummyAgent(BaseAgent): + """ + Agente dummy para testing. + + Implementación concreta de BaseAgent para fines de testing. 
+ """ + + def __init__(self): + super().__init__(name="DummyAgent", version="1.0.0", category="test") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """Implementación dummy que retorna un finding de prueba.""" + return [ + Finding( + severity=Severity.INFO, + issue_type="test", + message="Test finding", + line_number=1, + agent_name=self.name, + ) + ] + + +class TestBaseAgentInitialization: + """Tests para inicialización del agente.""" + + def test_create_agent_with_defaults(self): + """Test crear agente con valores por defecto.""" + agent = DummyAgent() + + assert agent.name == "DummyAgent" + assert agent.version == "1.0.0" + assert agent.category == "test" + assert agent.enabled is True + + def test_agent_name_required(self): + """Test que el nombre es requerido.""" + with pytest.raises(ValueError, match="name cannot be empty"): + + class BadAgent(BaseAgent): + def __init__(self): + super().__init__(name="") + + def analyze(self, context): + pass + + BadAgent() + + def test_agent_info_dict(self): + """Test que get_info retorna diccionario correcto.""" + agent = DummyAgent() + info = agent.get_info() + + assert isinstance(info, dict) + assert info["name"] == "DummyAgent" + assert info["version"] == "1.0.0" + assert info["category"] == "test" + assert info["enabled"] is True + + +class TestBaseAgentMethods: + """Tests para métodos del agente.""" + + def test_is_enabled_when_enabled(self): + """Test is_enabled cuando está habilitado.""" + agent = DummyAgent() + assert agent.is_enabled() is True + + def test_is_enabled_when_disabled(self): + """Test is_enabled cuando está deshabilitado.""" + agent = DummyAgent() + agent.disable() + assert agent.is_enabled() is False + + def test_enable_agent(self): + """Test habilitar un agente.""" + agent = DummyAgent() + agent.disable() + assert agent.enabled is False + + agent.enable() + assert agent.enabled is True + + def test_disable_agent(self): + """Test deshabilitar un agente.""" + agent = DummyAgent() + 
        assert agent.enabled is True

        agent.disable()
        assert agent.enabled is False


class TestBaseAgentAnalyze:
    """Tests for the analyze() method."""

    def test_analyze_returns_findings(self):
        """analyze() returns a list of findings tagged with the agent's name."""
        agent = DummyAgent()
        context = AnalysisContext(code_content="print('hello')", filename="test.py")

        findings = agent.analyze(context)

        assert isinstance(findings, list)
        assert len(findings) >= 1
        assert findings[0].agent_name == "DummyAgent"
        assert findings[0].severity == Severity.INFO

    def test_abstract_method_not_callable(self):
        """BaseAgent is abstract and cannot be instantiated directly."""
        with pytest.raises(TypeError):
            BaseAgent(name="TestAgent")


class TestBaseAgentRepr:
    """Tests for the string representations."""

    def test_repr_contains_name_and_version(self):
        """__repr__ contains the name, version and category."""
        agent = DummyAgent()
        repr_str = repr(agent)

        assert "DummyAgent" in repr_str
        assert "1.0.0" in repr_str
        assert "test" in repr_str

    def test_str_representation(self):
        """__str__ is human readable and reports the enabled state."""
        agent = DummyAgent()
        str_repr = str(agent)

        assert "DummyAgent" in str_repr
        assert "1.0.0" in str_repr
        assert "test" in str_repr
        assert "enabled" in str_repr.lower()


class TestBaseAgentEvents:
    """Tests for event emission through the agent's event bus."""

    def test_emit_agent_started(self):
        """_emit_agent_started publishes an AGENT_STARTED event."""
        event_bus_mock = Mock()
        agent = DummyAgent()
        agent.event_bus = event_bus_mock

        context = AnalysisContext(code_content="code", filename="test.py")

        agent._emit_agent_started(context)

        event_bus_mock.publish.assert_called_once()
        # Assumes publish() receives the event payload (a dict) as its first
        # positional argument -- TODO confirm against the EventBus API.
        call_args = event_bus_mock.publish.call_args[0][0]
        assert call_args["type"] == "AGENT_STARTED"
        assert call_args["agent_name"] == "DummyAgent"

    def test_emit_agent_completed(self):
        """_emit_agent_completed publishes an AGENT_COMPLETED event."""
        event_bus_mock = Mock()
        agent = DummyAgent()
agent.event_bus = event_bus_mock + + context = AnalysisContext(code_content="code", filename="test.py") + findings = [ + Finding( + severity=Severity.INFO, + issue_type="test", + message="Test finding message", + line_number=1, + agent_name="DummyAgent", + ) + ] + + agent._emit_agent_completed(context, findings) + + event_bus_mock.publish.assert_called_once() + call_args = event_bus_mock.publish.call_args[0][0] + assert call_args["type"] == "AGENT_COMPLETED" + assert call_args["findings_count"] == 1 + + def test_emit_agent_failed(self): + """Test que _emit_agent_failed publica evento.""" + event_bus_mock = Mock() + agent = DummyAgent() + agent.event_bus = event_bus_mock + context = AnalysisContext(code_content="code", filename="test.py") + + error = RuntimeError("boom") + agent._emit_agent_failed(context, error) + + event_bus_mock.publish.assert_called_once() + payload = event_bus_mock.publish.call_args[0][0] + assert payload["type"] == "AGENT_FAILED" + assert "boom" in payload["error"] + + def test_no_events_when_event_bus_none(self): + """Test que no falla si event_bus es None.""" + agent = DummyAgent() + agent.event_bus = None + + context = AnalysisContext(code_content="code", filename="test.py") + + # No debe lanzar excepción + agent._emit_agent_started(context) + agent._emit_agent_completed(context, []) + + +class TestBaseAgentLogging: + """Tests para el logging del agente.""" + + def test_log_helpers_delegate_to_logger(self): + """Test que los helpers de log delegan en el logger.""" + agent = DummyAgent() + agent.logger = Mock() + + agent.log_info("info") + agent.log_warning("warn") + agent.log_error("err") + agent.log_debug("dbg") + + agent.logger.info.assert_called_once_with("[%s] %s", "DummyAgent", "info") + agent.logger.warning.assert_called_once_with("[%s] %s", "DummyAgent", "warn") + agent.logger.error.assert_called_once_with("[%s] %s", "DummyAgent", "err") + agent.logger.debug.assert_called_once_with("[%s] %s", "DummyAgent", "dbg") diff --git 
a/backend/tests/unit/agents/test_security_agent.py b/backend/tests/unit/agents/test_security_agent.py index e69de29..7324376 100644 --- a/backend/tests/unit/agents/test_security_agent.py +++ b/backend/tests/unit/agents/test_security_agent.py @@ -0,0 +1,412 @@ +""" +Unit tests for SecurityAgent. + +Tests cover all 4 detection modules: +1. Dangerous functions detection +2. SQL injection detection +3. Hardcoded credentials detection +4. Weak cryptography detection +""" + +import pytest + +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class TestSecurityAgentInitialization: + """Test SecurityAgent initialization.""" + + def test_agent_initialization(self): + """Test SecurityAgent is created with correct attributes.""" + agent = SecurityAgent() + + assert agent.name == "SecurityAgent" + assert agent.version == "1.0.0" + assert agent.category == "security" + assert agent.is_enabled() is True + + def test_agent_info(self): + """Test get_info returns correct metadata.""" + agent = SecurityAgent() + info = agent.get_info() + + assert info["name"] == "SecurityAgent" + assert info["category"] == "security" + + +class TestDangerousFunctionsDetection: + """Test detection of dangerous functions.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_eval_function(self, agent): + """Test detection of eval() function.""" + code = """ +result = eval(user_input) +print(result) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + eval_finding = next(f for f in findings if "eval" in f.message.lower()) + assert eval_finding.severity == Severity.CRITICAL + assert eval_finding.issue_type == "dangerous_function" + assert eval_finding.line_number == 2 + assert "ast.literal_eval" in eval_finding.suggestion + assert eval_finding.rule_id 
== "SEC001_EVAL" + + def test_detect_exec_function(self, agent): + """Test detection of exec() function.""" + code = "exec(malicious_code)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + exec_finding = next(f for f in findings if "exec" in f.message.lower()) + assert exec_finding.severity == Severity.CRITICAL + assert exec_finding.issue_type == "dangerous_function" + assert "validate input" in exec_finding.suggestion.lower() + + def test_detect_compile_function(self, agent): + """Test detection of compile() function.""" + code = "compiled = compile(source, 'file', 'exec')" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + compile_finding = next(f for f in findings if "compile" in f.message.lower()) + assert compile_finding.severity == Severity.CRITICAL + + def test_detect_pickle_loads(self, agent): + """Test detection of pickle.loads().""" + code = """ +import pickle +data = pickle.loads(untrusted_data) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + pickle_finding = next( + f + for f in findings + if "pickle" in f.message.lower() or "deserialization" in f.issue_type + ) + assert pickle_finding.severity == Severity.HIGH + assert "json.loads" in pickle_finding.suggestion + + def test_no_false_positives_for_safe_functions(self, agent): + """Test that safe functions don't trigger findings.""" + code = """ +def evaluate_math(a, b): + return a + b + +result = evaluate_math(5, 3) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + # Should have 0 findings for this safe code + assert len(findings) == 0 + + +class TestSQLInjectionDetection: + """Test detection of SQL injection vulnerabilities.""" + + @pytest.fixture + def agent(self): + """Create 
SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_string_concatenation_sql(self, agent): + """Test detection of SQL injection via string concatenation.""" + code = 'cursor.execute("SELECT * FROM users WHERE id=" + user_id)' + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == "sql_injection") + assert sql_finding.severity == Severity.HIGH + assert "parameterized" in sql_finding.suggestion.lower() + assert sql_finding.rule_id == "SEC002_SQL_INJECTION" + + def test_detect_fstring_sql_injection(self, agent): + """Test detection of SQL injection via f-strings.""" + code = "query = f\"DELETE FROM logs WHERE date < '{cutoff}'\"\ncursor.execute(query)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == "sql_injection") + assert sql_finding.severity == Severity.HIGH + + def test_detect_percent_formatting_sql(self, agent): + """Test detection of SQL injection via %s formatting.""" + code = "cursor.execute('SELECT * FROM users WHERE name=%s' % username)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == "sql_injection") + assert sql_finding.severity == Severity.HIGH + + def test_no_false_positives_for_safe_queries(self, agent): + """Test that parameterized queries don't trigger findings.""" + code = """ +cursor.execute('SELECT * FROM users WHERE id=?', (user_id,)) +cursor.execute('INSERT INTO logs VALUES (?, ?)', (timestamp, message)) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + # Should have 0 SQL injection findings + sql_findings = [f for f in findings if f.issue_type 
== "sql_injection"] + assert len(sql_findings) == 0 + + +class TestHardcodedCredentialsDetection: + """Test detection of hardcoded credentials.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_hardcoded_password(self, agent): + """Test detection of hardcoded password.""" + code = 'password = "MySecretPass123"' + context = AnalysisContext(code_content=code, filename="config.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + pwd_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "password" in f.message.lower() + ) + assert pwd_finding.severity == Severity.CRITICAL + assert "environment variable" in pwd_finding.suggestion.lower() + assert "SEC003_PASSWORD" in pwd_finding.rule_id + + def test_detect_hardcoded_api_key(self, agent): + """Test detection of hardcoded API key.""" + code = 'api_key = "sk_live_abc123xyz789"' + context = AnalysisContext(code_content=code, filename="config.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + api_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "api" in f.message.lower() + ) + assert api_finding.severity == Severity.CRITICAL + + def test_detect_hardcoded_token(self, agent): + """Test detection of hardcoded token.""" + code = 'auth_token = "ghp_abc123xyz789012345"' + context = AnalysisContext(code_content=code, filename="auth.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + token_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "token" in f.message.lower() + ) + assert token_finding.severity == Severity.HIGH + + def test_ignore_placeholders(self, agent): + """Test that placeholders are not flagged as credentials.""" + code = """ +password = "YOUR_PASSWORD_HERE" +api_key = "REPLACE_WITH_YOUR_API_KEY" +token = "TODO: Add token" +secret = "example_secret" +""" + context = 
AnalysisContext(code_content=code, filename="config.py") + findings = agent.analyze(context) + + # Should have 0 findings for placeholders + cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] + assert len(cred_findings) == 0 + + def test_ignore_short_values(self, agent): + """Test that very short values are not flagged.""" + code = 'password = "abc"' + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + # Should not flag very short passwords + cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] + assert len(cred_findings) == 0 + + +class TestWeakCryptographyDetection: + """Test detection of weak cryptographic algorithms.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_md5_usage(self, agent): + """Test detection of MD5 hash algorithm.""" + code = """ +import hashlib +hash_value = hashlib.md5(data).hexdigest() +""" + context = AnalysisContext(code_content=code, filename="crypto.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + md5_finding = next( + f + for f in findings + if f.issue_type == "weak_cryptography" and "md5" in f.message.lower() + ) + assert md5_finding.severity == Severity.MEDIUM + assert "SHA-256" in md5_finding.suggestion + assert md5_finding.rule_id == "SEC004_MD5" + + def test_detect_sha1_usage(self, agent): + """Test detection of SHA1 hash algorithm.""" + code = """ + import hashlib + digest = hashlib.sha1(message.encode()).digest() + """ + context = AnalysisContext(code_content=code, filename="hasher.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sha1_finding = next( + f + for f in findings + if f.issue_type == "weak_cryptography" and "sha1" in f.message.lower() + ) + assert sha1_finding.severity == Severity.MEDIUM + + def test_safe_sha256_no_findings(self, agent): + """Test that SHA-256 doesn't trigger 
findings.""" + code = """ + import hashlib + secure_hash = hashlib.sha256(data).hexdigest() + """ + context = AnalysisContext(code_content=code, filename="secure.py") + findings = agent.analyze(context) + + # Should have 0 weak crypto findings for SHA-256 + crypto_findings = [f for f in findings if f.issue_type == "weak_cryptography"] + assert len(crypto_findings) == 0 + + +class TestComplexScenarios: + """Test complex scenarios with multiple vulnerabilities.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_multiple_vulnerabilities_in_one_file(self, agent): + """Test detection of multiple vulnerability types.""" + code = """ + import hashlib + import pickle + + # Hardcoded credential + password = "MySecretPassword123" + api_key = "sk_live_abc123xyz" + + # Dangerous function + def execute_command(user_input): + result = eval(user_input) + return result + + # SQL injection + def query_user(user_id): + query = f"SELECT * FROM users WHERE id = {user_id}" + cursor.execute(query) + return cursor.fetchone() + + # Weak crypto + def hash_password(pwd): + return hashlib.md5(pwd.encode()).hexdigest() + + # Unsafe deserialization + def load_data(data): + return pickle.loads(data) + """ + context = AnalysisContext(code_content=code, filename="vulnerable.py") + findings = agent.analyze(context) + + # Should detect at least 6 vulnerabilities + assert len(findings) >= 6 + + # Verify each type is detected + issue_types = {f.issue_type for f in findings} + assert "hardcoded_credentials" in issue_types + assert "dangerous_function" in issue_types + assert "sql_injection" in issue_types + assert "weak_cryptography" in issue_types + + # Verify CRITICAL findings are first (sorted by severity) + critical_findings = [f for f in findings if f.is_critical] + assert len(critical_findings) >= 2 + # First findings should be CRITICAL + assert findings[0].severity == Severity.CRITICAL + + def test_syntax_error_handling(self, 
agent): + """Test that syntax errors are handled gracefully.""" + code = """ + def incomplete_function( + # Missing closing parenthesis and body + """ + context = AnalysisContext(code_content=code, filename="broken.py") + + # Should not raise exception, but log error + findings = agent.analyze(context) + + # May have some findings from regex-based modules + # Should not crash + assert isinstance(findings, list) + + def test_empty_code(self, agent): + """Test analysis of minimal valid code.""" + code = "# Just a comment\npass" + context = AnalysisContext(code_content=code, filename="minimal.py") + findings = agent.analyze(context) + + assert len(findings) == 0 + + def test_findings_sorted_by_severity(self, agent): + """Test that findings are sorted by severity.""" + code = """ + # MEDIUM severity issue + import hashlib + hash1 = hashlib.md5(data).hexdigest() + + # CRITICAL severity issue + password = "SuperSecret123" + + # HIGH severity issue + query = f"DELETE FROM users WHERE id={user_id}" + cursor.execute(query) + + # CRITICAL severity issue + result = eval(user_input) + """ + context = AnalysisContext(code_content=code, filename="mixed.py") + findings = agent.analyze(context) + + assert len(findings) >= 4 + + # First findings should be CRITICAL + for i in range(min(2, len(findings))): + assert findings[i].severity in [Severity.CRITICAL, Severity.HIGH] diff --git a/backend/tests/unit/application/test_analysis_service.py b/backend/tests/unit/application/test_analysis_service.py index e69de29..1bd0820 100644 --- a/backend/tests/unit/application/test_analysis_service.py +++ b/backend/tests/unit/application/test_analysis_service.py @@ -0,0 +1,176 @@ +"""Tests adicionales para AnalysisService.""" + +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch +from uuid import uuid4 + +import pytest +from fastapi import HTTPException, UploadFile + +from src.models.enums.review_status import ReviewStatus +from src.schemas.finding import Finding, 
Severity +from src.services.analysis_service import AnalysisService + + +@pytest.fixture +def mock_repo(): + """Repositorio mockeado.""" + repo = MagicMock() + repo.create.return_value = MagicMock( + id=uuid4(), + user_id="user_123", + filename="test.py", + code_content="print('hello')", + quality_score=100, + status=ReviewStatus.COMPLETED, + total_findings=0, + created_at=datetime.utcnow(), + completed_at=datetime.utcnow(), + ) + return repo + + +@pytest.fixture +def service(mock_repo): + """Instancia de AnalysisService.""" + return AnalysisService(mock_repo) + + +class TestAnalyzeCodeFull: + """Tests completos para analyze_code.""" + + @pytest.mark.asyncio + async def test_analyze_code_success(self, service, mock_repo): + """Verifica flujo completo de análisis exitoso.""" + content = b"import os\n\ndef main():\n pass\n\nif __name__ == '__main__':\n main()\n" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "clean_code.py" + mock_file.read.return_value = content + mock_file.seek = AsyncMock() + + with patch.object( + service, "_validate_file", return_value=(content.decode(), "clean_code.py") + ): + result = await service.analyze_code(mock_file, "user_123") + + assert result is not None + mock_repo.create.assert_called_once() + + @pytest.mark.asyncio + async def test_analyze_code_with_vulnerabilities(self, service, mock_repo): + """Verifica análisis con código vulnerable.""" + vulnerable_code = b"""import os +def unsafe(): + result = eval(user_input) + return result + +password = "secret123" +""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "vulnerable.py" + mock_file.read.return_value = vulnerable_code + mock_file.seek = AsyncMock() + + # Mock para que devuelva el código validado + with patch.object( + service, "_validate_file", return_value=(vulnerable_code.decode(), "vulnerable.py") + ): + result = await service.analyze_code(mock_file, "user_456") + + assert result is not None + # Verificar que se llamó create con hallazgos + 
call_args = mock_repo.create.call_args[0][0] + assert call_args.total_findings >= 0 + + @pytest.mark.asyncio + async def test_analyze_code_agent_exception_handled(self, service, mock_repo): + """Verifica que excepciones del agente se manejan gracefully.""" + content = b"import os\n\ndef main():\n pass\n\nmain()\n" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "test.py" + mock_file.read.return_value = content + mock_file.seek = AsyncMock() + + with patch.object(service, "_validate_file", return_value=(content.decode(), "test.py")): + with patch( + "src.services.analysis_service.SecurityAgent.analyze", + side_effect=Exception("Agent crashed"), + ): + result = await service.analyze_code(mock_file, "user_789") + + # Debe completar aunque el agente falle + assert result is not None + + +class TestValidateFileEdgeCases: + """Tests para casos edge de validación.""" + + @pytest.mark.asyncio + async def test_validate_file_missing_filename(self, service): + """Verifica error cuando filename es None.""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = None + + with pytest.raises(HTTPException) as exc: + await service._validate_file(mock_file) + + assert exc.value.status_code == 422 + + @pytest.mark.asyncio + async def test_validate_file_unicode_decode_error(self, service): + """Verifica error con contenido no UTF-8.""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "binary.py" + mock_file.read.return_value = b"\x80\x81\x82\x83\x84" + + with pytest.raises(HTTPException) as exc: + await service._validate_file(mock_file) + + assert exc.value.status_code == 422 + assert "UTF-8" in exc.value.detail + + +class TestCalculateQualityScoreEdgeCases: + """Tests adicionales para cálculo de score.""" + + def test_score_with_info_findings(self, service): + """INFO findings no penalizan.""" + findings = [ + Finding( + severity=Severity.INFO, + issue_type="info", + message="Informational note", + line_number=1, + agent_name="test", + ) + ] + 
score = service._calculate_quality_score(findings) + assert score == 100 + + def test_score_with_low_findings(self, service): + """LOW findings penalizan 1 punto.""" + findings = [ + Finding( + severity=Severity.LOW, + issue_type="minor", + message="Minor issue here", + line_number=1, + agent_name="test", + ) + ] + score = service._calculate_quality_score(findings) + assert score == 99 + + def test_score_with_medium_findings(self, service): + """MEDIUM findings penalizan 2 puntos.""" + findings = [ + Finding( + severity=Severity.MEDIUM, + issue_type="medium", + message="Medium severity issue", + line_number=1, + agent_name="test", + ) + ] + score = service._calculate_quality_score(findings) + assert score == 98 diff --git a/backend/tests/unit/domain/test_event_bus.py b/backend/tests/unit/domain/test_event_bus.py index e69de29..21e15fc 100644 --- a/backend/tests/unit/domain/test_event_bus.py +++ b/backend/tests/unit/domain/test_event_bus.py @@ -0,0 +1,95 @@ +"""Tests para EventBus.""" + +import pytest + +from src.core.events.event_bus import EventBus +from src.core.events.observers import EventObserver + + +class MockObserver(EventObserver): + """Observer mock para testing.""" + + def __init__(self): + self.received_events = [] + + def on_event(self, event_type: str, data: dict) -> None: + self.received_events.append((event_type, data)) + + +class TestEventBus: + """Tests para EventBus.""" + + @pytest.fixture + def event_bus(self): + """Crea instancia de EventBus y limpia estado.""" + bus = EventBus() + bus.clear() + return bus + + def test_subscribe_and_publish(self, event_bus): + """Verifica que los observers reciben eventos.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.publish("analysis_started", {"id": "123"}) + + assert len(observer.received_events) == 1 + assert observer.received_events[0][0] == "analysis_started" + assert observer.received_events[0][1]["id"] == "123" + + def test_multiple_subscribers(self, event_bus): + 
"""Verifica que múltiples observers reciben el mismo evento.""" + observer1 = MockObserver() + observer2 = MockObserver() + + event_bus.subscribe(observer1) + event_bus.subscribe(observer2) + event_bus.publish("analysis_started", {"test": True}) + + assert len(observer1.received_events) == 1 + assert len(observer2.received_events) == 1 + + def test_unsubscribe(self, event_bus): + """Verifica que unsubscribe funciona.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.unsubscribe(observer) + event_bus.publish("analysis_started", {"id": "456"}) + + assert len(observer.received_events) == 0 + + def test_publish_without_subscribers(self, event_bus): + """Publicar sin suscriptores no debe fallar.""" + # No debe lanzar excepción + event_bus.publish("analysis_completed", {"id": "789"}) + + def test_clear_all_subscribers(self, event_bus): + """Verifica que clear elimina todos los suscriptores.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.clear() + + event_bus.publish("analysis_started", {}) + event_bus.publish("analysis_completed", {}) + + assert len(observer.received_events) == 0 + + def test_handler_exception_does_not_break_others(self, event_bus): + """Un observer que falla no debe afectar a otros.""" + + class FailingObserver(EventObserver): + def on_event(self, event_type: str, data: dict) -> None: + raise ValueError("Observer error") + + failing_observer = FailingObserver() + working_observer = MockObserver() + + event_bus.subscribe(failing_observer) + event_bus.subscribe(working_observer) + + # No debe lanzar excepción + event_bus.publish("analysis_started", {"id": "test"}) + + assert len(working_observer.received_events) == 1 diff --git a/backend/src/repositories/code_review_repo.py b/backend/tests/unit/middleware/__init__.py similarity index 100% rename from backend/src/repositories/code_review_repo.py rename to backend/tests/unit/middleware/__init__.py diff --git 
a/backend/tests/unit/middleware/test_auth.py b/backend/tests/unit/middleware/test_auth.py new file mode 100644 index 0000000..e75240f --- /dev/null +++ b/backend/tests/unit/middleware/test_auth.py @@ -0,0 +1,50 @@ +"""Tests para la dependencia de autenticación.""" + +import os +from unittest.mock import patch + +import pytest +from fastapi import HTTPException + +from src.core.dependencies.auth import get_current_user +from src.schemas.user import Role, User + + +class TestGetCurrentUser: + """Tests para get_current_user.""" + + @pytest.mark.asyncio + @patch.dict(os.environ, {"ENVIRONMENT": "production"}) + async def test_production_requires_valid_token(self): + """En producción, un token inválido debe lanzar 401.""" + with pytest.raises(HTTPException) as exc: + await get_current_user(token="invalid-token") + + assert exc.value.status_code == 401 + + @pytest.mark.asyncio + @patch.dict(os.environ, {"ENVIRONMENT": "production"}) + async def test_production_missing_token_raises_401(self): + """En producción, sin token debe lanzar 401.""" + with pytest.raises(HTTPException) as exc: + await get_current_user(token="") + + assert exc.value.status_code == 401 + + @pytest.mark.asyncio + @patch.dict(os.environ, {"ENVIRONMENT": "development"}) + async def test_development_returns_mock_user(self): + """En desarrollo, retorna usuario mock.""" + user = await get_current_user(token="any-token") + + assert isinstance(user, User) + assert user.id == "user_123" + assert user.role == Role.DEVELOPER + + @pytest.mark.asyncio + @patch.dict(os.environ, {"ENVIRONMENT": "development"}) + async def test_development_accepts_empty_token(self): + """En desarrollo, acepta token vacío.""" + user = await get_current_user(token="") + + assert isinstance(user, User) diff --git a/backend/tests/unit/repositories/test_code_review_repo.py b/backend/tests/unit/repositories/test_code_review_repo.py index e69de29..9dc7031 100644 --- a/backend/tests/unit/repositories/test_code_review_repo.py +++ 
b/backend/tests/unit/repositories/test_code_review_repo.py @@ -0,0 +1,132 @@ +from datetime import datetime +from unittest.mock import MagicMock +from uuid import uuid4 + +import pytest +from sqlalchemy.exc import SQLAlchemyError + +from src.models.code_review import CodeReviewEntity +from src.models.enums.review_status import ReviewStatus +from src.repositories.code_review_repository import CodeReviewRepository +from src.schemas.analysis import CodeReview +from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 + + +def test_encrypt_decrypt_cycle(): + """Verifica que lo que se encripta se pueda desencriptar correctamente.""" + original = "Secret Code 123" + encrypted = encrypt_aes256(original) + decrypted = decrypt_aes256(encrypted) + + assert original == decrypted + assert encrypted != original + assert isinstance(encrypted, bytes) + + +def test_encrypt_empty_raises_error(): + """Verifica que encriptar vacío lance error.""" + with pytest.raises(ValueError): + encrypt_aes256("") + + +def test_decrypt_empty_returns_empty(): + """Verifica que desencriptar bytes vacíos retorne string vacío.""" + assert decrypt_aes256(b"") == "" + assert decrypt_aes256(None) == "" + + +@pytest.fixture +def mock_session(): + return MagicMock() + + +@pytest.fixture +def repo(mock_session): + return CodeReviewRepository(mock_session) + + +@pytest.fixture +def sample_review(): + return CodeReview( + id=uuid4(), + user_id="user_123", + filename="test.py", + code_content="print('Hello')", + quality_score=100, + status=ReviewStatus.PENDING, + total_findings=0, + created_at=datetime.utcnow(), + ) + + +def test_create_success(repo, mock_session, sample_review): + """Verifica creación exitosa y encriptación.""" + result = repo.create(sample_review) + + assert result == sample_review + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + + # Verificar que se guardó encriptado + args, _ = mock_session.add.call_args + entity = args[0] + assert 
entity.code_content != "print('Hello')" + assert isinstance(entity.code_content, bytes) + + +def test_create_db_error(repo, mock_session, sample_review): + """Verifica manejo de errores de DB al crear.""" + mock_session.commit.side_effect = SQLAlchemyError("DB Error") + + with pytest.raises(SQLAlchemyError): + repo.create(sample_review) + + mock_session.rollback.assert_called_once() + + +def test_find_by_id_success(repo, mock_session): + """Verifica búsqueda exitosa y desencriptación.""" + review_id = uuid4() + encrypted_content = encrypt_aes256("print('Found')") + + mock_entity = CodeReviewEntity( + id=review_id, + user_id="user_1", + filename="found.py", + code_content=encrypted_content, + quality_score=90, + status=ReviewStatus.COMPLETED, + total_findings=2, + created_at=datetime.utcnow(), + ) + mock_session.get.return_value = mock_entity + + result = repo.find_by_id(review_id) + + assert result is not None + assert result.id == review_id + assert result.code_content == "print('Found')" # Desencriptado + assert result.status == ReviewStatus.COMPLETED + + +def test_find_by_id_not_found(repo, mock_session): + """Verifica retorno None si no existe.""" + mock_session.get.return_value = None + result = repo.find_by_id(uuid4()) + assert result is None + + +def test_find_by_id_decryption_error(repo, mock_session): + """Verifica manejo de error al desencriptar/recuperar.""" + review_id = uuid4() + mock_entity = CodeReviewEntity( + id=review_id, code_content=b"invalid_bytes" # Esto fallará al desencriptar con Fernet + ) + mock_session.get.return_value = mock_entity + + # Mockear decrypt para forzar error genérico si Fernet no falla con basura + # O confiar en que Fernet falle. Fernet lanza InvalidToken. + # Pero el repo captura Exception. 
+ + with pytest.raises(Exception): + repo.find_by_id(review_id) diff --git a/backend/tests/unit/services/test_analysis_service.py b/backend/tests/unit/services/test_analysis_service.py index e69de29..3395131 100644 --- a/backend/tests/unit/services/test_analysis_service.py +++ b/backend/tests/unit/services/test_analysis_service.py @@ -0,0 +1,119 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest +from fastapi import HTTPException, UploadFile + +from src.schemas.finding import Finding, Severity +from src.services.analysis_service import AnalysisService + + +# Fixtures +@pytest.fixture +def mock_repo(): + return MagicMock() + + +@pytest.fixture +def service(mock_repo): + return AnalysisService(mock_repo) + + +# Tests de Validación de Archivo (RN4) + + +@pytest.mark.asyncio +async def test_validate_file_success(service): + """Verifica que un archivo válido pase la validación.""" + content = b"import os\n" * 6 # > 5 líneas + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "valid.py" + mock_file.read.return_value = content + + result = await service._validate_file(mock_file) + # _validate_file returns tuple (content, filename) + assert result == (content.decode("utf-8"), "valid.py") + + +@pytest.mark.asyncio +async def test_validate_file_extension_error(service): + """Verifica error 422 con extensión inválida.""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "script.txt" + + with pytest.raises(HTTPException) as exc: + await service._validate_file(mock_file) + assert exc.value.status_code == 422 + assert "Solo se aceptan archivos .py" in exc.value.detail + + +@pytest.mark.asyncio +async def test_validate_file_size_error(service): + """Verifica error 413 con archivo muy grande (>10MB).""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "big.py" + # Simular 11MB + mock_file.read.return_value = b"a" * (11 * 1024 * 1024) + + with pytest.raises(HTTPException) as exc: + await service._validate_file(mock_file) + 
assert exc.value.status_code == 413 + + +@pytest.mark.asyncio +async def test_validate_file_empty_error(service): + """Verifica error 422 con archivo con pocas líneas (<5).""" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "empty.py" + mock_file.read.return_value = b"print('hi')" + + with pytest.raises(HTTPException) as exc: + await service._validate_file(mock_file) + assert exc.value.status_code == 422 + + +# Tests de Cálculo de Score (RN8) + + +def test_calculate_quality_score_mixed(service): + """Prueba cálculo de score con hallazgos mixtos.""" + findings = [ + Finding( + severity=Severity.CRITICAL, + issue_type="security", + message="Critical vulnerability found", # > 5 chars + line_number=1, + agent_name="test", + ), # -10 + Finding( + severity=Severity.HIGH, + issue_type="security", + message="High severity issue found", # > 5 chars + line_number=1, + agent_name="test", + ), # -5 + ] + score = service._calculate_quality_score(findings) + assert score == 85 # 100 - 15 + + +def test_calculate_quality_score_perfect(service): + """Prueba score 100 sin hallazgos.""" + score = service._calculate_quality_score([]) + assert score == 100 + + +def test_calculate_quality_score_zero_floor(service): + """Prueba que el score no baje de 0.""" + # 11 críticos = -110 puntos + findings = [ + Finding( + severity=Severity.CRITICAL, + issue_type="security", + message="Critical vulnerability found", # > 5 chars + line_number=1, + agent_name="test", + ) + ] * 11 + + score = service._calculate_quality_score(findings) + assert score == 0 diff --git a/backend/tests/unit/test_analysis_schemas.py b/backend/tests/unit/test_analysis_schemas.py new file mode 100644 index 0000000..ecddd72 --- /dev/null +++ b/backend/tests/unit/test_analysis_schemas.py @@ -0,0 +1,289 @@ +""" +Unit tests for Analysis Schemas +Tests para los esquemas de análisis +""" + +from datetime import datetime + +import pytest +from pydantic import ValidationError + +from src.schemas.analysis import 
AnalysisContext, AnalysisRequest, AnalysisResponse +from src.schemas.finding import Finding, Severity + + +class TestAnalysisContext: + """Tests para AnalysisContext schema.""" + + def test_create_valid_context(self): + """Test crear contexto válido.""" + context = AnalysisContext( + code_content="def hello():\n print('Hello')", filename="test.py" + ) + + assert context.code_content == "def hello():\n print('Hello')" + assert context.filename == "test.py" + assert context.language == "python" + assert context.analysis_id is not None + assert isinstance(context.created_at, datetime) + + def test_empty_code_raises_error(self): + """Test que código vacío lanza error.""" + with pytest.raises(ValidationError) as exc_info: + AnalysisContext(code_content="", filename="test.py") + + assert "code_content" in str(exc_info.value).lower() + + def test_whitespace_only_code_raises_error(self): + """Test que código solo con espacios lanza error.""" + with pytest.raises(ValidationError): + AnalysisContext(code_content=" \n ", filename="test.py") + + def test_invalid_filename_extension(self): + """Test que extensión no .py lanza error.""" + with pytest.raises(ValidationError) as exc_info: + AnalysisContext(code_content="code", filename="test.txt") + + assert "Python files" in str(exc_info.value) + + def test_short_filename_raises_error(self): + """Test que filename muy corto lanza error.""" + with pytest.raises(ValidationError): + AnalysisContext(code_content="code", filename=".p") + + def test_line_count_property(self): + """Test propiedad line_count.""" + context = AnalysisContext(code_content="line1\nline2\nline3", filename="test.py") + assert context.line_count == 3 + + def test_line_count_single_line(self): + """Test line_count con una línea.""" + context = AnalysisContext(code_content="single line", filename="test.py") + assert context.line_count == 1 + + def test_char_count_property(self): + """Test propiedad char_count.""" + context = AnalysisContext(code_content="hello 
world", filename="test.py") + assert context.char_count == 11 + + def test_add_metadata(self): + """Test agregar metadata.""" + context = AnalysisContext(code_content="code", filename="test.py") + + context.add_metadata("user_id", "123") + context.add_metadata("project", "CodeGuard") + + assert context.metadata["user_id"] == "123" + assert context.metadata["project"] == "CodeGuard" + + def test_metadata_persists_after_mutation(self): + """Test que metadata persiste después de mutación.""" + context = AnalysisContext(code_content="code", filename="test.py") + + context.add_metadata("key1", "value1") + assert "key1" in context.metadata + + context.add_metadata("key2", "value2") + assert "key1" in context.metadata # key1 todavía existe + + +class TestAnalysisRequest: + """Tests para AnalysisRequest schema.""" + + def test_create_valid_request(self): + """Test crear request válido.""" + request = AnalysisRequest(filename="app.py", code_content="def main():\n pass") + + assert request.filename == "app.py" + assert request.code_content == "def main():\n pass" + assert request.agents_config is None + + def test_request_with_agents_config(self): + """Test request con configuración de agentes.""" + config = {"security": True, "quality": True, "performance": False, "style": True} + request = AnalysisRequest(filename="app.py", code_content="code", agents_config=config) + + assert request.agents_config == config + assert request.agents_config["security"] is True + assert request.agents_config["performance"] is False + + +class TestAnalysisResponse: + """Tests para AnalysisResponse schema.""" + + def test_create_response(self): + """Test crear response.""" + from uuid import uuid4 + + analysis_id = uuid4() + response = AnalysisResponse( + analysis_id=analysis_id, + filename="app.py", + status="pending", + quality_score=85, + total_findings=3, + created_at=datetime.utcnow(), + ) + + assert response.analysis_id == analysis_id + assert response.filename == "app.py" + assert 
response.status == "pending" + assert response.quality_score == 85 + assert response.total_findings == 3 + + +class TestFinding: + """Tests para Finding schema.""" + + def test_create_valid_finding(self): + """Test crear finding válido.""" + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval detected", + line_number=10, + agent_name="SecurityAgent", + ) + + assert finding.severity == Severity.CRITICAL + assert finding.issue_type == "dangerous_function" + assert finding.line_number == 10 + assert isinstance(finding.detected_at, datetime) + + def test_invalid_line_number_zero(self): + """Test que line_number < 1 lanza error.""" + with pytest.raises(ValidationError): + Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=0, + agent_name="TestAgent", + ) + + def test_invalid_line_number_negative(self): + """Test que line_number negativo lanza error.""" + with pytest.raises(ValidationError): + Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=-1, + agent_name="TestAgent", + ) + + def test_is_critical_property(self): + """Test propiedad is_critical.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + + assert critical.is_critical is True + + non_critical = Finding( + severity=Severity.INFO, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + + assert non_critical.is_critical is False + + def test_is_high_or_critical_property(self): + """Test propiedad is_high_or_critical.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert critical.is_high_or_critical is True + + high = Finding( + severity=Severity.HIGH, + issue_type="test", + message="Test message", + line_number=1, + 
agent_name="TestAgent", + ) + assert high.is_high_or_critical is True + + medium = Finding( + severity=Severity.MEDIUM, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert medium.is_high_or_critical is False + + def test_is_actionable_property(self): + """Test propiedad is_actionable.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert critical.is_actionable is True + + info = Finding( + severity=Severity.INFO, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert info.is_actionable is False + + +class TestAnalysisContextHelpers: + def test_code_is_dedented_and_ast_cached(self): + context = AnalysisContext( + code_content=" def foo():\n return 1", + filename="foo.py", + ) + assert context.code_content.startswith("def foo") + first_ast = context.get_ast() + assert context.get_ast() is first_ast + + def test_get_ast_invalid_code_raises(self): + context = AnalysisContext(code_content="def broken(", filename="bad.py") + with pytest.raises(SyntaxError): + context.get_ast() + + def test_get_lines_and_snippets(self): + context = AnalysisContext(code_content="a\nb\nc", filename="file.py") + assert context.get_line(2) == "b" + assert context.get_line(99) is None + assert context.get_code_snippet(1, 2) == "a\nb" + + def test_finding_from_and_to_dict_without_detected_at(self): + data = { + "severity": "critical", + "issue_type": "dangerous_function", + "message": "Use of eval() detected", + "line_number": 5, + "agent_name": "SecurityAgent", + } + finding = Finding.from_dict(data) + serialized = finding.to_dict() + assert serialized["severity"] == "critical" + assert "detected_at" in serialized + + def test_calculate_penalty_map(self): + finding = Finding( + severity=Severity.HIGH, + issue_type="test", + message="Test issue", + line_number=1, + agent_name="TestAgent", + ) + 
assert finding.calculate_penalty() == 5 diff --git a/backend/tests/unit/test_main.py b/backend/tests/unit/test_main.py new file mode 100644 index 0000000..fbab9cf --- /dev/null +++ b/backend/tests/unit/test_main.py @@ -0,0 +1,35 @@ +""" +Tests for main FastAPI application +""" + +from fastapi.testclient import TestClient + +from src.main import app + +client = TestClient(app) + + +def test_health_endpoint(): + """Test health check endpoint returns 200""" + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert data["version"] == "1.0.0" + assert "service" in data + + +def test_root_endpoint(): + """Test root endpoint returns 200""" + response = client.get("/") + assert response.status_code == 200 + data = response.json() + assert "message" in data + assert "docs" in data + assert data["docs"] == "/docs" + + +def test_docs_endpoint_accessible(): + """Test Swagger docs are accessible""" + response = client.get("/docs") + assert response.status_code == 200 diff --git a/backend/tests/unit/vulnerable_test.py b/backend/tests/unit/vulnerable_test.py new file mode 100644 index 0000000..3ae4b84 --- /dev/null +++ b/backend/tests/unit/vulnerable_test.py @@ -0,0 +1,19 @@ +"""Archivo de prueba con vulnerabilidades.""" + +import os +import pickle + + +def unsafe_eval(user_input): + """Uso peligroso de eval.""" + return eval(user_input) + + +def unsafe_query(user_id): + """SQL injection vulnerability.""" + query = "SELECT * FROM users WHERE id = " + user_id + return query + + +PASSWORD = "super_secret_password_123" +API_KEY = "sk-1234567890abcdef" diff --git a/docs/ci-cd-setup.md b/docs/ci-cd-setup.md new file mode 100644 index 0000000..dd65d93 --- /dev/null +++ b/docs/ci-cd-setup.md @@ -0,0 +1,694 @@ +# 🔧 Documentación Técnica del Pipeline CI/CD - CodeGuard AI + +Esta documentación detalla la configuración completa del pipeline de **Integración Continua / Despliegue Continuo (CI/CD)** 
implementado con **GitHub Actions** para CodeGuard AI. + +--- + +## 📋 Tabla de Contenidos + +- [Visión General](#-visión-general-del-pipeline) +- [Workflows Implementados](#-workflows-implementados) +- [Estructura de Directorios](#-estructura-de-archivos) +- [Protección de Ramas](#-protección-de-ramas) +- [Secretos y Variables](#-secretos-y-variables-de-entorno) +- [Configuración Detallada](#-configuración-detallada-de-workflows) +- [Badges de Estado](#-badges-de-estado) +- [Monitoreo](#-monitoreo-y-logging) +- [Troubleshooting](#-troubleshooting) +- [Mejores Prácticas](#-mejores-prácticas) + +--- + +## 🎯 Visión General del Pipeline + +El pipeline CI/CD de CodeGuard AI automatiza la **validación, testing y construcción** del código para garantizar que todos los cambios que llegan a las ramas `main` y `develop` cumplen con los estándares de calidad establecidos. + +### Objetivos del Pipeline + +1. ✅ **Validación Automática**: Linting, tests, build +2. ✅ **Garantía de Calidad**: Cobertura ≥75%, pylint ≥8.5/10 +3. ✅ **Prevención de Regresiones**: Tests obligatorios +4. ✅ **Feedback Inmediato**: En PRs y commits +5. ✅ **Deployment Seguro**: Build validado + +### Arquitectura del Pipeline + +``` +┌───────────────────────────────────────────────────────────┐ +│ GITHUB ACTIONS WORKFLOW ORCHESTRATION │ +└───────────────────────────────────────────────────────────┘ + ↓ + Trigger: push a rama / pull request + ↓ + ┌────────────────────────┬────────────────────┬──────────────┐ + ↓ ↓ ↓ ↓ +┌──────────────┐ ┌───────────────────┐ ┌─────────────┐ ┌──────────┐ +│ Lint Check │ │ Test & Coverage │ │ Docker Build│ │ Security │ +│ (lint.yml) │ │ (test.yml) │ │ (docker.yml)│ │ Scan │ +└──────────────┘ └───────────────────┘ └─────────────┘ └──────────┘ + ✅/❌ ✅/❌ ✅/❌ ✅/❌ + └────────────────────┬────────────────────┘ + ↓ + ┌──────────────────────┐ + │ Branch Protection │ + │ Status Checks │ + └──────────────────────┘ + ↓ + Merge Allowed? 
✅ +``` + +--- + +## 🔄 Workflows Implementados + +### 1️⃣ Workflow: Lint Check (`lint.yml`) + +**Ubicación**: `.github/workflows/lint.yml` + +**Propósito**: Validar que el código cumple con estándares de estilo y calidad. + +**Triggers**: +- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` +- Pull requests hacia: `main`, `develop` + +**Herramientas**: +- **Black**: Formateo de código +- **isort**: Ordenamiento de imports +- **Flake8**: Análisis de PEP 8 y errores básicos +- **Pylint**: Análisis comprehensive de código + +**Configuración**: + +```yaml +name: Lint Code + +on: + push: + branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] + paths: + - "backend/src/**/*.py" + - "backend/tests/**/*.py" + - ".github/workflows/lint.yml" + pull_request: + branches: [main, develop] + +jobs: + lint: + name: Code Quality Check + runs-on: ubuntu-latest + + steps: + # 1. Checkout código + - uses: actions/checkout@v4 + + # 2. Setup Python 3.11 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + # 3. Instalar dependencias + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install black isort flake8 pylint + pip install -r requirements.txt + + # 4. Ejecutar Black (formatter) + - name: Run Black + run: | + cd backend + black src/ tests/ --line-length=100 --check + + # 5. Ejecutar isort + - name: Run isort + run: | + cd backend + isort src/ tests/ --profile=black --check-only + + # 6. Ejecutar Flake8 + - name: Run Flake8 + run: | + cd backend + flake8 src/ tests/ --max-line-length=100 --extend-ignore=E203,W503 + + # 7. Ejecutar Pylint + - name: Run Pylint + run: | + cd backend + pylint src/ --rcfile=.pylintrc --fail-under=8.5 || exit 1 + echo "✅ Pylint passed with score ≥8.5/10" + + # 8. Summary + - name: Summary + if: success() + run: echo "✅ All lint checks passed!" 
+``` + +**Criterios de Éxito**: +- ✅ Black: Sin cambios requeridos (--check) +- ✅ isort: Imports correctamente ordenados +- ✅ Flake8: Sin errores de estilo +- ✅ Pylint: Puntuación ≥ 8.5/10 + +--- + +### 2️⃣ Workflow: Testing & Coverage (`test.yml`) + +**Ubicación**: `.github/workflows/test.yml` + +**Propósito**: Ejecutar tests y validar cobertura de código. + +**Triggers**: +- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` +- Pull requests hacia: `main`, `develop` + +**Servicios**: +- PostgreSQL 15 (para tests de integración) +- Redis (cache layer) + +**Configuración**: + +```yaml +name: Tests & Coverage + +on: + push: + branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] + paths: + - "backend/src/**/*.py" + - "backend/tests/**/*.py" + - "backend/requirements.txt" + - ".github/workflows/test.yml" + pull_request: + branches: [main, develop] + +jobs: + test: + name: Run Tests & Coverage + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.11", "3.12"] # Test en múltiples versiones + + services: + postgres: + image: postgres:15-alpine + env: + POSTGRES_USER: codeguard_test + POSTGRES_PASSWORD: test_password + POSTGRES_DB: codeguard_test_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + redis: + image: redis:7-alpine + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 + + steps: + # 1. Checkout código + - uses: actions/checkout@v4 + + # 2. Setup Python + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + # 3. Instalar dependencias + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install pytest pytest-cov pytest-asyncio httpx + pip install -r requirements.txt + + # 4. 
Ejecutar tests + - name: Run tests with coverage + env: + DATABASE_URL: postgresql://codeguard_test:test_password@localhost:5432/codeguard_test_db + REDIS_URL: redis://localhost:6379/0 + run: | + cd backend + pytest tests/ \ + --cov=src \ + --cov-report=term-missing \ + --cov-report=xml \ + --cov-report=html \ + --cov-fail-under=75 \ + -v + + # 5. Subir cobertura a Codecov + - name: Upload to Codecov + uses: codecov/codecov-action@v4 + with: + file: backend/coverage.xml + flags: unittests + name: codecov-${{ matrix.python-version }} + fail_ci_if_error: false + + # 6. Guardar reporte HTML + - name: Upload coverage report + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report-py${{ matrix.python-version }} + path: backend/htmlcov/ + retention-days: 30 + + # 7. Summary + - name: Summary + if: success() + run: | + echo "✅ Tests passed!" + echo "📊 Coverage: ≥75%" +``` + +**Criterios de Éxito**: +- ✅ Todos los tests pasan +- ✅ Cobertura ≥ 75% +- ✅ Tests en Python 3.11 y 3.12 + +--- + +### 3️⃣ Workflow: Docker Build (`docker.yml`) + +**Ubicación**: `.github/workflows/docker.yml` + +**Propósito**: Validar que la imagen Docker se construye correctamente. + +**Triggers**: +- Push a: `main`, `develop` +- Pull requests hacia: `main`, `develop` + +**Configuración**: + +```yaml +name: Docker Build + +on: + push: + branches: [main, develop] + paths: + - "backend/Dockerfile" + - "backend/docker-compose.yml" + - "backend/requirements.txt" + - "backend/src/**/*.py" + - ".github/workflows/docker.yml" + pull_request: + branches: [main, develop] + +jobs: + build: + name: Build & Validate Docker Image + runs-on: ubuntu-latest + + steps: + # 1. Checkout + - uses: actions/checkout@v4 + + # 2. Setup Docker Buildx (mejor caché) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # 3. 
Build imagen + - name: Build Docker image + uses: docker/build-push-action@v5 + with: + context: backend/ + push: false + tags: codeguard-backend:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # 4. Validar docker-compose + - name: Validate docker-compose + run: | + cd backend + docker-compose config > /dev/null + echo "✅ docker-compose.yml is valid" + + # 5. Test imagen (verificar que se puede ejecutar) + - name: Test Docker image + run: | + docker run --rm codeguard-backend:${{ github.sha }} python --version + docker run --rm codeguard-backend:${{ github.sha }} pip list | grep fastapi + echo "✅ Docker image validated" + + # 6. Scan vulnerabilidades (Trivy) + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: codeguard-backend:${{ github.sha }} + format: sarif + output: trivy-results.sarif + exit-code: 0 # No bloquea si hay advertencias + + # 7. Upload Trivy results + - name: Upload Trivy results to GitHub Security + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-results.sarif + + # 8. Summary + - name: Summary + if: success() + run: | + echo "✅ Docker build successful" + echo "Image: codeguard-backend:${{ github.sha }}" +``` + +**Criterios de Éxito**: +- ✅ Imagen Docker se construye sin errores +- ✅ Contiene Python y FastAPI +- ✅ Sin vulnerabilidades críticas + +--- + +## 📁 Estructura de Archivos + +``` +.github/ +├── workflows/ +│ ├── lint.yml # Linting workflow +│ ├── test.yml # Testing workflow +│ ├── docker.yml # Docker build workflow +│ └── deploy.yml # (Futuro) Deployment +│ +└── PULL_REQUEST_TEMPLATE.md # Template para PRs +``` + +### Archivo: `.github/PULL_REQUEST_TEMPLATE.md` + +```markdown +## 📝 Descripción +Descripción clara de los cambios realizados. 
+ +## 🎯 Historia de Usuario Relacionada +Closes #XX (CGAI-XX) + +## 🧪 Testing +- [x] Tests unitarios agregados +- [x] Tests de integración +- [x] Coverage ≥75% + +## ✅ Checklist +- [x] He seguido las convenciones de commits +- [x] He agregado tests +- [x] Todos los tests pasan +- [x] He actualizado documentación +- [x] Mi código sigue las convenciones + +## 🔗 Related Issues +Closes #XX, #YY +``` + +--- + +## 🛡️ Protección de Ramas + +### Rama `main` (Producción) + +**Ubicación**: Settings → Branches → Add rule + +**Configuración**: + +| Regla | Estado | +|-------|--------| +| **Require pull request reviews** | ✅ Sí (1 aprobación) | +| **Dismiss stale PR approvals** | ✅ Sí | +| **Require status checks** | ✅ Sí: lint, test, docker | +| **Require branches up to date** | ✅ Sí | +| **Resolve conversations** | ✅ Sí | +| **Require signed commits** | ❌ No (opcional) | +| **Linear history** | ❌ No | +| **Allow force pushes** | ❌ No | +| **Allow deletions** | ❌ No | + +### Rama `develop` (Integración) + +**Configuración Similar a `main` pero**: +- Aprobaciones requeridas: 1 (no 2) +- Sin restricción de "quien puede pushear" + +--- + +## 🔐 Secretos y Variables de Entorno + +### Secretos Requeridos (GitHub Settings → Secrets) + +| Secreto | Descripción | Requerido | Usado en | +|---------|-------------|-----------|----------| +| `DATABASE_URL` | PostgreSQL connection string | ✅ Tests | test.yml | +| `REDIS_URL` | Redis connection string | ✅ Tests | test.yml | +| `SUPABASE_URL` | Supabase project URL | ✅ Producción | Aplicación | +| `SUPABASE_KEY` | Supabase API key | ✅ Producción | Aplicación | + +### Variables de Entorno (Públicas) + +```yaml +env: + PYTHON_VERSION: "3.11" + REGISTRY: ghcr.io + IMAGE_NAME: codeguard-backend +``` + +### Configurar Secretos + +```bash +# 1. Ir a GitHub Settings → Secrets and variables → Actions +# 2. Click "New repository secret" +# 3. Name: DATABASE_URL +# 4. Value: postgresql://user:pass@localhost:5432/codeguard_db +# 5. 
Click "Add secret" +``` + +--- + +## ⚙️ Configuración Detallada de Workflows + +### Caching de Dependencias + +```yaml +- uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" # Cache automático de pip +``` + +**Ventajas**: +- ✅ Reduce tiempo de instalación de dependencias +- ✅ Acelera workflow ~2-3 minutos + +### Matrix Testing (Múltiples Versiones) + +```yaml +strategy: + matrix: + python-version: ["3.11", "3.12"] + os: [ubuntu-latest, macos-latest] # (Futuro) +``` + +**Ventajas**: +- ✅ Prueba en múltiples versiones +- ✅ Garantiza compatibilidad + +### Condicionales en Steps + +```yaml +- name: Deploy to production + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + run: echo "Deploying..." + +- name: Upload artifacts + if: always() # Siempre, incluso si fallaron pasos anteriores + uses: actions/upload-artifact@v4 +``` + +--- + +## 📊 Badges de Estado + +### Agregar Badges al README + +En `README.md` (raíz del proyecto): + +```markdown +[![Lint](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml) +[![Tests](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml) +[![Docker](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml) +``` + +### Generar Automáticamente + +```bash +# En GitHub: +# 1. Actions → Seleccionar workflow (ej: Lint Code) +# 2. Click "..." → "Create status badge" +# 3. Seleccionar rama (main) +# 4. Copy markdown +# 5. Pegar en README.md +``` + +--- + +## 📈 Monitoreo y Logging + +### Ver Logs de Workflows + +```bash +# En GitHub: +# 1. Actions → Seleccionar workflow run +# 2. Jobs → Seleccionar job +# 3. 
Step → Expandir para ver logs detallados +``` + +### Debugging de Workflows + +```yaml +- name: Debug info + run: | + echo "GitHub context:" + echo " ref: ${{ github.ref }}" + echo " sha: ${{ github.sha }}" + echo " event: ${{ github.event_name }}" +``` + +--- + +## 🔧 Troubleshooting + +### ❌ Problema: "lint.yml" falla por formato + +**Síntoma**: +``` +black: error: cannot format backend/src/file.py +``` + +**Solución**: +```bash +cd backend +black src/ --line-length=100 +git add . +git commit -m "style: format code with black" +``` + +### ❌ Problema: Tests fallan solo en CI + +**Causas comunes**: +1. Falta variable de entorno +2. Diferencia de BD (CI usa BD limpia) +3. Race conditions en tests async + +**Soluciones**: +```bash +# Verificar env vars en workflow +# Añadir fixtures para resetear BD +# Usar pytest-asyncio correctamente +pytest tests/ -v --tb=short +``` + +### ❌ Problema: Docker build timeout + +**Solución**: Usar caché: +```yaml +cache-from: type=gha +cache-to: type=gha,mode=max +``` + +### ❌ Problema: Coverage no alcanza 75% + +**Pasos**: +1. Generar reporte: `pytest --cov=src --cov-report=html` +2. Abrir `htmlcov/index.html` +3. Identificar archivos sin cobertura +4. Escribir tests adicionales + +--- + +## 🎯 Mejores Prácticas + +### 1. Commits Pequeños y Frecuentes + +```bash +# ✅ Bien +git commit -m "feat(agents): add eval detection" +git commit -m "test(agents): add eval tests" +git commit -m "docs(readme): update examples" + +# ❌ Evitar +git commit -m "Add features, fix bugs, update docs" +``` + +### 2. Ejecutar Tests Localmente Antes de Push + +```bash +cd backend +pytest tests/ --cov=src --cov-fail-under=75 +pylint src/ --rcfile=.pylintrc --fail-under=8.5 +``` + +### 3. Mantener Workflows Rápidos + +| Métrica | Objetivo | +|---------|----------| +| Lint | < 1 min | +| Tests | < 5 min | +| Docker Build | < 3 min | +| Total | < 10 min | + +**Optimizaciones**: +- ✅ Cache de pip +- ✅ Cache de Docker layers +- ✅ Paralelización de tests + +### 4. 
Revisar Logs Detallados + +Ante un fallo: +1. Expandir todos los steps +2. Buscar el primer error (🔴 rojo) +3. Copiar comando y ejecutar localmente + +### 5. Documentar Cambios en CI + +```bash +git commit -m "ci(github): add Docker Trivy scanning + +- Scan for CRITICAL and HIGH vulnerabilities +- Upload results to GitHub Security +- Non-blocking (warnings allowed) + +Relates to security hardening" +``` + +--- + +## 📚 Referencias + +- [GitHub Actions Documentation](https://docs.github.com/en/actions) +- [Branch Protection Rules](https://docs.github.com/en/repositories/configuring-branches-and-merges) +- [Workflow Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions) +- [Pytest Documentation](https://docs.pytest.org/) +- [Docker Best Practices](https://docs.docker.com/develop/dev-best-practices/) + +--- + +
+

Documentación del Pipeline CI/CD - CodeGuard AI

+

Universidad Nacional de Colombia - 2025

+

Última actualización: 6 de Noviembre de 2025

+