From ef0873f8c899ae6c60708bca3d641f71277b6cb6 Mon Sep 17 00:00:00 2001 From: Yosoyepa Date: Thu, 4 Dec 2025 05:51:56 -0500 Subject: [PATCH 1/3] feat(ai): integrate Vertex AI Gemini with MCP for code analysis - Add GeminiClient for Vertex AI integration with gemini-1.5-flash/pro models - Implement Model Context Protocol (MCP) with OWASP Top 10 security contexts - Create AIService as orchestrator for AI-powered code analysis - Add SecurityContext dataclass for structured prompt engineering - Fix Clerk JWT authentication to support HS256 with Custom JWT Templates - Update auth_service to use standard 'sub' claim from JWT payload - Fix Severity enum to use uppercase values matching database schema - Add comprehensive unit tests for AI components (88%+ coverage) - Configure GCP credentials for Vertex AI authentication BREAKING CHANGE: Severity enum values changed from lowercase to UPPERCASE Closes CGAI-32 --- backend/.gitignore | 1 + backend/requirements.txt | 2 +- backend/src/agents/quality_agent.py | 2 +- backend/src/agents/security_agent.py | 2 +- backend/src/core/config/ai_config.py | 170 +++++++ backend/src/core/config/mcp_config.py | 417 +++++++++++++++++ backend/src/core/config/settings.py | 51 ++- backend/src/core/dependencies/auth.py | 174 ++++++-- backend/src/external/clerk_client.py | 284 ++++++++++-- backend/src/external/gemini_client.py | 310 +++++++++++++ backend/src/external/interfaces/__init__.py | 17 + backend/src/external/interfaces/ai_client.py | 206 +++++++++ backend/src/external/mcp_client.py | 190 ++++++++ backend/src/main.py | 2 + backend/src/routers/findings.py | 278 ++++++++++++ backend/src/schemas/ai_explanation.py | 234 ++++++++++ backend/src/schemas/finding.py | 12 +- backend/src/services/ai_service.py | 418 +++++++++++++++++ backend/src/services/auth_service.py | 14 +- backend/src/services/mcp_context_enricher.py | 198 ++++++++ backend/tests/generate_jwt.py | 2 +- backend/tests/integration/test_auth_router.py | 4 +- 
.../test_quality_agent_integration.py | 2 +- .../test_security_agent_integration.py | 2 +- backend/tests/test_ai_service.py | 421 ++++++++++++++++++ .../tests/unit/external/test_clerk_client.py | 32 +- backend/tests/unit/middleware/test_auth.py | 4 +- .../tests/unit/services/test_auth_service.py | 6 +- backend/tests/unit/test_analysis_schemas.py | 4 +- 29 files changed, 3349 insertions(+), 110 deletions(-) create mode 100644 backend/src/external/interfaces/__init__.py create mode 100644 backend/src/external/interfaces/ai_client.py create mode 100644 backend/src/routers/findings.py create mode 100644 backend/tests/test_ai_service.py diff --git a/backend/.gitignore b/backend/.gitignore index bc6b469..2393d10 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -59,3 +59,4 @@ Thumbs.db uploads/ exports/ temp/ +/stately-vector-480209-i2-d7466f761460.json \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 8d03844..e33967f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -40,7 +40,7 @@ httpx>=0.27.0 # Compatible con Clerk # ===== AI SERVICES (Sprint 3) ===== google-generativeai>=0.3.2 # Gemini API -# google-cloud-aiplatform>=1.40.0 # Vertex AI (opcional) +google-cloud-aiplatform>=1.40.0 # Vertex AI (opcional) # ===== DEVELOPMENT TOOLS ===== (Moved to requirements-dev.txt) # black>=24.0.0 diff --git a/backend/src/agents/quality_agent.py b/backend/src/agents/quality_agent.py index 4bbdbc4..f3a2c05 100644 --- a/backend/src/agents/quality_agent.py +++ b/backend/src/agents/quality_agent.py @@ -91,7 +91,7 @@ def analyze(self, context: AnalysisContext) -> List[Finding]: # Ordenar hallazgos por severidad findings.sort( - key=lambda f: (["critical", "high", "medium", "low", "info"].index(f.severity.value)) + key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) ) return findings diff --git a/backend/src/agents/security_agent.py b/backend/src/agents/security_agent.py index 
a644e8d..276806f 100644 --- a/backend/src/agents/security_agent.py +++ b/backend/src/agents/security_agent.py @@ -200,7 +200,7 @@ def analyze(self, context: AnalysisContext) -> List[Finding]: # Ordenar hallazgos por severidad (CRITICAL primero) findings.sort( - key=lambda f: (["critical", "high", "medium", "low", "info"].index(f.severity.value)) + key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) ) self.log_info( diff --git a/backend/src/core/config/ai_config.py b/backend/src/core/config/ai_config.py index e69de29..4ec2328 100644 --- a/backend/src/core/config/ai_config.py +++ b/backend/src/core/config/ai_config.py @@ -0,0 +1,170 @@ +""" +Configuración de Inteligencia Artificial para CodeGuard AI. + +Gestiona la configuración de Vertex AI (Gemini), incluyendo: +- Selección dinámica de modelo por entorno (dev/prod) +- Rate limiting por usuario +- Configuración de reintentos con exponential backoff +""" + +from typing import Optional + +from pydantic import Field, computed_field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class AISettings(BaseSettings): + """ + Configuración de IA cargada desde variables de entorno. + + Usa Google Cloud Vertex AI con autenticación via Service Account. + La variable GOOGLE_APPLICATION_CREDENTIALS debe apuntar al archivo JSON. 
+ + Attributes: + GCP_PROJECT_ID: ID del proyecto en Google Cloud Platform + GCP_LOCATION: Región de Vertex AI (us-central1 recomendado) + GOOGLE_APPLICATION_CREDENTIALS: Ruta al archivo JSON de Service Account + AI_ENABLED: Habilitar/deshabilitar funcionalidad de IA + AI_MODEL_DEV: Modelo para desarrollo (flash = rápido/económico) + AI_MODEL_PROD: Modelo para producción (pro = mejor razonamiento) + AI_TEMPERATURE: Temperatura del modelo (0.0-1.0, menor = más determinista) + AI_MAX_OUTPUT_TOKENS: Límite de tokens en respuesta + AI_RATE_LIMIT_PER_HOUR: Límite de llamadas por usuario por hora + AI_MAX_RETRIES: Intentos máximos ante errores transitorios + AI_BACKOFF_FACTOR: Factor de espera exponencial entre reintentos + """ + + # Google Cloud Platform + GCP_PROJECT_ID: Optional[str] = Field( + default=None, + description="ID del proyecto en Google Cloud Platform", + ) + GCP_LOCATION: str = Field( + default="us-central1", + description="Región de Vertex AI", + ) + GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field( + default=None, + description="Ruta al archivo JSON de Service Account", + ) + + # Feature Flag + AI_ENABLED: bool = Field( + default=True, + description="Habilitar funcionalidad de IA", + ) + + # Model Selection (por entorno) + AI_MODEL_DEV: str = Field( + default="gemini-1.5-flash-001", + description="Modelo para desarrollo (optimizado velocidad/costo)", + ) + AI_MODEL_PROD: str = Field( + default="gemini-1.5-pro-001", + description="Modelo para producción (optimizado razonamiento)", + ) + + # Model Parameters + AI_TEMPERATURE: float = Field( + default=0.3, + ge=0.0, + le=1.0, + description="Temperatura del modelo (0.0-1.0)", + ) + AI_MAX_OUTPUT_TOKENS: int = Field( + default=2048, + ge=100, + le=8192, + description="Límite de tokens en respuesta", + ) + + # Rate Limiting (para controlar costos) + AI_RATE_LIMIT_PER_HOUR: int = Field( + default=10, + ge=1, + description="Límite de llamadas por usuario por hora", + ) + + # Retry Configuration 
(exponential backoff) + AI_MAX_RETRIES: int = Field( + default=3, + ge=1, + le=10, + description="Intentos máximos ante errores transitorios", + ) + AI_BACKOFF_FACTOR: float = Field( + default=2.0, + ge=1.0, + le=5.0, + description="Factor de espera exponencial (segundos)", + ) + AI_INITIAL_BACKOFF: float = Field( + default=1.0, + ge=0.5, + le=10.0, + description="Espera inicial antes del primer reintento (segundos)", + ) + + # Environment (heredado de settings principal) + ENVIRONMENT: str = Field( + default="development", + description="Entorno de ejecución", + ) + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + extra="ignore", + ) + + @computed_field + @property + def model_name(self) -> str: + """ + Selecciona el modelo de Gemini según el entorno. + + Returns: + str: Nombre del modelo (flash para dev, pro para prod) + """ + if self.ENVIRONMENT == "production": + return self.AI_MODEL_PROD + return self.AI_MODEL_DEV + + @computed_field + @property + def is_configured(self) -> bool: + """ + Verifica si la configuración de IA está completa. + + Returns: + bool: True si GCP_PROJECT_ID y credenciales están configurados + """ + return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) + + def get_generation_config(self) -> dict: + """ + Retorna la configuración de generación para Vertex AI. + + Returns: + dict: Parámetros de generación del modelo + """ + return { + "temperature": self.AI_TEMPERATURE, + "max_output_tokens": self.AI_MAX_OUTPUT_TOKENS, + "top_p": 0.95, + "top_k": 40, + } + + +# Singleton de configuración de IA +ai_settings = AISettings() + + +def get_ai_settings() -> AISettings: + """ + Factory function para obtener la configuración de IA. 
+ + Returns: + Instancia singleton de AISettings + """ + return ai_settings diff --git a/backend/src/core/config/mcp_config.py b/backend/src/core/config/mcp_config.py index e69de29..bd7d8c0 100644 --- a/backend/src/core/config/mcp_config.py +++ b/backend/src/core/config/mcp_config.py @@ -0,0 +1,417 @@ +""" +Configuración del Protocolo de Contexto de Modelo (MCP). + +Contiene el diccionario embebido OWASP Top 10 con descripciones +de vulnerabilidades y remediaciones para enriquecer los prompts +enviados a la IA generativa. +""" + +from dataclasses import dataclass +from typing import Dict, List, Optional + + +@dataclass +class SecurityContext: + """ + Contexto de seguridad para una categoría de vulnerabilidad. + + Attributes: + category: Categoría OWASP (ej: "A03:2021 - Injection") + description: Descripción de la vulnerabilidad + impact: Impacto potencial en el sistema + mitigation: Estrategias de mitigación genéricas + references: URLs de documentación oficial + cwe_ids: IDs de CWE relacionados + """ + + category: str + description: str + impact: str + mitigation: str + references: List[str] + cwe_ids: List[str] + + +# ============================================================================= +# Diccionario OWASP Top 10 (2021) +# ============================================================================= + +OWASP_TOP_10: Dict[str, SecurityContext] = { + # A01:2021 - Broken Access Control + "broken_access_control": SecurityContext( + category="A01:2021 - Broken Access Control", + description=( + "Las restricciones sobre lo que los usuarios autenticados pueden hacer " + "a menudo no se aplican correctamente. Los atacantes pueden explotar " + "estos fallos para acceder a funcionalidades y/o datos no autorizados." + ), + impact=( + "Acceso no autorizado a datos sensibles, modificación de datos de otros " + "usuarios, escalación de privilegios, o ejecución de acciones administrativas." + ), + mitigation=( + "1. 
Denegar por defecto, excepto para recursos públicos.\n" + "2. Implementar mecanismos de control de acceso una vez y reutilizarlos.\n" + "3. Hacer cumplir la propiedad de registros (cada usuario solo accede a sus datos).\n" + "4. Deshabilitar listado de directorios del servidor web.\n" + "5. Registrar fallos de control de acceso y alertar a administradores." + ), + references=[ + "https://owasp.org/Top10/A01_2021-Broken_Access_Control/", + "https://cheatsheetseries.owasp.org/cheatsheets/Authorization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-200", "CWE-284", "CWE-285", "CWE-352", "CWE-639"], + ), + # A02:2021 - Cryptographic Failures + "cryptographic_failures": SecurityContext( + category="A02:2021 - Cryptographic Failures", + description=( + "Antes conocido como 'Exposición de Datos Sensibles'. Se centra en " + "fallos relacionados con la criptografía que a menudo conducen a la " + "exposición de datos sensibles." + ), + impact=( + "Exposición de credenciales, tokens, datos personales (PII), datos " + "financieros, o información médica. Posible robo de identidad." + ), + mitigation=( + "1. Clasificar los datos procesados, almacenados o transmitidos.\n" + "2. No almacenar datos sensibles innecesariamente.\n" + "3. Cifrar todos los datos sensibles en reposo con algoritmos fuertes.\n" + "4. Usar protocolos actualizados (TLS 1.3) para datos en tránsito.\n" + "5. No usar algoritmos criptográficos obsoletos (MD5, SHA1, DES)." + ), + references=[ + "https://owasp.org/Top10/A02_2021-Cryptographic_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Cryptographic_Storage_Cheat_Sheet.html", + ], + cwe_ids=["CWE-259", "CWE-327", "CWE-328", "CWE-330", "CWE-331"], + ), + # A03:2021 - Injection + "injection": SecurityContext( + category="A03:2021 - Injection", + description=( + "Una aplicación es vulnerable a ataques de inyección cuando datos " + "suministrados por el usuario no son validados, filtrados o sanitizados. 
" + "Incluye SQL, NoSQL, OS Command, LDAP, XPath y ORM injection." + ), + impact=( + "Pérdida de datos, corrupción de datos, divulgación a partes no autorizadas, " + "pérdida de responsabilidad, denegación de acceso, o toma completa del host." + ), + mitigation=( + "1. Usar APIs seguras que eviten el uso del intérprete (consultas parametrizadas).\n" + "2. Usar validación de entrada positiva del lado del servidor.\n" + "3. Escapar caracteres especiales usando la sintaxis de escape específica.\n" + "4. Usar LIMIT y otros controles SQL para prevenir divulgación masiva.\n" + "5. No concatenar cadenas con datos del usuario en consultas dinámicas." + ), + references=[ + "https://owasp.org/Top10/A03_2021-Injection/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "SQL_Injection_Prevention_Cheat_Sheet.html", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Query_Parameterization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-77", "CWE-78", "CWE-79", "CWE-89", "CWE-94"], + ), + # A04:2021 - Insecure Design + "insecure_design": SecurityContext( + category="A04:2021 - Insecure Design", + description=( + "Una nueva categoría que se centra en los riesgos relacionados con " + "defectos de diseño. La diferencia con una implementación insegura es " + "que un diseño perfecto aún puede tener defectos de implementación." + ), + impact=( + "Vulnerabilidades sistémicas que no pueden ser corregidas solo con código. " + "Exposición de lógica de negocio, flujos de trabajo inseguros." + ), + mitigation=( + "1. Establecer y usar un ciclo de desarrollo seguro con profesionales de AppSec.\n" + "2. Usar bibliotecas de patrones de diseño seguro.\n" + "3. Usar modelado de amenazas para autenticación crítica y control de acceso.\n" + "4. Integrar controles de seguridad en las historias de usuario.\n" + "5. Escribir pruebas unitarias y de integración para validar flujos críticos." 
+ ), + references=[ + "https://owasp.org/Top10/A04_2021-Insecure_Design/", + "https://cheatsheetseries.owasp.org/cheatsheets/Threat_Modeling_Cheat_Sheet.html", + ], + cwe_ids=["CWE-209", "CWE-256", "CWE-501", "CWE-522"], + ), + # A05:2021 - Security Misconfiguration + "security_misconfiguration": SecurityContext( + category="A05:2021 - Security Misconfiguration", + description=( + "La aplicación puede ser vulnerable si no está correctamente " + "endurecida o tiene permisos mal configurados, características " + "innecesarias habilitadas, o mensajes de error detallados." + ), + impact=( + "Acceso no autorizado a datos o funcionalidad del sistema. " + "Posible compromiso completo del sistema." + ), + mitigation=( + "1. Proceso de endurecimiento repetible y automatizado.\n" + "2. Plataforma mínima sin características, componentes o documentación innecesarios.\n" + "3. Revisar y actualizar configuraciones según avisos de seguridad.\n" + "4. Arquitectura de aplicación segmentada con contenedores.\n" + "5. Enviar directivas de seguridad a clientes (CSP, X-Frame-Options)." + ), + references=[ + "https://owasp.org/Top10/A05_2021-Security_Misconfiguration/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Configuration_Security_Cheat_Sheet.html", + ], + cwe_ids=["CWE-16", "CWE-611", "CWE-1004", "CWE-2"], + ), + # A06:2021 - Vulnerable and Outdated Components + "vulnerable_components": SecurityContext( + category="A06:2021 - Vulnerable and Outdated Components", + description=( + "Usar componentes con vulnerabilidades conocidas. Esto incluye " + "bibliotecas, frameworks, y otros módulos de software que se ejecutan " + "con los mismos privilegios que la aplicación." + ), + impact=( + "Desde ataques menores hasta toma completa del servidor, dependiendo " + "de la vulnerabilidad del componente." + ), + mitigation=( + "1. Eliminar dependencias no utilizadas, características y componentes innecesarios.\n" + "2. 
Inventariar versiones de componentes cliente y servidor continuamente.\n" + "3. Monitorear fuentes como CVE y NVD para vulnerabilidades.\n" + "4. Obtener componentes solo de fuentes oficiales sobre enlaces seguros.\n" + "5. Monitorear bibliotecas y componentes sin mantenimiento." + ), + references=[ + "https://owasp.org/Top10/" "A06_2021-Vulnerable_and_Outdated_Components/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Vulnerable_Dependency_Management_Cheat_Sheet.html", + ], + cwe_ids=["CWE-1104"], + ), + # A07:2021 - Identification and Authentication Failures + "authentication_failures": SecurityContext( + category="A07:2021 - Identification and Authentication Failures", + description=( + "Confirmación de la identidad del usuario, autenticación y gestión " + "de sesiones es crítica. La aplicación es vulnerable si permite " + "ataques automatizados, contraseñas débiles, o sesiones mal gestionadas." + ), + impact=( + "Compromiso de cuentas de usuario, robo de identidad, acceso " + "no autorizado a datos sensibles o funcionalidad administrativa." + ), + mitigation=( + "1. Implementar autenticación multifactor donde sea posible.\n" + "2. No desplegar con credenciales por defecto, especialmente admin.\n" + "3. Implementar verificaciones de contraseñas débiles.\n" + "4. Limitar o retrasar cada vez más los intentos de login fallidos.\n" + "5. Usar un gestor de sesiones seguro del lado del servidor con alta entropía." 
+ ), + references=[ + "https://owasp.org/Top10/A07_2021-Identification_and_Authentication_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html", + "https://cheatsheetseries.owasp.org/cheatsheets/Session_Management_Cheat_Sheet.html", + ], + cwe_ids=["CWE-287", "CWE-384", "CWE-307", "CWE-613"], + ), + # A08:2021 - Software and Data Integrity Failures + "integrity_failures": SecurityContext( + category="A08:2021 - Software and Data Integrity Failures", + description=( + "Se relaciona con código e infraestructura que no protege contra " + "violaciones de integridad. Incluye actualizaciones de software " + "inseguras, pipelines CI/CD inseguros, y deserialización insegura." + ), + impact=( + "Ejecución remota de código, ataques a la cadena de suministro, " + "modificación de datos sin autorización." + ), + mitigation=( + "1. Usar firmas digitales para verificar que el software " + "proviene de la fuente esperada.\n" + "2. Asegurar que las bibliotecas y dependencias usan " + "repositorios de confianza.\n" + "3. Usar herramientas de análisis de composición de software (SCA).\n" + "4. Asegurar que el pipeline CI/CD tiene segregación apropiada " + "y control de acceso.\n" + "5. No enviar datos serializados sin firmar o sin cifrar " + "a clientes no confiables." + ), + references=[ + "https://owasp.org/Top10/" "A08_2021-Software_and_Data_Integrity_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/" "Deserialization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-829", "CWE-494", "CWE-502"], + ), + # A09:2021 - Security Logging and Monitoring Failures + "logging_failures": SecurityContext( + category="A09:2021 - Security Logging and Monitoring Failures", + description=( + "Sin registro y monitoreo suficiente, los ataques no pueden ser " + "detectados. Incluye no registrar eventos auditables, no generar " + "alertas adecuadas, o no tener un plan de respuesta a incidentes." 
+ ), + impact=( + "Los atacantes pueden mantener persistencia, pivotar a más sistemas, " + "manipular, extraer o destruir datos sin ser detectados." + ), + mitigation=( + "1. Asegurar que todos los fallos de login, control de acceso " + "y validación de entrada del servidor se registran " + "con contexto suficiente.\n" + "2. Asegurar que los logs se generan en formato que las " + "soluciones de gestión de logs puedan consumir fácilmente.\n" + "3. Asegurar que los datos de log se codifican correctamente " + "para prevenir inyecciones.\n" + "4. Establecer monitoreo y alertas efectivos " + "para actividades sospechosas.\n" + "5. Establecer un plan de respuesta y recuperación de incidentes." + ), + references=[ + "https://owasp.org/Top10/" "A09_2021-Security_Logging_and_Monitoring_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Logging_Cheat_Sheet.html", + ], + cwe_ids=["CWE-117", "CWE-223", "CWE-532", "CWE-778"], + ), + # A10:2021 - Server-Side Request Forgery (SSRF) + "ssrf": SecurityContext( + category="A10:2021 - Server-Side Request Forgery (SSRF)", + description=( + "SSRF ocurre cuando una aplicación web obtiene un recurso remoto " + "sin validar la URL suministrada por el usuario. Permite a un atacante " + "forzar a la aplicación a enviar una solicitud crafteada a un destino inesperado." + ), + impact=( + "Escaneo de puertos internos, acceso a servicios internos, lectura de " + "metadatos de servicios en la nube, o ejecución remota de código." + ), + mitigation=( + "1. Segmentar la funcionalidad de acceso a recursos remotos en redes separadas.\n" + "2. Hacer cumplir políticas de firewall 'deny by default'.\n" + "3. Sanitizar y validar todos los datos de entrada suministrados por el cliente.\n" + "4. No enviar respuestas raw al cliente.\n" + "5. Deshabilitar redirecciones HTTP y usar listas de permitidos para URL." 
+ ), + references=[ + "https://owasp.org/Top10/" "A10_2021-Server-Side_Request_Forgery_%28SSRF%29/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", + ], + cwe_ids=["CWE-918"], + ), +} + + +# ============================================================================= +# Mapeo de reglas de CodeGuard a categorías OWASP +# ============================================================================= + +RULE_TO_OWASP_MAPPING: Dict[str, str] = { + # SecurityAgent - Dangerous Functions + "SEC001_EVAL": "injection", + "SEC001_EXEC": "injection", + "SEC001_COMPILE": "injection", + "SEC001___IMPORT__": "injection", + "SEC001_EXECFILE": "injection", + "SEC001_PICKLE": "integrity_failures", + # SecurityAgent - SQL Injection + "SEC002_SQL_INJECTION": "injection", + # SecurityAgent - Hardcoded Credentials + "SEC003_PASSWORD": "cryptographic_failures", + "SEC003_API_KEY": "cryptographic_failures", + "SEC003_SECRET_KEY": "cryptographic_failures", + "SEC003_TOKEN": "cryptographic_failures", + "SEC003_ACCESS_KEY": "cryptographic_failures", + # SecurityAgent - Weak Cryptography + "SEC004_MD5": "cryptographic_failures", + "SEC004_SHA1": "cryptographic_failures", + "SEC004_WEAK_ENCRYPTION": "cryptographic_failures", + # Common patterns by issue_type + "dangerous_function": "injection", + "sql_injection": "injection", + "hardcoded_credentials": "cryptographic_failures", + "weak_cryptography": "cryptographic_failures", + "insecure_deserialization": "integrity_failures", + "path_traversal": "broken_access_control", + "ssrf": "ssrf", + "xss": "injection", + "command_injection": "injection", + "ldap_injection": "injection", + "xpath_injection": "injection", +} + + +# ============================================================================= +# Funciones de utilidad +# ============================================================================= + + +def get_security_context( + rule_id: Optional[str] = None, + issue_type: 
Optional[str] = None, +) -> Optional[SecurityContext]: + """ + Obtiene el contexto de seguridad OWASP para una regla o tipo de issue. + + Args: + rule_id: ID de la regla (ej: "SEC001_EVAL") + issue_type: Tipo de issue (ej: "sql_injection") + + Returns: + SecurityContext si se encuentra mapeo, None en caso contrario + """ + # Primero intentar con rule_id + if rule_id: + owasp_key = RULE_TO_OWASP_MAPPING.get(rule_id) + if owasp_key: + return OWASP_TOP_10.get(owasp_key) + + # Luego intentar con issue_type + if issue_type: + # Normalizar issue_type (convertir espacios/guiones a underscore) + normalized = issue_type.lower().replace("-", "_").replace(" ", "_") + owasp_key = RULE_TO_OWASP_MAPPING.get(normalized) + if owasp_key: + return OWASP_TOP_10.get(owasp_key) + + # Buscar coincidencia parcial + for key, owasp_category in RULE_TO_OWASP_MAPPING.items(): + if key in normalized or normalized in key: + return OWASP_TOP_10.get(owasp_category) + + return None + + +def format_security_context(context: SecurityContext) -> str: + """ + Formatea el contexto de seguridad para incluirlo en un prompt. 
+ + Args: + context: Contexto de seguridad OWASP + + Returns: + str: Texto formateado para el prompt de IA + """ + return f""" +=== CONTEXTO DE SEGURIDAD (OWASP) === +Categoría: {context.category} + +Descripción: +{context.description} + +Impacto Potencial: +{context.impact} + +Estrategias de Mitigación: +{context.mitigation} + +Referencias: +{chr(10).join(f"- {ref}" for ref in context.references)} + +CWEs Relacionados: {", ".join(context.cwe_ids)} +=================================== +""" diff --git a/backend/src/core/config/settings.py b/backend/src/core/config/settings.py index 8fefd8d..9a33a24 100644 --- a/backend/src/core/config/settings.py +++ b/backend/src/core/config/settings.py @@ -6,6 +6,7 @@ from typing import Optional +from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict @@ -22,8 +23,14 @@ class Settings(BaseSettings): """ # Clerk Authentication - CLERK_SECRET_KEY: str + CLERK_SECRET_KEY: Optional[str] = None CLERK_PUBLISHABLE_KEY: str + CLERK_JWKS_URL: Optional[str] = Field( + default=None, description="URL del endpoint JWKS de Clerk para validar tokens RS256" + ) + CLERK_JWT_SIGNING_KEY: Optional[str] = Field( + default=None, description="Signing Key para validar Custom JWT Templates (HS256)" + ) # Database DATABASE_URL: str @@ -41,10 +48,38 @@ class Settings(BaseSettings): # CORS ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:5173" - # Redis (opcional para Sprint 2) + # Redis (opcional) REDIS_URL: Optional[str] = None REDIS_PASSWORD: Optional[str] = None + # ========================================== + # AI Services - Vertex AI (Sprint 3) + # ========================================== + + # Google Cloud Platform + GCP_PROJECT_ID: Optional[str] = Field(default=None) + GCP_LOCATION: str = Field(default="us-central1") + GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field(default=None) + + # Feature Flag + AI_ENABLED: bool = Field(default=True) + + # Model Selection + AI_MODEL_DEV: str = 
Field(default="gemini-1.5-flash-001") + AI_MODEL_PROD: str = Field(default="gemini-1.5-pro-001") + + # Model Parameters + AI_TEMPERATURE: float = Field(default=0.3, ge=0.0, le=1.0) + AI_MAX_OUTPUT_TOKENS: int = Field(default=2048, ge=100, le=8192) + + # Rate Limiting + AI_RATE_LIMIT_PER_HOUR: int = Field(default=10, ge=1) + + # Retry Configuration + AI_MAX_RETRIES: int = Field(default=3, ge=1, le=10) + AI_BACKOFF_FACTOR: float = Field(default=2.0, ge=1.0, le=5.0) + AI_INITIAL_BACKOFF: float = Field(default=1.0, ge=0.5, le=10.0) + model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", @@ -56,6 +91,18 @@ def allowed_origins_list(self) -> list[str]: """Retorna lista de orígenes permitidos para CORS.""" return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")] + @property + def ai_model_name(self) -> str: + """Selecciona el modelo según el entorno.""" + if self.ENVIRONMENT == "production": + return self.AI_MODEL_PROD + return self.AI_MODEL_DEV + + @property + def is_ai_configured(self) -> bool: + """Verifica si la IA está configurada correctamente.""" + return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) + # Singleton de configuración settings = Settings() diff --git a/backend/src/core/dependencies/auth.py b/backend/src/core/dependencies/auth.py index a607bb5..b3525a5 100644 --- a/backend/src/core/dependencies/auth.py +++ b/backend/src/core/dependencies/auth.py @@ -1,10 +1,11 @@ """ Dependencia de autenticación. -Valida tokens JWT de Clerk y protege rutas. +Valida tokens JWT de Clerk (RS256) y protege rutas. +Extrae información del usuario desde el payload completo del token. 
""" -from fastapi import Depends, HTTPException +from fastapi import Depends, HTTPException, status from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from src.external.clerk_client import ( @@ -18,30 +19,89 @@ http_bearer = HTTPBearer(auto_error=False) -async def get_current_user( - credentials: HTTPAuthorizationCredentials = Depends(http_bearer), -) -> User: +def _map_role_from_payload(payload: dict) -> Role: """ - Obtiene el usuario actual validando el token JWT de Clerk. - - Flujo: - 1. Extrae token del header Authorization: Bearer - 2. Valida el token con ClerkClient - 3. Retorna User schema con los datos del token - - Args: - credentials: Credenciales HTTP Bearer. + Mapea el rol del payload JWT al enum Role. + + Clerk permite definir roles en public_metadata o como claim directo. + Orden de prioridad: + 1. public_metadata.role (recomendado) + 2. unsafe_metadata.role + 3. role claim directo + 4. Default: DEVELOPER + """ + # Prioridad 1: public_metadata.role + public_metadata = payload.get("public_metadata", {}) + if isinstance(public_metadata, dict): + role = public_metadata.get("role", "").lower() + if role == "admin": + return Role.ADMIN + if role == "developer": + return Role.DEVELOPER + + # Prioridad 2: unsafe_metadata.role + unsafe_metadata = payload.get("unsafe_metadata", {}) + if isinstance(unsafe_metadata, dict): + role = unsafe_metadata.get("role", "").lower() + if role == "admin": + return Role.ADMIN + if role == "developer": + return Role.DEVELOPER + + # Prioridad 3: claim directo 'role' + role_claim = payload.get("role", "").lower() + if role_claim == "admin": + return Role.ADMIN + if role_claim == "developer": + return Role.DEVELOPER + + return Role.DEVELOPER + + +def _extract_email_from_payload(payload: dict) -> str: + """Extrae el email del payload de Clerk.""" + email = payload.get("email", "") + if email: + return email + + public_metadata = payload.get("public_metadata", {}) + if isinstance(public_metadata, dict): + email = 
public_metadata.get("email", "") + if email: + return email + + email_addresses = payload.get("email_addresses", []) + if isinstance(email_addresses, list) and len(email_addresses) > 0: + first_email = email_addresses[0] + if isinstance(first_email, dict): + return first_email.get("email_address", "") + + return "" + + +def _extract_name_from_payload(payload: dict) -> str | None: + """Extrae el nombre completo del usuario desde el payload.""" + name = payload.get("name") + if name and isinstance(name, str): + return name.strip() or None + + first_name = payload.get("first_name", "") + last_name = payload.get("last_name", "") + + if first_name or last_name: + full_name = f"{first_name} {last_name}".strip() + return full_name or None + + return None - Returns: - User: Usuario autenticado. - Raises: - HTTPException 401: Si el token falta, es inválido o expiró. - """ - # AC Escenario 2: Verificar que el token esté presente +async def get_current_user( + credentials: HTTPAuthorizationCredentials = Depends(http_bearer), +) -> User: + """Obtiene el usuario actual validando el token JWT de Clerk (RS256).""" if not credentials: raise HTTPException( - status_code=401, + status_code=status.HTTP_401_UNAUTHORIZED, detail="Token de autenticación requerido", headers={"WWW-Authenticate": "Bearer"}, ) @@ -50,28 +110,38 @@ async def get_current_user( clerk_client = ClerkClient() try: - # Validar token con Clerk payload = clerk_client.verify_token(token) + user_id = payload.get("sub") + if not user_id: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Token inválido: falta claim 'sub'", + headers={"WWW-Authenticate": "Bearer"}, + ) + + email = _extract_email_from_payload(payload) + name = _extract_name_from_payload(payload) + role = _map_role_from_payload(payload) + return User( - id=payload["user_id"], - email=payload.get("email", ""), - name=payload.get("name"), - role=Role.DEVELOPER, + id=user_id, + email=email, + name=name, + role=role, ) except 
ClerkTokenExpiredError: - # AC Escenario 6: Token expirado raise HTTPException( - status_code=401, - detail="Token expirado", + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Token expirado. Por favor, inicia sesión nuevamente.", headers={"WWW-Authenticate": "Bearer"}, ) - except ClerkTokenInvalidError: - # AC Escenario 5: Token inválido + + except ClerkTokenInvalidError as e: raise HTTPException( - status_code=401, - detail="Token inválido", + status_code=status.HTTP_401_UNAUTHORIZED, + detail=f"Token inválido: {str(e)}", headers={"WWW-Authenticate": "Bearer"}, ) @@ -79,21 +149,31 @@ async def get_current_user( async def get_optional_user( credentials: HTTPAuthorizationCredentials = Depends(http_bearer), ) -> User | None: - """ - Obtiene el usuario actual si hay token, None si no. - - Útil para endpoints que funcionan con o sin autenticación. + """Obtiene el usuario si hay token válido, None si no hay token.""" + if not credentials: + return None + return await get_current_user(credentials) - Args: - credentials: Credenciales HTTP Bearer (opcional). - Returns: - User si hay token válido, None si no hay token. +async def require_admin( + current_user: User = Depends(get_current_user), +) -> User: + """Requiere que el usuario sea ADMIN.""" + if current_user.role != Role.ADMIN: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Se requieren permisos de administrador para esta acción", + ) + return current_user - Raises: - HTTPException 401: Si hay token pero es inválido o expiró. 
- """ - if not credentials: - return None - return await get_current_user(credentials) +async def require_developer( + current_user: User = Depends(get_current_user), +) -> User: + """Requiere que el usuario sea DEVELOPER o ADMIN.""" + if current_user.role not in [Role.DEVELOPER, Role.ADMIN]: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Se requieren permisos de developer o superiores", + ) + return current_user diff --git a/backend/src/external/clerk_client.py b/backend/src/external/clerk_client.py index 19e167e..f7447f2 100644 --- a/backend/src/external/clerk_client.py +++ b/backend/src/external/clerk_client.py @@ -1,12 +1,18 @@ """ Cliente externo para validación de tokens JWT de Clerk. -Abstrae la lógica de validación usando python-jose con algoritmo HS256. +Soporta dos tipos de tokens: +1. Session Tokens (RS256 con JWKS) - Tokens estándar de Clerk +2. Custom JWT Templates (HS256 con secret key) - Para integraciones de terceros + +El cliente detecta automáticamente el algoritmo del token y usa la +validación correspondiente. """ -from typing import Any, Dict +from typing import Any, Dict, Optional -from jose import ExpiredSignatureError, JWTError, jwt +import httpx +from jose import ExpiredSignatureError, JWTError, jwk, jwt from src.core.config.settings import settings @@ -33,46 +39,257 @@ class ClerkClient: """ Cliente para validar tokens JWT emitidos por Clerk. - Utiliza el algoritmo HS256 con la CLERK_SECRET_KEY para - decodificar y validar tokens. 
+ Detecta automáticamente el tipo de token: + - RS256: Session tokens estándar (valida con JWKS) + - HS256: Custom JWT templates (valida con secret key) + + Referencias: + - Session Tokens: https://clerk.com/docs/guides/sessions/session-tokens + - JWT Templates: https://clerk.com/docs/guides/sessions/jwt-templates """ + # Cache de JWKS para evitar requests en cada validación + _jwks_cache: Optional[Dict[str, Any]] = None + def __init__(self): - """Inicializa el cliente con la configuración de Clerk.""" - self._secret_key = settings.CLERK_SECRET_KEY - self._algorithms = ["HS256"] + """ + Inicializa el cliente con la configuración de Clerk. + + Requiere al menos uno de: + - CLERK_JWKS_URL: Para validar session tokens (RS256) + - CLERK_JWT_SIGNING_KEY: Para validar custom JWT templates (HS256) + """ + self._jwks_url = settings.CLERK_JWKS_URL + # Para HS256, priorizar JWT_SIGNING_KEY sobre SECRET_KEY + self._signing_key = settings.CLERK_JWT_SIGNING_KEY or settings.CLERK_SECRET_KEY + + if not self._jwks_url and not self._signing_key: + raise ValueError( + "Se requiere CLERK_JWKS_URL o CLERK_JWT_SIGNING_KEY. " + "Configura al menos una de estas variables de entorno." + ) + + def _get_token_algorithm(self, token: str) -> str: + """ + Extrae el algoritmo del header del token. + + Args: + token: Token JWT. + + Returns: + Algoritmo (ej: "HS256", "RS256"). + + Raises: + ClerkTokenInvalidError: Si no se puede leer el header. + """ + try: + unverified_header = jwt.get_unverified_header(token) + alg = unverified_header.get("alg") + + if not alg: + raise ClerkTokenInvalidError("Token no contiene algoritmo en el header") + + return alg + + except JWTError as e: + raise ClerkTokenInvalidError(f"Error al leer header del token: {e}") from e + + def _fetch_jwks(self) -> Dict[str, Any]: + """ + Obtiene las claves públicas del endpoint JWKS de Clerk. + + Returns: + Dict con las claves JWKS en formato JWK. + + Raises: + ClerkTokenInvalidError: Si no se puede obtener el JWKS. 
+ """ + if ClerkClient._jwks_cache is not None: + return ClerkClient._jwks_cache + + if not self._jwks_url: + raise ClerkTokenInvalidError( + "CLERK_JWKS_URL no configurado. " "Requerido para validar tokens RS256." + ) + + try: + response = httpx.get(self._jwks_url, timeout=10.0) + response.raise_for_status() + jwks_data = response.json() + + # Validar que tenga la estructura esperada + if "keys" not in jwks_data or not isinstance(jwks_data["keys"], list): + raise ClerkTokenInvalidError("Respuesta JWKS inválida: falta campo 'keys'") + + ClerkClient._jwks_cache = jwks_data + return ClerkClient._jwks_cache + + except httpx.HTTPError as e: + raise ClerkTokenInvalidError(f"Error al obtener JWKS de {self._jwks_url}: {e}") from e + + def _get_public_key(self, token: str): + """ + Obtiene la clave pública RSA correcta para verificar el token RS256. + + Args: + token: Token JWT para extraer el kid del header. + + Returns: + Clave pública RSA construida desde JWKS. + + Raises: + ClerkTokenInvalidError: Si no se encuentra la clave o el kid. 
+ """ + try: + # Obtener kid del header del token (sin verificar aún) + unverified_header = jwt.get_unverified_header(token) + kid = unverified_header.get("kid") + + if not kid: + raise ClerkTokenInvalidError("Token RS256 no contiene 'kid' en el header") + + # Buscar la clave en JWKS + jwks_data = self._fetch_jwks() + + for key_data in jwks_data.get("keys", []): + if key_data.get("kid") == kid: + return jwk.construct(key_data) + + # Si no se encuentra, invalidar cache y reintentar una vez + ClerkClient._jwks_cache = None + jwks_data = self._fetch_jwks() + + for key_data in jwks_data.get("keys", []): + if key_data.get("kid") == kid: + return jwk.construct(key_data) + + raise ClerkTokenInvalidError(f"No se encontró clave pública con kid '{kid}' en JWKS") + + except JWTError as e: + raise ClerkTokenInvalidError(f"Error al extraer header del token: {e}") from e + + def _verify_rs256_token(self, token: str) -> Dict[str, Any]: + """ + Verifica un token RS256 (Session Token estándar de Clerk). + + Args: + token: Token JWT con algoritmo RS256. + + Returns: + Payload decodificado del token. + + Raises: + ClerkTokenExpiredError: Si el token expiró. + ClerkTokenInvalidError: Si el token es inválido. + """ + public_key = self._get_public_key(token) + + return jwt.decode( + token, + public_key, + algorithms=["RS256"], + options={ + "verify_signature": True, + "verify_exp": True, + "verify_nbf": True, + "verify_iat": True, + "verify_aud": False, # Clerk no siempre usa aud + "verify_iss": False, # ISS varía según instancia + }, + ) + + def _verify_hs256_token(self, token: str) -> Dict[str, Any]: + """ + Verifica un token HS256 (Custom JWT Template de Clerk). + + Args: + token: Token JWT con algoritmo HS256. + + Returns: + Payload decodificado del token. + + Raises: + ClerkTokenExpiredError: Si el token expiró. + ClerkTokenInvalidError: Si el token es inválido. + """ + if not self._signing_key: + raise ClerkTokenInvalidError( + "CLERK_JWT_SIGNING_KEY no configurado. 
" + "Requerido para validar tokens HS256 (JWT Templates)." + ) + + return jwt.decode( + token, + self._signing_key, + algorithms=["HS256"], + options={ + "verify_signature": True, + "verify_exp": True, + "verify_nbf": True, + "verify_iat": True, + "verify_aud": False, + "verify_iss": False, + }, + ) def verify_token(self, token: str) -> Dict[str, Any]: """ - Valida un token JWT y retorna el payload decodificado. + Valida un token JWT de Clerk y retorna el payload completo. + + Detecta automáticamente el algoritmo del token: + - RS256: Session token estándar (valida con JWKS) + - HS256: Custom JWT template (valida con secret key) Args: token: Token JWT a validar. Returns: - Dict con user_id, email, name extraídos del payload. + Dict con el payload completo del JWT, incluyendo: + - sub: User ID (subject) + - email: Email del usuario + - name: Nombre del usuario + - role: Rol del usuario (si está configurado) + - exp, iat, nbf: Timestamps + - iss, jti: Emisor e identificador + - public_metadata, user_metadata, app_metadata: Metadatos Raises: - ClerkTokenExpiredError: Si el token ha expirado. - ClerkTokenInvalidError: Si el token es inválido o malformado. + ClerkTokenExpiredError: Si el token ha expirado (exp < now). + ClerkTokenInvalidError: Si el token es inválido, malformado, + o no se puede validar. """ try: - payload = jwt.decode( - token, - self._secret_key, - algorithms=self._algorithms, - ) + # Detectar algoritmo del token + algorithm = self._get_token_algorithm(token) + + # Validar según el algoritmo + if algorithm == "RS256": + payload = self._verify_rs256_token(token) + elif algorithm == "HS256": + payload = self._verify_hs256_token(token) + else: + raise ClerkTokenInvalidError( + f"Algoritmo no soportado: {algorithm}. " + "Clerk usa RS256 (session tokens) o HS256 (JWT templates)." 
+ ) - return { - "user_id": payload.get("sub"), - "email": payload.get("email"), - "name": payload.get("name"), - } + return payload except ExpiredSignatureError as e: - raise ClerkTokenExpiredError("El token ha expirado") from e + raise ClerkTokenExpiredError( + "El token ha expirado. El usuario debe iniciar sesión nuevamente." + ) from e + + except ClerkTokenExpiredError: + # Re-raise para mantener el tipo de excepción + raise + + except ClerkTokenInvalidError: + # Re-raise para mantener el tipo de excepción + raise + except JWTError as e: - raise ClerkTokenInvalidError("Token inválido o malformado") from e + raise ClerkTokenInvalidError(f"Token inválido o malformado: {e}") from e def get_user_id_from_token(self, token: str) -> str: """ @@ -82,16 +299,27 @@ def get_user_id_from_token(self, token: str) -> str: token: Token JWT. Returns: - User ID (sub claim). + User ID (claim 'sub'). Raises: ClerkTokenExpiredError: Si el token ha expirado. - ClerkTokenInvalidError: Si el token es inválido. + ClerkTokenInvalidError: Si el token es inválido o no tiene 'sub'. """ payload = self.verify_token(token) - user_id = payload.get("user_id") + user_id = payload.get("sub") if not user_id: - raise ClerkTokenInvalidError("Token no contiene user_id (sub)") + raise ClerkTokenInvalidError("Token no contiene 'sub' claim. Token inválido de Clerk.") return user_id + + @classmethod + def clear_jwks_cache(cls): + """ + Limpia el cache de JWKS. + + Útil para: + - Testing + - Forzar recarga después de rotación de claves + """ + cls._jwks_cache = None diff --git a/backend/src/external/gemini_client.py b/backend/src/external/gemini_client.py index e69de29..989137e 100644 --- a/backend/src/external/gemini_client.py +++ b/backend/src/external/gemini_client.py @@ -0,0 +1,310 @@ +""" +Cliente de Google Vertex AI para generación de explicaciones con Gemini. + +Implementa el patrón Adapter para abstraer la comunicación con Vertex AI, +con soporte para exponential backoff en caso de rate limiting. 
+ +Requiere: pip install google-cloud-aiplatform>=1.40.0 +""" + +import asyncio +import logging +from typing import Optional + +import vertexai +from google.api_core import exceptions as google_exceptions +from vertexai.generative_models import GenerationConfig, GenerativeModel + +from src.core.config.ai_config import ai_settings +from src.external.interfaces.ai_client import ( + AIClient, + AIClientError, + AIConnectionError, + AIModelError, + AIRateLimitError, + AIResponse, + AIResponseError, +) + +logger = logging.getLogger("agents.VertexAI") + + +class VertexAIClient(AIClient): + """ + Cliente para Google Vertex AI (Gemini). + + Utiliza autenticación via Service Account configurada en + GOOGLE_APPLICATION_CREDENTIALS. Implementa reintentos + automáticos con exponential backoff para errores transitorios. + + Attributes: + _model: Instancia del modelo generativo + _initialized: Flag indicando si Vertex AI fue inicializado + """ + + def __init__(self): + """ + Inicializa el cliente de Vertex AI. + + La inicialización real se hace de forma lazy en el primer uso + para evitar errores si las credenciales no están configuradas. + """ + self._model: Optional[GenerativeModel] = None + self._initialized: bool = False + self._generation_config: Optional[GenerationConfig] = None + + def _initialize(self) -> None: + """ + Inicializa Vertex AI y carga el modelo. + + Se ejecuta de forma lazy en la primera llamada a generate_explanation. + + Raises: + AIConnectionError: Si no se puede conectar a Vertex AI + AIModelError: Si el modelo no está disponible + """ + if self._initialized: + return + + if not ai_settings.is_configured: + raise AIClientError( + "Vertex AI no está configurado. 
" + "Verifica GCP_PROJECT_ID y GOOGLE_APPLICATION_CREDENTIALS en .env" + ) + + try: + # Inicializar Vertex AI con proyecto y ubicación + vertexai.init( + project=ai_settings.GCP_PROJECT_ID, + location=ai_settings.GCP_LOCATION, + ) + + # Cargar el modelo según el entorno (flash para dev, pro para prod) + self._model = GenerativeModel(ai_settings.model_name) + + # Configuración de generación + config_dict = ai_settings.get_generation_config() + self._generation_config = GenerationConfig(**config_dict) + + self._initialized = True + logger.info( + f"[VertexAI] Inicializado con modelo {ai_settings.model_name} " + f"en {ai_settings.GCP_LOCATION}" + ) + + except google_exceptions.PermissionDenied as e: + raise AIConnectionError( + "Permisos insuficientes. Verifica que la Service Account " + "tenga el rol 'Vertex AI User'.", + original_error=e, + ) + except google_exceptions.NotFound as e: + raise AIModelError( + f"Modelo {ai_settings.model_name} no encontrado. " + "Verifica el nombre del modelo y la región.", + original_error=e, + ) + except Exception as e: + raise AIConnectionError( + f"Error inicializando Vertex AI: {str(e)}", + original_error=e, + ) + + def _parse_response(self, response) -> AIResponse: + """ + Parsea y valida la respuesta del modelo. 
+ + Args: + response: Respuesta raw del modelo Vertex AI + + Returns: + AIResponse: Respuesta estructurada + + Raises: + AIResponseError: Si la respuesta es inválida o está vacía + """ + if not response or not response.candidates: + raise AIResponseError("Respuesta vacía del modelo") + + candidate = response.candidates[0] + + # Verificar si fue bloqueado por safety + if candidate.finish_reason.name == "SAFETY": + raise AIResponseError("Contenido bloqueado por filtros de seguridad de Google") + + # Extraer texto + text = candidate.content.parts[0].text if candidate.content.parts else "" + + if not text: + raise AIResponseError("No se generó texto en la respuesta") + + # Calcular tokens (aproximado si no está disponible) + tokens_used = 0 + if hasattr(response, "usage_metadata"): + usage = response.usage_metadata + tokens_used = getattr(usage, "prompt_token_count", 0) + getattr( + usage, "candidates_token_count", 0 + ) + + logger.info( + f"[VertexAI] Generación exitosa - " + f"Tokens: {tokens_used}, " + f"Finish: {candidate.finish_reason.name}" + ) + + return AIResponse( + content=text, + model_name=ai_settings.model_name, + tokens_used=tokens_used, + finish_reason=candidate.finish_reason.name, + ) + + async def _handle_retryable_error( + self, error: Exception, attempt: int, max_retries: int, backoff: float, error_type: str + ) -> float: + """ + Maneja errores que permiten reintento con backoff. + + Args: + error: Excepción capturada + attempt: Intento actual (0-based) + max_retries: Máximo de reintentos permitidos + backoff: Tiempo de espera actual + error_type: Tipo de error para logging + + Returns: + float: Nuevo valor de backoff + + Raises: + AIRateLimitError: Si se agotan reintentos por rate limit + AIConnectionError: Si se agotan reintentos por servicio no disponible + """ + if attempt < max_retries: + logger.warning( + f"[VertexAI] {error_type}. 
" f"Reintento {attempt + 1}/{max_retries} en {backoff}s" + ) + await asyncio.sleep(backoff) + return backoff * ai_settings.AI_BACKOFF_FACTOR + + # Se agotaron los reintentos + if error_type == "Rate limit alcanzado": + raise AIRateLimitError( + "Límite de tasa excedido después de múltiples reintentos", + retry_after=backoff, + original_error=error, + ) + else: + raise AIConnectionError( + "Servicio de Vertex AI no disponible", + original_error=error, + ) + + async def generate_explanation(self, prompt: str) -> AIResponse: + """ + Genera una explicación usando Gemini con reintentos automáticos. + + Implementa exponential backoff para manejar rate limits (429) + y errores transitorios de la API. + + Args: + prompt: Texto del prompt a enviar al modelo + + Returns: + AIResponse: Respuesta estructurada con contenido y metadata + + Raises: + AIRateLimitError: Si se agotan los reintentos por rate limiting + AIConnectionError: Si hay problemas de conexión + AIClientError: Para otros errores + """ + # Inicialización lazy + self._initialize() + + if not self._model: + raise AIClientError("Modelo no inicializado") + + # Configuración de reintentos + max_retries = ai_settings.AI_MAX_RETRIES + backoff = ai_settings.AI_INITIAL_BACKOFF + last_error: Optional[Exception] = None + + for attempt in range(max_retries + 1): + try: + # Ejecutar generación en thread pool (Vertex AI SDK es síncrono) + response = await asyncio.get_event_loop().run_in_executor( + None, + lambda: self._model.generate_content( + prompt, + generation_config=self._generation_config, + ), + ) + return self._parse_response(response) + + except google_exceptions.ResourceExhausted as e: + last_error = e + backoff = await self._handle_retryable_error( + e, attempt, max_retries, backoff, "Rate limit alcanzado" + ) + + except google_exceptions.ServiceUnavailable as e: + last_error = e + backoff = await self._handle_retryable_error( + e, attempt, max_retries, backoff, "Servicio no disponible" + ) + + except 
google_exceptions.InvalidArgument as e: + raise AIModelError(f"Prompt inválido: {str(e)}", original_error=e) + + except AIResponseError: + raise + + except Exception as e: + logger.error(f"[VertexAI] Error inesperado: {str(e)}") + raise AIClientError(f"Error generando contenido: {str(e)}", original_error=e) + + raise AIClientError("Error después de múltiples reintentos", original_error=last_error) + + async def health_check(self) -> bool: + """ + Verifica si el cliente de Vertex AI está operativo. + + Intenta inicializar el cliente y verificar que el modelo esté disponible. + + Returns: + bool: True si el servicio está disponible + """ + try: + self._initialize() + return self._initialized and self._model is not None + except Exception as e: + logger.warning(f"[VertexAI] Health check fallido: {str(e)}") + return False + + @property + def model_name(self) -> str: + """Retorna el nombre del modelo configurado.""" + return ai_settings.model_name + + @property + def is_configured(self) -> bool: + """Verifica si el cliente está correctamente configurado.""" + return ai_settings.is_configured + + +# Singleton del cliente (opcional, para inyección de dependencias) +def get_ai_client() -> AIClient: + """ + Factory function para obtener el cliente de IA. + + Permite cambiar fácilmente la implementación (mock para tests). + + Returns: + AIClient: Instancia del cliente de IA configurado + + Raises: + AIClientError: Si la IA está deshabilitada o no hay biblioteca instalada + """ + if not ai_settings.AI_ENABLED: + raise AIClientError("Funcionalidad de IA deshabilitada (AI_ENABLED=false)") + + return VertexAIClient() diff --git a/backend/src/external/interfaces/__init__.py b/backend/src/external/interfaces/__init__.py new file mode 100644 index 0000000..f890958 --- /dev/null +++ b/backend/src/external/interfaces/__init__.py @@ -0,0 +1,17 @@ +""" +Interfaces para clientes externos de IA. 
+""" + +from src.external.interfaces.ai_client import ( + AIClient, + AIClientError, + AIConnectionError, + AIRateLimitError, +) + +__all__ = [ + "AIClient", + "AIClientError", + "AIRateLimitError", + "AIConnectionError", +] diff --git a/backend/src/external/interfaces/ai_client.py b/backend/src/external/interfaces/ai_client.py new file mode 100644 index 0000000..d110b8f --- /dev/null +++ b/backend/src/external/interfaces/ai_client.py @@ -0,0 +1,206 @@ +""" +Interfaz abstracta para clientes de IA generativa. + +Define el contrato que deben implementar todos los proveedores de IA +(Google Vertex AI, OpenAI, Anthropic, etc.) siguiendo el patrón Adapter. +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional + +# ============================================================================= +# Excepciones personalizadas para clientes de IA +# ============================================================================= + + +class AIClientError(Exception): + """ + Error base para todos los problemas con clientes de IA. + + Attributes: + message: Descripción del error + original_error: Excepción original (si existe) + """ + + def __init__(self, message: str, original_error: Optional[Exception] = None): + self.message = message + self.original_error = original_error + super().__init__(self.message) + + +class AIRateLimitError(AIClientError): + """ + Error de límite de tasa de la API de IA. + + Se lanza cuando la API retorna un error 429 (Too Many Requests) + o ResourceExhausted en el caso de Google Cloud. + + Attributes: + retry_after: Segundos sugeridos de espera antes de reintentar + """ + + def __init__( + self, + message: str = "Rate limit exceeded", + retry_after: Optional[float] = None, + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + self.retry_after = retry_after + + +class AIConnectionError(AIClientError): + """ + Error de conexión con el servicio de IA. 
+ + Se lanza cuando no se puede establecer conexión con la API, + hay timeout o problemas de red. + """ + + def __init__( + self, + message: str = "Failed to connect to AI service", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +class AIModelError(AIClientError): + """ + Error relacionado con el modelo de IA. + + Se lanza cuando el modelo no está disponible, el prompt excede + los límites o hay problemas con la configuración del modelo. + """ + + def __init__( + self, + message: str = "AI model error", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +class AIResponseError(AIClientError): + """ + Error al procesar la respuesta de la IA. + + Se lanza cuando la respuesta no tiene el formato esperado + o no se puede parsear correctamente. + """ + + def __init__( + self, + message: str = "Invalid AI response", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +# ============================================================================= +# Dataclass para respuesta estructurada +# ============================================================================= + + +@dataclass +class AIResponse: + """ + Respuesta estructurada de una llamada a la IA. + + Attributes: + content: Texto generado por el modelo + model_name: Nombre del modelo usado + tokens_used: Tokens consumidos (input + output) + finish_reason: Razón de finalización (stop, length, safety, etc.) + """ + + content: str + model_name: str + tokens_used: int = 0 + finish_reason: str = "stop" + + +# ============================================================================= +# Interfaz abstracta (Adapter Pattern) +# ============================================================================= + + +class AIClient(ABC): + """ + Interfaz abstracta para clientes de IA generativa. 
+ + Define el contrato que deben implementar todos los proveedores + de IA, permitiendo cambiar entre diferentes servicios sin + modificar el código del negocio. + + Example: + ```python + class VertexAIClient(AIClient): + async def generate_explanation(self, prompt: str) -> AIResponse: + # Implementación específica de Vertex AI + ... + + # Uso + client: AIClient = VertexAIClient() + response = await client.generate_explanation("Explica este error...") + print(response.content) + ``` + """ + + @abstractmethod + async def generate_explanation(self, prompt: str) -> AIResponse: + """ + Genera una explicación o respuesta basada en el prompt. + + Este es el método principal que deben implementar todos los + proveedores de IA. + + Args: + prompt: Texto del prompt a enviar al modelo + + Returns: + AIResponse: Respuesta estructurada con el contenido generado + + Raises: + AIRateLimitError: Si se excede el límite de tasa de la API + AIConnectionError: Si hay problemas de conexión + AIModelError: Si hay problemas con el modelo + AIResponseError: Si la respuesta no es válida + AIClientError: Para otros errores de la API + """ + pass + + @abstractmethod + async def health_check(self) -> bool: + """ + Verifica si el cliente de IA está operativo. + + Útil para health checks del sistema y monitoreo. + + Returns: + bool: True si el servicio está disponible + """ + pass + + @property + @abstractmethod + def model_name(self) -> str: + """ + Retorna el nombre del modelo configurado. + + Returns: + str: Identificador del modelo (ej: 'gemini-1.5-flash-001') + """ + pass + + @property + @abstractmethod + def is_configured(self) -> bool: + """ + Verifica si el cliente tiene toda la configuración necesaria. 
+ + Returns: + bool: True si el cliente está correctamente configurado + """ + pass diff --git a/backend/src/external/mcp_client.py b/backend/src/external/mcp_client.py index e69de29..52aa408 100644 --- a/backend/src/external/mcp_client.py +++ b/backend/src/external/mcp_client.py @@ -0,0 +1,190 @@ +""" +Cliente MCP (Model Context Protocol) para enriquecer prompts con contexto de seguridad. + +Proporciona acceso a la base de conocimiento OWASP Top 10 y mapeos CWE +para enriquecer las explicaciones generadas por IA. + +Principios de diseño: +- SRP: Solo busca y formatea contexto de seguridad +- Acoplamiento débil: Interfaz abstracta permite múltiples implementaciones +- Async: Todas las operaciones son asíncronas para consistencia +""" + +import logging +from abc import ABC, abstractmethod +from typing import List, Optional + +from src.core.config.mcp_config import ( + OWASP_TOP_10, + SecurityContext, + format_security_context, + get_security_context, +) +from src.schemas.finding import Finding + +logger = logging.getLogger("agents.MCP") + + +class MCPClient(ABC): + """ + Interfaz abstracta para clientes MCP (Model Context Protocol). + + Define el contrato para obtener contexto de seguridad que será + usado para enriquecer prompts de IA generativa. + """ + + @abstractmethod + async def get_context(self, finding: Finding) -> Optional[str]: + """ + Obtiene contexto de seguridad formateado para un hallazgo. + + Args: + finding: Hallazgo de seguridad a enriquecer + + Returns: + Contexto formateado como texto o None si no se encuentra + """ + pass + + @abstractmethod + async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: + """ + Obtiene el objeto SecurityContext para un hallazgo. + + Args: + finding: Hallazgo de seguridad + + Returns: + SecurityContext o None si no se encuentra + """ + pass + + @abstractmethod + def get_available_categories(self) -> List[str]: + """ + Lista las categorías OWASP disponibles. 
+ + Returns: + Lista de nombres de categorías + """ + pass + + +class LocalMCPClient(MCPClient): + """ + Cliente MCP local usando el diccionario OWASP Top 10 embebido. + + Busca contexto de seguridad relevante basado en rule_id o issue_type + del hallazgo y lo formatea para enriquecer prompts de IA. + + Esta implementación usa datos locales. Puede ser extendida o reemplazada + por una que consulte servidores MCP externos. + + Example: + client = LocalMCPClient() + context = await client.get_context(finding) + if context: + prompt = f"Contexto OWASP:\\n{context}" + """ + + async def get_context(self, finding: Finding) -> Optional[str]: + """ + Obtiene contexto de seguridad OWASP formateado para un hallazgo. + + Busca primero por rule_id (más específico) y luego por issue_type. + + Args: + finding: Hallazgo de seguridad + + Returns: + Contexto formateado o None si no se encuentra + """ + context = await self.get_security_context(finding) + + if context: + formatted = format_security_context(context) + logger.debug( + f"[MCP] Contexto encontrado para {finding.rule_id or finding.issue_type}: " + f"{context.category}" + ) + return formatted + + logger.debug(f"[MCP] Sin contexto OWASP para {finding.rule_id or finding.issue_type}") + return None + + async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: + """ + Obtiene el objeto SecurityContext para un hallazgo. + + Prioriza rule_id sobre issue_type para mayor precisión. 
+ + Args: + finding: Hallazgo de seguridad + + Returns: + SecurityContext o None si no se encuentra + """ + # Buscar por rule_id primero (más específico) + if finding.rule_id: + context = get_security_context(rule_id=finding.rule_id) + if context: + return context + + # Fallback a issue_type + if finding.issue_type: + context = get_security_context(issue_type=finding.issue_type) + if context: + return context + + return None + + def get_available_categories(self) -> List[str]: + """ + Lista todas las categorías OWASP disponibles. + + Returns: + Lista de claves del diccionario OWASP_TOP_10 + """ + return list(OWASP_TOP_10.keys()) + + async def get_context_by_category(self, category_key: str) -> Optional[str]: + """ + Obtiene contexto por clave de categoría directamente. + + Args: + category_key: Clave del diccionario OWASP (ej: "injection", "broken_access_control") + + Returns: + Contexto formateado o None + """ + context = OWASP_TOP_10.get(category_key) + if context: + return format_security_context(context) + return None + + +# Singleton del cliente MCP +_mcp_client_instance: Optional[MCPClient] = None + + +def get_mcp_client() -> MCPClient: + """ + Factory function para obtener el cliente MCP. + + Usa patrón singleton para reutilizar la misma instancia. + + Returns: + Instancia de MCPClient (LocalMCPClient por defecto) + """ + global _mcp_client_instance + if _mcp_client_instance is None: + _mcp_client_instance = LocalMCPClient() + return _mcp_client_instance + + +def reset_mcp_client() -> None: + """ + Resetea el singleton del cliente MCP (útil para testing). 
+ """ + global _mcp_client_instance + _mcp_client_instance = None diff --git a/backend/src/main.py b/backend/src/main.py index 3d6d83c..364b308 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -8,6 +8,7 @@ from src.routers.analysis import router as analysis_router from src.routers.auth import router as auth_router +from src.routers.findings import router as findings_router # Create FastAPI app app = FastAPI( @@ -29,6 +30,7 @@ app.include_router(analysis_router) app.include_router(auth_router) +app.include_router(findings_router) @app.get("/health") diff --git a/backend/src/routers/findings.py b/backend/src/routers/findings.py new file mode 100644 index 0000000..c463dd6 --- /dev/null +++ b/backend/src/routers/findings.py @@ -0,0 +1,278 @@ +""" +Router para hallazgos (findings) con explicaciones de IA. + +Endpoints: +- GET /api/v1/findings/{id} - Obtener un hallazgo +- POST /api/v1/findings/{id}/explain - Generar explicación con IA +- GET /api/v1/findings/{id}/explain/status - Estado del rate limit + +Principios de diseño: +- SRP: Solo maneja HTTP, delega lógica a servicios +- Defensibilidad: Validación de entrada y manejo de errores +- Seguridad: Requiere autenticación para todas las operaciones +""" + +from typing import Any, Dict +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.orm import Session + +from src.core.config.ai_config import get_ai_settings +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.models.finding import AgentFindingEntity +from src.schemas.ai_explanation import ( + AIExplanation, + AIExplanationError, + AIExplanationRequest, + AIExplanationResponse, + RateLimitInfo, +) +from src.schemas.finding import Finding, Severity +from src.schemas.user import User +from src.services.ai_service import ( + AIExplainerService, +) +from src.services.ai_service import AIExplanationError as ServiceAIError +from 
router = APIRouter(prefix="/api/v1/findings", tags=["findings"])


def _entity_to_finding(entity: AgentFindingEntity) -> Finding:
    """Map a persisted AgentFindingEntity onto the Finding schema.

    The entity has no dedicated rule-id column, so ``issue_type`` doubles
    as ``rule_id`` for downstream OWASP-context lookups.
    """
    return Finding(
        severity=Severity(entity.severity.value),
        issue_type=entity.issue_type,
        message=entity.message,
        line_number=entity.line_number,
        agent_name=entity.agent_type,
        code_snippet=entity.code_snippet,
        suggestion=entity.suggestion,
        rule_id=entity.issue_type,  # reuse issue_type when no explicit rule id exists
    )


@router.get(
    "/{finding_id}",
    response_model=Dict[str, Any],
    status_code=status.HTTP_200_OK,
    summary="Obtener un hallazgo por ID",
)
async def get_finding(
    finding_id: UUID,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> Dict[str, Any]:
    """Return the details of a single finding.

    Args:
        finding_id: UUID of the finding.
        current_user: authenticated user (auth required).
        db: database session.

    Raises:
        HTTPException 404: when no finding with that UUID exists.
    """
    entity = db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first()

    if entity is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado"
        )

    return {
        "id": str(entity.id),
        "agent_type": entity.agent_type,
        "severity": entity.severity.value,
        "issue_type": entity.issue_type,
        "line_number": entity.line_number,
        "message": entity.message,
        "code_snippet": entity.code_snippet,
        "suggestion": entity.suggestion,
        "ai_explanation": entity.ai_explanation,
        "created_at": entity.created_at.isoformat(),
    }


@router.post(
    "/{finding_id}/explain",
    response_model=AIExplanationResponse,
    status_code=status.HTTP_200_OK,
    summary="Generar explicación con IA para un hallazgo",
    responses={
        200: {"description": "Explicación generada exitosamente"},
        404: {"description": "Hallazgo no encontrado"},
        429: {"description": "Rate limit excedido"},
        503: {"description": "Servicio de IA no disponible"},
    },
)
async def explain_finding(
    finding_id: UUID,
    request: AIExplanationRequest = AIExplanationRequest(),
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
    service: AIExplainerService = Depends(get_ai_explainer_service),
) -> AIExplanationResponse:
    """Generate (or return a cached) AI explanation for a finding.

    Flow: verify the AI backend is configured, load the finding, serve
    the JSONB cache when present, otherwise call the AI service and
    persist the result for future requests.

    Business rules: JWT auth required; per-user hourly rate limit
    (configurable); explanations are cached in the DB.

    NOTE(review): the ``request`` options (include_attack_example,
    include_references, language) are not forwarded to the service —
    confirm whether that is intentional.

    Raises:
        HTTPException 404: finding not found.
        HTTPException 429: rate limit exceeded.
        HTTPException 503: AI service unavailable.
    """
    # Guard: refuse early when the AI backend is not configured.
    settings = get_ai_settings()
    if not settings.is_configured:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="El servicio de IA no está configurado. "
            "Configure GOOGLE_APPLICATION_CREDENTIALS.",
        )

    entity = db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first()

    if entity is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado"
        )

    # Cache hit: an explanation was stored in the JSONB column earlier.
    if entity.ai_explanation:
        logger.info(f"Returning cached AI explanation for finding {finding_id}")
        # NOTE(review): UUID.int is a 128-bit integer, while the response
        # schema documents a small int — confirm the intended id type.
        return AIExplanationResponse(
            finding_id=finding_id.int,
            explanation=AIExplanation.from_dict(entity.ai_explanation),
            cached=True,
        )

    try:
        schema_finding = _entity_to_finding(entity)

        # Full source code gives the model more context when available.
        code_context = None
        if entity.code_review and hasattr(entity.code_review, "source_code"):
            code_context = entity.code_review.source_code

        explanation, rate_limit_info = await service.explain_finding(
            finding=schema_finding,
            code_context=code_context,
            user_id=current_user.id,
        )

        # Persist so subsequent requests are served from cache.
        entity.ai_explanation = explanation.to_dict()
        db.commit()

        logger.info(
            f"AI explanation generated and cached for finding {finding_id}. "
            f"Tokens used: {explanation.tokens_used}"
        )

        return AIExplanationResponse(
            finding_id=finding_id.int,
            explanation=explanation,
            cached=False,
        )

    except RateLimitExceeded as e:
        logger.warning(f"Rate limit exceeded for user {current_user.id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail=AIExplanationError(
                error_type="rate_limit",
                message="Has excedido el límite de explicaciones por hora. "
                f"Límite: {e.rate_limit_info.requests_limit}/hora.",
                rate_limit_info=e.rate_limit_info,
            ).model_dump(),
        ) from e

    except ServiceAIError as e:
        logger.error(f"AI service error for finding {finding_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=AIExplanationError(
                error_type="ai_error",
                message=str(e),
            ).model_dump(),
        ) from e

    except Exception as e:
        logger.error(f"Unexpected error explaining finding {finding_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error interno generando explicación",
        ) from e


@router.get(
    "/{finding_id}/explain/status",
    response_model=RateLimitInfo,
    status_code=status.HTTP_200_OK,
    summary="Obtener estado del rate limit para explicaciones",
)
async def get_rate_limit_status(
    finding_id: UUID,  # unused; kept only so the URL shape matches /explain
    current_user: User = Depends(get_current_user),
    service: AIExplainerService = Depends(get_ai_explainer_service),
) -> RateLimitInfo:
    """Report how many AI explanations the user may still request.

    Useful for UIs that want to show remaining quota and reset time
    before the user hits the limit.
    """
    return service.get_rate_limit_info(current_user.id)
class AIExplanationRequest(BaseModel):
    """Options controlling how an AI explanation is generated.

    Attributes:
        include_attack_example: whether to include a potential attack example.
        include_references: whether to include OWASP/CWE references.
        language: language of the explanation (es/en).
    """

    include_attack_example: bool = Field(
        default=True, description="Incluir ejemplo de ataque potencial"
    )
    include_references: bool = Field(default=True, description="Incluir referencias OWASP/CWE")
    language: str = Field(default="es", description="Idioma de la explicación")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "include_attack_example": True,
                "include_references": True,
                "language": "es",
            }
        }
    )


class AIExplanation(BaseModel):
    """AI-generated explanation for a security finding.

    This structure is stored in the JSONB column ``ai_explanation`` of
    AgentFindingEntity as a persistent cache.

    Attributes:
        explanation: detailed description of the security problem.
        suggested_fix: code suggested to fix the problem.
        attack_example: how the issue could be exploited (optional).
        references: OWASP/CWE references (optional).
        model_used: name of the model that produced the explanation.
        tokens_used: number of tokens consumed.
        generated_at: generation timestamp (UTC).
    """

    explanation: str = Field(..., min_length=10, description="Explicación detallada del problema")
    suggested_fix: str = Field(..., min_length=5, description="Código sugerido para corregir")
    attack_example: Optional[str] = Field(
        default=None, description="Ejemplo de explotación potencial"
    )
    references: Optional[List[str]] = Field(
        default=None, description="Referencias OWASP, CWE, etc."
    )
    model_used: str = Field(..., description="Modelo de IA usado")
    tokens_used: int = Field(..., ge=0, description="Tokens consumidos")
    generated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Timestamp de generación",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "explanation": (
                    "El uso de eval() en Python es extremadamente peligroso porque "
                    "permite la ejecución arbitraria de código. Un atacante podría "
                    "inyectar código malicioso que se ejecutaría con los privilegios "
                    "del proceso actual."
                ),
                "suggested_fix": (
                    "import ast\n\n"
                    "# Usar literal_eval para evaluar literales de forma segura\n"
                    "result = ast.literal_eval(user_input)"
                ),
                "attack_example": (
                    "# Un atacante podría enviar:\n"
                    'user_input = \'__import__("os").system("cat /etc/passwd")\''
                ),
                "references": ["OWASP A03:2021 - Injection", "CWE-94: Code Injection"],
                "model_used": "gemini-1.5-flash-001",
                "tokens_used": 450,
                "generated_at": "2024-01-15T10:30:00Z",
            }
        }
    )

    def to_dict(self) -> dict:
        """Serialize to a plain dict suitable for JSONB storage."""
        return {
            "explanation": self.explanation,
            "suggested_fix": self.suggested_fix,
            "attack_example": self.attack_example,
            "references": self.references,
            "model_used": self.model_used,
            "tokens_used": self.tokens_used,
            "generated_at": self.generated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "AIExplanation":
        """Rebuild an instance from a dict recovered from JSONB.

        Args:
            data: dict with the explanation fields; ``generated_at`` may be
                an ISO-8601 string.

        Returns:
            A validated AIExplanation instance.
        """
        # Work on a shallow copy: the caller's dict is frequently the live
        # SQLAlchemy JSONB attribute, and mutating it in place would alter
        # the entity's in-memory state as a side effect.
        payload = dict(data)
        generated_at = payload.get("generated_at")
        if isinstance(generated_at, str):
            payload["generated_at"] = datetime.fromisoformat(generated_at.replace("Z", "+00:00"))

        return cls(**payload)


class AIExplanationResponse(BaseModel):
    """Response envelope for the /explain endpoint.

    Attributes:
        finding_id: id of the explained finding.
        explanation: the generated explanation.
        cached: whether the explanation came from cache.
    """

    finding_id: int = Field(..., description="ID del hallazgo")
    explanation: AIExplanation = Field(..., description="Explicación generada")
    cached: bool = Field(..., description="Si viene de cache")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "finding_id": 123,
                "explanation": {
                    "explanation": "El uso de eval() es peligroso...",
                    "suggested_fix": "Usar ast.literal_eval()",
                    "attack_example": "user_input = '__import__(\"os\")...'",
                    "references": ["OWASP A03:2021"],
                    "model_used": "gemini-1.5-flash-001",
                    "tokens_used": 450,
                    "generated_at": "2024-01-15T10:30:00Z",
                },
                "cached": False,
            }
        }
    )


class RateLimitInfo(BaseModel):
    """Current rate-limit state for a user.

    Attributes:
        requests_remaining: requests left in the rolling window.
        requests_limit: total allowed requests per window.
        reset_at: when the counter resets.
    """

    requests_remaining: int = Field(..., ge=0, description="Requests restantes")
    requests_limit: int = Field(..., ge=0, description="Límite total")
    reset_at: datetime = Field(..., description="Hora de reset")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "requests_remaining": 8,
                "requests_limit": 10,
                "reset_at": "2024-01-15T11:00:00Z",
            }
        }
    )


class AIExplanationError(BaseModel):
    """Error payload for failed explanation requests.

    Attributes:
        error_type: error category (rate_limit, ai_error, not_found).
        message: human-readable description.
        rate_limit_info: rate-limit state, when applicable.
    """

    error_type: str = Field(..., description="Tipo de error")
    message: str = Field(..., description="Mensaje de error")
    rate_limit_info: Optional[RateLimitInfo] = Field(default=None, description="Info de rate limit")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error_type": "rate_limit",
                "message": "Has excedido el límite de explicaciones por hora",
                "rate_limit_info": {
                    "requests_remaining": 0,
                    "requests_limit": 10,
                    "reset_at": "2024-01-15T11:00:00Z",
                },
            }
        }
    )
"""
AI Explainer Service.

Main service for generating security explanations with generative AI.
It wires together the Vertex AI client, MCP context enrichment and a
per-user rate limiter.

Design principles:
- SRP: only orchestrates explanation generation.
- Loose coupling: depends on the AIClient interface.
- Defensibility: rate limiting and input validation.
- Async: every public operation is asynchronous.
"""

import json
import logging
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, Optional, Tuple

from src.core.config.ai_config import get_ai_settings
from src.external.gemini_client import get_ai_client
from src.external.interfaces import (
    AIClient,
    AIClientError,
    AIRateLimitError,
)
from src.schemas.ai_explanation import AIExplanation, RateLimitInfo
from src.schemas.finding import Finding
from src.services.mcp_context_enricher import (
    EnrichedContext,
    MCPContextEnricher,
    get_mcp_context_enricher,
)

logger = logging.getLogger(__name__)


class RateLimitExceeded(Exception):
    """Raised when a user exhausts their hourly request quota."""

    def __init__(self, message: str, rate_limit_info: RateLimitInfo):
        super().__init__(message)
        # Carried so the HTTP layer can report remaining quota / reset time.
        self.rate_limit_info = rate_limit_info


class AIExplanationError(Exception):
    """Generic failure while producing an explanation."""

    pass


class InMemoryRateLimiter:
    """Per-user, per-hour request limiter kept in process memory.

    Development-grade implementation; a Redis-backed adapter can replace
    it in production without touching callers (Adapter pattern).

    NOTE(review): entries for a user are only pruned when that same user
    makes another request, so the map can grow with distinct user ids —
    acceptable for dev, confirm for production.
    """

    def __init__(self, limit_per_hour: int = 10):
        """Initialize the limiter.

        Args:
            limit_per_hour: maximum requests per user per rolling hour.
        """
        self._limit_per_hour = limit_per_hour
        # user_id -> timestamps of requests inside the rolling window
        self._user_requests: Dict[str, list[datetime]] = defaultdict(list)

    def _prune(self, user_id: str, now: datetime) -> list:
        """Drop timestamps older than one hour; return the kept list."""
        cutoff = now - timedelta(hours=1)
        kept = [ts for ts in self._user_requests[user_id] if ts > cutoff]
        self._user_requests[user_id] = kept
        return kept

    def _reset_at(self, history: list, now: datetime) -> datetime:
        """Moment the oldest recorded request leaves the rolling window."""
        return (min(history) + timedelta(hours=1)) if history else (now + timedelta(hours=1))

    def check_and_consume(self, user_id: str) -> RateLimitInfo:
        """Consume one request slot for ``user_id``.

        Args:
            user_id: the requesting user's id.

        Returns:
            RateLimitInfo describing the state after consumption.

        Raises:
            RateLimitExceeded: when the hourly limit is already reached.
        """
        now = datetime.now(timezone.utc)
        history = self._prune(user_id, now)

        remaining_before = max(0, self._limit_per_hour - len(history))
        rate_limit_info = RateLimitInfo(
            requests_remaining=max(0, remaining_before - 1),
            requests_limit=self._limit_per_hour,
            reset_at=self._reset_at(history, now),
        )

        if remaining_before <= 0:
            raise RateLimitExceeded(
                f"Rate limit exceeded. Limit: {self._limit_per_hour}/hour",
                rate_limit_info,
            )

        history.append(now)
        return rate_limit_info

    def get_remaining(self, user_id: str) -> RateLimitInfo:
        """Report the current state without consuming a slot.

        Args:
            user_id: the user's id.

        Returns:
            RateLimitInfo with the current remaining quota and reset time.
        """
        now = datetime.now(timezone.utc)
        history = self._prune(user_id, now)
        return RateLimitInfo(
            requests_remaining=max(0, self._limit_per_hour - len(history)),
            requests_limit=self._limit_per_hour,
            reset_at=self._reset_at(history, now),
        )


class AIExplainerService:
    """Generates AI explanations for security findings.

    Orchestrates the full pipeline:
    1. check the user's rate limit;
    2. enrich the finding with OWASP context;
    3. build the DevSecOps prompt;
    4. call the AI model;
    5. parse the response into an AIExplanation.

    All collaborators are injectable for testing.

    NOTE(review): a rate-limit slot is consumed even when the AI call
    later fails — confirm whether failed requests should be refunded.
    """

    # DevSecOps prompt template; {context} is filled per finding.
    PROMPT_TEMPLATE = """Eres un experto en DevSecOps y seguridad de aplicaciones.
Tu rol es explicar vulnerabilidades de seguridad a desarrolladores de forma clara,
educativa y accionable.

{context}

## Tu Tarea

Proporciona una explicación completa que incluya:

1. **Explicación del Problema**: Explica qué es esta vulnerabilidad, por qué es peligrosa
   y qué impacto podría tener en la aplicación (1-2 párrafos).

2. **Código Corregido**: Proporciona el código corregido que soluciona el problema.
   Incluye comentarios explicando los cambios.

3. **Ejemplo de Ataque**: Muestra un ejemplo concreto de cómo un atacante podría explotar
   esta vulnerabilidad (código o pasos).

4. **Referencias**: Lista referencias relevantes (OWASP, CWE, etc.).

## Formato de Respuesta

Responde en formato JSON con esta estructura:
```json
{{
  "explanation": "Explicación detallada del problema...",
  "suggested_fix": "Código corregido con comentarios...",
  "attack_example": "Ejemplo de cómo explotar la vulnerabilidad...",
  "references": ["OWASP A03:2021", "CWE-94"]
}}
```

IMPORTANTE:
- Responde SOLO con el JSON, sin texto adicional
- La explicación debe ser en español
- El código debe ser Python válido
- Sé específico sobre el contexto del código analizado
"""

    def __init__(
        self,
        ai_client: Optional[AIClient] = None,
        context_enricher: Optional[MCPContextEnricher] = None,
        rate_limiter: Optional[InMemoryRateLimiter] = None,
    ):
        """Initialize the service with injectable dependencies.

        Args:
            ai_client: AI client (default: the configured Vertex AI client).
            context_enricher: OWASP context enricher (default: MCPContextEnricher).
            rate_limiter: rate limiter (default: InMemoryRateLimiter with the
                configured hourly limit).
        """
        settings = get_ai_settings()

        self._ai_client = ai_client or get_ai_client()
        self._context_enricher = context_enricher or get_mcp_context_enricher()
        self._rate_limiter = rate_limiter or InMemoryRateLimiter(
            limit_per_hour=settings.AI_RATE_LIMIT_PER_HOUR
        )

    async def explain_finding(
        self,
        finding: Finding,
        code_context: Optional[str] = None,
        user_id: str = "anonymous",
    ) -> Tuple[AIExplanation, RateLimitInfo]:
        """Generate an AI explanation for a security finding.

        Args:
            finding: the finding to explain.
            code_context: full source code for extra context (optional).
            user_id: user id used for rate limiting.

        Returns:
            Tuple of (AIExplanation, RateLimitInfo).

        Raises:
            RateLimitExceeded: when the user's quota is exhausted.
            AIExplanationError: for any failure during generation.
        """
        # 1. Rate limit first — this consumes a slot (see class NOTE).
        rate_limit_info = self._rate_limiter.check_and_consume(user_id)

        try:
            # 2. Enrich with OWASP context.
            enriched = await self._context_enricher.enrich(finding)

            # 3. Build the prompt.
            prompt = self._build_prompt(enriched, code_context)

            # 4. Call the AI model.
            logger.info(
                f"Generating AI explanation for finding: "
                f"rule_id={finding.rule_id}, user_id={user_id}"
            )

            response = await self._ai_client.generate_explanation(prompt)

            # 5. Parse the model output.
            explanation = self._parse_response(
                response.content, response.model_name, response.tokens_used
            )

            logger.info(
                f"AI explanation generated successfully. " f"tokens_used={response.tokens_used}"
            )

            return explanation, rate_limit_info

        except AIRateLimitError as e:
            logger.warning(f"AI API rate limit hit: {e}")
            raise AIExplanationError(
                "El servicio de IA está temporalmente sobrecargado. "
                "Intenta de nuevo en unos minutos."
            ) from e

        except AIClientError as e:
            logger.error(f"AI client error: {e}")
            raise AIExplanationError(f"Error al comunicarse con el servicio de IA: {e}") from e

        except Exception as e:
            logger.error(f"Unexpected error generating explanation: {e}")
            raise AIExplanationError(f"Error inesperado generando explicación: {e}") from e

    def _build_prompt(self, enriched: EnrichedContext, code_context: Optional[str]) -> str:
        """Assemble the full prompt for the AI model.

        Args:
            enriched: OWASP-enriched finding context.
            code_context: additional full source code (optional).

        Returns:
            The formatted prompt string.
        """
        context_parts = [enriched.formatted_prompt_context]

        # Append the full source file when the caller supplied it.
        if code_context:
            context_parts.append(f"## Código Fuente Completo\n```python\n{code_context}\n```")

        full_context = "\n\n".join(context_parts)
        return self.PROMPT_TEMPLATE.format(context=full_context)

    def _parse_response(self, content: str, model_name: str, tokens_used: int) -> AIExplanation:
        """Parse the model output into an AIExplanation.

        The model is asked for pure JSON but often wraps it in a markdown
        code fence; the fence is stripped defensively. Unparseable output
        degrades to a raw-text explanation instead of failing.

        Args:
            content: raw model output.
            model_name: model that produced the output.
            tokens_used: tokens consumed by the call.

        Returns:
            A populated AIExplanation.
        """
        clean_content = content.strip()

        # Strip a leading ```/```json fence line, and a trailing ``` only
        # if one is actually present. (Unconditionally dropping the last
        # line — the previous behavior — destroyed valid JSON whenever the
        # closing fence was missing or the response ended mid-line.)
        if clean_content.startswith("```"):
            _, _, clean_content = clean_content.partition("\n")
            clean_content = clean_content.strip()
            if clean_content.endswith("```"):
                clean_content = clean_content[:-3].strip()

        try:
            data = json.loads(clean_content)
        except json.JSONDecodeError:
            # Fallback: treat the whole response as the explanation text.
            logger.warning("Could not parse AI response as JSON, using raw content")
            return AIExplanation(
                explanation=content,
                suggested_fix="# Ver explicación para sugerencias",
                attack_example=None,
                references=None,
                model_used=model_name,
                tokens_used=tokens_used,
            )

        return AIExplanation(
            explanation=data.get("explanation", "Sin explicación disponible"),
            suggested_fix=data.get("suggested_fix", "# Sin sugerencia disponible"),
            attack_example=data.get("attack_example"),
            references=data.get("references"),
            model_used=model_name,
            tokens_used=tokens_used,
        )

    def get_rate_limit_info(self, user_id: str) -> RateLimitInfo:
        """Return the user's current rate-limit state without consuming.

        Args:
            user_id: the user's id.

        Returns:
            RateLimitInfo with remaining requests and reset time.
        """
        return self._rate_limiter.get_remaining(user_id)

    @property
    def is_configured(self) -> bool:
        """Whether the underlying AI client is ready to serve requests."""
        return self._ai_client.is_configured


# Module-level singleton so the in-memory rate limiter is shared.
_service_instance: Optional[AIExplainerService] = None


def get_ai_explainer_service() -> AIExplainerService:
    """Return the shared AIExplainerService, creating it on first use.

    A singleton keeps one InMemoryRateLimiter for the whole process.
    """
    global _service_instance
    if _service_instance is None:
        _service_instance = AIExplainerService()
    return _service_instance


def reset_ai_explainer_service() -> None:
    """Drop the cached service (useful for testing)."""
    global _service_instance
    _service_instance = None
"""
MCP Context Enricher Service.

Enriches security findings with OWASP Top 10 / CWE context before they
are sent to the generative-AI model, acting as a local "Model Context
Protocol" knowledge source.

Design principles:
- SRP: only enriches context; never generates explanations.
- Loose coupling: depends on the MCPClient interface, not implementations.
- Async: all operations are asynchronous for consistency.
"""

import asyncio
from dataclasses import dataclass
from typing import Optional

from src.core.config.mcp_config import SecurityContext, format_security_context
from src.external.mcp_client import MCPClient, get_mcp_client
from src.schemas.finding import Finding


@dataclass
class EnrichedContext:
    """Result of enriching one finding.

    Attributes:
        finding: the original finding.
        security_context: matching OWASP context, if any.
        formatted_prompt_context: text ready to embed in an AI prompt.
        has_security_context: whether OWASP context was found.
    """

    finding: Finding
    security_context: Optional[SecurityContext]
    formatted_prompt_context: str
    has_security_context: bool

    @property
    def is_security_finding(self) -> bool:
        """True when the finding matched an OWASP security context."""
        return self.has_security_context


class MCPContextEnricher:
    """Looks up OWASP knowledge for findings and formats it for prompts.

    Uses an MCPClient to query the OWASP Top 10 knowledge base and
    renders the result as prompt-ready markdown sections.

    Example:
        enricher = MCPContextEnricher()
        context = await enricher.enrich(finding)
        prompt = f"Analiza este hallazgo:\\n{context.formatted_prompt_context}"
    """

    def __init__(self, mcp_client: Optional[MCPClient] = None):
        """Initialize the enricher.

        Args:
            mcp_client: MCP client to use (default: the shared LocalMCPClient).
        """
        self._mcp_client = mcp_client or get_mcp_client()

    async def enrich(self, finding: Finding) -> EnrichedContext:
        """Enrich a single finding with OWASP security context.

        Args:
            finding: finding to enrich.

        Returns:
            EnrichedContext with any relevant security information.
        """
        security_context = await self._mcp_client.get_security_context(finding)
        formatted_context = self._format_finding_context(finding, security_context)

        return EnrichedContext(
            finding=finding,
            security_context=security_context,
            formatted_prompt_context=formatted_context,
            has_security_context=security_context is not None,
        )

    async def enrich_batch(self, findings: list[Finding]) -> list[EnrichedContext]:
        """Enrich several findings concurrently.

        The per-finding lookups are independent, so they run under
        asyncio.gather instead of being awaited one by one; result order
        matches input order.

        Args:
            findings: findings to enrich.

        Returns:
            List of EnrichedContext, one per input finding.
        """
        return list(await asyncio.gather(*(self.enrich(finding) for finding in findings)))

    def _format_finding_context(
        self, finding: Finding, security_context: Optional[SecurityContext]
    ) -> str:
        """Render the full prompt context for a finding.

        Combines the basic finding info, the OWASP section (when found)
        and the offending code snippet (when present).

        Args:
            finding: the original finding.
            security_context: OWASP context (optional).

        Returns:
            Markdown-formatted text for the AI prompt.
        """
        sections = [self._format_finding_info(finding)]

        if security_context:
            sections.append(format_security_context(security_context))

        if finding.code_snippet:
            sections.append(self._format_code_section(finding))

        return "\n\n".join(sections)

    def _format_finding_info(self, finding: Finding) -> str:
        """Render the basic finding metadata as a markdown section."""
        lines = [
            "## Hallazgo Detectado",
            f"- **Tipo**: {finding.issue_type}",
            # .upper() is a no-op now that Severity values are uppercase,
            # but kept as a harmless safeguard.
            f"- **Severidad**: {finding.severity.value.upper()}",
            f"- **Mensaje**: {finding.message}",
            f"- **Línea**: {finding.line_number}",
            f"- **Agente**: {finding.agent_name}",
        ]

        if finding.rule_id:
            lines.append(f"- **Regla**: {finding.rule_id}")

        if finding.suggestion:
            lines.append(f"- **Sugerencia inicial**: {finding.suggestion}")

        return "\n".join(lines)

    def _format_code_section(self, finding: Finding) -> str:
        """Render the offending code snippet as a fenced markdown block."""
        return (
            "## Código Problemático\n"
            "```python\n"
            f"# Línea {finding.line_number}\n"
            f"{finding.code_snippet}\n"
            "```"
        )


def get_mcp_context_enricher(mcp_client: Optional[MCPClient] = None) -> MCPContextEnricher:
    """Factory for MCPContextEnricher instances.

    Args:
        mcp_client: optional MCP client to inject (e.g. a test double).

    Returns:
        A new MCPContextEnricher.
    """
    return MCPContextEnricher(mcp_client=mcp_client)
a/backend/tests/integration/test_security_agent_integration.py b/backend/tests/integration/test_security_agent_integration.py index 3d735ef..9632ff5 100644 --- a/backend/tests/integration/test_security_agent_integration.py +++ b/backend/tests/integration/test_security_agent_integration.py @@ -100,7 +100,7 @@ def test_comprehensive_vulnerability_detection(self, agent, vulnerable_web_app_c # Verify findings are sorted by severity severities = [f.severity.value for f in findings] - expected_order = ["critical", "high", "medium", "low", "info"] + expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] for i in range(len(severities) - 1): assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) diff --git a/backend/tests/test_ai_service.py b/backend/tests/test_ai_service.py new file mode 100644 index 0000000..40c3db0 --- /dev/null +++ b/backend/tests/test_ai_service.py @@ -0,0 +1,421 @@ +""" +Tests for AIExplainerService and related components. + +Tests Sprint 3 functionality including: +- Rate limiting +- MCP Context Enricher +- AI explanation generation +""" + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.core.config.mcp_config import ( + OWASP_TOP_10, + SecurityContext, + format_security_context, + get_security_context, +) +from src.external.interfaces.ai_client import AIResponse +from src.schemas.ai_explanation import AIExplanation, RateLimitInfo +from src.schemas.finding import Finding, Severity +from src.services.ai_service import ( + AIExplainerService, + AIExplanationError, + InMemoryRateLimiter, + RateLimitExceeded, +) +from src.services.mcp_context_enricher import ( + EnrichedContext, + MCPContextEnricher, +) + +# ============================================================ +# Fixtures +# ============================================================ + + +@pytest.fixture +def sample_security_finding() -> Finding: + """Create a sample 
security finding for testing.""" + return Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval() detected - allows arbitrary code execution", + line_number=42, + agent_name="SecurityAgent", + code_snippet="result = eval(user_input)", + suggestion="Use ast.literal_eval() for safe literal evaluation", + rule_id="SEC001_EVAL", + ) + + +@pytest.fixture +def sample_style_finding() -> Finding: + """Create a sample style finding (non-security).""" + return Finding( + severity=Severity.LOW, + issue_type="line_too_long", + message="Line exceeds 88 characters", + line_number=100, + agent_name="StyleAgent", + code_snippet="x = 'a very long string' * 10 # this line is too long", + suggestion="Break the line into multiple lines", + rule_id="STYLE001_LINE_LENGTH", + ) + + +@pytest.fixture +def mock_ai_client(): + """Create a mock AI client.""" + client = AsyncMock() + client.generate_explanation = AsyncMock( + return_value=AIResponse( + content='{"explanation": "Test explanation", "suggested_fix": "# fixed code", "attack_example": "evil code", "references": ["CWE-94"]}', + model_name="gemini-1.5-flash-001", + tokens_used=150, + finish_reason="STOP", + ) + ) + client.is_configured = True + client.model_name = "gemini-1.5-flash-001" + return client + + +@pytest.fixture +def rate_limiter(): + """Create a rate limiter with low limit for testing.""" + return InMemoryRateLimiter(limit_per_hour=3) + + +# ============================================================ +# Tests for MCP Config (OWASP Top 10) +# ============================================================ + + +class TestMCPConfig: + """Tests for MCP configuration and OWASP lookups.""" + + def test_owasp_top_10_has_all_categories(self): + """OWASP dictionary should have all 10 categories.""" + assert len(OWASP_TOP_10) == 10 + + # Las claves son descriptivas, las categorías OWASP están en los valores + expected_categories = [ + "A01:2021", + "A02:2021", + "A03:2021", + "A04:2021", + 
"A05:2021", + "A06:2021", + "A07:2021", + "A08:2021", + "A09:2021", + "A10:2021", + ] + # Extraer las categorías de los valores del diccionario + actual_categories = [ctx.category for ctx in OWASP_TOP_10.values()] + for cat in expected_categories: + found = any(cat in actual_cat for actual_cat in actual_categories) + assert found, f"Missing OWASP category: {cat}" + + def test_get_security_context_by_rule_id(self): + """Should find security context by rule_id.""" + context = get_security_context(rule_id="SEC001_EVAL") + + assert context is not None + assert "Injection" in context.category or "Inyección" in context.category + + def test_get_security_context_by_issue_type(self): + """Should find security context by issue_type.""" + context = get_security_context(issue_type="sql_injection") + + assert context is not None + assert context.cwe_ids is not None + + def test_get_security_context_unknown(self): + """Should return None for unknown rule_id.""" + context = get_security_context(rule_id="UNKNOWN_RULE") + assert context is None + + def test_format_security_context(self): + """Should format security context as text.""" + context = SecurityContext( + category="A03:2021 - Injection", + description="Test description", + impact="Test impact", + mitigation="Test mitigation", + references=["https://owasp.org"], + cwe_ids=["CWE-94"], + ) + + formatted = format_security_context(context) + + assert "A03:2021" in formatted + assert "Test description" in formatted + assert "CWE-94" in formatted + + +# ============================================================ +# Tests for MCP Context Enricher +# ============================================================ + + +class TestMCPContextEnricher: + """Tests for the MCP Context Enricher service.""" + + @pytest.mark.asyncio + async def test_enrich_security_finding(self, sample_security_finding): + """Should enrich security findings with OWASP context.""" + enricher = MCPContextEnricher() + + result = await 
enricher.enrich(sample_security_finding) + + assert isinstance(result, EnrichedContext) + assert result.finding == sample_security_finding + assert result.has_security_context + assert result.security_context is not None + assert result.is_security_finding + + @pytest.mark.asyncio + async def test_enrich_non_security_finding(self, sample_style_finding): + """Should handle non-security findings gracefully.""" + enricher = MCPContextEnricher() + + result = await enricher.enrich(sample_style_finding) + + assert isinstance(result, EnrichedContext) + assert result.finding == sample_style_finding + # Style findings don't have OWASP context + assert not result.is_security_finding + + @pytest.mark.asyncio + async def test_enrich_batch(self, sample_security_finding, sample_style_finding): + """Should enrich multiple findings.""" + enricher = MCPContextEnricher() + findings = [sample_security_finding, sample_style_finding] + + results = await enricher.enrich_batch(findings) + + assert len(results) == 2 + assert all(isinstance(r, EnrichedContext) for r in results) + + @pytest.mark.asyncio + async def test_formatted_context_includes_finding_info(self, sample_security_finding): + """Formatted context should include finding details.""" + enricher = MCPContextEnricher() + + result = await enricher.enrich(sample_security_finding) + + assert ( + "eval()" in result.formatted_prompt_context + or "dangerous_function" in result.formatted_prompt_context + ) + assert str(sample_security_finding.line_number) in result.formatted_prompt_context + + +# ============================================================ +# Tests for In-Memory Rate Limiter +# ============================================================ + + +class TestInMemoryRateLimiter: + """Tests for the in-memory rate limiter.""" + + def test_check_and_consume_allows_within_limit(self, rate_limiter): + """Should allow requests within limit.""" + user_id = "user-123" + + # First 3 requests should succeed (limit is 3) + for i in 
range(3): + info = rate_limiter.check_and_consume(user_id) + assert info.requests_remaining == 2 - i + + def test_check_and_consume_blocks_over_limit(self, rate_limiter): + """Should block requests over limit.""" + user_id = "user-456" + + # Consume all 3 requests + for _ in range(3): + rate_limiter.check_and_consume(user_id) + + # 4th request should raise + with pytest.raises(RateLimitExceeded) as exc_info: + rate_limiter.check_and_consume(user_id) + + assert exc_info.value.rate_limit_info.requests_remaining == 0 + + def test_rate_limit_per_user(self, rate_limiter): + """Each user should have independent limits.""" + user1 = "user-1" + user2 = "user-2" + + # Exhaust user1's limit + for _ in range(3): + rate_limiter.check_and_consume(user1) + + # user2 should still be able to make requests + info = rate_limiter.check_and_consume(user2) + assert info.requests_remaining == 2 + + def test_get_remaining_without_consuming(self, rate_limiter): + """get_remaining should not consume a request.""" + user_id = "user-789" + + info1 = rate_limiter.get_remaining(user_id) + info2 = rate_limiter.get_remaining(user_id) + + assert info1.requests_remaining == info2.requests_remaining == 3 + + +# ============================================================ +# Tests for AI Explainer Service +# ============================================================ + + +class TestAIExplainerService: + """Tests for the AI Explainer Service.""" + + @pytest.mark.asyncio + async def test_explain_finding_success(self, sample_security_finding, mock_ai_client): + """Should successfully generate explanation.""" + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, rate_info = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + assert isinstance(explanation, AIExplanation) + assert explanation.explanation == "Test explanation" + assert explanation.model_used == 
"gemini-1.5-flash-001" + assert rate_info.requests_remaining == 9 + + @pytest.mark.asyncio + async def test_explain_finding_rate_limited(self, sample_security_finding, mock_ai_client): + """Should raise when rate limit exceeded.""" + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=1), + ) + + # First request succeeds + await service.explain_finding( + finding=sample_security_finding, + user_id="limited-user", + ) + + # Second request should fail + with pytest.raises(RateLimitExceeded): + await service.explain_finding( + finding=sample_security_finding, + user_id="limited-user", + ) + + @pytest.mark.asyncio + async def test_explain_finding_parses_json_response( + self, sample_security_finding, mock_ai_client + ): + """Should parse JSON response from AI.""" + # Set up mock to return JSON + mock_ai_client.generate_explanation.return_value = AIResponse( + content='{"explanation": "Detailed explanation", "suggested_fix": "fixed_code()", "attack_example": "exploit", "references": ["CWE-94", "OWASP A03"]}', + model_name="gemini-1.5-pro-001", + tokens_used=200, + finish_reason="STOP", + ) + + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, _ = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + assert explanation.explanation == "Detailed explanation" + assert explanation.suggested_fix == "fixed_code()" + assert explanation.attack_example == "exploit" + assert "CWE-94" in explanation.references + + @pytest.mark.asyncio + async def test_explain_finding_handles_non_json_response( + self, sample_security_finding, mock_ai_client + ): + """Should handle non-JSON response gracefully.""" + mock_ai_client.generate_explanation.return_value = AIResponse( + content="This is a plain text response without JSON formatting.", + model_name="gemini-1.5-flash-001", + tokens_used=50, + 
finish_reason="STOP", + ) + + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, _ = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + # Should use raw content as explanation + assert "plain text response" in explanation.explanation + + def test_is_configured_delegates_to_client(self, mock_ai_client): + """is_configured should delegate to AI client.""" + service = AIExplainerService(ai_client=mock_ai_client) + + assert service.is_configured == mock_ai_client.is_configured + + +# ============================================================ +# Tests for AIExplanation Schema +# ============================================================ + + +class TestAIExplanationSchema: + """Tests for AIExplanation Pydantic schema.""" + + def test_to_dict_serialization(self): + """Should serialize to dict for JSONB storage.""" + explanation = AIExplanation( + explanation="Test explanation", + suggested_fix="# fixed", + attack_example="exploit code", + references=["CWE-94"], + model_used="gemini-1.5-flash", + tokens_used=100, + ) + + data = explanation.to_dict() + + assert data["explanation"] == "Test explanation" + assert data["tokens_used"] == 100 + assert "generated_at" in data + + def test_from_dict_deserialization(self): + """Should deserialize from JSONB dict.""" + data = { + "explanation": "Test explanation with sufficient length for validation", + "suggested_fix": "# fix", + "attack_example": None, + "references": ["CWE-1"], + "model_used": "test-model", + "tokens_used": 50, + "generated_at": "2024-01-15T10:30:00+00:00", + } + + explanation = AIExplanation.from_dict(data) + + assert explanation.explanation == "Test explanation with sufficient length for validation" + assert explanation.model_used == "test-model" + assert explanation.generated_at.year == 2024 diff --git a/backend/tests/unit/external/test_clerk_client.py 
b/backend/tests/unit/external/test_clerk_client.py index b184ced..5586d8a 100644 --- a/backend/tests/unit/external/test_clerk_client.py +++ b/backend/tests/unit/external/test_clerk_client.py @@ -47,20 +47,25 @@ class TestClerkClient: @patch("src.external.clerk_client.settings") def test_verify_token_valid(self, mock_settings: MagicMock): """Token válido retorna payload correcto.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() token = create_valid_token() result = client.verify_token(token) - assert result["user_id"] == "user_test123" + # verify_token retorna el payload completo del JWT con 'sub' + assert result["sub"] == "user_test123" assert result["email"] == "test@example.com" assert result["name"] == "Test User" @patch("src.external.clerk_client.settings") def test_verify_token_expired(self, mock_settings: MagicMock): """Token expirado lanza ClerkTokenExpiredError.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() token = create_expired_token() @@ -70,7 +75,9 @@ def test_verify_token_expired(self, mock_settings: MagicMock): @patch("src.external.clerk_client.settings") def test_verify_token_invalid(self, mock_settings: MagicMock): """Token inválido lanza ClerkTokenInvalidError.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() with pytest.raises(ClerkTokenInvalidError): @@ -79,7 +86,9 @@ def test_verify_token_invalid(self, mock_settings: MagicMock): @patch("src.external.clerk_client.settings") def test_verify_token_malformed(self, mock_settings: MagicMock): """Token malformado lanza 
ClerkTokenInvalidError.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() with pytest.raises(ClerkTokenInvalidError): @@ -87,8 +96,10 @@ def test_verify_token_malformed(self, mock_settings: MagicMock): @patch("src.external.clerk_client.settings") def test_get_user_id_from_token(self, mock_settings: MagicMock): - """get_user_id_from_token retorna el user_id.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + """get_user_id_from_token retorna el user_id (sub claim).""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() token = create_valid_token() @@ -99,7 +110,9 @@ def test_get_user_id_from_token(self, mock_settings: MagicMock): @patch("src.external.clerk_client.settings") def test_get_user_id_missing_sub(self, mock_settings: MagicMock): """Token sin sub lanza ClerkTokenInvalidError.""" - mock_settings.CLERK_SECRET_KEY = TEST_SECRET_KEY + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None client = ClerkClient() now = int(time.time()) @@ -112,4 +125,5 @@ def test_get_user_id_missing_sub(self, mock_settings: MagicMock): with pytest.raises(ClerkTokenInvalidError) as exc: client.get_user_id_from_token(token) - assert "user_id" in str(exc.value).lower() + # El mensaje ahora menciona 'sub' en lugar de 'user_id' + assert "sub" in str(exc.value).lower() diff --git a/backend/tests/unit/middleware/test_auth.py b/backend/tests/unit/middleware/test_auth.py index d8b2366..7080399 100644 --- a/backend/tests/unit/middleware/test_auth.py +++ b/backend/tests/unit/middleware/test_auth.py @@ -29,7 +29,7 @@ async def test_valid_token_returns_user(self): credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="valid-token") 
mock_payload = { - "user_id": "user_abc123", + "sub": "user_abc123", "email": "test@example.com", "name": "Test User", } @@ -92,7 +92,7 @@ async def test_valid_credentials_returns_user(self): credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="valid-token") mock_payload = { - "user_id": "user_optional", + "sub": "user_optional", "email": "optional@test.com", "name": "Optional User", } diff --git a/backend/tests/unit/services/test_auth_service.py b/backend/tests/unit/services/test_auth_service.py index ae2e4a4..1cef732 100644 --- a/backend/tests/unit/services/test_auth_service.py +++ b/backend/tests/unit/services/test_auth_service.py @@ -44,7 +44,7 @@ def test_login_user_creates_new_user( ): """login_user crea usuario si no existe.""" mock_clerk_client.verify_token.return_value = { - "user_id": "user_new", + "sub": "user_new", "email": "new@example.com", "name": "New User", } @@ -62,7 +62,7 @@ def test_login_user_updates_existing_user( ): """login_user actualiza usuario si ya existe.""" mock_clerk_client.verify_token.return_value = { - "user_id": "user_abc123", + "sub": "user_abc123", "email": "updated@example.com", "name": "Updated Name", } @@ -87,7 +87,7 @@ def test_login_user_invalid_token_raises( def test_get_user_from_token(self, auth_service, mock_clerk_client): """get_user_from_token retorna User sin sincronizar BD.""" mock_clerk_client.verify_token.return_value = { - "user_id": "user_fromtoken", + "sub": "user_fromtoken", "email": "fromtoken@example.com", "name": "From Token", } diff --git a/backend/tests/unit/test_analysis_schemas.py b/backend/tests/unit/test_analysis_schemas.py index ecddd72..11f1147 100644 --- a/backend/tests/unit/test_analysis_schemas.py +++ b/backend/tests/unit/test_analysis_schemas.py @@ -267,7 +267,7 @@ def test_get_lines_and_snippets(self): def test_finding_from_and_to_dict_without_detected_at(self): data = { - "severity": "critical", + "severity": "CRITICAL", "issue_type": "dangerous_function", "message": "Use 
of eval() detected", "line_number": 5, @@ -275,7 +275,7 @@ def test_finding_from_and_to_dict_without_detected_at(self): } finding = Finding.from_dict(data) serialized = finding.to_dict() - assert serialized["severity"] == "critical" + assert serialized["severity"] == "CRITICAL" assert "detected_at" in serialized def test_calculate_penalty_map(self): From 161b9a47d6938cb258dcbadfc80aeec4b5baf26a Mon Sep 17 00:00:00 2001 From: Yosoyepa Date: Thu, 4 Dec 2025 06:11:51 -0500 Subject: [PATCH 2/3] fix(agents): correct severity case mismatch in PerformanceAgent - Change severity ordering list from lowercase to UPPERCASE in performance_agent.py - Update expected_order in integration test to match Severity enum values - Fixes ValueError: 'HIGH' is not in list during findings sorting The Severity enum uses UPPERCASE values ('CRITICAL', 'HIGH', etc.) but the sorting list used lowercase, causing index lookup failures. Fixes 2 failing tests: - test_comprehensive_performance_detection - test_detect_triple_nested_loops_critical --- .github/workflows/docker.yml | 98 +- .github/workflows/lint.yml | 112 +- .github/workflows/test.yml | 140 +- .gitignore | 10 +- CONTRIBUTING.md | 1386 ++++++++-------- backend/.dockerignore | 110 +- backend/.env.example | 100 +- backend/.flake8 | 8 +- backend/Dockerfile | 98 +- backend/alembic/env.py | 238 +-- backend/alembic/script.py.mako | 64 +- ...b8e18_create_initial_tables_users_code_.py | 192 +-- backend/docker-compose.yml | 200 +-- backend/fix_code_quality.sh | 54 +- backend/pyproject.toml | 74 +- backend/requirements-dev.txt | 32 +- backend/requirements.txt | 122 +- .../src/agents/analyzers/flake8_analyzer.py | 344 ++-- .../src/agents/analyzers/pylint_analyzer.py | 344 ++-- backend/src/agents/base_agent.py | 398 ++--- backend/src/agents/quality_agent.py | 574 +++---- backend/src/agents/security_agent.py | 1296 +++++++-------- backend/src/agents/style_agent.py | 1126 ++++++------- backend/src/core/config/ai_config.py | 340 ++-- 
backend/src/core/config/mcp_config.py | 834 +++++----- backend/src/core/config/settings.py | 216 +-- backend/src/core/database.py | 78 +- backend/src/core/dependencies/get_db.py | 58 +- backend/src/core/events/analysis_events.py | 42 +- backend/src/core/events/observers.py | 44 +- backend/src/external/clerk_client.py | 650 ++++---- backend/src/external/gemini_client.py | 620 ++++---- backend/src/external/interfaces/__init__.py | 34 +- backend/src/external/interfaces/ai_client.py | 412 ++--- backend/src/external/mcp_client.py | 380 ++--- backend/src/main.py | 98 +- backend/src/models/__init__.py | 56 +- backend/src/models/base.py | 50 +- backend/src/models/code_review.py | 168 +- backend/src/models/enums/review_status.py | 16 +- backend/src/models/enums/severity_enum.py | 44 +- backend/src/models/enums/user_role.py | 36 +- backend/src/models/finding.py | 212 +-- backend/src/models/user.py | 172 +- .../repositories/code_review_repository.py | 280 ++-- backend/src/repositories/user_repo.py | 294 ++-- backend/src/routers/analysis.py | 132 +- backend/src/routers/auth.py | 234 +-- backend/src/routers/findings.py | 556 +++---- backend/src/schemas/ai_explanation.py | 468 +++--- backend/src/schemas/analysis.py | 562 +++---- backend/src/schemas/finding.py | 336 ++-- backend/src/schemas/user.py | 88 +- backend/src/services/ai_service.py | 836 +++++----- backend/src/services/analysis_service.py | 56 +- backend/src/services/auth_service.py | 254 +-- backend/src/services/mcp_context_enricher.py | 396 ++--- backend/src/utils/encryption/aes_encryptor.py | 102 +- backend/src/utils/logger.py | 22 +- backend/tests/__init__.py | 2 +- backend/tests/conftest.py | 54 +- backend/tests/generate_jwt.py | 2 +- .../tests/integration/test_api_endpoints.py | 600 +++---- backend/tests/integration/test_auth_router.py | 540 +++---- .../test_quality_agent_integration.py | 422 ++--- .../test_security_agent_integration.py | 450 +++--- .../test_style_agent_integration.py | 686 ++++---- 
backend/tests/test_ai_service.py | 842 +++++----- backend/tests/unit/agents/__init__.py | 2 +- .../tests/unit/agents/analyzers/__init__.py | 2 +- .../agents/analyzers/test_flake8_analyzer.py | 748 ++++----- .../agents/analyzers/test_pylint_analyzer.py | 562 +++---- backend/tests/unit/agents/test_base_agent.py | 486 +++--- .../tests/unit/agents/test_quality_agent.py | 360 ++--- .../tests/unit/agents/test_security_agent.py | 824 +++++----- backend/tests/unit/agents/test_style_agent.py | 878 +++++------ .../unit/application/test_analysis_service.py | 18 + backend/tests/unit/bad_quality_code.py | 128 +- backend/tests/unit/combined_test.py | 88 +- .../tests/unit/dependencies/test_get_db.py | 190 +-- backend/tests/unit/domain/test_event_bus.py | 190 +-- .../tests/unit/external/test_clerk_client.py | 258 +-- backend/tests/unit/middleware/test_auth.py | 242 +-- backend/tests/unit/models/test_code_review.py | 194 +-- backend/tests/unit/models/test_finding.py | 340 ++-- backend/tests/unit/models/test_user.py | 338 ++-- .../repositories/test_code_review_repo.py | 264 ++-- .../tests/unit/repositories/test_user_repo.py | 672 ++++---- .../unit/services/test_analysis_service.py | 67 + .../tests/unit/services/test_auth_service.py | 200 +-- backend/tests/unit/test_analysis_schemas.py | 578 +++---- backend/tests/unit/test_main.py | 70 +- backend/tests/unit/vulnerable_test.py | 38 +- docs/ci-cd-setup.md | 1388 ++++++++--------- 94 files changed, 14568 insertions(+), 14451 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index d04ff10..3db84e9 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,49 +1,49 @@ -name: Docker Build - -on: - push: - branches: [main, develop] - paths: - - "backend/Dockerfile" - - "backend/docker-compose.yml" - - "backend/requirements.txt" - - ".github/workflows/docker.yml" - pull_request: - branches: [main] - -jobs: - build: - name: Build Docker Image - runs-on: ubuntu-latest - - steps: - - 
name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Install docker-compose - run: | - sudo apt-get update - sudo apt-get install -y docker-compose - - - name: Build backend image - run: | - cd backend - docker build --tag codeguard-backend:${{ github.sha }} . - - - name: Test Docker image - run: | - docker run --rm codeguard-backend:${{ github.sha }} python --version - - - name: Test Docker Compose (validation only) - run: | - cd backend - docker-compose config - - - name: Summary - if: success() - run: | - echo " Docker image built successfully!" - echo " Image: codeguard-backend:${{ github.sha }}" +name: Docker Build + +on: + push: + branches: [main, develop] + paths: + - "backend/Dockerfile" + - "backend/docker-compose.yml" + - "backend/requirements.txt" + - ".github/workflows/docker.yml" + pull_request: + branches: [main] + +jobs: + build: + name: Build Docker Image + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install docker-compose + run: | + sudo apt-get update + sudo apt-get install -y docker-compose + + - name: Build backend image + run: | + cd backend + docker build --tag codeguard-backend:${{ github.sha }} . + + - name: Test Docker image + run: | + docker run --rm codeguard-backend:${{ github.sha }} python --version + + - name: Test Docker Compose (validation only) + run: | + cd backend + docker-compose config + + - name: Summary + if: success() + run: | + echo " Docker image built successfully!" 
+ echo " Image: codeguard-backend:${{ github.sha }}" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c89c30c..3cfc8bf 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,56 +1,56 @@ -name: Lint & Format Check - -on: - push: - branches: [main, develop, "feature/**"] - paths: - - "backend/**/*.py" - - ".github/workflows/lint.yml" - pull_request: - branches: [main, develop] - paths: - - "backend/**/*.py" - -jobs: - lint: - name: Code Quality Check - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" - - - name: Install linting dependencies - run: | - python -m pip install --upgrade pip - pip install black>=23.0.0 flake8>=7.0.0 isort>=5.13.0 - - - name: Check code formatting with Black - run: | - cd backend - black src/ --line-length=100 --check - - - name: Check import sorting with isort - run: | - cd backend - isort src/ --profile=black --line-length=100 --check-only - - - name: Lint with Flake8 - run: | - cd backend - flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics - flake8 src/ --count --max-complexity=10 --max-line-length=100 --statistics - - - name: Summary - if: success() - run: | - echo "= All code quality checks passed!" 
- echo "- Black formatting: ✓" - echo "- Import sorting (isort): ✓" - echo "- Linting (flake8): ✓" +name: Lint & Format Check + +on: + push: + branches: [main, develop, "feature/**"] + paths: + - "backend/**/*.py" + - ".github/workflows/lint.yml" + pull_request: + branches: [main, develop] + paths: + - "backend/**/*.py" + +jobs: + lint: + name: Code Quality Check + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install linting dependencies + run: | + python -m pip install --upgrade pip + pip install black>=23.0.0 flake8>=7.0.0 isort>=5.13.0 + + - name: Check code formatting with Black + run: | + cd backend + black src/ --line-length=100 --check + + - name: Check import sorting with isort + run: | + cd backend + isort src/ --profile=black --line-length=100 --check-only + + - name: Lint with Flake8 + run: | + cd backend + flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 src/ --count --max-complexity=10 --max-line-length=100 --statistics + + - name: Summary + if: success() + run: | + echo "= All code quality checks passed!" 
+ echo "- Black formatting: ✓" + echo "- Import sorting (isort): ✓" + echo "- Linting (flake8): ✓" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c817c1..98abeb8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,70 +1,70 @@ -name: Tests & Coverage - -on: - push: - branches: [main, develop, "feature/**"] - paths: - - "backend/**/*.py" - - "backend/tests/**" - - ".github/workflows/test.yml" - pull_request: - branches: [main, develop] - paths: - - "backend/**/*.py" - - "backend/tests/**" - -jobs: - test: - name: Run Tests & Coverage - runs-on: ubuntu-latest - - env: - CLERK_SECRET_KEY: ${{ secrets.CLERK_SECRET_KEY }} - CLERK_PUBLISHABLE_KEY: ${{ secrets.CLERK_PUBLISHABLE_KEY }} - DATABASE_URL: ${{ secrets.DATABASE_URL }} - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" - - - name: Install dependencies - run: | - cd backend - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest>=8.0.0 pytest-asyncio>=0.23.0 pytest-cov>=4.1.0 - - - name: Run tests with coverage - run: | - cd backend - pytest tests/ --cov=src --cov-report=html --cov-report=term-missing --cov-report=xml --cov-fail-under=75 -v - continue-on-error: false - - - name: Upload coverage report - if: always() - uses: actions/upload-artifact@v4 - with: - name: coverage-report - path: backend/htmlcov/ - retention-days: 30 - - - name: Upload coverage to Codecov (optional) - if: always() - uses: codecov/codecov-action@v4 - with: - file: backend/coverage.xml - flags: unittests - name: codecov-umbrella - fail_ci_if_error: false - - - name: Summary - if: success() - run: | - echo "All tests passed with >75% coverage!" 
- echo "Coverage report uploaded as artifact" +name: Tests & Coverage + +on: + push: + branches: [main, develop, "feature/**"] + paths: + - "backend/**/*.py" + - "backend/tests/**" + - ".github/workflows/test.yml" + pull_request: + branches: [main, develop] + paths: + - "backend/**/*.py" + - "backend/tests/**" + +jobs: + test: + name: Run Tests & Coverage + runs-on: ubuntu-latest + + env: + CLERK_SECRET_KEY: ${{ secrets.CLERK_SECRET_KEY }} + CLERK_PUBLISHABLE_KEY: ${{ secrets.CLERK_PUBLISHABLE_KEY }} + DATABASE_URL: ${{ secrets.DATABASE_URL }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest>=8.0.0 pytest-asyncio>=0.23.0 pytest-cov>=4.1.0 + + - name: Run tests with coverage + run: | + cd backend + pytest tests/ --cov=src --cov-report=html --cov-report=term-missing --cov-report=xml --cov-fail-under=75 -v + continue-on-error: false + + - name: Upload coverage report + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: backend/htmlcov/ + retention-days: 30 + + - name: Upload coverage to Codecov (optional) + if: always() + uses: codecov/codecov-action@v4 + with: + file: backend/coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: false + + - name: Summary + if: success() + run: | + echo "All tests passed with >75% coverage!" 
+ echo "Coverage report uploaded as artifact" diff --git a/.gitignore b/.gitignore index 9750545..35811b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -/test -/out -.DS_Store -node_modules -.env +/test +/out +.DS_Store +node_modules +.env diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93e4886..79c0fe5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,693 +1,693 @@ -# 🤝 Guía de Contribución - CodeGuard AI - -¡Gracias por tu interés en contribuir a **CodeGuard AI**! Este documento te guía a través del flujo de trabajo para colaborar efectivamente en el proyecto. - ---- - -## 📋 Tabla de Contenidos - -- [Código de Conducta](#-código-de-conducta) -- [Cómo Contribuir](#-cómo-contribuir) -- [Flujo de Trabajo GitFlow](#-flujo-de-trabajo-gitflow) -- [Convenciones de Commits](#-convenciones-de-commits) -- [Estándares de Calidad](#-estándares-de-calidad) -- [Proceso de Pull Request](#-proceso-de-pull-request) -- [Configuración del Entorno](#-configuración-del-entorno) -- [Testing](#-testing) -- [Reportar Bugs](#-reportar-bugs) -- [Sugerir Mejoras](#-sugerir-mejoras) - ---- - -## 📜 Código de Conducta - -Este proyecto y todos los participantes están regidos por nuestro **Código de Conducta**. Por favor: - -- ✅ Sé respetuoso y empático con otros contribuidores -- ✅ Acepta críticas constructivas con gracia -- ✅ Enfócate en lo que es mejor para la comunidad -- ✅ Muestra cortesía hacia diferentes puntos de vista - -**Para reportar comportamientos inaceptables**, contáctanos en: -📧 `codeguard-ai@unal.edu.co` - ---- - -## 🚀 Cómo Contribuir - -### Tipos de Contribuciones - -1. **🐛 Reportar Bugs**: Identifica y documenta errores -2. **✨ Implementar Features**: Desarrolla nuevas funcionalidades -3. **📝 Mejorar Documentación**: Actualiza o crea documentación -4. **🧪 Escribir Tests**: Aumenta la cobertura de pruebas -5. **🎨 Refactorizar Código**: Mejora la estructura sin cambiar funcionalidad -6. 
**⚡ Optimizar Rendimiento**: Mejora velocidad o uso de recursos - -### Antes de Empezar - -1. ✅ **Revisa el backlog**: Ve a [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) -2. ✅ **Busca issue abierto**: Verifica que no esté duplicado -3. ✅ **Asigna el issue**: Comenta que deseas trabajar en él -4. ✅ **Lee la documentación**: Familiarízate con la arquitectura - ---- - -## 🔀 Flujo de Trabajo GitFlow - -CodeGuard AI utiliza **GitFlow** como estrategia de branching. Este modelo define ramas para diferentes propósitos. - -### Estructura de Ramas - -``` -main (producción) - └─ Etiquetas: v1.0.0, v1.1.0 - ↑ (merges desde release/* y hotfix/*) - -develop (integración) - └─ Rama principal de desarrollo - ↑ (merges desde feature/*, bugfix/*, hotfix/*) - -feature/* (features nuevas) - ├─ feature/CGAI-12-base-agent - ├─ feature/CGAI-19-security-agent - └─ feature/CGAI-20-fastapi-endpoint - -bugfix/* (bugs en develop) - └─ bugfix/CGAI-99-fix-orchestrator-timeout - -hotfix/* (bugs críticos en main) - └─ hotfix/CGAI-98-security-patch - -release/* (preparación de releases - Sprint 2+) - └─ release/v1.1.0 -``` - -### Crear Feature Branch - -```bash -# 1. Asegúrate que develop esté actualizado -git checkout develop -git pull origin develop - -# 2. Crear feature branch (formato: feature/CGAI-XX-descripcion-corta) -git checkout -b feature/CGAI-19-security-agent - -# 3. Hacer cambios y commits -# ... trabajar en el código ... - -# 4. Mantener actualizado con develop -git fetch origin -git rebase origin/develop - -# 5. Push -git push -u origin feature/CGAI-19-security-agent - -# 6. Crear PR en GitHub -``` - -### Crear Bugfix Branch (bugs en develop) - -```bash -git checkout develop -git pull origin develop -git checkout -b bugfix/CGAI-99-fix-description -# ... hacer cambios ... 
-git push -u origin bugfix/CGAI-99-fix-description -``` - -### Crear Hotfix Branch (bugs críticos en main) - -```bash -# Los hotfix se ramifican desde main -git checkout main -git pull origin main -git checkout -b hotfix/CGAI-98-critical-fix - -# Hacer fix y commit -git commit -m "fix(agents): patch critical vulnerability - -[descripción del fix]" - -# Merge a main -git checkout main -git merge --no-ff hotfix/CGAI-98-critical-fix -git push origin main - -# Merge también a develop -git checkout develop -git merge --no-ff hotfix/CGAI-98-critical-fix -git push origin develop -``` - -### Release Branch (Sprint 2+) - -```bash -# Para preparar una versión -git checkout develop -git checkout -b release/v1.1.0 - -# En release solo se corrigen bugs, no se agregan features -git commit -m "bump version to 1.1.0" - -# Merge a main con tag -git checkout main -git merge --no-ff release/v1.1.0 -git tag -a v1.1.0 -m "Release version 1.1.0" -git push origin main --tags -``` - ---- - -## 📝 Convenciones de Commits - -CodeGuard AI sigue **Conventional Commits** para mantener un historial limpio y automatizable. 
- -### Formato - -``` -(): - -[cuerpo opcional] - -[footer(s) opcional(es)] -``` - -### Tipos de Commits - -| Tipo | Descripción | Ejemplo | -|------|-------------|---------| -| `feat` | Nueva funcionalidad | `feat(security): add hardcoded credentials detection` | -| `fix` | Corrección de bug | `fix(api): handle null pointer in analyze endpoint` | -| `docs` | Cambios en documentación | `docs(readme): update installation steps` | -| `style` | Formato (sin cambio lógico) | `style(code): format with black` | -| `refactor` | Refactorización sin cambiar funcionalidad | `refactor(agents): extract logging method` | -| `test` | Agregar o modificar tests | `test(security): add unit tests for eval detection` | -| `chore` | Mantenimiento, dependencias | `chore(deps): update pytest to 8.0` | -| `perf` | Mejora de rendimiento | `perf(analysis): optimize AST parsing` | -| `ci` | Cambios en CI/CD | `ci(github): add coverage reporting` | - -### Scopes Comunes - -``` -agents, security, quality, performance, style, orchestrator -api, schemas, routers, services, core, database -auth, cache, events, config, dependencies -docker, ci, tests, docs -``` - -### Ejemplos Correctos - -```bash -# Feature simple -git commit -m "feat(security): add SQL injection detection" - -# Bug fix -git commit -m "fix(api): return 422 for invalid filename" - -# Con cuerpo -git commit -m "feat(agents): implement quality metrics calculation - -- Add cyclomatic complexity calculation -- Add code duplication detection -- Add test coverage computation -- Related to CGAI-20" - -# Breaking change -git commit -m "feat(api)!: change analyze response format - -BREAKING CHANGE: response now uses 'analysis_id' instead of 'id'" - -# Multiple scopes -git commit -m "refactor(core,services): improve dependency injection - -- Simplify container initialization -- Add lazy loading for services -- Update documentation" -``` - -### ❌ Ejemplos Incorrectos - -```bash -# Falta tipo -git commit -m "add new feature" - -# Tipo 
incorrecto -git commit -m "Feature: add new agent" - -# Descripción muy vaga -git commit -m "fix: fixes bug" - -# Mayúscula al inicio -git commit -m "feat: Add new endpoint" - -# Punto al final -git commit -m "feat(security): add detection." - -# Demasiado largo (>72 caracteres) -git commit -m "feat(api): implement a very comprehensive analysis system for detecting all types of vulnerabilities" -``` - -### Reglas de Formato - -| Regla | Detalle | -|-------|---------| -| **Primera línea** | Máximo 72 caracteres | -| **Cuerpo** | Máximo 100 caracteres por línea | -| **Tipo** | En minúscula | -| **Scope** | En minúscula (opcional) | -| **Descripción** | Comienza en minúscula, modo imperativo | -| **Punto final** | Sin punto en la primera línea | - ---- - -## ✅ Estándares de Calidad - -### 1. Linting (Pylint ≥ 8.5/10) - -```bash -cd backend - -# Ejecutar pylint -pylint src/ --rcfile=.pylintrc - -# Verificar score -pylint src/ --rcfile=.pylintrc | grep -E "rated at" -``` - -**Configuración** (`.pylintrc`): -```ini -[MASTER] -max-line-length=100 -disable=C0111,C0103,R0903 - -[MESSAGES CONTROL] -disable=missing-docstring,too-few-public-methods -``` - -### 2. Testing (Coverage ≥ 75%) - -```bash -cd backend - -# Ejecutar tests con cobertura -pytest tests/ \ - --cov=src \ - --cov-report=term-missing \ - --cov-report=html \ - --cov-fail-under=75 \ - -v -``` - -### 3. Formateo (Black + isort) - -```bash -cd backend - -# Formatear código -black src/ tests/ --line-length=100 -isort src/ tests/ --profile=black - -# Verificar sin modificar -black --check src/ tests/ -isort --check-only src/ tests/ -``` - -### 4. Type Hints (Obligatorio para métodos públicos) - -```python -# ✅ Correcto -def analyze(self, context: AnalysisContext) -> List[Finding]: - """Analyze code and return findings.""" - pass - -# ❌ Incorrecto -def analyze(self, context): - return [] -``` - -### 5. 
Docstrings (Obligatorio para clases y métodos públicos) - -```python -# ✅ Correcto -class SecurityAgent(BaseAgent): - """ - Agent for detecting security vulnerabilities. - - Analyzes Python code for: - - Dangerous functions (eval, exec) - - SQL injection patterns - - Hardcoded credentials - """ - - def analyze(self, context: AnalysisContext) -> List[Finding]: - """ - Analyze code for security issues. - - Args: - context: Analysis context with code and metadata - - Returns: - List of security findings - """ - pass - -# ❌ Incorrecto -class SecurityAgent(BaseAgent): - def analyze(self, context): - pass -``` - ---- - -## 🔄 Proceso de Pull Request - -### Antes de Crear el PR - -```bash -cd backend - -# 1. Verificar linting -pylint src/ --rcfile=.pylintrc - -# 2. Ejecutar tests localmente -pytest tests/ --cov=src --cov-fail-under=75 - -# 3. Formatear código -black src/ tests/ --line-length=100 -isort src/ tests/ --profile=black - -# 4. Verificar commits -git log --oneline -5 -# Todos deben tener formato: tipo(scope): descripcion - -# 5. Rebase con develop (si es necesario) -git fetch origin -git rebase origin/develop -``` - -### Crear Pull Request - -1. **Push de la rama**: -```bash -git push -u origin feature/CGAI-19-security-agent -``` - -2. **Crear PR en GitHub**: - - Base: `develop` (o `main` para hotfixes) - - Compare: tu rama - -3. **Completar la plantilla del PR**: - -```markdown -## 📝 Descripción -Implementa detección de credenciales hardcodeadas en SecurityAgent para identificar contraseñas, API keys y tokens en código Python. 
- -## 🎯 Historia de Usuario Relacionada -Closes #19 (CGAI-19: SecurityAgent v1) - -## 🧪 Cómo se Probó -- [x] Tests unitarios agregados (15 nuevos tests) -- [x] Tests de integración con AnalysisService -- [x] Probado manualmente con código malicioso -- [x] Cobertura: 88% (cumple umbral 75%) - -## ✅ Checklist Previo al Merge -- [x] Mi código sigue las convenciones del proyecto -- [x] He agregado tests que prueban mis cambios -- [x] Todos los tests pasan localmente (`pytest`) -- [x] He actualizado la documentación relevante -- [x] Mis commits siguen Conventional Commits -- [x] He hecho rebase con develop -- [x] He ejecutado linting localmente -- [x] He verificado coverage >75% - -## 📸 Screenshots (si aplica) -N/A - -## 📚 Notas Adicionales -- Implementa detección con regex patterns -- Detecta placeholders (YOUR_, REPLACE_) para evitar falsos positivos -- Integrado con EventBus para notificaciones en tiempo real -- Compatible con Python 3.11+ -``` - -### Revisión de Código - -**Requisitos para merge**: -1. ✅ **CI Passing**: Los 3 workflows en verde - - `lint.yml`: Pylint ≥ 8.5/10 - - `test.yml`: Tests passing + coverage ≥ 75% - - `docker.yml`: Build exitoso - -2. ✅ **1+ Aprobación**: Al menos un reviewer - -3. 
✅ **Conflicts Resolved**: Sin conflictos con base - -**Proceso**: -- Revisor deja comentarios en líneas específicas -- Autor responde y hace cambios -- Push de commits adicionales (NO force push) -- Revisor aprueba cuando cambios son satisfactorios - -### Merge del PR - -```bash -# Merge strategy: Squash (por defecto para features) -# Esto combina todos los commits en uno solo - -# Mensaje de merge sugerido: -feat(security): detect hardcoded credentials (#19) - -- Implement regex-based credential detection -- Add placeholders to avoid false positives -- Integrate with event system -- Add comprehensive unit tests (88% coverage) - -Closes CGAI-19 -``` - -**Después del merge**: -```bash -# Branch se elimina automáticamente en GitHub -# O manualmente: -git branch -d feature/CGAI-19-security-agent -git push origin --delete feature/CGAI-19-security-agent -``` - ---- - -## 🛠️ Configuración del Entorno - -### Requisitos Previos - -- Python 3.11+ -- Git -- Docker (opcional) -- VSCode o PyCharm - -### Instalación - -```bash -# 1. Fork y clonar -git clone https://github.com/YOUR_USERNAME/CodeGuard-Unal.git -cd CodeGuard-Unal/backend - -# 2. Agregar remote upstream -git remote add upstream https://github.com/YOUR_ORG/CodeGuard-Unal.git - -# 3. Entorno virtual -python3.11 -m venv venv -source venv/bin/activate # Windows: venv\Scripts\activate - -# 4. Instalar dependencias -pip install --upgrade pip -pip install -r requirements.txt -pip install -r requirements-dev.txt - -# 5. Pre-commit hooks -pip install pre-commit -pre-commit install - -# 6. Copiar .env -cp .env.example .env -``` - -### Pre-commit Hooks (Validación Automática) - -Los pre-commit hooks ejecutan validaciones automáticamente antes de cada commit. 
- -**Archivo**: `.pre-commit-config.yaml` - -```yaml -repos: - - repo: https://github.com/psf/black - rev: 23.9.1 - hooks: - - id: black - language_version: python3.11 - - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort - args: [--profile=black] - - - repo: https://github.com/PyCQA/pylint - rev: v3.0.0 - hooks: - - id: pylint - args: [--rcfile=.pylintrc, --fail-under=8.5] -``` - ---- - -## 🧪 Testing - -### Ejecutar Tests - -```bash -cd backend - -# Todos los tests -pytest tests/ -v - -# Solo tests unitarios -pytest tests/unit/ -v - -# Solo tests de integración -pytest tests/integration/ -v - -# Con cobertura detallada -pytest tests/ --cov=src --cov-report=term-missing - -# HTML report -pytest tests/ --cov=src --cov-report=html -open htmlcov/index.html -``` - -### Estructura de Tests - -``` -backend/tests/ -├── unit/ # Tests de componentes aislados -│ ├── test_base_agent.py -│ ├── test_security_agent.py -│ └── ... -├── integration/ # Tests de interacción entre componentes -│ ├── test_analysis_service.py -│ └── test_security_agent_integration.py -├── e2e/ # Tests end-to-end -│ └── test_complete_analysis.py -├── fixtures/ # Mock data -│ ├── mock_data.py -│ └── sample_code.py -└── conftest.py # Pytest fixtures -``` - -### Escribir Tests - -```python -import pytest -from src.agents.security_agent import SecurityAgent -from src.schemas.analysis import AnalysisContext - -class TestSecurityAgent: - """Test suite for SecurityAgent""" - - @pytest.fixture - def agent(self): - """Create agent instance""" - return SecurityAgent() - - def test_detect_eval(self, agent): - """Test detection of eval() function""" - code = "result = eval(user_input)" - context = AnalysisContext( - code_content=code, - filename="test.py" - ) - - findings = agent.analyze(context) - - assert len(findings) >= 1 - assert any(f.issue_type == "dangerous_function" for f in findings) -``` - ---- - -## 🐛 Reportar Bugs - -### Antes de Reportar - -1. 
Busca issues existentes duplicados -2. Reproduce el bug consistentemente -3. Recopila información: OS, Python version, logs - -### Template de Issue para Bugs - -```markdown -## 🐛 Descripción del Bug -Descripción clara y concisa del problema. - -## 🔄 Pasos para Reproducir -1. Cargar archivo con 'eval' -2. Llamar POST /api/v1/analyze -3. Observar que no se detecta eval - -## ✅ Comportamiento Esperado -El SecurityAgent debería detectar eval con severity=critical - -## ❌ Comportamiento Actual -El análisis retorna 0 findings - -## 📋 Contexto -- OS: Ubuntu 22.04 -- Python: 3.11.5 -- Branch: develop - -## 📝 Logs -\`\`\` -[ERROR] AST parsing failed for test.py -Traceback... -\`\`\` -``` - ---- - -## ✨ Sugerir Mejoras - -### Template de Feature Request - -```markdown -## ✨ Descripción -Agregar soporte para detección de SSRF (Server-Side Request Forgery) - -## 🎯 Problema que Resuelve -SSRF está en OWASP Top 10 y no está detectado actualmente - -## 💡 Solución Propuesta -- Detectar urllib/requests sin validación -- Identificar patrones como requests.get(user_input) -- Sugerir listas blancas de dominios - -## 🔄 Alternativas Consideradas -- Integrar Bandit con regla B310 -- Custom regex patterns -``` - ---- - -## 💬 Preguntas? - -- **Slack**: [#codeguard-dev](https://codeguard-unal.slack.com) -- **Email**: codeguard-ai@unal.edu.co -- **Office Hours**: Martes y Jueves 2-4 PM (COT) -- **Issues**: [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) - ---- - -
-

Gracias por contribuir a CodeGuard AI ❤️

-

Juntos hacemos mejores desarrolladores y código más seguro

-
+# 🤝 Guía de Contribución - CodeGuard AI + +¡Gracias por tu interés en contribuir a **CodeGuard AI**! Este documento te guía a través del flujo de trabajo para colaborar efectivamente en el proyecto. + +--- + +## 📋 Tabla de Contenidos + +- [Código de Conducta](#-código-de-conducta) +- [Cómo Contribuir](#-cómo-contribuir) +- [Flujo de Trabajo GitFlow](#-flujo-de-trabajo-gitflow) +- [Convenciones de Commits](#-convenciones-de-commits) +- [Estándares de Calidad](#-estándares-de-calidad) +- [Proceso de Pull Request](#-proceso-de-pull-request) +- [Configuración del Entorno](#-configuración-del-entorno) +- [Testing](#-testing) +- [Reportar Bugs](#-reportar-bugs) +- [Sugerir Mejoras](#-sugerir-mejoras) + +--- + +## 📜 Código de Conducta + +Este proyecto y todos los participantes están regidos por nuestro **Código de Conducta**. Por favor: + +- ✅ Sé respetuoso y empático con otros contribuidores +- ✅ Acepta críticas constructivas con gracia +- ✅ Enfócate en lo que es mejor para la comunidad +- ✅ Muestra cortesía hacia diferentes puntos de vista + +**Para reportar comportamientos inaceptables**, contáctanos en: +📧 `codeguard-ai@unal.edu.co` + +--- + +## 🚀 Cómo Contribuir + +### Tipos de Contribuciones + +1. **🐛 Reportar Bugs**: Identifica y documenta errores +2. **✨ Implementar Features**: Desarrolla nuevas funcionalidades +3. **📝 Mejorar Documentación**: Actualiza o crea documentación +4. **🧪 Escribir Tests**: Aumenta la cobertura de pruebas +5. **🎨 Refactorizar Código**: Mejora la estructura sin cambiar funcionalidad +6. **⚡ Optimizar Rendimiento**: Mejora velocidad o uso de recursos + +### Antes de Empezar + +1. ✅ **Revisa el backlog**: Ve a [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) +2. ✅ **Busca issue abierto**: Verifica que no esté duplicado +3. ✅ **Asigna el issue**: Comenta que deseas trabajar en él +4. 
✅ **Lee la documentación**: Familiarízate con la arquitectura + +--- + +## 🔀 Flujo de Trabajo GitFlow + +CodeGuard AI utiliza **GitFlow** como estrategia de branching. Este modelo define ramas para diferentes propósitos. + +### Estructura de Ramas + +``` +main (producción) + └─ Etiquetas: v1.0.0, v1.1.0 + ↑ (merges desde release/* y hotfix/*) + +develop (integración) + └─ Rama principal de desarrollo + ↑ (merges desde feature/*, bugfix/*, hotfix/*) + +feature/* (features nuevas) + ├─ feature/CGAI-12-base-agent + ├─ feature/CGAI-19-security-agent + └─ feature/CGAI-20-fastapi-endpoint + +bugfix/* (bugs en develop) + └─ bugfix/CGAI-99-fix-orchestrator-timeout + +hotfix/* (bugs críticos en main) + └─ hotfix/CGAI-98-security-patch + +release/* (preparación de releases - Sprint 2+) + └─ release/v1.1.0 +``` + +### Crear Feature Branch + +```bash +# 1. Asegúrate que develop esté actualizado +git checkout develop +git pull origin develop + +# 2. Crear feature branch (formato: feature/CGAI-XX-descripcion-corta) +git checkout -b feature/CGAI-19-security-agent + +# 3. Hacer cambios y commits +# ... trabajar en el código ... + +# 4. Mantener actualizado con develop +git fetch origin +git rebase origin/develop + +# 5. Push +git push -u origin feature/CGAI-19-security-agent + +# 6. Crear PR en GitHub +``` + +### Crear Bugfix Branch (bugs en develop) + +```bash +git checkout develop +git pull origin develop +git checkout -b bugfix/CGAI-99-fix-description +# ... hacer cambios ... 
+git push -u origin bugfix/CGAI-99-fix-description +``` + +### Crear Hotfix Branch (bugs críticos en main) + +```bash +# Los hotfix se ramifican desde main +git checkout main +git pull origin main +git checkout -b hotfix/CGAI-98-critical-fix + +# Hacer fix y commit +git commit -m "fix(agents): patch critical vulnerability + +[descripción del fix]" + +# Merge a main +git checkout main +git merge --no-ff hotfix/CGAI-98-critical-fix +git push origin main + +# Merge también a develop +git checkout develop +git merge --no-ff hotfix/CGAI-98-critical-fix +git push origin develop +``` + +### Release Branch (Sprint 2+) + +```bash +# Para preparar una versión +git checkout develop +git checkout -b release/v1.1.0 + +# En release solo se corrigen bugs, no se agregan features +git commit -m "bump version to 1.1.0" + +# Merge a main con tag +git checkout main +git merge --no-ff release/v1.1.0 +git tag -a v1.1.0 -m "Release version 1.1.0" +git push origin main --tags +``` + +--- + +## 📝 Convenciones de Commits + +CodeGuard AI sigue **Conventional Commits** para mantener un historial limpio y automatizable. 
+ +### Formato + +``` +(): + +[cuerpo opcional] + +[footer(s) opcional(es)] +``` + +### Tipos de Commits + +| Tipo | Descripción | Ejemplo | +|------|-------------|---------| +| `feat` | Nueva funcionalidad | `feat(security): add hardcoded credentials detection` | +| `fix` | Corrección de bug | `fix(api): handle null pointer in analyze endpoint` | +| `docs` | Cambios en documentación | `docs(readme): update installation steps` | +| `style` | Formato (sin cambio lógico) | `style(code): format with black` | +| `refactor` | Refactorización sin cambiar funcionalidad | `refactor(agents): extract logging method` | +| `test` | Agregar o modificar tests | `test(security): add unit tests for eval detection` | +| `chore` | Mantenimiento, dependencias | `chore(deps): update pytest to 8.0` | +| `perf` | Mejora de rendimiento | `perf(analysis): optimize AST parsing` | +| `ci` | Cambios en CI/CD | `ci(github): add coverage reporting` | + +### Scopes Comunes + +``` +agents, security, quality, performance, style, orchestrator +api, schemas, routers, services, core, database +auth, cache, events, config, dependencies +docker, ci, tests, docs +``` + +### Ejemplos Correctos + +```bash +# Feature simple +git commit -m "feat(security): add SQL injection detection" + +# Bug fix +git commit -m "fix(api): return 422 for invalid filename" + +# Con cuerpo +git commit -m "feat(agents): implement quality metrics calculation + +- Add cyclomatic complexity calculation +- Add code duplication detection +- Add test coverage computation +- Related to CGAI-20" + +# Breaking change +git commit -m "feat(api)!: change analyze response format + +BREAKING CHANGE: response now uses 'analysis_id' instead of 'id'" + +# Multiple scopes +git commit -m "refactor(core,services): improve dependency injection + +- Simplify container initialization +- Add lazy loading for services +- Update documentation" +``` + +### ❌ Ejemplos Incorrectos + +```bash +# Falta tipo +git commit -m "add new feature" + +# Tipo 
incorrecto +git commit -m "Feature: add new agent" + +# Descripción muy vaga +git commit -m "fix: fixes bug" + +# Mayúscula al inicio +git commit -m "feat: Add new endpoint" + +# Punto al final +git commit -m "feat(security): add detection." + +# Demasiado largo (>72 caracteres) +git commit -m "feat(api): implement a very comprehensive analysis system for detecting all types of vulnerabilities" +``` + +### Reglas de Formato + +| Regla | Detalle | +|-------|---------| +| **Primera línea** | Máximo 72 caracteres | +| **Cuerpo** | Máximo 100 caracteres por línea | +| **Tipo** | En minúscula | +| **Scope** | En minúscula (opcional) | +| **Descripción** | Comienza en minúscula, modo imperativo | +| **Punto final** | Sin punto en la primera línea | + +--- + +## ✅ Estándares de Calidad + +### 1. Linting (Pylint ≥ 8.5/10) + +```bash +cd backend + +# Ejecutar pylint +pylint src/ --rcfile=.pylintrc + +# Verificar score +pylint src/ --rcfile=.pylintrc | grep -E "rated at" +``` + +**Configuración** (`.pylintrc`): +```ini +[MASTER] +max-line-length=100 +disable=C0111,C0103,R0903 + +[MESSAGES CONTROL] +disable=missing-docstring,too-few-public-methods +``` + +### 2. Testing (Coverage ≥ 75%) + +```bash +cd backend + +# Ejecutar tests con cobertura +pytest tests/ \ + --cov=src \ + --cov-report=term-missing \ + --cov-report=html \ + --cov-fail-under=75 \ + -v +``` + +### 3. Formateo (Black + isort) + +```bash +cd backend + +# Formatear código +black src/ tests/ --line-length=100 +isort src/ tests/ --profile=black + +# Verificar sin modificar +black --check src/ tests/ +isort --check-only src/ tests/ +``` + +### 4. Type Hints (Obligatorio para métodos públicos) + +```python +# ✅ Correcto +def analyze(self, context: AnalysisContext) -> List[Finding]: + """Analyze code and return findings.""" + pass + +# ❌ Incorrecto +def analyze(self, context): + return [] +``` + +### 5. 
Docstrings (Obligatorio para clases y métodos públicos) + +```python +# ✅ Correcto +class SecurityAgent(BaseAgent): + """ + Agent for detecting security vulnerabilities. + + Analyzes Python code for: + - Dangerous functions (eval, exec) + - SQL injection patterns + - Hardcoded credentials + """ + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analyze code for security issues. + + Args: + context: Analysis context with code and metadata + + Returns: + List of security findings + """ + pass + +# ❌ Incorrecto +class SecurityAgent(BaseAgent): + def analyze(self, context): + pass +``` + +--- + +## 🔄 Proceso de Pull Request + +### Antes de Crear el PR + +```bash +cd backend + +# 1. Verificar linting +pylint src/ --rcfile=.pylintrc + +# 2. Ejecutar tests localmente +pytest tests/ --cov=src --cov-fail-under=75 + +# 3. Formatear código +black src/ tests/ --line-length=100 +isort src/ tests/ --profile=black + +# 4. Verificar commits +git log --oneline -5 +# Todos deben tener formato: tipo(scope): descripcion + +# 5. Rebase con develop (si es necesario) +git fetch origin +git rebase origin/develop +``` + +### Crear Pull Request + +1. **Push de la rama**: +```bash +git push -u origin feature/CGAI-19-security-agent +``` + +2. **Crear PR en GitHub**: + - Base: `develop` (o `main` para hotfixes) + - Compare: tu rama + +3. **Completar la plantilla del PR**: + +```markdown +## 📝 Descripción +Implementa detección de credenciales hardcodeadas en SecurityAgent para identificar contraseñas, API keys y tokens en código Python. 
+ +## 🎯 Historia de Usuario Relacionada +Closes #19 (CGAI-19: SecurityAgent v1) + +## 🧪 Cómo se Probó +- [x] Tests unitarios agregados (15 nuevos tests) +- [x] Tests de integración con AnalysisService +- [x] Probado manualmente con código malicioso +- [x] Cobertura: 88% (cumple umbral 75%) + +## ✅ Checklist Previo al Merge +- [x] Mi código sigue las convenciones del proyecto +- [x] He agregado tests que prueban mis cambios +- [x] Todos los tests pasan localmente (`pytest`) +- [x] He actualizado la documentación relevante +- [x] Mis commits siguen Conventional Commits +- [x] He hecho rebase con develop +- [x] He ejecutado linting localmente +- [x] He verificado coverage >75% + +## 📸 Screenshots (si aplica) +N/A + +## 📚 Notas Adicionales +- Implementa detección con regex patterns +- Detecta placeholders (YOUR_, REPLACE_) para evitar falsos positivos +- Integrado con EventBus para notificaciones en tiempo real +- Compatible con Python 3.11+ +``` + +### Revisión de Código + +**Requisitos para merge**: +1. ✅ **CI Passing**: Los 3 workflows en verde + - `lint.yml`: Pylint ≥ 8.5/10 + - `test.yml`: Tests passing + coverage ≥ 75% + - `docker.yml`: Build exitoso + +2. ✅ **1+ Aprobación**: Al menos un reviewer + +3. 
✅ **Conflicts Resolved**: Sin conflictos con base + +**Proceso**: +- Revisor deja comentarios en líneas específicas +- Autor responde y hace cambios +- Push de commits adicionales (NO force push) +- Revisor aprueba cuando cambios son satisfactorios + +### Merge del PR + +```bash +# Merge strategy: Squash (por defecto para features) +# Esto combina todos los commits en uno solo + +# Mensaje de merge sugerido: +feat(security): detect hardcoded credentials (#19) + +- Implement regex-based credential detection +- Add placeholders to avoid false positives +- Integrate with event system +- Add comprehensive unit tests (88% coverage) + +Closes CGAI-19 +``` + +**Después del merge**: +```bash +# Branch se elimina automáticamente en GitHub +# O manualmente: +git branch -d feature/CGAI-19-security-agent +git push origin --delete feature/CGAI-19-security-agent +``` + +--- + +## 🛠️ Configuración del Entorno + +### Requisitos Previos + +- Python 3.11+ +- Git +- Docker (opcional) +- VSCode o PyCharm + +### Instalación + +```bash +# 1. Fork y clonar +git clone https://github.com/YOUR_USERNAME/CodeGuard-Unal.git +cd CodeGuard-Unal/backend + +# 2. Agregar remote upstream +git remote add upstream https://github.com/YOUR_ORG/CodeGuard-Unal.git + +# 3. Entorno virtual +python3.11 -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate + +# 4. Instalar dependencias +pip install --upgrade pip +pip install -r requirements.txt +pip install -r requirements-dev.txt + +# 5. Pre-commit hooks +pip install pre-commit +pre-commit install + +# 6. Copiar .env +cp .env.example .env +``` + +### Pre-commit Hooks (Validación Automática) + +Los pre-commit hooks ejecutan validaciones automáticamente antes de cada commit. 
+ +**Archivo**: `.pre-commit-config.yaml` + +```yaml +repos: + - repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black + language_version: python3.11 + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + args: [--profile=black] + + - repo: https://github.com/PyCQA/pylint + rev: v3.0.0 + hooks: + - id: pylint + args: [--rcfile=.pylintrc, --fail-under=8.5] +``` + +--- + +## 🧪 Testing + +### Ejecutar Tests + +```bash +cd backend + +# Todos los tests +pytest tests/ -v + +# Solo tests unitarios +pytest tests/unit/ -v + +# Solo tests de integración +pytest tests/integration/ -v + +# Con cobertura detallada +pytest tests/ --cov=src --cov-report=term-missing + +# HTML report +pytest tests/ --cov=src --cov-report=html +open htmlcov/index.html +``` + +### Estructura de Tests + +``` +backend/tests/ +├── unit/ # Tests de componentes aislados +│ ├── test_base_agent.py +│ ├── test_security_agent.py +│ └── ... +├── integration/ # Tests de interacción entre componentes +│ ├── test_analysis_service.py +│ └── test_security_agent_integration.py +├── e2e/ # Tests end-to-end +│ └── test_complete_analysis.py +├── fixtures/ # Mock data +│ ├── mock_data.py +│ └── sample_code.py +└── conftest.py # Pytest fixtures +``` + +### Escribir Tests + +```python +import pytest +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext + +class TestSecurityAgent: + """Test suite for SecurityAgent""" + + @pytest.fixture + def agent(self): + """Create agent instance""" + return SecurityAgent() + + def test_detect_eval(self, agent): + """Test detection of eval() function""" + code = "result = eval(user_input)" + context = AnalysisContext( + code_content=code, + filename="test.py" + ) + + findings = agent.analyze(context) + + assert len(findings) >= 1 + assert any(f.issue_type == "dangerous_function" for f in findings) +``` + +--- + +## 🐛 Reportar Bugs + +### Antes de Reportar + +1. 
Busca issues existentes duplicados +2. Reproduce el bug consistentemente +3. Recopila información: OS, Python version, logs + +### Template de Issue para Bugs + +```markdown +## 🐛 Descripción del Bug +Descripción clara y concisa del problema. + +## 🔄 Pasos para Reproducir +1. Cargar archivo con 'eval' +2. Llamar POST /api/v1/analyze +3. Observar que no se detecta eval + +## ✅ Comportamiento Esperado +El SecurityAgent debería detectar eval con severity=critical + +## ❌ Comportamiento Actual +El análisis retorna 0 findings + +## 📋 Contexto +- OS: Ubuntu 22.04 +- Python: 3.11.5 +- Branch: develop + +## 📝 Logs +\`\`\` +[ERROR] AST parsing failed for test.py +Traceback... +\`\`\` +``` + +--- + +## ✨ Sugerir Mejoras + +### Template de Feature Request + +```markdown +## ✨ Descripción +Agregar soporte para detección de SSRF (Server-Side Request Forgery) + +## 🎯 Problema que Resuelve +SSRF está en OWASP Top 10 y no está detectado actualmente + +## 💡 Solución Propuesta +- Detectar urllib/requests sin validación +- Identificar patrones como requests.get(user_input) +- Sugerir listas blancas de dominios + +## 🔄 Alternativas Consideradas +- Integrar Bandit con regla B310 +- Custom regex patterns +``` + +--- + +## 💬 Preguntas? + +- **Slack**: [#codeguard-dev](https://codeguard-unal.slack.com) +- **Email**: codeguard-ai@unal.edu.co +- **Office Hours**: Martes y Jueves 2-4 PM (COT) +- **Issues**: [GitHub Issues](https://github.com/YOUR_ORG/CodeGuard-Unal/issues) + +--- + +
+

Gracias por contribuir a CodeGuard AI ❤️

+

Juntos hacemos mejores desarrolladores y código más seguro

+
diff --git a/backend/.dockerignore b/backend/.dockerignore index 4b432d9..c329db3 100644 --- a/backend/.dockerignore +++ b/backend/.dockerignore @@ -1,55 +1,55 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -env/ -venv/ -.venv/ -ENV/ - -# Testing -.pytest_cache/ -.coverage -htmlcov/ -.tox/ - -# IDE -.vscode/ -.idea/ -*.swp -*.swo - -# OS -.DS_Store -Thumbs.db - -# Environment -.env -.env.local - -# Logs -*.log -logs/ - -# Build -build/ -dist/ -*.egg-info/ - -# Git -.git/ -.gitignore - -# Documentation -docs/ -*.md -!README.md - -# Tests -tests/ - -# Alembic migrations (include only in production builds) -alembic/versions/*.py -!alembic/versions/__init__.py +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +.venv/ +ENV/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Environment +.env +.env.local + +# Logs +*.log +logs/ + +# Build +build/ +dist/ +*.egg-info/ + +# Git +.git/ +.gitignore + +# Documentation +docs/ +*.md +!README.md + +# Tests +tests/ + +# Alembic migrations (include only in production builds) +alembic/versions/*.py +!alembic/versions/__init__.py diff --git a/backend/.env.example b/backend/.env.example index ea2c467..0a47ed8 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,50 +1,50 @@ -# ========================================== -# CodeGuard AI - Environment Variables -# Copy to .env and fill with your values -# ========================================== - -# Application -APP_NAME=CodeGuard AI -APP_VERSION=1.0.0 -DEBUG=True -ENVIRONMENT=development - -# API -API_HOST=0.0.0.0 -API_PORT=8000 - -# Database (Supabase PostgreSQL) -DATABASE_URL=postgresql://codeguard:devpassword@localhost:5432/codeguard_dev -SUPABASE_URL=https://your-project.supabase.co -SUPABASE_KEY=your-supabase-anon-key - -# Redis Cache -REDIS_URL=redis://localhost:6379/0 -REDIS_PASSWORD=devpassword -REDIS_TTL=86400 - -# Authentication (Clerk) 
-CLERK_SECRET_KEY=sk_test_... -CLERK_PUBLISHABLE_KEY=pk_test_... - -# AI Services (Sprint 3 - Optional for Sprint 1) -GOOGLE_AI_API_KEY=AIzaSy... -GOOGLE_CLOUD_PROJECT=your-gcp-project -VERTEX_AI_LOCATION=us-central1 -AI_MODEL=gemini-1.5-flash -AI_RATE_LIMIT=100 - -# MCP Servers (Sprint 3) -MCP_OWASP_SERVER_PATH=/path/to/owasp-mcp -MCP_CVE_SERVER_PATH=/path/to/cve-mcp -MCP_CUSTOM_SERVER_PATH=/path/to/codeguard-kb-mcp - -# Encryption -AES_ENCRYPTION_KEY=your-32-byte-key-here - -# CORS -ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173 - -# Logging -LOG_LEVEL=INFO -LOG_FORMAT=json +# ========================================== +# CodeGuard AI - Environment Variables +# Copy to .env and fill with your values +# ========================================== + +# Application +APP_NAME=CodeGuard AI +APP_VERSION=1.0.0 +DEBUG=True +ENVIRONMENT=development + +# API +API_HOST=0.0.0.0 +API_PORT=8000 + +# Database (Supabase PostgreSQL) +DATABASE_URL=postgresql://codeguard:devpassword@localhost:5432/codeguard_dev +SUPABASE_URL=https://your-project.supabase.co +SUPABASE_KEY=your-supabase-anon-key + +# Redis Cache +REDIS_URL=redis://localhost:6379/0 +REDIS_PASSWORD=devpassword +REDIS_TTL=86400 + +# Authentication (Clerk) +CLERK_SECRET_KEY=sk_test_... +CLERK_PUBLISHABLE_KEY=pk_test_... + +# AI Services (Sprint 3 - Optional for Sprint 1) +GOOGLE_AI_API_KEY=AIzaSy... 
+GOOGLE_CLOUD_PROJECT=your-gcp-project +VERTEX_AI_LOCATION=us-central1 +AI_MODEL=gemini-1.5-flash +AI_RATE_LIMIT=100 + +# MCP Servers (Sprint 3) +MCP_OWASP_SERVER_PATH=/path/to/owasp-mcp +MCP_CVE_SERVER_PATH=/path/to/cve-mcp +MCP_CUSTOM_SERVER_PATH=/path/to/codeguard-kb-mcp + +# Encryption +AES_ENCRYPTION_KEY=your-32-byte-key-here + +# CORS +ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173 + +# Logging +LOG_LEVEL=INFO +LOG_FORMAT=json diff --git a/backend/.flake8 b/backend/.flake8 index cb0e37e..66be25f 100644 --- a/backend/.flake8 +++ b/backend/.flake8 @@ -1,4 +1,4 @@ -[flake8] -max-line-length = 100 -extend-ignore = E203 -exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,.venv,venv,alembic/versions +[flake8] +max-line-length = 100 +extend-ignore = E203 +exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,.venv,venv,alembic/versions diff --git a/backend/Dockerfile b/backend/Dockerfile index 723d1aa..be537c3 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,49 +1,49 @@ -# ========================================== -# CodeGuard AI - Backend Dockerfile -# Python 3.11 + FastAPI -# ========================================== - -FROM python:3.11-slim - -# Metadata -LABEL maintainer="CodeGuard AI Team " -LABEL description="Multi-Agent Code Review System - Backend API" - -# Environment variables -ENV PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 \ - PIP_NO_CACHE_DIR=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 - -# Set working directory -WORKDIR /app - -# Install system dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc \ - postgresql-client \ - curl \ - && rm -rf /var/lib/apt/lists/* - -# Copy requirements first (for layer caching) -COPY requirements.txt . - -# Install Python dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Copy application code -COPY . . 
- -# Create non-root user -RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app -USER appuser - -# Expose port -EXPOSE 8000 - -# Health check -HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD curl -f http://localhost:8000/health || exit 1 - -# Run application -CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] +# ========================================== +# CodeGuard AI - Backend Dockerfile +# Python 3.11 + FastAPI +# ========================================== + +FROM python:3.11-slim + +# Metadata +LABEL maintainer="CodeGuard AI Team " +LABEL description="Multi-Agent Code Review System - Backend API" + +# Environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + postgresql-client \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first (for layer caching) +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Create non-root user +RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app +USER appuser + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run application +CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/alembic/env.py b/backend/alembic/env.py index e9ee08c..12dda3d 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -1,120 +1,120 @@ -""" -Alembic Environment Configuration for CodeGuard AI. - -Este archivo configura Alembic para detectar todos los modelos ORM -y generar migraciones automáticamente contra Supabase PostgreSQL. 
-""" - -import os -import sys -from logging.config import fileConfig - -from sqlalchemy import engine_from_config, pool - -from alembic import context - -# ------------------------------------------------------------------------ -# 1. Configuración de Rutas (Path) -# ------------------------------------------------------------------------ -# Agregamos el directorio padre (backend/) al path de Python -# para que Alembic pueda encontrar la carpeta 'src' -sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) - -# ------------------------------------------------------------------------ -# 2. Importación de Modelos y Configuración -# ------------------------------------------------------------------------ -# Importamos la Base declarativa y TODOS los modelos -# IMPORTANTE: Cada modelo debe ser importado para que Alembic lo detecte -from src.models import ( - Base, - UserEntity, - CodeReviewEntity, - AgentFindingEntity, - ReviewStatus, - SeverityEnum, - UserRole, -) - -# Configuración de la base de datos -# Intentamos cargar desde settings, con fallback a variables de entorno -try: - from src.core.config.settings import settings - db_url = settings.DATABASE_URL -except ImportError: - from dotenv import load_dotenv - load_dotenv() - db_url = os.getenv("DATABASE_URL") - -if not db_url: - raise ValueError( - "DATABASE_URL no está configurada. " - "Configúrala en .env o en src/core/config/settings.py" - ) - -# ------------------------------------------------------------------------ -# 3. Configuración de Alembic -# ------------------------------------------------------------------------ -config = context.config - -# Interpretar el archivo de configuración para el logging -if config.config_file_name is not None: - fileConfig(config.config_file_name) - -# Asignar la metadata de los modelos para que Alembic pueda "ver" las tablas -target_metadata = Base.metadata - - -def run_migrations_offline() -> None: - """ - Ejecuta migraciones en modo 'offline'. 
- - Configura el contexto con solo una URL, sin crear un Engine. - Útil para generar scripts SQL sin conexión a la BD. - """ - context.configure( - url=db_url, - target_metadata=target_metadata, - literal_binds=True, - dialect_opts={"paramstyle": "named"}, - # Comparar tipos para detectar cambios en columnas - compare_type=True, - ) - - with context.begin_transaction(): - context.run_migrations() - - -def run_migrations_online() -> None: - """ - Ejecuta migraciones en modo 'online'. - - Crea un Engine y se conecta a la base de datos Supabase. - """ - # Obtenemos la configuración de alembic.ini - configuration = config.get_section(config.config_ini_section) - - # Inyectar la URL de la base de datos desde el entorno - configuration["sqlalchemy.url"] = db_url - - connectable = engine_from_config( - configuration, - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) - - with connectable.connect() as connection: - context.configure( - connection=connection, - target_metadata=target_metadata, - # Comparar tipos para detectar cambios en columnas - compare_type=True, - ) - - with context.begin_transaction(): - context.run_migrations() - - -if context.is_offline_mode(): - run_migrations_offline() -else: +""" +Alembic Environment Configuration for CodeGuard AI. + +Este archivo configura Alembic para detectar todos los modelos ORM +y generar migraciones automáticamente contra Supabase PostgreSQL. +""" + +import os +import sys +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context + +# ------------------------------------------------------------------------ +# 1. 
Configuración de Rutas (Path) +# ------------------------------------------------------------------------ +# Agregamos el directorio padre (backend/) al path de Python +# para que Alembic pueda encontrar la carpeta 'src' +sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) + +# ------------------------------------------------------------------------ +# 2. Importación de Modelos y Configuración +# ------------------------------------------------------------------------ +# Importamos la Base declarativa y TODOS los modelos +# IMPORTANTE: Cada modelo debe ser importado para que Alembic lo detecte +from src.models import ( + Base, + UserEntity, + CodeReviewEntity, + AgentFindingEntity, + ReviewStatus, + SeverityEnum, + UserRole, +) + +# Configuración de la base de datos +# Intentamos cargar desde settings, con fallback a variables de entorno +try: + from src.core.config.settings import settings + db_url = settings.DATABASE_URL +except ImportError: + from dotenv import load_dotenv + load_dotenv() + db_url = os.getenv("DATABASE_URL") + +if not db_url: + raise ValueError( + "DATABASE_URL no está configurada. " + "Configúrala en .env o en src/core/config/settings.py" + ) + +# ------------------------------------------------------------------------ +# 3. Configuración de Alembic +# ------------------------------------------------------------------------ +config = context.config + +# Interpretar el archivo de configuración para el logging +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Asignar la metadata de los modelos para que Alembic pueda "ver" las tablas +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + """ + Ejecuta migraciones en modo 'offline'. + + Configura el contexto con solo una URL, sin crear un Engine. + Útil para generar scripts SQL sin conexión a la BD. 
+ """ + context.configure( + url=db_url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + # Comparar tipos para detectar cambios en columnas + compare_type=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """ + Ejecuta migraciones en modo 'online'. + + Crea un Engine y se conecta a la base de datos Supabase. + """ + # Obtenemos la configuración de alembic.ini + configuration = config.get_section(config.config_ini_section) + + # Inyectar la URL de la base de datos desde el entorno + configuration["sqlalchemy.url"] = db_url + + connectable = engine_from_config( + configuration, + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + # Comparar tipos para detectar cambios en columnas + compare_type=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: run_migrations_online() \ No newline at end of file diff --git a/backend/alembic/script.py.mako b/backend/alembic/script.py.mako index 04d5198..ccb6e8e 100644 --- a/backend/alembic/script.py.mako +++ b/backend/alembic/script.py.mako @@ -1,33 +1,33 @@ -"""${message} - -Revision ID: ${up_revision} -Revises: ${down_revision | comma,n} -Create Date: ${create_date} - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -${imports if imports else ""} - -# revision identifiers, used by Alembic. 
-revision: str = ${repr(up_revision)} -down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} -branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} -depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} - - -def upgrade() -> None: - """Upgrade schema.""" - ${upgrades if upgrades else "pass"} - - -def downgrade() -> None: - """Downgrade schema.""" - - - - - +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + + + + + ${downgrades if downgrades else "pass"} \ No newline at end of file diff --git a/backend/alembic/versions/ba48c1bb8e18_create_initial_tables_users_code_.py b/backend/alembic/versions/ba48c1bb8e18_create_initial_tables_users_code_.py index 00d9d72..7fb8e2f 100644 --- a/backend/alembic/versions/ba48c1bb8e18_create_initial_tables_users_code_.py +++ b/backend/alembic/versions/ba48c1bb8e18_create_initial_tables_users_code_.py @@ -1,97 +1,97 @@ -"""create_initial_tables_users_code_reviews_findings - -Revision ID: ba48c1bb8e18 -Revises: -Create Date: 2025-11-27 02:43:54.598631 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision: str = 'ba48c1bb8e18' -down_revision: Union[str, Sequence[str], None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('users', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('email', sa.String(length=255), nullable=False), - sa.Column('name', sa.String(length=255), nullable=True), - sa.Column('avatar_url', sa.String(length=500), nullable=True), - sa.Column('role', sa.Enum('DEVELOPER', 'ADMIN', name='userrole'), nullable=False), - sa.Column('daily_analysis_count', sa.Integer(), nullable=False), - sa.Column('last_analysis_date', sa.Date(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True) - op.create_index(op.f('ix_users_role'), 'users', ['role'], unique=False) - op.create_table('code_reviews', - sa.Column('id', sa.UUID(), nullable=False), - sa.Column('user_id', sa.String(length=255), nullable=False), - sa.Column('filename', sa.String(length=500), nullable=False), - sa.Column('code_content', sa.LargeBinary(), nullable=False), - sa.Column('quality_score', sa.Integer(), nullable=True), - sa.Column('status', sa.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', name='reviewstatus'), nullable=False), - sa.Column('total_findings', sa.Integer(), nullable=True), - sa.Column('error_message', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('completed_at', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_code_reviews_created_at'), 'code_reviews', ['created_at'], unique=False) - 
op.create_index(op.f('ix_code_reviews_status'), 'code_reviews', ['status'], unique=False) - op.create_index(op.f('ix_code_reviews_user_id'), 'code_reviews', ['user_id'], unique=False) - op.create_table('agent_findings', - sa.Column('id', sa.UUID(), nullable=False), - sa.Column('review_id', sa.UUID(), nullable=False), - sa.Column('agent_type', sa.String(length=100), nullable=False), - sa.Column('severity', sa.Enum('CRITICAL', 'HIGH', 'MEDIUM', 'LOW', name='severityenum'), nullable=False), - sa.Column('issue_type', sa.String(length=200), nullable=False), - sa.Column('line_number', sa.Integer(), nullable=False), - sa.Column('code_snippet', sa.Text(), nullable=True), - sa.Column('message', sa.Text(), nullable=False), - sa.Column('suggestion', sa.Text(), nullable=True), - sa.Column('metrics', postgresql.JSONB(astext_type=sa.Text()), nullable=True), - sa.Column('ai_explanation', postgresql.JSONB(astext_type=sa.Text()), nullable=True), - sa.Column('mcp_references', postgresql.ARRAY(sa.Text()), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=False), - sa.ForeignKeyConstraint(['review_id'], ['code_reviews.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_agent_findings_agent_type'), 'agent_findings', ['agent_type'], unique=False) - op.create_index(op.f('ix_agent_findings_review_id'), 'agent_findings', ['review_id'], unique=False) - op.create_index(op.f('ix_agent_findings_severity'), 'agent_findings', ['severity'], unique=False) - # ### end Alembic commands ### - - -def downgrade() -> None: - """Downgrade schema.""" - - - - - - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index(op.f('ix_agent_findings_severity'), table_name='agent_findings') - op.drop_index(op.f('ix_agent_findings_review_id'), table_name='agent_findings') - op.drop_index(op.f('ix_agent_findings_agent_type'), table_name='agent_findings') - op.drop_table('agent_findings') - op.drop_index(op.f('ix_code_reviews_user_id'), table_name='code_reviews') - op.drop_index(op.f('ix_code_reviews_status'), table_name='code_reviews') - op.drop_index(op.f('ix_code_reviews_created_at'), table_name='code_reviews') - op.drop_table('code_reviews') - op.drop_index(op.f('ix_users_role'), table_name='users') - op.drop_index(op.f('ix_users_email'), table_name='users') - op.drop_table('users') +"""create_initial_tables_users_code_reviews_findings + +Revision ID: ba48c1bb8e18 +Revises: +Create Date: 2025-11-27 02:43:54.598631 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = 'ba48c1bb8e18' +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('users', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('email', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=True), + sa.Column('avatar_url', sa.String(length=500), nullable=True), + sa.Column('role', sa.Enum('DEVELOPER', 'ADMIN', name='userrole'), nullable=False), + sa.Column('daily_analysis_count', sa.Integer(), nullable=False), + sa.Column('last_analysis_date', sa.Date(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True) + op.create_index(op.f('ix_users_role'), 'users', ['role'], unique=False) + op.create_table('code_reviews', + sa.Column('id', sa.UUID(), nullable=False), + sa.Column('user_id', sa.String(length=255), nullable=False), + sa.Column('filename', sa.String(length=500), nullable=False), + sa.Column('code_content', sa.LargeBinary(), nullable=False), + sa.Column('quality_score', sa.Integer(), nullable=True), + sa.Column('status', sa.Enum('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', name='reviewstatus'), nullable=False), + sa.Column('total_findings', sa.Integer(), nullable=True), + sa.Column('error_message', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_code_reviews_created_at'), 'code_reviews', ['created_at'], unique=False) + op.create_index(op.f('ix_code_reviews_status'), 'code_reviews', ['status'], unique=False) + op.create_index(op.f('ix_code_reviews_user_id'), 'code_reviews', ['user_id'], unique=False) + op.create_table('agent_findings', + sa.Column('id', sa.UUID(), nullable=False), + sa.Column('review_id', sa.UUID(), nullable=False), + 
sa.Column('agent_type', sa.String(length=100), nullable=False), + sa.Column('severity', sa.Enum('CRITICAL', 'HIGH', 'MEDIUM', 'LOW', name='severityenum'), nullable=False), + sa.Column('issue_type', sa.String(length=200), nullable=False), + sa.Column('line_number', sa.Integer(), nullable=False), + sa.Column('code_snippet', sa.Text(), nullable=True), + sa.Column('message', sa.Text(), nullable=False), + sa.Column('suggestion', sa.Text(), nullable=True), + sa.Column('metrics', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('ai_explanation', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('mcp_references', postgresql.ARRAY(sa.Text()), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['review_id'], ['code_reviews.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_agent_findings_agent_type'), 'agent_findings', ['agent_type'], unique=False) + op.create_index(op.f('ix_agent_findings_review_id'), 'agent_findings', ['review_id'], unique=False) + op.create_index(op.f('ix_agent_findings_severity'), 'agent_findings', ['severity'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + + + + + + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index(op.f('ix_agent_findings_severity'), table_name='agent_findings') + op.drop_index(op.f('ix_agent_findings_review_id'), table_name='agent_findings') + op.drop_index(op.f('ix_agent_findings_agent_type'), table_name='agent_findings') + op.drop_table('agent_findings') + op.drop_index(op.f('ix_code_reviews_user_id'), table_name='code_reviews') + op.drop_index(op.f('ix_code_reviews_status'), table_name='code_reviews') + op.drop_index(op.f('ix_code_reviews_created_at'), table_name='code_reviews') + op.drop_table('code_reviews') + op.drop_index(op.f('ix_users_role'), table_name='users') + op.drop_index(op.f('ix_users_email'), table_name='users') + op.drop_table('users') # ### end Alembic commands ### \ No newline at end of file diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index a53b1d5..861b2d2 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,100 +1,100 @@ -version: '3.9' - -services: - # ========================================== - # Backend API (FastAPI) - # ========================================== - backend: - build: - context: . 
- dockerfile: Dockerfile - container_name: codeguard-backend - ports: - - "8000:8000" - environment: - # Application - APP_NAME: "CodeGuard AI" - DEBUG: "True" - ENVIRONMENT: "development" - - # Database - DATABASE_URL: "postgresql://codeguard:devpassword@db:5432/codeguard_dev" - SUPABASE_URL: "${SUPABASE_URL:-https://your-project.supabase.co}" - SUPABASE_KEY: "${SUPABASE_KEY:-your-key-here}" - - # Redis - REDIS_URL: "redis://:devpassword@redis:6379/0" - - # Auth (Clerk) - CLERK_SECRET_KEY: "${CLERK_SECRET_KEY:-sk_test_placeholder}" - CLERK_PUBLISHABLE_KEY: "${CLERK_PUBLISHABLE_KEY:-pk_test_placeholder}" - - # AI (Sprint 3 - optional for now) - GOOGLE_AI_API_KEY: "${GOOGLE_AI_API_KEY:-}" - - # CORS - ALLOWED_ORIGINS: "http://localhost:3000,http://localhost:5173" - volumes: - - .:/app - depends_on: - db: - condition: service_healthy - redis: - condition: service_healthy - networks: - - codeguard-network - restart: unless-stopped - - # ========================================== - # PostgreSQL Database - # ========================================== - db: - image: postgres:15-alpine - container_name: codeguard-db - environment: - POSTGRES_USER: codeguard - POSTGRES_PASSWORD: devpassword - POSTGRES_DB: codeguard_dev - PGDATA: /var/lib/postgresql/data/pgdata - ports: - - "5433:5432" - volumes: - - postgres_data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U codeguard -d codeguard_dev"] - interval: 10s - timeout: 3s - retries: 5 - networks: - - codeguard-network - restart: unless-stopped - - # ========================================== - # Redis Cache - # ========================================== - redis: - image: redis:7-alpine - container_name: codeguard-redis - command: redis-server --appendonly yes --requirepass devpassword - ports: - - "6379:6379" - volumes: - - redis_data:/data - healthcheck: - test: ["CMD", "redis-cli", "-a", "devpassword", "ping"] - interval: 10s - timeout: 3s - retries: 5 - networks: - - codeguard-network - restart: 
unless-stopped - -volumes: - postgres_data: - driver: local - redis_data: - driver: local - -networks: - codeguard-network: - driver: bridge +version: '3.9' + +services: + # ========================================== + # Backend API (FastAPI) + # ========================================== + backend: + build: + context: . + dockerfile: Dockerfile + container_name: codeguard-backend + ports: + - "8000:8000" + environment: + # Application + APP_NAME: "CodeGuard AI" + DEBUG: "True" + ENVIRONMENT: "development" + + # Database + DATABASE_URL: "postgresql://codeguard:devpassword@db:5432/codeguard_dev" + SUPABASE_URL: "${SUPABASE_URL:-https://your-project.supabase.co}" + SUPABASE_KEY: "${SUPABASE_KEY:-your-key-here}" + + # Redis + REDIS_URL: "redis://:devpassword@redis:6379/0" + + # Auth (Clerk) + CLERK_SECRET_KEY: "${CLERK_SECRET_KEY:-sk_test_placeholder}" + CLERK_PUBLISHABLE_KEY: "${CLERK_PUBLISHABLE_KEY:-pk_test_placeholder}" + + # AI (Sprint 3 - optional for now) + GOOGLE_AI_API_KEY: "${GOOGLE_AI_API_KEY:-}" + + # CORS + ALLOWED_ORIGINS: "http://localhost:3000,http://localhost:5173" + volumes: + - .:/app + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - codeguard-network + restart: unless-stopped + + # ========================================== + # PostgreSQL Database + # ========================================== + db: + image: postgres:15-alpine + container_name: codeguard-db + environment: + POSTGRES_USER: codeguard + POSTGRES_PASSWORD: devpassword + POSTGRES_DB: codeguard_dev + PGDATA: /var/lib/postgresql/data/pgdata + ports: + - "5433:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U codeguard -d codeguard_dev"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - codeguard-network + restart: unless-stopped + + # ========================================== + # Redis Cache + # ========================================== + redis: + image: 
redis:7-alpine + container_name: codeguard-redis + command: redis-server --appendonly yes --requirepass devpassword + ports: + - "6379:6379" + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "-a", "devpassword", "ping"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - codeguard-network + restart: unless-stopped + +volumes: + postgres_data: + driver: local + redis_data: + driver: local + +networks: + codeguard-network: + driver: bridge diff --git a/backend/fix_code_quality.sh b/backend/fix_code_quality.sh index 3b03e77..29d0377 100644 --- a/backend/fix_code_quality.sh +++ b/backend/fix_code_quality.sh @@ -1,28 +1,28 @@ -#!/bin/bash -set -e -set -o pipefail - -echo "🔧 Fixing code quality issues..." - -echo "📝 Running black..." -if ! black src/agents/ src/schemas/ tests/ --line-length=100; then - echo "❌ Black formatting failed" - exit 1 -fi - -echo "📦 Running isort..." -if ! isort src/agents/ src/schemas/ tests/ --profile=black; then - echo "❌ isort failed" - exit 1 -fi - -echo "🔍 Running pylint..." -if ! pylint src/agents/ src/schemas/ --fail-under=8.5; then - echo "❌ Pylint score below 8.5" - exit 1 -fi - -echo "✅ All quality checks passed!" -echo "" -echo "Now run tests:" +#!/bin/bash +set -e +set -o pipefail + +echo "🔧 Fixing code quality issues..." + +echo "📝 Running black..." +if ! black src/agents/ src/schemas/ tests/ --line-length=100; then + echo "❌ Black formatting failed" + exit 1 +fi + +echo "📦 Running isort..." +if ! isort src/agents/ src/schemas/ tests/ --profile=black; then + echo "❌ isort failed" + exit 1 +fi + +echo "🔍 Running pylint..." +if ! pylint src/agents/ src/schemas/ --fail-under=8.5; then + echo "❌ Pylint score below 8.5" + exit 1 +fi + +echo "✅ All quality checks passed!" 
+echo "" +echo "Now run tests:" echo "pytest tests/unit/ -v --cov=src --cov-report=term-missing" \ No newline at end of file diff --git a/backend/pyproject.toml b/backend/pyproject.toml index ab95532..2fdb758 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,37 +1,37 @@ -[tool.black] -line-length = 100 -include = '\.pyi?$' -extend-exclude = ''' -/( - # directories - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist - | alembic/versions -)/ -''' - -[tool.isort] -profile = "black" -line_length = 100 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -use_parentheses = true -ensure_newline_before_comments = true -skip_gitignore = true -skip = ["alembic/versions"] - -[tool.pytest.ini_options] -testpaths = ["tests"] -python_files = "test_*.py" -python_classes = "Test*" -python_functions = "test_*" -addopts = "-v --cov=src --cov-report=html --cov-report=term-missing --cov-fail-under=75" +[tool.black] +line-length = 100 +include = '\.pyi?$' +extend-exclude = ''' +/( + # directories + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + | alembic/versions +)/ +''' + +[tool.isort] +profile = "black" +line_length = 100 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +skip_gitignore = true +skip = ["alembic/versions"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" +python_classes = "Test*" +python_functions = "test_*" +addopts = "-v --cov=src --cov-report=html --cov-report=term-missing --cov-fail-under=75" diff --git a/backend/requirements-dev.txt b/backend/requirements-dev.txt index f9ffaa4..5f32f53 100644 --- a/backend/requirements-dev.txt +++ b/backend/requirements-dev.txt @@ -1,16 +1,16 @@ -# ========================================== -# CodeGuard AI - Development Dependencies -# 
========================================== - -# ===== DEVELOPMENT TOOLS ===== -black>=24.0.0 -isort>=5.13.0 -mypy>=1.8.0 - -# ===== TESTING (Sprint 1 Core) ===== -pytest>=8.0.0 -pytest-asyncio>=0.23.0 -pytest-cov>=4.1.0 -pytest-mock>=3.12.0 -faker>=22.0.0 # Para datos de prueba -httpx>=0.27.0 # Para TestClient +# ========================================== +# CodeGuard AI - Development Dependencies +# ========================================== + +# ===== DEVELOPMENT TOOLS ===== +black>=24.0.0 +isort>=5.13.0 +mypy>=1.8.0 + +# ===== TESTING (Sprint 1 Core) ===== +pytest>=8.0.0 +pytest-asyncio>=0.23.0 +pytest-cov>=4.1.0 +pytest-mock>=3.12.0 +faker>=22.0.0 # Para datos de prueba +httpx>=0.27.0 # Para TestClient diff --git a/backend/requirements.txt b/backend/requirements.txt index e33967f..d62321a 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,61 +1,61 @@ -# ========================================== -# CodeGuard AI - Backend Dependencies -# ========================================== - -# ===== WEB FRAMEWORK ===== -fastapi>=0.109.0 # Última versión compatible con Pydantic 2.8+ -uvicorn[standard]>=0.27.0 -python-multipart>=0.0.6 - -# ===== DATABASE & ORM ===== -sqlalchemy>=2.0.25 -alembic>=1.13.0 -psycopg2-binary>=2.9.9 - -# ===== AUTHENTICATION ===== -# Clerk (requiere Pydantic 2.8+, httpx 0.27+) -clerk-backend-api>=1.0.0 -python-jose[cryptography]>=3.3.0 - -# ===== SUPABASE (comentado - incompatible con Clerk httpx) ===== -# Usar PostgreSQL directo con SQLAlchemy en su lugar -# supabase>=2.3.0 - -# ===== STATIC ANALYSIS (Sprint 1 Core) ===== -bandit>=1.7.5 -radon>=6.0.1 -pylint>=3.0.3 -flake8>=7.0.0 - -# ===== CACHE ===== -redis>=5.0.1 -hiredis>=2.3.0 - -# ===== UTILITIES ===== -pydantic>=2.8.0 # Compatible con Clerk -pydantic-settings>=2.2.0 -python-dotenv>=1.0.0 -requests>=2.31.0 -httpx>=0.27.0 # Compatible con Clerk - -# ===== AI SERVICES (Sprint 3) ===== -google-generativeai>=0.3.2 # Gemini API -google-cloud-aiplatform>=1.40.0 # Vertex 
AI (opcional) - -# ===== DEVELOPMENT TOOLS ===== (Moved to requirements-dev.txt) -# black>=24.0.0 -# isort>=5.13.0 -# mypy>=1.8.0 - -# ===== TESTING (Sprint 1 Core) ===== (Moved to requirements-dev.txt) -# pytest>=8.0.0 -# pytest-asyncio>=0.23.0 -# pytest-cov>=4.1.0 -# faker>=22.0.0 # Para datos de prueba - -# ===== PDF GENERATION (Sprint 2) ===== -reportlab>=4.0.9 - -# ===== WEBSOCKETS (Sprint 2) ===== -websockets>=12.0 -email-validator>=2.0.0 +# ========================================== +# CodeGuard AI - Backend Dependencies +# ========================================== + +# ===== WEB FRAMEWORK ===== +fastapi>=0.109.0 # Última versión compatible con Pydantic 2.8+ +uvicorn[standard]>=0.27.0 +python-multipart>=0.0.6 + +# ===== DATABASE & ORM ===== +sqlalchemy>=2.0.25 +alembic>=1.13.0 +psycopg2-binary>=2.9.9 + +# ===== AUTHENTICATION ===== +# Clerk (requiere Pydantic 2.8+, httpx 0.27+) +clerk-backend-api>=1.0.0 +python-jose[cryptography]>=3.3.0 + +# ===== SUPABASE (comentado - incompatible con Clerk httpx) ===== +# Usar PostgreSQL directo con SQLAlchemy en su lugar +# supabase>=2.3.0 + +# ===== STATIC ANALYSIS (Sprint 1 Core) ===== +bandit>=1.7.5 +radon>=6.0.1 +pylint>=3.0.3 +flake8>=7.0.0 + +# ===== CACHE ===== +redis>=5.0.1 +hiredis>=2.3.0 + +# ===== UTILITIES ===== +pydantic>=2.8.0 # Compatible con Clerk +pydantic-settings>=2.2.0 +python-dotenv>=1.0.0 +requests>=2.31.0 +httpx>=0.27.0 # Compatible con Clerk + +# ===== AI SERVICES (Sprint 3) ===== +google-generativeai>=0.3.2 # Gemini API +google-cloud-aiplatform>=1.40.0 # Vertex AI (opcional) + +# ===== DEVELOPMENT TOOLS ===== (Moved to requirements-dev.txt) +# black>=24.0.0 +# isort>=5.13.0 +# mypy>=1.8.0 + +# ===== TESTING (Sprint 1 Core) ===== (Moved to requirements-dev.txt) +# pytest>=8.0.0 +# pytest-asyncio>=0.23.0 +# pytest-cov>=4.1.0 +# faker>=22.0.0 # Para datos de prueba + +# ===== PDF GENERATION (Sprint 2) ===== +reportlab>=4.0.9 + +# ===== WEBSOCKETS (Sprint 2) ===== +websockets>=12.0 
+email-validator>=2.0.0 diff --git a/backend/src/agents/analyzers/flake8_analyzer.py b/backend/src/agents/analyzers/flake8_analyzer.py index f44c42d..1f75502 100644 --- a/backend/src/agents/analyzers/flake8_analyzer.py +++ b/backend/src/agents/analyzers/flake8_analyzer.py @@ -1,172 +1,172 @@ -""" -Flake8Analyzer - Analizador especializado para ejecutar Flake8. - -Responsabilidad única: Ejecutar flake8 sobre código Python y -parsear su salida en objetos Finding. -""" - -import os -import subprocess -import sys -import tempfile -from typing import List, Optional - -from src.schemas.finding import Finding, Severity - - -class Flake8Analyzer: - """ - Analizador que ejecuta Flake8 sobre código Python. - - Encapsula la lógica de ejecución de flake8 como subproceso - y el parseo de su salida a objetos Finding. - - Attributes: - _cmd_template: Lista base de comandos para ejecutar flake8. - """ - - def __init__(self) -> None: - """Inicializa el analizador Flake8 con la plantilla de comandos.""" - self._cmd_template: List[str] = [ - sys.executable, - "-m", - "flake8", - "--format=%(row)d:%(col)d:%(code)s:%(text)s", - ] - - def analyze( - self, - code_content: str, - agent_name: str = "StyleAgent", - ) -> List[Finding]: - """ - Ejecuta flake8 sobre el código y retorna los hallazgos. - - Args: - code_content: Código Python a analizar. - agent_name: Nombre del agente que solicita el análisis. - - Returns: - Lista de Finding encontrados por Flake8. - Lista vacía si flake8 no está disponible. 
- """ - findings: List[Finding] = [] - tmp_path: Optional[str] = None - - try: - # Crear archivo temporal con el código - with tempfile.NamedTemporaryFile( - suffix=".py", - delete=False, - mode="w", - encoding="utf-8", - ) as tmp: - tmp.write(code_content) - tmp_path = tmp.name - - # Ejecutar flake8 - cmd = self._cmd_template + [tmp_path] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - ) - - # Parsear salida - findings = self._parse_output(result.stdout, code_content, agent_name) - - except FileNotFoundError: - # flake8 no está instalado - pass - except Exception: - # Otros errores - silenciar para no romper el análisis - pass - finally: - # Limpiar archivo temporal - if tmp_path and os.path.exists(tmp_path): - try: - os.remove(tmp_path) - except OSError: - pass - - return findings - - def _parse_output( - self, - output: str, - code_content: str, - agent_name: str, - ) -> List[Finding]: - """ - Parsea la salida de flake8 y genera objetos Finding. - - Args: - output: Salida estándar de flake8. - code_content: Código original para extraer snippets. - agent_name: Nombre del agente para los findings. - - Returns: - Lista de Finding parseados. - """ - findings: List[Finding] = [] - lines = code_content.splitlines() - - for line in output.splitlines(): - parts = line.split(":", 3) - if len(parts) < 4: - continue - - line_str, _col_str, code, msg = parts - try: - line_number = int(line_str) - except ValueError: - continue - - severity = self._map_severity(code) - code_snippet = "" - if 1 <= line_number <= len(lines): - code_snippet = lines[line_number - 1] - - findings.append( - Finding( - severity=severity, - issue_type="style/pep8", - message=msg.strip(), - line_number=line_number, - code_snippet=code_snippet, - suggestion=None, - agent_name=agent_name, - rule_id=f"FLAKE8_{code}", - ) - ) - - return findings - - @staticmethod - def _map_severity(code: str) -> Severity: - """ - Mapea el código de flake8 a severidad. 
- - Flake8 usa prefijos: - - E = error (estilo) -> MEDIUM - - W = warning -> LOW - - F = pyflakes (errores lógicos) -> HIGH - - C = complejidad -> MEDIUM - - N = naming -> LOW - - Args: - code: Código del error de flake8 (ej: E501, F401). - - Returns: - Nivel de severidad correspondiente. - """ - if not code: - return Severity.LOW - - prefix = code[0].upper() - if prefix == "F": - return Severity.HIGH - if prefix in ("E", "C"): - return Severity.MEDIUM - return Severity.LOW +""" +Flake8Analyzer - Analizador especializado para ejecutar Flake8. + +Responsabilidad única: Ejecutar flake8 sobre código Python y +parsear su salida en objetos Finding. +""" + +import os +import subprocess +import sys +import tempfile +from typing import List, Optional + +from src.schemas.finding import Finding, Severity + + +class Flake8Analyzer: + """ + Analizador que ejecuta Flake8 sobre código Python. + + Encapsula la lógica de ejecución de flake8 como subproceso + y el parseo de su salida a objetos Finding. + + Attributes: + _cmd_template: Lista base de comandos para ejecutar flake8. + """ + + def __init__(self) -> None: + """Inicializa el analizador Flake8 con la plantilla de comandos.""" + self._cmd_template: List[str] = [ + sys.executable, + "-m", + "flake8", + "--format=%(row)d:%(col)d:%(code)s:%(text)s", + ] + + def analyze( + self, + code_content: str, + agent_name: str = "StyleAgent", + ) -> List[Finding]: + """ + Ejecuta flake8 sobre el código y retorna los hallazgos. + + Args: + code_content: Código Python a analizar. + agent_name: Nombre del agente que solicita el análisis. + + Returns: + Lista de Finding encontrados por Flake8. + Lista vacía si flake8 no está disponible. 
+ """ + findings: List[Finding] = [] + tmp_path: Optional[str] = None + + try: + # Crear archivo temporal con el código + with tempfile.NamedTemporaryFile( + suffix=".py", + delete=False, + mode="w", + encoding="utf-8", + ) as tmp: + tmp.write(code_content) + tmp_path = tmp.name + + # Ejecutar flake8 + cmd = self._cmd_template + [tmp_path] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + ) + + # Parsear salida + findings = self._parse_output(result.stdout, code_content, agent_name) + + except FileNotFoundError: + # flake8 no está instalado + pass + except Exception: + # Otros errores - silenciar para no romper el análisis + pass + finally: + # Limpiar archivo temporal + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + except OSError: + pass + + return findings + + def _parse_output( + self, + output: str, + code_content: str, + agent_name: str, + ) -> List[Finding]: + """ + Parsea la salida de flake8 y genera objetos Finding. + + Args: + output: Salida estándar de flake8. + code_content: Código original para extraer snippets. + agent_name: Nombre del agente para los findings. + + Returns: + Lista de Finding parseados. + """ + findings: List[Finding] = [] + lines = code_content.splitlines() + + for line in output.splitlines(): + parts = line.split(":", 3) + if len(parts) < 4: + continue + + line_str, _col_str, code, msg = parts + try: + line_number = int(line_str) + except ValueError: + continue + + severity = self._map_severity(code) + code_snippet = "" + if 1 <= line_number <= len(lines): + code_snippet = lines[line_number - 1] + + findings.append( + Finding( + severity=severity, + issue_type="style/pep8", + message=msg.strip(), + line_number=line_number, + code_snippet=code_snippet, + suggestion=None, + agent_name=agent_name, + rule_id=f"FLAKE8_{code}", + ) + ) + + return findings + + @staticmethod + def _map_severity(code: str) -> Severity: + """ + Mapea el código de flake8 a severidad. 
+ + Flake8 usa prefijos: + - E = error (estilo) -> MEDIUM + - W = warning -> LOW + - F = pyflakes (errores lógicos) -> HIGH + - C = complejidad -> MEDIUM + - N = naming -> LOW + + Args: + code: Código del error de flake8 (ej: E501, F401). + + Returns: + Nivel de severidad correspondiente. + """ + if not code: + return Severity.LOW + + prefix = code[0].upper() + if prefix == "F": + return Severity.HIGH + if prefix in ("E", "C"): + return Severity.MEDIUM + return Severity.LOW diff --git a/backend/src/agents/analyzers/pylint_analyzer.py b/backend/src/agents/analyzers/pylint_analyzer.py index d38d465..60103ed 100644 --- a/backend/src/agents/analyzers/pylint_analyzer.py +++ b/backend/src/agents/analyzers/pylint_analyzer.py @@ -1,172 +1,172 @@ -""" -PylintAnalyzer - Analizador especializado para ejecutar Pylint. - -Responsabilidad única: Ejecutar pylint sobre código Python y -parsear su salida en objetos Finding. -""" - -import os -import subprocess -import sys -import tempfile -from typing import List, Optional - -from src.schemas.finding import Finding, Severity - - -class PylintAnalyzer: - """ - Analizador que ejecuta Pylint sobre código Python. - - Encapsula la lógica de ejecución de pylint como subproceso - y el parseo de su salida a objetos Finding. - - Attributes: - _cmd_template: Lista base de comandos para ejecutar pylint. - """ - - def __init__(self) -> None: - """Inicializa el analizador Pylint con la plantilla de comandos.""" - self._cmd_template: List[str] = [ - sys.executable, - "-m", - "pylint", - "--output-format=text", - "--score=no", - "--msg-template={line}:{column}:{msg_id}:{msg}", - ] - - def analyze( - self, - code_content: str, - agent_name: str = "StyleAgent", - ) -> List[Finding]: - """ - Ejecuta pylint sobre el código y retorna los hallazgos. - - Args: - code_content: Código Python a analizar. - agent_name: Nombre del agente que solicita el análisis. - - Returns: - Lista de Finding encontrados por Pylint. 
- Lista vacía si pylint no está disponible. - """ - findings: List[Finding] = [] - tmp_path: Optional[str] = None - - try: - # Crear archivo temporal con el código - with tempfile.NamedTemporaryFile( - suffix=".py", - delete=False, - mode="w", - encoding="utf-8", - ) as tmp: - tmp.write(code_content) - tmp_path = tmp.name - - # Ejecutar pylint - cmd = self._cmd_template + [tmp_path] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - ) - - # Parsear salida - findings = self._parse_output(result.stdout, code_content, agent_name) - - except FileNotFoundError: - # pylint no está instalado - pass - except Exception: - # Otros errores - silenciar para no romper el análisis - pass - finally: - # Limpiar archivo temporal - if tmp_path and os.path.exists(tmp_path): - try: - os.remove(tmp_path) - except OSError: - pass - - return findings - - def _parse_output( - self, - output: str, - code_content: str, - agent_name: str, - ) -> List[Finding]: - """ - Parsea la salida de pylint y genera objetos Finding. - - Args: - output: Salida estándar de pylint. - code_content: Código original para extraer snippets. - agent_name: Nombre del agente para los findings. - - Returns: - Lista de Finding parseados. 
- """ - findings: List[Finding] = [] - lines = code_content.splitlines() - - for line in output.splitlines(): - parts = line.split(":", 3) - if len(parts) < 4: - continue - - line_str, _col_str, msg_id, msg = parts - try: - line_number = int(line_str) - except ValueError: - continue - - severity = self._map_severity(msg_id) - code_snippet = "" - if 1 <= line_number <= len(lines): - code_snippet = lines[line_number - 1] - - findings.append( - Finding( - severity=severity, - issue_type="style/pep8", - message=msg.strip(), - line_number=line_number, - code_snippet=code_snippet, - suggestion=None, - agent_name=agent_name, - rule_id=f"PYLINT_{msg_id}", - ) - ) - - return findings - - @staticmethod - def _map_severity(msg_id: str) -> Severity: - """ - Mapea el prefijo de mensaje pylint a severidad. - - Pylint usa prefijos: - - C = convention, R = refactor -> LOW - - W = warning -> MEDIUM - - E = error, F = fatal -> HIGH - - Args: - msg_id: ID del mensaje de pylint (ej: C0114, E0001). - - Returns: - Nivel de severidad correspondiente. - """ - if not msg_id: - return Severity.LOW - - prefix = msg_id[0].upper() - if prefix in ("E", "F"): - return Severity.HIGH - if prefix == "W": - return Severity.MEDIUM - return Severity.LOW +""" +PylintAnalyzer - Analizador especializado para ejecutar Pylint. + +Responsabilidad única: Ejecutar pylint sobre código Python y +parsear su salida en objetos Finding. +""" + +import os +import subprocess +import sys +import tempfile +from typing import List, Optional + +from src.schemas.finding import Finding, Severity + + +class PylintAnalyzer: + """ + Analizador que ejecuta Pylint sobre código Python. + + Encapsula la lógica de ejecución de pylint como subproceso + y el parseo de su salida a objetos Finding. + + Attributes: + _cmd_template: Lista base de comandos para ejecutar pylint. 
+ """ + + def __init__(self) -> None: + """Inicializa el analizador Pylint con la plantilla de comandos.""" + self._cmd_template: List[str] = [ + sys.executable, + "-m", + "pylint", + "--output-format=text", + "--score=no", + "--msg-template={line}:{column}:{msg_id}:{msg}", + ] + + def analyze( + self, + code_content: str, + agent_name: str = "StyleAgent", + ) -> List[Finding]: + """ + Ejecuta pylint sobre el código y retorna los hallazgos. + + Args: + code_content: Código Python a analizar. + agent_name: Nombre del agente que solicita el análisis. + + Returns: + Lista de Finding encontrados por Pylint. + Lista vacía si pylint no está disponible. + """ + findings: List[Finding] = [] + tmp_path: Optional[str] = None + + try: + # Crear archivo temporal con el código + with tempfile.NamedTemporaryFile( + suffix=".py", + delete=False, + mode="w", + encoding="utf-8", + ) as tmp: + tmp.write(code_content) + tmp_path = tmp.name + + # Ejecutar pylint + cmd = self._cmd_template + [tmp_path] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + ) + + # Parsear salida + findings = self._parse_output(result.stdout, code_content, agent_name) + + except FileNotFoundError: + # pylint no está instalado + pass + except Exception: + # Otros errores - silenciar para no romper el análisis + pass + finally: + # Limpiar archivo temporal + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + except OSError: + pass + + return findings + + def _parse_output( + self, + output: str, + code_content: str, + agent_name: str, + ) -> List[Finding]: + """ + Parsea la salida de pylint y genera objetos Finding. + + Args: + output: Salida estándar de pylint. + code_content: Código original para extraer snippets. + agent_name: Nombre del agente para los findings. + + Returns: + Lista de Finding parseados. 
+ """ + findings: List[Finding] = [] + lines = code_content.splitlines() + + for line in output.splitlines(): + parts = line.split(":", 3) + if len(parts) < 4: + continue + + line_str, _col_str, msg_id, msg = parts + try: + line_number = int(line_str) + except ValueError: + continue + + severity = self._map_severity(msg_id) + code_snippet = "" + if 1 <= line_number <= len(lines): + code_snippet = lines[line_number - 1] + + findings.append( + Finding( + severity=severity, + issue_type="style/pep8", + message=msg.strip(), + line_number=line_number, + code_snippet=code_snippet, + suggestion=None, + agent_name=agent_name, + rule_id=f"PYLINT_{msg_id}", + ) + ) + + return findings + + @staticmethod + def _map_severity(msg_id: str) -> Severity: + """ + Mapea el prefijo de mensaje pylint a severidad. + + Pylint usa prefijos: + - C = convention, R = refactor -> LOW + - W = warning -> MEDIUM + - E = error, F = fatal -> HIGH + + Args: + msg_id: ID del mensaje de pylint (ej: C0114, E0001). + + Returns: + Nivel de severidad correspondiente. + """ + if not msg_id: + return Severity.LOW + + prefix = msg_id[0].upper() + if prefix in ("E", "F"): + return Severity.HIGH + if prefix == "W": + return Severity.MEDIUM + return Severity.LOW diff --git a/backend/src/agents/base_agent.py b/backend/src/agents/base_agent.py index 866b9c3..2c943d2 100644 --- a/backend/src/agents/base_agent.py +++ b/backend/src/agents/base_agent.py @@ -1,199 +1,199 @@ -""" -Clase base abstracta para todos los agentes de análisis -""" - -import logging -from abc import ABC, abstractmethod -from datetime import datetime, timezone -from typing import TYPE_CHECKING, List, Optional - -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding - -if TYPE_CHECKING: - from src.core.events.event_bus import EventBus - - -class BaseAgent(ABC): - """ - Clase base abstracta para todos los agentes de análisis. 
- - Todos los agentes especializados deben heredar de esta clase - e implementar el método analyze(). - - Attributes: - name: Nombre identificador del agente - version: Versión del agente - category: Categoría (security, quality, performance, style) - enabled: Estado de habilitación del agente - event_bus: Sistema de eventos para comunicación (opcional) - - Example: - class SecurityAgent(BaseAgent): - def __init__(self): - super().__init__( - name="SecurityAgent", - version="1.0.0", - category="security" - ) - - def analyze(self, context: AnalysisContext) -> List[Finding]: - # Implementación específica - pass - """ - - def __init__( - self, - name: str, - version: str = "1.0.0", - category: str = "general", - enabled: bool = True, - event_bus: Optional["EventBus"] = None, - ) -> None: - # pylint: disable=too-many-arguments,too-many-positional-arguments - """ - Inicializa el agente base. - - Args: - name: Identificador único del agente - version: Versión del agente (semver) - category: Categoría de análisis - enabled: Si el agente está activo - event_bus: EventBus para emitir eventos (opcional) - - Raises: - ValueError: Si name está vacío - """ - if not name or not name.strip(): - raise ValueError("Agent name cannot be empty") - - self.name = name - self.version = version - self.category = category - self.enabled = enabled - self.event_bus = event_bus - self.logger = logging.getLogger(f"agents.{name}") - - self.logger.info("[%s] Agent initialized - version %s", self.name, self.version) - - @abstractmethod - def analyze(self, context: AnalysisContext) -> List[Finding]: - """ - Método abstracto que debe ser implementado por todas las clases hijas. - - Analiza el código en el contexto y retorna una lista de hallazgos. 
- - Args: - context: Contexto de análisis con código y metadata - - Returns: - Lista de Finding encontrados durante el análisis - - Raises: - NotImplementedError: Si no es implementado por la clase hija - """ - - def _emit_agent_started(self, context: AnalysisContext) -> None: - """Emite evento AGENT_STARTED.""" - if self.event_bus: - self.event_bus.publish( - "AGENT_STARTED", - { - "agent_name": self.name, - "analysis_id": str(context.analysis_id), - "timestamp": datetime.now(timezone.utc).isoformat(), - }, - ) - self.log_info("Analysis started") - - def _emit_agent_completed(self, context: AnalysisContext, findings: List[Finding]) -> None: - """Emite evento AGENT_COMPLETED.""" - if self.event_bus: - self.event_bus.publish( - "AGENT_COMPLETED", - { - "agent_name": self.name, - "analysis_id": str(context.analysis_id), - "findings_count": len(findings), - "timestamp": datetime.now(timezone.utc).isoformat(), - }, - ) - self.log_info(f"Analysis completed - {len(findings)} findings") - - def _emit_agent_failed(self, context: AnalysisContext, error: Exception) -> None: - """Emite evento AGENT_FAILED.""" - if self.event_bus: - self.event_bus.publish( - "AGENT_FAILED", - { - "agent_name": self.name, - "analysis_id": str(context.analysis_id), - "error": str(error), - "timestamp": datetime.now(timezone.utc).isoformat(), - }, - ) - self.log_error(f"Analysis failed: {error}") - - def is_enabled(self) -> bool: - """ - Verifica si el agente está habilitado. - - Returns: - True si el agente está habilitado, False en caso contrario - """ - return self.enabled - - def enable(self) -> None: - """Habilita el agente.""" - self.enabled = True - self.logger.info("[%s] Agent enabled", self.name) - - def disable(self) -> None: - """Deshabilita el agente.""" - self.enabled = False - self.logger.warning("[%s] Agent disabled", self.name) - - def get_info(self) -> dict: - """ - Retorna información metadata del agente. 
- - Returns: - Diccionario con información del agente - """ - return { - "name": self.name, - "version": self.version, - "category": self.category, - "enabled": self.enabled, - } - - def log_info(self, message: str) -> None: - """Log a message at INFO level.""" - self.logger.info("[%s] %s", self.name, message) - - def log_warning(self, message: str) -> None: - """Log a nivel WARNING.""" - self.logger.warning("[%s] %s", self.name, message) - - def log_error(self, message: str) -> None: - """Log a nivel ERROR.""" - self.logger.error("[%s] %s", self.name, message) - - def log_debug(self, message: str) -> None: - """Log a nivel DEBUG.""" - self.logger.debug("[%s] %s", self.name, message) - - def __repr__(self) -> str: - """Representación string del agente.""" - return ( - f"<{self.__class__.__name__}(" - f"name={self.name}, " - f"version={self.version}, " - f"category={self.category}, " - f"enabled={self.enabled})>" - ) - - def __str__(self) -> str: - """String amigable del agente.""" - status = "enabled" if self.enabled else "disabled" - return f"{self.name} v{self.version} ({self.category}) - {status}" +""" +Clase base abstracta para todos los agentes de análisis +""" + +import logging +from abc import ABC, abstractmethod +from datetime import datetime, timezone +from typing import TYPE_CHECKING, List, Optional + +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding + +if TYPE_CHECKING: + from src.core.events.event_bus import EventBus + + +class BaseAgent(ABC): + """ + Clase base abstracta para todos los agentes de análisis. + + Todos los agentes especializados deben heredar de esta clase + e implementar el método analyze(). 
+ + Attributes: + name: Nombre identificador del agente + version: Versión del agente + category: Categoría (security, quality, performance, style) + enabled: Estado de habilitación del agente + event_bus: Sistema de eventos para comunicación (opcional) + + Example: + class SecurityAgent(BaseAgent): + def __init__(self): + super().__init__( + name="SecurityAgent", + version="1.0.0", + category="security" + ) + + def analyze(self, context: AnalysisContext) -> List[Finding]: + # Implementación específica + pass + """ + + def __init__( + self, + name: str, + version: str = "1.0.0", + category: str = "general", + enabled: bool = True, + event_bus: Optional["EventBus"] = None, + ) -> None: + # pylint: disable=too-many-arguments,too-many-positional-arguments + """ + Inicializa el agente base. + + Args: + name: Identificador único del agente + version: Versión del agente (semver) + category: Categoría de análisis + enabled: Si el agente está activo + event_bus: EventBus para emitir eventos (opcional) + + Raises: + ValueError: Si name está vacío + """ + if not name or not name.strip(): + raise ValueError("Agent name cannot be empty") + + self.name = name + self.version = version + self.category = category + self.enabled = enabled + self.event_bus = event_bus + self.logger = logging.getLogger(f"agents.{name}") + + self.logger.info("[%s] Agent initialized - version %s", self.name, self.version) + + @abstractmethod + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Método abstracto que debe ser implementado por todas las clases hijas. + + Analiza el código en el contexto y retorna una lista de hallazgos. 
+ + Args: + context: Contexto de análisis con código y metadata + + Returns: + Lista de Finding encontrados durante el análisis + + Raises: + NotImplementedError: Si no es implementado por la clase hija + """ + + def _emit_agent_started(self, context: AnalysisContext) -> None: + """Emite evento AGENT_STARTED.""" + if self.event_bus: + self.event_bus.publish( + "AGENT_STARTED", + { + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "timestamp": datetime.now(timezone.utc).isoformat(), + }, + ) + self.log_info("Analysis started") + + def _emit_agent_completed(self, context: AnalysisContext, findings: List[Finding]) -> None: + """Emite evento AGENT_COMPLETED.""" + if self.event_bus: + self.event_bus.publish( + "AGENT_COMPLETED", + { + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "findings_count": len(findings), + "timestamp": datetime.now(timezone.utc).isoformat(), + }, + ) + self.log_info(f"Analysis completed - {len(findings)} findings") + + def _emit_agent_failed(self, context: AnalysisContext, error: Exception) -> None: + """Emite evento AGENT_FAILED.""" + if self.event_bus: + self.event_bus.publish( + "AGENT_FAILED", + { + "agent_name": self.name, + "analysis_id": str(context.analysis_id), + "error": str(error), + "timestamp": datetime.now(timezone.utc).isoformat(), + }, + ) + self.log_error(f"Analysis failed: {error}") + + def is_enabled(self) -> bool: + """ + Verifica si el agente está habilitado. + + Returns: + True si el agente está habilitado, False en caso contrario + """ + return self.enabled + + def enable(self) -> None: + """Habilita el agente.""" + self.enabled = True + self.logger.info("[%s] Agent enabled", self.name) + + def disable(self) -> None: + """Deshabilita el agente.""" + self.enabled = False + self.logger.warning("[%s] Agent disabled", self.name) + + def get_info(self) -> dict: + """ + Retorna información metadata del agente. 
+ + Returns: + Diccionario con información del agente + """ + return { + "name": self.name, + "version": self.version, + "category": self.category, + "enabled": self.enabled, + } + + def log_info(self, message: str) -> None: + """Log a message at INFO level.""" + self.logger.info("[%s] %s", self.name, message) + + def log_warning(self, message: str) -> None: + """Log a nivel WARNING.""" + self.logger.warning("[%s] %s", self.name, message) + + def log_error(self, message: str) -> None: + """Log a nivel ERROR.""" + self.logger.error("[%s] %s", self.name, message) + + def log_debug(self, message: str) -> None: + """Log a nivel DEBUG.""" + self.logger.debug("[%s] %s", self.name, message) + + def __repr__(self) -> str: + """Representación string del agente.""" + return ( + f"<{self.__class__.__name__}(" + f"name={self.name}, " + f"version={self.version}, " + f"category={self.category}, " + f"enabled={self.enabled})>" + ) + + def __str__(self) -> str: + """String amigable del agente.""" + status = "enabled" if self.enabled else "disabled" + return f"{self.name} v{self.version} ({self.category}) - {status}" diff --git a/backend/src/agents/quality_agent.py b/backend/src/agents/quality_agent.py index f3a2c05..ad10055 100644 --- a/backend/src/agents/quality_agent.py +++ b/backend/src/agents/quality_agent.py @@ -1,287 +1,287 @@ -""" -QualityAgent - Agente especializado en análisis de calidad de código Python. -""" - -import ast -from typing import Dict, List - -try: - from radon.complexity import cc_visit_ast as radon_visit - from radon.metrics import mi_visit -except ImportError: - radon_visit = None - mi_visit = None - -from src.agents.base_agent import BaseAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding, Severity - - -class QualityAgent(BaseAgent): - """ - Agente especializado en analizar la calidad del código Python. - Implementa métricas de complejidad, duplicación, longitud y mantenibilidad. 
- """ - - def __init__(self, event_bus=None): - """ - Inicializa QualityAgent con reglas de calidad. - - Args: - event_bus: EventBus para emitir eventos (opcional) - """ - super().__init__( - name="QualityAgent", - version="1.0.0", - category="quality", - enabled=True, - event_bus=event_bus, - ) - # Atributos definidos en el diagrama de dominio - self.complexity_threshold: int = 10 - self.duplication_threshold: float = 0.20 - self.function_length_threshold: int = 100 - - # Atributos adicionales para funcionalidades extra - self.maintainability_threshold: float = 50.0 - self.duplication_block_size: int = 4 - - self.logger.info("QualityAgent inicializado") - - def analyze(self, context: AnalysisContext) -> List[Finding]: - """ - Analiza código Python en busca de problemas de calidad. - - Args: - context (AnalysisContext): Contexto del análisis con código y metadatos. - - Returns: - List[Finding]: Lista de hallazgos ordenados por severidad. - """ - self._emit_agent_started(context) - findings: List[Finding] = [] - - try: - # Parsear AST una sola vez - try: - ast_tree = ast.parse(context.code_content) - except SyntaxError as e: - self.log_error(f"Error de sintaxis al parsear AST: {e}") - return findings - - # 1. Complejidad Ciclomática - findings.extend(self.calculate_complexity(ast_tree)) - - # 2. Duplicación de Código - findings.extend(self.detect_code_duplication(context.code_content)) - - # 3. Longitud de Funciones - findings.extend(self.measure_function_length(ast_tree)) - - # 4. 
Índice de Mantenibilidad - mi_score = self.calculate_maintainability_index(context.code_content) - if mi_score < self.maintainability_threshold: - findings.append(self._create_mi_finding(mi_score)) - - self._emit_agent_completed(context, findings) - - except Exception as e: - self._emit_agent_failed(context, e) - self.log_error(f"Error durante análisis de calidad: {str(e)}") - - # Ordenar hallazgos por severidad - findings.sort( - key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) - ) - - return findings - - def calculate_complexity(self, ast_tree: ast.AST) -> List[Finding]: - """ - Calcula la complejidad ciclomática usando Radon sobre el AST. - - Args: - ast_tree (ast.AST): Árbol de sintaxis abstracta del código. - - Returns: - List[Finding]: Lista de hallazgos de complejidad que superan el umbral. - """ - findings: List[Finding] = [] - if not radon_visit: - return findings - - try: - # radon.complexity.visit retorna una lista de bloques (Function, Class) - blocks = radon_visit(ast_tree) - for block in blocks: - # Filtramos solo funciones/métodos - if hasattr(block, "complexity") and block.complexity > self.complexity_threshold: - severity = Severity.MEDIUM - if block.complexity > 20: - severity = Severity.HIGH - if block.complexity > 50: - severity = Severity.CRITICAL - - finding = Finding( - severity=severity, - issue_type="quality/cyclomatic-complexity", - message=( - f"Alta complejidad ciclomática detectada ({block.complexity}) " - f"en '{block.name}'" - ), - line_number=block.lineno, - code_snippet=f"def {block.name}...", - suggestion=( - f"Refactoriza '{block.name}' para reducir su complejidad " - f"(objetivo: < {self.complexity_threshold})." 
- ), - agent_name=self.name, - rule_id="QUAL001_COMPLEXITY", - ) - findings.append(finding) - except Exception as e: - self.log_error(f"Error en cálculo de complejidad: {e}") - - return findings - - def detect_code_duplication(self, code: str) -> List[Finding]: - """ - Detecta duplicación de código mediante hashing de bloques. - - Args: - code (str): Código fuente completo a analizar. - - Returns: - List[Finding]: Lista de hallazgos de bloques de código duplicados. - """ - findings: List[Finding] = [] - lines = [line.strip() for line in code.splitlines()] - block_hashes: Dict[str, List[int]] = {} - block_size = self.duplication_block_size - - if len(lines) < block_size: - return findings - - for i in range(len(lines) - block_size + 1): - block_content = "".join(lines[i : i + block_size]) - if not block_content or block_content.startswith("#"): - continue - - # Non-cryptographic hash for code duplication detection; intentionally not secure. - block_hash = str(hash(block_content)) - - if block_hash in block_hashes: - original_line = block_hashes[block_hash][0] - if i > original_line + block_size: - finding = Finding( - severity=Severity.MEDIUM, - issue_type="quality/duplication", - message=( - f"Bloque de código duplicado " - f"(original en línea {original_line + 1})" - ), - line_number=i + 1, - code_snippet="...", - suggestion="Extrae la lógica duplicada a una función reutilizable", - agent_name=self.name, - rule_id="QUAL002_DUPLICATION", - ) - findings.append(finding) - else: - block_hashes[block_hash] = [i] - - return findings - - def measure_function_length(self, ast_tree: ast.AST) -> List[Finding]: - """ - Mide la longitud de las funciones y detecta las que exceden el umbral. - - Args: - ast_tree (ast.AST): Árbol de sintaxis abstracta del código. - - Returns: - List[Finding]: Lista de hallazgos de funciones demasiado largas. 
- """ - findings: List[Finding] = [] - - for node in ast.walk(ast_tree): - if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): - info = self._visit_function_def(node) - length = info.get("length", 0) - - if length > self.function_length_threshold: - finding = Finding( - severity=Severity.MEDIUM, - issue_type="quality/function-length", - message=f"Función '{node.name}' demasiado larga ({length} líneas)", - line_number=node.lineno, - code_snippet=f"def {node.name}...", - suggestion=( - f"""Divide la función en partes más pequeñas """ - f"""(límite: {self.function_length_threshold})""" - ), - agent_name=self.name, - rule_id="QUAL005_FUNCTION_LENGTH", - ) - findings.append(finding) - - return findings - - def calculate_maintainability_index(self, code: str) -> float: - """ - Calcula el índice de mantenibilidad (MI) del código. - - Args: - code (str): Código fuente a analizar. - - Returns: - float: Puntuación del índice de mantenibilidad (0-100). - """ - if not mi_visit: - return 100.0 - try: - return mi_visit(code, True) - except Exception: - return 100.0 - - def _visit_function_def(self, node: ast.FunctionDef) -> Dict: - """ - Helper para extraer información de una definición de función. - - Args: - node (ast.FunctionDef): Nodo de definición de función del AST. - - Returns: - Dict: Diccionario con nombre, línea de inicio y longitud de la función. - """ - length = 0 - if hasattr(node, "end_lineno") and hasattr(node, "lineno"): - length = node.end_lineno - node.lineno - return {"name": node.name, "lineno": node.lineno, "length": length} - - def _create_mi_finding(self, score: float) -> Finding: - """ - Crea un hallazgo para un índice de mantenibilidad bajo. - - Args: - score (float): Puntuación de mantenibilidad calculada. - - Returns: - Finding: Objeto Finding con la severidad y detalles correspondientes. 
- """ - severity = Severity.MEDIUM - if score < 20: - severity = Severity.CRITICAL - elif score < 40: - severity = Severity.HIGH - - return Finding( - severity=severity, - issue_type="quality/maintainability-index", - message=f"Índice de mantenibilidad bajo ({score:.2f})", - line_number=1, - code_snippet=None, - suggestion="Mejora la mantenibilidad reduciendo complejidad.", - agent_name=self.name, - rule_id="QUAL003_MAINTAINABILITY", - ) +""" +QualityAgent - Agente especializado en análisis de calidad de código Python. +""" + +import ast +from typing import Dict, List + +try: + from radon.complexity import cc_visit_ast as radon_visit + from radon.metrics import mi_visit +except ImportError: + radon_visit = None + mi_visit = None + +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class QualityAgent(BaseAgent): + """ + Agente especializado en analizar la calidad del código Python. + Implementa métricas de complejidad, duplicación, longitud y mantenibilidad. + """ + + def __init__(self, event_bus=None): + """ + Inicializa QualityAgent con reglas de calidad. + + Args: + event_bus: EventBus para emitir eventos (opcional) + """ + super().__init__( + name="QualityAgent", + version="1.0.0", + category="quality", + enabled=True, + event_bus=event_bus, + ) + # Atributos definidos en el diagrama de dominio + self.complexity_threshold: int = 10 + self.duplication_threshold: float = 0.20 + self.function_length_threshold: int = 100 + + # Atributos adicionales para funcionalidades extra + self.maintainability_threshold: float = 50.0 + self.duplication_block_size: int = 4 + + self.logger.info("QualityAgent inicializado") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analiza código Python en busca de problemas de calidad. + + Args: + context (AnalysisContext): Contexto del análisis con código y metadatos. 
+ + Returns: + List[Finding]: Lista de hallazgos ordenados por severidad. + """ + self._emit_agent_started(context) + findings: List[Finding] = [] + + try: + # Parsear AST una sola vez + try: + ast_tree = ast.parse(context.code_content) + except SyntaxError as e: + self.log_error(f"Error de sintaxis al parsear AST: {e}") + return findings + + # 1. Complejidad Ciclomática + findings.extend(self.calculate_complexity(ast_tree)) + + # 2. Duplicación de Código + findings.extend(self.detect_code_duplication(context.code_content)) + + # 3. Longitud de Funciones + findings.extend(self.measure_function_length(ast_tree)) + + # 4. Índice de Mantenibilidad + mi_score = self.calculate_maintainability_index(context.code_content) + if mi_score < self.maintainability_threshold: + findings.append(self._create_mi_finding(mi_score)) + + self._emit_agent_completed(context, findings) + + except Exception as e: + self._emit_agent_failed(context, e) + self.log_error(f"Error durante análisis de calidad: {str(e)}") + + # Ordenar hallazgos por severidad + findings.sort( + key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) + ) + + return findings + + def calculate_complexity(self, ast_tree: ast.AST) -> List[Finding]: + """ + Calcula la complejidad ciclomática usando Radon sobre el AST. + + Args: + ast_tree (ast.AST): Árbol de sintaxis abstracta del código. + + Returns: + List[Finding]: Lista de hallazgos de complejidad que superan el umbral. 
+ """ + findings: List[Finding] = [] + if not radon_visit: + return findings + + try: + # radon.complexity.visit retorna una lista de bloques (Function, Class) + blocks = radon_visit(ast_tree) + for block in blocks: + # Filtramos solo funciones/métodos + if hasattr(block, "complexity") and block.complexity > self.complexity_threshold: + severity = Severity.MEDIUM + if block.complexity > 20: + severity = Severity.HIGH + if block.complexity > 50: + severity = Severity.CRITICAL + + finding = Finding( + severity=severity, + issue_type="quality/cyclomatic-complexity", + message=( + f"Alta complejidad ciclomática detectada ({block.complexity}) " + f"en '{block.name}'" + ), + line_number=block.lineno, + code_snippet=f"def {block.name}...", + suggestion=( + f"Refactoriza '{block.name}' para reducir su complejidad " + f"(objetivo: < {self.complexity_threshold})." + ), + agent_name=self.name, + rule_id="QUAL001_COMPLEXITY", + ) + findings.append(finding) + except Exception as e: + self.log_error(f"Error en cálculo de complejidad: {e}") + + return findings + + def detect_code_duplication(self, code: str) -> List[Finding]: + """ + Detecta duplicación de código mediante hashing de bloques. + + Args: + code (str): Código fuente completo a analizar. + + Returns: + List[Finding]: Lista de hallazgos de bloques de código duplicados. + """ + findings: List[Finding] = [] + lines = [line.strip() for line in code.splitlines()] + block_hashes: Dict[str, List[int]] = {} + block_size = self.duplication_block_size + + if len(lines) < block_size: + return findings + + for i in range(len(lines) - block_size + 1): + block_content = "".join(lines[i : i + block_size]) + if not block_content or block_content.startswith("#"): + continue + + # Non-cryptographic hash for code duplication detection; intentionally not secure. 
+ block_hash = str(hash(block_content)) + + if block_hash in block_hashes: + original_line = block_hashes[block_hash][0] + if i > original_line + block_size: + finding = Finding( + severity=Severity.MEDIUM, + issue_type="quality/duplication", + message=( + f"Bloque de código duplicado " + f"(original en línea {original_line + 1})" + ), + line_number=i + 1, + code_snippet="...", + suggestion="Extrae la lógica duplicada a una función reutilizable", + agent_name=self.name, + rule_id="QUAL002_DUPLICATION", + ) + findings.append(finding) + else: + block_hashes[block_hash] = [i] + + return findings + + def measure_function_length(self, ast_tree: ast.AST) -> List[Finding]: + """ + Mide la longitud de las funciones y detecta las que exceden el umbral. + + Args: + ast_tree (ast.AST): Árbol de sintaxis abstracta del código. + + Returns: + List[Finding]: Lista de hallazgos de funciones demasiado largas. + """ + findings: List[Finding] = [] + + for node in ast.walk(ast_tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + info = self._visit_function_def(node) + length = info.get("length", 0) + + if length > self.function_length_threshold: + finding = Finding( + severity=Severity.MEDIUM, + issue_type="quality/function-length", + message=f"Función '{node.name}' demasiado larga ({length} líneas)", + line_number=node.lineno, + code_snippet=f"def {node.name}...", + suggestion=( + f"""Divide la función en partes más pequeñas """ + f"""(límite: {self.function_length_threshold})""" + ), + agent_name=self.name, + rule_id="QUAL005_FUNCTION_LENGTH", + ) + findings.append(finding) + + return findings + + def calculate_maintainability_index(self, code: str) -> float: + """ + Calcula el índice de mantenibilidad (MI) del código. + + Args: + code (str): Código fuente a analizar. + + Returns: + float: Puntuación del índice de mantenibilidad (0-100). 
+ """ + if not mi_visit: + return 100.0 + try: + return mi_visit(code, True) + except Exception: + return 100.0 + + def _visit_function_def(self, node: ast.FunctionDef) -> Dict: + """ + Helper para extraer información de una definición de función. + + Args: + node (ast.FunctionDef): Nodo de definición de función del AST. + + Returns: + Dict: Diccionario con nombre, línea de inicio y longitud de la función. + """ + length = 0 + if hasattr(node, "end_lineno") and hasattr(node, "lineno"): + length = node.end_lineno - node.lineno + return {"name": node.name, "lineno": node.lineno, "length": length} + + def _create_mi_finding(self, score: float) -> Finding: + """ + Crea un hallazgo para un índice de mantenibilidad bajo. + + Args: + score (float): Puntuación de mantenibilidad calculada. + + Returns: + Finding: Objeto Finding con la severidad y detalles correspondientes. + """ + severity = Severity.MEDIUM + if score < 20: + severity = Severity.CRITICAL + elif score < 40: + severity = Severity.HIGH + + return Finding( + severity=severity, + issue_type="quality/maintainability-index", + message=f"Índice de mantenibilidad bajo ({score:.2f})", + line_number=1, + code_snippet=None, + suggestion="Mejora la mantenibilidad reduciendo complejidad.", + agent_name=self.name, + rule_id="QUAL003_MAINTAINABILITY", + ) diff --git a/backend/src/agents/security_agent.py b/backend/src/agents/security_agent.py index 276806f..07643d2 100644 --- a/backend/src/agents/security_agent.py +++ b/backend/src/agents/security_agent.py @@ -1,648 +1,648 @@ -""" -SecurityAgent - Agente especializado en detección de vulnerabilidades de seguridad. - -Este agente analiza código Python en busca de problemas de seguridad comunes incluyendo: -- Funciones peligrosas (eval, exec, pickle, etc.) 
-- Vulnerabilidades de inyección SQL -- Credenciales hardcodeadas (contraseñas, API keys, tokens) -- Algoritmos criptográficos débiles (MD5, SHA1, DES) -""" - -import ast -import re -from typing import Dict, List, Optional, Set - -from src.agents.base_agent import BaseAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding, Severity - - -class SecurityAgent(BaseAgent): - """ - Agente especializado en detectar vulnerabilidades de seguridad en código Python. - - Analiza el código usando múltiples estrategias de detección: - 1. Análisis AST (Abstract Syntax Tree) para funciones peligrosas - 2. Coincidencia de patrones regex para inyección SQL - 3. Regex y detección de placeholders para credenciales hardcodeadas - 4. Análisis AST para algoritmos criptográficos débiles - - Atributos: - DANGEROUS_FUNCTIONS: Conjunto de nombres de funciones consideradas peligrosas - SQL_INJECTION_PATTERNS: Patrones regex para detección de inyección SQL - CREDENTIAL_PATTERNS: Patrones regex para detección de credenciales - WEAK_CRYPTO_ALGORITHMS: Conjunto de nombres de algoritmos criptográficos débiles - - Ejemplo: - >>> agent = SecurityAgent() - >>> context = AnalysisContext( - ... code_content="result = eval(user_input)", - ... filename="vulnerable.py" - ... 
) - >>> findings = agent.analyze(context) - >>> assert len(findings) >= 1 - >>> assert findings[0].severity == Severity.CRITICAL - """ - - # Funciones peligrosas que permiten ejecución arbitraria de código - DANGEROUS_FUNCTIONS: Set[str] = { - "eval", - "exec", - "compile", - "__import__", - "execfile", # Python 2 - } - - # Funciones peligrosas de pickle/serialización - PICKLE_FUNCTIONS: Set[str] = { - "pickle.loads", - "pickle.load", - "cPickle.loads", - "cPickle.load", - "yaml.load", # Sin argumento Loader - "marshal.loads", - } - - # Patrones de inyección SQL (regex) - CORREGIDOS - SQL_INJECTION_PATTERNS: List[str] = [ - r'execute\s*\(\s*["\'].*\+', # Concatenación con + - r'execute\s*\(\s*f["\']', # f-strings en execute directo - r'execute\s*\(\s*["\'].*%s', # %s formatting - r'execute\s*\(\s*["\'].*\.format', # .format() en execute - r'\.execute\s*\(\s*["\'].*\+\s*\w', # execute con concatenación y variable - ] - - # Patrones de credenciales (regex) - CREDENTIAL_PATTERNS: List[dict] = [ - { - "pattern": r'password\s*=\s*["\'][^"\']{8,}["\']', - "name": "password", - "severity": Severity.CRITICAL, - }, - { - "pattern": r'api[_-]?key\s*=\s*["\'][^"\']{10,}["\']', - "name": "api_key", - "severity": Severity.CRITICAL, - }, - { - "pattern": r'secret[_-]?key\s*=\s*["\'][^"\']{10,}["\']', - "name": "secret_key", - "severity": Severity.CRITICAL, - }, - { - "pattern": r'token\s*=\s*["\'][^"\']{10,}["\']', - "name": "token", - "severity": Severity.HIGH, - }, - { - "pattern": r'access[_-]?key\s*=\s*["\'][^"\']{10,}["\']', - "name": "access_key", - "severity": Severity.HIGH, - }, - ] - - # Placeholders a ignorar (no son credenciales reales) - PLACEHOLDER_PATTERNS: List[str] = [ - r"YOUR_", - r"REPLACE_", - r"CHANGE_", - r"TODO", - r"FIXME", - r"example", - r"test", - r"dummy", - r"<.*>", - r"\*+", - r"xxx+", - ] - - # Algoritmos criptográficos débiles - WEAK_CRYPTO_ALGORITHMS: Set[str] = { - "md5", - "sha1", - "DES", - "RC4", - "Blowfish", - } - - SQL_INJECTION_MESSAGE = 
( - "Posible vulnerabilidad de inyección SQL detectada - " - "entrada de usuario concatenada o formateada en consulta" - ) - SQL_INJECTION_SUGGESTION = ( - "Use parameterized queries or an ORM: " - "cursor.execute('SELECT * FROM users WHERE id=?', (user_id,))" - ) - - def __init__(self): - """Inicializa SecurityAgent con reglas de seguridad predefinidas.""" - super().__init__(name="SecurityAgent", version="1.0.0", category="security", enabled=True) - self.logger.info("SecurityAgent inicializado con 4 módulos de detección") - - def analyze(self, context: AnalysisContext) -> List[Finding]: - """ - Analiza código Python en busca de vulnerabilidades de seguridad. - - Ejecuta 4 tipos de análisis de seguridad: - 1. Detección de funciones peligrosas (eval, exec, etc.) - 2. Detección de patrones de inyección SQL - 3. Detección de credenciales hardcodeadas - 4. Detección de criptografía débil - - Args: - context: Contexto de análisis que contiene el código y metadata - - Returns: - Lista de hallazgos de seguridad, ordenados por severidad (CRITICAL primero) - - Raises: - SyntaxError: Si el código tiene sintaxis Python inválida (se registra, no se lanza) - - Ejemplo: - >>> agent = SecurityAgent() - >>> context = AnalysisContext( - ... code_content="password = 'MySecret123'", - ... filename="config.py" - ... 
) - >>> findings = agent.analyze(context) - >>> assert any(f.issue_type == "hardcoded_credentials" for f in findings) - """ - self.log_info(f"Iniciando análisis de seguridad para {context.filename}") - findings: List[Finding] = [] - - try: - # Módulo 1: Detectar funciones peligrosas - dangerous_findings = self._detect_dangerous_functions(context) - findings.extend(dangerous_findings) - self.log_debug(f"Funciones peligrosas: {len(dangerous_findings)} hallazgos") - - # Módulo 2: Detectar patrones de inyección SQL (regex + AST) - sql_findings = self._detect_sql_injection(context) - findings.extend(sql_findings) - self.log_debug(f"Inyección SQL: {len(sql_findings)} hallazgos") - - # Módulo 3: Detectar credenciales hardcodeadas - credential_findings = self._detect_hardcoded_credentials(context) - findings.extend(credential_findings) - self.log_debug(f"Credenciales hardcodeadas: {len(credential_findings)} hallazgos") - - # Módulo 4: Detectar criptografía débil - crypto_findings = self._detect_weak_crypto(context) - findings.extend(crypto_findings) - self.log_debug(f"Criptografía débil: {len(crypto_findings)} hallazgos") - - except SyntaxError as e: - self.log_error( - f"Error de sintaxis en {context.filename}: {e}. " - "Algunos módulos de análisis pueden tener resultados incompletos." - ) - # Continuar con hallazgos de módulos que no necesitan análisis AST - - # Ordenar hallazgos por severidad (CRITICAL primero) - findings.sort( - key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) - ) - - self.log_info( - f"Análisis de seguridad completado: {len(findings)} hallazgos " - f"({sum(1 for f in findings if f.is_critical)} críticos)" - ) - - return findings - - def _detect_dangerous_functions(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta funciones peligrosas como eval(), exec() usando análisis AST. 
- - Args: - context: Contexto de análisis con el código a analizar - - Returns: - Lista de hallazgos para uso de funciones peligrosas - """ - findings: List[Finding] = [] - - try: - tree = ast.parse(context.code_content) - - for node in ast.walk(tree): - if isinstance(node, ast.Call): - func_name = self._get_function_name(node) - - # Verificar funciones peligrosas directas - if func_name in self.DANGEROUS_FUNCTIONS: - finding = Finding( - severity=Severity.CRITICAL, - issue_type="dangerous_function", - message=( - f"Uso de {func_name}() detectado - " - "permite ejecución arbitraria de código" - ), - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=self._get_dangerous_function_suggestion(func_name), - agent_name=self.name, - rule_id=f"SEC001_{func_name.upper()}", - ) - findings.append(finding) - - # Verificar funciones de pickle/serialización - elif func_name in self.PICKLE_FUNCTIONS: - finding = Finding( - severity=Severity.HIGH, - issue_type="unsafe_deserialization", - message=( - f"Uso de {func_name} detectado - " - "puede ejecutar código arbitrario durante " - "deserialización" - ), - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Use json.loads() for data deserialization or " - "validate pickle sources" - ), - agent_name=self.name, - rule_id="SEC001_PICKLE", - ) - findings.append(finding) - - except SyntaxError: - # El código fuente puede estar incompleto o contener errores de sintaxis. - # Ignoramos el error porque no se puede analizar AST en código inválido. - pass - - return findings - - def _detect_sql_injection(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta vulnerabilidades de inyección SQL usando patrones regex mejorados. 
- - Detecta múltiples patrones comunes de SQL injection: - - Concatenación de strings con + - - Formateo con %s - - F-strings con {} - - .format() en queries - - Palabras clave SQL con variables - - Args: - context: Contexto de análisis con el código a analizar - - Returns: - Lista de hallazgos para vulnerabilidades de inyección SQL - """ - findings: List[Finding] = [] - found_sql_lines: Set[int] = set() - - findings.extend(self._detect_sql_injection_patterns(context, found_sql_lines)) - findings.extend(self._detect_sql_injection_ast(context, found_sql_lines)) - return findings - - def _detect_sql_injection_patterns( - self, context: AnalysisContext, found_sql_lines: Set[int] - ) -> List[Finding]: - """Analiza línea por línea usando regex para detectar SQL injection directa.""" - findings: List[Finding] = [] - lines = context.code_content.splitlines() - - for line_num, line in enumerate(lines, start=1): - stripped = line.strip() - if not stripped or stripped.startswith("#") or line_num in found_sql_lines: - continue - - for pattern in self.SQL_INJECTION_PATTERNS: - if re.search(pattern, line, re.IGNORECASE | re.MULTILINE): - findings.append( - Finding( - severity=Severity.HIGH, - issue_type="sql_injection", - message=self.SQL_INJECTION_MESSAGE, - line_number=line_num, - code_snippet=stripped, - suggestion=self.SQL_INJECTION_SUGGESTION, - agent_name=self.name, - rule_id="SEC002_SQL_INJECTION", - ) - ) - found_sql_lines.add(line_num) - break - - return findings - - def _detect_sql_injection_ast( - self, context: AnalysisContext, found_sql_lines: Set[int] - ) -> List[Finding]: - """Analiza el AST para detectar queries construidas antes de ejecutar.""" - findings: List[Finding] = [] - suspicious_vars = self._collect_suspicious_query_assignments(context) - if not suspicious_vars: - return findings - - execute_calls = self._find_execute_calls(context) - for line_num, argument in execute_calls: - if line_num not in found_sql_lines and self._is_suspicious_execute_arg( - 
argument, suspicious_vars - ): - findings.append( - Finding( - severity=Severity.HIGH, - issue_type="sql_injection", - message=self.SQL_INJECTION_MESSAGE, - line_number=line_num, - code_snippet=self._get_code_snippet(context, line_num), - suggestion=self.SQL_INJECTION_SUGGESTION, - agent_name=self.name, - rule_id="SEC002_SQL_INJECTION", - ) - ) - found_sql_lines.add(line_num) - - return findings - - @staticmethod - def _collect_suspicious_query_assignments( - context: AnalysisContext, - ) -> Dict[str, str]: - """Construye un mapa de variables que contienen posibles queries inseguras.""" - suspicious_vars: Dict[str, str] = {} - try: - tree = ast.parse(context.code_content) - except SyntaxError: - return suspicious_vars - - for node in ast.walk(tree): - if isinstance(node, ast.Assign) and node.targets: - target = node.targets[0] - if isinstance(target, ast.Name): - assignment_type = SecurityAgent._classify_sql_assignment(node.value) - if assignment_type: - suspicious_vars[target.id] = assignment_type - return suspicious_vars - - @staticmethod - def _find_execute_calls(context: AnalysisContext) -> List[tuple[int, ast.AST]]: - """Obtiene las llamadas a execute() con su línea y primer argumento.""" - execute_calls: List[tuple[int, ast.AST]] = [] - try: - tree = ast.parse(context.code_content) - except SyntaxError: - return execute_calls - - for node in ast.walk(tree): - if ( - isinstance(node, ast.Call) - and isinstance(node.func, ast.Attribute) - and node.func.attr == "execute" - and node.args - ): - line_num = getattr(node, "lineno", 1) - execute_calls.append((line_num, node.args[0])) - return execute_calls - - @staticmethod - def _classify_sql_assignment(value: ast.AST) -> Optional[str]: - """Clasifica asignaciones sospechosas de queries.""" - if isinstance(value, ast.JoinedStr): - return "fstring" - if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Add): - return "concat" - if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Mod): - return "mod" 
- if ( - isinstance(value, ast.Call) - and isinstance(value.func, ast.Attribute) - and value.func.attr == "format" - ): - return "format" - return None - - @staticmethod - def _is_suspicious_execute_arg(arg: ast.AST, suspicious_vars: Dict[str, str]) -> bool: - """Determina si el argumento pasado a execute es potencialmente inseguro.""" - if isinstance(arg, ast.JoinedStr): - return True - if isinstance(arg, ast.BinOp) and isinstance(arg.op, (ast.Add, ast.Mod)): - return True - if ( - isinstance(arg, ast.Call) - and isinstance(arg.func, ast.Attribute) - and arg.func.attr == "format" - ): - return True - if isinstance(arg, ast.Name) and arg.id in suspicious_vars: - return True - return False - - def _detect_hardcoded_credentials(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta credenciales hardcodeadas usando patrones regex y detección de placeholders. - - Busca patrones comunes como: - - password = "valor" - - api_key = "valor" - - secret_key = "valor" - - token = "valor" - - Filtra falsos positivos ignorando placeholders y valores cortos. 
- - Args: - context: Contexto de análisis con el código a analizar - - Returns: - Lista de hallazgos para credenciales hardcodeadas - """ - findings: List[Finding] = [] - lines = context.code_content.splitlines() - - for line_num, line in enumerate(lines, start=1): - # Saltar comentarios y líneas vacías - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - - for cred_config in self.CREDENTIAL_PATTERNS: - pattern = cred_config["pattern"] - cred_name = cred_config["name"] - severity = cred_config["severity"] - - match = re.search(pattern, line, re.IGNORECASE) - if match: - value = match.group(0).split("=")[1].strip().strip("\"'") - if self._is_placeholder(value) or len(value) < 8: - continue - - env_var = cred_name.upper() - finding = Finding( - severity=severity, - issue_type="hardcoded_credentials", - message=( - f"Hardcoded {cred_name} detected - secrets " - "should not be in source code" - ), - line_number=line_num, - code_snippet=line.strip(), - suggestion=( - f"Use environment variables: {env_var} = " f"os.getenv('{env_var}')" - ), - agent_name=self.name, - rule_id=f"SEC003_{env_var}", - ) - findings.append(finding) - break # Solo un hallazgo por línea - - return findings - - def _detect_weak_crypto(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta uso de algoritmos criptográficos débiles. 
- - Busca uso de: - - hashlib.md5() - - hashlib.sha1() - - Crypto.Cipher.DES - - RC4 - - Blowfish - - Args: - context: Contexto de análisis con el código a analizar - - Returns: - Lista de hallazgos para criptografía débil - """ - findings: List[Finding] = [] - - try: - tree = ast.parse(context.code_content) - - for node in ast.walk(tree): - if isinstance(node, ast.Call): - func_name = self._get_function_name(node) - if not func_name: - continue - - lower_name = func_name.lower() - - # Verificar funciones débiles de hash (md5 o sha1 en cualquiera de sus formas) - if "md5" in lower_name or "sha1" in lower_name: - algo = "MD5" if "md5" in lower_name else "SHA1" - finding = Finding( - severity=Severity.MEDIUM, - issue_type="weak_cryptography", - message=f"Uso de algoritmo de hash débil {algo} detectado", - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion="Usa SHA-256 o superior: hashlib.sha256()", - agent_name=self.name, - rule_id=f"SEC004_{algo}", - ) - findings.append(finding) - continue - - # Verificar algoritmos débiles de encriptación en librería Crypto - if any(weak in func_name for weak in ["DES", "RC4", "Blowfish"]): - finding = Finding( - severity=Severity.HIGH, - issue_type="weak_cryptography", - message=( - "Uso de algoritmo de encriptación débil " f"detectado: {func_name}" - ), - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion="Usa AES-256 con Crypto.Cipher.AES", - agent_name=self.name, - rule_id="SEC004_WEAK_ENCRYPTION", - ) - findings.append(finding) - - except SyntaxError: - # El código fuente puede estar incompleto o contener errores de sintaxis. - # Ignoramos el error porque no se puede analizar criptografía en código inválido. - pass - - return findings - - def _get_function_name(self, node: ast.Call) -> str: - """ - Extrae el nombre de la función de un nodo Call del AST. 
- - Maneja tanto llamadas simples (func()) como llamadas de atributo (module.func()). - - Args: - node: Nodo Call del AST - - Returns: - Nombre de la función como string (ej: "eval" o "hashlib.md5") - """ - if isinstance(node.func, ast.Name): - return node.func.id - if isinstance(node.func, ast.Attribute): - if isinstance(node.func.value, ast.Name): - return f"{node.func.value.id}.{node.func.attr}" - return node.func.attr - return "" - - def _get_code_snippet( - self, context: AnalysisContext, line_number: int, context_lines: int = 0 - ) -> str: - """ - Extrae fragmento de código alrededor de la línea especificada. - - Args: - context: Contexto de análisis con el código - line_number: Número de línea (1-based) a extraer - context_lines: Número de líneas antes/después a incluir - - Returns: - Fragmento de código como string - """ - lines = context.code_content.splitlines() - - if 1 <= line_number <= len(lines): - start = max(0, line_number - 1 - context_lines) - end = min(len(lines), line_number + context_lines) - snippet_lines = lines[start:end] - return "\n".join(snippet_lines) - - return "" - - def _get_dangerous_function_suggestion(self, func_name: str) -> str: - """ - Obtiene sugerencia específica para el uso de función peligrosa. - - Args: - func_name: Nombre de la función peligrosa - - Returns: - String con sugerencia de alternativa segura - """ - suggestions = { - "eval": "Use ast.literal_eval() for safe evaluation of literals", - "exec": "Avoid exec() or validate input strictly with whitelisting", - "compile": "Avoid compile() or validate source code strictly", - "__import__": "Use importlib.import_module() with validation", - "execfile": "Use with open() and exec() with strict validation (Python 2 only)", - } - return suggestions.get(func_name, f"Avoid using {func_name}() or validate input strictly") - - def _is_placeholder(self, value: str) -> bool: - """ - Verifica si un valor de credencial es un placeholder (no un secreto real). 
- - Ignora valores que contienen patrones comunes de placeholders como: - - YOUR_, REPLACE_, CHANGE_ - - TODO, FIXME - - example, test, dummy - - Args: - value: Valor de credencial a verificar - - Returns: - True si el valor es un placeholder, False en caso contrario - """ - value_lower = value.lower() - - for pattern in self.PLACEHOLDER_PATTERNS: - if re.search(pattern, value_lower, re.IGNORECASE): - return True - - return False +""" +SecurityAgent - Agente especializado en detección de vulnerabilidades de seguridad. + +Este agente analiza código Python en busca de problemas de seguridad comunes incluyendo: +- Funciones peligrosas (eval, exec, pickle, etc.) +- Vulnerabilidades de inyección SQL +- Credenciales hardcodeadas (contraseñas, API keys, tokens) +- Algoritmos criptográficos débiles (MD5, SHA1, DES) +""" + +import ast +import re +from typing import Dict, List, Optional, Set + +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class SecurityAgent(BaseAgent): + """ + Agente especializado en detectar vulnerabilidades de seguridad en código Python. + + Analiza el código usando múltiples estrategias de detección: + 1. Análisis AST (Abstract Syntax Tree) para funciones peligrosas + 2. Coincidencia de patrones regex para inyección SQL + 3. Regex y detección de placeholders para credenciales hardcodeadas + 4. Análisis AST para algoritmos criptográficos débiles + + Atributos: + DANGEROUS_FUNCTIONS: Conjunto de nombres de funciones consideradas peligrosas + SQL_INJECTION_PATTERNS: Patrones regex para detección de inyección SQL + CREDENTIAL_PATTERNS: Patrones regex para detección de credenciales + WEAK_CRYPTO_ALGORITHMS: Conjunto de nombres de algoritmos criptográficos débiles + + Ejemplo: + >>> agent = SecurityAgent() + >>> context = AnalysisContext( + ... code_content="result = eval(user_input)", + ... filename="vulnerable.py" + ... 
) + >>> findings = agent.analyze(context) + >>> assert len(findings) >= 1 + >>> assert findings[0].severity == Severity.CRITICAL + """ + + # Funciones peligrosas que permiten ejecución arbitraria de código + DANGEROUS_FUNCTIONS: Set[str] = { + "eval", + "exec", + "compile", + "__import__", + "execfile", # Python 2 + } + + # Funciones peligrosas de pickle/serialización + PICKLE_FUNCTIONS: Set[str] = { + "pickle.loads", + "pickle.load", + "cPickle.loads", + "cPickle.load", + "yaml.load", # Sin argumento Loader + "marshal.loads", + } + + # Patrones de inyección SQL (regex) - CORREGIDOS + SQL_INJECTION_PATTERNS: List[str] = [ + r'execute\s*\(\s*["\'].*\+', # Concatenación con + + r'execute\s*\(\s*f["\']', # f-strings en execute directo + r'execute\s*\(\s*["\'].*%s', # %s formatting + r'execute\s*\(\s*["\'].*\.format', # .format() en execute + r'\.execute\s*\(\s*["\'].*\+\s*\w', # execute con concatenación y variable + ] + + # Patrones de credenciales (regex) + CREDENTIAL_PATTERNS: List[dict] = [ + { + "pattern": r'password\s*=\s*["\'][^"\']{8,}["\']', + "name": "password", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'api[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "api_key", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'secret[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "secret_key", + "severity": Severity.CRITICAL, + }, + { + "pattern": r'token\s*=\s*["\'][^"\']{10,}["\']', + "name": "token", + "severity": Severity.HIGH, + }, + { + "pattern": r'access[_-]?key\s*=\s*["\'][^"\']{10,}["\']', + "name": "access_key", + "severity": Severity.HIGH, + }, + ] + + # Placeholders a ignorar (no son credenciales reales) + PLACEHOLDER_PATTERNS: List[str] = [ + r"YOUR_", + r"REPLACE_", + r"CHANGE_", + r"TODO", + r"FIXME", + r"example", + r"test", + r"dummy", + r"<.*>", + r"\*+", + r"xxx+", + ] + + # Algoritmos criptográficos débiles + WEAK_CRYPTO_ALGORITHMS: Set[str] = { + "md5", + "sha1", + "DES", + "RC4", + "Blowfish", + } + + SQL_INJECTION_MESSAGE = 
( + "Posible vulnerabilidad de inyección SQL detectada - " + "entrada de usuario concatenada o formateada en consulta" + ) + SQL_INJECTION_SUGGESTION = ( + "Use parameterized queries or an ORM: " + "cursor.execute('SELECT * FROM users WHERE id=?', (user_id,))" + ) + + def __init__(self): + """Inicializa SecurityAgent con reglas de seguridad predefinidas.""" + super().__init__(name="SecurityAgent", version="1.0.0", category="security", enabled=True) + self.logger.info("SecurityAgent inicializado con 4 módulos de detección") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analiza código Python en busca de vulnerabilidades de seguridad. + + Ejecuta 4 tipos de análisis de seguridad: + 1. Detección de funciones peligrosas (eval, exec, etc.) + 2. Detección de patrones de inyección SQL + 3. Detección de credenciales hardcodeadas + 4. Detección de criptografía débil + + Args: + context: Contexto de análisis que contiene el código y metadata + + Returns: + Lista de hallazgos de seguridad, ordenados por severidad (CRITICAL primero) + + Raises: + SyntaxError: Si el código tiene sintaxis Python inválida (se registra, no se lanza) + + Ejemplo: + >>> agent = SecurityAgent() + >>> context = AnalysisContext( + ... code_content="password = 'MySecret123'", + ... filename="config.py" + ... 
) + >>> findings = agent.analyze(context) + >>> assert any(f.issue_type == "hardcoded_credentials" for f in findings) + """ + self.log_info(f"Iniciando análisis de seguridad para {context.filename}") + findings: List[Finding] = [] + + try: + # Módulo 1: Detectar funciones peligrosas + dangerous_findings = self._detect_dangerous_functions(context) + findings.extend(dangerous_findings) + self.log_debug(f"Funciones peligrosas: {len(dangerous_findings)} hallazgos") + + # Módulo 2: Detectar patrones de inyección SQL (regex + AST) + sql_findings = self._detect_sql_injection(context) + findings.extend(sql_findings) + self.log_debug(f"Inyección SQL: {len(sql_findings)} hallazgos") + + # Módulo 3: Detectar credenciales hardcodeadas + credential_findings = self._detect_hardcoded_credentials(context) + findings.extend(credential_findings) + self.log_debug(f"Credenciales hardcodeadas: {len(credential_findings)} hallazgos") + + # Módulo 4: Detectar criptografía débil + crypto_findings = self._detect_weak_crypto(context) + findings.extend(crypto_findings) + self.log_debug(f"Criptografía débil: {len(crypto_findings)} hallazgos") + + except SyntaxError as e: + self.log_error( + f"Error de sintaxis en {context.filename}: {e}. " + "Algunos módulos de análisis pueden tener resultados incompletos." + ) + # Continuar con hallazgos de módulos que no necesitan análisis AST + + # Ordenar hallazgos por severidad (CRITICAL primero) + findings.sort( + key=lambda f: (["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value)) + ) + + self.log_info( + f"Análisis de seguridad completado: {len(findings)} hallazgos " + f"({sum(1 for f in findings if f.is_critical)} críticos)" + ) + + return findings + + def _detect_dangerous_functions(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta funciones peligrosas como eval(), exec() usando análisis AST. 
+ + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para uso de funciones peligrosas + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func_name = self._get_function_name(node) + + # Verificar funciones peligrosas directas + if func_name in self.DANGEROUS_FUNCTIONS: + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message=( + f"Uso de {func_name}() detectado - " + "permite ejecución arbitraria de código" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=self._get_dangerous_function_suggestion(func_name), + agent_name=self.name, + rule_id=f"SEC001_{func_name.upper()}", + ) + findings.append(finding) + + # Verificar funciones de pickle/serialización + elif func_name in self.PICKLE_FUNCTIONS: + finding = Finding( + severity=Severity.HIGH, + issue_type="unsafe_deserialization", + message=( + f"Uso de {func_name} detectado - " + "puede ejecutar código arbitrario durante " + "deserialización" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Use json.loads() for data deserialization or " + "validate pickle sources" + ), + agent_name=self.name, + rule_id="SEC001_PICKLE", + ) + findings.append(finding) + + except SyntaxError: + # El código fuente puede estar incompleto o contener errores de sintaxis. + # Ignoramos el error porque no se puede analizar AST en código inválido. + pass + + return findings + + def _detect_sql_injection(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta vulnerabilidades de inyección SQL usando patrones regex mejorados. 
+ + Detecta múltiples patrones comunes de SQL injection: + - Concatenación de strings con + + - Formateo con %s + - F-strings con {} + - .format() en queries + - Palabras clave SQL con variables + + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para vulnerabilidades de inyección SQL + """ + findings: List[Finding] = [] + found_sql_lines: Set[int] = set() + + findings.extend(self._detect_sql_injection_patterns(context, found_sql_lines)) + findings.extend(self._detect_sql_injection_ast(context, found_sql_lines)) + return findings + + def _detect_sql_injection_patterns( + self, context: AnalysisContext, found_sql_lines: Set[int] + ) -> List[Finding]: + """Analiza línea por línea usando regex para detectar SQL injection directa.""" + findings: List[Finding] = [] + lines = context.code_content.splitlines() + + for line_num, line in enumerate(lines, start=1): + stripped = line.strip() + if not stripped or stripped.startswith("#") or line_num in found_sql_lines: + continue + + for pattern in self.SQL_INJECTION_PATTERNS: + if re.search(pattern, line, re.IGNORECASE | re.MULTILINE): + findings.append( + Finding( + severity=Severity.HIGH, + issue_type="sql_injection", + message=self.SQL_INJECTION_MESSAGE, + line_number=line_num, + code_snippet=stripped, + suggestion=self.SQL_INJECTION_SUGGESTION, + agent_name=self.name, + rule_id="SEC002_SQL_INJECTION", + ) + ) + found_sql_lines.add(line_num) + break + + return findings + + def _detect_sql_injection_ast( + self, context: AnalysisContext, found_sql_lines: Set[int] + ) -> List[Finding]: + """Analiza el AST para detectar queries construidas antes de ejecutar.""" + findings: List[Finding] = [] + suspicious_vars = self._collect_suspicious_query_assignments(context) + if not suspicious_vars: + return findings + + execute_calls = self._find_execute_calls(context) + for line_num, argument in execute_calls: + if line_num not in found_sql_lines and self._is_suspicious_execute_arg( + 
argument, suspicious_vars + ): + findings.append( + Finding( + severity=Severity.HIGH, + issue_type="sql_injection", + message=self.SQL_INJECTION_MESSAGE, + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion=self.SQL_INJECTION_SUGGESTION, + agent_name=self.name, + rule_id="SEC002_SQL_INJECTION", + ) + ) + found_sql_lines.add(line_num) + + return findings + + @staticmethod + def _collect_suspicious_query_assignments( + context: AnalysisContext, + ) -> Dict[str, str]: + """Construye un mapa de variables que contienen posibles queries inseguras.""" + suspicious_vars: Dict[str, str] = {} + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return suspicious_vars + + for node in ast.walk(tree): + if isinstance(node, ast.Assign) and node.targets: + target = node.targets[0] + if isinstance(target, ast.Name): + assignment_type = SecurityAgent._classify_sql_assignment(node.value) + if assignment_type: + suspicious_vars[target.id] = assignment_type + return suspicious_vars + + @staticmethod + def _find_execute_calls(context: AnalysisContext) -> List[tuple[int, ast.AST]]: + """Obtiene las llamadas a execute() con su línea y primer argumento.""" + execute_calls: List[tuple[int, ast.AST]] = [] + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return execute_calls + + for node in ast.walk(tree): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Attribute) + and node.func.attr == "execute" + and node.args + ): + line_num = getattr(node, "lineno", 1) + execute_calls.append((line_num, node.args[0])) + return execute_calls + + @staticmethod + def _classify_sql_assignment(value: ast.AST) -> Optional[str]: + """Clasifica asignaciones sospechosas de queries.""" + if isinstance(value, ast.JoinedStr): + return "fstring" + if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Add): + return "concat" + if isinstance(value, ast.BinOp) and isinstance(value.op, ast.Mod): + return "mod" 
+ if ( + isinstance(value, ast.Call) + and isinstance(value.func, ast.Attribute) + and value.func.attr == "format" + ): + return "format" + return None + + @staticmethod + def _is_suspicious_execute_arg(arg: ast.AST, suspicious_vars: Dict[str, str]) -> bool: + """Determina si el argumento pasado a execute es potencialmente inseguro.""" + if isinstance(arg, ast.JoinedStr): + return True + if isinstance(arg, ast.BinOp) and isinstance(arg.op, (ast.Add, ast.Mod)): + return True + if ( + isinstance(arg, ast.Call) + and isinstance(arg.func, ast.Attribute) + and arg.func.attr == "format" + ): + return True + if isinstance(arg, ast.Name) and arg.id in suspicious_vars: + return True + return False + + def _detect_hardcoded_credentials(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta credenciales hardcodeadas usando patrones regex y detección de placeholders. + + Busca patrones comunes como: + - password = "valor" + - api_key = "valor" + - secret_key = "valor" + - token = "valor" + + Filtra falsos positivos ignorando placeholders y valores cortos. 
+ + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para credenciales hardcodeadas + """ + findings: List[Finding] = [] + lines = context.code_content.splitlines() + + for line_num, line in enumerate(lines, start=1): + # Saltar comentarios y líneas vacías + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + + for cred_config in self.CREDENTIAL_PATTERNS: + pattern = cred_config["pattern"] + cred_name = cred_config["name"] + severity = cred_config["severity"] + + match = re.search(pattern, line, re.IGNORECASE) + if match: + value = match.group(0).split("=")[1].strip().strip("\"'") + if self._is_placeholder(value) or len(value) < 8: + continue + + env_var = cred_name.upper() + finding = Finding( + severity=severity, + issue_type="hardcoded_credentials", + message=( + f"Hardcoded {cred_name} detected - secrets " + "should not be in source code" + ), + line_number=line_num, + code_snippet=line.strip(), + suggestion=( + f"Use environment variables: {env_var} = " f"os.getenv('{env_var}')" + ), + agent_name=self.name, + rule_id=f"SEC003_{env_var}", + ) + findings.append(finding) + break # Solo un hallazgo por línea + + return findings + + def _detect_weak_crypto(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta uso de algoritmos criptográficos débiles. 
+ + Busca uso de: + - hashlib.md5() + - hashlib.sha1() + - Crypto.Cipher.DES + - RC4 + - Blowfish + + Args: + context: Contexto de análisis con el código a analizar + + Returns: + Lista de hallazgos para criptografía débil + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func_name = self._get_function_name(node) + if not func_name: + continue + + lower_name = func_name.lower() + + # Verificar funciones débiles de hash (md5 o sha1 en cualquiera de sus formas) + if "md5" in lower_name or "sha1" in lower_name: + algo = "MD5" if "md5" in lower_name else "SHA1" + finding = Finding( + severity=Severity.MEDIUM, + issue_type="weak_cryptography", + message=f"Uso de algoritmo de hash débil {algo} detectado", + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion="Usa SHA-256 o superior: hashlib.sha256()", + agent_name=self.name, + rule_id=f"SEC004_{algo}", + ) + findings.append(finding) + continue + + # Verificar algoritmos débiles de encriptación en librería Crypto + if any(weak in func_name for weak in ["DES", "RC4", "Blowfish"]): + finding = Finding( + severity=Severity.HIGH, + issue_type="weak_cryptography", + message=( + "Uso de algoritmo de encriptación débil " f"detectado: {func_name}" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion="Usa AES-256 con Crypto.Cipher.AES", + agent_name=self.name, + rule_id="SEC004_WEAK_ENCRYPTION", + ) + findings.append(finding) + + except SyntaxError: + # El código fuente puede estar incompleto o contener errores de sintaxis. + # Ignoramos el error porque no se puede analizar criptografía en código inválido. + pass + + return findings + + def _get_function_name(self, node: ast.Call) -> str: + """ + Extrae el nombre de la función de un nodo Call del AST. 
+ + Maneja tanto llamadas simples (func()) como llamadas de atributo (module.func()). + + Args: + node: Nodo Call del AST + + Returns: + Nombre de la función como string (ej: "eval" o "hashlib.md5") + """ + if isinstance(node.func, ast.Name): + return node.func.id + if isinstance(node.func, ast.Attribute): + if isinstance(node.func.value, ast.Name): + return f"{node.func.value.id}.{node.func.attr}" + return node.func.attr + return "" + + def _get_code_snippet( + self, context: AnalysisContext, line_number: int, context_lines: int = 0 + ) -> str: + """ + Extrae fragmento de código alrededor de la línea especificada. + + Args: + context: Contexto de análisis con el código + line_number: Número de línea (1-based) a extraer + context_lines: Número de líneas antes/después a incluir + + Returns: + Fragmento de código como string + """ + lines = context.code_content.splitlines() + + if 1 <= line_number <= len(lines): + start = max(0, line_number - 1 - context_lines) + end = min(len(lines), line_number + context_lines) + snippet_lines = lines[start:end] + return "\n".join(snippet_lines) + + return "" + + def _get_dangerous_function_suggestion(self, func_name: str) -> str: + """ + Obtiene sugerencia específica para el uso de función peligrosa. + + Args: + func_name: Nombre de la función peligrosa + + Returns: + String con sugerencia de alternativa segura + """ + suggestions = { + "eval": "Use ast.literal_eval() for safe evaluation of literals", + "exec": "Avoid exec() or validate input strictly with whitelisting", + "compile": "Avoid compile() or validate source code strictly", + "__import__": "Use importlib.import_module() with validation", + "execfile": "Use with open() and exec() with strict validation (Python 2 only)", + } + return suggestions.get(func_name, f"Avoid using {func_name}() or validate input strictly") + + def _is_placeholder(self, value: str) -> bool: + """ + Verifica si un valor de credencial es un placeholder (no un secreto real). 
+ + Ignora valores que contienen patrones comunes de placeholders como: + - YOUR_, REPLACE_, CHANGE_ + - TODO, FIXME + - example, test, dummy + + Args: + value: Valor de credencial a verificar + + Returns: + True si el valor es un placeholder, False en caso contrario + """ + value_lower = value.lower() + + for pattern in self.PLACEHOLDER_PATTERNS: + if re.search(pattern, value_lower, re.IGNORECASE): + return True + + return False diff --git a/backend/src/agents/style_agent.py b/backend/src/agents/style_agent.py index 75cf4df..a37c876 100644 --- a/backend/src/agents/style_agent.py +++ b/backend/src/agents/style_agent.py @@ -1,563 +1,563 @@ -""" -StyleAgent - Agente especializado en deteccion de violaciones de estilo PEP 8. - -Este agente analiza codigo Python en busca de problemas de estilo incluyendo: -- Violaciones de PEP 8 (longitud de linea, espacios, etc.) -- Docstrings faltantes en funciones y clases publicas -- Organizacion y uso de imports -- Convenciones de nombres -(PEP 8: snake_case para funciones/variables, PascalCase para clases) -- Hallazgos externos de Pylint y Flake8 -""" - -import ast -import re -from typing import Dict, List, Set - -from src.agents.analyzers import flake8_analyzer, pylint_analyzer -from src.agents.base_agent import BaseAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding, Severity - - -class StyleAgent(BaseAgent): - """Agente especializado en detectar violaciones de estilo en codigo Python. - - Analiza el codigo usando multiples estrategias: - 1. Analisis de lineas (longitud, espacios) - 2. Analisis AST para docstrings en funciones y clases publicas - 3. Analisis AST para organizacion de imports - 4. Analisis AST para convenciones de nombres PEP 8 - 5. 
Integracion interna con Pylint y Flake8 - """ - - # Limite por defecto de longitud de linea - LINE_LENGTH_LIMIT: int = 88 - - # Patrones de nombres PEP 8 - FUNCTION_NAME_PATTERN: str = r"^[a-z_][a-z0-9_]*$" - CLASS_NAME_PATTERN: str = r"^[A-Z][a-zA-Z0-9]*$" - CONSTANT_NAME_PATTERN: str = r"^[A-Z_][A-Z0-9_]*$" - VARIABLE_NAME_PATTERN: str = r"^[a-z_][a-z0-9_]*$" - - def __init__(self) -> None: - """ - Inicializa StyleAgent con configuracion por defecto. - """ - super().__init__(name="StyleAgent", version="1.0.0", category="style", enabled=True) - # Usa el limite de clase por defecto - self.line_length_limit = self.LINE_LENGTH_LIMIT - # Analizadores externos - self.pylint_analyzer = pylint_analyzer.PylintAnalyzer() - self.flake8_analyzer = flake8_analyzer.Flake8Analyzer() - self.logger.info("StyleAgent inicializado con 6 modulos de analisis de estilo.") - - def analyze(self, context: AnalysisContext) -> List[Finding]: - """ - Analiza codigo Python en busca de violaciones de estilo. - - Ejecuta varios chequeos de estilo: - 1. Longitud de linea y espacios basicos - 2. Docstrings faltantes - 3. Problemas en imports - 4. Convenciones de nombres - 5. Pylint (si esta disponibles en el entorno) - 6. Flake8 (si esta disponibles en el entorno) - - Args: - context: Contexto de analisis con el codigo a revisar. - - Returns: - Lista de Finding ordenada por numero de linea. 
- """ - # Emitir evento de inicio - self._emit_agent_started(context) - - self.log_info(f"Iniciando analisis de estilo para {context.filename}") - findings: List[Finding] = [] - - try: - # Modulo 1: estilo de lineas - line_findings = self._check_line_style(context) - findings.extend(line_findings) - self.log_debug(f"Estilo de lineas: {len(line_findings)} hallazgos") - - # Modulo 2: docstrings - docstring_findings = self._check_docstrings(context) - findings.extend(docstring_findings) - self.log_debug(f"Docstrings: {len(docstring_findings)} hallazgos") - - # Modulo 3: imports - import_findings = self._check_imports(context) - findings.extend(import_findings) - self.log_debug(f"Imports: {len(import_findings)} hallazgos") - - # Modulo 4: convenciones de nombres - naming_findings = self._check_naming_conventions(context) - findings.extend(naming_findings) - self.log_debug(f"Convenciones de nombres: {len(naming_findings)} hallazgos") - - # Modulo 5: Pylint interno (si disponible) - pylint_findings = self._run_pylint(context) - self.log_info(f"pylint produjo {len(pylint_findings)} hallazgos") - findings.extend(pylint_findings) - self.log_debug(f"Pylint: {len(pylint_findings)} hallazgos") - - # Modulo 6: Flake8 interno (si disponible) - flake8_findings = self._run_flake8(context) - findings.extend(flake8_findings) - self.log_debug(f"Flake8: {len(flake8_findings)} hallazgos") - - except SyntaxError as e: - self.log_error( - f"Error de sintaxis en {context.filename}: {e}. " - "Algunos modulos de analisis pueden tener resultados incompletos." 
- ) - # Emitir evento de fallo pero continuar con findings parciales - self._emit_agent_failed(context, e) - - except Exception as e: - self.log_error(f"Error inesperado en analisis de estilo: {e}") - self._emit_agent_failed(context, e) - raise - - # Eliminar duplicados y ordenar hallazgos por numero de linea - findings = self._remove_duplicates(findings) - findings.sort(key=lambda f: f.line_number) - - self.log_info(f"Analisis de estilo completado: {len(findings)} hallazgos") - - # Emitir evento de completado - self._emit_agent_completed(context, findings) - - return findings - - # --------------------------------------------------------------------- - # Modulo 1: estilo de lineas - # --------------------------------------------------------------------- - def _check_line_style(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta problemas basicos de estilo a nivel de linea. - - Verifica: - - Lineas que exceden line_length_limit - - Espacios en blanco al final de la linea - - Caracteres de tabulacion en la indentacion - - Mas de dos lineas en blanco consecutivas - """ - findings: List[Finding] = [] - lines = context.code_content.splitlines() - blank_run = 0 - - for line_num, line in enumerate(lines, start=1): - stripped = line.rstrip("\n") - - # Contar lineas en blanco consecutivas - if stripped.strip() == "": - blank_run += 1 - else: - blank_run = 0 - - # Linea demasiado larga - if len(stripped) > self.line_length_limit: - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/pep8", - message=( - f"La linea excede la longitud maxima de " - f"{self.line_length_limit} caracteres" - ), - line_number=line_num, - code_snippet=self._get_code_snippet(context, line_num), - suggestion=( - "Divide la expresion en varias lineas o usa parentesis " - "para agrupar expresiones largas" - ), - agent_name=self.name, - rule_id="STYLE001_LINE_LENGTH", - ) - ) - - # Espacios en blanco al final de la linea - if stripped.rstrip(" \t") != stripped: - 
findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/pep8", - message="La linea tiene espacios en blanco al final", - line_number=line_num, - code_snippet=self._get_code_snippet(context, line_num), - suggestion="Elimina espacios o tabs al final de la linea", - agent_name=self.name, - rule_id="STYLE002_TRAILING_WS", - ) - ) - - # Tabs en la indentacion - if re.match(r"^\t+", line) or re.match(r"^ +\t+", line): - findings.append( - Finding( - severity=Severity.MEDIUM, - issue_type="style/pep8", - message="Se usan caracteres de tabulacion en la indentacion", - line_number=line_num, - code_snippet=self._get_code_snippet(context, line_num), - suggestion="Usa 4 espacios por nivel de indentacion en lugar de tabs", - agent_name=self.name, - rule_id="STYLE003_TABS", - ) - ) - - # Mas de dos lineas en blanco consecutivas - if blank_run > 2: - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/pep8", - message="Hay mas de dos lineas en blanco consecutivas", - line_number=line_num, - code_snippet=self._get_code_snippet(context, line_num), - suggestion="Reduce las lineas en blanco consecutivas a maximo dos", - agent_name=self.name, - rule_id="STYLE004_BLANK_LINES", - ) - ) - - return findings - - # --------------------------------------------------------------------- - # Modulo 2: docstrings - # --------------------------------------------------------------------- - def _check_docstrings(self, context: AnalysisContext) -> List[Finding]: - """ - Detecta docstrings faltantes en funciones y clases publicas. 
- """ - findings: List[Finding] = [] - - try: - tree = ast.parse(context.code_content) - except SyntaxError: - return findings - - for node in ast.walk(tree): - if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): - name = node.name - if not self._is_public_member(name): - continue - - doc = ast.get_docstring(node) - if not doc: - if isinstance(node, ast.AsyncFunctionDef): - node_type = "funcion asincrona" - elif isinstance(node, ast.ClassDef): - node_type = "clase" - else: - node_type = "funcion" - - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/documentation", - message=f"La {node_type} publica '{name}' no tiene docstring", - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Agrega un docstring descriptivo que explique el " - "comportamiento, parametros y valor de retorno" - ), - agent_name=self.name, - rule_id="STYLE010_MISSING_DOCSTRING", - ) - ) - - return findings - - # --------------------------------------------------------------------- - # Modulo 3: imports - # --------------------------------------------------------------------- - def _check_imports(self, context: AnalysisContext) -> List[Finding]: # noqa: C901 - """ - Detecta problemas basicos en imports: - - Imports no usados - - Imports duplicados - """ - findings: List[Finding] = [] - - try: - tree = ast.parse(context.code_content) - except SyntaxError: - return findings - - imported: Dict[str, List[int]] = {} - used_names: Set[str] = set() - - # Recolectar imports y usos de nombres - for node in ast.walk(tree): - # Imports - if isinstance(node, ast.Import): - for alias in node.names: - alias_name = alias.asname or alias.name - imported.setdefault(alias_name, []).append(node.lineno) - elif isinstance(node, ast.ImportFrom): - for alias in node.names: - alias_name = alias.asname or alias.name - imported.setdefault(alias_name, []).append(node.lineno) - - # Uso de nombres - if isinstance(node, 
ast.Name): - used_names.add(node.id) - - # Detectar imports no usados - for name, lines in imported.items(): - if name not in used_names: - for line in lines: - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/imports", - message=f"El nombre importado '{name}' no se usa en el archivo", - line_number=line, - code_snippet=self._get_code_snippet(context, line), - suggestion="Elimina imports no usados para mantener el codigo limpio", - agent_name=self.name, - rule_id="STYLE020_UNUSED_IMPORT", - ) - ) - - # Detectar imports duplicados - for name, lines in imported.items(): - if len(lines) > 1: - for line in lines[1:]: - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/imports", - message=f"El nombre '{name}' se importa multiples veces", - line_number=line, - code_snippet=self._get_code_snippet(context, line), - suggestion="Conserva una sola instruccion de import por nombre", - agent_name=self.name, - rule_id="STYLE021_DUP_IMPORT", - ) - ) - - return findings - - # --------------------------------------------------------------------- - # Modulo 4: convenciones de nombres - # --------------------------------------------------------------------- - def _check_naming_conventions(self, context: AnalysisContext) -> List[Finding]: # noqa: C901 - """ - Detecta violaciones de convenciones de nombres para funciones, clases y variables. 
- """ - findings: List[Finding] = [] - - try: - tree = ast.parse(context.code_content) - except SyntaxError: - return findings - - for node in ast.walk(tree): - # Funciones - if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): - name = node.name - if self._is_public_member(name) and not self._matches_pattern( - name, self.FUNCTION_NAME_PATTERN - ): - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/naming", - message=f"El nombre de funcion '{name}' debe usar snake_case", - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Renombra la funcion para seguir snake_case " - "(por ejemplo: mi_funcion_principal)" - ), - agent_name=self.name, - rule_id="STYLE030_FUNC_NAMING", - ) - ) - - # Clases - if isinstance(node, ast.ClassDef): - name = node.name - if self._is_public_member(name) and not self._matches_pattern( - name, self.CLASS_NAME_PATTERN - ): - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/naming", - message=f"El nombre de clase '{name}' debe usar PascalCase", - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Renombra la clase para seguir PascalCase " - "(por ejemplo: MiClasePrincipal)" - ), - agent_name=self.name, - rule_id="STYLE031_CLASS_NAMING", - ) - ) - - # Asignaciones simples para variables y constantes - if isinstance(node, ast.Assign): - if not node.targets: - continue - target = node.targets[0] - if isinstance(target, ast.Name): - name = target.id - # Posible constante: todo en mayusculas - if name.isupper(): - if not self._matches_pattern(name, self.CONSTANT_NAME_PATTERN): - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/naming", - message=( - f"El nombre de constante '{name}' debe usar " - "UPPER_SNAKE_CASE" - ), - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Renombra la constante para usar 
UPPER_SNAKE_CASE " - "(por ejemplo: MI_CONSTANTE)" - ), - agent_name=self.name, - rule_id="STYLE032_CONST_NAMING", - ) - ) - else: - # Variable regular - if not self._matches_pattern(name, self.VARIABLE_NAME_PATTERN): - findings.append( - Finding( - severity=Severity.LOW, - issue_type="style/naming", - message=( - f"El nombre de variable '{name}' debe usar snake_case" - ), - line_number=node.lineno, - code_snippet=self._get_code_snippet(context, node.lineno), - suggestion=( - "Renombra la variable para usar snake_case " - "(por ejemplo: mi_variable)" - ), - agent_name=self.name, - rule_id="STYLE033_VAR_NAMING", - ) - ) - - return findings - - # --------------------------------------------------------------------- - # Modulo 5: Pylint con analizador - # --------------------------------------------------------------------- - def _run_pylint(self, context: AnalysisContext) -> List[Finding]: - """ - Ejecuta pylint usando PylintAnalyzer. - - Si pylint no esta disponible en el entorno, devuelve una lista vacia. - """ - findings: List[Finding] = [] - - try: - findings = self.pylint_analyzer.analyze( - code_content=context.code_content, - agent_name=self.name, - ) - self.log_debug(f"PylintAnalyzer retorno {len(findings)} hallazgos") - except FileNotFoundError: - # pylint no esta instalado en este entorno - self.log_info("pylint no disponible; se omiten hallazgos externos de pylint") - except Exception as exc: - # No romper todo el analisis si pylint falla - self.log_error(f"Error ejecutando PylintAnalyzer: {exc}") - - return findings - - # --------------------------------------------------------------------- - # Modulo 6: Flake8 con analizador - # --------------------------------------------------------------------- - def _run_flake8(self, context: AnalysisContext) -> List[Finding]: - """ - Ejecuta flake8 usando Flake8Analyzer. - - Si flake8 no esta disponible en el entorno, devuelve una lista vacia. 
- """ - findings: List[Finding] = [] - - try: - findings = self.flake8_analyzer.analyze( - code_content=context.code_content, - agent_name=self.name, - ) - self.log_debug(f"Flake8Analyzer retorno {len(findings)} hallazgos") - except FileNotFoundError: - # flake8 no esta instalado o no esta en PATH - self.log_debug("flake8 no disponible; se omiten hallazgos externos de flake8") - except Exception as exc: - # No romper todo el analisis si flake8 falla - self.log_error(f"Error ejecutando Flake8Analyzer: {exc}") - - return findings - - # --------------------------------------------------------------------- - # Helpers - # --------------------------------------------------------------------- - def _remove_duplicates(self, findings: List[Finding]) -> List[Finding]: - """ - Elimina hallazgos duplicados manteniendo el primero. - """ - seen: Set[tuple] = set() - unique_findings: List[Finding] = [] - - for finding in findings: - key = ( - finding.line_number, - finding.issue_type, - finding.rule_id, - finding.agent_name, - ) - if key not in seen: - seen.add(key) - unique_findings.append(finding) - - return unique_findings - - def _get_code_snippet( - self, - context: AnalysisContext, - line_number: int, - context_lines: int = 0, - ) -> str: - """ - Extrae un fragmento de codigo alrededor de una linea dada. - """ - lines = context.code_content.splitlines() - - if 1 <= line_number <= len(lines): - start = max(0, line_number - 1 - context_lines) - end = min(len(lines), line_number + context_lines) - snippet_lines = lines[start:end] - return "\n".join(snippet_lines) - - return "" - - def _is_public_member(self, name: str) -> bool: - """ - Determina si un miembro (funcion, clase o variable) es publico. - - Un miembro publico no empieza con guion bajo. - """ - return not name.startswith("_") - - def _matches_pattern(self, name: str, pattern: str) -> bool: - """ - Verifica si un nombre cumple con el patron regex dado. 
- """ - return bool(re.match(pattern, name)) +""" +StyleAgent - Agente especializado en deteccion de violaciones de estilo PEP 8. + +Este agente analiza codigo Python en busca de problemas de estilo incluyendo: +- Violaciones de PEP 8 (longitud de linea, espacios, etc.) +- Docstrings faltantes en funciones y clases publicas +- Organizacion y uso de imports +- Convenciones de nombres +(PEP 8: snake_case para funciones/variables, PascalCase para clases) +- Hallazgos externos de Pylint y Flake8 +""" + +import ast +import re +from typing import Dict, List, Set + +from src.agents.analyzers import flake8_analyzer, pylint_analyzer +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class StyleAgent(BaseAgent): + """Agente especializado en detectar violaciones de estilo en codigo Python. + + Analiza el codigo usando multiples estrategias: + 1. Analisis de lineas (longitud, espacios) + 2. Analisis AST para docstrings en funciones y clases publicas + 3. Analisis AST para organizacion de imports + 4. Analisis AST para convenciones de nombres PEP 8 + 5. Integracion interna con Pylint y Flake8 + """ + + # Limite por defecto de longitud de linea + LINE_LENGTH_LIMIT: int = 88 + + # Patrones de nombres PEP 8 + FUNCTION_NAME_PATTERN: str = r"^[a-z_][a-z0-9_]*$" + CLASS_NAME_PATTERN: str = r"^[A-Z][a-zA-Z0-9]*$" + CONSTANT_NAME_PATTERN: str = r"^[A-Z_][A-Z0-9_]*$" + VARIABLE_NAME_PATTERN: str = r"^[a-z_][a-z0-9_]*$" + + def __init__(self) -> None: + """ + Inicializa StyleAgent con configuracion por defecto. 
+ """ + super().__init__(name="StyleAgent", version="1.0.0", category="style", enabled=True) + # Usa el limite de clase por defecto + self.line_length_limit = self.LINE_LENGTH_LIMIT + # Analizadores externos + self.pylint_analyzer = pylint_analyzer.PylintAnalyzer() + self.flake8_analyzer = flake8_analyzer.Flake8Analyzer() + self.logger.info("StyleAgent inicializado con 6 modulos de analisis de estilo.") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """ + Analiza codigo Python en busca de violaciones de estilo. + + Ejecuta varios chequeos de estilo: + 1. Longitud de linea y espacios basicos + 2. Docstrings faltantes + 3. Problemas en imports + 4. Convenciones de nombres + 5. Pylint (si esta disponibles en el entorno) + 6. Flake8 (si esta disponibles en el entorno) + + Args: + context: Contexto de analisis con el codigo a revisar. + + Returns: + Lista de Finding ordenada por numero de linea. + """ + # Emitir evento de inicio + self._emit_agent_started(context) + + self.log_info(f"Iniciando analisis de estilo para {context.filename}") + findings: List[Finding] = [] + + try: + # Modulo 1: estilo de lineas + line_findings = self._check_line_style(context) + findings.extend(line_findings) + self.log_debug(f"Estilo de lineas: {len(line_findings)} hallazgos") + + # Modulo 2: docstrings + docstring_findings = self._check_docstrings(context) + findings.extend(docstring_findings) + self.log_debug(f"Docstrings: {len(docstring_findings)} hallazgos") + + # Modulo 3: imports + import_findings = self._check_imports(context) + findings.extend(import_findings) + self.log_debug(f"Imports: {len(import_findings)} hallazgos") + + # Modulo 4: convenciones de nombres + naming_findings = self._check_naming_conventions(context) + findings.extend(naming_findings) + self.log_debug(f"Convenciones de nombres: {len(naming_findings)} hallazgos") + + # Modulo 5: Pylint interno (si disponible) + pylint_findings = self._run_pylint(context) + self.log_info(f"pylint produjo 
{len(pylint_findings)} hallazgos") + findings.extend(pylint_findings) + self.log_debug(f"Pylint: {len(pylint_findings)} hallazgos") + + # Modulo 6: Flake8 interno (si disponible) + flake8_findings = self._run_flake8(context) + findings.extend(flake8_findings) + self.log_debug(f"Flake8: {len(flake8_findings)} hallazgos") + + except SyntaxError as e: + self.log_error( + f"Error de sintaxis en {context.filename}: {e}. " + "Algunos modulos de analisis pueden tener resultados incompletos." + ) + # Emitir evento de fallo pero continuar con findings parciales + self._emit_agent_failed(context, e) + + except Exception as e: + self.log_error(f"Error inesperado en analisis de estilo: {e}") + self._emit_agent_failed(context, e) + raise + + # Eliminar duplicados y ordenar hallazgos por numero de linea + findings = self._remove_duplicates(findings) + findings.sort(key=lambda f: f.line_number) + + self.log_info(f"Analisis de estilo completado: {len(findings)} hallazgos") + + # Emitir evento de completado + self._emit_agent_completed(context, findings) + + return findings + + # --------------------------------------------------------------------- + # Modulo 1: estilo de lineas + # --------------------------------------------------------------------- + def _check_line_style(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta problemas basicos de estilo a nivel de linea. 
+ + Verifica: + - Lineas que exceden line_length_limit + - Espacios en blanco al final de la linea + - Caracteres de tabulacion en la indentacion + - Mas de dos lineas en blanco consecutivas + """ + findings: List[Finding] = [] + lines = context.code_content.splitlines() + blank_run = 0 + + for line_num, line in enumerate(lines, start=1): + stripped = line.rstrip("\n") + + # Contar lineas en blanco consecutivas + if stripped.strip() == "": + blank_run += 1 + else: + blank_run = 0 + + # Linea demasiado larga + if len(stripped) > self.line_length_limit: + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/pep8", + message=( + f"La linea excede la longitud maxima de " + f"{self.line_length_limit} caracteres" + ), + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion=( + "Divide la expresion en varias lineas o usa parentesis " + "para agrupar expresiones largas" + ), + agent_name=self.name, + rule_id="STYLE001_LINE_LENGTH", + ) + ) + + # Espacios en blanco al final de la linea + if stripped.rstrip(" \t") != stripped: + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/pep8", + message="La linea tiene espacios en blanco al final", + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion="Elimina espacios o tabs al final de la linea", + agent_name=self.name, + rule_id="STYLE002_TRAILING_WS", + ) + ) + + # Tabs en la indentacion + if re.match(r"^\t+", line) or re.match(r"^ +\t+", line): + findings.append( + Finding( + severity=Severity.MEDIUM, + issue_type="style/pep8", + message="Se usan caracteres de tabulacion en la indentacion", + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion="Usa 4 espacios por nivel de indentacion en lugar de tabs", + agent_name=self.name, + rule_id="STYLE003_TABS", + ) + ) + + # Mas de dos lineas en blanco consecutivas + if blank_run > 2: + findings.append( + Finding( + 
severity=Severity.LOW, + issue_type="style/pep8", + message="Hay mas de dos lineas en blanco consecutivas", + line_number=line_num, + code_snippet=self._get_code_snippet(context, line_num), + suggestion="Reduce las lineas en blanco consecutivas a maximo dos", + agent_name=self.name, + rule_id="STYLE004_BLANK_LINES", + ) + ) + + return findings + + # --------------------------------------------------------------------- + # Modulo 2: docstrings + # --------------------------------------------------------------------- + def _check_docstrings(self, context: AnalysisContext) -> List[Finding]: + """ + Detecta docstrings faltantes en funciones y clases publicas. + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return findings + + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + name = node.name + if not self._is_public_member(name): + continue + + doc = ast.get_docstring(node) + if not doc: + if isinstance(node, ast.AsyncFunctionDef): + node_type = "funcion asincrona" + elif isinstance(node, ast.ClassDef): + node_type = "clase" + else: + node_type = "funcion" + + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/documentation", + message=f"La {node_type} publica '{name}' no tiene docstring", + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Agrega un docstring descriptivo que explique el " + "comportamiento, parametros y valor de retorno" + ), + agent_name=self.name, + rule_id="STYLE010_MISSING_DOCSTRING", + ) + ) + + return findings + + # --------------------------------------------------------------------- + # Modulo 3: imports + # --------------------------------------------------------------------- + def _check_imports(self, context: AnalysisContext) -> List[Finding]: # noqa: C901 + """ + Detecta problemas basicos en imports: + - Imports no usados + - Imports duplicados + 
""" + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return findings + + imported: Dict[str, List[int]] = {} + used_names: Set[str] = set() + + # Recolectar imports y usos de nombres + for node in ast.walk(tree): + # Imports + if isinstance(node, ast.Import): + for alias in node.names: + alias_name = alias.asname or alias.name + imported.setdefault(alias_name, []).append(node.lineno) + elif isinstance(node, ast.ImportFrom): + for alias in node.names: + alias_name = alias.asname or alias.name + imported.setdefault(alias_name, []).append(node.lineno) + + # Uso de nombres + if isinstance(node, ast.Name): + used_names.add(node.id) + + # Detectar imports no usados + for name, lines in imported.items(): + if name not in used_names: + for line in lines: + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/imports", + message=f"El nombre importado '{name}' no se usa en el archivo", + line_number=line, + code_snippet=self._get_code_snippet(context, line), + suggestion="Elimina imports no usados para mantener el codigo limpio", + agent_name=self.name, + rule_id="STYLE020_UNUSED_IMPORT", + ) + ) + + # Detectar imports duplicados + for name, lines in imported.items(): + if len(lines) > 1: + for line in lines[1:]: + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/imports", + message=f"El nombre '{name}' se importa multiples veces", + line_number=line, + code_snippet=self._get_code_snippet(context, line), + suggestion="Conserva una sola instruccion de import por nombre", + agent_name=self.name, + rule_id="STYLE021_DUP_IMPORT", + ) + ) + + return findings + + # --------------------------------------------------------------------- + # Modulo 4: convenciones de nombres + # --------------------------------------------------------------------- + def _check_naming_conventions(self, context: AnalysisContext) -> List[Finding]: # noqa: C901 + """ + Detecta violaciones de convenciones de 
nombres para funciones, clases y variables. + """ + findings: List[Finding] = [] + + try: + tree = ast.parse(context.code_content) + except SyntaxError: + return findings + + for node in ast.walk(tree): + # Funciones + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + name = node.name + if self._is_public_member(name) and not self._matches_pattern( + name, self.FUNCTION_NAME_PATTERN + ): + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/naming", + message=f"El nombre de funcion '{name}' debe usar snake_case", + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Renombra la funcion para seguir snake_case " + "(por ejemplo: mi_funcion_principal)" + ), + agent_name=self.name, + rule_id="STYLE030_FUNC_NAMING", + ) + ) + + # Clases + if isinstance(node, ast.ClassDef): + name = node.name + if self._is_public_member(name) and not self._matches_pattern( + name, self.CLASS_NAME_PATTERN + ): + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/naming", + message=f"El nombre de clase '{name}' debe usar PascalCase", + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Renombra la clase para seguir PascalCase " + "(por ejemplo: MiClasePrincipal)" + ), + agent_name=self.name, + rule_id="STYLE031_CLASS_NAMING", + ) + ) + + # Asignaciones simples para variables y constantes + if isinstance(node, ast.Assign): + if not node.targets: + continue + target = node.targets[0] + if isinstance(target, ast.Name): + name = target.id + # Posible constante: todo en mayusculas + if name.isupper(): + if not self._matches_pattern(name, self.CONSTANT_NAME_PATTERN): + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/naming", + message=( + f"El nombre de constante '{name}' debe usar " + "UPPER_SNAKE_CASE" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + 
"Renombra la constante para usar UPPER_SNAKE_CASE " + "(por ejemplo: MI_CONSTANTE)" + ), + agent_name=self.name, + rule_id="STYLE032_CONST_NAMING", + ) + ) + else: + # Variable regular + if not self._matches_pattern(name, self.VARIABLE_NAME_PATTERN): + findings.append( + Finding( + severity=Severity.LOW, + issue_type="style/naming", + message=( + f"El nombre de variable '{name}' debe usar snake_case" + ), + line_number=node.lineno, + code_snippet=self._get_code_snippet(context, node.lineno), + suggestion=( + "Renombra la variable para usar snake_case " + "(por ejemplo: mi_variable)" + ), + agent_name=self.name, + rule_id="STYLE033_VAR_NAMING", + ) + ) + + return findings + + # --------------------------------------------------------------------- + # Modulo 5: Pylint con analizador + # --------------------------------------------------------------------- + def _run_pylint(self, context: AnalysisContext) -> List[Finding]: + """ + Ejecuta pylint usando PylintAnalyzer. + + Si pylint no esta disponible en el entorno, devuelve una lista vacia. + """ + findings: List[Finding] = [] + + try: + findings = self.pylint_analyzer.analyze( + code_content=context.code_content, + agent_name=self.name, + ) + self.log_debug(f"PylintAnalyzer retorno {len(findings)} hallazgos") + except FileNotFoundError: + # pylint no esta instalado en este entorno + self.log_info("pylint no disponible; se omiten hallazgos externos de pylint") + except Exception as exc: + # No romper todo el analisis si pylint falla + self.log_error(f"Error ejecutando PylintAnalyzer: {exc}") + + return findings + + # --------------------------------------------------------------------- + # Modulo 6: Flake8 con analizador + # --------------------------------------------------------------------- + def _run_flake8(self, context: AnalysisContext) -> List[Finding]: + """ + Ejecuta flake8 usando Flake8Analyzer. + + Si flake8 no esta disponible en el entorno, devuelve una lista vacia. 
+ """ + findings: List[Finding] = [] + + try: + findings = self.flake8_analyzer.analyze( + code_content=context.code_content, + agent_name=self.name, + ) + self.log_debug(f"Flake8Analyzer retorno {len(findings)} hallazgos") + except FileNotFoundError: + # flake8 no esta instalado o no esta en PATH + self.log_debug("flake8 no disponible; se omiten hallazgos externos de flake8") + except Exception as exc: + # No romper todo el analisis si flake8 falla + self.log_error(f"Error ejecutando Flake8Analyzer: {exc}") + + return findings + + # --------------------------------------------------------------------- + # Helpers + # --------------------------------------------------------------------- + def _remove_duplicates(self, findings: List[Finding]) -> List[Finding]: + """ + Elimina hallazgos duplicados manteniendo el primero. + """ + seen: Set[tuple] = set() + unique_findings: List[Finding] = [] + + for finding in findings: + key = ( + finding.line_number, + finding.issue_type, + finding.rule_id, + finding.agent_name, + ) + if key not in seen: + seen.add(key) + unique_findings.append(finding) + + return unique_findings + + def _get_code_snippet( + self, + context: AnalysisContext, + line_number: int, + context_lines: int = 0, + ) -> str: + """ + Extrae un fragmento de codigo alrededor de una linea dada. + """ + lines = context.code_content.splitlines() + + if 1 <= line_number <= len(lines): + start = max(0, line_number - 1 - context_lines) + end = min(len(lines), line_number + context_lines) + snippet_lines = lines[start:end] + return "\n".join(snippet_lines) + + return "" + + def _is_public_member(self, name: str) -> bool: + """ + Determina si un miembro (funcion, clase o variable) es publico. + + Un miembro publico no empieza con guion bajo. + """ + return not name.startswith("_") + + def _matches_pattern(self, name: str, pattern: str) -> bool: + """ + Verifica si un nombre cumple con el patron regex dado. 
+ """ + return bool(re.match(pattern, name)) diff --git a/backend/src/core/config/ai_config.py b/backend/src/core/config/ai_config.py index 4ec2328..d34d9b5 100644 --- a/backend/src/core/config/ai_config.py +++ b/backend/src/core/config/ai_config.py @@ -1,170 +1,170 @@ -""" -Configuración de Inteligencia Artificial para CodeGuard AI. - -Gestiona la configuración de Vertex AI (Gemini), incluyendo: -- Selección dinámica de modelo por entorno (dev/prod) -- Rate limiting por usuario -- Configuración de reintentos con exponential backoff -""" - -from typing import Optional - -from pydantic import Field, computed_field -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class AISettings(BaseSettings): - """ - Configuración de IA cargada desde variables de entorno. - - Usa Google Cloud Vertex AI con autenticación via Service Account. - La variable GOOGLE_APPLICATION_CREDENTIALS debe apuntar al archivo JSON. - - Attributes: - GCP_PROJECT_ID: ID del proyecto en Google Cloud Platform - GCP_LOCATION: Región de Vertex AI (us-central1 recomendado) - GOOGLE_APPLICATION_CREDENTIALS: Ruta al archivo JSON de Service Account - AI_ENABLED: Habilitar/deshabilitar funcionalidad de IA - AI_MODEL_DEV: Modelo para desarrollo (flash = rápido/económico) - AI_MODEL_PROD: Modelo para producción (pro = mejor razonamiento) - AI_TEMPERATURE: Temperatura del modelo (0.0-1.0, menor = más determinista) - AI_MAX_OUTPUT_TOKENS: Límite de tokens en respuesta - AI_RATE_LIMIT_PER_HOUR: Límite de llamadas por usuario por hora - AI_MAX_RETRIES: Intentos máximos ante errores transitorios - AI_BACKOFF_FACTOR: Factor de espera exponencial entre reintentos - """ - - # Google Cloud Platform - GCP_PROJECT_ID: Optional[str] = Field( - default=None, - description="ID del proyecto en Google Cloud Platform", - ) - GCP_LOCATION: str = Field( - default="us-central1", - description="Región de Vertex AI", - ) - GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field( - default=None, - description="Ruta 
al archivo JSON de Service Account", - ) - - # Feature Flag - AI_ENABLED: bool = Field( - default=True, - description="Habilitar funcionalidad de IA", - ) - - # Model Selection (por entorno) - AI_MODEL_DEV: str = Field( - default="gemini-1.5-flash-001", - description="Modelo para desarrollo (optimizado velocidad/costo)", - ) - AI_MODEL_PROD: str = Field( - default="gemini-1.5-pro-001", - description="Modelo para producción (optimizado razonamiento)", - ) - - # Model Parameters - AI_TEMPERATURE: float = Field( - default=0.3, - ge=0.0, - le=1.0, - description="Temperatura del modelo (0.0-1.0)", - ) - AI_MAX_OUTPUT_TOKENS: int = Field( - default=2048, - ge=100, - le=8192, - description="Límite de tokens en respuesta", - ) - - # Rate Limiting (para controlar costos) - AI_RATE_LIMIT_PER_HOUR: int = Field( - default=10, - ge=1, - description="Límite de llamadas por usuario por hora", - ) - - # Retry Configuration (exponential backoff) - AI_MAX_RETRIES: int = Field( - default=3, - ge=1, - le=10, - description="Intentos máximos ante errores transitorios", - ) - AI_BACKOFF_FACTOR: float = Field( - default=2.0, - ge=1.0, - le=5.0, - description="Factor de espera exponencial (segundos)", - ) - AI_INITIAL_BACKOFF: float = Field( - default=1.0, - ge=0.5, - le=10.0, - description="Espera inicial antes del primer reintento (segundos)", - ) - - # Environment (heredado de settings principal) - ENVIRONMENT: str = Field( - default="development", - description="Entorno de ejecución", - ) - - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - extra="ignore", - ) - - @computed_field - @property - def model_name(self) -> str: - """ - Selecciona el modelo de Gemini según el entorno. 
- - Returns: - str: Nombre del modelo (flash para dev, pro para prod) - """ - if self.ENVIRONMENT == "production": - return self.AI_MODEL_PROD - return self.AI_MODEL_DEV - - @computed_field - @property - def is_configured(self) -> bool: - """ - Verifica si la configuración de IA está completa. - - Returns: - bool: True si GCP_PROJECT_ID y credenciales están configurados - """ - return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) - - def get_generation_config(self) -> dict: - """ - Retorna la configuración de generación para Vertex AI. - - Returns: - dict: Parámetros de generación del modelo - """ - return { - "temperature": self.AI_TEMPERATURE, - "max_output_tokens": self.AI_MAX_OUTPUT_TOKENS, - "top_p": 0.95, - "top_k": 40, - } - - -# Singleton de configuración de IA -ai_settings = AISettings() - - -def get_ai_settings() -> AISettings: - """ - Factory function para obtener la configuración de IA. - - Returns: - Instancia singleton de AISettings - """ - return ai_settings +""" +Configuración de Inteligencia Artificial para CodeGuard AI. + +Gestiona la configuración de Vertex AI (Gemini), incluyendo: +- Selección dinámica de modelo por entorno (dev/prod) +- Rate limiting por usuario +- Configuración de reintentos con exponential backoff +""" + +from typing import Optional + +from pydantic import Field, computed_field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class AISettings(BaseSettings): + """ + Configuración de IA cargada desde variables de entorno. + + Usa Google Cloud Vertex AI con autenticación via Service Account. + La variable GOOGLE_APPLICATION_CREDENTIALS debe apuntar al archivo JSON. 
+ + Attributes: + GCP_PROJECT_ID: ID del proyecto en Google Cloud Platform + GCP_LOCATION: Región de Vertex AI (us-central1 recomendado) + GOOGLE_APPLICATION_CREDENTIALS: Ruta al archivo JSON de Service Account + AI_ENABLED: Habilitar/deshabilitar funcionalidad de IA + AI_MODEL_DEV: Modelo para desarrollo (flash = rápido/económico) + AI_MODEL_PROD: Modelo para producción (pro = mejor razonamiento) + AI_TEMPERATURE: Temperatura del modelo (0.0-1.0, menor = más determinista) + AI_MAX_OUTPUT_TOKENS: Límite de tokens en respuesta + AI_RATE_LIMIT_PER_HOUR: Límite de llamadas por usuario por hora + AI_MAX_RETRIES: Intentos máximos ante errores transitorios + AI_BACKOFF_FACTOR: Factor de espera exponencial entre reintentos + """ + + # Google Cloud Platform + GCP_PROJECT_ID: Optional[str] = Field( + default=None, + description="ID del proyecto en Google Cloud Platform", + ) + GCP_LOCATION: str = Field( + default="us-central1", + description="Región de Vertex AI", + ) + GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field( + default=None, + description="Ruta al archivo JSON de Service Account", + ) + + # Feature Flag + AI_ENABLED: bool = Field( + default=True, + description="Habilitar funcionalidad de IA", + ) + + # Model Selection (por entorno) + AI_MODEL_DEV: str = Field( + default="gemini-1.5-flash-001", + description="Modelo para desarrollo (optimizado velocidad/costo)", + ) + AI_MODEL_PROD: str = Field( + default="gemini-1.5-pro-001", + description="Modelo para producción (optimizado razonamiento)", + ) + + # Model Parameters + AI_TEMPERATURE: float = Field( + default=0.3, + ge=0.0, + le=1.0, + description="Temperatura del modelo (0.0-1.0)", + ) + AI_MAX_OUTPUT_TOKENS: int = Field( + default=2048, + ge=100, + le=8192, + description="Límite de tokens en respuesta", + ) + + # Rate Limiting (para controlar costos) + AI_RATE_LIMIT_PER_HOUR: int = Field( + default=10, + ge=1, + description="Límite de llamadas por usuario por hora", + ) + + # Retry Configuration 
(exponential backoff) + AI_MAX_RETRIES: int = Field( + default=3, + ge=1, + le=10, + description="Intentos máximos ante errores transitorios", + ) + AI_BACKOFF_FACTOR: float = Field( + default=2.0, + ge=1.0, + le=5.0, + description="Factor de espera exponencial (segundos)", + ) + AI_INITIAL_BACKOFF: float = Field( + default=1.0, + ge=0.5, + le=10.0, + description="Espera inicial antes del primer reintento (segundos)", + ) + + # Environment (heredado de settings principal) + ENVIRONMENT: str = Field( + default="development", + description="Entorno de ejecución", + ) + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + extra="ignore", + ) + + @computed_field + @property + def model_name(self) -> str: + """ + Selecciona el modelo de Gemini según el entorno. + + Returns: + str: Nombre del modelo (flash para dev, pro para prod) + """ + if self.ENVIRONMENT == "production": + return self.AI_MODEL_PROD + return self.AI_MODEL_DEV + + @computed_field + @property + def is_configured(self) -> bool: + """ + Verifica si la configuración de IA está completa. + + Returns: + bool: True si GCP_PROJECT_ID y credenciales están configurados + """ + return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) + + def get_generation_config(self) -> dict: + """ + Retorna la configuración de generación para Vertex AI. + + Returns: + dict: Parámetros de generación del modelo + """ + return { + "temperature": self.AI_TEMPERATURE, + "max_output_tokens": self.AI_MAX_OUTPUT_TOKENS, + "top_p": 0.95, + "top_k": 40, + } + + +# Singleton de configuración de IA +ai_settings = AISettings() + + +def get_ai_settings() -> AISettings: + """ + Factory function para obtener la configuración de IA. 
+ + Returns: + Instancia singleton de AISettings + """ + return ai_settings diff --git a/backend/src/core/config/mcp_config.py b/backend/src/core/config/mcp_config.py index bd7d8c0..6dd18b6 100644 --- a/backend/src/core/config/mcp_config.py +++ b/backend/src/core/config/mcp_config.py @@ -1,417 +1,417 @@ -""" -Configuración del Protocolo de Contexto de Modelo (MCP). - -Contiene el diccionario embebido OWASP Top 10 con descripciones -de vulnerabilidades y remediaciones para enriquecer los prompts -enviados a la IA generativa. -""" - -from dataclasses import dataclass -from typing import Dict, List, Optional - - -@dataclass -class SecurityContext: - """ - Contexto de seguridad para una categoría de vulnerabilidad. - - Attributes: - category: Categoría OWASP (ej: "A03:2021 - Injection") - description: Descripción de la vulnerabilidad - impact: Impacto potencial en el sistema - mitigation: Estrategias de mitigación genéricas - references: URLs de documentación oficial - cwe_ids: IDs de CWE relacionados - """ - - category: str - description: str - impact: str - mitigation: str - references: List[str] - cwe_ids: List[str] - - -# ============================================================================= -# Diccionario OWASP Top 10 (2021) -# ============================================================================= - -OWASP_TOP_10: Dict[str, SecurityContext] = { - # A01:2021 - Broken Access Control - "broken_access_control": SecurityContext( - category="A01:2021 - Broken Access Control", - description=( - "Las restricciones sobre lo que los usuarios autenticados pueden hacer " - "a menudo no se aplican correctamente. Los atacantes pueden explotar " - "estos fallos para acceder a funcionalidades y/o datos no autorizados." - ), - impact=( - "Acceso no autorizado a datos sensibles, modificación de datos de otros " - "usuarios, escalación de privilegios, o ejecución de acciones administrativas." - ), - mitigation=( - "1. 
Denegar por defecto, excepto para recursos públicos.\n" - "2. Implementar mecanismos de control de acceso una vez y reutilizarlos.\n" - "3. Hacer cumplir la propiedad de registros (cada usuario solo accede a sus datos).\n" - "4. Deshabilitar listado de directorios del servidor web.\n" - "5. Registrar fallos de control de acceso y alertar a administradores." - ), - references=[ - "https://owasp.org/Top10/A01_2021-Broken_Access_Control/", - "https://cheatsheetseries.owasp.org/cheatsheets/Authorization_Cheat_Sheet.html", - ], - cwe_ids=["CWE-200", "CWE-284", "CWE-285", "CWE-352", "CWE-639"], - ), - # A02:2021 - Cryptographic Failures - "cryptographic_failures": SecurityContext( - category="A02:2021 - Cryptographic Failures", - description=( - "Antes conocido como 'Exposición de Datos Sensibles'. Se centra en " - "fallos relacionados con la criptografía que a menudo conducen a la " - "exposición de datos sensibles." - ), - impact=( - "Exposición de credenciales, tokens, datos personales (PII), datos " - "financieros, o información médica. Posible robo de identidad." - ), - mitigation=( - "1. Clasificar los datos procesados, almacenados o transmitidos.\n" - "2. No almacenar datos sensibles innecesariamente.\n" - "3. Cifrar todos los datos sensibles en reposo con algoritmos fuertes.\n" - "4. Usar protocolos actualizados (TLS 1.3) para datos en tránsito.\n" - "5. No usar algoritmos criptográficos obsoletos (MD5, SHA1, DES)." - ), - references=[ - "https://owasp.org/Top10/A02_2021-Cryptographic_Failures/", - "https://cheatsheetseries.owasp.org/cheatsheets/Cryptographic_Storage_Cheat_Sheet.html", - ], - cwe_ids=["CWE-259", "CWE-327", "CWE-328", "CWE-330", "CWE-331"], - ), - # A03:2021 - Injection - "injection": SecurityContext( - category="A03:2021 - Injection", - description=( - "Una aplicación es vulnerable a ataques de inyección cuando datos " - "suministrados por el usuario no son validados, filtrados o sanitizados. 
" - "Incluye SQL, NoSQL, OS Command, LDAP, XPath y ORM injection." - ), - impact=( - "Pérdida de datos, corrupción de datos, divulgación a partes no autorizadas, " - "pérdida de responsabilidad, denegación de acceso, o toma completa del host." - ), - mitigation=( - "1. Usar APIs seguras que eviten el uso del intérprete (consultas parametrizadas).\n" - "2. Usar validación de entrada positiva del lado del servidor.\n" - "3. Escapar caracteres especiales usando la sintaxis de escape específica.\n" - "4. Usar LIMIT y otros controles SQL para prevenir divulgación masiva.\n" - "5. No concatenar cadenas con datos del usuario en consultas dinámicas." - ), - references=[ - "https://owasp.org/Top10/A03_2021-Injection/", - "https://cheatsheetseries.owasp.org/cheatsheets/" - "SQL_Injection_Prevention_Cheat_Sheet.html", - "https://cheatsheetseries.owasp.org/cheatsheets/" - "Query_Parameterization_Cheat_Sheet.html", - ], - cwe_ids=["CWE-77", "CWE-78", "CWE-79", "CWE-89", "CWE-94"], - ), - # A04:2021 - Insecure Design - "insecure_design": SecurityContext( - category="A04:2021 - Insecure Design", - description=( - "Una nueva categoría que se centra en los riesgos relacionados con " - "defectos de diseño. La diferencia con una implementación insegura es " - "que un diseño perfecto aún puede tener defectos de implementación." - ), - impact=( - "Vulnerabilidades sistémicas que no pueden ser corregidas solo con código. " - "Exposición de lógica de negocio, flujos de trabajo inseguros." - ), - mitigation=( - "1. Establecer y usar un ciclo de desarrollo seguro con profesionales de AppSec.\n" - "2. Usar bibliotecas de patrones de diseño seguro.\n" - "3. Usar modelado de amenazas para autenticación crítica y control de acceso.\n" - "4. Integrar controles de seguridad en las historias de usuario.\n" - "5. Escribir pruebas unitarias y de integración para validar flujos críticos." 
- ), - references=[ - "https://owasp.org/Top10/A04_2021-Insecure_Design/", - "https://cheatsheetseries.owasp.org/cheatsheets/Threat_Modeling_Cheat_Sheet.html", - ], - cwe_ids=["CWE-209", "CWE-256", "CWE-501", "CWE-522"], - ), - # A05:2021 - Security Misconfiguration - "security_misconfiguration": SecurityContext( - category="A05:2021 - Security Misconfiguration", - description=( - "La aplicación puede ser vulnerable si no está correctamente " - "endurecida o tiene permisos mal configurados, características " - "innecesarias habilitadas, o mensajes de error detallados." - ), - impact=( - "Acceso no autorizado a datos o funcionalidad del sistema. " - "Posible compromiso completo del sistema." - ), - mitigation=( - "1. Proceso de endurecimiento repetible y automatizado.\n" - "2. Plataforma mínima sin características, componentes o documentación innecesarios.\n" - "3. Revisar y actualizar configuraciones según avisos de seguridad.\n" - "4. Arquitectura de aplicación segmentada con contenedores.\n" - "5. Enviar directivas de seguridad a clientes (CSP, X-Frame-Options)." - ), - references=[ - "https://owasp.org/Top10/A05_2021-Security_Misconfiguration/", - "https://cheatsheetseries.owasp.org/cheatsheets/" - "Configuration_Security_Cheat_Sheet.html", - ], - cwe_ids=["CWE-16", "CWE-611", "CWE-1004", "CWE-2"], - ), - # A06:2021 - Vulnerable and Outdated Components - "vulnerable_components": SecurityContext( - category="A06:2021 - Vulnerable and Outdated Components", - description=( - "Usar componentes con vulnerabilidades conocidas. Esto incluye " - "bibliotecas, frameworks, y otros módulos de software que se ejecutan " - "con los mismos privilegios que la aplicación." - ), - impact=( - "Desde ataques menores hasta toma completa del servidor, dependiendo " - "de la vulnerabilidad del componente." - ), - mitigation=( - "1. Eliminar dependencias no utilizadas, características y componentes innecesarios.\n" - "2. 
Inventariar versiones de componentes cliente y servidor continuamente.\n" - "3. Monitorear fuentes como CVE y NVD para vulnerabilidades.\n" - "4. Obtener componentes solo de fuentes oficiales sobre enlaces seguros.\n" - "5. Monitorear bibliotecas y componentes sin mantenimiento." - ), - references=[ - "https://owasp.org/Top10/" "A06_2021-Vulnerable_and_Outdated_Components/", - "https://cheatsheetseries.owasp.org/cheatsheets/" - "Vulnerable_Dependency_Management_Cheat_Sheet.html", - ], - cwe_ids=["CWE-1104"], - ), - # A07:2021 - Identification and Authentication Failures - "authentication_failures": SecurityContext( - category="A07:2021 - Identification and Authentication Failures", - description=( - "Confirmación de la identidad del usuario, autenticación y gestión " - "de sesiones es crítica. La aplicación es vulnerable si permite " - "ataques automatizados, contraseñas débiles, o sesiones mal gestionadas." - ), - impact=( - "Compromiso de cuentas de usuario, robo de identidad, acceso " - "no autorizado a datos sensibles o funcionalidad administrativa." - ), - mitigation=( - "1. Implementar autenticación multifactor donde sea posible.\n" - "2. No desplegar con credenciales por defecto, especialmente admin.\n" - "3. Implementar verificaciones de contraseñas débiles.\n" - "4. Limitar o retrasar cada vez más los intentos de login fallidos.\n" - "5. Usar un gestor de sesiones seguro del lado del servidor con alta entropía." 
- ), - references=[ - "https://owasp.org/Top10/A07_2021-Identification_and_Authentication_Failures/", - "https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html", - "https://cheatsheetseries.owasp.org/cheatsheets/Session_Management_Cheat_Sheet.html", - ], - cwe_ids=["CWE-287", "CWE-384", "CWE-307", "CWE-613"], - ), - # A08:2021 - Software and Data Integrity Failures - "integrity_failures": SecurityContext( - category="A08:2021 - Software and Data Integrity Failures", - description=( - "Se relaciona con código e infraestructura que no protege contra " - "violaciones de integridad. Incluye actualizaciones de software " - "inseguras, pipelines CI/CD inseguros, y deserialización insegura." - ), - impact=( - "Ejecución remota de código, ataques a la cadena de suministro, " - "modificación de datos sin autorización." - ), - mitigation=( - "1. Usar firmas digitales para verificar que el software " - "proviene de la fuente esperada.\n" - "2. Asegurar que las bibliotecas y dependencias usan " - "repositorios de confianza.\n" - "3. Usar herramientas de análisis de composición de software (SCA).\n" - "4. Asegurar que el pipeline CI/CD tiene segregación apropiada " - "y control de acceso.\n" - "5. No enviar datos serializados sin firmar o sin cifrar " - "a clientes no confiables." - ), - references=[ - "https://owasp.org/Top10/" "A08_2021-Software_and_Data_Integrity_Failures/", - "https://cheatsheetseries.owasp.org/cheatsheets/" "Deserialization_Cheat_Sheet.html", - ], - cwe_ids=["CWE-829", "CWE-494", "CWE-502"], - ), - # A09:2021 - Security Logging and Monitoring Failures - "logging_failures": SecurityContext( - category="A09:2021 - Security Logging and Monitoring Failures", - description=( - "Sin registro y monitoreo suficiente, los ataques no pueden ser " - "detectados. Incluye no registrar eventos auditables, no generar " - "alertas adecuadas, o no tener un plan de respuesta a incidentes." 
- ), - impact=( - "Los atacantes pueden mantener persistencia, pivotar a más sistemas, " - "manipular, extraer o destruir datos sin ser detectados." - ), - mitigation=( - "1. Asegurar que todos los fallos de login, control de acceso " - "y validación de entrada del servidor se registran " - "con contexto suficiente.\n" - "2. Asegurar que los logs se generan en formato que las " - "soluciones de gestión de logs puedan consumir fácilmente.\n" - "3. Asegurar que los datos de log se codifican correctamente " - "para prevenir inyecciones.\n" - "4. Establecer monitoreo y alertas efectivos " - "para actividades sospechosas.\n" - "5. Establecer un plan de respuesta y recuperación de incidentes." - ), - references=[ - "https://owasp.org/Top10/" "A09_2021-Security_Logging_and_Monitoring_Failures/", - "https://cheatsheetseries.owasp.org/cheatsheets/Logging_Cheat_Sheet.html", - ], - cwe_ids=["CWE-117", "CWE-223", "CWE-532", "CWE-778"], - ), - # A10:2021 - Server-Side Request Forgery (SSRF) - "ssrf": SecurityContext( - category="A10:2021 - Server-Side Request Forgery (SSRF)", - description=( - "SSRF ocurre cuando una aplicación web obtiene un recurso remoto " - "sin validar la URL suministrada por el usuario. Permite a un atacante " - "forzar a la aplicación a enviar una solicitud crafteada a un destino inesperado." - ), - impact=( - "Escaneo de puertos internos, acceso a servicios internos, lectura de " - "metadatos de servicios en la nube, o ejecución remota de código." - ), - mitigation=( - "1. Segmentar la funcionalidad de acceso a recursos remotos en redes separadas.\n" - "2. Hacer cumplir políticas de firewall 'deny by default'.\n" - "3. Sanitizar y validar todos los datos de entrada suministrados por el cliente.\n" - "4. No enviar respuestas raw al cliente.\n" - "5. Deshabilitar redirecciones HTTP y usar listas de permitidos para URL." 
- ), - references=[ - "https://owasp.org/Top10/" "A10_2021-Server-Side_Request_Forgery_%28SSRF%29/", - "https://cheatsheetseries.owasp.org/cheatsheets/" - "Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", - ], - cwe_ids=["CWE-918"], - ), -} - - -# ============================================================================= -# Mapeo de reglas de CodeGuard a categorías OWASP -# ============================================================================= - -RULE_TO_OWASP_MAPPING: Dict[str, str] = { - # SecurityAgent - Dangerous Functions - "SEC001_EVAL": "injection", - "SEC001_EXEC": "injection", - "SEC001_COMPILE": "injection", - "SEC001___IMPORT__": "injection", - "SEC001_EXECFILE": "injection", - "SEC001_PICKLE": "integrity_failures", - # SecurityAgent - SQL Injection - "SEC002_SQL_INJECTION": "injection", - # SecurityAgent - Hardcoded Credentials - "SEC003_PASSWORD": "cryptographic_failures", - "SEC003_API_KEY": "cryptographic_failures", - "SEC003_SECRET_KEY": "cryptographic_failures", - "SEC003_TOKEN": "cryptographic_failures", - "SEC003_ACCESS_KEY": "cryptographic_failures", - # SecurityAgent - Weak Cryptography - "SEC004_MD5": "cryptographic_failures", - "SEC004_SHA1": "cryptographic_failures", - "SEC004_WEAK_ENCRYPTION": "cryptographic_failures", - # Common patterns by issue_type - "dangerous_function": "injection", - "sql_injection": "injection", - "hardcoded_credentials": "cryptographic_failures", - "weak_cryptography": "cryptographic_failures", - "insecure_deserialization": "integrity_failures", - "path_traversal": "broken_access_control", - "ssrf": "ssrf", - "xss": "injection", - "command_injection": "injection", - "ldap_injection": "injection", - "xpath_injection": "injection", -} - - -# ============================================================================= -# Funciones de utilidad -# ============================================================================= - - -def get_security_context( - rule_id: Optional[str] = None, - issue_type: 
Optional[str] = None, -) -> Optional[SecurityContext]: - """ - Obtiene el contexto de seguridad OWASP para una regla o tipo de issue. - - Args: - rule_id: ID de la regla (ej: "SEC001_EVAL") - issue_type: Tipo de issue (ej: "sql_injection") - - Returns: - SecurityContext si se encuentra mapeo, None en caso contrario - """ - # Primero intentar con rule_id - if rule_id: - owasp_key = RULE_TO_OWASP_MAPPING.get(rule_id) - if owasp_key: - return OWASP_TOP_10.get(owasp_key) - - # Luego intentar con issue_type - if issue_type: - # Normalizar issue_type (convertir espacios/guiones a underscore) - normalized = issue_type.lower().replace("-", "_").replace(" ", "_") - owasp_key = RULE_TO_OWASP_MAPPING.get(normalized) - if owasp_key: - return OWASP_TOP_10.get(owasp_key) - - # Buscar coincidencia parcial - for key, owasp_category in RULE_TO_OWASP_MAPPING.items(): - if key in normalized or normalized in key: - return OWASP_TOP_10.get(owasp_category) - - return None - - -def format_security_context(context: SecurityContext) -> str: - """ - Formatea el contexto de seguridad para incluirlo en un prompt. - - Args: - context: Contexto de seguridad OWASP - - Returns: - str: Texto formateado para el prompt de IA - """ - return f""" -=== CONTEXTO DE SEGURIDAD (OWASP) === -Categoría: {context.category} - -Descripción: -{context.description} - -Impacto Potencial: -{context.impact} - -Estrategias de Mitigación: -{context.mitigation} - -Referencias: -{chr(10).join(f"- {ref}" for ref in context.references)} - -CWEs Relacionados: {", ".join(context.cwe_ids)} -=================================== -""" +""" +Configuración del Protocolo de Contexto de Modelo (MCP). + +Contiene el diccionario embebido OWASP Top 10 con descripciones +de vulnerabilidades y remediaciones para enriquecer los prompts +enviados a la IA generativa. 
+""" + +from dataclasses import dataclass +from typing import Dict, List, Optional + + +@dataclass +class SecurityContext: + """ + Contexto de seguridad para una categoría de vulnerabilidad. + + Attributes: + category: Categoría OWASP (ej: "A03:2021 - Injection") + description: Descripción de la vulnerabilidad + impact: Impacto potencial en el sistema + mitigation: Estrategias de mitigación genéricas + references: URLs de documentación oficial + cwe_ids: IDs de CWE relacionados + """ + + category: str + description: str + impact: str + mitigation: str + references: List[str] + cwe_ids: List[str] + + +# ============================================================================= +# Diccionario OWASP Top 10 (2021) +# ============================================================================= + +OWASP_TOP_10: Dict[str, SecurityContext] = { + # A01:2021 - Broken Access Control + "broken_access_control": SecurityContext( + category="A01:2021 - Broken Access Control", + description=( + "Las restricciones sobre lo que los usuarios autenticados pueden hacer " + "a menudo no se aplican correctamente. Los atacantes pueden explotar " + "estos fallos para acceder a funcionalidades y/o datos no autorizados." + ), + impact=( + "Acceso no autorizado a datos sensibles, modificación de datos de otros " + "usuarios, escalación de privilegios, o ejecución de acciones administrativas." + ), + mitigation=( + "1. Denegar por defecto, excepto para recursos públicos.\n" + "2. Implementar mecanismos de control de acceso una vez y reutilizarlos.\n" + "3. Hacer cumplir la propiedad de registros (cada usuario solo accede a sus datos).\n" + "4. Deshabilitar listado de directorios del servidor web.\n" + "5. Registrar fallos de control de acceso y alertar a administradores." 
+ ), + references=[ + "https://owasp.org/Top10/A01_2021-Broken_Access_Control/", + "https://cheatsheetseries.owasp.org/cheatsheets/Authorization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-200", "CWE-284", "CWE-285", "CWE-352", "CWE-639"], + ), + # A02:2021 - Cryptographic Failures + "cryptographic_failures": SecurityContext( + category="A02:2021 - Cryptographic Failures", + description=( + "Antes conocido como 'Exposición de Datos Sensibles'. Se centra en " + "fallos relacionados con la criptografía que a menudo conducen a la " + "exposición de datos sensibles." + ), + impact=( + "Exposición de credenciales, tokens, datos personales (PII), datos " + "financieros, o información médica. Posible robo de identidad." + ), + mitigation=( + "1. Clasificar los datos procesados, almacenados o transmitidos.\n" + "2. No almacenar datos sensibles innecesariamente.\n" + "3. Cifrar todos los datos sensibles en reposo con algoritmos fuertes.\n" + "4. Usar protocolos actualizados (TLS 1.3) para datos en tránsito.\n" + "5. No usar algoritmos criptográficos obsoletos (MD5, SHA1, DES)." + ), + references=[ + "https://owasp.org/Top10/A02_2021-Cryptographic_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Cryptographic_Storage_Cheat_Sheet.html", + ], + cwe_ids=["CWE-259", "CWE-327", "CWE-328", "CWE-330", "CWE-331"], + ), + # A03:2021 - Injection + "injection": SecurityContext( + category="A03:2021 - Injection", + description=( + "Una aplicación es vulnerable a ataques de inyección cuando datos " + "suministrados por el usuario no son validados, filtrados o sanitizados. " + "Incluye SQL, NoSQL, OS Command, LDAP, XPath y ORM injection." + ), + impact=( + "Pérdida de datos, corrupción de datos, divulgación a partes no autorizadas, " + "pérdida de responsabilidad, denegación de acceso, o toma completa del host." + ), + mitigation=( + "1. Usar APIs seguras que eviten el uso del intérprete (consultas parametrizadas).\n" + "2. 
Usar validación de entrada positiva del lado del servidor.\n" + "3. Escapar caracteres especiales usando la sintaxis de escape específica.\n" + "4. Usar LIMIT y otros controles SQL para prevenir divulgación masiva.\n" + "5. No concatenar cadenas con datos del usuario en consultas dinámicas." + ), + references=[ + "https://owasp.org/Top10/A03_2021-Injection/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "SQL_Injection_Prevention_Cheat_Sheet.html", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Query_Parameterization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-77", "CWE-78", "CWE-79", "CWE-89", "CWE-94"], + ), + # A04:2021 - Insecure Design + "insecure_design": SecurityContext( + category="A04:2021 - Insecure Design", + description=( + "Una nueva categoría que se centra en los riesgos relacionados con " + "defectos de diseño. La diferencia con una implementación insegura es " + "que un diseño perfecto aún puede tener defectos de implementación." + ), + impact=( + "Vulnerabilidades sistémicas que no pueden ser corregidas solo con código. " + "Exposición de lógica de negocio, flujos de trabajo inseguros." + ), + mitigation=( + "1. Establecer y usar un ciclo de desarrollo seguro con profesionales de AppSec.\n" + "2. Usar bibliotecas de patrones de diseño seguro.\n" + "3. Usar modelado de amenazas para autenticación crítica y control de acceso.\n" + "4. Integrar controles de seguridad en las historias de usuario.\n" + "5. Escribir pruebas unitarias y de integración para validar flujos críticos." 
+ ), + references=[ + "https://owasp.org/Top10/A04_2021-Insecure_Design/", + "https://cheatsheetseries.owasp.org/cheatsheets/Threat_Modeling_Cheat_Sheet.html", + ], + cwe_ids=["CWE-209", "CWE-256", "CWE-501", "CWE-522"], + ), + # A05:2021 - Security Misconfiguration + "security_misconfiguration": SecurityContext( + category="A05:2021 - Security Misconfiguration", + description=( + "La aplicación puede ser vulnerable si no está correctamente " + "endurecida o tiene permisos mal configurados, características " + "innecesarias habilitadas, o mensajes de error detallados." + ), + impact=( + "Acceso no autorizado a datos o funcionalidad del sistema. " + "Posible compromiso completo del sistema." + ), + mitigation=( + "1. Proceso de endurecimiento repetible y automatizado.\n" + "2. Plataforma mínima sin características, componentes o documentación innecesarios.\n" + "3. Revisar y actualizar configuraciones según avisos de seguridad.\n" + "4. Arquitectura de aplicación segmentada con contenedores.\n" + "5. Enviar directivas de seguridad a clientes (CSP, X-Frame-Options)." + ), + references=[ + "https://owasp.org/Top10/A05_2021-Security_Misconfiguration/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Configuration_Security_Cheat_Sheet.html", + ], + cwe_ids=["CWE-16", "CWE-611", "CWE-1004", "CWE-2"], + ), + # A06:2021 - Vulnerable and Outdated Components + "vulnerable_components": SecurityContext( + category="A06:2021 - Vulnerable and Outdated Components", + description=( + "Usar componentes con vulnerabilidades conocidas. Esto incluye " + "bibliotecas, frameworks, y otros módulos de software que se ejecutan " + "con los mismos privilegios que la aplicación." + ), + impact=( + "Desde ataques menores hasta toma completa del servidor, dependiendo " + "de la vulnerabilidad del componente." + ), + mitigation=( + "1. Eliminar dependencias no utilizadas, características y componentes innecesarios.\n" + "2. 
Inventariar versiones de componentes cliente y servidor continuamente.\n" + "3. Monitorear fuentes como CVE y NVD para vulnerabilidades.\n" + "4. Obtener componentes solo de fuentes oficiales sobre enlaces seguros.\n" + "5. Monitorear bibliotecas y componentes sin mantenimiento." + ), + references=[ + "https://owasp.org/Top10/" "A06_2021-Vulnerable_and_Outdated_Components/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Vulnerable_Dependency_Management_Cheat_Sheet.html", + ], + cwe_ids=["CWE-1104"], + ), + # A07:2021 - Identification and Authentication Failures + "authentication_failures": SecurityContext( + category="A07:2021 - Identification and Authentication Failures", + description=( + "Confirmación de la identidad del usuario, autenticación y gestión " + "de sesiones es crítica. La aplicación es vulnerable si permite " + "ataques automatizados, contraseñas débiles, o sesiones mal gestionadas." + ), + impact=( + "Compromiso de cuentas de usuario, robo de identidad, acceso " + "no autorizado a datos sensibles o funcionalidad administrativa." + ), + mitigation=( + "1. Implementar autenticación multifactor donde sea posible.\n" + "2. No desplegar con credenciales por defecto, especialmente admin.\n" + "3. Implementar verificaciones de contraseñas débiles.\n" + "4. Limitar o retrasar cada vez más los intentos de login fallidos.\n" + "5. Usar un gestor de sesiones seguro del lado del servidor con alta entropía." 
+ ), + references=[ + "https://owasp.org/Top10/A07_2021-Identification_and_Authentication_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html", + "https://cheatsheetseries.owasp.org/cheatsheets/Session_Management_Cheat_Sheet.html", + ], + cwe_ids=["CWE-287", "CWE-384", "CWE-307", "CWE-613"], + ), + # A08:2021 - Software and Data Integrity Failures + "integrity_failures": SecurityContext( + category="A08:2021 - Software and Data Integrity Failures", + description=( + "Se relaciona con código e infraestructura que no protege contra " + "violaciones de integridad. Incluye actualizaciones de software " + "inseguras, pipelines CI/CD inseguros, y deserialización insegura." + ), + impact=( + "Ejecución remota de código, ataques a la cadena de suministro, " + "modificación de datos sin autorización." + ), + mitigation=( + "1. Usar firmas digitales para verificar que el software " + "proviene de la fuente esperada.\n" + "2. Asegurar que las bibliotecas y dependencias usan " + "repositorios de confianza.\n" + "3. Usar herramientas de análisis de composición de software (SCA).\n" + "4. Asegurar que el pipeline CI/CD tiene segregación apropiada " + "y control de acceso.\n" + "5. No enviar datos serializados sin firmar o sin cifrar " + "a clientes no confiables." + ), + references=[ + "https://owasp.org/Top10/" "A08_2021-Software_and_Data_Integrity_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/" "Deserialization_Cheat_Sheet.html", + ], + cwe_ids=["CWE-829", "CWE-494", "CWE-502"], + ), + # A09:2021 - Security Logging and Monitoring Failures + "logging_failures": SecurityContext( + category="A09:2021 - Security Logging and Monitoring Failures", + description=( + "Sin registro y monitoreo suficiente, los ataques no pueden ser " + "detectados. Incluye no registrar eventos auditables, no generar " + "alertas adecuadas, o no tener un plan de respuesta a incidentes." 
+ ), + impact=( + "Los atacantes pueden mantener persistencia, pivotar a más sistemas, " + "manipular, extraer o destruir datos sin ser detectados." + ), + mitigation=( + "1. Asegurar que todos los fallos de login, control de acceso " + "y validación de entrada del servidor se registran " + "con contexto suficiente.\n" + "2. Asegurar que los logs se generan en formato que las " + "soluciones de gestión de logs puedan consumir fácilmente.\n" + "3. Asegurar que los datos de log se codifican correctamente " + "para prevenir inyecciones.\n" + "4. Establecer monitoreo y alertas efectivos " + "para actividades sospechosas.\n" + "5. Establecer un plan de respuesta y recuperación de incidentes." + ), + references=[ + "https://owasp.org/Top10/" "A09_2021-Security_Logging_and_Monitoring_Failures/", + "https://cheatsheetseries.owasp.org/cheatsheets/Logging_Cheat_Sheet.html", + ], + cwe_ids=["CWE-117", "CWE-223", "CWE-532", "CWE-778"], + ), + # A10:2021 - Server-Side Request Forgery (SSRF) + "ssrf": SecurityContext( + category="A10:2021 - Server-Side Request Forgery (SSRF)", + description=( + "SSRF ocurre cuando una aplicación web obtiene un recurso remoto " + "sin validar la URL suministrada por el usuario. Permite a un atacante " + "forzar a la aplicación a enviar una solicitud crafteada a un destino inesperado." + ), + impact=( + "Escaneo de puertos internos, acceso a servicios internos, lectura de " + "metadatos de servicios en la nube, o ejecución remota de código." + ), + mitigation=( + "1. Segmentar la funcionalidad de acceso a recursos remotos en redes separadas.\n" + "2. Hacer cumplir políticas de firewall 'deny by default'.\n" + "3. Sanitizar y validar todos los datos de entrada suministrados por el cliente.\n" + "4. No enviar respuestas raw al cliente.\n" + "5. Deshabilitar redirecciones HTTP y usar listas de permitidos para URL." 
+ ), + references=[ + "https://owasp.org/Top10/" "A10_2021-Server-Side_Request_Forgery_%28SSRF%29/", + "https://cheatsheetseries.owasp.org/cheatsheets/" + "Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", + ], + cwe_ids=["CWE-918"], + ), +} + + +# ============================================================================= +# Mapeo de reglas de CodeGuard a categorías OWASP +# ============================================================================= + +RULE_TO_OWASP_MAPPING: Dict[str, str] = { + # SecurityAgent - Dangerous Functions + "SEC001_EVAL": "injection", + "SEC001_EXEC": "injection", + "SEC001_COMPILE": "injection", + "SEC001___IMPORT__": "injection", + "SEC001_EXECFILE": "injection", + "SEC001_PICKLE": "integrity_failures", + # SecurityAgent - SQL Injection + "SEC002_SQL_INJECTION": "injection", + # SecurityAgent - Hardcoded Credentials + "SEC003_PASSWORD": "cryptographic_failures", + "SEC003_API_KEY": "cryptographic_failures", + "SEC003_SECRET_KEY": "cryptographic_failures", + "SEC003_TOKEN": "cryptographic_failures", + "SEC003_ACCESS_KEY": "cryptographic_failures", + # SecurityAgent - Weak Cryptography + "SEC004_MD5": "cryptographic_failures", + "SEC004_SHA1": "cryptographic_failures", + "SEC004_WEAK_ENCRYPTION": "cryptographic_failures", + # Common patterns by issue_type + "dangerous_function": "injection", + "sql_injection": "injection", + "hardcoded_credentials": "cryptographic_failures", + "weak_cryptography": "cryptographic_failures", + "insecure_deserialization": "integrity_failures", + "path_traversal": "broken_access_control", + "ssrf": "ssrf", + "xss": "injection", + "command_injection": "injection", + "ldap_injection": "injection", + "xpath_injection": "injection", +} + + +# ============================================================================= +# Funciones de utilidad +# ============================================================================= + + +def get_security_context( + rule_id: Optional[str] = None, + issue_type: 
Optional[str] = None, +) -> Optional[SecurityContext]: + """ + Obtiene el contexto de seguridad OWASP para una regla o tipo de issue. + + Args: + rule_id: ID de la regla (ej: "SEC001_EVAL") + issue_type: Tipo de issue (ej: "sql_injection") + + Returns: + SecurityContext si se encuentra mapeo, None en caso contrario + """ + # Primero intentar con rule_id + if rule_id: + owasp_key = RULE_TO_OWASP_MAPPING.get(rule_id) + if owasp_key: + return OWASP_TOP_10.get(owasp_key) + + # Luego intentar con issue_type + if issue_type: + # Normalizar issue_type (convertir espacios/guiones a underscore) + normalized = issue_type.lower().replace("-", "_").replace(" ", "_") + owasp_key = RULE_TO_OWASP_MAPPING.get(normalized) + if owasp_key: + return OWASP_TOP_10.get(owasp_key) + + # Buscar coincidencia parcial + for key, owasp_category in RULE_TO_OWASP_MAPPING.items(): + if key in normalized or normalized in key: + return OWASP_TOP_10.get(owasp_category) + + return None + + +def format_security_context(context: SecurityContext) -> str: + """ + Formatea el contexto de seguridad para incluirlo en un prompt. + + Args: + context: Contexto de seguridad OWASP + + Returns: + str: Texto formateado para el prompt de IA + """ + return f""" +=== CONTEXTO DE SEGURIDAD (OWASP) === +Categoría: {context.category} + +Descripción: +{context.description} + +Impacto Potencial: +{context.impact} + +Estrategias de Mitigación: +{context.mitigation} + +Referencias: +{chr(10).join(f"- {ref}" for ref in context.references)} + +CWEs Relacionados: {", ".join(context.cwe_ids)} +=================================== +""" diff --git a/backend/src/core/config/settings.py b/backend/src/core/config/settings.py index 9a33a24..8cfc774 100644 --- a/backend/src/core/config/settings.py +++ b/backend/src/core/config/settings.py @@ -1,108 +1,108 @@ -""" -Configuración centralizada para CodeGuard AI. - -Carga variables de entorno usando pydantic-settings. 
-""" - -from typing import Optional - -from pydantic import Field -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class Settings(BaseSettings): - """ - Configuración de la aplicación cargada desde variables de entorno. - - Attributes: - CLERK_SECRET_KEY: Clave secreta de Clerk para validar JWT - CLERK_PUBLISHABLE_KEY: Clave pública de Clerk - DATABASE_URL: URL de conexión a PostgreSQL/Supabase - ENVIRONMENT: Entorno de ejecución (development/production) - DEBUG: Modo debug - """ - - # Clerk Authentication - CLERK_SECRET_KEY: Optional[str] = None - CLERK_PUBLISHABLE_KEY: str - CLERK_JWKS_URL: Optional[str] = Field( - default=None, description="URL del endpoint JWKS de Clerk para validar tokens RS256" - ) - CLERK_JWT_SIGNING_KEY: Optional[str] = Field( - default=None, description="Signing Key para validar Custom JWT Templates (HS256)" - ) - - # Database - DATABASE_URL: str - - # Application - ENVIRONMENT: str = "development" - DEBUG: bool = True - APP_NAME: str = "CodeGuard AI" - APP_VERSION: str = "1.0.0" - - # API - API_HOST: str = "0.0.0.0" - API_PORT: int = 8000 - - # CORS - ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:5173" - - # Redis (opcional) - REDIS_URL: Optional[str] = None - REDIS_PASSWORD: Optional[str] = None - - # ========================================== - # AI Services - Vertex AI (Sprint 3) - # ========================================== - - # Google Cloud Platform - GCP_PROJECT_ID: Optional[str] = Field(default=None) - GCP_LOCATION: str = Field(default="us-central1") - GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field(default=None) - - # Feature Flag - AI_ENABLED: bool = Field(default=True) - - # Model Selection - AI_MODEL_DEV: str = Field(default="gemini-1.5-flash-001") - AI_MODEL_PROD: str = Field(default="gemini-1.5-pro-001") - - # Model Parameters - AI_TEMPERATURE: float = Field(default=0.3, ge=0.0, le=1.0) - AI_MAX_OUTPUT_TOKENS: int = Field(default=2048, ge=100, le=8192) - - # Rate Limiting - 
AI_RATE_LIMIT_PER_HOUR: int = Field(default=10, ge=1) - - # Retry Configuration - AI_MAX_RETRIES: int = Field(default=3, ge=1, le=10) - AI_BACKOFF_FACTOR: float = Field(default=2.0, ge=1.0, le=5.0) - AI_INITIAL_BACKOFF: float = Field(default=1.0, ge=0.5, le=10.0) - - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - extra="ignore", - ) - - @property - def allowed_origins_list(self) -> list[str]: - """Retorna lista de orígenes permitidos para CORS.""" - return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")] - - @property - def ai_model_name(self) -> str: - """Selecciona el modelo según el entorno.""" - if self.ENVIRONMENT == "production": - return self.AI_MODEL_PROD - return self.AI_MODEL_DEV - - @property - def is_ai_configured(self) -> bool: - """Verifica si la IA está configurada correctamente.""" - return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) - - -# Singleton de configuración -settings = Settings() +""" +Configuración centralizada para CodeGuard AI. + +Carga variables de entorno usando pydantic-settings. +""" + +from typing import Optional + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """ + Configuración de la aplicación cargada desde variables de entorno. 
+ + Attributes: + CLERK_SECRET_KEY: Clave secreta de Clerk para validar JWT + CLERK_PUBLISHABLE_KEY: Clave pública de Clerk + DATABASE_URL: URL de conexión a PostgreSQL/Supabase + ENVIRONMENT: Entorno de ejecución (development/production) + DEBUG: Modo debug + """ + + # Clerk Authentication + CLERK_SECRET_KEY: Optional[str] = None + CLERK_PUBLISHABLE_KEY: str + CLERK_JWKS_URL: Optional[str] = Field( + default=None, description="URL del endpoint JWKS de Clerk para validar tokens RS256" + ) + CLERK_JWT_SIGNING_KEY: Optional[str] = Field( + default=None, description="Signing Key para validar Custom JWT Templates (HS256)" + ) + + # Database + DATABASE_URL: str + + # Application + ENVIRONMENT: str = "development" + DEBUG: bool = True + APP_NAME: str = "CodeGuard AI" + APP_VERSION: str = "1.0.0" + + # API + API_HOST: str = "0.0.0.0" + API_PORT: int = 8000 + + # CORS + ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:5173" + + # Redis (opcional) + REDIS_URL: Optional[str] = None + REDIS_PASSWORD: Optional[str] = None + + # ========================================== + # AI Services - Vertex AI (Sprint 3) + # ========================================== + + # Google Cloud Platform + GCP_PROJECT_ID: Optional[str] = Field(default=None) + GCP_LOCATION: str = Field(default="us-central1") + GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = Field(default=None) + + # Feature Flag + AI_ENABLED: bool = Field(default=True) + + # Model Selection + AI_MODEL_DEV: str = Field(default="gemini-1.5-flash-001") + AI_MODEL_PROD: str = Field(default="gemini-1.5-pro-001") + + # Model Parameters + AI_TEMPERATURE: float = Field(default=0.3, ge=0.0, le=1.0) + AI_MAX_OUTPUT_TOKENS: int = Field(default=2048, ge=100, le=8192) + + # Rate Limiting + AI_RATE_LIMIT_PER_HOUR: int = Field(default=10, ge=1) + + # Retry Configuration + AI_MAX_RETRIES: int = Field(default=3, ge=1, le=10) + AI_BACKOFF_FACTOR: float = Field(default=2.0, ge=1.0, le=5.0) + AI_INITIAL_BACKOFF: float = Field(default=1.0, 
ge=0.5, le=10.0) + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + extra="ignore", + ) + + @property + def allowed_origins_list(self) -> list[str]: + """Retorna lista de orígenes permitidos para CORS.""" + return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")] + + @property + def ai_model_name(self) -> str: + """Selecciona el modelo según el entorno.""" + if self.ENVIRONMENT == "production": + return self.AI_MODEL_PROD + return self.AI_MODEL_DEV + + @property + def is_ai_configured(self) -> bool: + """Verifica si la IA está configurada correctamente.""" + return bool(self.AI_ENABLED and self.GCP_PROJECT_ID and self.GOOGLE_APPLICATION_CREDENTIALS) + + +# Singleton de configuración +settings = Settings() diff --git a/backend/src/core/database.py b/backend/src/core/database.py index 9e238cb..d80a0bd 100644 --- a/backend/src/core/database.py +++ b/backend/src/core/database.py @@ -1,39 +1,39 @@ -""" -Database configuration for CodeGuard AI -""" - -import os -from typing import Generator - -from dotenv import load_dotenv -from sqlalchemy import create_engine -from sqlalchemy.orm import Session, sessionmaker - -load_dotenv() - -DATABASE_URL = os.getenv( - "DATABASE_URL", - "postgresql://postgres:postgres@localhost:5432/codeguard", -) - -engine = create_engine(DATABASE_URL, pool_pre_ping=True) -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - - -def get_db() -> Generator[Session, None, None]: - """ - Dependencia de FastAPI para obtener sesión de base de datos. - - Yields: - Session: Sesión de SQLAlchemy. 
- - Example: - @app.get("/users") - def get_users(db: Session = Depends(get_db)): - return db.query(User).all() - """ - db = SessionLocal() - try: - yield db - finally: - db.close() +""" +Database configuration for CodeGuard AI +""" + +import os +from typing import Generator + +from dotenv import load_dotenv +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +load_dotenv() + +DATABASE_URL = os.getenv( + "DATABASE_URL", + "postgresql://postgres:postgres@localhost:5432/codeguard", +) + +engine = create_engine(DATABASE_URL, pool_pre_ping=True) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def get_db() -> Generator[Session, None, None]: + """ + Dependencia de FastAPI para obtener sesión de base de datos. + + Yields: + Session: Sesión de SQLAlchemy. + + Example: + @app.get("/users") + def get_users(db: Session = Depends(get_db)): + return db.query(User).all() + """ + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/src/core/dependencies/get_db.py b/backend/src/core/dependencies/get_db.py index 8f6f97c..4b9cb55 100644 --- a/backend/src/core/dependencies/get_db.py +++ b/backend/src/core/dependencies/get_db.py @@ -1,29 +1,29 @@ -""" -Dependencia para obtener sesión de base de datos. -""" - -from typing import Generator - -from sqlalchemy.orm import Session - -from src.core.database import SessionLocal - - -def get_db() -> Generator[Session, None, None]: - """ - Crea una sesión de base de datos por request y la cierra al finalizar. - - Yields: - Session: Sesión de SQLAlchemy para operaciones de base de datos. - - Example: - @router.post("/items") - def create_item(db: Session = Depends(get_db)): - # usar db aquí - pass - """ - db = SessionLocal() - try: - yield db - finally: - db.close() +""" +Dependencia para obtener sesión de base de datos. 
+""" + +from typing import Generator + +from sqlalchemy.orm import Session + +from src.core.database import SessionLocal + + +def get_db() -> Generator[Session, None, None]: + """ + Crea una sesión de base de datos por request y la cierra al finalizar. + + Yields: + Session: Sesión de SQLAlchemy para operaciones de base de datos. + + Example: + @router.post("/items") + def create_item(db: Session = Depends(get_db)): + # usar db aquí + pass + """ + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/src/core/events/analysis_events.py b/backend/src/core/events/analysis_events.py index 3d422e5..1f8e54c 100644 --- a/backend/src/core/events/analysis_events.py +++ b/backend/src/core/events/analysis_events.py @@ -1,21 +1,21 @@ -""" -Definición de eventos del dominio de análisis. -Ubicación: Core/Events (Shared Kernel). -""" - -from enum import Enum - - -class AnalysisEventType(str, Enum): - """ - Enumeración de tipos de eventos para el ciclo de vida del análisis. - """ - - ANALYSIS_STARTED = "analysis_started" - ANALYSIS_COMPLETED = "analysis_completed" - ANALYSIS_FAILED = "analysis_failed" - - AGENT_STARTED = "agent_started" - AGENT_COMPLETED = "agent_completed" - AGENT_FAILED = "agent_failed" - AGENT_TIMEOUT = "agent_timeout" +""" +Definición de eventos del dominio de análisis. +Ubicación: Core/Events (Shared Kernel). +""" + +from enum import Enum + + +class AnalysisEventType(str, Enum): + """ + Enumeración de tipos de eventos para el ciclo de vida del análisis. 
+ """ + + ANALYSIS_STARTED = "analysis_started" + ANALYSIS_COMPLETED = "analysis_completed" + ANALYSIS_FAILED = "analysis_failed" + + AGENT_STARTED = "agent_started" + AGENT_COMPLETED = "agent_completed" + AGENT_FAILED = "agent_failed" + AGENT_TIMEOUT = "agent_timeout" diff --git a/backend/src/core/events/observers.py b/backend/src/core/events/observers.py index 5f538be..181c4b4 100644 --- a/backend/src/core/events/observers.py +++ b/backend/src/core/events/observers.py @@ -1,22 +1,22 @@ -""" -Definición de interfaces para el patrón Observer. -""" - -from abc import ABC, abstractmethod -from typing import Any, Dict - - -class EventObserver(ABC): - """ - Interfaz base para cualquier observador que desee suscribirse al EventBus. - """ - - @abstractmethod - async def on_event(self, event: Dict[str, Any]) -> None: - """ - Método invocado cuando ocurre un evento. - - Args: - event: Diccionario con los datos del evento (tipo, timestamp, payload). - """ - pass +""" +Definición de interfaces para el patrón Observer. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class EventObserver(ABC): + """ + Interfaz base para cualquier observador que desee suscribirse al EventBus. + """ + + @abstractmethod + async def on_event(self, event: Dict[str, Any]) -> None: + """ + Método invocado cuando ocurre un evento. + + Args: + event: Diccionario con los datos del evento (tipo, timestamp, payload). + """ + pass diff --git a/backend/src/external/clerk_client.py b/backend/src/external/clerk_client.py index f7447f2..037da93 100644 --- a/backend/src/external/clerk_client.py +++ b/backend/src/external/clerk_client.py @@ -1,325 +1,325 @@ -""" -Cliente externo para validación de tokens JWT de Clerk. - -Soporta dos tipos de tokens: -1. Session Tokens (RS256 con JWKS) - Tokens estándar de Clerk -2. 
Custom JWT Templates (HS256 con secret key) - Para integraciones de terceros - -El cliente detecta automáticamente el algoritmo del token y usa la -validación correspondiente. -""" - -from typing import Any, Dict, Optional - -import httpx -from jose import ExpiredSignatureError, JWTError, jwk, jwt - -from src.core.config.settings import settings - - -class ClerkTokenError(Exception): - """Error base para problemas con tokens de Clerk.""" - - pass - - -class ClerkTokenExpiredError(ClerkTokenError): - """Token JWT expirado.""" - - pass - - -class ClerkTokenInvalidError(ClerkTokenError): - """Token JWT inválido o malformado.""" - - pass - - -class ClerkClient: - """ - Cliente para validar tokens JWT emitidos por Clerk. - - Detecta automáticamente el tipo de token: - - RS256: Session tokens estándar (valida con JWKS) - - HS256: Custom JWT templates (valida con secret key) - - Referencias: - - Session Tokens: https://clerk.com/docs/guides/sessions/session-tokens - - JWT Templates: https://clerk.com/docs/guides/sessions/jwt-templates - """ - - # Cache de JWKS para evitar requests en cada validación - _jwks_cache: Optional[Dict[str, Any]] = None - - def __init__(self): - """ - Inicializa el cliente con la configuración de Clerk. - - Requiere al menos uno de: - - CLERK_JWKS_URL: Para validar session tokens (RS256) - - CLERK_JWT_SIGNING_KEY: Para validar custom JWT templates (HS256) - """ - self._jwks_url = settings.CLERK_JWKS_URL - # Para HS256, priorizar JWT_SIGNING_KEY sobre SECRET_KEY - self._signing_key = settings.CLERK_JWT_SIGNING_KEY or settings.CLERK_SECRET_KEY - - if not self._jwks_url and not self._signing_key: - raise ValueError( - "Se requiere CLERK_JWKS_URL o CLERK_JWT_SIGNING_KEY. " - "Configura al menos una de estas variables de entorno." - ) - - def _get_token_algorithm(self, token: str) -> str: - """ - Extrae el algoritmo del header del token. - - Args: - token: Token JWT. - - Returns: - Algoritmo (ej: "HS256", "RS256"). 
- - Raises: - ClerkTokenInvalidError: Si no se puede leer el header. - """ - try: - unverified_header = jwt.get_unverified_header(token) - alg = unverified_header.get("alg") - - if not alg: - raise ClerkTokenInvalidError("Token no contiene algoritmo en el header") - - return alg - - except JWTError as e: - raise ClerkTokenInvalidError(f"Error al leer header del token: {e}") from e - - def _fetch_jwks(self) -> Dict[str, Any]: - """ - Obtiene las claves públicas del endpoint JWKS de Clerk. - - Returns: - Dict con las claves JWKS en formato JWK. - - Raises: - ClerkTokenInvalidError: Si no se puede obtener el JWKS. - """ - if ClerkClient._jwks_cache is not None: - return ClerkClient._jwks_cache - - if not self._jwks_url: - raise ClerkTokenInvalidError( - "CLERK_JWKS_URL no configurado. " "Requerido para validar tokens RS256." - ) - - try: - response = httpx.get(self._jwks_url, timeout=10.0) - response.raise_for_status() - jwks_data = response.json() - - # Validar que tenga la estructura esperada - if "keys" not in jwks_data or not isinstance(jwks_data["keys"], list): - raise ClerkTokenInvalidError("Respuesta JWKS inválida: falta campo 'keys'") - - ClerkClient._jwks_cache = jwks_data - return ClerkClient._jwks_cache - - except httpx.HTTPError as e: - raise ClerkTokenInvalidError(f"Error al obtener JWKS de {self._jwks_url}: {e}") from e - - def _get_public_key(self, token: str): - """ - Obtiene la clave pública RSA correcta para verificar el token RS256. - - Args: - token: Token JWT para extraer el kid del header. - - Returns: - Clave pública RSA construida desde JWKS. - - Raises: - ClerkTokenInvalidError: Si no se encuentra la clave o el kid. 
- """ - try: - # Obtener kid del header del token (sin verificar aún) - unverified_header = jwt.get_unverified_header(token) - kid = unverified_header.get("kid") - - if not kid: - raise ClerkTokenInvalidError("Token RS256 no contiene 'kid' en el header") - - # Buscar la clave en JWKS - jwks_data = self._fetch_jwks() - - for key_data in jwks_data.get("keys", []): - if key_data.get("kid") == kid: - return jwk.construct(key_data) - - # Si no se encuentra, invalidar cache y reintentar una vez - ClerkClient._jwks_cache = None - jwks_data = self._fetch_jwks() - - for key_data in jwks_data.get("keys", []): - if key_data.get("kid") == kid: - return jwk.construct(key_data) - - raise ClerkTokenInvalidError(f"No se encontró clave pública con kid '{kid}' en JWKS") - - except JWTError as e: - raise ClerkTokenInvalidError(f"Error al extraer header del token: {e}") from e - - def _verify_rs256_token(self, token: str) -> Dict[str, Any]: - """ - Verifica un token RS256 (Session Token estándar de Clerk). - - Args: - token: Token JWT con algoritmo RS256. - - Returns: - Payload decodificado del token. - - Raises: - ClerkTokenExpiredError: Si el token expiró. - ClerkTokenInvalidError: Si el token es inválido. - """ - public_key = self._get_public_key(token) - - return jwt.decode( - token, - public_key, - algorithms=["RS256"], - options={ - "verify_signature": True, - "verify_exp": True, - "verify_nbf": True, - "verify_iat": True, - "verify_aud": False, # Clerk no siempre usa aud - "verify_iss": False, # ISS varía según instancia - }, - ) - - def _verify_hs256_token(self, token: str) -> Dict[str, Any]: - """ - Verifica un token HS256 (Custom JWT Template de Clerk). - - Args: - token: Token JWT con algoritmo HS256. - - Returns: - Payload decodificado del token. - - Raises: - ClerkTokenExpiredError: Si el token expiró. - ClerkTokenInvalidError: Si el token es inválido. - """ - if not self._signing_key: - raise ClerkTokenInvalidError( - "CLERK_JWT_SIGNING_KEY no configurado. 
" - "Requerido para validar tokens HS256 (JWT Templates)." - ) - - return jwt.decode( - token, - self._signing_key, - algorithms=["HS256"], - options={ - "verify_signature": True, - "verify_exp": True, - "verify_nbf": True, - "verify_iat": True, - "verify_aud": False, - "verify_iss": False, - }, - ) - - def verify_token(self, token: str) -> Dict[str, Any]: - """ - Valida un token JWT de Clerk y retorna el payload completo. - - Detecta automáticamente el algoritmo del token: - - RS256: Session token estándar (valida con JWKS) - - HS256: Custom JWT template (valida con secret key) - - Args: - token: Token JWT a validar. - - Returns: - Dict con el payload completo del JWT, incluyendo: - - sub: User ID (subject) - - email: Email del usuario - - name: Nombre del usuario - - role: Rol del usuario (si está configurado) - - exp, iat, nbf: Timestamps - - iss, jti: Emisor e identificador - - public_metadata, user_metadata, app_metadata: Metadatos - - Raises: - ClerkTokenExpiredError: Si el token ha expirado (exp < now). - ClerkTokenInvalidError: Si el token es inválido, malformado, - o no se puede validar. - """ - try: - # Detectar algoritmo del token - algorithm = self._get_token_algorithm(token) - - # Validar según el algoritmo - if algorithm == "RS256": - payload = self._verify_rs256_token(token) - elif algorithm == "HS256": - payload = self._verify_hs256_token(token) - else: - raise ClerkTokenInvalidError( - f"Algoritmo no soportado: {algorithm}. " - "Clerk usa RS256 (session tokens) o HS256 (JWT templates)." - ) - - return payload - - except ExpiredSignatureError as e: - raise ClerkTokenExpiredError( - "El token ha expirado. El usuario debe iniciar sesión nuevamente." 
- ) from e - - except ClerkTokenExpiredError: - # Re-raise para mantener el tipo de excepción - raise - - except ClerkTokenInvalidError: - # Re-raise para mantener el tipo de excepción - raise - - except JWTError as e: - raise ClerkTokenInvalidError(f"Token inválido o malformado: {e}") from e - - def get_user_id_from_token(self, token: str) -> str: - """ - Extrae solo el user_id del token. - - Args: - token: Token JWT. - - Returns: - User ID (claim 'sub'). - - Raises: - ClerkTokenExpiredError: Si el token ha expirado. - ClerkTokenInvalidError: Si el token es inválido o no tiene 'sub'. - """ - payload = self.verify_token(token) - user_id = payload.get("sub") - - if not user_id: - raise ClerkTokenInvalidError("Token no contiene 'sub' claim. Token inválido de Clerk.") - - return user_id - - @classmethod - def clear_jwks_cache(cls): - """ - Limpia el cache de JWKS. - - Útil para: - - Testing - - Forzar recarga después de rotación de claves - """ - cls._jwks_cache = None +""" +Cliente externo para validación de tokens JWT de Clerk. + +Soporta dos tipos de tokens: +1. Session Tokens (RS256 con JWKS) - Tokens estándar de Clerk +2. Custom JWT Templates (HS256 con secret key) - Para integraciones de terceros + +El cliente detecta automáticamente el algoritmo del token y usa la +validación correspondiente. +""" + +from typing import Any, Dict, Optional + +import httpx +from jose import ExpiredSignatureError, JWTError, jwk, jwt + +from src.core.config.settings import settings + + +class ClerkTokenError(Exception): + """Error base para problemas con tokens de Clerk.""" + + pass + + +class ClerkTokenExpiredError(ClerkTokenError): + """Token JWT expirado.""" + + pass + + +class ClerkTokenInvalidError(ClerkTokenError): + """Token JWT inválido o malformado.""" + + pass + + +class ClerkClient: + """ + Cliente para validar tokens JWT emitidos por Clerk. 
+ + Detecta automáticamente el tipo de token: + - RS256: Session tokens estándar (valida con JWKS) + - HS256: Custom JWT templates (valida con secret key) + + Referencias: + - Session Tokens: https://clerk.com/docs/guides/sessions/session-tokens + - JWT Templates: https://clerk.com/docs/guides/sessions/jwt-templates + """ + + # Cache de JWKS para evitar requests en cada validación + _jwks_cache: Optional[Dict[str, Any]] = None + + def __init__(self): + """ + Inicializa el cliente con la configuración de Clerk. + + Requiere al menos uno de: + - CLERK_JWKS_URL: Para validar session tokens (RS256) + - CLERK_JWT_SIGNING_KEY: Para validar custom JWT templates (HS256) + """ + self._jwks_url = settings.CLERK_JWKS_URL + # Para HS256, priorizar JWT_SIGNING_KEY sobre SECRET_KEY + self._signing_key = settings.CLERK_JWT_SIGNING_KEY or settings.CLERK_SECRET_KEY + + if not self._jwks_url and not self._signing_key: + raise ValueError( + "Se requiere CLERK_JWKS_URL o CLERK_JWT_SIGNING_KEY. " + "Configura al menos una de estas variables de entorno." + ) + + def _get_token_algorithm(self, token: str) -> str: + """ + Extrae el algoritmo del header del token. + + Args: + token: Token JWT. + + Returns: + Algoritmo (ej: "HS256", "RS256"). + + Raises: + ClerkTokenInvalidError: Si no se puede leer el header. + """ + try: + unverified_header = jwt.get_unverified_header(token) + alg = unverified_header.get("alg") + + if not alg: + raise ClerkTokenInvalidError("Token no contiene algoritmo en el header") + + return alg + + except JWTError as e: + raise ClerkTokenInvalidError(f"Error al leer header del token: {e}") from e + + def _fetch_jwks(self) -> Dict[str, Any]: + """ + Obtiene las claves públicas del endpoint JWKS de Clerk. + + Returns: + Dict con las claves JWKS en formato JWK. + + Raises: + ClerkTokenInvalidError: Si no se puede obtener el JWKS. 
+ """ + if ClerkClient._jwks_cache is not None: + return ClerkClient._jwks_cache + + if not self._jwks_url: + raise ClerkTokenInvalidError( + "CLERK_JWKS_URL no configurado. " "Requerido para validar tokens RS256." + ) + + try: + response = httpx.get(self._jwks_url, timeout=10.0) + response.raise_for_status() + jwks_data = response.json() + + # Validar que tenga la estructura esperada + if "keys" not in jwks_data or not isinstance(jwks_data["keys"], list): + raise ClerkTokenInvalidError("Respuesta JWKS inválida: falta campo 'keys'") + + ClerkClient._jwks_cache = jwks_data + return ClerkClient._jwks_cache + + except httpx.HTTPError as e: + raise ClerkTokenInvalidError(f"Error al obtener JWKS de {self._jwks_url}: {e}") from e + + def _get_public_key(self, token: str): + """ + Obtiene la clave pública RSA correcta para verificar el token RS256. + + Args: + token: Token JWT para extraer el kid del header. + + Returns: + Clave pública RSA construida desde JWKS. + + Raises: + ClerkTokenInvalidError: Si no se encuentra la clave o el kid. 
+ """ + try: + # Obtener kid del header del token (sin verificar aún) + unverified_header = jwt.get_unverified_header(token) + kid = unverified_header.get("kid") + + if not kid: + raise ClerkTokenInvalidError("Token RS256 no contiene 'kid' en el header") + + # Buscar la clave en JWKS + jwks_data = self._fetch_jwks() + + for key_data in jwks_data.get("keys", []): + if key_data.get("kid") == kid: + return jwk.construct(key_data) + + # Si no se encuentra, invalidar cache y reintentar una vez + ClerkClient._jwks_cache = None + jwks_data = self._fetch_jwks() + + for key_data in jwks_data.get("keys", []): + if key_data.get("kid") == kid: + return jwk.construct(key_data) + + raise ClerkTokenInvalidError(f"No se encontró clave pública con kid '{kid}' en JWKS") + + except JWTError as e: + raise ClerkTokenInvalidError(f"Error al extraer header del token: {e}") from e + + def _verify_rs256_token(self, token: str) -> Dict[str, Any]: + """ + Verifica un token RS256 (Session Token estándar de Clerk). + + Args: + token: Token JWT con algoritmo RS256. + + Returns: + Payload decodificado del token. + + Raises: + ClerkTokenExpiredError: Si el token expiró. + ClerkTokenInvalidError: Si el token es inválido. + """ + public_key = self._get_public_key(token) + + return jwt.decode( + token, + public_key, + algorithms=["RS256"], + options={ + "verify_signature": True, + "verify_exp": True, + "verify_nbf": True, + "verify_iat": True, + "verify_aud": False, # Clerk no siempre usa aud + "verify_iss": False, # ISS varía según instancia + }, + ) + + def _verify_hs256_token(self, token: str) -> Dict[str, Any]: + """ + Verifica un token HS256 (Custom JWT Template de Clerk). + + Args: + token: Token JWT con algoritmo HS256. + + Returns: + Payload decodificado del token. + + Raises: + ClerkTokenExpiredError: Si el token expiró. + ClerkTokenInvalidError: Si el token es inválido. + """ + if not self._signing_key: + raise ClerkTokenInvalidError( + "CLERK_JWT_SIGNING_KEY no configurado. 
" + "Requerido para validar tokens HS256 (JWT Templates)." + ) + + return jwt.decode( + token, + self._signing_key, + algorithms=["HS256"], + options={ + "verify_signature": True, + "verify_exp": True, + "verify_nbf": True, + "verify_iat": True, + "verify_aud": False, + "verify_iss": False, + }, + ) + + def verify_token(self, token: str) -> Dict[str, Any]: + """ + Valida un token JWT de Clerk y retorna el payload completo. + + Detecta automáticamente el algoritmo del token: + - RS256: Session token estándar (valida con JWKS) + - HS256: Custom JWT template (valida con secret key) + + Args: + token: Token JWT a validar. + + Returns: + Dict con el payload completo del JWT, incluyendo: + - sub: User ID (subject) + - email: Email del usuario + - name: Nombre del usuario + - role: Rol del usuario (si está configurado) + - exp, iat, nbf: Timestamps + - iss, jti: Emisor e identificador + - public_metadata, user_metadata, app_metadata: Metadatos + + Raises: + ClerkTokenExpiredError: Si el token ha expirado (exp < now). + ClerkTokenInvalidError: Si el token es inválido, malformado, + o no se puede validar. + """ + try: + # Detectar algoritmo del token + algorithm = self._get_token_algorithm(token) + + # Validar según el algoritmo + if algorithm == "RS256": + payload = self._verify_rs256_token(token) + elif algorithm == "HS256": + payload = self._verify_hs256_token(token) + else: + raise ClerkTokenInvalidError( + f"Algoritmo no soportado: {algorithm}. " + "Clerk usa RS256 (session tokens) o HS256 (JWT templates)." + ) + + return payload + + except ExpiredSignatureError as e: + raise ClerkTokenExpiredError( + "El token ha expirado. El usuario debe iniciar sesión nuevamente." 
+ ) from e + + except ClerkTokenExpiredError: + # Re-raise para mantener el tipo de excepción + raise + + except ClerkTokenInvalidError: + # Re-raise para mantener el tipo de excepción + raise + + except JWTError as e: + raise ClerkTokenInvalidError(f"Token inválido o malformado: {e}") from e + + def get_user_id_from_token(self, token: str) -> str: + """ + Extrae solo el user_id del token. + + Args: + token: Token JWT. + + Returns: + User ID (claim 'sub'). + + Raises: + ClerkTokenExpiredError: Si el token ha expirado. + ClerkTokenInvalidError: Si el token es inválido o no tiene 'sub'. + """ + payload = self.verify_token(token) + user_id = payload.get("sub") + + if not user_id: + raise ClerkTokenInvalidError("Token no contiene 'sub' claim. Token inválido de Clerk.") + + return user_id + + @classmethod + def clear_jwks_cache(cls): + """ + Limpia el cache de JWKS. + + Útil para: + - Testing + - Forzar recarga después de rotación de claves + """ + cls._jwks_cache = None diff --git a/backend/src/external/gemini_client.py b/backend/src/external/gemini_client.py index 989137e..c300d49 100644 --- a/backend/src/external/gemini_client.py +++ b/backend/src/external/gemini_client.py @@ -1,310 +1,310 @@ -""" -Cliente de Google Vertex AI para generación de explicaciones con Gemini. - -Implementa el patrón Adapter para abstraer la comunicación con Vertex AI, -con soporte para exponential backoff en caso de rate limiting. 
- -Requiere: pip install google-cloud-aiplatform>=1.40.0 -""" - -import asyncio -import logging -from typing import Optional - -import vertexai -from google.api_core import exceptions as google_exceptions -from vertexai.generative_models import GenerationConfig, GenerativeModel - -from src.core.config.ai_config import ai_settings -from src.external.interfaces.ai_client import ( - AIClient, - AIClientError, - AIConnectionError, - AIModelError, - AIRateLimitError, - AIResponse, - AIResponseError, -) - -logger = logging.getLogger("agents.VertexAI") - - -class VertexAIClient(AIClient): - """ - Cliente para Google Vertex AI (Gemini). - - Utiliza autenticación via Service Account configurada en - GOOGLE_APPLICATION_CREDENTIALS. Implementa reintentos - automáticos con exponential backoff para errores transitorios. - - Attributes: - _model: Instancia del modelo generativo - _initialized: Flag indicando si Vertex AI fue inicializado - """ - - def __init__(self): - """ - Inicializa el cliente de Vertex AI. - - La inicialización real se hace de forma lazy en el primer uso - para evitar errores si las credenciales no están configuradas. - """ - self._model: Optional[GenerativeModel] = None - self._initialized: bool = False - self._generation_config: Optional[GenerationConfig] = None - - def _initialize(self) -> None: - """ - Inicializa Vertex AI y carga el modelo. - - Se ejecuta de forma lazy en la primera llamada a generate_explanation. - - Raises: - AIConnectionError: Si no se puede conectar a Vertex AI - AIModelError: Si el modelo no está disponible - """ - if self._initialized: - return - - if not ai_settings.is_configured: - raise AIClientError( - "Vertex AI no está configurado. 
" - "Verifica GCP_PROJECT_ID y GOOGLE_APPLICATION_CREDENTIALS en .env" - ) - - try: - # Inicializar Vertex AI con proyecto y ubicación - vertexai.init( - project=ai_settings.GCP_PROJECT_ID, - location=ai_settings.GCP_LOCATION, - ) - - # Cargar el modelo según el entorno (flash para dev, pro para prod) - self._model = GenerativeModel(ai_settings.model_name) - - # Configuración de generación - config_dict = ai_settings.get_generation_config() - self._generation_config = GenerationConfig(**config_dict) - - self._initialized = True - logger.info( - f"[VertexAI] Inicializado con modelo {ai_settings.model_name} " - f"en {ai_settings.GCP_LOCATION}" - ) - - except google_exceptions.PermissionDenied as e: - raise AIConnectionError( - "Permisos insuficientes. Verifica que la Service Account " - "tenga el rol 'Vertex AI User'.", - original_error=e, - ) - except google_exceptions.NotFound as e: - raise AIModelError( - f"Modelo {ai_settings.model_name} no encontrado. " - "Verifica el nombre del modelo y la región.", - original_error=e, - ) - except Exception as e: - raise AIConnectionError( - f"Error inicializando Vertex AI: {str(e)}", - original_error=e, - ) - - def _parse_response(self, response) -> AIResponse: - """ - Parsea y valida la respuesta del modelo. 
- - Args: - response: Respuesta raw del modelo Vertex AI - - Returns: - AIResponse: Respuesta estructurada - - Raises: - AIResponseError: Si la respuesta es inválida o está vacía - """ - if not response or not response.candidates: - raise AIResponseError("Respuesta vacía del modelo") - - candidate = response.candidates[0] - - # Verificar si fue bloqueado por safety - if candidate.finish_reason.name == "SAFETY": - raise AIResponseError("Contenido bloqueado por filtros de seguridad de Google") - - # Extraer texto - text = candidate.content.parts[0].text if candidate.content.parts else "" - - if not text: - raise AIResponseError("No se generó texto en la respuesta") - - # Calcular tokens (aproximado si no está disponible) - tokens_used = 0 - if hasattr(response, "usage_metadata"): - usage = response.usage_metadata - tokens_used = getattr(usage, "prompt_token_count", 0) + getattr( - usage, "candidates_token_count", 0 - ) - - logger.info( - f"[VertexAI] Generación exitosa - " - f"Tokens: {tokens_used}, " - f"Finish: {candidate.finish_reason.name}" - ) - - return AIResponse( - content=text, - model_name=ai_settings.model_name, - tokens_used=tokens_used, - finish_reason=candidate.finish_reason.name, - ) - - async def _handle_retryable_error( - self, error: Exception, attempt: int, max_retries: int, backoff: float, error_type: str - ) -> float: - """ - Maneja errores que permiten reintento con backoff. - - Args: - error: Excepción capturada - attempt: Intento actual (0-based) - max_retries: Máximo de reintentos permitidos - backoff: Tiempo de espera actual - error_type: Tipo de error para logging - - Returns: - float: Nuevo valor de backoff - - Raises: - AIRateLimitError: Si se agotan reintentos por rate limit - AIConnectionError: Si se agotan reintentos por servicio no disponible - """ - if attempt < max_retries: - logger.warning( - f"[VertexAI] {error_type}. 
" f"Reintento {attempt + 1}/{max_retries} en {backoff}s" - ) - await asyncio.sleep(backoff) - return backoff * ai_settings.AI_BACKOFF_FACTOR - - # Se agotaron los reintentos - if error_type == "Rate limit alcanzado": - raise AIRateLimitError( - "Límite de tasa excedido después de múltiples reintentos", - retry_after=backoff, - original_error=error, - ) - else: - raise AIConnectionError( - "Servicio de Vertex AI no disponible", - original_error=error, - ) - - async def generate_explanation(self, prompt: str) -> AIResponse: - """ - Genera una explicación usando Gemini con reintentos automáticos. - - Implementa exponential backoff para manejar rate limits (429) - y errores transitorios de la API. - - Args: - prompt: Texto del prompt a enviar al modelo - - Returns: - AIResponse: Respuesta estructurada con contenido y metadata - - Raises: - AIRateLimitError: Si se agotan los reintentos por rate limiting - AIConnectionError: Si hay problemas de conexión - AIClientError: Para otros errores - """ - # Inicialización lazy - self._initialize() - - if not self._model: - raise AIClientError("Modelo no inicializado") - - # Configuración de reintentos - max_retries = ai_settings.AI_MAX_RETRIES - backoff = ai_settings.AI_INITIAL_BACKOFF - last_error: Optional[Exception] = None - - for attempt in range(max_retries + 1): - try: - # Ejecutar generación en thread pool (Vertex AI SDK es síncrono) - response = await asyncio.get_event_loop().run_in_executor( - None, - lambda: self._model.generate_content( - prompt, - generation_config=self._generation_config, - ), - ) - return self._parse_response(response) - - except google_exceptions.ResourceExhausted as e: - last_error = e - backoff = await self._handle_retryable_error( - e, attempt, max_retries, backoff, "Rate limit alcanzado" - ) - - except google_exceptions.ServiceUnavailable as e: - last_error = e - backoff = await self._handle_retryable_error( - e, attempt, max_retries, backoff, "Servicio no disponible" - ) - - except 
google_exceptions.InvalidArgument as e: - raise AIModelError(f"Prompt inválido: {str(e)}", original_error=e) - - except AIResponseError: - raise - - except Exception as e: - logger.error(f"[VertexAI] Error inesperado: {str(e)}") - raise AIClientError(f"Error generando contenido: {str(e)}", original_error=e) - - raise AIClientError("Error después de múltiples reintentos", original_error=last_error) - - async def health_check(self) -> bool: - """ - Verifica si el cliente de Vertex AI está operativo. - - Intenta inicializar el cliente y verificar que el modelo esté disponible. - - Returns: - bool: True si el servicio está disponible - """ - try: - self._initialize() - return self._initialized and self._model is not None - except Exception as e: - logger.warning(f"[VertexAI] Health check fallido: {str(e)}") - return False - - @property - def model_name(self) -> str: - """Retorna el nombre del modelo configurado.""" - return ai_settings.model_name - - @property - def is_configured(self) -> bool: - """Verifica si el cliente está correctamente configurado.""" - return ai_settings.is_configured - - -# Singleton del cliente (opcional, para inyección de dependencias) -def get_ai_client() -> AIClient: - """ - Factory function para obtener el cliente de IA. - - Permite cambiar fácilmente la implementación (mock para tests). - - Returns: - AIClient: Instancia del cliente de IA configurado - - Raises: - AIClientError: Si la IA está deshabilitada o no hay biblioteca instalada - """ - if not ai_settings.AI_ENABLED: - raise AIClientError("Funcionalidad de IA deshabilitada (AI_ENABLED=false)") - - return VertexAIClient() +""" +Cliente de Google Vertex AI para generación de explicaciones con Gemini. + +Implementa el patrón Adapter para abstraer la comunicación con Vertex AI, +con soporte para exponential backoff en caso de rate limiting. 
+ +Requiere: pip install google-cloud-aiplatform>=1.40.0 +""" + +import asyncio +import logging +from typing import Optional + +import vertexai +from google.api_core import exceptions as google_exceptions +from vertexai.generative_models import GenerationConfig, GenerativeModel + +from src.core.config.ai_config import ai_settings +from src.external.interfaces.ai_client import ( + AIClient, + AIClientError, + AIConnectionError, + AIModelError, + AIRateLimitError, + AIResponse, + AIResponseError, +) + +logger = logging.getLogger("agents.VertexAI") + + +class VertexAIClient(AIClient): + """ + Cliente para Google Vertex AI (Gemini). + + Utiliza autenticación via Service Account configurada en + GOOGLE_APPLICATION_CREDENTIALS. Implementa reintentos + automáticos con exponential backoff para errores transitorios. + + Attributes: + _model: Instancia del modelo generativo + _initialized: Flag indicando si Vertex AI fue inicializado + """ + + def __init__(self): + """ + Inicializa el cliente de Vertex AI. + + La inicialización real se hace de forma lazy en el primer uso + para evitar errores si las credenciales no están configuradas. + """ + self._model: Optional[GenerativeModel] = None + self._initialized: bool = False + self._generation_config: Optional[GenerationConfig] = None + + def _initialize(self) -> None: + """ + Inicializa Vertex AI y carga el modelo. + + Se ejecuta de forma lazy en la primera llamada a generate_explanation. + + Raises: + AIConnectionError: Si no se puede conectar a Vertex AI + AIModelError: Si el modelo no está disponible + """ + if self._initialized: + return + + if not ai_settings.is_configured: + raise AIClientError( + "Vertex AI no está configurado. 
" + "Verifica GCP_PROJECT_ID y GOOGLE_APPLICATION_CREDENTIALS en .env" + ) + + try: + # Inicializar Vertex AI con proyecto y ubicación + vertexai.init( + project=ai_settings.GCP_PROJECT_ID, + location=ai_settings.GCP_LOCATION, + ) + + # Cargar el modelo según el entorno (flash para dev, pro para prod) + self._model = GenerativeModel(ai_settings.model_name) + + # Configuración de generación + config_dict = ai_settings.get_generation_config() + self._generation_config = GenerationConfig(**config_dict) + + self._initialized = True + logger.info( + f"[VertexAI] Inicializado con modelo {ai_settings.model_name} " + f"en {ai_settings.GCP_LOCATION}" + ) + + except google_exceptions.PermissionDenied as e: + raise AIConnectionError( + "Permisos insuficientes. Verifica que la Service Account " + "tenga el rol 'Vertex AI User'.", + original_error=e, + ) + except google_exceptions.NotFound as e: + raise AIModelError( + f"Modelo {ai_settings.model_name} no encontrado. " + "Verifica el nombre del modelo y la región.", + original_error=e, + ) + except Exception as e: + raise AIConnectionError( + f"Error inicializando Vertex AI: {str(e)}", + original_error=e, + ) + + def _parse_response(self, response) -> AIResponse: + """ + Parsea y valida la respuesta del modelo. 
+ + Args: + response: Respuesta raw del modelo Vertex AI + + Returns: + AIResponse: Respuesta estructurada + + Raises: + AIResponseError: Si la respuesta es inválida o está vacía + """ + if not response or not response.candidates: + raise AIResponseError("Respuesta vacía del modelo") + + candidate = response.candidates[0] + + # Verificar si fue bloqueado por safety + if candidate.finish_reason.name == "SAFETY": + raise AIResponseError("Contenido bloqueado por filtros de seguridad de Google") + + # Extraer texto + text = candidate.content.parts[0].text if candidate.content.parts else "" + + if not text: + raise AIResponseError("No se generó texto en la respuesta") + + # Calcular tokens (aproximado si no está disponible) + tokens_used = 0 + if hasattr(response, "usage_metadata"): + usage = response.usage_metadata + tokens_used = getattr(usage, "prompt_token_count", 0) + getattr( + usage, "candidates_token_count", 0 + ) + + logger.info( + f"[VertexAI] Generación exitosa - " + f"Tokens: {tokens_used}, " + f"Finish: {candidate.finish_reason.name}" + ) + + return AIResponse( + content=text, + model_name=ai_settings.model_name, + tokens_used=tokens_used, + finish_reason=candidate.finish_reason.name, + ) + + async def _handle_retryable_error( + self, error: Exception, attempt: int, max_retries: int, backoff: float, error_type: str + ) -> float: + """ + Maneja errores que permiten reintento con backoff. + + Args: + error: Excepción capturada + attempt: Intento actual (0-based) + max_retries: Máximo de reintentos permitidos + backoff: Tiempo de espera actual + error_type: Tipo de error para logging + + Returns: + float: Nuevo valor de backoff + + Raises: + AIRateLimitError: Si se agotan reintentos por rate limit + AIConnectionError: Si se agotan reintentos por servicio no disponible + """ + if attempt < max_retries: + logger.warning( + f"[VertexAI] {error_type}. 
" f"Reintento {attempt + 1}/{max_retries} en {backoff}s" + ) + await asyncio.sleep(backoff) + return backoff * ai_settings.AI_BACKOFF_FACTOR + + # Se agotaron los reintentos + if error_type == "Rate limit alcanzado": + raise AIRateLimitError( + "Límite de tasa excedido después de múltiples reintentos", + retry_after=backoff, + original_error=error, + ) + else: + raise AIConnectionError( + "Servicio de Vertex AI no disponible", + original_error=error, + ) + + async def generate_explanation(self, prompt: str) -> AIResponse: + """ + Genera una explicación usando Gemini con reintentos automáticos. + + Implementa exponential backoff para manejar rate limits (429) + y errores transitorios de la API. + + Args: + prompt: Texto del prompt a enviar al modelo + + Returns: + AIResponse: Respuesta estructurada con contenido y metadata + + Raises: + AIRateLimitError: Si se agotan los reintentos por rate limiting + AIConnectionError: Si hay problemas de conexión + AIClientError: Para otros errores + """ + # Inicialización lazy + self._initialize() + + if not self._model: + raise AIClientError("Modelo no inicializado") + + # Configuración de reintentos + max_retries = ai_settings.AI_MAX_RETRIES + backoff = ai_settings.AI_INITIAL_BACKOFF + last_error: Optional[Exception] = None + + for attempt in range(max_retries + 1): + try: + # Ejecutar generación en thread pool (Vertex AI SDK es síncrono) + response = await asyncio.get_event_loop().run_in_executor( + None, + lambda: self._model.generate_content( + prompt, + generation_config=self._generation_config, + ), + ) + return self._parse_response(response) + + except google_exceptions.ResourceExhausted as e: + last_error = e + backoff = await self._handle_retryable_error( + e, attempt, max_retries, backoff, "Rate limit alcanzado" + ) + + except google_exceptions.ServiceUnavailable as e: + last_error = e + backoff = await self._handle_retryable_error( + e, attempt, max_retries, backoff, "Servicio no disponible" + ) + + except 
google_exceptions.InvalidArgument as e: + raise AIModelError(f"Prompt inválido: {str(e)}", original_error=e) + + except AIResponseError: + raise + + except Exception as e: + logger.error(f"[VertexAI] Error inesperado: {str(e)}") + raise AIClientError(f"Error generando contenido: {str(e)}", original_error=e) + + raise AIClientError("Error después de múltiples reintentos", original_error=last_error) + + async def health_check(self) -> bool: + """ + Verifica si el cliente de Vertex AI está operativo. + + Intenta inicializar el cliente y verificar que el modelo esté disponible. + + Returns: + bool: True si el servicio está disponible + """ + try: + self._initialize() + return self._initialized and self._model is not None + except Exception as e: + logger.warning(f"[VertexAI] Health check fallido: {str(e)}") + return False + + @property + def model_name(self) -> str: + """Retorna el nombre del modelo configurado.""" + return ai_settings.model_name + + @property + def is_configured(self) -> bool: + """Verifica si el cliente está correctamente configurado.""" + return ai_settings.is_configured + + +# Singleton del cliente (opcional, para inyección de dependencias) +def get_ai_client() -> AIClient: + """ + Factory function para obtener el cliente de IA. + + Permite cambiar fácilmente la implementación (mock para tests). + + Returns: + AIClient: Instancia del cliente de IA configurado + + Raises: + AIClientError: Si la IA está deshabilitada o no hay biblioteca instalada + """ + if not ai_settings.AI_ENABLED: + raise AIClientError("Funcionalidad de IA deshabilitada (AI_ENABLED=false)") + + return VertexAIClient() diff --git a/backend/src/external/interfaces/__init__.py b/backend/src/external/interfaces/__init__.py index f890958..431b1da 100644 --- a/backend/src/external/interfaces/__init__.py +++ b/backend/src/external/interfaces/__init__.py @@ -1,17 +1,17 @@ -""" -Interfaces para clientes externos de IA. 
-""" - -from src.external.interfaces.ai_client import ( - AIClient, - AIClientError, - AIConnectionError, - AIRateLimitError, -) - -__all__ = [ - "AIClient", - "AIClientError", - "AIRateLimitError", - "AIConnectionError", -] +""" +Interfaces para clientes externos de IA. +""" + +from src.external.interfaces.ai_client import ( + AIClient, + AIClientError, + AIConnectionError, + AIRateLimitError, +) + +__all__ = [ + "AIClient", + "AIClientError", + "AIRateLimitError", + "AIConnectionError", +] diff --git a/backend/src/external/interfaces/ai_client.py b/backend/src/external/interfaces/ai_client.py index d110b8f..345e4b4 100644 --- a/backend/src/external/interfaces/ai_client.py +++ b/backend/src/external/interfaces/ai_client.py @@ -1,206 +1,206 @@ -""" -Interfaz abstracta para clientes de IA generativa. - -Define el contrato que deben implementar todos los proveedores de IA -(Google Vertex AI, OpenAI, Anthropic, etc.) siguiendo el patrón Adapter. -""" - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional - -# ============================================================================= -# Excepciones personalizadas para clientes de IA -# ============================================================================= - - -class AIClientError(Exception): - """ - Error base para todos los problemas con clientes de IA. - - Attributes: - message: Descripción del error - original_error: Excepción original (si existe) - """ - - def __init__(self, message: str, original_error: Optional[Exception] = None): - self.message = message - self.original_error = original_error - super().__init__(self.message) - - -class AIRateLimitError(AIClientError): - """ - Error de límite de tasa de la API de IA. - - Se lanza cuando la API retorna un error 429 (Too Many Requests) - o ResourceExhausted en el caso de Google Cloud. 
- - Attributes: - retry_after: Segundos sugeridos de espera antes de reintentar - """ - - def __init__( - self, - message: str = "Rate limit exceeded", - retry_after: Optional[float] = None, - original_error: Optional[Exception] = None, - ): - super().__init__(message, original_error) - self.retry_after = retry_after - - -class AIConnectionError(AIClientError): - """ - Error de conexión con el servicio de IA. - - Se lanza cuando no se puede establecer conexión con la API, - hay timeout o problemas de red. - """ - - def __init__( - self, - message: str = "Failed to connect to AI service", - original_error: Optional[Exception] = None, - ): - super().__init__(message, original_error) - - -class AIModelError(AIClientError): - """ - Error relacionado con el modelo de IA. - - Se lanza cuando el modelo no está disponible, el prompt excede - los límites o hay problemas con la configuración del modelo. - """ - - def __init__( - self, - message: str = "AI model error", - original_error: Optional[Exception] = None, - ): - super().__init__(message, original_error) - - -class AIResponseError(AIClientError): - """ - Error al procesar la respuesta de la IA. - - Se lanza cuando la respuesta no tiene el formato esperado - o no se puede parsear correctamente. - """ - - def __init__( - self, - message: str = "Invalid AI response", - original_error: Optional[Exception] = None, - ): - super().__init__(message, original_error) - - -# ============================================================================= -# Dataclass para respuesta estructurada -# ============================================================================= - - -@dataclass -class AIResponse: - """ - Respuesta estructurada de una llamada a la IA. - - Attributes: - content: Texto generado por el modelo - model_name: Nombre del modelo usado - tokens_used: Tokens consumidos (input + output) - finish_reason: Razón de finalización (stop, length, safety, etc.) 
- """ - - content: str - model_name: str - tokens_used: int = 0 - finish_reason: str = "stop" - - -# ============================================================================= -# Interfaz abstracta (Adapter Pattern) -# ============================================================================= - - -class AIClient(ABC): - """ - Interfaz abstracta para clientes de IA generativa. - - Define el contrato que deben implementar todos los proveedores - de IA, permitiendo cambiar entre diferentes servicios sin - modificar el código del negocio. - - Example: - ```python - class VertexAIClient(AIClient): - async def generate_explanation(self, prompt: str) -> AIResponse: - # Implementación específica de Vertex AI - ... - - # Uso - client: AIClient = VertexAIClient() - response = await client.generate_explanation("Explica este error...") - print(response.content) - ``` - """ - - @abstractmethod - async def generate_explanation(self, prompt: str) -> AIResponse: - """ - Genera una explicación o respuesta basada en el prompt. - - Este es el método principal que deben implementar todos los - proveedores de IA. - - Args: - prompt: Texto del prompt a enviar al modelo - - Returns: - AIResponse: Respuesta estructurada con el contenido generado - - Raises: - AIRateLimitError: Si se excede el límite de tasa de la API - AIConnectionError: Si hay problemas de conexión - AIModelError: Si hay problemas con el modelo - AIResponseError: Si la respuesta no es válida - AIClientError: Para otros errores de la API - """ - pass - - @abstractmethod - async def health_check(self) -> bool: - """ - Verifica si el cliente de IA está operativo. - - Útil para health checks del sistema y monitoreo. - - Returns: - bool: True si el servicio está disponible - """ - pass - - @property - @abstractmethod - def model_name(self) -> str: - """ - Retorna el nombre del modelo configurado. 
- - Returns: - str: Identificador del modelo (ej: 'gemini-1.5-flash-001') - """ - pass - - @property - @abstractmethod - def is_configured(self) -> bool: - """ - Verifica si el cliente tiene toda la configuración necesaria. - - Returns: - bool: True si el cliente está correctamente configurado - """ - pass +""" +Interfaz abstracta para clientes de IA generativa. + +Define el contrato que deben implementar todos los proveedores de IA +(Google Vertex AI, OpenAI, Anthropic, etc.) siguiendo el patrón Adapter. +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional + +# ============================================================================= +# Excepciones personalizadas para clientes de IA +# ============================================================================= + + +class AIClientError(Exception): + """ + Error base para todos los problemas con clientes de IA. + + Attributes: + message: Descripción del error + original_error: Excepción original (si existe) + """ + + def __init__(self, message: str, original_error: Optional[Exception] = None): + self.message = message + self.original_error = original_error + super().__init__(self.message) + + +class AIRateLimitError(AIClientError): + """ + Error de límite de tasa de la API de IA. + + Se lanza cuando la API retorna un error 429 (Too Many Requests) + o ResourceExhausted en el caso de Google Cloud. + + Attributes: + retry_after: Segundos sugeridos de espera antes de reintentar + """ + + def __init__( + self, + message: str = "Rate limit exceeded", + retry_after: Optional[float] = None, + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + self.retry_after = retry_after + + +class AIConnectionError(AIClientError): + """ + Error de conexión con el servicio de IA. + + Se lanza cuando no se puede establecer conexión con la API, + hay timeout o problemas de red. 
+ """ + + def __init__( + self, + message: str = "Failed to connect to AI service", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +class AIModelError(AIClientError): + """ + Error relacionado con el modelo de IA. + + Se lanza cuando el modelo no está disponible, el prompt excede + los límites o hay problemas con la configuración del modelo. + """ + + def __init__( + self, + message: str = "AI model error", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +class AIResponseError(AIClientError): + """ + Error al procesar la respuesta de la IA. + + Se lanza cuando la respuesta no tiene el formato esperado + o no se puede parsear correctamente. + """ + + def __init__( + self, + message: str = "Invalid AI response", + original_error: Optional[Exception] = None, + ): + super().__init__(message, original_error) + + +# ============================================================================= +# Dataclass para respuesta estructurada +# ============================================================================= + + +@dataclass +class AIResponse: + """ + Respuesta estructurada de una llamada a la IA. + + Attributes: + content: Texto generado por el modelo + model_name: Nombre del modelo usado + tokens_used: Tokens consumidos (input + output) + finish_reason: Razón de finalización (stop, length, safety, etc.) + """ + + content: str + model_name: str + tokens_used: int = 0 + finish_reason: str = "stop" + + +# ============================================================================= +# Interfaz abstracta (Adapter Pattern) +# ============================================================================= + + +class AIClient(ABC): + """ + Interfaz abstracta para clientes de IA generativa. + + Define el contrato que deben implementar todos los proveedores + de IA, permitiendo cambiar entre diferentes servicios sin + modificar el código del negocio. 
+ + Example: + ```python + class VertexAIClient(AIClient): + async def generate_explanation(self, prompt: str) -> AIResponse: + # Implementación específica de Vertex AI + ... + + # Uso + client: AIClient = VertexAIClient() + response = await client.generate_explanation("Explica este error...") + print(response.content) + ``` + """ + + @abstractmethod + async def generate_explanation(self, prompt: str) -> AIResponse: + """ + Genera una explicación o respuesta basada en el prompt. + + Este es el método principal que deben implementar todos los + proveedores de IA. + + Args: + prompt: Texto del prompt a enviar al modelo + + Returns: + AIResponse: Respuesta estructurada con el contenido generado + + Raises: + AIRateLimitError: Si se excede el límite de tasa de la API + AIConnectionError: Si hay problemas de conexión + AIModelError: Si hay problemas con el modelo + AIResponseError: Si la respuesta no es válida + AIClientError: Para otros errores de la API + """ + pass + + @abstractmethod + async def health_check(self) -> bool: + """ + Verifica si el cliente de IA está operativo. + + Útil para health checks del sistema y monitoreo. + + Returns: + bool: True si el servicio está disponible + """ + pass + + @property + @abstractmethod + def model_name(self) -> str: + """ + Retorna el nombre del modelo configurado. + + Returns: + str: Identificador del modelo (ej: 'gemini-1.5-flash-001') + """ + pass + + @property + @abstractmethod + def is_configured(self) -> bool: + """ + Verifica si el cliente tiene toda la configuración necesaria. + + Returns: + bool: True si el cliente está correctamente configurado + """ + pass diff --git a/backend/src/external/mcp_client.py b/backend/src/external/mcp_client.py index 52aa408..3186191 100644 --- a/backend/src/external/mcp_client.py +++ b/backend/src/external/mcp_client.py @@ -1,190 +1,190 @@ -""" -Cliente MCP (Model Context Protocol) para enriquecer prompts con contexto de seguridad. 
- -Proporciona acceso a la base de conocimiento OWASP Top 10 y mapeos CWE -para enriquecer las explicaciones generadas por IA. - -Principios de diseño: -- SRP: Solo busca y formatea contexto de seguridad -- Acoplamiento débil: Interfaz abstracta permite múltiples implementaciones -- Async: Todas las operaciones son asíncronas para consistencia -""" - -import logging -from abc import ABC, abstractmethod -from typing import List, Optional - -from src.core.config.mcp_config import ( - OWASP_TOP_10, - SecurityContext, - format_security_context, - get_security_context, -) -from src.schemas.finding import Finding - -logger = logging.getLogger("agents.MCP") - - -class MCPClient(ABC): - """ - Interfaz abstracta para clientes MCP (Model Context Protocol). - - Define el contrato para obtener contexto de seguridad que será - usado para enriquecer prompts de IA generativa. - """ - - @abstractmethod - async def get_context(self, finding: Finding) -> Optional[str]: - """ - Obtiene contexto de seguridad formateado para un hallazgo. - - Args: - finding: Hallazgo de seguridad a enriquecer - - Returns: - Contexto formateado como texto o None si no se encuentra - """ - pass - - @abstractmethod - async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: - """ - Obtiene el objeto SecurityContext para un hallazgo. - - Args: - finding: Hallazgo de seguridad - - Returns: - SecurityContext o None si no se encuentra - """ - pass - - @abstractmethod - def get_available_categories(self) -> List[str]: - """ - Lista las categorías OWASP disponibles. - - Returns: - Lista de nombres de categorías - """ - pass - - -class LocalMCPClient(MCPClient): - """ - Cliente MCP local usando el diccionario OWASP Top 10 embebido. - - Busca contexto de seguridad relevante basado en rule_id o issue_type - del hallazgo y lo formatea para enriquecer prompts de IA. - - Esta implementación usa datos locales. Puede ser extendida o reemplazada - por una que consulte servidores MCP externos. 
- - Example: - client = LocalMCPClient() - context = await client.get_context(finding) - if context: - prompt = f"Contexto OWASP:\\n{context}" - """ - - async def get_context(self, finding: Finding) -> Optional[str]: - """ - Obtiene contexto de seguridad OWASP formateado para un hallazgo. - - Busca primero por rule_id (más específico) y luego por issue_type. - - Args: - finding: Hallazgo de seguridad - - Returns: - Contexto formateado o None si no se encuentra - """ - context = await self.get_security_context(finding) - - if context: - formatted = format_security_context(context) - logger.debug( - f"[MCP] Contexto encontrado para {finding.rule_id or finding.issue_type}: " - f"{context.category}" - ) - return formatted - - logger.debug(f"[MCP] Sin contexto OWASP para {finding.rule_id or finding.issue_type}") - return None - - async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: - """ - Obtiene el objeto SecurityContext para un hallazgo. - - Prioriza rule_id sobre issue_type para mayor precisión. - - Args: - finding: Hallazgo de seguridad - - Returns: - SecurityContext o None si no se encuentra - """ - # Buscar por rule_id primero (más específico) - if finding.rule_id: - context = get_security_context(rule_id=finding.rule_id) - if context: - return context - - # Fallback a issue_type - if finding.issue_type: - context = get_security_context(issue_type=finding.issue_type) - if context: - return context - - return None - - def get_available_categories(self) -> List[str]: - """ - Lista todas las categorías OWASP disponibles. - - Returns: - Lista de claves del diccionario OWASP_TOP_10 - """ - return list(OWASP_TOP_10.keys()) - - async def get_context_by_category(self, category_key: str) -> Optional[str]: - """ - Obtiene contexto por clave de categoría directamente. 
- - Args: - category_key: Clave del diccionario OWASP (ej: "injection", "broken_access_control") - - Returns: - Contexto formateado o None - """ - context = OWASP_TOP_10.get(category_key) - if context: - return format_security_context(context) - return None - - -# Singleton del cliente MCP -_mcp_client_instance: Optional[MCPClient] = None - - -def get_mcp_client() -> MCPClient: - """ - Factory function para obtener el cliente MCP. - - Usa patrón singleton para reutilizar la misma instancia. - - Returns: - Instancia de MCPClient (LocalMCPClient por defecto) - """ - global _mcp_client_instance - if _mcp_client_instance is None: - _mcp_client_instance = LocalMCPClient() - return _mcp_client_instance - - -def reset_mcp_client() -> None: - """ - Resetea el singleton del cliente MCP (útil para testing). - """ - global _mcp_client_instance - _mcp_client_instance = None +""" +Cliente MCP (Model Context Protocol) para enriquecer prompts con contexto de seguridad. + +Proporciona acceso a la base de conocimiento OWASP Top 10 y mapeos CWE +para enriquecer las explicaciones generadas por IA. + +Principios de diseño: +- SRP: Solo busca y formatea contexto de seguridad +- Acoplamiento débil: Interfaz abstracta permite múltiples implementaciones +- Async: Todas las operaciones son asíncronas para consistencia +""" + +import logging +from abc import ABC, abstractmethod +from typing import List, Optional + +from src.core.config.mcp_config import ( + OWASP_TOP_10, + SecurityContext, + format_security_context, + get_security_context, +) +from src.schemas.finding import Finding + +logger = logging.getLogger("agents.MCP") + + +class MCPClient(ABC): + """ + Interfaz abstracta para clientes MCP (Model Context Protocol). + + Define el contrato para obtener contexto de seguridad que será + usado para enriquecer prompts de IA generativa. 
+ """ + + @abstractmethod + async def get_context(self, finding: Finding) -> Optional[str]: + """ + Obtiene contexto de seguridad formateado para un hallazgo. + + Args: + finding: Hallazgo de seguridad a enriquecer + + Returns: + Contexto formateado como texto o None si no se encuentra + """ + pass + + @abstractmethod + async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: + """ + Obtiene el objeto SecurityContext para un hallazgo. + + Args: + finding: Hallazgo de seguridad + + Returns: + SecurityContext o None si no se encuentra + """ + pass + + @abstractmethod + def get_available_categories(self) -> List[str]: + """ + Lista las categorías OWASP disponibles. + + Returns: + Lista de nombres de categorías + """ + pass + + +class LocalMCPClient(MCPClient): + """ + Cliente MCP local usando el diccionario OWASP Top 10 embebido. + + Busca contexto de seguridad relevante basado en rule_id o issue_type + del hallazgo y lo formatea para enriquecer prompts de IA. + + Esta implementación usa datos locales. Puede ser extendida o reemplazada + por una que consulte servidores MCP externos. + + Example: + client = LocalMCPClient() + context = await client.get_context(finding) + if context: + prompt = f"Contexto OWASP:\\n{context}" + """ + + async def get_context(self, finding: Finding) -> Optional[str]: + """ + Obtiene contexto de seguridad OWASP formateado para un hallazgo. + + Busca primero por rule_id (más específico) y luego por issue_type. 
+ + Args: + finding: Hallazgo de seguridad + + Returns: + Contexto formateado o None si no se encuentra + """ + context = await self.get_security_context(finding) + + if context: + formatted = format_security_context(context) + logger.debug( + f"[MCP] Contexto encontrado para {finding.rule_id or finding.issue_type}: " + f"{context.category}" + ) + return formatted + + logger.debug(f"[MCP] Sin contexto OWASP para {finding.rule_id or finding.issue_type}") + return None + + async def get_security_context(self, finding: Finding) -> Optional[SecurityContext]: + """ + Obtiene el objeto SecurityContext para un hallazgo. + + Prioriza rule_id sobre issue_type para mayor precisión. + + Args: + finding: Hallazgo de seguridad + + Returns: + SecurityContext o None si no se encuentra + """ + # Buscar por rule_id primero (más específico) + if finding.rule_id: + context = get_security_context(rule_id=finding.rule_id) + if context: + return context + + # Fallback a issue_type + if finding.issue_type: + context = get_security_context(issue_type=finding.issue_type) + if context: + return context + + return None + + def get_available_categories(self) -> List[str]: + """ + Lista todas las categorías OWASP disponibles. + + Returns: + Lista de claves del diccionario OWASP_TOP_10 + """ + return list(OWASP_TOP_10.keys()) + + async def get_context_by_category(self, category_key: str) -> Optional[str]: + """ + Obtiene contexto por clave de categoría directamente. + + Args: + category_key: Clave del diccionario OWASP (ej: "injection", "broken_access_control") + + Returns: + Contexto formateado o None + """ + context = OWASP_TOP_10.get(category_key) + if context: + return format_security_context(context) + return None + + +# Singleton del cliente MCP +_mcp_client_instance: Optional[MCPClient] = None + + +def get_mcp_client() -> MCPClient: + """ + Factory function para obtener el cliente MCP. + + Usa patrón singleton para reutilizar la misma instancia. 
+ + Returns: + Instancia de MCPClient (LocalMCPClient por defecto) + """ + global _mcp_client_instance + if _mcp_client_instance is None: + _mcp_client_instance = LocalMCPClient() + return _mcp_client_instance + + +def reset_mcp_client() -> None: + """ + Resetea el singleton del cliente MCP (útil para testing). + """ + global _mcp_client_instance + _mcp_client_instance = None diff --git a/backend/src/main.py b/backend/src/main.py index 364b308..684395b 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1,49 +1,49 @@ -""" -CodeGuard AI - Backend Entry Point -FastAPI Application -""" - -from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware - -from src.routers.analysis import router as analysis_router -from src.routers.auth import router as auth_router -from src.routers.findings import router as findings_router - -# Create FastAPI app -app = FastAPI( - title="CodeGuard AI", - description="Multi-Agent Code Review System", - version="1.0.0", - docs_url="/docs", - redoc_url="/redoc", -) - -# CORS -app.add_middleware( - CORSMiddleware, - allow_origins=["http://localhost:3000", "http://localhost:5173"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -app.include_router(analysis_router) -app.include_router(auth_router) -app.include_router(findings_router) - - -@app.get("/health") -async def health_check(): - """Health check endpoint""" - return {"status": "healthy", "version": "1.0.0", "service": "CodeGuard AI Backend"} - - -@app.get("/") -async def root(): - """Root endpoint""" - return { - "message": "CodeGuard AI - Multi-Agent Code Review System", - "docs": "/docs", - "health": "/health", - } +""" +CodeGuard AI - Backend Entry Point +FastAPI Application +""" + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from src.routers.analysis import router as analysis_router +from src.routers.auth import router as auth_router +from src.routers.findings import router as 
findings_router + +# Create FastAPI app +app = FastAPI( + title="CodeGuard AI", + description="Multi-Agent Code Review System", + version="1.0.0", + docs_url="/docs", + redoc_url="/redoc", +) + +# CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000", "http://localhost:5173"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(analysis_router) +app.include_router(auth_router) +app.include_router(findings_router) + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy", "version": "1.0.0", "service": "CodeGuard AI Backend"} + + +@app.get("/") +async def root(): + """Root endpoint""" + return { + "message": "CodeGuard AI - Multi-Agent Code Review System", + "docs": "/docs", + "health": "/health", + } diff --git a/backend/src/models/__init__.py b/backend/src/models/__init__.py index b91a00a..158dd20 100644 --- a/backend/src/models/__init__.py +++ b/backend/src/models/__init__.py @@ -1,28 +1,28 @@ -""" -SQLAlchemy ORM Models para CodeGuard AI. - -Este módulo exporta todas las entidades de base de datos para facilitar imports. -""" - -from src.models.base import Base -from src.models.code_review import CodeReviewEntity - -# Enums -from src.models.enums.review_status import ReviewStatus -from src.models.enums.severity_enum import SeverityEnum -from src.models.enums.user_role import UserRole -from src.models.finding import AgentFindingEntity -from src.models.user import UserEntity - -__all__ = [ - # Base - "Base", - # Entities - "UserEntity", - "CodeReviewEntity", - "AgentFindingEntity", - # Enums - "ReviewStatus", - "SeverityEnum", - "UserRole", -] +""" +SQLAlchemy ORM Models para CodeGuard AI. + +Este módulo exporta todas las entidades de base de datos para facilitar imports. 
+""" + +from src.models.base import Base +from src.models.code_review import CodeReviewEntity + +# Enums +from src.models.enums.review_status import ReviewStatus +from src.models.enums.severity_enum import SeverityEnum +from src.models.enums.user_role import UserRole +from src.models.finding import AgentFindingEntity +from src.models.user import UserEntity + +__all__ = [ + # Base + "Base", + # Entities + "UserEntity", + "CodeReviewEntity", + "AgentFindingEntity", + # Enums + "ReviewStatus", + "SeverityEnum", + "UserRole", +] diff --git a/backend/src/models/base.py b/backend/src/models/base.py index cc1270d..4f1a2a7 100644 --- a/backend/src/models/base.py +++ b/backend/src/models/base.py @@ -1,25 +1,25 @@ -""" -Configuración base para los modelos ORM de SQLAlchemy. - -Este módulo define la clase base declarativa de la cual deben heredar -todas las entidades de la base de datos para ser reconocidas por el ORM. -""" - -from sqlalchemy.orm import DeclarativeBase - - -class Base(DeclarativeBase): - """ - Clase base declarativa para todos los modelos ORM del sistema. - - Utiliza el estilo moderno de SQLAlchemy 2.0 (`DeclarativeBase`), lo que - proporciona mejor soporte para tipado estático y autocompletado en IDEs - comparado con la función antigua `declarative_base()`. - - Todas las entidades (ej. `CodeReviewEntity`) deben heredar de esta clase. - - __allow_unmapped__ = True permite usar anotaciones de tipo sin Mapped[] - para mantener compatibilidad con el código existente. - """ - - __allow_unmapped__ = True +""" +Configuración base para los modelos ORM de SQLAlchemy. + +Este módulo define la clase base declarativa de la cual deben heredar +todas las entidades de la base de datos para ser reconocidas por el ORM. +""" + +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + """ + Clase base declarativa para todos los modelos ORM del sistema. 
+ + Utiliza el estilo moderno de SQLAlchemy 2.0 (`DeclarativeBase`), lo que + proporciona mejor soporte para tipado estático y autocompletado en IDEs + comparado con la función antigua `declarative_base()`. + + Todas las entidades (ej. `CodeReviewEntity`) deben heredar de esta clase. + + __allow_unmapped__ = True permite usar anotaciones de tipo sin Mapped[] + para mantener compatibilidad con el código existente. + """ + + __allow_unmapped__ = True diff --git a/backend/src/models/code_review.py b/backend/src/models/code_review.py index 42d7f7b..c59df9f 100644 --- a/backend/src/models/code_review.py +++ b/backend/src/models/code_review.py @@ -1,84 +1,84 @@ -""" -Entidad ORM para code reviews. -Alineado con tabla 'code_reviews' en PostgreSQL (Supabase). -""" - -import uuid -from datetime import datetime -from typing import TYPE_CHECKING, List - -from sqlalchemy import Column, DateTime, Enum, ForeignKey, Integer, LargeBinary, String, Text -from sqlalchemy.dialects.postgresql import UUID -from sqlalchemy.orm import relationship - -from src.models.base import Base -from src.models.enums.review_status import ReviewStatus - -if TYPE_CHECKING: - from src.models.finding import AgentFindingEntity - from src.models.user import UserEntity - - -class CodeReviewEntity(Base): - """ - Entidad ORM que representa la tabla 'code_reviews' en la base de datos. 
- - Attributes: - id: UUID del análisis - user_id: FK a users (Clerk user_id) - filename: Nombre del archivo analizado - code_content: Contenido encriptado con AES-256 (BYTEA) - quality_score: Puntuación de calidad (0-100) - status: PENDING, PROCESSING, COMPLETED, FAILED - total_findings: Número total de hallazgos - error_message: Mensaje de error si falló - created_at: Timestamp de creación - completed_at: Timestamp de finalización - """ - - __tablename__ = "code_reviews" - - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - user_id = Column( - String(255), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True - ) - filename = Column(String(500), nullable=False) - - # RN16: code_content se almacena como bytes encriptados (BYTEA) - code_content = Column(LargeBinary, nullable=False) - - quality_score = Column(Integer, nullable=True) - status = Column(Enum(ReviewStatus), default=ReviewStatus.PENDING, nullable=False, index=True) - total_findings = Column(Integer, default=0) - error_message = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, index=True) - completed_at = Column(DateTime, nullable=True) - - # Relationships - user: "UserEntity" = relationship("UserEntity", back_populates="code_reviews") - - findings: List["AgentFindingEntity"] = relationship( - "AgentFindingEntity", - back_populates="code_review", - cascade="all, delete-orphan", - lazy="dynamic", - ) - - def __repr__(self) -> str: - return ( - f"" - ) - - def calculate_quality_score(self) -> int: - """ - Calcula el quality score basado en los findings. - - Formula: score = max(0, 100 - sum(penalties)) - """ - - total_penalty = sum(f.penalty for f in self.findings) - return max(0, 100 - total_penalty) +""" +Entidad ORM para code reviews. +Alineado con tabla 'code_reviews' en PostgreSQL (Supabase). 
+""" + +import uuid +from datetime import datetime +from typing import TYPE_CHECKING, List + +from sqlalchemy import Column, DateTime, Enum, ForeignKey, Integer, LargeBinary, String, Text +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship + +from src.models.base import Base +from src.models.enums.review_status import ReviewStatus + +if TYPE_CHECKING: + from src.models.finding import AgentFindingEntity + from src.models.user import UserEntity + + +class CodeReviewEntity(Base): + """ + Entidad ORM que representa la tabla 'code_reviews' en la base de datos. + + Attributes: + id: UUID del análisis + user_id: FK a users (Clerk user_id) + filename: Nombre del archivo analizado + code_content: Contenido encriptado con AES-256 (BYTEA) + quality_score: Puntuación de calidad (0-100) + status: PENDING, PROCESSING, COMPLETED, FAILED + total_findings: Número total de hallazgos + error_message: Mensaje de error si falló + created_at: Timestamp de creación + completed_at: Timestamp de finalización + """ + + __tablename__ = "code_reviews" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + user_id = Column( + String(255), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True + ) + filename = Column(String(500), nullable=False) + + # RN16: code_content se almacena como bytes encriptados (BYTEA) + code_content = Column(LargeBinary, nullable=False) + + quality_score = Column(Integer, nullable=True) + status = Column(Enum(ReviewStatus), default=ReviewStatus.PENDING, nullable=False, index=True) + total_findings = Column(Integer, default=0) + error_message = Column(Text, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, index=True) + completed_at = Column(DateTime, nullable=True) + + # Relationships + user: "UserEntity" = relationship("UserEntity", back_populates="code_reviews") + + findings: List["AgentFindingEntity"] = relationship( + "AgentFindingEntity", + 
back_populates="code_review", + cascade="all, delete-orphan", + lazy="dynamic", + ) + + def __repr__(self) -> str: + return ( + f"" + ) + + def calculate_quality_score(self) -> int: + """ + Calcula el quality score basado en los findings. + + Formula: score = max(0, 100 - sum(penalties)) + """ + + total_penalty = sum(f.penalty for f in self.findings) + return max(0, 100 - total_penalty) diff --git a/backend/src/models/enums/review_status.py b/backend/src/models/enums/review_status.py index 2b990cd..11666c0 100644 --- a/backend/src/models/enums/review_status.py +++ b/backend/src/models/enums/review_status.py @@ -1,8 +1,8 @@ -from enum import Enum - - -class ReviewStatus(str, Enum): - PENDING = "PENDING" - PROCESSING = "PROCESSING" - COMPLETED = "COMPLETED" - FAILED = "FAILED" +from enum import Enum + + +class ReviewStatus(str, Enum): + PENDING = "PENDING" + PROCESSING = "PROCESSING" + COMPLETED = "COMPLETED" + FAILED = "FAILED" diff --git a/backend/src/models/enums/severity_enum.py b/backend/src/models/enums/severity_enum.py index 1447158..eb96241 100644 --- a/backend/src/models/enums/severity_enum.py +++ b/backend/src/models/enums/severity_enum.py @@ -1,22 +1,22 @@ -""" -Enum para niveles de severidad de hallazgos. -Alineado con PostgreSQL ENUM 'finding_severity'. -""" - -from enum import Enum - - -class SeverityEnum(str, Enum): - """ - Niveles de severidad de un hallazgo en la base de datos. - - CRITICAL: OWASP Top 10, explotable inmediatamente - HIGH: Vulnerabilidades comunes que requieren condiciones específicas - MEDIUM: Code smells de seguridad/rendimiento - LOW: Violaciones de estilo menores - """ - - CRITICAL = "CRITICAL" - HIGH = "HIGH" - MEDIUM = "MEDIUM" - LOW = "LOW" +""" +Enum para niveles de severidad de hallazgos. +Alineado con PostgreSQL ENUM 'finding_severity'. +""" + +from enum import Enum + + +class SeverityEnum(str, Enum): + """ + Niveles de severidad de un hallazgo en la base de datos. 
+ + CRITICAL: OWASP Top 10, explotable inmediatamente + HIGH: Vulnerabilidades comunes que requieren condiciones específicas + MEDIUM: Code smells de seguridad/rendimiento + LOW: Violaciones de estilo menores + """ + + CRITICAL = "CRITICAL" + HIGH = "HIGH" + MEDIUM = "MEDIUM" + LOW = "LOW" diff --git a/backend/src/models/enums/user_role.py b/backend/src/models/enums/user_role.py index 99751c3..b47e572 100644 --- a/backend/src/models/enums/user_role.py +++ b/backend/src/models/enums/user_role.py @@ -1,18 +1,18 @@ -""" -Enum para roles de usuario. -Alineado con PostgreSQL ENUM 'user_role'. -""" - -from enum import Enum - - -class UserRole(str, Enum): - """ - Roles de usuario en el sistema. - - DEVELOPER: Acceso básico, límite de 10 análisis/día - ADMIN: Acceso completo, sin límites, puede configurar agentes - """ - - DEVELOPER = "DEVELOPER" - ADMIN = "ADMIN" +""" +Enum para roles de usuario. +Alineado con PostgreSQL ENUM 'user_role'. +""" + +from enum import Enum + + +class UserRole(str, Enum): + """ + Roles de usuario en el sistema. + + DEVELOPER: Acceso básico, límite de 10 análisis/día + ADMIN: Acceso completo, sin límites, puede configurar agentes + """ + + DEVELOPER = "DEVELOPER" + ADMIN = "ADMIN" diff --git a/backend/src/models/finding.py b/backend/src/models/finding.py index 5043348..1033c41 100644 --- a/backend/src/models/finding.py +++ b/backend/src/models/finding.py @@ -1,106 +1,106 @@ -""" -Entidad ORM para hallazgos de agentes. -Alineado con tabla 'agent_findings' en PostgreSQL (Supabase). 
-""" - -import uuid -from datetime import datetime -from typing import TYPE_CHECKING, Any, Dict - -from sqlalchemy import Column, DateTime, Enum, ForeignKey, Integer, String, Text -from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID -from sqlalchemy.orm import relationship - -from src.models.base import Base -from src.models.enums.severity_enum import SeverityEnum - -if TYPE_CHECKING: - from src.models.code_review import CodeReviewEntity - - -class AgentFindingEntity(Base): - """ - Entidad ORM que representa la tabla 'agent_findings' en la base de datos. - - Attributes: - id: UUID del hallazgo - review_id: FK a code_reviews - agent_type: Nombre del agente (SecurityAgent, QualityAgent, etc.) - severity: CRITICAL, HIGH, MEDIUM, LOW - issue_type: Tipo de problema (dangerous_function, sql_injection, etc.) - line_number: Número de línea donde se encontró - code_snippet: Fragmento de código problemático - message: Descripción del problema - suggestion: Sugerencia de corrección - metrics: Métricas adicionales (JSONB) - ai_explanation: Explicación generada por IA - Sprint 3 (JSONB) - mcp_references: Referencias a servidores MCP - Sprint 3 (TEXT[]) - created_at: Timestamp de creación - """ - - __tablename__ = "agent_findings" - - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - review_id = Column( - UUID(as_uuid=True), - ForeignKey("code_reviews.id", ondelete="CASCADE"), - nullable=False, - index=True, - ) - agent_type = Column(String(100), nullable=False, index=True) - severity = Column(Enum(SeverityEnum), nullable=False, index=True) - issue_type = Column(String(200), nullable=False) - line_number = Column(Integer, nullable=False) - code_snippet = Column(Text, nullable=True) - message = Column(Text, nullable=False) - suggestion = Column(Text, nullable=True) - - # Campos adicionales - metrics = Column(JSONB, nullable=True) - - # Sprint 3: IA y MCP - ai_explanation = Column(JSONB, nullable=True) - mcp_references = Column(ARRAY(Text), 
nullable=True) - - # Timestamps - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - # Relationships - code_review: "CodeReviewEntity" = relationship("CodeReviewEntity", back_populates="findings") - - def __repr__(self) -> str: - return ( - f"" - ) - - @property - def penalty(self) -> int: - """Retorna la penalización para el quality score según severidad.""" - penalties = { - SeverityEnum.CRITICAL: 10, - SeverityEnum.HIGH: 5, - SeverityEnum.MEDIUM: 2, - SeverityEnum.LOW: 1, - } - return penalties.get(self.severity, 0) - - def to_dict(self) -> Dict[str, Any]: - """Convierte la entidad a diccionario.""" - return { - "id": str(self.id), - "review_id": str(self.review_id), - "agent_type": self.agent_type, - "severity": self.severity.value if self.severity else None, - "issue_type": self.issue_type, - "line_number": self.line_number, - "code_snippet": self.code_snippet, - "message": self.message, - "suggestion": self.suggestion, - "metrics": self.metrics, - "created_at": self.created_at.isoformat() if self.created_at else None, - } +""" +Entidad ORM para hallazgos de agentes. +Alineado con tabla 'agent_findings' en PostgreSQL (Supabase). +""" + +import uuid +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict + +from sqlalchemy import Column, DateTime, Enum, ForeignKey, Integer, String, Text +from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID +from sqlalchemy.orm import relationship + +from src.models.base import Base +from src.models.enums.severity_enum import SeverityEnum + +if TYPE_CHECKING: + from src.models.code_review import CodeReviewEntity + + +class AgentFindingEntity(Base): + """ + Entidad ORM que representa la tabla 'agent_findings' en la base de datos. + + Attributes: + id: UUID del hallazgo + review_id: FK a code_reviews + agent_type: Nombre del agente (SecurityAgent, QualityAgent, etc.) + severity: CRITICAL, HIGH, MEDIUM, LOW + issue_type: Tipo de problema (dangerous_function, sql_injection, etc.) 
+ line_number: Número de línea donde se encontró + code_snippet: Fragmento de código problemático + message: Descripción del problema + suggestion: Sugerencia de corrección + metrics: Métricas adicionales (JSONB) + ai_explanation: Explicación generada por IA - Sprint 3 (JSONB) + mcp_references: Referencias a servidores MCP - Sprint 3 (TEXT[]) + created_at: Timestamp de creación + """ + + __tablename__ = "agent_findings" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + review_id = Column( + UUID(as_uuid=True), + ForeignKey("code_reviews.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + agent_type = Column(String(100), nullable=False, index=True) + severity = Column(Enum(SeverityEnum), nullable=False, index=True) + issue_type = Column(String(200), nullable=False) + line_number = Column(Integer, nullable=False) + code_snippet = Column(Text, nullable=True) + message = Column(Text, nullable=False) + suggestion = Column(Text, nullable=True) + + # Campos adicionales + metrics = Column(JSONB, nullable=True) + + # Sprint 3: IA y MCP + ai_explanation = Column(JSONB, nullable=True) + mcp_references = Column(ARRAY(Text), nullable=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + # Relationships + code_review: "CodeReviewEntity" = relationship("CodeReviewEntity", back_populates="findings") + + def __repr__(self) -> str: + return ( + f"" + ) + + @property + def penalty(self) -> int: + """Retorna la penalización para el quality score según severidad.""" + penalties = { + SeverityEnum.CRITICAL: 10, + SeverityEnum.HIGH: 5, + SeverityEnum.MEDIUM: 2, + SeverityEnum.LOW: 1, + } + return penalties.get(self.severity, 0) + + def to_dict(self) -> Dict[str, Any]: + """Convierte la entidad a diccionario.""" + return { + "id": str(self.id), + "review_id": str(self.review_id), + "agent_type": self.agent_type, + "severity": self.severity.value if self.severity else None, + "issue_type": self.issue_type, 
+ "line_number": self.line_number, + "code_snippet": self.code_snippet, + "message": self.message, + "suggestion": self.suggestion, + "metrics": self.metrics, + "created_at": self.created_at.isoformat() if self.created_at else None, + } diff --git a/backend/src/models/user.py b/backend/src/models/user.py index 9db0232..a177511 100644 --- a/backend/src/models/user.py +++ b/backend/src/models/user.py @@ -1,86 +1,86 @@ -""" -Entidad ORM para usuarios. -Alineado con tabla 'users' en PostgreSQL (Supabase). -""" - -from datetime import date, datetime -from typing import TYPE_CHECKING, List - -from sqlalchemy import Column, Date, DateTime, Enum, Integer, String -from sqlalchemy.orm import relationship - -from src.models.base import Base -from src.models.enums.user_role import UserRole - -if TYPE_CHECKING: - from src.models.code_review import CodeReviewEntity - - -class UserEntity(Base): - """ - Entidad ORM que representa la tabla 'users' en la base de datos. - - Attributes: - id: Clerk user_id (VARCHAR, PK) - email: Email único del usuario - name: Nombre del usuario (opcional) - avatar_url: URL del avatar (opcional) - role: DEVELOPER o ADMIN - daily_analysis_count: Contador de análisis del día - last_analysis_date: Fecha del último análisis - created_at: Timestamp de creación - updated_at: Timestamp de última actualización - """ - - __tablename__ = "users" - - # Clerk user_id como PK (no es UUID, es string de Clerk) - id = Column(String(255), primary_key=True) - email = Column(String(255), unique=True, nullable=False, index=True) - name = Column(String(255), nullable=True) - avatar_url = Column(String(500), nullable=True) - role = Column(Enum(UserRole), default=UserRole.DEVELOPER, nullable=False, index=True) - - # Rate limiting (RN3: 10 análisis/día para developers) - daily_analysis_count = Column(Integer, default=0, nullable=False) - last_analysis_date = Column(Date, nullable=True) - - # Timestamps - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) 
- updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) - - # Relationships - code_reviews: List["CodeReviewEntity"] = relationship( - "CodeReviewEntity", back_populates="user", cascade="all, delete-orphan", lazy="dynamic" - ) - - def __repr__(self) -> str: - return f"" - - def can_analyze(self, max_daily: int = 10) -> bool: - """ - Verifica si el usuario puede realizar más análisis hoy. - - Args: - max_daily: Límite diario para developers (default: 10) - - Returns: - True si puede analizar, False si alcanzó el límite - """ - if self.role == UserRole.ADMIN: - return True - - today = date.today() - if self.last_analysis_date != today: - return True - - return self.daily_analysis_count < max_daily - - def increment_analysis_count(self) -> None: - """Incrementa el contador de análisis del día.""" - today = date.today() - if self.last_analysis_date != today: - self.daily_analysis_count = 1 - self.last_analysis_date = today - else: - self.daily_analysis_count += 1 +""" +Entidad ORM para usuarios. +Alineado con tabla 'users' en PostgreSQL (Supabase). +""" + +from datetime import date, datetime +from typing import TYPE_CHECKING, List + +from sqlalchemy import Column, Date, DateTime, Enum, Integer, String +from sqlalchemy.orm import relationship + +from src.models.base import Base +from src.models.enums.user_role import UserRole + +if TYPE_CHECKING: + from src.models.code_review import CodeReviewEntity + + +class UserEntity(Base): + """ + Entidad ORM que representa la tabla 'users' en la base de datos. 
+ + Attributes: + id: Clerk user_id (VARCHAR, PK) + email: Email único del usuario + name: Nombre del usuario (opcional) + avatar_url: URL del avatar (opcional) + role: DEVELOPER o ADMIN + daily_analysis_count: Contador de análisis del día + last_analysis_date: Fecha del último análisis + created_at: Timestamp de creación + updated_at: Timestamp de última actualización + """ + + __tablename__ = "users" + + # Clerk user_id como PK (no es UUID, es string de Clerk) + id = Column(String(255), primary_key=True) + email = Column(String(255), unique=True, nullable=False, index=True) + name = Column(String(255), nullable=True) + avatar_url = Column(String(500), nullable=True) + role = Column(Enum(UserRole), default=UserRole.DEVELOPER, nullable=False, index=True) + + # Rate limiting (RN3: 10 análisis/día para developers) + daily_analysis_count = Column(Integer, default=0, nullable=False) + last_analysis_date = Column(Date, nullable=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + # Relationships + code_reviews: List["CodeReviewEntity"] = relationship( + "CodeReviewEntity", back_populates="user", cascade="all, delete-orphan", lazy="dynamic" + ) + + def __repr__(self) -> str: + return f"" + + def can_analyze(self, max_daily: int = 10) -> bool: + """ + Verifica si el usuario puede realizar más análisis hoy. 
+ + Args: + max_daily: Límite diario para developers (default: 10) + + Returns: + True si puede analizar, False si alcanzó el límite + """ + if self.role == UserRole.ADMIN: + return True + + today = date.today() + if self.last_analysis_date != today: + return True + + return self.daily_analysis_count < max_daily + + def increment_analysis_count(self) -> None: + """Incrementa el contador de análisis del día.""" + today = date.today() + if self.last_analysis_date != today: + self.daily_analysis_count = 1 + self.last_analysis_date = today + else: + self.daily_analysis_count += 1 diff --git a/backend/src/repositories/code_review_repository.py b/backend/src/repositories/code_review_repository.py index d0d36b4..515860f 100644 --- a/backend/src/repositories/code_review_repository.py +++ b/backend/src/repositories/code_review_repository.py @@ -1,140 +1,140 @@ -from typing import Optional -from uuid import UUID - -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm import Session - -from src.models.code_review import CodeReviewEntity -from src.models.enums.severity_enum import SeverityEnum -from src.models.finding import AgentFindingEntity -from src.schemas.analysis import CodeReview -from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 -from src.utils.logger import logger - - -class CodeReviewRepository: - """ - Repositorio para manejar la persistencia de las revisiones de código. - - Implementa el patrón Repository para desacoplar la lógica de negocio (Domain) - de la implementación de base de datos (SQLAlchemy). Maneja automáticamente - la encriptación/desencriptación del código fuente. - """ - - def __init__(self, session: Session): - """ - Inicializa el repositorio con una sesión de base de datos. - - Args: - session: Sesión activa de SQLAlchemy. - """ - self.session = session - - def create(self, review: CodeReview) -> CodeReview: - """ - Persiste una nueva entidad CodeReview en la base de datos. 
- - Aplica encriptación AES-256 al contenido del código antes de guardar, - cumpliendo con la Regla de Negocio RN16. - - Args: - review: Objeto de dominio CodeReview con los datos a guardar. - - Returns: - CodeReview: El objeto de dominio confirmado y persistido. - - Raises: - SQLAlchemyError: Si ocurre un error a nivel de base de datos. - ValueError: Si el contenido del código es inválido para encriptar. - """ - try: - # RN16: Encriptar contenido sensible antes de tocar la BD - encrypted_content = encrypt_aes256(review.code_content) - - entity = CodeReviewEntity( - id=review.id, - user_id=review.user_id, - filename=review.filename, - code_content=encrypted_content, - quality_score=review.quality_score, - status=review.status, - total_findings=review.total_findings, - created_at=review.created_at, - completed_at=review.completed_at, - ) - - self.session.add(entity) - - # Persistir hallazgos (findings) - for finding in review.findings: - # Mapear severidad de Schema (lowercase) a Entity (uppercase) - if finding.severity.name not in SeverityEnum.__members__: - logger.warning( - f"Finding with unsupported severity '{finding.severity.name}' " - f"skipped for review {review.id}." 
- ) - continue - severity_enum = SeverityEnum[finding.severity.name] - - finding_entity = AgentFindingEntity( - review_id=review.id, - agent_type=finding.agent_name, - severity=severity_enum, - issue_type=finding.issue_type, - line_number=finding.line_number, - code_snippet=finding.code_snippet, - message=finding.message, - suggestion=finding.suggestion, - created_at=finding.detected_at, - ) - self.session.add(finding_entity) - - self.session.commit() - - logger.info(f"CodeReview persistido exitosamente: {review.id}") - return review - - except SQLAlchemyError as e: - self.session.rollback() - logger.error(f"Error de base de datos al crear CodeReview {review.id}: {str(e)}") - raise e - except Exception as e: - self.session.rollback() - logger.error(f"Error inesperado en CodeReviewRepository.create: {str(e)}") - raise e - - def find_by_id(self, review_id: UUID) -> Optional[CodeReview]: - """ - Busca una revisión por su ID y desencripta el contenido automáticamente. - - Args: - review_id: Identificador único (UUID) de la revisión. - - Returns: - Optional[CodeReview]: Objeto de dominio reconstruido o None si no existe. - - Raises: - Exception: Si falla la desencriptación o la lectura de BD. 
- """ - try: - entity = self.session.get(CodeReviewEntity, review_id) - - if not entity: - return None - - decrypted_content = decrypt_aes256(entity.code_content) - - return CodeReview( - id=entity.id, - user_id=entity.user_id, - filename=entity.filename, - code_content=decrypted_content, - quality_score=entity.quality_score, - status=entity.status, - total_findings=entity.total_findings, - created_at=entity.created_at, - completed_at=entity.completed_at, - ) - except Exception as e: - logger.error(f"Error recuperando CodeReview {review_id}: {str(e)}") - raise e +from typing import Optional +from uuid import UUID + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from src.models.code_review import CodeReviewEntity +from src.models.enums.severity_enum import SeverityEnum +from src.models.finding import AgentFindingEntity +from src.schemas.analysis import CodeReview +from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 +from src.utils.logger import logger + + +class CodeReviewRepository: + """ + Repositorio para manejar la persistencia de las revisiones de código. + + Implementa el patrón Repository para desacoplar la lógica de negocio (Domain) + de la implementación de base de datos (SQLAlchemy). Maneja automáticamente + la encriptación/desencriptación del código fuente. + """ + + def __init__(self, session: Session): + """ + Inicializa el repositorio con una sesión de base de datos. + + Args: + session: Sesión activa de SQLAlchemy. + """ + self.session = session + + def create(self, review: CodeReview) -> CodeReview: + """ + Persiste una nueva entidad CodeReview en la base de datos. + + Aplica encriptación AES-256 al contenido del código antes de guardar, + cumpliendo con la Regla de Negocio RN16. + + Args: + review: Objeto de dominio CodeReview con los datos a guardar. + + Returns: + CodeReview: El objeto de dominio confirmado y persistido. 
+ + Raises: + SQLAlchemyError: Si ocurre un error a nivel de base de datos. + ValueError: Si el contenido del código es inválido para encriptar. + """ + try: + # RN16: Encriptar contenido sensible antes de tocar la BD + encrypted_content = encrypt_aes256(review.code_content) + + entity = CodeReviewEntity( + id=review.id, + user_id=review.user_id, + filename=review.filename, + code_content=encrypted_content, + quality_score=review.quality_score, + status=review.status, + total_findings=review.total_findings, + created_at=review.created_at, + completed_at=review.completed_at, + ) + + self.session.add(entity) + + # Persistir hallazgos (findings) + for finding in review.findings: + # Mapear severidad de Schema (lowercase) a Entity (uppercase) + if finding.severity.name not in SeverityEnum.__members__: + logger.warning( + f"Finding with unsupported severity '{finding.severity.name}' " + f"skipped for review {review.id}." + ) + continue + severity_enum = SeverityEnum[finding.severity.name] + + finding_entity = AgentFindingEntity( + review_id=review.id, + agent_type=finding.agent_name, + severity=severity_enum, + issue_type=finding.issue_type, + line_number=finding.line_number, + code_snippet=finding.code_snippet, + message=finding.message, + suggestion=finding.suggestion, + created_at=finding.detected_at, + ) + self.session.add(finding_entity) + + self.session.commit() + + logger.info(f"CodeReview persistido exitosamente: {review.id}") + return review + + except SQLAlchemyError as e: + self.session.rollback() + logger.error(f"Error de base de datos al crear CodeReview {review.id}: {str(e)}") + raise e + except Exception as e: + self.session.rollback() + logger.error(f"Error inesperado en CodeReviewRepository.create: {str(e)}") + raise e + + def find_by_id(self, review_id: UUID) -> Optional[CodeReview]: + """ + Busca una revisión por su ID y desencripta el contenido automáticamente. + + Args: + review_id: Identificador único (UUID) de la revisión. 
+ + Returns: + Optional[CodeReview]: Objeto de dominio reconstruido o None si no existe. + + Raises: + Exception: Si falla la desencriptación o la lectura de BD. + """ + try: + entity = self.session.get(CodeReviewEntity, review_id) + + if not entity: + return None + + decrypted_content = decrypt_aes256(entity.code_content) + + return CodeReview( + id=entity.id, + user_id=entity.user_id, + filename=entity.filename, + code_content=decrypted_content, + quality_score=entity.quality_score, + status=entity.status, + total_findings=entity.total_findings, + created_at=entity.created_at, + completed_at=entity.completed_at, + ) + except Exception as e: + logger.error(f"Error recuperando CodeReview {review_id}: {str(e)}") + raise e diff --git a/backend/src/repositories/user_repo.py b/backend/src/repositories/user_repo.py index 812f24f..52c201f 100644 --- a/backend/src/repositories/user_repo.py +++ b/backend/src/repositories/user_repo.py @@ -1,147 +1,147 @@ -""" -Repositorio para operaciones CRUD de usuarios. - -Maneja la persistencia en la tabla 'users' usando UserEntity. -""" - -from datetime import datetime -from typing import Optional - -from sqlalchemy.orm import Session - -from src.models.enums.user_role import UserRole -from src.models.user import UserEntity - - -class UserRepository: - """ - Repositorio para gestionar usuarios en la base de datos. - - Responsabilidad única: operaciones de persistencia sobre la tabla users. - """ - - def __init__(self, db: Session): - """ - Inicializa el repositorio con una sesión de base de datos. - - Args: - db: Sesión de SQLAlchemy. - """ - self._db = db - - def get_by_id(self, user_id: str) -> Optional[UserEntity]: - """ - Busca un usuario por su ID (Clerk user_id). - - Args: - user_id: ID del usuario (Clerk sub). - - Returns: - UserEntity si existe, None si no. 
- """ - return self._db.query(UserEntity).filter(UserEntity.id == user_id).first() - - def get_by_email(self, email: str) -> Optional[UserEntity]: - """ - Busca un usuario por su email. - - Args: - email: Email del usuario. - - Returns: - UserEntity si existe, None si no. - """ - return self._db.query(UserEntity).filter(UserEntity.email == email).first() - - def create( - self, - user_id: str, - email: str, - name: Optional[str] = None, - avatar_url: Optional[str] = None, - role: UserRole = UserRole.DEVELOPER, - ) -> UserEntity: - """ - Crea un nuevo usuario en la base de datos. - - Args: - user_id: ID del usuario (Clerk sub). - email: Email del usuario. - name: Nombre del usuario (opcional). - avatar_url: URL del avatar (opcional). - role: Rol del usuario (default: DEVELOPER). - - Returns: - UserEntity creado. - """ - now = datetime.utcnow() - user = UserEntity( - id=user_id, - email=email, - name=name, - avatar_url=avatar_url, - role=role, - daily_analysis_count=0, - created_at=now, - updated_at=now, - ) - self._db.add(user) - self._db.commit() - self._db.refresh(user) - return user - - def update( - self, - user: UserEntity, - email: Optional[str] = None, - name: Optional[str] = None, - avatar_url: Optional[str] = None, - ) -> UserEntity: - """ - Actualiza los datos de un usuario existente. - - Args: - user: Entidad de usuario a actualizar. - email: Nuevo email (opcional). - name: Nuevo nombre (opcional). - avatar_url: Nueva URL de avatar (opcional). - - Returns: - UserEntity actualizado. - """ - if email is not None: - user.email = email - if name is not None: - user.name = name - if avatar_url is not None: - user.avatar_url = avatar_url - - user.updated_at = datetime.utcnow() - self._db.commit() - self._db.refresh(user) - return user - - def delete(self, user: UserEntity) -> None: - """ - Elimina un usuario de la base de datos. - - Args: - user: Entidad de usuario a eliminar. 
- """ - self._db.delete(user) - self._db.commit() - - def increment_analysis_count(self, user: UserEntity) -> UserEntity: - """ - Incrementa el contador de análisis del usuario. - - Args: - user: Entidad de usuario. - - Returns: - UserEntity con contador actualizado. - """ - user.increment_analysis_count() - self._db.commit() - self._db.refresh(user) - return user +""" +Repositorio para operaciones CRUD de usuarios. + +Maneja la persistencia en la tabla 'users' usando UserEntity. +""" + +from datetime import datetime +from typing import Optional + +from sqlalchemy.orm import Session + +from src.models.enums.user_role import UserRole +from src.models.user import UserEntity + + +class UserRepository: + """ + Repositorio para gestionar usuarios en la base de datos. + + Responsabilidad única: operaciones de persistencia sobre la tabla users. + """ + + def __init__(self, db: Session): + """ + Inicializa el repositorio con una sesión de base de datos. + + Args: + db: Sesión de SQLAlchemy. + """ + self._db = db + + def get_by_id(self, user_id: str) -> Optional[UserEntity]: + """ + Busca un usuario por su ID (Clerk user_id). + + Args: + user_id: ID del usuario (Clerk sub). + + Returns: + UserEntity si existe, None si no. + """ + return self._db.query(UserEntity).filter(UserEntity.id == user_id).first() + + def get_by_email(self, email: str) -> Optional[UserEntity]: + """ + Busca un usuario por su email. + + Args: + email: Email del usuario. + + Returns: + UserEntity si existe, None si no. + """ + return self._db.query(UserEntity).filter(UserEntity.email == email).first() + + def create( + self, + user_id: str, + email: str, + name: Optional[str] = None, + avatar_url: Optional[str] = None, + role: UserRole = UserRole.DEVELOPER, + ) -> UserEntity: + """ + Crea un nuevo usuario en la base de datos. + + Args: + user_id: ID del usuario (Clerk sub). + email: Email del usuario. + name: Nombre del usuario (opcional). + avatar_url: URL del avatar (opcional). 
+ role: Rol del usuario (default: DEVELOPER). + + Returns: + UserEntity creado. + """ + now = datetime.utcnow() + user = UserEntity( + id=user_id, + email=email, + name=name, + avatar_url=avatar_url, + role=role, + daily_analysis_count=0, + created_at=now, + updated_at=now, + ) + self._db.add(user) + self._db.commit() + self._db.refresh(user) + return user + + def update( + self, + user: UserEntity, + email: Optional[str] = None, + name: Optional[str] = None, + avatar_url: Optional[str] = None, + ) -> UserEntity: + """ + Actualiza los datos de un usuario existente. + + Args: + user: Entidad de usuario a actualizar. + email: Nuevo email (opcional). + name: Nuevo nombre (opcional). + avatar_url: Nueva URL de avatar (opcional). + + Returns: + UserEntity actualizado. + """ + if email is not None: + user.email = email + if name is not None: + user.name = name + if avatar_url is not None: + user.avatar_url = avatar_url + + user.updated_at = datetime.utcnow() + self._db.commit() + self._db.refresh(user) + return user + + def delete(self, user: UserEntity) -> None: + """ + Elimina un usuario de la base de datos. + + Args: + user: Entidad de usuario a eliminar. + """ + self._db.delete(user) + self._db.commit() + + def increment_analysis_count(self, user: UserEntity) -> UserEntity: + """ + Incrementa el contador de análisis del usuario. + + Args: + user: Entidad de usuario. + + Returns: + UserEntity con contador actualizado. 
+ """ + user.increment_analysis_count() + self._db.commit() + self._db.refresh(user) + return user diff --git a/backend/src/routers/analysis.py b/backend/src/routers/analysis.py index 72effe5..0c4f41c 100644 --- a/backend/src/routers/analysis.py +++ b/backend/src/routers/analysis.py @@ -1,66 +1,66 @@ -from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status -from sqlalchemy.orm import Session - -from src.core.dependencies.auth import get_current_user -from src.core.dependencies.get_db import get_db -from src.repositories.code_review_repository import CodeReviewRepository -from src.schemas.analysis import AnalysisResponse -from src.schemas.user import User -from src.services.analysis_service import AnalysisService -from src.utils.logger import logger - -router = APIRouter(prefix="/api/v1", tags=["analysis"]) - - -@router.post( - "/analyze", - response_model=AnalysisResponse, - status_code=status.HTTP_200_OK, - summary="Analizar código fuente Python", -) -async def analyze_code( - file: UploadFile = File(...), - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db), -): - """ - Sube un archivo Python para análisis automatizado de seguridad y calidad. - - Reglas de Negocio: - - **RN1**: Requiere autenticación JWT. - - **RN3**: Verifica cuota diaria (Developers: 10/día). - - **RN4**: Valida extensión .py, tamaño <10MB y codificación UTF-8. - - Args: - file: Archivo .py a analizar. - current_user: Usuario autenticado (inyectado). - db: Sesión de base de datos (inyectada). - - Returns: - AnalysisResponse: Objeto con ID de análisis, estado y resumen. - - Raises: - HTTPException: 500 si ocurre un error interno. 
- """ - - repo = CodeReviewRepository(db) - service = AnalysisService(repo) - - try: - result = await service.analyze_code(file, current_user.id) - except HTTPException: - raise - except Exception as e: - logger.error(f"Error interno en análisis: {str(e)}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error interno del servidor" - ) from e - - return AnalysisResponse( - analysis_id=result.id, - filename=result.filename, - status=result.status, - quality_score=result.quality_score, - total_findings=result.total_findings, - created_at=result.created_at, - ) +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status +from sqlalchemy.orm import Session + +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.repositories.code_review_repository import CodeReviewRepository +from src.schemas.analysis import AnalysisResponse +from src.schemas.user import User +from src.services.analysis_service import AnalysisService +from src.utils.logger import logger + +router = APIRouter(prefix="/api/v1", tags=["analysis"]) + + +@router.post( + "/analyze", + response_model=AnalysisResponse, + status_code=status.HTTP_200_OK, + summary="Analizar código fuente Python", +) +async def analyze_code( + file: UploadFile = File(...), + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """ + Sube un archivo Python para análisis automatizado de seguridad y calidad. + + Reglas de Negocio: + - **RN1**: Requiere autenticación JWT. + - **RN3**: Verifica cuota diaria (Developers: 10/día). + - **RN4**: Valida extensión .py, tamaño <10MB y codificación UTF-8. + + Args: + file: Archivo .py a analizar. + current_user: Usuario autenticado (inyectado). + db: Sesión de base de datos (inyectada). + + Returns: + AnalysisResponse: Objeto con ID de análisis, estado y resumen. + + Raises: + HTTPException: 500 si ocurre un error interno. 
+ """ + + repo = CodeReviewRepository(db) + service = AnalysisService(repo) + + try: + result = await service.analyze_code(file, current_user.id) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error interno en análisis: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error interno del servidor" + ) from e + + return AnalysisResponse( + analysis_id=result.id, + filename=result.filename, + status=result.status, + quality_score=result.quality_score, + total_findings=result.total_findings, + created_at=result.created_at, + ) diff --git a/backend/src/routers/auth.py b/backend/src/routers/auth.py index 42af8a4..49f4d85 100644 --- a/backend/src/routers/auth.py +++ b/backend/src/routers/auth.py @@ -1,117 +1,117 @@ -""" -Router de autenticación. - -Expone endpoint POST /api/v1/auth/login para sincronizar -usuarios de Clerk con la base de datos (AC Escenario 1). -""" - -from fastapi import APIRouter, Depends, HTTPException -from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer -from sqlalchemy.orm import Session - -from src.core.database import get_db -from src.external.clerk_client import ( - ClerkClient, - ClerkTokenExpiredError, - ClerkTokenInvalidError, -) -from src.repositories.user_repo import UserRepository -from src.schemas.user import User -from src.services.auth_service import AuthService - -router = APIRouter(prefix="/api/v1/auth", tags=["Authentication"]) - -http_bearer = HTTPBearer() - - -@router.post("/login", response_model=User) -async def login( - credentials: HTTPAuthorizationCredentials = Depends(http_bearer), - db: Session = Depends(get_db), -) -> User: - """ - Sincroniza usuario de Clerk con la base de datos. 
- - Este endpoint cumple con AC Escenario 1 de JIRA: - - Valida el token JWT de Clerk - - Crea el usuario en la BD si no existe - - Actualiza los datos del usuario si ya existe - - Retorna el User schema con los datos sincronizados - - Args: - credentials: Token JWT en header Authorization: Bearer - db: Sesión de base de datos. - - Returns: - User: Usuario sincronizado. - - Raises: - HTTPException 401: Si el token es inválido o expirado. - """ - token = credentials.credentials - - # Inyectar dependencias - clerk_client = ClerkClient() - user_repository = UserRepository(db) - auth_service = AuthService(clerk_client, user_repository) - - try: - user = auth_service.login_user(token) - return user - - except ClerkTokenExpiredError: - raise HTTPException( - status_code=401, - detail="Token expirado", - headers={"WWW-Authenticate": "Bearer"}, - ) - except ClerkTokenInvalidError: - raise HTTPException( - status_code=401, - detail="Token inválido", - headers={"WWW-Authenticate": "Bearer"}, - ) - - -@router.get("/me", response_model=User) -async def get_current_user_info( - credentials: HTTPAuthorizationCredentials = Depends(http_bearer), -) -> User: - """ - Obtiene información del usuario actual sin sincronizar con BD. - - Útil para verificar el estado de autenticación desde el frontend. - - Args: - credentials: Token JWT en header Authorization: Bearer - - Returns: - User: Datos del usuario extraídos del token. - - Raises: - HTTPException 401: Si el token es inválido o expirado. 
- """ - token = credentials.credentials - clerk_client = ClerkClient() - - try: - payload = clerk_client.verify_token(token) - - return User( - id=payload["user_id"], - email=payload.get("email", ""), - name=payload.get("name"), - ) - - except ClerkTokenExpiredError: - raise HTTPException( - status_code=401, - detail="Token expirado", - headers={"WWW-Authenticate": "Bearer"}, - ) - except ClerkTokenInvalidError: - raise HTTPException( - status_code=401, - detail="Token inválido", - headers={"WWW-Authenticate": "Bearer"}, - ) +""" +Router de autenticación. + +Expone endpoint POST /api/v1/auth/login para sincronizar +usuarios de Clerk con la base de datos (AC Escenario 1). +""" + +from fastapi import APIRouter, Depends, HTTPException +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from sqlalchemy.orm import Session + +from src.core.database import get_db +from src.external.clerk_client import ( + ClerkClient, + ClerkTokenExpiredError, + ClerkTokenInvalidError, +) +from src.repositories.user_repo import UserRepository +from src.schemas.user import User +from src.services.auth_service import AuthService + +router = APIRouter(prefix="/api/v1/auth", tags=["Authentication"]) + +http_bearer = HTTPBearer() + + +@router.post("/login", response_model=User) +async def login( + credentials: HTTPAuthorizationCredentials = Depends(http_bearer), + db: Session = Depends(get_db), +) -> User: + """ + Sincroniza usuario de Clerk con la base de datos. + + Este endpoint cumple con AC Escenario 1 de JIRA: + - Valida el token JWT de Clerk + - Crea el usuario en la BD si no existe + - Actualiza los datos del usuario si ya existe + - Retorna el User schema con los datos sincronizados + + Args: + credentials: Token JWT en header Authorization: Bearer + db: Sesión de base de datos. + + Returns: + User: Usuario sincronizado. + + Raises: + HTTPException 401: Si el token es inválido o expirado. 
+ """ + token = credentials.credentials + + # Inyectar dependencias + clerk_client = ClerkClient() + user_repository = UserRepository(db) + auth_service = AuthService(clerk_client, user_repository) + + try: + user = auth_service.login_user(token) + return user + + except ClerkTokenExpiredError: + raise HTTPException( + status_code=401, + detail="Token expirado", + headers={"WWW-Authenticate": "Bearer"}, + ) + except ClerkTokenInvalidError: + raise HTTPException( + status_code=401, + detail="Token inválido", + headers={"WWW-Authenticate": "Bearer"}, + ) + + +@router.get("/me", response_model=User) +async def get_current_user_info( + credentials: HTTPAuthorizationCredentials = Depends(http_bearer), +) -> User: + """ + Obtiene información del usuario actual sin sincronizar con BD. + + Útil para verificar el estado de autenticación desde el frontend. + + Args: + credentials: Token JWT en header Authorization: Bearer + + Returns: + User: Datos del usuario extraídos del token. + + Raises: + HTTPException 401: Si el token es inválido o expirado. + """ + token = credentials.credentials + clerk_client = ClerkClient() + + try: + payload = clerk_client.verify_token(token) + + return User( + id=payload["user_id"], + email=payload.get("email", ""), + name=payload.get("name"), + ) + + except ClerkTokenExpiredError: + raise HTTPException( + status_code=401, + detail="Token expirado", + headers={"WWW-Authenticate": "Bearer"}, + ) + except ClerkTokenInvalidError: + raise HTTPException( + status_code=401, + detail="Token inválido", + headers={"WWW-Authenticate": "Bearer"}, + ) diff --git a/backend/src/routers/findings.py b/backend/src/routers/findings.py index c463dd6..6369855 100644 --- a/backend/src/routers/findings.py +++ b/backend/src/routers/findings.py @@ -1,278 +1,278 @@ -""" -Router para hallazgos (findings) con explicaciones de IA. 
- -Endpoints: -- GET /api/v1/findings/{id} - Obtener un hallazgo -- POST /api/v1/findings/{id}/explain - Generar explicación con IA -- GET /api/v1/findings/{id}/explain/status - Estado del rate limit - -Principios de diseño: -- SRP: Solo maneja HTTP, delega lógica a servicios -- Defensibilidad: Validación de entrada y manejo de errores -- Seguridad: Requiere autenticación para todas las operaciones -""" - -from typing import Any, Dict -from uuid import UUID - -from fastapi import APIRouter, Depends, HTTPException, status -from sqlalchemy.orm import Session - -from src.core.config.ai_config import get_ai_settings -from src.core.dependencies.auth import get_current_user -from src.core.dependencies.get_db import get_db -from src.models.finding import AgentFindingEntity -from src.schemas.ai_explanation import ( - AIExplanation, - AIExplanationError, - AIExplanationRequest, - AIExplanationResponse, - RateLimitInfo, -) -from src.schemas.finding import Finding, Severity -from src.schemas.user import User -from src.services.ai_service import ( - AIExplainerService, -) -from src.services.ai_service import AIExplanationError as ServiceAIError -from src.services.ai_service import ( - RateLimitExceeded, - get_ai_explainer_service, -) -from src.utils.logger import logger - -router = APIRouter(prefix="/api/v1/findings", tags=["findings"]) - - -def _entity_to_finding(entity: AgentFindingEntity) -> Finding: - """ - Convierte una entidad de BD a esquema Finding. 
- - Args: - entity: Entidad de base de datos - - Returns: - Esquema Finding - """ - return Finding( - severity=Severity(entity.severity.value), - issue_type=entity.issue_type, - message=entity.message, - line_number=entity.line_number, - agent_name=entity.agent_type, - code_snippet=entity.code_snippet, - suggestion=entity.suggestion, - rule_id=entity.issue_type, # Usar issue_type como rule_id si no hay otro - ) - - -@router.get( - "/{finding_id}", - response_model=Dict[str, Any], - status_code=status.HTTP_200_OK, - summary="Obtener un hallazgo por ID", -) -async def get_finding( - finding_id: UUID, - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db), -) -> Dict[str, Any]: - """ - Obtiene los detalles de un hallazgo específico. - - Args: - finding_id: UUID del hallazgo - current_user: Usuario autenticado - db: Sesión de base de datos - - Returns: - Detalles del hallazgo - - Raises: - HTTPException 404: Si el hallazgo no existe - """ - finding = db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first() - - if not finding: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado" - ) - - return { - "id": str(finding.id), - "agent_type": finding.agent_type, - "severity": finding.severity.value, - "issue_type": finding.issue_type, - "line_number": finding.line_number, - "message": finding.message, - "code_snippet": finding.code_snippet, - "suggestion": finding.suggestion, - "ai_explanation": finding.ai_explanation, - "created_at": finding.created_at.isoformat(), - } - - -@router.post( - "/{finding_id}/explain", - response_model=AIExplanationResponse, - status_code=status.HTTP_200_OK, - summary="Generar explicación con IA para un hallazgo", - responses={ - 200: {"description": "Explicación generada exitosamente"}, - 404: {"description": "Hallazgo no encontrado"}, - 429: {"description": "Rate limit excedido"}, - 503: {"description": "Servicio de IA no disponible"}, - 
}, -) -async def explain_finding( - finding_id: UUID, - request: AIExplanationRequest = AIExplanationRequest(), - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db), - service: AIExplainerService = Depends(get_ai_explainer_service), -) -> AIExplanationResponse: - """ - Genera una explicación detallada de un hallazgo usando IA generativa. - - Este endpoint: - 1. Verifica si ya existe una explicación en cache (JSONB) - 2. Si no, genera una nueva usando Vertex AI (Gemini) - 3. Almacena la explicación en cache para futuras consultas - - Reglas de Negocio: - - **RN1**: Requiere autenticación JWT - - **RN**: Rate limit de 10 requests/hora por usuario (configurable) - - **RN**: La explicación se cachea en BD para evitar regeneración - - Args: - finding_id: UUID del hallazgo a explicar - request: Opciones de la explicación - current_user: Usuario autenticado - db: Sesión de base de datos - service: Servicio de explicaciones de IA - - Returns: - AIExplanationResponse con la explicación generada - - Raises: - HTTPException 404: Si el hallazgo no existe - HTTPException 429: Si se excede el rate limit - HTTPException 503: Si el servicio de IA no está disponible - """ - # 1. Verificar que el servicio está configurado - settings = get_ai_settings() - if not settings.is_configured: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="El servicio de IA no está configurado. " - "Configure GOOGLE_APPLICATION_CREDENTIALS.", - ) - - # 2. Buscar el hallazgo - finding_entity = ( - db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first() - ) - - if not finding_entity: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado" - ) - - # 3. 
Verificar cache (ai_explanation JSONB) - if finding_entity.ai_explanation: - logger.info(f"Returning cached AI explanation for finding {finding_id}") - cached_explanation = AIExplanation.from_dict(finding_entity.ai_explanation) - return AIExplanationResponse( - finding_id=finding_id.int, - explanation=cached_explanation, - cached=True, - ) - - # 4. Generar nueva explicación - try: - # Convertir entidad a Finding schema - finding = _entity_to_finding(finding_entity) - - # Obtener código fuente del code_review si existe - code_context = None - if finding_entity.code_review and hasattr(finding_entity.code_review, "source_code"): - code_context = finding_entity.code_review.source_code - - # Generar explicación - explanation, rate_limit_info = await service.explain_finding( - finding=finding, - code_context=code_context, - user_id=current_user.id, - ) - - # 5. Guardar en cache (JSONB) - finding_entity.ai_explanation = explanation.to_dict() - db.commit() - - logger.info( - f"AI explanation generated and cached for finding {finding_id}. " - f"Tokens used: {explanation.tokens_used}" - ) - - return AIExplanationResponse( - finding_id=finding_id.int, - explanation=explanation, - cached=False, - ) - - except RateLimitExceeded as e: - logger.warning(f"Rate limit exceeded for user {current_user.id}: {e}") - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail=AIExplanationError( - error_type="rate_limit", - message="Has excedido el límite de explicaciones por hora. 
" - f"Límite: {e.rate_limit_info.requests_limit}/hora.", - rate_limit_info=e.rate_limit_info, - ).model_dump(), - ) from e - - except ServiceAIError as e: - logger.error(f"AI service error for finding {finding_id}: {e}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=AIExplanationError( - error_type="ai_error", - message=str(e), - ).model_dump(), - ) from e - - except Exception as e: - logger.error(f"Unexpected error explaining finding {finding_id}: {e}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Error interno generando explicación", - ) from e - - -@router.get( - "/{finding_id}/explain/status", - response_model=RateLimitInfo, - status_code=status.HTTP_200_OK, - summary="Obtener estado del rate limit para explicaciones", -) -async def get_rate_limit_status( - finding_id: UUID, # Solo para consistencia de URL - current_user: User = Depends(get_current_user), - service: AIExplainerService = Depends(get_ai_explainer_service), -) -> RateLimitInfo: - """ - Obtiene el estado actual del rate limit del usuario. - - Útil para mostrar al usuario cuántas explicaciones puede - solicitar antes de alcanzar el límite. - - Args: - finding_id: UUID del hallazgo (no usado, solo para URL) - current_user: Usuario autenticado - service: Servicio de explicaciones - - Returns: - RateLimitInfo con requests restantes y tiempo de reset - """ - return service.get_rate_limit_info(current_user.id) +""" +Router para hallazgos (findings) con explicaciones de IA. 
+ +Endpoints: +- GET /api/v1/findings/{id} - Obtener un hallazgo +- POST /api/v1/findings/{id}/explain - Generar explicación con IA +- GET /api/v1/findings/{id}/explain/status - Estado del rate limit + +Principios de diseño: +- SRP: Solo maneja HTTP, delega lógica a servicios +- Defensibilidad: Validación de entrada y manejo de errores +- Seguridad: Requiere autenticación para todas las operaciones +""" + +from typing import Any, Dict +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.orm import Session + +from src.core.config.ai_config import get_ai_settings +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.models.finding import AgentFindingEntity +from src.schemas.ai_explanation import ( + AIExplanation, + AIExplanationError, + AIExplanationRequest, + AIExplanationResponse, + RateLimitInfo, +) +from src.schemas.finding import Finding, Severity +from src.schemas.user import User +from src.services.ai_service import ( + AIExplainerService, +) +from src.services.ai_service import AIExplanationError as ServiceAIError +from src.services.ai_service import ( + RateLimitExceeded, + get_ai_explainer_service, +) +from src.utils.logger import logger + +router = APIRouter(prefix="/api/v1/findings", tags=["findings"]) + + +def _entity_to_finding(entity: AgentFindingEntity) -> Finding: + """ + Convierte una entidad de BD a esquema Finding. 
+ + Args: + entity: Entidad de base de datos + + Returns: + Esquema Finding + """ + return Finding( + severity=Severity(entity.severity.value), + issue_type=entity.issue_type, + message=entity.message, + line_number=entity.line_number, + agent_name=entity.agent_type, + code_snippet=entity.code_snippet, + suggestion=entity.suggestion, + rule_id=entity.issue_type, # Usar issue_type como rule_id si no hay otro + ) + + +@router.get( + "/{finding_id}", + response_model=Dict[str, Any], + status_code=status.HTTP_200_OK, + summary="Obtener un hallazgo por ID", +) +async def get_finding( + finding_id: UUID, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> Dict[str, Any]: + """ + Obtiene los detalles de un hallazgo específico. + + Args: + finding_id: UUID del hallazgo + current_user: Usuario autenticado + db: Sesión de base de datos + + Returns: + Detalles del hallazgo + + Raises: + HTTPException 404: Si el hallazgo no existe + """ + finding = db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first() + + if not finding: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado" + ) + + return { + "id": str(finding.id), + "agent_type": finding.agent_type, + "severity": finding.severity.value, + "issue_type": finding.issue_type, + "line_number": finding.line_number, + "message": finding.message, + "code_snippet": finding.code_snippet, + "suggestion": finding.suggestion, + "ai_explanation": finding.ai_explanation, + "created_at": finding.created_at.isoformat(), + } + + +@router.post( + "/{finding_id}/explain", + response_model=AIExplanationResponse, + status_code=status.HTTP_200_OK, + summary="Generar explicación con IA para un hallazgo", + responses={ + 200: {"description": "Explicación generada exitosamente"}, + 404: {"description": "Hallazgo no encontrado"}, + 429: {"description": "Rate limit excedido"}, + 503: {"description": "Servicio de IA no disponible"}, + 
}, +) +async def explain_finding( + finding_id: UUID, + request: AIExplanationRequest = AIExplanationRequest(), + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), + service: AIExplainerService = Depends(get_ai_explainer_service), +) -> AIExplanationResponse: + """ + Genera una explicación detallada de un hallazgo usando IA generativa. + + Este endpoint: + 1. Verifica si ya existe una explicación en cache (JSONB) + 2. Si no, genera una nueva usando Vertex AI (Gemini) + 3. Almacena la explicación en cache para futuras consultas + + Reglas de Negocio: + - **RN1**: Requiere autenticación JWT + - **RN**: Rate limit de 10 requests/hora por usuario (configurable) + - **RN**: La explicación se cachea en BD para evitar regeneración + + Args: + finding_id: UUID del hallazgo a explicar + request: Opciones de la explicación + current_user: Usuario autenticado + db: Sesión de base de datos + service: Servicio de explicaciones de IA + + Returns: + AIExplanationResponse con la explicación generada + + Raises: + HTTPException 404: Si el hallazgo no existe + HTTPException 429: Si se excede el rate limit + HTTPException 503: Si el servicio de IA no está disponible + """ + # 1. Verificar que el servicio está configurado + settings = get_ai_settings() + if not settings.is_configured: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="El servicio de IA no está configurado. " + "Configure GOOGLE_APPLICATION_CREDENTIALS.", + ) + + # 2. Buscar el hallazgo + finding_entity = ( + db.query(AgentFindingEntity).filter(AgentFindingEntity.id == finding_id).first() + ) + + if not finding_entity: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail=f"Hallazgo {finding_id} no encontrado" + ) + + # 3. 
Verificar cache (ai_explanation JSONB) + if finding_entity.ai_explanation: + logger.info(f"Returning cached AI explanation for finding {finding_id}") + cached_explanation = AIExplanation.from_dict(finding_entity.ai_explanation) + return AIExplanationResponse( + finding_id=finding_id.int, # NOTE(review): UUID.int is a 128-bit integer - overflows JSON/JS Number precision and contradicts the schema example; prefer str(finding_id) with a str-typed schema field + explanation=cached_explanation, + cached=True, + ) + + # 4. Generar nueva explicación + try: + # Convertir entidad a Finding schema + finding = _entity_to_finding(finding_entity) + + # Obtener código fuente del code_review si existe + code_context = None + if finding_entity.code_review and hasattr(finding_entity.code_review, "source_code"): + code_context = finding_entity.code_review.source_code + + # Generar explicación + explanation, rate_limit_info = await service.explain_finding( # NOTE(review): rate_limit_info is never used - rename to _rate_limit_info or surface it in the response + finding=finding, + code_context=code_context, + user_id=current_user.id, + ) + + # 5. Guardar en cache (JSONB) + finding_entity.ai_explanation = explanation.to_dict() + db.commit() + + logger.info( + f"AI explanation generated and cached for finding {finding_id}. " + f"Tokens used: {explanation.tokens_used}" + ) + + return AIExplanationResponse( + finding_id=finding_id.int, # NOTE(review): same 128-bit UUID.int concern as the cached branch above + explanation=explanation, + cached=False, + ) + + except RateLimitExceeded as e: + logger.warning(f"Rate limit exceeded for user {current_user.id}: {e}") + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail=AIExplanationError( + error_type="rate_limit", + message="Has excedido el límite de explicaciones por hora. 
" + f"Límite: {e.rate_limit_info.requests_limit}/hora.", + rate_limit_info=e.rate_limit_info, + ).model_dump(), + ) from e + + except ServiceAIError as e: + logger.error(f"AI service error for finding {finding_id}: {e}") + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=AIExplanationError( + error_type="ai_error", + message=str(e), + ).model_dump(), + ) from e + + except Exception as e: + logger.error(f"Unexpected error explaining finding {finding_id}: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Error interno generando explicación", + ) from e + + +@router.get( + "/{finding_id}/explain/status", + response_model=RateLimitInfo, + status_code=status.HTTP_200_OK, + summary="Obtener estado del rate limit para explicaciones", +) +async def get_rate_limit_status( + finding_id: UUID, # Solo para consistencia de URL + current_user: User = Depends(get_current_user), + service: AIExplainerService = Depends(get_ai_explainer_service), +) -> RateLimitInfo: + """ + Obtiene el estado actual del rate limit del usuario. + + Útil para mostrar al usuario cuántas explicaciones puede + solicitar antes de alcanzar el límite. + + Args: + finding_id: UUID del hallazgo (no usado, solo para URL) + current_user: Usuario autenticado + service: Servicio de explicaciones + + Returns: + RateLimitInfo con requests restantes y tiempo de reset + """ + return service.get_rate_limit_info(current_user.id) diff --git a/backend/src/schemas/ai_explanation.py b/backend/src/schemas/ai_explanation.py index 8d5c0ca..82804a8 100644 --- a/backend/src/schemas/ai_explanation.py +++ b/backend/src/schemas/ai_explanation.py @@ -1,234 +1,234 @@ -""" -Esquemas para explicaciones generadas por IA. - -Define las estructuras de datos para las explicaciones de seguridad -generadas por modelos de IA generativa (Gemini/Vertex AI). 
- -Principios de diseño: -- Inmutabilidad: Los esquemas son de solo lectura -- Validación: Pydantic valida todos los campos -- Serialización: Compatible con JSON para almacenamiento en JSONB -""" - -from datetime import datetime, timezone -from typing import List, Optional - -from pydantic import BaseModel, ConfigDict, Field - - -class AIExplanationRequest(BaseModel): - """ - Request para solicitar una explicación de IA. - - Attributes: - include_attack_example: Si se debe incluir ejemplo de ataque - include_references: Si se deben incluir referencias - language: Idioma de la explicación (es/en) - """ - - include_attack_example: bool = Field( - default=True, description="Incluir ejemplo de ataque potencial" - ) - include_references: bool = Field(default=True, description="Incluir referencias OWASP/CWE") - language: str = Field(default="es", description="Idioma de la explicación") - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "include_attack_example": True, - "include_references": True, - "language": "es", - } - } - ) - - -class AIExplanation(BaseModel): - """ - Explicación generada por IA para un hallazgo de seguridad. - - Esta estructura se almacena en el campo JSONB 'ai_explanation' - de AgentFindingEntity para cache persistente. - - Attributes: - explanation: Explicación detallada del problema de seguridad - suggested_fix: Código sugerido para corregir el problema - attack_example: Ejemplo de cómo podría explotarse (opcional) - references: Lista de referencias OWASP, CWE, etc. 
(opcional) - model_used: Nombre del modelo que generó la explicación - tokens_used: Número de tokens consumidos - generated_at: Timestamp de generación - - Example: - explanation = AIExplanation( - explanation="El uso de eval() permite ejecución de código...", - suggested_fix="import ast\\nresult = ast.literal_eval(user_input)", - attack_example="user_input = '__import__(\"os\").system(\"rm -rf /\")'", - references=["OWASP A03:2021", "CWE-94"], - model_used="gemini-1.5-flash-001", - tokens_used=450 - ) - """ - - explanation: str = Field(..., min_length=10, description="Explicación detallada del problema") - suggested_fix: str = Field(..., min_length=5, description="Código sugerido para corregir") - attack_example: Optional[str] = Field( - default=None, description="Ejemplo de explotación potencial" - ) - references: Optional[List[str]] = Field( - default=None, description="Referencias OWASP, CWE, etc." - ) - model_used: str = Field(..., description="Modelo de IA usado") - tokens_used: int = Field(..., ge=0, description="Tokens consumidos") - generated_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc), - description="Timestamp de generación", - ) - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "explanation": ( - "El uso de eval() en Python es extremadamente peligroso porque " - "permite la ejecución arbitraria de código. Un atacante podría " - "inyectar código malicioso que se ejecutaría con los privilegios " - "del proceso actual." 
- ), - "suggested_fix": ( - "import ast\n\n" - "# Usar literal_eval para evaluar literales de forma segura\n" - "result = ast.literal_eval(user_input)" - ), - "attack_example": ( - "# Un atacante podría enviar:\n" - 'user_input = \'__import__("os").system("cat /etc/passwd")\'' - ), - "references": ["OWASP A03:2021 - Injection", "CWE-94: Code Injection"], - "model_used": "gemini-1.5-flash-001", - "tokens_used": 450, - "generated_at": "2024-01-15T10:30:00Z", - } - } - ) - - def to_dict(self) -> dict: - """ - Convierte a diccionario para almacenamiento en JSONB. - - Returns: - Diccionario serializable - """ - return { - "explanation": self.explanation, - "suggested_fix": self.suggested_fix, - "attack_example": self.attack_example, - "references": self.references, - "model_used": self.model_used, - "tokens_used": self.tokens_used, - "generated_at": self.generated_at.isoformat(), - } - - @classmethod - def from_dict(cls, data: dict) -> "AIExplanation": - """ - Crea instancia desde diccionario (recuperado de JSONB). - - Args: - data: Diccionario con datos de la explicación - - Returns: - Instancia de AIExplanation - """ - # Convertir string ISO a datetime si es necesario - generated_at = data.get("generated_at") - if isinstance(generated_at, str): - data["generated_at"] = datetime.fromisoformat(generated_at.replace("Z", "+00:00")) - - return cls(**data) - - -class AIExplanationResponse(BaseModel): - """ - Response con la explicación de IA para el endpoint. 
- - Attributes: - finding_id: ID del hallazgo explicado - explanation: La explicación generada - cached: Si la explicación viene de cache - """ - - finding_id: int = Field(..., description="ID del hallazgo") - explanation: AIExplanation = Field(..., description="Explicación generada") - cached: bool = Field(..., description="Si viene de cache") - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "finding_id": 123, - "explanation": { - "explanation": "El uso de eval() es peligroso...", - "suggested_fix": "Usar ast.literal_eval()", - "attack_example": "user_input = '__import__(\"os\")...'", - "references": ["OWASP A03:2021"], - "model_used": "gemini-1.5-flash-001", - "tokens_used": 450, - "generated_at": "2024-01-15T10:30:00Z", - }, - "cached": False, - } - } - ) - - -class RateLimitInfo(BaseModel): - """ - Información sobre el rate limit del usuario. - - Attributes: - requests_remaining: Requests restantes en el período - requests_limit: Límite total de requests - reset_at: Cuando se resetea el contador - """ - - requests_remaining: int = Field(..., ge=0, description="Requests restantes") - requests_limit: int = Field(..., ge=0, description="Límite total") - reset_at: datetime = Field(..., description="Hora de reset") - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "requests_remaining": 8, - "requests_limit": 10, - "reset_at": "2024-01-15T11:00:00Z", - } - } - ) - - -class AIExplanationError(BaseModel): - """ - Error en la generación de explicación. 
- - Attributes: - error_type: Tipo de error (rate_limit, ai_error, not_found) - message: Mensaje descriptivo - rate_limit_info: Info de rate limit si aplica - """ - - error_type: str = Field(..., description="Tipo de error") - message: str = Field(..., description="Mensaje de error") - rate_limit_info: Optional[RateLimitInfo] = Field(default=None, description="Info de rate limit") - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "error_type": "rate_limit", - "message": "Has excedido el límite de explicaciones por hora", - "rate_limit_info": { - "requests_remaining": 0, - "requests_limit": 10, - "reset_at": "2024-01-15T11:00:00Z", - }, - } - } - ) +""" +Esquemas para explicaciones generadas por IA. + +Define las estructuras de datos para las explicaciones de seguridad +generadas por modelos de IA generativa (Gemini/Vertex AI). + +Principios de diseño: +- Inmutabilidad: Los esquemas son de solo lectura +- Validación: Pydantic valida todos los campos +- Serialización: Compatible con JSON para almacenamiento en JSONB +""" + +from datetime import datetime, timezone +from typing import List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class AIExplanationRequest(BaseModel): + """ + Request para solicitar una explicación de IA. 
+ + Attributes: + include_attack_example: Si se debe incluir ejemplo de ataque + include_references: Si se deben incluir referencias + language: Idioma de la explicación (es/en) + """ + + include_attack_example: bool = Field( + default=True, description="Incluir ejemplo de ataque potencial" + ) + include_references: bool = Field(default=True, description="Incluir referencias OWASP/CWE") + language: str = Field(default="es", description="Idioma de la explicación") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "include_attack_example": True, + "include_references": True, + "language": "es", + } + } + ) + + +class AIExplanation(BaseModel): + """ + Explicación generada por IA para un hallazgo de seguridad. + + Esta estructura se almacena en el campo JSONB 'ai_explanation' + de AgentFindingEntity para cache persistente. + + Attributes: + explanation: Explicación detallada del problema de seguridad + suggested_fix: Código sugerido para corregir el problema + attack_example: Ejemplo de cómo podría explotarse (opcional) + references: Lista de referencias OWASP, CWE, etc. 
(opcional) + model_used: Nombre del modelo que generó la explicación + tokens_used: Número de tokens consumidos + generated_at: Timestamp de generación + + Example: + explanation = AIExplanation( + explanation="El uso de eval() permite ejecución de código...", + suggested_fix="import ast\\nresult = ast.literal_eval(user_input)", + attack_example="user_input = '__import__(\"os\").system(\"rm -rf /\")'", + references=["OWASP A03:2021", "CWE-94"], + model_used="gemini-1.5-flash-001", + tokens_used=450 + ) + """ + + explanation: str = Field(..., min_length=10, description="Explicación detallada del problema") + suggested_fix: str = Field(..., min_length=5, description="Código sugerido para corregir") + attack_example: Optional[str] = Field( + default=None, description="Ejemplo de explotación potencial" + ) + references: Optional[List[str]] = Field( + default=None, description="Referencias OWASP, CWE, etc." + ) + model_used: str = Field(..., description="Modelo de IA usado") + tokens_used: int = Field(..., ge=0, description="Tokens consumidos") + generated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="Timestamp de generación", + ) + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "explanation": ( + "El uso de eval() en Python es extremadamente peligroso porque " + "permite la ejecución arbitraria de código. Un atacante podría " + "inyectar código malicioso que se ejecutaría con los privilegios " + "del proceso actual." 
+ ), + "suggested_fix": ( + "import ast\n\n" + "# Usar literal_eval para evaluar literales de forma segura\n" + "result = ast.literal_eval(user_input)" + ), + "attack_example": ( + "# Un atacante podría enviar:\n" + 'user_input = \'__import__("os").system("cat /etc/passwd")\'' + ), + "references": ["OWASP A03:2021 - Injection", "CWE-94: Code Injection"], + "model_used": "gemini-1.5-flash-001", + "tokens_used": 450, + "generated_at": "2024-01-15T10:30:00Z", + } + } + ) + + def to_dict(self) -> dict: + """ + Convierte a diccionario para almacenamiento en JSONB. + + Returns: + Diccionario serializable + """ + return { + "explanation": self.explanation, + "suggested_fix": self.suggested_fix, + "attack_example": self.attack_example, + "references": self.references, + "model_used": self.model_used, + "tokens_used": self.tokens_used, + "generated_at": self.generated_at.isoformat(), + } + + @classmethod + def from_dict(cls, data: dict) -> "AIExplanation": + """ + Crea instancia desde diccionario (recuperado de JSONB). + + Args: + data: Diccionario con datos de la explicación + + Returns: + Instancia de AIExplanation + """ + # Convertir string ISO a datetime si es necesario + generated_at = data.get("generated_at") + if isinstance(generated_at, str): + data["generated_at"] = datetime.fromisoformat(generated_at.replace("Z", "+00:00")) + + return cls(**data) + + +class AIExplanationResponse(BaseModel): + """ + Response con la explicación de IA para el endpoint. 
+ + Attributes: + finding_id: ID del hallazgo explicado + explanation: La explicación generada + cached: Si la explicación viene de cache + """ + + finding_id: int = Field(..., description="ID del hallazgo") + explanation: AIExplanation = Field(..., description="Explicación generada") + cached: bool = Field(..., description="Si viene de cache") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "finding_id": 123, + "explanation": { + "explanation": "El uso de eval() es peligroso...", + "suggested_fix": "Usar ast.literal_eval()", + "attack_example": "user_input = '__import__(\"os\")...'", + "references": ["OWASP A03:2021"], + "model_used": "gemini-1.5-flash-001", + "tokens_used": 450, + "generated_at": "2024-01-15T10:30:00Z", + }, + "cached": False, + } + } + ) + + +class RateLimitInfo(BaseModel): + """ + Información sobre el rate limit del usuario. + + Attributes: + requests_remaining: Requests restantes en el período + requests_limit: Límite total de requests + reset_at: Cuando se resetea el contador + """ + + requests_remaining: int = Field(..., ge=0, description="Requests restantes") + requests_limit: int = Field(..., ge=0, description="Límite total") + reset_at: datetime = Field(..., description="Hora de reset") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "requests_remaining": 8, + "requests_limit": 10, + "reset_at": "2024-01-15T11:00:00Z", + } + } + ) + + +class AIExplanationError(BaseModel): + """ + Error en la generación de explicación. 
+ + Attributes: + error_type: Tipo de error (rate_limit, ai_error, not_found) + message: Mensaje descriptivo + rate_limit_info: Info de rate limit si aplica + """ + + error_type: str = Field(..., description="Tipo de error") + message: str = Field(..., description="Mensaje de error") + rate_limit_info: Optional[RateLimitInfo] = Field(default=None, description="Info de rate limit") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "error_type": "rate_limit", + "message": "Has excedido el límite de explicaciones por hora", + "rate_limit_info": { + "requests_remaining": 0, + "requests_limit": 10, + "reset_at": "2024-01-15T11:00:00Z", + }, + } + } + ) diff --git a/backend/src/schemas/analysis.py b/backend/src/schemas/analysis.py index 2974b13..5c61afe 100644 --- a/backend/src/schemas/analysis.py +++ b/backend/src/schemas/analysis.py @@ -1,281 +1,281 @@ -""" -Esquemas de análisis usando Pydantic v2 -""" - -import ast as python_ast -from datetime import datetime, timezone -from textwrap import dedent -from typing import Any, Dict, List, Optional -from uuid import UUID, uuid4 - -from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, field_validator, model_validator - -from src.models.enums.review_status import ReviewStatus -from src.schemas.finding import Finding - - -class AnalysisContext(BaseModel): - """ - Contexto de análisis que encapsula toda la información de un análisis. - - Se pasa entre agentes para que cada uno realice su análisis específico. - - Attributes: - code_content: Código Python a analizar - filename: Nombre del archivo (debe terminar en .py) - language: Lenguaje de programación (default: python) - analysis_id: UUID único del análisis - metadata: Información adicional (usuario, timestamp, etc.) 
- created_at: Timestamp UTC de creación - - Example: - context = AnalysisContext( - code_content="def hello():\n print('Hello')", - filename="app.py", - metadata={"user_id": "123"} - ) - """ - - code_content: str = Field(..., min_length=1, description="Código Python a analizar") - filename: str = Field( - ..., min_length=3, description="Nombre del archivo (debe terminar en .py)" - ) - language: str = Field(default="python", description="Lenguaje de programación") - analysis_id: UUID = Field(default_factory=uuid4, description="ID único del análisis") - metadata: Dict[str, Any] = Field(default_factory=dict, description="Información adicional") - created_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc), description="Timestamp UTC de creación" - ) - - # Se Usa PrivateAttr en Pydantic v2 por sugerencia - _ast_cache: Optional[python_ast.Module] = PrivateAttr(default=None) - _lines_cache: Optional[List[str]] = PrivateAttr(default=None) - - model_config = ConfigDict( - arbitrary_types_allowed=True, - json_schema_extra={ - "example": { - "code_content": "def hello():\n print('Hello World')", - "filename": "example.py", - "language": "python", - "metadata": {"user_id": "123", "project": "CodeGuard"}, - } - }, - ) - - @field_validator("code_content") - @classmethod - def validate_code_content(cls, v: str) -> str: - """Valida que el código no esté vacío.""" - if not v or not v.strip(): - raise ValueError("code_content cannot be empty or whitespace only") - return v - - @field_validator("filename") - @classmethod - def validate_filename(cls, v: str) -> str: - """Valida que sea archivo Python.""" - if not v.endswith(".py"): - raise ValueError("Only Python files (.py) are supported") - if not v or len(v) < 3: - raise ValueError("filename must be at least 3 characters") - return v - - @model_validator(mode="after") - def _normalize_code_content(self) -> "AnalysisContext": - """ - Normaliza el código eliminando la indentación común para evitar - 
SyntaxError cuando se parsean fixtures con sangría artificial. - """ - self.code_content = dedent(self.code_content) - return self - - @property - def line_count(self) -> int: - """Retorna el número de líneas del código.""" - return len(self.code_content.splitlines()) # pylint: disable=no-member - - @property - def char_count(self) -> int: - """Retorna el número de caracteres del código.""" - return len(self.code_content) - - def add_metadata(self, key: str, value: Any) -> None: - """ - Agrega una entrada a la metadata del contexto. - - Args: - key: Clave de la metadata - value: Valor de la metadata - """ - self.metadata[key] = value - - def get_ast(self) -> python_ast.Module: - """ - Retorna el AST parseado del código (lazy loading). - - Returns: - AST Module del código Python - - Raises: - SyntaxError: Si el código no es Python válido - """ - if self._ast_cache is None: - try: - self._ast_cache = python_ast.parse(self.code_content, filename=self.filename) - except SyntaxError as e: - raise SyntaxError(f"Invalid Python syntax in {self.filename}: {e}") from e - return self._ast_cache - - def get_lines(self) -> List[str]: - """ - Retorna el código como lista de líneas (lazy loading). - - Returns: - Lista de strings, una por línea - """ - if self._lines_cache is None: - self._lines_cache = self.code_content.splitlines() # pylint: disable=no-member - return self._lines_cache - - def get_line(self, line_number: int) -> Optional[str]: - """ - Retorna una línea específica del código (1-based indexing). - - Args: - line_number: Número de línea (1-based) - - Returns: - String con la línea o None si no existe - """ - lines = self.get_lines() - if 1 <= line_number <= len(lines): - return lines[line_number - 1] - return None - - def get_code_snippet(self, start_line: int, end_line: int) -> str: - """ - Retorna un fragmento de código entre líneas. 
- - Args: - start_line: Línea inicial (1-based, inclusiva) - end_line: Línea final (1-based, inclusiva) - - Returns: - String con el fragmento de código - """ - lines = self.get_lines() - start_idx = max(0, start_line - 1) - end_idx = min(len(lines), end_line) - return "\n".join(lines[start_idx:end_idx]) - - -class AnalysisRequest(BaseModel): - """ - Request para iniciar un análisis de código. - - Attributes: - filename: Nombre del archivo - code_content: Código a analizar - agents_config: Configuración de qué agentes ejecutar - """ - - filename: str = Field(..., min_length=3, description="Nombre del archivo") - code_content: str = Field(..., min_length=1, description="Código a analizar") - agents_config: Optional[Dict[str, bool]] = Field( - default=None, description="Qué agentes ejecutar" - ) - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "filename": "app.py", - "code_content": "import os\n\ndef main():\n pass", - "agents_config": { - "security": True, - "quality": True, - "performance": False, - "style": True, - }, - } - } - ) - - -class AnalysisResponse(BaseModel): - """ - Response cuando se inicia un análisis. 
- - Attributes: - analysis_id: UUID del análisis - filename: Nombre del archivo - status: Estado actual (pending, processing, completed, failed) - created_at: Timestamp de creación - """ - - analysis_id: UUID = Field(..., description="ID único del análisis") - filename: str = Field(..., description="Nombre del archivo") - status: str = Field(..., description="Estado del análisis") - quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad") - total_findings: int = Field(..., ge=0, description="Total de hallazgos") - created_at: datetime = Field(..., description="Timestamp de creación") - - model_config = ConfigDict( - json_schema_extra={ - "example": { - "analysis_id": "550e8400-e29b-41d4-a716-446655440000", - "filename": "app.py", - "status": "pending", - "created_at": "2025-11-06T21:00:00Z", - } - } - ) - - -class CodeReview(BaseModel): - """ - Modelo de dominio para una revisión de código completa. - - Este modelo representa la información que fluye entre la capa de persistencia - y la capa de servicio. Contiene el código desencriptado listo para ser usado. - - Attributes: - id: Identificador único de la revisión. - user_id: ID del usuario propietario. - filename: Nombre del archivo analizado. - code_content: Contenido del código fuente (texto plano). - quality_score: Puntaje de calidad calculado (0-100). - status: Estado actual del análisis. - total_findings: Cantidad total de hallazgos detectados. - created_at: Fecha de creación. - completed_at: Fecha de finalización (opcional). 
- """ - - id: UUID = Field(..., description="ID único de la revisión") - user_id: str = Field(..., description="ID del usuario propietario (Clerk ID)") - filename: str = Field(..., description="Nombre del archivo analizado") - code_content: str = Field(..., description="Contenido del código fuente desencriptado") - quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad (0-100)") - status: ReviewStatus = Field(..., description="Estado actual del análisis") - total_findings: int = Field(default=0, ge=0, description="Total de hallazgos encontrados") - findings: List[Finding] = Field( - default_factory=list, description="Lista de hallazgos detallados" - ) - created_at: datetime = Field(..., description="Fecha de creación del análisis") - completed_at: Optional[datetime] = Field(default=None, description="Fecha de finalización") - - model_config = ConfigDict( - from_attributes=True, - json_schema_extra={ - "example": { - "id": "123e4567-e89b-12d3-a456-426614174000", - "user_id": "user_2819", - "filename": "main.py", - "code_content": "print('Hello World')", - "quality_score": 85, - "status": "completed", - "total_findings": 3, - "created_at": "2025-11-22T10:00:00Z", - } - }, - ) +""" +Esquemas de análisis usando Pydantic v2 +""" + +import ast as python_ast +from datetime import datetime, timezone +from textwrap import dedent +from typing import Any, Dict, List, Optional +from uuid import UUID, uuid4 + +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, field_validator, model_validator + +from src.models.enums.review_status import ReviewStatus +from src.schemas.finding import Finding + + +class AnalysisContext(BaseModel): + """ + Contexto de análisis que encapsula toda la información de un análisis. + + Se pasa entre agentes para que cada uno realice su análisis específico. 
+ + Attributes: + code_content: Código Python a analizar + filename: Nombre del archivo (debe terminar en .py) + language: Lenguaje de programación (default: python) + analysis_id: UUID único del análisis + metadata: Información adicional (usuario, timestamp, etc.) + created_at: Timestamp UTC de creación + + Example: + context = AnalysisContext( + code_content="def hello():\n print('Hello')", + filename="app.py", + metadata={"user_id": "123"} + ) + """ + + code_content: str = Field(..., min_length=1, description="Código Python a analizar") + filename: str = Field( + ..., min_length=3, description="Nombre del archivo (debe terminar en .py)" + ) + language: str = Field(default="python", description="Lenguaje de programación") + analysis_id: UUID = Field(default_factory=uuid4, description="ID único del análisis") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Información adicional") + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), description="Timestamp UTC de creación" + ) + + # Se Usa PrivateAttr en Pydantic v2 por sugerencia + _ast_cache: Optional[python_ast.Module] = PrivateAttr(default=None) + _lines_cache: Optional[List[str]] = PrivateAttr(default=None) + + model_config = ConfigDict( + arbitrary_types_allowed=True, + json_schema_extra={ + "example": { + "code_content": "def hello():\n print('Hello World')", + "filename": "example.py", + "language": "python", + "metadata": {"user_id": "123", "project": "CodeGuard"}, + } + }, + ) + + @field_validator("code_content") + @classmethod + def validate_code_content(cls, v: str) -> str: + """Valida que el código no esté vacío.""" + if not v or not v.strip(): + raise ValueError("code_content cannot be empty or whitespace only") + return v + + @field_validator("filename") + @classmethod + def validate_filename(cls, v: str) -> str: + """Valida que sea archivo Python.""" + if not v.endswith(".py"): + raise ValueError("Only Python files (.py) are supported") + if not 
v or len(v) < 3: + raise ValueError("filename must be at least 3 characters") + return v + + @model_validator(mode="after") + def _normalize_code_content(self) -> "AnalysisContext": + """ + Normaliza el código eliminando la indentación común para evitar + SyntaxError cuando se parsean fixtures con sangría artificial. + """ + self.code_content = dedent(self.code_content) + return self + + @property + def line_count(self) -> int: + """Retorna el número de líneas del código.""" + return len(self.code_content.splitlines()) # pylint: disable=no-member + + @property + def char_count(self) -> int: + """Retorna el número de caracteres del código.""" + return len(self.code_content) + + def add_metadata(self, key: str, value: Any) -> None: + """ + Agrega una entrada a la metadata del contexto. + + Args: + key: Clave de la metadata + value: Valor de la metadata + """ + self.metadata[key] = value + + def get_ast(self) -> python_ast.Module: + """ + Retorna el AST parseado del código (lazy loading). + + Returns: + AST Module del código Python + + Raises: + SyntaxError: Si el código no es Python válido + """ + if self._ast_cache is None: + try: + self._ast_cache = python_ast.parse(self.code_content, filename=self.filename) + except SyntaxError as e: + raise SyntaxError(f"Invalid Python syntax in {self.filename}: {e}") from e + return self._ast_cache + + def get_lines(self) -> List[str]: + """ + Retorna el código como lista de líneas (lazy loading). + + Returns: + Lista de strings, una por línea + """ + if self._lines_cache is None: + self._lines_cache = self.code_content.splitlines() # pylint: disable=no-member + return self._lines_cache + + def get_line(self, line_number: int) -> Optional[str]: + """ + Retorna una línea específica del código (1-based indexing). 
+ + Args: + line_number: Número de línea (1-based) + + Returns: + String con la línea o None si no existe + """ + lines = self.get_lines() + if 1 <= line_number <= len(lines): + return lines[line_number - 1] + return None + + def get_code_snippet(self, start_line: int, end_line: int) -> str: + """ + Retorna un fragmento de código entre líneas. + + Args: + start_line: Línea inicial (1-based, inclusiva) + end_line: Línea final (1-based, inclusiva) + + Returns: + String con el fragmento de código + """ + lines = self.get_lines() + start_idx = max(0, start_line - 1) + end_idx = min(len(lines), end_line) + return "\n".join(lines[start_idx:end_idx]) + + +class AnalysisRequest(BaseModel): + """ + Request para iniciar un análisis de código. + + Attributes: + filename: Nombre del archivo + code_content: Código a analizar + agents_config: Configuración de qué agentes ejecutar + """ + + filename: str = Field(..., min_length=3, description="Nombre del archivo") + code_content: str = Field(..., min_length=1, description="Código a analizar") + agents_config: Optional[Dict[str, bool]] = Field( + default=None, description="Qué agentes ejecutar" + ) + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "filename": "app.py", + "code_content": "import os\n\ndef main():\n pass", + "agents_config": { + "security": True, + "quality": True, + "performance": False, + "style": True, + }, + } + } + ) + + +class AnalysisResponse(BaseModel): + """ + Response cuando se inicia un análisis. 
+ + Attributes: + analysis_id: UUID del análisis + filename: Nombre del archivo + status: Estado actual (pending, processing, completed, failed) + created_at: Timestamp de creación + """ + + analysis_id: UUID = Field(..., description="ID único del análisis") + filename: str = Field(..., description="Nombre del archivo") + status: str = Field(..., description="Estado del análisis") + quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad") + total_findings: int = Field(..., ge=0, description="Total de hallazgos") + created_at: datetime = Field(..., description="Timestamp de creación") + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "analysis_id": "550e8400-e29b-41d4-a716-446655440000", + "filename": "app.py", + "status": "pending", + "created_at": "2025-11-06T21:00:00Z", + } + } + ) + + +class CodeReview(BaseModel): + """ + Modelo de dominio para una revisión de código completa. + + Este modelo representa la información que fluye entre la capa de persistencia + y la capa de servicio. Contiene el código desencriptado listo para ser usado. + + Attributes: + id: Identificador único de la revisión. + user_id: ID del usuario propietario. + filename: Nombre del archivo analizado. + code_content: Contenido del código fuente (texto plano). + quality_score: Puntaje de calidad calculado (0-100). + status: Estado actual del análisis. + total_findings: Cantidad total de hallazgos detectados. + created_at: Fecha de creación. + completed_at: Fecha de finalización (opcional). 
+ """ + + id: UUID = Field(..., description="ID único de la revisión") + user_id: str = Field(..., description="ID del usuario propietario (Clerk ID)") + filename: str = Field(..., description="Nombre del archivo analizado") + code_content: str = Field(..., description="Contenido del código fuente desencriptado") + quality_score: int = Field(..., ge=0, le=100, description="Puntaje de calidad (0-100)") + status: ReviewStatus = Field(..., description="Estado actual del análisis") + total_findings: int = Field(default=0, ge=0, description="Total de hallazgos encontrados") + findings: List[Finding] = Field( + default_factory=list, description="Lista de hallazgos detallados" + ) + created_at: datetime = Field(..., description="Fecha de creación del análisis") + completed_at: Optional[datetime] = Field(default=None, description="Fecha de finalización") + + model_config = ConfigDict( + from_attributes=True, + json_schema_extra={ + "example": { + "id": "123e4567-e89b-12d3-a456-426614174000", + "user_id": "user_2819", + "filename": "main.py", + "code_content": "print('Hello World')", + "quality_score": 85, + "status": "completed", + "total_findings": 3, + "created_at": "2025-11-22T10:00:00Z", + } + }, + ) diff --git a/backend/src/schemas/finding.py b/backend/src/schemas/finding.py index e535619..c684fa6 100644 --- a/backend/src/schemas/finding.py +++ b/backend/src/schemas/finding.py @@ -1,168 +1,168 @@ -""" -Esquemas para hallazgos encontrados en análisis -""" - -from __future__ import annotations - -from datetime import datetime, timezone -from enum import Enum -from typing import Any, ClassVar, Dict, Optional, cast - -from pydantic import BaseModel, ConfigDict, Field - - -class Severity(str, Enum): - """ - Niveles de severidad de un hallazgo. 
- - CRITICAL: Riesgo inmediato, debe corregirse - HIGH: Importante, debe corregirse pronto - MEDIUM: Moderado, se recomienda corrección - LOW: Menor, mejora opcional - INFO: Información, no es un problema - """ - - CRITICAL = "CRITICAL" - HIGH = "HIGH" - MEDIUM = "MEDIUM" - LOW = "LOW" - INFO = "INFO" - - -class Finding(BaseModel): - """ - Hallazgo encontrado durante el análisis de código. - - Attributes: - severity: Nivel de severidad del hallazgo - issue_type: Tipo de problema (ej: dangerous_function, sql_injection) - message: Descripción del problema - line_number: Número de línea donde se encontró (1-based) - agent_name: Nombre del agente que detectó el hallazgo - code_snippet: Fragmento de código problemático (opcional) - suggestion: Sugerencia de cómo corregir (opcional) - rule_id: ID de la regla que se violó (opcional) - detected_at: Timestamp de detección - - Example: - finding = Finding( - severity=Severity.CRITICAL, - issue_type="dangerous_function", - message="Use of eval() detected", - line_number=10, - agent_name="SecurityAgent", - code_snippet="result = eval(user_input)", - suggestion="Use ast.literal_eval() instead", - rule_id="SEC001_EVAL" - ) - """ - - severity: Severity = Field(..., description="Nivel de severidad") - issue_type: str = Field(..., min_length=1, description="Tipo de problema") - message: str = Field(..., min_length=5, description="Descripción del problema") - line_number: int = Field(..., ge=1, description="Número de línea (1-based)") - agent_name: str = Field(..., min_length=1, description="Nombre del agente") - code_snippet: Optional[str] = Field(default=None, description="Fragmento de código") - suggestion: Optional[str] = Field(default=None, description="Sugerencia de corrección") - rule_id: Optional[str] = Field(default=None, description="ID de la regla") - detected_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc), description="Timestamp de detección" - ) - - model_config = ConfigDict( - 
json_schema_extra={ - "example": { - "severity": "CRITICAL", - "issue_type": "dangerous_function", - "message": "Use of eval() detected", - "line_number": 10, - "agent_name": "SecurityAgent", - "code_snippet": "result = eval(user_input)", - "suggestion": "Use ast.literal_eval() instead", - "rule_id": "SEC001_EVAL", - } - } - ) - - PENALTY_BY_SEVERITY: ClassVar[Dict[Severity, int]] = { - Severity.CRITICAL: 10, - Severity.HIGH: 5, - Severity.MEDIUM: 2, - Severity.LOW: 1, - Severity.INFO: 0, - } - - @property - def is_critical(self) -> bool: - """Retorna True si el hallazgo es crítico.""" - return self.severity == Severity.CRITICAL - - @property - def is_high_or_critical(self) -> bool: - """Retorna True si el hallazgo es HIGH o CRITICAL.""" - return self.severity in (Severity.CRITICAL, Severity.HIGH) - - @property - def is_actionable(self) -> bool: - """Retorna True si el hallazgo requiere acción (no INFO).""" - return self.severity != Severity.INFO - - @classmethod - def from_dict(cls, data: dict) -> "Finding": - """ - Crea un Finding desde un diccionario. - - Args: - data: Diccionario con datos del finding - - Returns: - Instancia de Finding - """ - detected_at_str = data.get("detected_at") - detected_at = ( - datetime.fromisoformat(detected_at_str) - if detected_at_str - else datetime.now(timezone.utc) - ) - return cls( - severity=Severity(data["severity"]), - issue_type=data["issue_type"], - message=data["message"], - line_number=data["line_number"], - agent_name=data["agent_name"], - code_snippet=data.get("code_snippet"), - suggestion=data.get("suggestion"), - rule_id=data.get("rule_id"), - detected_at=detected_at, - ) - - def to_dict(self) -> Dict[str, Any]: - """ - Convierte el Finding a diccionario para persistencia. 
- - Returns: - Diccionario con todos los campos del finding - """ - severity_value = cast(Severity, self.severity).value - detected_at_value = cast(datetime, self.detected_at) - - return { - "severity": severity_value, - "issue_type": self.issue_type, - "message": self.message, - "line_number": self.line_number, - "agent_name": self.agent_name, - "code_snippet": self.code_snippet, - "suggestion": self.suggestion, - "rule_id": self.rule_id, - "detected_at": detected_at_value.isoformat(), - } - - def calculate_penalty(self) -> int: - """ - Calcula el penalty para el quality score según severidad. - - Returns: - Penalty points (CRITICAL=10, HIGH=5, MEDIUM=2, LOW=1, INFO=0) - """ - return self.PENALTY_BY_SEVERITY.get(self.severity, 0) +""" +Esquemas para hallazgos encontrados en análisis +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from enum import Enum +from typing import Any, ClassVar, Dict, Optional, cast + +from pydantic import BaseModel, ConfigDict, Field + + +class Severity(str, Enum): + """ + Niveles de severidad de un hallazgo. + + CRITICAL: Riesgo inmediato, debe corregirse + HIGH: Importante, debe corregirse pronto + MEDIUM: Moderado, se recomienda corrección + LOW: Menor, mejora opcional + INFO: Información, no es un problema + """ + + CRITICAL = "CRITICAL" + HIGH = "HIGH" + MEDIUM = "MEDIUM" + LOW = "LOW" + INFO = "INFO" + + +class Finding(BaseModel): + """ + Hallazgo encontrado durante el análisis de código. 
+ + Attributes: + severity: Nivel de severidad del hallazgo + issue_type: Tipo de problema (ej: dangerous_function, sql_injection) + message: Descripción del problema + line_number: Número de línea donde se encontró (1-based) + agent_name: Nombre del agente que detectó el hallazgo + code_snippet: Fragmento de código problemático (opcional) + suggestion: Sugerencia de cómo corregir (opcional) + rule_id: ID de la regla que se violó (opcional) + detected_at: Timestamp de detección + + Example: + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval() detected", + line_number=10, + agent_name="SecurityAgent", + code_snippet="result = eval(user_input)", + suggestion="Use ast.literal_eval() instead", + rule_id="SEC001_EVAL" + ) + """ + + severity: Severity = Field(..., description="Nivel de severidad") + issue_type: str = Field(..., min_length=1, description="Tipo de problema") + message: str = Field(..., min_length=5, description="Descripción del problema") + line_number: int = Field(..., ge=1, description="Número de línea (1-based)") + agent_name: str = Field(..., min_length=1, description="Nombre del agente") + code_snippet: Optional[str] = Field(default=None, description="Fragmento de código") + suggestion: Optional[str] = Field(default=None, description="Sugerencia de corrección") + rule_id: Optional[str] = Field(default=None, description="ID de la regla") + detected_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), description="Timestamp de detección" + ) + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "severity": "CRITICAL", + "issue_type": "dangerous_function", + "message": "Use of eval() detected", + "line_number": 10, + "agent_name": "SecurityAgent", + "code_snippet": "result = eval(user_input)", + "suggestion": "Use ast.literal_eval() instead", + "rule_id": "SEC001_EVAL", + } + } + ) + + PENALTY_BY_SEVERITY: ClassVar[Dict[Severity, int]] = { + 
Severity.CRITICAL: 10, + Severity.HIGH: 5, + Severity.MEDIUM: 2, + Severity.LOW: 1, + Severity.INFO: 0, + } + + @property + def is_critical(self) -> bool: + """Retorna True si el hallazgo es crítico.""" + return self.severity == Severity.CRITICAL + + @property + def is_high_or_critical(self) -> bool: + """Retorna True si el hallazgo es HIGH o CRITICAL.""" + return self.severity in (Severity.CRITICAL, Severity.HIGH) + + @property + def is_actionable(self) -> bool: + """Retorna True si el hallazgo requiere acción (no INFO).""" + return self.severity != Severity.INFO + + @classmethod + def from_dict(cls, data: dict) -> "Finding": + """ + Crea un Finding desde un diccionario. + + Args: + data: Diccionario con datos del finding + + Returns: + Instancia de Finding + """ + detected_at_str = data.get("detected_at") + detected_at = ( + datetime.fromisoformat(detected_at_str) + if detected_at_str + else datetime.now(timezone.utc) + ) + return cls( + severity=Severity(data["severity"]), + issue_type=data["issue_type"], + message=data["message"], + line_number=data["line_number"], + agent_name=data["agent_name"], + code_snippet=data.get("code_snippet"), + suggestion=data.get("suggestion"), + rule_id=data.get("rule_id"), + detected_at=detected_at, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Convierte el Finding a diccionario para persistencia. + + Returns: + Diccionario con todos los campos del finding + """ + severity_value = cast(Severity, self.severity).value + detected_at_value = cast(datetime, self.detected_at) + + return { + "severity": severity_value, + "issue_type": self.issue_type, + "message": self.message, + "line_number": self.line_number, + "agent_name": self.agent_name, + "code_snippet": self.code_snippet, + "suggestion": self.suggestion, + "rule_id": self.rule_id, + "detected_at": detected_at_value.isoformat(), + } + + def calculate_penalty(self) -> int: + """ + Calcula el penalty para el quality score según severidad. 
+ + Returns: + Penalty points (CRITICAL=10, HIGH=5, MEDIUM=2, LOW=1, INFO=0) + """ + return self.PENALTY_BY_SEVERITY.get(self.severity, 0) diff --git a/backend/src/schemas/user.py b/backend/src/schemas/user.py index 887a9d4..522a51d 100644 --- a/backend/src/schemas/user.py +++ b/backend/src/schemas/user.py @@ -1,44 +1,44 @@ -""" -Esquemas de usuario para CodeGuard AI -""" - -from enum import Enum -from typing import Optional - -from pydantic import BaseModel, ConfigDict, EmailStr, Field - - -class Role(str, Enum): - """Roles de usuario.""" - - DEVELOPER = "developer" - ADMIN = "admin" - - -class User(BaseModel): - """ - Modelo de usuario autenticado. - - Attributes: - id: Clerk user ID - email: Email del usuario - name: Nombre completo - role: Rol (developer o admin) - """ - - id: str = Field(..., description="Clerk user ID") - email: EmailStr = Field(..., description="Email del usuario") - name: Optional[str] = Field(default=None, description="Nombre completo") - role: Role = Field(default=Role.DEVELOPER, description="Rol del usuario") - - model_config = ConfigDict( - from_attributes=True, - json_schema_extra={ - "example": { - "id": "user_123", - "email": "dev@codeguard.ai", - "name": "Developer", - "role": "developer", - } - }, - ) +""" +Esquemas de usuario para CodeGuard AI +""" + +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, ConfigDict, EmailStr, Field + + +class Role(str, Enum): + """Roles de usuario.""" + + DEVELOPER = "developer" + ADMIN = "admin" + + +class User(BaseModel): + """ + Modelo de usuario autenticado. 
+ + Attributes: + id: Clerk user ID + email: Email del usuario + name: Nombre completo + role: Rol (developer o admin) + """ + + id: str = Field(..., description="Clerk user ID") + email: EmailStr = Field(..., description="Email del usuario") + name: Optional[str] = Field(default=None, description="Nombre completo") + role: Role = Field(default=Role.DEVELOPER, description="Rol del usuario") + + model_config = ConfigDict( + from_attributes=True, + json_schema_extra={ + "example": { + "id": "user_123", + "email": "dev@codeguard.ai", + "name": "Developer", + "role": "developer", + } + }, + ) diff --git a/backend/src/services/ai_service.py b/backend/src/services/ai_service.py index 4e2fec6..fff7bb4 100644 --- a/backend/src/services/ai_service.py +++ b/backend/src/services/ai_service.py @@ -1,418 +1,418 @@ -""" -AI Explainer Service - -Servicio principal para generar explicaciones de seguridad usando IA generativa. -Integra el cliente de Vertex AI, enriquecimiento de contexto MCP y rate limiting. 
- -Principios de diseño: -- SRP: Solo orquesta la generación de explicaciones -- Acoplamiento débil: Depende de interfaces (AIClient) -- Defensibilidad: Rate limiting y validación de entrada -- Async: Todas las operaciones son asíncronas -""" - -import logging -from collections import defaultdict -from datetime import datetime, timedelta, timezone -from typing import Dict, Optional, Tuple - -from src.core.config.ai_config import get_ai_settings -from src.external.gemini_client import get_ai_client -from src.external.interfaces import ( - AIClient, - AIClientError, - AIRateLimitError, -) -from src.schemas.ai_explanation import AIExplanation, RateLimitInfo -from src.schemas.finding import Finding -from src.services.mcp_context_enricher import ( - EnrichedContext, - MCPContextEnricher, - get_mcp_context_enricher, -) - -logger = logging.getLogger(__name__) - - -class RateLimitExceeded(Exception): - """Excepción cuando el usuario excede su límite de requests.""" - - def __init__(self, message: str, rate_limit_info: RateLimitInfo): - super().__init__(message) - self.rate_limit_info = rate_limit_info - - -class AIExplanationError(Exception): - """Error general en la generación de explicación.""" - - pass - - -class InMemoryRateLimiter: - """ - Rate limiter en memoria para controlar requests por usuario. - - Esta implementación es para desarrollo. En producción se puede - reemplazar por un RateLimiter basado en Redis siguiendo el - patrón Adapter. - - Attributes: - limit_per_hour: Máximo de requests por hora - user_requests: Diccionario de requests por usuario - """ - - def __init__(self, limit_per_hour: int = 10): - """ - Inicializa el rate limiter. 
- - Args: - limit_per_hour: Límite de requests por usuario por hora - """ - self._limit_per_hour = limit_per_hour - # user_id -> list of timestamps - self._user_requests: Dict[str, list[datetime]] = defaultdict(list) - - def check_and_consume(self, user_id: str) -> RateLimitInfo: - """ - Verifica si el usuario puede hacer un request y lo consume. - - Args: - user_id: ID del usuario - - Returns: - RateLimitInfo con el estado actual - - Raises: - RateLimitExceeded: Si el usuario excede su límite - """ - now = datetime.now(timezone.utc) - hour_ago = now - timedelta(hours=1) - - # Limpiar requests antiguos - self._user_requests[user_id] = [ts for ts in self._user_requests[user_id] if ts > hour_ago] - - # Calcular info de rate limit - requests_used = len(self._user_requests[user_id]) - requests_remaining = max(0, self._limit_per_hour - requests_used) - - # Calcular cuando se resetea (1 hora desde el request más antiguo) - if self._user_requests[user_id]: - oldest = min(self._user_requests[user_id]) - reset_at = oldest + timedelta(hours=1) - else: - reset_at = now + timedelta(hours=1) - - rate_limit_info = RateLimitInfo( - requests_remaining=requests_remaining - 1 if requests_remaining > 0 else 0, - requests_limit=self._limit_per_hour, - reset_at=reset_at, - ) - - # Verificar límite - if requests_remaining <= 0: - raise RateLimitExceeded( - f"Rate limit exceeded. Limit: {self._limit_per_hour}/hour", - rate_limit_info, - ) - - # Consumir request - self._user_requests[user_id].append(now) - return rate_limit_info - - def get_remaining(self, user_id: str) -> RateLimitInfo: - """ - Obtiene el estado del rate limit sin consumir. 
- - Args: - user_id: ID del usuario - - Returns: - RateLimitInfo con el estado actual - """ - now = datetime.now(timezone.utc) - hour_ago = now - timedelta(hours=1) - - # Limpiar y contar - self._user_requests[user_id] = [ts for ts in self._user_requests[user_id] if ts > hour_ago] - requests_used = len(self._user_requests[user_id]) - requests_remaining = max(0, self._limit_per_hour - requests_used) - - if self._user_requests[user_id]: - oldest = min(self._user_requests[user_id]) - reset_at = oldest + timedelta(hours=1) - else: - reset_at = now + timedelta(hours=1) - - return RateLimitInfo( - requests_remaining=requests_remaining, - requests_limit=self._limit_per_hour, - reset_at=reset_at, - ) - - -class AIExplainerService: - """ - Servicio para generar explicaciones de seguridad con IA. - - Orquesta el proceso completo: - 1. Verifica rate limit del usuario - 2. Enriquece el hallazgo con contexto OWASP - 3. Construye el prompt DevSecOps - 4. Llama al modelo de IA - 5. Parsea y retorna la explicación - - Principios: - - SRP: Solo orquesta, delega a componentes especializados - - Acoplamiento débil: Dependencias inyectadas - - Defensibilidad: Valida entrada, maneja errores - - Testeabilidad: Fácil de mockear dependencias - - Example: - service = AIExplainerService() - explanation = await service.explain_finding( - finding=my_finding, - code_context="full source code", - user_id="user-123" - ) - """ - - # Prompt template DevSecOps - PROMPT_TEMPLATE = """Eres un experto en DevSecOps y seguridad de aplicaciones. -Tu rol es explicar vulnerabilidades de seguridad a desarrolladores de forma clara, -educativa y accionable. - -{context} - -## Tu Tarea - -Proporciona una explicación completa que incluya: - -1. **Explicación del Problema**: Explica qué es esta vulnerabilidad, por qué es peligrosa - y qué impacto podría tener en la aplicación (1-2 párrafos). - -2. **Código Corregido**: Proporciona el código corregido que soluciona el problema. 
- Incluye comentarios explicando los cambios. - -3. **Ejemplo de Ataque**: Muestra un ejemplo concreto de cómo un atacante podría explotar - esta vulnerabilidad (código o pasos). - -4. **Referencias**: Lista referencias relevantes (OWASP, CWE, etc.). - -## Formato de Respuesta - -Responde en formato JSON con esta estructura: -```json -{{ - "explanation": "Explicación detallada del problema...", - "suggested_fix": "Código corregido con comentarios...", - "attack_example": "Ejemplo de cómo explotar la vulnerabilidad...", - "references": ["OWASP A03:2021", "CWE-94"] -}} -``` - -IMPORTANTE: -- Responde SOLO con el JSON, sin texto adicional -- La explicación debe ser en español -- El código debe ser Python válido -- Sé específico sobre el contexto del código analizado -""" - - def __init__( - self, - ai_client: Optional[AIClient] = None, - context_enricher: Optional[MCPContextEnricher] = None, - rate_limiter: Optional[InMemoryRateLimiter] = None, - ): - """ - Inicializa el servicio con dependencias inyectadas. - - Args: - ai_client: Cliente de IA (default: VertexAIClient) - context_enricher: Enriquecedor de contexto (default: MCPContextEnricher) - rate_limiter: Rate limiter (default: InMemoryRateLimiter) - """ - settings = get_ai_settings() - - self._ai_client = ai_client or get_ai_client() - self._context_enricher = context_enricher or get_mcp_context_enricher() - self._rate_limiter = rate_limiter or InMemoryRateLimiter( - limit_per_hour=settings.AI_RATE_LIMIT_PER_HOUR - ) - - async def explain_finding( - self, - finding: Finding, - code_context: Optional[str] = None, - user_id: str = "anonymous", - ) -> Tuple[AIExplanation, RateLimitInfo]: - """ - Genera una explicación de IA para un hallazgo de seguridad. 
- - Args: - finding: El hallazgo a explicar - code_context: Código fuente completo para contexto (opcional) - user_id: ID del usuario para rate limiting - - Returns: - Tupla (AIExplanation, RateLimitInfo) - - Raises: - RateLimitExceeded: Si el usuario excede su límite - AIExplanationError: Si hay error en la generación - """ - # 1. Verificar rate limit - rate_limit_info = self._rate_limiter.check_and_consume(user_id) - - try: - # 2. Enriquecer con contexto OWASP - enriched = await self._context_enricher.enrich(finding) - - # 3. Construir prompt - prompt = self._build_prompt(enriched, code_context) - - # 4. Llamar a IA - logger.info( - f"Generating AI explanation for finding: " - f"rule_id={finding.rule_id}, user_id={user_id}" - ) - - response = await self._ai_client.generate_explanation(prompt) - - # 5. Parsear respuesta - explanation = self._parse_response( - response.content, response.model_name, response.tokens_used - ) - - logger.info( - f"AI explanation generated successfully. " f"tokens_used={response.tokens_used}" - ) - - return explanation, rate_limit_info - - except AIRateLimitError as e: - logger.warning(f"AI API rate limit hit: {e}") - raise AIExplanationError( - "El servicio de IA está temporalmente sobrecargado. " - "Intenta de nuevo en unos minutos." - ) from e - - except AIClientError as e: - logger.error(f"AI client error: {e}") - raise AIExplanationError(f"Error al comunicarse con el servicio de IA: {e}") from e - - except Exception as e: - logger.error(f"Unexpected error generating explanation: {e}") - raise AIExplanationError(f"Error inesperado generando explicación: {e}") from e - - def _build_prompt(self, enriched: EnrichedContext, code_context: Optional[str]) -> str: - """ - Construye el prompt completo para el modelo de IA. 
- - Args: - enriched: Contexto enriquecido con OWASP - code_context: Código fuente adicional (opcional) - - Returns: - Prompt formateado - """ - context_parts = [enriched.formatted_prompt_context] - - # Agregar código fuente completo si está disponible - if code_context: - context_parts.append(f"## Código Fuente Completo\n```python\n{code_context}\n```") - - full_context = "\n\n".join(context_parts) - return self.PROMPT_TEMPLATE.format(context=full_context) - - def _parse_response(self, content: str, model_name: str, tokens_used: int) -> AIExplanation: - """ - Parsea la respuesta del modelo de IA. - - Args: - content: Contenido de la respuesta - model_name: Nombre del modelo usado - tokens_used: Tokens consumidos - - Returns: - AIExplanation parseada - """ - import json - - # Intentar extraer JSON de la respuesta - try: - # La respuesta debería ser JSON puro - # Pero a veces viene con markdown code blocks - clean_content = content.strip() - - # Remover bloques de código markdown si existen - if clean_content.startswith("```"): - lines = clean_content.split("\n") - # Remover primera y última línea (```json y ```) - clean_content = "\n".join(lines[1:-1]) - - data = json.loads(clean_content) - - return AIExplanation( - explanation=data.get("explanation", "Sin explicación disponible"), - suggested_fix=data.get("suggested_fix", "# Sin sugerencia disponible"), - attack_example=data.get("attack_example"), - references=data.get("references"), - model_used=model_name, - tokens_used=tokens_used, - ) - - except json.JSONDecodeError: - # Si no es JSON válido, usar el contenido como explicación - logger.warning("Could not parse AI response as JSON, using raw content") - return AIExplanation( - explanation=content, - suggested_fix="# Ver explicación para sugerencias", - attack_example=None, - references=None, - model_used=model_name, - tokens_used=tokens_used, - ) - - def get_rate_limit_info(self, user_id: str) -> RateLimitInfo: - """ - Obtiene el estado del rate limit para un 
usuario. - - Args: - user_id: ID del usuario - - Returns: - RateLimitInfo con el estado actual - """ - return self._rate_limiter.get_remaining(user_id) - - @property - def is_configured(self) -> bool: - """Indica si el servicio está configurado correctamente.""" - return self._ai_client.is_configured - - -# Factory function para inyección de dependencias -_service_instance: Optional[AIExplainerService] = None - - -def get_ai_explainer_service() -> AIExplainerService: - """ - Factory function para obtener el servicio de explicaciones. - - Usa singleton para reutilizar el rate limiter en memoria. - - Returns: - Instancia de AIExplainerService - """ - global _service_instance - if _service_instance is None: - _service_instance = AIExplainerService() - return _service_instance - - -def reset_ai_explainer_service() -> None: - """ - Resetea el singleton (útil para testing). - """ - global _service_instance - _service_instance = None +""" +AI Explainer Service + +Servicio principal para generar explicaciones de seguridad usando IA generativa. +Integra el cliente de Vertex AI, enriquecimiento de contexto MCP y rate limiting. 
+ +Principios de diseño: +- SRP: Solo orquesta la generación de explicaciones +- Acoplamiento débil: Depende de interfaces (AIClient) +- Defensibilidad: Rate limiting y validación de entrada +- Async: Todas las operaciones son asíncronas +""" + +import logging +from collections import defaultdict +from datetime import datetime, timedelta, timezone +from typing import Dict, Optional, Tuple + +from src.core.config.ai_config import get_ai_settings +from src.external.gemini_client import get_ai_client +from src.external.interfaces import ( + AIClient, + AIClientError, + AIRateLimitError, +) +from src.schemas.ai_explanation import AIExplanation, RateLimitInfo +from src.schemas.finding import Finding +from src.services.mcp_context_enricher import ( + EnrichedContext, + MCPContextEnricher, + get_mcp_context_enricher, +) + +logger = logging.getLogger(__name__) + + +class RateLimitExceeded(Exception): + """Excepción cuando el usuario excede su límite de requests.""" + + def __init__(self, message: str, rate_limit_info: RateLimitInfo): + super().__init__(message) + self.rate_limit_info = rate_limit_info + + +class AIExplanationError(Exception): + """Error general en la generación de explicación.""" + + pass + + +class InMemoryRateLimiter: + """ + Rate limiter en memoria para controlar requests por usuario. + + Esta implementación es para desarrollo. En producción se puede + reemplazar por un RateLimiter basado en Redis siguiendo el + patrón Adapter. + + Attributes: + limit_per_hour: Máximo de requests por hora + user_requests: Diccionario de requests por usuario + """ + + def __init__(self, limit_per_hour: int = 10): + """ + Inicializa el rate limiter. 
+ + Args: + limit_per_hour: Límite de requests por usuario por hora + """ + self._limit_per_hour = limit_per_hour + # user_id -> list of timestamps + self._user_requests: Dict[str, list[datetime]] = defaultdict(list) + + def check_and_consume(self, user_id: str) -> RateLimitInfo: + """ + Verifica si el usuario puede hacer un request y lo consume. + + Args: + user_id: ID del usuario + + Returns: + RateLimitInfo con el estado actual + + Raises: + RateLimitExceeded: Si el usuario excede su límite + """ + now = datetime.now(timezone.utc) + hour_ago = now - timedelta(hours=1) + + # Limpiar requests antiguos + self._user_requests[user_id] = [ts for ts in self._user_requests[user_id] if ts > hour_ago] + + # Calcular info de rate limit + requests_used = len(self._user_requests[user_id]) + requests_remaining = max(0, self._limit_per_hour - requests_used) + + # Calcular cuando se resetea (1 hora desde el request más antiguo) + if self._user_requests[user_id]: + oldest = min(self._user_requests[user_id]) + reset_at = oldest + timedelta(hours=1) + else: + reset_at = now + timedelta(hours=1) + + rate_limit_info = RateLimitInfo( + requests_remaining=requests_remaining - 1 if requests_remaining > 0 else 0, + requests_limit=self._limit_per_hour, + reset_at=reset_at, + ) + + # Verificar límite + if requests_remaining <= 0: + raise RateLimitExceeded( + f"Rate limit exceeded. Limit: {self._limit_per_hour}/hour", + rate_limit_info, + ) + + # Consumir request + self._user_requests[user_id].append(now) + return rate_limit_info + + def get_remaining(self, user_id: str) -> RateLimitInfo: + """ + Obtiene el estado del rate limit sin consumir. 
+ + Args: + user_id: ID del usuario + + Returns: + RateLimitInfo con el estado actual + """ + now = datetime.now(timezone.utc) + hour_ago = now - timedelta(hours=1) + + # Limpiar y contar + self._user_requests[user_id] = [ts for ts in self._user_requests[user_id] if ts > hour_ago] + requests_used = len(self._user_requests[user_id]) + requests_remaining = max(0, self._limit_per_hour - requests_used) + + if self._user_requests[user_id]: + oldest = min(self._user_requests[user_id]) + reset_at = oldest + timedelta(hours=1) + else: + reset_at = now + timedelta(hours=1) + + return RateLimitInfo( + requests_remaining=requests_remaining, + requests_limit=self._limit_per_hour, + reset_at=reset_at, + ) + + +class AIExplainerService: + """ + Servicio para generar explicaciones de seguridad con IA. + + Orquesta el proceso completo: + 1. Verifica rate limit del usuario + 2. Enriquece el hallazgo con contexto OWASP + 3. Construye el prompt DevSecOps + 4. Llama al modelo de IA + 5. Parsea y retorna la explicación + + Principios: + - SRP: Solo orquesta, delega a componentes especializados + - Acoplamiento débil: Dependencias inyectadas + - Defensibilidad: Valida entrada, maneja errores + - Testeabilidad: Fácil de mockear dependencias + + Example: + service = AIExplainerService() + explanation = await service.explain_finding( + finding=my_finding, + code_context="full source code", + user_id="user-123" + ) + """ + + # Prompt template DevSecOps + PROMPT_TEMPLATE = """Eres un experto en DevSecOps y seguridad de aplicaciones. +Tu rol es explicar vulnerabilidades de seguridad a desarrolladores de forma clara, +educativa y accionable. + +{context} + +## Tu Tarea + +Proporciona una explicación completa que incluya: + +1. **Explicación del Problema**: Explica qué es esta vulnerabilidad, por qué es peligrosa + y qué impacto podría tener en la aplicación (1-2 párrafos). + +2. **Código Corregido**: Proporciona el código corregido que soluciona el problema. 
+ Incluye comentarios explicando los cambios. + +3. **Ejemplo de Ataque**: Muestra un ejemplo concreto de cómo un atacante podría explotar + esta vulnerabilidad (código o pasos). + +4. **Referencias**: Lista referencias relevantes (OWASP, CWE, etc.). + +## Formato de Respuesta + +Responde en formato JSON con esta estructura: +```json +{{ + "explanation": "Explicación detallada del problema...", + "suggested_fix": "Código corregido con comentarios...", + "attack_example": "Ejemplo de cómo explotar la vulnerabilidad...", + "references": ["OWASP A03:2021", "CWE-94"] +}} +``` + +IMPORTANTE: +- Responde SOLO con el JSON, sin texto adicional +- La explicación debe ser en español +- El código debe ser Python válido +- Sé específico sobre el contexto del código analizado +""" + + def __init__( + self, + ai_client: Optional[AIClient] = None, + context_enricher: Optional[MCPContextEnricher] = None, + rate_limiter: Optional[InMemoryRateLimiter] = None, + ): + """ + Inicializa el servicio con dependencias inyectadas. + + Args: + ai_client: Cliente de IA (default: VertexAIClient) + context_enricher: Enriquecedor de contexto (default: MCPContextEnricher) + rate_limiter: Rate limiter (default: InMemoryRateLimiter) + """ + settings = get_ai_settings() + + self._ai_client = ai_client or get_ai_client() + self._context_enricher = context_enricher or get_mcp_context_enricher() + self._rate_limiter = rate_limiter or InMemoryRateLimiter( + limit_per_hour=settings.AI_RATE_LIMIT_PER_HOUR + ) + + async def explain_finding( + self, + finding: Finding, + code_context: Optional[str] = None, + user_id: str = "anonymous", + ) -> Tuple[AIExplanation, RateLimitInfo]: + """ + Genera una explicación de IA para un hallazgo de seguridad. 
+ + Args: + finding: El hallazgo a explicar + code_context: Código fuente completo para contexto (opcional) + user_id: ID del usuario para rate limiting + + Returns: + Tupla (AIExplanation, RateLimitInfo) + + Raises: + RateLimitExceeded: Si el usuario excede su límite + AIExplanationError: Si hay error en la generación + """ + # 1. Verificar rate limit + rate_limit_info = self._rate_limiter.check_and_consume(user_id) + + try: + # 2. Enriquecer con contexto OWASP + enriched = await self._context_enricher.enrich(finding) + + # 3. Construir prompt + prompt = self._build_prompt(enriched, code_context) + + # 4. Llamar a IA + logger.info( + f"Generating AI explanation for finding: " + f"rule_id={finding.rule_id}, user_id={user_id}" + ) + + response = await self._ai_client.generate_explanation(prompt) + + # 5. Parsear respuesta + explanation = self._parse_response( + response.content, response.model_name, response.tokens_used + ) + + logger.info( + f"AI explanation generated successfully. " f"tokens_used={response.tokens_used}" + ) + + return explanation, rate_limit_info + + except AIRateLimitError as e: + logger.warning(f"AI API rate limit hit: {e}") + raise AIExplanationError( + "El servicio de IA está temporalmente sobrecargado. " + "Intenta de nuevo en unos minutos." + ) from e + + except AIClientError as e: + logger.error(f"AI client error: {e}") + raise AIExplanationError(f"Error al comunicarse con el servicio de IA: {e}") from e + + except Exception as e: + logger.error(f"Unexpected error generating explanation: {e}") + raise AIExplanationError(f"Error inesperado generando explicación: {e}") from e + + def _build_prompt(self, enriched: EnrichedContext, code_context: Optional[str]) -> str: + """ + Construye el prompt completo para el modelo de IA. 
+ + Args: + enriched: Contexto enriquecido con OWASP + code_context: Código fuente adicional (opcional) + + Returns: + Prompt formateado + """ + context_parts = [enriched.formatted_prompt_context] + + # Agregar código fuente completo si está disponible + if code_context: + context_parts.append(f"## Código Fuente Completo\n```python\n{code_context}\n```") + + full_context = "\n\n".join(context_parts) + return self.PROMPT_TEMPLATE.format(context=full_context) + + def _parse_response(self, content: str, model_name: str, tokens_used: int) -> AIExplanation: + """ + Parsea la respuesta del modelo de IA. + + Args: + content: Contenido de la respuesta + model_name: Nombre del modelo usado + tokens_used: Tokens consumidos + + Returns: + AIExplanation parseada + """ + import json + + # Intentar extraer JSON de la respuesta + try: + # La respuesta debería ser JSON puro + # Pero a veces viene con markdown code blocks + clean_content = content.strip() + + # Remover bloques de código markdown si existen + if clean_content.startswith("```"): + lines = clean_content.split("\n") + # Remover primera y última línea (```json y ```) + clean_content = "\n".join(lines[1:-1]) + + data = json.loads(clean_content) + + return AIExplanation( + explanation=data.get("explanation", "Sin explicación disponible"), + suggested_fix=data.get("suggested_fix", "# Sin sugerencia disponible"), + attack_example=data.get("attack_example"), + references=data.get("references"), + model_used=model_name, + tokens_used=tokens_used, + ) + + except json.JSONDecodeError: + # Si no es JSON válido, usar el contenido como explicación + logger.warning("Could not parse AI response as JSON, using raw content") + return AIExplanation( + explanation=content, + suggested_fix="# Ver explicación para sugerencias", + attack_example=None, + references=None, + model_used=model_name, + tokens_used=tokens_used, + ) + + def get_rate_limit_info(self, user_id: str) -> RateLimitInfo: + """ + Obtiene el estado del rate limit para un 
usuario. + + Args: + user_id: ID del usuario + + Returns: + RateLimitInfo con el estado actual + """ + return self._rate_limiter.get_remaining(user_id) + + @property + def is_configured(self) -> bool: + """Indica si el servicio está configurado correctamente.""" + return self._ai_client.is_configured + + +# Factory function para inyección de dependencias +_service_instance: Optional[AIExplainerService] = None + + +def get_ai_explainer_service() -> AIExplainerService: + """ + Factory function para obtener el servicio de explicaciones. + + Usa singleton para reutilizar el rate limiter en memoria. + + Returns: + Instancia de AIExplainerService + """ + global _service_instance + if _service_instance is None: + _service_instance = AIExplainerService() + return _service_instance + + +def reset_ai_explainer_service() -> None: + """ + Resetea el singleton (útil para testing). + """ + global _service_instance + _service_instance = None diff --git a/backend/src/services/analysis_service.py b/backend/src/services/analysis_service.py index f36215b..72476e8 100644 --- a/backend/src/services/analysis_service.py +++ b/backend/src/services/analysis_service.py @@ -8,13 +8,15 @@ from fastapi import HTTPException, UploadFile -from src.agents.orchestrator import OrchestratorAgent +from src.agents.quality_agent import QualityAgent +from src.agents.security_agent import SecurityAgent +from src.agents.style_agent import StyleAgent from src.core.events.analysis_events import AnalysisEventType from src.core.events.event_bus import EventBus from src.models.enums.review_status import ReviewStatus from src.repositories.code_review_repository import CodeReviewRepository from src.schemas.analysis import AnalysisContext, CodeReview -from src.schemas.finding import Finding +from src.schemas.finding import Finding, Severity from src.utils.logger import logger @@ -33,8 +35,6 @@ def __init__(self, repo: CodeReviewRepository): """ self.repo = repo self.event_bus = EventBus() - # Orquestador de la 
capa de dominio, reutilizando el mismo EventBus - self.orchestrator = OrchestratorAgent(event_bus=self.event_bus) async def analyze_code(self, file: UploadFile, user_id: str) -> CodeReview: """ @@ -43,7 +43,7 @@ async def analyze_code(self, file: UploadFile, user_id: str) -> CodeReview: Flujo (RN4, RN5, RN8): 1. Validar archivo. 2. Crear contexto de análisis. - 3. Ejecutar agentes via OrchestratorAgent. + 3. Ejecutar SecurityAgent. 4. Calcular métricas. 5. Persistir resultados. @@ -74,12 +74,28 @@ async def analyze_code(self, file: UploadFile, user_id: str) -> CodeReview: # Notificar inicio usando el Enum self.event_bus.publish(AnalysisEventType.ANALYSIS_STARTED, {"id": str(analysis_id)}) - # 3. Ejecutar agentes via OrchestratorAgent (Security, Style, Quality, etc.) + # 3. Ejecutar Agentes (SecurityAgent, StyleAgent y QualityAgent) findings: List[Finding] = [] + + # Security Agent + Style Agent + try: + security_agent = SecurityAgent() + style_agent = StyleAgent() + + security_findings = security_agent.analyze(context) + style_findings = style_agent.analyze(context) + + findings = security_findings + style_findings + + except Exception as e: + logger.error(f"Error ejecutando agentes de analisis: {e}") + + # Quality Agent try: - findings = self.orchestrator.orchestrate_analysis(context) - except Exception as exc: # pylint: disable=broad-except - logger.error("Error ejecutando orquestador de analisis: %s", exc) + quality_agent = QualityAgent() + findings.extend(quality_agent.analyze(context)) + except Exception as e: + logger.error(f"Error ejecutando QualityAgent: {e}") # 4. Calcular Quality Score (RN8) quality_score = self._calculate_quality_score(findings) @@ -169,7 +185,23 @@ def _calculate_quality_score(self, findings: List[Finding]) -> int: """ Calcula el puntaje de calidad basado en penalizaciones (RN8). - Delegado al OrchestratorAgent para mantener la logica - de negocio en la capa de dominio. 
+ Fórmula: score = max(0, 100 - penalizaciones) + + Args: + findings: Lista de hallazgos detectados. + + Returns: + int: Puntaje de calidad (0-100). """ - return self.orchestrator.calculate_quality_score(findings) + penalty = 0 + for finding in findings: + if finding.severity == Severity.CRITICAL: + penalty += 10 + elif finding.severity == Severity.HIGH: + penalty += 5 + elif finding.severity == Severity.MEDIUM: + penalty += 2 + elif finding.severity == Severity.LOW: + penalty += 1 + + return max(0, 100 - penalty) diff --git a/backend/src/services/auth_service.py b/backend/src/services/auth_service.py index e5e6ab0..659c318 100644 --- a/backend/src/services/auth_service.py +++ b/backend/src/services/auth_service.py @@ -1,127 +1,127 @@ -""" -Servicio de autenticación. - -Orquesta la validación de tokens JWT de Clerk y la sincronización -de usuarios en la base de datos. -""" - -from src.external.clerk_client import ClerkClient, ClerkTokenInvalidError -from src.models.user import UserEntity -from src.repositories.user_repo import UserRepository -from src.schemas.user import Role, User - - -class AuthService: - """ - Servicio para gestionar la autenticación de usuarios. - - Responsabilidad: lógica de negocio para validación de tokens - y sincronización de usuarios con Clerk. - """ - - def __init__(self, clerk_client: ClerkClient, user_repository: UserRepository): - """ - Inicializa el servicio con sus dependencias. - - Args: - clerk_client: Cliente para validar tokens de Clerk. - user_repository: Repositorio para operaciones de usuarios. - """ - self._clerk_client = clerk_client - self._user_repository = user_repository - - def login_user(self, token: str) -> User: - """ - Procesa el login de un usuario con token de Clerk. - - Flujo: - 1. Valida el token JWT con Clerk - 2. Busca el usuario en la BD - 3. Si no existe, lo crea; si existe, actualiza sus datos - 4. Retorna el User schema - - Args: - token: Token JWT de Clerk. 
- - Returns: - User schema con los datos del usuario. - - Raises: - ClerkTokenError: Si el token es inválido o expirado. - """ - # 1. Validar token con Clerk - clerk_data = self._clerk_client.verify_token(token) - - # Clerk usa 'sub' para user_id en el payload JWT - user_id = clerk_data.get("sub") - if not user_id: - raise ClerkTokenInvalidError("Token no contiene 'sub' claim") - - email = clerk_data.get("email") - name = clerk_data.get("name") - - # 2. Buscar usuario en BD - user_entity = self._user_repository.get_by_id(user_id) - - # 3. Crear o actualizar usuario - if not user_entity: - user_entity = self._user_repository.create( - user_id=user_id, - email=email, - name=name, - ) - else: - user_entity = self._user_repository.update( - user=user_entity, - email=email, - name=name, - ) - - # 4. Convertir a schema - return self._entity_to_schema(user_entity) - - def get_user_from_token(self, token: str) -> User: - """ - Obtiene un User schema a partir de un token válido. - - No sincroniza con la BD, solo valida el token. - Útil para el middleware de protección de rutas. - - Args: - token: Token JWT de Clerk. - - Returns: - User schema con datos del token. - - Raises: - ClerkTokenError: Si el token es inválido o expirado. - """ - clerk_data = self._clerk_client.verify_token(token) - - user_id = clerk_data.get("sub") - if not user_id: - raise ClerkTokenInvalidError("Token no contiene 'sub' claim") - - return User( - id=user_id, - email=clerk_data.get("email", ""), - name=clerk_data.get("name"), - role=Role.DEVELOPER, - ) - - def _entity_to_schema(self, entity: UserEntity) -> User: - """ - Convierte UserEntity a User schema. - - Args: - entity: Entidad de usuario. - - Returns: - User schema. - """ - return User( - id=entity.id, - email=entity.email, - name=entity.name, - role=Role(entity.role.value.lower()), - ) +""" +Servicio de autenticación. + +Orquesta la validación de tokens JWT de Clerk y la sincronización +de usuarios en la base de datos. 
+""" + +from src.external.clerk_client import ClerkClient, ClerkTokenInvalidError +from src.models.user import UserEntity +from src.repositories.user_repo import UserRepository +from src.schemas.user import Role, User + + +class AuthService: + """ + Servicio para gestionar la autenticación de usuarios. + + Responsabilidad: lógica de negocio para validación de tokens + y sincronización de usuarios con Clerk. + """ + + def __init__(self, clerk_client: ClerkClient, user_repository: UserRepository): + """ + Inicializa el servicio con sus dependencias. + + Args: + clerk_client: Cliente para validar tokens de Clerk. + user_repository: Repositorio para operaciones de usuarios. + """ + self._clerk_client = clerk_client + self._user_repository = user_repository + + def login_user(self, token: str) -> User: + """ + Procesa el login de un usuario con token de Clerk. + + Flujo: + 1. Valida el token JWT con Clerk + 2. Busca el usuario en la BD + 3. Si no existe, lo crea; si existe, actualiza sus datos + 4. Retorna el User schema + + Args: + token: Token JWT de Clerk. + + Returns: + User schema con los datos del usuario. + + Raises: + ClerkTokenError: Si el token es inválido o expirado. + """ + # 1. Validar token con Clerk + clerk_data = self._clerk_client.verify_token(token) + + # Clerk usa 'sub' para user_id en el payload JWT + user_id = clerk_data.get("sub") + if not user_id: + raise ClerkTokenInvalidError("Token no contiene 'sub' claim") + + email = clerk_data.get("email") + name = clerk_data.get("name") + + # 2. Buscar usuario en BD + user_entity = self._user_repository.get_by_id(user_id) + + # 3. Crear o actualizar usuario + if not user_entity: + user_entity = self._user_repository.create( + user_id=user_id, + email=email, + name=name, + ) + else: + user_entity = self._user_repository.update( + user=user_entity, + email=email, + name=name, + ) + + # 4. 
Convertir a schema + return self._entity_to_schema(user_entity) + + def get_user_from_token(self, token: str) -> User: + """ + Obtiene un User schema a partir de un token válido. + + No sincroniza con la BD, solo valida el token. + Útil para el middleware de protección de rutas. + + Args: + token: Token JWT de Clerk. + + Returns: + User schema con datos del token. + + Raises: + ClerkTokenError: Si el token es inválido o expirado. + """ + clerk_data = self._clerk_client.verify_token(token) + + user_id = clerk_data.get("sub") + if not user_id: + raise ClerkTokenInvalidError("Token no contiene 'sub' claim") + + return User( + id=user_id, + email=clerk_data.get("email", ""), + name=clerk_data.get("name"), + role=Role.DEVELOPER, + ) + + def _entity_to_schema(self, entity: UserEntity) -> User: + """ + Convierte UserEntity a User schema. + + Args: + entity: Entidad de usuario. + + Returns: + User schema. + """ + return User( + id=entity.id, + email=entity.email, + name=entity.name, + role=Role(entity.role.value.lower()), + ) diff --git a/backend/src/services/mcp_context_enricher.py b/backend/src/services/mcp_context_enricher.py index 2c37d51..9a8aa44 100644 --- a/backend/src/services/mcp_context_enricher.py +++ b/backend/src/services/mcp_context_enricher.py @@ -1,198 +1,198 @@ -""" -MCP Context Enricher Service - -Enriquece los hallazgos de seguridad con contexto OWASP Top 10 y CWE -para mejorar las explicaciones generadas por IA. - -Este servicio actúa como un "Model Context Protocol" local que proporciona -contexto relevante de seguridad para cada hallazgo antes de enviarlo -al modelo de IA generativa. 
- -Principios de diseño: -- SRP: Solo enriquece contexto, no genera explicaciones -- Acoplamiento débil: Usa MCPClient interface, no implementaciones directas -- Async: Todas las operaciones son asíncronas para consistencia -""" - -from dataclasses import dataclass -from typing import Optional - -from src.core.config.mcp_config import SecurityContext, format_security_context -from src.external.mcp_client import MCPClient, get_mcp_client -from src.schemas.finding import Finding - - -@dataclass -class EnrichedContext: - """ - Contexto enriquecido para un hallazgo. - - Attributes: - finding: El hallazgo original - security_context: Contexto de seguridad OWASP (si aplica) - formatted_prompt_context: Texto formateado para incluir en el prompt - has_security_context: Indica si se encontró contexto OWASP - """ - - finding: Finding - security_context: Optional[SecurityContext] - formatted_prompt_context: str - has_security_context: bool - - @property - def is_security_finding(self) -> bool: - """Indica si es un hallazgo de seguridad con contexto OWASP.""" - return self.has_security_context - - -class MCPContextEnricher: - """ - Servicio para enriquecer hallazgos con contexto de seguridad. - - Este servicio usa MCPClient para buscar información relevante en la - base de conocimiento OWASP Top 10 y la formatea para prompts de IA. - - Example: - enricher = MCPContextEnricher() - context = await enricher.enrich(finding) - prompt = f"Analiza este hallazgo:\\n{context.formatted_prompt_context}" - - Principios: - - SRP: Solo enriquece, no genera explicaciones - - Acoplamiento débil: Depende de MCPClient interface - - Testeabilidad: MCP client inyectable facilita testing - """ - - def __init__(self, mcp_client: Optional[MCPClient] = None): - """ - Inicializa el enricher con un cliente MCP. 
- - Args: - mcp_client: Cliente MCP a usar (default: LocalMCPClient) - """ - self._mcp_client = mcp_client or get_mcp_client() - - async def enrich(self, finding: Finding) -> EnrichedContext: - """ - Enriquece un hallazgo con contexto de seguridad OWASP. - - Args: - finding: Hallazgo a enriquecer - - Returns: - EnrichedContext con información de seguridad relevante - """ - # Buscar contexto de seguridad usando MCP client - security_context = await self._mcp_client.get_security_context(finding) - - # Formatear el contexto del hallazgo - formatted_context = self._format_finding_context(finding, security_context) - - return EnrichedContext( - finding=finding, - security_context=security_context, - formatted_prompt_context=formatted_context, - has_security_context=security_context is not None, - ) - - async def enrich_batch(self, findings: list[Finding]) -> list[EnrichedContext]: - """ - Enriquece múltiples hallazgos de forma eficiente. - - Args: - findings: Lista de hallazgos a enriquecer - - Returns: - Lista de EnrichedContext - """ - return [await self.enrich(finding) for finding in findings] - - def _format_finding_context( - self, finding: Finding, security_context: Optional[SecurityContext] - ) -> str: - """ - Formatea el contexto completo del hallazgo para el prompt de IA. - - Incluye información del hallazgo original más contexto OWASP si existe. 
- - Args: - finding: Hallazgo original - security_context: Contexto de seguridad (opcional) - - Returns: - Texto formateado para incluir en el prompt - """ - sections = [] - - # Sección: Información del hallazgo - sections.append(self._format_finding_info(finding)) - - # Sección: Contexto de seguridad OWASP (si existe) - if security_context: - owasp_context = format_security_context(security_context) - sections.append(owasp_context) - - # Sección: Código problemático (si existe) - if finding.code_snippet: - sections.append(self._format_code_section(finding)) - - return "\n\n".join(sections) - - def _format_finding_info(self, finding: Finding) -> str: - """ - Formatea la información básica del hallazgo. - - Args: - finding: Hallazgo a formatear - - Returns: - Texto formateado con información del hallazgo - """ - lines = [ - "## Hallazgo Detectado", - f"- **Tipo**: {finding.issue_type}", - f"- **Severidad**: {finding.severity.value.upper()}", - f"- **Mensaje**: {finding.message}", - f"- **Línea**: {finding.line_number}", - f"- **Agente**: {finding.agent_name}", - ] - - if finding.rule_id: - lines.append(f"- **Regla**: {finding.rule_id}") - - if finding.suggestion: - lines.append(f"- **Sugerencia inicial**: {finding.suggestion}") - - return "\n".join(lines) - - def _format_code_section(self, finding: Finding) -> str: - """ - Formatea la sección de código problemático. - - Args: - finding: Hallazgo con código - - Returns: - Texto formateado con el código - """ - return ( - "## Código Problemático\n" - "```python\n" - f"# Línea {finding.line_number}\n" - f"{finding.code_snippet}\n" - "```" - ) - - -# Factory function para facilitar inyección de dependencias -def get_mcp_context_enricher(mcp_client: Optional[MCPClient] = None) -> MCPContextEnricher: - """ - Factory function para crear instancias del enricher. 
- - Args: - mcp_client: Cliente MCP opcional para inyección - - Returns: - Nueva instancia de MCPContextEnricher - """ - return MCPContextEnricher(mcp_client=mcp_client) +""" +MCP Context Enricher Service + +Enriquece los hallazgos de seguridad con contexto OWASP Top 10 y CWE +para mejorar las explicaciones generadas por IA. + +Este servicio actúa como un "Model Context Protocol" local que proporciona +contexto relevante de seguridad para cada hallazgo antes de enviarlo +al modelo de IA generativa. + +Principios de diseño: +- SRP: Solo enriquece contexto, no genera explicaciones +- Acoplamiento débil: Usa MCPClient interface, no implementaciones directas +- Async: Todas las operaciones son asíncronas para consistencia +""" + +from dataclasses import dataclass +from typing import Optional + +from src.core.config.mcp_config import SecurityContext, format_security_context +from src.external.mcp_client import MCPClient, get_mcp_client +from src.schemas.finding import Finding + + +@dataclass +class EnrichedContext: + """ + Contexto enriquecido para un hallazgo. + + Attributes: + finding: El hallazgo original + security_context: Contexto de seguridad OWASP (si aplica) + formatted_prompt_context: Texto formateado para incluir en el prompt + has_security_context: Indica si se encontró contexto OWASP + """ + + finding: Finding + security_context: Optional[SecurityContext] + formatted_prompt_context: str + has_security_context: bool + + @property + def is_security_finding(self) -> bool: + """Indica si es un hallazgo de seguridad con contexto OWASP.""" + return self.has_security_context + + +class MCPContextEnricher: + """ + Servicio para enriquecer hallazgos con contexto de seguridad. + + Este servicio usa MCPClient para buscar información relevante en la + base de conocimiento OWASP Top 10 y la formatea para prompts de IA. 
+ + Example: + enricher = MCPContextEnricher() + context = await enricher.enrich(finding) + prompt = f"Analiza este hallazgo:\\n{context.formatted_prompt_context}" + + Principios: + - SRP: Solo enriquece, no genera explicaciones + - Acoplamiento débil: Depende de MCPClient interface + - Testeabilidad: MCP client inyectable facilita testing + """ + + def __init__(self, mcp_client: Optional[MCPClient] = None): + """ + Inicializa el enricher con un cliente MCP. + + Args: + mcp_client: Cliente MCP a usar (default: LocalMCPClient) + """ + self._mcp_client = mcp_client or get_mcp_client() + + async def enrich(self, finding: Finding) -> EnrichedContext: + """ + Enriquece un hallazgo con contexto de seguridad OWASP. + + Args: + finding: Hallazgo a enriquecer + + Returns: + EnrichedContext con información de seguridad relevante + """ + # Buscar contexto de seguridad usando MCP client + security_context = await self._mcp_client.get_security_context(finding) + + # Formatear el contexto del hallazgo + formatted_context = self._format_finding_context(finding, security_context) + + return EnrichedContext( + finding=finding, + security_context=security_context, + formatted_prompt_context=formatted_context, + has_security_context=security_context is not None, + ) + + async def enrich_batch(self, findings: list[Finding]) -> list[EnrichedContext]: + """ + Enriquece múltiples hallazgos de forma eficiente. + + Args: + findings: Lista de hallazgos a enriquecer + + Returns: + Lista de EnrichedContext + """ + return [await self.enrich(finding) for finding in findings] + + def _format_finding_context( + self, finding: Finding, security_context: Optional[SecurityContext] + ) -> str: + """ + Formatea el contexto completo del hallazgo para el prompt de IA. + + Incluye información del hallazgo original más contexto OWASP si existe. 
+ + Args: + finding: Hallazgo original + security_context: Contexto de seguridad (opcional) + + Returns: + Texto formateado para incluir en el prompt + """ + sections = [] + + # Sección: Información del hallazgo + sections.append(self._format_finding_info(finding)) + + # Sección: Contexto de seguridad OWASP (si existe) + if security_context: + owasp_context = format_security_context(security_context) + sections.append(owasp_context) + + # Sección: Código problemático (si existe) + if finding.code_snippet: + sections.append(self._format_code_section(finding)) + + return "\n\n".join(sections) + + def _format_finding_info(self, finding: Finding) -> str: + """ + Formatea la información básica del hallazgo. + + Args: + finding: Hallazgo a formatear + + Returns: + Texto formateado con información del hallazgo + """ + lines = [ + "## Hallazgo Detectado", + f"- **Tipo**: {finding.issue_type}", + f"- **Severidad**: {finding.severity.value.upper()}", + f"- **Mensaje**: {finding.message}", + f"- **Línea**: {finding.line_number}", + f"- **Agente**: {finding.agent_name}", + ] + + if finding.rule_id: + lines.append(f"- **Regla**: {finding.rule_id}") + + if finding.suggestion: + lines.append(f"- **Sugerencia inicial**: {finding.suggestion}") + + return "\n".join(lines) + + def _format_code_section(self, finding: Finding) -> str: + """ + Formatea la sección de código problemático. + + Args: + finding: Hallazgo con código + + Returns: + Texto formateado con el código + """ + return ( + "## Código Problemático\n" + "```python\n" + f"# Línea {finding.line_number}\n" + f"{finding.code_snippet}\n" + "```" + ) + + +# Factory function para facilitar inyección de dependencias +def get_mcp_context_enricher(mcp_client: Optional[MCPClient] = None) -> MCPContextEnricher: + """ + Factory function para crear instancias del enricher. 
+ + Args: + mcp_client: Cliente MCP opcional para inyección + + Returns: + Nueva instancia de MCPContextEnricher + """ + return MCPContextEnricher(mcp_client=mcp_client) diff --git a/backend/src/utils/encryption/aes_encryptor.py b/backend/src/utils/encryption/aes_encryptor.py index 36166dc..e652ad6 100644 --- a/backend/src/utils/encryption/aes_encryptor.py +++ b/backend/src/utils/encryption/aes_encryptor.py @@ -1,51 +1,51 @@ -import os - -from cryptography.fernet import Fernet -from dotenv import load_dotenv - -# Cargar variables de entorno -load_dotenv() - -# Obtener clave de encriptación -# NOTA: En producción, esto DEBE venir de variables de entorno. -# Si no existe, generamos una temporal para desarrollo. -# (Esto evita que falle en local si no configuraste el .env) -_KEY = os.getenv("ENCRYPTION_SECRET_KEY", Fernet.generate_key().decode()) -_CIPHER = Fernet(_KEY.encode() if isinstance(_KEY, str) else _KEY) - - -def encrypt_aes256(content: str) -> bytes: - """ - Encripta una cadena de texto usando Fernet (AES-256). - - Cumple con la RN16: Encriptación de Código Fuente en reposo. - - Args: - content: El texto plano (código fuente) a encriptar. - - Returns: - bytes: El contenido encriptado listo para almacenar en BD. - - Raises: - ValueError: Si el contenido es nulo o vacío. - """ - if not content: - raise ValueError("El contenido a encriptar no puede estar vacío") - - return _CIPHER.encrypt(content.encode("utf-8")) - - -def decrypt_aes256(encrypted_content: bytes) -> str: - """ - Desencripta bytes almacenados para recuperar el texto original. - - Args: - encrypted_content: Los bytes encriptados recuperados de la BD. - - Returns: - str: El código fuente original en texto plano. 
- """ - if not encrypted_content: - return "" - - return _CIPHER.decrypt(encrypted_content).decode("utf-8") +import os + +from cryptography.fernet import Fernet +from dotenv import load_dotenv + +# Cargar variables de entorno +load_dotenv() + +# Obtener clave de encriptación +# NOTA: En producción, esto DEBE venir de variables de entorno. +# Si no existe, generamos una temporal para desarrollo. +# (Esto evita que falle en local si no configuraste el .env) +_KEY = os.getenv("ENCRYPTION_SECRET_KEY", Fernet.generate_key().decode()) +_CIPHER = Fernet(_KEY.encode() if isinstance(_KEY, str) else _KEY) + + +def encrypt_aes256(content: str) -> bytes: + """ + Encripta una cadena de texto usando Fernet (AES-256). + + Cumple con la RN16: Encriptación de Código Fuente en reposo. + + Args: + content: El texto plano (código fuente) a encriptar. + + Returns: + bytes: El contenido encriptado listo para almacenar en BD. + + Raises: + ValueError: Si el contenido es nulo o vacío. + """ + if not content: + raise ValueError("El contenido a encriptar no puede estar vacío") + + return _CIPHER.encrypt(content.encode("utf-8")) + + +def decrypt_aes256(encrypted_content: bytes) -> str: + """ + Desencripta bytes almacenados para recuperar el texto original. + + Args: + encrypted_content: Los bytes encriptados recuperados de la BD. + + Returns: + str: El código fuente original en texto plano. 
+ """ + if not encrypted_content: + return "" + + return _CIPHER.decrypt(encrypted_content).decode("utf-8") diff --git a/backend/src/utils/logger.py b/backend/src/utils/logger.py index 966ca34..96268f9 100644 --- a/backend/src/utils/logger.py +++ b/backend/src/utils/logger.py @@ -1,11 +1,11 @@ -import logging -import sys - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - handlers=[logging.StreamHandler(sys.stdout)], -) - -logger = logging.getLogger("codeguard") +import logging +import sys + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], +) + +logger = logging.getLogger("codeguard") diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py index 66173ae..0f0e5ed 100644 --- a/backend/tests/__init__.py +++ b/backend/tests/__init__.py @@ -1 +1 @@ -# Test package +# Test package diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index baf6f55..974b8f1 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -1,27 +1,27 @@ -""" -Pytest configuration and fixtures -""" - -import pytest -from fastapi.testclient import TestClient - -from src.main import app - - -@pytest.fixture -def client(): - """FastAPI test client""" - return TestClient(app) - - -@pytest.fixture -def sample_python_code(): - """Sample Python code for testing""" - return """ -def calculate_sum(a, b): - return a + b - -def main(): - result = calculate_sum(5, 3) - print(f"Result: {result}") -""" +""" +Pytest configuration and fixtures +""" + +import pytest +from fastapi.testclient import TestClient + +from src.main import app + + +@pytest.fixture +def client(): + """FastAPI test client""" + return TestClient(app) + + +@pytest.fixture +def sample_python_code(): + """Sample Python code for testing""" + return """ +def calculate_sum(a, b): + return a + b + +def 
main(): + result = calculate_sum(5, 3) + print(f"Result: {result}") +""" diff --git a/backend/tests/generate_jwt.py b/backend/tests/generate_jwt.py index b9de14d..36b217d 100644 --- a/backend/tests/generate_jwt.py +++ b/backend/tests/generate_jwt.py @@ -28,4 +28,4 @@ print("\n--- Para probar con cURL ---") print( f'curl -X POST "http://localhost:8000/api/v1/auth/login" -H "Content-Type: application/json" -d \'{{"token": "{token}"}}\'' -) \ No newline at end of file +) diff --git a/backend/tests/integration/test_api_endpoints.py b/backend/tests/integration/test_api_endpoints.py index 199cc7a..d1131a1 100644 --- a/backend/tests/integration/test_api_endpoints.py +++ b/backend/tests/integration/test_api_endpoints.py @@ -1,300 +1,300 @@ -""" -Integration tests for Analysis API Endpoint. - -Tests the /api/v1/analyze endpoint with realistic scenarios -covering file validation, security analysis, and response format. -""" - -from datetime import datetime -from io import BytesIO -from unittest.mock import MagicMock, patch -from uuid import uuid4 - -import pytest -from fastapi import status -from fastapi.testclient import TestClient - -from src.core.dependencies.auth import get_current_user -from src.core.dependencies.get_db import get_db -from src.main import app -from src.schemas.user import Role, User - -# ============================================================================= -# Fixtures -# ============================================================================= - - -@pytest.fixture -def mock_user() -> User: - """Usuario autenticado de prueba.""" - return User( - id="user_test_123", - email="developer@codeguard.ai", - name="Test Developer", - role=Role.DEVELOPER, - ) - - -@pytest.fixture -def mock_db_session(): - """Sesión de base de datos mockeada.""" - session = MagicMock() - session.add = MagicMock() - session.commit = MagicMock() - session.refresh = MagicMock() - return session - - -@pytest.fixture -def client(mock_user: User, mock_db_session): - 
"""Cliente de prueba con dependencias mockeadas.""" - - def override_get_current_user(): - return mock_user - - def override_get_db(): - yield mock_db_session - - app.dependency_overrides[get_current_user] = override_get_current_user - app.dependency_overrides[get_db] = override_get_db - - yield TestClient(app) - - app.dependency_overrides.clear() - - -# ============================================================================= -# Helper Functions -# ============================================================================= - - -def create_python_file(content: str, filename: str = "test_code.py") -> tuple: - """Crea un archivo Python simulado para upload.""" - file_bytes = BytesIO(content.encode("utf-8")) - return ("file", (filename, file_bytes, "text/x-python")) - - -def create_valid_python_code() -> str: - """Genera código Python válido con al menos 5 líneas.""" - return '''"""Module docstring.""" -import os - -def hello_world(): - """Print hello world.""" - print("Hello, World!") - -if __name__ == "__main__": - hello_world() -''' - - -def create_vulnerable_code() -> str: - """Genera código Python con vulnerabilidades de seguridad.""" - return '''"""Vulnerable code for testing.""" -import os -import pickle - -def unsafe_eval(user_input): - """Dangerous eval usage.""" - return eval(user_input) - -def unsafe_query(user_id): - """SQL injection vulnerability.""" - query = "SELECT * FROM users WHERE id = " + user_id - return query - -PASSWORD = "super_secret_password_123" -API_KEY = "sk-1234567890abcdef" -''' - - -# ============================================================================= -# Test Classes -# ============================================================================= - - -class TestAnalyzeEndpointValidation: - """Tests para validación de archivos (RN4).""" - - def test_reject_non_python_file(self, client: TestClient): - """Rechaza archivos que no son .py.""" - file_data = create_python_file("print('hello')", "script.js") - - response = 
client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "Solo se aceptan archivos .py" in response.json()["detail"] - - def test_reject_file_without_extension(self, client: TestClient): - """Rechaza archivos sin extensión.""" - file_data = create_python_file("print('hello')", "script") - - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - - def test_reject_empty_file(self, client: TestClient): - """Rechaza archivos vacíos o con menos de 5 líneas.""" - file_data = create_python_file("# just a comment\n", "empty.py") - - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "al menos 5 líneas" in response.json()["detail"] - - def test_reject_file_too_large(self, client: TestClient): - """Rechaza archivos mayores a 10MB.""" - large_content = "x = 1\n" * (10 * 1024 * 1024 // 6 + 1) - file_data = create_python_file(large_content, "large.py") - - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE - - def test_reject_invalid_utf8_encoding(self, client: TestClient): - """Rechaza archivos con codificación inválida.""" - invalid_bytes = b"\x80\x81\x82\x83\x84" - file_bytes = BytesIO(invalid_bytes) - file_data = ("file", ("invalid.py", file_bytes, "text/x-python")) - - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "UTF-8" in response.json()["detail"] - - -class TestAnalyzeEndpointSuccess: - """Tests para análisis exitoso.""" - - @patch("src.services.analysis_service.AnalysisService.analyze_code") - def test_analyze_valid_python_file(self, mock_analyze, client: TestClient): - """Analiza correctamente un archivo Python válido.""" - 
mock_analyze.return_value = MagicMock( - id=uuid4(), - filename="test_code.py", - status="completed", - quality_score=95, - total_findings=2, - created_at=datetime.utcnow(), - ) - - file_data = create_python_file(create_valid_python_code()) - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_200_OK - data = response.json() - assert "analysis_id" in data - assert data["status"] == "completed" - assert data["quality_score"] == 95 - - @patch("src.services.analysis_service.AnalysisService.analyze_code") - def test_analyze_vulnerable_code_returns_findings(self, mock_analyze, client: TestClient): - """Detecta vulnerabilidades y retorna findings.""" - mock_analyze.return_value = MagicMock( - id=uuid4(), - filename="vulnerable.py", - status="completed", - quality_score=45, - total_findings=5, - created_at=datetime.utcnow(), - ) - - file_data = create_python_file(create_vulnerable_code(), "vulnerable.py") - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_200_OK - data = response.json() - assert data["total_findings"] >= 1 - assert data["quality_score"] < 100 - - -class TestAnalyzeEndpointResponseFormat: - """Tests para formato de respuesta (AnalysisResponse).""" - - @patch("src.services.analysis_service.AnalysisService.analyze_code") - def test_response_contains_required_fields(self, mock_analyze, client: TestClient): - """La respuesta contiene todos los campos requeridos.""" - analysis_id = uuid4() - mock_analyze.return_value = MagicMock( - id=analysis_id, - filename="app.py", - status="completed", - quality_score=85, - total_findings=3, - created_at=datetime.utcnow(), - ) - - file_data = create_python_file(create_valid_python_code(), "app.py") - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code == status.HTTP_200_OK - data = response.json() - - required_fields = [ - "analysis_id", - "filename", - "status", - 
"quality_score", - "total_findings", - "created_at", - ] - for field in required_fields: - assert field in data, f"Missing required field: {field}" - - @patch("src.services.analysis_service.AnalysisService.analyze_code") - def test_quality_score_within_bounds(self, mock_analyze, client: TestClient): - """El quality_score está entre 0 y 100.""" - mock_analyze.return_value = MagicMock( - id=uuid4(), - filename="test.py", - status="completed", - quality_score=75, - total_findings=5, - created_at=datetime.utcnow(), - ) - - file_data = create_python_file(create_valid_python_code()) - response = client.post("/api/v1/analyze", files=[file_data]) - - data = response.json() - assert 0 <= data["quality_score"] <= 100 - - -class TestAnalyzeEndpointAuthentication: - """Tests para autenticación.""" - - def test_reject_unauthenticated_request(self, monkeypatch): - """Rechaza requests sin autenticación en modo producción.""" - # Forzar modo producción donde auth es obligatorio - monkeypatch.setenv("ENVIRONMENT", "production") - app.dependency_overrides.clear() - - client = TestClient(app) - file_data = create_python_file(create_valid_python_code()) - - response = client.post("/api/v1/analyze", files=[file_data]) - - assert response.status_code in [ - status.HTTP_401_UNAUTHORIZED, - status.HTTP_403_FORBIDDEN, - ] - - -class TestAnalyzeEndpointErrorHandling: - """Tests para manejo de errores.""" - - def test_missing_file_returns_422(self, client: TestClient): - """Retorna 422 cuando no se envía archivo.""" - response = client.post("/api/v1/analyze") - - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - - @patch("src.services.analysis_service.AnalysisService.analyze_code") - def test_internal_error_returns_500(self, mock_analyze, client: TestClient): - """Retorna 500 en errores internos.""" - mock_analyze.side_effect = Exception("Database connection failed") - - file_data = create_python_file(create_valid_python_code()) - response = client.post("/api/v1/analyze", 
files=[file_data]) - - assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR +""" +Integration tests for Analysis API Endpoint. + +Tests the /api/v1/analyze endpoint with realistic scenarios +covering file validation, security analysis, and response format. +""" + +from datetime import datetime +from io import BytesIO +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from fastapi import status +from fastapi.testclient import TestClient + +from src.core.dependencies.auth import get_current_user +from src.core.dependencies.get_db import get_db +from src.main import app +from src.schemas.user import Role, User + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def mock_user() -> User: + """Usuario autenticado de prueba.""" + return User( + id="user_test_123", + email="developer@codeguard.ai", + name="Test Developer", + role=Role.DEVELOPER, + ) + + +@pytest.fixture +def mock_db_session(): + """Sesión de base de datos mockeada.""" + session = MagicMock() + session.add = MagicMock() + session.commit = MagicMock() + session.refresh = MagicMock() + return session + + +@pytest.fixture +def client(mock_user: User, mock_db_session): + """Cliente de prueba con dependencias mockeadas.""" + + def override_get_current_user(): + return mock_user + + def override_get_db(): + yield mock_db_session + + app.dependency_overrides[get_current_user] = override_get_current_user + app.dependency_overrides[get_db] = override_get_db + + yield TestClient(app) + + app.dependency_overrides.clear() + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def create_python_file(content: str, filename: str = "test_code.py") -> tuple: + """Crea un archivo Python simulado 
para upload.""" + file_bytes = BytesIO(content.encode("utf-8")) + return ("file", (filename, file_bytes, "text/x-python")) + + +def create_valid_python_code() -> str: + """Genera código Python válido con al menos 5 líneas.""" + return '''"""Module docstring.""" +import os + +def hello_world(): + """Print hello world.""" + print("Hello, World!") + +if __name__ == "__main__": + hello_world() +''' + + +def create_vulnerable_code() -> str: + """Genera código Python con vulnerabilidades de seguridad.""" + return '''"""Vulnerable code for testing.""" +import os +import pickle + +def unsafe_eval(user_input): + """Dangerous eval usage.""" + return eval(user_input) + +def unsafe_query(user_id): + """SQL injection vulnerability.""" + query = "SELECT * FROM users WHERE id = " + user_id + return query + +PASSWORD = "super_secret_password_123" +API_KEY = "sk-1234567890abcdef" +''' + + +# ============================================================================= +# Test Classes +# ============================================================================= + + +class TestAnalyzeEndpointValidation: + """Tests para validación de archivos (RN4).""" + + def test_reject_non_python_file(self, client: TestClient): + """Rechaza archivos que no son .py.""" + file_data = create_python_file("print('hello')", "script.js") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "Solo se aceptan archivos .py" in response.json()["detail"] + + def test_reject_file_without_extension(self, client: TestClient): + """Rechaza archivos sin extensión.""" + file_data = create_python_file("print('hello')", "script") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + def test_reject_empty_file(self, client: TestClient): + """Rechaza archivos vacíos o con menos de 5 líneas.""" + file_data = create_python_file("# just a 
comment\n", "empty.py") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "al menos 5 líneas" in response.json()["detail"] + + def test_reject_file_too_large(self, client: TestClient): + """Rechaza archivos mayores a 10MB.""" + large_content = "x = 1\n" * (10 * 1024 * 1024 // 6 + 1) + file_data = create_python_file(large_content, "large.py") + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE + + def test_reject_invalid_utf8_encoding(self, client: TestClient): + """Rechaza archivos con codificación inválida.""" + invalid_bytes = b"\x80\x81\x82\x83\x84" + file_bytes = BytesIO(invalid_bytes) + file_data = ("file", ("invalid.py", file_bytes, "text/x-python")) + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert "UTF-8" in response.json()["detail"] + + +class TestAnalyzeEndpointSuccess: + """Tests para análisis exitoso.""" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_analyze_valid_python_file(self, mock_analyze, client: TestClient): + """Analiza correctamente un archivo Python válido.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="test_code.py", + status="completed", + quality_score=95, + total_findings=2, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert "analysis_id" in data + assert data["status"] == "completed" + assert data["quality_score"] == 95 + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_analyze_vulnerable_code_returns_findings(self, mock_analyze, client: TestClient): + """Detecta 
vulnerabilidades y retorna findings.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="vulnerable.py", + status="completed", + quality_score=45, + total_findings=5, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_vulnerable_code(), "vulnerable.py") + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["total_findings"] >= 1 + assert data["quality_score"] < 100 + + +class TestAnalyzeEndpointResponseFormat: + """Tests para formato de respuesta (AnalysisResponse).""" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_response_contains_required_fields(self, mock_analyze, client: TestClient): + """La respuesta contiene todos los campos requeridos.""" + analysis_id = uuid4() + mock_analyze.return_value = MagicMock( + id=analysis_id, + filename="app.py", + status="completed", + quality_score=85, + total_findings=3, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code(), "app.py") + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + + required_fields = [ + "analysis_id", + "filename", + "status", + "quality_score", + "total_findings", + "created_at", + ] + for field in required_fields: + assert field in data, f"Missing required field: {field}" + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_quality_score_within_bounds(self, mock_analyze, client: TestClient): + """El quality_score está entre 0 y 100.""" + mock_analyze.return_value = MagicMock( + id=uuid4(), + filename="test.py", + status="completed", + quality_score=75, + total_findings=5, + created_at=datetime.utcnow(), + ) + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + data = response.json() 
+ assert 0 <= data["quality_score"] <= 100 + + +class TestAnalyzeEndpointAuthentication: + """Tests para autenticación.""" + + def test_reject_unauthenticated_request(self, monkeypatch): + """Rechaza requests sin autenticación en modo producción.""" + # Forzar modo producción donde auth es obligatorio + monkeypatch.setenv("ENVIRONMENT", "production") + app.dependency_overrides.clear() + + client = TestClient(app) + file_data = create_python_file(create_valid_python_code()) + + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code in [ + status.HTTP_401_UNAUTHORIZED, + status.HTTP_403_FORBIDDEN, + ] + + +class TestAnalyzeEndpointErrorHandling: + """Tests para manejo de errores.""" + + def test_missing_file_returns_422(self, client: TestClient): + """Retorna 422 cuando no se envía archivo.""" + response = client.post("/api/v1/analyze") + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + @patch("src.services.analysis_service.AnalysisService.analyze_code") + def test_internal_error_returns_500(self, mock_analyze, client: TestClient): + """Retorna 500 en errores internos.""" + mock_analyze.side_effect = Exception("Database connection failed") + + file_data = create_python_file(create_valid_python_code()) + response = client.post("/api/v1/analyze", files=[file_data]) + + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR diff --git a/backend/tests/integration/test_auth_router.py b/backend/tests/integration/test_auth_router.py index cf1c1b1..9fa7946 100644 --- a/backend/tests/integration/test_auth_router.py +++ b/backend/tests/integration/test_auth_router.py @@ -1,270 +1,270 @@ -"""Tests de integración para auth router.""" - -import time -from unittest.mock import MagicMock, patch - -import pytest -from fastapi.testclient import TestClient -from jose import jwt - -from src.main import app -from src.models.enums.user_role import UserRole -from src.models.user import UserEntity - -# Test secret 
key -TEST_SECRET_KEY = "test-secret-key-for-router-tests" - - -def create_valid_token(user_id: str = "user_123", email: str = "test@example.com") -> str: - """Genera un token JWT válido para tests.""" - now = int(time.time()) - payload = { - "sub": user_id, - "email": email, - "name": "Test User", - "exp": now + 3600, - "iat": now, - } - return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") - - -def create_expired_token() -> str: - """Genera un token JWT expirado.""" - now = int(time.time()) - payload = { - "sub": "user_expired", - "email": "expired@example.com", - "exp": now - 3600, # Expirado hace 1 hora - "iat": now - 7200, - } - return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") - - -@pytest.fixture -def client(): - """TestClient de FastAPI.""" - return TestClient(app) - - -@pytest.fixture -def mock_user_entity(): - """UserEntity mockeado.""" - entity = MagicMock(spec=UserEntity) - entity.id = "user_123" - entity.email = "test@example.com" - entity.name = "Test User" - entity.role = UserRole.DEVELOPER - return entity - - -class TestLoginEndpoint: - """Tests para POST /api/v1/auth/login.""" - - @patch("src.routers.auth.ClerkClient") - @patch("src.routers.auth.UserRepository") - @patch("src.routers.auth.get_db") - def test_login_success_new_user( - self, mock_get_db, mock_repo_class, mock_clerk_class, client, mock_user_entity - ): - """Login exitoso crea usuario nuevo.""" - # Arrange - mock_clerk = MagicMock() - mock_clerk.verify_token.return_value = { - "sub": "user_123", - "email": "test@example.com", - "name": "Test User", - } - mock_clerk_class.return_value = mock_clerk - - mock_repo = MagicMock() - mock_repo.get_by_id.return_value = None # Usuario no existe - mock_repo.create.return_value = mock_user_entity - mock_repo_class.return_value = mock_repo - - mock_session = MagicMock() - mock_get_db.return_value = iter([mock_session]) - - token = create_valid_token() - - # Act - response = client.post( - "/api/v1/auth/login", - 
headers={"Authorization": f"Bearer {token}"}, - ) - - # Assert - assert response.status_code == 200 - data = response.json() - assert data["id"] == "user_123" - assert data["email"] == "test@example.com" - - @patch("src.routers.auth.ClerkClient") - @patch("src.routers.auth.UserRepository") - @patch("src.routers.auth.get_db") - def test_login_success_existing_user( - self, mock_get_db, mock_repo_class, mock_clerk_class, client, mock_user_entity - ): - """Login exitoso actualiza usuario existente.""" - # Arrange - mock_clerk = MagicMock() - mock_clerk.verify_token.return_value = { - "sub": "user_123", - "email": "updated@example.com", - "name": "Updated Name", - } - mock_clerk_class.return_value = mock_clerk - - mock_repo = MagicMock() - mock_repo.get_by_id.return_value = mock_user_entity # Usuario existe - mock_repo.update.return_value = mock_user_entity - mock_repo_class.return_value = mock_repo - - mock_session = MagicMock() - mock_get_db.return_value = iter([mock_session]) - - token = create_valid_token() - - # Act - response = client.post( - "/api/v1/auth/login", - headers={"Authorization": f"Bearer {token}"}, - ) - - # Assert - assert response.status_code == 200 - - @patch("src.routers.auth.ClerkClient") - @patch("src.routers.auth.get_db") - def test_login_token_expired(self, mock_get_db, mock_clerk_class, client): - """Token expirado retorna 401.""" - # Arrange - from src.external.clerk_client import ClerkTokenExpiredError - - mock_clerk = MagicMock() - mock_clerk.verify_token.side_effect = ClerkTokenExpiredError("Token expirado") - mock_clerk_class.return_value = mock_clerk - - mock_session = MagicMock() - mock_get_db.return_value = iter([mock_session]) - - token = create_expired_token() - - # Act - response = client.post( - "/api/v1/auth/login", - headers={"Authorization": f"Bearer {token}"}, - ) - - # Assert - assert response.status_code == 401 - assert "expirado" in response.json()["detail"].lower() - - @patch("src.routers.auth.ClerkClient") - 
@patch("src.routers.auth.get_db") - def test_login_token_invalid(self, mock_get_db, mock_clerk_class, client): - """Token inválido retorna 401.""" - # Arrange - from src.external.clerk_client import ClerkTokenInvalidError - - mock_clerk = MagicMock() - mock_clerk.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") - mock_clerk_class.return_value = mock_clerk - - mock_session = MagicMock() - mock_get_db.return_value = iter([mock_session]) - - # Act - response = client.post( - "/api/v1/auth/login", - headers={"Authorization": f"Bearer invalid-token"}, - ) - - # Assert - assert response.status_code == 401 - assert "inválido" in response.json()["detail"].lower() - - def test_login_missing_token(self, client): - """Sin token retorna 401 o 403 (depende de versión FastAPI).""" - response = client.post("/api/v1/auth/login") - - # 401 en versiones nuevas de Starlette, 403 en anteriores - assert response.status_code in (401, 403) - - -class TestGetMeEndpoint: - """Tests para GET /api/v1/auth/me.""" - - @patch("src.routers.auth.ClerkClient") - def test_get_me_success(self, mock_clerk_class, client): - """Token válido retorna datos del usuario.""" - # Arrange - mock_clerk = MagicMock() - mock_clerk.verify_token.return_value = { - "user_id": "user_me", - "email": "me@example.com", - "name": "Current User", - } - mock_clerk_class.return_value = mock_clerk - - token = create_valid_token(user_id="user_me", email="me@example.com") - - # Act - response = client.get( - "/api/v1/auth/me", - headers={"Authorization": f"Bearer {token}"}, - ) - - # Assert - assert response.status_code == 200 - data = response.json() - assert data["id"] == "user_me" - assert data["email"] == "me@example.com" - - @patch("src.routers.auth.ClerkClient") - def test_get_me_token_expired(self, mock_clerk_class, client): - """Token expirado retorna 401.""" - # Arrange - from src.external.clerk_client import ClerkTokenExpiredError - - mock_clerk = MagicMock() - mock_clerk.verify_token.side_effect 
= ClerkTokenExpiredError("Token expirado") - mock_clerk_class.return_value = mock_clerk - - token = create_expired_token() - - # Act - response = client.get( - "/api/v1/auth/me", - headers={"Authorization": f"Bearer {token}"}, - ) - - # Assert - assert response.status_code == 401 - assert "expirado" in response.json()["detail"].lower() - - @patch("src.routers.auth.ClerkClient") - def test_get_me_token_invalid(self, mock_clerk_class, client): - """Token inválido retorna 401.""" - # Arrange - from src.external.clerk_client import ClerkTokenInvalidError - - mock_clerk = MagicMock() - mock_clerk.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") - mock_clerk_class.return_value = mock_clerk - - # Act - response = client.get( - "/api/v1/auth/me", - headers={"Authorization": f"Bearer bad-token"}, - ) - - # Assert - assert response.status_code == 401 - assert "inválido" in response.json()["detail"].lower() - - def test_get_me_missing_token(self, client): - """Sin token retorna 401 o 403 (depende de versión FastAPI).""" - response = client.get("/api/v1/auth/me") - - # 401 en versiones nuevas de Starlette, 403 en anteriores - assert response.status_code in (401, 403) +"""Tests de integración para auth router.""" + +import time +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient +from jose import jwt + +from src.main import app +from src.models.enums.user_role import UserRole +from src.models.user import UserEntity + +# Test secret key +TEST_SECRET_KEY = "test-secret-key-for-router-tests" + + +def create_valid_token(user_id: str = "user_123", email: str = "test@example.com") -> str: + """Genera un token JWT válido para tests.""" + now = int(time.time()) + payload = { + "sub": user_id, + "email": email, + "name": "Test User", + "exp": now + 3600, + "iat": now, + } + return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") + + +def create_expired_token() -> str: + """Genera un token JWT expirado.""" + 
now = int(time.time()) + payload = { + "sub": "user_expired", + "email": "expired@example.com", + "exp": now - 3600, # Expirado hace 1 hora + "iat": now - 7200, + } + return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") + + +@pytest.fixture +def client(): + """TestClient de FastAPI.""" + return TestClient(app) + + +@pytest.fixture +def mock_user_entity(): + """UserEntity mockeado.""" + entity = MagicMock(spec=UserEntity) + entity.id = "user_123" + entity.email = "test@example.com" + entity.name = "Test User" + entity.role = UserRole.DEVELOPER + return entity + + +class TestLoginEndpoint: + """Tests para POST /api/v1/auth/login.""" + + @patch("src.routers.auth.ClerkClient") + @patch("src.routers.auth.UserRepository") + @patch("src.routers.auth.get_db") + def test_login_success_new_user( + self, mock_get_db, mock_repo_class, mock_clerk_class, client, mock_user_entity + ): + """Login exitoso crea usuario nuevo.""" + # Arrange + mock_clerk = MagicMock() + mock_clerk.verify_token.return_value = { + "sub": "user_123", + "email": "test@example.com", + "name": "Test User", + } + mock_clerk_class.return_value = mock_clerk + + mock_repo = MagicMock() + mock_repo.get_by_id.return_value = None # Usuario no existe + mock_repo.create.return_value = mock_user_entity + mock_repo_class.return_value = mock_repo + + mock_session = MagicMock() + mock_get_db.return_value = iter([mock_session]) + + token = create_valid_token() + + # Act + response = client.post( + "/api/v1/auth/login", + headers={"Authorization": f"Bearer {token}"}, + ) + + # Assert + assert response.status_code == 200 + data = response.json() + assert data["id"] == "user_123" + assert data["email"] == "test@example.com" + + @patch("src.routers.auth.ClerkClient") + @patch("src.routers.auth.UserRepository") + @patch("src.routers.auth.get_db") + def test_login_success_existing_user( + self, mock_get_db, mock_repo_class, mock_clerk_class, client, mock_user_entity + ): + """Login exitoso actualiza usuario 
existente.""" + # Arrange + mock_clerk = MagicMock() + mock_clerk.verify_token.return_value = { + "sub": "user_123", + "email": "updated@example.com", + "name": "Updated Name", + } + mock_clerk_class.return_value = mock_clerk + + mock_repo = MagicMock() + mock_repo.get_by_id.return_value = mock_user_entity # Usuario existe + mock_repo.update.return_value = mock_user_entity + mock_repo_class.return_value = mock_repo + + mock_session = MagicMock() + mock_get_db.return_value = iter([mock_session]) + + token = create_valid_token() + + # Act + response = client.post( + "/api/v1/auth/login", + headers={"Authorization": f"Bearer {token}"}, + ) + + # Assert + assert response.status_code == 200 + + @patch("src.routers.auth.ClerkClient") + @patch("src.routers.auth.get_db") + def test_login_token_expired(self, mock_get_db, mock_clerk_class, client): + """Token expirado retorna 401.""" + # Arrange + from src.external.clerk_client import ClerkTokenExpiredError + + mock_clerk = MagicMock() + mock_clerk.verify_token.side_effect = ClerkTokenExpiredError("Token expirado") + mock_clerk_class.return_value = mock_clerk + + mock_session = MagicMock() + mock_get_db.return_value = iter([mock_session]) + + token = create_expired_token() + + # Act + response = client.post( + "/api/v1/auth/login", + headers={"Authorization": f"Bearer {token}"}, + ) + + # Assert + assert response.status_code == 401 + assert "expirado" in response.json()["detail"].lower() + + @patch("src.routers.auth.ClerkClient") + @patch("src.routers.auth.get_db") + def test_login_token_invalid(self, mock_get_db, mock_clerk_class, client): + """Token inválido retorna 401.""" + # Arrange + from src.external.clerk_client import ClerkTokenInvalidError + + mock_clerk = MagicMock() + mock_clerk.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") + mock_clerk_class.return_value = mock_clerk + + mock_session = MagicMock() + mock_get_db.return_value = iter([mock_session]) + + # Act + response = client.post( + 
"/api/v1/auth/login", + headers={"Authorization": f"Bearer invalid-token"}, + ) + + # Assert + assert response.status_code == 401 + assert "inválido" in response.json()["detail"].lower() + + def test_login_missing_token(self, client): + """Sin token retorna 401 o 403 (depende de versión FastAPI).""" + response = client.post("/api/v1/auth/login") + + # 401 en versiones nuevas de Starlette, 403 en anteriores + assert response.status_code in (401, 403) + + +class TestGetMeEndpoint: + """Tests para GET /api/v1/auth/me.""" + + @patch("src.routers.auth.ClerkClient") + def test_get_me_success(self, mock_clerk_class, client): + """Token válido retorna datos del usuario.""" + # Arrange + mock_clerk = MagicMock() + mock_clerk.verify_token.return_value = { + "user_id": "user_me", + "email": "me@example.com", + "name": "Current User", + } + mock_clerk_class.return_value = mock_clerk + + token = create_valid_token(user_id="user_me", email="me@example.com") + + # Act + response = client.get( + "/api/v1/auth/me", + headers={"Authorization": f"Bearer {token}"}, + ) + + # Assert + assert response.status_code == 200 + data = response.json() + assert data["id"] == "user_me" + assert data["email"] == "me@example.com" + + @patch("src.routers.auth.ClerkClient") + def test_get_me_token_expired(self, mock_clerk_class, client): + """Token expirado retorna 401.""" + # Arrange + from src.external.clerk_client import ClerkTokenExpiredError + + mock_clerk = MagicMock() + mock_clerk.verify_token.side_effect = ClerkTokenExpiredError("Token expirado") + mock_clerk_class.return_value = mock_clerk + + token = create_expired_token() + + # Act + response = client.get( + "/api/v1/auth/me", + headers={"Authorization": f"Bearer {token}"}, + ) + + # Assert + assert response.status_code == 401 + assert "expirado" in response.json()["detail"].lower() + + @patch("src.routers.auth.ClerkClient") + def test_get_me_token_invalid(self, mock_clerk_class, client): + """Token inválido retorna 401.""" + # Arrange + 
from src.external.clerk_client import ClerkTokenInvalidError + + mock_clerk = MagicMock() + mock_clerk.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") + mock_clerk_class.return_value = mock_clerk + + # Act + response = client.get( + "/api/v1/auth/me", + headers={"Authorization": f"Bearer bad-token"}, + ) + + # Assert + assert response.status_code == 401 + assert "inválido" in response.json()["detail"].lower() + + def test_get_me_missing_token(self, client): + """Sin token retorna 401 o 403 (depende de versión FastAPI).""" + response = client.get("/api/v1/auth/me") + + # 401 en versiones nuevas de Starlette, 403 en anteriores + assert response.status_code in (401, 403) diff --git a/backend/tests/integration/test_quality_agent_integration.py b/backend/tests/integration/test_quality_agent_integration.py index 71c714c..aa9c94d 100644 --- a/backend/tests/integration/test_quality_agent_integration.py +++ b/backend/tests/integration/test_quality_agent_integration.py @@ -1,211 +1,211 @@ -""" -Integration tests for QualityAgent. - -Tests QualityAgent with realistic code samples -and verifies end-to-end behavior for code quality metrics. 
-""" - -import pytest - -from src.agents.quality_agent import QualityAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Severity - - -class TestQualityAgentIntegration: - """Integration tests for QualityAgent with realistic code.""" - - @pytest.fixture - def agent(self): - """Create QualityAgent instance.""" - return QualityAgent() - - @pytest.fixture - def poor_quality_code(self): - """Realistic poor quality code with multiple issues.""" - return """ -def complex_and_long_function(data): - # High Cyclomatic Complexity - result = [] - if data: - for item in data: - if item.get('active'): - if item.get('type') == 'A': - if item.get('value') > 10: - result.append(item) - else: - print("Value too low") - elif item.get('type') == 'B': - if item.get('value') > 20: - result.append(item) - else: - if item.get('force'): - result.append(item) - else: - if item.get('retry'): - process_retry(item) - elif item.get('fail'): - log_failure(item) - elif item.get('warn'): - log_warning(item) - - # Code Duplication Block 1 - x = 0 - y = 0 - z = 0 - for i in range(10): - x += i - y += i * 2 - z += i * 3 - print(f"Result: {x}, {y}, {z}") - - # Code Duplication Block 2 (Identical to Block 1) - x = 0 - y = 0 - z = 0 - for i in range(10): - x += i - y += i * 2 - z += i * 3 - print(f"Result: {x}, {y}, {z}") - - return result - -def another_complex_function(x, y): - # Another complex function to ensure multiple findings - if x > 0: - if y > 0: - return x + y - else: - return x - y - else: - if y > 0: - return y - x - else: - return -x - y -""" - - def test_comprehensive_quality_detection(self, agent, poor_quality_code): - """Test detection of all quality issues in realistic code.""" - context = AnalysisContext(code_content=poor_quality_code, filename="legacy_module.py") - - findings = agent.analyze(context) - - # Should detect multiple quality issues - # 1. Complexity (complex_and_long_function) - # 2. Duplication - # 3. 
Maintainability (likely low due to complexity) - assert len(findings) >= 3 - - # Verify each issue type is detected - issue_types = {f.issue_type for f in findings} - - # Note: Depending on the exact complexity score, it might be medium or high - assert "quality/cyclomatic-complexity" in issue_types - assert "quality/duplication" in issue_types - - # Verify severity distribution - # Complexity > 10 is Medium, > 20 is High. The sample code is moderately complex. - # Duplication is Medium. - - # Verify findings have suggestions - for finding in findings: - assert finding.suggestion is not None - assert len(finding.suggestion) > 5 - - # Verify findings are sorted by severity - severities = [f.severity.value for f in findings] - expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] - - for i in range(len(severities) - 1): - assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) - - def test_clean_code_no_false_positives(self, agent): - """Test that clean code doesn't generate false positives.""" - clean_code = ''' -def calculate_total(items: list) -> float: - """Calculate total price of items.""" - return sum(item.price for item in items) - -def filter_active_items(items: list) -> list: - """Return only active items.""" - return [item for item in items if item.is_active] - -class UserProcessor: - def __init__(self, user_service): - self.user_service = user_service - - def process(self, user_id: int): - user = self.user_service.get_user(user_id) - if user and user.is_active: - return self._handle_active_user(user) - return None - - def _handle_active_user(self, user): - return {"status": "processed", "id": user.id} -''' - context = AnalysisContext(code_content=clean_code, filename="clean_module.py") - - findings = agent.analyze(context) - - # Should have 0 findings for clean code - # Complexity is low, functions are short, no duplication - assert len(findings) == 0 - - def test_long_function_detection(self, agent): - """Test 
specifically for function length.""" - # Generate a function with > 100 lines - long_code = "def very_long_function():\n" - for i in range(105): - long_code += f" var_{i} = {i}\n" - long_code += " return var_100\n" - - context = AnalysisContext(code_content=long_code, filename="long_func.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - assert findings[0].issue_type == "quality/function-length" - assert "demasiado larga" in findings[0].message.lower() - - def test_analysis_context_metadata_preserved(self, agent): - """Test that analysis context metadata is preserved in findings.""" - # Generate code with high complexity to trigger a finding - bad_code = "def complex_func(x):\n" - for i in range(20): - bad_code += f" if x == {i}: return {i}\n" - - context = AnalysisContext(code_content=bad_code, filename="complex.py") - context.add_metadata("user_id", "dev_user") - - findings = agent.analyze(context) - assert len(findings) > 0 - for finding in findings: - assert finding.agent_name == "QualityAgent" - - def test_large_file_performance(self, agent): - """Test QualityAgent performance with larger file.""" - # Generate code with 50 simple functions - large_code = "" - for i in range(50): - large_code += f""" -def function_{i}(data): - return data * {i} -""" - # Add a duplicated block at the end - dupe_block = """ -def duplicated_logic(): - x = 1 - y = 2 - z = 3 - return x + y + z -""" - large_code += dupe_block - large_code += dupe_block # Duplication - - context = AnalysisContext(code_content=large_code, filename="large_module.py") - - findings = agent.analyze(context) - - # Should detect the duplication - dupe_findings = [f for f in findings if f.issue_type == "quality/duplication"] - assert len(dupe_findings) >= 1 +""" +Integration tests for QualityAgent. + +Tests QualityAgent with realistic code samples +and verifies end-to-end behavior for code quality metrics. 
+""" + +import pytest + +from src.agents.quality_agent import QualityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class TestQualityAgentIntegration: + """Integration tests for QualityAgent with realistic code.""" + + @pytest.fixture + def agent(self): + """Create QualityAgent instance.""" + return QualityAgent() + + @pytest.fixture + def poor_quality_code(self): + """Realistic poor quality code with multiple issues.""" + return """ +def complex_and_long_function(data): + # High Cyclomatic Complexity + result = [] + if data: + for item in data: + if item.get('active'): + if item.get('type') == 'A': + if item.get('value') > 10: + result.append(item) + else: + print("Value too low") + elif item.get('type') == 'B': + if item.get('value') > 20: + result.append(item) + else: + if item.get('force'): + result.append(item) + else: + if item.get('retry'): + process_retry(item) + elif item.get('fail'): + log_failure(item) + elif item.get('warn'): + log_warning(item) + + # Code Duplication Block 1 + x = 0 + y = 0 + z = 0 + for i in range(10): + x += i + y += i * 2 + z += i * 3 + print(f"Result: {x}, {y}, {z}") + + # Code Duplication Block 2 (Identical to Block 1) + x = 0 + y = 0 + z = 0 + for i in range(10): + x += i + y += i * 2 + z += i * 3 + print(f"Result: {x}, {y}, {z}") + + return result + +def another_complex_function(x, y): + # Another complex function to ensure multiple findings + if x > 0: + if y > 0: + return x + y + else: + return x - y + else: + if y > 0: + return y - x + else: + return -x - y +""" + + def test_comprehensive_quality_detection(self, agent, poor_quality_code): + """Test detection of all quality issues in realistic code.""" + context = AnalysisContext(code_content=poor_quality_code, filename="legacy_module.py") + + findings = agent.analyze(context) + + # Should detect multiple quality issues + # 1. Complexity (complex_and_long_function) + # 2. Duplication + # 3. 
Maintainability (likely low due to complexity) + assert len(findings) >= 3 + + # Verify each issue type is detected + issue_types = {f.issue_type for f in findings} + + # Note: Depending on the exact complexity score, it might be medium or high + assert "quality/cyclomatic-complexity" in issue_types + assert "quality/duplication" in issue_types + + # Verify severity distribution + # Complexity > 10 is Medium, > 20 is High. The sample code is moderately complex. + # Duplication is Medium. + + # Verify findings have suggestions + for finding in findings: + assert finding.suggestion is not None + assert len(finding.suggestion) > 5 + + # Verify findings are sorted by severity + severities = [f.severity.value for f in findings] + expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] + + for i in range(len(severities) - 1): + assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) + + def test_clean_code_no_false_positives(self, agent): + """Test that clean code doesn't generate false positives.""" + clean_code = ''' +def calculate_total(items: list) -> float: + """Calculate total price of items.""" + return sum(item.price for item in items) + +def filter_active_items(items: list) -> list: + """Return only active items.""" + return [item for item in items if item.is_active] + +class UserProcessor: + def __init__(self, user_service): + self.user_service = user_service + + def process(self, user_id: int): + user = self.user_service.get_user(user_id) + if user and user.is_active: + return self._handle_active_user(user) + return None + + def _handle_active_user(self, user): + return {"status": "processed", "id": user.id} +''' + context = AnalysisContext(code_content=clean_code, filename="clean_module.py") + + findings = agent.analyze(context) + + # Should have 0 findings for clean code + # Complexity is low, functions are short, no duplication + assert len(findings) == 0 + + def test_long_function_detection(self, agent): + """Test 
specifically for function length.""" + # Generate a function with > 100 lines + long_code = "def very_long_function():\n" + for i in range(105): + long_code += f" var_{i} = {i}\n" + long_code += " return var_100\n" + + context = AnalysisContext(code_content=long_code, filename="long_func.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + assert findings[0].issue_type == "quality/function-length" + assert "demasiado larga" in findings[0].message.lower() + + def test_analysis_context_metadata_preserved(self, agent): + """Test that analysis context metadata is preserved in findings.""" + # Generate code with high complexity to trigger a finding + bad_code = "def complex_func(x):\n" + for i in range(20): + bad_code += f" if x == {i}: return {i}\n" + + context = AnalysisContext(code_content=bad_code, filename="complex.py") + context.add_metadata("user_id", "dev_user") + + findings = agent.analyze(context) + assert len(findings) > 0 + for finding in findings: + assert finding.agent_name == "QualityAgent" + + def test_large_file_performance(self, agent): + """Test QualityAgent performance with larger file.""" + # Generate code with 50 simple functions + large_code = "" + for i in range(50): + large_code += f""" +def function_{i}(data): + return data * {i} +""" + # Add a duplicated block at the end + dupe_block = """ +def duplicated_logic(): + x = 1 + y = 2 + z = 3 + return x + y + z +""" + large_code += dupe_block + large_code += dupe_block # Duplication + + context = AnalysisContext(code_content=large_code, filename="large_module.py") + + findings = agent.analyze(context) + + # Should detect the duplication + dupe_findings = [f for f in findings if f.issue_type == "quality/duplication"] + assert len(dupe_findings) >= 1 diff --git a/backend/tests/integration/test_security_agent_integration.py b/backend/tests/integration/test_security_agent_integration.py index 9632ff5..1f7305b 100644 --- a/backend/tests/integration/test_security_agent_integration.py 
+++ b/backend/tests/integration/test_security_agent_integration.py @@ -1,225 +1,225 @@ -""" -Integration tests for SecurityAgent. - -Tests SecurityAgent with realistic vulnerable code samples -and verifies end-to-end behavior. -""" - -import pytest - -from src.agents.security_agent import SecurityAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Severity - - -class TestSecurityAgentIntegration: - """Integration tests for SecurityAgent with realistic code.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - @pytest.fixture - def vulnerable_web_app_code(self): - """Realistic vulnerable web application code.""" - return """ -import hashlib -import pickle -from flask import Flask, request - -app = Flask(__name__) - -# Hardcoded credentials -DB_PASSWORD = "MyDatabasePass123" -API_KEY = "sk_live_abc123xyz789" - -@app.route('/login', methods=['POST']) -def login(): - username = request.form['username'] - password = request.form['password'] - - # SQL injection vulnerability - query = f"SELECT * FROM users WHERE username='{username}' AND password='{password}'" - cursor.execute(query) - user = cursor.fetchone() - - if user: - # Weak hashing - session_token = hashlib.md5(username.encode()).hexdigest() - return {'token': session_token} - - return {'error': 'Invalid credentials'}, 401 - -@app.route('/execute', methods=['POST']) -def execute_code(): - code = request.form['code'] - - # Dangerous function - arbitrary code execution - result = eval(code) - - return {'result': result} - -@app.route('/load_data', methods=['POST']) -def load_data(): - data = request.form['data'] - - # Unsafe deserialization - obj = pickle.loads(data.encode()) - - return {'loaded': str(obj)} -""" - - def test_comprehensive_vulnerability_detection(self, agent, vulnerable_web_app_code): - """Test detection of all vulnerability types in realistic code.""" - context = 
AnalysisContext(code_content=vulnerable_web_app_code, filename="app.py") - - findings = agent.analyze(context) - - # Should detect multiple vulnerabilities - assert len(findings) >= 5 - - # Verify each vulnerability type is detected - issue_types = {f.issue_type for f in findings} - assert "hardcoded_credentials" in issue_types - assert "sql_injection" in issue_types - assert "weak_cryptography" in issue_types - assert "dangerous_function" in issue_types - - # Verify severity distribution - critical_count = sum(1 for f in findings if f.is_critical) - high_count = sum(1 for f in findings if f.is_high_or_critical) - - assert critical_count >= 2 # Password, API key, eval - assert high_count >= 4 # Including SQL injection - - # Verify findings have suggestions - for finding in findings: - assert finding.suggestion is not None - assert len(finding.suggestion) > 10 - - # Verify findings are sorted by severity - severities = [f.severity.value for f in findings] - expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] - - for i in range(len(severities) - 1): - assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) - - def test_secure_code_no_false_positives(self, agent): - """Test that secure code doesn't generate false positives.""" - secure_code = """ -import os -import hashlib -from sqlalchemy import create_engine, text - -# Secure credential handling -DB_PASSWORD = os.getenv('DB_PASSWORD') -API_KEY = os.getenv('API_KEY') - -def authenticate_user(username: str, password: str) -> bool: - # Parameterized query - safe from SQL injection - query = text('SELECT * FROM users WHERE username=:username') - result = db.execute(query, {'username': username}) - user = result.fetchone() - - if user: - # Strong hashing with salt - hashed = hashlib.sha256( - (password + user['salt']).encode() - ).hexdigest() - return hashed == user['password_hash'] - - return False - -def process_data(data: dict) -> dict: - # Safe data processing - no eval or 
exec - processed = { - 'id': data.get('id'), - 'name': data.get('name'), - 'value': data.get('value', 0) * 2 - } - return processed -""" - context = AnalysisContext(code_content=secure_code, filename="secure_app.py") - - findings = agent.analyze(context) - - # Should have 0 findings for secure code - assert len(findings) == 0 - - def test_partial_vulnerability_file(self, agent): - """Test file with mix of secure and vulnerable code.""" - mixed_code = """ -import hashlib - -# Secure part -def hash_file(filepath: str) -> str: - with open(filepath, 'rb') as f: - return hashlib.sha256(f.read()).hexdigest() - -# Vulnerable part -def legacy_hash(data: str) -> str: - # Old code - needs refactoring - return hashlib.md5(data.encode()).hexdigest() - -# Secure part -class Config: - DATABASE_URL = os.getenv('DATABASE_URL') - SECRET_KEY = os.getenv('SECRET_KEY') -""" - context = AnalysisContext(code_content=mixed_code, filename="utils.py") - - findings = agent.analyze(context) - - # Should only detect MD5 usage - assert len(findings) == 1 - assert findings[0].issue_type == "weak_cryptography" - assert "md5" in findings[0].message.lower() - assert findings[0].severity == Severity.MEDIUM - - def test_analysis_context_metadata_preserved(self, agent): - """Test that analysis context metadata is preserved in findings.""" - code = "result = eval(user_input)" - context = AnalysisContext(code_content=code, filename="vulnerable_script.py") - context.add_metadata("user_id", "test_user_123") - context.add_metadata("project", "SecurityTest") - - findings = agent.analyze(context) - - assert len(findings) >= 1 - # Verify agent name is set correctly - for finding in findings: - assert finding.agent_name == "SecurityAgent" - assert finding.detected_at is not None - - def test_large_file_performance(self, agent): - """Test SecurityAgent performance with larger file.""" - # Generate code with 100 functions - large_code = """ -import hashlib - -""" - for i in range(100): - large_code += f""" -def 
function_{i}(data): - # Safe function - return hashlib.sha256(data.encode()).hexdigest() - -""" - - # Add one vulnerability at the end - large_code += """ -# Single vulnerability -password = "HardcodedPassword123" -""" - - context = AnalysisContext(code_content=large_code, filename="large_module.py") - - findings = agent.analyze(context) - - # Should detect the single vulnerability - assert len(findings) == 1 - assert findings[0].issue_type == "hardcoded_credentials" - - # Verify finding points to correct line - assert "password" in findings[0].message.lower() +""" +Integration tests for SecurityAgent. + +Tests SecurityAgent with realistic vulnerable code samples +and verifies end-to-end behavior. +""" + +import pytest + +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class TestSecurityAgentIntegration: + """Integration tests for SecurityAgent with realistic code.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + @pytest.fixture + def vulnerable_web_app_code(self): + """Realistic vulnerable web application code.""" + return """ +import hashlib +import pickle +from flask import Flask, request + +app = Flask(__name__) + +# Hardcoded credentials +DB_PASSWORD = "MyDatabasePass123" +API_KEY = "sk_live_abc123xyz789" + +@app.route('/login', methods=['POST']) +def login(): + username = request.form['username'] + password = request.form['password'] + + # SQL injection vulnerability + query = f"SELECT * FROM users WHERE username='{username}' AND password='{password}'" + cursor.execute(query) + user = cursor.fetchone() + + if user: + # Weak hashing + session_token = hashlib.md5(username.encode()).hexdigest() + return {'token': session_token} + + return {'error': 'Invalid credentials'}, 401 + +@app.route('/execute', methods=['POST']) +def execute_code(): + code = request.form['code'] + + # Dangerous function - 
arbitrary code execution + result = eval(code) + + return {'result': result} + +@app.route('/load_data', methods=['POST']) +def load_data(): + data = request.form['data'] + + # Unsafe deserialization + obj = pickle.loads(data.encode()) + + return {'loaded': str(obj)} +""" + + def test_comprehensive_vulnerability_detection(self, agent, vulnerable_web_app_code): + """Test detection of all vulnerability types in realistic code.""" + context = AnalysisContext(code_content=vulnerable_web_app_code, filename="app.py") + + findings = agent.analyze(context) + + # Should detect multiple vulnerabilities + assert len(findings) >= 5 + + # Verify each vulnerability type is detected + issue_types = {f.issue_type for f in findings} + assert "hardcoded_credentials" in issue_types + assert "sql_injection" in issue_types + assert "weak_cryptography" in issue_types + assert "dangerous_function" in issue_types + + # Verify severity distribution + critical_count = sum(1 for f in findings if f.is_critical) + high_count = sum(1 for f in findings if f.is_high_or_critical) + + assert critical_count >= 2 # Password, API key, eval + assert high_count >= 4 # Including SQL injection + + # Verify findings have suggestions + for finding in findings: + assert finding.suggestion is not None + assert len(finding.suggestion) > 10 + + # Verify findings are sorted by severity + severities = [f.severity.value for f in findings] + expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] + + for i in range(len(severities) - 1): + assert expected_order.index(severities[i]) <= expected_order.index(severities[i + 1]) + + def test_secure_code_no_false_positives(self, agent): + """Test that secure code doesn't generate false positives.""" + secure_code = """ +import os +import hashlib +from sqlalchemy import create_engine, text + +# Secure credential handling +DB_PASSWORD = os.getenv('DB_PASSWORD') +API_KEY = os.getenv('API_KEY') + +def authenticate_user(username: str, password: str) -> bool: + # 
Parameterized query - safe from SQL injection + query = text('SELECT * FROM users WHERE username=:username') + result = db.execute(query, {'username': username}) + user = result.fetchone() + + if user: + # Strong hashing with salt + hashed = hashlib.sha256( + (password + user['salt']).encode() + ).hexdigest() + return hashed == user['password_hash'] + + return False + +def process_data(data: dict) -> dict: + # Safe data processing - no eval or exec + processed = { + 'id': data.get('id'), + 'name': data.get('name'), + 'value': data.get('value', 0) * 2 + } + return processed +""" + context = AnalysisContext(code_content=secure_code, filename="secure_app.py") + + findings = agent.analyze(context) + + # Should have 0 findings for secure code + assert len(findings) == 0 + + def test_partial_vulnerability_file(self, agent): + """Test file with mix of secure and vulnerable code.""" + mixed_code = """ +import hashlib + +# Secure part +def hash_file(filepath: str) -> str: + with open(filepath, 'rb') as f: + return hashlib.sha256(f.read()).hexdigest() + +# Vulnerable part +def legacy_hash(data: str) -> str: + # Old code - needs refactoring + return hashlib.md5(data.encode()).hexdigest() + +# Secure part +class Config: + DATABASE_URL = os.getenv('DATABASE_URL') + SECRET_KEY = os.getenv('SECRET_KEY') +""" + context = AnalysisContext(code_content=mixed_code, filename="utils.py") + + findings = agent.analyze(context) + + # Should only detect MD5 usage + assert len(findings) == 1 + assert findings[0].issue_type == "weak_cryptography" + assert "md5" in findings[0].message.lower() + assert findings[0].severity == Severity.MEDIUM + + def test_analysis_context_metadata_preserved(self, agent): + """Test that analysis context metadata is preserved in findings.""" + code = "result = eval(user_input)" + context = AnalysisContext(code_content=code, filename="vulnerable_script.py") + context.add_metadata("user_id", "test_user_123") + context.add_metadata("project", "SecurityTest") + + 
findings = agent.analyze(context) + + assert len(findings) >= 1 + # Verify agent name is set correctly + for finding in findings: + assert finding.agent_name == "SecurityAgent" + assert finding.detected_at is not None + + def test_large_file_performance(self, agent): + """Test SecurityAgent performance with larger file.""" + # Generate code with 100 functions + large_code = """ +import hashlib + +""" + for i in range(100): + large_code += f""" +def function_{i}(data): + # Safe function + return hashlib.sha256(data.encode()).hexdigest() + +""" + + # Add one vulnerability at the end + large_code += """ +# Single vulnerability +password = "HardcodedPassword123" +""" + + context = AnalysisContext(code_content=large_code, filename="large_module.py") + + findings = agent.analyze(context) + + # Should detect the single vulnerability + assert len(findings) == 1 + assert findings[0].issue_type == "hardcoded_credentials" + + # Verify finding points to correct line + assert "password" in findings[0].message.lower() diff --git a/backend/tests/integration/test_style_agent_integration.py b/backend/tests/integration/test_style_agent_integration.py index 1612b9c..2ac502a 100644 --- a/backend/tests/integration/test_style_agent_integration.py +++ b/backend/tests/integration/test_style_agent_integration.py @@ -1,343 +1,343 @@ -"""Tests de integración para StyleAgent. - -Estos tests verifican el funcionamiento del StyleAgent con código -realista que contiene múltiples tipos de problemas de estilo. 
-""" - -from typing import Any, Dict - -import pytest - -from src.agents.style_agent import StyleAgent -from src.core.events.event_bus import EventBus -from src.core.events.observers import EventObserver -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Severity - - -class MockEventObserver(EventObserver): - """Observer de prueba para capturar eventos.""" - - def __init__(self): - self.events_received = [] - - def on_event(self, event_type: str, data: Dict[str, Any]) -> None: - """Captura eventos recibidos.""" - self.events_received.append((event_type, data)) - - -class TestStyleAgentComprehensiveAnalysis: - """Tests de análisis completo con múltiples problemas.""" - - def test_comprehensive_style_analysis(self): - """StyleAgent detecta múltiples tipos de problemas en código realista.""" - code = """ -import os -import sys -import os - -def badFunction(): - x = "Esta es una linea que tiene mucho codigo y excede el limite de 88 caracteres establecido por PEP 8" - return x - -class badclass: - def anotherBadMethod(): - pass - -""" - context = AnalysisContext(code_content=code, filename="test_file.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Debe detectar varios tipos de problemas - assert len(findings) >= 3, "Debe detectar múltiples problemas" - - # Verificar categorías detectadas - categories = {f.issue_type for f in findings} - - # Al menos algunas de estas categorías deben estar - expected_categories = {"style/pep8", "style/naming", "style/documentation", "style/imports"} - found_expected = categories.intersection(expected_categories) - assert ( - len(found_expected) >= 2 - ), f"Debe detectar al menos 2 categorías, encontró: {categories}" - - def test_clean_code_minimal_findings(self): - """Código limpio produce mínimos findings.""" - code = '''"""Módulo de prueba bien documentado.""" - - -def my_function(value: int) -> int: - """Retorna el valor duplicado. - - Args: - value: Valor a duplicar. 
- - Returns: - El valor multiplicado por 2. - """ - return value * 2 - - -class MyClass: - """Clase de prueba bien documentada.""" - - def __init__(self): - """Inicializa la clase.""" - self.value = 0 - - def get_value(self) -> int: - """Retorna el valor actual.""" - return self.value -''' - context = AnalysisContext(code_content=code, filename="clean_code.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Código limpio debería tener pocos o ningún finding - assert ( - len(findings) <= 3 - ), f"Código limpio no debería tener muchos findings: {len(findings)}" - - def test_findings_contain_required_fields(self): - """Todos los findings tienen los campos requeridos.""" - code = """ -def myBadFunc(): - x = 1 -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - for finding in findings: - # Campos requeridos - assert finding.message is not None - assert finding.line_number is not None - assert finding.issue_type is not None - assert finding.severity is not None - assert finding.agent_name is not None - - def test_agent_name_is_set_correctly(self): - """El nombre del agente está correcto en todos los findings.""" - code = """ -def badFunc(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - for finding in findings: - assert finding.agent_name == "StyleAgent" - - -class TestStyleAgentEventLifecycle: - """Tests del ciclo de vida completo con eventos.""" - - def test_full_event_lifecycle(self): - """Verifica el ciclo completo: STARTED -> (análisis) -> COMPLETED.""" - event_bus = EventBus() - event_bus.clear() # Limpiar observers previos - observer = MockEventObserver() - event_bus.subscribe(observer) - - agent = StyleAgent() - agent.event_bus = event_bus - - code = "x = 1" - context = AnalysisContext(code_content=code, filename="test.py") - agent.analyze(context) - - # Extraer 
tipos de eventos - event_types = [e[0] for e in observer.events_received] - - assert "AGENT_STARTED" in event_types, "Debe emitir AGENT_STARTED" - assert "AGENT_COMPLETED" in event_types, "Debe emitir AGENT_COMPLETED" - assert "AGENT_FAILED" not in event_types, "No debe emitir AGENT_FAILED en caso exitoso" - - -class TestStyleAgentWithRealWorldPatterns: - """Tests con patrones de código del mundo real.""" - - def test_flask_route_pattern(self): - """Analiza patrón típico de ruta Flask.""" - code = ''' -from flask import Flask, jsonify - -app = Flask(__name__) - - -@app.route("/api/users") -def get_users(): - """Retorna lista de usuarios.""" - return jsonify({"users": []}) -''' - context = AnalysisContext(code_content=code, filename="routes.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Este código es bastante limpio, pocos findings esperados - assert isinstance(findings, list) - - def test_dataclass_pattern(self): - """Analiza patrón de dataclass.""" - code = ''' -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class User: - """Representa un usuario del sistema.""" - - id: int - name: str - email: str - age: Optional[int] = None -''' - context = AnalysisContext(code_content=code, filename="models.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Código limpio, mínimos findings - assert isinstance(findings, list) - - def test_test_file_pattern(self): - """Analiza patrón típico de archivo de tests.""" - code = ''' -import pytest -from mymodule import MyClass - - -class TestMyClass: - """Tests para MyClass.""" - - def test_initialization(self): - """Verifica inicialización correcta.""" - obj = MyClass() - assert obj is not None - - def test_method_call(self): - """Verifica llamada a método.""" - obj = MyClass() - result = obj.do_something() - assert result == "expected" -''' - context = AnalysisContext(code_content=code, filename="test_mymodule.py") - - agent = StyleAgent() - findings = 
agent.analyze(context) - - # Tests bien escritos, pocos findings - assert isinstance(findings, list) - - -class TestStyleAgentSeverityLevels: - """Tests para niveles de severidad apropiados.""" - - def test_severity_levels_are_valid(self): - """Todas las severidades son valores válidos del enum.""" - code = """ -def badFunc(): - x = "linea muy larga " + "a" * 100 -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - valid_severities = {Severity.LOW, Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL} - - for finding in findings: - assert finding.severity in valid_severities, f"Severidad inválida: {finding.severity}" - - def test_naming_issues_are_medium_or_lower(self): - """Problemas de naming son MEDIUM o menor severidad.""" - code = """ -def badFunction(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - naming_findings = [f for f in findings if f.issue_type == "style/naming"] - - for f in naming_findings: - assert f.severity in {Severity.LOW, Severity.MEDIUM} - - -class TestStyleAgentEdgeCases: - """Tests de casos borde.""" - - def test_unicode_content(self): - """Maneja contenido con caracteres unicode.""" - code = ''' -def greet(): - """Saluda en español.""" - return "¡Hola, cómo estás! 
你好 🎉" -''' - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # No debe crashear - assert isinstance(findings, list) - - def test_very_long_file(self): - """Maneja archivos largos.""" - # Generar código con 500 funciones - lines = ["# Generated file\n"] - for i in range(100): - lines.append( - f''' -def function_{i}(): - """Función número {i}.""" - return {i} -''' - ) - code = "\n".join(lines) - - context = AnalysisContext(code_content=code, filename="large_file.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # No debe crashear ni tomar demasiado tiempo - assert isinstance(findings, list) - - def test_only_comments(self): - """Maneja archivo con solo comentarios.""" - code = """ -# Este es un comentario -# Otro comentario -# Más comentarios -""" - context = AnalysisContext(code_content=code, filename="comments.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - assert isinstance(findings, list) - - def test_only_imports(self): - """Maneja archivo con solo imports.""" - code = """ -import os -import sys -from typing import List, Dict -""" - context = AnalysisContext(code_content=code, filename="imports.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - assert isinstance(findings, list) +"""Tests de integración para StyleAgent. + +Estos tests verifican el funcionamiento del StyleAgent con código +realista que contiene múltiples tipos de problemas de estilo. 
+""" + +from typing import Any, Dict + +import pytest + +from src.agents.style_agent import StyleAgent +from src.core.events.event_bus import EventBus +from src.core.events.observers import EventObserver +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class MockEventObserver(EventObserver): + """Observer de prueba para capturar eventos.""" + + def __init__(self): + self.events_received = [] + + def on_event(self, event_type: str, data: Dict[str, Any]) -> None: + """Captura eventos recibidos.""" + self.events_received.append((event_type, data)) + + +class TestStyleAgentComprehensiveAnalysis: + """Tests de análisis completo con múltiples problemas.""" + + def test_comprehensive_style_analysis(self): + """StyleAgent detecta múltiples tipos de problemas en código realista.""" + code = """ +import os +import sys +import os + +def badFunction(): + x = "Esta es una linea que tiene mucho codigo y excede el limite de 88 caracteres establecido por PEP 8" + return x + +class badclass: + def anotherBadMethod(): + pass + +""" + context = AnalysisContext(code_content=code, filename="test_file.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Debe detectar varios tipos de problemas + assert len(findings) >= 3, "Debe detectar múltiples problemas" + + # Verificar categorías detectadas + categories = {f.issue_type for f in findings} + + # Al menos algunas de estas categorías deben estar + expected_categories = {"style/pep8", "style/naming", "style/documentation", "style/imports"} + found_expected = categories.intersection(expected_categories) + assert ( + len(found_expected) >= 2 + ), f"Debe detectar al menos 2 categorías, encontró: {categories}" + + def test_clean_code_minimal_findings(self): + """Código limpio produce mínimos findings.""" + code = '''"""Módulo de prueba bien documentado.""" + + +def my_function(value: int) -> int: + """Retorna el valor duplicado. + + Args: + value: Valor a duplicar. 
+ + Returns: + El valor multiplicado por 2. + """ + return value * 2 + + +class MyClass: + """Clase de prueba bien documentada.""" + + def __init__(self): + """Inicializa la clase.""" + self.value = 0 + + def get_value(self) -> int: + """Retorna el valor actual.""" + return self.value +''' + context = AnalysisContext(code_content=code, filename="clean_code.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Código limpio debería tener pocos o ningún finding + assert ( + len(findings) <= 3 + ), f"Código limpio no debería tener muchos findings: {len(findings)}" + + def test_findings_contain_required_fields(self): + """Todos los findings tienen los campos requeridos.""" + code = """ +def myBadFunc(): + x = 1 +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + for finding in findings: + # Campos requeridos + assert finding.message is not None + assert finding.line_number is not None + assert finding.issue_type is not None + assert finding.severity is not None + assert finding.agent_name is not None + + def test_agent_name_is_set_correctly(self): + """El nombre del agente está correcto en todos los findings.""" + code = """ +def badFunc(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + for finding in findings: + assert finding.agent_name == "StyleAgent" + + +class TestStyleAgentEventLifecycle: + """Tests del ciclo de vida completo con eventos.""" + + def test_full_event_lifecycle(self): + """Verifica el ciclo completo: STARTED -> (análisis) -> COMPLETED.""" + event_bus = EventBus() + event_bus.clear() # Limpiar observers previos + observer = MockEventObserver() + event_bus.subscribe(observer) + + agent = StyleAgent() + agent.event_bus = event_bus + + code = "x = 1" + context = AnalysisContext(code_content=code, filename="test.py") + agent.analyze(context) + + # Extraer 
tipos de eventos + event_types = [e[0] for e in observer.events_received] + + assert "AGENT_STARTED" in event_types, "Debe emitir AGENT_STARTED" + assert "AGENT_COMPLETED" in event_types, "Debe emitir AGENT_COMPLETED" + assert "AGENT_FAILED" not in event_types, "No debe emitir AGENT_FAILED en caso exitoso" + + +class TestStyleAgentWithRealWorldPatterns: + """Tests con patrones de código del mundo real.""" + + def test_flask_route_pattern(self): + """Analiza patrón típico de ruta Flask.""" + code = ''' +from flask import Flask, jsonify + +app = Flask(__name__) + + +@app.route("/api/users") +def get_users(): + """Retorna lista de usuarios.""" + return jsonify({"users": []}) +''' + context = AnalysisContext(code_content=code, filename="routes.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Este código es bastante limpio, pocos findings esperados + assert isinstance(findings, list) + + def test_dataclass_pattern(self): + """Analiza patrón de dataclass.""" + code = ''' +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class User: + """Representa un usuario del sistema.""" + + id: int + name: str + email: str + age: Optional[int] = None +''' + context = AnalysisContext(code_content=code, filename="models.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Código limpio, mínimos findings + assert isinstance(findings, list) + + def test_test_file_pattern(self): + """Analiza patrón típico de archivo de tests.""" + code = ''' +import pytest +from mymodule import MyClass + + +class TestMyClass: + """Tests para MyClass.""" + + def test_initialization(self): + """Verifica inicialización correcta.""" + obj = MyClass() + assert obj is not None + + def test_method_call(self): + """Verifica llamada a método.""" + obj = MyClass() + result = obj.do_something() + assert result == "expected" +''' + context = AnalysisContext(code_content=code, filename="test_mymodule.py") + + agent = StyleAgent() + findings = 
agent.analyze(context) + + # Tests bien escritos, pocos findings + assert isinstance(findings, list) + + +class TestStyleAgentSeverityLevels: + """Tests para niveles de severidad apropiados.""" + + def test_severity_levels_are_valid(self): + """Todas las severidades son valores válidos del enum.""" + code = """ +def badFunc(): + x = "linea muy larga " + "a" * 100 +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + valid_severities = {Severity.LOW, Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL} + + for finding in findings: + assert finding.severity in valid_severities, f"Severidad inválida: {finding.severity}" + + def test_naming_issues_are_medium_or_lower(self): + """Problemas de naming son MEDIUM o menor severidad.""" + code = """ +def badFunction(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + naming_findings = [f for f in findings if f.issue_type == "style/naming"] + + for f in naming_findings: + assert f.severity in {Severity.LOW, Severity.MEDIUM} + + +class TestStyleAgentEdgeCases: + """Tests de casos borde.""" + + def test_unicode_content(self): + """Maneja contenido con caracteres unicode.""" + code = ''' +def greet(): + """Saluda en español.""" + return "¡Hola, cómo estás! 
你好 🎉" +''' + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # No debe crashear + assert isinstance(findings, list) + + def test_very_long_file(self): + """Maneja archivos largos.""" + # Generar código con 500 funciones + lines = ["# Generated file\n"] + for i in range(100): + lines.append( + f''' +def function_{i}(): + """Función número {i}.""" + return {i} +''' + ) + code = "\n".join(lines) + + context = AnalysisContext(code_content=code, filename="large_file.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # No debe crashear ni tomar demasiado tiempo + assert isinstance(findings, list) + + def test_only_comments(self): + """Maneja archivo con solo comentarios.""" + code = """ +# Este es un comentario +# Otro comentario +# Más comentarios +""" + context = AnalysisContext(code_content=code, filename="comments.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + assert isinstance(findings, list) + + def test_only_imports(self): + """Maneja archivo con solo imports.""" + code = """ +import os +import sys +from typing import List, Dict +""" + context = AnalysisContext(code_content=code, filename="imports.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + assert isinstance(findings, list) diff --git a/backend/tests/test_ai_service.py b/backend/tests/test_ai_service.py index 40c3db0..33175a5 100644 --- a/backend/tests/test_ai_service.py +++ b/backend/tests/test_ai_service.py @@ -1,421 +1,421 @@ -""" -Tests for AIExplainerService and related components. 
- -Tests Sprint 3 functionality including: -- Rate limiting -- MCP Context Enricher -- AI explanation generation -""" - -from datetime import datetime, timedelta, timezone -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.core.config.mcp_config import ( - OWASP_TOP_10, - SecurityContext, - format_security_context, - get_security_context, -) -from src.external.interfaces.ai_client import AIResponse -from src.schemas.ai_explanation import AIExplanation, RateLimitInfo -from src.schemas.finding import Finding, Severity -from src.services.ai_service import ( - AIExplainerService, - AIExplanationError, - InMemoryRateLimiter, - RateLimitExceeded, -) -from src.services.mcp_context_enricher import ( - EnrichedContext, - MCPContextEnricher, -) - -# ============================================================ -# Fixtures -# ============================================================ - - -@pytest.fixture -def sample_security_finding() -> Finding: - """Create a sample security finding for testing.""" - return Finding( - severity=Severity.CRITICAL, - issue_type="dangerous_function", - message="Use of eval() detected - allows arbitrary code execution", - line_number=42, - agent_name="SecurityAgent", - code_snippet="result = eval(user_input)", - suggestion="Use ast.literal_eval() for safe literal evaluation", - rule_id="SEC001_EVAL", - ) - - -@pytest.fixture -def sample_style_finding() -> Finding: - """Create a sample style finding (non-security).""" - return Finding( - severity=Severity.LOW, - issue_type="line_too_long", - message="Line exceeds 88 characters", - line_number=100, - agent_name="StyleAgent", - code_snippet="x = 'a very long string' * 10 # this line is too long", - suggestion="Break the line into multiple lines", - rule_id="STYLE001_LINE_LENGTH", - ) - - -@pytest.fixture -def mock_ai_client(): - """Create a mock AI client.""" - client = AsyncMock() - client.generate_explanation = AsyncMock( - return_value=AIResponse( - 
content='{"explanation": "Test explanation", "suggested_fix": "# fixed code", "attack_example": "evil code", "references": ["CWE-94"]}', - model_name="gemini-1.5-flash-001", - tokens_used=150, - finish_reason="STOP", - ) - ) - client.is_configured = True - client.model_name = "gemini-1.5-flash-001" - return client - - -@pytest.fixture -def rate_limiter(): - """Create a rate limiter with low limit for testing.""" - return InMemoryRateLimiter(limit_per_hour=3) - - -# ============================================================ -# Tests for MCP Config (OWASP Top 10) -# ============================================================ - - -class TestMCPConfig: - """Tests for MCP configuration and OWASP lookups.""" - - def test_owasp_top_10_has_all_categories(self): - """OWASP dictionary should have all 10 categories.""" - assert len(OWASP_TOP_10) == 10 - - # Las claves son descriptivas, las categorías OWASP están en los valores - expected_categories = [ - "A01:2021", - "A02:2021", - "A03:2021", - "A04:2021", - "A05:2021", - "A06:2021", - "A07:2021", - "A08:2021", - "A09:2021", - "A10:2021", - ] - # Extraer las categorías de los valores del diccionario - actual_categories = [ctx.category for ctx in OWASP_TOP_10.values()] - for cat in expected_categories: - found = any(cat in actual_cat for actual_cat in actual_categories) - assert found, f"Missing OWASP category: {cat}" - - def test_get_security_context_by_rule_id(self): - """Should find security context by rule_id.""" - context = get_security_context(rule_id="SEC001_EVAL") - - assert context is not None - assert "Injection" in context.category or "Inyección" in context.category - - def test_get_security_context_by_issue_type(self): - """Should find security context by issue_type.""" - context = get_security_context(issue_type="sql_injection") - - assert context is not None - assert context.cwe_ids is not None - - def test_get_security_context_unknown(self): - """Should return None for unknown rule_id.""" - context = 
get_security_context(rule_id="UNKNOWN_RULE") - assert context is None - - def test_format_security_context(self): - """Should format security context as text.""" - context = SecurityContext( - category="A03:2021 - Injection", - description="Test description", - impact="Test impact", - mitigation="Test mitigation", - references=["https://owasp.org"], - cwe_ids=["CWE-94"], - ) - - formatted = format_security_context(context) - - assert "A03:2021" in formatted - assert "Test description" in formatted - assert "CWE-94" in formatted - - -# ============================================================ -# Tests for MCP Context Enricher -# ============================================================ - - -class TestMCPContextEnricher: - """Tests for the MCP Context Enricher service.""" - - @pytest.mark.asyncio - async def test_enrich_security_finding(self, sample_security_finding): - """Should enrich security findings with OWASP context.""" - enricher = MCPContextEnricher() - - result = await enricher.enrich(sample_security_finding) - - assert isinstance(result, EnrichedContext) - assert result.finding == sample_security_finding - assert result.has_security_context - assert result.security_context is not None - assert result.is_security_finding - - @pytest.mark.asyncio - async def test_enrich_non_security_finding(self, sample_style_finding): - """Should handle non-security findings gracefully.""" - enricher = MCPContextEnricher() - - result = await enricher.enrich(sample_style_finding) - - assert isinstance(result, EnrichedContext) - assert result.finding == sample_style_finding - # Style findings don't have OWASP context - assert not result.is_security_finding - - @pytest.mark.asyncio - async def test_enrich_batch(self, sample_security_finding, sample_style_finding): - """Should enrich multiple findings.""" - enricher = MCPContextEnricher() - findings = [sample_security_finding, sample_style_finding] - - results = await enricher.enrich_batch(findings) - - assert 
len(results) == 2 - assert all(isinstance(r, EnrichedContext) for r in results) - - @pytest.mark.asyncio - async def test_formatted_context_includes_finding_info(self, sample_security_finding): - """Formatted context should include finding details.""" - enricher = MCPContextEnricher() - - result = await enricher.enrich(sample_security_finding) - - assert ( - "eval()" in result.formatted_prompt_context - or "dangerous_function" in result.formatted_prompt_context - ) - assert str(sample_security_finding.line_number) in result.formatted_prompt_context - - -# ============================================================ -# Tests for In-Memory Rate Limiter -# ============================================================ - - -class TestInMemoryRateLimiter: - """Tests for the in-memory rate limiter.""" - - def test_check_and_consume_allows_within_limit(self, rate_limiter): - """Should allow requests within limit.""" - user_id = "user-123" - - # First 3 requests should succeed (limit is 3) - for i in range(3): - info = rate_limiter.check_and_consume(user_id) - assert info.requests_remaining == 2 - i - - def test_check_and_consume_blocks_over_limit(self, rate_limiter): - """Should block requests over limit.""" - user_id = "user-456" - - # Consume all 3 requests - for _ in range(3): - rate_limiter.check_and_consume(user_id) - - # 4th request should raise - with pytest.raises(RateLimitExceeded) as exc_info: - rate_limiter.check_and_consume(user_id) - - assert exc_info.value.rate_limit_info.requests_remaining == 0 - - def test_rate_limit_per_user(self, rate_limiter): - """Each user should have independent limits.""" - user1 = "user-1" - user2 = "user-2" - - # Exhaust user1's limit - for _ in range(3): - rate_limiter.check_and_consume(user1) - - # user2 should still be able to make requests - info = rate_limiter.check_and_consume(user2) - assert info.requests_remaining == 2 - - def test_get_remaining_without_consuming(self, rate_limiter): - """get_remaining should not consume a 
request.""" - user_id = "user-789" - - info1 = rate_limiter.get_remaining(user_id) - info2 = rate_limiter.get_remaining(user_id) - - assert info1.requests_remaining == info2.requests_remaining == 3 - - -# ============================================================ -# Tests for AI Explainer Service -# ============================================================ - - -class TestAIExplainerService: - """Tests for the AI Explainer Service.""" - - @pytest.mark.asyncio - async def test_explain_finding_success(self, sample_security_finding, mock_ai_client): - """Should successfully generate explanation.""" - service = AIExplainerService( - ai_client=mock_ai_client, - rate_limiter=InMemoryRateLimiter(limit_per_hour=10), - ) - - explanation, rate_info = await service.explain_finding( - finding=sample_security_finding, - user_id="test-user", - ) - - assert isinstance(explanation, AIExplanation) - assert explanation.explanation == "Test explanation" - assert explanation.model_used == "gemini-1.5-flash-001" - assert rate_info.requests_remaining == 9 - - @pytest.mark.asyncio - async def test_explain_finding_rate_limited(self, sample_security_finding, mock_ai_client): - """Should raise when rate limit exceeded.""" - service = AIExplainerService( - ai_client=mock_ai_client, - rate_limiter=InMemoryRateLimiter(limit_per_hour=1), - ) - - # First request succeeds - await service.explain_finding( - finding=sample_security_finding, - user_id="limited-user", - ) - - # Second request should fail - with pytest.raises(RateLimitExceeded): - await service.explain_finding( - finding=sample_security_finding, - user_id="limited-user", - ) - - @pytest.mark.asyncio - async def test_explain_finding_parses_json_response( - self, sample_security_finding, mock_ai_client - ): - """Should parse JSON response from AI.""" - # Set up mock to return JSON - mock_ai_client.generate_explanation.return_value = AIResponse( - content='{"explanation": "Detailed explanation", "suggested_fix": "fixed_code()", 
"attack_example": "exploit", "references": ["CWE-94", "OWASP A03"]}', - model_name="gemini-1.5-pro-001", - tokens_used=200, - finish_reason="STOP", - ) - - service = AIExplainerService( - ai_client=mock_ai_client, - rate_limiter=InMemoryRateLimiter(limit_per_hour=10), - ) - - explanation, _ = await service.explain_finding( - finding=sample_security_finding, - user_id="test-user", - ) - - assert explanation.explanation == "Detailed explanation" - assert explanation.suggested_fix == "fixed_code()" - assert explanation.attack_example == "exploit" - assert "CWE-94" in explanation.references - - @pytest.mark.asyncio - async def test_explain_finding_handles_non_json_response( - self, sample_security_finding, mock_ai_client - ): - """Should handle non-JSON response gracefully.""" - mock_ai_client.generate_explanation.return_value = AIResponse( - content="This is a plain text response without JSON formatting.", - model_name="gemini-1.5-flash-001", - tokens_used=50, - finish_reason="STOP", - ) - - service = AIExplainerService( - ai_client=mock_ai_client, - rate_limiter=InMemoryRateLimiter(limit_per_hour=10), - ) - - explanation, _ = await service.explain_finding( - finding=sample_security_finding, - user_id="test-user", - ) - - # Should use raw content as explanation - assert "plain text response" in explanation.explanation - - def test_is_configured_delegates_to_client(self, mock_ai_client): - """is_configured should delegate to AI client.""" - service = AIExplainerService(ai_client=mock_ai_client) - - assert service.is_configured == mock_ai_client.is_configured - - -# ============================================================ -# Tests for AIExplanation Schema -# ============================================================ - - -class TestAIExplanationSchema: - """Tests for AIExplanation Pydantic schema.""" - - def test_to_dict_serialization(self): - """Should serialize to dict for JSONB storage.""" - explanation = AIExplanation( - explanation="Test explanation", - 
suggested_fix="# fixed", - attack_example="exploit code", - references=["CWE-94"], - model_used="gemini-1.5-flash", - tokens_used=100, - ) - - data = explanation.to_dict() - - assert data["explanation"] == "Test explanation" - assert data["tokens_used"] == 100 - assert "generated_at" in data - - def test_from_dict_deserialization(self): - """Should deserialize from JSONB dict.""" - data = { - "explanation": "Test explanation with sufficient length for validation", - "suggested_fix": "# fix", - "attack_example": None, - "references": ["CWE-1"], - "model_used": "test-model", - "tokens_used": 50, - "generated_at": "2024-01-15T10:30:00+00:00", - } - - explanation = AIExplanation.from_dict(data) - - assert explanation.explanation == "Test explanation with sufficient length for validation" - assert explanation.model_used == "test-model" - assert explanation.generated_at.year == 2024 +""" +Tests for AIExplainerService and related components. + +Tests Sprint 3 functionality including: +- Rate limiting +- MCP Context Enricher +- AI explanation generation +""" + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.core.config.mcp_config import ( + OWASP_TOP_10, + SecurityContext, + format_security_context, + get_security_context, +) +from src.external.interfaces.ai_client import AIResponse +from src.schemas.ai_explanation import AIExplanation, RateLimitInfo +from src.schemas.finding import Finding, Severity +from src.services.ai_service import ( + AIExplainerService, + AIExplanationError, + InMemoryRateLimiter, + RateLimitExceeded, +) +from src.services.mcp_context_enricher import ( + EnrichedContext, + MCPContextEnricher, +) + +# ============================================================ +# Fixtures +# ============================================================ + + +@pytest.fixture +def sample_security_finding() -> Finding: + """Create a sample security finding for testing.""" + return 
Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval() detected - allows arbitrary code execution", + line_number=42, + agent_name="SecurityAgent", + code_snippet="result = eval(user_input)", + suggestion="Use ast.literal_eval() for safe literal evaluation", + rule_id="SEC001_EVAL", + ) + + +@pytest.fixture +def sample_style_finding() -> Finding: + """Create a sample style finding (non-security).""" + return Finding( + severity=Severity.LOW, + issue_type="line_too_long", + message="Line exceeds 88 characters", + line_number=100, + agent_name="StyleAgent", + code_snippet="x = 'a very long string' * 10 # this line is too long", + suggestion="Break the line into multiple lines", + rule_id="STYLE001_LINE_LENGTH", + ) + + +@pytest.fixture +def mock_ai_client(): + """Create a mock AI client.""" + client = AsyncMock() + client.generate_explanation = AsyncMock( + return_value=AIResponse( + content='{"explanation": "Test explanation", "suggested_fix": "# fixed code", "attack_example": "evil code", "references": ["CWE-94"]}', + model_name="gemini-1.5-flash-001", + tokens_used=150, + finish_reason="STOP", + ) + ) + client.is_configured = True + client.model_name = "gemini-1.5-flash-001" + return client + + +@pytest.fixture +def rate_limiter(): + """Create a rate limiter with low limit for testing.""" + return InMemoryRateLimiter(limit_per_hour=3) + + +# ============================================================ +# Tests for MCP Config (OWASP Top 10) +# ============================================================ + + +class TestMCPConfig: + """Tests for MCP configuration and OWASP lookups.""" + + def test_owasp_top_10_has_all_categories(self): + """OWASP dictionary should have all 10 categories.""" + assert len(OWASP_TOP_10) == 10 + + # Las claves son descriptivas, las categorías OWASP están en los valores + expected_categories = [ + "A01:2021", + "A02:2021", + "A03:2021", + "A04:2021", + "A05:2021", + "A06:2021", + "A07:2021", + 
"A08:2021", + "A09:2021", + "A10:2021", + ] + # Extraer las categorías de los valores del diccionario + actual_categories = [ctx.category for ctx in OWASP_TOP_10.values()] + for cat in expected_categories: + found = any(cat in actual_cat for actual_cat in actual_categories) + assert found, f"Missing OWASP category: {cat}" + + def test_get_security_context_by_rule_id(self): + """Should find security context by rule_id.""" + context = get_security_context(rule_id="SEC001_EVAL") + + assert context is not None + assert "Injection" in context.category or "Inyección" in context.category + + def test_get_security_context_by_issue_type(self): + """Should find security context by issue_type.""" + context = get_security_context(issue_type="sql_injection") + + assert context is not None + assert context.cwe_ids is not None + + def test_get_security_context_unknown(self): + """Should return None for unknown rule_id.""" + context = get_security_context(rule_id="UNKNOWN_RULE") + assert context is None + + def test_format_security_context(self): + """Should format security context as text.""" + context = SecurityContext( + category="A03:2021 - Injection", + description="Test description", + impact="Test impact", + mitigation="Test mitigation", + references=["https://owasp.org"], + cwe_ids=["CWE-94"], + ) + + formatted = format_security_context(context) + + assert "A03:2021" in formatted + assert "Test description" in formatted + assert "CWE-94" in formatted + + +# ============================================================ +# Tests for MCP Context Enricher +# ============================================================ + + +class TestMCPContextEnricher: + """Tests for the MCP Context Enricher service.""" + + @pytest.mark.asyncio + async def test_enrich_security_finding(self, sample_security_finding): + """Should enrich security findings with OWASP context.""" + enricher = MCPContextEnricher() + + result = await enricher.enrich(sample_security_finding) + + assert 
isinstance(result, EnrichedContext) + assert result.finding == sample_security_finding + assert result.has_security_context + assert result.security_context is not None + assert result.is_security_finding + + @pytest.mark.asyncio + async def test_enrich_non_security_finding(self, sample_style_finding): + """Should handle non-security findings gracefully.""" + enricher = MCPContextEnricher() + + result = await enricher.enrich(sample_style_finding) + + assert isinstance(result, EnrichedContext) + assert result.finding == sample_style_finding + # Style findings don't have OWASP context + assert not result.is_security_finding + + @pytest.mark.asyncio + async def test_enrich_batch(self, sample_security_finding, sample_style_finding): + """Should enrich multiple findings.""" + enricher = MCPContextEnricher() + findings = [sample_security_finding, sample_style_finding] + + results = await enricher.enrich_batch(findings) + + assert len(results) == 2 + assert all(isinstance(r, EnrichedContext) for r in results) + + @pytest.mark.asyncio + async def test_formatted_context_includes_finding_info(self, sample_security_finding): + """Formatted context should include finding details.""" + enricher = MCPContextEnricher() + + result = await enricher.enrich(sample_security_finding) + + assert ( + "eval()" in result.formatted_prompt_context + or "dangerous_function" in result.formatted_prompt_context + ) + assert str(sample_security_finding.line_number) in result.formatted_prompt_context + + +# ============================================================ +# Tests for In-Memory Rate Limiter +# ============================================================ + + +class TestInMemoryRateLimiter: + """Tests for the in-memory rate limiter.""" + + def test_check_and_consume_allows_within_limit(self, rate_limiter): + """Should allow requests within limit.""" + user_id = "user-123" + + # First 3 requests should succeed (limit is 3) + for i in range(3): + info = 
rate_limiter.check_and_consume(user_id) + assert info.requests_remaining == 2 - i + + def test_check_and_consume_blocks_over_limit(self, rate_limiter): + """Should block requests over limit.""" + user_id = "user-456" + + # Consume all 3 requests + for _ in range(3): + rate_limiter.check_and_consume(user_id) + + # 4th request should raise + with pytest.raises(RateLimitExceeded) as exc_info: + rate_limiter.check_and_consume(user_id) + + assert exc_info.value.rate_limit_info.requests_remaining == 0 + + def test_rate_limit_per_user(self, rate_limiter): + """Each user should have independent limits.""" + user1 = "user-1" + user2 = "user-2" + + # Exhaust user1's limit + for _ in range(3): + rate_limiter.check_and_consume(user1) + + # user2 should still be able to make requests + info = rate_limiter.check_and_consume(user2) + assert info.requests_remaining == 2 + + def test_get_remaining_without_consuming(self, rate_limiter): + """get_remaining should not consume a request.""" + user_id = "user-789" + + info1 = rate_limiter.get_remaining(user_id) + info2 = rate_limiter.get_remaining(user_id) + + assert info1.requests_remaining == info2.requests_remaining == 3 + + +# ============================================================ +# Tests for AI Explainer Service +# ============================================================ + + +class TestAIExplainerService: + """Tests for the AI Explainer Service.""" + + @pytest.mark.asyncio + async def test_explain_finding_success(self, sample_security_finding, mock_ai_client): + """Should successfully generate explanation.""" + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, rate_info = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + assert isinstance(explanation, AIExplanation) + assert explanation.explanation == "Test explanation" + assert explanation.model_used == "gemini-1.5-flash-001" + assert 
rate_info.requests_remaining == 9 + + @pytest.mark.asyncio + async def test_explain_finding_rate_limited(self, sample_security_finding, mock_ai_client): + """Should raise when rate limit exceeded.""" + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=1), + ) + + # First request succeeds + await service.explain_finding( + finding=sample_security_finding, + user_id="limited-user", + ) + + # Second request should fail + with pytest.raises(RateLimitExceeded): + await service.explain_finding( + finding=sample_security_finding, + user_id="limited-user", + ) + + @pytest.mark.asyncio + async def test_explain_finding_parses_json_response( + self, sample_security_finding, mock_ai_client + ): + """Should parse JSON response from AI.""" + # Set up mock to return JSON + mock_ai_client.generate_explanation.return_value = AIResponse( + content='{"explanation": "Detailed explanation", "suggested_fix": "fixed_code()", "attack_example": "exploit", "references": ["CWE-94", "OWASP A03"]}', + model_name="gemini-1.5-pro-001", + tokens_used=200, + finish_reason="STOP", + ) + + service = AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, _ = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + assert explanation.explanation == "Detailed explanation" + assert explanation.suggested_fix == "fixed_code()" + assert explanation.attack_example == "exploit" + assert "CWE-94" in explanation.references + + @pytest.mark.asyncio + async def test_explain_finding_handles_non_json_response( + self, sample_security_finding, mock_ai_client + ): + """Should handle non-JSON response gracefully.""" + mock_ai_client.generate_explanation.return_value = AIResponse( + content="This is a plain text response without JSON formatting.", + model_name="gemini-1.5-flash-001", + tokens_used=50, + finish_reason="STOP", + ) + + service = 
AIExplainerService( + ai_client=mock_ai_client, + rate_limiter=InMemoryRateLimiter(limit_per_hour=10), + ) + + explanation, _ = await service.explain_finding( + finding=sample_security_finding, + user_id="test-user", + ) + + # Should use raw content as explanation + assert "plain text response" in explanation.explanation + + def test_is_configured_delegates_to_client(self, mock_ai_client): + """is_configured should delegate to AI client.""" + service = AIExplainerService(ai_client=mock_ai_client) + + assert service.is_configured == mock_ai_client.is_configured + + +# ============================================================ +# Tests for AIExplanation Schema +# ============================================================ + + +class TestAIExplanationSchema: + """Tests for AIExplanation Pydantic schema.""" + + def test_to_dict_serialization(self): + """Should serialize to dict for JSONB storage.""" + explanation = AIExplanation( + explanation="Test explanation", + suggested_fix="# fixed", + attack_example="exploit code", + references=["CWE-94"], + model_used="gemini-1.5-flash", + tokens_used=100, + ) + + data = explanation.to_dict() + + assert data["explanation"] == "Test explanation" + assert data["tokens_used"] == 100 + assert "generated_at" in data + + def test_from_dict_deserialization(self): + """Should deserialize from JSONB dict.""" + data = { + "explanation": "Test explanation with sufficient length for validation", + "suggested_fix": "# fix", + "attack_example": None, + "references": ["CWE-1"], + "model_used": "test-model", + "tokens_used": 50, + "generated_at": "2024-01-15T10:30:00+00:00", + } + + explanation = AIExplanation.from_dict(data) + + assert explanation.explanation == "Test explanation with sufficient length for validation" + assert explanation.model_used == "test-model" + assert explanation.generated_at.year == 2024 diff --git a/backend/tests/unit/agents/__init__.py b/backend/tests/unit/agents/__init__.py index 84cbda4..6022d5b 100644 --- 
a/backend/tests/unit/agents/__init__.py +++ b/backend/tests/unit/agents/__init__.py @@ -1 +1 @@ -"""Unit tests for agents module.""" +"""Unit tests for agents module.""" diff --git a/backend/tests/unit/agents/analyzers/__init__.py b/backend/tests/unit/agents/analyzers/__init__.py index b785786..40f9b53 100644 --- a/backend/tests/unit/agents/analyzers/__init__.py +++ b/backend/tests/unit/agents/analyzers/__init__.py @@ -1 +1 @@ -# Tests for analyzer modules +# Tests for analyzer modules diff --git a/backend/tests/unit/agents/analyzers/test_flake8_analyzer.py b/backend/tests/unit/agents/analyzers/test_flake8_analyzer.py index 3cd5053..5feda94 100644 --- a/backend/tests/unit/agents/analyzers/test_flake8_analyzer.py +++ b/backend/tests/unit/agents/analyzers/test_flake8_analyzer.py @@ -1,374 +1,374 @@ -""" -Unit tests for Flake8Analyzer. - -Tests cover: -- Initialization -- Severity mapping -- Output parsing -- Analysis execution -""" - -import subprocess -from unittest.mock import MagicMock, patch - -import pytest - -from src.agents.analyzers.flake8_analyzer import Flake8Analyzer -from src.schemas.finding import Severity - - -class TestFlake8AnalyzerInitialization: - """Tests for Flake8Analyzer initialization.""" - - def test_init_creates_instance(self): - """Test that Flake8Analyzer can be instantiated.""" - analyzer = Flake8Analyzer() - assert analyzer is not None - - def test_init_sets_cmd_template(self): - """Test that command template is set.""" - analyzer = Flake8Analyzer() - assert hasattr(analyzer, "_cmd_template") - assert isinstance(analyzer._cmd_template, list) - assert "flake8" in str(analyzer._cmd_template) - - -class TestFlake8AnalyzerMapSeverity: - """Tests for severity mapping.""" - - def test_map_severity_fatal_returns_high(self): - """Test that 'F' (pyflakes) errors map to HIGH severity.""" - result = Flake8Analyzer._map_severity("F401") - assert result == Severity.HIGH - - def test_map_severity_error_returns_medium(self): - """Test that 'E' (error) 
maps to MEDIUM severity.""" - result = Flake8Analyzer._map_severity("E501") - assert result == Severity.MEDIUM - - def test_map_severity_complexity_returns_medium(self): - """Test that 'C' (complexity) maps to MEDIUM severity.""" - result = Flake8Analyzer._map_severity("C901") - assert result == Severity.MEDIUM - - def test_map_severity_warning_returns_low(self): - """Test that 'W' (warning) maps to LOW severity.""" - result = Flake8Analyzer._map_severity("W291") - assert result == Severity.LOW - - def test_map_severity_naming_returns_low(self): - """Test that 'N' (naming) maps to LOW severity.""" - result = Flake8Analyzer._map_severity("N801") - assert result == Severity.LOW - - def test_map_severity_unknown_returns_low(self): - """Test that unknown types default to LOW severity.""" - assert Flake8Analyzer._map_severity("X999") == Severity.LOW - assert Flake8Analyzer._map_severity("") == Severity.LOW - - def test_map_severity_lowercase_works(self): - """Test that lowercase prefixes work.""" - assert Flake8Analyzer._map_severity("f401") == Severity.HIGH - assert Flake8Analyzer._map_severity("e501") == Severity.MEDIUM - - -class TestFlake8AnalyzerParseOutput: - """Tests for output parsing.""" - - def test_parse_output_empty_returns_empty_list(self): - """Test parsing empty output.""" - analyzer = Flake8Analyzer() - result = analyzer._parse_output("", "x = 1", "StyleAgent") - assert result == [] - - def test_parse_output_valid_line(self): - """Test parsing valid flake8 output line.""" - analyzer = Flake8Analyzer() - code_content = "x = 1\n" - # Flake8 format: {row}:{col}:{code}:{text} - output = "1:5:E501:line too long (120 > 79 characters)" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 1 - assert result[0].line_number == 1 - assert result[0].severity == Severity.MEDIUM # E -> MEDIUM - assert "line too long" in result[0].message - - def test_parse_output_multiple_issues(self): - """Test parsing multiple issues.""" - 
analyzer = Flake8Analyzer() - code_content = "import os\nx = 1\ny = 2\n" - output = """1:1:F401:'os' imported but unused -2:5:E501:line too long -3:1:W291:trailing whitespace""" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 3 - assert result[0].severity == Severity.HIGH # F -> HIGH - assert result[1].severity == Severity.MEDIUM # E -> MEDIUM - assert result[2].severity == Severity.LOW # W -> LOW - - def test_parse_output_preserves_line_numbers(self): - """Test that line numbers are correctly preserved.""" - analyzer = Flake8Analyzer() - code_content = "\n" * 50 + "x = 1\n" - output = "42:1:W291:trailing whitespace" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].line_number == 42 - - def test_parse_output_invalid_format_skipped(self): - """Test that invalid format lines are skipped.""" - analyzer = Flake8Analyzer() - code_content = "x = 1\ny = 2\n" - output = """1:1:E501:line too long -not a valid line -2:1:W291:trailing whitespace""" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 2 - - def test_parse_output_extracts_code_snippet(self): - """Test that code snippet is extracted from code content.""" - analyzer = Flake8Analyzer() - code_content = "first_line = 1\nsecond_line = 2\nthird_line = 3\n" - output = "2:1:E501:line too long in this file" # was "2:1:E501:test" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].code_snippet == "second_line = 2" - - def test_parse_output_sets_agent_name(self): - """Test that agent name is set correctly.""" - analyzer = Flake8Analyzer() - code_content = "x = 1\n" - output = "1:1:E501:line too long error message" # was "1:1:E501:test" - result = analyzer._parse_output(output, code_content, "TestAgent") - assert result[0].agent_name == "TestAgent" - - def test_parse_output_sets_rule_id(self): - """Test that rule_id includes FLAKE8 prefix.""" - analyzer = 
Flake8Analyzer() - code_content = "x = 1\n" - output = "1:1:E501:line too long error message" # was "1:1:E501:test" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].rule_id == "FLAKE8_E501" - - def test_parse_output_sets_issue_type(self): - """Test that issue_type is set to style/pep8.""" - analyzer = Flake8Analyzer() - code_content = "x = 1\n" - output = "1:1:E501:line too long error message" # was "1:1:E501:test" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].issue_type == "style/pep8" - - -class TestFlake8AnalyzerAnalyze: - """Tests for analyze method.""" - - def test_analyze_with_no_issues(self): - """Test analysis of clean code.""" - analyzer = Flake8Analyzer() - code = "x = 1\n" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - result = analyzer.analyze(code) - assert result == [] - - def test_analyze_returns_findings(self): - """Test that analyze returns findings for code with issues.""" - analyzer = Flake8Analyzer() - code = "import os\nx = 1\n" - - flake8_output = "1:1:F401:'os' imported but unused" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) - result = analyzer.analyze(code) - assert len(result) == 1 - assert "'os' imported but unused" in result[0].message - - def test_analyze_handles_file_not_found(self): - """Test that FileNotFoundError (flake8 not installed) is handled.""" - analyzer = Flake8Analyzer() - - with patch("subprocess.run") as mock_run: - mock_run.side_effect = FileNotFoundError("flake8 not found") - result = analyzer.analyze("some code") - assert result == [] - - def test_analyze_handles_generic_exception(self): - """Test that generic exceptions are handled gracefully.""" - analyzer = Flake8Analyzer() - - with patch("subprocess.run") as mock_run: - mock_run.side_effect = Exception("Unexpected error") - 
result = analyzer.analyze("some code") - assert result == [] - - def test_analyze_cleans_up_temp_file(self): - """Test that temporary file is cleaned up after analysis.""" - analyzer = Flake8Analyzer() - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - with patch("os.path.exists", return_value=True): - with patch("os.remove") as mock_remove: - analyzer.analyze("x = 1") - mock_remove.assert_called() - - def test_analyze_with_agent_name(self): - """Test analyze with custom agent name.""" - analyzer = Flake8Analyzer() - code = "import os\n" - - flake8_output = "1:1:F401:unused" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) - result = analyzer.analyze(code, agent_name="CustomAgent") - assert len(result) == 1 - assert result[0].agent_name == "CustomAgent" - - def test_analyze_default_agent_name(self): - """Test analyze uses default agent name.""" - analyzer = Flake8Analyzer() - code = "import os\n" - - flake8_output = "1:1:F401:unused" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) - result = analyzer.analyze(code) - assert len(result) == 1 - assert result[0].agent_name == "StyleAgent" - - -class TestFlake8AnalyzerIssueTypes: - """Tests for issue type categorization by error code.""" - - def test_e1_indentation_error(self): - """Test E1xx indentation errors are parsed.""" - analyzer = Flake8Analyzer() - code = "x = 1\n" - output = "1:1:E101:indentation contains mixed spaces and tabs" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - def test_e2_whitespace_error(self): - """Test E2xx whitespace errors are parsed.""" - analyzer = Flake8Analyzer() - code = "x=1\n" - output = "1:2:E225:missing whitespace around operator" - result = 
analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - def test_e3_blank_line_error(self): - """Test E3xx blank line errors are parsed.""" - analyzer = Flake8Analyzer() - code = "def foo():\n pass\n" - output = "1:1:E302:expected 2 blank lines, found 1" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - def test_e5_line_length_error(self): - """Test E5xx line length errors are parsed.""" - analyzer = Flake8Analyzer() - code = "x = 1\n" - output = "1:80:E501:line too long (120 > 79 characters)" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - def test_e7_statement_error(self): - """Test E7xx statement errors are parsed.""" - analyzer = Flake8Analyzer() - code = "if x == None: pass\n" - output = "1:6:E711:comparison to None" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - def test_f4_import_error(self): - """Test F4xx import errors are parsed.""" - analyzer = Flake8Analyzer() - code = "import os\n" - output = "1:1:F401:'os' imported but unused" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.HIGH - - def test_f8_name_error(self): - """Test F8xx name errors are parsed.""" - analyzer = Flake8Analyzer() - code = "print(foo)\n" - output = "1:7:F821:undefined name 'foo'" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.HIGH - - def test_w_warning(self): - """Test W warnings are parsed.""" - analyzer = Flake8Analyzer() - code = "x = 1 \n" - output = "1:6:W291:trailing whitespace" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert 
result[0].severity == Severity.LOW - - def test_c9_complexity(self): - """Test C9xx complexity warnings are parsed.""" - analyzer = Flake8Analyzer() - code = "def complex(): pass\n" - output = "1:1:C901:'complex' is too complex (15)" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - assert result[0].severity == Severity.MEDIUM - - -class TestFlake8AnalyzerEdgeCases: - """Tests for edge cases and error handling.""" - - def test_analyze_empty_code(self): - """Test analyzing empty code.""" - analyzer = Flake8Analyzer() - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - result = analyzer.analyze("") - assert result == [] - - def test_parse_output_with_special_characters(self): - """Test parsing output with special characters in message.""" - analyzer = Flake8Analyzer() - code = "x = 1\n" - output = "1:1:E501:line too long (contains 'quotes' and \"double quotes\")" - result = analyzer._parse_output(output, code, "StyleAgent") - assert len(result) == 1 - - def test_analyze_unicode_code(self): - """Test analyzing code with unicode characters.""" - analyzer = Flake8Analyzer() - code = '# -*- coding: utf-8 -*-\n"""Módulo con caracteres especiales: áéíóú."""\n' - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - result = analyzer.analyze(code) - assert isinstance(result, list) - - def test_finding_has_all_required_fields(self): - """Test that findings have all required fields.""" - analyzer = Flake8Analyzer() - code = "import os\n" - - flake8_output = "1:1:F401:'os' imported but unused" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) - result = analyzer.analyze(code) - assert len(result) == 1 - finding = result[0] - - # Check all Finding fields - assert finding.severity is not None - assert finding.issue_type is not None - 
assert finding.message is not None - assert finding.line_number is not None - assert finding.agent_name is not None - assert finding.rule_id is not None +""" +Unit tests for Flake8Analyzer. + +Tests cover: +- Initialization +- Severity mapping +- Output parsing +- Analysis execution +""" + +import subprocess +from unittest.mock import MagicMock, patch + +import pytest + +from src.agents.analyzers.flake8_analyzer import Flake8Analyzer +from src.schemas.finding import Severity + + +class TestFlake8AnalyzerInitialization: + """Tests for Flake8Analyzer initialization.""" + + def test_init_creates_instance(self): + """Test that Flake8Analyzer can be instantiated.""" + analyzer = Flake8Analyzer() + assert analyzer is not None + + def test_init_sets_cmd_template(self): + """Test that command template is set.""" + analyzer = Flake8Analyzer() + assert hasattr(analyzer, "_cmd_template") + assert isinstance(analyzer._cmd_template, list) + assert "flake8" in str(analyzer._cmd_template) + + +class TestFlake8AnalyzerMapSeverity: + """Tests for severity mapping.""" + + def test_map_severity_fatal_returns_high(self): + """Test that 'F' (pyflakes) errors map to HIGH severity.""" + result = Flake8Analyzer._map_severity("F401") + assert result == Severity.HIGH + + def test_map_severity_error_returns_medium(self): + """Test that 'E' (error) maps to MEDIUM severity.""" + result = Flake8Analyzer._map_severity("E501") + assert result == Severity.MEDIUM + + def test_map_severity_complexity_returns_medium(self): + """Test that 'C' (complexity) maps to MEDIUM severity.""" + result = Flake8Analyzer._map_severity("C901") + assert result == Severity.MEDIUM + + def test_map_severity_warning_returns_low(self): + """Test that 'W' (warning) maps to LOW severity.""" + result = Flake8Analyzer._map_severity("W291") + assert result == Severity.LOW + + def test_map_severity_naming_returns_low(self): + """Test that 'N' (naming) maps to LOW severity.""" + result = Flake8Analyzer._map_severity("N801") + 
assert result == Severity.LOW + + def test_map_severity_unknown_returns_low(self): + """Test that unknown types default to LOW severity.""" + assert Flake8Analyzer._map_severity("X999") == Severity.LOW + assert Flake8Analyzer._map_severity("") == Severity.LOW + + def test_map_severity_lowercase_works(self): + """Test that lowercase prefixes work.""" + assert Flake8Analyzer._map_severity("f401") == Severity.HIGH + assert Flake8Analyzer._map_severity("e501") == Severity.MEDIUM + + +class TestFlake8AnalyzerParseOutput: + """Tests for output parsing.""" + + def test_parse_output_empty_returns_empty_list(self): + """Test parsing empty output.""" + analyzer = Flake8Analyzer() + result = analyzer._parse_output("", "x = 1", "StyleAgent") + assert result == [] + + def test_parse_output_valid_line(self): + """Test parsing valid flake8 output line.""" + analyzer = Flake8Analyzer() + code_content = "x = 1\n" + # Flake8 format: {row}:{col}:{code}:{text} + output = "1:5:E501:line too long (120 > 79 characters)" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 1 + assert result[0].line_number == 1 + assert result[0].severity == Severity.MEDIUM # E -> MEDIUM + assert "line too long" in result[0].message + + def test_parse_output_multiple_issues(self): + """Test parsing multiple issues.""" + analyzer = Flake8Analyzer() + code_content = "import os\nx = 1\ny = 2\n" + output = """1:1:F401:'os' imported but unused +2:5:E501:line too long +3:1:W291:trailing whitespace""" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 3 + assert result[0].severity == Severity.HIGH # F -> HIGH + assert result[1].severity == Severity.MEDIUM # E -> MEDIUM + assert result[2].severity == Severity.LOW # W -> LOW + + def test_parse_output_preserves_line_numbers(self): + """Test that line numbers are correctly preserved.""" + analyzer = Flake8Analyzer() + code_content = "\n" * 50 + "x = 1\n" + output = "42:1:W291:trailing 
whitespace" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].line_number == 42 + + def test_parse_output_invalid_format_skipped(self): + """Test that invalid format lines are skipped.""" + analyzer = Flake8Analyzer() + code_content = "x = 1\ny = 2\n" + output = """1:1:E501:line too long +not a valid line +2:1:W291:trailing whitespace""" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 2 + + def test_parse_output_extracts_code_snippet(self): + """Test that code snippet is extracted from code content.""" + analyzer = Flake8Analyzer() + code_content = "first_line = 1\nsecond_line = 2\nthird_line = 3\n" + output = "2:1:E501:line too long in this file" # was "2:1:E501:test" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].code_snippet == "second_line = 2" + + def test_parse_output_sets_agent_name(self): + """Test that agent name is set correctly.""" + analyzer = Flake8Analyzer() + code_content = "x = 1\n" + output = "1:1:E501:line too long error message" # was "1:1:E501:test" + result = analyzer._parse_output(output, code_content, "TestAgent") + assert result[0].agent_name == "TestAgent" + + def test_parse_output_sets_rule_id(self): + """Test that rule_id includes FLAKE8 prefix.""" + analyzer = Flake8Analyzer() + code_content = "x = 1\n" + output = "1:1:E501:line too long error message" # was "1:1:E501:test" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].rule_id == "FLAKE8_E501" + + def test_parse_output_sets_issue_type(self): + """Test that issue_type is set to style/pep8.""" + analyzer = Flake8Analyzer() + code_content = "x = 1\n" + output = "1:1:E501:line too long error message" # was "1:1:E501:test" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].issue_type == "style/pep8" + + +class TestFlake8AnalyzerAnalyze: + """Tests for analyze method.""" + + def 
test_analyze_with_no_issues(self): + """Test analysis of clean code.""" + analyzer = Flake8Analyzer() + code = "x = 1\n" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + result = analyzer.analyze(code) + assert result == [] + + def test_analyze_returns_findings(self): + """Test that analyze returns findings for code with issues.""" + analyzer = Flake8Analyzer() + code = "import os\nx = 1\n" + + flake8_output = "1:1:F401:'os' imported but unused" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) + result = analyzer.analyze(code) + assert len(result) == 1 + assert "'os' imported but unused" in result[0].message + + def test_analyze_handles_file_not_found(self): + """Test that FileNotFoundError (flake8 not installed) is handled.""" + analyzer = Flake8Analyzer() + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = FileNotFoundError("flake8 not found") + result = analyzer.analyze("some code") + assert result == [] + + def test_analyze_handles_generic_exception(self): + """Test that generic exceptions are handled gracefully.""" + analyzer = Flake8Analyzer() + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = Exception("Unexpected error") + result = analyzer.analyze("some code") + assert result == [] + + def test_analyze_cleans_up_temp_file(self): + """Test that temporary file is cleaned up after analysis.""" + analyzer = Flake8Analyzer() + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + with patch("os.path.exists", return_value=True): + with patch("os.remove") as mock_remove: + analyzer.analyze("x = 1") + mock_remove.assert_called() + + def test_analyze_with_agent_name(self): + """Test analyze with custom agent name.""" + analyzer = Flake8Analyzer() + code = "import os\n" + + flake8_output = "1:1:F401:unused" + + with 
patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) + result = analyzer.analyze(code, agent_name="CustomAgent") + assert len(result) == 1 + assert result[0].agent_name == "CustomAgent" + + def test_analyze_default_agent_name(self): + """Test analyze uses default agent name.""" + analyzer = Flake8Analyzer() + code = "import os\n" + + flake8_output = "1:1:F401:unused" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) + result = analyzer.analyze(code) + assert len(result) == 1 + assert result[0].agent_name == "StyleAgent" + + +class TestFlake8AnalyzerIssueTypes: + """Tests for issue type categorization by error code.""" + + def test_e1_indentation_error(self): + """Test E1xx indentation errors are parsed.""" + analyzer = Flake8Analyzer() + code = "x = 1\n" + output = "1:1:E101:indentation contains mixed spaces and tabs" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + def test_e2_whitespace_error(self): + """Test E2xx whitespace errors are parsed.""" + analyzer = Flake8Analyzer() + code = "x=1\n" + output = "1:2:E225:missing whitespace around operator" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + def test_e3_blank_line_error(self): + """Test E3xx blank line errors are parsed.""" + analyzer = Flake8Analyzer() + code = "def foo():\n pass\n" + output = "1:1:E302:expected 2 blank lines, found 1" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + def test_e5_line_length_error(self): + """Test E5xx line length errors are parsed.""" + analyzer = Flake8Analyzer() + code = "x = 1\n" + output = "1:80:E501:line too long (120 > 79 characters)" + result = 
analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + def test_e7_statement_error(self): + """Test E7xx statement errors are parsed.""" + analyzer = Flake8Analyzer() + code = "if x == None: pass\n" + output = "1:6:E711:comparison to None" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + def test_f4_import_error(self): + """Test F4xx import errors are parsed.""" + analyzer = Flake8Analyzer() + code = "import os\n" + output = "1:1:F401:'os' imported but unused" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.HIGH + + def test_f8_name_error(self): + """Test F8xx name errors are parsed.""" + analyzer = Flake8Analyzer() + code = "print(foo)\n" + output = "1:7:F821:undefined name 'foo'" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.HIGH + + def test_w_warning(self): + """Test W warnings are parsed.""" + analyzer = Flake8Analyzer() + code = "x = 1 \n" + output = "1:6:W291:trailing whitespace" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.LOW + + def test_c9_complexity(self): + """Test C9xx complexity warnings are parsed.""" + analyzer = Flake8Analyzer() + code = "def complex(): pass\n" + output = "1:1:C901:'complex' is too complex (15)" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + assert result[0].severity == Severity.MEDIUM + + +class TestFlake8AnalyzerEdgeCases: + """Tests for edge cases and error handling.""" + + def test_analyze_empty_code(self): + """Test analyzing empty code.""" + analyzer = Flake8Analyzer() + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) 
+ result = analyzer.analyze("") + assert result == [] + + def test_parse_output_with_special_characters(self): + """Test parsing output with special characters in message.""" + analyzer = Flake8Analyzer() + code = "x = 1\n" + output = "1:1:E501:line too long (contains 'quotes' and \"double quotes\")" + result = analyzer._parse_output(output, code, "StyleAgent") + assert len(result) == 1 + + def test_analyze_unicode_code(self): + """Test analyzing code with unicode characters.""" + analyzer = Flake8Analyzer() + code = '# -*- coding: utf-8 -*-\n"""Módulo con caracteres especiales: áéíóú."""\n' + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + result = analyzer.analyze(code) + assert isinstance(result, list) + + def test_finding_has_all_required_fields(self): + """Test that findings have all required fields.""" + analyzer = Flake8Analyzer() + code = "import os\n" + + flake8_output = "1:1:F401:'os' imported but unused" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=flake8_output, stderr="", returncode=1) + result = analyzer.analyze(code) + assert len(result) == 1 + finding = result[0] + + # Check all Finding fields + assert finding.severity is not None + assert finding.issue_type is not None + assert finding.message is not None + assert finding.line_number is not None + assert finding.agent_name is not None + assert finding.rule_id is not None diff --git a/backend/tests/unit/agents/analyzers/test_pylint_analyzer.py b/backend/tests/unit/agents/analyzers/test_pylint_analyzer.py index 3ba3b90..c13356b 100644 --- a/backend/tests/unit/agents/analyzers/test_pylint_analyzer.py +++ b/backend/tests/unit/agents/analyzers/test_pylint_analyzer.py @@ -1,281 +1,281 @@ -""" -Unit tests for PylintAnalyzer. 
- -Tests cover: -- Initialization -- Severity mapping -- Output parsing -- Analysis execution -""" - -import subprocess -from unittest.mock import MagicMock, patch - -import pytest - -from src.agents.analyzers.pylint_analyzer import PylintAnalyzer -from src.schemas.finding import Severity - - -class TestPylintAnalyzerInitialization: - """Tests for PylintAnalyzer initialization.""" - - def test_init_creates_instance(self): - """Test that PylintAnalyzer can be instantiated.""" - analyzer = PylintAnalyzer() - assert analyzer is not None - - def test_init_sets_cmd_template(self): - """Test that command template is set.""" - analyzer = PylintAnalyzer() - assert hasattr(analyzer, "_cmd_template") - assert isinstance(analyzer._cmd_template, list) - assert "pylint" in str(analyzer._cmd_template) - - -class TestPylintAnalyzerMapSeverity: - """Tests for severity mapping.""" - - def test_map_severity_error_returns_high(self): - """Test that 'E' (error) maps to HIGH severity.""" - result = PylintAnalyzer._map_severity("E0001") - assert result == Severity.HIGH - - def test_map_severity_fatal_returns_high(self): - """Test that 'F' (fatal) maps to HIGH severity.""" - result = PylintAnalyzer._map_severity("F0001") - assert result == Severity.HIGH - - def test_map_severity_warning_returns_medium(self): - """Test that 'W' (warning) maps to MEDIUM severity.""" - result = PylintAnalyzer._map_severity("W0612") - assert result == Severity.MEDIUM - - def test_map_severity_convention_returns_low(self): - """Test that 'C' (convention) maps to LOW severity.""" - result = PylintAnalyzer._map_severity("C0114") - assert result == Severity.LOW - - def test_map_severity_refactor_returns_low(self): - """Test that 'R' (refactor) maps to LOW severity.""" - result = PylintAnalyzer._map_severity("R0903") - assert result == Severity.LOW - - def test_map_severity_information_returns_low(self): - """Test that 'I' (information) maps to LOW severity.""" - result = PylintAnalyzer._map_severity("I0001") - 
assert result == Severity.LOW - - def test_map_severity_unknown_returns_low(self): - """Test that unknown types default to LOW severity.""" - assert PylintAnalyzer._map_severity("X9999") == Severity.LOW - assert PylintAnalyzer._map_severity("") == Severity.LOW - - def test_map_severity_lowercase_works(self): - """Test that lowercase prefixes work.""" - assert PylintAnalyzer._map_severity("e0001") == Severity.HIGH - assert PylintAnalyzer._map_severity("w0001") == Severity.MEDIUM - - -class TestPylintAnalyzerParseOutput: - """Tests for output parsing.""" - - def test_parse_output_empty_returns_empty_list(self): - """Test parsing empty output.""" - analyzer = PylintAnalyzer() - result = analyzer._parse_output("", "x = 1", "StyleAgent") - assert result == [] - - def test_parse_output_valid_format(self): - """Test parsing valid pylint text output.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\ny = 2\n" - # Pylint format: {line}:{column}:{msg_id}:{msg} - output = "1:0:C0114:Missing module docstring" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 1 - assert result[0].line_number == 1 - assert result[0].severity == Severity.LOW - assert "Missing module docstring" in result[0].message - - def test_parse_output_multiple_issues(self): - """Test parsing multiple issues.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\ny = 2\nz = 3\n" - output = """1:0:E0001:Syntax error -2:0:W0612:Unused variable 'y' -3:0:C0103:Invalid name 'z'""" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 3 - assert result[0].severity == Severity.HIGH # E -> HIGH - assert result[1].severity == Severity.MEDIUM # W -> MEDIUM - assert result[2].severity == Severity.LOW # C -> LOW - - def test_parse_output_invalid_format_skipped(self): - """Test that invalid format lines are skipped.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\n" - output = """1:0:C0114:Missing module docstring -not a 
valid line -another invalid line""" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert len(result) == 1 - - def test_parse_output_preserves_line_numbers(self): - """Test that line numbers are correctly preserved.""" - analyzer = PylintAnalyzer() - code_content = "\n" * 50 + "x = 1\n" - output = "42:0:C0114:Test message" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].line_number == 42 - - def test_parse_output_extracts_code_snippet(self): - """Test that code snippet is extracted from code content.""" - analyzer = PylintAnalyzer() - code_content = "first_line = 1\nsecond_line = 2\nthird_line = 3\n" - output = "2:0:C0114:Test message" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].code_snippet == "second_line = 2" - - def test_parse_output_sets_agent_name(self): - """Test that agent name is set correctly.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\n" - output = "1:0:C0114:Test message" - result = analyzer._parse_output(output, code_content, "TestAgent") - assert result[0].agent_name == "TestAgent" - - def test_parse_output_sets_rule_id(self): - """Test that rule_id includes PYLINT prefix.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\n" - output = "1:0:C0114:Test message" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].rule_id == "PYLINT_C0114" - - def test_parse_output_sets_issue_type(self): - """Test that issue_type is set to style/pep8.""" - analyzer = PylintAnalyzer() - code_content = "x = 1\n" - output = "1:0:C0114:Test message" - result = analyzer._parse_output(output, code_content, "StyleAgent") - assert result[0].issue_type == "style/pep8" - - -class TestPylintAnalyzerAnalyze: - """Tests for analyze method.""" - - def test_analyze_with_no_issues(self): - """Test analysis of clean code.""" - analyzer = PylintAnalyzer() - code = "x = 1\n" - - with patch("subprocess.run") as mock_run: - 
mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - result = analyzer.analyze(code) - assert result == [] - - def test_analyze_returns_findings(self): - """Test that analyze returns findings for code with issues.""" - analyzer = PylintAnalyzer() - code = "x = 1\n" - - pylint_output = "1:0:C0114:Missing module docstring" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) - result = analyzer.analyze(code) - assert len(result) == 1 - assert result[0].message == "Missing module docstring" - - def test_analyze_handles_file_not_found(self): - """Test that FileNotFoundError (pylint not installed) is handled.""" - analyzer = PylintAnalyzer() - - with patch("subprocess.run") as mock_run: - mock_run.side_effect = FileNotFoundError("pylint not found") - result = analyzer.analyze("some code") - assert result == [] - - def test_analyze_handles_generic_exception(self): - """Test that generic exceptions are handled gracefully.""" - analyzer = PylintAnalyzer() - - with patch("subprocess.run") as mock_run: - mock_run.side_effect = Exception("Unexpected error") - result = analyzer.analyze("some code") - assert result == [] - - def test_analyze_cleans_up_temp_file(self): - """Test that temporary file is cleaned up after analysis.""" - analyzer = PylintAnalyzer() - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - with patch("os.path.exists", return_value=True): - with patch("os.remove") as mock_remove: - analyzer.analyze("x = 1") - # os.remove should be called to clean up temp file - mock_remove.assert_called() - - def test_analyze_with_agent_name(self): - """Test analyze with custom agent name.""" - analyzer = PylintAnalyzer() - code = "x = 1\n" - - pylint_output = "1:0:C0114:Missing module docstring" - - with patch.object(analyzer, "_cmd_template", []): - with 
patch("src.agents.analyzers.pylint_analyzer.subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) - result = analyzer.analyze(code, agent_name="CustomAgent") - assert len(result) == 1 - assert result[0].agent_name == "CustomAgent" - - def test_analyze_default_agent_name(self): - """Test analyze uses default agent name.""" - analyzer = PylintAnalyzer() - code = "x = 1\n" - - pylint_output = "1:0:C0114:Missing module docstring" - - with patch.object(analyzer, "_cmd_template", []): - with patch("src.agents.analyzers.pylint_analyzer.subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) - result = analyzer.analyze(code) - assert len(result) == 1 - assert result[0].agent_name == "StyleAgent" - - -class TestPylintAnalyzerIntegration: - """Integration-like tests for end-to-end behavior.""" - - def test_finding_has_all_required_fields(self): - """Test that findings have all required fields.""" - analyzer = PylintAnalyzer() - code = "x = 1\n" - - pylint_output = "1:0:C0114:Missing module docstring" - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) - result = analyzer.analyze(code) - assert len(result) == 1 - finding = result[0] - - # Check all Finding fields - assert finding.severity is not None - assert finding.issue_type is not None - assert finding.message is not None - assert finding.line_number is not None - assert finding.agent_name is not None - assert finding.rule_id is not None - - def test_empty_code_returns_empty_list(self): - """Test analyzing empty code.""" - analyzer = PylintAnalyzer() - - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) - result = analyzer.analyze("") - assert result == [] +""" +Unit tests for PylintAnalyzer. 
+ +Tests cover: +- Initialization +- Severity mapping +- Output parsing +- Analysis execution +""" + +import subprocess +from unittest.mock import MagicMock, patch + +import pytest + +from src.agents.analyzers.pylint_analyzer import PylintAnalyzer +from src.schemas.finding import Severity + + +class TestPylintAnalyzerInitialization: + """Tests for PylintAnalyzer initialization.""" + + def test_init_creates_instance(self): + """Test that PylintAnalyzer can be instantiated.""" + analyzer = PylintAnalyzer() + assert analyzer is not None + + def test_init_sets_cmd_template(self): + """Test that command template is set.""" + analyzer = PylintAnalyzer() + assert hasattr(analyzer, "_cmd_template") + assert isinstance(analyzer._cmd_template, list) + assert "pylint" in str(analyzer._cmd_template) + + +class TestPylintAnalyzerMapSeverity: + """Tests for severity mapping.""" + + def test_map_severity_error_returns_high(self): + """Test that 'E' (error) maps to HIGH severity.""" + result = PylintAnalyzer._map_severity("E0001") + assert result == Severity.HIGH + + def test_map_severity_fatal_returns_high(self): + """Test that 'F' (fatal) maps to HIGH severity.""" + result = PylintAnalyzer._map_severity("F0001") + assert result == Severity.HIGH + + def test_map_severity_warning_returns_medium(self): + """Test that 'W' (warning) maps to MEDIUM severity.""" + result = PylintAnalyzer._map_severity("W0612") + assert result == Severity.MEDIUM + + def test_map_severity_convention_returns_low(self): + """Test that 'C' (convention) maps to LOW severity.""" + result = PylintAnalyzer._map_severity("C0114") + assert result == Severity.LOW + + def test_map_severity_refactor_returns_low(self): + """Test that 'R' (refactor) maps to LOW severity.""" + result = PylintAnalyzer._map_severity("R0903") + assert result == Severity.LOW + + def test_map_severity_information_returns_low(self): + """Test that 'I' (information) maps to LOW severity.""" + result = PylintAnalyzer._map_severity("I0001") + 
assert result == Severity.LOW + + def test_map_severity_unknown_returns_low(self): + """Test that unknown types default to LOW severity.""" + assert PylintAnalyzer._map_severity("X9999") == Severity.LOW + assert PylintAnalyzer._map_severity("") == Severity.LOW + + def test_map_severity_lowercase_works(self): + """Test that lowercase prefixes work.""" + assert PylintAnalyzer._map_severity("e0001") == Severity.HIGH + assert PylintAnalyzer._map_severity("w0001") == Severity.MEDIUM + + +class TestPylintAnalyzerParseOutput: + """Tests for output parsing.""" + + def test_parse_output_empty_returns_empty_list(self): + """Test parsing empty output.""" + analyzer = PylintAnalyzer() + result = analyzer._parse_output("", "x = 1", "StyleAgent") + assert result == [] + + def test_parse_output_valid_format(self): + """Test parsing valid pylint text output.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\ny = 2\n" + # Pylint format: {line}:{column}:{msg_id}:{msg} + output = "1:0:C0114:Missing module docstring" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 1 + assert result[0].line_number == 1 + assert result[0].severity == Severity.LOW + assert "Missing module docstring" in result[0].message + + def test_parse_output_multiple_issues(self): + """Test parsing multiple issues.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\ny = 2\nz = 3\n" + output = """1:0:E0001:Syntax error +2:0:W0612:Unused variable 'y' +3:0:C0103:Invalid name 'z'""" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 3 + assert result[0].severity == Severity.HIGH # E -> HIGH + assert result[1].severity == Severity.MEDIUM # W -> MEDIUM + assert result[2].severity == Severity.LOW # C -> LOW + + def test_parse_output_invalid_format_skipped(self): + """Test that invalid format lines are skipped.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\n" + output = """1:0:C0114:Missing module docstring +not a 
valid line +another invalid line""" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert len(result) == 1 + + def test_parse_output_preserves_line_numbers(self): + """Test that line numbers are correctly preserved.""" + analyzer = PylintAnalyzer() + code_content = "\n" * 50 + "x = 1\n" + output = "42:0:C0114:Test message" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].line_number == 42 + + def test_parse_output_extracts_code_snippet(self): + """Test that code snippet is extracted from code content.""" + analyzer = PylintAnalyzer() + code_content = "first_line = 1\nsecond_line = 2\nthird_line = 3\n" + output = "2:0:C0114:Test message" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].code_snippet == "second_line = 2" + + def test_parse_output_sets_agent_name(self): + """Test that agent name is set correctly.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\n" + output = "1:0:C0114:Test message" + result = analyzer._parse_output(output, code_content, "TestAgent") + assert result[0].agent_name == "TestAgent" + + def test_parse_output_sets_rule_id(self): + """Test that rule_id includes PYLINT prefix.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\n" + output = "1:0:C0114:Test message" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].rule_id == "PYLINT_C0114" + + def test_parse_output_sets_issue_type(self): + """Test that issue_type is set to style/pep8.""" + analyzer = PylintAnalyzer() + code_content = "x = 1\n" + output = "1:0:C0114:Test message" + result = analyzer._parse_output(output, code_content, "StyleAgent") + assert result[0].issue_type == "style/pep8" + + +class TestPylintAnalyzerAnalyze: + """Tests for analyze method.""" + + def test_analyze_with_no_issues(self): + """Test analysis of clean code.""" + analyzer = PylintAnalyzer() + code = "x = 1\n" + + with patch("subprocess.run") as mock_run: + 
mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + result = analyzer.analyze(code) + assert result == [] + + def test_analyze_returns_findings(self): + """Test that analyze returns findings for code with issues.""" + analyzer = PylintAnalyzer() + code = "x = 1\n" + + pylint_output = "1:0:C0114:Missing module docstring" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) + result = analyzer.analyze(code) + assert len(result) == 1 + assert result[0].message == "Missing module docstring" + + def test_analyze_handles_file_not_found(self): + """Test that FileNotFoundError (pylint not installed) is handled.""" + analyzer = PylintAnalyzer() + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = FileNotFoundError("pylint not found") + result = analyzer.analyze("some code") + assert result == [] + + def test_analyze_handles_generic_exception(self): + """Test that generic exceptions are handled gracefully.""" + analyzer = PylintAnalyzer() + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = Exception("Unexpected error") + result = analyzer.analyze("some code") + assert result == [] + + def test_analyze_cleans_up_temp_file(self): + """Test that temporary file is cleaned up after analysis.""" + analyzer = PylintAnalyzer() + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + with patch("os.path.exists", return_value=True): + with patch("os.remove") as mock_remove: + analyzer.analyze("x = 1") + # os.remove should be called to clean up temp file + mock_remove.assert_called() + + def test_analyze_with_agent_name(self): + """Test analyze with custom agent name.""" + analyzer = PylintAnalyzer() + code = "x = 1\n" + + pylint_output = "1:0:C0114:Missing module docstring" + + with patch.object(analyzer, "_cmd_template", []): + with 
patch("src.agents.analyzers.pylint_analyzer.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) + result = analyzer.analyze(code, agent_name="CustomAgent") + assert len(result) == 1 + assert result[0].agent_name == "CustomAgent" + + def test_analyze_default_agent_name(self): + """Test analyze uses default agent name.""" + analyzer = PylintAnalyzer() + code = "x = 1\n" + + pylint_output = "1:0:C0114:Missing module docstring" + + with patch.object(analyzer, "_cmd_template", []): + with patch("src.agents.analyzers.pylint_analyzer.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) + result = analyzer.analyze(code) + assert len(result) == 1 + assert result[0].agent_name == "StyleAgent" + + +class TestPylintAnalyzerIntegration: + """Integration-like tests for end-to-end behavior.""" + + def test_finding_has_all_required_fields(self): + """Test that findings have all required fields.""" + analyzer = PylintAnalyzer() + code = "x = 1\n" + + pylint_output = "1:0:C0114:Missing module docstring" + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=pylint_output, stderr="", returncode=4) + result = analyzer.analyze(code) + assert len(result) == 1 + finding = result[0] + + # Check all Finding fields + assert finding.severity is not None + assert finding.issue_type is not None + assert finding.message is not None + assert finding.line_number is not None + assert finding.agent_name is not None + assert finding.rule_id is not None + + def test_empty_code_returns_empty_list(self): + """Test analyzing empty code.""" + analyzer = PylintAnalyzer() + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", stderr="", returncode=0) + result = analyzer.analyze("") + assert result == [] diff --git a/backend/tests/unit/agents/test_base_agent.py b/backend/tests/unit/agents/test_base_agent.py index 
0253b65..7c5ddc3 100644 --- a/backend/tests/unit/agents/test_base_agent.py +++ b/backend/tests/unit/agents/test_base_agent.py @@ -1,243 +1,243 @@ -""" -Unit tests for BaseAgent abstract class -Tests para la clase base BaseAgent -""" - -from typing import List -from unittest.mock import Mock - -import pytest - -from src.agents.base_agent import BaseAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding, Severity - - -class DummyAgent(BaseAgent): - """ - Agente dummy para testing. - - Implementación concreta de BaseAgent para fines de testing. - """ - - def __init__(self): - super().__init__(name="DummyAgent", version="1.0.0", category="test") - - def analyze(self, context: AnalysisContext) -> List[Finding]: - """Implementación dummy que retorna un finding de prueba.""" - return [ - Finding( - severity=Severity.INFO, - issue_type="test", - message="Test finding", - line_number=1, - agent_name=self.name, - ) - ] - - -class TestBaseAgentInitialization: - """Tests para inicialización del agente.""" - - def test_create_agent_with_defaults(self): - """Test crear agente con valores por defecto.""" - agent = DummyAgent() - - assert agent.name == "DummyAgent" - assert agent.version == "1.0.0" - assert agent.category == "test" - assert agent.enabled is True - - def test_agent_name_required(self): - """Test que el nombre es requerido.""" - with pytest.raises(ValueError, match="name cannot be empty"): - - class BadAgent(BaseAgent): - def __init__(self): - super().__init__(name="") - - def analyze(self, context): - pass - - BadAgent() - - def test_agent_info_dict(self): - """Test que get_info retorna diccionario correcto.""" - agent = DummyAgent() - info = agent.get_info() - - assert isinstance(info, dict) - assert info["name"] == "DummyAgent" - assert info["version"] == "1.0.0" - assert info["category"] == "test" - assert info["enabled"] is True - - -class TestBaseAgentMethods: - """Tests para métodos del agente.""" - - def 
test_is_enabled_when_enabled(self): - """Test is_enabled cuando está habilitado.""" - agent = DummyAgent() - assert agent.is_enabled() is True - - def test_is_enabled_when_disabled(self): - """Test is_enabled cuando está deshabilitado.""" - agent = DummyAgent() - agent.disable() - assert agent.is_enabled() is False - - def test_enable_agent(self): - """Test habilitar un agente.""" - agent = DummyAgent() - agent.disable() - assert agent.enabled is False - - agent.enable() - assert agent.enabled is True - - def test_disable_agent(self): - """Test deshabilitar un agente.""" - agent = DummyAgent() - assert agent.enabled is True - - agent.disable() - assert agent.enabled is False - - -class TestBaseAgentAnalyze: - """Tests para el método analyze.""" - - def test_analyze_returns_findings(self): - """Test que analyze retorna lista de findings.""" - agent = DummyAgent() - context = AnalysisContext(code_content="print('hello')", filename="test.py") - - findings = agent.analyze(context) - - assert isinstance(findings, list) - assert len(findings) >= 1 - assert findings[0].agent_name == "DummyAgent" - assert findings[0].severity == Severity.INFO - - def test_abstract_method_not_callable(self): - """Test que no se puede instanciar BaseAgent directamente.""" - with pytest.raises(TypeError): - BaseAgent(name="TestAgent") - - -class TestBaseAgentRepr: - """Tests para representación string.""" - - def test_repr_contains_name_and_version(self): - """Test que __repr__ contiene nombre y versión.""" - agent = DummyAgent() - repr_str = repr(agent) - - assert "DummyAgent" in repr_str - assert "1.0.0" in repr_str - assert "test" in repr_str - - def test_str_representation(self): - """Test que __str__ es legible.""" - agent = DummyAgent() - str_repr = str(agent) - - assert "DummyAgent" in str_repr - assert "1.0.0" in str_repr - assert "test" in str_repr - assert "enabled" in str_repr.lower() - - -class TestBaseAgentEvents: - """Tests para emisión de eventos.""" - - def 
test_emit_agent_started(self): - """Test que _emit_agent_started publica evento.""" - event_bus_mock = Mock() - agent = DummyAgent() - agent.event_bus = event_bus_mock - - context = AnalysisContext(code_content="code", filename="test.py") - - agent._emit_agent_started(context) - - event_bus_mock.publish.assert_called_once() - # publish recibe (event_type, data) - event_type = event_bus_mock.publish.call_args[0][0] - data = event_bus_mock.publish.call_args[0][1] - assert event_type == "AGENT_STARTED" - assert data["agent_name"] == "DummyAgent" - - def test_emit_agent_completed(self): - """Test que _emit_agent_completed publica evento.""" - event_bus_mock = Mock() - agent = DummyAgent() - agent.event_bus = event_bus_mock - - context = AnalysisContext(code_content="code", filename="test.py") - findings = [ - Finding( - severity=Severity.INFO, - issue_type="test", - message="Test finding message", - line_number=1, - agent_name="DummyAgent", - ) - ] - - agent._emit_agent_completed(context, findings) - - event_bus_mock.publish.assert_called_once() - # publish recibe (event_type, data) - event_type = event_bus_mock.publish.call_args[0][0] - data = event_bus_mock.publish.call_args[0][1] - assert event_type == "AGENT_COMPLETED" - assert data["findings_count"] == 1 - - def test_emit_agent_failed(self): - """Test que _emit_agent_failed publica evento.""" - event_bus_mock = Mock() - agent = DummyAgent() - agent.event_bus = event_bus_mock - context = AnalysisContext(code_content="code", filename="test.py") - - error = RuntimeError("boom") - agent._emit_agent_failed(context, error) - - event_bus_mock.publish.assert_called_once() - # publish recibe (event_type, data) - event_type = event_bus_mock.publish.call_args[0][0] - data = event_bus_mock.publish.call_args[0][1] - assert event_type == "AGENT_FAILED" - assert "boom" in data["error"] - - def test_no_events_when_event_bus_none(self): - """Test que no falla si event_bus es None.""" - agent = DummyAgent() - agent.event_bus = None 
- - context = AnalysisContext(code_content="code", filename="test.py") - - # No debe lanzar excepción - agent._emit_agent_started(context) - agent._emit_agent_completed(context, []) - - -class TestBaseAgentLogging: - """Tests para el logging del agente.""" - - def test_log_helpers_delegate_to_logger(self): - """Test que los helpers de log delegan en el logger.""" - agent = DummyAgent() - agent.logger = Mock() - - agent.log_info("info") - agent.log_warning("warn") - agent.log_error("err") - agent.log_debug("dbg") - - agent.logger.info.assert_called_once_with("[%s] %s", "DummyAgent", "info") - agent.logger.warning.assert_called_once_with("[%s] %s", "DummyAgent", "warn") - agent.logger.error.assert_called_once_with("[%s] %s", "DummyAgent", "err") - agent.logger.debug.assert_called_once_with("[%s] %s", "DummyAgent", "dbg") +""" +Unit tests for BaseAgent abstract class +Tests para la clase base BaseAgent +""" + +from typing import List +from unittest.mock import Mock + +import pytest + +from src.agents.base_agent import BaseAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class DummyAgent(BaseAgent): + """ + Agente dummy para testing. + + Implementación concreta de BaseAgent para fines de testing. 
+ """ + + def __init__(self): + super().__init__(name="DummyAgent", version="1.0.0", category="test") + + def analyze(self, context: AnalysisContext) -> List[Finding]: + """Implementación dummy que retorna un finding de prueba.""" + return [ + Finding( + severity=Severity.INFO, + issue_type="test", + message="Test finding", + line_number=1, + agent_name=self.name, + ) + ] + + +class TestBaseAgentInitialization: + """Tests para inicialización del agente.""" + + def test_create_agent_with_defaults(self): + """Test crear agente con valores por defecto.""" + agent = DummyAgent() + + assert agent.name == "DummyAgent" + assert agent.version == "1.0.0" + assert agent.category == "test" + assert agent.enabled is True + + def test_agent_name_required(self): + """Test que el nombre es requerido.""" + with pytest.raises(ValueError, match="name cannot be empty"): + + class BadAgent(BaseAgent): + def __init__(self): + super().__init__(name="") + + def analyze(self, context): + pass + + BadAgent() + + def test_agent_info_dict(self): + """Test que get_info retorna diccionario correcto.""" + agent = DummyAgent() + info = agent.get_info() + + assert isinstance(info, dict) + assert info["name"] == "DummyAgent" + assert info["version"] == "1.0.0" + assert info["category"] == "test" + assert info["enabled"] is True + + +class TestBaseAgentMethods: + """Tests para métodos del agente.""" + + def test_is_enabled_when_enabled(self): + """Test is_enabled cuando está habilitado.""" + agent = DummyAgent() + assert agent.is_enabled() is True + + def test_is_enabled_when_disabled(self): + """Test is_enabled cuando está deshabilitado.""" + agent = DummyAgent() + agent.disable() + assert agent.is_enabled() is False + + def test_enable_agent(self): + """Test habilitar un agente.""" + agent = DummyAgent() + agent.disable() + assert agent.enabled is False + + agent.enable() + assert agent.enabled is True + + def test_disable_agent(self): + """Test deshabilitar un agente.""" + agent = DummyAgent() + 
assert agent.enabled is True + + agent.disable() + assert agent.enabled is False + + +class TestBaseAgentAnalyze: + """Tests para el método analyze.""" + + def test_analyze_returns_findings(self): + """Test que analyze retorna lista de findings.""" + agent = DummyAgent() + context = AnalysisContext(code_content="print('hello')", filename="test.py") + + findings = agent.analyze(context) + + assert isinstance(findings, list) + assert len(findings) >= 1 + assert findings[0].agent_name == "DummyAgent" + assert findings[0].severity == Severity.INFO + + def test_abstract_method_not_callable(self): + """Test que no se puede instanciar BaseAgent directamente.""" + with pytest.raises(TypeError): + BaseAgent(name="TestAgent") + + +class TestBaseAgentRepr: + """Tests para representación string.""" + + def test_repr_contains_name_and_version(self): + """Test que __repr__ contiene nombre y versión.""" + agent = DummyAgent() + repr_str = repr(agent) + + assert "DummyAgent" in repr_str + assert "1.0.0" in repr_str + assert "test" in repr_str + + def test_str_representation(self): + """Test que __str__ es legible.""" + agent = DummyAgent() + str_repr = str(agent) + + assert "DummyAgent" in str_repr + assert "1.0.0" in str_repr + assert "test" in str_repr + assert "enabled" in str_repr.lower() + + +class TestBaseAgentEvents: + """Tests para emisión de eventos.""" + + def test_emit_agent_started(self): + """Test que _emit_agent_started publica evento.""" + event_bus_mock = Mock() + agent = DummyAgent() + agent.event_bus = event_bus_mock + + context = AnalysisContext(code_content="code", filename="test.py") + + agent._emit_agent_started(context) + + event_bus_mock.publish.assert_called_once() + # publish recibe (event_type, data) + event_type = event_bus_mock.publish.call_args[0][0] + data = event_bus_mock.publish.call_args[0][1] + assert event_type == "AGENT_STARTED" + assert data["agent_name"] == "DummyAgent" + + def test_emit_agent_completed(self): + """Test que 
_emit_agent_completed publica evento.""" + event_bus_mock = Mock() + agent = DummyAgent() + agent.event_bus = event_bus_mock + + context = AnalysisContext(code_content="code", filename="test.py") + findings = [ + Finding( + severity=Severity.INFO, + issue_type="test", + message="Test finding message", + line_number=1, + agent_name="DummyAgent", + ) + ] + + agent._emit_agent_completed(context, findings) + + event_bus_mock.publish.assert_called_once() + # publish recibe (event_type, data) + event_type = event_bus_mock.publish.call_args[0][0] + data = event_bus_mock.publish.call_args[0][1] + assert event_type == "AGENT_COMPLETED" + assert data["findings_count"] == 1 + + def test_emit_agent_failed(self): + """Test que _emit_agent_failed publica evento.""" + event_bus_mock = Mock() + agent = DummyAgent() + agent.event_bus = event_bus_mock + context = AnalysisContext(code_content="code", filename="test.py") + + error = RuntimeError("boom") + agent._emit_agent_failed(context, error) + + event_bus_mock.publish.assert_called_once() + # publish recibe (event_type, data) + event_type = event_bus_mock.publish.call_args[0][0] + data = event_bus_mock.publish.call_args[0][1] + assert event_type == "AGENT_FAILED" + assert "boom" in data["error"] + + def test_no_events_when_event_bus_none(self): + """Test que no falla si event_bus es None.""" + agent = DummyAgent() + agent.event_bus = None + + context = AnalysisContext(code_content="code", filename="test.py") + + # No debe lanzar excepción + agent._emit_agent_started(context) + agent._emit_agent_completed(context, []) + + +class TestBaseAgentLogging: + """Tests para el logging del agente.""" + + def test_log_helpers_delegate_to_logger(self): + """Test que los helpers de log delegan en el logger.""" + agent = DummyAgent() + agent.logger = Mock() + + agent.log_info("info") + agent.log_warning("warn") + agent.log_error("err") + agent.log_debug("dbg") + + agent.logger.info.assert_called_once_with("[%s] %s", "DummyAgent", "info") + 
agent.logger.warning.assert_called_once_with("[%s] %s", "DummyAgent", "warn") + agent.logger.error.assert_called_once_with("[%s] %s", "DummyAgent", "err") + agent.logger.debug.assert_called_once_with("[%s] %s", "DummyAgent", "dbg") diff --git a/backend/tests/unit/agents/test_quality_agent.py b/backend/tests/unit/agents/test_quality_agent.py index 9191742..557bc28 100644 --- a/backend/tests/unit/agents/test_quality_agent.py +++ b/backend/tests/unit/agents/test_quality_agent.py @@ -1,180 +1,180 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from src.agents.quality_agent import QualityAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Finding, Severity - - -class TestQualityAgent: - """Test suite for QualityAgent.""" - - @pytest.fixture - def mock_event_bus(self): - """Mock the EventBus.""" - return MagicMock() - - @pytest.fixture - def agent(self, mock_event_bus): - """Create QualityAgent instance.""" - return QualityAgent(event_bus=mock_event_bus) - - def test_analyze_quality_metrics(self, agent, mock_event_bus): - """Test detection of all quality metrics (Happy Path).""" - code = """ -def complex_function(x): - pass -""" - context = AnalysisContext(code_content=code, filename="quality_test.py") - - # Mock Radon Complexity - with patch("src.agents.quality_agent.radon_visit") as mock_radon_visit, patch( - "src.agents.quality_agent.mi_visit" - ) as mock_mi_visit: - - mock_func = MagicMock() - mock_func.name = "complex_function" - mock_func.complexity = 15 - mock_func.lineno = 2 - mock_radon_visit.return_value = [mock_func] - - # Mock Radon Maintainability - mock_mi_visit.return_value = 40.0 - - findings = agent.analyze(context) - - assert mock_event_bus.publish.called - issue_types = [f.issue_type for f in findings] - assert "quality/cyclomatic-complexity" in issue_types - assert "quality/maintainability-index" in issue_types - - def test_measure_function_length(self, agent): - """Test specifically function 
length detection.""" - # Create a valid python code with a long function - code = "def long_func():\n" + "\n".join([f" x = {i}" for i in range(105)]) - import ast - - tree = ast.parse(code) - findings = agent.measure_function_length(tree) - assert len(findings) == 1 - assert findings[0].issue_type == "quality/function-length" - assert "105" in findings[0].message or "106" in findings[0].message - - def test_calculate_maintainability_index(self, agent): - """Test MI calculation.""" - with patch("src.agents.quality_agent.mi_visit") as mock_mi: - mock_mi.return_value = 30.0 - score = agent.calculate_maintainability_index("some code") - assert score == 30.0 - - def test_syntax_error_handling(self, agent): - """Test handling of syntax errors in AST parsing.""" - context = AnalysisContext(code_content="def broken_code(", filename="error.py") - findings = agent.analyze(context) - assert len(findings) == 0 - - def test_complexity_thresholds(self, agent): - """Test different complexity thresholds (High/Critical).""" - import ast - - tree = ast.parse("def foo(): pass") - - with patch("src.agents.quality_agent.radon_visit") as mock_radon_visit: - # Case 1: Critical (> 50) - mock_crit = MagicMock() - mock_crit.name = "crit_func" - mock_crit.complexity = 51 - mock_crit.lineno = 1 - - # Case 2: High (> 20) - mock_high = MagicMock() - mock_high.name = "high_func" - mock_high.complexity = 21 - mock_high.lineno = 5 - - mock_radon_visit.return_value = [mock_crit, mock_high] - - findings = agent.calculate_complexity(tree) - - assert len(findings) == 2 - severities = [f.severity for f in findings] - assert Severity.CRITICAL in severities - assert Severity.HIGH in severities - - def test_maintainability_critical(self, agent): - """Test critical maintainability index.""" - # Use valid code so AST parsing succeeds - context = AnalysisContext(code_content="def foo(): pass", filename="test.py") - - with patch("src.agents.quality_agent.mi_visit") as mock_mi, patch( - 
"src.agents.quality_agent.radon_visit", return_value=[] - ): - - # Mock MI < 20 - mock_mi.return_value = 10.0 - - findings = agent.analyze(context) - mi_finding = next( - (f for f in findings if f.issue_type == "quality/maintainability-index"), None - ) - assert mi_finding is not None - assert mi_finding.severity == Severity.CRITICAL - - def test_short_file_duplication(self, agent): - """Test that short files skip duplication check.""" - code = "print('hello')\n" * 2 - findings = agent.detect_code_duplication(code) - assert len(findings) == 0 - - def test_code_duplication_detected(self, agent): - """Test detection of duplicated code blocks.""" - # Create code with duplication - # Block size is 4 lines. We need a block of 4 lines repeated. - block = "x = 1\ny = 2\nz = 3\nw = 4\n" - code = block + "a = 0\n" + block - - findings = agent.detect_code_duplication(code) - assert len(findings) > 0 - assert findings[0].issue_type == "quality/duplication" - assert "Bloque de código duplicado" in findings[0].message - - def test_radon_not_installed(self, agent): - """Test behavior when radon is not installed.""" - with patch("src.agents.quality_agent.radon_visit", None), patch( - "src.agents.quality_agent.mi_visit", None - ): - - findings_cc = agent.calculate_complexity(MagicMock()) - assert len(findings_cc) == 0 - - score = agent.calculate_maintainability_index("code") - assert score == 100.0 - - def test_exception_handling_in_analyze(self, agent): - """Test global exception handling in analyze method.""" - context = AnalysisContext(code_content="code", filename="test.py") - with patch("ast.parse", side_effect=Exception("Unexpected error")): - findings = agent.analyze(context) - assert len(findings) == 0 - - def test_exception_in_complexity_calculation(self, agent): - """Test exception handling inside calculate_complexity.""" - with patch("src.agents.quality_agent.radon_visit") as mock_radon: - mock_radon.side_effect = Exception("Radon error") - findings = 
agent.calculate_complexity(MagicMock()) - assert len(findings) == 0 - - def test_mi_visit_exception(self, agent): - """Test exception inside calculate_maintainability_index.""" - with patch("src.agents.quality_agent.mi_visit") as mock_mi: - mock_mi.side_effect = Exception("MI Error") - score = agent.calculate_maintainability_index("code") - assert score == 100.0 - - def test_duplication_with_comments(self, agent): - """Test that comments are ignored in duplication check.""" - # Block of comments - comments = "# comment\n" * 5 - findings = agent.detect_code_duplication(comments) - assert len(findings) == 0 +from unittest.mock import MagicMock, patch + +import pytest + +from src.agents.quality_agent import QualityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Finding, Severity + + +class TestQualityAgent: + """Test suite for QualityAgent.""" + + @pytest.fixture + def mock_event_bus(self): + """Mock the EventBus.""" + return MagicMock() + + @pytest.fixture + def agent(self, mock_event_bus): + """Create QualityAgent instance.""" + return QualityAgent(event_bus=mock_event_bus) + + def test_analyze_quality_metrics(self, agent, mock_event_bus): + """Test detection of all quality metrics (Happy Path).""" + code = """ +def complex_function(x): + pass +""" + context = AnalysisContext(code_content=code, filename="quality_test.py") + + # Mock Radon Complexity + with patch("src.agents.quality_agent.radon_visit") as mock_radon_visit, patch( + "src.agents.quality_agent.mi_visit" + ) as mock_mi_visit: + + mock_func = MagicMock() + mock_func.name = "complex_function" + mock_func.complexity = 15 + mock_func.lineno = 2 + mock_radon_visit.return_value = [mock_func] + + # Mock Radon Maintainability + mock_mi_visit.return_value = 40.0 + + findings = agent.analyze(context) + + assert mock_event_bus.publish.called + issue_types = [f.issue_type for f in findings] + assert "quality/cyclomatic-complexity" in issue_types + assert 
"quality/maintainability-index" in issue_types + + def test_measure_function_length(self, agent): + """Test specifically function length detection.""" + # Create a valid python code with a long function + code = "def long_func():\n" + "\n".join([f" x = {i}" for i in range(105)]) + import ast + + tree = ast.parse(code) + findings = agent.measure_function_length(tree) + assert len(findings) == 1 + assert findings[0].issue_type == "quality/function-length" + assert "105" in findings[0].message or "106" in findings[0].message + + def test_calculate_maintainability_index(self, agent): + """Test MI calculation.""" + with patch("src.agents.quality_agent.mi_visit") as mock_mi: + mock_mi.return_value = 30.0 + score = agent.calculate_maintainability_index("some code") + assert score == 30.0 + + def test_syntax_error_handling(self, agent): + """Test handling of syntax errors in AST parsing.""" + context = AnalysisContext(code_content="def broken_code(", filename="error.py") + findings = agent.analyze(context) + assert len(findings) == 0 + + def test_complexity_thresholds(self, agent): + """Test different complexity thresholds (High/Critical).""" + import ast + + tree = ast.parse("def foo(): pass") + + with patch("src.agents.quality_agent.radon_visit") as mock_radon_visit: + # Case 1: Critical (> 50) + mock_crit = MagicMock() + mock_crit.name = "crit_func" + mock_crit.complexity = 51 + mock_crit.lineno = 1 + + # Case 2: High (> 20) + mock_high = MagicMock() + mock_high.name = "high_func" + mock_high.complexity = 21 + mock_high.lineno = 5 + + mock_radon_visit.return_value = [mock_crit, mock_high] + + findings = agent.calculate_complexity(tree) + + assert len(findings) == 2 + severities = [f.severity for f in findings] + assert Severity.CRITICAL in severities + assert Severity.HIGH in severities + + def test_maintainability_critical(self, agent): + """Test critical maintainability index.""" + # Use valid code so AST parsing succeeds + context = AnalysisContext(code_content="def 
foo(): pass", filename="test.py") + + with patch("src.agents.quality_agent.mi_visit") as mock_mi, patch( + "src.agents.quality_agent.radon_visit", return_value=[] + ): + + # Mock MI < 20 + mock_mi.return_value = 10.0 + + findings = agent.analyze(context) + mi_finding = next( + (f for f in findings if f.issue_type == "quality/maintainability-index"), None + ) + assert mi_finding is not None + assert mi_finding.severity == Severity.CRITICAL + + def test_short_file_duplication(self, agent): + """Test that short files skip duplication check.""" + code = "print('hello')\n" * 2 + findings = agent.detect_code_duplication(code) + assert len(findings) == 0 + + def test_code_duplication_detected(self, agent): + """Test detection of duplicated code blocks.""" + # Create code with duplication + # Block size is 4 lines. We need a block of 4 lines repeated. + block = "x = 1\ny = 2\nz = 3\nw = 4\n" + code = block + "a = 0\n" + block + + findings = agent.detect_code_duplication(code) + assert len(findings) > 0 + assert findings[0].issue_type == "quality/duplication" + assert "Bloque de código duplicado" in findings[0].message + + def test_radon_not_installed(self, agent): + """Test behavior when radon is not installed.""" + with patch("src.agents.quality_agent.radon_visit", None), patch( + "src.agents.quality_agent.mi_visit", None + ): + + findings_cc = agent.calculate_complexity(MagicMock()) + assert len(findings_cc) == 0 + + score = agent.calculate_maintainability_index("code") + assert score == 100.0 + + def test_exception_handling_in_analyze(self, agent): + """Test global exception handling in analyze method.""" + context = AnalysisContext(code_content="code", filename="test.py") + with patch("ast.parse", side_effect=Exception("Unexpected error")): + findings = agent.analyze(context) + assert len(findings) == 0 + + def test_exception_in_complexity_calculation(self, agent): + """Test exception handling inside calculate_complexity.""" + with 
patch("src.agents.quality_agent.radon_visit") as mock_radon: + mock_radon.side_effect = Exception("Radon error") + findings = agent.calculate_complexity(MagicMock()) + assert len(findings) == 0 + + def test_mi_visit_exception(self, agent): + """Test exception inside calculate_maintainability_index.""" + with patch("src.agents.quality_agent.mi_visit") as mock_mi: + mock_mi.side_effect = Exception("MI Error") + score = agent.calculate_maintainability_index("code") + assert score == 100.0 + + def test_duplication_with_comments(self, agent): + """Test that comments are ignored in duplication check.""" + # Block of comments + comments = "# comment\n" * 5 + findings = agent.detect_code_duplication(comments) + assert len(findings) == 0 diff --git a/backend/tests/unit/agents/test_security_agent.py b/backend/tests/unit/agents/test_security_agent.py index 7324376..779a8c4 100644 --- a/backend/tests/unit/agents/test_security_agent.py +++ b/backend/tests/unit/agents/test_security_agent.py @@ -1,412 +1,412 @@ -""" -Unit tests for SecurityAgent. - -Tests cover all 4 detection modules: -1. Dangerous functions detection -2. SQL injection detection -3. Hardcoded credentials detection -4. 
Weak cryptography detection -""" - -import pytest - -from src.agents.security_agent import SecurityAgent -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Severity - - -class TestSecurityAgentInitialization: - """Test SecurityAgent initialization.""" - - def test_agent_initialization(self): - """Test SecurityAgent is created with correct attributes.""" - agent = SecurityAgent() - - assert agent.name == "SecurityAgent" - assert agent.version == "1.0.0" - assert agent.category == "security" - assert agent.is_enabled() is True - - def test_agent_info(self): - """Test get_info returns correct metadata.""" - agent = SecurityAgent() - info = agent.get_info() - - assert info["name"] == "SecurityAgent" - assert info["category"] == "security" - - -class TestDangerousFunctionsDetection: - """Test detection of dangerous functions.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - def test_detect_eval_function(self, agent): - """Test detection of eval() function.""" - code = """ -result = eval(user_input) -print(result) -""" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - eval_finding = next(f for f in findings if "eval" in f.message.lower()) - assert eval_finding.severity == Severity.CRITICAL - assert eval_finding.issue_type == "dangerous_function" - assert eval_finding.line_number == 2 - assert "ast.literal_eval" in eval_finding.suggestion - assert eval_finding.rule_id == "SEC001_EVAL" - - def test_detect_exec_function(self, agent): - """Test detection of exec() function.""" - code = "exec(malicious_code)" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - exec_finding = next(f for f in findings if "exec" in f.message.lower()) - assert exec_finding.severity == Severity.CRITICAL - assert exec_finding.issue_type == 
"dangerous_function" - assert "validate input" in exec_finding.suggestion.lower() - - def test_detect_compile_function(self, agent): - """Test detection of compile() function.""" - code = "compiled = compile(source, 'file', 'exec')" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - compile_finding = next(f for f in findings if "compile" in f.message.lower()) - assert compile_finding.severity == Severity.CRITICAL - - def test_detect_pickle_loads(self, agent): - """Test detection of pickle.loads().""" - code = """ -import pickle -data = pickle.loads(untrusted_data) -""" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - pickle_finding = next( - f - for f in findings - if "pickle" in f.message.lower() or "deserialization" in f.issue_type - ) - assert pickle_finding.severity == Severity.HIGH - assert "json.loads" in pickle_finding.suggestion - - def test_no_false_positives_for_safe_functions(self, agent): - """Test that safe functions don't trigger findings.""" - code = """ -def evaluate_math(a, b): - return a + b - -result = evaluate_math(5, 3) -""" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - # Should have 0 findings for this safe code - assert len(findings) == 0 - - -class TestSQLInjectionDetection: - """Test detection of SQL injection vulnerabilities.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - def test_detect_string_concatenation_sql(self, agent): - """Test detection of SQL injection via string concatenation.""" - code = 'cursor.execute("SELECT * FROM users WHERE id=" + user_id)' - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - sql_finding = next(f for f in findings if f.issue_type == 
"sql_injection") - assert sql_finding.severity == Severity.HIGH - assert "parameterized" in sql_finding.suggestion.lower() - assert sql_finding.rule_id == "SEC002_SQL_INJECTION" - - def test_detect_fstring_sql_injection(self, agent): - """Test detection of SQL injection via f-strings.""" - code = "query = f\"DELETE FROM logs WHERE date < '{cutoff}'\"\ncursor.execute(query)" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - sql_finding = next(f for f in findings if f.issue_type == "sql_injection") - assert sql_finding.severity == Severity.HIGH - - def test_detect_percent_formatting_sql(self, agent): - """Test detection of SQL injection via %s formatting.""" - code = "cursor.execute('SELECT * FROM users WHERE name=%s' % username)" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - sql_finding = next(f for f in findings if f.issue_type == "sql_injection") - assert sql_finding.severity == Severity.HIGH - - def test_no_false_positives_for_safe_queries(self, agent): - """Test that parameterized queries don't trigger findings.""" - code = """ -cursor.execute('SELECT * FROM users WHERE id=?', (user_id,)) -cursor.execute('INSERT INTO logs VALUES (?, ?)', (timestamp, message)) -""" - context = AnalysisContext(code_content=code, filename="test.py") - findings = agent.analyze(context) - - # Should have 0 SQL injection findings - sql_findings = [f for f in findings if f.issue_type == "sql_injection"] - assert len(sql_findings) == 0 - - -class TestHardcodedCredentialsDetection: - """Test detection of hardcoded credentials.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - def test_detect_hardcoded_password(self, agent): - """Test detection of hardcoded password.""" - code = 'password = "MySecretPass123"' - context = AnalysisContext(code_content=code, 
filename="config.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - pwd_finding = next( - f - for f in findings - if f.issue_type == "hardcoded_credentials" and "password" in f.message.lower() - ) - assert pwd_finding.severity == Severity.CRITICAL - assert "environment variable" in pwd_finding.suggestion.lower() - assert "SEC003_PASSWORD" in pwd_finding.rule_id - - def test_detect_hardcoded_api_key(self, agent): - """Test detection of hardcoded API key.""" - code = 'api_key = "sk_live_abc123xyz789"' - context = AnalysisContext(code_content=code, filename="config.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - api_finding = next( - f - for f in findings - if f.issue_type == "hardcoded_credentials" and "api" in f.message.lower() - ) - assert api_finding.severity == Severity.CRITICAL - - def test_detect_hardcoded_token(self, agent): - """Test detection of hardcoded token.""" - code = 'auth_token = "ghp_abc123xyz789012345"' - context = AnalysisContext(code_content=code, filename="auth.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - token_finding = next( - f - for f in findings - if f.issue_type == "hardcoded_credentials" and "token" in f.message.lower() - ) - assert token_finding.severity == Severity.HIGH - - def test_ignore_placeholders(self, agent): - """Test that placeholders are not flagged as credentials.""" - code = """ -password = "YOUR_PASSWORD_HERE" -api_key = "REPLACE_WITH_YOUR_API_KEY" -token = "TODO: Add token" -secret = "example_secret" -""" - context = AnalysisContext(code_content=code, filename="config.py") - findings = agent.analyze(context) - - # Should have 0 findings for placeholders - cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] - assert len(cred_findings) == 0 - - def test_ignore_short_values(self, agent): - """Test that very short values are not flagged.""" - code = 'password = "abc"' - context = AnalysisContext(code_content=code, 
filename="test.py") - findings = agent.analyze(context) - - # Should not flag very short passwords - cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] - assert len(cred_findings) == 0 - - -class TestWeakCryptographyDetection: - """Test detection of weak cryptographic algorithms.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - def test_detect_md5_usage(self, agent): - """Test detection of MD5 hash algorithm.""" - code = """ -import hashlib -hash_value = hashlib.md5(data).hexdigest() -""" - context = AnalysisContext(code_content=code, filename="crypto.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - md5_finding = next( - f - for f in findings - if f.issue_type == "weak_cryptography" and "md5" in f.message.lower() - ) - assert md5_finding.severity == Severity.MEDIUM - assert "SHA-256" in md5_finding.suggestion - assert md5_finding.rule_id == "SEC004_MD5" - - def test_detect_sha1_usage(self, agent): - """Test detection of SHA1 hash algorithm.""" - code = """ - import hashlib - digest = hashlib.sha1(message.encode()).digest() - """ - context = AnalysisContext(code_content=code, filename="hasher.py") - findings = agent.analyze(context) - - assert len(findings) >= 1 - sha1_finding = next( - f - for f in findings - if f.issue_type == "weak_cryptography" and "sha1" in f.message.lower() - ) - assert sha1_finding.severity == Severity.MEDIUM - - def test_safe_sha256_no_findings(self, agent): - """Test that SHA-256 doesn't trigger findings.""" - code = """ - import hashlib - secure_hash = hashlib.sha256(data).hexdigest() - """ - context = AnalysisContext(code_content=code, filename="secure.py") - findings = agent.analyze(context) - - # Should have 0 weak crypto findings for SHA-256 - crypto_findings = [f for f in findings if f.issue_type == "weak_cryptography"] - assert len(crypto_findings) == 0 - - -class TestComplexScenarios: - """Test complex scenarios with 
multiple vulnerabilities.""" - - @pytest.fixture - def agent(self): - """Create SecurityAgent instance.""" - return SecurityAgent() - - def test_multiple_vulnerabilities_in_one_file(self, agent): - """Test detection of multiple vulnerability types.""" - code = """ - import hashlib - import pickle - - # Hardcoded credential - password = "MySecretPassword123" - api_key = "sk_live_abc123xyz" - - # Dangerous function - def execute_command(user_input): - result = eval(user_input) - return result - - # SQL injection - def query_user(user_id): - query = f"SELECT * FROM users WHERE id = {user_id}" - cursor.execute(query) - return cursor.fetchone() - - # Weak crypto - def hash_password(pwd): - return hashlib.md5(pwd.encode()).hexdigest() - - # Unsafe deserialization - def load_data(data): - return pickle.loads(data) - """ - context = AnalysisContext(code_content=code, filename="vulnerable.py") - findings = agent.analyze(context) - - # Should detect at least 6 vulnerabilities - assert len(findings) >= 6 - - # Verify each type is detected - issue_types = {f.issue_type for f in findings} - assert "hardcoded_credentials" in issue_types - assert "dangerous_function" in issue_types - assert "sql_injection" in issue_types - assert "weak_cryptography" in issue_types - - # Verify CRITICAL findings are first (sorted by severity) - critical_findings = [f for f in findings if f.is_critical] - assert len(critical_findings) >= 2 - # First findings should be CRITICAL - assert findings[0].severity == Severity.CRITICAL - - def test_syntax_error_handling(self, agent): - """Test that syntax errors are handled gracefully.""" - code = """ - def incomplete_function( - # Missing closing parenthesis and body - """ - context = AnalysisContext(code_content=code, filename="broken.py") - - # Should not raise exception, but log error - findings = agent.analyze(context) - - # May have some findings from regex-based modules - # Should not crash - assert isinstance(findings, list) - - def 
test_empty_code(self, agent): - """Test analysis of minimal valid code.""" - code = "# Just a comment\npass" - context = AnalysisContext(code_content=code, filename="minimal.py") - findings = agent.analyze(context) - - assert len(findings) == 0 - - def test_findings_sorted_by_severity(self, agent): - """Test that findings are sorted by severity.""" - code = """ - # MEDIUM severity issue - import hashlib - hash1 = hashlib.md5(data).hexdigest() - - # CRITICAL severity issue - password = "SuperSecret123" - - # HIGH severity issue - query = f"DELETE FROM users WHERE id={user_id}" - cursor.execute(query) - - # CRITICAL severity issue - result = eval(user_input) - """ - context = AnalysisContext(code_content=code, filename="mixed.py") - findings = agent.analyze(context) - - assert len(findings) >= 4 - - # First findings should be CRITICAL - for i in range(min(2, len(findings))): - assert findings[i].severity in [Severity.CRITICAL, Severity.HIGH] +""" +Unit tests for SecurityAgent. + +Tests cover all 4 detection modules: +1. Dangerous functions detection +2. SQL injection detection +3. Hardcoded credentials detection +4. 
Weak cryptography detection +""" + +import pytest + +from src.agents.security_agent import SecurityAgent +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class TestSecurityAgentInitialization: + """Test SecurityAgent initialization.""" + + def test_agent_initialization(self): + """Test SecurityAgent is created with correct attributes.""" + agent = SecurityAgent() + + assert agent.name == "SecurityAgent" + assert agent.version == "1.0.0" + assert agent.category == "security" + assert agent.is_enabled() is True + + def test_agent_info(self): + """Test get_info returns correct metadata.""" + agent = SecurityAgent() + info = agent.get_info() + + assert info["name"] == "SecurityAgent" + assert info["category"] == "security" + + +class TestDangerousFunctionsDetection: + """Test detection of dangerous functions.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_eval_function(self, agent): + """Test detection of eval() function.""" + code = """ +result = eval(user_input) +print(result) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + eval_finding = next(f for f in findings if "eval" in f.message.lower()) + assert eval_finding.severity == Severity.CRITICAL + assert eval_finding.issue_type == "dangerous_function" + assert eval_finding.line_number == 2 + assert "ast.literal_eval" in eval_finding.suggestion + assert eval_finding.rule_id == "SEC001_EVAL" + + def test_detect_exec_function(self, agent): + """Test detection of exec() function.""" + code = "exec(malicious_code)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + exec_finding = next(f for f in findings if "exec" in f.message.lower()) + assert exec_finding.severity == Severity.CRITICAL + assert exec_finding.issue_type == 
"dangerous_function" + assert "validate input" in exec_finding.suggestion.lower() + + def test_detect_compile_function(self, agent): + """Test detection of compile() function.""" + code = "compiled = compile(source, 'file', 'exec')" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + compile_finding = next(f for f in findings if "compile" in f.message.lower()) + assert compile_finding.severity == Severity.CRITICAL + + def test_detect_pickle_loads(self, agent): + """Test detection of pickle.loads().""" + code = """ +import pickle +data = pickle.loads(untrusted_data) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + pickle_finding = next( + f + for f in findings + if "pickle" in f.message.lower() or "deserialization" in f.issue_type + ) + assert pickle_finding.severity == Severity.HIGH + assert "json.loads" in pickle_finding.suggestion + + def test_no_false_positives_for_safe_functions(self, agent): + """Test that safe functions don't trigger findings.""" + code = """ +def evaluate_math(a, b): + return a + b + +result = evaluate_math(5, 3) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + # Should have 0 findings for this safe code + assert len(findings) == 0 + + +class TestSQLInjectionDetection: + """Test detection of SQL injection vulnerabilities.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_string_concatenation_sql(self, agent): + """Test detection of SQL injection via string concatenation.""" + code = 'cursor.execute("SELECT * FROM users WHERE id=" + user_id)' + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == 
"sql_injection") + assert sql_finding.severity == Severity.HIGH + assert "parameterized" in sql_finding.suggestion.lower() + assert sql_finding.rule_id == "SEC002_SQL_INJECTION" + + def test_detect_fstring_sql_injection(self, agent): + """Test detection of SQL injection via f-strings.""" + code = "query = f\"DELETE FROM logs WHERE date < '{cutoff}'\"\ncursor.execute(query)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == "sql_injection") + assert sql_finding.severity == Severity.HIGH + + def test_detect_percent_formatting_sql(self, agent): + """Test detection of SQL injection via %s formatting.""" + code = "cursor.execute('SELECT * FROM users WHERE name=%s' % username)" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sql_finding = next(f for f in findings if f.issue_type == "sql_injection") + assert sql_finding.severity == Severity.HIGH + + def test_no_false_positives_for_safe_queries(self, agent): + """Test that parameterized queries don't trigger findings.""" + code = """ +cursor.execute('SELECT * FROM users WHERE id=?', (user_id,)) +cursor.execute('INSERT INTO logs VALUES (?, ?)', (timestamp, message)) +""" + context = AnalysisContext(code_content=code, filename="test.py") + findings = agent.analyze(context) + + # Should have 0 SQL injection findings + sql_findings = [f for f in findings if f.issue_type == "sql_injection"] + assert len(sql_findings) == 0 + + +class TestHardcodedCredentialsDetection: + """Test detection of hardcoded credentials.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_hardcoded_password(self, agent): + """Test detection of hardcoded password.""" + code = 'password = "MySecretPass123"' + context = AnalysisContext(code_content=code, 
filename="config.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + pwd_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "password" in f.message.lower() + ) + assert pwd_finding.severity == Severity.CRITICAL + assert "environment variable" in pwd_finding.suggestion.lower() + assert "SEC003_PASSWORD" in pwd_finding.rule_id + + def test_detect_hardcoded_api_key(self, agent): + """Test detection of hardcoded API key.""" + code = 'api_key = "sk_live_abc123xyz789"' + context = AnalysisContext(code_content=code, filename="config.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + api_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "api" in f.message.lower() + ) + assert api_finding.severity == Severity.CRITICAL + + def test_detect_hardcoded_token(self, agent): + """Test detection of hardcoded token.""" + code = 'auth_token = "ghp_abc123xyz789012345"' + context = AnalysisContext(code_content=code, filename="auth.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + token_finding = next( + f + for f in findings + if f.issue_type == "hardcoded_credentials" and "token" in f.message.lower() + ) + assert token_finding.severity == Severity.HIGH + + def test_ignore_placeholders(self, agent): + """Test that placeholders are not flagged as credentials.""" + code = """ +password = "YOUR_PASSWORD_HERE" +api_key = "REPLACE_WITH_YOUR_API_KEY" +token = "TODO: Add token" +secret = "example_secret" +""" + context = AnalysisContext(code_content=code, filename="config.py") + findings = agent.analyze(context) + + # Should have 0 findings for placeholders + cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] + assert len(cred_findings) == 0 + + def test_ignore_short_values(self, agent): + """Test that very short values are not flagged.""" + code = 'password = "abc"' + context = AnalysisContext(code_content=code, 
filename="test.py") + findings = agent.analyze(context) + + # Should not flag very short passwords + cred_findings = [f for f in findings if f.issue_type == "hardcoded_credentials"] + assert len(cred_findings) == 0 + + +class TestWeakCryptographyDetection: + """Test detection of weak cryptographic algorithms.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_detect_md5_usage(self, agent): + """Test detection of MD5 hash algorithm.""" + code = """ +import hashlib +hash_value = hashlib.md5(data).hexdigest() +""" + context = AnalysisContext(code_content=code, filename="crypto.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + md5_finding = next( + f + for f in findings + if f.issue_type == "weak_cryptography" and "md5" in f.message.lower() + ) + assert md5_finding.severity == Severity.MEDIUM + assert "SHA-256" in md5_finding.suggestion + assert md5_finding.rule_id == "SEC004_MD5" + + def test_detect_sha1_usage(self, agent): + """Test detection of SHA1 hash algorithm.""" + code = """ + import hashlib + digest = hashlib.sha1(message.encode()).digest() + """ + context = AnalysisContext(code_content=code, filename="hasher.py") + findings = agent.analyze(context) + + assert len(findings) >= 1 + sha1_finding = next( + f + for f in findings + if f.issue_type == "weak_cryptography" and "sha1" in f.message.lower() + ) + assert sha1_finding.severity == Severity.MEDIUM + + def test_safe_sha256_no_findings(self, agent): + """Test that SHA-256 doesn't trigger findings.""" + code = """ + import hashlib + secure_hash = hashlib.sha256(data).hexdigest() + """ + context = AnalysisContext(code_content=code, filename="secure.py") + findings = agent.analyze(context) + + # Should have 0 weak crypto findings for SHA-256 + crypto_findings = [f for f in findings if f.issue_type == "weak_cryptography"] + assert len(crypto_findings) == 0 + + +class TestComplexScenarios: + """Test complex scenarios with 
multiple vulnerabilities.""" + + @pytest.fixture + def agent(self): + """Create SecurityAgent instance.""" + return SecurityAgent() + + def test_multiple_vulnerabilities_in_one_file(self, agent): + """Test detection of multiple vulnerability types.""" + code = """ + import hashlib + import pickle + + # Hardcoded credential + password = "MySecretPassword123" + api_key = "sk_live_abc123xyz" + + # Dangerous function + def execute_command(user_input): + result = eval(user_input) + return result + + # SQL injection + def query_user(user_id): + query = f"SELECT * FROM users WHERE id = {user_id}" + cursor.execute(query) + return cursor.fetchone() + + # Weak crypto + def hash_password(pwd): + return hashlib.md5(pwd.encode()).hexdigest() + + # Unsafe deserialization + def load_data(data): + return pickle.loads(data) + """ + context = AnalysisContext(code_content=code, filename="vulnerable.py") + findings = agent.analyze(context) + + # Should detect at least 6 vulnerabilities + assert len(findings) >= 6 + + # Verify each type is detected + issue_types = {f.issue_type for f in findings} + assert "hardcoded_credentials" in issue_types + assert "dangerous_function" in issue_types + assert "sql_injection" in issue_types + assert "weak_cryptography" in issue_types + + # Verify CRITICAL findings are first (sorted by severity) + critical_findings = [f for f in findings if f.is_critical] + assert len(critical_findings) >= 2 + # First findings should be CRITICAL + assert findings[0].severity == Severity.CRITICAL + + def test_syntax_error_handling(self, agent): + """Test that syntax errors are handled gracefully.""" + code = """ + def incomplete_function( + # Missing closing parenthesis and body + """ + context = AnalysisContext(code_content=code, filename="broken.py") + + # Should not raise exception, but log error + findings = agent.analyze(context) + + # May have some findings from regex-based modules + # Should not crash + assert isinstance(findings, list) + + def 
test_empty_code(self, agent): + """Test analysis of minimal valid code.""" + code = "# Just a comment\npass" + context = AnalysisContext(code_content=code, filename="minimal.py") + findings = agent.analyze(context) + + assert len(findings) == 0 + + def test_findings_sorted_by_severity(self, agent): + """Test that findings are sorted by severity.""" + code = """ + # MEDIUM severity issue + import hashlib + hash1 = hashlib.md5(data).hexdigest() + + # CRITICAL severity issue + password = "SuperSecret123" + + # HIGH severity issue + query = f"DELETE FROM users WHERE id={user_id}" + cursor.execute(query) + + # CRITICAL severity issue + result = eval(user_input) + """ + context = AnalysisContext(code_content=code, filename="mixed.py") + findings = agent.analyze(context) + + assert len(findings) >= 4 + + # First findings should be CRITICAL + for i in range(min(2, len(findings))): + assert findings[i].severity in [Severity.CRITICAL, Severity.HIGH] diff --git a/backend/tests/unit/agents/test_style_agent.py b/backend/tests/unit/agents/test_style_agent.py index afb4d52..e63f29d 100644 --- a/backend/tests/unit/agents/test_style_agent.py +++ b/backend/tests/unit/agents/test_style_agent.py @@ -1,439 +1,439 @@ -"""Tests unitarios para StyleAgent.""" - -from typing import Any, Dict -from unittest.mock import MagicMock, patch - -import pytest - -from src.agents.style_agent import StyleAgent -from src.core.events.event_bus import EventBus -from src.core.events.observers import EventObserver -from src.schemas.analysis import AnalysisContext -from src.schemas.finding import Severity - - -class MockEventObserver(EventObserver): - """Observer de prueba para capturar eventos.""" - - def __init__(self): - self.events_received = [] - - def on_event(self, event_type: str, data: Dict[str, Any]) -> None: - """Captura eventos recibidos.""" - self.events_received.append((event_type, data)) - - -class TestStyleAgentInitialization: - """Tests de inicialización del StyleAgent.""" - - def 
test_agent_initialization(self): - """StyleAgent se inicializa con configuración correcta.""" - agent = StyleAgent() - - assert agent.name == "StyleAgent" - assert agent.version == "1.0.0" - assert agent.category == "style" - assert agent.enabled is True - - def test_line_length_limit_default(self): - """El límite de longitud de línea es 88 por defecto.""" - agent = StyleAgent() - - assert agent.line_length_limit == 88 - - -class TestLineLengthDetection: - """Tests para detección de líneas largas.""" - - def test_detect_line_too_long(self): - """Detecta líneas que exceden 88 caracteres.""" - # Línea de 100+ caracteres - code = "x = '" + "a" * 100 + "'" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - line_findings = [f for f in findings if "longitud" in f.message.lower()] - assert len(line_findings) >= 1 - assert line_findings[0].issue_type == "style/pep8" - - def test_no_finding_for_short_lines(self): - """No detecta líneas cortas como problema de longitud.""" - code = "x = 1\ny = 2\n" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - line_length_findings = [f for f in findings if "longitud" in f.message.lower()] - assert len(line_length_findings) == 0 - - def test_detect_trailing_whitespace(self): - """Detecta espacios en blanco al final de línea.""" - code = "x = 1 \n" # Espacios al final - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - trailing_findings = [f for f in findings if "blanco al final" in f.message.lower()] - assert len(trailing_findings) >= 1 - - -class TestDocstringDetection: - """Tests para detección de docstrings faltantes.""" - - def test_detect_missing_function_docstring(self): - """Detecta función pública sin docstring.""" - code = """ -def my_public_function(): - pass -""" - context = 
AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - docstring_findings = [f for f in findings if "docstring" in f.message.lower()] - assert len(docstring_findings) >= 1 - assert any(f.issue_type == "style/documentation" for f in docstring_findings) - - def test_detect_missing_class_docstring(self): - """Detecta clase sin docstring.""" - code = """ -class MyClass: - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - docstring_findings = [f for f in findings if "docstring" in f.message.lower()] - assert len(docstring_findings) >= 1 - - def test_no_finding_for_private_function(self): - """No detecta funciones privadas sin docstring.""" - code = """ -def _private_function(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Funciones privadas no requieren docstring en nuestros checks - private_docstring_findings = [ - f for f in findings if "docstring" in f.message.lower() and "_private" in f.message - ] - assert len(private_docstring_findings) == 0 - - def test_no_finding_for_function_with_docstring(self): - """No detecta función con docstring.""" - code = ''' -def my_function(): - """Esta es la documentación.""" - pass -''' - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # No debería haber findings de docstring para my_function - my_func_docstring = [ - f for f in findings if "docstring" in f.message.lower() and "my_function" in f.message - ] - assert len(my_func_docstring) == 0 - - -class TestNamingConventions: - """Tests para convenciones de nombres PEP 8.""" - - def test_detect_camelcase_function(self): - """Detecta función con nombre en camelCase.""" - code = """ -def myBadFunction(): - pass -""" - context = 
AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - naming_findings = [ - f - for f in findings - if "snake_case" in f.message.lower() and "funcion" in f.message.lower() - ] - assert len(naming_findings) >= 1 - assert any(f.issue_type == "style/naming" for f in naming_findings) - - def test_detect_lowercase_class(self): - """Detecta clase con nombre en lowercase.""" - code = """ -class myclass: - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - naming_findings = [ - f for f in findings if "PascalCase" in f.message and "clase" in f.message.lower() - ] - assert len(naming_findings) >= 1 - - def test_no_finding_for_correct_function_name(self): - """No detecta función con nombre correcto.""" - code = """ -def my_correct_function(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - naming_findings = [ - f - for f in findings - if "my_correct_function" in f.message and "snake_case" in f.message.lower() - ] - assert len(naming_findings) == 0 - - def test_no_finding_for_correct_class_name(self): - """No detecta clase con nombre correcto.""" - code = """ -class MyCorrectClass: - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - naming_findings = [ - f for f in findings if "MyCorrectClass" in f.message and "PascalCase" in f.message - ] - assert len(naming_findings) == 0 - - -class TestImportAnalysis: - """Tests para análisis de imports.""" - - def test_detect_unused_import(self): - """Detecta import no utilizado.""" - code = """ -import os -import sys - -print("hello") -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - import_findings = [ - f for f 
in findings if f.issue_type == "style/imports" or "import" in f.message.lower() - ] - # os y sys no se usan, deberían detectarse - assert len(import_findings) >= 1 - - def test_detect_duplicate_import(self): - """Detecta import duplicado.""" - code = """ -import os -import sys -import os -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # El mensaje usa "múltiples veces" no "duplicado" - duplicate_findings = [ - f - for f in findings - if "multiples" in f.message.lower() or "reimport" in f.message.lower() - ] - assert len(duplicate_findings) >= 1 - - -class TestFindingsOrdering: - """Tests para ordenamiento de findings.""" - - def test_findings_ordered_by_line_number(self): - """Los findings se ordenan por número de línea ascendente.""" - code = """ -def badFunc(): - pass - -class badclass: - pass - -def anotherBadFunc(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - if len(findings) > 1: - line_numbers = [f.line_number for f in findings] - assert line_numbers == sorted(line_numbers), "Findings deben estar ordenados por línea" - - -class TestEventEmission: - """Tests para emisión de eventos.""" - - def test_emits_agent_started_event(self): - """El agente emite evento AGENT_STARTED al iniciar.""" - event_bus = EventBus() - event_bus.clear() # Limpiar observers previos - observer = MockEventObserver() - event_bus.subscribe(observer) - - agent = StyleAgent() - agent.event_bus = event_bus - - code = "x = 1" - context = AnalysisContext(code_content=code, filename="test.py") - agent.analyze(context) - - # Verificar que hubo eventos AGENT_STARTED - started_events = [ - e - for e in observer.events_received - if e[0] == "AGENT_STARTED" or e[1].get("type") == "AGENT_STARTED" - ] - assert len(started_events) >= 1 - - def test_emits_agent_completed_event(self): - """El agente emite evento 
AGENT_COMPLETED al finalizar.""" - event_bus = EventBus() - event_bus.clear() # Limpiar observers previos - observer = MockEventObserver() - event_bus.subscribe(observer) - - agent = StyleAgent() - agent.event_bus = event_bus - - code = "x = 1" - context = AnalysisContext(code_content=code, filename="test.py") - agent.analyze(context) - - # Verificar que hubo eventos AGENT_COMPLETED - completed_events = [ - e - for e in observer.events_received - if e[0] == "AGENT_COMPLETED" or e[1].get("type") == "AGENT_COMPLETED" - ] - assert len(completed_events) >= 1 - - -class TestErrorHandling: - """Tests para manejo de errores.""" - - def test_syntax_error_does_not_crash(self): - """Error de sintaxis no crashea el agente.""" - code = "def broken(" # Sintaxis inválida - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - # No debe lanzar excepción - findings = agent.analyze(context) - - # Puede retornar lista vacía o parcial - assert isinstance(findings, list) - - def test_empty_code_returns_empty_or_minimal_findings(self): - """Código mínimo retorna lista con findings mínimos.""" - code = "# Empty file" # Código mínimo válido - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Lista vacía o con findings mínimos (como missing module docstring) - assert isinstance(findings, list) - # Puede tener algunos findings menores - assert len(findings) <= 5 - - -class TestIssueTypeCategories: - """Tests para verificar categorías correctas de issue_type.""" - - def test_line_style_uses_pep8_category(self): - """Problemas de línea usan categoría style/pep8.""" - code = "x = '" + "a" * 100 + "'" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - line_findings = [f for f in findings if "longitud" in f.message.lower()] - for f in line_findings: - assert f.issue_type == "style/pep8" - - def 
test_docstring_uses_documentation_category(self): - """Problemas de docstring usan categoría style/documentation.""" - code = """ -def my_function(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Buscar findings con "docstring" en el mensaje que usen nuestras reglas internas - doc_findings = [ - f - for f in findings - if "docstring" in f.message.lower() and f.rule_id.startswith("STYLE") - ] - for f in doc_findings: - assert f.issue_type == "style/documentation" - - def test_naming_uses_naming_category(self): - """Problemas de nombres usan categoría style/naming.""" - code = """ -def badFunction(): - pass -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Buscar findings con snake_case que sean de nuestras reglas internas - naming_findings = [ - f - for f in findings - if "snake_case" in f.message.lower() and f.rule_id.startswith("STYLE") - ] - for f in naming_findings: - assert f.issue_type == "style/naming" - - def test_imports_uses_imports_category(self): - """Problemas de imports usan categoría style/imports.""" - code = """ -import os -import os -""" - context = AnalysisContext(code_content=code, filename="test.py") - - agent = StyleAgent() - findings = agent.analyze(context) - - # Buscar findings de import que sean de nuestras reglas internas - import_findings = [f for f in findings if f.rule_id.startswith("STYLE02")] - for f in import_findings: - assert f.issue_type == "style/imports" +"""Tests unitarios para StyleAgent.""" + +from typing import Any, Dict +from unittest.mock import MagicMock, patch + +import pytest + +from src.agents.style_agent import StyleAgent +from src.core.events.event_bus import EventBus +from src.core.events.observers import EventObserver +from src.schemas.analysis import AnalysisContext +from src.schemas.finding import Severity + + +class 
MockEventObserver(EventObserver): + """Observer de prueba para capturar eventos.""" + + def __init__(self): + self.events_received = [] + + def on_event(self, event_type: str, data: Dict[str, Any]) -> None: + """Captura eventos recibidos.""" + self.events_received.append((event_type, data)) + + +class TestStyleAgentInitialization: + """Tests de inicialización del StyleAgent.""" + + def test_agent_initialization(self): + """StyleAgent se inicializa con configuración correcta.""" + agent = StyleAgent() + + assert agent.name == "StyleAgent" + assert agent.version == "1.0.0" + assert agent.category == "style" + assert agent.enabled is True + + def test_line_length_limit_default(self): + """El límite de longitud de línea es 88 por defecto.""" + agent = StyleAgent() + + assert agent.line_length_limit == 88 + + +class TestLineLengthDetection: + """Tests para detección de líneas largas.""" + + def test_detect_line_too_long(self): + """Detecta líneas que exceden 88 caracteres.""" + # Línea de 100+ caracteres + code = "x = '" + "a" * 100 + "'" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + line_findings = [f for f in findings if "longitud" in f.message.lower()] + assert len(line_findings) >= 1 + assert line_findings[0].issue_type == "style/pep8" + + def test_no_finding_for_short_lines(self): + """No detecta líneas cortas como problema de longitud.""" + code = "x = 1\ny = 2\n" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + line_length_findings = [f for f in findings if "longitud" in f.message.lower()] + assert len(line_length_findings) == 0 + + def test_detect_trailing_whitespace(self): + """Detecta espacios en blanco al final de línea.""" + code = "x = 1 \n" # Espacios al final + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + 
trailing_findings = [f for f in findings if "blanco al final" in f.message.lower()] + assert len(trailing_findings) >= 1 + + +class TestDocstringDetection: + """Tests para detección de docstrings faltantes.""" + + def test_detect_missing_function_docstring(self): + """Detecta función pública sin docstring.""" + code = """ +def my_public_function(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + docstring_findings = [f for f in findings if "docstring" in f.message.lower()] + assert len(docstring_findings) >= 1 + assert any(f.issue_type == "style/documentation" for f in docstring_findings) + + def test_detect_missing_class_docstring(self): + """Detecta clase sin docstring.""" + code = """ +class MyClass: + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + docstring_findings = [f for f in findings if "docstring" in f.message.lower()] + assert len(docstring_findings) >= 1 + + def test_no_finding_for_private_function(self): + """No detecta funciones privadas sin docstring.""" + code = """ +def _private_function(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Funciones privadas no requieren docstring en nuestros checks + private_docstring_findings = [ + f for f in findings if "docstring" in f.message.lower() and "_private" in f.message + ] + assert len(private_docstring_findings) == 0 + + def test_no_finding_for_function_with_docstring(self): + """No detecta función con docstring.""" + code = ''' +def my_function(): + """Esta es la documentación.""" + pass +''' + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # No debería haber findings de docstring para my_function + my_func_docstring = [ + f for f in 
findings if "docstring" in f.message.lower() and "my_function" in f.message + ] + assert len(my_func_docstring) == 0 + + +class TestNamingConventions: + """Tests para convenciones de nombres PEP 8.""" + + def test_detect_camelcase_function(self): + """Detecta función con nombre en camelCase.""" + code = """ +def myBadFunction(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + naming_findings = [ + f + for f in findings + if "snake_case" in f.message.lower() and "funcion" in f.message.lower() + ] + assert len(naming_findings) >= 1 + assert any(f.issue_type == "style/naming" for f in naming_findings) + + def test_detect_lowercase_class(self): + """Detecta clase con nombre en lowercase.""" + code = """ +class myclass: + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + naming_findings = [ + f for f in findings if "PascalCase" in f.message and "clase" in f.message.lower() + ] + assert len(naming_findings) >= 1 + + def test_no_finding_for_correct_function_name(self): + """No detecta función con nombre correcto.""" + code = """ +def my_correct_function(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + naming_findings = [ + f + for f in findings + if "my_correct_function" in f.message and "snake_case" in f.message.lower() + ] + assert len(naming_findings) == 0 + + def test_no_finding_for_correct_class_name(self): + """No detecta clase con nombre correcto.""" + code = """ +class MyCorrectClass: + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + naming_findings = [ + f for f in findings if "MyCorrectClass" in f.message and "PascalCase" in f.message + ] + assert len(naming_findings) == 0 + + +class 
TestImportAnalysis: + """Tests para análisis de imports.""" + + def test_detect_unused_import(self): + """Detecta import no utilizado.""" + code = """ +import os +import sys + +print("hello") +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + import_findings = [ + f for f in findings if f.issue_type == "style/imports" or "import" in f.message.lower() + ] + # os y sys no se usan, deberían detectarse + assert len(import_findings) >= 1 + + def test_detect_duplicate_import(self): + """Detecta import duplicado.""" + code = """ +import os +import sys +import os +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # El mensaje usa "múltiples veces" no "duplicado" + duplicate_findings = [ + f + for f in findings + if "multiples" in f.message.lower() or "reimport" in f.message.lower() + ] + assert len(duplicate_findings) >= 1 + + +class TestFindingsOrdering: + """Tests para ordenamiento de findings.""" + + def test_findings_ordered_by_line_number(self): + """Los findings se ordenan por número de línea ascendente.""" + code = """ +def badFunc(): + pass + +class badclass: + pass + +def anotherBadFunc(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + if len(findings) > 1: + line_numbers = [f.line_number for f in findings] + assert line_numbers == sorted(line_numbers), "Findings deben estar ordenados por línea" + + +class TestEventEmission: + """Tests para emisión de eventos.""" + + def test_emits_agent_started_event(self): + """El agente emite evento AGENT_STARTED al iniciar.""" + event_bus = EventBus() + event_bus.clear() # Limpiar observers previos + observer = MockEventObserver() + event_bus.subscribe(observer) + + agent = StyleAgent() + agent.event_bus = event_bus + + code = "x = 1" + context = 
AnalysisContext(code_content=code, filename="test.py") + agent.analyze(context) + + # Verificar que hubo eventos AGENT_STARTED + started_events = [ + e + for e in observer.events_received + if e[0] == "AGENT_STARTED" or e[1].get("type") == "AGENT_STARTED" + ] + assert len(started_events) >= 1 + + def test_emits_agent_completed_event(self): + """El agente emite evento AGENT_COMPLETED al finalizar.""" + event_bus = EventBus() + event_bus.clear() # Limpiar observers previos + observer = MockEventObserver() + event_bus.subscribe(observer) + + agent = StyleAgent() + agent.event_bus = event_bus + + code = "x = 1" + context = AnalysisContext(code_content=code, filename="test.py") + agent.analyze(context) + + # Verificar que hubo eventos AGENT_COMPLETED + completed_events = [ + e + for e in observer.events_received + if e[0] == "AGENT_COMPLETED" or e[1].get("type") == "AGENT_COMPLETED" + ] + assert len(completed_events) >= 1 + + +class TestErrorHandling: + """Tests para manejo de errores.""" + + def test_syntax_error_does_not_crash(self): + """Error de sintaxis no crashea el agente.""" + code = "def broken(" # Sintaxis inválida + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + # No debe lanzar excepción + findings = agent.analyze(context) + + # Puede retornar lista vacía o parcial + assert isinstance(findings, list) + + def test_empty_code_returns_empty_or_minimal_findings(self): + """Código mínimo retorna lista con findings mínimos.""" + code = "# Empty file" # Código mínimo válido + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Lista vacía o con findings mínimos (como missing module docstring) + assert isinstance(findings, list) + # Puede tener algunos findings menores + assert len(findings) <= 5 + + +class TestIssueTypeCategories: + """Tests para verificar categorías correctas de issue_type.""" + + def 
test_line_style_uses_pep8_category(self): + """Problemas de línea usan categoría style/pep8.""" + code = "x = '" + "a" * 100 + "'" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + line_findings = [f for f in findings if "longitud" in f.message.lower()] + for f in line_findings: + assert f.issue_type == "style/pep8" + + def test_docstring_uses_documentation_category(self): + """Problemas de docstring usan categoría style/documentation.""" + code = """ +def my_function(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Buscar findings con "docstring" en el mensaje que usen nuestras reglas internas + doc_findings = [ + f + for f in findings + if "docstring" in f.message.lower() and f.rule_id.startswith("STYLE") + ] + for f in doc_findings: + assert f.issue_type == "style/documentation" + + def test_naming_uses_naming_category(self): + """Problemas de nombres usan categoría style/naming.""" + code = """ +def badFunction(): + pass +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Buscar findings con snake_case que sean de nuestras reglas internas + naming_findings = [ + f + for f in findings + if "snake_case" in f.message.lower() and f.rule_id.startswith("STYLE") + ] + for f in naming_findings: + assert f.issue_type == "style/naming" + + def test_imports_uses_imports_category(self): + """Problemas de imports usan categoría style/imports.""" + code = """ +import os +import os +""" + context = AnalysisContext(code_content=code, filename="test.py") + + agent = StyleAgent() + findings = agent.analyze(context) + + # Buscar findings de import que sean de nuestras reglas internas + import_findings = [f for f in findings if f.rule_id.startswith("STYLE02")] + for f in import_findings: + assert f.issue_type == 
"style/imports" diff --git a/backend/tests/unit/application/test_analysis_service.py b/backend/tests/unit/application/test_analysis_service.py index b89fc2f..1bd0820 100644 --- a/backend/tests/unit/application/test_analysis_service.py +++ b/backend/tests/unit/application/test_analysis_service.py @@ -82,6 +82,24 @@ def unsafe(): call_args = mock_repo.create.call_args[0][0] assert call_args.total_findings >= 0 + @pytest.mark.asyncio + async def test_analyze_code_agent_exception_handled(self, service, mock_repo): + """Verifica que excepciones del agente se manejan gracefully.""" + content = b"import os\n\ndef main():\n pass\n\nmain()\n" + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "test.py" + mock_file.read.return_value = content + mock_file.seek = AsyncMock() + + with patch.object(service, "_validate_file", return_value=(content.decode(), "test.py")): + with patch( + "src.services.analysis_service.SecurityAgent.analyze", + side_effect=Exception("Agent crashed"), + ): + result = await service.analyze_code(mock_file, "user_789") + + # Debe completar aunque el agente falle + assert result is not None class TestValidateFileEdgeCases: diff --git a/backend/tests/unit/bad_quality_code.py b/backend/tests/unit/bad_quality_code.py index bb36035..af69c87 100644 --- a/backend/tests/unit/bad_quality_code.py +++ b/backend/tests/unit/bad_quality_code.py @@ -1,64 +1,64 @@ -def complex_logic_example(x, y, z): - """ - Esta función está diseñada intencionalmente para tener baja calidad: - 1. Alta complejidad ciclomática (muchos if/else anidados). - 2. Función demasiado larga. - 3. Código duplicado. - """ - result = 0 - - # 1. Alta Complejidad Ciclomática - if x > 0: - if y > 0: - if z > 0: - result = x + y + z - if result > 100: - print("Big number") - else: - print("Small number") - else: - result = x + y - z - else: - if z > 0: - result = x - y + z - else: - result = x - y - z - else: - if y > 0: - result = -x + y - else: - result = -x - y - - # 2. 
Código Duplicado (Bloque A) - print("Calculando métricas complejas...") - val1 = x * 2 - val2 = y * 3 - total = val1 + val2 - if total > 10: - print("Total es mayor a 10") - else: - print("Total es menor o igual a 10") - - # Relleno para hacer la función larga (> 25 líneas suele ser warning) - a = 1 - b = 2 - c = 3 - d = 4 - e = 5 - f = 6 - g = 7 - h = 8 - i = 9 - j = 10 - - # 2. Código Duplicado (Bloque A - Copia Exacta) - print("Calculando métricas complejas...") - val1 = x * 2 - val2 = y * 3 - total = val1 + val2 - if total > 10: - print("Total es mayor a 10") - else: - print("Total es menor o igual a 10") - - return result +def complex_logic_example(x, y, z): + """ + Esta función está diseñada intencionalmente para tener baja calidad: + 1. Alta complejidad ciclomática (muchos if/else anidados). + 2. Función demasiado larga. + 3. Código duplicado. + """ + result = 0 + + # 1. Alta Complejidad Ciclomática + if x > 0: + if y > 0: + if z > 0: + result = x + y + z + if result > 100: + print("Big number") + else: + print("Small number") + else: + result = x + y - z + else: + if z > 0: + result = x - y + z + else: + result = x - y - z + else: + if y > 0: + result = -x + y + else: + result = -x - y + + # 2. Código Duplicado (Bloque A) + print("Calculando métricas complejas...") + val1 = x * 2 + val2 = y * 3 + total = val1 + val2 + if total > 10: + print("Total es mayor a 10") + else: + print("Total es menor o igual a 10") + + # Relleno para hacer la función larga (> 25 líneas suele ser warning) + a = 1 + b = 2 + c = 3 + d = 4 + e = 5 + f = 6 + g = 7 + h = 8 + i = 9 + j = 10 + + # 2. 
Código Duplicado (Bloque A - Copia Exacta) + print("Calculando métricas complejas...") + val1 = x * 2 + val2 = y * 3 + total = val1 + val2 + if total > 10: + print("Total es mayor a 10") + else: + print("Total es menor o igual a 10") + + return result diff --git a/backend/tests/unit/combined_test.py b/backend/tests/unit/combined_test.py index caf2a4f..08e8732 100644 --- a/backend/tests/unit/combined_test.py +++ b/backend/tests/unit/combined_test.py @@ -1,44 +1,44 @@ -"""Combined test file with security AND style violations.""" - -import hashlib -import json # STYLE020_UNUSED_IMPORT: never used -import os # STYLE021_DUP_IMPORT: duplicate -import pickle - -PASSWORD = "MySuperSecretPassword123" # SEC003_PASSWORD: hardcoded -api_key = "sk_live_abcdefghij1234567890" # SEC003_API_KEY + STYLE033: camelCase var - - -def badlyNamedFunction(userInput): # STYLE030_FUNC_NAMING: not snake_case, no docstring - result = eval(userInput) # SEC001_EVAL: arbitrary code execution - exec(userInput) # SEC001_EXEC: arbitrary code execution - return result - - -class lowercase_class: # STYLE031_CLASS_NAMING: not PascalCase, no docstring - def CamelCaseMethod(self, data): # STYLE030: method not snake_case, no docstring - return pickle.loads(data) # SEC001_PICKLE: unsafe deserialization - - -def sql_query(user_id): - query = "SELECT * FROM users WHERE id = " + user_id # SEC002_SQL_INJECTION - cursor.execute(f"DELETE FROM logs WHERE user = {user_id}") # SEC002_SQL_INJECTION - return query - - -def weak_hash(data): - md5_hash = hashlib.md5(data.encode()).hexdigest() # SEC004_MD5: weak crypto - sha1_hash = hashlib.sha1(data.encode()).hexdigest() # SEC004_SHA1: weak crypto - return md5_hash, sha1_hash - - -def line_too_long(): - very_long_variable_name_that_exceeds_the_pep8_limit = "This is a string that combined with the variable name makes this line exceed 88 characters" # STYLE001 - return very_long_variable_name_that_exceeds_the_pep8_limit - - -def trailing_ws(): - x = 1 - y = 2 - unusedVar 
= x + y # STYLE033: camelCase + F841: unused - return x +"""Combined test file with security AND style violations.""" + +import hashlib +import json # STYLE020_UNUSED_IMPORT: never used +import os # STYLE021_DUP_IMPORT: duplicate +import pickle + +PASSWORD = "MySuperSecretPassword123" # SEC003_PASSWORD: hardcoded +api_key = "sk_live_abcdefghij1234567890" # SEC003_API_KEY + STYLE033: camelCase var + + +def badlyNamedFunction(userInput): # STYLE030_FUNC_NAMING: not snake_case, no docstring + result = eval(userInput) # SEC001_EVAL: arbitrary code execution + exec(userInput) # SEC001_EXEC: arbitrary code execution + return result + + +class lowercase_class: # STYLE031_CLASS_NAMING: not PascalCase, no docstring + def CamelCaseMethod(self, data): # STYLE030: method not snake_case, no docstring + return pickle.loads(data) # SEC001_PICKLE: unsafe deserialization + + +def sql_query(user_id): + query = "SELECT * FROM users WHERE id = " + user_id # SEC002_SQL_INJECTION + cursor.execute(f"DELETE FROM logs WHERE user = {user_id}") # SEC002_SQL_INJECTION + return query + + +def weak_hash(data): + md5_hash = hashlib.md5(data.encode()).hexdigest() # SEC004_MD5: weak crypto + sha1_hash = hashlib.sha1(data.encode()).hexdigest() # SEC004_SHA1: weak crypto + return md5_hash, sha1_hash + + +def line_too_long(): + very_long_variable_name_that_exceeds_the_pep8_limit = "This is a string that combined with the variable name makes this line exceed 88 characters" # STYLE001 + return very_long_variable_name_that_exceeds_the_pep8_limit + + +def trailing_ws(): + x = 1 + y = 2 + unusedVar = x + y # STYLE033: camelCase + F841: unused + return x diff --git a/backend/tests/unit/dependencies/test_get_db.py b/backend/tests/unit/dependencies/test_get_db.py index bf6d324..e1ed46e 100644 --- a/backend/tests/unit/dependencies/test_get_db.py +++ b/backend/tests/unit/dependencies/test_get_db.py @@ -1,95 +1,95 @@ -"""Tests para get_db dependency.""" - -from unittest.mock import MagicMock, patch - -import 
pytest - - -class TestGetDb: - """Tests para get_db dependency.""" - - @patch("src.core.dependencies.get_db.SessionLocal") - def test_get_db_yields_session(self, mock_session_local): - """get_db yields una sesión de base de datos.""" - from src.core.dependencies.get_db import get_db - - mock_session = MagicMock() - mock_session_local.return_value = mock_session - - # Act - generator = get_db() - session = next(generator) - - # Assert - assert session == mock_session - mock_session_local.assert_called_once() - - @patch("src.core.dependencies.get_db.SessionLocal") - def test_get_db_closes_session_after_use(self, mock_session_local): - """get_db cierra la sesión después de usarla.""" - from src.core.dependencies.get_db import get_db - - mock_session = MagicMock() - mock_session_local.return_value = mock_session - - # Act - generator = get_db() - session = next(generator) - - # Simular fin del request - try: - next(generator) - except StopIteration: - pass - - # Assert - mock_session.close.assert_called_once() - - @patch("src.core.dependencies.get_db.SessionLocal") - def test_get_db_closes_session_on_exception(self, mock_session_local): - """get_db cierra la sesión incluso si hay excepción.""" - from src.core.dependencies.get_db import get_db - - mock_session = MagicMock() - mock_session_local.return_value = mock_session - - # Act - generator = get_db() - session = next(generator) - - # Simular excepción y cierre - try: - generator.throw(Exception("Test exception")) - except Exception: - pass - - # Assert - mock_session.close.assert_called_once() - - @patch("src.core.dependencies.get_db.SessionLocal") - def test_get_db_can_be_used_as_context(self, mock_session_local): - """get_db funciona correctamente en contexto de FastAPI Depends.""" - from src.core.dependencies.get_db import get_db - - mock_session = MagicMock() - mock_session_local.return_value = mock_session - - # Simular uso típico con Depends - db_generator = get_db() - - # Obtener sesión - db = 
next(db_generator) - assert db is mock_session - - # Usar la sesión - db.query.return_value = "result" - result = db.query() - assert result == "result" - - # Cerrar (simular fin de request) - try: - next(db_generator) - except StopIteration: - pass - - mock_session.close.assert_called_once() +"""Tests para get_db dependency.""" + +from unittest.mock import MagicMock, patch + +import pytest + + +class TestGetDb: + """Tests para get_db dependency.""" + + @patch("src.core.dependencies.get_db.SessionLocal") + def test_get_db_yields_session(self, mock_session_local): + """get_db yields una sesión de base de datos.""" + from src.core.dependencies.get_db import get_db + + mock_session = MagicMock() + mock_session_local.return_value = mock_session + + # Act + generator = get_db() + session = next(generator) + + # Assert + assert session == mock_session + mock_session_local.assert_called_once() + + @patch("src.core.dependencies.get_db.SessionLocal") + def test_get_db_closes_session_after_use(self, mock_session_local): + """get_db cierra la sesión después de usarla.""" + from src.core.dependencies.get_db import get_db + + mock_session = MagicMock() + mock_session_local.return_value = mock_session + + # Act + generator = get_db() + session = next(generator) + + # Simular fin del request + try: + next(generator) + except StopIteration: + pass + + # Assert + mock_session.close.assert_called_once() + + @patch("src.core.dependencies.get_db.SessionLocal") + def test_get_db_closes_session_on_exception(self, mock_session_local): + """get_db cierra la sesión incluso si hay excepción.""" + from src.core.dependencies.get_db import get_db + + mock_session = MagicMock() + mock_session_local.return_value = mock_session + + # Act + generator = get_db() + session = next(generator) + + # Simular excepción y cierre + try: + generator.throw(Exception("Test exception")) + except Exception: + pass + + # Assert + mock_session.close.assert_called_once() + + 
@patch("src.core.dependencies.get_db.SessionLocal") + def test_get_db_can_be_used_as_context(self, mock_session_local): + """get_db funciona correctamente en contexto de FastAPI Depends.""" + from src.core.dependencies.get_db import get_db + + mock_session = MagicMock() + mock_session_local.return_value = mock_session + + # Simular uso típico con Depends + db_generator = get_db() + + # Obtener sesión + db = next(db_generator) + assert db is mock_session + + # Usar la sesión + db.query.return_value = "result" + result = db.query() + assert result == "result" + + # Cerrar (simular fin de request) + try: + next(db_generator) + except StopIteration: + pass + + mock_session.close.assert_called_once() diff --git a/backend/tests/unit/domain/test_event_bus.py b/backend/tests/unit/domain/test_event_bus.py index 21e15fc..67bd477 100644 --- a/backend/tests/unit/domain/test_event_bus.py +++ b/backend/tests/unit/domain/test_event_bus.py @@ -1,95 +1,95 @@ -"""Tests para EventBus.""" - -import pytest - -from src.core.events.event_bus import EventBus -from src.core.events.observers import EventObserver - - -class MockObserver(EventObserver): - """Observer mock para testing.""" - - def __init__(self): - self.received_events = [] - - def on_event(self, event_type: str, data: dict) -> None: - self.received_events.append((event_type, data)) - - -class TestEventBus: - """Tests para EventBus.""" - - @pytest.fixture - def event_bus(self): - """Crea instancia de EventBus y limpia estado.""" - bus = EventBus() - bus.clear() - return bus - - def test_subscribe_and_publish(self, event_bus): - """Verifica que los observers reciben eventos.""" - observer = MockObserver() - - event_bus.subscribe(observer) - event_bus.publish("analysis_started", {"id": "123"}) - - assert len(observer.received_events) == 1 - assert observer.received_events[0][0] == "analysis_started" - assert observer.received_events[0][1]["id"] == "123" - - def test_multiple_subscribers(self, event_bus): - """Verifica que 
múltiples observers reciben el mismo evento.""" - observer1 = MockObserver() - observer2 = MockObserver() - - event_bus.subscribe(observer1) - event_bus.subscribe(observer2) - event_bus.publish("analysis_started", {"test": True}) - - assert len(observer1.received_events) == 1 - assert len(observer2.received_events) == 1 - - def test_unsubscribe(self, event_bus): - """Verifica que unsubscribe funciona.""" - observer = MockObserver() - - event_bus.subscribe(observer) - event_bus.unsubscribe(observer) - event_bus.publish("analysis_started", {"id": "456"}) - - assert len(observer.received_events) == 0 - - def test_publish_without_subscribers(self, event_bus): - """Publicar sin suscriptores no debe fallar.""" - # No debe lanzar excepción - event_bus.publish("analysis_completed", {"id": "789"}) - - def test_clear_all_subscribers(self, event_bus): - """Verifica que clear elimina todos los suscriptores.""" - observer = MockObserver() - - event_bus.subscribe(observer) - event_bus.clear() - - event_bus.publish("analysis_started", {}) - event_bus.publish("analysis_completed", {}) - - assert len(observer.received_events) == 0 - - def test_handler_exception_does_not_break_others(self, event_bus): - """Un observer que falla no debe afectar a otros.""" - - class FailingObserver(EventObserver): - def on_event(self, event_type: str, data: dict) -> None: - raise ValueError("Observer error") - - failing_observer = FailingObserver() - working_observer = MockObserver() - - event_bus.subscribe(failing_observer) - event_bus.subscribe(working_observer) - - # No debe lanzar excepción - event_bus.publish("analysis_started", {"id": "test"}) - - assert len(working_observer.received_events) == 1 +"""Tests para EventBus.""" + +import pytest + +from src.core.events.event_bus import EventBus +from src.core.events.observers import EventObserver + + +class MockObserver(EventObserver): + """Observer mock para testing.""" + + def __init__(self): + self.received_events = [] + + def on_event(self, 
event_type: str, data: dict) -> None: + self.received_events.append((event_type, data)) + + +class TestEventBus: + """Tests para EventBus.""" + + @pytest.fixture + def event_bus(self): + """Crea instancia de EventBus y limpia estado.""" + bus = EventBus() + bus.clear() + return bus + + def test_subscribe_and_publish(self, event_bus): + """Verifica que los observers reciben eventos.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.publish("analysis_started", {"id": "123"}) + + assert len(observer.received_events) == 1 + assert observer.received_events[0][0] == "analysis_started" + assert observer.received_events[0][1]["id"] == "123" + + def test_multiple_subscribers(self, event_bus): + """Verifica que múltiples observers reciben el mismo evento.""" + observer1 = MockObserver() + observer2 = MockObserver() + + event_bus.subscribe(observer1) + event_bus.subscribe(observer2) + event_bus.publish("analysis_started", {"test": True}) + + assert len(observer1.received_events) == 1 + assert len(observer2.received_events) == 1 + + def test_unsubscribe(self, event_bus): + """Verifica que unsubscribe funciona.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.unsubscribe(observer) + event_bus.publish("analysis_started", {"id": "456"}) + + assert len(observer.received_events) == 0 + + def test_publish_without_subscribers(self, event_bus): + """Publicar sin suscriptores no debe fallar.""" + # No debe lanzar excepción + event_bus.publish("analysis_completed", {"id": "789"}) + + def test_clear_all_subscribers(self, event_bus): + """Verifica que clear elimina todos los suscriptores.""" + observer = MockObserver() + + event_bus.subscribe(observer) + event_bus.clear() + + event_bus.publish("analysis_started", {}) + event_bus.publish("analysis_completed", {}) + + assert len(observer.received_events) == 0 + + def test_handler_exception_does_not_break_others(self, event_bus): + """Un observer que falla no debe afectar a otros.""" + + 
class FailingObserver(EventObserver): + def on_event(self, event_type: str, data: dict) -> None: + raise ValueError("Observer error") + + failing_observer = FailingObserver() + working_observer = MockObserver() + + event_bus.subscribe(failing_observer) + event_bus.subscribe(working_observer) + + # No debe lanzar excepción + event_bus.publish("analysis_started", {"id": "test"}) + + assert len(working_observer.received_events) == 1 diff --git a/backend/tests/unit/external/test_clerk_client.py b/backend/tests/unit/external/test_clerk_client.py index 5586d8a..efa6c45 100644 --- a/backend/tests/unit/external/test_clerk_client.py +++ b/backend/tests/unit/external/test_clerk_client.py @@ -1,129 +1,129 @@ -"""Tests para ClerkClient.""" - -import time -from unittest.mock import MagicMock, patch - -import pytest -from jose import jwt - -from src.external.clerk_client import ( - ClerkClient, - ClerkTokenExpiredError, - ClerkTokenInvalidError, -) - -# Constante para el secret key de tests -TEST_SECRET_KEY = "test-secret-key-12345" - - -def create_valid_token() -> str: - """Genera un token JWT válido.""" - now = int(time.time()) - payload = { - "sub": "user_test123", - "email": "test@example.com", - "name": "Test User", - "exp": now + 3600, - "iat": now, - } - return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") - - -def create_expired_token() -> str: - """Genera un token JWT expirado.""" - now = int(time.time()) - payload = { - "sub": "user_expired", - "email": "expired@example.com", - "exp": now - 3600, - "iat": now - 7200, - } - return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") - - -class TestClerkClient: - """Tests para ClerkClient.""" - - @patch("src.external.clerk_client.settings") - def test_verify_token_valid(self, mock_settings: MagicMock): - """Token válido retorna payload correcto.""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - 
token = create_valid_token() - - result = client.verify_token(token) - - # verify_token retorna el payload completo del JWT con 'sub' - assert result["sub"] == "user_test123" - assert result["email"] == "test@example.com" - assert result["name"] == "Test User" - - @patch("src.external.clerk_client.settings") - def test_verify_token_expired(self, mock_settings: MagicMock): - """Token expirado lanza ClerkTokenExpiredError.""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - token = create_expired_token() - - with pytest.raises(ClerkTokenExpiredError): - client.verify_token(token) - - @patch("src.external.clerk_client.settings") - def test_verify_token_invalid(self, mock_settings: MagicMock): - """Token inválido lanza ClerkTokenInvalidError.""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - - with pytest.raises(ClerkTokenInvalidError): - client.verify_token("invalid-token-string") - - @patch("src.external.clerk_client.settings") - def test_verify_token_malformed(self, mock_settings: MagicMock): - """Token malformado lanza ClerkTokenInvalidError.""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - - with pytest.raises(ClerkTokenInvalidError): - client.verify_token("not.a.valid.jwt.token") - - @patch("src.external.clerk_client.settings") - def test_get_user_id_from_token(self, mock_settings: MagicMock): - """get_user_id_from_token retorna el user_id (sub claim).""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - token = create_valid_token() - - user_id = client.get_user_id_from_token(token) - - assert user_id == "user_test123" 
- - @patch("src.external.clerk_client.settings") - def test_get_user_id_missing_sub(self, mock_settings: MagicMock): - """Token sin sub lanza ClerkTokenInvalidError.""" - mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY - mock_settings.CLERK_SECRET_KEY = None - mock_settings.CLERK_JWKS_URL = None - client = ClerkClient() - - now = int(time.time()) - payload = { - "email": "nosub@example.com", - "exp": now + 3600, - } - token = jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") - - with pytest.raises(ClerkTokenInvalidError) as exc: - client.get_user_id_from_token(token) - - # El mensaje ahora menciona 'sub' en lugar de 'user_id' - assert "sub" in str(exc.value).lower() +"""Tests para ClerkClient.""" + +import time +from unittest.mock import MagicMock, patch + +import pytest +from jose import jwt + +from src.external.clerk_client import ( + ClerkClient, + ClerkTokenExpiredError, + ClerkTokenInvalidError, +) + +# Constante para el secret key de tests +TEST_SECRET_KEY = "test-secret-key-12345" + + +def create_valid_token() -> str: + """Genera un token JWT válido.""" + now = int(time.time()) + payload = { + "sub": "user_test123", + "email": "test@example.com", + "name": "Test User", + "exp": now + 3600, + "iat": now, + } + return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") + + +def create_expired_token() -> str: + """Genera un token JWT expirado.""" + now = int(time.time()) + payload = { + "sub": "user_expired", + "email": "expired@example.com", + "exp": now - 3600, + "iat": now - 7200, + } + return jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") + + +class TestClerkClient: + """Tests para ClerkClient.""" + + @patch("src.external.clerk_client.settings") + def test_verify_token_valid(self, mock_settings: MagicMock): + """Token válido retorna payload correcto.""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + token = 
create_valid_token() + + result = client.verify_token(token) + + # verify_token retorna el payload completo del JWT con 'sub' + assert result["sub"] == "user_test123" + assert result["email"] == "test@example.com" + assert result["name"] == "Test User" + + @patch("src.external.clerk_client.settings") + def test_verify_token_expired(self, mock_settings: MagicMock): + """Token expirado lanza ClerkTokenExpiredError.""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + token = create_expired_token() + + with pytest.raises(ClerkTokenExpiredError): + client.verify_token(token) + + @patch("src.external.clerk_client.settings") + def test_verify_token_invalid(self, mock_settings: MagicMock): + """Token inválido lanza ClerkTokenInvalidError.""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + + with pytest.raises(ClerkTokenInvalidError): + client.verify_token("invalid-token-string") + + @patch("src.external.clerk_client.settings") + def test_verify_token_malformed(self, mock_settings: MagicMock): + """Token malformado lanza ClerkTokenInvalidError.""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + + with pytest.raises(ClerkTokenInvalidError): + client.verify_token("not.a.valid.jwt.token") + + @patch("src.external.clerk_client.settings") + def test_get_user_id_from_token(self, mock_settings: MagicMock): + """get_user_id_from_token retorna el user_id (sub claim).""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + token = create_valid_token() + + user_id = client.get_user_id_from_token(token) + + assert user_id == "user_test123" + + 
@patch("src.external.clerk_client.settings") + def test_get_user_id_missing_sub(self, mock_settings: MagicMock): + """Token sin sub lanza ClerkTokenInvalidError.""" + mock_settings.CLERK_JWT_SIGNING_KEY = TEST_SECRET_KEY + mock_settings.CLERK_SECRET_KEY = None + mock_settings.CLERK_JWKS_URL = None + client = ClerkClient() + + now = int(time.time()) + payload = { + "email": "nosub@example.com", + "exp": now + 3600, + } + token = jwt.encode(payload, TEST_SECRET_KEY, algorithm="HS256") + + with pytest.raises(ClerkTokenInvalidError) as exc: + client.get_user_id_from_token(token) + + # El mensaje ahora menciona 'sub' en lugar de 'user_id' + assert "sub" in str(exc.value).lower() diff --git a/backend/tests/unit/middleware/test_auth.py b/backend/tests/unit/middleware/test_auth.py index 7080399..1d3acb9 100644 --- a/backend/tests/unit/middleware/test_auth.py +++ b/backend/tests/unit/middleware/test_auth.py @@ -1,121 +1,121 @@ -"""Tests para la dependencia de autenticación.""" - -from unittest.mock import MagicMock, patch - -import pytest -from fastapi import HTTPException -from fastapi.security import HTTPAuthorizationCredentials - -from src.core.dependencies.auth import get_current_user, get_optional_user -from src.external.clerk_client import ClerkTokenExpiredError, ClerkTokenInvalidError -from src.schemas.user import Role, User - - -class TestGetCurrentUser: - """Tests para get_current_user.""" - - @pytest.mark.asyncio - async def test_missing_credentials_raises_401(self): - """Sin credenciales debe lanzar 401.""" - with pytest.raises(HTTPException) as exc: - await get_current_user(credentials=None) - - assert exc.value.status_code == 401 - assert "requerido" in exc.value.detail.lower() - - @pytest.mark.asyncio - async def test_valid_token_returns_user(self): - """Token válido retorna usuario.""" - credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="valid-token") - - mock_payload = { - "sub": "user_abc123", - "email": "test@example.com", - "name": 
"Test User", - } - - with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: - mock_client = MockClerk.return_value - mock_client.verify_token.return_value = mock_payload - - user = await get_current_user(credentials=credentials) - - assert isinstance(user, User) - assert user.id == "user_abc123" - assert user.email == "test@example.com" - assert user.name == "Test User" - assert user.role == Role.DEVELOPER - - @pytest.mark.asyncio - async def test_expired_token_raises_401(self): - """Token expirado debe lanzar 401.""" - credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="expired-token") - - with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: - mock_client = MockClerk.return_value - mock_client.verify_token.side_effect = ClerkTokenExpiredError("Token expirado") - - with pytest.raises(HTTPException) as exc: - await get_current_user(credentials=credentials) - - assert exc.value.status_code == 401 - assert "expirado" in exc.value.detail.lower() - - @pytest.mark.asyncio - async def test_invalid_token_raises_401(self): - """Token inválido debe lanzar 401.""" - credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="invalid-token") - - with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: - mock_client = MockClerk.return_value - mock_client.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") - - with pytest.raises(HTTPException) as exc: - await get_current_user(credentials=credentials) - - assert exc.value.status_code == 401 - assert "inválido" in exc.value.detail.lower() - - -class TestGetOptionalUser: - """Tests para get_optional_user.""" - - @pytest.mark.asyncio - async def test_no_credentials_returns_none(self): - """Sin credenciales retorna None.""" - result = await get_optional_user(credentials=None) - assert result is None - - @pytest.mark.asyncio - async def test_valid_credentials_returns_user(self): - """Con credenciales válidas retorna usuario.""" - credentials = 
HTTPAuthorizationCredentials(scheme="Bearer", credentials="valid-token") - - mock_payload = { - "sub": "user_optional", - "email": "optional@test.com", - "name": "Optional User", - } - - with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: - mock_client = MockClerk.return_value - mock_client.verify_token.return_value = mock_payload - - user = await get_optional_user(credentials=credentials) - - assert user is not None - assert user.id == "user_optional" - - @pytest.mark.asyncio - async def test_invalid_token_raises_401(self): - """Token inválido en get_optional_user debe lanzar 401.""" - credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="bad-token") - - with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: - mock_client = MockClerk.return_value - mock_client.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") - - with pytest.raises(HTTPException) as exc: - await get_optional_user(credentials=credentials) - - assert exc.value.status_code == 401 +"""Tests para la dependencia de autenticación.""" + +from unittest.mock import MagicMock, patch + +import pytest +from fastapi import HTTPException +from fastapi.security import HTTPAuthorizationCredentials + +from src.core.dependencies.auth import get_current_user, get_optional_user +from src.external.clerk_client import ClerkTokenExpiredError, ClerkTokenInvalidError +from src.schemas.user import Role, User + + +class TestGetCurrentUser: + """Tests para get_current_user.""" + + @pytest.mark.asyncio + async def test_missing_credentials_raises_401(self): + """Sin credenciales debe lanzar 401.""" + with pytest.raises(HTTPException) as exc: + await get_current_user(credentials=None) + + assert exc.value.status_code == 401 + assert "requerido" in exc.value.detail.lower() + + @pytest.mark.asyncio + async def test_valid_token_returns_user(self): + """Token válido retorna usuario.""" + credentials = HTTPAuthorizationCredentials(scheme="Bearer", 
credentials="valid-token") + + mock_payload = { + "sub": "user_abc123", + "email": "test@example.com", + "name": "Test User", + } + + with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: + mock_client = MockClerk.return_value + mock_client.verify_token.return_value = mock_payload + + user = await get_current_user(credentials=credentials) + + assert isinstance(user, User) + assert user.id == "user_abc123" + assert user.email == "test@example.com" + assert user.name == "Test User" + assert user.role == Role.DEVELOPER + + @pytest.mark.asyncio + async def test_expired_token_raises_401(self): + """Token expirado debe lanzar 401.""" + credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="expired-token") + + with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: + mock_client = MockClerk.return_value + mock_client.verify_token.side_effect = ClerkTokenExpiredError("Token expirado") + + with pytest.raises(HTTPException) as exc: + await get_current_user(credentials=credentials) + + assert exc.value.status_code == 401 + assert "expirado" in exc.value.detail.lower() + + @pytest.mark.asyncio + async def test_invalid_token_raises_401(self): + """Token inválido debe lanzar 401.""" + credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="invalid-token") + + with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: + mock_client = MockClerk.return_value + mock_client.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") + + with pytest.raises(HTTPException) as exc: + await get_current_user(credentials=credentials) + + assert exc.value.status_code == 401 + assert "inválido" in exc.value.detail.lower() + + +class TestGetOptionalUser: + """Tests para get_optional_user.""" + + @pytest.mark.asyncio + async def test_no_credentials_returns_none(self): + """Sin credenciales retorna None.""" + result = await get_optional_user(credentials=None) + assert result is None + + @pytest.mark.asyncio + async def 
test_valid_credentials_returns_user(self): + """Con credenciales válidas retorna usuario.""" + credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="valid-token") + + mock_payload = { + "sub": "user_optional", + "email": "optional@test.com", + "name": "Optional User", + } + + with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: + mock_client = MockClerk.return_value + mock_client.verify_token.return_value = mock_payload + + user = await get_optional_user(credentials=credentials) + + assert user is not None + assert user.id == "user_optional" + + @pytest.mark.asyncio + async def test_invalid_token_raises_401(self): + """Token inválido en get_optional_user debe lanzar 401.""" + credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials="bad-token") + + with patch("src.core.dependencies.auth.ClerkClient") as MockClerk: + mock_client = MockClerk.return_value + mock_client.verify_token.side_effect = ClerkTokenInvalidError("Token inválido") + + with pytest.raises(HTTPException) as exc: + await get_optional_user(credentials=credentials) + + assert exc.value.status_code == 401 diff --git a/backend/tests/unit/models/test_code_review.py b/backend/tests/unit/models/test_code_review.py index 6a85a52..03a62a3 100644 --- a/backend/tests/unit/models/test_code_review.py +++ b/backend/tests/unit/models/test_code_review.py @@ -1,97 +1,97 @@ -"""Tests para CodeReviewEntity model.""" - -import uuid -from datetime import datetime -from unittest.mock import MagicMock, PropertyMock - -import pytest - -from src.models.code_review import CodeReviewEntity -from src.models.enums.review_status import ReviewStatus -from src.models.enums.severity_enum import SeverityEnum - - -class TestCodeReviewEntityRepr: - """Tests para __repr__.""" - - def test_repr_returns_readable_string(self): - """__repr__ retorna representación legible.""" - review_id = uuid.uuid4() - review = CodeReviewEntity( - id=review_id, - user_id="user_123", - 
filename="test_file.py", - code_content=b"encrypted_content", - status=ReviewStatus.COMPLETED, - ) - - result = repr(review) - - assert "CodeReviewEntity" in result - assert "test_file.py" in result - assert "COMPLETED" in result or "completed" in result.lower() - - -class TestCodeReviewEntityCalculateQualityScore: - """Tests para calculate_quality_score.""" - - def test_calculate_quality_score_no_findings(self): - """Sin findings retorna score 100.""" - review = CodeReviewEntity( - id=uuid.uuid4(), - user_id="user_123", - filename="clean_file.py", - code_content=b"content", - ) - # Mock de findings vacío - review.findings = [] - - score = review.calculate_quality_score() - - assert score == 100 - - def test_calculate_quality_score_with_findings(self): - """Con findings calcula penalidades correctamente.""" - review = CodeReviewEntity( - id=uuid.uuid4(), - user_id="user_123", - filename="file_with_issues.py", - code_content=b"content", - ) - - # Mock de findings con penalidades - finding1 = MagicMock() - finding1.penalty = 10 # CRITICAL - finding2 = MagicMock() - finding2.penalty = 5 # HIGH - finding3 = MagicMock() - finding3.penalty = 2 # MEDIUM - - review.findings = [finding1, finding2, finding3] - - score = review.calculate_quality_score() - - # 100 - (10 + 5 + 2) = 83 - assert score == 83 - - def test_calculate_quality_score_floor_at_zero(self): - """Score mínimo es 0, no negativo.""" - review = CodeReviewEntity( - id=uuid.uuid4(), - user_id="user_123", - filename="terrible_file.py", - code_content=b"content", - ) - - # Mock de muchos findings críticos - findings = [] - for _ in range(15): - f = MagicMock() - f.penalty = 10 # 15 x 10 = 150 - findings.append(f) - - review.findings = findings - - score = review.calculate_quality_score() - - assert score == 0 # max(0, 100 - 150) = 0 +"""Tests para CodeReviewEntity model.""" + +import uuid +from datetime import datetime +from unittest.mock import MagicMock, PropertyMock + +import pytest + +from 
src.models.code_review import CodeReviewEntity +from src.models.enums.review_status import ReviewStatus +from src.models.enums.severity_enum import SeverityEnum + + +class TestCodeReviewEntityRepr: + """Tests para __repr__.""" + + def test_repr_returns_readable_string(self): + """__repr__ retorna representación legible.""" + review_id = uuid.uuid4() + review = CodeReviewEntity( + id=review_id, + user_id="user_123", + filename="test_file.py", + code_content=b"encrypted_content", + status=ReviewStatus.COMPLETED, + ) + + result = repr(review) + + assert "CodeReviewEntity" in result + assert "test_file.py" in result + assert "COMPLETED" in result or "completed" in result.lower() + + +class TestCodeReviewEntityCalculateQualityScore: + """Tests para calculate_quality_score.""" + + def test_calculate_quality_score_no_findings(self): + """Sin findings retorna score 100.""" + review = CodeReviewEntity( + id=uuid.uuid4(), + user_id="user_123", + filename="clean_file.py", + code_content=b"content", + ) + # Mock de findings vacío + review.findings = [] + + score = review.calculate_quality_score() + + assert score == 100 + + def test_calculate_quality_score_with_findings(self): + """Con findings calcula penalidades correctamente.""" + review = CodeReviewEntity( + id=uuid.uuid4(), + user_id="user_123", + filename="file_with_issues.py", + code_content=b"content", + ) + + # Mock de findings con penalidades + finding1 = MagicMock() + finding1.penalty = 10 # CRITICAL + finding2 = MagicMock() + finding2.penalty = 5 # HIGH + finding3 = MagicMock() + finding3.penalty = 2 # MEDIUM + + review.findings = [finding1, finding2, finding3] + + score = review.calculate_quality_score() + + # 100 - (10 + 5 + 2) = 83 + assert score == 83 + + def test_calculate_quality_score_floor_at_zero(self): + """Score mínimo es 0, no negativo.""" + review = CodeReviewEntity( + id=uuid.uuid4(), + user_id="user_123", + filename="terrible_file.py", + code_content=b"content", + ) + + # Mock de muchos findings 
críticos + findings = [] + for _ in range(15): + f = MagicMock() + f.penalty = 10 # 15 x 10 = 150 + findings.append(f) + + review.findings = findings + + score = review.calculate_quality_score() + + assert score == 0 # max(0, 100 - 150) = 0 diff --git a/backend/tests/unit/models/test_finding.py b/backend/tests/unit/models/test_finding.py index 4f18c97..1c31f73 100644 --- a/backend/tests/unit/models/test_finding.py +++ b/backend/tests/unit/models/test_finding.py @@ -1,170 +1,170 @@ -"""Tests para AgentFindingEntity model.""" - -import uuid -from datetime import datetime - -import pytest - -from src.models.enums.severity_enum import SeverityEnum -from src.models.finding import AgentFindingEntity - - -class TestAgentFindingEntityRepr: - """Tests para __repr__.""" - - def test_repr_returns_readable_string(self): - """__repr__ retorna representación legible.""" - finding_id = uuid.uuid4() - review_id = uuid.uuid4() - finding = AgentFindingEntity( - id=finding_id, - review_id=review_id, - agent_type="SecurityAgent", - severity=SeverityEnum.HIGH, - issue_type="sql_injection", - line_number=42, - message="SQL injection detected", - ) - - result = repr(finding) - - assert "AgentFindingEntity" in result - assert "SecurityAgent" in result - assert "42" in result - - -class TestAgentFindingEntityPenalty: - """Tests para penalty property.""" - - def test_penalty_critical(self): - """CRITICAL tiene penalidad de 10.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="SecurityAgent", - severity=SeverityEnum.CRITICAL, - issue_type="dangerous_function", - line_number=1, - message="Critical issue", - ) - - assert finding.penalty == 10 - - def test_penalty_high(self): - """HIGH tiene penalidad de 5.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="SecurityAgent", - severity=SeverityEnum.HIGH, - issue_type="hardcoded_password", - line_number=10, - message="High severity issue", - ) - - assert 
finding.penalty == 5 - - def test_penalty_medium(self): - """MEDIUM tiene penalidad de 2.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="QualityAgent", - severity=SeverityEnum.MEDIUM, - issue_type="code_smell", - line_number=20, - message="Medium severity issue", - ) - - assert finding.penalty == 2 - - def test_penalty_low(self): - """LOW tiene penalidad de 1.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="StyleAgent", - severity=SeverityEnum.LOW, - issue_type="style_violation", - line_number=30, - message="Low severity issue", - ) - - assert finding.penalty == 1 - - -class TestAgentFindingEntityToDict: - """Tests para to_dict.""" - - def test_to_dict_complete(self): - """to_dict retorna diccionario con todos los campos.""" - finding_id = uuid.uuid4() - review_id = uuid.uuid4() - created = datetime(2025, 12, 1, 10, 0, 0) - - finding = AgentFindingEntity( - id=finding_id, - review_id=review_id, - agent_type="SecurityAgent", - severity=SeverityEnum.HIGH, - issue_type="sql_injection", - line_number=42, - code_snippet="cursor.execute(f'SELECT * FROM users WHERE id={user_id}')", - message="Potential SQL injection vulnerability", - suggestion="Use parameterized queries instead", - metrics={"confidence": 0.95}, - created_at=created, - ) - - result = finding.to_dict() - - assert result["id"] == str(finding_id) - assert result["review_id"] == str(review_id) - assert result["agent_type"] == "SecurityAgent" - assert result["severity"] == "HIGH" - assert result["issue_type"] == "sql_injection" - assert result["line_number"] == 42 - assert "SELECT * FROM users" in result["code_snippet"] - assert result["message"] == "Potential SQL injection vulnerability" - assert result["suggestion"] == "Use parameterized queries instead" - assert result["metrics"] == {"confidence": 0.95} - assert result["created_at"] == "2025-12-01T10:00:00" - - def test_to_dict_with_none_values(self): - """to_dict 
maneja valores None correctamente.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="SecurityAgent", - severity=SeverityEnum.LOW, - issue_type="minor_issue", - line_number=1, - message="Minor issue found", - code_snippet=None, - suggestion=None, - metrics=None, - created_at=None, - ) - - result = finding.to_dict() - - assert result["code_snippet"] is None - assert result["suggestion"] is None - assert result["metrics"] is None - assert result["created_at"] is None - - def test_to_dict_severity_none(self): - """to_dict maneja severity None.""" - finding = AgentFindingEntity( - id=uuid.uuid4(), - review_id=uuid.uuid4(), - agent_type="TestAgent", - severity=None, - issue_type="test", - line_number=1, - message="Test message", - ) - - result = finding.to_dict() - - assert result["severity"] is None +"""Tests para AgentFindingEntity model.""" + +import uuid +from datetime import datetime + +import pytest + +from src.models.enums.severity_enum import SeverityEnum +from src.models.finding import AgentFindingEntity + + +class TestAgentFindingEntityRepr: + """Tests para __repr__.""" + + def test_repr_returns_readable_string(self): + """__repr__ retorna representación legible.""" + finding_id = uuid.uuid4() + review_id = uuid.uuid4() + finding = AgentFindingEntity( + id=finding_id, + review_id=review_id, + agent_type="SecurityAgent", + severity=SeverityEnum.HIGH, + issue_type="sql_injection", + line_number=42, + message="SQL injection detected", + ) + + result = repr(finding) + + assert "AgentFindingEntity" in result + assert "SecurityAgent" in result + assert "42" in result + + +class TestAgentFindingEntityPenalty: + """Tests para penalty property.""" + + def test_penalty_critical(self): + """CRITICAL tiene penalidad de 10.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="SecurityAgent", + severity=SeverityEnum.CRITICAL, + issue_type="dangerous_function", + line_number=1, + 
message="Critical issue", + ) + + assert finding.penalty == 10 + + def test_penalty_high(self): + """HIGH tiene penalidad de 5.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="SecurityAgent", + severity=SeverityEnum.HIGH, + issue_type="hardcoded_password", + line_number=10, + message="High severity issue", + ) + + assert finding.penalty == 5 + + def test_penalty_medium(self): + """MEDIUM tiene penalidad de 2.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="QualityAgent", + severity=SeverityEnum.MEDIUM, + issue_type="code_smell", + line_number=20, + message="Medium severity issue", + ) + + assert finding.penalty == 2 + + def test_penalty_low(self): + """LOW tiene penalidad de 1.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="StyleAgent", + severity=SeverityEnum.LOW, + issue_type="style_violation", + line_number=30, + message="Low severity issue", + ) + + assert finding.penalty == 1 + + +class TestAgentFindingEntityToDict: + """Tests para to_dict.""" + + def test_to_dict_complete(self): + """to_dict retorna diccionario con todos los campos.""" + finding_id = uuid.uuid4() + review_id = uuid.uuid4() + created = datetime(2025, 12, 1, 10, 0, 0) + + finding = AgentFindingEntity( + id=finding_id, + review_id=review_id, + agent_type="SecurityAgent", + severity=SeverityEnum.HIGH, + issue_type="sql_injection", + line_number=42, + code_snippet="cursor.execute(f'SELECT * FROM users WHERE id={user_id}')", + message="Potential SQL injection vulnerability", + suggestion="Use parameterized queries instead", + metrics={"confidence": 0.95}, + created_at=created, + ) + + result = finding.to_dict() + + assert result["id"] == str(finding_id) + assert result["review_id"] == str(review_id) + assert result["agent_type"] == "SecurityAgent" + assert result["severity"] == "HIGH" + assert result["issue_type"] == "sql_injection" + assert 
result["line_number"] == 42 + assert "SELECT * FROM users" in result["code_snippet"] + assert result["message"] == "Potential SQL injection vulnerability" + assert result["suggestion"] == "Use parameterized queries instead" + assert result["metrics"] == {"confidence": 0.95} + assert result["created_at"] == "2025-12-01T10:00:00" + + def test_to_dict_with_none_values(self): + """to_dict maneja valores None correctamente.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="SecurityAgent", + severity=SeverityEnum.LOW, + issue_type="minor_issue", + line_number=1, + message="Minor issue found", + code_snippet=None, + suggestion=None, + metrics=None, + created_at=None, + ) + + result = finding.to_dict() + + assert result["code_snippet"] is None + assert result["suggestion"] is None + assert result["metrics"] is None + assert result["created_at"] is None + + def test_to_dict_severity_none(self): + """to_dict maneja severity None.""" + finding = AgentFindingEntity( + id=uuid.uuid4(), + review_id=uuid.uuid4(), + agent_type="TestAgent", + severity=None, + issue_type="test", + line_number=1, + message="Test message", + ) + + result = finding.to_dict() + + assert result["severity"] is None diff --git a/backend/tests/unit/models/test_user.py b/backend/tests/unit/models/test_user.py index 053c17f..076470f 100644 --- a/backend/tests/unit/models/test_user.py +++ b/backend/tests/unit/models/test_user.py @@ -1,169 +1,169 @@ -"""Tests para UserEntity model.""" - -from datetime import date, datetime -from unittest.mock import patch - -import pytest - -from src.models.enums.user_role import UserRole -from src.models.user import UserEntity - - -class TestUserEntityRepr: - """Tests para __repr__.""" - - def test_repr_returns_readable_string(self): - """__repr__ retorna representación legible.""" - user = UserEntity( - id="user_123", - email="test@example.com", - role=UserRole.DEVELOPER, - ) - - result = repr(user) - - assert "user_123" in result - 
assert "test@example.com" in result - assert "UserEntity" in result - - -class TestUserEntityCanAnalyze: - """Tests para can_analyze (rate limiting RN3).""" - - def test_admin_always_can_analyze(self): - """Admin siempre puede analizar sin límite.""" - user = UserEntity( - id="admin_user", - email="admin@example.com", - role=UserRole.ADMIN, - daily_analysis_count=100, # Muchos análisis - last_analysis_date=date.today(), - ) - - assert user.can_analyze() is True - assert user.can_analyze(max_daily=5) is True - - def test_developer_can_analyze_new_day(self): - """Developer puede analizar si es un nuevo día.""" - yesterday = date(2025, 11, 30) - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=10, # Alcanzó límite ayer - last_analysis_date=yesterday, - ) - - with patch("src.models.user.date") as mock_date: - mock_date.today.return_value = date(2025, 12, 1) # Hoy es nuevo día - assert user.can_analyze() is True - - def test_developer_can_analyze_under_limit(self): - """Developer puede analizar si está bajo el límite diario.""" - today = date.today() - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=5, - last_analysis_date=today, - ) - - assert user.can_analyze(max_daily=10) is True - - def test_developer_cannot_analyze_at_limit(self): - """Developer NO puede analizar si alcanzó el límite.""" - today = date.today() - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=10, - last_analysis_date=today, - ) - - assert user.can_analyze(max_daily=10) is False - - def test_developer_cannot_analyze_over_limit(self): - """Developer NO puede analizar si está sobre el límite.""" - today = date.today() - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=15, - last_analysis_date=today, - ) - - assert user.can_analyze(max_daily=10) is 
False - - def test_can_analyze_with_no_previous_analysis(self): - """Usuario sin análisis previos puede analizar.""" - user = UserEntity( - id="new_user", - email="new@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=0, - last_analysis_date=None, - ) - - assert user.can_analyze() is True - - -class TestUserEntityIncrementAnalysisCount: - """Tests para increment_analysis_count.""" - - def test_increment_resets_on_new_day(self): - """Contador se reinicia en nuevo día.""" - yesterday = date(2025, 11, 30) - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=5, - last_analysis_date=yesterday, - ) - - with patch("src.models.user.date") as mock_date: - today = date(2025, 12, 1) - mock_date.today.return_value = today - - user.increment_analysis_count() - - assert user.daily_analysis_count == 1 - assert user.last_analysis_date == today - - def test_increment_same_day(self): - """Contador se incrementa en el mismo día.""" - today = date.today() - user = UserEntity( - id="dev_user", - email="dev@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=3, - last_analysis_date=today, - ) - - user.increment_analysis_count() - - assert user.daily_analysis_count == 4 - assert user.last_analysis_date == today - - def test_increment_first_analysis_ever(self): - """Primer análisis del usuario.""" - user = UserEntity( - id="new_user", - email="new@example.com", - role=UserRole.DEVELOPER, - daily_analysis_count=0, - last_analysis_date=None, - ) - - with patch("src.models.user.date") as mock_date: - today = date(2025, 12, 1) - mock_date.today.return_value = today - - user.increment_analysis_count() - - assert user.daily_analysis_count == 1 - assert user.last_analysis_date == today +"""Tests para UserEntity model.""" + +from datetime import date, datetime +from unittest.mock import patch + +import pytest + +from src.models.enums.user_role import UserRole +from src.models.user import UserEntity + + +class 
TestUserEntityRepr: + """Tests para __repr__.""" + + def test_repr_returns_readable_string(self): + """__repr__ retorna representación legible.""" + user = UserEntity( + id="user_123", + email="test@example.com", + role=UserRole.DEVELOPER, + ) + + result = repr(user) + + assert "user_123" in result + assert "test@example.com" in result + assert "UserEntity" in result + + +class TestUserEntityCanAnalyze: + """Tests para can_analyze (rate limiting RN3).""" + + def test_admin_always_can_analyze(self): + """Admin siempre puede analizar sin límite.""" + user = UserEntity( + id="admin_user", + email="admin@example.com", + role=UserRole.ADMIN, + daily_analysis_count=100, # Muchos análisis + last_analysis_date=date.today(), + ) + + assert user.can_analyze() is True + assert user.can_analyze(max_daily=5) is True + + def test_developer_can_analyze_new_day(self): + """Developer puede analizar si es un nuevo día.""" + yesterday = date(2025, 11, 30) + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=10, # Alcanzó límite ayer + last_analysis_date=yesterday, + ) + + with patch("src.models.user.date") as mock_date: + mock_date.today.return_value = date(2025, 12, 1) # Hoy es nuevo día + assert user.can_analyze() is True + + def test_developer_can_analyze_under_limit(self): + """Developer puede analizar si está bajo el límite diario.""" + today = date.today() + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=5, + last_analysis_date=today, + ) + + assert user.can_analyze(max_daily=10) is True + + def test_developer_cannot_analyze_at_limit(self): + """Developer NO puede analizar si alcanzó el límite.""" + today = date.today() + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=10, + last_analysis_date=today, + ) + + assert user.can_analyze(max_daily=10) is False + + def 
test_developer_cannot_analyze_over_limit(self): + """Developer NO puede analizar si está sobre el límite.""" + today = date.today() + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=15, + last_analysis_date=today, + ) + + assert user.can_analyze(max_daily=10) is False + + def test_can_analyze_with_no_previous_analysis(self): + """Usuario sin análisis previos puede analizar.""" + user = UserEntity( + id="new_user", + email="new@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=0, + last_analysis_date=None, + ) + + assert user.can_analyze() is True + + +class TestUserEntityIncrementAnalysisCount: + """Tests para increment_analysis_count.""" + + def test_increment_resets_on_new_day(self): + """Contador se reinicia en nuevo día.""" + yesterday = date(2025, 11, 30) + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=5, + last_analysis_date=yesterday, + ) + + with patch("src.models.user.date") as mock_date: + today = date(2025, 12, 1) + mock_date.today.return_value = today + + user.increment_analysis_count() + + assert user.daily_analysis_count == 1 + assert user.last_analysis_date == today + + def test_increment_same_day(self): + """Contador se incrementa en el mismo día.""" + today = date.today() + user = UserEntity( + id="dev_user", + email="dev@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=3, + last_analysis_date=today, + ) + + user.increment_analysis_count() + + assert user.daily_analysis_count == 4 + assert user.last_analysis_date == today + + def test_increment_first_analysis_ever(self): + """Primer análisis del usuario.""" + user = UserEntity( + id="new_user", + email="new@example.com", + role=UserRole.DEVELOPER, + daily_analysis_count=0, + last_analysis_date=None, + ) + + with patch("src.models.user.date") as mock_date: + today = date(2025, 12, 1) + mock_date.today.return_value = today + + 
user.increment_analysis_count() + + assert user.daily_analysis_count == 1 + assert user.last_analysis_date == today diff --git a/backend/tests/unit/repositories/test_code_review_repo.py b/backend/tests/unit/repositories/test_code_review_repo.py index 9dc7031..bd43de3 100644 --- a/backend/tests/unit/repositories/test_code_review_repo.py +++ b/backend/tests/unit/repositories/test_code_review_repo.py @@ -1,132 +1,132 @@ -from datetime import datetime -from unittest.mock import MagicMock -from uuid import uuid4 - -import pytest -from sqlalchemy.exc import SQLAlchemyError - -from src.models.code_review import CodeReviewEntity -from src.models.enums.review_status import ReviewStatus -from src.repositories.code_review_repository import CodeReviewRepository -from src.schemas.analysis import CodeReview -from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 - - -def test_encrypt_decrypt_cycle(): - """Verifica que lo que se encripta se pueda desencriptar correctamente.""" - original = "Secret Code 123" - encrypted = encrypt_aes256(original) - decrypted = decrypt_aes256(encrypted) - - assert original == decrypted - assert encrypted != original - assert isinstance(encrypted, bytes) - - -def test_encrypt_empty_raises_error(): - """Verifica que encriptar vacío lance error.""" - with pytest.raises(ValueError): - encrypt_aes256("") - - -def test_decrypt_empty_returns_empty(): - """Verifica que desencriptar bytes vacíos retorne string vacío.""" - assert decrypt_aes256(b"") == "" - assert decrypt_aes256(None) == "" - - -@pytest.fixture -def mock_session(): - return MagicMock() - - -@pytest.fixture -def repo(mock_session): - return CodeReviewRepository(mock_session) - - -@pytest.fixture -def sample_review(): - return CodeReview( - id=uuid4(), - user_id="user_123", - filename="test.py", - code_content="print('Hello')", - quality_score=100, - status=ReviewStatus.PENDING, - total_findings=0, - created_at=datetime.utcnow(), - ) - - -def test_create_success(repo, 
mock_session, sample_review): - """Verifica creación exitosa y encriptación.""" - result = repo.create(sample_review) - - assert result == sample_review - mock_session.add.assert_called_once() - mock_session.commit.assert_called_once() - - # Verificar que se guardó encriptado - args, _ = mock_session.add.call_args - entity = args[0] - assert entity.code_content != "print('Hello')" - assert isinstance(entity.code_content, bytes) - - -def test_create_db_error(repo, mock_session, sample_review): - """Verifica manejo de errores de DB al crear.""" - mock_session.commit.side_effect = SQLAlchemyError("DB Error") - - with pytest.raises(SQLAlchemyError): - repo.create(sample_review) - - mock_session.rollback.assert_called_once() - - -def test_find_by_id_success(repo, mock_session): - """Verifica búsqueda exitosa y desencriptación.""" - review_id = uuid4() - encrypted_content = encrypt_aes256("print('Found')") - - mock_entity = CodeReviewEntity( - id=review_id, - user_id="user_1", - filename="found.py", - code_content=encrypted_content, - quality_score=90, - status=ReviewStatus.COMPLETED, - total_findings=2, - created_at=datetime.utcnow(), - ) - mock_session.get.return_value = mock_entity - - result = repo.find_by_id(review_id) - - assert result is not None - assert result.id == review_id - assert result.code_content == "print('Found')" # Desencriptado - assert result.status == ReviewStatus.COMPLETED - - -def test_find_by_id_not_found(repo, mock_session): - """Verifica retorno None si no existe.""" - mock_session.get.return_value = None - result = repo.find_by_id(uuid4()) - assert result is None - - -def test_find_by_id_decryption_error(repo, mock_session): - """Verifica manejo de error al desencriptar/recuperar.""" - review_id = uuid4() - mock_entity = CodeReviewEntity( - id=review_id, code_content=b"invalid_bytes" # Esto fallará al desencriptar con Fernet - ) - mock_session.get.return_value = mock_entity - - # Mockear decrypt para forzar error genérico si Fernet no falla 
con basura - # O confiar en que Fernet falle. Fernet lanza InvalidToken. - # Pero el repo captura Exception. - - with pytest.raises(Exception): - repo.find_by_id(review_id) +from datetime import datetime +from unittest.mock import MagicMock +from uuid import uuid4 + +import pytest +from sqlalchemy.exc import SQLAlchemyError + +from src.models.code_review import CodeReviewEntity +from src.models.enums.review_status import ReviewStatus +from src.repositories.code_review_repository import CodeReviewRepository +from src.schemas.analysis import CodeReview +from src.utils.encryption.aes_encryptor import decrypt_aes256, encrypt_aes256 + + +def test_encrypt_decrypt_cycle(): + """Verifica que lo que se encripta se pueda desencriptar correctamente.""" + original = "Secret Code 123" + encrypted = encrypt_aes256(original) + decrypted = decrypt_aes256(encrypted) + + assert original == decrypted + assert encrypted != original + assert isinstance(encrypted, bytes) + + +def test_encrypt_empty_raises_error(): + """Verifica que encriptar vacío lance error.""" + with pytest.raises(ValueError): + encrypt_aes256("") + + +def test_decrypt_empty_returns_empty(): + """Verifica que desencriptar bytes vacíos retorne string vacío.""" + assert decrypt_aes256(b"") == "" + assert decrypt_aes256(None) == "" + + +@pytest.fixture +def mock_session(): + return MagicMock() + + +@pytest.fixture +def repo(mock_session): + return CodeReviewRepository(mock_session) + + +@pytest.fixture +def sample_review(): + return CodeReview( + id=uuid4(), + user_id="user_123", + filename="test.py", + code_content="print('Hello')", + quality_score=100, + status=ReviewStatus.PENDING, + total_findings=0, + created_at=datetime.utcnow(), + ) + + +def test_create_success(repo, mock_session, sample_review): + """Verifica creación exitosa y encriptación.""" + result = repo.create(sample_review) + + assert result == sample_review + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + + # 
Verificar que se guardó encriptado + args, _ = mock_session.add.call_args + entity = args[0] + assert entity.code_content != "print('Hello')" + assert isinstance(entity.code_content, bytes) + + +def test_create_db_error(repo, mock_session, sample_review): + """Verifica manejo de errores de DB al crear.""" + mock_session.commit.side_effect = SQLAlchemyError("DB Error") + + with pytest.raises(SQLAlchemyError): + repo.create(sample_review) + + mock_session.rollback.assert_called_once() + + +def test_find_by_id_success(repo, mock_session): + """Verifica búsqueda exitosa y desencriptación.""" + review_id = uuid4() + encrypted_content = encrypt_aes256("print('Found')") + + mock_entity = CodeReviewEntity( + id=review_id, + user_id="user_1", + filename="found.py", + code_content=encrypted_content, + quality_score=90, + status=ReviewStatus.COMPLETED, + total_findings=2, + created_at=datetime.utcnow(), + ) + mock_session.get.return_value = mock_entity + + result = repo.find_by_id(review_id) + + assert result is not None + assert result.id == review_id + assert result.code_content == "print('Found')" # Desencriptado + assert result.status == ReviewStatus.COMPLETED + + +def test_find_by_id_not_found(repo, mock_session): + """Verifica retorno None si no existe.""" + mock_session.get.return_value = None + result = repo.find_by_id(uuid4()) + assert result is None + + +def test_find_by_id_decryption_error(repo, mock_session): + """Verifica manejo de error al desencriptar/recuperar.""" + review_id = uuid4() + mock_entity = CodeReviewEntity( + id=review_id, code_content=b"invalid_bytes" # Esto fallará al desencriptar con Fernet + ) + mock_session.get.return_value = mock_entity + + # Mockear decrypt para forzar error genérico si Fernet no falla con basura + # O confiar en que Fernet falle. Fernet lanza InvalidToken. + # Pero el repo captura Exception. 
+ + with pytest.raises(Exception): + repo.find_by_id(review_id) diff --git a/backend/tests/unit/repositories/test_user_repo.py b/backend/tests/unit/repositories/test_user_repo.py index 7096246..1b2e81c 100644 --- a/backend/tests/unit/repositories/test_user_repo.py +++ b/backend/tests/unit/repositories/test_user_repo.py @@ -1,336 +1,336 @@ -"""Tests para UserRepository.""" - -from datetime import datetime -from unittest.mock import MagicMock, patch - -import pytest -from sqlalchemy.orm import Session - -from src.models.enums.user_role import UserRole -from src.models.user import UserEntity -from src.repositories.user_repo import UserRepository - - -class TestUserRepository: - """Tests para UserRepository.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository con session mockeada.""" - return UserRepository(mock_session) - - @pytest.fixture - def sample_user_entity(self): - """Crea una entidad de usuario de prueba.""" - entity = MagicMock(spec=UserEntity) - entity.id = "user_123" - entity.email = "test@example.com" - entity.name = "Test User" - entity.role = UserRole.DEVELOPER - entity.daily_analysis_count = 0 - entity.last_analysis_date = None - entity.created_at = datetime(2025, 1, 1, 12, 0, 0) - entity.updated_at = datetime(2025, 1, 1, 12, 0, 0) - return entity - - -class TestGetById: - """Tests para get_by_id.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - @pytest.fixture - def sample_user_entity(self): - """Entidad de usuario de prueba.""" - entity = MagicMock(spec=UserEntity) - entity.id = "user_123" - entity.email = "test@example.com" - entity.name = "Test User" - entity.role = UserRole.DEVELOPER - return entity - - 
def test_get_by_id_found(self, repo, mock_session, sample_user_entity): - """get_by_id retorna usuario si existe.""" - # Arrange - mock_query = MagicMock() - mock_session.query.return_value = mock_query - mock_query.filter.return_value.first.return_value = sample_user_entity - - # Act - result = repo.get_by_id("user_123") - - # Assert - assert result == sample_user_entity - mock_session.query.assert_called_once_with(UserEntity) - - def test_get_by_id_not_found(self, repo, mock_session): - """get_by_id retorna None si usuario no existe.""" - # Arrange - mock_query = MagicMock() - mock_session.query.return_value = mock_query - mock_query.filter.return_value.first.return_value = None - - # Act - result = repo.get_by_id("nonexistent_user") - - # Assert - assert result is None - - -class TestGetByEmail: - """Tests para get_by_email.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - @pytest.fixture - def sample_user_entity(self): - """Entidad de usuario de prueba.""" - entity = MagicMock(spec=UserEntity) - entity.id = "user_456" - entity.email = "found@example.com" - entity.name = "Found User" - entity.role = UserRole.DEVELOPER - return entity - - def test_get_by_email_found(self, repo, mock_session, sample_user_entity): - """get_by_email retorna usuario si existe.""" - # Arrange - mock_query = MagicMock() - mock_session.query.return_value = mock_query - mock_query.filter.return_value.first.return_value = sample_user_entity - - # Act - result = repo.get_by_email("found@example.com") - - # Assert - assert result == sample_user_entity - assert result.email == "found@example.com" - - def test_get_by_email_not_found(self, repo, mock_session): - """get_by_email retorna None si email no existe.""" - # Arrange - mock_query = MagicMock() - mock_session.query.return_value = mock_query 
- mock_query.filter.return_value.first.return_value = None - - # Act - result = repo.get_by_email("notfound@example.com") - - # Assert - assert result is None - - -class TestCreate: - """Tests para create.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - def test_create_user_success(self, repo, mock_session): - """create crea usuario y llama add, commit, refresh.""" - # Act - with patch("src.repositories.user_repo.datetime") as mock_datetime: - mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 10, 0, 0) - result = repo.create( - user_id="new_user_123", - email="newuser@example.com", - name="New User", - avatar_url="https://example.com/avatar.png", - role=UserRole.DEVELOPER, - ) - - # Assert - mock_session.add.assert_called_once() - mock_session.commit.assert_called_once() - mock_session.refresh.assert_called_once() - - # Verificar que el usuario fue creado con los datos correctos - created_user = mock_session.add.call_args[0][0] - assert created_user.id == "new_user_123" - assert created_user.email == "newuser@example.com" - assert created_user.name == "New User" - assert created_user.avatar_url == "https://example.com/avatar.png" - assert created_user.role == UserRole.DEVELOPER - - def test_create_user_with_defaults(self, repo, mock_session): - """create usa valores por defecto correctamente.""" - # Act - with patch("src.repositories.user_repo.datetime") as mock_datetime: - mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 10, 0, 0) - result = repo.create( - user_id="minimal_user", - email="minimal@example.com", - ) - - # Assert - created_user = mock_session.add.call_args[0][0] - assert created_user.id == "minimal_user" - assert created_user.email == "minimal@example.com" - assert created_user.name is None - assert created_user.avatar_url is None - 
assert created_user.role == UserRole.DEVELOPER - assert created_user.daily_analysis_count == 0 - - -class TestUpdate: - """Tests para update.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - @pytest.fixture - def existing_user(self): - """Usuario existente para actualizar.""" - user = MagicMock(spec=UserEntity) - user.id = "user_to_update" - user.email = "old@example.com" - user.name = "Old Name" - user.avatar_url = "https://old.com/avatar.png" - user.role = UserRole.DEVELOPER - return user - - def test_update_all_fields(self, repo, mock_session, existing_user): - """update actualiza todos los campos proporcionados.""" - # Act - with patch("src.repositories.user_repo.datetime") as mock_datetime: - mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) - result = repo.update( - user=existing_user, - email="new@example.com", - name="New Name", - avatar_url="https://new.com/avatar.png", - ) - - # Assert - assert existing_user.email == "new@example.com" - assert existing_user.name == "New Name" - assert existing_user.avatar_url == "https://new.com/avatar.png" - mock_session.commit.assert_called_once() - mock_session.refresh.assert_called_once_with(existing_user) - - def test_update_partial_fields(self, repo, mock_session, existing_user): - """update solo actualiza campos proporcionados.""" - original_email = existing_user.email - original_avatar = existing_user.avatar_url - - # Act - with patch("src.repositories.user_repo.datetime") as mock_datetime: - mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) - result = repo.update( - user=existing_user, - name="Only Name Changed", - ) - - # Assert - assert existing_user.name == "Only Name Changed" - # Email y avatar no deberían cambiar - mock_session.commit.assert_called_once() - - def 
test_update_no_fields(self, repo, mock_session, existing_user): - """update sin campos aún actualiza updated_at y hace commit.""" - # Act - with patch("src.repositories.user_repo.datetime") as mock_datetime: - mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) - result = repo.update(user=existing_user) - - # Assert - mock_session.commit.assert_called_once() - mock_session.refresh.assert_called_once() - - -class TestDelete: - """Tests para delete.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - @pytest.fixture - def user_to_delete(self): - """Usuario para eliminar.""" - user = MagicMock(spec=UserEntity) - user.id = "user_to_delete" - user.email = "delete@example.com" - return user - - def test_delete_success(self, repo, mock_session, user_to_delete): - """delete elimina usuario y llama commit.""" - # Act - repo.delete(user_to_delete) - - # Assert - mock_session.delete.assert_called_once_with(user_to_delete) - mock_session.commit.assert_called_once() - - -class TestIncrementAnalysisCount: - """Tests para increment_analysis_count.""" - - @pytest.fixture - def mock_session(self): - """Mock de SQLAlchemy Session.""" - return MagicMock(spec=Session) - - @pytest.fixture - def repo(self, mock_session): - """Instancia de UserRepository.""" - return UserRepository(mock_session) - - @pytest.fixture - def user_with_count(self): - """Usuario con contador de análisis.""" - user = MagicMock(spec=UserEntity) - user.id = "counting_user" - user.daily_analysis_count = 5 - return user - - def test_increment_analysis_count_success(self, repo, mock_session, user_with_count): - """increment_analysis_count llama al método del usuario y hace commit.""" - # Act - result = repo.increment_analysis_count(user_with_count) - - # Assert - 
user_with_count.increment_analysis_count.assert_called_once() - mock_session.commit.assert_called_once() - mock_session.refresh.assert_called_once_with(user_with_count) +"""Tests para UserRepository.""" + +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy.orm import Session + +from src.models.enums.user_role import UserRole +from src.models.user import UserEntity +from src.repositories.user_repo import UserRepository + + +class TestUserRepository: + """Tests para UserRepository.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository con session mockeada.""" + return UserRepository(mock_session) + + @pytest.fixture + def sample_user_entity(self): + """Crea una entidad de usuario de prueba.""" + entity = MagicMock(spec=UserEntity) + entity.id = "user_123" + entity.email = "test@example.com" + entity.name = "Test User" + entity.role = UserRole.DEVELOPER + entity.daily_analysis_count = 0 + entity.last_analysis_date = None + entity.created_at = datetime(2025, 1, 1, 12, 0, 0) + entity.updated_at = datetime(2025, 1, 1, 12, 0, 0) + return entity + + +class TestGetById: + """Tests para get_by_id.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + @pytest.fixture + def sample_user_entity(self): + """Entidad de usuario de prueba.""" + entity = MagicMock(spec=UserEntity) + entity.id = "user_123" + entity.email = "test@example.com" + entity.name = "Test User" + entity.role = UserRole.DEVELOPER + return entity + + def test_get_by_id_found(self, repo, mock_session, sample_user_entity): + """get_by_id retorna usuario si existe.""" + # Arrange + mock_query = MagicMock() + 
mock_session.query.return_value = mock_query + mock_query.filter.return_value.first.return_value = sample_user_entity + + # Act + result = repo.get_by_id("user_123") + + # Assert + assert result == sample_user_entity + mock_session.query.assert_called_once_with(UserEntity) + + def test_get_by_id_not_found(self, repo, mock_session): + """get_by_id retorna None si usuario no existe.""" + # Arrange + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.filter.return_value.first.return_value = None + + # Act + result = repo.get_by_id("nonexistent_user") + + # Assert + assert result is None + + +class TestGetByEmail: + """Tests para get_by_email.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + @pytest.fixture + def sample_user_entity(self): + """Entidad de usuario de prueba.""" + entity = MagicMock(spec=UserEntity) + entity.id = "user_456" + entity.email = "found@example.com" + entity.name = "Found User" + entity.role = UserRole.DEVELOPER + return entity + + def test_get_by_email_found(self, repo, mock_session, sample_user_entity): + """get_by_email retorna usuario si existe.""" + # Arrange + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.filter.return_value.first.return_value = sample_user_entity + + # Act + result = repo.get_by_email("found@example.com") + + # Assert + assert result == sample_user_entity + assert result.email == "found@example.com" + + def test_get_by_email_not_found(self, repo, mock_session): + """get_by_email retorna None si email no existe.""" + # Arrange + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.filter.return_value.first.return_value = None + + # Act + result = repo.get_by_email("notfound@example.com") + + # Assert + assert result is 
None + + +class TestCreate: + """Tests para create.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + def test_create_user_success(self, repo, mock_session): + """create crea usuario y llama add, commit, refresh.""" + # Act + with patch("src.repositories.user_repo.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 10, 0, 0) + result = repo.create( + user_id="new_user_123", + email="newuser@example.com", + name="New User", + avatar_url="https://example.com/avatar.png", + role=UserRole.DEVELOPER, + ) + + # Assert + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + mock_session.refresh.assert_called_once() + + # Verificar que el usuario fue creado con los datos correctos + created_user = mock_session.add.call_args[0][0] + assert created_user.id == "new_user_123" + assert created_user.email == "newuser@example.com" + assert created_user.name == "New User" + assert created_user.avatar_url == "https://example.com/avatar.png" + assert created_user.role == UserRole.DEVELOPER + + def test_create_user_with_defaults(self, repo, mock_session): + """create usa valores por defecto correctamente.""" + # Act + with patch("src.repositories.user_repo.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 10, 0, 0) + result = repo.create( + user_id="minimal_user", + email="minimal@example.com", + ) + + # Assert + created_user = mock_session.add.call_args[0][0] + assert created_user.id == "minimal_user" + assert created_user.email == "minimal@example.com" + assert created_user.name is None + assert created_user.avatar_url is None + assert created_user.role == UserRole.DEVELOPER + assert created_user.daily_analysis_count == 0 + + +class TestUpdate: + """Tests para update.""" + + 
@pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + @pytest.fixture + def existing_user(self): + """Usuario existente para actualizar.""" + user = MagicMock(spec=UserEntity) + user.id = "user_to_update" + user.email = "old@example.com" + user.name = "Old Name" + user.avatar_url = "https://old.com/avatar.png" + user.role = UserRole.DEVELOPER + return user + + def test_update_all_fields(self, repo, mock_session, existing_user): + """update actualiza todos los campos proporcionados.""" + # Act + with patch("src.repositories.user_repo.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) + result = repo.update( + user=existing_user, + email="new@example.com", + name="New Name", + avatar_url="https://new.com/avatar.png", + ) + + # Assert + assert existing_user.email == "new@example.com" + assert existing_user.name == "New Name" + assert existing_user.avatar_url == "https://new.com/avatar.png" + mock_session.commit.assert_called_once() + mock_session.refresh.assert_called_once_with(existing_user) + + def test_update_partial_fields(self, repo, mock_session, existing_user): + """update solo actualiza campos proporcionados.""" + original_email = existing_user.email + original_avatar = existing_user.avatar_url + + # Act + with patch("src.repositories.user_repo.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) + result = repo.update( + user=existing_user, + name="Only Name Changed", + ) + + # Assert + assert existing_user.name == "Only Name Changed" + # Email y avatar no deberían cambiar + mock_session.commit.assert_called_once() + + def test_update_no_fields(self, repo, mock_session, existing_user): + """update sin campos aún actualiza updated_at y hace commit.""" + # Act + with 
patch("src.repositories.user_repo.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2025, 12, 1, 15, 0, 0) + result = repo.update(user=existing_user) + + # Assert + mock_session.commit.assert_called_once() + mock_session.refresh.assert_called_once() + + +class TestDelete: + """Tests para delete.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + @pytest.fixture + def user_to_delete(self): + """Usuario para eliminar.""" + user = MagicMock(spec=UserEntity) + user.id = "user_to_delete" + user.email = "delete@example.com" + return user + + def test_delete_success(self, repo, mock_session, user_to_delete): + """delete elimina usuario y llama commit.""" + # Act + repo.delete(user_to_delete) + + # Assert + mock_session.delete.assert_called_once_with(user_to_delete) + mock_session.commit.assert_called_once() + + +class TestIncrementAnalysisCount: + """Tests para increment_analysis_count.""" + + @pytest.fixture + def mock_session(self): + """Mock de SQLAlchemy Session.""" + return MagicMock(spec=Session) + + @pytest.fixture + def repo(self, mock_session): + """Instancia de UserRepository.""" + return UserRepository(mock_session) + + @pytest.fixture + def user_with_count(self): + """Usuario con contador de análisis.""" + user = MagicMock(spec=UserEntity) + user.id = "counting_user" + user.daily_analysis_count = 5 + return user + + def test_increment_analysis_count_success(self, repo, mock_session, user_with_count): + """increment_analysis_count llama al método del usuario y hace commit.""" + # Act + result = repo.increment_analysis_count(user_with_count) + + # Assert + user_with_count.increment_analysis_count.assert_called_once() + mock_session.commit.assert_called_once() + mock_session.refresh.assert_called_once_with(user_with_count) diff --git 
a/backend/tests/unit/services/test_analysis_service.py b/backend/tests/unit/services/test_analysis_service.py index cace57a..e84487d 100644 --- a/backend/tests/unit/services/test_analysis_service.py +++ b/backend/tests/unit/services/test_analysis_service.py @@ -174,3 +174,70 @@ def test_calculate_quality_score_all_severities(service): ] score = service._calculate_quality_score(findings) assert score == 100 - (10 + 5 + 2 + 1 + 0) # 82 + + +# Tests de analyze_code (Integración de servicio) + + +@pytest.mark.asyncio +async def test_analyze_code_success(service, mock_repo): + """Prueba el flujo completo de analyze_code.""" + content = b"import os\n" * 6 + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "valid.py" + mock_file.read.return_value = content + + # Mock all three agents used in analysis_service + with patch("src.services.analysis_service.SecurityAgent") as MockSecurityAgent, patch( + "src.services.analysis_service.StyleAgent" + ) as MockStyleAgent, patch("src.services.analysis_service.QualityAgent") as MockQualityAgent: + + mock_sec_instance = MockSecurityAgent.return_value + mock_sec_instance.analyze.return_value = [] + + mock_style_instance = MockStyleAgent.return_value + mock_style_instance.analyze.return_value = [] + + mock_qual_instance = MockQualityAgent.return_value + mock_qual_instance.analyze.return_value = [] + + mock_repo.create.return_value = MagicMock(status=ReviewStatus.COMPLETED) + + result = await service.analyze_code(mock_file, "user_123") + + assert result.status == ReviewStatus.COMPLETED + mock_repo.create.assert_called_once() + mock_sec_instance.analyze.assert_called_once() + mock_style_instance.analyze.assert_called_once() + mock_qual_instance.analyze.assert_called_once() + + +@pytest.mark.asyncio +async def test_analyze_code_agent_failure(service, mock_repo): + """Prueba que el análisis continúe si un agente falla.""" + content = b"import os\n" * 6 + mock_file = AsyncMock(spec=UploadFile) + mock_file.filename = "valid.py" + 
mock_file.read.return_value = content + + with patch("src.services.analysis_service.SecurityAgent") as MockSecurityAgent, patch( + "src.services.analysis_service.StyleAgent" + ) as MockStyleAgent, patch("src.services.analysis_service.QualityAgent") as MockQualityAgent: + + # Security agent fails (along with StyleAgent in same try block) + mock_sec_instance = MockSecurityAgent.return_value + mock_sec_instance.analyze.side_effect = Exception("Security Agent Failed") + + mock_style_instance = MockStyleAgent.return_value + mock_style_instance.analyze.return_value = [] + + # Quality agent succeeds + mock_qual_instance = MockQualityAgent.return_value + mock_qual_instance.analyze.return_value = [] + + mock_repo.create.return_value = MagicMock(status=ReviewStatus.COMPLETED) + + result = await service.analyze_code(mock_file, "user_123") + + assert result.status == ReviewStatus.COMPLETED + mock_repo.create.assert_called_once() diff --git a/backend/tests/unit/services/test_auth_service.py b/backend/tests/unit/services/test_auth_service.py index 1cef732..2aef7e1 100644 --- a/backend/tests/unit/services/test_auth_service.py +++ b/backend/tests/unit/services/test_auth_service.py @@ -1,100 +1,100 @@ -"""Tests para AuthService.""" - -from unittest.mock import MagicMock - -import pytest - -from src.external.clerk_client import ClerkTokenInvalidError -from src.models.enums.user_role import UserRole -from src.models.user import UserEntity -from src.schemas.user import Role, User -from src.services.auth_service import AuthService - - -class TestAuthService: - """Tests para AuthService.""" - - @pytest.fixture - def mock_clerk_client(self): - """Mock de ClerkClient.""" - return MagicMock() - - @pytest.fixture - def mock_user_repository(self): - """Mock de UserRepository.""" - return MagicMock() - - @pytest.fixture - def auth_service(self, mock_clerk_client, mock_user_repository): - """Crea instancia de AuthService con mocks.""" - return AuthService(mock_clerk_client, 
mock_user_repository) - - @pytest.fixture - def sample_user_entity(self): - """Crea una entidad de usuario de prueba.""" - entity = MagicMock(spec=UserEntity) - entity.id = "user_abc123" - entity.email = "test@example.com" - entity.name = "Test User" - entity.role = UserRole.DEVELOPER - return entity - - def test_login_user_creates_new_user( - self, auth_service, mock_clerk_client, mock_user_repository, sample_user_entity - ): - """login_user crea usuario si no existe.""" - mock_clerk_client.verify_token.return_value = { - "sub": "user_new", - "email": "new@example.com", - "name": "New User", - } - mock_user_repository.get_by_id.return_value = None - mock_user_repository.create.return_value = sample_user_entity - - result = auth_service.login_user("valid-token") - - assert isinstance(result, User) - mock_user_repository.get_by_id.assert_called_once_with("user_new") - mock_user_repository.create.assert_called_once() - - def test_login_user_updates_existing_user( - self, auth_service, mock_clerk_client, mock_user_repository, sample_user_entity - ): - """login_user actualiza usuario si ya existe.""" - mock_clerk_client.verify_token.return_value = { - "sub": "user_abc123", - "email": "updated@example.com", - "name": "Updated Name", - } - mock_user_repository.get_by_id.return_value = sample_user_entity - mock_user_repository.update.return_value = sample_user_entity - - result = auth_service.login_user("valid-token") - - assert isinstance(result, User) - mock_user_repository.update.assert_called_once() - mock_user_repository.create.assert_not_called() - - def test_login_user_invalid_token_raises( - self, auth_service, mock_clerk_client, mock_user_repository - ): - """login_user propaga error si token es inválido.""" - mock_clerk_client.verify_token.side_effect = ClerkTokenInvalidError("Invalid") - - with pytest.raises(ClerkTokenInvalidError): - auth_service.login_user("invalid-token") - - def test_get_user_from_token(self, auth_service, mock_clerk_client): - 
"""get_user_from_token retorna User sin sincronizar BD.""" - mock_clerk_client.verify_token.return_value = { - "sub": "user_fromtoken", - "email": "fromtoken@example.com", - "name": "From Token", - } - - result = auth_service.get_user_from_token("valid-token") - - assert isinstance(result, User) - assert result.id == "user_fromtoken" - assert result.email == "fromtoken@example.com" - assert result.role == Role.DEVELOPER +"""Tests para AuthService.""" + +from unittest.mock import MagicMock + +import pytest + +from src.external.clerk_client import ClerkTokenInvalidError +from src.models.enums.user_role import UserRole +from src.models.user import UserEntity +from src.schemas.user import Role, User +from src.services.auth_service import AuthService + + +class TestAuthService: + """Tests para AuthService.""" + + @pytest.fixture + def mock_clerk_client(self): + """Mock de ClerkClient.""" + return MagicMock() + + @pytest.fixture + def mock_user_repository(self): + """Mock de UserRepository.""" + return MagicMock() + + @pytest.fixture + def auth_service(self, mock_clerk_client, mock_user_repository): + """Crea instancia de AuthService con mocks.""" + return AuthService(mock_clerk_client, mock_user_repository) + + @pytest.fixture + def sample_user_entity(self): + """Crea una entidad de usuario de prueba.""" + entity = MagicMock(spec=UserEntity) + entity.id = "user_abc123" + entity.email = "test@example.com" + entity.name = "Test User" + entity.role = UserRole.DEVELOPER + return entity + + def test_login_user_creates_new_user( + self, auth_service, mock_clerk_client, mock_user_repository, sample_user_entity + ): + """login_user crea usuario si no existe.""" + mock_clerk_client.verify_token.return_value = { + "sub": "user_new", + "email": "new@example.com", + "name": "New User", + } + mock_user_repository.get_by_id.return_value = None + mock_user_repository.create.return_value = sample_user_entity + + result = auth_service.login_user("valid-token") + + assert 
isinstance(result, User) + mock_user_repository.get_by_id.assert_called_once_with("user_new") + mock_user_repository.create.assert_called_once() + + def test_login_user_updates_existing_user( + self, auth_service, mock_clerk_client, mock_user_repository, sample_user_entity + ): + """login_user actualiza usuario si ya existe.""" + mock_clerk_client.verify_token.return_value = { + "sub": "user_abc123", + "email": "updated@example.com", + "name": "Updated Name", + } + mock_user_repository.get_by_id.return_value = sample_user_entity + mock_user_repository.update.return_value = sample_user_entity + + result = auth_service.login_user("valid-token") + + assert isinstance(result, User) + mock_user_repository.update.assert_called_once() + mock_user_repository.create.assert_not_called() + + def test_login_user_invalid_token_raises( + self, auth_service, mock_clerk_client, mock_user_repository + ): + """login_user propaga error si token es inválido.""" + mock_clerk_client.verify_token.side_effect = ClerkTokenInvalidError("Invalid") + + with pytest.raises(ClerkTokenInvalidError): + auth_service.login_user("invalid-token") + + def test_get_user_from_token(self, auth_service, mock_clerk_client): + """get_user_from_token retorna User sin sincronizar BD.""" + mock_clerk_client.verify_token.return_value = { + "sub": "user_fromtoken", + "email": "fromtoken@example.com", + "name": "From Token", + } + + result = auth_service.get_user_from_token("valid-token") + + assert isinstance(result, User) + assert result.id == "user_fromtoken" + assert result.email == "fromtoken@example.com" + assert result.role == Role.DEVELOPER diff --git a/backend/tests/unit/test_analysis_schemas.py b/backend/tests/unit/test_analysis_schemas.py index 11f1147..6aee1b3 100644 --- a/backend/tests/unit/test_analysis_schemas.py +++ b/backend/tests/unit/test_analysis_schemas.py @@ -1,289 +1,289 @@ -""" -Unit tests for Analysis Schemas -Tests para los esquemas de análisis -""" - -from datetime import datetime - 
-import pytest -from pydantic import ValidationError - -from src.schemas.analysis import AnalysisContext, AnalysisRequest, AnalysisResponse -from src.schemas.finding import Finding, Severity - - -class TestAnalysisContext: - """Tests para AnalysisContext schema.""" - - def test_create_valid_context(self): - """Test crear contexto válido.""" - context = AnalysisContext( - code_content="def hello():\n print('Hello')", filename="test.py" - ) - - assert context.code_content == "def hello():\n print('Hello')" - assert context.filename == "test.py" - assert context.language == "python" - assert context.analysis_id is not None - assert isinstance(context.created_at, datetime) - - def test_empty_code_raises_error(self): - """Test que código vacío lanza error.""" - with pytest.raises(ValidationError) as exc_info: - AnalysisContext(code_content="", filename="test.py") - - assert "code_content" in str(exc_info.value).lower() - - def test_whitespace_only_code_raises_error(self): - """Test que código solo con espacios lanza error.""" - with pytest.raises(ValidationError): - AnalysisContext(code_content=" \n ", filename="test.py") - - def test_invalid_filename_extension(self): - """Test que extensión no .py lanza error.""" - with pytest.raises(ValidationError) as exc_info: - AnalysisContext(code_content="code", filename="test.txt") - - assert "Python files" in str(exc_info.value) - - def test_short_filename_raises_error(self): - """Test que filename muy corto lanza error.""" - with pytest.raises(ValidationError): - AnalysisContext(code_content="code", filename=".p") - - def test_line_count_property(self): - """Test propiedad line_count.""" - context = AnalysisContext(code_content="line1\nline2\nline3", filename="test.py") - assert context.line_count == 3 - - def test_line_count_single_line(self): - """Test line_count con una línea.""" - context = AnalysisContext(code_content="single line", filename="test.py") - assert context.line_count == 1 - - def 
test_char_count_property(self): - """Test propiedad char_count.""" - context = AnalysisContext(code_content="hello world", filename="test.py") - assert context.char_count == 11 - - def test_add_metadata(self): - """Test agregar metadata.""" - context = AnalysisContext(code_content="code", filename="test.py") - - context.add_metadata("user_id", "123") - context.add_metadata("project", "CodeGuard") - - assert context.metadata["user_id"] == "123" - assert context.metadata["project"] == "CodeGuard" - - def test_metadata_persists_after_mutation(self): - """Test que metadata persiste después de mutación.""" - context = AnalysisContext(code_content="code", filename="test.py") - - context.add_metadata("key1", "value1") - assert "key1" in context.metadata - - context.add_metadata("key2", "value2") - assert "key1" in context.metadata # key1 todavía existe - - -class TestAnalysisRequest: - """Tests para AnalysisRequest schema.""" - - def test_create_valid_request(self): - """Test crear request válido.""" - request = AnalysisRequest(filename="app.py", code_content="def main():\n pass") - - assert request.filename == "app.py" - assert request.code_content == "def main():\n pass" - assert request.agents_config is None - - def test_request_with_agents_config(self): - """Test request con configuración de agentes.""" - config = {"security": True, "quality": True, "performance": False, "style": True} - request = AnalysisRequest(filename="app.py", code_content="code", agents_config=config) - - assert request.agents_config == config - assert request.agents_config["security"] is True - assert request.agents_config["performance"] is False - - -class TestAnalysisResponse: - """Tests para AnalysisResponse schema.""" - - def test_create_response(self): - """Test crear response.""" - from uuid import uuid4 - - analysis_id = uuid4() - response = AnalysisResponse( - analysis_id=analysis_id, - filename="app.py", - status="pending", - quality_score=85, - total_findings=3, - 
created_at=datetime.utcnow(), - ) - - assert response.analysis_id == analysis_id - assert response.filename == "app.py" - assert response.status == "pending" - assert response.quality_score == 85 - assert response.total_findings == 3 - - -class TestFinding: - """Tests para Finding schema.""" - - def test_create_valid_finding(self): - """Test crear finding válido.""" - finding = Finding( - severity=Severity.CRITICAL, - issue_type="dangerous_function", - message="Use of eval detected", - line_number=10, - agent_name="SecurityAgent", - ) - - assert finding.severity == Severity.CRITICAL - assert finding.issue_type == "dangerous_function" - assert finding.line_number == 10 - assert isinstance(finding.detected_at, datetime) - - def test_invalid_line_number_zero(self): - """Test que line_number < 1 lanza error.""" - with pytest.raises(ValidationError): - Finding( - severity=Severity.CRITICAL, - issue_type="test", - message="Test message", - line_number=0, - agent_name="TestAgent", - ) - - def test_invalid_line_number_negative(self): - """Test que line_number negativo lanza error.""" - with pytest.raises(ValidationError): - Finding( - severity=Severity.CRITICAL, - issue_type="test", - message="Test message", - line_number=-1, - agent_name="TestAgent", - ) - - def test_is_critical_property(self): - """Test propiedad is_critical.""" - critical = Finding( - severity=Severity.CRITICAL, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - - assert critical.is_critical is True - - non_critical = Finding( - severity=Severity.INFO, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - - assert non_critical.is_critical is False - - def test_is_high_or_critical_property(self): - """Test propiedad is_high_or_critical.""" - critical = Finding( - severity=Severity.CRITICAL, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - assert 
critical.is_high_or_critical is True - - high = Finding( - severity=Severity.HIGH, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - assert high.is_high_or_critical is True - - medium = Finding( - severity=Severity.MEDIUM, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - assert medium.is_high_or_critical is False - - def test_is_actionable_property(self): - """Test propiedad is_actionable.""" - critical = Finding( - severity=Severity.CRITICAL, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - assert critical.is_actionable is True - - info = Finding( - severity=Severity.INFO, - issue_type="test", - message="Test message", - line_number=1, - agent_name="TestAgent", - ) - assert info.is_actionable is False - - -class TestAnalysisContextHelpers: - def test_code_is_dedented_and_ast_cached(self): - context = AnalysisContext( - code_content=" def foo():\n return 1", - filename="foo.py", - ) - assert context.code_content.startswith("def foo") - first_ast = context.get_ast() - assert context.get_ast() is first_ast - - def test_get_ast_invalid_code_raises(self): - context = AnalysisContext(code_content="def broken(", filename="bad.py") - with pytest.raises(SyntaxError): - context.get_ast() - - def test_get_lines_and_snippets(self): - context = AnalysisContext(code_content="a\nb\nc", filename="file.py") - assert context.get_line(2) == "b" - assert context.get_line(99) is None - assert context.get_code_snippet(1, 2) == "a\nb" - - def test_finding_from_and_to_dict_without_detected_at(self): - data = { - "severity": "CRITICAL", - "issue_type": "dangerous_function", - "message": "Use of eval() detected", - "line_number": 5, - "agent_name": "SecurityAgent", - } - finding = Finding.from_dict(data) - serialized = finding.to_dict() - assert serialized["severity"] == "CRITICAL" - assert "detected_at" in serialized - - def 
test_calculate_penalty_map(self): - finding = Finding( - severity=Severity.HIGH, - issue_type="test", - message="Test issue", - line_number=1, - agent_name="TestAgent", - ) - assert finding.calculate_penalty() == 5 +""" +Unit tests for Analysis Schemas +Tests para los esquemas de análisis +""" + +from datetime import datetime + +import pytest +from pydantic import ValidationError + +from src.schemas.analysis import AnalysisContext, AnalysisRequest, AnalysisResponse +from src.schemas.finding import Finding, Severity + + +class TestAnalysisContext: + """Tests para AnalysisContext schema.""" + + def test_create_valid_context(self): + """Test crear contexto válido.""" + context = AnalysisContext( + code_content="def hello():\n print('Hello')", filename="test.py" + ) + + assert context.code_content == "def hello():\n print('Hello')" + assert context.filename == "test.py" + assert context.language == "python" + assert context.analysis_id is not None + assert isinstance(context.created_at, datetime) + + def test_empty_code_raises_error(self): + """Test que código vacío lanza error.""" + with pytest.raises(ValidationError) as exc_info: + AnalysisContext(code_content="", filename="test.py") + + assert "code_content" in str(exc_info.value).lower() + + def test_whitespace_only_code_raises_error(self): + """Test que código solo con espacios lanza error.""" + with pytest.raises(ValidationError): + AnalysisContext(code_content=" \n ", filename="test.py") + + def test_invalid_filename_extension(self): + """Test que extensión no .py lanza error.""" + with pytest.raises(ValidationError) as exc_info: + AnalysisContext(code_content="code", filename="test.txt") + + assert "Python files" in str(exc_info.value) + + def test_short_filename_raises_error(self): + """Test que filename muy corto lanza error.""" + with pytest.raises(ValidationError): + AnalysisContext(code_content="code", filename=".p") + + def test_line_count_property(self): + """Test propiedad line_count.""" + context = 
AnalysisContext(code_content="line1\nline2\nline3", filename="test.py") + assert context.line_count == 3 + + def test_line_count_single_line(self): + """Test line_count con una línea.""" + context = AnalysisContext(code_content="single line", filename="test.py") + assert context.line_count == 1 + + def test_char_count_property(self): + """Test propiedad char_count.""" + context = AnalysisContext(code_content="hello world", filename="test.py") + assert context.char_count == 11 + + def test_add_metadata(self): + """Test agregar metadata.""" + context = AnalysisContext(code_content="code", filename="test.py") + + context.add_metadata("user_id", "123") + context.add_metadata("project", "CodeGuard") + + assert context.metadata["user_id"] == "123" + assert context.metadata["project"] == "CodeGuard" + + def test_metadata_persists_after_mutation(self): + """Test que metadata persiste después de mutación.""" + context = AnalysisContext(code_content="code", filename="test.py") + + context.add_metadata("key1", "value1") + assert "key1" in context.metadata + + context.add_metadata("key2", "value2") + assert "key1" in context.metadata # key1 todavía existe + + +class TestAnalysisRequest: + """Tests para AnalysisRequest schema.""" + + def test_create_valid_request(self): + """Test crear request válido.""" + request = AnalysisRequest(filename="app.py", code_content="def main():\n pass") + + assert request.filename == "app.py" + assert request.code_content == "def main():\n pass" + assert request.agents_config is None + + def test_request_with_agents_config(self): + """Test request con configuración de agentes.""" + config = {"security": True, "quality": True, "performance": False, "style": True} + request = AnalysisRequest(filename="app.py", code_content="code", agents_config=config) + + assert request.agents_config == config + assert request.agents_config["security"] is True + assert request.agents_config["performance"] is False + + +class TestAnalysisResponse: + """Tests para 
AnalysisResponse schema.""" + + def test_create_response(self): + """Test crear response.""" + from uuid import uuid4 + + analysis_id = uuid4() + response = AnalysisResponse( + analysis_id=analysis_id, + filename="app.py", + status="pending", + quality_score=85, + total_findings=3, + created_at=datetime.utcnow(), + ) + + assert response.analysis_id == analysis_id + assert response.filename == "app.py" + assert response.status == "pending" + assert response.quality_score == 85 + assert response.total_findings == 3 + + +class TestFinding: + """Tests para Finding schema.""" + + def test_create_valid_finding(self): + """Test crear finding válido.""" + finding = Finding( + severity=Severity.CRITICAL, + issue_type="dangerous_function", + message="Use of eval detected", + line_number=10, + agent_name="SecurityAgent", + ) + + assert finding.severity == Severity.CRITICAL + assert finding.issue_type == "dangerous_function" + assert finding.line_number == 10 + assert isinstance(finding.detected_at, datetime) + + def test_invalid_line_number_zero(self): + """Test que line_number < 1 lanza error.""" + with pytest.raises(ValidationError): + Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=0, + agent_name="TestAgent", + ) + + def test_invalid_line_number_negative(self): + """Test que line_number negativo lanza error.""" + with pytest.raises(ValidationError): + Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=-1, + agent_name="TestAgent", + ) + + def test_is_critical_property(self): + """Test propiedad is_critical.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + + assert critical.is_critical is True + + non_critical = Finding( + severity=Severity.INFO, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + + assert non_critical.is_critical 
is False + + def test_is_high_or_critical_property(self): + """Test propiedad is_high_or_critical.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert critical.is_high_or_critical is True + + high = Finding( + severity=Severity.HIGH, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert high.is_high_or_critical is True + + medium = Finding( + severity=Severity.MEDIUM, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert medium.is_high_or_critical is False + + def test_is_actionable_property(self): + """Test propiedad is_actionable.""" + critical = Finding( + severity=Severity.CRITICAL, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert critical.is_actionable is True + + info = Finding( + severity=Severity.INFO, + issue_type="test", + message="Test message", + line_number=1, + agent_name="TestAgent", + ) + assert info.is_actionable is False + + +class TestAnalysisContextHelpers: + def test_code_is_dedented_and_ast_cached(self): + context = AnalysisContext( + code_content=" def foo():\n return 1", + filename="foo.py", + ) + assert context.code_content.startswith("def foo") + first_ast = context.get_ast() + assert context.get_ast() is first_ast + + def test_get_ast_invalid_code_raises(self): + context = AnalysisContext(code_content="def broken(", filename="bad.py") + with pytest.raises(SyntaxError): + context.get_ast() + + def test_get_lines_and_snippets(self): + context = AnalysisContext(code_content="a\nb\nc", filename="file.py") + assert context.get_line(2) == "b" + assert context.get_line(99) is None + assert context.get_code_snippet(1, 2) == "a\nb" + + def test_finding_from_and_to_dict_without_detected_at(self): + data = { + "severity": "CRITICAL", + "issue_type": "dangerous_function", + "message": "Use of 
eval() detected", + "line_number": 5, + "agent_name": "SecurityAgent", + } + finding = Finding.from_dict(data) + serialized = finding.to_dict() + assert serialized["severity"] == "CRITICAL" + assert "detected_at" in serialized + + def test_calculate_penalty_map(self): + finding = Finding( + severity=Severity.HIGH, + issue_type="test", + message="Test issue", + line_number=1, + agent_name="TestAgent", + ) + assert finding.calculate_penalty() == 5 diff --git a/backend/tests/unit/test_main.py b/backend/tests/unit/test_main.py index fbab9cf..23fd600 100644 --- a/backend/tests/unit/test_main.py +++ b/backend/tests/unit/test_main.py @@ -1,35 +1,35 @@ -""" -Tests for main FastAPI application -""" - -from fastapi.testclient import TestClient - -from src.main import app - -client = TestClient(app) - - -def test_health_endpoint(): - """Test health check endpoint returns 200""" - response = client.get("/health") - assert response.status_code == 200 - data = response.json() - assert data["status"] == "healthy" - assert data["version"] == "1.0.0" - assert "service" in data - - -def test_root_endpoint(): - """Test root endpoint returns 200""" - response = client.get("/") - assert response.status_code == 200 - data = response.json() - assert "message" in data - assert "docs" in data - assert data["docs"] == "/docs" - - -def test_docs_endpoint_accessible(): - """Test Swagger docs are accessible""" - response = client.get("/docs") - assert response.status_code == 200 +""" +Tests for main FastAPI application +""" + +from fastapi.testclient import TestClient + +from src.main import app + +client = TestClient(app) + + +def test_health_endpoint(): + """Test health check endpoint returns 200""" + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert data["version"] == "1.0.0" + assert "service" in data + + +def test_root_endpoint(): + """Test root endpoint returns 200""" + response = client.get("/") + 
assert response.status_code == 200 + data = response.json() + assert "message" in data + assert "docs" in data + assert data["docs"] == "/docs" + + +def test_docs_endpoint_accessible(): + """Test Swagger docs are accessible""" + response = client.get("/docs") + assert response.status_code == 200 diff --git a/backend/tests/unit/vulnerable_test.py b/backend/tests/unit/vulnerable_test.py index 3ae4b84..30f1fee 100644 --- a/backend/tests/unit/vulnerable_test.py +++ b/backend/tests/unit/vulnerable_test.py @@ -1,19 +1,19 @@ -"""Archivo de prueba con vulnerabilidades.""" - -import os -import pickle - - -def unsafe_eval(user_input): - """Uso peligroso de eval.""" - return eval(user_input) - - -def unsafe_query(user_id): - """SQL injection vulnerability.""" - query = "SELECT * FROM users WHERE id = " + user_id - return query - - -PASSWORD = "super_secret_password_123" -API_KEY = "sk-1234567890abcdef" +"""Archivo de prueba con vulnerabilidades.""" + +import os +import pickle + + +def unsafe_eval(user_input): + """Uso peligroso de eval.""" + return eval(user_input) + + +def unsafe_query(user_id): + """SQL injection vulnerability.""" + query = "SELECT * FROM users WHERE id = " + user_id + return query + + +PASSWORD = "super_secret_password_123" +API_KEY = "sk-1234567890abcdef" diff --git a/docs/ci-cd-setup.md b/docs/ci-cd-setup.md index dd65d93..ed437b8 100644 --- a/docs/ci-cd-setup.md +++ b/docs/ci-cd-setup.md @@ -1,694 +1,694 @@ -# 🔧 Documentación Técnica del Pipeline CI/CD - CodeGuard AI - -Esta documentación detalla la configuración completa del pipeline de **Integración Continua / Despliegue Continuo (CI/CD)** implementado con **GitHub Actions** para CodeGuard AI. 
- ---- - -## 📋 Tabla de Contenidos - -- [Visión General](#-visión-general-del-pipeline) -- [Workflows Implementados](#-workflows-implementados) -- [Estructura de Directorios](#-estructura-de-archivos) -- [Protección de Ramas](#-protección-de-ramas) -- [Secretos y Variables](#-secretos-y-variables-de-entorno) -- [Configuración Detallada](#-configuración-detallada-de-workflows) -- [Badges de Estado](#-badges-de-estado) -- [Monitoreo](#-monitoreo-y-logging) -- [Troubleshooting](#-troubleshooting) -- [Mejores Prácticas](#-mejores-prácticas) - ---- - -## 🎯 Visión General del Pipeline - -El pipeline CI/CD de CodeGuard AI automatiza la **validación, testing y construcción** del código para garantizar que todos los cambios que llegan a las ramas `main` y `develop` cumplen con los estándares de calidad establecidos. - -### Objetivos del Pipeline - -1. ✅ **Validación Automática**: Linting, tests, build -2. ✅ **Garantía de Calidad**: Cobertura ≥75%, pylint ≥8.5/10 -3. ✅ **Prevención de Regresiones**: Tests obligatorios -4. ✅ **Feedback Inmediato**: En PRs y commits -5. ✅ **Deployment Seguro**: Build validado - -### Arquitectura del Pipeline - -``` -┌───────────────────────────────────────────────────────────┐ -│ GITHUB ACTIONS WORKFLOW ORCHESTRATION │ -└───────────────────────────────────────────────────────────┘ - ↓ - Trigger: push a rama / pull request - ↓ - ┌────────────────────────┬────────────────────┬──────────────┐ - ↓ ↓ ↓ ↓ -┌──────────────┐ ┌───────────────────┐ ┌─────────────┐ ┌──────────┐ -│ Lint Check │ │ Test & Coverage │ │ Docker Build│ │ Security │ -│ (lint.yml) │ │ (test.yml) │ │ (docker.yml)│ │ Scan │ -└──────────────┘ └───────────────────┘ └─────────────┘ └──────────┘ - ✅/❌ ✅/❌ ✅/❌ ✅/❌ - └────────────────────┬────────────────────┘ - ↓ - ┌──────────────────────┐ - │ Branch Protection │ - │ Status Checks │ - └──────────────────────┘ - ↓ - Merge Allowed? 
✅ -``` - ---- - -## 🔄 Workflows Implementados - -### 1️⃣ Workflow: Lint Check (`lint.yml`) - -**Ubicación**: `.github/workflows/lint.yml` - -**Propósito**: Validar que el código cumple con estándares de estilo y calidad. - -**Triggers**: -- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` -- Pull requests hacia: `main`, `develop` - -**Herramientas**: -- **Black**: Formateo de código -- **isort**: Ordenamiento de imports -- **Flake8**: Análisis de PEP 8 y errores básicos -- **Pylint**: Análisis comprehensive de código - -**Configuración**: - -```yaml -name: Lint Code - -on: - push: - branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] - paths: - - "backend/src/**/*.py" - - "backend/tests/**/*.py" - - ".github/workflows/lint.yml" - pull_request: - branches: [main, develop] - -jobs: - lint: - name: Code Quality Check - runs-on: ubuntu-latest - - steps: - # 1. Checkout código - - uses: actions/checkout@v4 - - # 2. Setup Python 3.11 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" - - # 3. Instalar dependencias - - name: Install dependencies - run: | - cd backend - python -m pip install --upgrade pip - pip install black isort flake8 pylint - pip install -r requirements.txt - - # 4. Ejecutar Black (formatter) - - name: Run Black - run: | - cd backend - black src/ tests/ --line-length=100 --check - - # 5. Ejecutar isort - - name: Run isort - run: | - cd backend - isort src/ tests/ --profile=black --check-only - - # 6. Ejecutar Flake8 - - name: Run Flake8 - run: | - cd backend - flake8 src/ tests/ --max-line-length=100 --extend-ignore=E203,W503 - - # 7. Ejecutar Pylint - - name: Run Pylint - run: | - cd backend - pylint src/ --rcfile=.pylintrc --fail-under=8.5 || exit 1 - echo "✅ Pylint passed with score ≥8.5/10" - - # 8. Summary - - name: Summary - if: success() - run: echo "✅ All lint checks passed!" 
-``` - -**Criterios de Éxito**: -- ✅ Black: Sin cambios requeridos (--check) -- ✅ isort: Imports correctamente ordenados -- ✅ Flake8: Sin errores de estilo -- ✅ Pylint: Puntuación ≥ 8.5/10 - ---- - -### 2️⃣ Workflow: Testing & Coverage (`test.yml`) - -**Ubicación**: `.github/workflows/test.yml` - -**Propósito**: Ejecutar tests y validar cobertura de código. - -**Triggers**: -- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` -- Pull requests hacia: `main`, `develop` - -**Servicios**: -- PostgreSQL 15 (para tests de integración) -- Redis (cache layer) - -**Configuración**: - -```yaml -name: Tests & Coverage - -on: - push: - branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] - paths: - - "backend/src/**/*.py" - - "backend/tests/**/*.py" - - "backend/requirements.txt" - - ".github/workflows/test.yml" - pull_request: - branches: [main, develop] - -jobs: - test: - name: Run Tests & Coverage - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: ["3.11", "3.12"] # Test en múltiples versiones - - services: - postgres: - image: postgres:15-alpine - env: - POSTGRES_USER: codeguard_test - POSTGRES_PASSWORD: test_password - POSTGRES_DB: codeguard_test_db - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - - redis: - image: redis:7-alpine - options: >- - --health-cmd "redis-cli ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 6379:6379 - - steps: - # 1. Checkout código - - uses: actions/checkout@v4 - - # 2. Setup Python - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - - # 3. Instalar dependencias - - name: Install dependencies - run: | - cd backend - python -m pip install --upgrade pip - pip install pytest pytest-cov pytest-asyncio httpx - pip install -r requirements.txt - - # 4. 
Ejecutar tests - - name: Run tests with coverage - env: - DATABASE_URL: postgresql://codeguard_test:test_password@localhost:5432/codeguard_test_db - REDIS_URL: redis://localhost:6379/0 - run: | - cd backend - pytest tests/ \ - --cov=src \ - --cov-report=term-missing \ - --cov-report=xml \ - --cov-report=html \ - --cov-fail-under=75 \ - -v - - # 5. Subir cobertura a Codecov - - name: Upload to Codecov - uses: codecov/codecov-action@v4 - with: - file: backend/coverage.xml - flags: unittests - name: codecov-${{ matrix.python-version }} - fail_ci_if_error: false - - # 6. Guardar reporte HTML - - name: Upload coverage report - if: always() - uses: actions/upload-artifact@v4 - with: - name: coverage-report-py${{ matrix.python-version }} - path: backend/htmlcov/ - retention-days: 30 - - # 7. Summary - - name: Summary - if: success() - run: | - echo "✅ Tests passed!" - echo "📊 Coverage: ≥75%" -``` - -**Criterios de Éxito**: -- ✅ Todos los tests pasan -- ✅ Cobertura ≥ 75% -- ✅ Tests en Python 3.11 y 3.12 - ---- - -### 3️⃣ Workflow: Docker Build (`docker.yml`) - -**Ubicación**: `.github/workflows/docker.yml` - -**Propósito**: Validar que la imagen Docker se construye correctamente. - -**Triggers**: -- Push a: `main`, `develop` -- Pull requests hacia: `main`, `develop` - -**Configuración**: - -```yaml -name: Docker Build - -on: - push: - branches: [main, develop] - paths: - - "backend/Dockerfile" - - "backend/docker-compose.yml" - - "backend/requirements.txt" - - "backend/src/**/*.py" - - ".github/workflows/docker.yml" - pull_request: - branches: [main, develop] - -jobs: - build: - name: Build & Validate Docker Image - runs-on: ubuntu-latest - - steps: - # 1. Checkout - - uses: actions/checkout@v4 - - # 2. Setup Docker Buildx (mejor caché) - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - # 3. 
Build imagen - - name: Build Docker image - uses: docker/build-push-action@v5 - with: - context: backend/ - push: false - tags: codeguard-backend:${{ github.sha }} - cache-from: type=gha - cache-to: type=gha,mode=max - - # 4. Validar docker-compose - - name: Validate docker-compose - run: | - cd backend - docker-compose config > /dev/null - echo "✅ docker-compose.yml is valid" - - # 5. Test imagen (verificar que se puede ejecutar) - - name: Test Docker image - run: | - docker run --rm codeguard-backend:${{ github.sha }} python --version - docker run --rm codeguard-backend:${{ github.sha }} pip list | grep fastapi - echo "✅ Docker image validated" - - # 6. Scan vulnerabilidades (Trivy) - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - with: - image-ref: codeguard-backend:${{ github.sha }} - format: sarif - output: trivy-results.sarif - exit-code: 0 # No bloquea si hay advertencias - - # 7. Upload Trivy results - - name: Upload Trivy results to GitHub Security - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: trivy-results.sarif - - # 8. Summary - - name: Summary - if: success() - run: | - echo "✅ Docker build successful" - echo "Image: codeguard-backend:${{ github.sha }}" -``` - -**Criterios de Éxito**: -- ✅ Imagen Docker se construye sin errores -- ✅ Contiene Python y FastAPI -- ✅ Sin vulnerabilidades críticas - ---- - -## 📁 Estructura de Archivos - -``` -.github/ -├── workflows/ -│ ├── lint.yml # Linting workflow -│ ├── test.yml # Testing workflow -│ ├── docker.yml # Docker build workflow -│ └── deploy.yml # (Futuro) Deployment -│ -└── PULL_REQUEST_TEMPLATE.md # Template para PRs -``` - -### Archivo: `.github/PULL_REQUEST_TEMPLATE.md` - -```markdown -## 📝 Descripción -Descripción clara de los cambios realizados. 
- -## 🎯 Historia de Usuario Relacionada -Closes #XX (CGAI-XX) - -## 🧪 Testing -- [x] Tests unitarios agregados -- [x] Tests de integración -- [x] Coverage ≥75% - -## ✅ Checklist -- [x] He seguido las convenciones de commits -- [x] He agregado tests -- [x] Todos los tests pasan -- [x] He actualizado documentación -- [x] Mi código sigue las convenciones - -## 🔗 Related Issues -Closes #XX, #YY -``` - ---- - -## 🛡️ Protección de Ramas - -### Rama `main` (Producción) - -**Ubicación**: Settings → Branches → Add rule - -**Configuración**: - -| Regla | Estado | -|-------|--------| -| **Require pull request reviews** | ✅ Sí (1 aprobación) | -| **Dismiss stale PR approvals** | ✅ Sí | -| **Require status checks** | ✅ Sí: lint, test, docker | -| **Require branches up to date** | ✅ Sí | -| **Resolve conversations** | ✅ Sí | -| **Require signed commits** | ❌ No (opcional) | -| **Linear history** | ❌ No | -| **Allow force pushes** | ❌ No | -| **Allow deletions** | ❌ No | - -### Rama `develop` (Integración) - -**Configuración Similar a `main` pero**: -- Aprobaciones requeridas: 1 (no 2) -- Sin restricción de "quien puede pushear" - ---- - -## 🔐 Secretos y Variables de Entorno - -### Secretos Requeridos (GitHub Settings → Secrets) - -| Secreto | Descripción | Requerido | Usado en | -|---------|-------------|-----------|----------| -| `DATABASE_URL` | PostgreSQL connection string | ✅ Tests | test.yml | -| `REDIS_URL` | Redis connection string | ✅ Tests | test.yml | -| `SUPABASE_URL` | Supabase project URL | ✅ Producción | Aplicación | -| `SUPABASE_KEY` | Supabase API key | ✅ Producción | Aplicación | - -### Variables de Entorno (Públicas) - -```yaml -env: - PYTHON_VERSION: "3.11" - REGISTRY: ghcr.io - IMAGE_NAME: codeguard-backend -``` - -### Configurar Secretos - -```bash -# 1. Ir a GitHub Settings → Secrets and variables → Actions -# 2. Click "New repository secret" -# 3. Name: DATABASE_URL -# 4. Value: postgresql://user:pass@localhost:5432/codeguard_db -# 5. 
Click "Add secret" -``` - ---- - -## ⚙️ Configuración Detallada de Workflows - -### Caching de Dependencias - -```yaml -- uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" # Cache automático de pip -``` - -**Ventajas**: -- ✅ Reduce tiempo de instalación de dependencias -- ✅ Acelera workflow ~2-3 minutos - -### Matrix Testing (Múltiples Versiones) - -```yaml -strategy: - matrix: - python-version: ["3.11", "3.12"] - os: [ubuntu-latest, macos-latest] # (Futuro) -``` - -**Ventajas**: -- ✅ Prueba en múltiples versiones -- ✅ Garantiza compatibilidad - -### Condicionales en Steps - -```yaml -- name: Deploy to production - if: github.ref == 'refs/heads/main' && github.event_name == 'push' - run: echo "Deploying..." - -- name: Upload artifacts - if: always() # Siempre, incluso si fallaron pasos anteriores - uses: actions/upload-artifact@v4 -``` - ---- - -## 📊 Badges de Estado - -### Agregar Badges al README - -En `README.md` (raíz del proyecto): - -```markdown -[![Lint](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml) -[![Tests](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml) -[![Docker](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml) -``` - -### Generar Automáticamente - -```bash -# En GitHub: -# 1. Actions → Seleccionar workflow (ej: Lint Code) -# 2. Click "..." → "Create status badge" -# 3. Seleccionar rama (main) -# 4. Copy markdown -# 5. Pegar en README.md -``` - ---- - -## 📈 Monitoreo y Logging - -### Ver Logs de Workflows - -```bash -# En GitHub: -# 1. Actions → Seleccionar workflow run -# 2. Jobs → Seleccionar job -# 3. 
Step → Expandir para ver logs detallados -``` - -### Debugging de Workflows - -```yaml -- name: Debug info - run: | - echo "GitHub context:" - echo " ref: ${{ github.ref }}" - echo " sha: ${{ github.sha }}" - echo " event: ${{ github.event_name }}" -``` - ---- - -## 🔧 Troubleshooting - -### ❌ Problema: "lint.yml" falla por formato - -**Síntoma**: -``` -black: error: cannot format backend/src/file.py -``` - -**Solución**: -```bash -cd backend -black src/ --line-length=100 -git add . -git commit -m "style: format code with black" -``` - -### ❌ Problema: Tests fallan solo en CI - -**Causas comunes**: -1. Falta variable de entorno -2. Diferencia de BD (CI usa BD limpia) -3. Race conditions en tests async - -**Soluciones**: -```bash -# Verificar env vars en workflow -# Añadir fixtures para resetear BD -# Usar pytest-asyncio correctamente -pytest tests/ -v --tb=short -``` - -### ❌ Problema: Docker build timeout - -**Solución**: Usar caché: -```yaml -cache-from: type=gha -cache-to: type=gha,mode=max -``` - -### ❌ Problema: Coverage no alcanza 75% - -**Pasos**: -1. Generar reporte: `pytest --cov=src --cov-report=html` -2. Abrir `htmlcov/index.html` -3. Identificar archivos sin cobertura -4. Escribir tests adicionales - ---- - -## 🎯 Mejores Prácticas - -### 1. Commits Pequeños y Frecuentes - -```bash -# ✅ Bien -git commit -m "feat(agents): add eval detection" -git commit -m "test(agents): add eval tests" -git commit -m "docs(readme): update examples" - -# ❌ Evitar -git commit -m "Add features, fix bugs, update docs" -``` - -### 2. Ejecutar Tests Localmente Antes de Push - -```bash -cd backend -pytest tests/ --cov=src --cov-fail-under=75 -pylint src/ --rcfile=.pylintrc --fail-under=8.5 -``` - -### 3. Mantener Workflows Rápidos - -| Métrica | Objetivo | -|---------|----------| -| Lint | < 1 min | -| Tests | < 5 min | -| Docker Build | < 3 min | -| Total | < 10 min | - -**Optimizaciones**: -- ✅ Cache de pip -- ✅ Cache de Docker layers -- ✅ Paralelización de tests - -### 4. 
Revisar Logs Detallados - -Ante un fallo: -1. Expandir todos los steps -2. Buscar el primer error (🔴 rojo) -3. Copiar comando y ejecutar localmente - -### 5. Documentar Cambios en CI - -```bash -git commit -m "ci(github): add Docker Trivy scanning - -- Scan for CRITICAL and HIGH vulnerabilities -- Upload results to GitHub Security -- Non-blocking (warnings allowed) - -Relates to security hardening" -``` - ---- - -## 📚 Referencias - -- [GitHub Actions Documentation](https://docs.github.com/en/actions) -- [Branch Protection Rules](https://docs.github.com/en/repositories/configuring-branches-and-merges) -- [Workflow Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions) -- [Pytest Documentation](https://docs.pytest.org/) -- [Docker Best Practices](https://docs.docker.com/develop/dev-best-practices/) - ---- - -
-

Documentación del Pipeline CI/CD - CodeGuard AI

-

Universidad Nacional de Colombia - 2025

-

Última actualización: 6 de Noviembre de 2025

-
+# 🔧 Documentación Técnica del Pipeline CI/CD - CodeGuard AI + +Esta documentación detalla la configuración completa del pipeline de **Integración Continua / Despliegue Continuo (CI/CD)** implementado con **GitHub Actions** para CodeGuard AI. + +--- + +## 📋 Tabla de Contenidos + +- [Visión General](#-visión-general-del-pipeline) +- [Workflows Implementados](#-workflows-implementados) +- [Estructura de Directorios](#-estructura-de-archivos) +- [Protección de Ramas](#-protección-de-ramas) +- [Secretos y Variables](#-secretos-y-variables-de-entorno) +- [Configuración Detallada](#-configuración-detallada-de-workflows) +- [Badges de Estado](#-badges-de-estado) +- [Monitoreo](#-monitoreo-y-logging) +- [Troubleshooting](#-troubleshooting) +- [Mejores Prácticas](#-mejores-prácticas) + +--- + +## 🎯 Visión General del Pipeline + +El pipeline CI/CD de CodeGuard AI automatiza la **validación, testing y construcción** del código para garantizar que todos los cambios que llegan a las ramas `main` y `develop` cumplen con los estándares de calidad establecidos. + +### Objetivos del Pipeline + +1. ✅ **Validación Automática**: Linting, tests, build +2. ✅ **Garantía de Calidad**: Cobertura ≥75%, pylint ≥8.5/10 +3. ✅ **Prevención de Regresiones**: Tests obligatorios +4. ✅ **Feedback Inmediato**: En PRs y commits +5. 
✅ **Deployment Seguro**: Build validado + +### Arquitectura del Pipeline + +``` +┌───────────────────────────────────────────────────────────┐ +│ GITHUB ACTIONS WORKFLOW ORCHESTRATION │ +└───────────────────────────────────────────────────────────┘ + ↓ + Trigger: push a rama / pull request + ↓ + ┌────────────────────────┬────────────────────┬──────────────┐ + ↓ ↓ ↓ ↓ +┌──────────────┐ ┌───────────────────┐ ┌─────────────┐ ┌──────────┐ +│ Lint Check │ │ Test & Coverage │ │ Docker Build│ │ Security │ +│ (lint.yml) │ │ (test.yml) │ │ (docker.yml)│ │ Scan │ +└──────────────┘ └───────────────────┘ └─────────────┘ └──────────┘ + ✅/❌ ✅/❌ ✅/❌ ✅/❌ + └────────────────────┬────────────────────┘ + ↓ + ┌──────────────────────┐ + │ Branch Protection │ + │ Status Checks │ + └──────────────────────┘ + ↓ + Merge Allowed? ✅ +``` + +--- + +## 🔄 Workflows Implementados + +### 1️⃣ Workflow: Lint Check (`lint.yml`) + +**Ubicación**: `.github/workflows/lint.yml` + +**Propósito**: Validar que el código cumple con estándares de estilo y calidad. + +**Triggers**: +- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` +- Pull requests hacia: `main`, `develop` + +**Herramientas**: +- **Black**: Formateo de código +- **isort**: Ordenamiento de imports +- **Flake8**: Análisis de PEP 8 y errores básicos +- **Pylint**: Análisis comprehensive de código + +**Configuración**: + +```yaml +name: Lint Code + +on: + push: + branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] + paths: + - "backend/src/**/*.py" + - "backend/tests/**/*.py" + - ".github/workflows/lint.yml" + pull_request: + branches: [main, develop] + +jobs: + lint: + name: Code Quality Check + runs-on: ubuntu-latest + + steps: + # 1. Checkout código + - uses: actions/checkout@v4 + + # 2. Setup Python 3.11 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + # 3. 
Instalar dependencias + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install black isort flake8 pylint + pip install -r requirements.txt + + # 4. Ejecutar Black (formatter) + - name: Run Black + run: | + cd backend + black src/ tests/ --line-length=100 --check + + # 5. Ejecutar isort + - name: Run isort + run: | + cd backend + isort src/ tests/ --profile=black --check-only + + # 6. Ejecutar Flake8 + - name: Run Flake8 + run: | + cd backend + flake8 src/ tests/ --max-line-length=100 --extend-ignore=E203,W503 + + # 7. Ejecutar Pylint + - name: Run Pylint + run: | + cd backend + pylint src/ --rcfile=.pylintrc --fail-under=8.5 || exit 1 + echo "✅ Pylint passed with score ≥8.5/10" + + # 8. Summary + - name: Summary + if: success() + run: echo "✅ All lint checks passed!" +``` + +**Criterios de Éxito**: +- ✅ Black: Sin cambios requeridos (--check) +- ✅ isort: Imports correctamente ordenados +- ✅ Flake8: Sin errores de estilo +- ✅ Pylint: Puntuación ≥ 8.5/10 + +--- + +### 2️⃣ Workflow: Testing & Coverage (`test.yml`) + +**Ubicación**: `.github/workflows/test.yml` + +**Propósito**: Ejecutar tests y validar cobertura de código. 
+ +**Triggers**: +- Push a ramas: `main`, `develop`, `feature/**`, `bugfix/**`, `hotfix/**` +- Pull requests hacia: `main`, `develop` + +**Servicios**: +- PostgreSQL 15 (para tests de integración) +- Redis (cache layer) + +**Configuración**: + +```yaml +name: Tests & Coverage + +on: + push: + branches: [main, develop, "feature/**", "bugfix/**", "hotfix/**"] + paths: + - "backend/src/**/*.py" + - "backend/tests/**/*.py" + - "backend/requirements.txt" + - ".github/workflows/test.yml" + pull_request: + branches: [main, develop] + +jobs: + test: + name: Run Tests & Coverage + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.11", "3.12"] # Test en múltiples versiones + + services: + postgres: + image: postgres:15-alpine + env: + POSTGRES_USER: codeguard_test + POSTGRES_PASSWORD: test_password + POSTGRES_DB: codeguard_test_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + redis: + image: redis:7-alpine + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 + + steps: + # 1. Checkout código + - uses: actions/checkout@v4 + + # 2. Setup Python + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + # 3. Instalar dependencias + - name: Install dependencies + run: | + cd backend + python -m pip install --upgrade pip + pip install pytest pytest-cov pytest-asyncio httpx + pip install -r requirements.txt + + # 4. Ejecutar tests + - name: Run tests with coverage + env: + DATABASE_URL: postgresql://codeguard_test:test_password@localhost:5432/codeguard_test_db + REDIS_URL: redis://localhost:6379/0 + run: | + cd backend + pytest tests/ \ + --cov=src \ + --cov-report=term-missing \ + --cov-report=xml \ + --cov-report=html \ + --cov-fail-under=75 \ + -v + + # 5. 
Subir cobertura a Codecov + - name: Upload to Codecov + uses: codecov/codecov-action@v4 + with: + file: backend/coverage.xml + flags: unittests + name: codecov-${{ matrix.python-version }} + fail_ci_if_error: false + + # 6. Guardar reporte HTML + - name: Upload coverage report + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report-py${{ matrix.python-version }} + path: backend/htmlcov/ + retention-days: 30 + + # 7. Summary + - name: Summary + if: success() + run: | + echo "✅ Tests passed!" + echo "📊 Coverage: ≥75%" +``` + +**Criterios de Éxito**: +- ✅ Todos los tests pasan +- ✅ Cobertura ≥ 75% +- ✅ Tests en Python 3.11 y 3.12 + +--- + +### 3️⃣ Workflow: Docker Build (`docker.yml`) + +**Ubicación**: `.github/workflows/docker.yml` + +**Propósito**: Validar que la imagen Docker se construye correctamente. + +**Triggers**: +- Push a: `main`, `develop` +- Pull requests hacia: `main`, `develop` + +**Configuración**: + +```yaml +name: Docker Build + +on: + push: + branches: [main, develop] + paths: + - "backend/Dockerfile" + - "backend/docker-compose.yml" + - "backend/requirements.txt" + - "backend/src/**/*.py" + - ".github/workflows/docker.yml" + pull_request: + branches: [main, develop] + +jobs: + build: + name: Build & Validate Docker Image + runs-on: ubuntu-latest + + steps: + # 1. Checkout + - uses: actions/checkout@v4 + + # 2. Setup Docker Buildx (mejor caché) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # 3. Build imagen + - name: Build Docker image + uses: docker/build-push-action@v5 + with: + context: backend/ + push: false + tags: codeguard-backend:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # 4. Validar docker-compose + - name: Validate docker-compose + run: | + cd backend + docker-compose config > /dev/null + echo "✅ docker-compose.yml is valid" + + # 5. 
Test imagen (verificar que se puede ejecutar) + - name: Test Docker image + run: | + docker run --rm codeguard-backend:${{ github.sha }} python --version + docker run --rm codeguard-backend:${{ github.sha }} pip list | grep fastapi + echo "✅ Docker image validated" + + # 6. Scan vulnerabilidades (Trivy) + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: codeguard-backend:${{ github.sha }} + format: sarif + output: trivy-results.sarif + exit-code: 0 # No bloquea si hay advertencias + + # 7. Upload Trivy results + - name: Upload Trivy results to GitHub Security + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-results.sarif + + # 8. Summary + - name: Summary + if: success() + run: | + echo "✅ Docker build successful" + echo "Image: codeguard-backend:${{ github.sha }}" +``` + +**Criterios de Éxito**: +- ✅ Imagen Docker se construye sin errores +- ✅ Contiene Python y FastAPI +- ✅ Sin vulnerabilidades críticas + +--- + +## 📁 Estructura de Archivos + +``` +.github/ +├── workflows/ +│ ├── lint.yml # Linting workflow +│ ├── test.yml # Testing workflow +│ ├── docker.yml # Docker build workflow +│ └── deploy.yml # (Futuro) Deployment +│ +└── PULL_REQUEST_TEMPLATE.md # Template para PRs +``` + +### Archivo: `.github/PULL_REQUEST_TEMPLATE.md` + +```markdown +## 📝 Descripción +Descripción clara de los cambios realizados. 
+ +## 🎯 Historia de Usuario Relacionada +Closes #XX (CGAI-XX) + +## 🧪 Testing +- [x] Tests unitarios agregados +- [x] Tests de integración +- [x] Coverage ≥75% + +## ✅ Checklist +- [x] He seguido las convenciones de commits +- [x] He agregado tests +- [x] Todos los tests pasan +- [x] He actualizado documentación +- [x] Mi código sigue las convenciones + +## 🔗 Related Issues +Closes #XX, #YY +``` + +--- + +## 🛡️ Protección de Ramas + +### Rama `main` (Producción) + +**Ubicación**: Settings → Branches → Add rule + +**Configuración**: + +| Regla | Estado | +|-------|--------| +| **Require pull request reviews** | ✅ Sí (1 aprobación) | +| **Dismiss stale PR approvals** | ✅ Sí | +| **Require status checks** | ✅ Sí: lint, test, docker | +| **Require branches up to date** | ✅ Sí | +| **Resolve conversations** | ✅ Sí | +| **Require signed commits** | ❌ No (opcional) | +| **Linear history** | ❌ No | +| **Allow force pushes** | ❌ No | +| **Allow deletions** | ❌ No | + +### Rama `develop` (Integración) + +**Configuración Similar a `main` pero**: +- Aprobaciones requeridas: 1 (no 2) +- Sin restricción de "quien puede pushear" + +--- + +## 🔐 Secretos y Variables de Entorno + +### Secretos Requeridos (GitHub Settings → Secrets) + +| Secreto | Descripción | Requerido | Usado en | +|---------|-------------|-----------|----------| +| `DATABASE_URL` | PostgreSQL connection string | ✅ Tests | test.yml | +| `REDIS_URL` | Redis connection string | ✅ Tests | test.yml | +| `SUPABASE_URL` | Supabase project URL | ✅ Producción | Aplicación | +| `SUPABASE_KEY` | Supabase API key | ✅ Producción | Aplicación | + +### Variables de Entorno (Públicas) + +```yaml +env: + PYTHON_VERSION: "3.11" + REGISTRY: ghcr.io + IMAGE_NAME: codeguard-backend +``` + +### Configurar Secretos + +```bash +# 1. Ir a GitHub Settings → Secrets and variables → Actions +# 2. Click "New repository secret" +# 3. Name: DATABASE_URL +# 4. Value: postgresql://user:pass@localhost:5432/codeguard_db +# 5. 
Click "Add secret" +``` + +--- + +## ⚙️ Configuración Detallada de Workflows + +### Caching de Dependencias + +```yaml +- uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" # Cache automático de pip +``` + +**Ventajas**: +- ✅ Reduce tiempo de instalación de dependencias +- ✅ Acelera workflow ~2-3 minutos + +### Matrix Testing (Múltiples Versiones) + +```yaml +strategy: + matrix: + python-version: ["3.11", "3.12"] + os: [ubuntu-latest, macos-latest] # (Futuro) +``` + +**Ventajas**: +- ✅ Prueba en múltiples versiones +- ✅ Garantiza compatibilidad + +### Condicionales en Steps + +```yaml +- name: Deploy to production + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + run: echo "Deploying..." + +- name: Upload artifacts + if: always() # Siempre, incluso si fallaron pasos anteriores + uses: actions/upload-artifact@v4 +``` + +--- + +## 📊 Badges de Estado + +### Agregar Badges al README + +En `README.md` (raíz del proyecto): + +```markdown +[![Lint](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/lint.yml) +[![Tests](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/test.yml) +[![Docker](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml/badge.svg?branch=main)](https://github.com/YOUR_ORG/CodeGuard-Unal/actions/workflows/docker.yml) +``` + +### Generar Automáticamente + +```bash +# En GitHub: +# 1. Actions → Seleccionar workflow (ej: Lint Code) +# 2. Click "..." → "Create status badge" +# 3. Seleccionar rama (main) +# 4. Copy markdown +# 5. Pegar en README.md +``` + +--- + +## 📈 Monitoreo y Logging + +### Ver Logs de Workflows + +```bash +# En GitHub: +# 1. Actions → Seleccionar workflow run +# 2. Jobs → Seleccionar job +# 3. 
Step → Expandir para ver logs detallados +``` + +### Debugging de Workflows + +```yaml +- name: Debug info + run: | + echo "GitHub context:" + echo " ref: ${{ github.ref }}" + echo " sha: ${{ github.sha }}" + echo " event: ${{ github.event_name }}" +``` + +--- + +## 🔧 Troubleshooting + +### ❌ Problema: "lint.yml" falla por formato + +**Síntoma**: +``` +black: error: cannot format backend/src/file.py +``` + +**Solución**: +```bash +cd backend +black src/ --line-length=100 +git add . +git commit -m "style: format code with black" +``` + +### ❌ Problema: Tests fallan solo en CI + +**Causas comunes**: +1. Falta variable de entorno +2. Diferencia de BD (CI usa BD limpia) +3. Race conditions en tests async + +**Soluciones**: +```bash +# Verificar env vars en workflow +# Añadir fixtures para resetear BD +# Usar pytest-asyncio correctamente +pytest tests/ -v --tb=short +``` + +### ❌ Problema: Docker build timeout + +**Solución**: Usar caché: +```yaml +cache-from: type=gha +cache-to: type=gha,mode=max +``` + +### ❌ Problema: Coverage no alcanza 75% + +**Pasos**: +1. Generar reporte: `pytest --cov=src --cov-report=html` +2. Abrir `htmlcov/index.html` +3. Identificar archivos sin cobertura +4. Escribir tests adicionales + +--- + +## 🎯 Mejores Prácticas + +### 1. Commits Pequeños y Frecuentes + +```bash +# ✅ Bien +git commit -m "feat(agents): add eval detection" +git commit -m "test(agents): add eval tests" +git commit -m "docs(readme): update examples" + +# ❌ Evitar +git commit -m "Add features, fix bugs, update docs" +``` + +### 2. Ejecutar Tests Localmente Antes de Push + +```bash +cd backend +pytest tests/ --cov=src --cov-fail-under=75 +pylint src/ --rcfile=.pylintrc --fail-under=8.5 +``` + +### 3. Mantener Workflows Rápidos + +| Métrica | Objetivo | +|---------|----------| +| Lint | < 1 min | +| Tests | < 5 min | +| Docker Build | < 3 min | +| Total | < 10 min | + +**Optimizaciones**: +- ✅ Cache de pip +- ✅ Cache de Docker layers +- ✅ Paralelización de tests + +### 4. 
Revisar Logs Detallados + +Ante un fallo: +1. Expandir todos los steps +2. Buscar el primer error (🔴 rojo) +3. Copiar comando y ejecutar localmente + +### 5. Documentar Cambios en CI + +```bash +git commit -m "ci(github): add Docker Trivy scanning + +- Scan for CRITICAL and HIGH vulnerabilities +- Upload results to GitHub Security +- Non-blocking (warnings allowed) + +Relates to security hardening" +``` + +--- + +## 📚 Referencias + +- [GitHub Actions Documentation](https://docs.github.com/en/actions) +- [Branch Protection Rules](https://docs.github.com/en/repositories/configuring-branches-and-merges) +- [Workflow Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions) +- [Pytest Documentation](https://docs.pytest.org/) +- [Docker Best Practices](https://docs.docker.com/develop/dev-best-practices/) + +--- + +
+

Documentación del Pipeline CI/CD - CodeGuard AI

+

Universidad Nacional de Colombia - 2025

+

Última actualización: 6 de noviembre de 2025

+
From 0ad0f7ec87a63cd97fe7cf835f560ff9ab5bee0a Mon Sep 17 00:00:00 2001 From: Yosoyepa Date: Thu, 4 Dec 2025 06:12:03 -0500 Subject: [PATCH 3/3] fix(tests): standardize severity case in PerformanceAgent tests and improve formatting --- backend/src/agents/performance_agent.py | 2 +- .../test_performance_agent_integration.py | 21 ++++++++------ .../unit/agents/test_performance_agent.py | 28 +++++++++++-------- backend/tests/unit/test_vulnerable.py | 2 ++ 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/backend/src/agents/performance_agent.py b/backend/src/agents/performance_agent.py index 24cbfe2..974c9ce 100644 --- a/backend/src/agents/performance_agent.py +++ b/backend/src/agents/performance_agent.py @@ -182,7 +182,7 @@ def analyze(self, context: AnalysisContext) -> List[Finding]: # Ordenar hallazgos por severidad (CRITICAL primero) findings.sort( key=lambda f: ( - ["critical", "high", "medium", "low", "info"].index(f.severity.value) + ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].index(f.severity.value) ) ) diff --git a/backend/tests/integration/test_performance_agent_integration.py b/backend/tests/integration/test_performance_agent_integration.py index f60ee20..1ac9617 100644 --- a/backend/tests/integration/test_performance_agent_integration.py +++ b/backend/tests/integration/test_performance_agent_integration.py @@ -5,9 +5,10 @@ and verifies end-to-end behavior. 
""" -import pytest import time +import pytest + from src.agents.performance_agent import PerformanceAgent from src.schemas.analysis import AnalysisContext from src.schemas.finding import Severity @@ -78,7 +79,9 @@ def terrible_complexity(data): def test_comprehensive_performance_detection(self, agent, inefficient_data_processing_code): """Test detection of all performance issues in realistic code.""" - context = AnalysisContext(code_content=inefficient_data_processing_code, filename="data_processor.py") + context = AnalysisContext( + code_content=inefficient_data_processing_code, filename="data_processor.py" + ) findings = agent.analyze(context) @@ -106,8 +109,8 @@ def test_comprehensive_performance_detection(self, agent, inefficient_data_proce # Verify findings are sorted by severity severities = [f.severity.value for f in findings] - expected_order = ["critical", "high", "medium", "low", "info"] - + expected_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] + # Check if sorted correctly (indices in expected_order should be non-decreasing) indices = [expected_order.index(s) for s in severities] assert indices == sorted(indices) @@ -190,17 +193,17 @@ def test_large_file_performance(self, agent): for i in range(500): lines.append(f" x_{i} = {i} * 2") lines.append(" return x_499") - + code_content = "\n".join(lines) - + context = AnalysisContext(code_content=code_content, filename="large_clean.py") - + start_time = time.time() findings = agent.analyze(context) end_time = time.time() - + execution_time = end_time - start_time - + # Should be fast assert execution_time < 1.0 assert len(findings) == 0 diff --git a/backend/tests/unit/agents/test_performance_agent.py b/backend/tests/unit/agents/test_performance_agent.py index b496024..45a78af 100644 --- a/backend/tests/unit/agents/test_performance_agent.py +++ b/backend/tests/unit/agents/test_performance_agent.py @@ -9,9 +9,10 @@ 4. Robust error handling and edge cases. 
""" -import pytest -from unittest.mock import MagicMock, patch import ast +from unittest.mock import MagicMock, patch + +import pytest from src.agents.performance_agent import PerformanceAgent from src.schemas.analysis import AnalysisContext @@ -114,7 +115,9 @@ def build_list(items): context = AnalysisContext(code_content=code, filename="collections.py") findings = agent.analyze(context) - finding = next((f for f in findings if "insert(0)" in f.message or "insert" in f.code_snippet), None) + finding = next( + (f for f in findings if "insert(0)" in f.message or "insert" in f.code_snippet), None + ) assert finding is not None assert finding.severity == Severity.HIGH assert finding.rule_id == "PERF002_LIST_INSERT" @@ -150,7 +153,7 @@ def filter_fast(items, whitelist_set): """ context = AnalysisContext(code_content=code, filename="fast_search.py") findings = agent.analyze(context) - + search_findings = [f for f in findings if "Búsqueda lineal" in f.message] assert len(search_findings) == 0 @@ -234,21 +237,21 @@ def test_syntax_error_handling(self, agent): """Test that syntax errors in code do not crash the agent.""" code = "def broken_code(:" # Syntax error context = AnalysisContext(code_content=code, filename="broken.py") - + # Should not raise exception findings = agent.analyze(context) - + # Should return empty list or list with syntax error finding assert isinstance(findings, list) def test_generic_exception_handling(self, agent): """Test handling of unexpected exceptions during analysis.""" context = AnalysisContext(code_content="pass", filename="test.py") - + # Mock ast.parse to raise generic exception with patch("ast.parse", side_effect=Exception("Unexpected AST failure")): findings = agent.analyze(context) - + # Should handle gracefully and return empty list assert findings == [] @@ -256,9 +259,12 @@ def test_visitor_exception_handling(self, agent): """Test exception handling within the visitor traversal.""" code = "x = 1" context = 
AnalysisContext(code_content=code, filename="test.py") - + # Mock the visitor's visit method to raise exception - with patch("src.agents.performance_agent.PerformanceVisitor.visit", side_effect=Exception("Visitor Error")): + with patch( + "src.agents.performance_agent.PerformanceVisitor.visit", + side_effect=Exception("Visitor Error"), + ): findings = agent.analyze(context) # Should catch and return empty or partial findings assert findings == [] @@ -295,5 +301,5 @@ def process_efficiently(data_list, lookup_set): """ context = AnalysisContext(code_content=code, filename="optimized.py") findings = agent.analyze(context) - + assert len(findings) == 0 diff --git a/backend/tests/unit/test_vulnerable.py b/backend/tests/unit/test_vulnerable.py index 010d8e7..c99534e 100644 --- a/backend/tests/unit/test_vulnerable.py +++ b/backend/tests/unit/test_vulnerable.py @@ -1,6 +1,7 @@ import os import socket + def terrible_function(): # Performance: Triple nested loop (O(n^3)) -> CRITICAL for i in range(10): @@ -23,5 +24,6 @@ def terrible_function(): # Simulating DB call inside loop execute("SELECT * FROM table WHERE id = " + str(item)) + def execute(query): pass