diff --git a/.gitignore b/.gitignore index 01232df8..1de43ccd 100644 --- a/.gitignore +++ b/.gitignore @@ -334,4 +334,12 @@ apps/backend/logs/ .claude* .mcp.json -.serena \ No newline at end of file +# Serena MCP - ignore cache but track memories and config +.serena/cache/ +# Track Serena memories and project configuration +!.serena/ +!.serena/.gitignore +!.serena/project.yml +!.serena/memories/ +!.serena/memories/*.md +.github/instructions/sonarqube_mcp.instructions.md diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml index 0035c6c9..4d26384b 100644 --- a/.markdownlint-cli2.yaml +++ b/.markdownlint-cli2.yaml @@ -38,6 +38,7 @@ gitignore: true globs: - '**/*.{md,markdown}' -# 5) Ignore patterns - exclude prompts folder +# 5) Ignore patterns - exclude prompts folder and Serena memories ignores: - 'prompts/**/*.{md,markdown}' + - '.serena/memories/**/*.{md,markdown}' diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 00000000..14d86ad6 --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1 @@ +/cache diff --git a/.serena/memories/architecture_overview.md b/.serena/memories/architecture_overview.md new file mode 100644 index 00000000..f56fc031 --- /dev/null +++ b/.serena/memories/architecture_overview.md @@ -0,0 +1,400 @@ +# GitRay - Architecture Overview + +## High-Level Architecture + +GitRay follows a **monorepo architecture** with clear separation between frontend, backend, and shared types. + +``` +┌─────────────────┐ +│ React 19 UI │ Port 5173 (Vite dev server) +│ (Frontend) │ +└────────┬────────┘ + │ HTTP/REST + │ (Axios) +┌────────▼────────┐ +│ Express 5 API │ Port 3001 +│ (Backend) │ +└────────┬────────┘ + │ + ┌────┴──────┬─────────┬──────────┐ + │ │ │ │ +┌───▼───┐ ┌───▼───┐ ┌──▼────┐ ┌───▼────┐ +│ Redis │ │ Disk │ │ simple│ │ Winston│ +│ Cache │ │ Cache │ │ git │ │ Logs │ +└───────┘ └───────┘ └───────┘ └────────┘ +``` + +## Backend Architecture + +### Layered Design + +``` +Routes (API Endpoints) + ↓ +Middlewares (Validation, Auth, Error Handling) + ↓ +Services (Business Logic) + ↓ +Utils (Helpers, Cache, Locks, Memory Management) + ↓ +External Systems (Redis, Git, Filesystem) +``` + +### Key Services + +#### 1. **gitService** (`services/gitService.ts`) +- Git operations: clone, log extraction, repository analysis +- Streaming support for large repositories (50k+ commits) +- Batch processing with configurable batch sizes +- Integration with `repositoryCoordinator` for shared repository access + +**Key Methods:** +- `getCommits(repoPath)` - Extract commits from local repository +- `cloneRepository(repoUrl, options)` - Clone with configurable depth +- Streaming capabilities for memory-efficient large repo handling + +#### 2. **cache** (`services/cache.ts`) +Multi-tier caching strategy: +- **Tier 1 - Raw Commits** (60% memory): Direct Git extraction results, TTL 1h +- **Tier 2 - Filtered Commits** (25% memory): Author/date filtered, TTL 30min +- **Tier 3 - Aggregated Data** (15% memory): Processed visualizations, TTL 15min + +**Backends:** +- **Redis**: Primary distributed cache (via ioredis) +- **hybridLruCache**: In-memory LRU + disk persistence fallback +- Automatic fallback and health checks + +**Key Functions:** +- `getFromCache(key)` - Multi-tier read with fallback +- `setInCache(key, value, ttl)` - Multi-tier write with replication +- `isCacheHealthy()` - Health status of cache backends +- `switchCacheBackend(backend)` - Runtime backend switching + +#### 3. 
**repositoryCoordinator** (`services/repositoryCoordinator.ts`) +Prevents duplicate repository clones and manages shared access: +- **Operation Coalescing**: Combines identical concurrent operations +- **Reference Counting**: Tracks active users of each repository +- **Automatic Cleanup**: Removes unused repositories +- **Lock Management**: Deadlock-free concurrent access via `lockManager` + +**Key Functions:** +- `withSharedRepository(repoUrl, operation)` - Execute with shared repo access +- `coordinatedOperation(repoUrl, operationType, operation)` - Coordinated execution + +**Architecture:** +``` +Request 1 ─┐ +Request 2 ─┼─→ Coordinator ─→ Single Clone ─→ Shared Access +Request 3 ─┘ (Reference Counted) +``` + +#### 4. **repositoryCache** (`services/repositoryCache.ts`) +Physical repository caching on disk: +- Max repositories: 50 (configurable) +- Max age: 24 hours (configurable) +- LRU eviction when limits reached +- Integration with coordinator for reuse + +#### 5. **fileAnalysisService** (`services/fileAnalysisService.ts`) +File type distribution analysis: +- Categorizes files (code, documentation, config, assets, other) +- Extension-based statistics +- Directory-level breakdown +- Performance optimized with streaming + +#### 6. **repositorySummaryService** (`services/repositorySummaryService.ts`) +Repository metadata extraction: +- Sparse clone approach (95-99% bandwidth savings) +- Creation date determination (first commit or API) +- Last commit info with relative time +- Activity status classification (active/inactive/archived) +- Total commits and contributor count + +#### 7. **metrics** (`services/metrics.ts`) +Prometheus metrics collection: +- Request counters and latencies +- Cache hit rates +- Memory usage +- Repository coordination metrics +- Custom business metrics + +#### 8. **logger** (`services/logger.ts`) +Winston logging with: +- Daily log rotation +- Multiple log levels (error, warn, info, debug) +- Structured logging with context +- Separate error log file + +### Utilities + +#### **hybridLruCache** (`utils/hybridLruCache.ts`) +Hierarchical LRU cache: +- In-memory primary cache +- Disk-based secondary cache +- Automatic tier promotion/demotion +- Memory pressure-aware eviction + +#### **lockManager** (`utils/lockManager.ts`) +Distributed locking: +- Redis-based locks with TTL +- Lock cleanup on timeout +- Prevents race conditions in coordinator +- Supports lock renewal + +#### **memoryPressureManager** (`utils/memoryPressureManager.ts`) +Memory monitoring and protection: +- Thresholds: Warning (75%), Critical (85%), Emergency (95%) +- Circuit breakers for memory protection +- Request throttling under pressure +- Emergency cache eviction + +#### **urlSecurity** (`utils/urlSecurity.ts`) +Repository URL validation: +- Blocks malicious URLs (file://, javascript:, etc.) +- Validates Git hosting platforms (GitHub, GitLab, Bitbucket) +- Normalizes URLs + +#### **routeHelpers** (`utils/routeHelpers.ts`) +Common route patterns extracted for reuse: +- `setupRouteRequest()` - Initialize request context +- `recordRouteSuccess()` - Success response with metrics +- `recordRouteError()` - Error handling with logging +- `recordCacheHit()` / `recordCacheMiss()` - Cache metrics + +### Middlewares + +1. **errorHandler** - Centralized error handling with proper status codes +2. **validation** - Express-validator integration +3. **memoryPressureMiddleware** - Reject requests under high memory pressure +4. **requestId** - Add unique request IDs for tracing +5. 
**strictContentType** - Enforce JSON content type for POST/PUT +6. **adminAuth** - Admin endpoint authentication + +### Routes + +#### **repositoryRoutes** (`routes/repositoryRoutes.ts`) +- `GET /repositories/summary` - Repository metadata +- `GET /repositories/churn` - Code churn analysis +- `GET /repositories/commits` - All commits +- `GET /repositories/contributors` - Top contributors +- `GET /repositories/heatmap` - Heatmap data +- `GET /repositories/full-data` - Complete repository data + +#### **commitRoutes** (`routes/commitRoutes.ts`) +- Legacy commit endpoints (being refactored) + +#### **healthRoutes** (`routes/healthRoutes.ts`) +- `GET /health` - Basic health check +- `GET /health/detailed` - Comprehensive system status +- `GET /health/memory` - Memory pressure status +- `GET /metrics` - Prometheus metrics + +## Caching Strategy + +### Three-Tier Hierarchy + +``` +Request → Tier 1 (Raw Commits, 60%) + ↓ miss + Tier 2 (Filtered, 25%) + ↓ miss + Tier 3 (Aggregated, 15%) + ↓ miss + Git Extraction +``` + +### Cache Key Design +```typescript +// Tier 1: Raw commits +`commits:${repoUrlHash}` + +// Tier 2: Filtered commits +`commits:filtered:${repoUrlHash}:${filterHash}` + +// Tier 3: Aggregated data +`heatmap:${repoUrlHash}:${timePeriod}:${filterHash}` +``` + +### TTL Strategy +- **Raw data**: 1 hour (highest reusability) +- **Filtered data**: 30 minutes (medium reusability) +- **Aggregated data**: 15 minutes (specific use case) + +### Backends Priority +1. **Redis** (primary) - Distributed, fast, persistent +2. **Memory** (fallback) - Local, fastest, volatile +3. **Disk** (last resort) - Local, slow, persistent + +## Repository Coordination + +### Operation Flow + +``` +Request → Coordinator.withSharedRepository() + ↓ + Check existing operations + ├─ Match found → Join existing + └─ No match → Create new operation + ↓ + Acquire lock + ↓ + Clone/reuse repository + ↓ + Execute operation + ↓ + Update reference count + ↓ + Release lock + ↓ + Return result (shared with all waiters) +``` + +### Benefits +- **Efficiency**: Single clone for concurrent identical requests +- **Resource Management**: Reference counting prevents premature cleanup +- **Consistency**: Lock-based coordination prevents race conditions +- **Automatic Cleanup**: Unused repositories automatically removed + +## Memory Management + +### Monitoring +``` +Normal (< 75%) → Allow all operations +Warning (75-85%) → Log warnings, continue +Critical (85-95%) → Throttle requests, emergency eviction +Emergency (> 95%) → Reject new requests, aggressive eviction +``` + +### Emergency Eviction Order +1. Tier 3 cache (aggregated data) - least reusable +2. Tier 2 cache (filtered data) - medium reusability +3. 
Tier 1 cache (raw commits) - highest reusability + +### Circuit Breakers +- Automatic request rejection at emergency threshold +- Prevents system overload and crashes +- Self-recovery when memory drops below threshold + +## Streaming for Large Repositories + +### Activation +- Automatically enabled for repositories with 50k+ commits +- Configurable threshold via `STREAMING_COMMIT_THRESHOLD` + +### Batch Processing +- Default batch size: 1000 commits +- Configurable via `STREAMING_BATCH_SIZE` +- Memory-efficient processing of massive histories + +### Benefits +- Handles repositories with 100k+ commits +- Prevents memory exhaustion +- Progressive data delivery to frontend + +## Frontend Architecture + +### Component Structure +``` +App.tsx (Root) + ↓ +MainPage.tsx (Main layout) + ├─ RepoInput.tsx (URL input) + ├─ ActivityHeatmap.tsx (Visualization) + ├─ CommitList.tsx (Commit display) + └─ RiveLoader.tsx (Loading animation) +``` + +### API Communication +- **Centralized API client**: `services/api.ts` +- **Axios-based**: Configured with base URL and interceptors +- **Type-safe**: All requests/responses use types from `@gitray/shared-types` + +### State Management +- React hooks for local state +- No global state management (Redux/Context) currently +- Direct API calls from components + +## Shared Types Package + +### Purpose +- Single source of truth for TypeScript types +- Prevents type duplication between frontend/backend +- Exported as `@gitray/shared-types` workspace package + +### Key Exports +- `Commit`, `Author`, `CommitFilterOptions` +- `CommitHeatmapData`, `CommitAggregation`, `TimePeriod` +- `FileTypeDistribution`, `FileInfo`, `FileCategory` +- `CodeChurnAnalysis`, `FileChurnData`, `ChurnRiskLevel` +- `RepositorySummary`, `RepositoryStatus`, `RepositoryPlatform` +- `GitrayError`, `ValidationError`, `RepositoryError` +- Constants: `HTTP_STATUS`, `TIME`, `ERROR_MESSAGES`, `GIT_SERVICE` + +### Build Process +- Must be built before backend/frontend (`pnpm build:shared-types`) +- Produces both CommonJS and ESM outputs +- Consumed via TypeScript project references + +## Performance Optimizations + +### Backend +- Multi-tier caching reduces Git operations by ~90% +- Repository coordination eliminates duplicate clones +- Streaming mode for large repositories +- Memory pressure management prevents crashes +- LRU eviction maintains optimal cache size + +### Frontend +- Vite for fast HMR and optimized builds +- React 19 with automatic batching +- Lazy loading of heavy components +- Efficient re-rendering with proper key usage + +### Network +- Compressed responses (gzip/brotli via helmet) +- Cache headers for static assets +- Minimal payload sizes via selective data fetching + +## Security Measures + +- **Helmet**: Security headers (CSP, HSTS, etc.) 
+- **CORS**: Restricted origins +- **Rate Limiting**: 100 requests per 15 minutes per IP +- **Input Validation**: Express-validator + Zod schemas +- **URL Security**: Blocks malicious repository URLs +- **Content-Type Enforcement**: Strict JSON-only for mutations + +## Monitoring & Observability + +### Metrics (Prometheus) +- Request count, duration, status codes +- Cache hit/miss rates per tier +- Memory usage and pressure levels +- Repository coordination stats +- Git operation durations + +### Logging (Winston) +- Structured JSON logs +- Log levels: error, warn, info, debug +- Daily rotation with compression +- Request IDs for tracing +- Contextual metadata in all logs + +### Health Checks +- Basic: Service up/down +- Detailed: Redis status, memory usage, cache health +- Memory: Current pressure level and thresholds + +## Scalability Considerations + +### Current Design Supports +- Multiple concurrent users on single server +- Horizontal scaling limited by Redis as single point +- Repository cache shared via filesystem + +### Future Scaling Options +- Redis Cluster for distributed caching +- Load balancer with sticky sessions +- Shared filesystem (NFS/S3) for repository cache +- Separate worker processes for Git operations +- Database for persistent metadata (currently cache-only) diff --git a/.serena/memories/codebase_structure.md b/.serena/memories/codebase_structure.md new file mode 100644 index 00000000..a862016d --- /dev/null +++ b/.serena/memories/codebase_structure.md @@ -0,0 +1,157 @@ +# GitRay - Codebase Structure + +## Repository Layout + +``` +gitray/ +├── apps/ +│ ├── backend/ # Express API server +│ │ ├── src/ +│ │ │ ├── routes/ # API endpoint definitions +│ │ │ │ ├── healthRoutes.ts +│ │ │ │ ├── commitRoutes.ts +│ │ │ │ ├── repositoryRoutes.ts +│ │ │ │ └── index.ts +│ │ │ ├── services/ # Business logic layer +│ │ │ │ ├── cache.ts +│ │ │ │ ├── gitService.ts +│ │ │ │ ├── repositoryCache.ts +│ │ │ │ ├── repositoryCoordinator.ts +│ │ │ │ ├── distributedCacheInvalidation.ts +│ │ │ │ ├── fileAnalysisService.ts +│ │ │ │ ├── repositorySummaryService.ts +│ │ │ │ ├── metrics.ts +│ │ │ │ └── logger.ts +│ │ │ ├── utils/ # Utility functions +│ │ │ │ ├── hybridLruCache.ts +│ │ │ │ ├── lockManager.ts +│ │ │ │ ├── memoryPressureManager.ts +│ │ │ │ ├── gitUtils.ts +│ │ │ │ ├── urlSecurity.ts +│ │ │ │ ├── routeHelpers.ts +│ │ │ │ ├── withTempRepository.ts +│ │ │ │ ├── serializationWorker.ts +│ │ │ │ ├── gracefulShutdown.ts +│ │ │ │ └── cleanupScheduler.ts +│ │ │ ├── middlewares/ # Express middlewares +│ │ │ │ ├── errorHandler.ts +│ │ │ │ ├── validation.ts +│ │ │ │ ├── memoryPressureMiddleware.ts +│ │ │ │ ├── requestId.ts +│ │ │ │ ├── adminAuth.ts +│ │ │ │ └── strictContentType.ts +│ │ │ ├── config.ts # Configuration management +│ │ │ └── index.ts # Application entry point +│ │ ├── perf/ # k6 performance tests +│ │ ├── package.json +│ │ └── tsconfig.json +│ │ +│ └── frontend/ # React UI +│ ├── src/ +│ │ ├── components/ # React components +│ │ │ ├── ActivityHeatmap.tsx +│ │ │ ├── CommitList.tsx +│ │ │ ├── RepoInput.tsx +│ │ │ ├── RiveLogo.tsx +│ │ │ └── RiveLoader.tsx +│ │ ├── pages/ # Page components +│ │ │ └── MainPage.tsx +│ │ ├── services/ # API clients +│ │ │ └── api.ts +│ │ ├── utils/ # Utility functions +│ │ │ └── dateUtils.ts +│ │ ├── styles/ # CSS files +│ │ │ └── heatmap.css +│ │ ├── types/ # TypeScript type definitions +│ │ │ └── react-calendar-heatmap.d.ts +│ │ ├── assets/ # Static assets +│ │ ├── App.tsx # Root component +│ │ ├── main.tsx # Application entry +│ │ └── 
test-setup.ts # Vitest setup +│ ├── package.json +│ ├── tsconfig.json +│ └── vite.config.ts +│ +├── packages/ +│ └── shared-types/ # Shared TypeScript definitions +│ ├── src/ +│ │ └── index.ts # Type exports +│ ├── dist/ # Built types (CJS + ESM) +│ ├── package.json +│ └── tsconfig.json +│ +├── scripts/ # Dev and maintenance scripts +│ ├── start.sh # Development environment manager +│ └── normalize-line-endings.sh +│ +├── .github/ # GitHub Actions workflows +├── .husky/ # Git hooks +├── .vscode/ # VS Code settings +├── .serena/ # Serena MCP memories +├── logs/ # Application logs (gitignored) +├── coverage/ # Test coverage reports (gitignored) +├── node_modules/ # Dependencies (gitignored) +│ +├── package.json # Root workspace config +├── pnpm-workspace.yaml # pnpm workspace definition +├── tsconfig.json # Root TypeScript config with project references +├── vitest.config.ts # Vitest test configuration +├── eslint.config.mjs # ESLint flat config +├── prettier.config.js # Prettier configuration +├── .gitignore +├── CLAUDE.md # Guidelines for Claude AI assistant +├── README.md # Project documentation +└── LICENSE + +``` + +## Important File Locations + +### Configuration Files +- **Root TypeScript**: `tsconfig.json` (composite project references) +- **Backend Config**: `apps/backend/src/config.ts` +- **Environment**: `.env` (not checked in) +- **ESLint**: `eslint.config.mjs` (flat config format) +- **Prettier**: `prettier.config.js` +- **Vitest**: `vitest.config.ts` (workspace-aware) + +### Entry Points +- **Backend Server**: `apps/backend/src/index.ts` +- **Frontend App**: `apps/frontend/src/main.tsx` +- **Shared Types**: `packages/shared-types/src/index.ts` + +### Testing +- **Backend Tests**: Co-located with source files as `*.test.ts` +- **Frontend Tests**: Co-located with components as `*.test.tsx` +- **Performance Tests**: `apps/backend/perf/` + +## Build Artifacts (Gitignored) +- `dist/` - Compiled TypeScript output +- `*.tsbuildinfo` - TypeScript incremental build cache +- `coverage/` - Test coverage reports +- `.vite/` - Vite cache +- `.eslintcache` - ESLint cache +- `.nyc_output/` - Coverage intermediate files +- `logs/` - Winston log files +- `node_modules/` - Dependencies + +## Key Architectural Components + +### Backend Services +- **gitService**: Git operations (clone, log, analysis) +- **cache**: Multi-tier caching (Redis + Memory + Disk) +- **repositoryCoordinator**: Shared repository management +- **repositoryCache**: Repository-level caching +- **fileAnalysisService**: File type distribution analysis +- **repositorySummaryService**: Repository metadata extraction +- **metrics**: Prometheus metrics collection +- **logger**: Winston logging with daily rotation + +### Backend Utilities +- **hybridLruCache**: LRU cache with hierarchical tiers +- **lockManager**: Distributed locking for coordination +- **memoryPressureManager**: Memory threshold monitoring +- **urlSecurity**: Repository URL validation + +### Frontend Services +- **api.ts**: Axios-based API client for backend communication diff --git a/.serena/memories/coding_standards.md b/.serena/memories/coding_standards.md new file mode 100644 index 00000000..9c78897e --- /dev/null +++ b/.serena/memories/coding_standards.md @@ -0,0 +1,399 @@ +# GitRay - Coding Standards and Conventions + +## Core Principles +- **TypeScript Strict Mode**: Enabled everywhere, avoid `any` and implicit `any` +- **Functional React**: Use functional components with hooks only +- **Professional Logging**: Use Winston logger, not `console.log` in 
runtime code
+- **Shared Types**: Import from `@gitray/shared-types`, never duplicate interfaces
+- **Path Aliases**: Use `@/` for absolute imports from `src/`
+- **Test Co-location**: Place `*.test.ts`/`*.spec.ts` beside implementations
+- **Named Exports**: Prefer named exports over default exports
+
+## Naming Conventions
+
+### Components & Types (PascalCase)
+```typescript
+// React Components
+export const CommitHeatmap: React.FC<CommitHeatmapProps> = ({ ... }) => { ... };
+
+// Interfaces and Types
+export interface CommitHeatmapProps { ... }
+export type TimePeriod = 'day' | 'week' | 'month';
+
+// Classes
+export class GitService { ... }
+export class RepositoryCoordinator { ... }
+```
+
+### Hooks (use + camelCase)
+```typescript
+export const useCommitFilters = () => { ... };
+export const useRepositoryData = (repoUrl: string) => { ... };
+```
+
+### Functions & Variables (camelCase)
+```typescript
+export const calculateCommitStats = (commits: Commit[]) => { ... };
+const filteredCommits = filterByAuthor(commits, author);
+let isLoading = false;
+```
+
+### Constants & Enums (SCREAMING_SNAKE_CASE)
+```typescript
+export const MAX_CACHE_ENTRIES = 10000;
+export const STREAMING_THRESHOLD = 50000;
+export const HTTP_STATUS = { OK: 200, ... } as const;
+
+export enum CacheTier {
+  MEMORY = 'MEMORY',
+  REDIS = 'REDIS',
+  DISK = 'DISK'
+}
+```
+
+### Environment Variables (UPPER_SNAKE_CASE)
+```bash
+PORT=3001
+REDIS_HOST=localhost
+CACHE_ENABLE_REDIS=true
+NODE_ENV=development
+```
+
+## File and Directory Naming
+
+### Frontend
+- **Components**: `apps/frontend/src/components/<ComponentName>/index.tsx` (PascalCase)
+- **Pages**: `apps/frontend/src/pages/<PageName>.tsx` (PascalCase)
+- **Hooks**: `apps/frontend/src/hooks/use<HookName>.ts` (camelCase with 'use' prefix)
+- **Utilities**: `apps/frontend/src/utils/<utilityName>.ts` (camelCase)
+- **Services**: `apps/frontend/src/services/<serviceName>.ts` (camelCase)
+
+### Backend
+- **Routes**: `apps/backend/src/routes/<resource>Routes.ts` (camelCase + 'Routes')
+- **Services**: `apps/backend/src/services/<domain>Service.ts` (camelCase + 'Service')
+- **Utilities**: `apps/backend/src/utils/<utilityName>.ts` (camelCase)
+- **Middlewares**: `apps/backend/src/middlewares/<middlewareName>.ts` (camelCase)
+
+### Shared Types
+- **Index file**: `packages/shared-types/src/index.ts` (all exports in single file)
+
+## Import Organization
+
+Group and order imports:
+
+1. External packages (React, Express, etc.)
+2. Internal modules (`@gitray/shared-types`, `@/...`)
+3. Relative imports
+4. Style imports (CSS)
+5. Test utilities (in test files)
+
+```typescript
+// 1. External
+import express from 'express';
+import { simpleGit } from 'simple-git';
+
+// 2. Internal workspace
+import { Commit, CommitFilterOptions } from '@gitray/shared-types';
+import { logger } from '@/services/logger';
+
+// 3. Relative
+import { validateRepoUrl } from '../utils/urlSecurity';
+import type { CacheOptions } from './cache';
+
+// 4. Styles (frontend)
+import './heatmap.css';
+
+// 5. Test utils (in tests)
+import { describe, it, expect, vi } from 'vitest';
+```
+
+## Async & Error Handling
+
+### Use async/await, not promise chains
+```typescript
+// ✅ GOOD
+async function getCommits(repoUrl: string): Promise<Commit[]> {
+  try {
+    const repoPath = await cloneRepository(repoUrl);
+    const commits = await extractCommits(repoPath);
+    return commits;
+  } catch (error) {
+    logger.error('Failed to get commits', { repoUrl, error });
+    throw new RepositoryError('Failed to fetch commits', repoUrl);
+  }
+}
+
+// ❌ BAD (promise chains)
+function getCommits(repoUrl: string): Promise<Commit[]> {
+  return cloneRepository(repoUrl)
+    .then(extractCommits)
+    .catch(error => { throw error; });
+}
+```
+
+### Never swallow errors
+```typescript
+// ✅ GOOD
+try {
+  await someOperation();
+} catch (error) {
+  logger.error('Operation failed', { error });
+  throw new GitrayError('Operation failed', HTTP_STATUS.INTERNAL_SERVER_ERROR);
+}
+
+// ❌ BAD
+try {
+  await someOperation();
+} catch (error) {
+  // Silent failure - never do this
+}
+```
+
+### Use typed error classes
+```typescript
+import { GitrayError, RepositoryError, ValidationError } from '@gitray/shared-types';
+
+throw new ValidationError('Invalid input', errors);
+throw new RepositoryError('Clone failed', repoUrl);
+throw new GitrayError('Internal error', HTTP_STATUS.INTERNAL_SERVER_ERROR);
+```
+
+## React Component Style
+
+### Functional components with proper typing
+```typescript
+import { FC } from 'react';
+
+interface CommitListProps {
+  commits: Commit[];
+  onCommitClick?: (commit: Commit) => void;
+}
+
+export const CommitList: FC<CommitListProps> = ({ commits, onCommitClick }) => {
+  return (
+    <div>
+      {commits.map((commit) => (
+        <div key={commit.sha} onClick={() => onCommitClick?.(commit)}>
+          {commit.message}
+        </div>
+      ))}
+    </div>
+  );
+};
+```
+
+### Follow Rules of Hooks
+```typescript
+// ✅ GOOD - hooks at top level
+const MyComponent: FC = () => {
+  const [data, setData] = useState([]);
+  const { loading, error } = useRepositoryData(repoUrl);
+
+  useEffect(() => {
+    fetchData();
+  }, []);
+
+  return <div>...</div>;
+};
+
+// ❌ BAD - conditional hooks
+const MyComponent: FC = () => {
+  if (condition) {
+    const [data, setData] = useState([]); // NEVER do this
+  }
+  return <div>...</div>;
+};
+```
+
+## Styling
+
+### Use Tailwind CSS classes
+```tsx
+<div className="flex items-center gap-4 rounded-lg p-4 shadow">
+  <span className="text-lg font-semibold">Title</span>
+</div>
+```
+
+### Avoid inline styles (except dynamic values)
+```tsx
+// ✅ GOOD - dynamic value
+<div style={{ width: `${progress}%` }}>...</div>
+
+// ❌ BAD - static styles should use Tailwind
+<div style={{ display: 'flex', padding: '16px' }}>...</div>
+``` + +## Backend Route Structure + +### RESTful conventions +```typescript +import { Router } from 'express'; +import { validateRequest } from '@/middlewares/validation'; +import { handleValidationErrors } from '@/utils/routeHelpers'; + +const router = Router(); + +// GET: Retrieve data +router.get('/repositories/summary', + repoUrlValidation, + handleValidationErrors, + async (req, res) => { ... } +); + +// POST: Create or process data +router.post('/repositories', + repoUrlValidation, + handleValidationErrors, + async (req, res) => { ... } +); + +export default router; +``` + +### Consistent error handling in routes +```typescript +import { setupRouteRequest, recordRouteSuccess, recordRouteError } from '@/utils/routeHelpers'; + +router.get('/endpoint', async (req, res) => { + const { logger, startTime } = setupRouteRequest(req, 'operation-name'); + + try { + const result = await performOperation(); + recordRouteSuccess(res, result, logger, startTime, 'operation-name'); + } catch (error) { + recordRouteError(res, error, logger, 'operation-name'); + } +}); +``` + +## Testing Standards + +### Test file naming +- Place beside source: `myModule.ts` → `myModule.test.ts` +- Use descriptive test names +- Use HappyPath Concept +- Use AAA Pattern + +### Test structure (Vitest) +```typescript +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { myFunction } from './myModule'; + +describe('myFunction', () => { + beforeEach(() => { + // Setup + }); + + afterEach(() => { + // Cleanup + vi.clearAllMocks(); + }); + + it('should return expected result for valid input', () => { + const result = myFunction(validInput); + expect(result).toEqual(expectedOutput); + }); + + it('should throw error for invalid input', () => { + expect(() => myFunction(invalidInput)).toThrow(ValidationError); + }); +}); +``` + +### Maintain ≥80% coverage +- Focus on critical paths +- Test error cases +- Mock external dependencies (Redis, Git, filesystem) + +## Code Quality Rules + +### No `any` without justification +```typescript +// ✅ GOOD +function processData(data: Commit[]): CommitStats { ... } + +// ❌ BAD +function processData(data: any): any { ... } + +// ⚠️ ACCEPTABLE with comment explaining why +function legacyAPI(data: any): any { // External API with unknown shape + // ... +} +``` + +### Prefer readonly where appropriate +```typescript +interface Config { + readonly port: number; + readonly redisHost: string; +} + +const config: Readonly = { ... }; +``` + +### Use const assertions for constants +```typescript +export const HTTP_STATUS = { + OK: 200, + BAD_REQUEST: 400, + INTERNAL_SERVER_ERROR: 500 +} as const; + +export type HttpStatus = typeof HTTP_STATUS[keyof typeof HTTP_STATUS]; +``` + +## Documentation + +### JSDoc for public APIs +```typescript +/** + * Aggregates commits by time period for heatmap visualization. + * + * @param commits - Array of commits to aggregate + * @param timePeriod - Aggregation period ('day' | 'week' | 'month' | 'year') + * @param filterOptions - Optional filtering criteria + * @returns Aggregated commit data with metadata + * @throws {ValidationError} If timePeriod is invalid + */ +export function aggregateCommits( + commits: Commit[], + timePeriod: TimePeriod, + filterOptions?: CommitFilterOptions +): CommitHeatmapData { ... } +``` + +### Complex logic comments +```typescript +// Use temporal locality: recently used entries are more likely to be used again. +// This implements a 3-tier LRU cache with 60/25/15 memory allocation. 
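+// e.g. maxEntries = 10000 → raw: 6000, filtered: 2500, aggregated: 1500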
+const tierSizes = calculateTierSizes(maxEntries); +``` + +## Commit Message Convention + +Follow Conventional Commits: +``` +feat: add code churn analysis endpoint +fix: resolve memory leak in cache manager +refactor: extract route helpers to reduce duplication +test: add integration tests for repository coordinator +docs: update API documentation for /summary endpoint +perf: optimize commit aggregation for large datasets +style: format code with prettier +chore: update dependencies +``` + +## Common Mistakes to Avoid + +1. ❌ Using `console.log` in production code (use `logger`) +2. ❌ Skipping `pnpm build:shared-types` before building apps +3. ❌ Creating duplicate types instead of importing from `@gitray/shared-types` +4. ❌ Using relative paths when `@/` alias exists +5. ❌ Adding `node_modules`, `dist`, or build outputs to git +6. ❌ Introducing unhandled promise rejections +7. ❌ Forgetting to update both backend and frontend when API contracts change +8. ❌ Mixing feature changes with refactoring in same commit +9. ❌ Not adding tests for new features +10. ❌ Default exports for components/utilities (use named exports) diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 00000000..1fc6e902 --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1,66 @@ +# GitRay - Project Overview + +## Purpose +GitRay is a professional Git visualization tool that transforms repository commit history into beautiful, interactive heatmaps and activity calendars. It provides deep insights into development patterns and team collaboration. + +## Key Features +- **Activity Heatmaps**: GitHub-style contribution calendars with customizable time periods +- **Commit Analysis**: Detailed commit statistics and author breakdowns +- **Code Churn Analysis**: Track code changes and stability patterns with risk level indicators +- **Interactive Filtering**: Filter by authors, date ranges, and commit patterns +- **Multi-tier Caching**: Intelligent caching system with Redis, disk, and memory tiers +- **Streaming Support**: Handle large repositories (50k+ commits) efficiently +- **Repository Coordination**: Prevents duplicate clones with shared repository management + +## Technology Stack + +### Backend +- **Runtime**: Node.js 18+ with TypeScript 5.7 +- **Framework**: Express 5 +- **Git Operations**: simple-git +- **Caching**: Redis 7 (via ioredis) +- **Logging**: Winston with daily rotate file +- **Metrics**: Prometheus (prom-client) +- **Validation**: Express-validator, Zod +- **Security**: Helmet, CORS, express-rate-limit + +### Frontend +- **Framework**: React 19 with TypeScript 5.7 +- **Build Tool**: Vite 6 +- **Styling**: Tailwind CSS 4 +- **Visualizations**: react-calendar-heatmap, ApexCharts +- **Animations**: Rive (@rive-app/react-canvas) +- **HTTP Client**: Axios +- **Date Handling**: date-fns + +### Shared Infrastructure +- **Package Manager**: pnpm 10.16.1 (workspaces) +- **Testing**: Vitest 3.2.3 (86.4% coverage) +- **Performance Testing**: k6 for backend load testing +- **Linting**: ESLint 9 (flat config) +- **Formatting**: Prettier 3 +- **Git Hooks**: Husky + lint-staged +- **CI/CD**: GitHub Actions (assumed from .github directory) + +## Monorepo Structure +- **apps/backend**: Express API server +- **apps/frontend**: React UI application +- **packages/shared-types**: Shared TypeScript types and error classes +- **scripts/**: Development and maintenance scripts + +## Architecture Principles +- **Strict TypeScript**: No `any` types, strict 
type checking enabled +- **Monorepo with Project References**: TypeScript project references for incremental builds +- **Shared Type Safety**: All types exported from @gitray/shared-types +- **Layered Architecture**: Routes → Services → Utils pattern in backend +- **Component-Based Frontend**: Functional React components with hooks +- **Comprehensive Testing**: Unit, integration, and performance tests +- **Professional Logging**: Winston instead of console.log +- **Performance Optimized**: Multi-tier caching, streaming, memory pressure management + +## Current Development Branch +Main development branch: `dev` +Current working branch: `120-enhancementscopebackend-refactor-old-routes-to-use-unified-cache-service-retain-redis-remove-manual-caching` + +## License +ISC License diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 00000000..ca7d1247 --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1,236 @@ +# GitRay - Suggested Development Commands + +## Essential Commands (Most Commonly Used) + +### Development Environment +```bash +pnpm app # Interactive development environment manager +pnpm dev # Build shared types + start all services with hot reload +pnpm dev:frontend # Start frontend only (Vite on port 5173) +pnpm dev:backend # Start backend only (Express on port 3001) +``` + +### Testing +```bash +pnpm test # Run all tests across workspace +pnpm test:frontend # Frontend tests only +pnpm test:backend # Backend tests only +pnpm test:watch # Watch mode for development +pnpm test:watch:changed # Watch changed files only +pnpm test:coverage # Generate combined coverage report (86.4%+) +pnpm test:ui # Open Vitest UI +``` + +### Code Quality +```bash +pnpm lint # Run ESLint on all code +pnpm lint:fix # Auto-fix linting issues +pnpm lint:md # Lint markdown files +pnpm format # Format all files with Prettier +``` + +### Building +```bash +pnpm build # Build everything (shared-types → backend → frontend) +pnpm build:shared-types # Build shared types only (REQUIRED before apps) +pnpm build:apps # Build backend + frontend +``` + +### Environment Management +```bash +pnpm env:status # Show service status (frontend, backend, Redis) +pnpm env:stop # Stop all services +pnpm env:clean # Clean environment (stop services + clean cache) +``` + +### Cleanup +```bash +pnpm clean # Clean dist + cache + node_modules +pnpm clean:dist # Remove build artifacts only +pnpm clean:cache # Remove Vite/ESLint/nyc caches +pnpm clean:node_modules # Remove all node_modules (deep clean) +pnpm clean:all # Deep clean including logs +pnpm rebuild # Full clean + install + build +``` + +## Installation & Setup + +```bash +# Initial setup +pnpm install # Install all workspace dependencies + +# Start Redis (via Docker) +docker run --name gitray-redis -d -p 6379:6379 redis:7-alpine + +# Check Redis status +docker ps | grep redis +docker restart gitray-redis # If needed + +# Build before first run +pnpm build +``` + +## Application Management Scripts + +```bash +pnpm start # Full development setup (via scripts/start.sh) +pnpm quick # Frontend-only quick start +``` + +## Testing Variants + +### Backend-Specific +```bash +pnpm --filter backend test # Backend unit tests +pnpm --filter backend test:coverage # Backend coverage +pnpm --filter backend test:perf # k6 performance tests +pnpm --filter backend test:perf:smoke # Quick smoke test (30s) +pnpm --filter backend test:perf:stress # Stress test (2x load) +``` + +### Frontend-Specific +```bash +pnpm 
--filter frontend test # Frontend unit tests +pnpm --filter frontend test:coverage # Frontend coverage +``` + +### Coverage Details +```bash +pnpm test:coverage:frontend # Frontend coverage (apps/frontend/coverage) +pnpm test:coverage:backend # Backend coverage (apps/backend/coverage) +pnpm test:coverage:merge # Merge coverage reports +pnpm test:coverage:report # Generate HTML/LCOV/text reports +pnpm clean:coverage-output # Clean coverage artifacts +``` + +## Git Hooks (Automated via Husky) + +### Pre-commit (Automated) +- ESLint auto-fix on `*.{ts,tsx,js,jsx}` +- Prettier format on code files +- Markdownlint on `*.md` files +- Prettier format on `*.{json,yml,yaml}` + +### Manual Hook Setup +```bash +pnpm prepare # Install Husky hooks +``` + +## Debugging & Troubleshooting + +```bash +# Check what's using a port +lsof -i :3001 # Backend port +lsof -i :5173 # Frontend port +lsof -i :6379 # Redis port + +# Kill process by PID +kill -9 + +# Check Redis connection +docker logs gitray-redis + +# View application logs +tail -f logs/combined.log +tail -f logs/error.log + +# Memory and system status +pnpm env:status +``` + +## Performance Testing (k6) + +```bash +# Standard load test +pnpm --filter backend test:perf + +# Quick smoke test (5 VUs, 30 seconds) +pnpm --filter backend test:perf:smoke + +# Stress test (2x multiplier) +pnpm --filter backend test:perf:stress + +# Custom k6 test +cd apps/backend +k6 run --vus 10 --duration 60s perf/load-test.ts +``` + +## Useful System Commands (Linux) + +### File Operations +```bash +ls -la # List files with details +find . -name "*.ts" # Find TypeScript files +grep -r "pattern" src/ # Search in files +``` + +### Git Operations +```bash +git status # Current branch status +git log --oneline -10 # Recent commits +git diff # View changes +git checkout dev # Switch to dev branch +``` + +## Build Order (IMPORTANT!) + +**Always build in this order:** +1. `pnpm build:shared-types` (or `pnpm --filter @gitray/shared-types build`) +2. `pnpm build:apps` (or manually: backend → frontend) + +**Why?** Backend and frontend depend on built types from `@gitray/shared-types`. + +## Environment Variables + +Create `.env` in project root: +```bash +# Server +PORT=3001 +CORS_ORIGIN=http://localhost:5173 + +# Redis +REDIS_HOST=localhost +REDIS_PORT=6379 + +# Caching +CACHE_ENABLE_REDIS=true +CACHE_ENABLE_DISK=true + +# Development +NODE_ENV=development +LOG_LEVEL=info +DEBUG_CACHE_LOGGING=false +``` + +## Quick Reference: Common Workflows + +### Starting Development +```bash +pnpm app # Interactive menu +# OR +pnpm dev # Direct start (recommended) +``` + +### Before Committing +```bash +pnpm lint # Check for issues +pnpm test # Run tests +pnpm format # Format code +``` + +### After Pulling Changes +```bash +pnpm install # Update dependencies +pnpm build:shared-types # Rebuild shared types +``` + +### Adding New Dependencies +```bash +# Root level +pnpm add -D + +# Specific workspace +pnpm --filter backend add +pnpm --filter frontend add +pnpm --filter @gitray/shared-types add +``` diff --git a/.serena/memories/task_completion_checklist.md b/.serena/memories/task_completion_checklist.md new file mode 100644 index 00000000..5af2af20 --- /dev/null +++ b/.serena/memories/task_completion_checklist.md @@ -0,0 +1,305 @@ +# GitRay - Task Completion Checklist + +## Before Committing Code + +### 1. Code Quality Checks +```bash +# Run linting +pnpm lint + +# Fix auto-fixable issues +pnpm lint:fix + +# Lint markdown files (if docs changed) +pnpm lint:md +``` + +### 2. 
Run Tests +```bash +# Run all tests +pnpm test + +# Or run specific workspace tests +pnpm test:frontend # Frontend only +pnpm test:backend # Backend only + +# Check coverage (maintain ≥80%) +pnpm test:coverage +``` + +### 3. Build Validation +```bash +# Ensure clean build +pnpm build + +# Or incrementally +pnpm build:shared-types # If types changed +pnpm build:apps # If app code changed +``` + +### 4. Manual Testing +- [ ] Test the feature/fix in the running application +- [ ] Verify frontend behavior (`pnpm dev:frontend`) +- [ ] Verify backend endpoints (`pnpm dev:backend`) +- [ ] Check browser console for errors +- [ ] Check backend logs for errors + +### 5. Type Safety +- [ ] No TypeScript errors (`pnpm build`) +- [ ] No use of `any` without justification +- [ ] Proper types imported from `@gitray/shared-types` +- [ ] All new functions/components properly typed + +## Code Review Self-Checklist + +### General +- [ ] Code follows project conventions (see `coding_standards.md`) +- [ ] No debug code (`console.log`, commented code, etc.) +- [ ] Descriptive variable and function names +- [ ] Complex logic has explanatory comments +- [ ] No duplicate code (DRY principle) + +### TypeScript +- [ ] Strict type checking passes +- [ ] No `any` types without justification +- [ ] Proper error handling with typed error classes +- [ ] Async functions use `async/await`, not promise chains + +### React Components (Frontend) +- [ ] Functional components with proper typing +- [ ] Hooks follow Rules of Hooks +- [ ] Proper key props for lists +- [ ] No inline functions in render (performance) +- [ ] Tailwind CSS for styling (avoid inline styles) + +### Backend Routes & Services +- [ ] Proper error handling with try/catch +- [ ] Use Winston logger, not `console.log` +- [ ] Input validation with express-validator or Zod +- [ ] HTTP status codes from `HTTP_STATUS` constants +- [ ] Route helpers used for consistency (`setupRouteRequest`, etc.) + +### Testing +- [ ] New features have tests +- [ ] Bug fixes have regression tests +- [ ] Test coverage maintained (≥80%) +- [ ] Tests are meaningful (not just coverage padding) +- [ ] Mocks are used for external dependencies + +### Documentation +- [ ] README updated if user-facing changes +- [ ] CLAUDE.md updated if guidelines change +- [ ] JSDoc comments for public APIs +- [ ] Complex algorithms explained + +## When Changing Shared Types + +If you modified `packages/shared-types/src/index.ts`: + +1. **Rebuild shared types** + ```bash + pnpm build:shared-types + ``` + +2. **Update imports** in backend and frontend + ```typescript + import { YourNewType } from '@gitray/shared-types'; + ``` + +3. **Update both backend and frontend** if API contract changed + - Backend: Route handlers, services + - Frontend: API client, components + +4. **Run tests across workspace** + ```bash + pnpm test # All workspaces + ``` + +## When Adding Dependencies + +### Root dependencies +```bash +pnpm add -D # Dev dependency at root +``` + +### Workspace dependencies +```bash +pnpm --filter backend add +pnpm --filter frontend add +pnpm --filter @gitray/shared-types add +``` + +### After adding dependencies +- [ ] Verify `pnpm-lock.yaml` is updated +- [ ] Test that build still works +- [ ] Update README if dependency is significant + +## When Creating a Pull Request + +### 1. Ensure Clean Branch +```bash +# Sync with main development branch +git checkout dev +git pull origin dev + +# Rebase your feature branch +git checkout your-feature-branch +git rebase dev +``` + +### 2. 
Commit Message +Follow Conventional Commits format: +``` +feat: add code churn risk indicators +fix: resolve cache eviction race condition +refactor: extract route success/error helpers +test: add integration tests for repository summary +docs: update API documentation for /churn endpoint +perf: optimize commit aggregation algorithm +``` + +### 3. PR Description Template +```markdown +## Description +Brief description of changes + +## Type of Change +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Refactoring (no functional changes) +- [ ] Documentation update + +## Testing +- [ ] Tests pass locally +- [ ] Added tests for new features +- [ ] Coverage maintained ≥80% + +## Checklist +- [ ] Code follows project conventions +- [ ] Self-reviewed the code +- [ ] Commented complex logic +- [ ] Updated documentation +- [ ] No breaking changes (or documented if necessary) +``` + +## Performance Considerations + +When implementing features, consider: + +- [ ] **Caching**: Can this be cached? Which tier? +- [ ] **Memory**: Will this consume significant memory? +- [ ] **Streaming**: For large datasets, should streaming be used? +- [ ] **Repository Coordination**: Use `withSharedRepository()` for Git ops +- [ ] **Pagination**: Large result sets should be paginated +- [ ] **Error Recovery**: Graceful degradation on failures + +## Security Considerations + +- [ ] Input validation for all user inputs +- [ ] URL validation for repository URLs +- [ ] No exposure of sensitive data in logs +- [ ] Proper error messages (don't leak internals) +- [ ] Rate limiting on new endpoints + +## Specific Task Types + +### Adding a New API Endpoint + +1. **Define types** in `packages/shared-types/src/index.ts` +2. **Build shared types**: `pnpm build:shared-types` +3. **Create route** in `apps/backend/src/routes/` +4. **Add validation** middleware +5. **Implement service logic** in `apps/backend/src/services/` +6. **Add tests** for route and service +7. **Update frontend API client** in `apps/frontend/src/services/api.ts` +8. **Create/update components** to consume the endpoint +9. **Test end-to-end** +10. **Update documentation** (README, API docs) + +### Fixing a Bug + +1. **Write failing test** that reproduces the bug +2. **Fix the bug** with minimal changes +3. **Verify test passes** +4. **Run full test suite** +5. **Test manually** +6. **Commit with `fix:` prefix** + +### Refactoring + +1. **Ensure tests exist** and pass +2. **Make refactoring changes** (behavior unchanged) +3. **Verify tests still pass** (no changes to tests needed) +4. **Verify build works** +5. **Commit with `refactor:` prefix** +6. **Keep refactor separate** from feature changes + +## Environment-Specific Checks + +### Development +- [ ] Redis running (`docker ps | grep redis`) +- [ ] Ports available (3001, 5173, 6379) +- [ ] Environment variables set (`.env` file) +- [ ] Logs accessible (`logs/` directory) + +### Before Production Deploy (Future) +- [ ] All tests pass in CI +- [ ] Coverage ≥80% +- [ ] No TypeScript errors +- [ ] No ESLint errors +- [ ] Security audit passed (`pnpm audit`) +- [ ] Environment variables configured +- [ ] Redis/database connections verified +- [ ] Monitoring configured (Prometheus, logs) + +## Final Checks Before Git Push + +```bash +# 1. Status check +git status + +# 2. Ensure no unintended changes +git diff + +# 3. 
Run full validation +pnpm lint && pnpm test && pnpm build + +# 4. Commit with conventional commit message +git add . +git commit -m "feat: your feature description" + +# 5. Push +git push origin your-branch-name +``` + +## Automated Checks (Pre-commit Hook) + +The project uses Husky with lint-staged for automatic checks: +- **TypeScript/JavaScript**: ESLint auto-fix + Prettier +- **Markdown**: Markdownlint +- **JSON/YAML**: Prettier formatting + +These run automatically on `git commit`. If they fail, fix issues before committing. + +## Quick Reference + +### Validation Pipeline +``` +Code → Lint → Format → Test → Build → Manual Test → Commit +``` + +### Must-Run Before Commit +```bash +pnpm lint && pnpm test && pnpm build +``` + +### If Shared Types Changed +```bash +pnpm build:shared-types && pnpm test +``` + +### If Unsure, Run Everything +```bash +pnpm rebuild && pnpm lint && pnpm test +``` diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 00000000..9551299c --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,82 @@ +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp csharp_omnisharp +# dart elixir elm erlang fortran go +# haskell java julia kotlin lua markdown +# nix perl php python python_jedi r +# rego ruby ruby_solargraph rust scala swift +# terraform typescript typescript_vts yaml zig +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# Special requirements: +# - csharp: Requires the presence of a .sln file in the project folder. +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: + - typescript + - bash +encoding: 'utf-8' + +# whether to use the project's gitignore file to ignore files +# Added on 2025-04-07 +ignore_all_files_in_gitignore: true + +# list of additional paths to ignore +# same syntax as gitignore, so you can use * and ** +# Was previously called `ignored_dirs`, please update your config if you are using that. +# Added (renamed) on 2025-04-07 +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details. +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project by name. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_lines`: Deletes a range of lines within a file. +# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. +# * `execute_shell_command`: Executes a shell command. +# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. +# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). 
+# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Gets the initial instructions for the current project. +# Should only be used in settings where the system prompt cannot be set, +# e.g. in clients you have no control over, like Claude Desktop. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_at_line`: Inserts content at a given line in a file. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. +# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: '' + +project_name: 'gitray' +included_optional_tools: [] diff --git a/FRONTEND_API_MIGRATION.md b/FRONTEND_API_MIGRATION.md new file mode 100644 index 00000000..8e819e75 --- /dev/null +++ b/FRONTEND_API_MIGRATION.md @@ -0,0 +1,1117 @@ +# Frontend API Migration Guide + +## Overview + +This guide documents the backend API changes from PR #122 (Issue #120) and provides +complete migration instructions for **any frontend implementation** consuming the GitRay +backend API. + +**Scope**: This document is frontend-agnostic and covers general API interaction +patterns, not specific to the current frontend implementation (which is being replaced). 
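At its core, the migration replaces JSON POST bodies with flat query strings. A
minimal, framework-agnostic sketch of that shared pattern (the `buildQuery` helper
is illustrative and not part of the API):

```typescript
// Flatten a params object into the query-string conventions used by the new
// GET endpoints: comma-separated lists, ISO 8601 date strings, and omission
// of undefined values.
type QueryValue = string | number | string[] | undefined;

function buildQuery(params: Record<string, QueryValue>): string {
  const search = new URLSearchParams();
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined) continue;
    search.append(key, Array.isArray(value) ? value.join(',') : String(value));
  }
  return search.toString();
}

// buildQuery({ repoUrl, authors: ['Jonas', 'Contributor2'], fromDate: '2024-01-01' })
// → "repoUrl=...&authors=Jonas%2CContributor2&fromDate=2024-01-01"
```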
+ +**Key Changes**: + +- All POST endpoints → GET endpoints with query parameters +- Enhanced pagination support +- Filter parameters flattened to query params +- Improved response structures with nested data +- Multi-tier caching for better performance + +--- + +## Table of Contents + +- [API Endpoint Changes](#api-endpoint-changes) +- [Detailed Endpoint Documentation](#detailed-endpoint-documentation) + - [1. GET /api/repositories/commits](#1-get-apirepositoriescommits) + - [2. GET /api/repositories/heatmap](#2-get-apirepositoriesheatmap) + - [3. GET /api/repositories/contributors](#3-get-apirepositoriescontributors) + - [4. GET /api/repositories/churn](#4-get-apirepositorieschurn) + - [5. GET /api/repositories/summary](#5-get-apirepositoriessummary) + - [6. GET /api/repositories/full-data](#6-get-apirepositories full-data) +- [Migration Patterns](#migration-patterns) +- [Query Parameter Guidelines](#query-parameter-guidelines) +- [Response Structure Changes](#response-structure-changes) +- [Error Handling](#error-handling) +- [Testing Recommendations](#testing-recommendations) +- [Common Pitfalls](#common-pitfalls) + +--- + +## API Endpoint Changes + +### Complete Endpoint Mapping + +| **Old Endpoint** | **New Endpoint** | **Method** | **Key Differences** | +|------------------|------------------|------------|---------------------| +| `POST /api/repositories` | `GET /api/repositories/commits` | POST→GET | Pagination added | +| `POST /api/repositories/heatmap` | `GET /api/repositories/heatmap` | POST→GET | Query params | +| `POST /api/repositories/contributors` | `GET /api/repositories/contributors` | POST→GET | Filters | +| `POST /api/repositories/churn` | `GET /api/repositories/churn` | POST→GET | Churn filters | +| `POST /api/repositories/full-data` | `GET /api/repositories/full-data` | POST→GET | Pagination | +| `GET /api/repositories/summary` | `GET /api/repositories/summary` | No change | Improved caching | + +--- + +## Detailed Endpoint Documentation + +### 1. GET /api/repositories/commits + +**Purpose**: Retrieve paginated commit history for a repository. + +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL + page?: number; // Optional - Page number (default: 1) + limit?: number; // Optional - Items per page (default: 100) +} +``` + +**Example Request**: + +```bash +GET /api/repositories/commits?repoUrl=https://github.com/jonasyr/gitray.git&page=1&limit=50 +``` + +**Response Structure**: + +```typescript +{ + commits: Commit[]; // Array of commit objects + page: number; // Current page number + limit: number; // Items per page +} +``` + +**Sample Response**: + +```json +{ + "commits": [ + { + "sha": "abc123...", + "message": "feat: add new feature", + "author": { + "name": "Jonas", + "email": "jonas@example.com" + }, + "date": "2024-12-01T10:30:00Z", + "stats": { + "additions": 150, + "deletions": 30 + } + } + ], + "page": 1, + "limit": 50 +} +``` + +**Migration Example**: + +```typescript +// OLD (POST) +const response = await fetch('/api/repositories', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ repoUrl }) +}); + +// NEW (GET) +const params = new URLSearchParams({ + repoUrl, + page: '1', + limit: '50' +}); +const response = await fetch(`/api/repositories/commits?${params}`); +const { commits, page, limit } = await response.json(); +``` + +--- + +### 2. GET /api/repositories/heatmap + +**Purpose**: Retrieve commit activity heatmap data with optional filters. 
+ +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL + author?: string; // Optional - Filter by single author + authors?: string; // Optional - Comma-separated author list + fromDate?: string; // Optional - Start date (ISO 8601) + toDate?: string; // Optional - End date (ISO 8601) +} +``` + +**Example Request**: + +```bash +GET /api/repositories/heatmap?repoUrl=https://github.com/user/repo.git&fromDate=2024-01-01&toDate=2024-12-31 +``` + +**Response Structure**: + +```typescript +{ + heatmapData: { + timePeriod: 'day' | 'week' | 'month'; + data: Array<{ + date: string; // ISO 8601 date + count: number; // Commit count + authors: number; // Unique author count + }>; + metadata?: { + totalCommits: number; + dateRange: { start: string; end: string }; + }; + } +} +``` + +**Sample Response**: + +```json +{ + "heatmapData": { + "timePeriod": "day", + "data": [ + { "date": "2024-01-01", "count": 5, "authors": 2 }, + { "date": "2024-01-02", "count": 3, "authors": 1 } + ], + "metadata": { + "totalCommits": 480, + "dateRange": { + "start": "2024-01-01", + "end": "2024-12-31" + } + } + } +} +``` + +**Migration Example**: + +```typescript +// OLD (POST with nested filterOptions) +const response = await fetch('/api/repositories/heatmap', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + repoUrl, + filterOptions: { + author: 'john', + fromDate: '2024-01-01', + toDate: '2024-12-31' + } + }) +}); + +// NEW (GET with flat query params) +const params = new URLSearchParams({ repoUrl }); +if (author) params.append('author', author); +if (fromDate) params.append('fromDate', fromDate); +if (toDate) params.append('toDate', toDate); + +const response = await fetch(`/api/repositories/heatmap?${params}`); +const { heatmapData } = await response.json(); +``` + +--- + +### 3. GET /api/repositories/contributors + +**Purpose**: Retrieve top contributors with statistics and optional filters. + +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL + author?: string; // Optional - Filter by single author + authors?: string; // Optional - Comma-separated author list + fromDate?: string; // Optional - Start date (ISO 8601) + toDate?: string; // Optional - End date (ISO 8601) +} +``` + +**Example Request**: + +```bash +GET /api/repositories/contributors?repoUrl=https://github.com/user/repo.git&fromDate=2024-01-01 +``` + +**Response Structure**: + +```typescript +{ + contributors: Array<{ + name: string; + email: string; + commits: number; + additions: number; + deletions: number; + percentage: number; // Contribution percentage + }> +} +``` + +**Sample Response**: + +```json +{ + "contributors": [ + { + "name": "Jonas", + "email": "jonas@example.com", + "commits": 280, + "additions": 15420, + "deletions": 3210, + "percentage": 58.3 + }, + { + "name": "Contributor2", + "email": "contrib@example.com", + "commits": 200, + "additions": 8500, + "deletions": 1200, + "percentage": 41.7 + } + ] +} +``` + +**Migration Example**: + +```typescript +// OLD (POST) +const response = await fetch('/api/repositories/contributors', { + method: 'POST', + body: JSON.stringify({ repoUrl, filterOptions }) +}); + +// NEW (GET) +const params = new URLSearchParams({ repoUrl }); +if (fromDate) params.append('fromDate', fromDate); +if (toDate) params.append('toDate', toDate); + +const response = await fetch(`/api/repositories/contributors?${params}`); +const { contributors } = await response.json(); +``` + +--- + +### 4. 
GET /api/repositories/churn + +**Purpose**: Retrieve code churn analysis showing file change frequency. + +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL + fromDate?: string; // Optional - Analysis start date (ISO 8601) + toDate?: string; // Optional - Analysis end date (ISO 8601) + minChanges?: string; // Optional - Minimum changes filter (numeric) + extensions?: string; // Optional - Comma-separated file extensions (e.g., 'ts,tsx,js') +} +``` + +**Example Request**: + +```bash +GET /api/repositories/churn?repoUrl=https://github.com/user/repo.git&minChanges=10&extensions=ts,tsx +``` + +**Response Structure**: + +```typescript +{ + churnData: { + files: Array<{ + path: string; + additions: number; + deletions: number; + changes: number; + riskLevel: 'low' | 'medium' | 'high' | 'critical'; + }>; + summary: { + totalFiles: number; + highRiskFiles: number; + averageChanges: number; + }; + metadata: { + dateRange: { start: string; end: string }; + filters: { + minChanges?: number; + extensions?: string[]; + }; + }; + } +} +``` + +**Sample Response**: + +```json +{ + "churnData": { + "files": [ + { + "path": "src/services/cache.ts", + "additions": 450, + "deletions": 120, + "changes": 570, + "riskLevel": "high" + } + ], + "summary": { + "totalFiles": 87, + "highRiskFiles": 12, + "averageChanges": 45.3 + } + } +} +``` + +**Migration Example**: + +```typescript +// OLD (POST) +const response = await fetch('/api/repositories/churn', { + method: 'POST', + body: JSON.stringify({ repoUrl, filterOptions }) +}); + +// NEW (GET with churn-specific params) +const params = new URLSearchParams({ repoUrl }); +if (minChanges) params.append('minChanges', minChanges.toString()); +if (extensions && extensions.length > 0) { + params.append('extensions', extensions.join(',')); +} +if (fromDate) params.append('fromDate', fromDate); + +const response = await fetch(`/api/repositories/churn?${params}`); +const { churnData } = await response.json(); +``` + +--- + +### 5. GET /api/repositories/summary + +**Purpose**: Retrieve repository metadata and statistics. 
+ +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL +} +``` + +**Example Request**: + +```bash +GET /api/repositories/summary?repoUrl=https://github.com/jonasyr/gitray.git +``` + +**Response Structure**: + +```typescript +{ + summary: { + repository: { + name: string; + owner: string; + url: string; + platform: 'github' | 'gitlab' | 'bitbucket' | 'other'; + defaultBranch?: string; + }; + created: { + date: string; // ISO 8601 + source: 'git-log' | 'github-api' | 'gitlab-api' | 'estimated'; + }; + age: { + years: number; + months: number; + formatted: string; // e.g., "2.5y" + }; + lastCommit: { + date: string; // ISO 8601 + relativeTime: string; // e.g., "2 days ago" + sha: string; + author: string; + }; + stats: { + totalCommits: number; // ⚠️ Important: nested under stats + contributors: number; // ⚠️ Important: nested under stats + status: 'active' | 'inactive' | 'archived'; + }; + metadata: { + cached: boolean; + dataSource: 'git-sparse-clone' | 'cache'; + createdDateAccuracy: 'exact' | 'approximate'; + bandwidthSaved?: string; + lastUpdated: string; // ISO 8601 + }; + } +} +``` + +**Sample Response**: + +```json +{ + "summary": { + "repository": { + "name": "gitray", + "owner": "jonasyr", + "url": "https://github.com/jonasyr/gitray.git", + "platform": "github" + }, + "stats": { + "totalCommits": 480, + "contributors": 6, + "status": "active" + }, + "lastCommit": { + "date": "2024-12-02T08:15:00Z", + "relativeTime": "2 hours ago", + "sha": "abc123def456", + "author": "Jonas" + }, + "metadata": { + "cached": true, + "dataSource": "cache" + } + } +} +``` + +**⚠️ Critical Migration Note**: + +```typescript +// ❌ WRONG - Old structure (will be undefined) +const totalCommits = response.totalCommits; +const contributors = response.totalContributors; + +// ✅ CORRECT - New nested structure +const totalCommits = response.summary.stats.totalCommits; +const contributors = response.summary.stats.contributors; // Note: field is 'contributors', not 'totalContributors' +``` + +--- + +### 6. GET /api/repositories/full-data + +**Purpose**: Retrieve both commits and heatmap data in a single request with pagination and filters. 
+ +**Query Parameters**: + +```typescript +{ + repoUrl: string; // Required - Git repository URL + page?: number; // Optional - Page number (default: 1) + limit?: number; // Optional - Items per page (default: 100) + author?: string; // Optional - Filter by single author + authors?: string; // Optional - Comma-separated author list + fromDate?: string; // Optional - Start date (ISO 8601) + toDate?: string; // Optional - End date (ISO 8601) +} +``` + +**Example Request**: + +```bash +GET /api/repositories/full-data?repoUrl=https://github.com/user/repo.git&page=1&limit=20&fromDate=2024-01-01 +``` + +**Response Structure**: + +```typescript +{ + commits: Commit[]; // Paginated commits + heatmapData: CommitHeatmapData; // Filtered heatmap data + page: number; + limit: number; + isValidHeatmap: boolean; // Backend validation flag +} +``` + +**Sample Response**: + +```json +{ + "commits": [ + { + "sha": "abc123", + "message": "Initial commit", + "author": { "name": "Jonas", "email": "jonas@example.com" }, + "date": "2024-01-01T10:00:00Z" + } + ], + "heatmapData": { + "timePeriod": "day", + "data": [ + { "date": "2024-01-01", "count": 1, "authors": 1 } + ] + }, + "page": 1, + "limit": 20, + "isValidHeatmap": true +} +``` + +**Migration Example**: + +```typescript +// OLD (POST) +const response = await fetch('/api/repositories/full-data', { + method: 'POST', + body: JSON.stringify({ + repoUrl, + timePeriod: 'month', + filterOptions: { fromDate, toDate } + }) +}); + +// NEW (GET) +const params = new URLSearchParams({ + repoUrl, + page: '1', + limit: '100' +}); +if (fromDate) params.append('fromDate', fromDate); +if (toDate) params.append('toDate', toDate); + +const response = await fetch(`/api/repositories/full-data?${params}`); +const { commits, heatmapData, page, limit } = await response.json(); +``` + +--- + +## Migration Patterns + +### Pattern 1: Basic POST → GET Migration + +```typescript +// Before +async function fetchData(repoUrl: string) { + const response = await apiClient.post('/api/repositories', { repoUrl }); + return response.data; +} + +// After +async function fetchData(repoUrl: string) { + const params = new URLSearchParams({ repoUrl }); + const response = await apiClient.get('/api/repositories/commits', { params }); + return response.data; +} +``` + +### Pattern 2: Handling Optional Filters + +```typescript +function buildQueryParams( + repoUrl: string, + filters?: { + author?: string; + authors?: string[]; + fromDate?: string; + toDate?: string; + } +): URLSearchParams { + const params = new URLSearchParams({ repoUrl }); + + if (filters?.author) { + params.append('author', filters.author); + } + + if (filters?.authors && filters.authors.length > 0) { + params.append('authors', filters.authors.join(',')); + } + + if (filters?.fromDate) { + params.append('fromDate', filters.fromDate); + } + + if (filters?.toDate) { + params.append('toDate', filters.toDate); + } + + return params; +} + +// Usage +const params = buildQueryParams(repoUrl, { fromDate: '2024-01-01' }); +const response = await fetch(`/api/repositories/heatmap?${params}`); +``` + +### Pattern 3: Pagination Helper + +```typescript +interface PaginationParams { + page?: number; + limit?: number; +} + +function addPaginationParams( + params: URLSearchParams, + pagination?: PaginationParams +): void { + const page = pagination?.page ?? 1; + const limit = pagination?.limit ?? 
100;

  params.append('page', page.toString());
  params.append('limit', limit.toString());
}

// Usage
const params = new URLSearchParams({ repoUrl });
addPaginationParams(params, { page: 2, limit: 50 });
const response = await fetch(`/api/repositories/commits?${params}`);
```

### Pattern 4: Error Handling

```typescript
async function fetchWithErrorHandling<T>(
  endpoint: string,
  params: URLSearchParams
): Promise<T> {
  try {
    const response = await fetch(`${endpoint}?${params}`);

    if (!response.ok) {
      const error = await response.json();
      throw new Error(error.message || `HTTP ${response.status}`);
    }

    return await response.json();
  } catch (error) {
    console.error(`Failed to fetch ${endpoint}:`, error);
    throw error;
  }
}

// Usage
const params = new URLSearchParams({ repoUrl });
const data = await fetchWithErrorHandling('/api/repositories/summary', params);
```

---

## Query Parameter Guidelines

### Arrays (authors, extensions)

**Convert arrays to comma-separated strings**:

```typescript
// Array to comma-separated string
const authors = ['alice', 'bob', 'charlie'];
params.append('authors', authors.join(',')); // 'alice,bob,charlie'

const extensions = ['ts', 'tsx', 'js'];
params.append('extensions', extensions.join(',')); // 'ts,tsx,js'
```

### Dates (fromDate, toDate)

**Use ISO 8601 format**:

```typescript
// Correct date formats
params.append('fromDate', '2024-01-01');
params.append('toDate', '2024-12-31');

// Also accepts full ISO 8601
params.append('fromDate', '2024-01-01T00:00:00Z');
```

### Numbers (page, limit, minChanges)

**Convert numbers to strings**:

```typescript
params.append('page', page.toString());
params.append('limit', limit.toString());
params.append('minChanges', minChanges.toString());
```

### Conditional Parameters

**Only include defined values**:

```typescript
// Good - only includes defined values
if (author) params.append('author', author);
if (fromDate) params.append('fromDate', fromDate);

// Bad - sends an empty value instead of omitting the parameter
params.append('author', author || ''); // ❌ Don't do this
```

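
### Combining the Rules

The four rules above can be folded into one helper. This is a minimal sketch; the `QueryValue` union and the `buildQuery` name are illustrative, not part of the codebase:

```typescript
type QueryValue = string | number | string[] | undefined;

// Builds URLSearchParams following the guidelines above: arrays become
// comma-separated strings, numbers are stringified, and undefined values
// are omitted entirely.
function buildQuery(values: Record<string, QueryValue>): URLSearchParams {
  const params = new URLSearchParams();

  for (const [key, value] of Object.entries(values)) {
    if (value === undefined) continue; // conditional parameters

    if (Array.isArray(value)) {
      if (value.length > 0) params.append(key, value.join(',')); // arrays to CSV
    } else {
      params.append(key, value.toString()); // numbers to strings
    }
  }

  return params;
}

// Usage
const query = buildQuery({
  repoUrl: 'https://github.com/user/repo.git',
  authors: ['alice', 'bob'],
  fromDate: '2024-01-01',
  limit: 50,
  author: undefined, // dropped from the query string
});
const response = await fetch(`/api/repositories/heatmap?${query}`);
```

---
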

## Response Structure Changes

### Summary Endpoint - Nested Stats

**Critical**: The `summary` endpoint now returns deeply nested data.

```typescript
// ❌ WRONG - Old pattern (undefined)
interface OldResponse {
  totalCommits: number;
  totalContributors: number;
  status: string;
}

// ✅ CORRECT - New pattern
interface NewResponse {
  summary: {
    repository: { name: string; owner: string; url: string; platform: string };
    stats: {
      totalCommits: number; // Access via response.summary.stats.totalCommits
      contributors: number; // Note: 'contributors' not 'totalContributors'
      status: string;
    };
    lastCommit: { date: string; sha: string; author: string };
    metadata: { cached: boolean };
  };
}

// Migration example
function getTotalCommits(response: NewResponse): number {
  return response.summary?.stats?.totalCommits ?? 0;
}
```

### Heatmap Data - Always an Object

```typescript
// Backend returns this structure
interface HeatmapResponse {
  heatmapData: {
    timePeriod: string;
    data: Array<{ date: string; count: number }>;
    metadata?: { totalCommits: number };
  };
}

// Access pattern
const dataPoints = response.heatmapData.data.length;
const totalCommits = response.heatmapData.metadata?.totalCommits;
```

### Full-Data - Validation Flag

```typescript
interface FullDataResponse {
  commits: Commit[];
  heatmapData: CommitHeatmapData;
  isValidHeatmap: boolean; // Backend validation result
}

// Always check validation flag
if (response.isValidHeatmap) {
  renderHeatmap(response.heatmapData);
} else {
  console.warn('Invalid heatmap data structure');
}
```

---

## Error Handling

### HTTP Status Codes

| Code | Meaning | Common Causes |
|------|---------|---------------|
| `400` | Bad Request | Missing `repoUrl`, invalid date format, invalid URL |
| `404` | Not Found | Wrong endpoint path, typo in URL |
| `422` | Validation Error | Invalid query parameter values |
| `500` | Server Error | Cache failure, Git operation error |
| `504` | Gateway Timeout | Large repository taking too long |

### Validation Errors

Example validation error response:

```json
{
  "error": "Validation failed",
  "details": [
    {
      "field": "repoUrl",
      "message": "Invalid URL format"
    },
    {
      "field": "fromDate",
      "message": "Invalid date format, use YYYY-MM-DD"
    }
  ]
}
```

### Error Handling Pattern

```typescript
async function handleApiCall<T>(
  endpoint: string,
  params: URLSearchParams
): Promise<T | null> {
  try {
    const response = await fetch(`${endpoint}?${params}`);

    if (response.status === 400) {
      const error = await response.json();
      console.error('Validation error:', error.details);
      return null;
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    return await response.json();
  } catch (error) {
    console.error('API call failed:', error);
    return null;
  }
}
```

---

## Testing Recommendations

### 1. Test with Real Repository

Use the GitRay repository for testing:

```bash
curl "http://localhost:3001/api/repositories/summary?repoUrl=https://github.com/jonasyr/gitray.git"
```

**Expected Results**:

- `stats.totalCommits`: 480
- `stats.contributors`: 6
- `stats.status`: "active"

### 2. Test Pagination

```bash
# Page 1
curl "http://localhost:3001/api/repositories/commits?repoUrl=https://github.com/jonasyr/gitray.git&page=1&limit=10"

# Page 2
curl "http://localhost:3001/api/repositories/commits?repoUrl=https://github.com/jonasyr/gitray.git&page=2&limit=10"
```

### 3. Test Filters

```bash
# Date range filter
curl "http://localhost:3001/api/repositories/heatmap?repoUrl=https://github.com/jonasyr/gitray.git&fromDate=2024-01-01&toDate=2024-12-31"

# Author filter
curl "http://localhost:3001/api/repositories/contributors?repoUrl=https://github.com/jonasyr/gitray.git&author=jonas"

# Multiple authors
curl "http://localhost:3001/api/repositories/heatmap?repoUrl=https://github.com/jonasyr/gitray.git&authors=jonas,contributor2"
```

### 4. Test Error Cases

```bash
# Missing repoUrl
curl "http://localhost:3001/api/repositories/summary"
# Expected: HTTP 400

# Invalid date
curl "http://localhost:3001/api/repositories/heatmap?repoUrl=https://github.com/jonasyr/gitray.git&fromDate=invalid"
# Expected: HTTP 400
```

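
The same error cases can be scripted. A minimal Vitest sketch using Node 18's built-in `fetch`, assuming the backend dev server is running on port 3001 (the test names are illustrative):

```typescript
import { describe, expect, test } from 'vitest';

const BASE = 'http://localhost:3001/api/repositories';
const REPO = encodeURIComponent('https://github.com/jonasyr/gitray.git');

describe('GET endpoint validation (smoke)', () => {
  test('missing repoUrl returns HTTP 400', async () => {
    // No repoUrl query parameter at all
    const res = await fetch(`${BASE}/summary`);
    expect(res.status).toBe(400);
  });

  test('invalid fromDate returns HTTP 400', async () => {
    // fromDate is not ISO 8601, so validation should reject it
    const res = await fetch(`${BASE}/heatmap?repoUrl=${REPO}&fromDate=invalid`);
    expect(res.status).toBe(400);
  });
});
```

### 5. 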
Automated Test Checklist + +- [ ] All endpoints return HTTP 200 with valid params +- [ ] Pagination works correctly (page 1, 2, 3) +- [ ] Date filters reduce result set appropriately +- [ ] Author filters return subset of commits +- [ ] Multiple authors filter works (comma-separated) +- [ ] Invalid parameters return HTTP 400 +- [ ] Missing `repoUrl` returns HTTP 400 +- [ ] Response structures match documented types +- [ ] `summary.stats.totalCommits` accessible and correct +- [ ] Heatmap data has `timePeriod` and `data` fields +- [ ] Full-data returns both `commits` and `heatmapData` + +--- + +## Common Pitfalls + +### 1. Using POST Instead of GET + +```typescript +// ❌ WRONG - Will get HTTP 404 +fetch('/api/repositories/commits', { + method: 'POST', + body: JSON.stringify({ repoUrl }) +}); + +// ✅ CORRECT +const params = new URLSearchParams({ repoUrl }); +fetch(`/api/repositories/commits?${params}`); +``` + +### 2. Accessing Top-Level Fields in Summary + +```typescript +// ❌ WRONG - Returns undefined +const commits = response.totalCommits; + +// ✅ CORRECT - Access nested field +const commits = response.summary.stats.totalCommits; +``` + +### 3. Incorrect Field Name + +```typescript +// ❌ WRONG - Field doesn't exist +const count = response.summary.stats.totalContributors; + +// ✅ CORRECT - Field is 'contributors' +const count = response.summary.stats.contributors; +``` + +### 4. Arrays as JSON in Query Params + +```typescript +// ❌ WRONG - Don't stringify arrays +params.append('authors', JSON.stringify(['alice', 'bob'])); + +// ✅ CORRECT - Comma-separated string +params.append('authors', ['alice', 'bob'].join(',')); +``` + +### 5. Not Handling Optional Parameters + +```typescript +// ❌ WRONG - Includes undefined +params.append('author', author); // If author is undefined + +// ✅ CORRECT - Conditional inclusion +if (author) params.append('author', author); +``` + +### 6. 
Incorrect Date Format + +```typescript +// ❌ WRONG - Invalid format +params.append('fromDate', '12/01/2024'); + +// ✅ CORRECT - ISO 8601 format +params.append('fromDate', '2024-12-01'); +``` + +--- + +## Performance Considerations + +### Cache Behavior + +The backend uses multi-tier caching: + +- **Memory tier**: ~1ms response time +- **Disk tier**: ~10-50ms response time +- **Redis tier**: ~50-100ms response time +- **Git clone**: 5-30 seconds (first request only) + +**Recommendations**: + +- First request will be slow (Git clone) +- Subsequent requests with same parameters are fast (cache hit) +- Different filter combinations create separate cache entries +- Don't make unnecessary duplicate requests + +### Pagination Best Practices + +```typescript +// Good - Use reasonable page sizes +const limit = 50; // ✅ Balanced + +// Avoid - Too small or too large +const limit = 1; // ❌ Too many requests +const limit = 10000; // ❌ Memory issues +``` + +--- + +## Summary Checklist + +Use this checklist when migrating your frontend: + +### Endpoints + +- [ ] Changed all POST requests to GET +- [ ] Updated endpoint paths (`/repositories` → `/repositories/commits`) +- [ ] Moved request body to query parameters + +### Parameters + +- [ ] Arrays converted to comma-separated strings +- [ ] Dates in ISO 8601 format (`YYYY-MM-DD`) +- [ ] Numbers converted to strings for query params +- [ ] Conditional parameters only included if defined + +### Response Handling + +- [ ] Updated to access `response.summary.stats.totalCommits` +- [ ] Using `contributors` instead of `totalContributors` +- [ ] Handling nested `summary` object structure +- [ ] Validating `isValidHeatmap` flag in full-data endpoint + +### Error Management + +- [ ] Handling HTTP 400 for validation errors +- [ ] Handling HTTP 404 for incorrect endpoints +- [ ] Graceful degradation on server errors +- [ ] Logging errors for debugging + +### Testing + +- [ ] Tested all endpoints with valid parameters +- [ ] Tested pagination (multiple pages) +- [ ] Tested filters (author, date range) +- [ ] Tested error cases (missing params, invalid format) +- [ ] Verified response structures match documented types + +--- + +## Additional Resources + +- **Backend Repository Routes**: `apps/backend/src/routes/repositoryRoutes.ts` +- **Shared Types Package**: `packages/shared-types/src/index.ts` +- **API Test Script**: `test-api-phase1.sh` +- **Test Scenarios Documentation**: `scripts/api_test_scenarios.md` + +--- + +## Questions or Issues? + +If you encounter problems during migration: + +1. **Check backend logs** - Detailed error messages are logged +2. **Verify query parameters** - Use browser DevTools Network tab +3. **Test with curl** - Isolate frontend vs backend issues +4. **Review response structure** - Compare against documented types +5. **Check SonarQube** - Code quality issues may surface + +For the most up-to-date backend implementation, always refer to the source code in `apps/backend/src/routes/repositoryRoutes.ts`. 
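
---

## Appendix: End-to-End Example

A minimal sketch that ties the patterns above together: building the query string, handling errors, and reading the nested summary structure. The `SummaryStats` interface and the `fetchSummary` name are illustrative, not part of `@gitray/shared-types`:

```typescript
// Mirrors only the documented fields this example reads
interface SummaryStats {
  summary: {
    stats: { totalCommits: number; contributors: number; status: string };
    lastCommit: { relativeTime: string; author: string };
  };
}

async function fetchSummary(repoUrl: string): Promise<SummaryStats | null> {
  const params = new URLSearchParams({ repoUrl });
  const response = await fetch(`/api/repositories/summary?${params}`);

  if (!response.ok) {
    console.error(`Summary request failed: HTTP ${response.status}`);
    return null;
  }

  return (await response.json()) as SummaryStats;
}

// Usage (inside an async context)
const data = await fetchSummary('https://github.com/jonasyr/gitray.git');
if (data) {
  // Nested access path; note the 'contributors' field name
  console.log(`Commits: ${data.summary.stats.totalCommits}`);
  console.log(`Contributors: ${data.summary.stats.contributors}`);
  console.log(`Last commit: ${data.summary.lastCommit.relativeTime}`);
}
```
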
diff --git a/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts b/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts index 857d47ce..cd327f82 100644 --- a/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts +++ b/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts @@ -3,107 +3,136 @@ import request from 'supertest'; import express, { Application } from 'express'; // Mock all external dependencies BEFORE imports -const mockGitService = { - getCommits: vi.fn(), - aggregateCommitsByTime: vi.fn(), - getTopContributors: vi.fn(), - analyzeCodeChurn: vi.fn(), +const mockRepositoryCache = { + getCachedCommits: vi.fn(), + getCachedAggregatedData: vi.fn(), + getCachedContributors: vi.fn(), + getCachedChurnData: vi.fn(), + getCachedSummary: vi.fn(), }; -const mockRedis = { - get: vi.fn(), - set: vi.fn(), -}; - -const mockWithTempRepository = vi.fn(); - const mockMetrics = { recordFeatureUsage: vi.fn(), recordEnhancedCacheOperation: vi.fn(), - recordDataFreshness: vi.fn(), getUserType: vi.fn(), getRepositorySizeCategory: vi.fn(), }; -const mockRepositorySummaryService = { - getRepositorySummary: vi.fn(), -}; - -// Create middleware function that can be chained -const createValidationMiddleware = () => { - const middleware = vi.fn((req: any, res: any, next: any) => next()) as any; - middleware.isURL = vi.fn(() => middleware); - middleware.withMessage = vi.fn(() => middleware); - middleware.matches = vi.fn(() => middleware); - middleware.optional = vi.fn(() => middleware); - middleware.isObject = vi.fn(() => middleware); - middleware.custom = vi.fn(() => middleware); - return middleware; +const mockLogger = { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), }; -// Mock modules with proper middleware functions -vi.mock('../../../src/services/gitService', () => ({ - __esModule: true, - gitService: mockGitService, -})); - -vi.mock('../../../src/services/cache', () => ({ - __esModule: true, - default: mockRedis, -})); - -vi.mock('../../../src/utils/withTempRepository', () => ({ - __esModule: true, - withTempRepository: mockWithTempRepository, -})); +// Mock modules +vi.mock('../../../src/services/repositoryCache', () => mockRepositoryCache); -vi.mock('../../../src/services/metrics', () => ({ - __esModule: true, - ...mockMetrics, -})); +vi.mock('../../../src/services/metrics', () => mockMetrics); -vi.mock('express-validator', () => ({ - __esModule: true, - body: vi.fn(() => createValidationMiddleware()), +vi.mock('../../../src/services/logger', () => ({ + getLogger: () => mockLogger, + createRequestLogger: vi.fn(() => mockLogger), })); vi.mock('../../../src/middlewares/validation', () => ({ - __esModule: true, - handleValidationErrors: vi.fn((req: any, res: any, next: any) => next()), isSecureGitUrl: vi.fn(() => Promise.resolve(true)), + handleValidationErrorsWithResponse: vi.fn((req: any, res: any, next: any) => + next() + ), + repoUrlValidation: vi.fn(() => []), + paginationValidation: vi.fn(() => []), + dateValidation: vi.fn(() => []), + authorValidation: vi.fn(() => []), + churnValidation: vi.fn(() => []), })); -vi.mock('../../../src/services/repositorySummaryService', () => ({ - __esModule: true, - repositorySummaryService: mockRepositorySummaryService, +// Mock utility modules +vi.mock('../../../src/utils/routeHelpers', () => ({ + buildCommitFilters: vi.fn((query) => { + const filters: any = {}; + if (query.author) filters.author = query.author; + if (query.authors) filters.authors = query.authors; + if (query.fromDate) 
filters.fromDate = query.fromDate; + if (query.toDate) filters.toDate = query.toDate; + return filters; + }), + buildChurnFilters: vi.fn((query) => { + const filters: any = {}; + if (query.minChanges !== undefined) + filters.minChanges = parseInt(query.minChanges); + if (query.extensions) filters.extensions = query.extensions; + if (query.since) filters.since = query.since; + if (query.until) filters.until = query.until; + return filters; + }), + extractPaginationParams: vi.fn((query) => ({ + page: parseInt(query.page as string) || 1, + limit: parseInt(query.limit as string) || 100, + skip: + ((parseInt(query.page as string) || 1) - 1) * + (parseInt(query.limit as string) || 100), + })), + extractFilterParams: vi.fn((query) => ({ + author: query.author, + authors: query.authors, + fromDate: query.fromDate, + toDate: query.toDate, + })), + setupRouteRequest: vi.fn((req) => ({ + logger: mockLogger, + repoUrl: req.query.repoUrl as string, + userType: 'anonymous', + })), + recordRouteSuccess: vi.fn(), + recordRouteError: vi.fn(), })); -vi.mock('@gitray/shared-types', () => { - const TIME = { - SECOND: 1000, - MINUTE: 60 * 1000, - HOUR: 60 * 60 * 1000, - DAY: 24 * 60 * 60 * 1000, - WEEK: 7 * 24 * 60 * 60 * 1000, - }; - - class GitrayError extends Error { - constructor( - message: string, - public readonly statusCode: number = 500, - public readonly code?: string - ) { - super(message); - this.name = 'GitrayError'; - } - } +vi.mock('../../../src/utils/repositoryRouteFactory', () => ({ + createCachedRouteHandler: vi.fn((featureName, processor, buildMetrics) => [ + async (req: any, res: any, next: any) => { + try { + const logger = mockLogger; + const repoUrl = req.query.repoUrl as string; + const userType = 'anonymous'; + + // Validate repoUrl is present (simple validation for testing) + if (!repoUrl) { + return res.status(400).json({ + error: 'Validation failed', + code: 'VALIDATION_ERROR', + errors: [ + { msg: 'repoUrl query parameter is required', param: 'repoUrl' }, + ], + }); + } + + const result = await processor({ req, logger, repoUrl, userType }); + const metrics = buildMetrics ? 
buildMetrics(result) : {}; + + mockMetrics.recordFeatureUsage(featureName, userType, true, 'api_call'); + res.status(200).json(result); + } catch (error: any) { + mockMetrics.recordFeatureUsage( + featureName, + 'anonymous', + false, + 'api_call' + ); + next(error); + } + }, + ]), + buildRepoValidationChain: vi.fn(() => []), +})); - class ValidationError extends GitrayError { +vi.mock('@gitray/shared-types', () => { + class ValidationError extends Error { constructor( message: string, public readonly errors?: any[] ) { - super(message, 400, 'VALIDATION_ERROR'); + super(message); this.name = 'ValidationError'; } } @@ -118,20 +147,18 @@ vi.mock('@gitray/shared-types', () => { BAD_REQUEST: 400, INTERNAL_SERVER_ERROR: 500, }, - TIME, - RATE_LIMIT: { - WINDOW_MS: 15 * TIME.MINUTE, - MAX_REQUESTS: 100, - MESSAGE: 'Too many requests from this IP, please try again later.', - }, - GitrayError, ValidationError, CommitFilterOptions: {}, ChurnFilterOptions: {}, + GIT_SERVICE: { + MAX_CONCURRENT_PROCESSES: 3, + CLONE_DEPTH: 50, + TIMEOUT_MS: 30000, + }, }; }); -describe('RepositoryRoutes Unit Tests', () => { +describe('RepositoryRoutes Unit Tests (Refactored with Unified Cache)', () => { let app: Application; beforeEach(async () => { @@ -140,9 +167,6 @@ describe('RepositoryRoutes Unit Tests', () => { // Set up default mock returns mockMetrics.getUserType.mockReturnValue('anonymous'); mockMetrics.getRepositorySizeCategory.mockReturnValue('medium'); - mockMetrics.recordFeatureUsage.mockResolvedValue(undefined); - mockMetrics.recordEnhancedCacheOperation.mockResolvedValue(undefined); - mockMetrics.recordDataFreshness.mockResolvedValue(undefined); // Set up Express app app = express(); @@ -152,13 +176,16 @@ describe('RepositoryRoutes Unit Tests', () => { const { default: repositoryRoutes } = await import( '../../../src/routes/repositoryRoutes' ); - app.use('/', repositoryRoutes); + app.use('/api/repositories', repositoryRoutes); // Add error handler - app.use((err: any, req: any, res: any) => { - res.status(err.status || 500).json({ - error: err.message || 'Internal server error', - }); + app.use((err: any, req: any, res: any, next: any) => { + if (!res.headersSent) { + res.status(err.statusCode || 500).json({ + error: err.message || 'Internal server error', + code: err.code || 'INTERNAL_ERROR', + }); + } }); }); @@ -166,8 +193,8 @@ describe('RepositoryRoutes Unit Tests', () => { vi.resetModules(); }); - describe('POST / - Get Repository Commits', () => { - test('should return cached commits when cache hit occurs', async () => { + describe('GET /commits - Repository Commits with Unified Cache', () => { + test('should return commits using unified cache service', async () => { // ARRANGE const mockCommits = [ { @@ -178,25 +205,28 @@ describe('RepositoryRoutes Unit Tests', () => { authorEmail: 'test@example.com', }, ]; - const repoUrl = 'https://github.com/user/repo.git'; - mockRedis.get.mockResolvedValue(JSON.stringify(mockCommits)); + mockRepositoryCache.getCachedCommits.mockResolvedValue(mockCommits); // ACT - const response = await request(app).post('/').send({ repoUrl }); + const response = await request(app).get( + '/api/repositories/commits?repoUrl=https://github.com/test/repo&page=1&limit=100' + ); // ASSERT expect(response.status).toBe(200); - expect(response.body).toEqual({ commits: mockCommits }); - expect(mockRedis.get).toHaveBeenCalledWith(`commits:${repoUrl}`); - expect(mockWithTempRepository).not.toHaveBeenCalled(); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 
'commits', - true, - expect.any(Object), - repoUrl, - mockCommits.length + expect(response.body).toHaveProperty('commits'); + expect(response.body.commits).toEqual(mockCommits); + expect(response.body.page).toBe(1); + expect(response.body.limit).toBe(100); + + // Verify unified cache was called with correct parameters + expect(mockRepositoryCache.getCachedCommits).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { skip: 0, limit: 100 } ); + + // Verify metrics were recorded expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( 'repository_commits', 'anonymous', @@ -205,1182 +235,292 @@ describe('RepositoryRoutes Unit Tests', () => { ); }); - test('should fetch and cache commits when cache miss occurs', async () => { - // ARRANGE - const mockCommits = [ - { - sha: 'def456', - message: 'New commit', - date: '2023-01-02T00:00:00Z', - authorName: 'Developer', - authorEmail: 'dev@example.com', - }, - ]; - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(mockCommits); - mockRedis.set.mockResolvedValue('OK'); - + test('should validate repoUrl is required', async () => { // ACT - const response = await request(app).post('/').send({ repoUrl }); + const response = await request(app).get('/api/repositories/commits'); // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ commits: mockCommits }); - expect(mockWithTempRepository).toHaveBeenCalledWith( - repoUrl, - expect.any(Function) - ); - expect(mockRedis.set).toHaveBeenCalledWith( - `commits:${repoUrl}`, - JSON.stringify(mockCommits), - 'EX', - 3600 - ); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'commits', - false, - expect.any(Object), - repoUrl, - mockCommits.length - ); + expect(response.status).toBe(400); + expect(response.body).toHaveProperty('error'); + expect(response.body.code).toBe('VALIDATION_ERROR'); }); - test('should handle repository fetch errors and record failed feature usage', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const fetchError = new Error('Repository not found'); - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockRejectedValue(fetchError); + test('should handle pagination parameters', async () => { + mockRepositoryCache.getCachedCommits.mockResolvedValue([]); // ACT - const response = await request(app).post('/').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(500); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_commits', - 'anonymous', - false, - 'api_call' + const response = await request(app).get( + '/api/repositories/commits?repoUrl=https://github.com/test/repo&page=3&limit=50' ); - }); - - test('should handle different user types for metrics', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - mockMetrics.getUserType.mockReturnValue('premium'); - mockRedis.get.mockResolvedValue(JSON.stringify([])); - - // ACT - await request(app).post('/').send({ repoUrl }); // ASSERT - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_commits', - 'premium', - true, - 'api_call' + expect(response.status).toBe(200); + expect(mockRepositoryCache.getCachedCommits).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { skip: 100, limit: 50 } // (page-1) * limit = (3-1) * 50 = 100 ); }); }); - describe('POST /heatmap - Get Heatmap Data', () => { - test('should return cached heatmap data when cache hit occurs', 
async () => { + describe('GET /heatmap - Commit Heatmap with Unified Cache', () => { + test('should return heatmap data using unified cache service', async () => { // ARRANGE const mockHeatmapData = { - timePeriod: 'day', - data: [{ date: '2023-01-01', commits: 5 }], - metadata: { maxCommitCount: 5, totalCommits: 5 }, + timePeriod: 'month', + data: [ + { date: '2023-01', count: 10 }, + { date: '2023-02', count: 15 }, + ], + metadata: { totalCommits: 25 }, }; - const repoUrl = 'https://github.com/user/repo.git'; - const filterOptions = { author: 'testuser' }; - - mockRedis.get.mockResolvedValue(JSON.stringify(mockHeatmapData)); - // ACT - const response = await request(app) - .post('/heatmap') - .send({ repoUrl, filterOptions }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ heatmapData: mockHeatmapData }); - expect(mockRedis.get).toHaveBeenCalledWith( - `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}` + mockRepositoryCache.getCachedAggregatedData.mockResolvedValue( + mockHeatmapData ); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'heatmap_view', - 'anonymous', - true, - 'api_call' - ); - }); - - test('should generate and cache heatmap data when cache miss occurs', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const filterOptions = { fromDate: '2023-01-01' }; - const mockCommits = [{ sha: 'abc123', date: '2023-01-01T12:00:00Z' }]; - const mockHeatmapData = { - timePeriod: 'day', - data: [{ date: '2023-01-01', commits: 1 }], - metadata: { maxCommitCount: 1, totalCommits: 1 }, - }; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockImplementation(async (url, callback) => { - return await callback('/tmp/repo'); - }); - mockGitService.getCommits.mockResolvedValue(mockCommits); - mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); - mockRedis.set.mockResolvedValue('OK'); // ACT - const response = await request(app) - .post('/heatmap') - .send({ repoUrl, filterOptions }); + const response = await request(app).get( + '/api/repositories/heatmap?repoUrl=https://github.com/test/repo' + ); // ASSERT expect(response.status).toBe(200); - expect(response.body).toEqual({ heatmapData: mockHeatmapData }); - expect(mockGitService.aggregateCommitsByTime).toHaveBeenCalledWith( - mockCommits, - filterOptions - ); - expect(mockRedis.set).toHaveBeenCalledWith( - `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`, - JSON.stringify(mockHeatmapData), - 'EX', - 3600 - ); - }); - - test('should handle aggregation errors and record failed metrics', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const aggregationError = new Error('Aggregation failed'); - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockRejectedValue(aggregationError); + expect(response.body).toHaveProperty('heatmapData'); + expect(response.body.heatmapData).toEqual(mockHeatmapData); - // ACT - const response = await request(app).post('/heatmap').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(500); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'heatmap_view', - 'anonymous', - false, - 'api_call' + // Verify unified cache was called + expect(mockRepositoryCache.getCachedAggregatedData).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { + author: undefined, + authors: undefined, + fromDate: undefined, + toDate: undefined, + } ); }); - test('should handle undefined filter options gracefully', async () => { - // 
ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const mockHeatmapData = { timePeriod: 'day', data: [], metadata: {} }; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockImplementation(async (url, callback) => { - return await callback('/tmp/repo'); + test('should apply filter options from query parameters', async () => { + mockRepositoryCache.getCachedAggregatedData.mockResolvedValue({ + timePeriod: 'month', + data: [], + metadata: {}, }); - mockGitService.getCommits.mockResolvedValue([]); - mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); // ACT - const response = await request(app).post('/heatmap').send({ repoUrl }); + const response = await request(app).get( + '/api/repositories/heatmap?repoUrl=https://github.com/test/repo&author=john&fromDate=2023-01-01&toDate=2023-12-31' + ); // ASSERT expect(response.status).toBe(200); - expect(mockGitService.aggregateCommitsByTime).toHaveBeenCalledWith( - [], - undefined + expect(mockRepositoryCache.getCachedAggregatedData).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { + author: 'john', + authors: undefined, + fromDate: '2023-01-01', + toDate: '2023-12-31', + } ); }); }); - describe('POST /full-data - Get Combined Data', () => { - test('should return cached data when both commits and heatmap are cached', async () => { + describe('GET /contributors - Top Contributors with Unified Cache', () => { + test('should return contributors using unified cache service', async () => { // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const filterOptions = { author: 'testuser' }; - const mockCommits = [{ sha: 'abc123', message: 'Test' }]; - const mockHeatmapData = { timePeriod: 'day', data: [] }; - - mockRedis.get - .mockResolvedValueOnce(JSON.stringify(mockCommits)) - .mockResolvedValueOnce(JSON.stringify(mockHeatmapData)); - - // ACT - const response = await request(app) - .post('/full-data') - .send({ repoUrl, filterOptions }); + const mockContributors = [ + { + login: 'user1', + commitCount: 50, + linesAdded: 1000, + linesDeleted: 200, + contributionPercentage: 60, + }, + ]; - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ - commits: mockCommits, - heatmapData: mockHeatmapData, - }); - expect(mockRedis.get).toHaveBeenCalledWith(`commits:${repoUrl}`); - expect(mockRedis.get).toHaveBeenCalledWith( - `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}` - ); - expect(mockWithTempRepository).not.toHaveBeenCalled(); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'full_data_view', - 'anonymous', - true, - 'api_call' + mockRepositoryCache.getCachedContributors.mockResolvedValue( + mockContributors ); - }); - - test('should fetch and cache both data types when cache miss occurs', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const filterOptions = { fromDate: '2023-01-01' }; - const mockCommits = [{ sha: 'def456', message: 'New commit' }]; - const mockHeatmapData = { timePeriod: 'day', data: [{ commits: 1 }] }; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockImplementation(async (url, callback) => { - return await callback('/tmp/repo'); - }); - mockGitService.getCommits.mockResolvedValue(mockCommits); - mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); - mockRedis.set.mockResolvedValue('OK'); // ACT - const response = await request(app) - .post('/full-data') - .send({ repoUrl, filterOptions }); - - // ASSERT - 
expect(response.status).toBe(200); - expect(response.body).toEqual({ - commits: mockCommits, - heatmapData: mockHeatmapData, - }); - expect(mockRedis.set).toHaveBeenCalledWith( - `commits:${repoUrl}`, - JSON.stringify(mockCommits), - 'EX', - 3600 - ); - expect(mockRedis.set).toHaveBeenCalledWith( - `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`, - JSON.stringify(mockHeatmapData), - 'EX', - 3600 + const response = await request(app).get( + '/api/repositories/contributors?repoUrl=https://github.com/test/repo' ); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledTimes(2); - }); - - test('should handle partial cache hits correctly', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const mockCommits = [{ sha: 'cached', message: 'From cache' }]; - - // Only commits are cached, heatmap is not - mockRedis.get - .mockResolvedValueOnce(JSON.stringify(mockCommits)) - .mockResolvedValueOnce(null); - - mockWithTempRepository.mockImplementation(async (url, callback) => { - return await callback('/tmp/repo'); - }); - mockGitService.getCommits.mockResolvedValue(mockCommits); - mockGitService.aggregateCommitsByTime.mockResolvedValue({ - timePeriod: 'day', - data: [], - }); - - // ACT - const response = await request(app).post('/full-data').send({ repoUrl }); // ASSERT expect(response.status).toBe(200); - expect(mockWithTempRepository).toHaveBeenCalled(); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'commits', - false, - expect.any(Object), - repoUrl, - mockCommits.length - ); - }); - - test('should handle data processing errors and record failures', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const processingError = new Error('Data processing failed'); + expect(response.body).toHaveProperty('contributors'); + expect(response.body.contributors).toEqual(mockContributors); - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockRejectedValue(processingError); - - // ACT - const response = await request(app).post('/full-data').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(500); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'full_data_view', - 'anonymous', - false, - 'api_call' + expect(mockRepositoryCache.getCachedContributors).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { + author: undefined, + authors: undefined, + fromDate: undefined, + toDate: undefined, + } ); }); }); - describe('Cache Operations', () => { - test('should handle cache get failures gracefully', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const cacheError = new Error('Cache connection failed'); - - mockRedis.get.mockRejectedValue(cacheError); - mockWithTempRepository.mockResolvedValue([]); - - // ACT - const response = await request(app).post('/').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(mockWithTempRepository).toHaveBeenCalled(); - }); - - test('should handle cache set failures without affecting response', async () => { + describe('GET /churn - Code Churn Analysis with Unified Cache', () => { + test('should return churn data using unified cache service', async () => { // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const mockCommits = [{ sha: 'abc123' }]; + const mockChurnData = { + files: [ + { + path: 'src/index.ts', + changes: 25, + risk: 'high', + }, + ], + metadata: { + totalFiles: 1, + totalChanges: 25, + riskThresholds: { high: 30, medium: 15, low: 0 
}, + dateRange: { from: '2023-01-01', to: '2023-12-31' }, + highRiskCount: 1, + mediumRiskCount: 0, + lowRiskCount: 0, + analyzedAt: '2023-12-31T23:59:59Z', + }, + }; - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(mockCommits); - mockRedis.set.mockRejectedValue(new Error('Cache write failed')); + mockRepositoryCache.getCachedChurnData.mockResolvedValue(mockChurnData); // ACT - const response = await request(app).post('/').send({ repoUrl }); + const response = await request(app).get( + '/api/repositories/churn?repoUrl=https://github.com/test/repo&minChanges=10' + ); // ASSERT expect(response.status).toBe(200); - expect(response.body).toEqual({ commits: mockCommits }); - }); - - test('should handle corrupted cache data gracefully', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue('invalid json data'); - mockWithTempRepository.mockResolvedValue([]); - - // ACT - const response = await request(app).post('/').send({ repoUrl }); + expect(response.body).toHaveProperty('churnData'); + expect(response.body.churnData).toEqual(mockChurnData); - // ASSERT - expect(response.status).toBe(200); - expect(mockWithTempRepository).toHaveBeenCalled(); + expect(mockRepositoryCache.getCachedChurnData).toHaveBeenCalledWith( + 'https://github.com/test/repo', + { + since: undefined, + until: undefined, + minChanges: 10, + extensions: undefined, + } + ); }); }); - describe('Metrics Recording', () => { - test('should record different repository size categories', async () => { + describe('GET /summary - Repository Summary with Unified Cache', () => { + test('should return repository summary using unified cache service', async () => { // ARRANGE - const repoUrl = 'https://github.com/user/large-repo.git'; - const largeCommitSet = Array(5000).fill({ sha: 'abc' }); + const mockSummary = { + repository: { + name: 'test-repo', + owner: 'test-owner', + url: 'https://github.com/test/repo', + platform: 'github', + }, + created: { + date: '2020-01-01T00:00:00Z', + source: 'first-commit', + }, + age: { + years: 4, + months: 0, + formatted: '4.0y', + }, + lastCommit: { + date: '2023-12-31T23:59:59Z', + relativeTime: '1 day ago', + sha: 'xyz789', + author: 'Test User', + }, + stats: { + totalCommits: 500, + contributors: 10, + status: 'active', + }, + metadata: { + cached: true, + dataSource: 'cache', + }, + }; - mockMetrics.getRepositorySizeCategory.mockReturnValue('large'); - mockRedis.get.mockResolvedValue(JSON.stringify(largeCommitSet)); + mockRepositoryCache.getCachedSummary.mockResolvedValue(mockSummary); // ACT - await request(app).post('/').send({ repoUrl }); - - // ASSERT - expect(mockMetrics.recordDataFreshness).toHaveBeenCalledWith( - 'commits', - 0, - 'hybrid', - 'large' + const response = await request(app).get( + '/api/repositories/summary?repoUrl=https://github.com/test/repo' ); - }); - - test('should record authenticated user metrics correctly', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - - mockMetrics.getUserType.mockReturnValue('authenticated'); - mockRedis.get.mockResolvedValue(JSON.stringify([])); - - // ACT - await request(app).post('/heatmap').send({ repoUrl }); // ASSERT - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'heatmap_view', - 'authenticated', - true, - 'api_call' + expect(response.status).toBe(200); + expect(response.body).toHaveProperty('summary'); + expect(response.body.summary).toEqual(mockSummary); + + 
expect(mockRepositoryCache.getCachedSummary).toHaveBeenCalledWith( + 'https://github.com/test/repo' ); }); + }); - test('should handle metrics recording failures silently', async () => { + describe('GET /full-data - Combined Data with Unified Cache', () => { + test('should return both commits and heatmap using parallel cache calls', async () => { // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; + const mockCommits = [{ sha: 'abc123', message: 'Test' }]; + const mockHeatmapData = { + timePeriod: 'month', + data: [{ date: '2023-01', count: 10 }], + metadata: {}, + }; - mockMetrics.recordFeatureUsage.mockRejectedValue( - new Error('Metrics service down') + mockRepositoryCache.getCachedCommits.mockResolvedValue(mockCommits); + mockRepositoryCache.getCachedAggregatedData.mockResolvedValue( + mockHeatmapData ); - mockRedis.get.mockResolvedValue(JSON.stringify([])); // ACT - const response = await request(app).post('/').send({ repoUrl }); + const response = await request(app).get( + '/api/repositories/full-data?repoUrl=https://github.com/test/repo&page=1&limit=100' + ); // ASSERT expect(response.status).toBe(200); - // Metrics failure should not affect the main operation + expect(response.body).toHaveProperty('commits'); + expect(response.body).toHaveProperty('heatmapData'); + expect(response.body.commits).toEqual(mockCommits); + expect(response.body.heatmapData).toEqual(mockHeatmapData); + + // Verify both cache services were called + expect(mockRepositoryCache.getCachedCommits).toHaveBeenCalledTimes(1); + expect(mockRepositoryCache.getCachedAggregatedData).toHaveBeenCalledTimes( + 1 + ); }); }); - describe('Error Boundary Tests', () => { - test('should handle unexpected errors in middleware chain', async () => { + describe('Error Handling', () => { + test('should handle cache service errors gracefully', async () => { // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - - // Force an unexpected error in the middleware chain - mockMetrics.getUserType.mockImplementation(() => { - throw new Error('Unexpected middleware error'); - }); + mockRepositoryCache.getCachedCommits.mockRejectedValue( + new Error('Cache service error') + ); // ACT - const response = await request(app).post('/').send({ repoUrl }); + const response = await request(app).get( + '/api/repositories/commits?repoUrl=https://github.com/test/repo' + ); // ASSERT expect(response.status).toBe(500); - }); - - test('should handle empty response data gracefully', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(undefined); - - // ACT - const response = await request(app).post('/').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ commits: undefined }); - }); - }); - - describe('POST /churn - Get Code Churn Analysis', () => { - test('should return cached churn data when cache hit occurs', async () => { - // ARRANGE - const mockChurnData = { - files: [ - { - path: 'src/api/auth.ts', - changes: 47, - risk: 'high', - extension: '.ts', - firstChange: '2023-01-01T12:00:00Z', - lastChange: '2023-12-31T12:00:00Z', - authorCount: 5, - }, - { - path: 'src/components/Dashboard.tsx', - changes: 38, - risk: 'high', - extension: '.tsx', - firstChange: '2023-02-01T12:00:00Z', - lastChange: '2023-12-15T12:00:00Z', - authorCount: 3, - }, - ], - metadata: { - totalFiles: 2, - totalChanges: 85, - riskThresholds: { high: 30, medium: 15, low: 0 }, - dateRange: { 
from: '2023-01-01', to: '2023-12-31' }, - highRiskCount: 2, - mediumRiskCount: 0, - lowRiskCount: 0, - analyzedAt: '2024-01-01T00:00:00Z', - processingTime: 150, - }, - }; - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue(JSON.stringify(mockChurnData)); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.churnData).toEqual({ - ...mockChurnData, - metadata: { ...mockChurnData.metadata, fromCache: true }, - }); - expect(mockRedis.get).toHaveBeenCalledWith(`churn:${repoUrl}:{}`); - expect(mockWithTempRepository).not.toHaveBeenCalled(); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'churn', - true, - expect.any(Object), - repoUrl, - mockChurnData.files.length - ); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'code_churn_view', - 'anonymous', - true, - 'api_call' - ); - }); - - test('should analyze and cache churn data when cache miss occurs', async () => { - // ARRANGE - const mockChurnData = { - files: [ - { - path: 'src/utils/helpers.ts', - changes: 32, - risk: 'high', - extension: '.ts', - firstChange: '2023-03-01T12:00:00Z', - lastChange: '2023-11-20T12:00:00Z', - authorCount: 8, - }, - ], - metadata: { - totalFiles: 1, - totalChanges: 32, - riskThresholds: { high: 30, medium: 15, low: 0 }, - dateRange: { from: '2023-01-01', to: '2023-12-31' }, - highRiskCount: 1, - mediumRiskCount: 0, - lowRiskCount: 0, - analyzedAt: '2024-01-01T00:00:00Z', - processingTime: 200, - }, - }; - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(mockChurnData); - mockRedis.set.mockResolvedValue('OK'); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ churnData: mockChurnData }); - expect(mockWithTempRepository).toHaveBeenCalledWith( - repoUrl, - expect.any(Function) - ); - expect(mockRedis.set).toHaveBeenCalledWith( - `churn:${repoUrl}:{}`, - JSON.stringify(mockChurnData), - 'EX', - 3600 - ); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'churn', - false, - expect.any(Object), - repoUrl, - mockChurnData.files.length - ); - }); - - test('should apply filter options to churn analysis', async () => { - // ARRANGE - const filterOptions = { - since: '2023-01-01', - until: '2023-12-31', - extensions: ['ts', 'tsx'], - minChanges: 10, - }; - const mockChurnData = { - files: [ - { - path: 'src/index.ts', - changes: 25, - risk: 'medium', - extension: '.ts', - }, - ], - metadata: { - totalFiles: 1, - totalChanges: 25, - riskThresholds: { high: 30, medium: 15, low: 0 }, - dateRange: { from: '2023-01-01', to: '2023-12-31' }, - highRiskCount: 0, - mediumRiskCount: 1, - lowRiskCount: 0, - analyzedAt: '2024-01-01T00:00:00Z', - filterOptions, - }, - }; - const repoUrl = 'https://github.com/user/repo.git'; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockImplementation(async (url, callback) => { - return await callback('/tmp/repo'); - }); - mockGitService.analyzeCodeChurn.mockResolvedValue(mockChurnData); - mockRedis.set.mockResolvedValue('OK'); - - // ACT - const response = await request(app) - .post('/churn') - .send({ repoUrl, filterOptions }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ churnData: mockChurnData }); - 
expect(mockGitService.analyzeCodeChurn).toHaveBeenCalledWith( - '/tmp/repo', - filterOptions - ); - expect(mockRedis.set).toHaveBeenCalledWith( - `churn:${repoUrl}:${JSON.stringify(filterOptions)}`, - JSON.stringify(mockChurnData), - 'EX', - 3600 - ); - }); - - test('should handle analysis errors and record failed feature usage', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const analysisError = new Error('Churn analysis failed'); - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockRejectedValue(analysisError); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(500); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'code_churn_view', - 'anonymous', - false, - 'api_call' - ); - }); - - test('should handle different user types for churn metrics', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - mockMetrics.getUserType.mockReturnValue('premium'); - mockRedis.get.mockResolvedValue( - JSON.stringify({ files: [], metadata: {} }) - ); - - // ACT - await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'code_churn_view', - 'premium', - true, - 'api_call' - ); - }); - - test('should handle cache failures gracefully and fetch from repository', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const cacheError = new Error('Cache connection failed'); - const mockChurnData = { files: [], metadata: {} }; - - mockRedis.get.mockRejectedValue(cacheError); - mockWithTempRepository.mockResolvedValue(mockChurnData); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(mockWithTempRepository).toHaveBeenCalled(); - }); - - test('should handle cache set failures without affecting response', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/repo.git'; - const mockChurnData = { - files: [{ path: 'test.ts', changes: 5, risk: 'low' }], - metadata: { totalFiles: 1 }, - }; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(mockChurnData); - mockRedis.set.mockRejectedValue(new Error('Cache write failed')); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ churnData: mockChurnData }); - }); - - test('should handle empty churn results', async () => { - // ARRANGE - const repoUrl = 'https://github.com/user/empty-repo.git'; - const emptyChurnData = { - files: [], - metadata: { - totalFiles: 0, - totalChanges: 0, - riskThresholds: { high: 30, medium: 15, low: 0 }, - dateRange: { from: '2023-01-01', to: '2023-12-31' }, - highRiskCount: 0, - mediumRiskCount: 0, - lowRiskCount: 0, - analyzedAt: '2024-01-01T00:00:00Z', - }, - }; - - mockRedis.get.mockResolvedValue(null); - mockWithTempRepository.mockResolvedValue(emptyChurnData); - - // ACT - const response = await request(app).post('/churn').send({ repoUrl }); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.churnData.files).toHaveLength(0); - expect(response.body.churnData.metadata.totalFiles).toBe(0); - }); - }); - - describe('GET /summary - Get Repository Summary Statistics', () => { - beforeEach(async () => { - vi.clearAllMocks(); - mockMetrics.getUserType.mockReturnValue('anonymous'); - }); - - 
test('should return repository summary when service succeeds', async () => { - // ARRANGE - const mockSummary = { - repository: { - name: 'Hello-World', - owner: 'octocat', - url: 'https://github.com/octocat/Hello-World.git', - platform: 'github' as const, - }, - created: { - date: '2011-03-22T00:00:00.000Z', - source: 'first-commit' as const, - }, - age: { - years: 13, - months: 8, - formatted: '13.7y', - }, - lastCommit: { - date: '2025-11-15T10:30:00.000Z', - relativeTime: '4 days ago', - sha: 'abc123', - author: 'Test Author', - }, - stats: { - totalCommits: 100, - contributors: 5, - status: 'active' as const, - }, - metadata: { - cached: false, - dataSource: 'git-sparse-clone' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-19T10:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - mockSummary - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://github.com/octocat/Hello-World.git' - ); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body).toEqual({ summary: mockSummary }); - expect( - mockRepositorySummaryService.getRepositorySummary - ).toHaveBeenCalledWith('https://github.com/octocat/Hello-World.git'); expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_summary', - 'anonymous', - true, - 'api_call' - ); - }); - - test('should return 400 when repoUrl query parameter is missing', async () => { - // ACT - const response = await request(app).get('/summary'); - - // ASSERT - expect(response.status).toBe(400); - expect( - mockRepositorySummaryService.getRepositorySummary - ).not.toHaveBeenCalled(); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_summary', - 'anonymous', - false, - 'api_call' - ); - }); - - test('should return 400 when repoUrl is not a string', async () => { - // ACT - const response = await request(app).get('/summary?repoUrl='); - - // ASSERT - expect(response.status).toBe(400); - expect( - mockRepositorySummaryService.getRepositorySummary - ).not.toHaveBeenCalled(); - }); - - test('should return 400 when repoUrl has invalid protocol', async () => { - // ACT - const response = await request(app).get( - '/summary?repoUrl=ftp://invalid.com/repo.git' - ); - - // ASSERT - expect(response.status).toBe(400); - expect( - mockRepositorySummaryService.getRepositorySummary - ).not.toHaveBeenCalled(); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_summary', - 'anonymous', - false, - 'api_call' - ); - }); - - test('should handle service errors and return 500', async () => { - // ARRANGE - const serviceError = new Error('Repository not found'); - mockRepositorySummaryService.getRepositorySummary.mockRejectedValue( - serviceError - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://github.com/test/notfound.git' - ); - - // ASSERT - expect(response.status).toBe(500); - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 'repository_summary', + 'repository_commits', 'anonymous', false, 'api_call' ); }); - - test('should record cache hit when summary is cached', async () => { - // ARRANGE - const cachedSummary = { - repository: { - name: 'cached-repo', - owner: 'test', - url: 'https://github.com/test/cached-repo.git', - platform: 'github' as const, - }, - created: { - date: '2020-01-01T00:00:00.000Z', - source: 'first-commit' as const, - }, - age: { - years: 5, - months: 0, - formatted: 
'5.0y', - }, - lastCommit: { - date: '2025-11-19T00:00:00.000Z', - relativeTime: '1 day ago', - sha: 'def456', - author: 'Cached Author', - }, - stats: { - totalCommits: 500, - contributors: 10, - status: 'active' as const, - }, - metadata: { - cached: true, - dataSource: 'cache' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-18T10:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - cachedSummary - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://github.com/test/cached-repo.git' - ); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.summary.metadata.cached).toBe(true); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'summary', - true, - expect.any(Object), - 'https://github.com/test/cached-repo.git' - ); - expect(mockMetrics.recordDataFreshness).toHaveBeenCalledWith( - 'summary', - 0, - 'hybrid' - ); - }); - - test('should record cache miss when summary is fetched fresh', async () => { - // ARRANGE - const freshSummary = { - repository: { - name: 'fresh-repo', - owner: 'test', - url: 'https://github.com/test/fresh-repo.git', - platform: 'github' as const, - }, - created: { - date: '2023-01-01T00:00:00.000Z', - source: 'first-commit' as const, - }, - age: { - years: 2, - months: 0, - formatted: '2.0y', - }, - lastCommit: { - date: '2025-11-19T10:00:00.000Z', - relativeTime: 'just now', - sha: 'ghi789', - author: 'Fresh Author', - }, - stats: { - totalCommits: 250, - contributors: 3, - status: 'active' as const, - }, - metadata: { - cached: false, - dataSource: 'git-sparse-clone' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-19T10:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - freshSummary - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://github.com/test/fresh-repo.git' - ); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.summary.metadata.cached).toBe(false); - expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( - 'summary', - false, - expect.any(Object), - 'https://github.com/test/fresh-repo.git' - ); - expect(mockMetrics.recordDataFreshness).not.toHaveBeenCalled(); - }); - - test('should handle different user types for summary metrics', async () => { - // ARRANGE - mockMetrics.getUserType.mockReturnValue('premium'); - const mockSummary = { - repository: { - name: 'test', - owner: 'test', - url: 'https://github.com/test/test.git', - platform: 'github' as const, - }, - created: { - date: '2020-01-01T00:00:00.000Z', - source: 'first-commit' as const, - }, - age: { years: 5, months: 0, formatted: '5.0y' }, - lastCommit: { - date: '2025-11-19T00:00:00.000Z', - relativeTime: 'now', - sha: 'abc', - author: 'Test', - }, - stats: { - totalCommits: 100, - contributors: 5, - status: 'active' as const, - }, - metadata: { - cached: false, - dataSource: 'git-sparse-clone' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-19T00:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - mockSummary - ); - - // ACT - await request(app).get( - '/summary?repoUrl=https://github.com/test/test.git' - ); - - // ASSERT - expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( - 
'repository_summary', - 'premium', - true, - 'api_call' - ); - }); - - test('should handle empty repository (status: empty)', async () => { - // ARRANGE - const emptySummary = { - repository: { - name: 'empty-repo', - owner: 'test', - url: 'https://github.com/test/empty-repo.git', - platform: 'github' as const, - }, - created: { - date: '', - source: 'first-commit' as const, - }, - age: { - years: 0, - months: 0, - formatted: '0.0y', - }, - lastCommit: { - date: '', - relativeTime: 'no commits', - sha: '', - author: '', - }, - stats: { - totalCommits: 0, - contributors: 0, - status: 'empty' as const, - }, - metadata: { - cached: false, - dataSource: 'git-sparse-clone' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-19T10:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - emptySummary - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://github.com/test/empty-repo.git' - ); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.summary.stats.status).toBe('empty'); - expect(response.body.summary.stats.totalCommits).toBe(0); - expect(response.body.summary.lastCommit.relativeTime).toBe('no commits'); - }); - - test('should handle different repository platforms (GitLab, Bitbucket)', async () => { - // ARRANGE - GitLab - const gitlabSummary = { - repository: { - name: 'gitlab-repo', - owner: 'test', - url: 'https://gitlab.com/test/gitlab-repo.git', - platform: 'gitlab' as const, - }, - created: { - date: '2021-01-01T00:00:00.000Z', - source: 'first-commit' as const, - }, - age: { - years: 4, - months: 0, - formatted: '4.0y', - }, - lastCommit: { - date: '2025-11-01T00:00:00.000Z', - relativeTime: '18 days ago', - sha: 'gitlab123', - author: 'GitLab User', - }, - stats: { - totalCommits: 300, - contributors: 7, - status: 'inactive' as const, - }, - metadata: { - cached: false, - dataSource: 'git-sparse-clone' as const, - createdDateAccuracy: 'approximate' as const, - bandwidthSaved: '95-99% vs full clone', - lastUpdated: '2025-11-19T10:00:00.000Z', - }, - }; - - mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( - gitlabSummary - ); - - // ACT - const response = await request(app).get( - '/summary?repoUrl=https://gitlab.com/test/gitlab-repo.git' - ); - - // ASSERT - expect(response.status).toBe(200); - expect(response.body.summary.repository.platform).toBe('gitlab'); - }); }); }); diff --git a/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts.old b/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts.old new file mode 100644 index 00000000..857d47ce --- /dev/null +++ b/apps/backend/__tests__/unit/routes/repositoryRoutes.unit.test.ts.old @@ -0,0 +1,1386 @@ +import { describe, test, expect, beforeEach, vi, afterEach } from 'vitest'; +import request from 'supertest'; +import express, { Application } from 'express'; + +// Mock all external dependencies BEFORE imports +const mockGitService = { + getCommits: vi.fn(), + aggregateCommitsByTime: vi.fn(), + getTopContributors: vi.fn(), + analyzeCodeChurn: vi.fn(), +}; + +const mockRedis = { + get: vi.fn(), + set: vi.fn(), +}; + +const mockWithTempRepository = vi.fn(); + +const mockMetrics = { + recordFeatureUsage: vi.fn(), + recordEnhancedCacheOperation: vi.fn(), + recordDataFreshness: vi.fn(), + getUserType: vi.fn(), + getRepositorySizeCategory: vi.fn(), +}; + +const mockRepositorySummaryService = { + getRepositorySummary: vi.fn(), +}; + 
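A note on ordering in the mock setup that follows: Vitest hoists `vi.mock` calls above these `const` declarations, so the factories may only reference the mock objects lazily. That is safe in this suite because the router is imported dynamically in `beforeEach`, after module evaluation has initialized the consts. A sketch of the stricter `vi.hoisted` variant of the same setup is shown here for comparison (illustrative only; the suite below uses plain consts):

```typescript
import { vi } from 'vitest';

// vi.mock factories are hoisted above top-level `const` declarations, so any
// state they capture can instead be created via vi.hoisted, which is
// guaranteed to run before the hoisted factories.
const hoisted = vi.hoisted(() => ({
  getRepositorySummary: vi.fn(),
}));

vi.mock('../../../src/services/repositorySummaryService', () => ({
  __esModule: true,
  repositorySummaryService: hoisted,
}));
```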
+// Create middleware function that can be chained
+const createValidationMiddleware = () => {
+  const middleware = vi.fn((req: any, res: any, next: any) => next()) as any;
+  middleware.isURL = vi.fn(() => middleware);
+  middleware.withMessage = vi.fn(() => middleware);
+  middleware.matches = vi.fn(() => middleware);
+  middleware.optional = vi.fn(() => middleware);
+  middleware.isObject = vi.fn(() => middleware);
+  middleware.custom = vi.fn(() => middleware);
+  return middleware;
+};
+
+// Mock modules with proper middleware functions
+vi.mock('../../../src/services/gitService', () => ({
+  __esModule: true,
+  gitService: mockGitService,
+}));
+
+vi.mock('../../../src/services/cache', () => ({
+  __esModule: true,
+  default: mockRedis,
+}));
+
+vi.mock('../../../src/utils/withTempRepository', () => ({
+  __esModule: true,
+  withTempRepository: mockWithTempRepository,
+}));
+
+vi.mock('../../../src/services/metrics', () => ({
+  __esModule: true,
+  ...mockMetrics,
+}));
+
+vi.mock('express-validator', () => ({
+  __esModule: true,
+  body: vi.fn(() => createValidationMiddleware()),
+}));
+
+vi.mock('../../../src/middlewares/validation', () => ({
+  __esModule: true,
+  handleValidationErrors: vi.fn((req: any, res: any, next: any) => next()),
+  isSecureGitUrl: vi.fn(() => Promise.resolve(true)),
+}));
+
+vi.mock('../../../src/services/repositorySummaryService', () => ({
+  __esModule: true,
+  repositorySummaryService: mockRepositorySummaryService,
+}));
+
+vi.mock('@gitray/shared-types', () => {
+  const TIME = {
+    SECOND: 1000,
+    MINUTE: 60 * 1000,
+    HOUR: 60 * 60 * 1000,
+    DAY: 24 * 60 * 60 * 1000,
+    WEEK: 7 * 24 * 60 * 60 * 1000,
+  };
+
+  class GitrayError extends Error {
+    constructor(
+      message: string,
+      public readonly statusCode: number = 500,
+      public readonly code?: string
+    ) {
+      super(message);
+      this.name = 'GitrayError';
+    }
+  }
+
+  class ValidationError extends GitrayError {
+    constructor(
+      message: string,
+      public readonly errors?: any[]
+    ) {
+      super(message, 400, 'VALIDATION_ERROR');
+      this.name = 'ValidationError';
+    }
+  }
+
+  return {
+    __esModule: true,
+    ERROR_MESSAGES: {
+      INVALID_REPO_URL: 'Invalid repository URL',
+    },
+    HTTP_STATUS: {
+      OK: 200,
+      BAD_REQUEST: 400,
+      INTERNAL_SERVER_ERROR: 500,
+    },
+    TIME,
+    RATE_LIMIT: {
+      WINDOW_MS: 15 * TIME.MINUTE,
+      MAX_REQUESTS: 100,
+      MESSAGE: 'Too many requests from this IP, please try again later.',
+    },
+    GitrayError,
+    ValidationError,
+    CommitFilterOptions: {},
+    ChurnFilterOptions: {},
+  };
+});
+
+describe('RepositoryRoutes Unit Tests', () => {
+  let app: Application;
+
+  beforeEach(async () => {
+    vi.clearAllMocks();
+
+    // Set up default mock returns
+    mockMetrics.getUserType.mockReturnValue('anonymous');
+    mockMetrics.getRepositorySizeCategory.mockReturnValue('medium');
+    mockMetrics.recordFeatureUsage.mockResolvedValue(undefined);
+    mockMetrics.recordEnhancedCacheOperation.mockResolvedValue(undefined);
+    mockMetrics.recordDataFreshness.mockResolvedValue(undefined);
+
+    // Set up Express app
+    app = express();
+    app.use(express.json());
+
+    // Import and mount the router after mocks are configured
+    const { default: repositoryRoutes } = await import(
+      '../../../src/routes/repositoryRoutes'
+    );
+    app.use('/', repositoryRoutes);
+
+    // Add error handler (Express only treats 4-argument middleware as an error handler)
+    app.use((err: any, req: any, res: any, _next: any) => {
+      res.status(err.status || 500).json({
+        error: err.message || 'Internal server error',
+      });
+    });
+  });
+
+  afterEach(() => {
+    vi.resetModules();
+  });
+
+  describe('POST / - Get Repository Commits', () => {
+    test('should
return cached commits when cache hit occurs', async () => { + // ARRANGE + const mockCommits = [ + { + sha: 'abc123', + message: 'Test commit', + date: '2023-01-01T00:00:00Z', + authorName: 'Test User', + authorEmail: 'test@example.com', + }, + ]; + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(JSON.stringify(mockCommits)); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ commits: mockCommits }); + expect(mockRedis.get).toHaveBeenCalledWith(`commits:${repoUrl}`); + expect(mockWithTempRepository).not.toHaveBeenCalled(); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'commits', + true, + expect.any(Object), + repoUrl, + mockCommits.length + ); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_commits', + 'anonymous', + true, + 'api_call' + ); + }); + + test('should fetch and cache commits when cache miss occurs', async () => { + // ARRANGE + const mockCommits = [ + { + sha: 'def456', + message: 'New commit', + date: '2023-01-02T00:00:00Z', + authorName: 'Developer', + authorEmail: 'dev@example.com', + }, + ]; + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(mockCommits); + mockRedis.set.mockResolvedValue('OK'); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ commits: mockCommits }); + expect(mockWithTempRepository).toHaveBeenCalledWith( + repoUrl, + expect.any(Function) + ); + expect(mockRedis.set).toHaveBeenCalledWith( + `commits:${repoUrl}`, + JSON.stringify(mockCommits), + 'EX', + 3600 + ); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'commits', + false, + expect.any(Object), + repoUrl, + mockCommits.length + ); + }); + + test('should handle repository fetch errors and record failed feature usage', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const fetchError = new Error('Repository not found'); + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockRejectedValue(fetchError); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(500); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_commits', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should handle different user types for metrics', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + mockMetrics.getUserType.mockReturnValue('premium'); + mockRedis.get.mockResolvedValue(JSON.stringify([])); + + // ACT + await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_commits', + 'premium', + true, + 'api_call' + ); + }); + }); + + describe('POST /heatmap - Get Heatmap Data', () => { + test('should return cached heatmap data when cache hit occurs', async () => { + // ARRANGE + const mockHeatmapData = { + timePeriod: 'day', + data: [{ date: '2023-01-01', commits: 5 }], + metadata: { maxCommitCount: 5, totalCommits: 5 }, + }; + const repoUrl = 'https://github.com/user/repo.git'; + const filterOptions = { author: 'testuser' }; + + mockRedis.get.mockResolvedValue(JSON.stringify(mockHeatmapData)); + + // ACT + const response = await 
request(app) + .post('/heatmap') + .send({ repoUrl, filterOptions }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ heatmapData: mockHeatmapData }); + expect(mockRedis.get).toHaveBeenCalledWith( + `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}` + ); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'heatmap_view', + 'anonymous', + true, + 'api_call' + ); + }); + + test('should generate and cache heatmap data when cache miss occurs', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const filterOptions = { fromDate: '2023-01-01' }; + const mockCommits = [{ sha: 'abc123', date: '2023-01-01T12:00:00Z' }]; + const mockHeatmapData = { + timePeriod: 'day', + data: [{ date: '2023-01-01', commits: 1 }], + metadata: { maxCommitCount: 1, totalCommits: 1 }, + }; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockImplementation(async (url, callback) => { + return await callback('/tmp/repo'); + }); + mockGitService.getCommits.mockResolvedValue(mockCommits); + mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); + mockRedis.set.mockResolvedValue('OK'); + + // ACT + const response = await request(app) + .post('/heatmap') + .send({ repoUrl, filterOptions }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ heatmapData: mockHeatmapData }); + expect(mockGitService.aggregateCommitsByTime).toHaveBeenCalledWith( + mockCommits, + filterOptions + ); + expect(mockRedis.set).toHaveBeenCalledWith( + `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`, + JSON.stringify(mockHeatmapData), + 'EX', + 3600 + ); + }); + + test('should handle aggregation errors and record failed metrics', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const aggregationError = new Error('Aggregation failed'); + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockRejectedValue(aggregationError); + + // ACT + const response = await request(app).post('/heatmap').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(500); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'heatmap_view', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should handle undefined filter options gracefully', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const mockHeatmapData = { timePeriod: 'day', data: [], metadata: {} }; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockImplementation(async (url, callback) => { + return await callback('/tmp/repo'); + }); + mockGitService.getCommits.mockResolvedValue([]); + mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); + + // ACT + const response = await request(app).post('/heatmap').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(mockGitService.aggregateCommitsByTime).toHaveBeenCalledWith( + [], + undefined + ); + }); + }); + + describe('POST /full-data - Get Combined Data', () => { + test('should return cached data when both commits and heatmap are cached', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const filterOptions = { author: 'testuser' }; + const mockCommits = [{ sha: 'abc123', message: 'Test' }]; + const mockHeatmapData = { timePeriod: 'day', data: [] }; + + mockRedis.get + .mockResolvedValueOnce(JSON.stringify(mockCommits)) + .mockResolvedValueOnce(JSON.stringify(mockHeatmapData)); + + // ACT + const response = 
await request(app) + .post('/full-data') + .send({ repoUrl, filterOptions }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ + commits: mockCommits, + heatmapData: mockHeatmapData, + }); + expect(mockRedis.get).toHaveBeenCalledWith(`commits:${repoUrl}`); + expect(mockRedis.get).toHaveBeenCalledWith( + `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}` + ); + expect(mockWithTempRepository).not.toHaveBeenCalled(); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'full_data_view', + 'anonymous', + true, + 'api_call' + ); + }); + + test('should fetch and cache both data types when cache miss occurs', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const filterOptions = { fromDate: '2023-01-01' }; + const mockCommits = [{ sha: 'def456', message: 'New commit' }]; + const mockHeatmapData = { timePeriod: 'day', data: [{ commits: 1 }] }; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockImplementation(async (url, callback) => { + return await callback('/tmp/repo'); + }); + mockGitService.getCommits.mockResolvedValue(mockCommits); + mockGitService.aggregateCommitsByTime.mockResolvedValue(mockHeatmapData); + mockRedis.set.mockResolvedValue('OK'); + + // ACT + const response = await request(app) + .post('/full-data') + .send({ repoUrl, filterOptions }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ + commits: mockCommits, + heatmapData: mockHeatmapData, + }); + expect(mockRedis.set).toHaveBeenCalledWith( + `commits:${repoUrl}`, + JSON.stringify(mockCommits), + 'EX', + 3600 + ); + expect(mockRedis.set).toHaveBeenCalledWith( + `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`, + JSON.stringify(mockHeatmapData), + 'EX', + 3600 + ); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledTimes(2); + }); + + test('should handle partial cache hits correctly', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const mockCommits = [{ sha: 'cached', message: 'From cache' }]; + + // Only commits are cached, heatmap is not + mockRedis.get + .mockResolvedValueOnce(JSON.stringify(mockCommits)) + .mockResolvedValueOnce(null); + + mockWithTempRepository.mockImplementation(async (url, callback) => { + return await callback('/tmp/repo'); + }); + mockGitService.getCommits.mockResolvedValue(mockCommits); + mockGitService.aggregateCommitsByTime.mockResolvedValue({ + timePeriod: 'day', + data: [], + }); + + // ACT + const response = await request(app).post('/full-data').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(mockWithTempRepository).toHaveBeenCalled(); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'commits', + false, + expect.any(Object), + repoUrl, + mockCommits.length + ); + }); + + test('should handle data processing errors and record failures', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const processingError = new Error('Data processing failed'); + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockRejectedValue(processingError); + + // ACT + const response = await request(app).post('/full-data').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(500); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'full_data_view', + 'anonymous', + false, + 'api_call' + ); + }); + }); + + describe('Cache Operations', () => { + test('should handle cache get failures gracefully', 
async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const cacheError = new Error('Cache connection failed'); + + mockRedis.get.mockRejectedValue(cacheError); + mockWithTempRepository.mockResolvedValue([]); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(mockWithTempRepository).toHaveBeenCalled(); + }); + + test('should handle cache set failures without affecting response', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const mockCommits = [{ sha: 'abc123' }]; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(mockCommits); + mockRedis.set.mockRejectedValue(new Error('Cache write failed')); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ commits: mockCommits }); + }); + + test('should handle corrupted cache data gracefully', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue('invalid json data'); + mockWithTempRepository.mockResolvedValue([]); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(mockWithTempRepository).toHaveBeenCalled(); + }); + }); + + describe('Metrics Recording', () => { + test('should record different repository size categories', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/large-repo.git'; + const largeCommitSet = Array(5000).fill({ sha: 'abc' }); + + mockMetrics.getRepositorySizeCategory.mockReturnValue('large'); + mockRedis.get.mockResolvedValue(JSON.stringify(largeCommitSet)); + + // ACT + await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(mockMetrics.recordDataFreshness).toHaveBeenCalledWith( + 'commits', + 0, + 'hybrid', + 'large' + ); + }); + + test('should record authenticated user metrics correctly', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + + mockMetrics.getUserType.mockReturnValue('authenticated'); + mockRedis.get.mockResolvedValue(JSON.stringify([])); + + // ACT + await request(app).post('/heatmap').send({ repoUrl }); + + // ASSERT + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'heatmap_view', + 'authenticated', + true, + 'api_call' + ); + }); + + test('should handle metrics recording failures silently', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + + mockMetrics.recordFeatureUsage.mockRejectedValue( + new Error('Metrics service down') + ); + mockRedis.get.mockResolvedValue(JSON.stringify([])); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + // Metrics failure should not affect the main operation + }); + }); + + describe('Error Boundary Tests', () => { + test('should handle unexpected errors in middleware chain', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + + // Force an unexpected error in the middleware chain + mockMetrics.getUserType.mockImplementation(() => { + throw new Error('Unexpected middleware error'); + }); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(500); + }); + + test('should handle empty response data gracefully', async () => { + // ARRANGE + const repoUrl = 
'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(undefined); + + // ACT + const response = await request(app).post('/').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ commits: undefined }); + }); + }); + + describe('POST /churn - Get Code Churn Analysis', () => { + test('should return cached churn data when cache hit occurs', async () => { + // ARRANGE + const mockChurnData = { + files: [ + { + path: 'src/api/auth.ts', + changes: 47, + risk: 'high', + extension: '.ts', + firstChange: '2023-01-01T12:00:00Z', + lastChange: '2023-12-31T12:00:00Z', + authorCount: 5, + }, + { + path: 'src/components/Dashboard.tsx', + changes: 38, + risk: 'high', + extension: '.tsx', + firstChange: '2023-02-01T12:00:00Z', + lastChange: '2023-12-15T12:00:00Z', + authorCount: 3, + }, + ], + metadata: { + totalFiles: 2, + totalChanges: 85, + riskThresholds: { high: 30, medium: 15, low: 0 }, + dateRange: { from: '2023-01-01', to: '2023-12-31' }, + highRiskCount: 2, + mediumRiskCount: 0, + lowRiskCount: 0, + analyzedAt: '2024-01-01T00:00:00Z', + processingTime: 150, + }, + }; + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(JSON.stringify(mockChurnData)); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.churnData).toEqual({ + ...mockChurnData, + metadata: { ...mockChurnData.metadata, fromCache: true }, + }); + expect(mockRedis.get).toHaveBeenCalledWith(`churn:${repoUrl}:{}`); + expect(mockWithTempRepository).not.toHaveBeenCalled(); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'churn', + true, + expect.any(Object), + repoUrl, + mockChurnData.files.length + ); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'code_churn_view', + 'anonymous', + true, + 'api_call' + ); + }); + + test('should analyze and cache churn data when cache miss occurs', async () => { + // ARRANGE + const mockChurnData = { + files: [ + { + path: 'src/utils/helpers.ts', + changes: 32, + risk: 'high', + extension: '.ts', + firstChange: '2023-03-01T12:00:00Z', + lastChange: '2023-11-20T12:00:00Z', + authorCount: 8, + }, + ], + metadata: { + totalFiles: 1, + totalChanges: 32, + riskThresholds: { high: 30, medium: 15, low: 0 }, + dateRange: { from: '2023-01-01', to: '2023-12-31' }, + highRiskCount: 1, + mediumRiskCount: 0, + lowRiskCount: 0, + analyzedAt: '2024-01-01T00:00:00Z', + processingTime: 200, + }, + }; + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(mockChurnData); + mockRedis.set.mockResolvedValue('OK'); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ churnData: mockChurnData }); + expect(mockWithTempRepository).toHaveBeenCalledWith( + repoUrl, + expect.any(Function) + ); + expect(mockRedis.set).toHaveBeenCalledWith( + `churn:${repoUrl}:{}`, + JSON.stringify(mockChurnData), + 'EX', + 3600 + ); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'churn', + false, + expect.any(Object), + repoUrl, + mockChurnData.files.length + ); + }); + + test('should apply filter options to churn analysis', async () => { + // ARRANGE + const filterOptions = { + since: '2023-01-01', + until: '2023-12-31', + 
extensions: ['ts', 'tsx'], + minChanges: 10, + }; + const mockChurnData = { + files: [ + { + path: 'src/index.ts', + changes: 25, + risk: 'medium', + extension: '.ts', + }, + ], + metadata: { + totalFiles: 1, + totalChanges: 25, + riskThresholds: { high: 30, medium: 15, low: 0 }, + dateRange: { from: '2023-01-01', to: '2023-12-31' }, + highRiskCount: 0, + mediumRiskCount: 1, + lowRiskCount: 0, + analyzedAt: '2024-01-01T00:00:00Z', + filterOptions, + }, + }; + const repoUrl = 'https://github.com/user/repo.git'; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockImplementation(async (url, callback) => { + return await callback('/tmp/repo'); + }); + mockGitService.analyzeCodeChurn.mockResolvedValue(mockChurnData); + mockRedis.set.mockResolvedValue('OK'); + + // ACT + const response = await request(app) + .post('/churn') + .send({ repoUrl, filterOptions }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ churnData: mockChurnData }); + expect(mockGitService.analyzeCodeChurn).toHaveBeenCalledWith( + '/tmp/repo', + filterOptions + ); + expect(mockRedis.set).toHaveBeenCalledWith( + `churn:${repoUrl}:${JSON.stringify(filterOptions)}`, + JSON.stringify(mockChurnData), + 'EX', + 3600 + ); + }); + + test('should handle analysis errors and record failed feature usage', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const analysisError = new Error('Churn analysis failed'); + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockRejectedValue(analysisError); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(500); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'code_churn_view', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should handle different user types for churn metrics', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + mockMetrics.getUserType.mockReturnValue('premium'); + mockRedis.get.mockResolvedValue( + JSON.stringify({ files: [], metadata: {} }) + ); + + // ACT + await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'code_churn_view', + 'premium', + true, + 'api_call' + ); + }); + + test('should handle cache failures gracefully and fetch from repository', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const cacheError = new Error('Cache connection failed'); + const mockChurnData = { files: [], metadata: {} }; + + mockRedis.get.mockRejectedValue(cacheError); + mockWithTempRepository.mockResolvedValue(mockChurnData); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(mockWithTempRepository).toHaveBeenCalled(); + }); + + test('should handle cache set failures without affecting response', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/repo.git'; + const mockChurnData = { + files: [{ path: 'test.ts', changes: 5, risk: 'low' }], + metadata: { totalFiles: 1 }, + }; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(mockChurnData); + mockRedis.set.mockRejectedValue(new Error('Cache write failed')); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ churnData: mockChurnData }); + 
}); + + test('should handle empty churn results', async () => { + // ARRANGE + const repoUrl = 'https://github.com/user/empty-repo.git'; + const emptyChurnData = { + files: [], + metadata: { + totalFiles: 0, + totalChanges: 0, + riskThresholds: { high: 30, medium: 15, low: 0 }, + dateRange: { from: '2023-01-01', to: '2023-12-31' }, + highRiskCount: 0, + mediumRiskCount: 0, + lowRiskCount: 0, + analyzedAt: '2024-01-01T00:00:00Z', + }, + }; + + mockRedis.get.mockResolvedValue(null); + mockWithTempRepository.mockResolvedValue(emptyChurnData); + + // ACT + const response = await request(app).post('/churn').send({ repoUrl }); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.churnData.files).toHaveLength(0); + expect(response.body.churnData.metadata.totalFiles).toBe(0); + }); + }); + + describe('GET /summary - Get Repository Summary Statistics', () => { + beforeEach(async () => { + vi.clearAllMocks(); + mockMetrics.getUserType.mockReturnValue('anonymous'); + }); + + test('should return repository summary when service succeeds', async () => { + // ARRANGE + const mockSummary = { + repository: { + name: 'Hello-World', + owner: 'octocat', + url: 'https://github.com/octocat/Hello-World.git', + platform: 'github' as const, + }, + created: { + date: '2011-03-22T00:00:00.000Z', + source: 'first-commit' as const, + }, + age: { + years: 13, + months: 8, + formatted: '13.7y', + }, + lastCommit: { + date: '2025-11-15T10:30:00.000Z', + relativeTime: '4 days ago', + sha: 'abc123', + author: 'Test Author', + }, + stats: { + totalCommits: 100, + contributors: 5, + status: 'active' as const, + }, + metadata: { + cached: false, + dataSource: 'git-sparse-clone' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-19T10:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + mockSummary + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://github.com/octocat/Hello-World.git' + ); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body).toEqual({ summary: mockSummary }); + expect( + mockRepositorySummaryService.getRepositorySummary + ).toHaveBeenCalledWith('https://github.com/octocat/Hello-World.git'); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_summary', + 'anonymous', + true, + 'api_call' + ); + }); + + test('should return 400 when repoUrl query parameter is missing', async () => { + // ACT + const response = await request(app).get('/summary'); + + // ASSERT + expect(response.status).toBe(400); + expect( + mockRepositorySummaryService.getRepositorySummary + ).not.toHaveBeenCalled(); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_summary', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should return 400 when repoUrl is not a string', async () => { + // ACT + const response = await request(app).get('/summary?repoUrl='); + + // ASSERT + expect(response.status).toBe(400); + expect( + mockRepositorySummaryService.getRepositorySummary + ).not.toHaveBeenCalled(); + }); + + test('should return 400 when repoUrl has invalid protocol', async () => { + // ACT + const response = await request(app).get( + '/summary?repoUrl=ftp://invalid.com/repo.git' + ); + + // ASSERT + expect(response.status).toBe(400); + expect( + mockRepositorySummaryService.getRepositorySummary + ).not.toHaveBeenCalled(); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 
'repository_summary', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should handle service errors and return 500', async () => { + // ARRANGE + const serviceError = new Error('Repository not found'); + mockRepositorySummaryService.getRepositorySummary.mockRejectedValue( + serviceError + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://github.com/test/notfound.git' + ); + + // ASSERT + expect(response.status).toBe(500); + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_summary', + 'anonymous', + false, + 'api_call' + ); + }); + + test('should record cache hit when summary is cached', async () => { + // ARRANGE + const cachedSummary = { + repository: { + name: 'cached-repo', + owner: 'test', + url: 'https://github.com/test/cached-repo.git', + platform: 'github' as const, + }, + created: { + date: '2020-01-01T00:00:00.000Z', + source: 'first-commit' as const, + }, + age: { + years: 5, + months: 0, + formatted: '5.0y', + }, + lastCommit: { + date: '2025-11-19T00:00:00.000Z', + relativeTime: '1 day ago', + sha: 'def456', + author: 'Cached Author', + }, + stats: { + totalCommits: 500, + contributors: 10, + status: 'active' as const, + }, + metadata: { + cached: true, + dataSource: 'cache' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-18T10:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + cachedSummary + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://github.com/test/cached-repo.git' + ); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.summary.metadata.cached).toBe(true); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'summary', + true, + expect.any(Object), + 'https://github.com/test/cached-repo.git' + ); + expect(mockMetrics.recordDataFreshness).toHaveBeenCalledWith( + 'summary', + 0, + 'hybrid' + ); + }); + + test('should record cache miss when summary is fetched fresh', async () => { + // ARRANGE + const freshSummary = { + repository: { + name: 'fresh-repo', + owner: 'test', + url: 'https://github.com/test/fresh-repo.git', + platform: 'github' as const, + }, + created: { + date: '2023-01-01T00:00:00.000Z', + source: 'first-commit' as const, + }, + age: { + years: 2, + months: 0, + formatted: '2.0y', + }, + lastCommit: { + date: '2025-11-19T10:00:00.000Z', + relativeTime: 'just now', + sha: 'ghi789', + author: 'Fresh Author', + }, + stats: { + totalCommits: 250, + contributors: 3, + status: 'active' as const, + }, + metadata: { + cached: false, + dataSource: 'git-sparse-clone' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-19T10:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + freshSummary + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://github.com/test/fresh-repo.git' + ); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.summary.metadata.cached).toBe(false); + expect(mockMetrics.recordEnhancedCacheOperation).toHaveBeenCalledWith( + 'summary', + false, + expect.any(Object), + 'https://github.com/test/fresh-repo.git' + ); + expect(mockMetrics.recordDataFreshness).not.toHaveBeenCalled(); + }); + + test('should handle different user types for summary metrics', async () => { + // ARRANGE + 
mockMetrics.getUserType.mockReturnValue('premium'); + const mockSummary = { + repository: { + name: 'test', + owner: 'test', + url: 'https://github.com/test/test.git', + platform: 'github' as const, + }, + created: { + date: '2020-01-01T00:00:00.000Z', + source: 'first-commit' as const, + }, + age: { years: 5, months: 0, formatted: '5.0y' }, + lastCommit: { + date: '2025-11-19T00:00:00.000Z', + relativeTime: 'now', + sha: 'abc', + author: 'Test', + }, + stats: { + totalCommits: 100, + contributors: 5, + status: 'active' as const, + }, + metadata: { + cached: false, + dataSource: 'git-sparse-clone' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-19T00:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + mockSummary + ); + + // ACT + await request(app).get( + '/summary?repoUrl=https://github.com/test/test.git' + ); + + // ASSERT + expect(mockMetrics.recordFeatureUsage).toHaveBeenCalledWith( + 'repository_summary', + 'premium', + true, + 'api_call' + ); + }); + + test('should handle empty repository (status: empty)', async () => { + // ARRANGE + const emptySummary = { + repository: { + name: 'empty-repo', + owner: 'test', + url: 'https://github.com/test/empty-repo.git', + platform: 'github' as const, + }, + created: { + date: '', + source: 'first-commit' as const, + }, + age: { + years: 0, + months: 0, + formatted: '0.0y', + }, + lastCommit: { + date: '', + relativeTime: 'no commits', + sha: '', + author: '', + }, + stats: { + totalCommits: 0, + contributors: 0, + status: 'empty' as const, + }, + metadata: { + cached: false, + dataSource: 'git-sparse-clone' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-19T10:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + emptySummary + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://github.com/test/empty-repo.git' + ); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.summary.stats.status).toBe('empty'); + expect(response.body.summary.stats.totalCommits).toBe(0); + expect(response.body.summary.lastCommit.relativeTime).toBe('no commits'); + }); + + test('should handle different repository platforms (GitLab, Bitbucket)', async () => { + // ARRANGE - GitLab + const gitlabSummary = { + repository: { + name: 'gitlab-repo', + owner: 'test', + url: 'https://gitlab.com/test/gitlab-repo.git', + platform: 'gitlab' as const, + }, + created: { + date: '2021-01-01T00:00:00.000Z', + source: 'first-commit' as const, + }, + age: { + years: 4, + months: 0, + formatted: '4.0y', + }, + lastCommit: { + date: '2025-11-01T00:00:00.000Z', + relativeTime: '18 days ago', + sha: 'gitlab123', + author: 'GitLab User', + }, + stats: { + totalCommits: 300, + contributors: 7, + status: 'inactive' as const, + }, + metadata: { + cached: false, + dataSource: 'git-sparse-clone' as const, + createdDateAccuracy: 'approximate' as const, + bandwidthSaved: '95-99% vs full clone', + lastUpdated: '2025-11-19T10:00:00.000Z', + }, + }; + + mockRepositorySummaryService.getRepositorySummary.mockResolvedValue( + gitlabSummary + ); + + // ACT + const response = await request(app).get( + '/summary?repoUrl=https://gitlab.com/test/gitlab-repo.git' + ); + + // ASSERT + expect(response.status).toBe(200); + expect(response.body.summary.repository.platform).toBe('gitlab'); + }); + }); +}); diff --git 
a/apps/backend/__tests__/unit/services/repositoryCache.unit.test.ts b/apps/backend/__tests__/unit/services/repositoryCache.unit.test.ts index 782314cb..887beaa8 100644 --- a/apps/backend/__tests__/unit/services/repositoryCache.unit.test.ts +++ b/apps/backend/__tests__/unit/services/repositoryCache.unit.test.ts @@ -576,14 +576,9 @@ describe('RepositoryCache - Fast High Coverage', () => { expect(cache.cacheKeyPatterns.size).toBeGreaterThanOrEqual(0); - // Test hash generation (covers lines 1761-1764) - const hash1 = cache.hashUrl('https://github.com/test/repo.git'); - const hash2 = cache.hashUrl('https://github.com/test/repo.git'); - expect(hash1).toBe(hash2); - expect(hash1).toHaveLength(16); - - const objHash = cache.hashObject({ author: 'test', limit: 10 }); - expect(objHash).toHaveLength(8); + // Note: hashUrl and hashObject are utility functions from hashUtils, + // not methods of the cache class, so we don't test them here. + // They are tested in the hashUtils unit tests. }); test('should handle filter edge cases', () => { @@ -864,38 +859,47 @@ describe('RepositoryCache - Fast High Coverage', () => { expect(duration).toBeLessThan(5000); }); - test('lock helper methods return correct lock arrays', () => { - // Test getCommitLocks - const commitLocks = (repositoryCache as any).getCommitLocks( - 'https://github.com/test/repo.git' - ); + test('lock helper methods return correct lock arrays (deadlock prevention)', () => { + const repoUrl = 'https://github.com/test/repo.git'; + + // Test getCommitLocks - should NOT include repo-access + const commitLocks = (repositoryCache as any).getCommitLocks(repoUrl); expect(commitLocks).toEqual([ - 'cache-filtered:https://github.com/test/repo.git', - 'cache-operation:https://github.com/test/repo.git', - 'repo-access:https://github.com/test/repo.git', + `cache-filtered:${repoUrl}`, + `cache-operation:${repoUrl}`, ]); + expect(commitLocks).not.toContain(`repo-access:${repoUrl}`); - // Test getContributorLocks + // Test getContributorLocks - should NOT include repo-access const contributorLocks = (repositoryCache as any).getContributorLocks( - 'https://github.com/test/repo.git' + repoUrl ); expect(contributorLocks).toEqual([ - 'cache-contributors:https://github.com/test/repo.git', - 'cache-filtered:https://github.com/test/repo.git', - 'cache-operation:https://github.com/test/repo.git', - 'repo-access:https://github.com/test/repo.git', + `cache-contributors:${repoUrl}`, + `cache-filtered:${repoUrl}`, + `cache-operation:${repoUrl}`, ]); + expect(contributorLocks).not.toContain(`repo-access:${repoUrl}`); - // Test getAggregatedLocks + // Test getAggregatedLocks - should NOT include repo-access const aggregatedLocks = (repositoryCache as any).getAggregatedLocks( - 'https://github.com/test/repo.git' + repoUrl ); expect(aggregatedLocks).toEqual([ - 'cache-aggregated:https://github.com/test/repo.git', - 'cache-filtered:https://github.com/test/repo.git', - 'cache-operation:https://github.com/test/repo.git', - 'repo-access:https://github.com/test/repo.git', + `cache-aggregated:${repoUrl}`, + `cache-filtered:${repoUrl}`, + `cache-operation:${repoUrl}`, + ]); + expect(aggregatedLocks).not.toContain(`repo-access:${repoUrl}`); + + // Test getChurnLocks - should NOT include repo-access + const churnLocks = (repositoryCache as any).getChurnLocks(repoUrl); + expect(churnLocks).toEqual([ + `cache-churn:${repoUrl}`, + `cache-filtered:${repoUrl}`, + `cache-operation:${repoUrl}`, ]); + expect(churnLocks).not.toContain(`repo-access:${repoUrl}`); }); }); }); diff --git 
a/apps/backend/__tests__/unit/utils/cacheHelpers.unit.test.ts b/apps/backend/__tests__/unit/utils/cacheHelpers.unit.test.ts
new file mode 100644
index 00000000..fcc2ddb0
--- /dev/null
+++ b/apps/backend/__tests__/unit/utils/cacheHelpers.unit.test.ts
@@ -0,0 +1,317 @@
+/**
+ * Unit tests for cacheHelpers
+ *
+ * Coverage target: ≥80%
+ * Testing strategy: AAA pattern (Arrange-Act-Assert)
+ * Focus: Happy path first, then edge cases
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import {
+  safeCacheGet,
+  handleTransactionError,
+} from '../../../src/utils/cacheHelpers';
+import type HybridLRUCache from '../../../src/utils/hybridLruCache';
+import type { Logger } from 'winston';
+
+// Mock metrics service
+vi.mock('../../../src/services/metrics', () => ({
+  recordDetailedError: vi.fn(),
+  updateServiceHealthScore: vi.fn(),
+}));
+
+describe('cacheHelpers', () => {
+  let mockCache: HybridLRUCache;
+  let mockLogger: Logger;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    mockCache = {
+      get: vi.fn(),
+    } as any;
+
+    mockLogger = {
+      error: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      debug: vi.fn(),
+    } as any;
+  });
+
+  describe('safeCacheGet', () => {
+    describe('Happy Path', () => {
+      it('should return cached value when key exists', async () => {
+        // ARRANGE
+        const testData = { commits: [{ sha: 'abc123' }] };
+        (mockCache.get as any).mockResolvedValue(testData);
+
+        // ACT
+        const result = await safeCacheGet(mockCache, 'test-key', mockLogger);
+
+        // ASSERT
+        expect(result).toEqual(testData);
+        expect(mockCache.get).toHaveBeenCalledWith('test-key');
+      });
+
+      it('should return null when key does not exist (cache miss)', async () => {
+        // ARRANGE
+        (mockCache.get as any).mockResolvedValue(null);
+
+        // ACT
+        const result = await safeCacheGet(mockCache, 'missing-key', mockLogger);
+
+        // ASSERT
+        expect(result).toBeNull();
+      });
+
+      it('should work with different data types', async () => {
+        // ARRANGE - Array
+        const arrayData = ['item1', 'item2'];
+        (mockCache.get as any).mockResolvedValue(arrayData);
+
+        // ACT
+        const result1 = await safeCacheGet(mockCache, 'array-key', mockLogger);
+
+        // ASSERT
+        expect(result1).toEqual(arrayData);
+
+        // ARRANGE - String
+        (mockCache.get as any).mockResolvedValue('simple string');
+
+        // ACT
+        const result2 = await safeCacheGet(mockCache, 'string-key', mockLogger);
+
+        // ASSERT
+        expect(result2).toBe('simple string');
+      });
+    });
+
+    describe('Error Handling', () => {
+      it('should return null and log error when cache.get throws Error', async () => {
+        // ARRANGE
+        const error = new Error('Cache read error');
+        (mockCache.get as any).mockRejectedValue(error);
+
+        // ACT
+        const result = await safeCacheGet(mockCache, 'error-key', mockLogger, {
+          operation: 'get',
+          key: 'error-key',
+          repoUrl: 'https://github.com/test/repo',
+        });
+
+        // ASSERT
+        expect(result).toBeNull();
+        expect(mockLogger.error).toHaveBeenCalledWith(
+          'Cache operation failed',
+          expect.objectContaining({
+            operation: 'get',
+            key: 'error-key',
+            repoUrl: 'https://github.com/test/repo',
+            error: 'Cache read error',
+          })
+        );
+      });
+
+      it('should handle non-Error exceptions
(string)', async () => { + // ARRANGE + (mockCache.get as any).mockRejectedValue('String error'); + + // ACT + const result = await safeCacheGet(mockCache, 'test-key', mockLogger); + + // ASSERT + expect(result).toBeNull(); + expect(mockLogger.error).toHaveBeenCalledWith( + 'Cache operation failed', + expect.objectContaining({ + error: 'String error', + }) + ); + }); + + it('should use provided context for error logging', async () => { + // ARRANGE + (mockCache.get as any).mockRejectedValue(new Error('Test error')); + + // ACT + await safeCacheGet(mockCache, 'key1', mockLogger, { + operation: 'custom-op', + key: 'custom-key', + repoUrl: 'https://github.com/custom/repo', + }); + + // ASSERT + expect(mockLogger.error).toHaveBeenCalledWith( + 'Cache operation failed', + expect.objectContaining({ + operation: 'custom-op', + key: 'custom-key', + repoUrl: 'https://github.com/custom/repo', + }) + ); + }); + + it('should use default operation and key when context not provided', async () => { + // ARRANGE + (mockCache.get as any).mockRejectedValue(new Error('Test error')); + + // ACT + await safeCacheGet(mockCache, 'test-key', mockLogger); + + // ASSERT + expect(mockLogger.error).toHaveBeenCalledWith( + 'Cache operation failed', + expect.objectContaining({ + operation: 'get', + key: 'test-key', + }) + ); + }); + }); + }); + + describe('handleTransactionError', () => { + describe('Happy Path', () => { + it('should increment metrics, rollback transaction, and rethrow error', async () => { + // ARRANGE + const mockTransaction = { + id: 'tx-123', + operations: [], + }; + const mockMetrics = { + transactions: { failed: 0 }, + }; + const error = new Error('Transaction failed'); + const mockRollback = vi.fn().mockResolvedValue(undefined); + + // ACT & ASSERT + await expect( + handleTransactionError( + mockTransaction, + error, + mockMetrics, + mockLogger, + { + repoUrl: 'https://github.com/test/repo', + operation: 'cache_operation', + transactionId: 'tx-123', + }, + mockRollback + ) + ).rejects.toThrow('Transaction failed'); + + // ASSERT - Metrics incremented + expect(mockMetrics.transactions.failed).toBe(1); + + // ASSERT - Rollback called + expect(mockRollback).toHaveBeenCalledWith(mockTransaction); + + // ASSERT - Error logged + expect(mockLogger.error).toHaveBeenCalledWith( + 'Failed to cache_operation, transaction rolled back', + expect.objectContaining({ + repoUrl: 'https://github.com/test/repo', + transactionId: 'tx-123', + error: 'Transaction failed', + }) + ); + }); + + it('should call rollback function before rethrowing', async () => { + // ARRANGE + const mockTransaction = { id: 'tx-789', operations: [] }; + const mockMetrics = { transactions: { failed: 0 } }; + const error = new Error('Rollback test'); + let rollbackCalled = false; + const mockRollback = vi.fn(async () => { + rollbackCalled = true; + }); + + // ACT & ASSERT + try { + await handleTransactionError( + mockTransaction, + error, + mockMetrics, + mockLogger, + { + repoUrl: 'https://github.com/test/repo', + operation: 'test', + transactionId: 'tx-789', + }, + mockRollback + ); + } catch (e) { + // Expected to throw + } + + // ASSERT + expect(rollbackCalled).toBe(true); + expect(mockRollback).toHaveBeenCalledBefore(mockLogger.error as any); + }); + }); + + describe('Error Handling', () => { + it('should handle non-Error exceptions (string)', async () => { + // ARRANGE + const mockTransaction = { id: 'tx-456', operations: [] }; + const mockMetrics = { transactions: { failed: 5 } }; + const mockRollback = 
vi.fn().mockResolvedValue(undefined); + + // ACT & ASSERT + await expect( + handleTransactionError( + mockTransaction, + 'String error', + mockMetrics, + mockLogger, + { + repoUrl: 'https://github.com/test/repo', + operation: 'test_op', + transactionId: 'tx-456', + }, + mockRollback + ) + ).rejects.toThrow(); + + // ASSERT + expect(mockMetrics.transactions.failed).toBe(6); + expect(mockLogger.error).toHaveBeenCalledWith( + 'Failed to test_op, transaction rolled back', + expect.objectContaining({ + error: 'String error', + }) + ); + }); + + it('should increment failed counter from any starting value', async () => { + // ARRANGE + const mockTransaction = { id: 'tx-999', operations: [] }; + const mockMetrics = { transactions: { failed: 42 } }; + const mockRollback = vi.fn().mockResolvedValue(undefined); + + // ACT + try { + await handleTransactionError( + mockTransaction, + new Error('Test'), + mockMetrics, + mockLogger, + { + repoUrl: 'https://github.com/test/repo', + operation: 'increment_test', + transactionId: 'tx-999', + }, + mockRollback + ); + } catch (e) { + // Expected + } + + // ASSERT + expect(mockMetrics.transactions.failed).toBe(43); + }); + }); + }); +}); diff --git a/apps/backend/__tests__/unit/utils/gitUtils.unit.test.ts b/apps/backend/__tests__/unit/utils/gitUtils.unit.test.ts new file mode 100644 index 00000000..741a07ba --- /dev/null +++ b/apps/backend/__tests__/unit/utils/gitUtils.unit.test.ts @@ -0,0 +1,236 @@ +/** + * Unit tests for gitUtils + * + * Coverage target: ≥80% + * Testing strategy: AAA pattern (Arrange-Act-Assert) + * Focus: Happy path first, then edge cases + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { shallowClone } from '../../../src/utils/gitUtils'; +import simpleGit from 'simple-git'; + +// Mock simple-git +vi.mock('simple-git'); + +// Mock config +vi.mock('../../../src/config', () => ({ + config: { + git: { + cloneDepth: 50, + }, + }, +})); + +describe('gitUtils', () => { + describe('shallowClone', () => { + let mockGit: any; + + beforeEach(() => { + vi.clearAllMocks(); + + // Create mock git instance + mockGit = { + init: vi.fn().mockResolvedValue(undefined), + addRemote: vi.fn().mockResolvedValue(undefined), + raw: vi.fn().mockResolvedValue(undefined), + }; + + // Mock simpleGit to return our mock instance + (simpleGit as any).mockReturnValue(mockGit); + }); + + describe('Happy Path', () => { + it('should clone repository with blob filtering for complete history', async () => { + // ARRANGE + const repoUrl = 'https://github.com/test/repo.git'; + const targetDir = '/tmp/test-repo'; + + // ACT + await shallowClone(repoUrl, targetDir); + + // ASSERT - Verify git commands called in correct order + expect(simpleGit).toHaveBeenCalledWith(targetDir); + expect(mockGit.init).toHaveBeenCalled(); + expect(mockGit.addRemote).toHaveBeenCalledWith('origin', repoUrl); + + // Verify sparse checkout configuration + expect(mockGit.raw).toHaveBeenCalledWith([ + 'config', + 'core.sparseCheckout', + 'true', + ]); + + // Verify fetch with blob filtering + expect(mockGit.raw).toHaveBeenCalledWith([ + 'fetch', + '--filter=blob:none', + '--no-tags', + 'origin', + 'HEAD', + ]); + + // Verify checkout + expect(mockGit.raw).toHaveBeenCalledWith(['checkout', 'FETCH_HEAD']); + }); + + it('should use default depth from config', async () => { + // ARRANGE + const repoUrl = 'https://github.com/test/repo.git'; + const targetDir = '/tmp/test-repo'; + + // ACT + await shallowClone(repoUrl, targetDir); + + // ASSERT - depth parameter is ignored in new 
implementation + // but function should still work + expect(mockGit.init).toHaveBeenCalled(); + expect(mockGit.addRemote).toHaveBeenCalledWith('origin', repoUrl); + }); + + it('should clone with custom depth parameter (legacy parameter, not used)', async () => { + // ARRANGE + const repoUrl = 'https://github.com/test/repo.git'; + const targetDir = '/tmp/test-repo'; + const customDepth = 100; + + // ACT + await shallowClone(repoUrl, targetDir, customDepth); + + // ASSERT - Even with custom depth, blob filtering is used + expect(mockGit.raw).toHaveBeenCalledWith([ + 'fetch', + '--filter=blob:none', + '--no-tags', + 'origin', + 'HEAD', + ]); + }); + + it('should work with different repository URLs', async () => { + // ARRANGE + const testCases = [ + 'https://github.com/owner/repo.git', + 'https://gitlab.com/group/project.git', + 'https://bitbucket.org/user/repository.git', + ]; + + for (const repoUrl of testCases) { + vi.clearAllMocks(); + + // ACT + await shallowClone(repoUrl, '/tmp/test'); + + // ASSERT + expect(mockGit.addRemote).toHaveBeenCalledWith('origin', repoUrl); + } + }); + + it('should work with different target directories', async () => { + // ARRANGE + const repoUrl = 'https://github.com/test/repo.git'; + const testCases = [ + '/tmp/dir1', + '/var/repos/project', + '/home/user/workspace/repo', + ]; + + for (const targetDir of testCases) { + vi.clearAllMocks(); + + // ACT + await shallowClone(repoUrl, targetDir); + + // ASSERT + expect(simpleGit).toHaveBeenCalledWith(targetDir); + } + }); + + it('should execute git operations in correct sequence', async () => { + // ARRANGE + const repoUrl = 'https://github.com/test/repo.git'; + const targetDir = '/tmp/test-repo'; + const callOrder: string[] = []; + + mockGit.init.mockImplementation(() => { + callOrder.push('init'); + return Promise.resolve(); + }); + mockGit.addRemote.mockImplementation(() => { + callOrder.push('addRemote'); + return Promise.resolve(); + }); + mockGit.raw.mockImplementation((args: string[]) => { + callOrder.push(`raw-${args[0]}`); + return Promise.resolve(); + }); + + // ACT + await shallowClone(repoUrl, targetDir); + + // ASSERT - Verify execution order + expect(callOrder).toEqual([ + 'init', + 'addRemote', + 'raw-config', + 'raw-fetch', + 'raw-checkout', + ]); + }); + }); + + describe('Error Handling', () => { + it('should propagate error if git init fails', async () => { + // ARRANGE + const error = new Error('Git init failed'); + mockGit.init.mockRejectedValue(error); + + // ACT & ASSERT + await expect( + shallowClone('https://github.com/test/repo.git', '/tmp/test') + ).rejects.toThrow('Git init failed'); + }); + + it('should propagate error if addRemote fails', async () => { + // ARRANGE + const error = new Error('Failed to add remote'); + mockGit.addRemote.mockRejectedValue(error); + + // ACT & ASSERT + await expect( + shallowClone('https://github.com/test/repo.git', '/tmp/test') + ).rejects.toThrow('Failed to add remote'); + }); + + it('should propagate error if fetch fails', async () => { + // ARRANGE + mockGit.raw.mockImplementation((args: string[]) => { + if (args[0] === 'fetch') { + return Promise.reject(new Error('Fetch failed')); + } + return Promise.resolve(); + }); + + // ACT & ASSERT + await expect( + shallowClone('https://github.com/test/repo.git', '/tmp/test') + ).rejects.toThrow('Fetch failed'); + }); + + it('should propagate error if checkout fails', async () => { + // ARRANGE + mockGit.raw.mockImplementation((args: string[]) => { + if (args[0] === 'checkout') { + return Promise.reject(new 
+          }
+          return Promise.resolve();
+        });
+
+        // ACT & ASSERT
+        await expect(
+          shallowClone('https://github.com/test/repo.git', '/tmp/test')
+        ).rejects.toThrow('Checkout failed');
+      });
+    });
+  });
+});
diff --git a/apps/backend/__tests__/unit/utils/repositoryRouteFactory.unit.test.ts b/apps/backend/__tests__/unit/utils/repositoryRouteFactory.unit.test.ts
new file mode 100644
index 00000000..add25987
--- /dev/null
+++ b/apps/backend/__tests__/unit/utils/repositoryRouteFactory.unit.test.ts
@@ -0,0 +1,503 @@
+/**
+ * Unit Tests for Repository Route Factory
+ *
+ * Focus: Route handler factory and validation chain builder
+ * Pattern: AAA (Arrange-Act-Assert), Happy Path First
+ */
+
+import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
+import type { Request, Response, NextFunction } from 'express';
+import type { ValidationChain } from 'express-validator';
+import {
+  createCachedRouteHandler,
+  buildRepoValidationChain,
+  type RouteContext,
+  type SuccessMetricsBuilder,
+  type RouteProcessor,
+  type ValidationChainOptions,
+} from '../../../src/utils/repositoryRouteFactory.js';
+
+// Mock dependencies
+vi.mock('../../../src/utils/routeHelpers.js', () => ({
+  setupRouteRequest: vi.fn((req: Request) => ({
+    logger: {
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+      debug: vi.fn(),
+    },
+    repoUrl: req.body.repoUrl || 'https://github.com/test/repo.git',
+    userType: 'authenticated',
+  })),
+  recordRouteSuccess: vi.fn(
+    (
+      featureName: string,
+      userType: string,
+      logger: unknown,
+      repoUrl: string,
+      result: unknown,
+      res: Response,
+      metrics: unknown
+    ) => {
+      res.json(result);
+    }
+  ),
+  recordRouteError: vi.fn(
+    (
+      featureName: string,
+      userType: string,
+      logger: unknown,
+      repoUrl: string,
+      error: unknown,
+      next: NextFunction
+    ) => {
+      next(error);
+    }
+  ),
+}));
+
+describe('Repository Route Factory', () => {
+  describe('createCachedRouteHandler', () => {
+    let mockReq: Partial<Request>;
+    let mockRes: Partial<Response>;
+    let mockNext: Mock;
+
+    beforeEach(() => {
+      mockReq = {
+        body: { repoUrl: 'https://github.com/test/repo.git' },
+        query: {},
+      };
+      mockRes = {
+        json: vi.fn(),
+        status: vi.fn().mockReturnThis(),
+      };
+      mockNext = vi.fn();
+    });
+
+    // Happy Path Tests
+    it('should create a route handler that processes successfully', async () => {
+      // ARRANGE
+      const processor: RouteProcessor<{ data: string }> = vi
+        .fn()
+        .mockResolvedValue({ data: 'test-data' });
+      const buildMetrics: SuccessMetricsBuilder<{ data: string }> = vi
+        .fn()
+        .mockReturnValue({ dataLength: 9 });
+
+      const handlers = createCachedRouteHandler(
+        'test_feature',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(handlers).toHaveLength(1);
+      expect(processor).toHaveBeenCalledWith(
+        expect.objectContaining({
+          req: mockReq,
+          repoUrl: 'https://github.com/test/repo.git',
+          userType: 'authenticated',
+        })
+      );
+      expect(buildMetrics).toHaveBeenCalledWith({ data: 'test-data' });
+      expect(mockRes.json).toHaveBeenCalledWith({ data: 'test-data' });
+    });
+
+    it('should handle different feature names', async () => {
+      // ARRANGE
+      const processor: RouteProcessor<{ count: number }> = vi
+        .fn()
+        .mockResolvedValue({ count: 42 });
+      const buildMetrics: SuccessMetricsBuilder<{ count: number }> = vi
+        .fn()
+        .mockReturnValue({ itemCount: 42 });
+
+      const handlers1 = createCachedRouteHandler(
+        'feature_one',
+        processor,
+        buildMetrics
+      );
+      const handlers2 = createCachedRouteHandler(
+        'feature_two',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers1[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+      await handlers2[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(processor).toHaveBeenCalledTimes(2);
+      expect(mockRes.json).toHaveBeenCalledTimes(2);
+    });
+
+    it('should pass route context with logger to processor', async () => {
+      // ARRANGE
+      let capturedContext: RouteContext | null = null;
+      const processor: RouteProcessor<{ result: string }> = vi
+        .fn()
+        .mockImplementation((ctx: RouteContext) => {
+          capturedContext = ctx;
+          return Promise.resolve({ result: 'ok' });
+        });
+      const buildMetrics: SuccessMetricsBuilder<{ result: string }> = vi
+        .fn()
+        .mockReturnValue({});
+
+      const handlers = createCachedRouteHandler(
+        'test',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(capturedContext).not.toBeNull();
+      expect(
+        (capturedContext as unknown as RouteContext).logger
+      ).toHaveProperty('info');
+      expect(
+        (capturedContext as unknown as RouteContext).logger
+      ).toHaveProperty('error');
+      expect((capturedContext as unknown as RouteContext).repoUrl).toBe(
+        'https://github.com/test/repo.git'
+      );
+      expect((capturedContext as unknown as RouteContext).userType).toBe(
+        'authenticated'
+      );
+    });
+
+    // Error Handling Tests
+    it('should handle processor errors gracefully', async () => {
+      // ARRANGE
+      const testError = new Error('Processor failed');
+      const processor: RouteProcessor<{ data: string }> = vi
+        .fn()
+        .mockRejectedValue(testError);
+      const buildMetrics: SuccessMetricsBuilder<{ data: string }> = vi.fn();
+
+      const handlers = createCachedRouteHandler(
+        'failing_feature',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(processor).toHaveBeenCalled();
+      expect(buildMetrics).not.toHaveBeenCalled();
+      expect(mockNext).toHaveBeenCalledWith(testError);
+    });
+
+    it('should call error handler when processor throws', async () => {
+      // ARRANGE
+      const processor: RouteProcessor<unknown> = vi
+        .fn()
+        .mockRejectedValue(new Error('Cache error'));
+      const buildMetrics: SuccessMetricsBuilder<unknown> = vi.fn();
+
+      const handlers = createCachedRouteHandler(
+        'error_test',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(mockNext).toHaveBeenCalled();
+      expect(mockNext.mock.calls[0]?.[0]).toBeInstanceOf(Error);
+    });
+
+    // Edge Cases
+    it('should handle empty metrics from buildMetrics', async () => {
+      // ARRANGE
+      const processor: RouteProcessor<{ data: string }> = vi
+        .fn()
+        .mockResolvedValue({ data: 'test' });
+      const buildMetrics: SuccessMetricsBuilder<{ data: string }> = vi
+        .fn()
+        .mockReturnValue({});
+
+      const handlers = createCachedRouteHandler(
+        'empty_metrics',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(buildMetrics).toHaveBeenCalledWith({ data: 'test' });
+      expect(mockRes.json).toHaveBeenCalledWith({ data: 'test' });
+    });
+
+    it('should handle complex result types', async () => {
+      // ARRANGE
+      const complexResult = {
+        commits: [{ sha: 'abc123' }, { sha: 'def456' }],
+        metadata: { total: 2, page: 1 },
+      };
+      const processor: RouteProcessor<typeof complexResult> = vi
+        .fn()
+        .mockResolvedValue(complexResult);
+      const buildMetrics: SuccessMetricsBuilder<typeof complexResult> = vi
+        .fn()
+        .mockReturnValue({ commitCount: 2, page: 1 });
+
+      const handlers = createCachedRouteHandler(
+        'complex',
+        processor,
+        buildMetrics
+      );
+
+      // ACT
+      await handlers[0](
+        mockReq as Request,
+        mockRes as Response,
+        mockNext as NextFunction
+      );
+
+      // ASSERT
+      expect(processor).toHaveBeenCalled();
+      expect(buildMetrics).toHaveBeenCalledWith(complexResult);
+      expect(mockRes.json).toHaveBeenCalledWith(complexResult);
+    });
+  });
+
+  describe('buildRepoValidationChain', () => {
+    let mockValidators: {
+      repoUrlValidation: () => ValidationChain[];
+      paginationValidation: () => ValidationChain[];
+      dateValidation: () => ValidationChain[];
+      authorValidation: () => ValidationChain[];
+      churnValidation: () => ValidationChain[];
+    };
+
+    beforeEach(() => {
+      mockValidators = {
+        repoUrlValidation: vi
+          .fn()
+          .mockReturnValue([{ name: 'repoUrl' } as unknown as ValidationChain]),
+        paginationValidation: vi
+          .fn()
+          .mockReturnValue([
+            { name: 'page' } as unknown as ValidationChain,
+            { name: 'limit' } as unknown as ValidationChain,
+          ]),
+        dateValidation: vi
+          .fn()
+          .mockReturnValue([
+            { name: 'fromDate' } as unknown as ValidationChain,
+            { name: 'toDate' } as unknown as ValidationChain,
+          ]),
+        authorValidation: vi
+          .fn()
+          .mockReturnValue([{ name: 'author' } as unknown as ValidationChain]),
+        churnValidation: vi
+          .fn()
+          .mockReturnValue([
+            { name: 'minChanges' } as unknown as ValidationChain,
+          ]),
+      };
+    });
+
+    // Happy Path Tests
+    it('should include only repoUrl validation by default', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = {};
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(1);
+      expect(mockValidators.repoUrlValidation).toHaveBeenCalled();
+      expect(mockValidators.paginationValidation).not.toHaveBeenCalled();
+    });
+
+    it('should include pagination validation when requested', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = { includePagination: true };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(3); // repoUrl + page + limit
+      expect(mockValidators.repoUrlValidation).toHaveBeenCalled();
+      expect(mockValidators.paginationValidation).toHaveBeenCalled();
+    });
+
+    it('should include date validation when requested', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = { includeDates: true };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(3); // repoUrl + fromDate + toDate
+      expect(mockValidators.dateValidation).toHaveBeenCalled();
+    });
+
+    it('should include author validation when requested', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = { includeAuthors: true };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(2); // repoUrl + author
+      expect(mockValidators.authorValidation).toHaveBeenCalled();
+    });
+
+    it('should include churn validation when requested', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = { includeChurn: true };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(2); // repoUrl + minChanges
+      expect(mockValidators.churnValidation).toHaveBeenCalled();
+    });
+
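+    // Illustrative usage sketch (not asserted by this suite): a route module
+    // would typically spread the builder's output into its middleware list,
+    //   router.get('/x', ...buildRepoValidationChain({ includePagination: true }, validators), handler)
+    // which yields the [repoUrl, page, limit] chains exercised above.
+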
+    it('should combine multiple validation types', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = {
+        includePagination: true,
+        includeDates: true,
+      };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(5); // repoUrl + page + limit + fromDate + toDate
+      expect(mockValidators.repoUrlValidation).toHaveBeenCalled();
+      expect(mockValidators.paginationValidation).toHaveBeenCalled();
+      expect(mockValidators.dateValidation).toHaveBeenCalled();
+    });
+
+    it('should include all validation types when all requested', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = {
+        includePagination: true,
+        includeDates: true,
+        includeAuthors: true,
+        includeChurn: true,
+      };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(7); // repoUrl(1) + pagination(2) + dates(2) + author(1) + churn(1) = 7
+      expect(mockValidators.repoUrlValidation).toHaveBeenCalled();
+      expect(mockValidators.paginationValidation).toHaveBeenCalled();
+      expect(mockValidators.dateValidation).toHaveBeenCalled();
+      expect(mockValidators.authorValidation).toHaveBeenCalled();
+      expect(mockValidators.churnValidation).toHaveBeenCalled();
+    });
+
+    // Edge Cases
+    it('should handle missing optional validators gracefully', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = { includePagination: true };
+      const partialValidators = {
+        repoUrlValidation: vi
+          .fn()
+          .mockReturnValue([{ name: 'repoUrl' } as unknown as ValidationChain]),
+      };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, partialValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(1); // Only repoUrl since pagination validator missing
+    });
+
+    it('should maintain correct order of validators', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = {
+        includeChurn: true,
+        includeAuthors: true,
+        includeDates: true,
+        includePagination: true,
+      };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT - Order should be: repoUrl, pagination, dates, authors, churn
+      expect(chain[0]).toEqual({ name: 'repoUrl' });
+      expect(chain[1]).toEqual({ name: 'page' });
+      expect(chain[2]).toEqual({ name: 'limit' });
+      expect(chain[3]).toEqual({ name: 'fromDate' });
+      expect(chain[4]).toEqual({ name: 'toDate' });
+      expect(chain[5]).toEqual({ name: 'author' });
+      expect(chain[6]).toEqual({ name: 'minChanges' });
+    });
+
+    it('should handle false flags correctly', () => {
+      // ARRANGE
+      const options: ValidationChainOptions = {
+        includePagination: false,
+        includeDates: false,
+        includeAuthors: false,
+        includeChurn: false,
+      };
+
+      // ACT
+      const chain = buildRepoValidationChain(options, mockValidators);
+
+      // ASSERT
+      expect(chain).toHaveLength(1); // Only repoUrl
+      expect(mockValidators.paginationValidation).not.toHaveBeenCalled();
+      expect(mockValidators.dateValidation).not.toHaveBeenCalled();
+      expect(mockValidators.authorValidation).not.toHaveBeenCalled();
+      expect(mockValidators.churnValidation).not.toHaveBeenCalled();
+    });
+  });
+});
diff --git a/apps/backend/__tests__/unit/utils/routeHelpers.unit.test.ts b/apps/backend/__tests__/unit/utils/routeHelpers.unit.test.ts
new file mode 100644
index 00000000..0ea91393
--- /dev/null
+++ b/apps/backend/__tests__/unit/utils/routeHelpers.unit.test.ts
@@ -0,0 +1,745 @@
+/**
+ * Unit tests for routeHelpers
+ *
+ * Coverage target: ≥80%
+ * Testing strategy: AAA pattern (Arrange-Act-Assert)
+ * Focus: Happy path first, then edge cases
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import {
+  setupRouteRequest,
+  recordRouteSuccess,
+  recordRouteError,
+  buildCommitFilters,
+  extractPaginationParams,
+  extractFilterParams,
+  buildChurnFilters,
+} from '../../../src/utils/routeHelpers';
+import type { Request, Response } from 'express';
+
+// Mock dependencies
+vi.mock('../../../src/services/logger', () => ({
+  createRequestLogger: vi.fn(() => ({
+    info: vi.fn(),
+    error: vi.fn(),
+    warn: vi.fn(),
+    debug: vi.fn(),
+  })),
+}));
+
+vi.mock('../../../src/services/metrics', () => ({
+  getUserType: vi.fn(() => 'anonymous'),
+  recordFeatureUsage: vi.fn(),
+}));
+
+vi.mock('@gitray/shared-types', () => ({
+  HTTP_STATUS: {
+    OK: 200,
+  },
+  CommitFilterOptions: {},
+  ChurnFilterOptions: {},
+}));
+
+describe('routeHelpers', () => {
+  describe('setupRouteRequest', () => {
+    describe('Happy Path', () => {
+      it('should extract logger, repoUrl, and userType from request', () => {
+        // ARRANGE
+        const mockReq = {
+          query: { repoUrl: 'https://github.com/test/repo.git' },
+        } as any as Request;
+
+        // ACT
+        const result = setupRouteRequest(mockReq);
+
+        // ASSERT
+        expect(result).toHaveProperty('logger');
+        expect(result).toHaveProperty(
+          'repoUrl',
+          'https://github.com/test/repo.git'
+        );
+        expect(result).toHaveProperty('userType', 'anonymous');
+      });
+
+      it('should handle different repository URLs', () => {
+        // ARRANGE
+        const testUrls = [
+          'https://github.com/owner/repo.git',
+          'https://gitlab.com/group/project.git',
+          'https://bitbucket.org/user/repository.git',
+        ];
+
+        for (const url of testUrls) {
+          const mockReq = {
+            query: { repoUrl: url },
+          } as any as Request;
+
+          // ACT
+          const result = setupRouteRequest(mockReq);
+
+          // ASSERT
+          expect(result.repoUrl).toBe(url);
+        }
+      });
+    });
+  });
+
+  describe('recordRouteSuccess', () => {
+    let mockRes: any;
+    let mockLogger: any;
+
+    beforeEach(() => {
+      vi.clearAllMocks();
+
+      mockRes = {
+        status: vi.fn().mockReturnThis(),
+        json: vi.fn(),
+      };
+
+      mockLogger = {
+        info: vi.fn(),
+        error: vi.fn(),
+      };
+    });
+
+    describe('Happy Path', () => {
+      it('should record metrics, log success, and send response', () => {
+        // ARRANGE
+        const data = { commits: [{ sha: 'abc123' }] };
+        const additionalLogData = { commitCount: 1 };
+
+        // ACT
+        recordRouteSuccess(
+          'repository_commits',
+          'anonymous',
+          mockLogger,
+          'https://github.com/test/repo.git',
+          data,
+          mockRes,
+          additionalLogData
+        );
+
+        // ASSERT - Logger called
+        expect(mockLogger.info).toHaveBeenCalledWith(
+          'repository_commits retrieved successfully',
+          expect.objectContaining({
+            repoUrl: 'https://github.com/test/repo.git',
+            commitCount: 1,
+          })
+        );
+
+        // ASSERT - Response sent
+        expect(mockRes.status).toHaveBeenCalledWith(200);
+        expect(mockRes.json).toHaveBeenCalledWith(data);
+      });
+
+      it('should work without additional log data', () => {
+        // ARRANGE
+        const data = { heatmap: [] };
+
+        // ACT
+        recordRouteSuccess(
+          'heatmap_view',
+          'authenticated',
+          mockLogger,
+          'https://github.com/test/repo.git',
+          data,
+          mockRes
+        );
+
+        // ASSERT
+        expect(mockLogger.info).toHaveBeenCalledWith(
+          'heatmap_view retrieved successfully',
+          expect.objectContaining({
+            repoUrl: 'https://github.com/test/repo.git',
+          })
+        );
+        expect(mockRes.json).toHaveBeenCalledWith(data);
+      });
+
+      it('should handle different feature names', () => {
+        // ARRANGE
+        const features = [
+          'repository_commits',
+          'heatmap_view',
+          'top_contributors',
+          'code_churn',
+        ];
+
+        for (const feature of features) {
+          vi.clearAllMocks();
+
+          // ACT
+          recordRouteSuccess(
+            feature,
+            'anonymous',
+            mockLogger,
+            'https://github.com/test/repo.git',
+            {},
+            mockRes
+          );
+
+          // ASSERT
+          expect(mockLogger.info).toHaveBeenCalledWith(
+            `${feature} retrieved successfully`,
+            expect.any(Object)
+          );
+        }
+      });
+
+      it('should handle different user types', () => {
+        // ARRANGE
+        const userTypes = ['anonymous', 'authenticated', 'admin'];
+
+        for (const userType of userTypes) {
+          vi.clearAllMocks();
+
+          // ACT
+          recordRouteSuccess(
+            'test_feature',
+            userType,
+            mockLogger,
+            'https://github.com/test/repo.git',
+            {},
+            mockRes
+          );
+
+          // ASSERT - Should complete without errors
+          expect(mockRes.json).toHaveBeenCalled();
+        }
+      });
+    });
+  });
+
+  describe('recordRouteError', () => {
+    let mockLogger: any;
+    let mockNext: any;
+
+    beforeEach(() => {
+      vi.clearAllMocks();
+
+      mockLogger = {
+        error: vi.fn(),
+      };
+
+      mockNext = vi.fn();
+    });
+
+    describe('Happy Path', () => {
+      it('should log error and call next with Error object', () => {
+        // ARRANGE
+        const error = new Error('Repository not found');
+
+        // ACT
+        recordRouteError(
+          'repository_commits',
+          'anonymous',
+          mockLogger,
+          'https://github.com/test/repo.git',
+          error,
+          mockNext
+        );
+
+        // ASSERT - Error logged
+        expect(mockLogger.error).toHaveBeenCalledWith(
+          'Failed to retrieve repository_commits',
+          expect.objectContaining({
+            repoUrl: 'https://github.com/test/repo.git',
+            error: 'Repository not found',
+          })
+        );
+
+        // ASSERT - Error propagated
+        expect(mockNext).toHaveBeenCalledWith(error);
+      });
+
+      it('should handle non-Error exceptions (string)', () => {
+        // ARRANGE
+        const error = 'String error message';
+
+        // ACT
+        recordRouteError(
+          'heatmap_view',
+          'anonymous',
+          mockLogger,
+          'https://github.com/test/repo.git',
+          error,
+          mockNext
+        );
+
+        // ASSERT
+        expect(mockLogger.error).toHaveBeenCalledWith(
+          'Failed to retrieve heatmap_view',
+          expect.objectContaining({
+            error: 'String error message',
+          })
+        );
+        expect(mockNext).toHaveBeenCalledWith(error);
+      });
+
+      it('should handle different error types', () => {
+        // ARRANGE
+        const testErrors = [
+          new Error('Test error'),
+          'String error',
+          { message: 'Object error' },
+          42,
+        ];
+
+        for (const error of testErrors) {
+          vi.clearAllMocks();
+
+          // ACT
+          recordRouteError(
+            'test_feature',
+            'anonymous',
+            mockLogger,
+            'https://github.com/test/repo.git',
+            error,
+            mockNext
+          );
+
+          // ASSERT
+          expect(mockNext).toHaveBeenCalledWith(error);
+        }
+      });
+    });
+  });
+
+  describe('buildCommitFilters', () => {
+    describe('Happy Path', () => {
+      it('should build filter with all properties defined', () => {
+        // ARRANGE
+        const query = {
+          author: 'john',
+          authors: 'john,jane,bob',
+          fromDate: '2024-01-01',
+          toDate: '2024-12-31',
+        };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          author: 'john',
+          authors: ['john', 'jane', 'bob'],
+          fromDate: '2024-01-01',
+          toDate: '2024-12-31',
+        });
+      });
+
+      it('should build filter with only author', () => {
+        // ARRANGE
+        const query = { author: 'alice' };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({ author: 'alice' });
+      });
+
+      it('should build filter with only date range', () => {
+        // ARRANGE
+        const query = {
+          fromDate: '2024-01-01',
+          toDate: '2024-06-30',
+        };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          fromDate: '2024-01-01',
+          toDate: '2024-06-30',
+        });
+      });
+
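+      // Illustrative call shape (hypothetical route code, not asserted here):
+      // the helper is designed to receive raw query params, e.g.
+      //   const filters = buildCommitFilters(req.query as Record<string, string>);
+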
+      it('should return empty object when no filters provided', () => {
+        // ARRANGE
+        const query = {};
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({});
+      });
+
+      it('should trim whitespace from authors list', () => {
+        // ARRANGE
+        const query = {
+          authors: 'alice , bob , charlie ',
+        };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          authors: ['alice', 'bob', 'charlie'],
+        });
+      });
+
+      it('should handle single author in authors list', () => {
+        // ARRANGE
+        const query = {
+          authors: 'alice',
+        };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          authors: ['alice'],
+        });
+      });
+    });
+
+    describe('Edge Cases', () => {
+      it('should exclude undefined properties', () => {
+        // ARRANGE
+        const query = {
+          author: 'john',
+          authors: undefined,
+          fromDate: undefined,
+          toDate: '2024-12-31',
+        };
+
+        // ACT
+        const result = buildCommitFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          author: 'john',
+          toDate: '2024-12-31',
+        });
+        expect(result).not.toHaveProperty('authors');
+        expect(result).not.toHaveProperty('fromDate');
+      });
+    });
+  });
+
+  describe('extractPaginationParams', () => {
+    describe('Happy Path', () => {
+      it('should extract page and limit with skip calculation', () => {
+        // ARRANGE
+        const query = {
+          page: '2',
+          limit: '50',
+        };
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          page: 2,
+          limit: 50,
+          skip: 50, // (2-1) * 50
+        });
+      });
+
+      it('should use default values when not provided', () => {
+        // ARRANGE
+        const query = {};
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          page: 1,
+          limit: 100,
+          skip: 0,
+        });
+      });
+
+      it('should handle page 1 with default limit', () => {
+        // ARRANGE
+        const query = { page: '1' };
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          page: 1,
+          limit: 100,
+          skip: 0,
+        });
+      });
+
+      it('should calculate correct skip for different pages', () => {
+        // ARRANGE
+        const testCases = [
+          {
+            query: { page: '1', limit: '10' },
+            expected: { page: 1, limit: 10, skip: 0 },
+          },
+          {
+            query: { page: '2', limit: '10' },
+            expected: { page: 2, limit: 10, skip: 10 },
+          },
+          {
+            query: { page: '5', limit: '25' },
+            expected: { page: 5, limit: 25, skip: 100 },
+          },
+          {
+            query: { page: '10', limit: '20' },
+            expected: { page: 10, limit: 20, skip: 180 },
+          },
+        ];
+
+        for (const testCase of testCases) {
+          // ACT
+          const result = extractPaginationParams(testCase.query);
+
+          // ASSERT
+          expect(result).toEqual(testCase.expected);
+        }
+      });
+    });
+
+    describe('Edge Cases', () => {
+      it('should handle invalid page as default', () => {
+        // ARRANGE
+        const query = { page: 'invalid', limit: '20' };
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        expect(result.page).toBe(1);
+        expect(result.limit).toBe(20);
+        expect(result.skip).toBe(0);
+      });
+
+      it('should handle invalid limit as default', () => {
+        // ARRANGE
+        const query = { page: '3', limit: 'invalid' };
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        expect(result.page).toBe(3);
+        expect(result.limit).toBe(100);
+        expect(result.skip).toBe(200);
+      });
+
+      it('should handle zero page value (falls back to 1)', () => {
+        // ARRANGE
+        const query = { page: '0' };
+
+        // ACT
+        const result = extractPaginationParams(query);
+
+        // ASSERT
+        // parseInt('0') || 1 = 1 (0 is falsy, so || returns 1)
+        expect(result.page).toBe(1);
+        expect(result.limit).toBe(100); // default
+        expect(result.skip).toBe(0); // (1-1) * 100
+      });
+    });
+  });
+
+  describe('extractFilterParams', () => {
+    describe('Happy Path', () => {
+      it('should extract all filter parameters', () => {
+        // ARRANGE
+        const query = {
+          author: 'john',
+          authors: 'john,jane',
+          fromDate: '2024-01-01',
+          toDate: '2024-12-31',
+        };
+
+        // ACT
+        const result = extractFilterParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          author: 'john',
+          authors: 'john,jane',
+          fromDate: '2024-01-01',
+          toDate: '2024-12-31',
+        });
+      });
+
+      it('should handle missing parameters as undefined', () => {
+        // ARRANGE
+        const query = {
+          author: 'alice',
+        };
+
+        // ACT
+        const result = extractFilterParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          author: 'alice',
+          authors: undefined,
+          fromDate: undefined,
+          toDate: undefined,
+        });
+      });
+
+      it('should return all undefined when no parameters', () => {
+        // ARRANGE
+        const query = {};
+
+        // ACT
+        const result = extractFilterParams(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          author: undefined,
+          authors: undefined,
+          fromDate: undefined,
+          toDate: undefined,
+        });
+      });
+    });
+  });
+
+  describe('buildChurnFilters', () => {
+    describe('Happy Path', () => {
+      it('should build churn filter with all properties', () => {
+        // ARRANGE
+        const query = {
+          fromDate: '2024-01-01',
+          toDate: '2024-12-31',
+          minChanges: '5',
+          extensions: 'ts,tsx,js',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          since: '2024-01-01',
+          until: '2024-12-31',
+          minChanges: 5,
+          extensions: ['ts', 'tsx', 'js'],
+        });
+      });
+
+      it('should map fromDate to since and toDate to until', () => {
+        // ARRANGE
+        const query = {
+          fromDate: '2024-06-01',
+          toDate: '2024-06-30',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          since: '2024-06-01',
+          until: '2024-06-30',
+        });
+      });
+
+      it('should parse minChanges as integer', () => {
+        // ARRANGE
+        const query = {
+          minChanges: '10',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          minChanges: 10,
+        });
+        expect(typeof result.minChanges).toBe('number');
+      });
+
+      it('should split and trim extensions', () => {
+        // ARRANGE
+        const query = {
+          extensions: ' ts , js , py ',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          extensions: ['ts', 'js', 'py'],
+        });
+      });
+
+      it('should return empty object when no filters provided', () => {
+        // ARRANGE
+        const query = {};
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({});
+      });
+
+      it('should handle single extension', () => {
+        // ARRANGE
+        const query = {
+          extensions: 'ts',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          extensions: ['ts'],
+        });
+      });
+    });
+
+    describe('Edge Cases', () => {
+      it('should exclude undefined properties', () => {
+        // ARRANGE
+        const query = {
+          fromDate: '2024-01-01',
+          toDate: undefined,
+          minChanges: undefined,
+          extensions: 'ts',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        expect(result).toEqual({
+          since: '2024-01-01',
+          extensions: ['ts'],
+        });
+        expect(result).not.toHaveProperty('until');
+        expect(result).not.toHaveProperty('minChanges');
+      });
+
+      it('should exclude empty extensions string', () => {
+        // ARRANGE
+        const query = {
+          extensions: '',
+        };
+
+        // ACT
+        const result = buildChurnFilters(query);
+
+        // ASSERT
+        // Empty string is falsy, so it gets excluded
+        expect(result).toEqual({});
+      });
+    });
+  });
+});
diff --git a/apps/backend/src/middlewares/validation.ts b/apps/backend/src/middlewares/validation.ts
index e000d88a..5d3e2ed8 100644
--- a/apps/backend/src/middlewares/validation.ts
+++ b/apps/backend/src/middlewares/validation.ts
@@ -1,10 +1,31 @@
 import { Request, Response, NextFunction } from 'express';
-import { validationResult, CustomValidator } from 'express-validator';
-import { ValidationError } from '@gitray/shared-types';
+import {
+  validationResult,
+  CustomValidator,
+  ValidationChain,
+  query,
+  body,
+} from 'express-validator';
+import {
+  ValidationError,
+  ERROR_MESSAGES,
+  HTTP_STATUS,
+} from '@gitray/shared-types';
 import { isSafeGitUrl } from '../utils/urlSecurity.js';
+import { createRequestLogger } from '../services/logger';
 
-// Middleware wrapper that throws a ValidationError when request validation fails
+// Re-export for use in route files
+export { ERROR_MESSAGES } from '@gitray/shared-types';
+export type { ValidationChain, CustomValidator } from 'express-validator';
+
+// ---------------------------------------------------------------------------
+// Validation error handlers
+// ---------------------------------------------------------------------------
+
+/**
+ * Middleware that throws a ValidationError when request validation fails.
+ * Use this for routes that have centralized error handling middleware.
+ */
 export const handleValidationErrors = (
   req: Request,
   res: Response,
@@ -18,6 +39,38 @@
   next();
 };
 
+/**
+ * Middleware that returns JSON 400 response when validation fails.
+ * Use this for routes that need direct error responses without throwing.
+ */
+export const handleValidationErrorsWithResponse = (
+  req: Request,
+  res: Response,
+  next: NextFunction
+): void => {
+  const errors = validationResult(req);
+  if (!errors.isEmpty()) {
+    const logger = createRequestLogger(req);
+    logger.warn('Validation failed', {
+      errors: errors.array(),
+      query: req.query,
+      path: req.path,
+    });
+
+    res.status(HTTP_STATUS.BAD_REQUEST).json({
+      error: 'Validation failed',
+      code: 'VALIDATION_ERROR',
+      errors: errors.array(),
+    });
+    return;
+  }
+  next();
+};
+
+// ---------------------------------------------------------------------------
+// Custom validators
+// ---------------------------------------------------------------------------
+
 /**
  * Custom validator for Git repository URLs with SSRF protection
  *
@@ -34,3 +87,142 @@
   }
   return true;
 };
+
+// ---------------------------------------------------------------------------
+// Reusable validation chains
+// ---------------------------------------------------------------------------
+
+/**
+ * Repository URL validation chain with security checks.
+ * Validates format, protocol, and safety of repository URLs.
+ */
+export const repoUrlValidation = (): ValidationChain[] => [
+  query('repoUrl')
+    .notEmpty()
+    .withMessage('repoUrl query parameter is required')
+    .isURL({
+      protocols: ['http', 'https'],
+      require_protocol: true,
+      require_valid_protocol: true,
+    })
+    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
+    .custom(isSecureGitUrl)
+    .withMessage('Invalid or potentially unsafe repository URL'),
+];
+
+/**
+ * Pagination validation chain for page and limit parameters.
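+ * Illustrative examples (assumed query shapes): ?page=2&limit=50 is accepted
+ * and coerced to integers, while ?page=0 or ?limit=500 is rejected with HTTP 400.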
+ * Enforces reasonable bounds to prevent excessive data retrieval.
+ */
+export const paginationValidation = (): ValidationChain[] => [
+  query('page')
+    .optional()
+    .isInt({ min: 1, max: 1000 })
+    .withMessage('Page must be between 1 and 1000')
+    .toInt(),
+  query('limit')
+    .optional()
+    .isInt({ min: 1, max: 100 })
+    .withMessage('Limit must be between 1 and 100')
+    .toInt(),
+];
+
+/**
+ * Date range validation chain for fromDate and toDate parameters.
+ * Ensures dates are valid ISO 8601 format, not in the future, and in correct order.
+ */
+export const dateValidation = (): ValidationChain[] => [
+  query('fromDate')
+    .optional()
+    .isISO8601({ strict: true })
+    .withMessage('fromDate must be a valid ISO 8601 date')
+    .custom((value) => {
+      if (value && new Date(value) > new Date()) {
+        return false;
+      }
+      return true;
+    })
+    .withMessage('fromDate cannot be in the future'),
+  query('toDate')
+    .optional()
+    .isISO8601({ strict: true })
+    .withMessage('toDate must be a valid ISO 8601 date')
+    .custom((value, { req }) => {
+      if (value && new Date(value) > new Date()) {
+        return false;
+      }
+      const fromDate = req.query?.fromDate as string;
+      if (value && fromDate && new Date(value) < new Date(fromDate)) {
+        return false;
+      }
+      return true;
+    })
+    .withMessage('toDate must be after fromDate and not in the future'),
+];
+
+/**
+ * Author filtering validation chain for author and authors parameters.
+ * Supports single author or comma-separated list with sanitization.
+ */
+export const authorValidation = (): ValidationChain[] => [
+  query('author')
+    .optional()
+    .isString()
+    .trim()
+    .isLength({ min: 1, max: 100 })
+    .withMessage('Author must be between 1 and 100 characters')
+    .escape(),
+  query('authors')
+    .optional()
+    .isString()
+    .custom((value) => {
+      const authors = value.split(',');
+      return (
+        authors.length <= 10 &&
+        authors.every((a: string) => a.trim().length > 0)
+      );
+    })
+    .withMessage(
+      'Authors must be comma-separated and maximum 10 authors allowed'
+    ),
+];
+
+/**
+ * Code churn filtering validation chain for minChanges and extensions parameters.
+ * Validates change thresholds and file extension filters.
+ */
+export const churnValidation = (): ValidationChain[] => [
+  query('minChanges')
+    .optional()
+    .isInt({ min: 1, max: 1000 })
+    .withMessage('minChanges must be between 1 and 1000')
+    .toInt(),
+  query('extensions')
+    .optional()
+    .isString()
+    .custom((value) => {
+      const exts = value.split(',');
+      return (
+        exts.length <= 20 && exts.every((e: string) => e.trim().length > 0)
+      );
+    })
+    .withMessage('Extensions must be comma-separated and maximum 20 allowed'),
+];
+
+/**
+ * Repository URL validation chain for request body (POST/PUT/PATCH).
+ * Validates format, protocol, and safety of repository URLs in body parameters.
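+ * Illustrative example (hypothetical request body): { "repoUrl": "https://github.com/owner/repo.git" }
+ * passes, while file:// or javascript: URLs are rejected by isSecureGitUrl.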
+ */
+export const repoUrlBodyValidation = (): ValidationChain[] => [
+  body('repoUrl')
+    .notEmpty()
+    .withMessage('repoUrl is required')
+    .isURL({
+      protocols: ['http', 'https'],
+      require_protocol: true,
+      require_valid_protocol: true,
+    })
+    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
+    .custom(isSecureGitUrl)
+    .withMessage('Invalid or potentially unsafe repository URL'),
+];
diff --git a/apps/backend/src/routes/commitRoutes.ts b/apps/backend/src/routes/commitRoutes.ts
index f520cf08..59e90fe1 100644
--- a/apps/backend/src/routes/commitRoutes.ts
+++ b/apps/backend/src/routes/commitRoutes.ts
@@ -1,10 +1,5 @@
 import express, { Request, Response, NextFunction } from 'express';
-import {
-  query,
-  body,
-  validationResult,
-  ValidationChain,
-} from 'express-validator';
+import { query, body } from 'express-validator';
 import { gitService } from '../services/gitService';
 import {
   getCachedCommits,
@@ -33,7 +28,6 @@
 } from '../services/metrics';
 import {
   CommitFilterOptions,
-  ERROR_MESSAGES,
   HTTP_STATUS,
   FileAnalysisFilterOptions,
   FileTypeDistribution,
@@ -41,7 +35,17 @@
 } from '@gitray/shared-types';
 import { config } from '../config';
 import { fileAnalysisService } from '../services/fileAnalysisService';
-import { isSecureGitUrl } from '../middlewares/validation';
+import {
+  handleValidationErrorsWithResponse as handleValidationErrors,
+  repoUrlValidation,
+  repoUrlBodyValidation,
+  paginationValidation,
+  dateValidation,
+  authorValidation,
+  ERROR_MESSAGES,
+  isSecureGitUrl,
+  type ValidationChain,
+} from '../middlewares/validation';
 import { requireAdminToken } from '../middlewares/adminAuth';
 import rateLimit from 'express-rate-limit';
@@ -57,122 +61,6 @@
   legacyHeaders: false,
 });
 
-// ---------------------------------------------------------------------------
-// Custom validation error handler that formats errors correctly
-// ---------------------------------------------------------------------------
-const handleValidationErrors = (
-  req: Request,
-  res: Response,
-  next: NextFunction
-): void => {
-  const errors = validationResult(req);
-  if (!errors.isEmpty()) {
-    // Log validation errors for debugging
-    const logger = createRequestLogger(req);
-    logger.warn('Validation failed', {
-      errors: errors.array(),
-      query: req.query,
-      path: req.path,
-    });
-
-    // Return the expected error format with errors array
-    res.status(HTTP_STATUS.BAD_REQUEST).json({
-      error: 'Validation failed',
-      code: 'VALIDATION_ERROR',
-      errors: errors.array(),
-    });
-    return;
-  }
-  next();
-};
-
-// ---------------------------------------------------------------------------
-// Reusable validation chains with comprehensive security checks
-// ---------------------------------------------------------------------------
-const repoUrlValidation = (): ValidationChain[] => [
-  query('repoUrl')
-    .notEmpty()
-    .withMessage('repoUrl query parameter is required')
-    .isURL({
-      protocols: ['http', 'https'],
-      require_protocol: true,
-      require_valid_protocol: true,
-    })
-    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-    .custom(isSecureGitUrl)
-    .withMessage('Invalid or potentially unsafe repository URL'),
-];
-
-const paginationValidation = (): ValidationChain[] => [
-  query('page')
-    .optional()
-    .isInt({ min: 1, max: 1000 })
-    .withMessage('Page must be between 1 and 1000')
-    .toInt(),
-  query('limit')
-    .optional()
-    .isInt({ min: 1, max: 100 })
-    .withMessage('Limit must be between 1 and 100')
-    .toInt(),
-];
-
-const dateValidation = (): ValidationChain[] => [
-  query('fromDate')
-    .optional()
-    .isISO8601({ strict: true })
-    .withMessage('fromDate must be a valid ISO 8601 date')
-    .custom((value) => {
-      // Ensure fromDate is not in the future
-      if (value && new Date(value) > new Date()) {
-        return false;
-      }
-      return true;
-    })
-    .withMessage('fromDate cannot be in the future'),
-  query('toDate')
-    .optional()
-    .isISO8601({ strict: true })
-    .withMessage('toDate must be a valid ISO 8601 date')
-    .custom((value, { req }) => {
-      // Ensure toDate is not in the future
-      if (value && new Date(value) > new Date()) {
-        return false;
-      }
-      // Ensure toDate is after fromDate if both are provided
-      const fromDate = req.query?.fromDate as string;
-      if (value && fromDate && new Date(value) < new Date(fromDate)) {
-        return false;
-      }
-      return true;
-    })
-    .withMessage('toDate must be after fromDate and not in the future'),
-];
-
-const authorValidation = (): ValidationChain[] => [
-  query('author')
-    .optional()
-    .isString()
-    .trim()
-    .isLength({ min: 1, max: 100 })
-    .withMessage('Author must be between 1 and 100 characters')
-    // Sanitize to prevent XSS
-    .escape(),
-  query('authors')
-    .optional()
-    .isString()
-    .custom((value) => {
-      // Validate comma-separated authors
-      const authors = value.split(',');
-      return (
-        authors.length <= 10 &&
-        authors.every((a: string) => a.trim().length > 0)
-      );
-    })
-    .withMessage(
-      'Authors must be comma-separated and maximum 10 authors allowed'
-    ),
-];
-
 // ---------------------------------------------------------------------------
 // ENHANCED: GET / - paginated list of commits with unified caching
 // ---------------------------------------------------------------------------
@@ -465,14 +353,7 @@
 router.get(
   '/info',
-  [
-    query('repoUrl')
-      .isURL({ protocols: ['http', 'https'] })
-      .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-      .custom(isSecureGitUrl)
-      .withMessage('Invalid or potentially unsafe repository URL'),
-    handleValidationErrors,
-  ],
+  [...repoUrlValidation(), handleValidationErrors],
   async (req: Request, res: Response, next: NextFunction) => {
     const logger = createRequestLogger(req);
     const { repoUrl } = req.query as Record<string, string>;
@@ -569,14 +450,7 @@
 router.post(
   '/cache/invalidate',
   adminRateLimiter,
   requireAdminToken,
-  [
-    body('repoUrl')
-      .isURL({ protocols: ['http', 'https'] })
-      .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-      .custom(isSecureGitUrl)
-      .withMessage('Invalid or potentially unsafe repository URL'),
-    handleValidationErrors,
-  ],
+  [...repoUrlBodyValidation(), handleValidationErrors],
   async (req: Request, res: Response) => {
     const logger = createRequestLogger(req);
     const { repoUrl } = req.body;
@@ -647,17 +521,7 @@
 
 // Streaming validation for POST /stream endpoint
 const streamingOptionsValidation = [
-  body('repoUrl')
-    .notEmpty()
-    .withMessage('repoUrl is required')
-    .isURL({
-      protocols: ['http', 'https'],
-      require_protocol: true,
-      require_valid_protocol: true,
-    })
-    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-    .custom(isSecureGitUrl)
-    .withMessage('Invalid or potentially unsafe repository URL'),
+  ...repoUrlBodyValidation(),
   body('batchSize')
     .optional()
     .isInt({ min: 1, max: 10000 })
@@ -915,17 +779,7 @@
 
 // File analysis validation chain
 const fileAnalysisValidation = (): ValidationChain[] => [
-  query('repoUrl')
-    .notEmpty()
-    .withMessage('repoUrl query parameter is required')
-    .isURL({
-      protocols: ['http', 'https'],
-      require_protocol: true,
-      require_valid_protocol: true,
-    })
-    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-    .custom(isSecureGitUrl)
-    .withMessage('Invalid or potentially unsafe repository URL'),
+  ...repoUrlValidation(),
   query('extensions')
     .optional()
    .isString()
diff --git a/apps/backend/src/routes/repositoryRoutes.ts b/apps/backend/src/routes/repositoryRoutes.ts
index f2b3b132..cd028676 100644
--- a/apps/backend/src/routes/repositoryRoutes.ts
+++ b/apps/backend/src/routes/repositoryRoutes.ts
@@ -1,31 +1,47 @@
 import express, { Request, Response, NextFunction } from 'express';
-import { gitService } from '../services/gitService';
-import redis from '../services/cache';
-import { body } from 'express-validator';
 import {
-  handleValidationErrors,
-  isSecureGitUrl,
-} from '../middlewares/validation';
-import { withTempRepository } from '../utils/withTempRepository';
-import {
-  ERROR_MESSAGES,
-  HTTP_STATUS,
-  CommitFilterOptions,
-  TIME,
-  ChurnFilterOptions,
-} from '@gitray/shared-types';
+  getCachedCommits,
+  getCachedAggregatedData,
+  getCachedContributors,
+  getCachedChurnData,
+  getCachedSummary,
+  type CommitCacheOptions,
+} from '../services/repositoryCache.js';
+import { createRequestLogger } from '../services/logger.js';
 import {
   recordFeatureUsage,
   recordEnhancedCacheOperation,
-  recordDataFreshness,
   getUserType,
   getRepositorySizeCategory,
-} from '../services/metrics';
-import { repositorySummaryService } from '../services/repositorySummaryService';
-import { ValidationError } from '@gitray/shared-types';
-import { getLogger } from '../services/logger';
+} from '../services/metrics.js';
+import {
+  CommitFilterOptions,
+  ChurnFilterOptions,
+  HTTP_STATUS,
+} from '@gitray/shared-types';
+import {
+  handleValidationErrorsWithResponse as handleValidationErrors,
+  repoUrlValidation,
+  paginationValidation,
+  dateValidation,
+  authorValidation,
+  churnValidation,
+} from '../middlewares/validation.js';
+import {
+  buildCommitFilters,
+  buildChurnFilters,
+  extractPaginationParams,
+  extractFilterParams,
+  setupRouteRequest,
+  recordRouteSuccess,
+  recordRouteError,
+} from '../utils/routeHelpers.js';
+import {
+  createCachedRouteHandler,
+  buildRepoValidationChain,
+} from '../utils/repositoryRouteFactory.js';
 
-const logger = getLogger();
+// Remove unused imports: redis, gitService, withTempRepository, repositorySummaryService
 
 // Middleware to set request priority based on route
 const setRequestPriority = (priority: 'low' | 'normal' | 'high') => {
@@ -39,556 +55,278 @@
 
 const router = express.Router();
 
 // ---------------------------------------------------------------------------
-// Validation rules
+// GET endpoint to get repository commits with pagination (unified cache)
 // ---------------------------------------------------------------------------
-const repoUrlValidation = [
-  body('repoUrl')
-    .isURL({ protocols: ['http', 'https'] })
-    .withMessage(ERROR_MESSAGES.INVALID_REPO_URL)
-    .custom(isSecureGitUrl)
-    .withMessage('Invalid or potentially unsafe repository URL'),
-  handleValidationErrors,
-];
-
-// Additional validation for heatmap and full-data routes
-const heatmapValidation = [
-  ...repoUrlValidation,
-  body('filterOptions')
-    .optional()
-    .isObject()
-    .withMessage('filterOptions must be an object'),
+router.get(
+  '/commits',
+  setRequestPriority('normal'),
+  ...buildRepoValidationChain(
+    { includePagination: true },
+    {
+      repoUrlValidation,
+      paginationValidation,
+    }
+  ),
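+  // Illustrative note: the spread above expands to the [repoUrl, page, limit]
+  // validation chains (see the repositoryRouteFactory unit tests for ordering).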
   handleValidationErrors,
-];
-const fullDataValidation = heatmapValidation;
+  ...createCachedRouteHandler(
+    'repository_commits',
+    async ({ req, repoUrl, logger }) => {
+      const { page, limit, skip } = extractPaginationParams(req.query);
 
-// ---------------------------------------------------------------------------
-// POST endpoint to get repository commit data only
-// ---------------------------------------------------------------------------
-router.post(
-  '/',
-  setRequestPriority('normal'), // Normal priority for basic commit data
-  repoUrlValidation,
-  async (req: Request, res: Response, next: NextFunction) => {
-    const { repoUrl } = req.body;
-    const userType = getUserType(req);
-
-    try {
-      const cacheKey = `commits:${repoUrl}`;
-      let cached = null;
-      let commits = null;
-
-      // Try to get from cache, but handle cache failures gracefully
-      try {
-        cached = await redis.get(cacheKey);
-        if (cached) {
-          commits = JSON.parse(cached);
-          // Record enhanced cache operation and feature usage
-          recordEnhancedCacheOperation(
-            'commits',
-            true,
-            req,
-            repoUrl,
-            commits.length
-          );
-          recordFeatureUsage('repository_commits', userType, true, 'api_call');
-          recordDataFreshness(
-            'commits',
-            0,
-            'hybrid',
-            getRepositorySizeCategory(commits.length)
-          );
-
-          res.status(HTTP_STATUS.OK).json({ commits });
-          return;
-        }
-      } catch (cacheError) {
-        // Cache operation failed, continue to fetch from repository
-        logger.warn(
-          'Cache get operation failed:',
-          (cacheError as Error).message
-        );
-      }
-
-      commits ??= await withTempRepository(repoUrl, (tempDir) =>
-        gitService.getCommits(tempDir)
-      );
-
-      // Record cache miss and successful operation
-      recordEnhancedCacheOperation(
-        'commits',
-        false,
-        req,
+      logger.info('Processing commits request with unified caching', {
         repoUrl,
-        commits ? commits.length : 0
-      );
-      recordFeatureUsage('repository_commits', userType, true, 'api_call');
+        page,
+        limit,
+      });
 
-      // Try to cache the result, but don't fail if cache operation fails
-      if (commits) {
-        try {
-          await redis.set(
-            cacheKey,
-            JSON.stringify(commits),
-            'EX',
-            TIME.HOUR / 1000
-          );
-        } catch (cacheError) {
-          logger.warn(
-            'Cache set operation failed:',
-            (cacheError as Error).message
-          );
-        }
-      }
+      // Use unified cache manager (handles all three cache levels automatically)
+      const cacheOptions: CommitCacheOptions = {
+        skip,
+        limit,
+      };
 
-      res.status(HTTP_STATUS.OK).json({ commits });
-      return;
-    } catch (error) {
-      // Record failed feature usage
-      recordFeatureUsage('repository_commits', userType, false, 'api_call');
-      next(error);
-    }
-  }
+      const commits = await getCachedCommits(repoUrl, cacheOptions);
+
+      return { commits, page, limit };
+    },
+    ({ commits, page, limit }) => ({
+      commitCount: commits.length,
+      page,
+      limit,
+    })
+  )
 );
 
 // ---------------------------------------------------------------------------
-// POST endpoint to get commit heatmap data
+// GET endpoint to get commit heatmap data with filters (unified cache)
 // ---------------------------------------------------------------------------
-router.post(
+router.get(
   '/heatmap',
-  setRequestPriority('low'), // Low priority for heatmap data - memory intensive
-  heatmapValidation,
-  async (req: Request, res: Response, next: NextFunction) => {
-    const { repoUrl, filterOptions } = req.body;
-    const userType = getUserType(req);
-
-    try {
-      const cacheKey = `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`;
-      let cached = null;
-      let heatmapData = null;
-
-      // Try to get from cache, but handle cache failures gracefully
-      try {
-        cached = await redis.get(cacheKey);
-        if (cached) {
-          heatmapData = JSON.parse(cached);
-          // Record enhanced cache hit and feature usage
-          recordEnhancedCacheOperation('heatmap', true, req, repoUrl);
-          recordFeatureUsage('heatmap_view', userType, true, 'api_call');
-          recordDataFreshness('heatmap', 0, 'hybrid');
-
-          res.status(HTTP_STATUS.OK).json({ heatmapData });
-          return;
-        }
-      } catch (cacheError) {
-        // Cache operation failed, continue to fetch from repository
-        logger.warn(
-          'Cache get operation failed:',
-          (cacheError as Error).message
-        );
-      }
+  setRequestPriority('low'),
+  ...buildRepoValidationChain(
+    { includeDates: true, includeAuthors: true },
+    {
+      repoUrlValidation,
+      dateValidation,
+      authorValidation,
+    }
+  ),
+  handleValidationErrors,
+  ...createCachedRouteHandler(
+    'heatmap_view',
+    async ({ req, repoUrl, logger }) => {
+      const { author, authors, fromDate, toDate } = extractFilterParams(
+        req.query as Record<string, string>
+      );
 
-      heatmapData ??= await withTempRepository(repoUrl, async (tempDir) => {
-        const commits = await gitService.getCommits(tempDir);
-        return gitService.aggregateCommitsByTime(
-          commits,
-          filterOptions as CommitFilterOptions
-        );
+      logger.info('Processing heatmap request with unified caching', {
+        repoUrl,
+        hasFilters: !!(author || authors || fromDate || toDate),
       });
 
-      // Record cache miss and successful operation
-      recordEnhancedCacheOperation('heatmap', false, req, repoUrl);
-      recordFeatureUsage('heatmap_view', userType, true, 'api_call');
-
-      // Try to cache the result, but don't fail if cache operation fails
-      if (heatmapData) {
-        try {
-          await redis.set(
-            cacheKey,
-            JSON.stringify(heatmapData),
-            'EX',
-            TIME.HOUR / 1000
-          );
-        } catch (cacheError) {
-          logger.warn(
-            'Cache set operation failed:',
-            (cacheError as Error).message
-          );
-        }
-      }
+      // Build filter options from query parameters using helper function
+      const filters = buildCommitFilters({ author, authors, fromDate, toDate });
 
-      res.status(HTTP_STATUS.OK).json({ heatmapData });
-      return;
-    } catch (error) {
-      // Record failed feature usage
-      recordFeatureUsage('heatmap_view', userType, false, 'api_call');
-      next(error);
-    }
-  }
+      // Use unified cache manager for aggregated data (Level 3 cache)
+      const heatmapData = await getCachedAggregatedData(repoUrl, filters);
+
+      return { heatmapData };
+    },
+    ({ heatmapData }) => ({ dataPoints: heatmapData.data.length })
+  )
 );
 
 // ---------------------------------------------------------------------------
-// POST endpoint to get repository top contributors
+// GET endpoint to get repository top contributors with filters (unified cache)
 // ---------------------------------------------------------------------------
-router.post(
+router.get(
   '/contributors',
-  setRequestPriority('normal'), // Normal priority for contributor data
-  repoUrlValidation,
-  async (req: Request, res: Response, next: NextFunction) => {
-    const { repoUrl, filterOptions } = req.body;
-    const userType = getUserType(req);
-
-    try {
-      const cacheKey = `contributors:${repoUrl}:${JSON.stringify(filterOptions || {})}`;
-      let cached = null;
-      let contributors = null;
-
-      // Try to get from cache, but handle cache failures gracefully
-      try {
-        cached = await redis.get(cacheKey);
-        if (cached) {
-          contributors = JSON.parse(cached);
-          // Record enhanced cache operation and feature usage
-          recordEnhancedCacheOperation(
-            'contributors',
-            true,
-            req,
-            repoUrl,
-            contributors.length
-          );
-          recordFeatureUsage('contributors_view', userType, true, 'api_call');
-          recordDataFreshness('contributors', 0, 'hybrid');
-
-          res.status(HTTP_STATUS.OK).json({ contributors });
-          return;
-        }
-      } catch (cacheError) {
-        // Cache operation failed, continue to fetch from repository
-        logger.warn(
-          'Cache get operation failed:',
-          (cacheError as Error).message
-        );
-      }
-
-      // Fetch contributors using the service layer
-      contributors ??= await withTempRepository(repoUrl, (tempDir) =>
-        gitService.getTopContributors(
-          tempDir,
-          filterOptions as CommitFilterOptions
-        )
+  setRequestPriority('normal'),
+  ...buildRepoValidationChain(
+    { includeDates: true, includeAuthors: true },
+    {
+      repoUrlValidation,
+      dateValidation,
+      authorValidation,
+    }
+  ),
+  handleValidationErrors,
+  ...createCachedRouteHandler(
+    'contributors_view',
+    async ({ req, repoUrl, logger }) => {
+      const { author, authors, fromDate, toDate } = extractFilterParams(
+        req.query as Record<string, string>
       );
 
-      // Record cache miss and successful operation
-      recordEnhancedCacheOperation(
-        'contributors',
-        false,
-        req,
+      logger.info('Processing contributors request with unified caching', {
         repoUrl,
-        contributors ? contributors.length : 0
-      );
-      recordFeatureUsage('contributors_view', userType, true, 'api_call');
+        hasFilters: !!(author || authors || fromDate || toDate),
+      });
 
-      // Try to cache the result, but don't fail if cache operation fails
-      if (contributors) {
-        try {
-          await redis.set(
-            cacheKey,
-            JSON.stringify(contributors),
-            'EX',
-            TIME.HOUR / 1000
-          );
-        } catch (cacheError) {
-          logger.warn(
-            'Cache set operation failed:',
-            (cacheError as Error).message
-          );
-        }
-      }
+      // Build filter options from query parameters using helper function
+      const filters = buildCommitFilters({ author, authors, fromDate, toDate });
 
-      res.status(HTTP_STATUS.OK).json({ contributors });
-      return;
-    } catch (error) {
-      // Record failed feature usage
-      recordFeatureUsage('contributors_view', userType, false, 'api_call');
-      next(error);
-    }
-  }
+      // Use unified cache manager for contributors data
+      const contributors = await getCachedContributors(repoUrl, filters);
+
+      return { contributors };
+    },
+    ({ contributors }) => ({ contributorCount: contributors.length })
+  )
 );
 
 // ---------------------------------------------------------------------------
-// POST endpoint to get code churn analysis (file change frequency)
+// GET endpoint to get code churn analysis with filters (unified cache)
 // ---------------------------------------------------------------------------
-router.post(
+router.get(
   '/churn',
-  setRequestPriority('normal'), // Normal priority for churn analysis
-  repoUrlValidation,
-  async (req: Request, res: Response, next: NextFunction) => {
-    const { repoUrl, filterOptions } = req.body;
-    const userType = getUserType(req);
-
-    try {
-      const cacheKey = `churn:${repoUrl}:${JSON.stringify(filterOptions || {})}`;
-      let cached = null;
-      let churnData = null;
-
-      // Try to get from cache, but handle cache failures gracefully
-      try {
-        cached = await redis.get(cacheKey);
-        if (cached) {
-          churnData = JSON.parse(cached);
-          // Mark as from cache
-          churnData.metadata.fromCache = true;
-
-          // Record enhanced cache operation and feature usage
-          recordEnhancedCacheOperation(
-            'churn',
-            true,
-            req,
-            repoUrl,
-            churnData.files.length
-          );
-          recordFeatureUsage('code_churn_view', userType, true, 'api_call');
-          recordDataFreshness('churn', 0, 'hybrid');
-
-          res.status(HTTP_STATUS.OK).json({ churnData });
-          return;
-        }
-      } catch (cacheError) {
-        // Cache operation failed, continue to fetch from repository
-        logger.warn(
-          'Cache get operation failed:',
-          (cacheError as Error).message
-        );
-      }
+  setRequestPriority('normal'),
+  ...buildRepoValidationChain(
+    { includeDates: true, includeChurn: true },
+    {
+      repoUrlValidation,
+      dateValidation,
+      churnValidation,
+    }
+  ),
+  handleValidationErrors,
+  ...createCachedRouteHandler(
+    'code_churn_view',
+    async ({ req, repoUrl, logger }) => {
+      const { fromDate, toDate, minChanges, extensions } = req.query as Record<
+        string,
+        string
+      >;
+
+      logger.info('Processing churn analysis request with unified caching', {
+        repoUrl,
+        hasFilters: !!(fromDate || toDate || minChanges || extensions),
+      });
 
-      // Fetch churn data using the service layer
-      churnData ??= await withTempRepository(repoUrl, (tempDir) =>
-        gitService.analyzeCodeChurn(
-          tempDir,
-          filterOptions as ChurnFilterOptions
-        )
-      );
+      // Build filter options from query parameters using helper
+      const filters = buildChurnFilters({
+        fromDate,
+        toDate,
+        minChanges,
+        extensions,
+      });
 
-      // Record cache miss and successful operation
-      recordEnhancedCacheOperation(
-        'churn',
-        false,
-        req,
-        repoUrl,
churnData ? churnData.files.length : 0 - ); - recordFeatureUsage('code_churn_view', userType, true, 'api_call'); - - // Try to cache the result, but don't fail if cache operation fails - if (churnData) { - try { - // Cache for 1 hour (code churn changes less frequently than commits) - await redis.set( - cacheKey, - JSON.stringify(churnData), - 'EX', - TIME.HOUR / 1000 - ); - } catch (cacheError) { - logger.warn( - 'Cache set operation failed:', - (cacheError as Error).message - ); - } - } + // Use unified cache manager for churn data + const churnData = await getCachedChurnData(repoUrl, filters); - res.status(HTTP_STATUS.OK).json({ churnData }); - return; - } catch (error) { - // Record failed feature usage - recordFeatureUsage('code_churn_view', userType, false, 'api_call'); - next(error); - } - } + return { churnData }; + }, + ({ churnData }) => ({ fileCount: churnData.files.length }) + ) ); // --------------------------------------------------------------------------- -// GET endpoint to get repository summary statistics +// GET endpoint to get repository summary statistics (unified cache) // --------------------------------------------------------------------------- router.get( '/summary', - setRequestPriority('normal'), // Normal priority - lightweight metadata operation - async (req: Request, res: Response, next: NextFunction) => { - const { repoUrl } = req.query; - const userType = getUserType(req); - - // Validate repoUrl query parameter - if (!repoUrl || typeof repoUrl !== 'string') { - recordFeatureUsage('repository_summary', userType, false, 'api_call'); - return next(new ValidationError('repoUrl query parameter is required')); - } - - // Validate URL format and security - try { - const url = new URL(repoUrl); - if (!['http:', 'https:'].includes(url.protocol)) { - throw new ValidationError('Invalid repository URL protocol'); - } - // Note: Additional validation happens in repositorySummaryService - } catch (error) { - recordFeatureUsage('repository_summary', userType, false, 'api_call'); - if (error instanceof ValidationError) { - return next(error); - } - return next(new ValidationError(ERROR_MESSAGES.INVALID_REPO_URL)); - } - - try { - const summary = - await repositorySummaryService.getRepositorySummary(repoUrl); - - // Record successful operation - recordEnhancedCacheOperation( - 'summary', - summary.metadata.cached, - req, - repoUrl + setRequestPriority('normal'), + ...buildRepoValidationChain({}, { repoUrlValidation }), + handleValidationErrors, + ...createCachedRouteHandler( + 'repository_summary', + async ({ repoUrl, logger }) => { + logger.info( + 'Processing repository summary request with unified caching', + { + repoUrl, + } ); - recordFeatureUsage('repository_summary', userType, true, 'api_call'); - if (summary.metadata.cached) { - recordDataFreshness('summary', 0, 'hybrid'); - } - res.status(HTTP_STATUS.OK).json({ summary }); - } catch (error) { - // Record failed feature usage - recordFeatureUsage('repository_summary', userType, false, 'api_call'); - next(error); - } - } + // Use unified cache manager for summary data + const summary = await getCachedSummary(repoUrl); + + return { summary }; + }, + ({ summary }) => ({ repositoryName: summary.repository.name }) + ) ); // --------------------------------------------------------------------------- -// POST endpoint to fetch both commits and heatmap data in a single request +// GET endpoint to fetch both commits and heatmap data with pagination and filters (unified cache) // 
--------------------------------------------------------------------------- -router.post( +router.get( '/full-data', - setRequestPriority('low'), // Low priority for full data - very memory intensive - fullDataValidation, - async (req: Request, res: Response, next: NextFunction) => { - const { repoUrl, filterOptions } = req.body; - const userType = getUserType(req); - - try { - const commitsKey = `commits:${repoUrl}`; - const heatmapKey = `heatmap:${repoUrl}:${JSON.stringify(filterOptions)}`; - let cachedCommits = null; - let cachedHeatmap = null; - - // Try to get from cache, but handle cache failures gracefully - try { - cachedCommits = await redis.get(commitsKey); - cachedHeatmap = await redis.get(heatmapKey); - } catch (cacheError) { - // Cache operation failed, continue to fetch from repository - logger.warn( - 'Cache get operation failed:', - (cacheError as Error).message - ); - } - - if (cachedCommits && cachedHeatmap) { - let commits, heatmapData; - try { - commits = JSON.parse(cachedCommits); - heatmapData = JSON.parse(cachedHeatmap); - - // Record enhanced cache operations for both data types - recordEnhancedCacheOperation( - 'commits', - true, - req, - repoUrl, - commits.length - ); - recordEnhancedCacheOperation('heatmap', true, req, repoUrl); - recordFeatureUsage('full_data_view', userType, true, 'api_call'); - recordDataFreshness( - 'combined', - 0, - 'hybrid', - getRepositorySizeCategory(commits.length) - ); - - res.status(HTTP_STATUS.OK).json({ commits, heatmapData }); - return; - } catch (parseError) { - // Corrupted cache data, continue to fetch from repository - logger.warn( - 'Cache data parsing failed:', - (parseError as Error).message - ); - } - } - - const { commits, heatmapData } = await withTempRepository( - repoUrl, - async (tempDir) => { - const commits = await gitService.getCommits(tempDir); - const heatmapData = await gitService.aggregateCommitsByTime( - commits, - filterOptions as CommitFilterOptions - ); - return { commits, heatmapData }; - } + setRequestPriority('low'), + ...buildRepoValidationChain( + { + includePagination: true, + includeDates: true, + includeAuthors: true, + }, + { + repoUrlValidation, + paginationValidation, + dateValidation, + authorValidation, + } + ), + handleValidationErrors, + ...createCachedRouteHandler( + 'full_data_view', + async ({ req, repoUrl, logger }) => { + const { author, authors, fromDate, toDate } = extractFilterParams( + req.query as Record ); + const { page, limit, skip } = extractPaginationParams(req.query); - // Record cache miss and successful operation - recordEnhancedCacheOperation( - 'commits', - false, - req, + logger.info('Processing full-data request with unified caching', { repoUrl, - commits ? 
commits.length : 0 - ); - recordEnhancedCacheOperation('heatmap', false, req, repoUrl); - recordFeatureUsage('full_data_view', userType, true, 'api_call'); - - // Try to cache the results, but don't fail if cache operations fail - if (commits) { - try { - await redis.set( - commitsKey, - JSON.stringify(commits), - 'EX', - TIME.HOUR / 1000 - ); - } catch (cacheError) { - logger.warn( - 'Cache set operation failed for commits:', - (cacheError as Error).message - ); - } - } + page, + limit, + hasFilters: !!(author || authors || fromDate || toDate), + }); - if (heatmapData) { - try { - await redis.set( - heatmapKey, - JSON.stringify(heatmapData), - 'EX', - TIME.HOUR / 1000 - ); - } catch (cacheError) { - logger.warn( - 'Cache set operation failed for heatmap:', - (cacheError as Error).message - ); - } + // Build filter options from query parameters using helper function + const filters = buildCommitFilters({ author, authors, fromDate, toDate }); + + const cacheOptions: CommitCacheOptions = { + skip, + limit, + ...filters, + }; + + // FIX: Fetch sequentially instead of parallel to avoid lock contention + // When both functions try to acquire overlapping locks in parallel, + // it can cause cache corruption where commits end up in heatmapData + const commits = await getCachedCommits(repoUrl, cacheOptions); + const heatmapData = await getCachedAggregatedData(repoUrl, filters); + + // Defensive check: Ensure heatmapData is actually CommitHeatmapData + const isValidHeatmap = + heatmapData && + typeof heatmapData === 'object' && + !Array.isArray(heatmapData) && + 'timePeriod' in heatmapData && + 'data' in heatmapData; + + if (!isValidHeatmap) { + logger.warn( + 'Invalid heatmap data structure detected, expected CommitHeatmapData', + { + repoUrl, + heatmapDataType: typeof heatmapData, + isArray: Array.isArray(heatmapData), + actualType: Array.isArray(heatmapData) ? 'Commit[]' : 'unknown', + } + ); } - res.status(HTTP_STATUS.OK).json({ commits, heatmapData }); - return; - } catch (error) { - // Record failed feature usage - recordFeatureUsage('full_data_view', userType, false, 'api_call'); - next(error); - } - } + return { commits, heatmapData, page, limit, isValidHeatmap }; + }, + ({ commits, heatmapData, page, limit, isValidHeatmap }) => ({ + commitCount: commits?.length ?? 0, + dataPoints: isValidHeatmap ? 
heatmapData.data.length : 0, + page, + limit, + heatmapIsValid: isValidHeatmap, + }) + ) ); export default router; diff --git a/apps/backend/src/services/fileAnalysisService.ts b/apps/backend/src/services/fileAnalysisService.ts index 1300980a..b8f0e39f 100644 --- a/apps/backend/src/services/fileAnalysisService.ts +++ b/apps/backend/src/services/fileAnalysisService.ts @@ -22,9 +22,9 @@ import { promises as fs } from 'node:fs'; import * as path from 'node:path'; import * as os from 'node:os'; -import * as crypto from 'node:crypto'; import { getLogger } from './logger'; import { config } from '../config'; +import { hashUrl, hashObject } from '../utils/hashUtils'; import { recordStreamingStart, recordStreamingCompletion, @@ -338,7 +338,7 @@ class FileAnalysisService { repoUrl: string, commitHash: string ): string { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const commitHashShort = commitHash.substring(0, 12); // Use first 12 chars for efficiency return `file_tree:${repoHash}:${commitHashShort}`; } @@ -638,7 +638,7 @@ class FileAnalysisService { } private async invalidateFullRepositoryCache(repoUrl: string): Promise { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const pattern = `file_tree:${repoHash}:*`; try { @@ -728,7 +728,7 @@ class FileAnalysisService { * Check if circuit breaker should prevent analysis for a repository */ private isCircuitBreakerOpen(repoUrl: string): boolean { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const state = this.circuitBreakers.get(repoHash); if (!state) return false; @@ -788,7 +788,7 @@ class FileAnalysisService { * Record circuit breaker failure */ private recordCircuitBreakerFailure(repoUrl: string): void { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); // Manage memory before accessing/updating circuit breaker this.manageCircuitBreakerMemory(repoHash); @@ -835,7 +835,7 @@ class FileAnalysisService { * Record circuit breaker success */ private recordCircuitBreakerSuccess(repoUrl: string): void { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); // Manage memory before accessing circuit breaker this.manageCircuitBreakerMemory(repoHash); @@ -861,7 +861,7 @@ class FileAnalysisService { } private registerHalfOpenAttempt(repoUrl: string): boolean { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const state = this.circuitBreakers.get(repoHash); if (!state || state.isOpen) { @@ -943,7 +943,7 @@ class FileAnalysisService { lastFailure?: Date; timeUntilRecovery?: number; } { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const state = this.circuitBreakers.get(repoHash); if (!state) { @@ -991,7 +991,7 @@ class FileAnalysisService { } resetCircuitBreaker(repoUrl: string): void { - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); if (this.circuitBreakers.delete(repoHash)) { logger.info('Circuit breaker manually reset for repository', { @@ -2839,31 +2839,11 @@ class FileAnalysisService { repoUrl: string, options?: FileAnalysisFilterOptions ): string { - const repoHash = this.hashUrl(repoUrl); - const filterHash = this.hashObject(options || {}); + const repoHash = hashUrl(repoUrl); + const filterHash = hashObject(options || {}); return `file_analysis:${repoHash}:${filterHash}`; } - /** - * Generate stable 16-character hash for repository URLs - * Following GitRay's caching pattern - */ - private hashUrl(url: string): 
string { - return crypto.createHash('md5').update(url).digest('hex').slice(0, 16); - } - - /** - * Generate stable 8-character hash for filter option objects - * Following GitRay's caching pattern - */ - private hashObject(obj: any): string { - const str = JSON.stringify( - obj, - Object.keys(obj).sort((a, b) => a.localeCompare(b)) - ); - return crypto.createHash('md5').update(str).digest('hex').slice(0, 8); - } - /** * Cached file analysis method with three-tier caching integration * diff --git a/apps/backend/src/services/repositoryCache.ts b/apps/backend/src/services/repositoryCache.ts index 17ea1ac3..f3e81814 100644 --- a/apps/backend/src/services/repositoryCache.ts +++ b/apps/backend/src/services/repositoryCache.ts @@ -17,11 +17,12 @@ * - Comprehensive metrics and health monitoring */ -import crypto from 'node:crypto'; import { gitService } from './gitService'; +import { repositorySummaryService } from './repositorySummaryService'; import { getLogger } from './logger'; import { withSharedRepository } from './repositoryCoordinator'; import type { RepositoryHandle } from './repositoryCoordinator'; +import { hashUrl, hashObject } from '../utils/hashUtils'; import { config } from '../config'; import HybridLRUCache from '../utils/hybridLruCache'; import { @@ -47,6 +48,9 @@ import { CommitFilterOptions, CommitHeatmapData, TransactionRollbackError, + CodeChurnAnalysis, + ChurnFilterOptions, + RepositorySummary, } from '@gitray/shared-types'; type ContributorAggregation = { @@ -57,7 +61,11 @@ type ContributorAggregation = { contributionPercentage: number; }; -type AggregatedCacheValue = CommitHeatmapData | ContributorAggregation[]; +type AggregatedCacheValue = + | CommitHeatmapData + | ContributorAggregation[] + | CodeChurnAnalysis + | RepositorySummary; /** * UNIFIED REPOSITORY CACHE MANAGER - FIXED VERSION @@ -68,6 +76,13 @@ type AggregatedCacheValue = CommitHeatmapData | ContributorAggregation[]; * 3. ✅ Atomic multi-tier cache updates * 4. ✅ Enhanced error handling and recovery * 5. ✅ Pattern-based cache key management + * 6. ✅ DEADLOCK FIX: Removed repo-access from cache lock arrays to prevent nested acquisition + * + * LOCK ARCHITECTURE: + * - Cache operations acquire ONLY cache-level locks (cache-*, not repo-*) + * - Repository access managed exclusively by withSharedRepository() + * - Prevents nested acquisition of repo-access lock (which caused deadlocks) + * - Lock ordering maintained via withOrderedLocks() for cache-level locks */ /** @@ -367,19 +382,21 @@ export class RepositoryCacheManager { /** * Helper method to generate standard lock array for commit operations. * Ensures consistent lock ordering across all methods to prevent deadlocks. - * Lock order: cache-filtered < cache-operation < repo-access (alphabetical) + * + * IMPORTANT: Does NOT include 'repo-access' lock because: + * - repo-access is managed exclusively by withSharedRepository() + * - Including it here would require complex re-entrant locking + * - Operations that need repository access should NOT use ordered locks + * + * Lock order: cache-filtered < cache-operation (alphabetical) */ private getCommitLocks(repoUrl: string): string[] { - return [ - `cache-filtered:${repoUrl}`, - `cache-operation:${repoUrl}`, - `repo-access:${repoUrl}`, - ]; + return [`cache-filtered:${repoUrl}`, `cache-operation:${repoUrl}`]; } /** * Helper method to generate lock array for contributor operations. 
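+   * (Expands to ['cache-contributors:URL', ...getCommitLocks(URL)]; repo-access
+   * is intentionally excluded here too, for the reasons given on getCommitLocks.)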
- * Lock order: cache-contributors < cache-filtered < cache-operation < repo-access + * Lock order: cache-contributors < cache-filtered < cache-operation */ private getContributorLocks(repoUrl: string): string[] { return [`cache-contributors:${repoUrl}`, ...this.getCommitLocks(repoUrl)]; @@ -387,12 +404,29 @@ export class RepositoryCacheManager { /** * Helper method to generate lock array for aggregated data operations. - * Lock order: cache-aggregated < cache-filtered < cache-operation < repo-access + * Lock order: cache-aggregated < cache-filtered < cache-operation */ private getAggregatedLocks(repoUrl: string): string[] { return [`cache-aggregated:${repoUrl}`, ...this.getCommitLocks(repoUrl)]; } + /** + * Helper method to generate lock array for churn data operations. + * Lock order: cache-churn < cache-filtered < cache-operation + */ + private getChurnLocks(repoUrl: string): string[] { + return [`cache-churn:${repoUrl}`, ...this.getCommitLocks(repoUrl)]; + } + + /** + * Helper method to generate lock array for repository summary operations. + * Lock order: cache-summary < repo-access + * Note: Summary doesn't depend on commits cache, uses sparse clone directly + */ + private getSummaryLocks(repoUrl: string): string[] { + return [`cache-summary:${repoUrl}`, `repo-access:${repoUrl}`]; + } + /** * Creates a new cache transaction for atomic multi-tier operations. * @@ -1033,40 +1067,31 @@ export class RepositoryCacheManager { if (commits) { // Cache hit: Update metrics and return cached data immediately - this.metrics.operations.rawHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'raw_commits' }); - recordEnhancedCacheOperation( + return this.handleCacheHit( + 'Raw commits', 'raw_commits', - true, - undefined, + 'rawHits', + startTime, repoUrl, - commits.length + commits, + { + commitsCount: commits.length, + cacheKey: rawKey, + }, + commits.length, + 'commits' ); - - // Track data freshness for cache effectiveness analysis - const cacheAge = Date.now() - startTime; - recordDataFreshness('commits', cacheAge); - - logger.debug('Raw commits cache hit', { - repoUrl, - commitsCount: commits.length, - cacheKey: rawKey, - }); - - return commits; } // Cache miss: Fetch from Git repository and cache the result - this.metrics.operations.rawMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'raw_commits' }); - recordEnhancedCacheOperation('raw_commits', false, undefined, repoUrl); - - logger.info('Raw commits cache miss, fetching from repository', { + this.handleCacheMiss( + 'raw_commits', + 'rawMisses', + startTime, repoUrl, - cacheKey: rawKey, - }); + 'Raw commits cache miss, fetching from repository', + { cacheKey: rawKey } + ); const transaction = this.createTransaction(repoUrl); @@ -1114,61 +1139,27 @@ export class RepositoryCacheManager { // Store the fetched data in cache using transactional consistency const ttl = config.cacheStrategy.cacheKeys.rawCommitsTTL; - await this.transactionalSet( + return this.handleTransactionSuccess( this.rawCommitsCache, 'raw', rawKey, commits, ttl, - transaction - ); - - // Finalize the transaction - all operations succeeded - await this.commitTransaction(transaction); - - logger.info('Raw commits cached with transaction', { + transaction, repoUrl, - commitsCount: commits.length, - ttl, - sizeCategory: getRepositorySizeCategory(commits.length), - transactionId: transaction.id, - }); - - // Update system health metrics with successful operation - updateServiceHealthScore('cache', { - cacheHitRate: 1.0, - errorRate: 
0.0, - }); - - return commits; - } catch (error) { - // Increment transaction failure counter for monitoring - this.metrics.transactions.failed++; - - // Record comprehensive error details for debugging and alerting - recordDetailedError( - 'cache', - error instanceof Error ? error : new Error(String(error)), + 'Raw commits cached with transaction', { - userImpact: 'degraded', - recoveryAction: 'retry', - severity: 'warning', + commitsCount: commits.length, + sizeCategory: getRepositorySizeCategory(commits.length), } ); - - // Update system health metrics to reflect the failure - updateServiceHealthScore('cache', { errorRate: 1 }); - - // Rollback all cache changes to maintain consistency - await this.rollbackTransaction(transaction); - - logger.error('Failed to cache raw commits, transaction rolled back', { + } catch (error) { + return this.handleTransactionError( + transaction, + error, repoUrl, - transactionId: transaction.id, - error: error instanceof Error ? error.message : String(error), - }); - - throw error; + 'raw commits' + ); } }); } @@ -1205,21 +1196,15 @@ export class RepositoryCacheManager { if (filteredCommits) { // Cache hit: Return filtered data immediately - this.metrics.operations.filteredHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'filtered_commits' }); - recordEnhancedCacheOperation( + this.recordCacheHit( 'filtered_commits', - true, - undefined, + 'filteredHits', + startTime, repoUrl, - filteredCommits.length + filteredCommits.length, + 'commits' ); - // Track data freshness for filtered cache effectiveness - const cacheAge = Date.now() - startTime; - recordDataFreshness('commits', cacheAge); - logger.debug('Filtered commits cache hit', { repoUrl, commitsCount: filteredCommits.length, @@ -1231,13 +1216,10 @@ export class RepositoryCacheManager { } // Cache miss: Generate filtered data from raw commits - this.metrics.operations.filteredMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'filtered_commits' }); - recordEnhancedCacheOperation( + this.recordCacheMiss( 'filtered_commits', - false, - undefined, + 'filteredMisses', + startTime, repoUrl ); @@ -1351,161 +1333,119 @@ export class RepositoryCacheManager { contributionPercentage: number; }> > { - // FIX: Use withOrderedLocks to prevent deadlock with getOrParseCommits - return withOrderedLocks(this.getContributorLocks(repoUrl), async () => { - const startTime = Date.now(); - - // Generate cache key for contributors - const contributorsKey = this.generateContributorsKey( - repoUrl, - filterOptions - ); - const cachedData = await this.aggregatedDataCache.get(contributorsKey); - - // Type guard to ensure we have contributor data - const isContributorArray = ( - data: any - ): data is Array<{ - login: string; - commitCount: number; - linesAdded: number; - linesDeleted: number; - contributionPercentage: number; - }> => { - return Array.isArray(data) && (data.length === 0 || 'login' in data[0]); - }; + // FIX: Don't use withOrderedLocks for contributors since it needs direct repository access + // The repository coordinator manages its own locking via withSharedRepository + const startTime = Date.now(); - if (cachedData && isContributorArray(cachedData)) { - // Cache hit: Return cached contributor data - this.metrics.operations.aggregatedHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'contributors' }); - recordEnhancedCacheOperation('contributors', true, undefined, repoUrl); + // Generate cache key for contributors + const contributorsKey = 
this.generateContributorsKey( + repoUrl, + filterOptions + ); + const cachedData = await this.aggregatedDataCache.get(contributorsKey); - // Track data freshness - const cacheAge = Date.now() - startTime; - recordDataFreshness('contributors', cacheAge); + // Type guard to ensure we have contributor data + const isContributorArray = ( + data: any + ): data is Array<{ + login: string; + commitCount: number; + linesAdded: number; + linesDeleted: number; + contributionPercentage: number; + }> => { + return Array.isArray(data) && (data.length === 0 || 'login' in data[0]); + }; - logger.debug('Contributors cache hit', { - repoUrl, + if (cachedData && isContributorArray(cachedData)) { + // Cache hit: Return cached contributor data + return this.handleCacheHit( + 'Contributors', + 'contributors', + 'aggregatedHits', + startTime, + repoUrl, + cachedData, + { contributorsCount: cachedData.length, filters: filterOptions, cacheKey: contributorsKey, - }); + }, + undefined, + 'contributors' + ); + } - return cachedData; - } + // Cache miss: Generate contributor data + this.handleCacheMiss( + 'contributors', + 'aggregatedMisses', + startTime, + repoUrl, + 'Contributors cache miss, generating from commits', + { filters: filterOptions, cacheKey: contributorsKey } + ); - // Cache miss: Generate contributor data - this.metrics.operations.aggregatedMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'contributors' }); - recordEnhancedCacheOperation('contributors', false, undefined, repoUrl); + const transaction = this.createTransaction(repoUrl); - logger.debug('Contributors cache miss, generating from commits', { + try { + // FIX: Repository coordinator manages its own locking via withSharedRepository + let contributors = await withSharedRepository( repoUrl, - filters: filterOptions, - cacheKey: contributorsKey, - }); - - const transaction = this.createTransaction(repoUrl); + async (handle: RepositoryHandle) => { + logger.info('Fetching contributors via shared repository', { + repoUrl, + commitCount: handle.commitCount, + sizeCategory: handle.sizeCategory, + isShared: handle.isShared, + }); - try { - // FIX: All locks already held by outer withOrderedLocks, no nested acquisition needed - let contributors = await withSharedRepository( - repoUrl, - async (handle: RepositoryHandle) => { - logger.info('Fetching contributors via shared repository', { + // Track efficiency gains from repository sharing + if (handle.isShared && handle.refCount > 1) { + this.metrics.efficiency.duplicateClonesPrevented++; + logger.debug('Duplicate clone prevented for contributors', { repoUrl, - commitCount: handle.commitCount, - sizeCategory: handle.sizeCategory, - isShared: handle.isShared, + refCount: handle.refCount, }); - - // Track efficiency gains from repository sharing - if (handle.isShared && handle.refCount > 1) { - this.metrics.efficiency.duplicateClonesPrevented++; - logger.debug('Duplicate clone prevented for contributors', { - repoUrl, - refCount: handle.refCount, - }); - } - - return gitService.getTopContributors( - handle.localPath, - filterOptions - ); } - ); - // Defensive programming: Handle null contributors gracefully - if (!contributors) { - contributors = []; - logger.warn( - 'gitService.getTopContributors returned null, using empty array', - { repoUrl } - ); + return gitService.getTopContributors(handle.localPath, filterOptions); } + ); - // Cache the contributors data - const ttl = config.cacheStrategy.cacheKeys.aggregatedDataTTL; - await this.transactionalSet( - 
this.aggregatedDataCache, - 'aggregated', - contributorsKey, - contributors, - ttl, - transaction + // Defensive programming: Handle null contributors gracefully + if (!contributors) { + contributors = []; + logger.warn( + 'gitService.getTopContributors returned null, using empty array', + { repoUrl } ); + } - // Finalize the transaction - await this.commitTransaction(transaction); - - logger.debug('Contributors cached with transaction', { - repoUrl, + // Cache the contributors data + const ttl = config.cacheStrategy.cacheKeys.aggregatedDataTTL; + return this.handleTransactionSuccess( + this.aggregatedDataCache, + 'aggregated', + contributorsKey, + contributors, + ttl, + transaction, + repoUrl, + 'Contributors cached with transaction', + { filters: filterOptions, contributorsCount: contributors.length, - ttl, - transactionId: transaction.id, - }); - - // Update system health metrics - updateServiceHealthScore('cache', { - cacheHitRate: 1, - errorRate: 0, - }); - - return contributors; - } catch (error) { - // Track contributor generation failure - this.metrics.transactions.failed++; - - // Record comprehensive error details - recordDetailedError( - 'cache', - error instanceof Error ? error : new Error(String(error)), - { - userImpact: 'degraded', - recoveryAction: 'retry', - severity: 'warning', - } - ); - - // Update system health metrics - updateServiceHealthScore('cache', { errorRate: 1 }); - - // Rollback transaction to maintain cache consistency - await this.rollbackTransaction(transaction); - - logger.error('Failed to cache contributors, transaction rolled back', { - repoUrl, - transactionId: transaction.id, - error: error instanceof Error ? error.message : String(error), - }); - - throw error; - } - }); + } + ); + } catch (error) { + return this.handleTransactionError( + transaction, + error, + repoUrl, + 'contributors' + ); + } } /** @@ -1526,6 +1466,12 @@ export class RepositoryCacheManager { repoUrl: string, filterOptions?: CommitFilterOptions ): Promise { + logger.info('getCachedAggregatedData called', { + repoUrl, + filterOptions, + hasFilters: !!filterOptions && Object.keys(filterOptions).length > 0, + }); + // FIX: Use withOrderedLocks to prevent deadlock with getOrParseCommits return withOrderedLocks(this.getAggregatedLocks(repoUrl), async () => { const startTime = Date.now(); @@ -1535,8 +1481,26 @@ export class RepositoryCacheManager { repoUrl, filterOptions ); + logger.info('Generated aggregated cache key', { + repoUrl, + aggregatedKey, + filterOptions, + }); const cachedData = await this.aggregatedDataCache.get(aggregatedKey); + logger.info('Aggregated cache lookup result', { + repoUrl, + hasCachedData: !!cachedData, + cachedDataType: cachedData ? typeof cachedData : 'null', + isArray: Array.isArray(cachedData), + cachedDataKeys: + cachedData && + typeof cachedData === 'object' && + !Array.isArray(cachedData) + ? 
Object.keys(cachedData) + : null, + }); + // Type guard to ensure we have CommitHeatmapData const isCommitHeatmapData = (data: any): data is CommitHeatmapData => { return ( @@ -1547,58 +1511,65 @@ export class RepositoryCacheManager { ); }; - if (cachedData && isCommitHeatmapData(cachedData)) { + const passesTypeGuard = cachedData && isCommitHeatmapData(cachedData); + logger.info('Type guard check', { + repoUrl, + passesTypeGuard, + hasTimePeriod: + cachedData && + typeof cachedData === 'object' && + 'timePeriod' in cachedData, + hasData: + cachedData && typeof cachedData === 'object' && 'data' in cachedData, + }); + + if (passesTypeGuard) { // Cache hit: Return pre-computed visualization data - this.metrics.operations.aggregatedHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'aggregated_data' }); - recordEnhancedCacheOperation( + return this.handleCacheHit( + 'Aggregated data', 'aggregated_data', - true, + 'aggregatedHits', + startTime, + repoUrl, + cachedData, + { + filters: filterOptions, + cacheKey: aggregatedKey, + }, undefined, - repoUrl + 'aggregated_data' ); - - // Track data freshness for aggregated cache monitoring - const cacheAge = Date.now() - startTime; - recordDataFreshness('aggregated_data', cacheAge); - - logger.debug('Aggregated data cache hit', { - repoUrl, - filters: filterOptions, - cacheKey: aggregatedKey, - }); - - return cachedData; } // Cache miss: Generate aggregated data from filtered commits - this.metrics.operations.aggregatedMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'aggregated_data' }); - recordEnhancedCacheOperation( + this.handleCacheMiss( 'aggregated_data', - false, - undefined, - repoUrl - ); - - logger.debug('Aggregated data cache miss, generating from commits', { + 'aggregatedMisses', + startTime, repoUrl, - filters: filterOptions, - cacheKey: aggregatedKey, - }); + 'Aggregated data cache miss, generating from commits', + { filters: filterOptions, cacheKey: aggregatedKey } + ); const transaction = this.createTransaction(repoUrl); try { // Convert filter options to commit cache options for consistency - const commitOptions: CommitCacheOptions = { - author: filterOptions?.author, - authors: filterOptions?.authors, - fromDate: filterOptions?.fromDate, - toDate: filterOptions?.toDate, - }; + // Build commitOptions without undefined properties to ensure consistent cache keys + // This prevents { author: undefined, ... } from hashing differently than {} + const commitOptions: CommitCacheOptions = {}; + if (filterOptions?.author !== undefined) { + commitOptions.author = filterOptions.author; + } + if (filterOptions?.authors !== undefined) { + commitOptions.authors = filterOptions.authors; + } + if (filterOptions?.fromDate !== undefined) { + commitOptions.fromDate = filterOptions.fromDate; + } + if (filterOptions?.toDate !== undefined) { + commitOptions.toDate = filterOptions.toDate; + } /* * FIX: All locks already held by outer withOrderedLocks in correct order. 
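As an aside, the explicit `if` chain above can be expressed as conditional spreads with identical behavior. A minimal sketch, with a trimmed-down `CommitCacheOptions` shape assumed purely for illustration (the real type comes from `@gitray/shared-types` and also carries pagination fields):

```typescript
interface CommitCacheOptions {
  author?: string;
  authors?: string[];
  fromDate?: string;
  toDate?: string;
}

// Equivalent to the if-chain in the hunk above: a conditional spread copies a
// filter only when it is defined, so the object handed to the cache key
// generator never carries undefined-valued keys.
function buildCommitOptions(
  filterOptions?: CommitCacheOptions
): CommitCacheOptions {
  return {
    ...(filterOptions?.author !== undefined && { author: filterOptions.author }),
    ...(filterOptions?.authors !== undefined && { authors: filterOptions.authors }),
    ...(filterOptions?.fromDate !== undefined && { fromDate: filterOptions.fromDate }),
    ...(filterOptions?.toDate !== undefined && { toDate: filterOptions.toDate }),
  };
}
```

Either form keeps an unfiltered request and a request whose filters are all `undefined` on the same cache key, which is the consistency property the comment above is after.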
@@ -1612,44 +1583,291 @@ export class RepositoryCacheManager { let aggregatedData: CommitHeatmapData; - // Defensive programming: Handle null commits gracefully - if (commits) { - // Generate visualization data from filtered commits + // Defensive programming: Handle null or empty commits gracefully + if (!commits || commits.length === 0) { + logger.warn('No commits available for aggregation', { + repoUrl, + filterOptions, + commitsIsNull: commits === null, + commitsLength: commits?.length || 0, + commitOptionsUsed: commitOptions, + }); + // Generate empty aggregated data structure aggregatedData = await gitService.aggregateCommitsByTime( - commits, + [], filterOptions ); } else { - logger.warn( - 'getOrParseFilteredCommits returned null, using empty array', - { repoUrl } - ); - // Generate empty aggregated data structure + // Generate visualization data from filtered commits + logger.debug('Aggregating commits by time', { + repoUrl, + commitsCount: commits.length, + filterOptions, + }); aggregatedData = await gitService.aggregateCommitsByTime( - [], + commits, filterOptions ); } // Cache the computationally expensive aggregated results const ttl = config.cacheStrategy.cacheKeys.aggregatedDataTTL; - await this.transactionalSet( + return this.handleTransactionSuccess( this.aggregatedDataCache, 'aggregated', aggregatedKey, aggregatedData, ttl, + transaction, + repoUrl, + 'Aggregated data cached with transaction', + { + filters: filterOptions, + dataPoints: aggregatedData.data.length, + totalCommits: aggregatedData.metadata?.totalCommits ?? 0, + aggregatedDataType: typeof aggregatedData, + hasTimePeriod: 'timePeriod' in aggregatedData, + hasData: 'data' in aggregatedData, + } + ); + } catch (error) { + return this.handleTransactionError( + transaction, + error, + repoUrl, + 'aggregated data' + ); + } + }); + } + + /** + * Retrieves or generates code churn analysis data using the tertiary cache tier. + * + * This method handles file change frequency analysis by processing commit history + * to identify high-churn files that may indicate code quality issues or hotspots. + * + * Churn data is cached in the aggregated tier since it's computationally expensive + * and changes less frequently than individual commits. 
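+   *
+   * Usage sketch; the filter field is assumed from the /churn route's query
+   * parameters earlier in this diff:
+   * @example
+   * const churn = await repositoryCache.getOrGenerateChurnData(
+   *   'https://github.com/org/repo.git',
+   *   { minChanges: 5 }
+   * );
+   * logger.info(`High-churn files: ${churn.files.length}`);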
+ * + * @param repoUrl - Git repository URL + * @param filterOptions - Optional filters for churn analysis scope + * @returns Promise resolving to code churn analysis results + */ + async getOrGenerateChurnData( + repoUrl: string, + filterOptions?: ChurnFilterOptions + ): Promise { + return withOrderedLocks(this.getChurnLocks(repoUrl), async () => { + const startTime = Date.now(); + + // Attempt retrieval from aggregated data cache (Tier 3) + const churnKey = this.generateChurnKey(repoUrl, filterOptions); + const cachedData = await this.aggregatedDataCache.get(churnKey); + + // Type guard to ensure we have CodeChurnAnalysis + const isCodeChurnAnalysis = (data: any): data is CodeChurnAnalysis => { + return ( + data !== null && + typeof data === 'object' && + 'files' in data && + 'metadata' in data && + Array.isArray(data.files) + ); + }; + + if (cachedData && isCodeChurnAnalysis(cachedData)) { + // Cache hit: Return pre-computed churn analysis + return this.handleCacheHit( + 'Churn data', + 'churn', + 'aggregatedHits', + startTime, + repoUrl, + cachedData, + { + filters: filterOptions, + cacheKey: churnKey, + fileCount: cachedData.files.length, + }, + undefined, + 'churn' + ); + } + + // Cache miss: Generate churn data from repository + this.handleCacheMiss( + 'churn', + 'aggregatedMisses', + startTime, + repoUrl, + 'Churn data cache miss, analyzing repository', + { filters: filterOptions, cacheKey: churnKey } + ); + + const transaction = this.createTransaction(repoUrl); + + try { + // Analyze code churn using shared repository + const churnData = await withSharedRepository( + repoUrl, + async (handle: RepositoryHandle) => { + logger.info('Analyzing code churn via shared repository', { + repoUrl, + commitCount: handle.commitCount, + sizeCategory: handle.sizeCategory, + isShared: handle.isShared, + }); + + // Track efficiency gains from repository sharing + if (handle.isShared && handle.refCount > 1) { + this.metrics.efficiency.duplicateClonesPrevented++; + logger.debug('Duplicate clone prevented for churn analysis', { + repoUrl, + refCount: handle.refCount, + }); + } + + return gitService.analyzeCodeChurn(handle.localPath, filterOptions); + } + ); + + // Defensive programming: Handle null churn data gracefully + if (!churnData) { + logger.error('gitService.analyzeCodeChurn returned null', { + repoUrl, + }); + throw new Error('Failed to analyze code churn: null result'); + } + + // Cache the churn analysis results + const ttl = config.cacheStrategy.cacheKeys.aggregatedDataTTL; + return this.handleTransactionSuccess( + this.aggregatedDataCache, + 'aggregated', + churnKey, + churnData, + ttl, + transaction, + repoUrl, + 'Churn data cached with transaction', + { + filters: filterOptions, + fileCount: churnData.files.length, + } + ); + } catch (error) { + return this.handleTransactionError( + transaction, + error, + repoUrl, + 'churn data' + ); + } + }); + } + + /** + * Retrieves or generates repository summary statistics using the aggregated cache tier. + * + * This method handles repository metadata extraction using sparse clones for efficiency. + * Summary data includes repository age, commit count, contributors, and activity status. + * + * Unlike other cache methods, this uses the repositorySummaryService which performs + * a sparse clone to minimize bandwidth and storage requirements. 
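+   *
+   * Usage sketch; the RepositorySummary shape is imported from
+   * @gitray/shared-types:
+   * @example
+   * const summary = await repositoryCache.getOrGenerateSummary(
+   *   'https://github.com/org/repo.git'
+   * );
+   * logger.info(`Analyzed ${summary.repository.name}`, summary.stats);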
+ * + * @param repoUrl - Git repository URL + * @returns Promise resolving to repository summary + */ + async getOrGenerateSummary(repoUrl: string): Promise { + return withOrderedLocks(this.getSummaryLocks(repoUrl), async () => { + const startTime = Date.now(); + + // Attempt retrieval from aggregated data cache (Tier 3) + const summaryKey = this.generateSummaryKey(repoUrl); + const cachedData = await this.aggregatedDataCache.get(summaryKey); + + // Type guard to ensure we have RepositorySummary + const isRepositorySummary = (data: any): data is RepositorySummary => { + return ( + data !== null && + typeof data === 'object' && + 'repository' in data && + 'created' in data && + 'stats' in data + ); + }; + + if (cachedData && isRepositorySummary(cachedData)) { + // Cache hit: Return cached summary + this.recordCacheHit( + 'summary', + 'aggregatedHits', + startTime, + repoUrl, + undefined, + 'summary' + ); + + logger.debug('Summary cache hit', { + repoUrl, + cacheKey: summaryKey, + }); + + // Update metadata to reflect cached status + return { + ...cachedData, + metadata: { + ...cachedData.metadata, + cached: true, + dataSource: 'cache', + }, + }; + } + + // Cache miss: Generate summary from repository + this.recordCacheMiss('summary', 'aggregatedMisses', startTime, repoUrl); + + logger.debug('Summary cache miss, generating from repository', { + repoUrl, + cacheKey: summaryKey, + }); + + const transaction = this.createTransaction(repoUrl); + + try { + // Use repositorySummaryService which handles sparse clones internally + // Note: This service already uses coordinatedOperation, so no need for withSharedRepository + const summary = + await repositorySummaryService.getRepositorySummary(repoUrl); + + // Defensive programming: Validate summary structure + if (!summary || !summary.repository) { + logger.error('repositorySummaryService returned invalid summary', { + repoUrl, + }); + throw new Error( + 'Failed to generate repository summary: invalid result' + ); + } + + // Cache the summary data - use repositoryInfoTTL (2 hours, longer than aggregated data) + const ttl = config.cacheStrategy.cacheKeys.repositoryInfoTTL; + await this.transactionalSet( + this.aggregatedDataCache, + 'aggregated', + summaryKey, + summary, + ttl, transaction ); // Finalize the transaction await this.commitTransaction(transaction); - logger.debug('Aggregated data cached with transaction', { + logger.debug('Summary cached with transaction', { repoUrl, - filters: filterOptions, - dataPoints: aggregatedData.data.length, - totalCommits: aggregatedData.metadata?.totalCommits ?? 0, + repositoryName: summary.repository.name, ttl, transactionId: transaction.id, }); @@ -1660,12 +1878,12 @@ export class RepositoryCacheManager { errorRate: 0, }); - return aggregatedData; + return summary; } catch (error) { - // Track aggregation failure for system monitoring + // Track summary generation failure this.metrics.transactions.failed++; - // Record comprehensive error details for debugging complex aggregations + // Record comprehensive error details recordDetailedError( 'cache', error instanceof Error ? error : new Error(String(error)), @@ -1682,14 +1900,11 @@ export class RepositoryCacheManager { // Rollback transaction to maintain cache consistency await this.rollbackTransaction(transaction); - logger.error( - 'Failed to cache aggregated data, transaction rolled back', - { - repoUrl, - transactionId: transaction.id, - error: error instanceof Error ? 
error.message : String(error), - } - ); + logger.error('Failed to cache summary, transaction rolled back', { + repoUrl, + transactionId: transaction.id, + error: error instanceof Error ? error.message : String(error), + }); throw error; } @@ -1724,9 +1939,8 @@ export class RepositoryCacheManager { await distributedCache.invalidateGlobally('repository', { repoUrl, reason: 'repository_update', - keysCount: ( - this.cacheKeyPatterns.get(this.hashUrl(repoUrl)) ?? new Set() - ).size, + keysCount: (this.cacheKeyPatterns.get(hashUrl(repoUrl)) ?? new Set()) + .size, }); } catch (err) { logger.warn('Failed to broadcast distributed cache invalidation', { @@ -1748,7 +1962,7 @@ export class RepositoryCacheManager { repoUrl, }); - const repoHash = this.hashUrl(repoUrl); + const repoHash = hashUrl(repoUrl); const keysToInvalidate = this.cacheKeyPatterns.get(repoHash) ?? new Set(); const operations: Promise[] = []; @@ -1978,7 +2192,7 @@ export class RepositoryCacheManager { */ private generateRawCommitsKey(repoUrl: string): string { - const key = `raw_commits:${this.hashUrl(repoUrl)}`; + const key = `raw_commits:${hashUrl(repoUrl)}`; this.trackCacheKey(key); return key; } @@ -1987,8 +2201,8 @@ export class RepositoryCacheManager { repoUrl: string, options?: CommitCacheOptions ): string { - const filterHash = this.hashObject(options || {}); - const key = `filtered_commits:${this.hashUrl(repoUrl)}:${filterHash}`; + const filterHash = hashObject(options || {}); + const key = `filtered_commits:${hashUrl(repoUrl)}:${filterHash}`; this.trackCacheKey(key); return key; } @@ -1997,8 +2211,8 @@ export class RepositoryCacheManager { repoUrl: string, filterOptions?: CommitFilterOptions ): string { - const filterHash = this.hashObject(filterOptions ?? {}); - const key = `aggregated_data:${this.hashUrl(repoUrl)}:${filterHash}`; + const filterHash = hashObject(filterOptions ?? {}); + const key = `aggregated_data:${hashUrl(repoUrl)}:${filterHash}`; this.trackCacheKey(key); return key; } @@ -2008,28 +2222,28 @@ export class RepositoryCacheManager { repoUrl: string, filterOptions?: CommitFilterOptions ): string { - const filterHash = this.hashObject(filterOptions ?? {}); - const key = `contributors:${this.hashUrl(repoUrl)}:${filterHash}`; + const filterHash = hashObject(filterOptions ?? {}); + const key = `contributors:${hashUrl(repoUrl)}:${filterHash}`; this.trackCacheKey(key); return key; } - /** Generates stable 16-character hash for repository URLs */ - private hashUrl(url: string): string { - // SAFE: MD5 used for cache key generation only (not security-sensitive) - // Performance is prioritized over cryptographic strength for cache keys - return crypto.createHash('md5').update(url).digest('hex').slice(0, 16); + /** Generate cache key for churn data */ + private generateChurnKey( + repoUrl: string, + filterOptions?: ChurnFilterOptions + ): string { + const filterHash = hashObject(filterOptions ?? 
{}); + const key = `churn_data:${hashUrl(repoUrl)}:${filterHash}`; + this.trackCacheKey(key); + return key; } - /** Generates stable 8-character hash for filter option objects */ - private hashObject(obj: any): string { - const str = JSON.stringify( - obj, - Object.keys(obj).sort((a, b) => a.localeCompare(b)) - ); - // SAFE: MD5 used for cache key generation only (not security-sensitive) - // Performance is prioritized over cryptographic strength for cache keys - return crypto.createHash('md5').update(str).digest('hex').slice(0, 8); + /** Generate cache key for repository summary */ + private generateSummaryKey(repoUrl: string): string { + const key = `repository_summary:${hashUrl(repoUrl)}`; + this.trackCacheKey(key); + return key; } /** Determines if request has specific filters requiring filtered cache tier */ @@ -2219,6 +2433,258 @@ export class RepositoryCacheManager { this.metrics.performance.operationCount++; } + /** + * Records comprehensive cache hit metrics and tracking. + * Centralizes the common pattern of recording: + * - Internal metrics counters + * - Prometheus cache hit metrics + * - Enhanced cache operation tracking + * - Data freshness monitoring (optional, for aggregated data) + * + * This helper eliminates duplication across 8 cache hit locations. + * + * @param operation - Operation name for Prometheus metrics (e.g., 'raw_commits', 'contributors') + * @param metricsField - Internal metrics field to increment ('rawHits', 'filteredHits', 'aggregatedHits') + * @param startTime - Operation start timestamp for timing calculations + * @param repoUrl - Repository URL for enhanced cache operation tracking + * @param dataCount - Optional data count for enhanced metrics (used for raw/filtered commits) + * @param dataType - Optional data type for freshness tracking (used for aggregated data types) + */ + private recordCacheHit( + operation: string, + metricsField: 'rawHits' | 'filteredHits' | 'aggregatedHits', + startTime: number, + repoUrl: string, + dataCount?: number, + dataType?: string + ): void { + this.metrics.operations[metricsField]++; + this.recordHitTime(startTime); + cacheHits.inc({ operation }); + recordEnhancedCacheOperation( + operation, + true, + undefined, + repoUrl, + dataCount + ); + + // Track data freshness if dataType provided (for aggregated data types) + if (dataType) { + const cacheAge = Date.now() - startTime; + recordDataFreshness(dataType, cacheAge); + } + } + + /** + * Records comprehensive cache miss metrics and tracking. + * Centralizes the common pattern of recording: + * - Internal metrics counters + * - Prometheus cache miss metrics + * - Enhanced cache operation tracking + * + * This helper eliminates duplication across 8 cache miss locations. + * + * @param operation - Operation name for Prometheus metrics (e.g., 'raw_commits', 'contributors') + * @param metricsField - Internal metrics field to increment ('rawMisses', 'filteredMisses', 'aggregatedMisses') + * @param startTime - Operation start timestamp for timing calculations + * @param repoUrl - Repository URL for enhanced cache operation tracking + */ + private recordCacheMiss( + operation: string, + metricsField: 'rawMisses' | 'filteredMisses' | 'aggregatedMisses', + startTime: number, + repoUrl: string + ): void { + this.metrics.operations[metricsField]++; + this.recordMissTime(startTime); + cacheMisses.inc({ operation }); + recordEnhancedCacheOperation(operation, false, undefined, repoUrl); + } + + /** + * Handles cache hit path with logging and metrics. 
+   * Consolidates the duplicate pattern of recording cache hits and returning cached data.
+   *
+   * Eliminates ~60 lines of duplication across 4 cache methods.
+   *
+   * @param cacheType - Human-readable cache type for logging (e.g., 'Raw commits', 'Aggregated data')
+   * @param operation - Operation name for Prometheus metrics (e.g., 'raw_commits', 'contributors')
+   * @param metricsField - Internal metrics field to increment
+   * @param startTime - Operation start timestamp
+   * @param repoUrl - Repository URL
+   * @param data - The cached data to return
+   * @param logContext - Additional context for debug logging
+   * @param dataCount - Optional data count for enhanced metrics (used for raw/filtered commits)
+   * @param dataType - Optional data type for freshness tracking (used for aggregated data types)
+   * @returns The cached data
+   */
+  private handleCacheHit<T>(
+    cacheType: string,
+    operation: string,
+    metricsField: 'rawHits' | 'filteredHits' | 'aggregatedHits',
+    startTime: number,
+    repoUrl: string,
+    data: T,
+    logContext?: Record<string, unknown>,
+    dataCount?: number,
+    dataType?: string
+  ): T {
+    // Record metrics with optional data count and type
+    this.recordCacheHit(
+      operation,
+      metricsField,
+      startTime,
+      repoUrl,
+      dataCount,
+      dataType
+    );
+
+    // Log cache hit with context
+    logger.debug(`${cacheType} cache hit`, {
+      repoUrl,
+      ...logContext,
+    });
+
+    return data;
+  }
+
+  /**
+   * Consolidates cache miss recording and logging.
+   * Reduces duplication across all cache methods by standardizing
+   * the cache miss path.
+   *
+   * @param operation - Operation identifier for metrics (e.g., 'raw_commits', 'contributors')
+   * @param metricsField - Which metrics counter to increment
+   * @param startTime - Request start timestamp for latency tracking
+   * @param repoUrl - Repository URL for logging context
+   * @param logMessage - Human-readable message describing the cache miss
+   * @param logContext - Additional context to include in the log
+   */
+  private handleCacheMiss(
+    operation: string,
+    metricsField: 'rawMisses' | 'filteredMisses' | 'aggregatedMisses',
+    startTime: number,
+    repoUrl: string,
+    logMessage: string,
+    logContext?: Record<string, unknown>
+  ): void {
+    this.recordCacheMiss(operation, metricsField, startTime, repoUrl);
+    logger.debug(logMessage, {
+      repoUrl,
+      ...logContext,
+    });
+  }
+
+  /**
+   * Consolidates successful transaction caching operations.
+   * Reduces duplication across all cache methods by standardizing
+   * the success path for caching operations.
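+   *
+   * A representative call, copied from the churn path earlier in this class:
+   * @example
+   * return this.handleTransactionSuccess(
+   *   this.aggregatedDataCache,
+   *   'aggregated',
+   *   churnKey,
+   *   churnData,
+   *   ttl,
+   *   transaction,
+   *   repoUrl,
+   *   'Churn data cached with transaction',
+   *   { filters: filterOptions, fileCount: churnData.files.length }
+   * );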
+ * + * @param cache - Cache instance to store data in + * @param cacheType - Cache tier identifier ('raw', 'aggregated') + * @param key - Cache key for storing the data + * @param data - Data to cache + * @param ttl - Time-to-live in seconds + * @param transaction - Transaction object for atomicity + * @param repoUrl - Repository URL for logging context + * @param logMessage - Human-readable message describing what was cached + * @param logContext - Additional context to include in the log + * @returns The cached data + */ + private async handleTransactionSuccess( + cache: any, + cacheType: 'raw' | 'aggregated', + key: string, + data: T, + ttl: number, + transaction: CacheTransaction, + repoUrl: string, + logMessage: string, + logContext?: Record + ): Promise { + await this.transactionalSet(cache, cacheType, key, data, ttl, transaction); + await this.commitTransaction(transaction); + logger.debug(logMessage, { + repoUrl, + ttl, + transactionId: transaction.id, + ...logContext, + }); + updateServiceHealthScore('cache', { cacheHitRate: 1, errorRate: 0 }); + return data; + } + + /** + * Handles transaction errors with consistent cleanup and logging. + * Consolidates error handling logic across all cache methods. + * + * This helper ensures all cache failures follow the same pattern: + * - Record failure metrics + * - Update health scores + * - Rollback transactions + * - Log errors with context + * - Re-throw for upstream handling + * + * @param transaction - Transaction to rollback + * @param error - Error that occurred + * @param repoUrl - Repository URL for logging context + * @param operationName - Operation name for error message + * @param logContext - Optional additional context for logs + * @throws The original error after cleanup + */ + private async handleTransactionError( + transaction: CacheTransaction, + error: unknown, + repoUrl: string, + operationName: string, + logContext?: Record + ): Promise { + // Increment transaction failure counter for monitoring + this.metrics.transactions.failed++; + + // Record comprehensive error details for debugging and alerting + recordDetailedError( + 'cache', + error instanceof Error ? error : new Error(String(error)), + { + userImpact: 'degraded', + recoveryAction: 'retry', + severity: 'warning', + } + ); + + // Update system health metrics to reflect the failure + updateServiceHealthScore('cache', { errorRate: 1 }); + + // Rollback all cache changes to maintain consistency + await this.rollbackTransaction(transaction); + + logger.error(`Failed to cache ${operationName}, transaction rolled back`, { + repoUrl, + transactionId: transaction.id, + error: error instanceof Error ? error.message : String(error), + ...logContext, + }); + + throw error; + } + /** * Internal raw commits retrieval without external locking. 
* @@ -2260,21 +2726,15 @@ export class RepositoryCacheManager { } if (commits) { - this.metrics.operations.rawHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'raw_commits' }); - recordEnhancedCacheOperation( + this.recordCacheHit( 'raw_commits', - true, - undefined, + 'rawHits', + startTime, repoUrl, - commits.length + commits.length, + 'commits' ); - // Record data freshness - const cacheAge = Date.now() - startTime; - recordDataFreshness('commits', cacheAge); - logger.debug('Raw commits cache hit', { repoUrl, commitsCount: commits.length, @@ -2285,10 +2745,7 @@ export class RepositoryCacheManager { } // Cache miss - need to fetch from repository with transaction - this.metrics.operations.rawMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'raw_commits' }); - recordEnhancedCacheOperation('raw_commits', false, undefined, repoUrl); + this.recordCacheMiss('raw_commits', 'rawMisses', startTime, repoUrl); logger.info('Raw commits cache miss, fetching from repository', { repoUrl, @@ -2299,7 +2756,8 @@ export class RepositoryCacheManager { try { // Use shared repository to prevent duplicate clones - // Note: This will use the repo-access lock that's already acquired through withOrderedLocks + // FIX: repo-access lock is now acquired through withOrderedLocks above. + // The lock manager is re-entrant and will skip re-acquiring this lock. commits = await withSharedRepository( repoUrl, async (handle: RepositoryHandle) => { @@ -2330,6 +2788,17 @@ export class RepositoryCacheManager { logger.warn('gitService.getCommits returned null, using empty array', { repoUrl, }); + } else if (commits.length === 0) { + logger.warn('Git service returned zero commits', { + repoUrl, + message: + 'Repository might be empty or git operations may have failed', + }); + } else { + logger.debug('Raw commits fetched successfully', { + repoUrl, + commitCount: commits.length, + }); } // FIX: Transactional cache write @@ -2415,21 +2884,15 @@ export class RepositoryCacheManager { let filteredCommits = await this.filteredCommitsCache.get(filteredKey); if (filteredCommits) { - this.metrics.operations.filteredHits++; - this.recordHitTime(startTime); - cacheHits.inc({ operation: 'filtered_commits' }); - recordEnhancedCacheOperation( + this.recordCacheHit( 'filtered_commits', - true, - undefined, + 'filteredHits', + startTime, repoUrl, - filteredCommits.length + filteredCommits.length, + 'commits' ); - // Record data freshness - const cacheAge = Date.now() - startTime; - recordDataFreshness('commits', cacheAge); - logger.debug('Filtered commits cache hit', { repoUrl, commitsCount: filteredCommits.length, @@ -2441,10 +2904,12 @@ export class RepositoryCacheManager { } // Cache miss - get raw commits and apply filters with transaction - this.metrics.operations.filteredMisses++; - this.recordMissTime(startTime); - cacheMisses.inc({ operation: 'filtered_commits' }); - recordEnhancedCacheOperation('filtered_commits', false, undefined, repoUrl); + this.recordCacheMiss( + 'filtered_commits', + 'filteredMisses', + startTime, + repoUrl + ); logger.debug( 'Filtered commits cache miss, applying filters to raw commits', @@ -2632,3 +3097,35 @@ export async function getCachedContributors( > { return repositoryCache.getOrGenerateContributors(repoUrl, filterOptions); } + +/** + * Retrieves code churn analysis for a repository. + * + * Returns cached data when available, or generates fresh analysis by + * examining file change frequency patterns across commit history. 
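+ *
+ * @example
+ * // Typical route usage, mirroring the /churn handler earlier in this diff:
+ * const churnData = await getCachedChurnData(repoUrl, filters);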
+ *
+ * @param repoUrl - Repository URL to analyze
+ * @param filterOptions - Optional filters for churn analysis scope
+ * @returns Promise resolving to code churn analysis results
+ */
+export async function getCachedChurnData(
+  repoUrl: string,
+  filterOptions?: ChurnFilterOptions
+): Promise {
+  return repositoryCache.getOrGenerateChurnData(repoUrl, filterOptions);
+}
+
+/**
+ * Retrieves repository summary statistics.
+ *
+ * Returns cached summary when available, or generates fresh statistics by
+ * performing a sparse clone to extract repository metadata.
+ *
+ * @param repoUrl - Repository URL to analyze
+ * @returns Promise resolving to repository summary
+ */
+export async function getCachedSummary(
+  repoUrl: string
+): Promise {
+  return repositoryCache.getOrGenerateSummary(repoUrl);
+}
diff --git a/apps/backend/src/utils/cacheHelpers.ts b/apps/backend/src/utils/cacheHelpers.ts
new file mode 100644
index 00000000..29aec78a
--- /dev/null
+++ b/apps/backend/src/utils/cacheHelpers.ts
@@ -0,0 +1,167 @@
+import type HybridLRUCache from './hybridLruCache';
+import type { Logger } from 'winston';
+import {
+  recordDetailedError,
+  updateServiceHealthScore,
+} from '../services/metrics';
+
+/**
+ * Cache operation context for error handling.
+ */
+export interface CacheOperationContext {
+  operation: string;
+  key: string;
+  repoUrl?: string;
+}
+
+/**
+ * Cache transaction interface matching repositoryCache.ts
+ */
+export interface CacheTransaction {
+  id: string;
+  operations: Array<{
+    type: 'set' | 'delete';
+    cache: HybridLRUCache;
+    key: string;
+    previousValue?: any;
+  }>;
+}
+
+/**
+ * Safely retrieves a value from cache with standardized error handling.
+ *
+ * This helper wraps cache.get() operations with consistent error recording,
+ * logging, and null fallback behavior. It ensures that cache failures don't
+ * crash the application and are properly tracked for monitoring.
+ *
+ * @param cache - The HybridLRUCache instance to retrieve from
+ * @param key - Cache key to retrieve
+ * @param logger - Winston logger for error logging
+ * @param context - Optional context for enhanced error messages
+ * @returns Cached value or null if not found or error occurred
+ *
+ * @example
+ * const commits = await safeCacheGet(
+ *   rawCommitsCache,
+ *   'raw_commits:abc123',
+ *   logger,
+ *   { operation: 'get', key: rawKey, repoUrl }
+ * );
+ */
+export async function safeCacheGet<T>(
+  cache: HybridLRUCache,
+  key: string,
+  logger: Logger,
+  context?: Partial<CacheOperationContext>
+): Promise<T | null> {
+  try {
+    return await cache.get(key);
+  } catch (error) {
+    // Record detailed error for system health monitoring
+    recordDetailedError(
+      'cache',
+      error instanceof Error ? error : new Error(String(error)),
+      {
+        userImpact: 'degraded',
+        recoveryAction: 'fallback',
+        severity: 'warning',
+      }
+    );
+
+    // Log error with full context for debugging
+    logger.error('Cache operation failed', {
+      operation: context?.operation || 'get',
+      key: context?.key || key,
+      repoUrl: context?.repoUrl,
+      error: error instanceof Error ? error.message : String(error),
+    });
+
+    // Return null to indicate cache miss (graceful degradation)
+    return null;
+  }
+}
+
+/**
+ * Transaction error handler context.
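+ * Passed to handleTransactionError below so every rollback is logged with the
+ * same repoUrl / operation / transactionId triple.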
+ */
+export interface TransactionErrorContext {
+  repoUrl: string;
+  operation: string;
+  transactionId: string;
+}
+
+/**
+ * Metrics interface for transaction failures (matches repositoryCache.ts)
+ */
+export interface TransactionMetrics {
+  transactions: {
+    failed: number;
+  };
+}
+
+/**
+ * Standardized transaction error handler with rollback and metrics.
+ *
+ * This helper provides consistent error handling for cache transaction failures,
+ * including metrics tracking, error recording, health score updates, transaction
+ * rollback, and structured logging. It ensures all transaction errors are handled
+ * uniformly across the codebase.
+ *
+ * @param transaction - Cache transaction to roll back
+ * @param error - The error that occurred
+ * @param metrics - Metrics object to update failure counter
+ * @param logger - Winston logger for error logging
+ * @param context - Transaction context (repoUrl, operation, transactionId)
+ * @param rollbackFn - Function to perform transaction rollback
+ * @returns Never (always rethrows the error after handling)
+ *
+ * @example
+ * catch (error) {
+ *   await handleTransactionError(
+ *     transaction,
+ *     error,
+ *     this.metrics,
+ *     logger,
+ *     { repoUrl, operation: 'cache_filtered', transactionId: transaction.id },
+ *     async (tx) => await this.rollbackTransaction(tx)
+ *   );
+ * }
+ */
+export async function handleTransactionError(
+  transaction: CacheTransaction,
+  error: unknown,
+  metrics: TransactionMetrics,
+  logger: Logger,
+  context: TransactionErrorContext,
+  rollbackFn: (tx: CacheTransaction) => Promise<void>
+): Promise<never> {
+  // Increment failure counter for metrics tracking
+  metrics.transactions.failed++;
+
+  // Record comprehensive error details for enhanced metrics
+  recordDetailedError(
+    'cache',
+    error instanceof Error ? error : new Error(String(error)),
+    {
+      userImpact: 'degraded',
+      recoveryAction: 'retry',
+      severity: 'warning',
+    }
+  );
+
+  // Update system health score to reflect cache errors
+  updateServiceHealthScore('cache', { errorRate: 1 });
+
+  // Rollback transaction to maintain cache consistency
+  await rollbackFn(transaction);
+
+  // Log error with full transaction context
+  logger.error(`Failed to ${context.operation}, transaction rolled back`, {
+    repoUrl: context.repoUrl,
+    transactionId: context.transactionId,
+    error: error instanceof Error ? error.message : String(error),
+  });
+
+  // Rethrow error to propagate to caller
+  throw error;
+}
diff --git a/apps/backend/src/utils/gitUtils.ts b/apps/backend/src/utils/gitUtils.ts
index 04fa638d..1bf3733a 100644
--- a/apps/backend/src/utils/gitUtils.ts
+++ b/apps/backend/src/utils/gitUtils.ts
@@ -3,16 +3,38 @@ import { config } from '../config';
 
 // Utility to perform a shallow clone with a configurable depth
+/**
+ * FIX: Clone with full commit history using blob filtering.
+ * This approach:
+ * - Fetches ALL commits from the default branch (complete history)
+ * - Excludes file contents (blobs) to save 95-99% bandwidth
+ * - Matches the behavior of repositorySummaryService for consistent commit counts
+ *
+ * The previous implementation used --depth, which resulted in incomplete history
+ * (e.g., 346 commits instead of 480 for the gitray repo).
+ */
 export async function shallowClone(
   repoUrl: string,
   targetDir: string,
+  // Retained for call-site compatibility; the blob-filtered clone below no longer uses it
   depth: number = config.git.cloneDepth
 ): Promise<void> {
   const git = simpleGit(targetDir);
-  // Perform a shallow clone to limit bandwidth and disk usage
-  await git.clone(repoUrl, '.', [
-    '--depth',
-    String(depth),
-    '--no-single-branch',
+
+  // Use blob filtering instead of depth limiting for complete history
+  // This matches the approach used by repositorySummaryService
+  await git.init();
+  await git.addRemote('origin', repoUrl);
+  await git.raw(['config', 'core.sparseCheckout', 'true']);
+
+  // Fetch all commits from the default branch but exclude file contents (blobs)
+  // This saves bandwidth while preserving full commit history
+  await git.raw([
+    'fetch',
+    '--filter=blob:none', // Exclude file contents, keep commit history
+    '--no-tags', // Skip tags to reduce bandwidth
+    'origin',
+    'HEAD', // Fetch default branch with full history
   ]);
+
+  await git.raw(['checkout', 'FETCH_HEAD']);
 }
diff --git a/apps/backend/src/utils/hashUtils.ts b/apps/backend/src/utils/hashUtils.ts
new file mode 100644
index 00000000..5a358cd6
--- /dev/null
+++ b/apps/backend/src/utils/hashUtils.ts
@@ -0,0 +1,45 @@
+import crypto from 'crypto';
+
+/**
+ * Generate a stable 16-character hash for repository URLs.
+ *
+ * IMPORTANT: MD5 is used here for cache key generation ONLY, not for security purposes.
+ * Performance is prioritized over cryptographic strength for cache keys.
+ * This provides deterministic, collision-resistant keys for the caching layer.
+ *
+ * @param url - Repository URL to hash
+ * @returns 16-character hexadecimal hash string
+ *
+ * @example
+ * hashUrl('https://github.com/user/repo') // => 16 hex chars, e.g. '5d41402abc4b2a76'
+ */
+export function hashUrl(url: string): string {
+  // SAFE: MD5 used for cache key generation only (not security-sensitive)
+  // Performance is prioritized over cryptographic strength for cache keys
+  return crypto.createHash('md5').update(url).digest('hex').slice(0, 16);
+}
+
+/**
+ * Generate a stable 8-character hash for filter option objects.
+ *
+ * IMPORTANT: MD5 is used here for cache key generation ONLY, not for security purposes.
+ * The function normalizes the object by sorting keys before hashing to ensure
+ * deterministic output regardless of property order.
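+ *
+ * For example, hashObject({ a: 1, b: 2 }) and hashObject({ b: 2, a: 1 }) yield
+ * the same hash. Because the replacer array acts as a property whitelist during
+ * stringification, this assumes flat filter objects.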
+ *
+ * @param obj - Filter options object to hash
+ * @returns 8-character hexadecimal hash string
+ *
+ * @example
+ * hashObject({ author: 'Alice', fromDate: '2024-01-01' }) // => 8 hex chars, e.g. '3f8e2a1c'
+ */
+export function hashObject(obj: any): string {
+  // Normalize object by sorting keys to ensure deterministic hashing
+  const str = JSON.stringify(
+    obj,
+    Object.keys(obj).sort((a, b) => a.localeCompare(b))
+  );
+
+  // SAFE: MD5 used for cache key generation only (not security-sensitive)
+  // Performance is prioritized over cryptographic strength for cache keys
+  return crypto.createHash('md5').update(str).digest('hex').slice(0, 8);
+}
diff --git a/apps/backend/src/utils/repositoryRouteFactory.ts b/apps/backend/src/utils/repositoryRouteFactory.ts
new file mode 100644
index 00000000..3ea27b40
--- /dev/null
+++ b/apps/backend/src/utils/repositoryRouteFactory.ts
@@ -0,0 +1,200 @@
+/**
+ * Repository Route Factory
+ *
+ * This module provides factory functions to reduce duplication in repository routes.
+ * It extracts the common pattern of:
+ * - Setting up request context (logger, repoUrl, userType)
+ * - Executing a cache operation
+ * - Recording success metrics and sending response
+ * - Handling errors uniformly
+ *
+ * @module repositoryRouteFactory
+ */
+
+import type { Request, Response, NextFunction, RequestHandler } from 'express';
+import type { ValidationChain } from 'express-validator';
+import {
+  setupRouteRequest,
+  recordRouteSuccess,
+  recordRouteError,
+} from './routeHelpers.js';
+
+/**
+ * Context provided to route processors, containing the essential
+ * request information extracted by setupRouteRequest
+ */
+export interface RouteContext {
+  req: Request;
+  logger: ReturnType<typeof setupRouteRequest>['logger'];
+  repoUrl: string;
+  userType: string;
+}
+
+/**
+ * Function that builds success metrics from the cache operation result.
+ * These metrics are logged and can be used for monitoring.
+ *
+ * @template T The type of data returned by the cache operation
+ * @param result The result from the cache operation
+ * @returns Object with metric key-value pairs
+ */
+export type SuccessMetricsBuilder<T> = (
+  result: T
+) => Record<string, unknown>;
+
+/**
+ * Function that executes the core cache operation for a route.
+ * It receives the route context and returns the cached data.
+ *
+ * @template T The type of data returned by the cache operation
+ * @param ctx Route context with logger, repoUrl, and userType
+ * @returns Promise resolving to the cached data
+ */
+export type RouteProcessor<T> = (ctx: RouteContext) => Promise<T>;
+
+/**
+ * Creates a route handler array with the unified cache pattern.
+ *
+ * This factory eliminates duplication by extracting the common structure:
+ * 1. Setup request context (logger, repoUrl, userType)
+ * 2. Execute cache operation via the processor function
+ * 3. Record success with metrics
+ * 4. Handle errors uniformly
+ *
+ * The returned array can be spread into router.get/post/etc calls.
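+ *
+ * Note: place handleValidationErrors ahead of these handlers (as in the
+ * buildRepoValidationChain examples below) so the processor only runs on
+ * validated query parameters.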
+ *
+ * @template T The type of data returned by the cache operation
+ * @param featureName Feature identifier for metrics (e.g., 'repository_commits')
+ * @param processor Function that executes the cache operation
+ * @param buildMetrics Function that extracts metrics from the result
+ * @returns Array of Express request handlers (middleware)
+ *
+ * @example
+ * router.get(
+ *   '/commits',
+ *   setRequestPriority('normal'),
+ *   ...buildRepoValidationChain({ includePagination: true }, validators),
+ *   ...createCachedRouteHandler(
+ *     'repository_commits',
+ *     async ({ req, repoUrl }) => {
+ *       const { page, limit, skip } = extractPaginationParams(req.query);
+ *       const commits = await getCachedCommits(repoUrl, { skip, limit });
+ *       return { commits, page, limit };
+ *     },
+ *     ({ commits, page, limit }) => ({ commitCount: commits.length, page, limit })
+ *   )
+ * );
+ */
+export function createCachedRouteHandler<T>(
+  featureName: string,
+  processor: RouteProcessor<T>,
+  buildMetrics: SuccessMetricsBuilder<T>
+): RequestHandler[] {
+  return [
+    async (req: Request, res: Response, next: NextFunction) => {
+      // Setup request context using existing helper
+      const { logger, repoUrl, userType } = setupRouteRequest(req);
+
+      try {
+        // Execute the cache operation via processor
+        const result = await processor({
+          req,
+          logger,
+          repoUrl,
+          userType,
+        });
+
+        // Record success with extracted metrics
+        recordRouteSuccess(
+          featureName,
+          userType,
+          logger,
+          repoUrl,
+          result,
+          res,
+          buildMetrics(result)
+        );
+      } catch (error) {
+        // Uniform error handling
+        recordRouteError(featureName, userType, logger, repoUrl, error, next);
+      }
+    },
+  ];
+}
+
+/**
+ * Options for building repository validation chains.
+ * Each boolean flag includes the corresponding validation middleware.
+ */
+export interface ValidationChainOptions {
+  /** Include pagination validation (page, limit) */
+  includePagination?: boolean;
+  /** Include date validation (fromDate, toDate) */
+  includeDates?: boolean;
+  /** Include author validation (author, authors) */
+  includeAuthors?: boolean;
+  /** Include churn validation (minChanges, extensions) */
+  includeChurn?: boolean;
+}
+
+/**
+ * Builds a validation chain for repository routes based on the provided options.
+ *
+ * This helper consolidates the repetitive pattern of combining validation middlewares:
+ * - `repoUrlValidation()` is always included (required for all routes)
+ * - Additional validators are conditionally included based on options
+ *
+ * The order of validators matches the existing route patterns to maintain behavior.
+ *
+ * @param options Flags indicating which validators to include
+ * @param validators Validation factories injected by the calling route module
+ * @returns Array of ValidationChain middleware
+ *
+ * @example
+ * // For /commits route (requires pagination):
+ * router.get('/commits',
+ *   setRequestPriority('normal'),
+ *   ...buildRepoValidationChain({ includePagination: true }, validators),
+ *   handleValidationErrors,
+ *   ...createCachedRouteHandler(...)
+ * );
+ *
+ * @example
+ * // For /heatmap route (requires dates and authors):
+ * router.get('/heatmap',
+ *   setRequestPriority('low'),
+ *   ...buildRepoValidationChain({ includeDates: true, includeAuthors: true }, validators),
+ *   handleValidationErrors,
+ *   ...createCachedRouteHandler(...)
+ * );
+ */
+export function buildRepoValidationChain(
+  options: ValidationChainOptions,
+  validators: {
+    repoUrlValidation: () => ValidationChain[];
+    paginationValidation?: () => ValidationChain[];
+    dateValidation?: () => ValidationChain[];
+    authorValidation?: () => ValidationChain[];
+    churnValidation?: () => ValidationChain[];
+  }
+): ValidationChain[] {
+  const chain: ValidationChain[] = [...validators.repoUrlValidation()];
+
+  // Add validators in the same order as existing routes to maintain behavior
+  if (options.includePagination && validators.paginationValidation) {
+    chain.push(...validators.paginationValidation());
+  }
+
+  if (options.includeDates && validators.dateValidation) {
+    chain.push(...validators.dateValidation());
+  }
+
+  if (options.includeAuthors && validators.authorValidation) {
+    chain.push(...validators.authorValidation());
+  }
+
+  if (options.includeChurn && validators.churnValidation) {
+    chain.push(...validators.churnValidation());
+  }
+
+  return chain;
+}
diff --git a/apps/backend/src/utils/routeHelpers.ts b/apps/backend/src/utils/routeHelpers.ts
new file mode 100644
index 00000000..7532ffdc
--- /dev/null
+++ b/apps/backend/src/utils/routeHelpers.ts
@@ -0,0 +1,266 @@
+import { Request, Response } from 'express';
+import {
+  CommitFilterOptions,
+  ChurnFilterOptions,
+  HTTP_STATUS,
+} from '@gitray/shared-types';
+import { createRequestLogger } from '../services/logger';
+import { getUserType, recordFeatureUsage } from '../services/metrics';
+
+/**
+ * Extracts common request initialization for route handlers.
+ * Reduces duplication across all repository route endpoints.
+ *
+ * This helper consolidates the standard setup that every route handler needs:
+ * - Request-scoped logger with correlation ID
+ * - Repository URL from query parameters
+ * - User type for metrics tracking
+ *
+ * @param req - Express request object
+ * @returns Object containing logger, repoUrl, and userType
+ *
+ * @example
+ * const { logger, repoUrl, userType } = setupRouteRequest(req);
+ * logger.info('Processing request', { repoUrl });
+ */
+export function setupRouteRequest(req: Request) {
+  const logger = createRequestLogger(req);
+  const { repoUrl } = req.query as Record<string, string>;
+  const userType = getUserType(req);
+
+  return { logger, repoUrl, userType };
+}
+
+/**
+ * Records successful route operation with metrics and logging.
+ * Standardizes success path across all repository endpoints.
+ *
+ * This helper consolidates three common operations after successful data retrieval:
+ * - Recording success metrics for monitoring
+ * - Logging operation completion with context
+ * - Sending HTTP 200 response with data
+ *
+ * @param featureName - Feature identifier for metrics (e.g., 'repository_commits')
+ * @param userType - User type from metrics service
+ * @param logger - Request-scoped logger instance
+ * @param repoUrl - Repository URL for logging context
+ * @param data - Response data to send to client
+ * @param res - Express response object
+ * @param additionalLogData - Optional extra fields for success log
+ *
+ * @example
+ * recordRouteSuccess(
+ *   'repository_commits',
+ *   userType,
+ *   logger,
+ *   repoUrl,
+ *   { commits, page, limit },
+ *   res,
+ *   { commitCount: commits.length, page, limit }
+ * );
+ */
+export function recordRouteSuccess<T>(
+  featureName: string,
+  userType: string,
+  logger: any,
+  repoUrl: string,
+  data: T,
+  res: Response,
+  additionalLogData?: Record<string, unknown>
+): void {
+  // Record success metrics
+  recordFeatureUsage(featureName, userType, true, 'api_call');
+
+  // Log successful operation
+  logger.info(`${featureName} retrieved successfully`, {
+    repoUrl,
+    ...additionalLogData,
+  });
+
+  // Send response
+  res.status(HTTP_STATUS.OK).json(data);
+}
+
+/**
+ * Records failed route operation with metrics and logging.
+ * Standardizes error handling across all repository endpoints.
+ *
+ * This helper consolidates three common operations when errors occur:
+ * - Recording failure metrics for monitoring
+ * - Logging error details with context
+ * - Propagating error to Express error handler middleware
+ *
+ * @param featureName - Feature identifier for metrics (e.g., 'repository_commits')
+ * @param userType - User type from metrics service
+ * @param logger - Request-scoped logger instance
+ * @param repoUrl - Repository URL for logging context
+ * @param error - The error that occurred
+ * @param next - Express next function for error propagation
+ *
+ * @example
+ * } catch (error) {
+ *   recordRouteError('repository_commits', userType, logger, repoUrl, error, next);
+ * }
+ */
+export function recordRouteError(
+  featureName: string,
+  userType: string,
+  logger: any,
+  repoUrl: string,
+  error: unknown,
+  next: any
+): void {
+  // Record failure metrics
+  recordFeatureUsage(featureName, userType, false, 'api_call');
+
+  // Log error with context
+  logger.error(`Failed to retrieve ${featureName}`, {
+    repoUrl,
+    error: error instanceof Error ? error.message : String(error),
+  });
+
+  // Propagate error to Express error handler
+  next(error);
+}
+
+/**
+ * Builds CommitFilterOptions from Express query parameters.
+ * Only includes defined properties to ensure consistent cache keys.
+ *
+ * This helper eliminates duplication across route handlers that need to
+ * construct filter objects from query parameters. By excluding undefined
+ * properties, it ensures that cache key generation is consistent regardless
+ * of which optional filters are provided.
+ *
+ * @param query - Express request query object containing filter parameters
+ * @returns CommitFilterOptions with only defined properties
+ *
+ * @example
+ * const filters = buildCommitFilters({
+ *   author: 'john',
+ *   fromDate: '2024-01-01',
+ *   toDate: '2024-12-31'
+ * });
+ * // Returns: { author: 'john', fromDate: '2024-01-01', toDate: '2024-12-31' }
+ */
+export function buildCommitFilters(query: {
+  author?: string;
+  authors?: string;
+  fromDate?: string;
+  toDate?: string;
+}): CommitFilterOptions {
+  const filters: CommitFilterOptions = {};
+
+  if (query.author) {
+    filters.author = query.author;
+  }
+  if (query.authors) {
+    filters.authors = query.authors.split(',').map((a) => a.trim());
+  }
+  if (query.fromDate) {
+    filters.fromDate = query.fromDate;
+  }
+  if (query.toDate) {
+    filters.toDate = query.toDate;
+  }
+
+  return filters;
+}
+
+/**
+ * Extracts pagination parameters from Express query parameters.
+ * Provides consistent pagination logic across all paginated routes.
+ *
+ * This helper eliminates duplication in routes that need pagination.
+ * It handles default values and ensures consistent page/limit/skip calculations.
+ *
+ * @param query - Express request query object containing pagination parameters
+ * @returns PaginationParams with page, limit, and skip values
+ *
+ * @example
+ * const { page, limit, skip } = extractPaginationParams(req.query);
+ * // Returns: { page: 1, limit: 100, skip: 0 } with defaults
+ */
+export interface PaginationParams {
+  page: number;
+  limit: number;
+  skip: number;
+}
+
+export function extractPaginationParams(query: {
+  page?: string;
+  limit?: string;
+}): PaginationParams {
+  const page = Number.parseInt(query.page || '1') || 1;
+  const limit = Number.parseInt(query.limit || '100') || 100;
+  const skip = (page - 1) * limit;
+
+  return { page, limit, skip };
+}
+
+/**
+ * Extracts filter parameters from Express query parameters.
+ * Provides consistent extraction of author/date filter parameters.
+ *
+ * This helper eliminates duplication in routes that need filter parameters.
+ * Simply destructures the filter fields from query for cleaner code.
+ *
+ * @param query - Express request query object containing filter parameters
+ * @returns Object with optional filter fields
+ *
+ * @example
+ * const { author, authors, fromDate, toDate } = extractFilterParams(req.query);
+ */
+export function extractFilterParams(
+  query: Record<string, string | undefined>
+): {
+  author?: string;
+  authors?: string;
+  fromDate?: string;
+  toDate?: string;
+} {
+  const { author, authors, fromDate, toDate } = query;
+  return { author, authors, fromDate, toDate };
+}
+
+/**
+ * Builds ChurnFilterOptions from Express query parameters.
+ * Only includes defined properties to ensure consistent cache keys.
+ *
+ * This helper mirrors the pattern of buildCommitFilters but for churn analysis.
+ * By excluding undefined properties, it ensures cache key consistency.
+ * + * @param query - Express request query object containing churn filter parameters + * @returns ChurnFilterOptions with only defined properties + * + * @example + * const filters = buildChurnFilters({ + * fromDate: '2024-01-01', + * minChanges: '5', + * extensions: 'ts,tsx' + * }); + * // Returns: { since: '2024-01-01', minChanges: 5, extensions: ['ts', 'tsx'] } + */ +export function buildChurnFilters(query: { + fromDate?: string; + toDate?: string; + minChanges?: string; + extensions?: string; +}): ChurnFilterOptions { + const filters: ChurnFilterOptions = {}; + + if (query.fromDate) { + filters.since = query.fromDate; + } + if (query.toDate) { + filters.until = query.toDate; + } + if (query.minChanges) { + filters.minChanges = Number.parseInt(query.minChanges); + } + if (query.extensions) { + filters.extensions = query.extensions.split(',').map((e) => e.trim()); + } + + return filters; +} diff --git a/eslint.config.mjs b/eslint.config.mjs index e382e1c1..3a42af91 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -21,6 +21,7 @@ export default tseslint.config( 'apps/frontend/tailwind.config.cjs', 'prettier.config.js', '**/dist/**', + '**/build/**', '**/node_modules/**', 'apps/backend/src/**/*.js', 'apps/backend/src/**/*.js.map', diff --git a/scripts/api_test_scenarios.md b/scripts/api_test_scenarios.md new file mode 100644 index 00000000..df96a8ce --- /dev/null +++ b/scripts/api_test_scenarios.md @@ -0,0 +1,269 @@ + +# Comprehensive API Testing Scenarios + +## Route 1: `/api/repositories/heatmap` (REFACTORED) + +### Valid Scenarios +1. **No filters** - Baseline test + - Expected: Full heatmap data with all commits + - Validates: Basic functionality works after refactor + +2. **Date filter - fromDate only** + - Input: `fromDate=2024-01-01` + - Expected: Data from 2024 onwards + - Validates: Single date filter parameter extraction + +3. **Date filter - toDate only** + - Input: `toDate=2024-12-31` + - Expected: Data up to end of 2024 + - Validates: Single date filter parameter extraction + +4. **Date range - fromDate + toDate** + - Input: `fromDate=2024-01-01&toDate=2024-12-31` + - Expected: Data within 2024 + - Validates: Multiple filter parameters, buildCommitFilters logic + +5. **Author filter - single author** + - Input: `author=jonas` + - Expected: Data for specific author + - Validates: Author parameter extraction + +6. **Authors filter - multiple authors** + - Input: `authors=jonas,contributor2` + - Expected: Data for multiple authors + - Validates: Authors array parsing (split by comma) + +7. **Combined filters** + - Input: `fromDate=2024-01-01&toDate=2024-12-31&author=jonas` + - Expected: Data matching all filters + - Validates: Complete filter pipeline + +### Cache Behavior +1. **First call** - Should be cache MISS +2. **Second call (same params)** - Should be cache HIT +3. **Different params** - Should be cache MISS + +### Error Scenarios +1. **Missing repoUrl** - HTTP 400 +2. **Invalid repoUrl format** - HTTP 400 +3. **Invalid date format** - HTTP 400 +4. **Invalid URL scheme** - HTTP 400 + +### Response Validation +- Contains `data` array +- Contains `timePeriod` field +- Data points have required fields: date, commits, authors +- HTTP 200 status + +--- + +## Route 2: `/api/repositories/contributors` (REFACTORED) + +### Valid Scenarios +1. **No filters** - All contributors + - Expected: Array of contributors with stats + - Validates: Basic functionality + +2. **Date filter - fromDate** + - Input: `fromDate=2024-01-01` + - Expected: Contributors from 2024 onwards + +3. 
**Date filter - toDate** + - Input: `toDate=2024-12-31` + - Expected: Contributors up to 2024 + +4. **Date range** + - Input: `fromDate=2024-01-01&toDate=2024-12-31` + - Expected: Contributors within 2024 + +5. **Author filter** + - Input: `author=jonas` + - Expected: Single contributor data + +6. **Combined filters** + - Input: `fromDate=2024-01-01&toDate=2024-12-31&author=jonas` + +### Cache Behavior +- Same as heatmap + +### Error Scenarios +- Same as heatmap + +### Response Validation +- Returns array of contributor objects +- Each contributor has: name, email, commits, additions, deletions +- Sorted by commit count descending +- HTTP 200 status + +--- + +## Route 3: `/api/repositories/churn` (REFACTORED) + +### Valid Scenarios +1. **No filters** - All churn data + - Expected: Complete churn analysis + - Validates: Basic functionality + +2. **Date filter - fromDate** + - Input: `fromDate=2024-01-01` + - Expected: Churn from 2024 onwards + - Validates: fromDate → since mapping in buildChurnFilters + +3. **Date filter - toDate** + - Input: `toDate=2024-12-31` + - Expected: Churn up to 2024 + - Validates: toDate → until mapping + +4. **Date range** + - Input: `fromDate=2024-01-01&toDate=2024-12-31` + - Expected: Churn within 2024 + +5. **minChanges filter** + - Input: `minChanges=10` + - Expected: Only files with 10+ changes + - Validates: Integer parsing + +6. **extensions filter - single** + - Input: `extensions=ts` + - Expected: Only TypeScript files + +7. **extensions filter - multiple** + - Input: `extensions=ts,tsx,js` + - Expected: Multiple file types + - Validates: Split and trim logic + +8. **Combined filters** + - Input: `fromDate=2024-01-01&minChanges=5&extensions=ts,tsx` + - Expected: All filters applied + +### Cache Behavior +- Same pattern as other routes + +### Error Scenarios +- Same as heatmap +- Invalid minChanges (non-numeric) - HTTP 400 + +### Response Validation +- Contains `files` array +- Contains `summary` object +- Files have: path, additions, deletions, changes +- HTTP 200 status + +--- + +## Route 4: `/api/repositories/full-data` (NOT REFACTORED) + +### Valid Scenarios +1. **No filters, default pagination** + - Expected: First 100 commits + heatmap + - Validates: No regression in non-refactored code + +2. **Custom pagination - page 1** + - Input: `page=1&limit=10` + - Expected: First 10 commits + +3. **Custom pagination - page 2** + - Input: `page=2&limit=10` + - Expected: Commits 11-20 + +4. **Date filters** + - Input: `fromDate=2024-01-01&toDate=2024-12-31` + - Expected: Filtered heatmap (commits unfiltered due to pagination) + +5. **Combined filters + pagination** + - Input: `fromDate=2024-01-01&page=1&limit=5` + +### Cache Behavior +- Two cache operations (commits + heatmapData) +- Sequential fetching (not parallel) +- Both should cache independently + +### Error Scenarios +- Same as heatmap + +### Response Validation +- Contains `commits` array +- Contains `heatmapData` object +- heatmapData has `data` and `timePeriod` +- Commits have proper structure +- Pagination metadata present +- HTTP 200 status + +--- + +## Route 5: `/api/repositories/commits` (NOT REFACTORED) + +### Valid Scenarios +1. **Default pagination** + - Expected: First 100 commits + +2. **Custom pagination** + - Input: `page=1&limit=20` + - Expected: First 20 commits + +3. 
**Page 2**
+   - Input: `page=2&limit=20`
+   - Expected: Commits 21-40
+
+### Cache Behavior
+- Standard cache hit/miss pattern
+
+### Error Scenarios
+- Same as heatmap
+
+### Response Validation
+- Returns array of commit objects
+- Each commit has: hash, message, author, date, stats
+- Proper pagination applied
+- HTTP 200 status
+
+---
+
+## Route 6: `/api/repositories/summary` (NOT REFACTORED)
+
+### Valid Scenarios
+1. **Basic request**
+   - Expected: Repository summary with all stats
+
+### Cache Behavior
+- Single cache operation
+- Should cache entire summary
+
+### Error Scenarios
+- Same as heatmap
+
+### Response Validation
+- Contains `repository` object (name, url, defaultBranch)
+- Contains `statistics` object (commits, contributors, files, etc.)
+- Contains `timeline` data
+- HTTP 200 status
+
+---
+
+## Cross-Route Testing
+
+### Cache Consistency
+1. Call heatmap → cache miss
+2. Call contributors → separate cache miss
+3. Call heatmap again → cache hit
+4. Call contributors again → cache hit
+
+### Filter Consistency
+1. Same date filters across routes should use same data subset
+2. Author filters should match commit authors
+
+### Performance
+1. First call (cache miss) - slower
+2. Second call (cache hit) - fast (<50ms)
+3. Different params - cache miss
+
+---
+
+## Error Handling Consistency
+
+All routes should handle these consistently:
+1. Missing repoUrl → HTTP 400, specific error message
+2. Invalid repoUrl → HTTP 400, validation error
+3. Invalid date format → HTTP 400, validation error
+4. Server errors → HTTP 500, proper error structure
+5. Timeout scenarios → HTTP 504
diff --git a/scripts/test_api_complete.sh b/scripts/test_api_complete.sh
new file mode 100755
index 00000000..96e12061
--- /dev/null
+++ b/scripts/test_api_complete.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+#
+# Comprehensive API Test Suite
+# Tests all GitRay API endpoints after refactoring
+#
+
+# Note: -e is deliberately omitted so a failing test is tallied and the suite
+# keeps running instead of aborting on the first non-zero return from test_api.
+set -uo pipefail
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+BASE_URL="http://localhost:3001"
+REPO_URL="https://github.com/jonasyr/gitray.git"
+
+TOTAL=0
+PASSED=0
+FAILED=0
+
+echo -e "${BOLD}${BLUE}╔════════════════════════════════════════════════════════════════╗${NC}"
+echo -e "${BOLD}${BLUE}║ Comprehensive API Test Suite for GitRay ║${NC}"
+echo -e "${BOLD}${BLUE}╚════════════════════════════════════════════════════════════════╝${NC}"
+echo ""
+echo "Repository: $REPO_URL"
+echo "Base URL: $BASE_URL"
+echo ""
+
+# Clear cache and stale locks
+echo -e "${YELLOW}► Clearing cache and locks...${NC}"
+rm -rf apps/backend/cache/* 2>/dev/null || true
+rm -rf apps/backend/locks/* 2>/dev/null || true
+echo -e "${GREEN} ✓ Cache and locks cleared${NC}"
+echo ""
+
+# Test function
+test_api() {
+  local name="$1"
+  local url="$2"
+  local expect_status="${3:-200}"
+
+  TOTAL=$((TOTAL + 1))
+
+  echo -e "${CYAN}► Test $TOTAL: ${name}${NC}"
+
+  # Make request
+  local temp_file=$(mktemp)
+  local http_code=$(curl -s -w "%{http_code}" -o "$temp_file" "$url")
+  local body=$(cat "$temp_file")
+  rm -f "$temp_file"
+
+  # Check status
+  if [[ "$http_code" != "$expect_status" ]]; then
+    echo -e "${RED} ✗ FAIL: HTTP $http_code (expected $expect_status)${NC}"
+    echo " Response: $body" | head -c 200
+    FAILED=$((FAILED + 1))
+    return 1
+  fi
+
+  # Validate JSON (only if expecting 200)
+  if [[ "$expect_status" == "200" ]]; then
+    if echo "$body" | python3 -m json.tool >/dev/null 2>&1; then
+      echo -e "${GREEN} ✓ PASS: HTTP $http_code, Valid JSON${NC}"
+ PASSED=$((PASSED + 1)) + else + echo -e "${RED} ✗ FAIL: Invalid JSON response${NC}" + echo " Response: $body" | head -c 200 + FAILED=$((FAILED + 1)) + return 1 + fi + else + echo -e "${GREEN} ✓ PASS: HTTP $http_code (error scenario)${NC}" + PASSED=$((PASSED + 1)) + fi +} + +# REFACTORED ROUTES +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo -e "${YELLOW}TESTING REFACTORED ROUTES (handleFilteredRoute helper)${NC}" +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo "" + +echo -e "${BOLD}1. HEATMAP ROUTE${NC}" +test_api "Heatmap - No filters" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=${REPO_URL}" + +test_api "Heatmap - From date" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=${REPO_URL}&fromDate=2024-01-01" + +test_api "Heatmap - Date range" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=${REPO_URL}&fromDate=2024-01-01&toDate=2024-12-31" + +test_api "Heatmap - With author" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=${REPO_URL}&author=jonas" + +echo "" +echo -e "${BOLD}2. CONTRIBUTORS ROUTE${NC}" +test_api "Contributors - No filters" \ + "${BASE_URL}/api/repositories/contributors?repoUrl=${REPO_URL}" + +test_api "Contributors - From date" \ + "${BASE_URL}/api/repositories/contributors?repoUrl=${REPO_URL}&fromDate=2024-01-01" + +test_api "Contributors - Date range" \ + "${BASE_URL}/api/repositories/contributors?repoUrl=${REPO_URL}&fromDate=2024-01-01&toDate=2024-12-31" + +echo "" +echo -e "${BOLD}3. CHURN ROUTE${NC}" +test_api "Churn - No filters" \ + "${BASE_URL}/api/repositories/churn?repoUrl=${REPO_URL}" + +test_api "Churn - From date" \ + "${BASE_URL}/api/repositories/churn?repoUrl=${REPO_URL}&fromDate=2024-01-01" + +test_api "Churn - Min changes" \ + "${BASE_URL}/api/repositories/churn?repoUrl=${REPO_URL}&minChanges=10" + +test_api "Churn - Extensions" \ + "${BASE_URL}/api/repositories/churn?repoUrl=${REPO_URL}&extensions=ts,tsx" + +test_api "Churn - All filters" \ + "${BASE_URL}/api/repositories/churn?repoUrl=${REPO_URL}&fromDate=2024-01-01&minChanges=5&extensions=ts" + +echo "" +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo -e "${YELLOW}TESTING NON-REFACTORED ROUTES (Regression Check)${NC}" +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo "" + +echo -e "${BOLD}4. FULL-DATA ROUTE${NC}" +test_api "Full-data - Default" \ + "${BASE_URL}/api/repositories/full-data?repoUrl=${REPO_URL}" + +test_api "Full-data - With pagination" \ + "${BASE_URL}/api/repositories/full-data?repoUrl=${REPO_URL}&page=1&limit=10" + +test_api "Full-data - With filters" \ + "${BASE_URL}/api/repositories/full-data?repoUrl=${REPO_URL}&fromDate=2024-01-01&page=1&limit=5" + +echo "" +echo -e "${BOLD}5. COMMITS ROUTE${NC}" +test_api "Commits - Default" \ + "${BASE_URL}/api/repositories/commits?repoUrl=${REPO_URL}" + +test_api "Commits - With pagination" \ + "${BASE_URL}/api/repositories/commits?repoUrl=${REPO_URL}&page=1&limit=20" + +echo "" +echo -e "${BOLD}6. 
SUMMARY ROUTE${NC}" +test_api "Summary - Basic" \ + "${BASE_URL}/api/repositories/summary?repoUrl=${REPO_URL}" + +echo "" +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo -e "${YELLOW}TESTING ERROR SCENARIOS${NC}" +echo -e "${YELLOW}═══════════════════════════════════════════════════════════════${NC}" +echo "" + +test_api "Missing repoUrl" \ + "${BASE_URL}/api/repositories/heatmap" \ + 400 + +test_api "Invalid repoUrl format" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=not-a-url" \ + 400 + +test_api "Invalid date format" \ + "${BASE_URL}/api/repositories/heatmap?repoUrl=${REPO_URL}&fromDate=invalid" \ + 400 + +# SUMMARY +echo "" +echo -e "${BOLD}${BLUE}╔════════════════════════════════════════════════════════════════╗${NC}" +echo -e "${BOLD}${BLUE}║ TEST RESULTS${NC}" +echo -e "${BOLD}${BLUE}╚════════════════════════════════════════════════════════════════╝${NC}" +echo "" +echo "Total Tests: $TOTAL" +echo -e "${GREEN}Passed: $PASSED${NC}" +echo -e "${RED}Failed: $FAILED${NC}" + +if [[ $TOTAL -gt 0 ]]; then + pass_rate=$(awk "BEGIN {printf \"%.1f\", ($PASSED / $TOTAL) * 100}") + echo "Pass Rate: ${pass_rate}%" +fi + +echo "" + +if [[ $FAILED -eq 0 ]]; then + echo -e "${GREEN}${BOLD}✓✓✓ ALL TESTS PASSED ✓✓✓${NC}" + echo -e "${GREEN}Refactored code working perfectly!${NC}" + exit 0 +else + echo -e "${RED}${BOLD}✗ SOME TESTS FAILED${NC}" + exit 1 +fi
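+
+# Usage (assumes the backend is already listening on $BASE_URL above):
+#   ./scripts/test_api_complete.sh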