diff --git a/apps/deployments-service/internal/service/lifecycle.go b/apps/deployments-service/internal/service/lifecycle.go index 5196a95a..2ab2534b 100644 --- a/apps/deployments-service/internal/service/lifecycle.go +++ b/apps/deployments-service/internal/service/lifecycle.go @@ -652,6 +652,27 @@ func (s *Service) StartDeployment(ctx context.Context, req *connect.Request[depl return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) } + // Quota check: verify memory/CPU won't exceed limits when starting + startMemory := int64(512 * 1024 * 1024) // Default 512MB + if dbDep.MemoryBytes != nil { + startMemory = *dbDep.MemoryBytes + } + startCPU := int64(1024) // Default + if dbDep.CPUShares != nil { + startCPU = *dbDep.CPUShares + } + startReplicas := 1 + if dbDep.Replicas != nil { + startReplicas = int(*dbDep.Replicas) + } + if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ + Replicas: startReplicas, + MemoryBytes: startMemory, + CPUshares: startCPU, + }); err != nil { + return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) + } + // Check if this is a compose-based deployment if dbDep.ComposeYaml != "" { // Deploy using Docker Compose @@ -838,18 +859,43 @@ func (s *Service) ScaleDeployment(ctx context.Context, req *connect.Request[depl if err := s.permissionChecker.CheckScopedPermission(ctx, orgID, auth.ScopedPermission{Permission: auth.PermissionDeploymentScale, ResourceType: "deployment", ResourceID: deploymentID}); err != nil { return nil, connect.NewError(connect.CodePermissionDenied, err) } - // Quota check: replicas delta + // Quota check: replicas + memory/CPU newReplicas := int(req.Msg.GetReplicas()) if newReplicas <= 0 { return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("replicas must be > 0")) } - if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{Replicas: newReplicas}); err != nil { - return 
nil, connect.NewError(connect.CodeFailedPrecondition, err) + dbDep, err := s.repo.GetByID(ctx, deploymentID) + if err != nil { + return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) + } + // Only check quota for the delta (additional replicas beyond current). + // currentAllocations already includes this deployment's existing usage. + currentReplicas := 1 + if dbDep.Replicas != nil { + currentReplicas = int(*dbDep.Replicas) + } + deltaReplicas := newReplicas - currentReplicas + if deltaReplicas > 0 { + scaleMemory := int64(512 * 1024 * 1024) // Default 512MB + if dbDep.MemoryBytes != nil { + scaleMemory = *dbDep.MemoryBytes + } + scaleCPU := int64(1024) // Default + if dbDep.CPUShares != nil { + scaleCPU = *dbDep.CPUShares + } + if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ + Replicas: deltaReplicas, + MemoryBytes: scaleMemory, + CPUshares: scaleCPU, + }); err != nil { + return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) + } } if s.manager != nil { _ = s.manager.ScaleDeployment(ctx, deploymentID, newReplicas) } - dbDep, err := s.repo.GetByID(ctx, deploymentID) + dbDep, err = s.repo.GetByID(ctx, deploymentID) if err != nil { return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) } diff --git a/apps/shared/pkg/quota/quota.go b/apps/shared/pkg/quota/quota.go index d0698ca5..864a1774 100644 --- a/apps/shared/pkg/quota/quota.go +++ b/apps/shared/pkg/quota/quota.go @@ -28,7 +28,7 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re if err != nil { return fmt.Errorf("quota: load: %w", err) } // Get plan limits first (these are the maximum boundary) - planDeployMax, planMem, planCPU := c.getPlanLimits(organizationID) + planDeployMax, planMem, planCPU := c.getPlanLimitsFromQuota(quota) // Get effective limits: use overrides if set, but cap them to plan limits // 
Plan limits are the final boundary - org overrides cannot exceed them @@ -84,9 +84,10 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re if effMem > 0 && curMemBytes+req.MemoryBytes*int64(req.Replicas) > effMem { return fmt.Errorf("quota exceeded: memory %d bytes > max %d bytes", curMemBytes+req.MemoryBytes*int64(req.Replicas), effMem) } - // Convert Docker CPUshares to cores (Docker uses 1024 shares per core) - reqCores := int(req.CPUshares) / 1024 - if req.CPUshares%1024 != 0 { + // Convert Docker CPUshares to cores, multiplied by replicas (matching currentAllocations) + totalCPUshares := req.CPUshares * int64(req.Replicas) + reqCores := int(totalCPUshares) / 1024 + if totalCPUshares%1024 != 0 { reqCores++ // round up partial cores } if effCPU > 0 && curCPUcores+reqCores > effCPU { @@ -104,40 +105,45 @@ func (c *Checker) getQuota(orgID string) (*database.OrgQuota, error) { return &quota, nil } -// getPlanLimits gets plan limits for an organization -func (c *Checker) getPlanLimits(orgID string) (deploymentsMax int, memoryBytes int64, cpuCores int) { - var quota database.OrgQuota - if err := database.DB.Where("organization_id = ?", orgID).First(&quota).Error; err != nil { - return 0, 0, 0 // No plan +// getPlanLimitsFromQuota gets plan limits using an already-loaded OrgQuota +func (c *Checker) getPlanLimitsFromQuota(q *database.OrgQuota) (deploymentsMax int, memoryBytes int64, cpuCores int) { + if q.PlanID == "" { + return 0, 0, 0 } - - if quota.PlanID == "" { - return 0, 0, 0 // No plan assigned - } - var plan database.OrganizationPlan - if err := database.DB.First(&plan, "id = ?", quota.PlanID).Error; err != nil { - return 0, 0, 0 // Plan not found + if err := database.DB.First(&plan, "id = ?", q.PlanID).Error; err != nil { + return 0, 0, 0 } - return plan.DeploymentsMax, plan.MemoryBytes, plan.CPUCores } func (c *Checker) currentAllocations(orgID string) (replicas int, memBytes int64, cpuCores int, err error) { - // Count running 
replicas + // Count running replicas from deployment locations var count int64 - if err = database.DB.Model(&database.DeploymentLocation{}). - Where("deployment_locations.status = ?", "running"). - Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id"). - Where("d.organization_id = ?", orgID). - Count(&count).Error; err != nil { return } - // Sum requested memory and CPU across org deployments (nil treated as 0) + if err = database.DB.Model(&database.DeploymentLocation{}). + Where("deployment_locations.status = ?", "running"). + Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id"). + Where("d.organization_id = ?", orgID). + Count(&count).Error; err != nil { + return + } + // Sum memory and CPU across active deployments, multiplied by their replica count. + // Only count deployments that are running, building, or deploying (not stopped/failed). + // RUNNING=3, BUILDING=2, DEPLOYING=6 type agg struct{ Mem int64; CPU int64 } var a agg if err = database.DB.Model(&database.Deployment{}). - Select("COALESCE(SUM(COALESCE(memory_bytes,0)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0)),0) as cpu"). - Where("organization_id = ?", orgID).Scan(&a).Error; err != nil { return } - return int(count), a.Mem, int(a.CPU), nil + Select("COALESCE(SUM(COALESCE(memory_bytes,0) * COALESCE(replicas,1)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0) * COALESCE(replicas,1)),0) as cpu"). + Where("organization_id = ? AND deleted_at IS NULL AND status IN (2,3,6)", orgID). + Scan(&a).Error; err != nil { + return + } + // Convert Docker CPU shares to cores (1024 shares = 1 core) + cpuCores = int(a.CPU) / 1024 + if a.CPU%1024 != 0 { + cpuCores++ // round up partial cores + } + return int(count), a.Mem, cpuCores, nil } // GetEffectiveLimits returns the effective memory and CPU limits for an organization