Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 50 additions & 4 deletions apps/deployments-service/internal/service/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,27 @@ func (s *Service) StartDeployment(ctx context.Context, req *connect.Request[depl
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
}

// Quota check: verify memory/CPU won't exceed limits when starting
startMemory := int64(512 * 1024 * 1024) // Default 512MB
if dbDep.MemoryBytes != nil {
startMemory = *dbDep.MemoryBytes
}
startCPU := int64(1024) // Default
if dbDep.CPUShares != nil {
startCPU = *dbDep.CPUShares
}
startReplicas := 1
if dbDep.Replicas != nil {
startReplicas = int(*dbDep.Replicas)
}
if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{
Replicas: startReplicas,
MemoryBytes: startMemory,
CPUshares: startCPU,
}); err != nil {
return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err))
}

// Check if this is a compose-based deployment
if dbDep.ComposeYaml != "" {
// Deploy using Docker Compose
Expand Down Expand Up @@ -838,18 +859,43 @@ func (s *Service) ScaleDeployment(ctx context.Context, req *connect.Request[depl
if err := s.permissionChecker.CheckScopedPermission(ctx, orgID, auth.ScopedPermission{Permission: auth.PermissionDeploymentScale, ResourceType: "deployment", ResourceID: deploymentID}); err != nil {
return nil, connect.NewError(connect.CodePermissionDenied, err)
}
// Quota check: replicas delta
// Quota check: replicas + memory/CPU
newReplicas := int(req.Msg.GetReplicas())
if newReplicas <= 0 {
return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("replicas must be > 0"))
}
if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{Replicas: newReplicas}); err != nil {
return nil, connect.NewError(connect.CodeFailedPrecondition, err)
dbDep, err := s.repo.GetByID(ctx, deploymentID)
if err != nil {
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
}
// Only check quota for the delta (additional replicas beyond the current count).
// currentAllocations already includes this deployment's existing usage while it
// is running, building, or deploying.
currentReplicas := 1
if dbDep.Replicas != nil {
currentReplicas = int(*dbDep.Replicas)
}
deltaReplicas := newReplicas - currentReplicas
if deltaReplicas > 0 {
scaleMemory := int64(512 * 1024 * 1024) // Default 512MB
if dbDep.MemoryBytes != nil {
scaleMemory = *dbDep.MemoryBytes
}
scaleCPU := int64(1024) // Default
if dbDep.CPUShares != nil {
scaleCPU = *dbDep.CPUShares
}
if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{
Replicas: deltaReplicas,
MemoryBytes: scaleMemory,
CPUshares: scaleCPU,
}); err != nil {
return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err))
}
}
if s.manager != nil {
_ = s.manager.ScaleDeployment(ctx, deploymentID, newReplicas)
}
dbDep, err := s.repo.GetByID(ctx, deploymentID)
dbDep, err = s.repo.GetByID(ctx, deploymentID)
if err != nil {
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
}
Expand Down
60 changes: 33 additions & 27 deletions apps/shared/pkg/quota/quota.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re
if err != nil { return fmt.Errorf("quota: load: %w", err) }

// Get plan limits first (these are the maximum boundary)
planDeployMax, planMem, planCPU := c.getPlanLimits(organizationID)
planDeployMax, planMem, planCPU := c.getPlanLimitsFromQuota(quota)

// Get effective limits: use overrides if set, but cap them to plan limits
// Plan limits are the final boundary - org overrides cannot exceed them
Expand Down Expand Up @@ -84,9 +84,10 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re
if effMem > 0 && curMemBytes+req.MemoryBytes*int64(req.Replicas) > effMem {
return fmt.Errorf("quota exceeded: memory %d bytes > max %d bytes", curMemBytes+req.MemoryBytes*int64(req.Replicas), effMem)
}
// Convert Docker CPUshares to cores (Docker uses 1024 shares per core)
reqCores := int(req.CPUshares) / 1024
if req.CPUshares%1024 != 0 {
// Convert Docker CPUshares to cores, multiplied by replicas (matching currentAllocations)
totalCPUshares := req.CPUshares * int64(req.Replicas)
reqCores := int(totalCPUshares) / 1024
if totalCPUshares%1024 != 0 {
reqCores++ // round up partial cores
}
if effCPU > 0 && curCPUcores+reqCores > effCPU {
Expand All @@ -104,40 +105,45 @@ func (c *Checker) getQuota(orgID string) (*database.OrgQuota, error) {
return &quota, nil
}

// getPlanLimits gets plan limits for an organization
func (c *Checker) getPlanLimits(orgID string) (deploymentsMax int, memoryBytes int64, cpuCores int) {
var quota database.OrgQuota
if err := database.DB.Where("organization_id = ?", orgID).First(&quota).Error; err != nil {
return 0, 0, 0 // No plan
// getPlanLimitsFromQuota returns the plan limits for an already-loaded OrgQuota.
// It returns zeros when no plan is assigned or the plan cannot be loaded.
func (c *Checker) getPlanLimitsFromQuota(q *database.OrgQuota) (deploymentsMax int, memoryBytes int64, cpuCores int) {
if q.PlanID == "" {
return 0, 0, 0
}

if quota.PlanID == "" {
return 0, 0, 0 // No plan assigned
}

var plan database.OrganizationPlan
if err := database.DB.First(&plan, "id = ?", quota.PlanID).Error; err != nil {
return 0, 0, 0 // Plan not found
if err := database.DB.First(&plan, "id = ?", q.PlanID).Error; err != nil {
return 0, 0, 0
}

return plan.DeploymentsMax, plan.MemoryBytes, plan.CPUCores
}

func (c *Checker) currentAllocations(orgID string) (replicas int, memBytes int64, cpuCores int, err error) {
// Count running replicas
// Count running replicas from deployment locations
var count int64
if err = database.DB.Model(&database.DeploymentLocation{}).
Where("deployment_locations.status = ?", "running").
Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id").
Where("d.organization_id = ?", orgID).
Count(&count).Error; err != nil { return }
// Sum requested memory and CPU across org deployments (nil treated as 0)
if err = database.DB.Model(&database.DeploymentLocation{}).
Where("deployment_locations.status = ?", "running").
Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id").
Where("d.organization_id = ?", orgID).
Count(&count).Error; err != nil {
return
}
// Sum memory and CPU across active deployments, multiplied by their replica count.
// Only count deployments that are running, building, or deploying (not stopped/failed).
// RUNNING=3, BUILDING=2, DEPLOYING=6
type agg struct{ Mem int64; CPU int64 }
var a agg
if err = database.DB.Model(&database.Deployment{}).
Select("COALESCE(SUM(COALESCE(memory_bytes,0)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0)),0) as cpu").
Where("organization_id = ?", orgID).Scan(&a).Error; err != nil { return }
return int(count), a.Mem, int(a.CPU), nil
Select("COALESCE(SUM(COALESCE(memory_bytes,0) * COALESCE(replicas,1)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0) * COALESCE(replicas,1)),0) as cpu").
Where("organization_id = ? AND deleted_at IS NULL AND status IN (2,3,6)", orgID).
Scan(&a).Error; err != nil {
return
}
// Convert Docker CPU shares to cores (1024 shares = 1 core)
cpuCores = int(a.CPU) / 1024
if a.CPU%1024 != 0 {
cpuCores++ // round up partial cores
}
return int(count), a.Mem, cpuCores, nil
Comment on lines 135 to +146
Copy link

Copilot AI Feb 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

currentAllocations now aggregates CPU usage as cpu_shares * replicas and converts that to cores, but the corresponding CPU quota check in CanAllocate still derives requested cores only from the per-replica CPUshares value and does not multiply by the requested replica count. This inconsistency means that scaling or starting deployments with multiple replicas can under-count the additional CPU usage in the quota check and allow effective CPU consumption to exceed the plan/override limit. Consider updating the requested-CPU calculation in CanAllocate to match this aggregation strategy (e.g., using per-replica CPU shares times the requested replica count before converting to cores).

Copilot uses AI. Check for mistakes.
}

// GetEffectiveLimits returns the effective memory and CPU limits for an organization
Expand Down
Loading