From 0801b12656c67765d9b166f9866d61ce3bb7c742 Mon Sep 17 00:00:00 2001 From: veryCrunchy Date: Sun, 1 Feb 2026 05:30:43 +0100 Subject: [PATCH 1/3] fix(quota): convert CPU shares to rounded-up cores Convert stored Docker CPU shares into CPU cores when returning quota results. Previously the function returned raw cpu_shares values, which did not reflect user-visible CPU capacity. The change divides cpu_shares by 1024 (1024 shares = 1 core) and rounds up any partial core so that consumers see a whole-core allocation estimate. This prevents under-reporting CPU needs and aligns quota reporting with how CPU resources are commonly interpreted. --- apps/shared/pkg/quota/quota.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/shared/pkg/quota/quota.go b/apps/shared/pkg/quota/quota.go index d0698ca5..32945be8 100644 --- a/apps/shared/pkg/quota/quota.go +++ b/apps/shared/pkg/quota/quota.go @@ -137,7 +137,12 @@ func (c *Checker) currentAllocations(orgID string) (replicas int, memBytes int64 if err = database.DB.Model(&database.Deployment{}). Select("COALESCE(SUM(COALESCE(memory_bytes,0)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0)),0) as cpu"). Where("organization_id = ?", orgID).Scan(&a).Error; err != nil { return } - return int(count), a.Mem, int(a.CPU), nil + // Convert Docker CPU shares to cores (1024 shares = 1 core) + cpuCores = int(a.CPU) / 1024 + if a.CPU%1024 != 0 { + cpuCores++ // round up partial cores + } + return int(count), a.Mem, cpuCores, nil } // GetEffectiveLimits returns the effective memory and CPU limits for an organization From cf7496b5daa4ccac1b374f2446ed776ba9d4782f Mon Sep 17 00:00:00 2001 From: veryCrunchy Date: Sun, 1 Feb 2026 05:38:27 +0100 Subject: [PATCH 2/3] feat(quota): enforce memory/CPU checks on start and scale Add quota checks that include memory and CPU when starting or scaling deployments. 
When starting a deployment we now read the deployment's memory and CPU (falling back to sensible defaults) and call the quota checker to ensure the allocation is allowed before proceeding. The scale endpoint similarly fetches the deployment, derives memory/CPU values, and validates the requested replica count together with those resources. Refactor quota checker internals: rename getPlanLimits to getPlanLimitsFromQuota and adjust callers to supply the already-loaded OrgQuota. Also tidy up the currentAllocations query comments and formatting, and make the query sum memory and CPU per replica over active (running, building, or deploying), non-deleted deployments only. These changes prevent exceeding memory/CPU plan limits during start and scale operations and ensure quota decisions consider per-deployment resource usage, reducing risk of resource overcommit. --- .../internal/service/lifecycle.go | 45 ++++++++++++++++-- apps/shared/pkg/quota/quota.go | 46 +++++++++---------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/apps/deployments-service/internal/service/lifecycle.go b/apps/deployments-service/internal/service/lifecycle.go index 5196a95a..2a5d5ac1 100644 --- a/apps/deployments-service/internal/service/lifecycle.go +++ b/apps/deployments-service/internal/service/lifecycle.go @@ -652,6 +652,27 @@ func (s *Service) StartDeployment(ctx context.Context, req *connect.Request[depl return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) } + // Quota check: verify memory/CPU won't exceed limits when starting + startMemory := int64(512 * 1024 * 1024) // Default 512MB + if dbDep.MemoryBytes != nil { + startMemory = *dbDep.MemoryBytes + } + startCPU := int64(1024) // Default + if dbDep.CPUShares != nil { + startCPU = *dbDep.CPUShares + } + startReplicas := 1 + if dbDep.Replicas != nil { + startReplicas = int(*dbDep.Replicas) + } + if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ + Replicas: startReplicas, + MemoryBytes: startMemory, + CPUshares: startCPU, + }); err != nil { + return nil,
connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) + } + // Check if this is a compose-based deployment if dbDep.ComposeYaml != "" { // Deploy using Docker Compose @@ -838,18 +859,34 @@ func (s *Service) ScaleDeployment(ctx context.Context, req *connect.Request[depl if err := s.permissionChecker.CheckScopedPermission(ctx, orgID, auth.ScopedPermission{Permission: auth.PermissionDeploymentScale, ResourceType: "deployment", ResourceID: deploymentID}); err != nil { return nil, connect.NewError(connect.CodePermissionDenied, err) } - // Quota check: replicas delta + // Quota check: replicas + memory/CPU newReplicas := int(req.Msg.GetReplicas()) if newReplicas <= 0 { return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("replicas must be > 0")) } - if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{Replicas: newReplicas}); err != nil { - return nil, connect.NewError(connect.CodeFailedPrecondition, err) + dbDep, err := s.repo.GetByID(ctx, deploymentID) + if err != nil { + return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) + } + scaleMemory := int64(512 * 1024 * 1024) // Default 512MB + if dbDep.MemoryBytes != nil { + scaleMemory = *dbDep.MemoryBytes + } + scaleCPU := int64(1024) // Default + if dbDep.CPUShares != nil { + scaleCPU = *dbDep.CPUShares + } + if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ + Replicas: newReplicas, + MemoryBytes: scaleMemory, + CPUshares: scaleCPU, + }); err != nil { + return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) } if s.manager != nil { _ = s.manager.ScaleDeployment(ctx, deploymentID, newReplicas) } - dbDep, err := s.repo.GetByID(ctx, deploymentID) + dbDep, err = s.repo.GetByID(ctx, deploymentID) if err != nil { return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) } diff --git 
a/apps/shared/pkg/quota/quota.go b/apps/shared/pkg/quota/quota.go index 32945be8..d7ce96c7 100644 --- a/apps/shared/pkg/quota/quota.go +++ b/apps/shared/pkg/quota/quota.go @@ -28,7 +28,7 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re if err != nil { return fmt.Errorf("quota: load: %w", err) } // Get plan limits first (these are the maximum boundary) - planDeployMax, planMem, planCPU := c.getPlanLimits(organizationID) + planDeployMax, planMem, planCPU := c.getPlanLimitsFromQuota(quota) // Get effective limits: use overrides if set, but cap them to plan limits // Plan limits are the final boundary - org overrides cannot exceed them @@ -104,39 +104,39 @@ func (c *Checker) getQuota(orgID string) (*database.OrgQuota, error) { return &quota, nil } -// getPlanLimits gets plan limits for an organization -func (c *Checker) getPlanLimits(orgID string) (deploymentsMax int, memoryBytes int64, cpuCores int) { - var quota database.OrgQuota - if err := database.DB.Where("organization_id = ?", orgID).First(&quota).Error; err != nil { - return 0, 0, 0 // No plan - } - - if quota.PlanID == "" { - return 0, 0, 0 // No plan assigned +// getPlanLimitsFromQuota gets plan limits using an already-loaded OrgQuota +func (c *Checker) getPlanLimitsFromQuota(q *database.OrgQuota) (deploymentsMax int, memoryBytes int64, cpuCores int) { + if q.PlanID == "" { + return 0, 0, 0 } - var plan database.OrganizationPlan - if err := database.DB.First(&plan, "id = ?", quota.PlanID).Error; err != nil { - return 0, 0, 0 // Plan not found + if err := database.DB.First(&plan, "id = ?", q.PlanID).Error; err != nil { + return 0, 0, 0 } - return plan.DeploymentsMax, plan.MemoryBytes, plan.CPUCores } func (c *Checker) currentAllocations(orgID string) (replicas int, memBytes int64, cpuCores int, err error) { - // Count running replicas + // Count running replicas from deployment locations var count int64 - if err = database.DB.Model(&database.DeploymentLocation{}).
- Where("deployment_locations.status = ?", "running"). - Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id"). - Where("d.organization_id = ?", orgID). - Count(&count).Error; err != nil { return } - // Sum requested memory and CPU across org deployments (nil treated as 0) + if err = database.DB.Model(&database.DeploymentLocation{}). + Where("deployment_locations.status = ?", "running"). + Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id"). + Where("d.organization_id = ?", orgID). + Count(&count).Error; err != nil { + return + } + // Sum memory and CPU across active deployments, multiplied by their replica count. + // Only count deployments that are running, building, or deploying (not stopped/failed). + // RUNNING=3, BUILDING=2, DEPLOYING=6 type agg struct{ Mem int64; CPU int64 } var a agg if err = database.DB.Model(&database.Deployment{}). - Select("COALESCE(SUM(COALESCE(memory_bytes,0)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0)),0) as cpu"). - Where("organization_id = ?", orgID).Scan(&a).Error; err != nil { return } + Select("COALESCE(SUM(COALESCE(memory_bytes,0) * COALESCE(replicas,1)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0) * COALESCE(replicas,1)),0) as cpu"). + Where("organization_id = ? AND deleted_at IS NULL AND status IN (2,3,6)", orgID). + Scan(&a).Error; err != nil { + return + } // Convert Docker CPU shares to cores (1024 shares = 1 core) cpuCores = int(a.CPU) / 1024 if a.CPU%1024 != 0 { From 5c3c8117f1f30fb2228cbc6fdf0c38f20120ad69 Mon Sep 17 00:00:00 2001 From: veryCrunchy Date: Sun, 1 Feb 2026 06:02:05 +0100 Subject: [PATCH 3/3] fix(quota): check quota only for additional replicas Adjust quota checks to account for only the delta when scaling a deployment, rather than counting the deployment's current replicas as new allocations. Compute currentReplicas from the DB and calculate deltaReplicas = newReplicas - currentReplicas; perform memory/CPU quota validation only if deltaReplicas > 0. 
This prevents false quota failures when reducing replicas or keeping the same count. Also change CPU accounting in quota calculations to sum CPUshares across replicas before converting to cores. Multiply CPUshares by replicas, then divide by 1024 and round up partial cores. This makes CPU and memory checks consistent with currentAllocations and the new delta-based replication logic. --- .../internal/service/lifecycle.go | 35 ++++++++++++------- apps/shared/pkg/quota/quota.go | 7 ++-- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/apps/deployments-service/internal/service/lifecycle.go b/apps/deployments-service/internal/service/lifecycle.go index 2a5d5ac1..2ab2534b 100644 --- a/apps/deployments-service/internal/service/lifecycle.go +++ b/apps/deployments-service/internal/service/lifecycle.go @@ -868,20 +868,29 @@ func (s *Service) ScaleDeployment(ctx context.Context, req *connect.Request[depl if err != nil { return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID)) } - scaleMemory := int64(512 * 1024 * 1024) // Default 512MB - if dbDep.MemoryBytes != nil { - scaleMemory = *dbDep.MemoryBytes - } - scaleCPU := int64(1024) // Default - if dbDep.CPUShares != nil { - scaleCPU = *dbDep.CPUShares + // Only check quota for the delta (additional replicas beyond current). + // currentAllocations already includes this deployment's existing usage. 
+ currentReplicas := 1 + if dbDep.Replicas != nil { + currentReplicas = int(*dbDep.Replicas) } - if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ - Replicas: newReplicas, - MemoryBytes: scaleMemory, - CPUshares: scaleCPU, - }); err != nil { - return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) + deltaReplicas := newReplicas - currentReplicas + if deltaReplicas > 0 { + scaleMemory := int64(512 * 1024 * 1024) // Default 512MB + if dbDep.MemoryBytes != nil { + scaleMemory = *dbDep.MemoryBytes + } + scaleCPU := int64(1024) // Default + if dbDep.CPUShares != nil { + scaleCPU = *dbDep.CPUShares + } + if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{ + Replicas: deltaReplicas, + MemoryBytes: scaleMemory, + CPUshares: scaleCPU, + }); err != nil { + return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err)) + } } if s.manager != nil { _ = s.manager.ScaleDeployment(ctx, deploymentID, newReplicas) diff --git a/apps/shared/pkg/quota/quota.go b/apps/shared/pkg/quota/quota.go index d7ce96c7..864a1774 100644 --- a/apps/shared/pkg/quota/quota.go +++ b/apps/shared/pkg/quota/quota.go @@ -84,9 +84,10 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re if effMem > 0 && curMemBytes+req.MemoryBytes*int64(req.Replicas) > effMem { return fmt.Errorf("quota exceeded: memory %d bytes > max %d bytes", curMemBytes+req.MemoryBytes*int64(req.Replicas), effMem) } - // Convert Docker CPUshares to cores (Docker uses 1024 shares per core) - reqCores := int(req.CPUshares) / 1024 - if req.CPUshares%1024 != 0 { + // Convert Docker CPUshares to cores, multiplied by replicas (matching currentAllocations) + totalCPUshares := req.CPUshares * int64(req.Replicas) + reqCores := int(totalCPUshares) / 1024 + if totalCPUshares%1024 != 0 { reqCores++ // round up partial cores } if effCPU > 0 && curCPUcores+reqCores > effCPU {