diff --git a/apps/deployments-service/internal/service/lifecycle.go b/apps/deployments-service/internal/service/lifecycle.go
index 5196a95a..2ab2534b 100644
--- a/apps/deployments-service/internal/service/lifecycle.go
+++ b/apps/deployments-service/internal/service/lifecycle.go
@@ -652,6 +652,27 @@ func (s *Service) StartDeployment(ctx context.Context, req *connect.Request[depl
 		return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
 	}
 
+	// Quota check: verify memory/CPU won't exceed limits when starting
+	startMemory := int64(512 * 1024 * 1024) // Default 512MB
+	if dbDep.MemoryBytes != nil {
+		startMemory = *dbDep.MemoryBytes
+	}
+	startCPU := int64(1024) // Default
+	if dbDep.CPUShares != nil {
+		startCPU = *dbDep.CPUShares
+	}
+	startReplicas := 1
+	if dbDep.Replicas != nil {
+		startReplicas = int(*dbDep.Replicas)
+	}
+	if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{
+		Replicas:    startReplicas,
+		MemoryBytes: startMemory,
+		CPUshares:   startCPU,
+	}); err != nil {
+		return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err))
+	}
+
 	// Check if this is a compose-based deployment
 	if dbDep.ComposeYaml != "" {
 		// Deploy using Docker Compose
@@ -838,18 +859,43 @@ func (s *Service) ScaleDeployment(ctx context.Context, req *connect.Request[depl
 	if err := s.permissionChecker.CheckScopedPermission(ctx, orgID, auth.ScopedPermission{Permission: auth.PermissionDeploymentScale, ResourceType: "deployment", ResourceID: deploymentID}); err != nil {
 		return nil, connect.NewError(connect.CodePermissionDenied, err)
 	}
-	// Quota check: replicas delta
+	// Quota check: replicas + memory/CPU
 	newReplicas := int(req.Msg.GetReplicas())
 	if newReplicas <= 0 {
 		return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("replicas must be > 0"))
 	}
-	if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{Replicas: newReplicas}); err != nil {
-		return nil, connect.NewError(connect.CodeFailedPrecondition, err)
+	dbDep, err := s.repo.GetByID(ctx, deploymentID)
+	if err != nil {
+		return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
+	}
+	// Only check quota for the delta (additional replicas beyond current).
+	// currentAllocations already includes this deployment's existing usage.
+	currentReplicas := 1
+	if dbDep.Replicas != nil {
+		currentReplicas = int(*dbDep.Replicas)
+	}
+	deltaReplicas := newReplicas - currentReplicas
+	if deltaReplicas > 0 {
+		scaleMemory := int64(512 * 1024 * 1024) // Default 512MB
+		if dbDep.MemoryBytes != nil {
+			scaleMemory = *dbDep.MemoryBytes
+		}
+		scaleCPU := int64(1024) // Default
+		if dbDep.CPUShares != nil {
+			scaleCPU = *dbDep.CPUShares
+		}
+		if err := s.quotaChecker.CanAllocate(ctx, orgID, quota.RequestedResources{
+			Replicas:    deltaReplicas,
+			MemoryBytes: scaleMemory,
+			CPUshares:   scaleCPU,
+		}); err != nil {
+			return nil, connect.NewError(connect.CodeFailedPrecondition, fmt.Errorf("quota check failed: %w", err))
+		}
 	}
 	if s.manager != nil {
 		_ = s.manager.ScaleDeployment(ctx, deploymentID, newReplicas)
 	}
-	dbDep, err := s.repo.GetByID(ctx, deploymentID)
+	dbDep, err = s.repo.GetByID(ctx, deploymentID)
 	if err != nil {
 		return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("deployment %s not found", deploymentID))
 	}
diff --git a/apps/shared/pkg/quota/quota.go b/apps/shared/pkg/quota/quota.go
index d0698ca5..864a1774 100644
--- a/apps/shared/pkg/quota/quota.go
+++ b/apps/shared/pkg/quota/quota.go
@@ -28,7 +28,7 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re
 	if err != nil { return fmt.Errorf("quota: load: %w", err) }
 
 	// Get plan limits first (these are the maximum boundary)
-	planDeployMax, planMem, planCPU := c.getPlanLimits(organizationID)
+	planDeployMax, planMem, planCPU := c.getPlanLimitsFromQuota(quota)
 	
 	// Get effective limits: use overrides if set, but cap them to plan limits
 	// Plan limits are the final boundary - org overrides cannot exceed them
@@ -84,9 +84,10 @@ func (c *Checker) CanAllocate(ctx context.Context, organizationID string, req Re
 	if effMem > 0 && curMemBytes+req.MemoryBytes*int64(req.Replicas) > effMem {
 		return fmt.Errorf("quota exceeded: memory %d bytes > max %d bytes", curMemBytes+req.MemoryBytes*int64(req.Replicas), effMem)
 	}
-	// Convert Docker CPUshares to cores (Docker uses 1024 shares per core)
-	reqCores := int(req.CPUshares) / 1024
-	if req.CPUshares%1024 != 0 {
+	// Convert Docker CPUshares to cores, multiplied by replicas (matching currentAllocations)
+	totalCPUshares := req.CPUshares * int64(req.Replicas)
+	reqCores := int(totalCPUshares) / 1024
+	if totalCPUshares%1024 != 0 {
 		reqCores++ // round up partial cores
 	}
 	if effCPU > 0 && curCPUcores+reqCores > effCPU {
@@ -104,40 +105,45 @@ func (c *Checker) getQuota(orgID string) (*database.OrgQuota, error) {
 	return &quota, nil
 }
 
-// getPlanLimits gets plan limits for an organization
-func (c *Checker) getPlanLimits(orgID string) (deploymentsMax int, memoryBytes int64, cpuCores int) {
-	var quota database.OrgQuota
-	if err := database.DB.Where("organization_id = ?", orgID).First(&quota).Error; err != nil {
-		return 0, 0, 0 // No plan
+// getPlanLimitsFromQuota returns the plan limits (max deployments, memory bytes, CPU cores) for an already-loaded OrgQuota; all zeros when no plan is set or the plan lookup fails.
+func (c *Checker) getPlanLimitsFromQuota(q *database.OrgQuota) (deploymentsMax int, memoryBytes int64, cpuCores int) {
+	if q.PlanID == "" {
+		return 0, 0, 0 // no plan assigned
 	}
-	
-	if quota.PlanID == "" {
-		return 0, 0, 0 // No plan assigned
-	}
-	
 	var plan database.OrganizationPlan
-	if err := database.DB.First(&plan, "id = ?", quota.PlanID).Error; err != nil {
-		return 0, 0, 0 // Plan not found
+	if err := database.DB.First(&plan, "id = ?", q.PlanID).Error; err != nil {
+		return 0, 0, 0 // plan row could not be loaded
 	}
-	
 	return plan.DeploymentsMax, plan.MemoryBytes, plan.CPUCores
 }
 
 func (c *Checker) currentAllocations(orgID string) (replicas int, memBytes int64, cpuCores int, err error) {
-	// Count running replicas
+	// Count deployment_locations rows with status "running" that belong to this org's deployments.
 	var count int64
-    if err = database.DB.Model(&database.DeploymentLocation{}).
-        Where("deployment_locations.status = ?", "running").
-        Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id").
-        Where("d.organization_id = ?", orgID).
-        Count(&count).Error; err != nil { return }
-	// Sum requested memory and CPU across org deployments (nil treated as 0)
+	if err = database.DB.Model(&database.DeploymentLocation{}).
+		Where("deployment_locations.status = ?", "running").
+		Joins("JOIN deployments d ON d.id = deployment_locations.deployment_id").
+		Where("d.organization_id = ?", orgID).
+		Count(&count).Error; err != nil {
+		return
+	}
+	// Sum memory and CPU shares over the org's non-deleted deployments, each multiplied by its
+	// replica count (NULL replicas counts as 1, NULL memory/CPU as 0). Only these statuses count:
+	// BUILDING=2, RUNNING=3, DEPLOYING=6 (NOTE(review): hard-coded here; confirm against the status enum).
 	type agg struct{ Mem int64; CPU int64 }
 	var a agg
 	if err = database.DB.Model(&database.Deployment{}).
-		Select("COALESCE(SUM(COALESCE(memory_bytes,0)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0)),0) as cpu").
-		Where("organization_id = ?", orgID).Scan(&a).Error; err != nil { return }
-	return int(count), a.Mem, int(a.CPU), nil
+		Select("COALESCE(SUM(COALESCE(memory_bytes,0) * COALESCE(replicas,1)),0) as mem, COALESCE(SUM(COALESCE(cpu_shares,0) * COALESCE(replicas,1)),0) as cpu").
+		Where("organization_id = ? AND deleted_at IS NULL AND status IN (2,3,6)", orgID).
+		Scan(&a).Error; err != nil {
+		return
+	}
+	// Convert the summed Docker CPU shares to whole cores (1024 shares = 1 core).
+	cpuCores = int(a.CPU) / 1024
+	if a.CPU%1024 != 0 {
+		cpuCores++ // round up so a partial core counts as a full one
+	}
+	return int(count), a.Mem, cpuCores, nil
 }
 
 // GetEffectiveLimits returns the effective memory and CPU limits for an organization