Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/api/setup/dependencies.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func InitServices(c ServiceConfig) (*Services, *Workers, error) {
eventSvc := services.NewEventService(services.EventServiceParams{Repo: c.Repos.Event, RBACSvc: rbacSvc, Publisher: wsHub, Logger: c.Logger})

// 3. Cloud Infrastructure Services (VPC, Subnet, Instance, Volume, SG, LB)
vpcSvc := services.NewVpcService(services.VpcServiceParams{Repo: c.Repos.Vpc, LBRepo: c.Repos.LB, PeeringRepo: c.Repos.VPCPeering, AsRepo: c.Repos.AutoScaling, RBACSvc: rbacSvc, Network: c.Network, AuditSvc: auditSvc, Logger: c.Logger, DefaultCIDR: c.Config.DefaultVPCCIDR})
vpcSvc := services.NewVpcService(services.VpcServiceParams{Repo: c.Repos.Vpc, LBRepo: c.Repos.LB, PeeringRepo: c.Repos.VPCPeering, AsRepo: c.Repos.AutoScaling, RBACSvc: rbacSvc, Network: c.Network, AuditSvc: auditSvc, Logger: c.Logger, DefaultCIDR: c.Config.DefaultVPCCIDR, ComputeBackend: c.Config.ComputeBackend})
subnetSvc := services.NewSubnetService(services.SubnetServiceParams{Repo: c.Repos.Subnet, RBACSvc: rbacSvc, VpcRepo: c.Repos.Vpc, AuditSvc: auditSvc, Logger: c.Logger})
volumeSvc := services.NewVolumeService(services.VolumeServiceParams{Repo: c.Repos.Volume, RBACSvc: rbacSvc, Storage: c.Storage, EventSvc: eventSvc, AuditSvc: auditSvc, Logger: c.Logger})

Expand Down
4 changes: 0 additions & 4 deletions internal/core/services/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,6 @@ func (s *CacheService) resolveNetworkID(ctx context.Context, vpcID *uuid.UUID) (
s.logger.Error("failed to get VPC", "vpc_id", vpcID, "error", err)
return "", err
}
// For Docker backend with OVS networks, don't pass br-vpc-* networks
if s.compute != nil && s.compute.Type() == "docker" && strings.HasPrefix(vpc.NetworkID, "br-vpc-") {
return "", nil
}
return vpc.NetworkID, nil
}

Expand Down
3 changes: 0 additions & 3 deletions internal/core/services/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -1195,9 +1195,6 @@ func (s *DatabaseService) resolveVpcNetwork(ctx context.Context, vpcID *uuid.UUI
if err != nil {
return "", err
}
if s.compute != nil && s.compute.Type() == "docker" && strings.HasPrefix(vpc.NetworkID, "br-vpc-") {
return "", nil
}
return vpc.NetworkID, nil
}

Expand Down
4 changes: 2 additions & 2 deletions internal/core/services/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -606,8 +606,8 @@ func (s *InstanceService) StopInstance(ctx context.Context, idOrName string) err

if err := s.compute.StopInstance(ctx, target); err != nil {
platform.InstanceOperationsTotal.WithLabelValues("stop", "failure").Inc()
s.logger.Error("failed to stop docker container", "container_id", target, "error", err)
return errors.Wrap(errors.Internal, "failed to stop container", err)
s.logger.Error("failed to stop instance", "backend", s.compute.Type(), "id", target, "error", err)
return errors.Wrap(errors.Internal, fmt.Sprintf("failed to stop %s instance", s.compute.Type()), err)
}

platform.InstancesTotal.WithLabelValues("running", s.compute.Type()).Dec()
Expand Down
48 changes: 34 additions & 14 deletions internal/core/services/vpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ type VpcServiceParams struct {
AuditSvc ports.AuditService
Logger *slog.Logger
DefaultCIDR string
// ComputeBackend is the compute backend type ("docker", "libvirt", "firecracker").
// When "libvirt", VPC creation skips OVS bridge creation since libvirt manages its own networking.
ComputeBackend string
}

// VpcService handles the lifecycle of Virtual Private Clouds (VPCs),
Expand All @@ -44,6 +47,7 @@ type VpcService struct {
auditSvc ports.AuditService
logger *slog.Logger
defaultCIDR string
computeBackend string
}

// NewVpcService creates a new instance of VpcService.
Expand All @@ -68,6 +72,7 @@ func NewVpcService(params VpcServiceParams) *VpcService {
auditSvc: params.AuditSvc,
logger: logger,
defaultCIDR: defaultCIDR,
computeBackend: params.ComputeBackend,
}
}

Expand Down Expand Up @@ -102,10 +107,19 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
// 1. Derive a VNI from the first byte of the VPC ID plus 100 (demo only; not a hash and not guaranteed unique, so two VPCs can collide)
vxlanID := int(vpcID[0]) + 100

// 2. Create OVS bridge
bridgeName := fmt.Sprintf("br-vpc-%s", vpcID.String()[:8])
if err := s.network.CreateBridge(ctx, bridgeName, vxlanID); err != nil {
return nil, errors.Wrap(errors.Internal, "failed to create OVS bridge", err)
var bridgeName string
var bridgeCreated bool

// 2. Create OVS bridge (skipped for libvirt compute backend)
if s.computeBackend != "libvirt" {
if s.network == nil {
return nil, errors.New(errors.Internal, "network backend is required for VPC bridge creation")
}
bridgeName = fmt.Sprintf("br-vpc-%s", vpcID.String()[:8])
if err := s.network.CreateBridge(ctx, bridgeName, vxlanID); err != nil {
return nil, errors.Wrap(errors.Internal, "failed to create OVS bridge", err)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
bridgeCreated = true
}

// 3. Construct ARN
Expand All @@ -118,7 +132,7 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
TenantID: tenantID,
Name: name,
CIDRBlock: cidrBlock,
NetworkID: bridgeName,
NetworkID: bridgeName, // empty string for libvirt
VXLANID: vxlanID,
Status: "active",
ARN: arn,
Expand All @@ -127,9 +141,11 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do

if err := s.repo.Create(ctx, vpc); err != nil {
// Cleanup OVS bridge if DB fails
s.logger.Error("failed to create VPC in DB, rolling back bridge", "name", name, "error", err)
if rbErr := s.network.DeleteBridge(ctx, bridgeName); rbErr != nil {
s.logger.Error("failed to rollback bridge", "bridge", bridgeName, "error", rbErr)
if bridgeCreated {
s.logger.Error("failed to create VPC in DB, rolling back bridge", "name", name, "error", err)
if rbErr := s.network.DeleteBridge(ctx, bridgeName); rbErr != nil {
s.logger.Error("failed to rollback bridge", "bridge", bridgeName, "error", rbErr)
}
}
return nil, errors.Wrap(errors.Internal, "failed to create VPC in database", err)
}
Expand All @@ -153,7 +169,9 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
// Rollback: delete VPC
s.logger.Error("failed to create main route table, rolling back VPC", "error", err)
_ = s.repo.Delete(ctx, vpc.ID)
_ = s.network.DeleteBridge(ctx, bridgeName)
if bridgeCreated {
_ = s.network.DeleteBridge(ctx, bridgeName)
}
return nil, errors.Wrap(errors.Internal, "failed to create main route table", err)
}
}
Expand Down Expand Up @@ -224,12 +242,14 @@ func (s *VpcService) DeleteVPC(ctx context.Context, idOrName string, force bool)
}
}

// 2. Remove OVS bridge
if err := s.network.DeleteBridge(ctx, vpc.NetworkID); err != nil {
s.logger.Error("failed to remove OVS bridge", "bridge", vpc.NetworkID, "error", err)
return errors.Wrap(errors.Internal, "failed to remove OVS bridge", err)
// 2. Remove OVS bridge (skip for libvirt where NetworkID is empty)
if vpc.NetworkID != "" {
if err := s.network.DeleteBridge(ctx, vpc.NetworkID); err != nil {
s.logger.Error("failed to remove OVS bridge", "bridge", vpc.NetworkID, "error", err)
return errors.Wrap(errors.Internal, "failed to remove OVS bridge", err)
}
s.logger.Info("vpc bridge removed", "bridge", vpc.NetworkID)
}
s.logger.Info("vpc bridge removed", "bridge", vpc.NetworkID)

// 3. Delete from DB
if err := s.repo.Delete(ctx, vpc.ID); err != nil {
Expand Down
6 changes: 6 additions & 0 deletions internal/platform/resilient_compute.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ func (o ResilientComputeOpts) withDefaults() ResilientComputeOpts {

// ResilientCompute wraps a ComputeBackend with circuit breaker, bulkhead,
// and per-call timeouts. It implements the ports.ComputeBackend interface.
//
// Note: the circuit breaker is per-backend-instance (one breaker guards all
// operations on a single compute backend). If one operation fails repeatedly,
// the breaker trips and blocks all subsequent calls to that backend until the
// reset timeout expires. This is a deliberate design trade-off for simplicity.
// For finer-grained isolation, each operation type could have its own breaker.
type ResilientCompute struct {
inner ports.ComputeBackend
cb *CircuitBreaker
Expand Down
Loading