diff --git a/internal/api/setup/dependencies.go b/internal/api/setup/dependencies.go
index aeef50ad..1c3f2c3d 100644
--- a/internal/api/setup/dependencies.go
+++ b/internal/api/setup/dependencies.go
@@ -247,7 +247,7 @@ func InitServices(c ServiceConfig) (*Services, *Workers, error) {
 	eventSvc := services.NewEventService(services.EventServiceParams{Repo: c.Repos.Event, RBACSvc: rbacSvc, Publisher: wsHub, Logger: c.Logger})
 
 	// 3. Cloud Infrastructure Services (VPC, Subnet, Instance, Volume, SG, LB)
-	vpcSvc := services.NewVpcService(services.VpcServiceParams{Repo: c.Repos.Vpc, LBRepo: c.Repos.LB, PeeringRepo: c.Repos.VPCPeering, AsRepo: c.Repos.AutoScaling, RBACSvc: rbacSvc, Network: c.Network, AuditSvc: auditSvc, Logger: c.Logger, DefaultCIDR: c.Config.DefaultVPCCIDR})
+	vpcSvc := services.NewVpcService(services.VpcServiceParams{Repo: c.Repos.Vpc, LBRepo: c.Repos.LB, PeeringRepo: c.Repos.VPCPeering, AsRepo: c.Repos.AutoScaling, RBACSvc: rbacSvc, Network: c.Network, AuditSvc: auditSvc, Logger: c.Logger, DefaultCIDR: c.Config.DefaultVPCCIDR, ComputeBackend: c.Config.ComputeBackend})
 	subnetSvc := services.NewSubnetService(services.SubnetServiceParams{Repo: c.Repos.Subnet, RBACSvc: rbacSvc, VpcRepo: c.Repos.Vpc, AuditSvc: auditSvc, Logger: c.Logger})
 	volumeSvc := services.NewVolumeService(services.VolumeServiceParams{Repo: c.Repos.Volume, RBACSvc: rbacSvc, Storage: c.Storage, EventSvc: eventSvc, AuditSvc: auditSvc, Logger: c.Logger})
 
diff --git a/internal/core/services/cache.go b/internal/core/services/cache.go
index 34d7dba0..2a2380e4 100644
--- a/internal/core/services/cache.go
+++ b/internal/core/services/cache.go
@@ -174,10 +174,6 @@ func (s *CacheService) resolveNetworkID(ctx context.Context, vpcID *uuid.UUID) (
 		s.logger.Error("failed to get VPC", "vpc_id", vpcID, "error", err)
 		return "", err
 	}
-	// For Docker backend with OVS networks, don't pass br-vpc-* networks
-	if s.compute != nil && s.compute.Type() == "docker" && strings.HasPrefix(vpc.NetworkID, "br-vpc-") {
-		return "", nil
-	}
 	return vpc.NetworkID, nil
 }
 
diff --git a/internal/core/services/database.go b/internal/core/services/database.go
index 1951e913..8d520645 100644
--- a/internal/core/services/database.go
+++ b/internal/core/services/database.go
@@ -1195,9 +1195,6 @@ func (s *DatabaseService) resolveVpcNetwork(ctx context.Context, vpcID *uuid.UUI
 	if err != nil {
 		return "", err
 	}
-	if s.compute != nil && s.compute.Type() == "docker" && strings.HasPrefix(vpc.NetworkID, "br-vpc-") {
-		return "", nil
-	}
 	return vpc.NetworkID, nil
 }
 
diff --git a/internal/core/services/instance.go b/internal/core/services/instance.go
index 298a7b28..6a0e3db9 100644
--- a/internal/core/services/instance.go
+++ b/internal/core/services/instance.go
@@ -606,8 +606,8 @@ func (s *InstanceService) StopInstance(ctx context.Context, idOrName string) err
 
 	if err := s.compute.StopInstance(ctx, target); err != nil {
 		platform.InstanceOperationsTotal.WithLabelValues("stop", "failure").Inc()
-		s.logger.Error("failed to stop docker container", "container_id", target, "error", err)
-		return errors.Wrap(errors.Internal, "failed to stop container", err)
+		s.logger.Error("failed to stop instance", "backend", s.compute.Type(), "id", target, "error", err)
+		return errors.Wrap(errors.Internal, fmt.Sprintf("failed to stop %s instance", s.compute.Type()), err)
 	}
 	platform.InstancesTotal.WithLabelValues("running", s.compute.Type()).Dec()
 
diff --git a/internal/core/services/vpc.go b/internal/core/services/vpc.go
index 5b2b701d..f83726b1 100644
--- a/internal/core/services/vpc.go
+++ b/internal/core/services/vpc.go
@@ -29,6 +29,9 @@ type VpcServiceParams struct {
 	AuditSvc    ports.AuditService
 	Logger      *slog.Logger
 	DefaultCIDR string
+	// ComputeBackend is the compute backend type ("docker", "libvirt", "firecracker").
+	// When "libvirt", VPC creation skips OVS bridge creation since libvirt manages its own networking.
+	ComputeBackend string
 }
 
 // VpcService handles the lifecycle of Virtual Private Clouds (VPCs),
@@ -44,6 +47,7 @@ type VpcService struct {
 	auditSvc    ports.AuditService
 	logger      *slog.Logger
 	defaultCIDR string
+	computeBackend string
 }
 
 // NewVpcService creates a new instance of VpcService.
@@ -68,6 +72,7 @@ func NewVpcService(params VpcServiceParams) *VpcService {
 		auditSvc:    params.AuditSvc,
 		logger:      logger,
 		defaultCIDR: defaultCIDR,
+		computeBackend: params.ComputeBackend,
 	}
 }
 
@@ -102,10 +107,19 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
 	// 1. Generate unique VNI (for demo purposes we use a hash based int)
 	vxlanID := int(vpcID[0]) + 100
 
-	// 2. Create OVS bridge
-	bridgeName := fmt.Sprintf("br-vpc-%s", vpcID.String()[:8])
-	if err := s.network.CreateBridge(ctx, bridgeName, vxlanID); err != nil {
-		return nil, errors.Wrap(errors.Internal, "failed to create OVS bridge", err)
+	var bridgeName string
+	var bridgeCreated bool
+
+	// 2. Create OVS bridge (skipped for libvirt compute backend)
+	if s.computeBackend != "libvirt" {
+		if s.network == nil {
+			return nil, errors.New(errors.Internal, "network backend is required for VPC bridge creation")
+		}
+		bridgeName = fmt.Sprintf("br-vpc-%s", vpcID.String()[:8])
+		if err := s.network.CreateBridge(ctx, bridgeName, vxlanID); err != nil {
+			return nil, errors.Wrap(errors.Internal, "failed to create OVS bridge", err)
+		}
+		bridgeCreated = true
 	}
 
 	// 3. Construct ARN
@@ -118,7 +132,7 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
 		TenantID:  tenantID,
 		Name:      name,
 		CIDRBlock: cidrBlock,
-		NetworkID: bridgeName,
+		NetworkID: bridgeName, // empty string for libvirt
 		VXLANID:   vxlanID,
 		Status:    "active",
 		ARN:       arn,
@@ -127,9 +141,11 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
 
 	if err := s.repo.Create(ctx, vpc); err != nil {
 		// Cleanup OVS bridge if DB fails
-		s.logger.Error("failed to create VPC in DB, rolling back bridge", "name", name, "error", err)
-		if rbErr := s.network.DeleteBridge(ctx, bridgeName); rbErr != nil {
-			s.logger.Error("failed to rollback bridge", "bridge", bridgeName, "error", rbErr)
+		if bridgeCreated {
+			s.logger.Error("failed to create VPC in DB, rolling back bridge", "name", name, "error", err)
+			if rbErr := s.network.DeleteBridge(ctx, bridgeName); rbErr != nil {
+				s.logger.Error("failed to rollback bridge", "bridge", bridgeName, "error", rbErr)
+			}
 		}
 		return nil, errors.Wrap(errors.Internal, "failed to create VPC in database", err)
 	}
@@ -153,7 +169,9 @@ func (s *VpcService) CreateVPC(ctx context.Context, name, cidrBlock string) (*do
 		// Rollback: delete VPC
 		s.logger.Error("failed to create main route table, rolling back VPC", "error", err)
 		_ = s.repo.Delete(ctx, vpc.ID)
-		_ = s.network.DeleteBridge(ctx, bridgeName)
+		if bridgeCreated {
+			_ = s.network.DeleteBridge(ctx, bridgeName)
+		}
 		return nil, errors.Wrap(errors.Internal, "failed to create main route table", err)
 	}
 }
@@ -224,12 +242,14 @@ func (s *VpcService) DeleteVPC(ctx context.Context, idOrName string, force bool)
 		}
 	}
 
-	// 2. Remove OVS bridge
-	if err := s.network.DeleteBridge(ctx, vpc.NetworkID); err != nil {
-		s.logger.Error("failed to remove OVS bridge", "bridge", vpc.NetworkID, "error", err)
-		return errors.Wrap(errors.Internal, "failed to remove OVS bridge", err)
+	// 2. Remove OVS bridge (skip for libvirt where NetworkID is empty)
+	if vpc.NetworkID != "" {
+		if err := s.network.DeleteBridge(ctx, vpc.NetworkID); err != nil {
+			s.logger.Error("failed to remove OVS bridge", "bridge", vpc.NetworkID, "error", err)
+			return errors.Wrap(errors.Internal, "failed to remove OVS bridge", err)
+		}
+		s.logger.Info("vpc bridge removed", "bridge", vpc.NetworkID)
 	}
-	s.logger.Info("vpc bridge removed", "bridge", vpc.NetworkID)
 
 	// 3. Delete from DB
 	if err := s.repo.Delete(ctx, vpc.ID); err != nil {
diff --git a/internal/platform/resilient_compute.go b/internal/platform/resilient_compute.go
index c477ed63..60a5059a 100644
--- a/internal/platform/resilient_compute.go
+++ b/internal/platform/resilient_compute.go
@@ -54,6 +54,12 @@ func (o ResilientComputeOpts) withDefaults() ResilientComputeOpts {
 
 // ResilientCompute wraps a ComputeBackend with circuit breaker, bulkhead,
 // and per-call timeouts. It implements the ports.ComputeBackend interface.
+//
+// Note: the circuit breaker is per-backend-instance (one breaker guards all
+// operations on a single compute backend). If one operation fails repeatedly,
+// the breaker trips and blocks all subsequent calls to that backend until the
+// reset timeout expires. This is a deliberate design trade-off for simplicity.
+// For finer-grained isolation, each operation type could have its own breaker.
 type ResilientCompute struct {
 	inner ports.ComputeBackend
 	cb    *CircuitBreaker