From f8041dbfc3f34c980412f84593ca7bcbe37fc03c Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 15:18:57 +0300 Subject: [PATCH 01/48] Initial commit --- ARCHITECTURE_ANALYSIS.md | 1131 ++++++++++++++++++++++ LICENSE | 201 ++++ README.md | 103 +- TODO.md | 57 ++ go.mod | 49 + go.sum | 154 +++ internal/config/types.go | 244 +++++ internal/infrastructure/ssh/client.go | 268 +++++ internal/infrastructure/ssh/interface.go | 45 + pkg/testkit/cluster/cluster.go | 211 ++++ tests/cluster_creation_test.go | 75 ++ tests/cluster_creation_test.yml | 69 ++ tests/integration_suite_test.go | 29 + 13 files changed, 2634 insertions(+), 2 deletions(-) create mode 100644 ARCHITECTURE_ANALYSIS.md create mode 100644 LICENSE create mode 100644 TODO.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/config/types.go create mode 100644 internal/infrastructure/ssh/client.go create mode 100644 internal/infrastructure/ssh/interface.go create mode 100644 pkg/testkit/cluster/cluster.go create mode 100644 tests/cluster_creation_test.go create mode 100644 tests/cluster_creation_test.yml create mode 100644 tests/integration_suite_test.go diff --git a/ARCHITECTURE_ANALYSIS.md b/ARCHITECTURE_ANALYSIS.md new file mode 100644 index 0000000..f0c5451 --- /dev/null +++ b/ARCHITECTURE_ANALYSIS.md @@ -0,0 +1,1131 @@ +# Architecture Analysis and Refactoring Plan + +## Executive Summary + +This document provides a deep analysis of the current `testkit_v2` codebase structure and proposes a clear, modular architecture to replace the current "pasta code" implementation. The analysis covers: + +1. Current structure and dependencies +2. Architectural problems identified +3. Proposed target architecture +4. Detailed refactoring plan with step-by-step migration strategy + +--- + +## 1. 
Current Structure Analysis + +### 1.1 Package Structure + +**Critical Finding**: All code is currently in a single package `integration`: +- `testkit_v2/tests/*` - All test files +- `testkit_v2/util/*` - All utility files + +This monolith package design causes: +- No encapsulation boundaries +- Global state scattered across files +- Hidden circular dependencies +- Difficulty in testing components in isolation +- Hard to understand code flow and dependencies + +### 1.2 File Organization + +#### Test Files (`testkit_v2/tests/`) +``` +tests/ +├── 00_healthcheck_test.go # Basic cluster health checks +├── 01_sds_nc_test.go # LVG (LVM Volume Group) operations +├── 03_sds_lv_test.go # PVC (Persistent Volume Claim) operations +├── 05_sds_node_configurator_test.go # Comprehensive LVM tests (thick/thin) +├── 99_finalizer_test.go # Cleanup tests +├── tools.go # Shared test utilities +└── data-exporter/ + └── base_test.go # Base test for data exporter feature +``` + +#### Utility Files (`testkit_v2/util/`) +``` +util/ +├── env.go # Environment config, flags, cluster types +├── filter.go # Filter/Where interfaces +├── kube_cluster_definitions.go # Cluster definition types (NEW) +├── kube_cluster.go # Cluster singleton/cache +├── kube_deckhouse_modules.go # Deckhouse module management +├── kube_deploy.go # Deployment/Service operations +├── kube_modules.go # Custom CRDs (SSHCredentials, StaticInstance) +├── kube_node.go # Node operations, execution +├── kube_secret.go # SSH credentials CRUD +├── kube_storage.go # Storage (BD, LVG, PVC, SC) +├── kube_tester.go # Test execution helpers +├── kube_vm_cluster.go # VM cluster creation, Deckhouse install +├── kube_vm.go # VM, VD, VMBD operations +├── kube.go # Core Kubernetes client setup +├── log.go # Logging utilities +├── ssh.go # SSH client operations +└── tools.go # Utility functions (retry, random) +``` + +### 1.3 Dependency Graph + +``` +Tests (integration package) + └─> util package (imported as 
"github.com/deckhouse/sds-e2e/util") + └─> Actually same package! Only different directory structure + +Current Dependencies: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +kube_cluster.go (singleton/cache) + ├─> env.go (envInit, global vars) + ├─> kube.go (InitKCluster) + └─> ssh.go (GetSshClient, tunnel creation) + +kube.go (core client setup) + ├─> kube_modules.go (D8SchemeBuilder) + └─> Multiple Kubernetes API imports + +kube_storage.go (storage operations) + ├─> kube.go (KCluster type) + ├─> filter.go (filters) + └─> tools.go (RetrySec) + +kube_node.go (node operations) + ├─> kube.go (KCluster type) + ├─> kube_modules.go (StaticInstance CRD) + ├─> filter.go (filters) + └─> ssh.go (ExecNodeSsh) + +kube_vm_cluster.go (cluster creation) + ├─> env.go (global vars) + ├─> kube.go (InitKCluster) + ├─> kube_vm.go (VM operations) + ├─> kube_node.go (AddStaticNodes) + ├─> ssh.go (SSH operations) + └─> tools.go (retry utilities) + +kube_vm.go (VM operations) + ├─> kube.go (KCluster type) + ├─> filter.go (filters) + └─> tools.go (hashMd5) + +All files → env.go (global state!) +All files → log.go (logging) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +### 1.4 Major Architectural Problems + +#### Problem 1: Global State Everywhere +- `env.go` contains ~50 global variables +- Package-level variables in multiple files (`clrCache`, `mx`, etc.) 
+- No dependency injection +- Hard to test in isolation +- Race conditions possible + +#### Problem 2: God Object (`KCluster`) +- `KCluster` struct has 60+ methods +- Violates Single Responsibility Principle +- Methods span multiple domains: + - Kubernetes API operations + - Node management + - Storage operations + - VM operations + - Module management + - Deployment management + +#### Problem 3: Mixed Concerns +- Business logic mixed with infrastructure +- Test utilities mixed with production code +- Configuration mixed with execution +- No clear separation of layers + +#### Problem 4: Poor Encapsulation +- Everything in one package = no private boundaries +- Internal implementation details exposed +- Can't hide complexity behind interfaces + +#### Problem 5: Circular Dependencies (Hidden) +- Files import each other indirectly +- Hidden cycles through globals +- `env.go` → everything, everything → `env.go` + +#### Problem 6: Testing Difficulties +- Can't mock dependencies (globals) +- Hard to create isolated test scenarios +- Test files use same package = can access internals incorrectly + +--- + +## 2. 
Target Architecture + +### 2.1 Package Structure + +``` +testkit_v2/ +├── cmd/ +│ └── runner/ # Test runner CLI (optional, for future) +│ +├── internal/ # Internal packages (not importable) +│ ├── config/ # Configuration management +│ │ ├── env.go # Environment variables +│ │ ├── flags.go # CLI flags +│ │ ├── cluster_types.go # Cluster type definitions +│ │ └── images.go # OS image definitions +│ │ +│ ├── cluster/ # Cluster management +│ │ ├── manager.go # Cluster manager (singleton replacement) +│ │ ├── client.go # Kubernetes client factory +│ │ └── types.go # Cluster types +│ │ +│ ├── kubernetes/ # Kubernetes API operations +│ │ ├── core/ # Core K8s resources +│ │ │ ├── namespace.go +│ │ │ ├── pod.go +│ │ │ ├── node.go +│ │ │ └── service.go +│ │ ├── apps/ # Apps resources +│ │ │ ├── deployment.go +│ │ │ └── daemonset.go +│ │ ├── storage/ # Storage resources +│ │ │ ├── pvc.go +│ │ │ ├── storageclass.go +│ │ │ ├── blockdevice.go +│ │ │ └── lvmvolumegroup.go +│ │ ├── virtualization/ # VM resources +│ │ │ ├── vm.go +│ │ │ ├── vdisk.go +│ │ │ └── vmbd.go +│ │ └── deckhouse/ # Deckhouse resources +│ │ ├── modules.go +│ │ ├── nodegroups.go +│ │ └── staticinstance.go +│ │ +│ ├── infrastructure/ # Infrastructure operations +│ │ ├── ssh/ # SSH operations +│ │ │ ├── client.go +│ │ │ ├── keys.go +│ │ │ └── tunnel.go +│ │ └── vm/ # VM provisioning +│ │ ├── provider.go # Interface +│ │ └── deckhouse/ # Deckhouse VM provider +│ │ └── provider.go +│ │ +│ ├── test/ # Test framework utilities +│ │ ├── framework.go # Test framework +│ │ ├── filters.go # Filter implementations +│ │ ├── runner.go # Test runner +│ │ └── node_test_context.go # Node test context +│ │ +│ └── utils/ # Pure utility functions +│ ├── retry.go +│ ├── random.go +│ └── crypto.go +│ +├── pkg/ # Public API (importable) +│ ├── cluster/ # Public cluster interface +│ │ ├── interface.go # Cluster interface +│ │ └── config.go # Cluster config types +│ │ +│ └── testkit/ # Testkit public API +│ ├── test.go # Test 
helpers +│ └── fixtures.go # Test fixtures +│ +├── tests/ # Test files +│ ├── healthcheck/ +│ │ └── healthcheck_test.go +│ ├── storage/ +│ │ ├── lvg_test.go +│ │ ├── pvc_test.go +│ │ └── lvm_test.go +│ ├── node_configurator/ +│ │ └── node_configurator_test.go +│ ├── data_exporter/ +│ │ └── data_exporter_test.go +│ └── cleanup/ +│ └── finalizer_test.go +│ +├── go.mod +└── README.md +``` + +### 2.2 Layer Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Test Layer │ +│ (tests/*.go - High-level test scenarios) │ +└──────────────────┬──────────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────────┐ +│ Testkit API Layer │ +│ (pkg/testkit/* - Public test helpers and fixtures) │ +└──────────────────┬──────────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────────┐ +│ Domain Logic Layer │ +│ (internal/cluster, internal/kubernetes/*) │ +│ - Cluster management │ +│ - Resource operations │ +│ - Business logic │ +└──────────────────┬──────────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────────┐ +│ Infrastructure Layer │ +│ (internal/infrastructure/*) │ +│ - SSH connections │ +│ - VM provisioning │ +│ - Network tunneling │ +└──────────────────┬──────────────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────────────┐ +│ Kubernetes API Layer │ +│ (k8s.io/client-go, controller-runtime) │ +└──────────────────────────────────────────────────────────┘ +``` + +### 2.3 Core Interfaces + +#### Cluster Interface +```go +// pkg/cluster/interface.go +type Cluster interface { + // Core operations + Name() string + Context() context.Context + + // Resource operations + Namespaces() NamespaceClient + Nodes() NodeClient + Pods() PodClient + Storage() StorageClient + Virtualization() VirtualizationClient // TODO asergunov: Is the VirtualizationClient this one? 
https://github.com/deckhouse/virtualization/blob/main/api/client/kubeclient/client.go#L53 + Deckhouse() DeckhouseClient + + // Lifecycle + EnsureReady(ctx context.Context) error + Close() error +} +``` + +#### Resource Clients +```go +// Internal interfaces for resource operations +type StorageClient interface { + BlockDevices() BlockDeviceClient + LVMVolumeGroups() LVMVolumeGroupClient + PersistentVolumeClaims() PersistentVolumeClaimClient + StorageClasses() StorageClassClient +} + +type NodeClient interface { + List(ctx context.Context, filters ...NodeFilter) ([]Node, error) + Get(ctx context.Context, name string) (*Node, error) + Execute(ctx context.Context, name string, cmd []string) (stdout, stderr string, err error) + // ... +} + +type VirtualizationClient interface { + VMs() VMClient + VirtualDisks() VirtualDiskClient + VirtualMachineBlockDevices() VMBDClient +} +``` + +### 2.4 Dependency Injection + +**Cluster Manager Pattern**: +```go +// internal/cluster/manager.go +type Manager struct { + config *config.Config + clusters map[string]Cluster + mu sync.RWMutex + logger logger.Logger + sshFactory ssh.Factory +} + +func NewManager(cfg *config.Config, opts ...Option) *Manager { + // Constructor with options for dependency injection +} + +func (m *Manager) GetOrCreate(ctx context.Context, configPath, name string) (Cluster, error) { + // Lazy initialization with proper error handling +} +``` + +### 2.5 Configuration Management + +**Structured Configuration**: +```go +// internal/config/config.go +type Config struct { + // Environment + TestNS string + TestNSCleanUp string + KeepState bool + + // Cluster configuration + NestedCluster NestedClusterConfig + Hypervisor HypervisorConfig + + // Feature flags + SkipOptional bool + Parallel bool + TreeMode bool + + // Logging + Verbose bool + Debug bool + LogFile string +} + +type NestedClusterConfig struct { + KubeConfig string + Host string + SSHUser string + SSHKey string + K8sPort string + StorageClass string +} 
+``` + +--- + +## 3. Refactoring Plan + +### 3.1 Phase 1: Foundation (Low Risk) + +**Goal**: Extract configuration and utilities without breaking existing code. + +#### Step 1.1: Extract Configuration +- [ ] Create `internal/config/` package +- [ ] Move `env.go` → `internal/config/env.go` +- [ ] Move cluster types → `internal/config/cluster_types.go` +- [ ] Move image definitions → `internal/config/images.go` +- [ ] Create `Config` struct to hold all configuration +- [ ] Create constructor `config.Load()` to initialize from flags/env +- [ ] Keep global variables temporarily with deprecation comments + +**Migration Strategy**: +```go +// Keep existing globals for backward compatibility +var TestNS = config.Current().TestNS + +// But internally use structured config +func EnsureCluster(...) { + cfg := config.Current() + // Use cfg instead of globals +} +``` + +#### Step 1.2: Extract Pure Utilities +- [ ] Create `internal/utils/` package +- [ ] Move `tools.go` utilities → `internal/utils/` +- [ ] Move `log.go` → `internal/logger/` with interface +- [ ] Create logger interface for testability +- [ ] Update all files to use logger interface + +**Files Affected**: +- `util/tools.go` → `internal/utils/retry.go`, `random.go`, `crypto.go` +- `util/log.go` → `internal/logger/logger.go` + +#### Step 1.3: Extract Filters +- [ ] Create `internal/test/filters.go` +- [ ] Move `filter.go` → `internal/test/filters.go` +- [ ] Make filters type-safe and well-documented +- [ ] Keep old imports working temporarily + +**Estimated Time**: 1-2 days +**Risk Level**: Low (internal changes, maintain compatibility) + +--- + +### 3.2 Phase 2: Extract Kubernetes Clients (Medium Risk) + +**Goal**: Separate Kubernetes API operations from business logic. 
+ +#### Step 2.1: Create Kubernetes Client Packages +- [ ] Create `internal/kubernetes/` structure +- [ ] Extract core operations: + - `kube.go` → `internal/kubernetes/client.go` (client factory) + - `kube.go` (namespace) → `internal/kubernetes/core/namespace.go` + - `kube_node.go` → `internal/kubernetes/core/node.go` + - `kube_deploy.go` → `internal/kubernetes/apps/deployment.go` +- [ ] Extract storage operations: + - `kube_storage.go` → `internal/kubernetes/storage/*.go` + - Split into separate files per resource type +- [ ] Extract virtualization operations: + - `kube_vm.go` → `internal/kubernetes/virtualization/*.go` + +#### Step 2.2: Create Client Interfaces +- [ ] Define interfaces for each resource client +- [ ] Implement interfaces with existing code +- [ ] Update `KCluster` to use clients via composition + +**Before**: +```go +type KCluster struct { + controllerRuntimeClient ctrlrtclient.Client + goClient *kubernetes.Clientset + // ... 60+ methods directly on KCluster +} +``` + +**After**: +```go +type KCluster struct { + client kubernetes.Client + storage *storage.Client + nodes *node.Client + // ... composition instead of methods +} + +type Client struct { + controller ctrlrtclient.Client + goClient *kubernetes.Clientset + // Resource clients + namespaces NamespaceClient + nodes NodeClient + pods PodClient + storage StorageClient + // ... +} +``` + +#### Step 2.3: Update Tests Gradually +- [ ] Create wrapper functions in old package that delegate to new structure +- [ ] Update tests one by one to use new interfaces +- [ ] Remove old methods once all tests migrated + +**Migration Helper**: +```go +// In old package (temporary compatibility layer) +func (cluster *KCluster) CreateLVG(...) error { // TODO: asergunov: Maybe D8Cluster? Or Cluster interface and d8.Cluster as its implementation + return cluster.storage.LVMVolumeGroups().Create(...) 
+} +``` + +**Estimated Time**: 3-5 days +**Risk Level**: Medium (interface changes, needs careful testing) + +--- + +### 3.3 Phase 3: Extract Infrastructure (Medium Risk) + +**Goal**: Separate infrastructure concerns (SSH, VM provisioning). + +#### Step 3.1: Extract SSH Operations +- [ ] Create `internal/infrastructure/ssh/` package +- [ ] Move `ssh.go` → `internal/infrastructure/ssh/` +- [ ] Create SSH client factory interface +- [ ] Make SSH client mockable for tests +- [ ] Update all SSH usages to use factory + +#### Step 3.2: Extract VM Cluster Operations +- [ ] Create `internal/infrastructure/vm/` package +- [ ] Extract Deckhouse VM provider +- [ ] Move `kube_vm_cluster.go` logic → `internal/infrastructure/vm/deckhouse/` +- [ ] Create VM provider interface for extensibility +- [ ] Separate VM lifecycle from cluster operations + +**Structure**: +```go +// internal/infrastructure/vm/provider.go +type Provider interface { + CreateVM(ctx context.Context, spec VMSpec) (*VM, error) + DeleteVM(ctx context.Context, name string) error + WaitForVMReady(ctx context.Context, name string) error +} + +// internal/infrastructure/vm/deckhouse/provider.go +type DeckhouseProvider struct { + cluster Cluster + // ... +} +``` + +#### Step 3.3: Extract Cluster Creation Logic +- [ ] Move cluster creation from `kube_vm_cluster.go` +- [ ] Create `internal/cluster/builder.go` for cluster creation +- [ ] Separate concerns: VM creation, Deckhouse installation, node registration + +**Estimated Time**: 3-4 days +**Risk Level**: Medium (infrastructure changes affect tests) + +--- + +### 3.4 Phase 4: Refactor Cluster Management (High Risk) + +**Goal**: Replace singleton pattern with proper dependency injection. 
+ +#### Step 4.1: Create Cluster Manager +- [ ] Create `internal/cluster/manager.go` +- [ ] Replace `EnsureCluster` singleton with Manager +- [ ] Implement proper lifecycle management +- [ ] Add context support for cancellation + +#### Step 4.2: Refactor KCluster to Cluster Interface +- [ ] Create `pkg/cluster/interface.go` with public Cluster interface +- [ ] Implement interface in `internal/cluster/cluster.go` +- [ ] Break up `KCluster` into smaller, focused structs +- [ ] Use composition instead of 60+ methods + +#### Step 4.3: Update All Tests +- [ ] Update test files to use new Cluster interface +- [ ] Remove dependency on singleton +- [ ] Enable dependency injection in tests + +**Before**: +```go +func TestSomething(t *testing.T) { + cluster := util.EnsureCluster("", "") // Singleton + // ... +} +``` + +**After**: +```go +func TestSomething(t *testing.T) { + ctx := context.Background() + cfg := config.Load() + manager := cluster.NewManager(cfg) + cl, err := manager.GetOrCreate(ctx, "", "") + // ... +} +``` + +**Or with test helper**: +```go +func TestSomething(t *testing.T) { + cluster := testkit.GetCluster(t) // Helper that manages lifecycle + // ... +} +``` + +**Estimated Time**: 5-7 days +**Risk Level**: High (touches all test files) + +--- + +### 3.5 Phase 5: Organize Tests (Low Risk) + +**Goal**: Organize test files into logical packages. 
+ +#### Step 5.1: Reorganize Test Files +- [ ] Move tests into domain-specific packages: + - `tests/healthcheck/` + - `tests/storage/` + - `tests/node_configurator/` + - `tests/data_exporter/` + - `tests/cleanup/` +- [ ] Create shared test utilities in `pkg/testkit/` +- [ ] Update package names appropriately + +#### Step 5.2: Create Test Framework +- [ ] Create `internal/test/framework.go` for test helpers +- [ ] Extract common test patterns +- [ ] Create fixtures for common scenarios + +**Estimated Time**: 2-3 days +**Risk Level**: Low (mostly moving files) + +--- + +### 3.6 Phase 6: Cleanup and Documentation (Low Risk) + +**Goal**: Remove old code, add documentation, improve developer experience. + +#### Step 6.1: Remove Deprecated Code +- [ ] Remove compatibility wrappers +- [ ] Remove old package structure +- [ ] Clean up unused imports +- [ ] Remove global variables + +#### Step 6.2: Add Documentation +- [ ] Write package-level documentation +- [ ] Document public APIs +- [ ] Create architecture diagrams +- [ ] Add examples for common use cases + +#### Step 6.3: Improve Developer Experience +- [ ] Add clear error messages +- [ ] Improve logging +- [ ] Add validation +- [ ] Create helper functions for common operations + +**Estimated Time**: 2-3 days +**Risk Level**: Low (cleanup phase) + +--- + +## 4. 
Migration Strategy Details + +### 4.1 Compatibility Layer Approach + +During migration, maintain a compatibility layer that delegates to new implementation: + +```go +// Old location: util/kube_storage.go (temporary) +package integration + +import ( + newStorage "github.com/deckhouse/sds-e2e/internal/kubernetes/storage" +) + +func (cluster *KCluster) CreateLVG(name, nodeName string, bds []string) error { + // Delegate to new implementation + return cluster.storageClient.LVMVolumeGroups().Create( + cluster.ctx, + newStorage.LVGCreateRequest{ + Name: name, + NodeName: nodeName, + BlockDevices: bds, + }, + ) +} +``` + +This allows: +- Gradual migration of tests +- Running old and new code side-by-side +- Easy rollback if issues arise +- Zero-downtime refactoring + +### 4.2 Testing Strategy + +1. **Unit Tests First**: Test new packages in isolation +2. **Integration Tests**: Ensure new code works with existing tests +3. **Parallel Running**: Run old and new implementations in parallel +4. **Gradual Cutover**: Move tests one by one to new implementation + +### 4.3 Rollback Plan + +At each phase: +- Keep old code in place until new code is proven +- Use feature flags if needed +- Maintain compatibility layer +- Document rollback procedure + +--- + +## 5. 
Detailed Module Structure + +### 5.1 Configuration Module (`internal/config/`) + +``` +config/ +├── config.go # Main Config struct and Load() +├── env.go # Environment variable parsing +├── flags.go # CLI flag definitions +├── cluster_types.go # Cluster type definitions and validation +├── images.go # OS image URL definitions +└── defaults.go # Default values +``` + +**Responsibilities**: +- Configuration loading from flags, env vars, files +- Configuration validation +- Type-safe configuration access +- No business logic + +### 5.2 Cluster Module (`internal/cluster/`) + +``` +cluster/ +├── manager.go # Cluster manager (replaces EnsureCluster singleton) +├── cluster.go # Cluster implementation +├── client.go # Kubernetes client factory +├── cache.go # Cluster caching logic +└── types.go # Cluster-related types +``` + +**Responsibilities**: +- Cluster lifecycle management +- Client initialization and caching +- Context management +- No resource operations (delegates to kubernetes clients) + +### 5.3 Kubernetes Module (`internal/kubernetes/`) + +``` +kubernetes/ +├── client.go # Base client setup and scheme registration +├── core/ +│ ├── namespace.go +│ ├── node.go +│ ├── pod.go +│ └── service.go +├── apps/ +│ ├── deployment.go +│ └── daemonset.go +├── storage/ +│ ├── client.go # Storage client interface +│ ├── blockdevice.go +│ ├── lvmvolumegroup.go +│ ├── pvc.go +│ └── storageclass.go +├── virtualization/ +│ ├── client.go +│ ├── vm.go +│ ├── vdisk.go +│ ├── vmbd.go +│ └── cluster_virtual_image.go +└── deckhouse/ + ├── client.go + ├── modules.go + ├── nodegroups.go + └── staticinstance.go +``` + +**Responsibilities**: +- All Kubernetes API operations +- Resource-specific logic +- Filtering and querying +- CRUD operations +- No infrastructure concerns (SSH, VM provisioning handled elsewhere) + +### 5.4 Infrastructure Module (`internal/infrastructure/`) + +``` +infrastructure/ +├── ssh/ +│ ├── client.go # SSH client implementation +│ ├── factory.go # SSH client 
factory +│ ├── keys.go # SSH key generation +│ └── tunnel.go # SSH tunnel management +└── vm/ + ├── provider.go # VM provider interface + └── deckhouse/ + ├── provider.go # Deckhouse VM provider + └── installer.go # Deckhouse installation logic +``` + +**Responsibilities**: +- SSH connection management +- VM provisioning (via providers) +- Infrastructure setup +- No Kubernetes operations (uses kubernetes clients) + +### 5.5 Test Module (`internal/test/`) + +``` +test/ +├── framework.go # Test framework and helpers +├── filters.go # Filter implementations +├── runner.go # Test execution runner +├── node_context.go # Node test context +└── fixtures.go # Test fixtures +``` + +**Responsibilities**: +- Test execution utilities +- Filter implementations +- Test context management +- Node-specific test helpers + +### 5.6 Public API (`pkg/`) + +``` +pkg/ +├── cluster/ +│ ├── interface.go # Public Cluster interface +│ └── config.go # Public config types +└── testkit/ + ├── test.go # Public test helpers + └── fixtures.go # Public fixtures +``` + +**Responsibilities**: +- Public API for external consumers +- Stable interfaces +- Well-documented +- Backward compatibility guarantees + +--- + +## 6. Key Design Decisions + +### 6.1 Why Internal Packages? + +- **Encapsulation**: Internal packages cannot be imported outside the module +- **Flexibility**: Can refactor internal packages without breaking external API +- **Clear Boundaries**: Makes it obvious what is public vs private + +### 6.2 Why Composition Over Inheritance? + +- **Flexibility**: Easier to swap implementations +- **Testability**: Can mock individual components +- **Single Responsibility**: Each client has one job + +### 6.3 Why Interface-Based Design? + +- **Testability**: Easy to create mocks +- **Extensibility**: Can add new implementations +- **Dependency Inversion**: High-level code doesn't depend on low-level details + +### 6.4 Why Separate Infrastructure? 
+ +- **Clear Boundaries**: Infrastructure is separate from business logic +- **Testability**: Can mock infrastructure in tests +- **Flexibility**: Can swap VM providers, SSH implementations, etc. + +--- + +## 7. Migration Checklist + +### Phase 1: Foundation +- [ ] Extract configuration to `internal/config/` +- [ ] Extract utilities to `internal/utils/` +- [ ] Extract filters to `internal/test/filters.go` +- [ ] Extract logging to `internal/logger/` +- [ ] All existing tests still pass + +### Phase 2: Kubernetes Clients +- [ ] Create `internal/kubernetes/` structure +- [ ] Extract all K8s operations to appropriate packages +- [ ] Create client interfaces +- [ ] Update KCluster to use composition +- [ ] All existing tests still pass + +### Phase 3: Infrastructure +- [ ] Extract SSH to `internal/infrastructure/ssh/` +- [ ] Extract VM operations to `internal/infrastructure/vm/` +- [ ] Create provider interfaces +- [ ] All existing tests still pass + +### Phase 4: Cluster Management +- [ ] Create Cluster Manager +- [ ] Create Cluster interface +- [ ] Refactor KCluster implementation +- [ ] Update all tests to use new interface +- [ ] All tests still pass + +### Phase 5: Test Organization +- [ ] Reorganize test files +- [ ] Create test framework +- [ ] Update package names +- [ ] All tests still pass + +### Phase 6: Cleanup +- [ ] Remove deprecated code +- [ ] Add documentation +- [ ] Improve error messages +- [ ] Final verification + +--- + +## 8. Benefits of New Architecture + +### 8.1 Maintainability +- **Clear Structure**: Easy to find code +- **Single Responsibility**: Each package has one job +- **Documented**: Clear purpose for each module + +### 8.2 Testability +- **Mockable**: Can mock dependencies via interfaces +- **Isolated**: Test individual components +- **Fast**: Unit tests run quickly + +### 8.3 Extensibility +- **Pluggable**: Can add new VM providers, storage backends, etc. 
+- **Modular**: Can add new features without touching existing code +- **Interface-Based**: New implementations satisfy existing interfaces + +### 8.4 Developer Experience +- **Clear API**: Public interfaces are well-defined +- **Better Errors**: Structured error handling +- **Documentation**: Each package is documented +- **Examples**: Common patterns documented + +### 8.5 Performance +- **Efficient**: No unnecessary allocations +- **Cached**: Client reuse via manager +- **Context-Aware**: Proper context propagation for cancellation + +--- + +## 9. Risks and Mitigations + +### Risk 1: Breaking Existing Tests +**Mitigation**: +- Maintain compatibility layer +- Gradual migration +- Extensive testing at each phase + +### Risk 2: Time Investment +**Mitigation**: +- Phased approach (can stop at any phase) +- Parallel development possible +- Each phase delivers value + +### Risk 3: Learning Curve +**Mitigation**: +- Good documentation +- Clear examples +- Code reviews and knowledge sharing + +### Risk 4: Over-Engineering +**Mitigation**: +- Start with minimum viable structure +- Add complexity only when needed +- Keep it simple + +--- + +## 10. Success Criteria + +1. **All existing tests pass** after refactoring +2. **No performance regression** (ideally improvement) +3. **Code is easier to understand** (measured by code review time) +4. **New features are easier to add** (measured by time to implement) +5. **Tests are easier to write** (measured by lines of test code) +6. **Documentation is comprehensive** (all public APIs documented) + +--- + +## 11. Next Steps + +1. **Review this document** with team +2. **Prioritize phases** based on immediate needs +3. **Create GitHub issues** for each phase +4. **Start with Phase 1** (lowest risk) +5. 
**Iterate and adjust** based on learnings + +--- + +## Appendix A: Current vs Proposed Structure Comparison + +### Current Structure Issues + +``` +❌ Everything in one package +❌ Global state everywhere +❌ 60+ methods on one struct +❌ Mixed concerns +❌ Hard to test +❌ Circular dependencies +``` + +### Proposed Structure Benefits + +``` +✅ Clear package boundaries +✅ Structured configuration +✅ Interface-based design +✅ Separated concerns +✅ Easy to test +✅ No circular dependencies +``` + +--- + +## Appendix B: Code Examples + +### Example 1: Using New Cluster Interface + +```go +// tests/storage/pvc_test.go +package storage + +import ( + "context" + "testing" + + "github.com/deckhouse/sds-e2e/pkg/cluster" + "github.com/deckhouse/sds-e2e/pkg/testkit" +) + +func TestPVCCreate(t *testing.T) { + ctx := context.Background() + + // Get cluster via testkit helper (manages lifecycle) + cl := testkit.GetCluster(t) + defer cl.Close() + + // Use typed client interfaces + pvc, err := cl.Storage().PersistentVolumeClaims().Create(ctx, testkit.PVCSpec{ + Name: "test-pvc", + Namespace: testkit.TestNS, + Size: "1Gi", + StorageClass: "test-lvm-thick", + }) + if err != nil { + t.Fatal(err) + } + + // Wait for ready + err = cl.Storage().PersistentVolumeClaims().WaitReady(ctx, pvc.Name, 30*time.Second) + if err != nil { + t.Fatal(err) + } +} +``` + +### Example 2: Using Configuration + +```go +// internal/config/config.go +package config + +type Config struct { + TestNS string + NestedCluster NestedClusterConfig + // ... +} + +func Load() *Config { + cfg := &Config{ + TestNS: getTestNS(), + // ... + } + return cfg +} + +// Usage +cfg := config.Load() +cluster := cluster.NewManager(cfg) +``` + +### Example 3: Mocking for Tests + +```go +// internal/kubernetes/storage/mock.go (generated) +type MockLVMVolumeGroupClient struct { + CreateFunc func(ctx context.Context, req LVGCreateRequest) error + // ... 
+} + +func (m *MockLVMVolumeGroupClient) Create(ctx context.Context, req LVGCreateRequest) error { + return m.CreateFunc(ctx, req) +} + +// In test +func TestLVGCreate(t *testing.T) { + mockClient := &MockLVMVolumeGroupClient{ + CreateFunc: func(ctx context.Context, req LVGCreateRequest) error { + // Test-specific behavior + return nil + }, + } + // Use mock in test +} +``` + +--- + +## Conclusion + +This architecture refactoring will transform the codebase from a monolithic "pasta code" structure into a clean, maintainable, and testable modular architecture. The phased approach minimizes risk while delivering incremental value. + +The key principles: +1. **Separation of Concerns**: Each package has one responsibility +2. **Interface-Based Design**: Easy to test and extend +3. **Dependency Injection**: No globals, proper lifecycle management +4. **Clear Boundaries**: Internal vs public API +5. **Gradual Migration**: Low risk, incremental progress + +With this structure, the codebase will be: +- **Easier to understand** (clear package organization) +- **Easier to test** (mockable interfaces) +- **Easier to extend** (modular design) +- **Easier to maintain** (single responsibility) + +Start with Phase 1 and iterate based on learnings! + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 64b621a..586c400 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,101 @@ -# storage-e2e -e2e tests for different storage repositories +# E2E Tests + +This package contains end-to-end tests for SDS (Storage for Deckhouse Services). + +## Architecture + +The package follows a clean, modular architecture: + +- **`internal/config/`** - Configuration management (environment variables, cluster definitions, module configs) +- **`internal/cluster/`** - Cluster lifecycle management (manager, builder) +- **`internal/kubernetes/`** - Kubernetes API clients (virtualization, deckhouse modules) +- **`internal/infrastructure/`** - Infrastructure operations (SSH, VM provisioning) +- **`internal/logger/`** - Logging utilities +- **`internal/utils/`** - Utility functions (retry, crypto) +- **`pkg/cluster/`** - Public cluster interface +- **`pkg/testkit/`** - Public test helpers +- **`tests/`** - Test files using Ginkgo + +## Cluster Creation Workflow + +The cluster builder implements the following workflow: + +1. **Connect to Base Cluster**: Get kubeconfig of the base Deckhouse cluster and connect via SSH +2. **Enable Virtualization Module**: Enable Deckhouse Virtualization Platform module on base cluster +3. **Create Virtual Machines**: Create VMs as defined in cluster configuration +4. 
**Deploy Deckhouse**: Connect to master VM via SSH and deploy Deckhouse Kubernetes Platform
+5. **Get Kubeconfig**: Retrieve kubeconfig of the nested cluster
+6. **Enable Modules**: Enable and configure required modules in the nested cluster
+
+## Quick start - Running tests
+// TODO amarkov: I strongly recommend adding a full example of how to run tests with all environments, arguments and commands.
+
+## Writing Tests
+
+Tests are written using Ginkgo. Keep test files simple - they should only contain test logic. Business logic is in other modules.
+
+Example:
+
+```go
+var _ = Describe("Cluster Creation", func() {
+    var (
+        ctx context.Context
+        baseCluster cluster.Cluster
+        testCluster cluster.Cluster
+        clusterCfg *config.DKPClusterConfig
+    )
+
+    BeforeEach(func() {
+        ctx = context.Background()
+        baseCluster, _ = testkit.GetCluster(ctx, cfg.BaseCluster.KubeConfig, "")
+        clusterCfg = &config.DKPClusterConfig{
+            // Define your cluster configuration
+        }
+    })
+
+    It("should create a nested Kubernetes cluster", func() {
+        var err error
+        testCluster, err = testkit.BuildTestCluster(ctx, baseCluster, clusterCfg)
+        Expect(err).NotTo(HaveOccurred())
+
+        err = testCluster.EnsureReady(ctx)
+        Expect(err).NotTo(HaveOccurred())
+    })
+})
+```
+
+## Configuration
+
+Configuration is loaded from environment variables:
+
+- `BASE_KUBECONFIG` - Path to base cluster kubeconfig
+- `BASE_SSH_HOST` - Base cluster SSH host
+- `BASE_SSH_USER` - Base cluster SSH user
+- `BASE_SSH_KEY` - Base cluster SSH key path
+- `NESTED_KUBECONFIG` - Path for nested cluster kubeconfig
+- `NESTED_SSH_HOST` - Nested cluster SSH host
+- And more... See `internal/config/config.go` for full list
+
+## Running Tests
+
+```bash
+go test ./tests/... -v
+```
+
+Or with Ginkgo:
+
+```bash
+ginkgo ./tests/... 
+``` + +## Structure + +- `tests/` - Test files +- `internal/` - Internal packages (not importable outside) +- `pkg/` - Public API (importable) + +## Notes + +- The code is independent of legacy code (no imports from `legacy/`) +- Test files are simple and focus on test logic only +- Business logic is in separate modules +- The architecture allows for easy extension and testing diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..f0249ab --- /dev/null +++ b/TODO.md @@ -0,0 +1,57 @@ +# TODOs, ideas and questions + +## Переиспользование кластера + +1. Мы подключились к базовому кластеру (далее, гипервизор) и нужно создавать виртуалки. Мы по умолчанию считаем, что виртуализация там включена (Да) и падаем, если нет; либо мы включаем ее сами и падаем если не включилась (Нет). Да/нет? +2. Про идемпотентность тестового кластера. + +2.1. Есть у нас описанный конфигом тестовый кластер. Мы подключились к ДКП на гипервизоре и видим, что виртуалка (одна из) вроде бы уже есть и вроде даже такая, как надо - неймспейс, проц, память, диск, имя, образ, даже клауд-инит конфиг. НО подключиться к машине проверить ОС и ядро не можем - authentication error. В этом случае мы: + +2.1.1. Если это единственный мастер (в конфиге тестового кластера), сносим все виртуалки и переставляем их заново с развертыванием нового кластера с нуля (Да), или падаем (Нет). Да/нет? + +2.1.2. Если это воркер или не единственный мастер, при условии нормального подключения хотя бы к одному мастеру и его соответствия конфигу, мы удаляем эту ноду в кластере, удаляем виртуалку, переставляем ее с нуля и добавляем в кластер в той роли, в которой она была (Да), или падаем (Нет). Да/нет? Тут могут быть сложности в виде невыгоняемых подов с ноды - как быть в этом случае? + +2.2. Аналогично 2.1. но нода не соответствует конфигу. + +2.2.1. Если это единственный мастер, то убиваем все и переставляем весь кластер целиком с нуля (Да), либо падаем (Нет). Да/нет? + +2.2.2. 
Если это воркер или не единственный мастер, при условии нормального подключения хотя бы к одному мастеру и его соответствия конфигу, мы удаляем эту ноду в кластере, удаляем виртуалку, переставляем ее с нуля и добавляем в кластер в той роли, в которой она была (Да), или падаем (Нет). Да/нет? Тут могут быть сложности в виде невыгоняемых подов с ноды - как быть в этом случае?
+> 2.3. Что делать если одна или несколько виртуалок выключены? Включаем и пробуем подключиться (Да), падаем (Нет). Да/нет?
+
+
+alexandr.zimin
+привет.
+
+1. Да - падаем, если нет виртуализации(сами не включаем)
+
+2. пункт - сделать флаг, который будет менять поведение, Пока там 2 переключалки сделать:
+alwaysUseExisting (или еще варианты: ignoreTestConfig) - вообще не смотрим на требования, запускаем тесты на любом указанном кластере. если нет доступа - падаем и пишем об этом
+alwaysRecreate - если в ns есть уже хоть одна виртуалка - падаем и пишем об этом. С этим режимом можно запускать только в чистом ns
+Потом можно еще будет добавить useExistingWithConfigCheck - но пока его НЕ нужно реализовывать, чтобы время не терять
+
+наверное alwaysRecreate стоит переименовать на alwaysCreate
+
+ну и по умолчанию alwaysCreate
+
+хотя даже давай обязательным флагом сделаем
+
+просто падаем, если не задан
+
+## Новая репа
+
+Нужна ли? И в гитхабе ли? - сделать новую репу в фоксе, перетащить туда существующий код, убить в гитхабе и пересоздать.
+
+## Как ревьюить и к кому с этим идти?
+
+Не ревьюить.
+
+
+
+
+
+
+alwaysUseExisting - что проверять? Включен, доступен ДКП, включены модули? - да.
+alwaysCreate - если виртуалки такие уже есть в НС, то падаем, если виртуалок нет - создаем кластер.
+
+Нужно все просто. Если есть какие-то непонятные кейсы, падать с ошибкой, потом обсудим и пофиксим, есличо. 
+ diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..dcbefe6 --- /dev/null +++ b/go.mod @@ -0,0 +1,49 @@ +module github.com/deckhouse/sds-e2e-tests + +go 1.24.2 + +toolchain go1.24.3 + +require ( + github.com/onsi/ginkgo/v2 v2.21.0 + github.com/onsi/gomega v1.35.1 + github.com/pkg/sftp v1.13.10 + golang.org/x/crypto v0.46.0 + golang.org/x/term v0.38.0 + gopkg.in/yaml.v3 v3.0.1 + k8s.io/client-go v0.32.1 +) + +require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kr/fs v0.1.0 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/text v0.32.0 // indirect + golang.org/x/time v0.7.0 // indirect + golang.org/x/tools v0.39.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/apimachinery v0.32.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect + sigs.k8s.io/yaml v1.4.0 // 
indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f5d951f --- /dev/null +++ b/go.sum @@ -0,0 +1,154 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= +github.com/kr/fs v0.1.0/go.mod 
h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= +github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod 
h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU= +github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto 
v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= 
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= +golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc= +k8s.io/api v0.32.1/go.mod h1:/Yi/BqkuueW1BgpoePYBRdDYfjPF5sgTr5+YqDZra5k= +k8s.io/apimachinery v0.32.1 h1:683ENpaCBjma4CYqsmZyhEzrGz6cjn1MY/X2jB2hkZs= +k8s.io/apimachinery v0.32.1/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/client-go v0.32.1 h1:otM0AxdhdBIaQh7l1Q0jQpmo7WOFIk5FFa4bg6YMdUU= +k8s.io/client-go v0.32.1/go.mod h1:aTTKZY7MdxUaJ/KiUs8D+GssR9zJZi77ZqtzcGXIiDg= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y= +k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4= +k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= +k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= 
+sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= +sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/internal/config/types.go b/internal/config/types.go new file mode 100644 index 0000000..7a19665 --- /dev/null +++ b/internal/config/types.go @@ -0,0 +1,244 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package config + +import ( + "fmt" + "time" + + "gopkg.in/yaml.v3" +) + +// HostType represents the type of host (VM or bare-metal) +type HostType string + +const ( + HostTypeVM HostType = "vm" + HostTypeBareMetal HostType = "bare-metal" +) + +// ClusterRole represents the role of a node in the cluster +type ClusterRole string + +const ( + ClusterRoleMaster ClusterRole = "master" + ClusterRoleWorker ClusterRole = "worker" + ClusterRoleSetup ClusterRole = "setup" // Bootstrap node for DKP installation +) + +// OSType represents the operating system type +type OSType struct { + Name string + ImageURL string + KernelVersion string +} + +var ( + OSTypeMap = map[string]OSType{ + "Ubuntu 22.04 6.2.0-39-generic": { + ImageURL: "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img", + KernelVersion: "6.2.0-39-generic", + }, + "Ubuntu 24.04 6.8.0-53-generic": { + ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", + KernelVersion: "6.8.0-53-generic", + }, + } +) + +// AuthMethod represents the authentication method +type AuthMethod string + +const ( + AuthMethodSSHKey AuthMethod = "ssh-key" + AuthMethodSSHPass AuthMethod = "ssh-password" +) + +// NodeAuth contains authentication information for a node +type NodeAuth struct { + Method AuthMethod `yaml:"method"` + User string `yaml:"user"` + SSHKey string `yaml:"sshKey"` // Public key (value like "ssh-rsa ...", path to .pub file, or empty for default) + Password string `yaml:"password,omitempty"` // Password (if using password auth) +} + +// ClusterNode defines a single node in the cluster +type ClusterNode struct { + Hostname string `yaml:"hostname"` + IPAddress string `yaml:"ipAddress,omitempty"` // Required for bare-metal, optional for VM + OSType OSType `yaml:"osType"` // Required for VM, optional for bare-metal (custom unmarshaler handles string -> OSType conversion) + HostType HostType `yaml:"hostType"` + Role ClusterRole `yaml:"role"` + Auth 
NodeAuth `yaml:"auth"` + // VM-specific fields (only used when HostType == HostTypeVM) + CPU int `yaml:"cpu"` // Required for VM + RAM int `yaml:"ram"` // Required for VM, in GB + DiskSize int `yaml:"diskSize"` // Required for VM, in GB + // Bare-metal specific fields + Prepared bool `yaml:"prepared,omitempty"` // Whether the node is already prepared for DKP installation +} + +// DKPParameters defines DKP-specific parameters for cluster deployment +type DKPParameters struct { + KubernetesVersion string `yaml:"kubernetesVersion"` + PodSubnetCIDR string `yaml:"podSubnetCIDR"` + ServiceSubnetCIDR string `yaml:"serviceSubnetCIDR"` + ClusterDomain string `yaml:"clusterDomain"` + LicenseKey string `yaml:"licenseKey"` + RegistryRepo string `yaml:"registryRepo"` + Namespace string `yaml:"namespace"` + StorageClass string `yaml:"storageClass"` + Modules []*ModuleConfig `yaml:"modules,omitempty"` +} + +// ClusterDefinition defines the complete cluster configuration +type ClusterDefinition struct { + Masters []ClusterNode `yaml:"masters"` + Workers []ClusterNode `yaml:"workers"` + Setup *ClusterNode `yaml:"setup,omitempty"` // Bootstrap node (can be nil) + DKPParameters DKPParameters `yaml:"dkpParameters"` +} + +// ModuleConfig defines a Deckhouse module configuration +type ModuleConfig struct { // TODO amarkov: I suggest allow user to specify ModulePullOverride version, to run tests on MR/PR during development process. + Name string `yaml:"name"` + Version int `yaml:"version"` + Enabled bool `yaml:"enabled"` + Settings map[string]any `yaml:"settings,omitempty"` + Dependencies []string `yaml:"dependencies,omitempty"` // Names of modules that must be enabled before this one + ModulePullOverride string `yaml:"modulePullOverride,omitempty"` // Override the module pull branch or tag (e.g. 
"main", "pr123", "mr41") +} + +const ( + HostReadyTimeout = 10 * time.Minute // Timeout for hosts to be ready + DKPDeployTimeout = 30 * time.Minute // Timeout for DKP deployment + ModuleDeployTimeout = 10 * time.Minute // Timeout for module deployment +) + +// UnmarshalYAML implements custom YAML unmarshaling for ClusterNode +// to handle OSType conversion from string key to OSType struct +func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { + // Temporary struct with OSType as string for unmarshaling + type clusterNodeTmp struct { + Hostname string `yaml:"hostname"` + IPAddress string `yaml:"ipAddress,omitempty"` + OSType string `yaml:"osType"` + HostType string `yaml:"hostType"` + Role string `yaml:"role"` + Auth NodeAuth `yaml:"auth"` + CPU int `yaml:"cpu"` + RAM int `yaml:"ram"` + DiskSize int `yaml:"diskSize"` + Prepared bool `yaml:"prepared,omitempty"` + } + + var tmp clusterNodeTmp + if err := value.Decode(&tmp); err != nil { + return err + } + + // Convert HostType + hostType := HostType(tmp.HostType) + if hostType != HostTypeVM && hostType != HostTypeBareMetal { + return fmt.Errorf("invalid hostType: %s", tmp.HostType) + } + + // Convert Role + role := ClusterRole(tmp.Role) + if role != ClusterRoleMaster && role != ClusterRoleWorker && role != ClusterRoleSetup { + return fmt.Errorf("invalid role: %s", tmp.Role) + } + + // Convert OSType string key to OSType struct + osType, ok := OSTypeMap[tmp.OSType] + if !ok { + return fmt.Errorf("unknown osType: %s", tmp.OSType) + } + + // Convert AuthMethod + authMethod := AuthMethod(tmp.Auth.Method) + if authMethod != AuthMethodSSHKey && authMethod != AuthMethodSSHPass { + return fmt.Errorf("invalid auth method: %s", tmp.Auth.Method) + } + + // Assign to actual struct + n.Hostname = tmp.Hostname + n.IPAddress = tmp.IPAddress + n.OSType = osType + n.HostType = hostType + n.Role = role + n.Auth = NodeAuth{ + Method: authMethod, + User: tmp.Auth.User, + SSHKey: tmp.Auth.SSHKey, + Password: tmp.Auth.Password, + } 
+ n.CPU = tmp.CPU + n.RAM = tmp.RAM + n.DiskSize = tmp.DiskSize + n.Prepared = tmp.Prepared + + return nil +} + +// UnmarshalYAML implements custom YAML unmarshaling for ClusterDefinition +// to handle the top-level "clusterDefinition:" key in the YAML +func (c *ClusterDefinition) UnmarshalYAML(value *yaml.Node) error { + // Check if we have a top-level "clusterDefinition" key + if value.Kind == yaml.MappingNode && len(value.Content) > 0 { + // Look for "clusterDefinition" key + for i := 0; i < len(value.Content)-1; i += 2 { + if value.Content[i].Value == "clusterDefinition" { + // Found the key, decode the value (next node) into a temporary struct + // to avoid infinite recursion + type clusterDefTmp struct { + Masters []ClusterNode `yaml:"masters"` + Workers []ClusterNode `yaml:"workers"` + Setup *ClusterNode `yaml:"setup,omitempty"` + DKPParameters DKPParameters `yaml:"dkpParameters"` + } + var tmp clusterDefTmp + if err := value.Content[i+1].Decode(&tmp); err != nil { + return err + } + // Copy to actual struct + c.Masters = tmp.Masters + c.Workers = tmp.Workers + c.Setup = tmp.Setup + c.DKPParameters = tmp.DKPParameters + return nil + } + } + } + // If no "clusterDefinition" key found, decode directly using temporary struct + // to avoid infinite recursion + type clusterDefTmp struct { + Masters []ClusterNode `yaml:"masters"` + Workers []ClusterNode `yaml:"workers"` + Setup *ClusterNode `yaml:"setup,omitempty"` + DKPParameters DKPParameters `yaml:"dkpParameters"` + } + var tmp clusterDefTmp + if err := value.Decode(&tmp); err != nil { + return err + } + c.Masters = tmp.Masters + c.Workers = tmp.Workers + c.Setup = tmp.Setup + c.DKPParameters = tmp.DKPParameters + return nil +} diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go new file mode 100644 index 0000000..6d3dfe9 --- /dev/null +++ b/internal/infrastructure/ssh/client.go @@ -0,0 +1,268 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 
2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ssh + +import ( + "context" + "fmt" + "io" + "net" + "os" + "os/user" + "path/filepath" + "strings" + "syscall" + + "github.com/pkg/sftp" + "golang.org/x/crypto/ssh" + "golang.org/x/term" +) + +// client implements Client interface +type client struct { + sshClient *ssh.Client +} + +// NewFactory creates a new SSH factory +func NewFactory() SSHFactory { + return &factory{} +} + +// readPassword reads a password from the terminal +func readPassword(prompt string) ([]byte, error) { + fmt.Fprint(os.Stderr, prompt) + var fd int + if term.IsTerminal(syscall.Stdin) { + fd = syscall.Stdin + } else { + tty, err := os.Open("/dev/tty") + if err != nil { + return nil, fmt.Errorf("error allocating terminal: %w", err) + } + defer tty.Close() + fd = int(tty.Fd()) + } + + pass, err := term.ReadPassword(fd) + fmt.Fprintln(os.Stderr) + return pass, err +} + +// expandPath expands ~ to home directory +func expandPath(path string) (string, error) { + if !strings.HasPrefix(path, "~") { + return path, nil + } + + usr, err := user.Current() + if err != nil { + return "", fmt.Errorf("failed to get current user: %w", err) + } + + if path == "~" { + return usr.HomeDir, nil + } + + return filepath.Join(usr.HomeDir, strings.TrimPrefix(path, "~/")), nil +} + +// createSSHConfig creates SSH client config with support for passphrase-protected keys +func createSSHConfig(user, keyPath string) (*ssh.ClientConfig, error) { + expandedKeyPath, err := expandPath(keyPath) + if err != nil { + return nil, 
fmt.Errorf("failed to expand key path: %w", err) + } + + key, err := os.ReadFile(expandedKeyPath) + if err != nil { + return nil, fmt.Errorf("unable to read private key %s: %w", expandedKeyPath, err) + } + + signer, err := ssh.ParsePrivateKey(key) + if err != nil { + if !strings.Contains(err.Error(), "ssh: this private key is passphrase protected") { + return nil, fmt.Errorf("unable to parse private key: %w", err) + } + + // Try to get passphrase from environment variable first + var pass []byte + if envPass := os.Getenv("SSH_PASSPHRASE"); envPass != "" { + pass = []byte(envPass) + } else { + // Try to read from terminal + var readErr error + pass, readErr = readPassword(" Enter passphrase for '" + expandedKeyPath + "': ") + if readErr != nil { + return nil, fmt.Errorf("SSH key '%s' is passphrase protected. Set SSH_PASSPHRASE environment variable: export SSH_PASSPHRASE='your-passphrase'\nOriginal error: %w", expandedKeyPath, readErr) + } + } + + signer, err = ssh.ParsePrivateKeyWithPassphrase(key, pass) + if err != nil { + return nil, fmt.Errorf("unable to parse private key with passphrase: %w", err) + } + } + + return &ssh.ClientConfig{ + User: user, + Auth: []ssh.AuthMethod{ + ssh.PublicKeys(signer), + }, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + }, nil +} + +// Create creates a new SSH client +func (c *client) Create(user, host, keyPath string) (SSHClient, error) { + sshConfig, err := createSSHConfig(user, keyPath) + if err != nil { + return nil, fmt.Errorf("failed to create SSH config: %w", err) + } + + // Ensure host has port if not specified + addr := host + if !strings.Contains(addr, ":") { + addr = addr + ":22" + } + + sshClient, err := ssh.Dial("tcp", addr, sshConfig) + if err != nil { + return nil, fmt.Errorf("failed to connect to %s@%s: %w", user, addr, err) + } + + return &client{sshClient: sshClient}, nil +} + +// CreateForward creates an SSH client with port forwarding +func (c *client) CreateForward(user, host, keyPath string, localPort, 
remotePort string) (SSHClient, error) { + // First create a regular connection + baseClient, err := c.Create(user, host, keyPath) + if err != nil { + return nil, err + } + + // Set up port forwarding + baseClientImpl := baseClient.(*client) + listener, err := net.Listen("tcp", "localhost:"+localPort) + if err != nil { + baseClientImpl.Close() + return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) + } + + go func() { + for { + localConn, err := listener.Accept() + if err != nil { + return + } + + remoteConn, err := baseClientImpl.sshClient.Dial("tcp", "localhost:"+remotePort) + if err != nil { + localConn.Close() + continue + } + + go func() { + io.Copy(localConn, remoteConn) + localConn.Close() + remoteConn.Close() + }() + go func() { + io.Copy(remoteConn, localConn) + localConn.Close() + remoteConn.Close() + }() + } + }() + + return baseClient, nil +} + +// Exec executes a command on the remote host +func (c *client) Exec(ctx context.Context, cmd string) (string, error) { + session, err := c.sshClient.NewSession() + if err != nil { + return "", fmt.Errorf("failed to create SSH session: %w", err) + } + defer session.Close() + + output, err := session.CombinedOutput(cmd) + if err != nil { + return string(output), fmt.Errorf("command failed: %w", err) + } + + return string(output), nil +} + +// ExecFatal executes a command and returns error if it fails +func (c *client) ExecFatal(ctx context.Context, cmd string) string { + output, err := c.Exec(ctx, cmd) + if err != nil { + panic(fmt.Sprintf("ExecFatal failed for command '%s': %v\nOutput: %s", cmd, err, output)) + } + return output +} + +// Upload uploads a local file to the remote host +func (c *client) Upload(ctx context.Context, localPath, remotePath string) error { + sftpClient, err := sftp.NewClient(c.sshClient) + if err != nil { + return fmt.Errorf("failed to create SFTP client: %w", err) + } + defer sftpClient.Close() + + localFile, err := os.Open(localPath) + if err != nil { + 
return fmt.Errorf("failed to open local file %s: %w", localPath, err) + } + defer localFile.Close() + + remoteFile, err := sftpClient.Create(remotePath) + if err != nil { + return fmt.Errorf("failed to create remote file %s: %w", remotePath, err) + } + defer remoteFile.Close() + + _, err = io.Copy(remoteFile, localFile) + if err != nil { + return fmt.Errorf("failed to copy file: %w", err) + } + + return nil +} + +// Close closes the SSH connection +func (c *client) Close() error { + if c.sshClient != nil { + return c.sshClient.Close() + } + return nil +} + +// NewClient creates a new SSH client +func NewClient(user, host, keyPath string) (SSHClient, error) { + var c client + return c.Create(user, host, keyPath) +} + +// factory implements Factory interface +type factory struct{} + +// CreateClient creates a new SSH client +func (f *factory) CreateClient(user, host, keyPath string) (SSHClient, error) { + return NewClient(user, host, keyPath) +} diff --git a/internal/infrastructure/ssh/interface.go b/internal/infrastructure/ssh/interface.go new file mode 100644 index 0000000..6a35de8 --- /dev/null +++ b/internal/infrastructure/ssh/interface.go @@ -0,0 +1,45 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ssh + +import "context" + +// SSHClient provides SSH operations +type SSHClient interface { + // Create creates a new SSH client + Create(user, host, keyPath string) (SSHClient, error) + + // CreateForward creates an SSH client with port forwarding + CreateForward(user, host, keyPath string, localPort, remotePort string) (SSHClient, error) + + // Exec executes a command on the remote host + Exec(ctx context.Context, cmd string) (string, error) + + // ExecFatal executes a command and returns error if it fails + ExecFatal(ctx context.Context, cmd string) string + + // Uploads a local file to the remote host + Upload(ctx context.Context, localPath, remotePath string) error + + // Close closes the SSH connection + Close() error +} + +// Factory provides a way to create SSH clients +type SSHFactory interface { + CreateClient(user, host, keyPath string) (SSHClient, error) +} diff --git a/pkg/testkit/cluster/cluster.go b/pkg/testkit/cluster/cluster.go new file mode 100644 index 0000000..50fa30a --- /dev/null +++ b/pkg/testkit/cluster/cluster.go @@ -0,0 +1,211 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +import ( + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + + "gopkg.in/yaml.v3" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + + "github.com/deckhouse/sds-e2e-tests/internal/config" + "github.com/deckhouse/sds-e2e-tests/internal/infrastructure/ssh" +) + +// LoadClusterConfig loads and validates a cluster configuration from a YAML file +// The config file is expected to be in the same directory as the caller (typically the test file) +func LoadClusterConfig(configFilename string) (*config.ClusterDefinition, error) { + // Get the caller's file path (the test file that called this function) + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + return nil, fmt.Errorf("failed to determine caller file path") + } + callerDir := filepath.Dir(callerFile) + yamlConfigPath := filepath.Join(callerDir, configFilename) + + // Read the YAML file + data, err := os.ReadFile(yamlConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to read config file %s: %w", yamlConfigPath, err) + } + + // Parse YAML directly into ClusterDefinition (has custom UnmarshalYAML for root key) + var clusterDef config.ClusterDefinition + if err := yaml.Unmarshal(data, &clusterDef); err != nil { + return nil, fmt.Errorf("failed to parse YAML config: %w", err) + } + + // Validate the configuration + if err := validateClusterConfig(&clusterDef); err != nil { + return nil, fmt.Errorf("config validation failed: %w", err) + } + + return &clusterDef, nil +} + +// validateClusterConfig validates the cluster configuration +func validateClusterConfig(cfg *config.ClusterDefinition) error { + // Validate that at least one master exists + if len(cfg.Masters) == 0 { + return fmt.Errorf("at least one master node is required") + } + + // Validate master nodes + for i, master := range cfg.Masters { + if err := validateNode(master, true); err != nil { + return fmt.Errorf("master[%d] validation failed: %w", i, err) + } + } + + // Validate worker 
nodes + for i, worker := range cfg.Workers { + if err := validateNode(worker, false); err != nil { + return fmt.Errorf("worker[%d] validation failed: %w", i, err) + } + } + + // Validate setup node if present + if cfg.Setup != nil { + if err := validateNode(*cfg.Setup, false); err != nil { + return fmt.Errorf("setup node validation failed: %w", err) + } + } + + // Validate DKP parameters + dkpParams := cfg.DKPParameters + if dkpParams.PodSubnetCIDR == "" { + return fmt.Errorf("dkpParameters.podSubnetCIDR is required") + } + if dkpParams.ServiceSubnetCIDR == "" { + return fmt.Errorf("dkpParameters.serviceSubnetCIDR is required") + } + if dkpParams.ClusterDomain == "" { + return fmt.Errorf("dkpParameters.clusterDomain is required") + } + if dkpParams.RegistryRepo == "" { + return fmt.Errorf("dkpParameters.registryRepo is required") + } + + return nil +} + +// validateNode validates a single node configuration +func validateNode(node config.ClusterNode, isMaster bool) error { + if node.Hostname == "" { + return fmt.Errorf("hostname is required") + } + + if node.HostType == config.HostTypeVM { + if node.CPU <= 0 { + return fmt.Errorf("CPU must be greater than 0 for VM nodes") + } + if node.RAM <= 0 { + return fmt.Errorf("RAM must be greater than 0 for VM nodes") + } + if node.DiskSize <= 0 { + return fmt.Errorf("diskSize must be greater than 0 for VM nodes") + } + } + + if node.Auth.User == "" { + return fmt.Errorf("auth.user is required") + } + + if node.Auth.Method == config.AuthMethodSSHKey && node.Auth.SSHKey == "" { + return fmt.Errorf("auth.sshKey is required when using ssh-key authentication") + } + + if node.Auth.Method == config.AuthMethodSSHPass && node.Auth.Password == "" { + return fmt.Errorf("auth.password is required when using ssh-password authentication") + } + + return nil +} + +// GetKubeconfig connects to the master node via SSH, retrieves kubeconfig from /etc/kubernetes/admin.conf, +// and returns a rest.Config that can be used with Kubernetes 
clients. +// If sshClient is provided, it will be used instead of creating a new connection. +// If sshClient is nil, a new connection will be created and closed automatically. +func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*rest.Config, error) { + // Create SSH client if not provided + shouldClose := false + if sshClient == nil { + var err error + sshClient, err = ssh.NewClient(user, masterIP, keyPath) + if err != nil { + return nil, fmt.Errorf("failed to create SSH client: %w", err) + } + shouldClose = true + } + if shouldClose { + defer sshClient.Close() + } + + // Read kubeconfig from /etc/kubernetes/admin.conf + ctx := context.Background() + kubeconfigContent, err := sshClient.Exec(ctx, "sudo cat /etc/kubernetes/admin.conf") + if err != nil { + return nil, fmt.Errorf("failed to read kubeconfig from master: %w", err) + } + + // Get the test file name from the caller + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + return nil, fmt.Errorf("failed to get caller file information") + } + testFileName := strings.TrimSuffix(filepath.Base(callerFile), filepath.Ext(callerFile)) + + // Determine the temp directory path relative to e2e-tests + // callerFile is in tests/ directory, so we go up one level to reach e2e-tests/ + e2eTestsDir := filepath.Join(filepath.Dir(callerFile), "..") + tempDir := filepath.Join(e2eTestsDir, "temp", testFileName) + + // Create temp directory if it doesn't exist + if err := os.MkdirAll(tempDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) + } + + // Create kubeconfig file in temp directory + kubeconfigPath := filepath.Join(tempDir, fmt.Sprintf("kubeconfig-%s.yaml", masterIP)) + kubeconfigFile, err := os.Create(kubeconfigPath) + if err != nil { + return nil, fmt.Errorf("failed to create kubeconfig file %s: %w", kubeconfigPath, err) + } + + // Write kubeconfig content to file + if _, err := kubeconfigFile.Write([]byte(kubeconfigContent)); err != nil 
{ + kubeconfigFile.Close() + return nil, fmt.Errorf("failed to write kubeconfig to file: %w", err) + } + if err := kubeconfigFile.Close(); err != nil { + return nil, fmt.Errorf("failed to close kubeconfig file: %w", err) + } + + // Build rest.Config from the kubeconfig file + config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + return nil, fmt.Errorf("failed to build config from kubeconfig: %w", err) + } + + return config, nil +} diff --git a/tests/cluster_creation_test.go b/tests/cluster_creation_test.go new file mode 100644 index 0000000..99ed5d0 --- /dev/null +++ b/tests/cluster_creation_test.go @@ -0,0 +1,75 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega"
	"k8s.io/client-go/rest"

	// "github.com/deckhouse/sds-e2e-tests/pkg/cluster"
	"github.com/deckhouse/sds-e2e-tests/internal/config"
	"github.com/deckhouse/sds-e2e-tests/internal/infrastructure/ssh"
	"github.com/deckhouse/sds-e2e-tests/pkg/testkit/cluster"
)

var _ = Describe("Cluster Creation", func() {
	// NOTE(review): these are hardcoded personal credentials/addresses; the
	// test cannot run outside the author's environment until they are moved
	// to configuration or environment variables — TODO externalize.
	var (
		yamlConfigFilename       string = "cluster_creation_test.yml"
		baseClusterMasterIP      string = "10.0.0.181"
		baseClusterUser          string = "w-ansible"
		baseClusterSSHPrivateKey string = "~/.ssh/aya_rsa"
	)

	BeforeEach(func(ctx SpecContext) {
		var err error
		var clusterDefinition *config.ClusterDefinition
		var kubeconfig *rest.Config
		var sshClient ssh.SSHClient

		// Stage 1: LoadConfig - verifies and parses the config from yaml file
		By("LoadConfig: Loading and verifying cluster configuration from YAML", func() {
			clusterDefinition, err = cluster.LoadClusterConfig(yamlConfigFilename)
			Expect(err).NotTo(HaveOccurred())
		})

		// Stage 2: Establish SSH connection to base cluster (reused below so a
		// passphrase-protected key is only prompted for once)
		By("Establishing ssh connection to the base cluster", func() {
			sshClient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey)
			Expect(err).NotTo(HaveOccurred())
			// GetKubeconfig does not close caller-supplied clients and the
			// original never closed this one — release it when the spec ends.
			DeferCleanup(func() {
				if sshClient != nil {
					_ = sshClient.Close()
				}
			})
		})

		// Stage 3: Getting kubeconfig from base cluster over the existing
		// SSH connection (avoids a second passphrase prompt)
		By("Get kubeconfig: Getting kubeconfig from the base cluster", func() {
			kubeconfig, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshClient)
			Expect(err).NotTo(HaveOccurred())
		})

		_ = sshClient         // TODO: use sshClient
		_ = clusterDefinition // TODO: use clusterDefinition
		_ = kubeconfig        // TODO: use kubeconfig
	}) // BeforeEach: Cluster Creation

	It("should create a test cluster", func() {
		By("Creating a test cluster", func() {
			fmt.Println("Creating a test cluster")
		})
	}) // It: should create a test cluster
}) // Describe: Cluster
Creation diff --git a/tests/cluster_creation_test.yml b/tests/cluster_creation_test.yml new file mode 100644 index 0000000..711e9a3 --- /dev/null +++ b/tests/cluster_creation_test.yml @@ -0,0 +1,69 @@ +# Test nested cluster config definition +clusterDefinition: + masters: # Master nodes configuration + - hostname: "master-1" + hostType: "vm" + role: "master" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 4 + ram: 8 + diskSize: 30 + workers: # Worker nodes configuration + - hostname: "worker-1" + hostType: "vm" + role: "worker" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 2 + ram: 6 + diskSize: 30 + - hostname: "worker-2" + hostType: "vm" + role: "worker" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 2 + ram: 6 + diskSize: 30 + # DKP parameters + dkpParameters: + kubernetesVersion: "Automatic" + podSubnetCIDR: "10.112.0.0/16" + serviceSubnetCIDR: "10.225.0.0/16" + clusterDomain: "cluster.local" + licenseKey: "" + registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" + namespace: 
"e2e-nested-1" + storageClass: "nfs-storage-class" + modules: + - name: "snapshot-controller" + version: 1 + enabled: true + dependencies: [] + - name: "sds-local-volume" + version: 1 + enabled: true + dependencies: + - "snapshot-controller" + - name: "sds-node-configurator" + version: 1 + enabled: true + settings: + enableThinProvisioning: true + dependencies: + - "sds-local-volume" + - name: "sds-replicated-volume" + version: 1 + enabled: true + dependencies: + - "sds-node-configurator" diff --git a/tests/integration_suite_test.go b/tests/integration_suite_test.go new file mode 100644 index 0000000..f9d903e --- /dev/null +++ b/tests/integration_suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestIntegration(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Integration Test Suite") +} From 66a3f5a1713c083084b0cc549bcf59f7af3b2866 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 15:44:15 +0300 Subject: [PATCH 02/48] Flags and env.go have been added --- TODO.md | 4 ++++ internal/config/env.go | 11 +++++++++ internal/config/flags.go | 51 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 internal/config/env.go create mode 100644 internal/config/flags.go diff --git a/TODO.md b/TODO.md index f0249ab..e962d2c 100644 --- a/TODO.md +++ b/TODO.md @@ -41,10 +41,14 @@ alwaysRecreate - если в ns есть уже хоть одна виртуал Нужна ли? И в гитхабе ли? - сделать новую репу в фоксе, перетащить туда существующий код, убить в гитхаюе и пересоздать. +https://github.com/deckhouse/storage-e2e.git - Done! + ## Как ревьюить и к кому с этим идти? Не ревьюить. +Done! + diff --git a/internal/config/env.go b/internal/config/env.go new file mode 100644 index 0000000..e06a767 --- /dev/null +++ b/internal/config/env.go @@ -0,0 +1,11 @@ +// Environment variables used by codebase + +package config + +import ( + "os" +) + +var ( + SSHPassphrase = os.Getenv("SSH_PASSPHRASE") +) diff --git a/internal/config/flags.go b/internal/config/flags.go new file mode 100644 index 0000000..c6664f7 --- /dev/null +++ b/internal/config/flags.go @@ -0,0 +1,51 @@ +// Flags used by codebase + +package config + +import ( + "flag" + "fmt" + "os" +) + +var ( + // alwaysUseExisting indicates to always use an existing cluster if available + alwaysUseExisting = flag.Bool("alwaysUseExisting", false, "Always use an existing cluster if available") + // alwaysCreateNew indicates to always create a new cluster + alwaysCreateNew = flag.Bool("alwaysCreateNew", false, "Always create a new cluster") +) + +// init registers flags, aliases, and validates that at least one of alwaysUseExisting or alwaysCreateNew is set 
+func init() { + // Register short aliases for flags + flag.BoolVar(alwaysUseExisting, "e", false, "Alias for -alwaysUseExisting") + flag.BoolVar(alwaysCreateNew, "n", false, "Alias for -alwaysCreateNew") + + flag.Usage = usage + flag.Parse() + + // Validate that at least one of the flags is set + if (!*alwaysUseExisting && !*alwaysCreateNew) || (*alwaysUseExisting && *alwaysCreateNew) { + fmt.Fprintf(os.Stderr, "Error: Either --alwaysUseExisting (-e) or --alwaysCreateNew (-n) must be set, but not both\n\n") + flag.Usage() + os.Exit(1) + } +} + +// usage prints the usage information for the command-line flags +func usage() { + fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Options:\n") + flag.PrintDefaults() + fmt.Fprintf(os.Stderr, "\nNote: Either -alwaysUseExisting or -alwaysCreateNew must be set, but not both together!\n") +} + +// AlwaysUseExisting returns the value of the alwaysUseExisting flag +func AlwaysUseExisting() bool { + return *alwaysUseExisting +} + +// AlwaysCreateNew returns the value of the alwaysCreateNew flag +func AlwaysCreateNew() bool { + return *alwaysCreateNew +} From 0ed37951ceca91028156d5f32d0f105dee33948d Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 17:28:55 +0300 Subject: [PATCH 03/48] Structure a bit updated --- ARCHITECTURE_ANALYSIS.md => ARCHITECTURE.md | 147 +----------------- go.mod | 2 +- {pkg/testkit => internal}/cluster/cluster.go | 20 ++- internal/cluster/interface.go | 34 ++++ internal/config/flags.go | 35 +---- internal/config/images.go | 30 ++++ internal/config/types.go | 13 -- internal/kubernetes/apps/daemonset.go | 20 +++ internal/kubernetes/apps/deployment.go | 20 +++ internal/kubernetes/client.go | 20 +++ internal/kubernetes/core/namespace.go | 20 +++ internal/kubernetes/core/node.go | 20 +++ internal/kubernetes/core/pod.go | 20 +++ internal/kubernetes/core/service.go | 20 +++ internal/kubernetes/deckhouse/client.go | 20 +++ 
internal/kubernetes/deckhouse/modules.go | 20 +++ internal/kubernetes/deckhouse/nodegroups.go | 20 +++ .../kubernetes/deckhouse/staticinstance.go | 20 +++ internal/kubernetes/storage/blockdevice.go | 20 +++ internal/kubernetes/storage/client.go | 20 +++ internal/kubernetes/storage/lvmvolumegroup.go | 20 +++ internal/kubernetes/storage/pvc.go | 20 +++ internal/kubernetes/storage/storageclass.go | 20 +++ internal/kubernetes/virtualization/client.go | 20 +++ .../virtualization/cluster_virtual_image.go | 20 +++ internal/kubernetes/virtualization/vdisk.go | 20 +++ internal/kubernetes/virtualization/vm.go | 20 +++ internal/kubernetes/virtualization/vmbd.go | 20 +++ main.go | 36 +++++ .../cluster_creation_suite_test.go} | 0 .../cluster_creation_test.go | 10 +- .../cluster_creation_test.yml | 0 32 files changed, 554 insertions(+), 193 deletions(-) rename ARCHITECTURE_ANALYSIS.md => ARCHITECTURE.md (84%) rename {pkg/testkit => internal}/cluster/cluster.go (90%) create mode 100644 internal/cluster/interface.go create mode 100644 internal/config/images.go create mode 100644 internal/kubernetes/apps/daemonset.go create mode 100644 internal/kubernetes/apps/deployment.go create mode 100644 internal/kubernetes/client.go create mode 100644 internal/kubernetes/core/namespace.go create mode 100644 internal/kubernetes/core/node.go create mode 100644 internal/kubernetes/core/pod.go create mode 100644 internal/kubernetes/core/service.go create mode 100644 internal/kubernetes/deckhouse/client.go create mode 100644 internal/kubernetes/deckhouse/modules.go create mode 100644 internal/kubernetes/deckhouse/nodegroups.go create mode 100644 internal/kubernetes/deckhouse/staticinstance.go create mode 100644 internal/kubernetes/storage/blockdevice.go create mode 100644 internal/kubernetes/storage/client.go create mode 100644 internal/kubernetes/storage/lvmvolumegroup.go create mode 100644 internal/kubernetes/storage/pvc.go create mode 100644 internal/kubernetes/storage/storageclass.go create 
mode 100644 internal/kubernetes/virtualization/client.go create mode 100644 internal/kubernetes/virtualization/cluster_virtual_image.go create mode 100644 internal/kubernetes/virtualization/vdisk.go create mode 100644 internal/kubernetes/virtualization/vm.go create mode 100644 internal/kubernetes/virtualization/vmbd.go create mode 100644 main.go rename tests/{integration_suite_test.go => cluster_creation/cluster_creation_suite_test.go} (100%) rename tests/{ => cluster_creation}/cluster_creation_test.go (91%) rename tests/{ => cluster_creation}/cluster_creation_test.yml (100%) diff --git a/ARCHITECTURE_ANALYSIS.md b/ARCHITECTURE.md similarity index 84% rename from ARCHITECTURE_ANALYSIS.md rename to ARCHITECTURE.md index f0c5451..6e73212 100644 --- a/ARCHITECTURE_ANALYSIS.md +++ b/ARCHITECTURE.md @@ -11,151 +11,10 @@ This document provides a deep analysis of the current `testkit_v2` codebase stru --- -## 1. Current Structure Analysis -### 1.1 Package Structure - -**Critical Finding**: All code is currently in a single package `integration`: -- `testkit_v2/tests/*` - All test files -- `testkit_v2/util/*` - All utility files - -This monolith package design causes: -- No encapsulation boundaries -- Global state scattered across files -- Hidden circular dependencies -- Difficulty in testing components in isolation -- Hard to understand code flow and dependencies - -### 1.2 File Organization - -#### Test Files (`testkit_v2/tests/`) -``` -tests/ -├── 00_healthcheck_test.go # Basic cluster health checks -├── 01_sds_nc_test.go # LVG (LVM Volume Group) operations -├── 03_sds_lv_test.go # PVC (Persistent Volume Claim) operations -├── 05_sds_node_configurator_test.go # Comprehensive LVM tests (thick/thin) -├── 99_finalizer_test.go # Cleanup tests -├── tools.go # Shared test utilities -└── data-exporter/ - └── base_test.go # Base test for data exporter feature -``` - -#### Utility Files (`testkit_v2/util/`) -``` -util/ -├── env.go # Environment config, flags, cluster types -├── 
filter.go # Filter/Where interfaces -├── kube_cluster_definitions.go # Cluster definition types (NEW) -├── kube_cluster.go # Cluster singleton/cache -├── kube_deckhouse_modules.go # Deckhouse module management -├── kube_deploy.go # Deployment/Service operations -├── kube_modules.go # Custom CRDs (SSHCredentials, StaticInstance) -├── kube_node.go # Node operations, execution -├── kube_secret.go # SSH credentials CRUD -├── kube_storage.go # Storage (BD, LVG, PVC, SC) -├── kube_tester.go # Test execution helpers -├── kube_vm_cluster.go # VM cluster creation, Deckhouse install -├── kube_vm.go # VM, VD, VMBD operations -├── kube.go # Core Kubernetes client setup -├── log.go # Logging utilities -├── ssh.go # SSH client operations -└── tools.go # Utility functions (retry, random) -``` +## 1. Target Architecture -### 1.3 Dependency Graph - -``` -Tests (integration package) - └─> util package (imported as "github.com/deckhouse/sds-e2e/util") - └─> Actually same package! Only different directory structure - -Current Dependencies: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -kube_cluster.go (singleton/cache) - ├─> env.go (envInit, global vars) - ├─> kube.go (InitKCluster) - └─> ssh.go (GetSshClient, tunnel creation) - -kube.go (core client setup) - ├─> kube_modules.go (D8SchemeBuilder) - └─> Multiple Kubernetes API imports - -kube_storage.go (storage operations) - ├─> kube.go (KCluster type) - ├─> filter.go (filters) - └─> tools.go (RetrySec) - -kube_node.go (node operations) - ├─> kube.go (KCluster type) - ├─> kube_modules.go (StaticInstance CRD) - ├─> filter.go (filters) - └─> ssh.go (ExecNodeSsh) - -kube_vm_cluster.go (cluster creation) - ├─> env.go (global vars) - ├─> kube.go (InitKCluster) - ├─> kube_vm.go (VM operations) - ├─> kube_node.go (AddStaticNodes) - ├─> ssh.go (SSH operations) - └─> tools.go (retry utilities) - -kube_vm.go (VM operations) - ├─> kube.go (KCluster type) - ├─> filter.go (filters) - └─> tools.go (hashMd5) - -All files → 
env.go (global state!) -All files → log.go (logging) -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -``` - -### 1.4 Major Architectural Problems - -#### Problem 1: Global State Everywhere -- `env.go` contains ~50 global variables -- Package-level variables in multiple files (`clrCache`, `mx`, etc.) -- No dependency injection -- Hard to test in isolation -- Race conditions possible - -#### Problem 2: God Object (`KCluster`) -- `KCluster` struct has 60+ methods -- Violates Single Responsibility Principle -- Methods span multiple domains: - - Kubernetes API operations - - Node management - - Storage operations - - VM operations - - Module management - - Deployment management - -#### Problem 3: Mixed Concerns -- Business logic mixed with infrastructure -- Test utilities mixed with production code -- Configuration mixed with execution -- No clear separation of layers - -#### Problem 4: Poor Encapsulation -- Everything in one package = no private boundaries -- Internal implementation details exposed -- Can't hide complexity behind interfaces - -#### Problem 5: Circular Dependencies (Hidden) -- Files import each other indirectly -- Hidden cycles through globals -- `env.go` → everything, everything → `env.go` - -#### Problem 6: Testing Difficulties -- Can't mock dependencies (globals) -- Hard to create isolated test scenarios -- Test files use same package = can access internals incorrectly - ---- - -## 2. 
Target Architecture - -### 2.1 Package Structure +### 1.1 Package Structure ``` testkit_v2/ @@ -166,7 +25,7 @@ testkit_v2/ │ ├── config/ # Configuration management │ │ ├── env.go # Environment variables │ │ ├── flags.go # CLI flags -│ │ ├── cluster_types.go # Cluster type definitions +│ │ ├── types.go # Cluster type definitions │ │ └── images.go # OS image definitions │ │ │ ├── cluster/ # Cluster management diff --git a/go.mod b/go.mod index dcbefe6..f48d40f 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/deckhouse/sds-e2e-tests +module github.com/deckhouse/storage-e2e go 1.24.2 diff --git a/pkg/testkit/cluster/cluster.go b/internal/cluster/cluster.go similarity index 90% rename from pkg/testkit/cluster/cluster.go rename to internal/cluster/cluster.go index 50fa30a..2ac8a57 100644 --- a/pkg/testkit/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -14,6 +14,22 @@ See the License for the specific language governing permissions and limitations under the License. */ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package cluster import ( @@ -28,8 +44,8 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" - "github.com/deckhouse/sds-e2e-tests/internal/config" - "github.com/deckhouse/sds-e2e-tests/internal/infrastructure/ssh" + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" ) // LoadClusterConfig loads and validates a cluster configuration from a YAML file diff --git a/internal/cluster/interface.go b/internal/cluster/interface.go new file mode 100644 index 0000000..e5062a2 --- /dev/null +++ b/internal/cluster/interface.go @@ -0,0 +1,34 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +// Cluster is the public interface for cluster operations +type Cluster interface { + // Core operations + Name() string + + // Resource operations + // TODO: Implement these interfaces + // Namespaces() NamespaceClient + // Nodes() NodeClient + // Pods() PodClient + // Storage() StorageClient + // Virtualization() VirtualizationClient + // Deckhouse() DeckhouseClient + + Close() error +} diff --git a/internal/config/flags.go b/internal/config/flags.go index c6664f7..863506a 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -4,48 +4,27 @@ package config import ( "flag" - "fmt" - "os" ) var ( // alwaysUseExisting indicates to always use an existing cluster if available - alwaysUseExisting = flag.Bool("alwaysUseExisting", false, "Always use an existing cluster if available") + alwaysUseExisting = flag.Bool("always-use-existing", false, "Always use an existing cluster if available") // alwaysCreateNew indicates to always create a new cluster - alwaysCreateNew = flag.Bool("alwaysCreateNew", false, "Always create a new cluster") + alwaysCreateNew = flag.Bool("always-create-new", false, "Always create a new cluster") ) -// init registers flags, aliases, and validates that at least one of alwaysUseExisting or alwaysCreateNew is set +// Just a dummy for flags to avoid compiler error func init() { - // Register short aliases for flags - flag.BoolVar(alwaysUseExisting, "e", false, "Alias for -alwaysUseExisting") - flag.BoolVar(alwaysCreateNew, "n", false, "Alias for -alwaysCreateNew") - - flag.Usage = usage - flag.Parse() - - // Validate that at least one of the flags is set - if (!*alwaysUseExisting && !*alwaysCreateNew) || (*alwaysUseExisting && *alwaysCreateNew) { - fmt.Fprintf(os.Stderr, "Error: Either --alwaysUseExisting (-e) or --alwaysCreateNew (-n) must be set, but not both\n\n") - flag.Usage() - os.Exit(1) - } -} - -// usage prints the usage information for the command-line flags -func usage() { - 
fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0]) - fmt.Fprintf(os.Stderr, "Options:\n") - flag.PrintDefaults() - fmt.Fprintf(os.Stderr, "\nNote: Either -alwaysUseExisting or -alwaysCreateNew must be set, but not both together!\n") + _ = *alwaysUseExisting + _ = *alwaysCreateNew } // AlwaysUseExisting returns the value of the alwaysUseExisting flag func AlwaysUseExisting() bool { - return *alwaysUseExisting + return false } // AlwaysCreateNew returns the value of the alwaysCreateNew flag func AlwaysCreateNew() bool { - return *alwaysCreateNew + return true } diff --git a/internal/config/images.go b/internal/config/images.go new file mode 100644 index 0000000..9b849e6 --- /dev/null +++ b/internal/config/images.go @@ -0,0 +1,30 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package config + +// OSTypeMap maps OS type names to their definitions +var OSTypeMap = map[string]OSType{ + "Ubuntu 22.04 6.2.0-39-generic": { + ImageURL: "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img", + KernelVersion: "6.2.0-39-generic", + }, + "Ubuntu 24.04 6.8.0-53-generic": { + ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", + KernelVersion: "6.8.0-53-generic", + }, +} + diff --git a/internal/config/types.go b/internal/config/types.go index 7a19665..03f7798 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -47,19 +47,6 @@ type OSType struct { KernelVersion string } -var ( - OSTypeMap = map[string]OSType{ - "Ubuntu 22.04 6.2.0-39-generic": { - ImageURL: "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img", - KernelVersion: "6.2.0-39-generic", - }, - "Ubuntu 24.04 6.8.0-53-generic": { - ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", - KernelVersion: "6.8.0-53-generic", - }, - } -) - // AuthMethod represents the authentication method type AuthMethod string diff --git a/internal/kubernetes/apps/daemonset.go b/internal/kubernetes/apps/daemonset.go new file mode 100644 index 0000000..b149099 --- /dev/null +++ b/internal/kubernetes/apps/daemonset.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package apps + +// TODO: Implement daemonset operations + diff --git a/internal/kubernetes/apps/deployment.go b/internal/kubernetes/apps/deployment.go new file mode 100644 index 0000000..c7b48b7 --- /dev/null +++ b/internal/kubernetes/apps/deployment.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apps + +// TODO: Implement deployment operations + diff --git a/internal/kubernetes/client.go b/internal/kubernetes/client.go new file mode 100644 index 0000000..cec4144 --- /dev/null +++ b/internal/kubernetes/client.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package kubernetes + +// TODO: Implement base client setup and scheme registration + diff --git a/internal/kubernetes/core/namespace.go b/internal/kubernetes/core/namespace.go new file mode 100644 index 0000000..8abfbbd --- /dev/null +++ b/internal/kubernetes/core/namespace.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +// TODO: Implement namespace operations + diff --git a/internal/kubernetes/core/node.go b/internal/kubernetes/core/node.go new file mode 100644 index 0000000..ea7097c --- /dev/null +++ b/internal/kubernetes/core/node.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package core + +// TODO: Implement node operations + diff --git a/internal/kubernetes/core/pod.go b/internal/kubernetes/core/pod.go new file mode 100644 index 0000000..f3e160c --- /dev/null +++ b/internal/kubernetes/core/pod.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +// TODO: Implement pod operations + diff --git a/internal/kubernetes/core/service.go b/internal/kubernetes/core/service.go new file mode 100644 index 0000000..50b8497 --- /dev/null +++ b/internal/kubernetes/core/service.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package core + +// TODO: Implement service operations + diff --git a/internal/kubernetes/deckhouse/client.go b/internal/kubernetes/deckhouse/client.go new file mode 100644 index 0000000..af20937 --- /dev/null +++ b/internal/kubernetes/deckhouse/client.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deckhouse + +// TODO: Implement Deckhouse client interface + diff --git a/internal/kubernetes/deckhouse/modules.go b/internal/kubernetes/deckhouse/modules.go new file mode 100644 index 0000000..06ca95a --- /dev/null +++ b/internal/kubernetes/deckhouse/modules.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deckhouse + +// TODO: Implement Deckhouse module operations + diff --git a/internal/kubernetes/deckhouse/nodegroups.go b/internal/kubernetes/deckhouse/nodegroups.go new file mode 100644 index 0000000..275a1f4 --- /dev/null +++ b/internal/kubernetes/deckhouse/nodegroups.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deckhouse + +// TODO: Implement nodegroup operations + diff --git a/internal/kubernetes/deckhouse/staticinstance.go b/internal/kubernetes/deckhouse/staticinstance.go new file mode 100644 index 0000000..a4d9df7 --- /dev/null +++ b/internal/kubernetes/deckhouse/staticinstance.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deckhouse + +// TODO: Implement static instance operations + diff --git a/internal/kubernetes/storage/blockdevice.go b/internal/kubernetes/storage/blockdevice.go new file mode 100644 index 0000000..54c04ca --- /dev/null +++ b/internal/kubernetes/storage/blockdevice.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package storage + +// TODO: Implement blockdevice operations + diff --git a/internal/kubernetes/storage/client.go b/internal/kubernetes/storage/client.go new file mode 100644 index 0000000..5b5f234 --- /dev/null +++ b/internal/kubernetes/storage/client.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package storage + +// TODO: Implement storage client interface + diff --git a/internal/kubernetes/storage/lvmvolumegroup.go b/internal/kubernetes/storage/lvmvolumegroup.go new file mode 100644 index 0000000..d43ac31 --- /dev/null +++ b/internal/kubernetes/storage/lvmvolumegroup.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package storage + +// TODO: Implement LVM volume group operations + diff --git a/internal/kubernetes/storage/pvc.go b/internal/kubernetes/storage/pvc.go new file mode 100644 index 0000000..6412ff9 --- /dev/null +++ b/internal/kubernetes/storage/pvc.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package storage + +// TODO: Implement PVC operations + diff --git a/internal/kubernetes/storage/storageclass.go b/internal/kubernetes/storage/storageclass.go new file mode 100644 index 0000000..161c319 --- /dev/null +++ b/internal/kubernetes/storage/storageclass.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package storage + +// TODO: Implement storageclass operations + diff --git a/internal/kubernetes/virtualization/client.go b/internal/kubernetes/virtualization/client.go new file mode 100644 index 0000000..7bb06b7 --- /dev/null +++ b/internal/kubernetes/virtualization/client.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package virtualization + +// TODO: Implement virtualization client interface + diff --git a/internal/kubernetes/virtualization/cluster_virtual_image.go b/internal/kubernetes/virtualization/cluster_virtual_image.go new file mode 100644 index 0000000..e6bb7fc --- /dev/null +++ b/internal/kubernetes/virtualization/cluster_virtual_image.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +// TODO: Implement cluster virtual image operations + diff --git a/internal/kubernetes/virtualization/vdisk.go b/internal/kubernetes/virtualization/vdisk.go new file mode 100644 index 0000000..b7326cb --- /dev/null +++ b/internal/kubernetes/virtualization/vdisk.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package virtualization + +// TODO: Implement virtual disk operations + diff --git a/internal/kubernetes/virtualization/vm.go b/internal/kubernetes/virtualization/vm.go new file mode 100644 index 0000000..d77e4dc --- /dev/null +++ b/internal/kubernetes/virtualization/vm.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +// TODO: Implement VM operations + diff --git a/internal/kubernetes/virtualization/vmbd.go b/internal/kubernetes/virtualization/vmbd.go new file mode 100644 index 0000000..35329f6 --- /dev/null +++ b/internal/kubernetes/virtualization/vmbd.go @@ -0,0 +1,20 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package virtualization + +// TODO: Implement VM block device operations + diff --git a/main.go b/main.go new file mode 100644 index 0000000..d0cb124 --- /dev/null +++ b/main.go @@ -0,0 +1,36 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/deckhouse/storage-e2e/internal/config" +) + +func main() { + // config.ParseFlags() // TODO - investigate flag parsing with go test later. + + // Validate that at least one of the flags is set + if (!config.AlwaysUseExisting() && !config.AlwaysCreateNew()) || (config.AlwaysUseExisting() && config.AlwaysCreateNew()) { + fmt.Fprintf(os.Stderr, "Error: Either --always-use-existing or --always-create-new must be set, but not both\n\n") + flag.Usage() + os.Exit(1) + } +} diff --git a/tests/integration_suite_test.go b/tests/cluster_creation/cluster_creation_suite_test.go similarity index 100% rename from tests/integration_suite_test.go rename to tests/cluster_creation/cluster_creation_suite_test.go diff --git a/tests/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go similarity index 91% rename from tests/cluster_creation_test.go rename to tests/cluster_creation/cluster_creation_test.go index 99ed5d0..972e165 100644 --- a/tests/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -19,14 +19,14 @@ package integration import ( "fmt" + "k8s.io/client-go/rest" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - "k8s.io/client-go/rest" - // "github.com/deckhouse/sds-e2e-tests/pkg/cluster" - "github.com/deckhouse/sds-e2e-tests/internal/config" - "github.com/deckhouse/sds-e2e-tests/internal/infrastructure/ssh" - "github.com/deckhouse/sds-e2e-tests/pkg/testkit/cluster" + "github.com/deckhouse/storage-e2e/internal/cluster" + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" ) var _ = Describe("Cluster Creation", func() { diff --git a/tests/cluster_creation_test.yml b/tests/cluster_creation/cluster_creation_test.yml similarity index 100% rename from tests/cluster_creation_test.yml rename to tests/cluster_creation/cluster_creation_test.yml From 704011f91b3a6e297707f579ea9e9926ee3a0bee Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 19:29:02 +0300 Subject: [PATCH 04/48] kubeconfig is finally obtained and stored in temp --- README.md | 102 +++--------------- internal/cluster/cluster.go | 93 +++++++++++++--- internal/config/env.go | 6 ++ internal/infrastructure/ssh/client.go | 4 +- .../cluster_creation/cluster_creation_test.go | 10 +- 5 files changed, 105 insertions(+), 110 deletions(-) diff --git a/README.md b/README.md index 586c400..98219ad 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,23 @@ -# E2E Tests +# E2E tests -This package contains end-to-end tests for SDS (Storage for Deckhouse Services). 
+## Quick start guide -## Architecture +### Prerequisites -The package follows a clean, modular architecture: - -- **`internal/config/`** - Configuration management (environment variables, cluster definitions, module configs) -- **`internal/cluster/`** - Cluster lifecycle management (manager, builder) -- **`internal/kubernetes/`** - Kubernetes API clients (virtualization, deckhouse modules) -- **`internal/infrastructure/`** - Infrastructure operations (SSH, VM provisioning) -- **`internal/logger/`** - Logging utilities -- **`internal/utils/`** - Utility functions (retry, crypto) -- **`pkg/cluster/`** - Public cluster interface -- **`pkg/testkit/`** - Public test helpers -- **`tests/`** - Test files using Ginkgo - -## Cluster Creation Workflow - -The cluster builder implements the following workflow: - -1. **Connect to Base Cluster**: Get kubeconfig of the base Deckhouse cluster and connect via SSH -2. **Enable Virtualization Module**: Enable Deckhouse Virtualization Platform module on base cluster -3. **Create Virtual Machines**: Create VMs as defined in cluster configuration -4. **Deploy Deckhouse**: Connect to master VM via SSH and deploy Deckhouse Kubernetes Platform -5. **Get Kubeconfig**: Retrieve kubeconfig of the nested cluster -6. **Enable Modules**: Enable and configure required modules in the nested cluster - -## Quick start - Running tests -// TODO amarkov: I strongly recommend add a full example how to run tests with all environments, arguments and commands. - -## Writing Tests - -Tests are written using Ginkgo. Keep test files simple - they should only contain test logic. Business logic is in other modules. 
- -Example: - -```go -var _ = Describe("Cluster Creation", func() { - var ( - ctx context.Context - baseCluster cluster.Cluster - testCluster cluster.Cluster - clusterCfg *config.DKPClusterConfig - ) - - BeforeEach(func() { - ctx = context.Background() - baseCluster, _ = testkit.GetCluster(ctx, cfg.BaseCluster.KubeConfig, "") - clusterCfg = &config.DKPClusterConfig{ - // Define your cluster configuration - } - }) - - It("should create a nested Kubernetes cluster", func() { - testCluster, err := testkit.BuildTestCluster(ctx, baseCluster, clusterCfg) - Expect(err).NotTo(HaveOccurred()) - - err = testCluster.EnsureReady(ctx) - Expect(err).NotTo(HaveOccurred()) - }) -}) -``` - -## Configuration - -Configuration is loaded from environment variables: - -- `BASE_KUBECONFIG` - Path to base cluster kubeconfig -- `BASE_SSH_HOST` - Base cluster SSH host -- `BASE_SSH_USER` - Base cluster SSH user -- `BASE_SSH_KEY` - Base cluster SSH key path -- `NESTED_KUBECONFIG` - Path for nested cluster kubeconfig -- `NESTED_SSH_HOST` - Nested cluster SSH host -- And more... See `internal/config/config.go` for full list - -## Running Tests +#### Required exports ```bash -go test ./tests/... -v +# Passphrase of the private key used to connect to the base cluster +export SSH_PASSPHRASE='passphrase' + +# Used in case if the code cannot obtain kubeconfig from master itself because e.g. password is required in sudo +export KUBE_CONFIG_PATH='/path/to/kubeconfig/file' ``` -Or with Ginkgo: +#### Running a test example ```bash -ginkgo ./tests/... 
+go test -v ./tests/cluster_creation -count=1 +# count=1 prevents go test from using cached test results ``` -## Structure - -- `tests/` - Test files -- `internal/` - Internal packages (not importable outside) -- `pkg/` - Public API (importable) - -## Notes - -- The code is independent of legacy code (no imports from `legacy/`) -- Test files are simple and focus on test logic only -- Business logic is in separate modules -- The architecture allows for easy extension and testing diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index 2ac8a57..fdc43a3 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -158,6 +158,37 @@ func validateNode(node config.ClusterNode, isMaster bool) error { return nil } +// expandPath expands ~ to home directory and resolves symlinks if present +func expandPath(path string) (string, error) { + var expandedPath string + + // Expand ~ to home directory + if strings.HasPrefix(path, "~") { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + + if path == "~" { + expandedPath = homeDir + } else { + expandedPath = filepath.Join(homeDir, strings.TrimPrefix(path, "~/")) + } + } else { + expandedPath = path + } + + // Resolve symlinks if present (usually it won't be a symlink) + // If resolution fails (e.g., path doesn't exist or is not a symlink), use the expanded path + resolvedPath, err := filepath.EvalSymlinks(expandedPath) + if err != nil { + // Path might not exist yet or might not be a symlink - use expanded path as-is + return expandedPath, nil + } + + return resolvedPath, nil +} + // GetKubeconfig connects to the master node via SSH, retrieves kubeconfig from /etc/kubernetes/admin.conf, // and returns a rest.Config that can be used with Kubernetes clients. // If sshClient is provided, it will be used instead of creating a new connection. 
@@ -177,13 +208,6 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re defer sshClient.Close() } - // Read kubeconfig from /etc/kubernetes/admin.conf - ctx := context.Background() - kubeconfigContent, err := sshClient.Exec(ctx, "sudo cat /etc/kubernetes/admin.conf") - if err != nil { - return nil, fmt.Errorf("failed to read kubeconfig from master: %w", err) - } - // Get the test file name from the caller _, callerFile, _, ok := runtime.Caller(1) if !ok { @@ -191,25 +215,62 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re } testFileName := strings.TrimSuffix(filepath.Base(callerFile), filepath.Ext(callerFile)) - // Determine the temp directory path relative to e2e-tests - // callerFile is in tests/ directory, so we go up one level to reach e2e-tests/ - e2eTestsDir := filepath.Join(filepath.Dir(callerFile), "..") - tempDir := filepath.Join(e2eTestsDir, "temp", testFileName) + // Determine the temp directory path in the repo root + // callerFile is in tests/{test-dir}/, so we go up two levels to reach repo root + callerDir := filepath.Dir(callerFile) + repoRootPath := filepath.Join(callerDir, "..", "..") + // Resolve the .. 
parts to get absolute path + repoRoot, err := filepath.Abs(repoRootPath) + if err != nil { + return nil, fmt.Errorf("failed to resolve repo root path: %w", err) + } + tempDir := filepath.Join(repoRoot, "temp", testFileName) // Create temp directory if it doesn't exist if err := os.MkdirAll(tempDir, 0755); err != nil { return nil, fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) } - // Create kubeconfig file in temp directory - kubeconfigPath := filepath.Join(tempDir, fmt.Sprintf("kubeconfig-%s.yaml", masterIP)) + // Create kubeconfig file path in temp directory + kubeconfigPath := filepath.Join(tempDir, fmt.Sprintf("kubeconfig-%s.yml", masterIP)) + + var kubeconfigContent []byte + + // Try to read kubeconfig from /etc/kubernetes/admin.conf via SSH + ctx := context.Background() + kubeconfigContentStr, err := sshClient.Exec(ctx, "sudo -n cat /etc/kubernetes/admin.conf") + if err != nil { + // SSH retrieval failed (likely due to sudo password requirement) + // Try to use KUBE_CONFIG_PATH if set, otherwise notify user + if config.KubeConfigPath != "" { + // Expand path to handle ~ and resolve symlinks if present + resolvedPath, err := expandPath(config.KubeConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to expand KUBE_CONFIG_PATH (%s): %w", config.KubeConfigPath, err) + } + // Read kubeconfig content from the provided file + kubeconfigContent, err = os.ReadFile(resolvedPath) + if err != nil { + return nil, fmt.Errorf("failed to read kubeconfig from KUBE_CONFIG_PATH (%s): %w", resolvedPath, err) + } + } else { + // KUBE_CONFIG_PATH not set, notify user and fail + return nil, fmt.Errorf("failed to read kubeconfig from master (this may occur if sudo requires a password). "+ + "Please download the kubeconfig file manually and provide its full path via KUBE_CONFIG_PATH environment variable. 
"+ + "Original error: %w", err) + } + } else { + // SSH succeeded - use the content from SSH + kubeconfigContent = []byte(kubeconfigContentStr) + } + + // Write kubeconfig content to temp file (always copy to temp, regardless of source) kubeconfigFile, err := os.Create(kubeconfigPath) if err != nil { return nil, fmt.Errorf("failed to create kubeconfig file %s: %w", kubeconfigPath, err) } - // Write kubeconfig content to file - if _, err := kubeconfigFile.Write([]byte(kubeconfigContent)); err != nil { + if _, err := kubeconfigFile.Write(kubeconfigContent); err != nil { kubeconfigFile.Close() return nil, fmt.Errorf("failed to write kubeconfig to file: %w", err) } @@ -217,7 +278,7 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re return nil, fmt.Errorf("failed to close kubeconfig file: %w", err) } - // Build rest.Config from the kubeconfig file + // Build rest.Config from the kubeconfig file in temp directory config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { return nil, fmt.Errorf("failed to build config from kubeconfig: %w", err) diff --git a/internal/config/env.go b/internal/config/env.go index e06a767..37c6767 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -7,5 +7,11 @@ import ( ) var ( + // ssh passphrase for ssh private key used to connect to base cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") + + // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), + // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download + // the kubeconfig manually and set this environment variable. 
+ KubeConfigPath = os.Getenv("KUBE_CONFIG_PATH") ) diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 6d3dfe9..5dea714 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -92,13 +92,15 @@ func createSSHConfig(user, keyPath string) (*ssh.ClientConfig, error) { return nil, fmt.Errorf("unable to read private key %s: %w", expandedKeyPath, err) } + // Always try parsing without passphrase first signer, err := ssh.ParsePrivateKey(key) if err != nil { + // Only if the error specifically indicates passphrase protection, try with passphrase if !strings.Contains(err.Error(), "ssh: this private key is passphrase protected") { return nil, fmt.Errorf("unable to parse private key: %w", err) } - // Try to get passphrase from environment variable first + // Key is passphrase-protected, get passphrase var pass []byte if envPass := os.Getenv("SSH_PASSPHRASE"); envPass != "" { pass = []byte(envPass) diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 972e165..2cf74e9 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -32,9 +32,9 @@ import ( var _ = Describe("Cluster Creation", func() { var ( yamlConfigFilename string = "cluster_creation_test.yml" - baseClusterMasterIP string = "10.0.0.181" - baseClusterUser string = "w-ansible" - baseClusterSSHPrivateKey string = "~/.ssh/aya_rsa" + baseClusterMasterIP string = "172.17.1.67" + baseClusterUser string = "tfadm" + baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" ) BeforeEach(func(ctx SpecContext) { @@ -61,6 +61,10 @@ var _ = Describe("Cluster Creation", func() { Expect(err).NotTo(HaveOccurred()) }) + By("Establishing ssh tunnel to the base cluster with 6445 port forwarding", func() { + + }) + _ = sshClient // TODO: use sshClient _ = clusterDefinition // TODO: use clusterDefinition _ = kubeconfig // TODO: use 
kubeconfig From d768fd3273e90c46c7ee1b030531579cec33ecdf Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 20:44:29 +0300 Subject: [PATCH 05/48] ssh tunnel established to the base cluster's master --- internal/cluster/cluster.go | 77 +++++++++-- internal/infrastructure/net/port.go | 35 +++++ internal/infrastructure/ssh/client.go | 87 ++++++------ internal/infrastructure/ssh/interface.go | 10 +- internal/infrastructure/ssh/tunnel.go | 124 ++++++++++++++++++ internal/infrastructure/ssh/types.go | 24 ++++ .../cluster_creation/cluster_creation_test.go | 67 ++++++---- 7 files changed, 342 insertions(+), 82 deletions(-) create mode 100644 internal/infrastructure/net/port.go create mode 100644 internal/infrastructure/ssh/tunnel.go create mode 100644 internal/infrastructure/ssh/types.go diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index fdc43a3..6b6ca7b 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -190,17 +190,17 @@ func expandPath(path string) (string, error) { } // GetKubeconfig connects to the master node via SSH, retrieves kubeconfig from /etc/kubernetes/admin.conf, -// and returns a rest.Config that can be used with Kubernetes clients. +// and returns a rest.Config that can be used with Kubernetes clients, along with the path to the kubeconfig file. // If sshClient is provided, it will be used instead of creating a new connection. // If sshClient is nil, a new connection will be created and closed automatically. 
-func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*rest.Config, error) { +func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*rest.Config, string, error) { // Create SSH client if not provided shouldClose := false if sshClient == nil { var err error sshClient, err = ssh.NewClient(user, masterIP, keyPath) if err != nil { - return nil, fmt.Errorf("failed to create SSH client: %w", err) + return nil, "", fmt.Errorf("failed to create SSH client: %w", err) } shouldClose = true } @@ -211,7 +211,7 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re // Get the test file name from the caller _, callerFile, _, ok := runtime.Caller(1) if !ok { - return nil, fmt.Errorf("failed to get caller file information") + return nil, "", fmt.Errorf("failed to get caller file information") } testFileName := strings.TrimSuffix(filepath.Base(callerFile), filepath.Ext(callerFile)) @@ -222,13 +222,13 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re // Resolve the .. 
parts to get absolute path repoRoot, err := filepath.Abs(repoRootPath) if err != nil { - return nil, fmt.Errorf("failed to resolve repo root path: %w", err) + return nil, "", fmt.Errorf("failed to resolve repo root path: %w", err) } tempDir := filepath.Join(repoRoot, "temp", testFileName) // Create temp directory if it doesn't exist if err := os.MkdirAll(tempDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) + return nil, "", fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) } // Create kubeconfig file path in temp directory @@ -246,16 +246,16 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re // Expand path to handle ~ and resolve symlinks if present resolvedPath, err := expandPath(config.KubeConfigPath) if err != nil { - return nil, fmt.Errorf("failed to expand KUBE_CONFIG_PATH (%s): %w", config.KubeConfigPath, err) + return nil, "", fmt.Errorf("failed to expand KUBE_CONFIG_PATH (%s): %w", config.KubeConfigPath, err) } // Read kubeconfig content from the provided file kubeconfigContent, err = os.ReadFile(resolvedPath) if err != nil { - return nil, fmt.Errorf("failed to read kubeconfig from KUBE_CONFIG_PATH (%s): %w", resolvedPath, err) + return nil, "", fmt.Errorf("failed to read kubeconfig from KUBE_CONFIG_PATH (%s): %w", resolvedPath, err) } } else { // KUBE_CONFIG_PATH not set, notify user and fail - return nil, fmt.Errorf("failed to read kubeconfig from master (this may occur if sudo requires a password). "+ + return nil, "", fmt.Errorf("failed to read kubeconfig from master (this may occur if sudo requires a password). "+ "Please download the kubeconfig file manually and provide its full path via KUBE_CONFIG_PATH environment variable. 
"+ "Original error: %w", err) } @@ -267,22 +267,71 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re // Write kubeconfig content to temp file (always copy to temp, regardless of source) kubeconfigFile, err := os.Create(kubeconfigPath) if err != nil { - return nil, fmt.Errorf("failed to create kubeconfig file %s: %w", kubeconfigPath, err) + return nil, "", fmt.Errorf("failed to create kubeconfig file %s: %w", kubeconfigPath, err) } if _, err := kubeconfigFile.Write(kubeconfigContent); err != nil { kubeconfigFile.Close() - return nil, fmt.Errorf("failed to write kubeconfig to file: %w", err) + return nil, "", fmt.Errorf("failed to write kubeconfig to file: %w", err) } if err := kubeconfigFile.Close(); err != nil { - return nil, fmt.Errorf("failed to close kubeconfig file: %w", err) + return nil, "", fmt.Errorf("failed to close kubeconfig file: %w", err) } // Build rest.Config from the kubeconfig file in temp directory config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { - return nil, fmt.Errorf("failed to build config from kubeconfig: %w", err) + return nil, "", fmt.Errorf("failed to build config from kubeconfig: %w", err) } - return config, nil + return config, kubeconfigPath, nil +} + +// UpdateKubeconfigPort updates the kubeconfig file to use the specified local port +// It replaces the server URL with 127.0.0.1:port +func UpdateKubeconfigPort(kubeconfigPath string, localPort int) error { + content, err := os.ReadFile(kubeconfigPath) + if err != nil { + return fmt.Errorf("failed to read kubeconfig file: %w", err) + } + + contentStr := string(content) + // Replace server URL with localhost and new port + // Common patterns: server: https://:6445 or server: https://127.0.0.1:6445 + // Also handle: server: https://:6443 (standard k8s port) + lines := strings.Split(contentStr, "\n") + updated := false + for i, line := range lines { + trimmedLine := strings.TrimSpace(line) + if 
strings.HasPrefix(trimmedLine, "server:") { + // Replace the entire server URL with 127.0.0.1:port + // Pattern: server: https://: + if strings.Contains(trimmedLine, "https://") { + // Find the URL part and replace it + urlStart := strings.Index(trimmedLine, "https://") + if urlStart != -1 { + // Replace the URL with localhost:port + // Preserve any indentation before "server:" + indent := "" + for j := 0; j < len(line) && (line[j] == ' ' || line[j] == '\t'); j++ { + indent += string(line[j]) + } + newURL := fmt.Sprintf("https://127.0.0.1:%d", localPort) + lines[i] = indent + "server: " + newURL + updated = true + } + } + } + } + + if !updated { + return fmt.Errorf("could not find server URL in kubeconfig to update") + } + + newContent := strings.Join(lines, "\n") + if err := os.WriteFile(kubeconfigPath, []byte(newContent), 0600); err != nil { + return fmt.Errorf("failed to write updated kubeconfig: %w", err) + } + + return nil } diff --git a/internal/infrastructure/net/port.go b/internal/infrastructure/net/port.go new file mode 100644 index 0000000..073dd6a --- /dev/null +++ b/internal/infrastructure/net/port.go @@ -0,0 +1,35 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package net + +import ( + "fmt" + "net" +) + +// FindFreePort finds a free port starting from startPort and incrementing until a free port is found +func FindFreePort(startPort int) (int, error) { + for port := startPort; port < startPort+100; port++ { + addr, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if err == nil { + addr.Close() + return port, nil + } + } + return 0, fmt.Errorf("could not find a free port starting from %d", startPort) +} + diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 5dea714..536702c 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -37,11 +37,6 @@ type client struct { sshClient *ssh.Client } -// NewFactory creates a new SSH factory -func NewFactory() SSHFactory { - return &factory{} -} - // readPassword reads a password from the terminal func readPassword(prompt string) ([]byte, error) { fmt.Fprint(os.Stderr, prompt) @@ -149,49 +144,71 @@ func (c *client) Create(user, host, keyPath string) (SSHClient, error) { return &client{sshClient: sshClient}, nil } -// CreateForward creates an SSH client with port forwarding -func (c *client) CreateForward(user, host, keyPath string, localPort, remotePort string) (SSHClient, error) { - // First create a regular connection - baseClient, err := c.Create(user, host, keyPath) - if err != nil { - return nil, err - } - - // Set up port forwarding - baseClientImpl := baseClient.(*client) - listener, err := net.Listen("tcp", "localhost:"+localPort) +// StartTunnel starts an SSH tunnel with port forwarding from local to remote +// It returns a function to stop the tunnel and an error if the tunnel fails to start +func (c *client) StartTunnel(localPort, remotePort string) (func() error, error) { + listener, err := net.Listen("tcp", "127.0.0.1:"+localPort) if err != nil { - baseClientImpl.Close() return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) } + stopChan := 
make(chan struct{}) + go func() { + defer listener.Close() for { - localConn, err := listener.Accept() - if err != nil { + // Check if we should stop before accepting + select { + case <-stopChan: return + default: } - remoteConn, err := baseClientImpl.sshClient.Dial("tcp", "localhost:"+remotePort) + // Set a deadline for Accept to allow periodic checking of stopChan + localConn, err := listener.Accept() if err != nil { - localConn.Close() - continue + // Listener closed or error occurred + select { + case <-stopChan: + return + default: + // Continue if not stopped + continue + } } go func() { - io.Copy(localConn, remoteConn) - localConn.Close() - remoteConn.Close() - }() - go func() { - io.Copy(remoteConn, localConn) - localConn.Close() - remoteConn.Close() + defer localConn.Close() + remoteConn, err := c.sshClient.Dial("tcp", "127.0.0.1:"+remotePort) + if err != nil { + // Connection failed, just return - the error will be visible to the client + return + } + defer remoteConn.Close() + + // Copy data bidirectionally + done := make(chan struct{}, 2) + go func() { + io.Copy(localConn, remoteConn) + done <- struct{}{} + }() + go func() { + io.Copy(remoteConn, localConn) + done <- struct{}{} + }() + + // Wait for either direction to finish + <-done }() } }() - return baseClient, nil + stop := func() error { + close(stopChan) + return listener.Close() + } + + return stop, nil } // Exec executes a command on the remote host @@ -260,11 +277,3 @@ func NewClient(user, host, keyPath string) (SSHClient, error) { var c client return c.Create(user, host, keyPath) } - -// factory implements Factory interface -type factory struct{} - -// CreateClient creates a new SSH client -func (f *factory) CreateClient(user, host, keyPath string) (SSHClient, error) { - return NewClient(user, host, keyPath) -} diff --git a/internal/infrastructure/ssh/interface.go b/internal/infrastructure/ssh/interface.go index 6a35de8..a51d056 100644 --- a/internal/infrastructure/ssh/interface.go +++ 
b/internal/infrastructure/ssh/interface.go @@ -23,8 +23,9 @@ type SSHClient interface { // Create creates a new SSH client Create(user, host, keyPath string) (SSHClient, error) - // CreateForward creates an SSH client with port forwarding - CreateForward(user, host, keyPath string, localPort, remotePort string) (SSHClient, error) + // StartTunnel starts an SSH tunnel with port forwarding from local to remote + // It returns a function to stop the tunnel and an error if the tunnel fails to start + StartTunnel(localPort, remotePort string) (stop func() error, err error) // Exec executes a command on the remote host Exec(ctx context.Context, cmd string) (string, error) @@ -38,8 +39,3 @@ type SSHClient interface { // Close closes the SSH connection Close() error } - -// Factory provides a way to create SSH clients -type SSHFactory interface { - CreateClient(user, host, keyPath string) (SSHClient, error) -} diff --git a/internal/infrastructure/ssh/tunnel.go b/internal/infrastructure/ssh/tunnel.go new file mode 100644 index 0000000..d8432e1 --- /dev/null +++ b/internal/infrastructure/ssh/tunnel.go @@ -0,0 +1,124 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ssh + +import ( + "fmt" + "io" + netstd "net" + "strconv" + + netpkg "github.com/deckhouse/storage-e2e/internal/infrastructure/net" + "golang.org/x/crypto/ssh" +) + +// StartTunnel starts an SSH tunnel with port forwarding from local to remote +// It returns a function to stop the tunnel and an error if the tunnel fails to start +func StartTunnel(sshClient *ssh.Client, localPort, remotePort string) (func() error, error) { + listener, err := netstd.Listen("tcp", "127.0.0.1:"+localPort) + if err != nil { + return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) + } + + stopChan := make(chan struct{}) + + go func() { + defer listener.Close() + for { + // Check if we should stop before accepting + select { + case <-stopChan: + return + default: + } + + // Set a deadline for Accept to allow periodic checking of stopChan + localConn, err := listener.Accept() + if err != nil { + // Listener closed or error occurred + select { + case <-stopChan: + return + default: + // Continue if not stopped + continue + } + } + + go func() { + defer localConn.Close() + remoteConn, err := sshClient.Dial("tcp", "127.0.0.1:"+remotePort) + if err != nil { + // Connection failed, just return - the error will be visible to the client + return + } + defer remoteConn.Close() + + // Copy data bidirectionally + done := make(chan struct{}, 2) + go func() { + io.Copy(localConn, remoteConn) + done <- struct{}{} + }() + go func() { + io.Copy(remoteConn, localConn) + done <- struct{}{} + }() + + // Wait for either direction to finish + <-done + }() + } + }() + + stop := func() error { + close(stopChan) + return listener.Close() + } + + return stop, nil +} + +// EstablishSSHTunnel establishes an SSH tunnel with port forwarding from the master node to the same port of client, running the test +// It finds a free local port starting from remotePort and creates the tunnel +// Returns the tunnel info, local port and error if the tunnel fails to start +func 
EstablishSSHTunnel(sshClient SSHClient, remotePort string) (*TunnelInfo, error) { + // Find a free local port starting from remotePort + remotePortInt := 1024 + if parsed, err := strconv.Atoi(remotePort); err == nil { + remotePortInt = parsed + } + + localPort, err := netpkg.FindFreePort(remotePortInt) + if err != nil { + return nil, fmt.Errorf("failed to find free port: %w", err) + } + + // Start the SSH tunnel + stopFunc, err := sshClient.StartTunnel(strconv.Itoa(localPort), remotePort) + if err != nil { + return nil, fmt.Errorf("failed to start SSH tunnel: %w", err) + } + + tunnelInfo := &TunnelInfo{ + LocalPort: localPort, + RemotePort: remotePortInt, + StopFunc: stopFunc, + } + + return tunnelInfo, nil +} diff --git a/internal/infrastructure/ssh/types.go b/internal/infrastructure/ssh/types.go new file mode 100644 index 0000000..2c1a5d9 --- /dev/null +++ b/internal/infrastructure/ssh/types.go @@ -0,0 +1,24 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ssh + +// TunnelInfo holds information about an established SSH tunnel +type TunnelInfo struct { + LocalPort int + RemotePort int + StopFunc func() error +} diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 2cf74e9..7f40935 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -35,13 +35,18 @@ var _ = Describe("Cluster Creation", func() { baseClusterMasterIP string = "172.17.1.67" baseClusterUser string = "tfadm" baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" + + err error + sshclient ssh.SSHClient + kubeconfig *rest.Config + kubeconfigPath string + tunnelinfo *ssh.TunnelInfo ) BeforeEach(func(ctx SpecContext) { var err error var clusterDefinition *config.ClusterDefinition - var kubeconfig *rest.Config - var sshClient ssh.SSHClient + var tunnelinfo *ssh.TunnelInfo // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { @@ -49,31 +54,49 @@ var _ = Describe("Cluster Creation", func() { Expect(err).NotTo(HaveOccurred()) }) - // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) - By("Establishing ssh connection to the base cluster", func() { - sshClient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) - Expect(err).NotTo(HaveOccurred()) + // Clean up tunnel when test completes + DeferCleanup(func() { + if tunnelinfo != nil && tunnelinfo.StopFunc != nil { + _ = tunnelinfo.StopFunc() + } }) - // Stage 3: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) - By("Get kubeconfig: Getting kubeconfig from the base cluster", func() { - kubeconfig, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshClient) - Expect(err).NotTo(HaveOccurred()) - }) + _ = clusterDefinition // TODO: use 
clusterDefinition - By("Establishing ssh tunnel to the base cluster with 6445 port forwarding", func() { + }) // BeforeEach: Cluster Creation - }) + // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) + It("should establish ssh connection to the base cluster", func() { + sshclient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) + Expect(err).NotTo(HaveOccurred()) + }) - _ = sshClient // TODO: use sshClient - _ = clusterDefinition // TODO: use clusterDefinition - _ = kubeconfig // TODO: use kubeconfig - }) // BeforeEach: Cluster Creation + // Stage 3: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) - It("should create a test cluster", func() { - By("Creating a test cluster", func() { - fmt.Println("Creating a test cluster") + It("should get kubeconfig from the base cluster", func() { + kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) + Expect(err).NotTo(HaveOccurred()) + }) + + // Stage 4: Establish SSH tunnel with port forwarding + + It("should establish ssh tunnel to the base cluster with port forwarding", func() { + tunnelinfo, err = ssh.EstablishSSHTunnel(sshclient, "6445") + Expect(err).NotTo(HaveOccurred()) + Expect(tunnelinfo).NotTo(BeNil()) + Expect(tunnelinfo.LocalPort).To(BeNumerically(">=", 1024)) + + // Update kubeconfig if port differs from 6445 + if tunnelinfo.LocalPort != 6445 { + err = cluster.UpdateKubeconfigPort(kubeconfigPath, tunnelinfo.LocalPort) + } + }) + + It("should query K8s cluster", func() { + fmt.Println("querying K8s cluster") + + }) + + _ = kubeconfig // TODO: use kubeconfig - }) - }) // It: should create a test cluster }) // Describe: Cluster Creation From 0d5087029cc1876eb167e4dfa18300c5c81361c3 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 21:39:16 +0300 Subject: [PATCH 06/48] Ginkgo logging enabled and 
pre-configured --- .../cluster_creation_suite_test.go | 6 +- .../cluster_creation/cluster_creation_test.go | 80 ++++++++++++------- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/tests/cluster_creation/cluster_creation_suite_test.go b/tests/cluster_creation/cluster_creation_suite_test.go index f9d903e..4b6037b 100644 --- a/tests/cluster_creation/cluster_creation_suite_test.go +++ b/tests/cluster_creation/cluster_creation_suite_test.go @@ -25,5 +25,9 @@ import ( func TestIntegration(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Integration Test Suite") + // Configure Ginkgo to show verbose output + suiteConfig, reporterConfig := GinkgoConfiguration() + reporterConfig.Verbose = true + reporterConfig.ShowNodeEvents = false + RunSpecs(t, "Integration Test Suite", suiteConfig, reporterConfig) } diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 7f40935..40fff63 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -29,72 +29,96 @@ import ( "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" ) -var _ = Describe("Cluster Creation", func() { +var _ = Describe("Cluster Creation", Ordered, func() { var ( yamlConfigFilename string = "cluster_creation_test.yml" baseClusterMasterIP string = "172.17.1.67" baseClusterUser string = "tfadm" baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" - err error - sshclient ssh.SSHClient - kubeconfig *rest.Config - kubeconfigPath string - tunnelinfo *ssh.TunnelInfo + err error + sshclient ssh.SSHClient + kubeconfig *rest.Config + kubeconfigPath string + tunnelinfo *ssh.TunnelInfo + clusterDefinition *config.ClusterDefinition ) - BeforeEach(func(ctx SpecContext) { + BeforeAll(func() { var err error - var clusterDefinition *config.ClusterDefinition - var tunnelinfo *ssh.TunnelInfo // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and 
verifying cluster configuration from YAML", func() { + GinkgoWriter.Printf(" ▶️ Loading cluster configuration from: %s\n", yamlConfigFilename) clusterDefinition, err = cluster.LoadClusterConfig(yamlConfigFilename) Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Successfully loaded cluster configuration\n") }) - // Clean up tunnel when test completes + // AfterAll: Clean up tunnel - just for example. TODO - implement real cleanup DeferCleanup(func() { if tunnelinfo != nil && tunnelinfo.StopFunc != nil { - _ = tunnelinfo.StopFunc() + GinkgoWriter.Printf(" ▶️ Cleaning up SSH tunnel...\n") + err = tunnelinfo.StopFunc() + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ SSH tunnel cleaned up successfully\n") } }) - _ = clusterDefinition // TODO: use clusterDefinition + }) // BeforeAll - }) // BeforeEach: Cluster Creation + _ = clusterDefinition // TODO: use clusterDefinition // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) It("should establish ssh connection to the base cluster", func() { - sshclient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) - Expect(err).NotTo(HaveOccurred()) + By(fmt.Sprintf("Connecting to %s@%s using key %s", baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey), func() { + GinkgoWriter.Printf(" ▶️ Creating SSH client for %s@%s\n", baseClusterUser, baseClusterMasterIP) + sshclient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ SSH connection established successfully\n") + }) }) // Stage 3: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) It("should get kubeconfig from the base cluster", func() { - kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) - Expect(err).NotTo(HaveOccurred()) + By("Retrieving kubeconfig from 
base cluster", func() { + GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", baseClusterMasterIP) + kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Kubeconfig retrieved and saved to: %s\n", kubeconfigPath) + }) }) // Stage 4: Establish SSH tunnel with port forwarding It("should establish ssh tunnel to the base cluster with port forwarding", func() { - tunnelinfo, err = ssh.EstablishSSHTunnel(sshclient, "6445") - Expect(err).NotTo(HaveOccurred()) - Expect(tunnelinfo).NotTo(BeNil()) - Expect(tunnelinfo.LocalPort).To(BeNumerically(">=", 1024)) - - // Update kubeconfig if port differs from 6445 - if tunnelinfo.LocalPort != 6445 { - err = cluster.UpdateKubeconfigPort(kubeconfigPath, tunnelinfo.LocalPort) - } + By("Setting up SSH tunnel with port forwarding", func() { + GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", baseClusterMasterIP) + tunnelinfo, err = ssh.EstablishSSHTunnel(sshclient, "6445") + Expect(err).NotTo(HaveOccurred()) + Expect(tunnelinfo).NotTo(BeNil()) + Expect(tunnelinfo.LocalPort).To(BeNumerically(">=", 1024)) + GinkgoWriter.Printf(" ✅ SSH tunnel established on local port: %d\n", tunnelinfo.LocalPort) + + // Update kubeconfig if port differs from 6445 + if tunnelinfo.LocalPort != 6445 { + By(fmt.Sprintf("Updating kubeconfig to use local port %d instead of 6445", tunnelinfo.LocalPort), func() { + GinkgoWriter.Printf(" ▶️ Updating kubeconfig port from 6445 to %d\n", tunnelinfo.LocalPort) + err = cluster.UpdateKubeconfigPort(kubeconfigPath, tunnelinfo.LocalPort) + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Kubeconfig updated successfully\n") + }) + } + }) }) It("should query K8s cluster", func() { - fmt.Println("querying K8s cluster") - + By("Querying Kubernetes cluster", func() { + GinkgoWriter.Printf(" ▶️ Querying K8s cluster using kubeconfig: %s\n", 
kubeconfigPath) + // TODO: Add actual cluster querying logic here + GinkgoWriter.Printf(" ✅ Cluster query completed successfully\n") + }) }) _ = kubeconfig // TODO: use kubeconfig From 8539b179e917909203d03636014f2882953e971a Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 22:26:23 +0300 Subject: [PATCH 07/48] module,mc and mpo processing added --- internal/kubernetes/deckhouse/modules.go | 448 +++++++++++++++++- internal/kubernetes/deckhouse/types.go | 107 +++++ .../cluster_creation/cluster_creation_test.go | 21 +- 3 files changed, 568 insertions(+), 8 deletions(-) create mode 100644 internal/kubernetes/deckhouse/types.go diff --git a/internal/kubernetes/deckhouse/modules.go b/internal/kubernetes/deckhouse/modules.go index 06ca95a..99104a4 100644 --- a/internal/kubernetes/deckhouse/modules.go +++ b/internal/kubernetes/deckhouse/modules.go @@ -16,5 +16,451 @@ limitations under the License. package deckhouse -// TODO: Implement Deckhouse module operations +import ( + "context" + "fmt" + "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" +) + +const ( + // ModuleGroupVersion is the API group and version for Module resources + ModuleGroupVersion = "deckhouse.io/v1alpha1" + // ModuleResource is the resource name for Module + ModuleResource = "modules" + // ModuleConfigGroupVersion is the API group and version for ModuleConfig resources + ModuleConfigGroupVersion = "deckhouse.io/v1alpha1" + // ModuleConfigResource is the resource name for ModuleConfig + ModuleConfigResource = "moduleconfigs" + // ModulePullOverrideGroupVersion is the API group and version for ModulePullOverride resources + ModulePullOverrideGroupVersion = "deckhouse.io/v1alpha2" + // ModulePullOverrideResource is the resource name for ModulePullOverride + ModulePullOverrideResource = "modulepulloverrides" +) + +// GetModule 
retrieves detailed information about a single module by name +func GetModule(ctx context.Context, config *rest.Config, moduleName string) (*Module, error) { + client, err := dynamic.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + gvr := schema.GroupVersionResource{ + Group: "deckhouse.io", + Version: "v1alpha1", + Resource: ModuleResource, + } + + // Module is a cluster-scoped resource, so we use empty namespace + unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get module %s: %w", moduleName, err) + } + + module, err := unstructuredToModule(unstructuredObj) + if err != nil { + return nil, fmt.Errorf("failed to convert unstructured module %s: %w", moduleName, err) + } + + return module, nil +} + +// unstructuredToModule converts an unstructured.Unstructured object to a Module struct +func unstructuredToModule(obj *unstructured.Unstructured) (*Module, error) { + module := &Module{} + + // Set TypeMeta + module.APIVersion = obj.GetAPIVersion() + module.Kind = obj.GetKind() + + // Set ObjectMeta + module.ObjectMeta = metav1.ObjectMeta{ + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + UID: obj.GetUID(), + ResourceVersion: obj.GetResourceVersion(), + Generation: obj.GetGeneration(), + CreationTimestamp: obj.GetCreationTimestamp(), + Labels: obj.GetLabels(), + Annotations: obj.GetAnnotations(), + } + + // Extract properties + if properties, found, err := unstructured.NestedMap(obj.Object, "properties"); err != nil { + return nil, fmt.Errorf("failed to extract properties: %w", err) + } else if found { + if err := extractModuleProperties(properties, &module.Properties); err != nil { + return nil, fmt.Errorf("failed to extract module properties: %w", err) + } + } + + // Extract status + if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { + return nil, fmt.Errorf("failed to 
extract status: %w", err) + } else if found { + if err := extractModuleStatus(status, &module.Status); err != nil { + return nil, fmt.Errorf("failed to extract module status: %w", err) + } + } + + return module, nil +} + +// extractModuleProperties extracts ModuleProperties from a map +func extractModuleProperties(data map[string]interface{}, props *ModuleProperties) error { + if critical, found, err := unstructured.NestedBool(data, "critical"); err != nil { + return err + } else if found { + props.Critical = critical + } + + if disableOptions, found, err := unstructured.NestedMap(data, "disableOptions"); err != nil { + return err + } else if found && len(disableOptions) > 0 { + props.DisableOptions = &DisableOptions{} + if confirmation, found, err := unstructured.NestedBool(disableOptions, "confirmation"); err != nil { + return err + } else if found { + props.DisableOptions.Confirmation = confirmation + } + if message, found, err := unstructured.NestedString(disableOptions, "message"); err != nil { + return err + } else if found { + props.DisableOptions.Message = message + } + } + + if namespace, found, err := unstructured.NestedString(data, "namespace"); err != nil { + return err + } else if found { + props.Namespace = namespace + } + + if releaseChannel, found, err := unstructured.NestedString(data, "releaseChannel"); err != nil { + return err + } else if found { + props.ReleaseChannel = releaseChannel + } + + if source, found, err := unstructured.NestedString(data, "source"); err != nil { + return err + } else if found { + props.Source = source + } + + if stage, found, err := unstructured.NestedString(data, "stage"); err != nil { + return err + } else if found { + props.Stage = stage + } + + if subsystems, found, err := unstructured.NestedStringSlice(data, "subsystems"); err != nil { + return err + } else if found { + props.Subsystems = subsystems + } + + if version, found, err := unstructured.NestedString(data, "version"); err != nil { + return err + } else if 
found { + props.Version = version + } + + if weight, found, err := unstructured.NestedInt64(data, "weight"); err != nil { + return err + } else if found { + props.Weight = int(weight) + } + + return nil +} + +// extractModuleStatus extracts ModuleStatus from a map +func extractModuleStatus(data map[string]interface{}, status *ModuleStatus) error { + if conditions, found, err := unstructured.NestedSlice(data, "conditions"); err != nil { + return err + } else if found { + status.Conditions = make([]ModuleCondition, 0, len(conditions)) + for _, cond := range conditions { + condMap, ok := cond.(map[string]interface{}) + if !ok { + continue + } + condition := ModuleCondition{} + if lastProbeTime, found, err := unstructured.NestedString(condMap, "lastProbeTime"); err != nil { + return err + } else if found { + if t, err := time.Parse(time.RFC3339, lastProbeTime); err == nil { + condition.LastProbeTime = metav1.NewTime(t) + } + } + if lastTransitionTime, found, err := unstructured.NestedString(condMap, "lastTransitionTime"); err != nil { + return err + } else if found { + if t, err := time.Parse(time.RFC3339, lastTransitionTime); err == nil { + condition.LastTransitionTime = metav1.NewTime(t) + } + } + if statusStr, found, err := unstructured.NestedString(condMap, "status"); err != nil { + return err + } else if found { + condition.Status = statusStr + } + if typeStr, found, err := unstructured.NestedString(condMap, "type"); err != nil { + return err + } else if found { + condition.Type = typeStr + } + status.Conditions = append(status.Conditions, condition) + } + } + + if hooksState, found, err := unstructured.NestedString(data, "hooksState"); err != nil { + return err + } else if found { + status.HooksState = hooksState + } + + if phase, found, err := unstructured.NestedString(data, "phase"); err != nil { + return err + } else if found { + status.Phase = phase + } + + return nil +} + +// GetModuleConfig retrieves detailed information about a ModuleConfig by name +func 
GetModuleConfig(ctx context.Context, config *rest.Config, moduleName string) (*ModuleConfig, error) { + client, err := dynamic.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + gvr := schema.GroupVersionResource{ + Group: "deckhouse.io", + Version: "v1alpha1", + Resource: ModuleConfigResource, + } + + // ModuleConfig is a cluster-scoped resource, so we use empty namespace + unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get moduleconfig %s: %w", moduleName, err) + } + + moduleConfig, err := unstructuredToModuleConfig(unstructuredObj) + if err != nil { + return nil, fmt.Errorf("failed to convert unstructured moduleconfig %s: %w", moduleName, err) + } + + return moduleConfig, nil +} + +// GetModulePullOverride retrieves detailed information about a ModulePullOverride by name +func GetModulePullOverride(ctx context.Context, config *rest.Config, moduleName string) (*ModulePullOverride, error) { + client, err := dynamic.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + gvr := schema.GroupVersionResource{ + Group: "deckhouse.io", + Version: "v1alpha2", + Resource: ModulePullOverrideResource, + } + + // ModulePullOverride is a cluster-scoped resource, so we use empty namespace + unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get modulepulloverride %s: %w", moduleName, err) + } + + modulePullOverride, err := unstructuredToModulePullOverride(unstructuredObj) + if err != nil { + return nil, fmt.Errorf("failed to convert unstructured modulepulloverride %s: %w", moduleName, err) + } + + return modulePullOverride, nil +} + +// unstructuredToModuleConfig converts an unstructured.Unstructured object to a ModuleConfig struct +func unstructuredToModuleConfig(obj 
*unstructured.Unstructured) (*ModuleConfig, error) { + moduleConfig := &ModuleConfig{} + + // Set TypeMeta + moduleConfig.APIVersion = obj.GetAPIVersion() + moduleConfig.Kind = obj.GetKind() + + // Set ObjectMeta + moduleConfig.ObjectMeta = metav1.ObjectMeta{ + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + UID: obj.GetUID(), + ResourceVersion: obj.GetResourceVersion(), + Generation: obj.GetGeneration(), + CreationTimestamp: obj.GetCreationTimestamp(), + Labels: obj.GetLabels(), + Annotations: obj.GetAnnotations(), + Finalizers: obj.GetFinalizers(), + } + + // Extract spec + if spec, found, err := unstructured.NestedMap(obj.Object, "spec"); err != nil { + return nil, fmt.Errorf("failed to extract spec: %w", err) + } else if found { + if err := extractModuleConfigSpec(spec, &moduleConfig.Spec); err != nil { + return nil, fmt.Errorf("failed to extract moduleconfig spec: %w", err) + } + } + + // Extract status + if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { + return nil, fmt.Errorf("failed to extract status: %w", err) + } else if found { + if err := extractModuleConfigStatus(status, &moduleConfig.Status); err != nil { + return nil, fmt.Errorf("failed to extract moduleconfig status: %w", err) + } + } + + return moduleConfig, nil +} + +// extractModuleConfigSpec extracts ModuleConfigSpec from a map +func extractModuleConfigSpec(data map[string]interface{}, spec *ModuleConfigSpec) error { + if enabled, found, err := unstructured.NestedBool(data, "enabled"); err != nil { + return err + } else if found { + spec.Enabled = enabled + } + + if settings, found, err := unstructured.NestedMap(data, "settings"); err != nil { + return err + } else if found { + spec.Settings = settings + } + + if version, found, err := unstructured.NestedInt64(data, "version"); err != nil { + return err + } else if found { + spec.Version = int(version) + } + + return nil +} + +// extractModuleConfigStatus extracts ModuleConfigStatus from a map +func 
extractModuleConfigStatus(data map[string]interface{}, status *ModuleConfigStatus) error { + if message, found, err := unstructured.NestedString(data, "message"); err != nil { + return err + } else if found { + status.Message = message + } + + if version, found, err := unstructured.NestedString(data, "version"); err != nil { + return err + } else if found { + status.Version = version + } + + return nil +} + +// unstructuredToModulePullOverride converts an unstructured.Unstructured object to a ModulePullOverride struct +func unstructuredToModulePullOverride(obj *unstructured.Unstructured) (*ModulePullOverride, error) { + modulePullOverride := &ModulePullOverride{} + + // Set TypeMeta + modulePullOverride.APIVersion = obj.GetAPIVersion() + modulePullOverride.Kind = obj.GetKind() + + // Set ObjectMeta + modulePullOverride.ObjectMeta = metav1.ObjectMeta{ + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + UID: obj.GetUID(), + ResourceVersion: obj.GetResourceVersion(), + Generation: obj.GetGeneration(), + CreationTimestamp: obj.GetCreationTimestamp(), + Labels: obj.GetLabels(), + Annotations: obj.GetAnnotations(), + Finalizers: obj.GetFinalizers(), + } + + // Extract spec + if spec, found, err := unstructured.NestedMap(obj.Object, "spec"); err != nil { + return nil, fmt.Errorf("failed to extract spec: %w", err) + } else if found { + if err := extractModulePullOverrideSpec(spec, &modulePullOverride.Spec); err != nil { + return nil, fmt.Errorf("failed to extract modulepulloverride spec: %w", err) + } + } + + // Extract status + if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { + return nil, fmt.Errorf("failed to extract status: %w", err) + } else if found { + if err := extractModulePullOverrideStatus(status, &modulePullOverride.Status); err != nil { + return nil, fmt.Errorf("failed to extract modulepulloverride status: %w", err) + } + } + + return modulePullOverride, nil +} + +// extractModulePullOverrideSpec extracts 
ModulePullOverrideSpec from a map +func extractModulePullOverrideSpec(data map[string]interface{}, spec *ModulePullOverrideSpec) error { + if imageTag, found, err := unstructured.NestedString(data, "imageTag"); err != nil { + return err + } else if found { + spec.ImageTag = imageTag + } + + if rollback, found, err := unstructured.NestedBool(data, "rollback"); err != nil { + return err + } else if found { + spec.Rollback = rollback + } + + if scanInterval, found, err := unstructured.NestedString(data, "scanInterval"); err != nil { + return err + } else if found { + spec.ScanInterval = scanInterval + } + + return nil +} + +// extractModulePullOverrideStatus extracts ModulePullOverrideStatus from a map +func extractModulePullOverrideStatus(data map[string]interface{}, status *ModulePullOverrideStatus) error { + if imageDigest, found, err := unstructured.NestedString(data, "imageDigest"); err != nil { + return err + } else if found { + status.ImageDigest = imageDigest + } + + if message, found, err := unstructured.NestedString(data, "message"); err != nil { + return err + } else if found { + status.Message = message + } + + if updatedAt, found, err := unstructured.NestedString(data, "updatedAt"); err != nil { + return err + } else if found { + status.UpdatedAt = updatedAt + } + + if weight, found, err := unstructured.NestedInt64(data, "weight"); err != nil { + return err + } else if found { + status.Weight = int(weight) + } + + return nil +} diff --git a/internal/kubernetes/deckhouse/types.go b/internal/kubernetes/deckhouse/types.go new file mode 100644 index 0000000..e08986b --- /dev/null +++ b/internal/kubernetes/deckhouse/types.go @@ -0,0 +1,107 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deckhouse + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Module represents a Deckhouse Module custom resource +type Module struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Properties ModuleProperties `json:"properties,omitzero"` + Status ModuleStatus `json:"status,omitzero"` +} + +// ModuleProperties contains the properties of a Module +type ModuleProperties struct { + Critical bool `json:"critical,omitempty"` + DisableOptions *DisableOptions `json:"disableOptions,omitzero"` + Namespace string `json:"namespace,omitempty"` + ReleaseChannel string `json:"releaseChannel,omitempty"` + Source string `json:"source,omitempty"` + Stage string `json:"stage,omitempty"` + Subsystems []string `json:"subsystems,omitempty"` + Version string `json:"version,omitempty"` + Weight int `json:"weight,omitempty"` +} + +// DisableOptions contains options for disabling a module +type DisableOptions struct { + Confirmation bool `json:"confirmation,omitempty"` + Message string `json:"message,omitempty"` +} + +// ModuleStatus contains the status of a Module +type ModuleStatus struct { + Conditions []ModuleCondition `json:"conditions,omitzero"` + HooksState string `json:"hooksState,omitempty"` + Phase string `json:"phase,omitempty"` +} + +// ModuleCondition represents a condition of a Module +type ModuleCondition struct { + LastProbeTime metav1.Time `json:"lastProbeTime,omitzero"` + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitzero"` + Status string `json:"status,omitempty"` + Type string 
`json:"type,omitempty"` +} + +// ModuleConfig represents a Deckhouse ModuleConfig custom resource +type ModuleConfig struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ModuleConfigSpec `json:"spec,omitzero"` + Status ModuleConfigStatus `json:"status,omitzero"` +} + +// ModuleConfigSpec contains the specification of a ModuleConfig +type ModuleConfigSpec struct { + Enabled bool `json:"enabled,omitempty"` + Settings map[string]interface{} `json:"settings,omitempty"` + Version int `json:"version,omitempty"` +} + +// ModuleConfigStatus contains the status of a ModuleConfig +type ModuleConfigStatus struct { + Message string `json:"message,omitempty"` + Version string `json:"version,omitempty"` +} + +// ModulePullOverride represents a Deckhouse ModulePullOverride custom resource +type ModulePullOverride struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ModulePullOverrideSpec `json:"spec,omitzero"` + Status ModulePullOverrideStatus `json:"status,omitzero"` +} + +// ModulePullOverrideSpec contains the specification of a ModulePullOverride +type ModulePullOverrideSpec struct { + ImageTag string `json:"imageTag,omitempty"` + Rollback bool `json:"rollback,omitempty"` + ScanInterval string `json:"scanInterval,omitempty"` +} + +// ModulePullOverrideStatus contains the status of a ModulePullOverride +type ModulePullOverrideStatus struct { + ImageDigest string `json:"imageDigest,omitempty"` + Message string `json:"message,omitempty"` + UpdatedAt string `json:"updatedAt,omitempty"` + Weight int `json:"weight,omitempty"` +} diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 40fff63..08bce0c 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -17,7 +17,9 @@ limitations under the License. 
package integration import ( + "context" "fmt" + "time" "k8s.io/client-go/rest" @@ -27,6 +29,7 @@ import ( "github.com/deckhouse/storage-e2e/internal/cluster" "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" + "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" ) var _ = Describe("Cluster Creation", Ordered, func() { @@ -42,6 +45,7 @@ var _ = Describe("Cluster Creation", Ordered, func() { kubeconfigPath string tunnelinfo *ssh.TunnelInfo clusterDefinition *config.ClusterDefinition + module *deckhouse.Module ) BeforeAll(func() { @@ -113,14 +117,17 @@ var _ = Describe("Cluster Creation", Ordered, func() { }) }) - It("should query K8s cluster", func() { - By("Querying Kubernetes cluster", func() { - GinkgoWriter.Printf(" ▶️ Querying K8s cluster using kubeconfig: %s\n", kubeconfigPath) - // TODO: Add actual cluster querying logic here - GinkgoWriter.Printf(" ✅ Cluster query completed successfully\n") + It("should make sure that virtualization module is Ready", func() { + By("Checking if virtualization module is Ready", func() { + GinkgoWriter.Printf(" ▶️ Getting module with timeout\n") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + module, err = deckhouse.GetModule(ctx, kubeconfig, "virtualization") + Expect(err).NotTo(HaveOccurred()) + Expect(module).NotTo(BeNil()) + Expect(module.Status.Phase).To(Equal("Ready"), "Module status phase should be Ready") + GinkgoWriter.Printf(" ✅ Module %s retrieved successfully with status: %s\n", module.Name, module.Status.Phase) }) }) - _ = kubeconfig // TODO: use kubeconfig - }) // Describe: Cluster Creation From 6c120033c4ca3632ef819f243b14720b56c547da Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 22:30:33 +0300 Subject: [PATCH 08/48] Cleanup re-worked --- .../cluster_creation/cluster_creation_test.go | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git 
a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 08bce0c..c5021f1 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -59,14 +59,32 @@ var _ = Describe("Cluster Creation", Ordered, func() { GinkgoWriter.Printf(" ✅ Successfully loaded cluster configuration\n") }) - // AfterAll: Clean up tunnel - just for example. TODO - implement real cleanup + // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { + // Step 1: Stop SSH tunnel (must be done before closing SSH client) if tunnelinfo != nil && tunnelinfo.StopFunc != nil { - GinkgoWriter.Printf(" ▶️ Cleaning up SSH tunnel...\n") - err = tunnelinfo.StopFunc() - Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ SSH tunnel cleaned up successfully\n") + GinkgoWriter.Printf(" ▶️ Stopping SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) + err := tunnelinfo.StopFunc() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop SSH tunnel: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ SSH tunnel stopped successfully\n") + } } + + // Step 2: Close SSH client connection + if sshclient != nil { + GinkgoWriter.Printf(" ▶️ Closing SSH client connection...\n") + err := sshclient.Close() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to close SSH client: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ SSH client closed successfully\n") + } + } + + // Note: kubeconfig and kubeconfigPath are just config/file paths, no cleanup needed + // The kubeconfig file is stored in temp/ directory and can be kept for debugging }) }) // BeforeAll From 914f26b2ead95dde045148d4997e5c25d27a1fd7 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 23:13:55 +0300 Subject: [PATCH 09/48] Context updated --- internal/cluster/cluster.go | 3 +- internal/infrastructure/ssh/client.go | 107 ++++++++++++++++-- 
internal/infrastructure/ssh/interface.go | 2 +- internal/infrastructure/ssh/tunnel.go | 58 ++++++++-- .../cluster_creation/cluster_creation_test.go | 7 +- 5 files changed, 151 insertions(+), 26 deletions(-) diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index 6b6ca7b..c32670d 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -193,7 +193,7 @@ func expandPath(path string) (string, error) { // and returns a rest.Config that can be used with Kubernetes clients, along with the path to the kubeconfig file. // If sshClient is provided, it will be used instead of creating a new connection. // If sshClient is nil, a new connection will be created and closed automatically. -func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*rest.Config, string, error) { +func GetKubeconfig(ctx context.Context, masterIP, user, keyPath string, sshClient ssh.SSHClient) (*rest.Config, string, error) { // Create SSH client if not provided shouldClose := false if sshClient == nil { @@ -237,7 +237,6 @@ func GetKubeconfig(masterIP, user, keyPath string, sshClient ssh.SSHClient) (*re var kubeconfigContent []byte // Try to read kubeconfig from /etc/kubernetes/admin.conf via SSH - ctx := context.Background() kubeconfigContentStr, err := sshClient.Exec(ctx, "sudo -n cat /etc/kubernetes/admin.conf") if err != nil { // SSH retrieval failed (likely due to sudo password requirement) diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 536702c..79f4e84 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -37,6 +37,46 @@ type client struct { sshClient *ssh.Client } +// copyWithContext copies data from src to dst with context cancellation support +func copyWithContext(ctx context.Context, dst io.Writer, src io.Reader) (written int64, err error) { + buf := make([]byte, 32*1024) + for { + // Check context before each read + select { + case 
<-ctx.Done(): + return written, ctx.Err() + default: + } + + nr, er := src.Read(buf) + if nr > 0 { + nw, ew := dst.Write(buf[0:nr]) + if nw < 0 || nr < nw { + nw = 0 + if ew == nil { + ew = fmt.Errorf("invalid write result") + } + } + written += int64(nw) + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er != nil { + if er != io.EOF { + err = er + } + break + } + } + return written, err +} + // readPassword reads a password from the terminal func readPassword(prompt string) ([]byte, error) { fmt.Fprint(os.Stderr, prompt) @@ -146,7 +186,12 @@ func (c *client) Create(user, host, keyPath string) (SSHClient, error) { // StartTunnel starts an SSH tunnel with port forwarding from local to remote // It returns a function to stop the tunnel and an error if the tunnel fails to start -func (c *client) StartTunnel(localPort, remotePort string) (func() error, error) { +func (c *client) StartTunnel(ctx context.Context, localPort, remotePort string) (func() error, error) { + // Check context before starting + if err := ctx.Err(); err != nil { + return nil, fmt.Errorf("context error before starting tunnel: %w", err) + } + listener, err := net.Listen("tcp", "127.0.0.1:"+localPort) if err != nil { return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) @@ -157,18 +202,28 @@ func (c *client) StartTunnel(localPort, remotePort string) (func() error, error) go func() { defer listener.Close() for { - // Check if we should stop before accepting + // Check context and stop channel select { + case <-ctx.Done(): + return case <-stopChan: return default: } - // Set a deadline for Accept to allow periodic checking of stopChan + // Set deadline for Accept based on context deadline if available + if deadline, ok := ctx.Deadline(); ok { + if err := listener.(*net.TCPListener).SetDeadline(deadline); err != nil { + // If setting deadline fails, continue without it + } + } + localConn, err := listener.Accept() if err != 
nil { // Listener closed or error occurred select { + case <-ctx.Done(): + return case <-stopChan: return default: @@ -186,19 +241,30 @@ func (c *client) StartTunnel(localPort, remotePort string) (func() error, error) } defer remoteConn.Close() - // Copy data bidirectionally + // Copy data bidirectionally with context support done := make(chan struct{}, 2) go func() { - io.Copy(localConn, remoteConn) + _, _ = copyWithContext(ctx, localConn, remoteConn) done <- struct{}{} }() go func() { - io.Copy(remoteConn, localConn) + _, _ = copyWithContext(ctx, remoteConn, localConn) done <- struct{}{} }() - // Wait for either direction to finish - <-done + // Wait for either direction to finish or context cancellation + select { + case <-ctx.Done(): + return + case <-done: + // One direction finished, wait for the other + select { + case <-ctx.Done(): + return + case <-done: + // Both directions finished + } + } }() } }() @@ -213,17 +279,34 @@ func (c *client) StartTunnel(localPort, remotePort string) (func() error, error) // Exec executes a command on the remote host func (c *client) Exec(ctx context.Context, cmd string) (string, error) { + // Check context before starting + if err := ctx.Err(); err != nil { + return "", fmt.Errorf("context error before execution: %w", err) + } + session, err := c.sshClient.NewSession() if err != nil { return "", fmt.Errorf("failed to create SSH session: %w", err) } defer session.Close() + // Note: session.CombinedOutput doesn't support context directly, + // but we check context before and after the call + // For better cancellation support, consider using session.Start() with context-aware goroutines output, err := session.CombinedOutput(cmd) if err != nil { + // Check if context was cancelled during execution + if ctx.Err() != nil { + return string(output), fmt.Errorf("context cancelled: %w", ctx.Err()) + } return string(output), fmt.Errorf("command failed: %w", err) } + // Check context after execution + if err := ctx.Err(); err != nil { 
+ return string(output), fmt.Errorf("context cancelled: %w", err) + } + return string(output), nil } @@ -238,6 +321,11 @@ func (c *client) ExecFatal(ctx context.Context, cmd string) string { // Upload uploads a local file to the remote host func (c *client) Upload(ctx context.Context, localPath, remotePath string) error { + // Check context before starting + if err := ctx.Err(); err != nil { + return fmt.Errorf("context error before upload: %w", err) + } + sftpClient, err := sftp.NewClient(c.sshClient) if err != nil { return fmt.Errorf("failed to create SFTP client: %w", err) @@ -256,7 +344,8 @@ func (c *client) Upload(ctx context.Context, localPath, remotePath string) error } defer remoteFile.Close() - _, err = io.Copy(remoteFile, localFile) + // Use context-aware copy + _, err = copyWithContext(ctx, remoteFile, localFile) if err != nil { return fmt.Errorf("failed to copy file: %w", err) } diff --git a/internal/infrastructure/ssh/interface.go b/internal/infrastructure/ssh/interface.go index a51d056..d2ba326 100644 --- a/internal/infrastructure/ssh/interface.go +++ b/internal/infrastructure/ssh/interface.go @@ -25,7 +25,7 @@ type SSHClient interface { // StartTunnel starts an SSH tunnel with port forwarding from local to remote // It returns a function to stop the tunnel and an error if the tunnel fails to start - StartTunnel(localPort, remotePort string) (stop func() error, err error) + StartTunnel(ctx context.Context, localPort, remotePort string) (stop func() error, err error) // Exec executes a command on the remote host Exec(ctx context.Context, cmd string) (string, error) diff --git a/internal/infrastructure/ssh/tunnel.go b/internal/infrastructure/ssh/tunnel.go index d8432e1..7b0449d 100644 --- a/internal/infrastructure/ssh/tunnel.go +++ b/internal/infrastructure/ssh/tunnel.go @@ -17,10 +17,11 @@ limitations under the License. 
package ssh import ( + "context" "fmt" - "io" netstd "net" "strconv" + "time" netpkg "github.com/deckhouse/storage-e2e/internal/infrastructure/net" "golang.org/x/crypto/ssh" @@ -28,7 +29,12 @@ import ( // StartTunnel starts an SSH tunnel with port forwarding from local to remote // It returns a function to stop the tunnel and an error if the tunnel fails to start -func StartTunnel(sshClient *ssh.Client, localPort, remotePort string) (func() error, error) { +func StartTunnel(ctx context.Context, sshClient *ssh.Client, localPort, remotePort string) (func() error, error) { + // Check context before starting + if err := ctx.Err(); err != nil { + return nil, fmt.Errorf("context error before starting tunnel: %w", err) + } + listener, err := netstd.Listen("tcp", "127.0.0.1:"+localPort) if err != nil { return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) @@ -39,18 +45,35 @@ func StartTunnel(sshClient *ssh.Client, localPort, remotePort string) (func() er go func() { defer listener.Close() for { - // Check if we should stop before accepting + // Check context and stop channel select { + case <-ctx.Done(): + return case <-stopChan: return default: } - // Set a deadline for Accept to allow periodic checking of stopChan + // Set deadline for Accept based on context deadline if available + if tcpListener, ok := listener.(*netstd.TCPListener); ok { + if deadline, ok := ctx.Deadline(); ok { + if err := tcpListener.SetDeadline(deadline); err != nil { + // If setting deadline fails, continue without it + } + } else { + // Set a short deadline to allow periodic context checking + if err := tcpListener.SetDeadline(time.Now().Add(100 * time.Millisecond)); err != nil { + // If setting deadline fails, continue without it + } + } + } + localConn, err := listener.Accept() if err != nil { // Listener closed or error occurred select { + case <-ctx.Done(): + return case <-stopChan: return default: @@ -68,19 +91,30 @@ func StartTunnel(sshClient *ssh.Client, 
localPort, remotePort string) (func() er } defer remoteConn.Close() - // Copy data bidirectionally + // Copy data bidirectionally with context support done := make(chan struct{}, 2) go func() { - io.Copy(localConn, remoteConn) + _, _ = copyWithContext(ctx, localConn, remoteConn) done <- struct{}{} }() go func() { - io.Copy(remoteConn, localConn) + _, _ = copyWithContext(ctx, remoteConn, localConn) done <- struct{}{} }() - // Wait for either direction to finish - <-done + // Wait for either direction to finish or context cancellation + select { + case <-ctx.Done(): + return + case <-done: + // One direction finished, wait for the other + select { + case <-ctx.Done(): + return + case <-done: + // Both directions finished + } + } }() } }() @@ -96,7 +130,7 @@ func StartTunnel(sshClient *ssh.Client, localPort, remotePort string) (func() er // EstablishSSHTunnel establishes an SSH tunnel with port forwarding from the master node to the same port of client, running the test // It finds a free local port starting from remotePort and creates the tunnel // Returns the tunnel info, local port and error if the tunnel fails to start -func EstablishSSHTunnel(sshClient SSHClient, remotePort string) (*TunnelInfo, error) { +func EstablishSSHTunnel(ctx context.Context, sshClient SSHClient, remotePort string) (*TunnelInfo, error) { // Find a free local port starting from remotePort remotePortInt := 1024 if parsed, err := strconv.Atoi(remotePort); err == nil { @@ -108,8 +142,8 @@ func EstablishSSHTunnel(sshClient SSHClient, remotePort string) (*TunnelInfo, er return nil, fmt.Errorf("failed to find free port: %w", err) } - // Start the SSH tunnel - stopFunc, err := sshClient.StartTunnel(strconv.Itoa(localPort), remotePort) + // Start the SSH tunnel with context + stopFunc, err := sshClient.StartTunnel(ctx, strconv.Itoa(localPort), remotePort) if err != nil { return nil, fmt.Errorf("failed to start SSH tunnel: %w", err) } diff --git a/tests/cluster_creation/cluster_creation_test.go 
b/tests/cluster_creation/cluster_creation_test.go index c5021f1..6160b50 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -106,7 +106,9 @@ var _ = Describe("Cluster Creation", Ordered, func() { It("should get kubeconfig from the base cluster", func() { By("Retrieving kubeconfig from base cluster", func() { GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", baseClusterMasterIP) - kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(ctx, baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ Kubeconfig retrieved and saved to: %s\n", kubeconfigPath) }) @@ -117,7 +119,8 @@ var _ = Describe("Cluster Creation", Ordered, func() { It("should establish ssh tunnel to the base cluster with port forwarding", func() { By("Setting up SSH tunnel with port forwarding", func() { GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", baseClusterMasterIP) - tunnelinfo, err = ssh.EstablishSSHTunnel(sshclient, "6445") + ctx := context.Background() + tunnelinfo, err = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") Expect(err).NotTo(HaveOccurred()) Expect(tunnelinfo).NotTo(BeNil()) Expect(tunnelinfo.LocalPort).To(BeNumerically(">=", 1024)) From 25416e2becd8a70bec7b9afc55cb702dc0f5817e Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 12 Dec 2025 23:35:24 +0300 Subject: [PATCH 10/48] Refactor cluster creation configuration: replace flag-based options with environment variable validation for CLUSTER_CREATE_MODE. Update README with usage instructions and remove obsolete flag handling code. 
--- README.md | 5 ++++ internal/config/env.go | 28 +++++++++++++++++ internal/config/flags.go | 30 ------------------- main.go | 24 +++++---------- .../cluster_creation_suite_test.go | 8 +++++ .../cluster_creation/cluster_creation_test.go | 2 +- 6 files changed, 50 insertions(+), 47 deletions(-) delete mode 100644 internal/config/flags.go diff --git a/README.md b/README.md index 98219ad..8514791 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,11 @@ # Passphrase of the private key used to connect to the base cluster export SSH_PASSPHRASE='passphrase' +# Cluster creation mode - must be set to either 'alwaysUseExisting' or 'alwaysCreateNew' +export CLUSTER_CREATE_MODE='alwaysUseExisting' # Use existing cluster +# OR +export CLUSTER_CREATE_MODE='alwaysCreateNew' # Create new cluster + # Used in case if the code cannot obtain kubeconfig from master itself because e.g. password is required in sudo export KUBE_CONFIG_PATH='/path/to/kubeconfig/file' ``` diff --git a/internal/config/env.go b/internal/config/env.go index 37c6767..034fd7e 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -3,9 +3,17 @@ package config import ( + "fmt" "os" ) +const ( + // ClusterCreateModeAlwaysUseExisting indicates to always use an existing cluster if available + ClusterCreateModeAlwaysUseExisting = "alwaysUseExisting" + // ClusterCreateModeAlwaysCreateNew indicates to always create a new cluster + ClusterCreateModeAlwaysCreateNew = "alwaysCreateNew" +) + var ( // ssh passphrase for ssh private key used to connect to base cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") @@ -14,4 +22,24 @@ var ( // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download // the kubeconfig manually and set this environment variable. KubeConfigPath = os.Getenv("KUBE_CONFIG_PATH") + + // ClusterCreateMode specifies the cluster creation mode. 
Must be set to either "alwaysUseExisting" or "alwaysCreateNew" + ClusterCreateMode = os.Getenv("CLUSTER_CREATE_MODE") ) + +// ValidateClusterCreateMode validates that CLUSTER_CREATE_MODE is set and has a valid value +func ValidateClusterCreateMode() error { + if ClusterCreateMode == "" { + return fmt.Errorf("CLUSTER_CREATE_MODE environment variable is required but not set. "+ + "Please set it to either '%s' or '%s'", + ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) + } + + if ClusterCreateMode != ClusterCreateModeAlwaysUseExisting && ClusterCreateMode != ClusterCreateModeAlwaysCreateNew { + return fmt.Errorf("CLUSTER_CREATE_MODE has invalid value '%s'. "+ + "Must be either '%s' or '%s'", + ClusterCreateMode, ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) + } + + return nil +} diff --git a/internal/config/flags.go b/internal/config/flags.go deleted file mode 100644 index 863506a..0000000 --- a/internal/config/flags.go +++ /dev/null @@ -1,30 +0,0 @@ -// Flags used by codebase - -package config - -import ( - "flag" -) - -var ( - // alwaysUseExisting indicates to always use an existing cluster if available - alwaysUseExisting = flag.Bool("always-use-existing", false, "Always use an existing cluster if available") - // alwaysCreateNew indicates to always create a new cluster - alwaysCreateNew = flag.Bool("always-create-new", false, "Always create a new cluster") -) - -// Just a dummy for flags to avoid compiler error -func init() { - _ = *alwaysUseExisting - _ = *alwaysCreateNew -} - -// AlwaysUseExisting returns the value of the alwaysUseExisting flag -func AlwaysUseExisting() bool { - return false -} - -// AlwaysCreateNew returns the value of the alwaysCreateNew flag -func AlwaysCreateNew() bool { - return true -} diff --git a/main.go b/main.go index d0cb124..648bbd9 100644 --- a/main.go +++ b/main.go @@ -16,21 +16,13 @@ limitations under the License. 
package main -import ( - "flag" - "fmt" - "os" - - "github.com/deckhouse/storage-e2e/internal/config" -) - func main() { - // config.ParseFlags() // TODO - investigate flag parsing with go test later. - - // Validate that at least one of the flags is set - if (!config.AlwaysUseExisting() && !config.AlwaysCreateNew()) || (config.AlwaysUseExisting() && config.AlwaysCreateNew()) { - fmt.Fprintf(os.Stderr, "Error: Either --always-use-existing or --always-create-new must be set, but not both\n\n") - flag.Usage() - os.Exit(1) - } + // // Validate that CLUSTER_CREATE_MODE is set and has a valid value + // if err := config.ValidateClusterCreateMode(); err != nil { + // fmt.Fprintf(os.Stderr, "Error: %v\n\n", err) + // fmt.Fprintf(os.Stderr, "Please set CLUSTER_CREATE_MODE environment variable:\n") + // fmt.Fprintf(os.Stderr, " export CLUSTER_CREATE_MODE='%s' # Use existing cluster\n", config.ClusterCreateModeAlwaysUseExisting) + // fmt.Fprintf(os.Stderr, " export CLUSTER_CREATE_MODE='%s' # Create new cluster\n", config.ClusterCreateModeAlwaysCreateNew) + // os.Exit(1) + // } } diff --git a/tests/cluster_creation/cluster_creation_suite_test.go b/tests/cluster_creation/cluster_creation_suite_test.go index 4b6037b..f730b1e 100644 --- a/tests/cluster_creation/cluster_creation_suite_test.go +++ b/tests/cluster_creation/cluster_creation_suite_test.go @@ -21,8 +21,16 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + + "github.com/deckhouse/storage-e2e/internal/config" ) +var _ = BeforeSuite(func() { + // Validate that CLUSTER_CREATE_MODE is set and has a valid value + err := config.ValidateClusterCreateMode() + Expect(err).NotTo(HaveOccurred(), "CLUSTER_CREATE_MODE environment variable must be set to either 'alwaysUseExisting' or 'alwaysCreateNew'") +}) + func TestIntegration(t *testing.T) { RegisterFailHandler(Fail) // Configure Ginkgo to show verbose output diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 6160b50..2fcfd09 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -32,7 +32,7 @@ import ( "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" ) -var _ = Describe("Cluster Creation", Ordered, func() { +var _ = Describe("Cluster Creation Test", Ordered, func() { var ( yamlConfigFilename string = "cluster_creation_test.yml" baseClusterMasterIP string = "172.17.1.67" From 5d8b77ac1649ddb9b643a17522d241fa6cf2e658 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Sat, 13 Dec 2025 00:14:53 +0300 Subject: [PATCH 11/48] Remove unused FindFreePort function and update EstablishSSHTunnel to use the specified remote port directly. Adjust cluster creation tests to validate the local port is exactly 6445. 
--- internal/infrastructure/net/port.go | 35 ------------------- internal/infrastructure/ssh/tunnel.go | 22 +++++------- .../cluster_creation/cluster_creation_test.go | 15 ++------ 3 files changed, 11 insertions(+), 61 deletions(-) delete mode 100644 internal/infrastructure/net/port.go diff --git a/internal/infrastructure/net/port.go b/internal/infrastructure/net/port.go deleted file mode 100644 index 073dd6a..0000000 --- a/internal/infrastructure/net/port.go +++ /dev/null @@ -1,35 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package net - -import ( - "fmt" - "net" -) - -// FindFreePort finds a free port starting from startPort and incrementing until a free port is found -func FindFreePort(startPort int) (int, error) { - for port := startPort; port < startPort+100; port++ { - addr, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port)) - if err == nil { - addr.Close() - return port, nil - } - } - return 0, fmt.Errorf("could not find a free port starting from %d", startPort) -} - diff --git a/internal/infrastructure/ssh/tunnel.go b/internal/infrastructure/ssh/tunnel.go index 7b0449d..c556a47 100644 --- a/internal/infrastructure/ssh/tunnel.go +++ b/internal/infrastructure/ssh/tunnel.go @@ -23,7 +23,6 @@ import ( "strconv" "time" - netpkg "github.com/deckhouse/storage-e2e/internal/infrastructure/net" "golang.org/x/crypto/ssh" ) @@ -127,29 +126,24 @@ func StartTunnel(ctx context.Context, sshClient *ssh.Client, localPort, remotePo return stop, nil } -// EstablishSSHTunnel establishes an SSH tunnel with port forwarding from the master node to the same port of client, running the test -// It finds a free local port starting from remotePort and creates the tunnel +// EstablishSSHTunnel establishes an SSH tunnel with port forwarding from the master node to the same port on the client +// It uses the exact port specified in remotePort and fails immediately if the port is busy // Returns the tunnel info, local port and error if the tunnel fails to start func EstablishSSHTunnel(ctx context.Context, sshClient SSHClient, remotePort string) (*TunnelInfo, error) { - // Find a free local port starting from remotePort - remotePortInt := 1024 - if parsed, err := strconv.Atoi(remotePort); err == nil { - remotePortInt = parsed - } - - localPort, err := netpkg.FindFreePort(remotePortInt) + // Parse remote port to integer + remotePortInt, err := strconv.Atoi(remotePort) if err != nil { - return nil, fmt.Errorf("failed to find free port: %w", err) + return nil, fmt.Errorf("invalid remote port %s: 
%w", remotePort, err) } // Start the SSH tunnel with context - stopFunc, err := sshClient.StartTunnel(ctx, strconv.Itoa(localPort), remotePort) + stopFunc, err := sshClient.StartTunnel(ctx, remotePort, remotePort) if err != nil { - return nil, fmt.Errorf("failed to start SSH tunnel: %w", err) + return nil, fmt.Errorf("failed to start SSH tunnel on port %d (port may be busy): %w", remotePortInt, err) } tunnelInfo := &TunnelInfo{ - LocalPort: localPort, + LocalPort: remotePortInt, RemotePort: remotePortInt, StopFunc: stopFunc, } diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster_creation/cluster_creation_test.go index 2fcfd09..94a5fa0 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster_creation/cluster_creation_test.go @@ -35,8 +35,8 @@ import ( var _ = Describe("Cluster Creation Test", Ordered, func() { var ( yamlConfigFilename string = "cluster_creation_test.yml" - baseClusterMasterIP string = "172.17.1.67" - baseClusterUser string = "tfadm" + baseClusterMasterIP string = "10.0.0.181" + baseClusterUser string = "w-ansible" baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" err error @@ -123,18 +123,9 @@ var _ = Describe("Cluster Creation Test", Ordered, func() { tunnelinfo, err = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") Expect(err).NotTo(HaveOccurred()) Expect(tunnelinfo).NotTo(BeNil()) - Expect(tunnelinfo.LocalPort).To(BeNumerically(">=", 1024)) + Expect(tunnelinfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") GinkgoWriter.Printf(" ✅ SSH tunnel established on local port: %d\n", tunnelinfo.LocalPort) - // Update kubeconfig if port differs from 6445 - if tunnelinfo.LocalPort != 6445 { - By(fmt.Sprintf("Updating kubeconfig to use local port %d instead of 6445", tunnelinfo.LocalPort), func() { - GinkgoWriter.Printf(" ▶️ Updating kubeconfig port from 6445 to %d\n", tunnelinfo.LocalPort) - err = cluster.UpdateKubeconfigPort(kubeconfigPath, tunnelinfo.LocalPort) - 
Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ Kubeconfig updated successfully\n") - }) - } }) }) From ce8b988e8e92efc1cf35696a5137ee032d7387fe Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Sun, 14 Dec 2025 21:59:11 +0300 Subject: [PATCH 12/48] Update README and add RedOS images configuration; created new cluster creation tests --- README.md | 2 +- internal/README.md | 31 ++ internal/config/images.go | 296 ++++++++++++++++++ pkg/cluster/cluster.go | 122 ++++++++ .../cluster_config.yml} | 0 .../cluster_creation_suite_test.go | 0 .../cluster_creation_test.go | 4 +- tests/cluster-creation/cluster_config.yml | 69 ++++ .../cluster_creation_suite_test.go | 41 +++ .../cluster-creation/cluster_creation_test.go | 81 +++++ 10 files changed, 643 insertions(+), 3 deletions(-) create mode 100644 internal/README.md create mode 100644 pkg/cluster/cluster.go rename tests/{cluster_creation/cluster_creation_test.yml => cluster-creation-by-steps/cluster_config.yml} (100%) rename tests/{cluster_creation => cluster-creation-by-steps}/cluster_creation_suite_test.go (100%) rename tests/{cluster_creation => cluster-creation-by-steps}/cluster_creation_test.go (97%) create mode 100644 tests/cluster-creation/cluster_config.yml create mode 100644 tests/cluster-creation/cluster_creation_suite_test.go create mode 100644 tests/cluster-creation/cluster_creation_test.go diff --git a/README.md b/README.md index 8514791..130d32b 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ export KUBE_CONFIG_PATH='/path/to/kubeconfig/file' #### Running a test example ```bash -go test -v ./tests/cluster_creation -count=1 +go test -v ./tests/cluster-creation-by-steps -count=1 # count=1 prevents go test from using cached test results ``` diff --git a/internal/README.md b/internal/README.md new file mode 100644 index 0000000..6e5a37d --- /dev/null +++ b/internal/README.md @@ -0,0 +1,31 @@ +# Internal Package + +Low-level code that directly interacts with system components (Kubernetes, SSH, 
infrastructure). + +## ✅ What Belongs Here + +Direct, atomic operations on components: + +```go +// ✅ CORRECT: Single, direct operations +GetModule(ctx, config, name) +UpdateModule(ctx, config, module) +GetModuleConfig(ctx, config, name) +``` + +## ❌ What Does NOT Belong Here + +Business logic that orchestrates multiple operations: + +```go +// ❌ INCORRECT: Combines multiple operations with logic +EnsureModuleEnabled(ctx, config, name) // Checks + enables +CheckSnapshotControllerReady(ctx, config) // Polls until ready +UpdateVirtualization(ctx, config, settings) // Validates + updates +``` + +Place these in higher-level packages `pkg/deckhouse/`, `pkg/kubernetes`, `pkg/testkit` etc. + +## Rule of Thumb + +**One function = one direct operation.** If it does multiple things or contains business logic, it doesn't belong here. diff --git a/internal/config/images.go b/internal/config/images.go index 9b849e6..ef8c6f1 100644 --- a/internal/config/images.go +++ b/internal/config/images.go @@ -26,5 +26,301 @@ var OSTypeMap = map[string]OSType{ ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", KernelVersion: "6.8.0-53-generic", }, + "RedOS 8.0 6.6.26-1.red80.x86_64": { + ImageURL: "https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/redos-8-1.x86_64.qcow2", + KernelVersion: "6.6.26-1.red80.x86_64", + }, + "RedOS 7.3.6 5.15.78-2.el7.3.x86_64": { + ImageURL: "https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/RO732_MIN-STD.qcow2", + KernelVersion: "5.15.78-2.el7.3.x86_64", + }, } +/* +#!/bin/bash + +# ============================================================================ +# Configuration Parameters +# ============================================================================ + +# Amount of VMs to create +VM_COUNT=1 + +# Starting index for VM numbering (e.g., 1 for vm-01, vm-02, etc. or 5 for vm-05, vm-06, etc.) 
+START_INDEX=4 + +# Namespace +NAMESPACE="ya" + +# Cloud init image URL + +# RedOS 8: +CLOUD_INIT_IMAGE_URL="https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/redos-8-1.x86_64.qcow2" +VMPREF="red8" + +# RedOS 7: +#CLOUD_INIT_IMAGE_URL="https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/RO732_MIN-STD.qcow2" +#VMPREF="red7" + +# Ubuntu Server 22.04 +#CLOUD_INIT_IMAGE_URL="https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img" +#VMPREF="ub22" + +# Ubuntu 2404 server +#CLOUD_INIT_IMAGE_URL="https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img" +#VMPREF="ub24" + +# VM name prefix (VM names will be: {PREFIX}{INDEX}, e.g., "vm-01", "vm-02", etc.) +VM_NAME_PREFIX="vm-$VMPREF-$NAMESPACE-" + +# Storage class +STORAGE_CLASS="nfs-storage-class" + +# CPU configuration +CPU_CORES=4 +CPU_CORE_FRACTION="10%" + +# RAM +MEMORY_SIZE="8Gi" + +# Disk size +DISK_SIZE="60G" + +# VirtualMachineClass name (shared across all VMs) +VM_CLASS_NAME="generic2" + +# ============================================================================ +# Script Logic +# ============================================================================ + +set -euo pipefail + +# Function to format VM number with leading zeros +format_vm_number() { + local num=$1 + printf "%02d" "$num" +} + +# Function to generate VM name from prefix and index +generate_vm_name() { + local index=$1 + local formatted_index=$(format_vm_number "$index") + echo "${VM_NAME_PREFIX}${formatted_index}" +} + +# Function to generate manifests for a single VM +generate_vm_manifests() { + local vm_index=$1 + local vm_name=$(generate_vm_name "$vm_index") + local formatted_index=$(format_vm_number "$vm_index") + # Extract base name from prefix (remove trailing dash if present) + local base_name="${VM_NAME_PREFIX%-}" + local secret_name="${base_name}-cloud-init-${formatted_index}" + local vd_name="${base_name}-vd-root-${formatted_index}" + local vi_name="$base_name" + + 
+cat </dev/null; then + echo "Creating namespace '${NAMESPACE}'..." + kubectl create namespace "${NAMESPACE}" + echo "" + else + echo "Namespace '${NAMESPACE}' already exists." + echo "" + fi + + # Create VirtualMachineClass if it doesn't exist + echo "Creating/updating VirtualMachineClass '${VM_CLASS_NAME}'..." + create_vm_class + echo "" + + # Create VMs + local end_index=$((START_INDEX + VM_COUNT - 1)) + for ((i=START_INDEX; i<=end_index; i++)); do + local vm_name=$(generate_vm_name "$i") + local vm_number=$((i - START_INDEX + 1)) + + echo "Creating VM ${vm_number}/${VM_COUNT}: ${vm_name}..." + + # Generate and apply manifests + generate_vm_manifests "$i" | kubectl apply -f - + + echo " Created VirtualMachine: ${vm_name}" + echo "" + done + + echo "==========================================" + echo "Deployment completed successfully!" + echo "==========================================" +} + +# Run main function +main +GiNVxVMDAwMUYMjVcVTAwMDFGNTIIgo +*/ diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go new file mode 100644 index 0000000..09680c1 --- /dev/null +++ b/pkg/cluster/cluster.go @@ -0,0 +1,122 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +import ( + "context" + "fmt" + "time" + + "k8s.io/client-go/rest" + + internalcluster "github.com/deckhouse/storage-e2e/internal/cluster" + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" +) + +// TestClusterResources holds all resources created for a test cluster connection +type TestClusterResources struct { + SSHClient ssh.SSHClient + Kubeconfig *rest.Config + KubeconfigPath string + TunnelInfo *ssh.TunnelInfo + ClusterDefinition *config.ClusterDefinition +} + +// CreateTestCluster establishes a connection to a test cluster by: +// 1. Loading cluster configuration from YAML +// 2. Establishing SSH connection to the base cluster +// 3. Retrieving kubeconfig from the base cluster +// 4. Establishing SSH tunnel with port forwarding +// +// It returns all the resources needed to interact with the cluster. +func CreateTestCluster( + ctx context.Context, + yamlConfigFilename string, + baseClusterMasterIP string, + baseClusterUser string, + baseClusterSSHPrivateKey string, +) (*TestClusterResources, error) { + // Stage 1: Load cluster configuration from YAML + clusterDefinition, err := internalcluster.LoadClusterConfig(yamlConfigFilename) + if err != nil { + return nil, fmt.Errorf("failed to load cluster configuration: %w", err) + } + + // Stage 2: Establish SSH connection to base cluster + sshClient, err := ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) + if err != nil { + return nil, fmt.Errorf("failed to create SSH client: %w", err) + } + + // Stage 3: Get kubeconfig from base cluster + // Use a timeout context for kubeconfig retrieval + kubeconfigCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + kubeconfig, kubeconfigPath, err := internalcluster.GetKubeconfig( + kubeconfigCtx, + baseClusterMasterIP, + baseClusterUser, + baseClusterSSHPrivateKey, + sshClient, + ) + if err != nil { + sshClient.Close() + return nil, 
fmt.Errorf("failed to get kubeconfig: %w", err) + } + + // Stage 4: Establish SSH tunnel with port forwarding + tunnelInfo, err := ssh.EstablishSSHTunnel(ctx, sshClient, "6445") + if err != nil { + sshClient.Close() + return nil, fmt.Errorf("failed to establish SSH tunnel: %w", err) + } + + return &TestClusterResources{ + SSHClient: sshClient, + Kubeconfig: kubeconfig, + KubeconfigPath: kubeconfigPath, + TunnelInfo: tunnelInfo, + ClusterDefinition: clusterDefinition, + }, nil +} + +// CleanupTestCluster cleans up all resources created by CreateTestCluster +func CleanupTestCluster(resources *TestClusterResources) error { + var errs []error + + // Stop SSH tunnel first (must be done before closing SSH client) + if resources.TunnelInfo != nil && resources.TunnelInfo.StopFunc != nil { + if err := resources.TunnelInfo.StopFunc(); err != nil { + errs = append(errs, fmt.Errorf("failed to stop SSH tunnel: %w", err)) + } + } + + // Close SSH client connection + if resources.SSHClient != nil { + if err := resources.SSHClient.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close SSH client: %w", err)) + } + } + + if len(errs) > 0 { + return fmt.Errorf("cleanup errors: %v", errs) + } + + return nil +} diff --git a/tests/cluster_creation/cluster_creation_test.yml b/tests/cluster-creation-by-steps/cluster_config.yml similarity index 100% rename from tests/cluster_creation/cluster_creation_test.yml rename to tests/cluster-creation-by-steps/cluster_config.yml diff --git a/tests/cluster_creation/cluster_creation_suite_test.go b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go similarity index 100% rename from tests/cluster_creation/cluster_creation_suite_test.go rename to tests/cluster-creation-by-steps/cluster_creation_suite_test.go diff --git a/tests/cluster_creation/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go similarity index 97% rename from tests/cluster_creation/cluster_creation_test.go rename to 
tests/cluster-creation-by-steps/cluster_creation_test.go index 94a5fa0..91f347d 100644 --- a/tests/cluster_creation/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -32,9 +32,9 @@ import ( "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" ) -var _ = Describe("Cluster Creation Test", Ordered, func() { +var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( - yamlConfigFilename string = "cluster_creation_test.yml" + yamlConfigFilename string = "cluster_config.yml" baseClusterMasterIP string = "10.0.0.181" baseClusterUser string = "w-ansible" baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml new file mode 100644 index 0000000..711e9a3 --- /dev/null +++ b/tests/cluster-creation/cluster_config.yml @@ -0,0 +1,69 @@ +# Test nested cluster config definition +clusterDefinition: + masters: # Master nodes configuration + - hostname: "master-1" + hostType: "vm" + role: "master" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 4 + ram: 8 + diskSize: 30 + workers: # Worker nodes configuration + - 
hostname: "worker-1" + hostType: "vm" + role: "worker" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 2 + ram: 6 + diskSize: 30 + - hostname: "worker-2" + hostType: "vm" + role: "worker" + osType: "Ubuntu 22.04 6.2.0-39-generic" + auth: + method: "ssh-key" + user: "user" + sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + cpu: 2 + ram: 6 + diskSize: 30 + # DKP parameters + dkpParameters: + kubernetesVersion: "Automatic" + podSubnetCIDR: "10.112.0.0/16" + 
serviceSubnetCIDR: "10.225.0.0/16" + clusterDomain: "cluster.local" + licenseKey: "" + registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" + namespace: "e2e-nested-1" + storageClass: "nfs-storage-class" + modules: + - name: "snapshot-controller" + version: 1 + enabled: true + dependencies: [] + - name: "sds-local-volume" + version: 1 + enabled: true + dependencies: + - "snapshot-controller" + - name: "sds-node-configurator" + version: 1 + enabled: true + settings: + enableThinProvisioning: true + dependencies: + - "sds-local-volume" + - name: "sds-replicated-volume" + version: 1 + enabled: true + dependencies: + - "sds-node-configurator" diff --git a/tests/cluster-creation/cluster_creation_suite_test.go b/tests/cluster-creation/cluster_creation_suite_test.go new file mode 100644 index 0000000..3d23e4b --- /dev/null +++ b/tests/cluster-creation/cluster_creation_suite_test.go @@ -0,0 +1,41 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/deckhouse/storage-e2e/internal/config" // global config package +) + +var _ = BeforeSuite(func() { + // Validate that CLUSTER_CREATE_MODE is set and has a valid value + err := config.ValidateClusterCreateMode() + Expect(err).NotTo(HaveOccurred(), "CLUSTER_CREATE_MODE environment variable must be set to either 'alwaysUseExisting' or 'alwaysCreateNew'") +}) + +func TestIntegration(t *testing.T) { + RegisterFailHandler(Fail) + // Configure Ginkgo to show verbose output + suiteConfig, reporterConfig := GinkgoConfiguration() + reporterConfig.Verbose = true + reporterConfig.ShowNodeEvents = false + RunSpecs(t, "Integration Test Suite", suiteConfig, reporterConfig) +} diff --git a/tests/cluster-creation/cluster_creation_test.go b/tests/cluster-creation/cluster_creation_test.go new file mode 100644 index 0000000..d857a51 --- /dev/null +++ b/tests/cluster-creation/cluster_creation_test.go @@ -0,0 +1,81 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/deckhouse/storage-e2e/pkg/cluster" +) + +var _ = Describe("Cluster Creation Test", Ordered, func() { + var ( + yamlConfigFilename string = "cluster_config.yml" + baseClusterMasterIP string = "10.0.0.181" + baseClusterUser string = "w-ansible" + baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" + + testClusterResources *cluster.TestClusterResources + ctx context.Context = context.Background() + ) + + BeforeAll(func() { + // DeferCleanup: Clean up all resources in reverse order of creation - analog of AfterAll() in Ginkgo + DeferCleanup(func() { + if testClusterResources != nil { + By("Cleaning up test cluster resources", func() { + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources\n") + err := cluster.CleanupTestCluster(testClusterResources) + Expect(err).NotTo(HaveOccurred(), "CleanupTestCluster should succeed") + GinkgoWriter.Printf(" ✅ Test cluster resources cleaned up successfully\n") + }) + } + }) + }) + + It("should successfully create test cluster", func() { + By("Creating test cluster connection", func() { + GinkgoWriter.Printf(" ▶️ Creating test cluster connection\n") + var err error + testClusterResources, err = cluster.CreateTestCluster( + ctx, + yamlConfigFilename, + baseClusterMasterIP, + baseClusterUser, + baseClusterSSHPrivateKey, + ) + Expect(err).NotTo(HaveOccurred(), "CreateTestCluster should succeed") + Expect(testClusterResources).NotTo(BeNil(), "TestClusterResources should not be nil") + GinkgoWriter.Printf(" ✅ Test cluster connection created successfully\n") + }) + }) + + It("should get all test cluster resources", func() { + Expect(testClusterResources).NotTo(BeNil()) + Expect(testClusterResources.SSHClient).NotTo(BeNil(), "SSH client should be created") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Kubeconfig should be created") + Expect(testClusterResources.KubeconfigPath).NotTo(BeEmpty(), "Kubeconfig path should be set") + Expect(testClusterResources.TunnelInfo).NotTo(BeNil(), 
"Tunnel info should be created") + Expect(testClusterResources.TunnelInfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") + Expect(testClusterResources.ClusterDefinition).NotTo(BeNil(), "Cluster definition should be loaded") + GinkgoWriter.Printf(" ✅ All test cluster resources verified successfully\n") + }) + +}) // Describe: Cluster Creation From ff1a9a6a415572ecf1a5f3d2af4cab31d7f317ec Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Sun, 14 Dec 2025 22:12:34 +0300 Subject: [PATCH 13/48] Add AutoGenerateVMNames environment variable and update cloud-config template in images.go; refine cluster_config.yml comments --- internal/config/env.go | 4 ++ internal/config/images.go | 46 +++++++++++++++++++ .../cluster_config.yml | 3 +- tests/cluster-creation/cluster_config.yml | 3 +- 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/internal/config/env.go b/internal/config/env.go index 034fd7e..0e19ba8 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -25,6 +25,10 @@ var ( // ClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew" ClusterCreateMode = os.Getenv("CLUSTER_CREATE_MODE") + + // AutoGenerateVMNames specifies whether to auto-generate VM names or use provided in config. + // Default is "false". If set to "true", the VM names suffix in kubernetes style will be added to VM names set in cluster config. + AutoGenerateVMNames = os.Getenv("AUTO_GENERATE_VM_NAMES") // TODO implement this in cluster.LoadClusterConfig function. 
) // ValidateClusterCreateMode validates that CLUSTER_CREATE_MODE is set and has a valid value diff --git a/internal/config/images.go b/internal/config/images.go index ef8c6f1..5ea7b1c 100644 --- a/internal/config/images.go +++ b/internal/config/images.go @@ -36,6 +36,52 @@ var OSTypeMap = map[string]OSType{ }, } +/* +#cloud-config +package_update: true +packages: + - tmux + - htop + - qemu-guest-agent + - iputils-ping + - stress-ng + - jq + - yq + - rsync + - fio + - curl + +ssh_pwauth: true +users: + - name: cloud + # passwd: cloud + passwd: $6$rounds=4096$vln/.aPHBOI7BMYR$bBMkqQvuGs5Gyd/1H5DP4m9HjQSy.kgrxpaGEHwkX7KEFV8BS.HZWPitAtZ2Vd8ZqIZRqmlykRCagTgPejt1i. + shell: /bin/bash + sudo: ALL=(ALL) NOPASSWD:ALL + chpasswd: {expire: False} + lock_passwd: false + ssh_authorized_keys: + - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDJ4lrUhqV/ymWyK7rWtx7ulyrUWQqZejmn2pR6/2mxTl+TPUQEYEZKLjt9xtgvOYsfHARRWsoF7URNZdg6LI/HuxMK6kz5ohwrP6GB4XngL7vfyZdefiV6OVK+Fsdw6WgH7Cr5myIc8Sv6gumcDYfT9xX0pcGipRZD9qaHkm34U9jhT6U1QRIgG0Po31HAA6JmKEFZ/0S715McYKTTx3aIFzrm5kxCmNCtk19oMZDOCdYhScVGcZKeaP/PLF7fpvajaWLySwKFfRj1HYnaX1rgmpINNpiWXsq+7D53a7/LUpTIvERYD31fh8YW72hilS8rWbymILZhQFRlTtma0kVY7T5qsvvBmP2da4T5Jn+DqZPI0Ey24eiVO7G8uk0gjZOW8YF5t0OJuVL/0lCBQo3RkIBjg9aR60zaJypVlXRZmYwm4attEjSFOU+4Hymu79NdeJNQhTCAxnCF5NC7OZ7ETtGzEt2L8s2t5w2jRiaDyDzKHeWAgXx7DLYdfqRIO+ETJj5Vmzl/c+R9t0UXNQpTuZjiutukTwVGe+ho/74HuXClUrs6qPkR125KjEHcME+EXuHEkwaCgDGJsCfiecjwFv30E//iPk0weJ3K3wFTyqf2vFixcMDgbkwOjgGqZ005blCfuN+FJ1NbqNLe3YhBymOpLFkB0/DhImqfF4kS6w== korolevn@nikita.korolev-macbook + - ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC5U6cSKbJOlgfNa7nhMT/J4PNAXmbfHH6/CA9p53qSmwAgV+shkbS7uEj9W4RxM6Q4dKJeRMLyzHj+76y98OBQ9Sb1PJ0fCDLabZR/NeCWy9m/7Lq2Ti3QMOoBkJurL4+T26edDQfhkivywaSOfrcxDiponDEwcqTQRs2rXQSC0tW0lvQrbYUFOjJZ425OOqUm7KUxuOoNeynLTlS4OerVk0fjTa5EtBuDCbpob47NMYtR+JP4PWOw4H9qyli6kegrW3UqamHpFQAAN+UN0x+KSraHfrF/78HO5IET1BpdHOzP5TAqRNVJySxzVOEl2Nau7cEJiHtqHeaP6/mwO4E699BXNtxWatXxT5dSNxdTwhH7FlpA176h04h5sAooIu3zcA3ItzC78wIrdq7ussDqEQfFcneCIqlMpBI6V/lh+e12uvuj3+PRe6Fekr0DC2QR3+rJsueW7huWHlEXEwrilXy3eVYWIRX+Dihrxd/7KLmnLpBW0hwaWZxTdQflwo7AmJgGD2CAWcKXY1vB6BFsO6Q2MOPlMn+Kejq0YFqguMkIiEMCKKbIkBMeyUdnn03ETaQjAJaANJTcO+RcfP0RVV1C116ePdFu6FumVvSK22pU78p4eIyH1WPpFq4akl2ZFpHVgPQVeO0Yz2TRRjM6Jo9QnaT+XxBi02gpCTY5CQ== user@default +write_files: + - path: /etc/ssh/sshd_config.d/allow_tcp_forwarding.conf + content: | + # Разрешить TCP forwarding + AllowTcpForwarding yes + +runcmd: + - systemctl restart ssh + + # - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - + # - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + # - apt-get update -y + # - apt-get install -y docker-ce docker-ce-cli containerd.io + # - systemctl start docker + # - systemctl enable docker + +final_message: "🔥🔥🔥 The system is finally up, after $UPTIME seconds 🔥🔥🔥" +*/ + /* #!/bin/bash diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index 711e9a3..b4ca43a 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -1,4 +1,4 @@ -# Test nested cluster config definition +# Test nested cluster configuration clusterDefinition: masters: # Master nodes configuration - hostname: "master-1" @@ -45,6 +45,7 @@ clusterDefinition: registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" namespace: "e2e-nested-1" storageClass: "nfs-storage-class" + # Module configuration modules: - name: "snapshot-controller" 
version: 1 diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml index 711e9a3..b4ca43a 100644 --- a/tests/cluster-creation/cluster_config.yml +++ b/tests/cluster-creation/cluster_config.yml @@ -1,4 +1,4 @@ -# Test nested cluster config definition +# Test nested cluster configuration clusterDefinition: masters: # Master nodes configuration - hostname: "master-1" @@ -45,6 +45,7 @@ clusterDefinition: registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" namespace: "e2e-nested-1" storageClass: "nfs-storage-class" + # Module configuration modules: - name: "snapshot-controller" version: 1 From f7f8cced3809fe3242559aef425d0b670e94bd42 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Mon, 15 Dec 2025 15:59:19 +0300 Subject: [PATCH 14/48] Creating and waiting for VMs - implemented --- go.mod | 57 ++- go.sum | 346 +++++++++++++-- internal/config/env.go | 4 + internal/kubernetes/virtualization/client.go | 51 ++- .../virtualization/cluster_virtual_image.go | 65 ++- internal/kubernetes/virtualization/vdisk.go | 20 - .../kubernetes/virtualization/virtual_disk.go | 87 ++++ .../virtualization/virtual_machine.go | 87 ++++ internal/kubernetes/virtualization/vm.go | 20 - .../virtualization/vm_block_device.go | 87 ++++ internal/kubernetes/virtualization/vmbd.go | 20 - pkg/cluster/vms.go | 398 ++++++++++++++++++ .../cluster_creation_test.go | 67 ++- 13 files changed, 1178 insertions(+), 131 deletions(-) delete mode 100644 internal/kubernetes/virtualization/vdisk.go create mode 100644 internal/kubernetes/virtualization/virtual_disk.go create mode 100644 internal/kubernetes/virtualization/virtual_machine.go delete mode 100644 internal/kubernetes/virtualization/vm.go create mode 100644 internal/kubernetes/virtualization/vm_block_device.go delete mode 100644 internal/kubernetes/virtualization/vmbd.go create mode 100644 pkg/cluster/vms.go diff --git a/go.mod b/go.mod index f48d40f..5306d21 100644 --- a/go.mod +++ b/go.mod @@ -1,49 +1,68 
@@ module github.com/deckhouse/storage-e2e -go 1.24.2 +go 1.24.6 -toolchain go1.24.3 +toolchain go1.24.11 require ( - github.com/onsi/ginkgo/v2 v2.21.0 - github.com/onsi/gomega v1.35.1 + github.com/deckhouse/virtualization/api v1.0.0 + github.com/onsi/ginkgo/v2 v2.22.0 + github.com/onsi/gomega v1.36.1 github.com/pkg/sftp v1.13.10 golang.org/x/crypto v0.46.0 golang.org/x/term v0.38.0 gopkg.in/yaml.v3 v3.0.1 - k8s.io/client-go v0.32.1 + k8s.io/apimachinery v0.34.1 + k8s.io/client-go v0.34.1 + sigs.k8s.io/controller-runtime v0.22.4 ) require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/google/go-cmp v0.6.0 // indirect - github.com/google/gofuzz v1.2.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kr/fs v0.1.0 // indirect - github.com/kr/pretty v0.3.1 // indirect + github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/rogpeppe/go-internal v1.12.0 // 
indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183 // indirect + github.com/openshift/custom-resource-status v1.1.2 // indirect + github.com/spf13/pflag v1.0.7 // indirect github.com/x448/float16 v0.8.4 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/net v0.47.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.32.0 // indirect - golang.org/x/time v0.7.0 // indirect + golang.org/x/time v0.9.0 // indirect golang.org/x/tools v0.39.0 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + google.golang.org/protobuf v1.36.5 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - k8s.io/apimachinery v0.32.1 // indirect + k8s.io/api v0.34.1 // indirect + k8s.io/apiextensions-apiserver v0.34.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + kubevirt.io/api v1.3.1 // indirect + kubevirt.io/containerized-data-importer-api v1.57.0-alpha1 // indirect + kubevirt.io/controller-lifecycle-operator-sdk/api v0.0.0-20220329064328-f3cc58c6ed90 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index f5d951f..b5e1a4b 100644 --- a/go.sum +++ b/go.sum @@ -1,46 +1,119 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/NYTimes/gziphandler 
v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= +github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 
-github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/deckhouse/virtualization/api v1.0.0 h1:q4TvC74tpjk25k0byXJCYP4HjvRexBSeI0cC8QeCMTQ= +github.com/deckhouse/virtualization/api v1.0.0/go.mod h1:meTeGulR+xwnvt0pTGsoI14YhGe0lHUVyAfhZsoQyeQ= +github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= +github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.15.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= +github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod 
h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/getkin/kin-openapi v0.76.0/go.mod h1:660oXbgy5JFMKreazJaQTw7o+X00qeSyhcnluiMv+Xg= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= +github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= +github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-openapi/swag 
v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= -github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= -github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod 
h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= +github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod 
h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -48,107 +121,290 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 
v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= -github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= -github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= -github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/ginkgo/v2 
v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183 h1:t/CahSnpqY46sQR01SoS+Jt0jtjgmhgE6lFmRnO4q70= +github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183/go.mod h1:4VWG+W22wrB4HfBL88P40DxLEpSOaiBVxUnfalfJo9k= +github.com/openshift/custom-resource-status v1.1.2 h1:C3DL44LEbvlbItfd8mT5jWrqPfHnSOQoQf/sypqA6A4= +github.com/openshift/custom-resource-status v1.1.2/go.mod h1:DB/Mf2oTeiAmVVX1gN+NEqweonAPY0TKUwADizj8+ZA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU= github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.9.0/go.mod 
h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= +github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx 
v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 
h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net 
v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= 
+golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= -golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= -golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= +golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.6-0.20210820212750-d4cc65f0b2ff/go.mod 
h1:YD9qOF0M9xpSpdWTBbzEl5e/RnCefISl8E5Noe10jFM= +golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf 
v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= 
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc= -k8s.io/api v0.32.1/go.mod h1:/Yi/BqkuueW1BgpoePYBRdDYfjPF5sgTr5+YqDZra5k= -k8s.io/apimachinery v0.32.1 h1:683ENpaCBjma4CYqsmZyhEzrGz6cjn1MY/X2jB2hkZs= -k8s.io/apimachinery v0.32.1/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= -k8s.io/client-go v0.32.1 h1:otM0AxdhdBIaQh7l1Q0jQpmo7WOFIk5FFa4bg6YMdUU= -k8s.io/client-go v0.32.1/go.mod h1:aTTKZY7MdxUaJ/KiUs8D+GssR9zJZi77ZqtzcGXIiDg= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools 
v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +k8s.io/api v0.23.3/go.mod h1:w258XdGyvCmnBj/vGzQMj6kzdufJZVUwEM1U2fRJwSQ= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI= +k8s.io/apiextensions-apiserver v0.34.1/go.mod h1:hP9Rld3zF5Ay2Of3BeEpLAToP+l4s5UlxiHfqRaRcMc= +k8s.io/apimachinery v0.23.3/go.mod h1:BEuFMMBaIbcOqVIJqNZJXGFTP4W6AycEpb5+m/97hrM= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/code-generator v0.23.3/go.mod h1:S0Q1JVA+kSzTI1oUvbKAxZY/DYbA/ZUb4Uknog12ETk= +k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/gengo v0.0.0-20211129171323-c02415ce4185/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= +k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= +k8s.io/klog/v2 v2.30.0/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +k8s.io/klog/v2 v2.40.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y= -k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4= -k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= -k8s.io/utils 
v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65/go.mod h1:sX9MT8g7NVZM5lVL/j8QyCCJe8YSMW30QvGZWaCIDIk= +k8s.io/kube-openapi v0.0.0-20220124234850-424119656bbf/go.mod h1:sX9MT8g7NVZM5lVL/j8QyCCJe8YSMW30QvGZWaCIDIk= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +k8s.io/utils v0.0.0-20211116205334-6203023598ed/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +kubevirt.io/api v1.3.1 h1:MoTNo/zvDlZ44c2ocXLPln8XTaQOeUodiYbEKrTCqv4= +kubevirt.io/api v1.3.1/go.mod h1:tCn7VAZktEvymk490iPSMPCmKM9UjbbfH2OsFR/IOLU= +kubevirt.io/containerized-data-importer-api v1.57.0-alpha1 h1:IWo12+ei3jltSN5jQN1xjgakfvRSF3G3Rr4GXVOOy2I= +kubevirt.io/containerized-data-importer-api v1.57.0-alpha1/go.mod h1:Y/8ETgHS1GjO89bl682DPtQOYEU/1ctPFBz6Sjxm4DM= +kubevirt.io/controller-lifecycle-operator-sdk/api v0.0.0-20220329064328-f3cc58c6ed90 h1:QMrd0nKP0BGbnxTqakhDZAUhGKxPiPiN5gSDqKUmGGc= +kubevirt.io/controller-lifecycle-operator-sdk/api v0.0.0-20220329064328-f3cc58c6ed90/go.mod h1:018lASpFYBsYN6XwmA2TIrPCx6e0gviTd/ZNtSitKgc= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6/go.mod h1:p4QtZmO4uMYipTQNzagwnNoseA6OxSUutVw05NhYDRs= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json 
v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= -sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= +sigs.k8s.io/structured-merge-diff/v4 v4.2.1/go.mod h1:j/nl6xW8vLS49O8YvXW1ocPhZawJtm+Yrr7PPRQ0Vg4= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/config/env.go b/internal/config/env.go index 0e19ba8..cb39697 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -29,6 +29,10 @@ var ( // AutoGenerateVMNames specifies whether to auto-generate VM names or use provided in config. // Default is "false". If set to "true", the VM names suffix in kubernetes style will be added to VM names set in cluster config. AutoGenerateVMNames = os.Getenv("AUTO_GENERATE_VM_NAMES") // TODO implement this in cluster.LoadClusterConfig function. + + // TestClusterCleanup specifies whether to remove the test cluster after tests complete. + // Default is "false". 
If set to "true" or "True", the test cluster will be cleaned up after tests. + TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") ) // ValidateClusterCreateMode validates that CLUSTER_CREATE_MODE is set and has a valid value diff --git a/internal/kubernetes/virtualization/client.go b/internal/kubernetes/virtualization/client.go index 7bb06b7..a6c3e37 100644 --- a/internal/kubernetes/virtualization/client.go +++ b/internal/kubernetes/virtualization/client.go @@ -16,5 +16,54 @@ limitations under the License. package virtualization -// TODO: Implement virtualization client interface +import ( + "context" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Client provides access to virtualization resources +type Client struct { + client client.Client +} + +// NewClient creates a new virtualization client from a rest.Config +// It uses controller-runtime client which provides type-safe access to CRDs +func NewClient(ctx context.Context, config *rest.Config) (*Client, error) { + scheme := runtime.NewScheme() + + // Register virtualization API types with the scheme + if err := v1alpha2.SchemeBuilder.AddToScheme(scheme); err != nil { + return nil, err + } + + cl, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return nil, err + } + + return &Client{client: cl}, nil +} + +// VirtualMachines returns a VirtualMachine client +func (c *Client) VirtualMachines() VirtualMachineClient { + return &virtualMachineClient{client: c.client} +} + +// VirtualDisks returns a VirtualDisk client +func (c *Client) VirtualDisks() VirtualDiskClient { + return &virtualDiskClient{client: c.client} +} + +// ClusterVirtualImages returns a ClusterVirtualImage client +func (c *Client) ClusterVirtualImages() ClusterVirtualImageClient { + return &clusterVirtualImageClient{client: c.client} +} + +// VirtualMachineBlockDeviceAttachments returns a 
VMBD client +func (c *Client) VirtualMachineBlockDeviceAttachments() VMBDClient { + return &vmbdClient{client: c.client} +} diff --git a/internal/kubernetes/virtualization/cluster_virtual_image.go b/internal/kubernetes/virtualization/cluster_virtual_image.go index e6bb7fc..74d4542 100644 --- a/internal/kubernetes/virtualization/cluster_virtual_image.go +++ b/internal/kubernetes/virtualization/cluster_virtual_image.go @@ -16,5 +16,68 @@ limitations under the License. package virtualization -// TODO: Implement cluster virtual image operations +import ( + "context" + "fmt" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// ClusterVirtualImageClient provides operations on ClusterVirtualImage resources +// Note: ClusterVirtualImage is a cluster-scoped resource (no namespace) +type ClusterVirtualImageClient interface { + Get(ctx context.Context, name string) (*v1alpha2.ClusterVirtualImage, error) + List(ctx context.Context) ([]v1alpha2.ClusterVirtualImage, error) + Create(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error + Update(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error + Delete(ctx context.Context, name string) error +} + +type clusterVirtualImageClient struct { + client client.Client +} + +func (c *clusterVirtualImageClient) Get(ctx context.Context, name string) (*v1alpha2.ClusterVirtualImage, error) { + cvmi := &v1alpha2.ClusterVirtualImage{} + key := client.ObjectKey{Name: name} + if err := c.client.Get(ctx, key, cvmi); err != nil { + return nil, fmt.Errorf("failed to get ClusterVirtualImage %s: %w", name, err) + } + return cvmi, nil +} + +func (c *clusterVirtualImageClient) List(ctx context.Context) ([]v1alpha2.ClusterVirtualImage, error) { + list := &v1alpha2.ClusterVirtualImageList{} + if err := c.client.List(ctx, list); err != nil { + return nil, fmt.Errorf("failed to list ClusterVirtualImages: %w", err) + } + return 
list.Items, nil +} + +func (c *clusterVirtualImageClient) Create(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { + if err := c.client.Create(ctx, cvmi); err != nil { + return fmt.Errorf("failed to create ClusterVirtualImage %s: %w", cvmi.Name, err) + } + return nil +} + +func (c *clusterVirtualImageClient) Update(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { + if err := c.client.Update(ctx, cvmi); err != nil { + return fmt.Errorf("failed to update ClusterVirtualImage %s: %w", cvmi.Name, err) + } + return nil +} + +func (c *clusterVirtualImageClient) Delete(ctx context.Context, name string) error { + cvmi := &v1alpha2.ClusterVirtualImage{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + if err := c.client.Delete(ctx, cvmi); err != nil { + return fmt.Errorf("failed to delete ClusterVirtualImage %s: %w", name, err) + } + return nil +} diff --git a/internal/kubernetes/virtualization/vdisk.go b/internal/kubernetes/virtualization/vdisk.go deleted file mode 100644 index b7326cb..0000000 --- a/internal/kubernetes/virtualization/vdisk.go +++ /dev/null @@ -1,20 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package virtualization - -// TODO: Implement virtual disk operations - diff --git a/internal/kubernetes/virtualization/virtual_disk.go b/internal/kubernetes/virtualization/virtual_disk.go new file mode 100644 index 0000000..96f307a --- /dev/null +++ b/internal/kubernetes/virtualization/virtual_disk.go @@ -0,0 +1,87 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +import ( + "context" + "fmt" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// VirtualDiskClient provides operations on VirtualDisk resources +type VirtualDiskClient interface { + Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualDisk, error) + List(ctx context.Context, namespace string) ([]v1alpha2.VirtualDisk, error) + Create(ctx context.Context, vd *v1alpha2.VirtualDisk) error + Update(ctx context.Context, vd *v1alpha2.VirtualDisk) error + Delete(ctx context.Context, namespace, name string) error +} + +type virtualDiskClient struct { + client client.Client +} + +func (c *virtualDiskClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualDisk, error) { + vd := &v1alpha2.VirtualDisk{} + key := client.ObjectKey{Namespace: namespace, Name: name} + if err := c.client.Get(ctx, key, vd); err != nil { + return nil, fmt.Errorf("failed to get VirtualDisk %s/%s: %w", namespace, name, err) + } + return vd, nil +} + +func 
(c *virtualDiskClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualDisk, error) { + list := &v1alpha2.VirtualDiskList{} + opts := []client.ListOption{} + if namespace != "" { + opts = append(opts, client.InNamespace(namespace)) + } + if err := c.client.List(ctx, list, opts...); err != nil { + return nil, fmt.Errorf("failed to list VirtualDisks: %w", err) + } + return list.Items, nil +} + +func (c *virtualDiskClient) Create(ctx context.Context, vd *v1alpha2.VirtualDisk) error { + if err := c.client.Create(ctx, vd); err != nil { + return fmt.Errorf("failed to create VirtualDisk %s/%s: %w", vd.Namespace, vd.Name, err) + } + return nil +} + +func (c *virtualDiskClient) Update(ctx context.Context, vd *v1alpha2.VirtualDisk) error { + if err := c.client.Update(ctx, vd); err != nil { + return fmt.Errorf("failed to update VirtualDisk %s/%s: %w", vd.Namespace, vd.Name, err) + } + return nil +} + +func (c *virtualDiskClient) Delete(ctx context.Context, namespace, name string) error { + vd := &v1alpha2.VirtualDisk{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + } + if err := c.client.Delete(ctx, vd); err != nil { + return fmt.Errorf("failed to delete VirtualDisk %s/%s: %w", namespace, name, err) + } + return nil +} diff --git a/internal/kubernetes/virtualization/virtual_machine.go b/internal/kubernetes/virtualization/virtual_machine.go new file mode 100644 index 0000000..5bdc97e --- /dev/null +++ b/internal/kubernetes/virtualization/virtual_machine.go @@ -0,0 +1,87 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +import ( + "context" + "fmt" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// VirtualMachineClient provides operations on VirtualMachine resources +type VirtualMachineClient interface { + Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachine, error) + List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachine, error) + Create(ctx context.Context, vm *v1alpha2.VirtualMachine) error + Update(ctx context.Context, vm *v1alpha2.VirtualMachine) error + Delete(ctx context.Context, namespace, name string) error +} + +type virtualMachineClient struct { + client client.Client +} + +func (c *virtualMachineClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachine, error) { + vm := &v1alpha2.VirtualMachine{} + key := client.ObjectKey{Namespace: namespace, Name: name} + if err := c.client.Get(ctx, key, vm); err != nil { + return nil, fmt.Errorf("failed to get VirtualMachine %s/%s: %w", namespace, name, err) + } + return vm, nil +} + +func (c *virtualMachineClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachine, error) { + list := &v1alpha2.VirtualMachineList{} + opts := []client.ListOption{} + if namespace != "" { + opts = append(opts, client.InNamespace(namespace)) + } + if err := c.client.List(ctx, list, opts...); err != nil { + return nil, fmt.Errorf("failed to list VirtualMachines: %w", err) + } + return list.Items, nil +} + +func (c *virtualMachineClient) Create(ctx context.Context, vm *v1alpha2.VirtualMachine) error { + if err := c.client.Create(ctx, vm); err != nil { + return fmt.Errorf("failed to create VirtualMachine %s/%s: %w", vm.Namespace, vm.Name, err) + } + return nil +} + +func (c *virtualMachineClient) Update(ctx context.Context, vm 
*v1alpha2.VirtualMachine) error { + if err := c.client.Update(ctx, vm); err != nil { + return fmt.Errorf("failed to update VirtualMachine %s/%s: %w", vm.Namespace, vm.Name, err) + } + return nil +} + +func (c *virtualMachineClient) Delete(ctx context.Context, namespace, name string) error { + vm := &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + } + if err := c.client.Delete(ctx, vm); err != nil { + return fmt.Errorf("failed to delete VirtualMachine %s/%s: %w", namespace, name, err) + } + return nil +} diff --git a/internal/kubernetes/virtualization/vm.go b/internal/kubernetes/virtualization/vm.go deleted file mode 100644 index d77e4dc..0000000 --- a/internal/kubernetes/virtualization/vm.go +++ /dev/null @@ -1,20 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package virtualization - -// TODO: Implement VM operations - diff --git a/internal/kubernetes/virtualization/vm_block_device.go b/internal/kubernetes/virtualization/vm_block_device.go new file mode 100644 index 0000000..6aea821 --- /dev/null +++ b/internal/kubernetes/virtualization/vm_block_device.go @@ -0,0 +1,87 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +import ( + "context" + "fmt" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// VMBDClient provides operations on VirtualMachineBlockDeviceAttachment resources +type VMBDClient interface { + Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachineBlockDeviceAttachment, error) + List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachineBlockDeviceAttachment, error) + Create(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error + Update(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error + Delete(ctx context.Context, namespace, name string) error +} + +type vmbdClient struct { + client client.Client +} + +func (c *vmbdClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachineBlockDeviceAttachment, error) { + vmbd := &v1alpha2.VirtualMachineBlockDeviceAttachment{} + key := client.ObjectKey{Namespace: namespace, Name: name} + if err := c.client.Get(ctx, key, vmbd); err != nil { + return nil, fmt.Errorf("failed to get VirtualMachineBlockDeviceAttachment %s/%s: %w", namespace, name, err) + } + return vmbd, nil +} + +func (c *vmbdClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachineBlockDeviceAttachment, error) { + list := &v1alpha2.VirtualMachineBlockDeviceAttachmentList{} + opts := []client.ListOption{} + if namespace != "" { + opts = append(opts, client.InNamespace(namespace)) + } + if err := 
c.client.List(ctx, list, opts...); err != nil { + return nil, fmt.Errorf("failed to list VirtualMachineBlockDeviceAttachments: %w", err) + } + return list.Items, nil +} + +func (c *vmbdClient) Create(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { + if err := c.client.Create(ctx, vmbd); err != nil { + return fmt.Errorf("failed to create VirtualMachineBlockDeviceAttachment %s/%s: %w", vmbd.Namespace, vmbd.Name, err) + } + return nil +} + +func (c *vmbdClient) Update(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { + if err := c.client.Update(ctx, vmbd); err != nil { + return fmt.Errorf("failed to update VirtualMachineBlockDeviceAttachment %s/%s: %w", vmbd.Namespace, vmbd.Name, err) + } + return nil +} + +func (c *vmbdClient) Delete(ctx context.Context, namespace, name string) error { + vmbd := &v1alpha2.VirtualMachineBlockDeviceAttachment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + } + if err := c.client.Delete(ctx, vmbd); err != nil { + return fmt.Errorf("failed to delete VirtualMachineBlockDeviceAttachment %s/%s: %w", namespace, name, err) + } + return nil +} diff --git a/internal/kubernetes/virtualization/vmbd.go b/internal/kubernetes/virtualization/vmbd.go deleted file mode 100644 index 35329f6..0000000 --- a/internal/kubernetes/virtualization/vmbd.go +++ /dev/null @@ -1,20 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package virtualization - -// TODO: Implement VM block device operations - diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go new file mode 100644 index 0000000..ed58f5d --- /dev/null +++ b/pkg/cluster/vms.go @@ -0,0 +1,398 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cluster + +import ( + "context" + "fmt" + "math/rand" + "strings" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// VMResources tracks VM-related resources created for a test cluster +type VMResources struct { + VirtClient *virtualization.Client + Namespace string + VMNames []string + CVMINames []string +} + +// CreateVirtualMachines creates virtual machines from cluster definition. +// It validates CLUSTER_CREATE_MODE, handles VM name conflicts, creates all VMs, +// and returns the list of VM names that were created along with resource tracking info. +func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Client, clusterDef *config.ClusterDefinition) ([]string, *VMResources, error) { + // Check CLUSTER_CREATE_MODE + if config.ClusterCreateMode != config.ClusterCreateModeAlwaysCreateNew { + return nil, nil, fmt.Errorf("CLUSTER_CREATE_MODE must be set to '%s'. 
Current value: '%s'. Using existing cluster currently is not supported", config.ClusterCreateModeAlwaysCreateNew, config.ClusterCreateMode) + } + + namespace := clusterDef.DKPParameters.Namespace + + // Get all VM nodes from cluster definition + vmNodes := getVMNodes(clusterDef) + if len(vmNodes) == 0 { + return nil, nil, fmt.Errorf("no VM nodes found in cluster definition") + } + + // Track CVMI names that we create or use + cvmiNamesMap := make(map[string]bool) + + vmNames := make([]string, 0, len(vmNodes)) + for _, node := range vmNodes { + vmNames = append(vmNames, node.Hostname) + } + + // Check if any VMs with these names already exist + existingVMs, err := virtClient.VirtualMachines().List(ctx, namespace) + if err != nil { + return nil, nil, fmt.Errorf("failed to list existing VMs: %w", err) + } + + existingVMNames := make(map[string]bool) + for _, vm := range existingVMs { + existingVMNames[vm.Name] = true + } + + conflictingVMs := make([]string, 0) + for _, vmName := range vmNames { + if existingVMNames[vmName] { + conflictingVMs = append(conflictingVMs, vmName) + } + } + + // Handle conflicts + if len(conflictingVMs) > 0 { + if config.AutoGenerateVMNames != "true" && config.AutoGenerateVMNames != "True" { + return nil, nil, fmt.Errorf("virtual machines with the following names already exist in namespace %s: %v. 
Set AUTO_GENERATE_VM_NAMES=true to auto-generate unique names", namespace, conflictingVMs) + } + + // Generate suffix and update clusterDefinition + suffix := generateRandomSuffix() + updateClusterDefinitionHostnames(clusterDef, suffix) + // Update vmNodes with new names + for i := range vmNodes { + vmNodes[i].Hostname = vmNodes[i].Hostname + suffix + } + // Update vmNames + vmNames = make([]string, 0, len(vmNodes)) + for _, node := range vmNodes { + vmNames = append(vmNames, node.Hostname) + } + } + + // Create all VMs + storageClass := clusterDef.DKPParameters.StorageClass + for _, node := range vmNodes { + cvmiName, err := createVM(ctx, virtClient, namespace, node, storageClass) + if err != nil { + return nil, nil, fmt.Errorf("failed to create VM %s: %w", node.Hostname, err) + } + if cvmiName != "" { + cvmiNamesMap[cvmiName] = true + } + } + + // Convert CVMI names map to slice + cvmiNames := make([]string, 0, len(cvmiNamesMap)) + for name := range cvmiNamesMap { + cvmiNames = append(cvmiNames, name) + } + + resources := &VMResources{ + VirtClient: virtClient, + Namespace: namespace, + VMNames: vmNames, + CVMINames: cvmiNames, + } + + return vmNames, resources, nil +} + +// getVMNodes extracts all VM nodes from cluster definition +func getVMNodes(clusterDef *config.ClusterDefinition) []config.ClusterNode { + var vmNodes []config.ClusterNode + + for _, node := range clusterDef.Masters { + if node.HostType == config.HostTypeVM { + vmNodes = append(vmNodes, node) + } + } + + for _, node := range clusterDef.Workers { + if node.HostType == config.HostTypeVM { + vmNodes = append(vmNodes, node) + } + } + + if clusterDef.Setup != nil && clusterDef.Setup.HostType == config.HostTypeVM { + vmNodes = append(vmNodes, *clusterDef.Setup) + } + + return vmNodes +} + +// updateClusterDefinitionHostnames updates hostnames in clusterDefinition with the given suffix +func updateClusterDefinitionHostnames(clusterDef *config.ClusterDefinition, suffix string) { + for i := range 
clusterDef.Masters { + if clusterDef.Masters[i].HostType == config.HostTypeVM { + clusterDef.Masters[i].Hostname = clusterDef.Masters[i].Hostname + suffix + } + } + + for i := range clusterDef.Workers { + if clusterDef.Workers[i].HostType == config.HostTypeVM { + clusterDef.Workers[i].Hostname = clusterDef.Workers[i].Hostname + suffix + } + } + + if clusterDef.Setup != nil && clusterDef.Setup.HostType == config.HostTypeVM { + clusterDef.Setup.Hostname = clusterDef.Setup.Hostname + suffix + } +} + +// generateRandomSuffix generates a random suffix of 6 lowercase letters +func generateRandomSuffix() string { + const letters = "abcdefghijklmnopqrstuvwxyz" + r := rand.New(rand.NewSource(time.Now().UnixNano())) + suffix := make([]byte, 6) + for i := range suffix { + suffix[i] = letters[r.Intn(len(letters))] + } + return "-" + string(suffix) +} + +// createVM creates a virtual machine with all required dependencies +// Returns the CVMI name that was used/created +func createVM(ctx context.Context, virtClient *virtualization.Client, namespace string, node config.ClusterNode, storageClass string) (string, error) { + vmName := node.Hostname + + // 1. Create or get ClusterVirtualImage + cvmiName := getCVMINameFromImageURL(node.OSType.ImageURL) + cvmi, err := virtClient.ClusterVirtualImages().Get(ctx, cvmiName) + if err != nil { + // CVMI doesn't exist, create it + cvmi = &v1alpha2.ClusterVirtualImage{ + ObjectMeta: metav1.ObjectMeta{ + Name: cvmiName, + }, + Spec: v1alpha2.ClusterVirtualImageSpec{ + DataSource: v1alpha2.ClusterVirtualImageDataSource{ + Type: "HTTP", + HTTP: &v1alpha2.DataSourceHTTP{ + URL: node.OSType.ImageURL, + }, + }, + }, + } + err = virtClient.ClusterVirtualImages().Create(ctx, cvmi) + if err != nil { + return "", fmt.Errorf("failed to create ClusterVirtualImage %s: %w", cvmiName, err) + } + } + + // 2. 
Create system VirtualDisk + systemDiskName := fmt.Sprintf("%s-system", vmName) + systemDisk := &v1alpha2.VirtualDisk{ + ObjectMeta: metav1.ObjectMeta{ + Name: systemDiskName, + Namespace: namespace, + }, + Spec: v1alpha2.VirtualDiskSpec{ + PersistentVolumeClaim: v1alpha2.VirtualDiskPersistentVolumeClaim{ + Size: resource.NewQuantity(int64(node.DiskSize)*1024*1024*1024, resource.BinarySI), + StorageClass: &storageClass, + }, + DataSource: &v1alpha2.VirtualDiskDataSource{ + Type: "ObjectRef", + ObjectRef: &v1alpha2.VirtualDiskObjectRef{ + Kind: "ClusterVirtualImage", + Name: cvmi.Name, + }, + }, + }, + } + err = virtClient.VirtualDisks().Create(ctx, systemDisk) + if err != nil { + return "", fmt.Errorf("failed to create system VirtualDisk %s: %w", systemDiskName, err) + } + + // 3. Create VirtualMachine + memoryQuantity := resource.MustParse(fmt.Sprintf("%dGi", node.RAM)) + vm := &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmName, + Namespace: namespace, + Labels: map[string]string{"vm": "linux", "service": "v1"}, + }, + Spec: v1alpha2.VirtualMachineSpec{ + VirtualMachineClassName: "generic", + EnableParavirtualization: true, + RunPolicy: v1alpha2.RunPolicy("AlwaysOn"), + OsType: v1alpha2.OsType("Generic"), + Bootloader: v1alpha2.BootloaderType("BIOS"), + CPU: v1alpha2.CPUSpec{ + Cores: node.CPU, + CoreFraction: "100%", + }, + Memory: v1alpha2.MemorySpec{ + Size: memoryQuantity, + }, + BlockDeviceRefs: []v1alpha2.BlockDeviceSpecRef{ + { + Kind: v1alpha2.DiskDevice, + Name: systemDiskName, + }, + }, + Provisioning: &v1alpha2.Provisioning{ + Type: "UserData", + UserData: generateCloudInitUserData(vmName, node.Auth.SSHKey), + }, + }, + } + err = virtClient.VirtualMachines().Create(ctx, vm) + if err != nil { + return "", fmt.Errorf("failed to create VirtualMachine %s: %w", vmName, err) + } + + return cvmiName, nil +} + +// getCVMINameFromImageURL extracts a CVMI name from an image URL +func getCVMINameFromImageURL(imageURL string) string { + // 
Extract filename from URL and use it as base name + parts := strings.Split(imageURL, "/") + filename := parts[len(parts)-1] + // Remove extension + name := strings.TrimSuffix(filename, ".img") + name = strings.TrimSuffix(name, ".qcow2") + // Make it Kubernetes-friendly (lowercase, replace dots with hyphens) + name = strings.ToLower(name) + name = strings.ReplaceAll(name, ".", "-") + return name +} + +// generateCloudInitUserData generates cloud-init user data for VM provisioning +func generateCloudInitUserData(hostname, sshPubKey string) string { + return fmt.Sprintf(`#cloud-config +package_update: true +packages: + - tmux + - htop + - qemu-guest-agent + - iputils-ping + - stress-ng + - jq + - yq + - rsync + - fio + - curl + +ssh_pwauth: true +users: + - name: cloud + # passwd: cloud + passwd: $6$rounds=4096$vln/.aPHBOI7BMYR$bBMkqQvuGs5Gyd/1H5DP4m9HjQSy.kgrxpaGEHwkX7KEFV8BS.HZWPitAtZ2Vd8ZqIZRqmlykRCagTgPejt1i. + shell: /bin/bash + sudo: ALL=(ALL) NOPASSWD:ALL + chpasswd: {expire: False} + lock_passwd: false + ssh_authorized_keys: + - %s +write_files: + - path: /etc/ssh/sshd_config.d/allow_tcp_forwarding.conf + content: | + # Разрешить TCP forwarding + AllowTcpForwarding yes + +runcmd: + - systemctl restart ssh + - hostnamectl set-hostname %s + - systemctl daemon-reload + - systemctl enable --now qemu-guest-agent.service +`, sshPubKey, hostname) +} + +// CleanupVMResources forcefully stops and deletes virtual machines, virtual disks, and cluster virtual images. +// If a ClusterVirtualImage is in use by other resources, it will be skipped but VMs and VDs will still be deleted. 
+func CleanupVMResources(ctx context.Context, resources *VMResources) error { + if resources == nil { + return fmt.Errorf("resources cannot be nil") + } + + // Step 1: Forcefully stop and delete Virtual Machines + for _, vmName := range resources.VMNames { + // Try to stop the VM by updating RunPolicy to Manual or by deleting directly + // Deletion will stop the VM automatically + err := resources.VirtClient.VirtualMachines().Delete(ctx, resources.Namespace, vmName) + if err != nil && !errors.IsNotFound(err) { + // Log but continue - we'll try to clean up other resources + fmt.Printf("Warning: Failed to delete VM %s/%s: %v\n", resources.Namespace, vmName, err) + } + } + + // Step 2: Delete Virtual Disks + // Delete system disks for our VMs + for _, vmName := range resources.VMNames { + systemDiskName := fmt.Sprintf("%s-system", vmName) + err := resources.VirtClient.VirtualDisks().Delete(ctx, resources.Namespace, systemDiskName) + if err != nil && !errors.IsNotFound(err) { + fmt.Printf("Warning: Failed to delete VirtualDisk %s/%s: %v\n", resources.Namespace, systemDiskName, err) + } + } + + // Step 3: Check which ClusterVirtualImages are in use and delete those that aren't + // Get all VirtualDisks across all namespaces to check for CVMI usage + allVDisksAllNS, err := resources.VirtClient.VirtualDisks().List(ctx, "") + if err != nil { + fmt.Printf("Warning: Failed to list VirtualDisks across all namespaces: %v\n", err) + allVDisksAllNS = []v1alpha2.VirtualDisk{} + } + + // Build a map of CVMI names that are in use + cvmiInUse := make(map[string]bool) + for _, vd := range allVDisksAllNS { + if vd.Spec.DataSource != nil && vd.Spec.DataSource.ObjectRef != nil { + if vd.Spec.DataSource.ObjectRef.Kind == "ClusterVirtualImage" { + cvmiInUse[vd.Spec.DataSource.ObjectRef.Name] = true + } + } + } + + // Delete ClusterVirtualImages that are not in use + for _, cvmiName := range resources.CVMINames { + if cvmiInUse[cvmiName] { + fmt.Printf("Skipping deletion of 
ClusterVirtualImage %s: still in use by other resources\n", cvmiName) + continue + } + + err := resources.VirtClient.ClusterVirtualImages().Delete(ctx, cvmiName) + if err != nil && !errors.IsNotFound(err) { + fmt.Printf("Warning: Failed to delete ClusterVirtualImage %s: %v\n", cvmiName, err) + } + } + + return nil +} diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 91f347d..77bb838 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -26,10 +26,13 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/deckhouse/storage-e2e/internal/cluster" + internalcluster "github.com/deckhouse/storage-e2e/internal/cluster" "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" + "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" + "github.com/deckhouse/storage-e2e/pkg/cluster" + "github.com/deckhouse/virtualization/api/core/v1alpha2" ) var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { @@ -46,6 +49,8 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { tunnelinfo *ssh.TunnelInfo clusterDefinition *config.ClusterDefinition module *deckhouse.Module + virtClient *virtualization.Client + vmResources *cluster.VMResources ) BeforeAll(func() { @@ -54,7 +59,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { GinkgoWriter.Printf(" ▶️ Loading cluster configuration from: %s\n", yamlConfigFilename) - clusterDefinition, err = cluster.LoadClusterConfig(yamlConfigFilename) + clusterDefinition, err = 
internalcluster.LoadClusterConfig(yamlConfigFilename) Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ Successfully loaded cluster configuration\n") }) @@ -83,14 +88,29 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } + // Step 3: Cleanup test cluster VMs if enabled + // Note: vmResources is set in the test below, so we capture it in the closure + vmRes := vmResources + if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { + if vmRes != nil { + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster VMs...\n") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + err := cluster.CleanupVMResources(ctx, vmRes) + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to cleanup test cluster VMs: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Test cluster VMs cleaned up successfully\n") + } + } + } + // Note: kubeconfig and kubeconfigPath are just config/file paths, no cleanup needed // The kubeconfig file is stored in temp/ directory and can be kept for debugging }) }) // BeforeAll - _ = clusterDefinition // TODO: use clusterDefinition - // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) It("should establish ssh connection to the base cluster", func() { By(fmt.Sprintf("Connecting to %s@%s using key %s", baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey), func() { @@ -108,7 +128,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", baseClusterMasterIP) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - kubeconfig, kubeconfigPath, err = cluster.GetKubeconfig(ctx, baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) + kubeconfig, kubeconfigPath, err = internalcluster.GetKubeconfig(ctx, baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) 
Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ Kubeconfig retrieved and saved to: %s\n", kubeconfigPath) }) @@ -142,4 +162,41 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + It("should create virtual machines from cluster definition", func() { + By("Creating virtual machines", func() { + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) + defer cancel() + + GinkgoWriter.Printf(" ▶️ Creating virtualization client\n") + virtClient, err = virtualization.NewClient(ctx, kubeconfig) + Expect(err).NotTo(HaveOccurred()) + Expect(virtClient).NotTo(BeNil()) + GinkgoWriter.Printf(" ✅ Virtualization client initialized successfully\n") + + namespace := clusterDefinition.DKPParameters.Namespace + GinkgoWriter.Printf(" ▶️ Creating VMs in namespace: %s\n", namespace) + + // Create virtual machines + var vmNames []string + vmNames, vmResources, err = cluster.CreateVirtualMachines(ctx, virtClient, clusterDefinition) + Expect(err).NotTo(HaveOccurred(), "Failed to create virtual machines") + GinkgoWriter.Printf(" ✅ Created %d virtual machines: %v\n", len(vmNames), vmNames) + + // Wait for all VMs to become Running + GinkgoWriter.Printf(" ▶️ Waiting for VMs to become Running (timeout: 10 minutes)\n") + for _, vmName := range vmNames { + Eventually(func() (v1alpha2.MachinePhase, error) { + vm, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) + if err != nil { + return "", err + } + return vm.Status.Phase, nil + }).WithTimeout(10*time.Minute).WithPolling(10*time.Second).Should(Equal(v1alpha2.MachineRunning), + "VM %s should become Running within 10 minutes", vmName) + GinkgoWriter.Printf(" ✅ VM %s is Running\n", vmName) + } + GinkgoWriter.Printf(" ✅ All VMs are Running\n") + }) + }) + }) // Describe: Cluster Creation From a5c7efede7678339b49bbc7c7ff3cc486589c4f2 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Mon, 15 Dec 2025 16:04:33 +0300 Subject: [PATCH 15/48] Add virtualization client creation in 
cluster creation tests --- tests/cluster-creation-by-steps/cluster_creation_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 77bb838..87cbc61 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -167,6 +167,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) defer cancel() + // Create virtualization client GinkgoWriter.Printf(" ▶️ Creating virtualization client\n") virtClient, err = virtualization.NewClient(ctx, kubeconfig) Expect(err).NotTo(HaveOccurred()) From 69f2193f904da0a9d77ac3da02a4384d88518184 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 16:12:29 +0300 Subject: [PATCH 16/48] VMs have been deployed successfully --- go.mod | 4 +- internal/config/env.go | 6 +- internal/kubernetes/core/namespace.go | 51 +++- pkg/cluster/vms.go | 282 +++++++++++------- pkg/kubernetes/namespace.go | 51 ++++ .../cluster_config.yml | 20 +- .../cluster_creation_test.go | 20 +- 7 files changed, 301 insertions(+), 133 deletions(-) create mode 100644 pkg/kubernetes/namespace.go diff --git a/go.mod b/go.mod index 5306d21..e5060e5 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( golang.org/x/crypto v0.46.0 golang.org/x/term v0.38.0 gopkg.in/yaml.v3 v3.0.1 + k8s.io/api v0.34.1 k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.4 @@ -41,6 +42,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183 // indirect github.com/openshift/custom-resource-status v1.1.2 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/spf13/pflag v1.0.7 // indirect github.com/x448/float16 v0.8.4 // indirect go.yaml.in/yaml/v2 
v2.4.2 // indirect @@ -52,8 +54,8 @@ require ( golang.org/x/time v0.9.0 // indirect golang.org/x/tools v0.39.0 // indirect google.golang.org/protobuf v1.36.5 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - k8s.io/api v0.34.1 // indirect k8s.io/apiextensions-apiserver v0.34.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect diff --git a/internal/config/env.go b/internal/config/env.go index cb39697..26d5745 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -17,6 +17,8 @@ const ( var ( // ssh passphrase for ssh private key used to connect to base cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") + // TODO add envvar for ssh key and ssh user and probably ssh host. + // ssh public keys deploying to nodes must be also in envvar as well as ssh-user // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download @@ -26,10 +28,6 @@ var ( // ClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew" ClusterCreateMode = os.Getenv("CLUSTER_CREATE_MODE") - // AutoGenerateVMNames specifies whether to auto-generate VM names or use provided in config. - // Default is "false". If set to "true", the VM names suffix in kubernetes style will be added to VM names set in cluster config. - AutoGenerateVMNames = os.Getenv("AUTO_GENERATE_VM_NAMES") // TODO implement this in cluster.LoadClusterConfig function. - // TestClusterCleanup specifies whether to remove the test cluster after tests complete. // Default is "false". If set to "true" or "True", the test cluster will be cleaned up after tests. 
TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") diff --git a/internal/kubernetes/core/namespace.go b/internal/kubernetes/core/namespace.go index 8abfbbd..8041b7c 100644 --- a/internal/kubernetes/core/namespace.go +++ b/internal/kubernetes/core/namespace.go @@ -16,5 +16,54 @@ limitations under the License. package core -// TODO: Implement namespace operations +import ( + "context" + "fmt" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// NamespaceClient provides operations on Namespace resources +type NamespaceClient interface { + Get(ctx context.Context, name string) (*corev1.Namespace, error) + Create(ctx context.Context, name string) (*corev1.Namespace, error) +} + +type namespaceClient struct { + client kubernetes.Interface +} + +// NewNamespaceClient creates a new namespace client from a rest.Config +func NewNamespaceClient(config *rest.Config) (NamespaceClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &namespaceClient{client: clientset}, nil +} + +// Get retrieves a namespace by name +func (c *namespaceClient) Get(ctx context.Context, name string) (*corev1.Namespace, error) { + ns, err := c.client.CoreV1().Namespaces().Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get namespace %s: %w", name, err) + } + return ns, nil +} + +// Create creates a new namespace +func (c *namespaceClient) Create(ctx context.Context, name string) (*corev1.Namespace, error) { + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + created, err := c.client.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create namespace %s: %w", name, err) + } + return created, nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go 
index ed58f5d..75707cc 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -19,9 +19,7 @@ package cluster import ( "context" "fmt" - "math/rand" "strings" - "time" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" @@ -65,42 +63,25 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien vmNames = append(vmNames, node.Hostname) } - // Check if any VMs with these names already exist - existingVMs, err := virtClient.VirtualMachines().List(ctx, namespace) + // Check for conflicts in all resources before creating anything + conflicts, err := checkResourceConflicts(ctx, virtClient, namespace, vmNodes) if err != nil { - return nil, nil, fmt.Errorf("failed to list existing VMs: %w", err) - } - - existingVMNames := make(map[string]bool) - for _, vm := range existingVMs { - existingVMNames[vm.Name] = true - } - - conflictingVMs := make([]string, 0) - for _, vmName := range vmNames { - if existingVMNames[vmName] { - conflictingVMs = append(conflictingVMs, vmName) - } + return nil, nil, fmt.Errorf("failed to check for resource conflicts: %w", err) } - // Handle conflicts - if len(conflictingVMs) > 0 { - if config.AutoGenerateVMNames != "true" && config.AutoGenerateVMNames != "True" { - return nil, nil, fmt.Errorf("virtual machines with the following names already exist in namespace %s: %v. 
Set AUTO_GENERATE_VM_NAMES=true to auto-generate unique names", namespace, conflictingVMs) + // If any conflicts exist, fail with a detailed error message + if len(conflicts.VMs) > 0 || len(conflicts.VirtualDisks) > 0 || len(conflicts.ClusterVirtualImages) > 0 { + conflictMessages := make([]string, 0) + if len(conflicts.VMs) > 0 { + conflictMessages = append(conflictMessages, fmt.Sprintf("VirtualMachines: %v", conflicts.VMs)) } - - // Generate suffix and update clusterDefinition - suffix := generateRandomSuffix() - updateClusterDefinitionHostnames(clusterDef, suffix) - // Update vmNodes with new names - for i := range vmNodes { - vmNodes[i].Hostname = vmNodes[i].Hostname + suffix + if len(conflicts.VirtualDisks) > 0 { + conflictMessages = append(conflictMessages, fmt.Sprintf("VirtualDisks: %v", conflicts.VirtualDisks)) } - // Update vmNames - vmNames = make([]string, 0, len(vmNodes)) - for _, node := range vmNodes { - vmNames = append(vmNames, node.Hostname) + if len(conflicts.ClusterVirtualImages) > 0 { + conflictMessages = append(conflictMessages, fmt.Sprintf("ClusterVirtualImages: %v", conflicts.ClusterVirtualImages)) } + return nil, nil, fmt.Errorf("the following VM-related resources already exist (CLUSTER_CREATE_MODE=%s): %s", config.ClusterCreateMode, strings.Join(conflictMessages, ", ")) } // Create all VMs @@ -131,6 +112,87 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien return vmNames, resources, nil } +// resourceConflicts tracks conflicts in different resource types +type resourceConflicts struct { + VMs []string + VirtualDisks []string + ClusterVirtualImages []string +} + +// checkResourceConflicts checks for conflicts in all VM-related resources +func checkResourceConflicts(ctx context.Context, virtClient *virtualization.Client, namespace string, vmNodes []config.ClusterNode) (*resourceConflicts, error) { + conflicts := &resourceConflicts{ + VMs: make([]string, 0), + VirtualDisks: make([]string, 0), + 
ClusterVirtualImages: make([]string, 0), + } + + // Collect all resource names we plan to create + vmNames := make([]string, 0, len(vmNodes)) + systemDiskNames := make([]string, 0, len(vmNodes)) + cvmiNamesSet := make(map[string]bool) + + for _, node := range vmNodes { + vmName := node.Hostname + vmNames = append(vmNames, vmName) + systemDiskName := fmt.Sprintf("%s-system", vmName) + systemDiskNames = append(systemDiskNames, systemDiskName) + + // Get CVMI name from image URL + cvmiName := getCVMINameFromImageURL(node.OSType.ImageURL) + cvmiNamesSet[cvmiName] = true + } + + // Check for conflicting VirtualMachines + existingVMs, err := virtClient.VirtualMachines().List(ctx, namespace) + if err != nil { + return nil, fmt.Errorf("failed to list existing VMs: %w", err) + } + existingVMNames := make(map[string]bool) + for _, vm := range existingVMs { + existingVMNames[vm.Name] = true + } + for _, vmName := range vmNames { + if existingVMNames[vmName] { + conflicts.VMs = append(conflicts.VMs, vmName) + } + } + + // Check for conflicting VirtualDisks + existingVDs, err := virtClient.VirtualDisks().List(ctx, namespace) + if err != nil { + return nil, fmt.Errorf("failed to list existing VirtualDisks: %w", err) + } + existingVDNames := make(map[string]bool) + for _, vd := range existingVDs { + existingVDNames[vd.Name] = true + } + for _, diskName := range systemDiskNames { + if existingVDNames[diskName] { + conflicts.VirtualDisks = append(conflicts.VirtualDisks, diskName) + } + } + + // Check for conflicting ClusterVirtualImages (cluster-scoped, no namespace) + cvmiNames := make([]string, 0, len(cvmiNamesSet)) + for name := range cvmiNamesSet { + cvmiNames = append(cvmiNames, name) + } + for _, cvmiName := range cvmiNames { + _, err := virtClient.ClusterVirtualImages().Get(ctx, cvmiName) + if err == nil { + // CVMI exists + conflicts.ClusterVirtualImages = append(conflicts.ClusterVirtualImages, cvmiName) + } else if !errors.IsNotFound(err) { + // Some other error occurred + 
return nil, fmt.Errorf("failed to check ClusterVirtualImage %s: %w", cvmiName, err) + } + // If IsNotFound, the CVMI doesn't exist, which is fine + } + + return conflicts, nil +} + // getVMNodes extracts all VM nodes from cluster definition func getVMNodes(clusterDef *config.ClusterDefinition) []config.ClusterNode { var vmNodes []config.ClusterNode @@ -154,36 +216,6 @@ func getVMNodes(clusterDef *config.ClusterDefinition) []config.ClusterNode { return vmNodes } -// updateClusterDefinitionHostnames updates hostnames in clusterDefinition with the given suffix -func updateClusterDefinitionHostnames(clusterDef *config.ClusterDefinition, suffix string) { - for i := range clusterDef.Masters { - if clusterDef.Masters[i].HostType == config.HostTypeVM { - clusterDef.Masters[i].Hostname = clusterDef.Masters[i].Hostname + suffix - } - } - - for i := range clusterDef.Workers { - if clusterDef.Workers[i].HostType == config.HostTypeVM { - clusterDef.Workers[i].Hostname = clusterDef.Workers[i].Hostname + suffix - } - } - - if clusterDef.Setup != nil && clusterDef.Setup.HostType == config.HostTypeVM { - clusterDef.Setup.Hostname = clusterDef.Setup.Hostname + suffix - } -} - -// generateRandomSuffix generates a random suffix of 6 lowercase letters -func generateRandomSuffix() string { - const letters = "abcdefghijklmnopqrstuvwxyz" - r := rand.New(rand.NewSource(time.Now().UnixNano())) - suffix := make([]byte, 6) - for i := range suffix { - suffix[i] = letters[r.Intn(len(letters))] - } - return "-" + string(suffix) -} - // createVM creates a virtual machine with all required dependencies // Returns the CVMI name that was used/created func createVM(ctx context.Context, virtClient *virtualization.Client, namespace string, node config.ClusterNode, storageClass string) (string, error) { @@ -193,6 +225,9 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace cvmiName := getCVMINameFromImageURL(node.OSType.ImageURL) cvmi, err := 
virtClient.ClusterVirtualImages().Get(ctx, cvmiName) if err != nil { + if !errors.IsNotFound(err) { + return "", fmt.Errorf("failed to get ClusterVirtualImage %s: %w", cvmiName, err) + } // CVMI doesn't exist, create it cvmi = &v1alpha2.ClusterVirtualImage{ ObjectMeta: metav1.ObjectMeta{ @@ -213,69 +248,86 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace } } - // 2. Create system VirtualDisk + // 2. Create system VirtualDisk (check if it exists first) systemDiskName := fmt.Sprintf("%s-system", vmName) - systemDisk := &v1alpha2.VirtualDisk{ - ObjectMeta: metav1.ObjectMeta{ - Name: systemDiskName, - Namespace: namespace, - }, - Spec: v1alpha2.VirtualDiskSpec{ - PersistentVolumeClaim: v1alpha2.VirtualDiskPersistentVolumeClaim{ - Size: resource.NewQuantity(int64(node.DiskSize)*1024*1024*1024, resource.BinarySI), - StorageClass: &storageClass, + _, err = virtClient.VirtualDisks().Get(ctx, namespace, systemDiskName) + if err != nil { + if !errors.IsNotFound(err) { + return "", fmt.Errorf("failed to check VirtualDisk %s: %w", systemDiskName, err) + } + // VirtualDisk doesn't exist, create it + systemDisk := &v1alpha2.VirtualDisk{ + ObjectMeta: metav1.ObjectMeta{ + Name: systemDiskName, + Namespace: namespace, }, - DataSource: &v1alpha2.VirtualDiskDataSource{ - Type: "ObjectRef", - ObjectRef: &v1alpha2.VirtualDiskObjectRef{ - Kind: "ClusterVirtualImage", - Name: cvmi.Name, + Spec: v1alpha2.VirtualDiskSpec{ + PersistentVolumeClaim: v1alpha2.VirtualDiskPersistentVolumeClaim{ + Size: resource.NewQuantity(int64(node.DiskSize)*1024*1024*1024, resource.BinarySI), + StorageClass: &storageClass, + }, + DataSource: &v1alpha2.VirtualDiskDataSource{ + Type: "ObjectRef", + ObjectRef: &v1alpha2.VirtualDiskObjectRef{ + Kind: "ClusterVirtualImage", + Name: cvmi.Name, + }, }, }, - }, - } - err = virtClient.VirtualDisks().Create(ctx, systemDisk) - if err != nil { - return "", fmt.Errorf("failed to create system VirtualDisk %s: %w", systemDiskName, err) + } 
+ err = virtClient.VirtualDisks().Create(ctx, systemDisk) + if err != nil { + return "", fmt.Errorf("failed to create system VirtualDisk %s: %w", systemDiskName, err) + } } + // If VirtualDisk already exists, we'll use it - // 3. Create VirtualMachine - memoryQuantity := resource.MustParse(fmt.Sprintf("%dGi", node.RAM)) - vm := &v1alpha2.VirtualMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: vmName, - Namespace: namespace, - Labels: map[string]string{"vm": "linux", "service": "v1"}, - }, - Spec: v1alpha2.VirtualMachineSpec{ - VirtualMachineClassName: "generic", - EnableParavirtualization: true, - RunPolicy: v1alpha2.RunPolicy("AlwaysOn"), - OsType: v1alpha2.OsType("Generic"), - Bootloader: v1alpha2.BootloaderType("BIOS"), - CPU: v1alpha2.CPUSpec{ - Cores: node.CPU, - CoreFraction: "100%", - }, - Memory: v1alpha2.MemorySpec{ - Size: memoryQuantity, + // 3. Create VirtualMachine (check if it exists first) + _, err = virtClient.VirtualMachines().Get(ctx, namespace, vmName) + if err != nil { + if !errors.IsNotFound(err) { + return "", fmt.Errorf("failed to check VirtualMachine %s: %w", vmName, err) + } + // VirtualMachine doesn't exist, create it + memoryQuantity := resource.MustParse(fmt.Sprintf("%dGi", node.RAM)) + vm := &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmName, + Namespace: namespace, + Labels: map[string]string{"vm": "linux", "service": "v1"}, }, - BlockDeviceRefs: []v1alpha2.BlockDeviceSpecRef{ - { - Kind: v1alpha2.DiskDevice, - Name: systemDiskName, + Spec: v1alpha2.VirtualMachineSpec{ + VirtualMachineClassName: "generic", + EnableParavirtualization: true, + RunPolicy: v1alpha2.RunPolicy("AlwaysOn"), + OsType: v1alpha2.OsType("Generic"), + Bootloader: v1alpha2.BootloaderType("BIOS"), + LiveMigrationPolicy: v1alpha2.LiveMigrationPolicy("PreferSafe"), + CPU: v1alpha2.CPUSpec{ + Cores: node.CPU, + CoreFraction: "100%", + }, + Memory: v1alpha2.MemorySpec{ + Size: memoryQuantity, + }, + BlockDeviceRefs: []v1alpha2.BlockDeviceSpecRef{ 
+ { + Kind: v1alpha2.DiskDevice, + Name: systemDiskName, + }, + }, + Provisioning: &v1alpha2.Provisioning{ + Type: "UserData", + UserData: generateCloudInitUserData(vmName, node.Auth.SSHKey), }, }, - Provisioning: &v1alpha2.Provisioning{ - Type: "UserData", - UserData: generateCloudInitUserData(vmName, node.Auth.SSHKey), - }, - }, - } - err = virtClient.VirtualMachines().Create(ctx, vm) - if err != nil { - return "", fmt.Errorf("failed to create VirtualMachine %s: %w", vmName, err) + } + err = virtClient.VirtualMachines().Create(ctx, vm) + if err != nil { + return "", fmt.Errorf("failed to create VirtualMachine %s: %w", vmName, err) + } } + // If VirtualMachine already exists, we'll skip creation return cvmiName, nil } diff --git a/pkg/kubernetes/namespace.go b/pkg/kubernetes/namespace.go new file mode 100644 index 0000000..9687a16 --- /dev/null +++ b/pkg/kubernetes/namespace.go @@ -0,0 +1,51 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubernetes + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/rest" + + "github.com/deckhouse/storage-e2e/internal/kubernetes/core" +) + +// CreateNamespaceIfNotExists creates a namespace if it doesn't exist, or returns the existing one. +// This is a high-level function that uses the low-level core namespace client. 
+func CreateNamespaceIfNotExists(ctx context.Context, config *rest.Config, name string) (*corev1.Namespace, error) { + nsClient, err := core.NewNamespaceClient(config) + if err != nil { + return nil, fmt.Errorf("failed to create namespace client: %w", err) + } + + // Try to get the namespace to check if it exists + ns, err := nsClient.Get(ctx, name) + if err != nil { + // If namespace doesn't exist, create it + if apierrors.IsNotFound(err) { + return nsClient.Create(ctx, name) + } + // For other errors, return them + return nil, fmt.Errorf("failed to get namespace %s: %w", name, err) + } + + // Namespace exists, return it + return ns, nil +} diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index b4ca43a..aba62e7 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -5,21 +5,21 @@ clusterDefinition: hostType: "vm" role: "master" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: + auth: # TODO move to envvars method: "ssh-key" - user: "user" + user: "cloud" sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 4 ram: 8 diskSize: 30 - workers: # Worker nodes configuration + workers: # Worker nodes configuration // TODO implement 
logic allowing to deploy different number of workes and masters with the same config. - hostname: "worker-1" hostType: "vm" role: "worker" osType: "Ubuntu 22.04 6.2.0-39-generic" auth: - method: "ssh-key" - user: "user" + method: "ssh-key" + user: "cloud" sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 @@ -30,7 +30,7 @@ clusterDefinition: osType: "Ubuntu 22.04 6.2.0-39-generic" auth: method: "ssh-key" - user: "user" + user: "cloud" sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 @@ -41,13 +41,13 @@ clusterDefinition: 
podSubnetCIDR: "10.112.0.0/16" serviceSubnetCIDR: "10.225.0.0/16" clusterDomain: "cluster.local" - licenseKey: "" + licenseKey: "" # TODO Move to envvars registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" - namespace: "e2e-nested-1" - storageClass: "nfs-storage-class" + namespace: "e2e-nested-1" # TODO Move to a higher level + storageClass: "rsc-test-r2-local" # TODO Move to a higher level # Module configuration modules: - - name: "snapshot-controller" + - name: "snapshot-controller" # TODO add MPO version: 1 enabled: true dependencies: [] diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 87cbc61..0097de2 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -32,14 +32,15 @@ import ( "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" "github.com/deckhouse/storage-e2e/pkg/cluster" + "github.com/deckhouse/storage-e2e/pkg/kubernetes" "github.com/deckhouse/virtualization/api/core/v1alpha2" ) var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( yamlConfigFilename string = "cluster_config.yml" - baseClusterMasterIP string = "10.0.0.181" - baseClusterUser string = "w-ansible" + baseClusterMasterIP string = "94.26.231.181" + baseClusterUser string = "a.yakubov" baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" err error @@ -162,6 +163,21 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + It("should ensure test namespace exists", func() { + By("Checking and creating test namespace if needed", func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + namespace := clusterDefinition.DKPParameters.Namespace + GinkgoWriter.Printf(" ▶️ Ensuring namespace %s exists\n", namespace) + + ns, err := 
kubernetes.CreateNamespaceIfNotExists(ctx, kubeconfig, namespace) + Expect(err).NotTo(HaveOccurred(), "Failed to create namespace") + Expect(ns).NotTo(BeNil()) + GinkgoWriter.Printf(" ✅ Namespace %s is ready\n", namespace) + }) + }) + It("should create virtual machines from cluster definition", func() { By("Creating virtual machines", func() { ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) From 7acc7fbf643a662458ac239e0c6a2cdaeb1ef8c7 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 17:26:39 +0300 Subject: [PATCH 17/48] Refactoring and TODOs implementations --- internal/cluster/cluster.go | 12 ----- internal/config/env.go | 50 +++++++++++++++++-- internal/config/types.go | 49 ++++-------------- pkg/cluster/cluster.go | 17 ++++--- pkg/cluster/vms.go | 2 +- .../cluster_config.yml | 13 ----- .../cluster_creation_suite_test.go | 5 +- .../cluster_creation_test.go | 8 +-- tests/cluster-creation/cluster_config.yml | 21 ++------ .../cluster_creation_suite_test.go | 5 +- .../cluster-creation/cluster_creation_test.go | 9 +--- 11 files changed, 79 insertions(+), 112 deletions(-) diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index c32670d..c27aab3 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -143,18 +143,6 @@ func validateNode(node config.ClusterNode, isMaster bool) error { } } - if node.Auth.User == "" { - return fmt.Errorf("auth.user is required") - } - - if node.Auth.Method == config.AuthMethodSSHKey && node.Auth.SSHKey == "" { - return fmt.Errorf("auth.sshKey is required when using ssh-key authentication") - } - - if node.Auth.Method == config.AuthMethodSSHPass && node.Auth.Password == "" { - return fmt.Errorf("auth.password is required when using ssh-password authentication") - } - return nil } diff --git a/internal/config/env.go b/internal/config/env.go index 26d5745..b8978e4 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -15,10 +15,18 @@ const ( ) 
var ( - // ssh passphrase for ssh private key used to connect to base cluster + // YAMLConfigFilename is the filename of the YAML configuration file + YAMLConfigFilename = os.Getenv("YAML_CONFIG_FILENAME") + + // SSH credentials to connect to BASE cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") - // TODO add envvar for ssh key and ssh user and probably ssh host. - // ssh public keys deploying to nodes must be also in envvar as well as ssh-user + SSHUser = os.Getenv("SSH_USER") + SSHKeyPath = os.Getenv("SSH_KEY_PATH") + SSHHost = os.Getenv("SSH_HOST") + + // SSH credentials to deploy to VM + VMSSHUser = os.Getenv("SSH_VM_USER") + VMSSHPublicKey = os.Getenv("SSH_VM_PUBLIC_KEY") // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download @@ -31,10 +39,42 @@ var ( // TestClusterCleanup specifies whether to remove the test cluster after tests complete. // Default is "false". If set to "true" or "True", the test cluster will be cleaned up after tests. 
TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") + + // DKPLicenseKey specifies the DKP license key for cluster deployment + DKPLicenseKey = os.Getenv("DKP_LICENSE_KEY") ) -// ValidateClusterCreateMode validates that CLUSTER_CREATE_MODE is set and has a valid value -func ValidateClusterCreateMode() error { +func ValidateEnvironment() error { + // Default values for environment variables + if YAMLConfigFilename == "" { + YAMLConfigFilename = "cluster_config.yml" + } + + if TestClusterCleanup != "true" && TestClusterCleanup != "True" { + TestClusterCleanup = "false" + } + + if SSHKeyPath == "" { + SSHKeyPath = "~/.ssh/id_rsa" + } + if SSHUser == "" { + SSHUser = "a.yakubov" + } + if SSHHost == "" { + SSHHost = "94.26.231.181" + } + if VMSSHUser == "" { + VMSSHUser = "cloud" + } + if VMSSHPublicKey == "" { + VMSSHPublicKey = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + } + + // There are no default values for these variables and they must be set! Otherwise, the test will fail. + if DKPLicenseKey == "" { + return fmt.Errorf("DKP_LICENSE_KEY environment variable is required but not set. ") + } + if ClusterCreateMode == "" { return fmt.Errorf("CLUSTER_CREATE_MODE environment variable is required but not set. 
"+ "Please set it to either '%s' or '%s'", diff --git a/internal/config/types.go b/internal/config/types.go index 03f7798..7fbd9cf 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -47,22 +47,6 @@ type OSType struct { KernelVersion string } -// AuthMethod represents the authentication method -type AuthMethod string - -const ( - AuthMethodSSHKey AuthMethod = "ssh-key" - AuthMethodSSHPass AuthMethod = "ssh-password" -) - -// NodeAuth contains authentication information for a node -type NodeAuth struct { - Method AuthMethod `yaml:"method"` - User string `yaml:"user"` - SSHKey string `yaml:"sshKey"` // Public key (value like "ssh-rsa ...", path to .pub file, or empty for default) - Password string `yaml:"password,omitempty"` // Password (if using password auth) -} - // ClusterNode defines a single node in the cluster type ClusterNode struct { Hostname string `yaml:"hostname"` @@ -70,7 +54,6 @@ type ClusterNode struct { OSType OSType `yaml:"osType"` // Required for VM, optional for bare-metal (custom unmarshaler handles string -> OSType conversion) HostType HostType `yaml:"hostType"` Role ClusterRole `yaml:"role"` - Auth NodeAuth `yaml:"auth"` // VM-specific fields (only used when HostType == HostTypeVM) CPU int `yaml:"cpu"` // Required for VM RAM int `yaml:"ram"` // Required for VM, in GB @@ -85,7 +68,6 @@ type DKPParameters struct { PodSubnetCIDR string `yaml:"podSubnetCIDR"` ServiceSubnetCIDR string `yaml:"serviceSubnetCIDR"` ClusterDomain string `yaml:"clusterDomain"` - LicenseKey string `yaml:"licenseKey"` RegistryRepo string `yaml:"registryRepo"` Namespace string `yaml:"namespace"` StorageClass string `yaml:"storageClass"` @@ -121,16 +103,15 @@ const ( func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { // Temporary struct with OSType as string for unmarshaling type clusterNodeTmp struct { - Hostname string `yaml:"hostname"` - IPAddress string `yaml:"ipAddress,omitempty"` - OSType string `yaml:"osType"` - HostType string 
`yaml:"hostType"` - Role string `yaml:"role"` - Auth NodeAuth `yaml:"auth"` - CPU int `yaml:"cpu"` - RAM int `yaml:"ram"` - DiskSize int `yaml:"diskSize"` - Prepared bool `yaml:"prepared,omitempty"` + Hostname string `yaml:"hostname"` + IPAddress string `yaml:"ipAddress,omitempty"` + OSType string `yaml:"osType"` + HostType string `yaml:"hostType"` + Role string `yaml:"role"` + CPU int `yaml:"cpu"` + RAM int `yaml:"ram"` + DiskSize int `yaml:"diskSize"` + Prepared bool `yaml:"prepared,omitempty"` } var tmp clusterNodeTmp @@ -156,24 +137,12 @@ func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { return fmt.Errorf("unknown osType: %s", tmp.OSType) } - // Convert AuthMethod - authMethod := AuthMethod(tmp.Auth.Method) - if authMethod != AuthMethodSSHKey && authMethod != AuthMethodSSHPass { - return fmt.Errorf("invalid auth method: %s", tmp.Auth.Method) - } - // Assign to actual struct n.Hostname = tmp.Hostname n.IPAddress = tmp.IPAddress n.OSType = osType n.HostType = hostType n.Role = role - n.Auth = NodeAuth{ - Method: authMethod, - User: tmp.Auth.User, - SSHKey: tmp.Auth.SSHKey, - Password: tmp.Auth.Password, - } n.CPU = tmp.CPU n.RAM = tmp.RAM n.DiskSize = tmp.DiskSize diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 09680c1..2da5a7d 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -44,12 +44,10 @@ type TestClusterResources struct { // 4. Establishing SSH tunnel with port forwarding // // It returns all the resources needed to interact with the cluster. +// SSH credentials are obtained from environment variables via config functions. 
func CreateTestCluster( ctx context.Context, yamlConfigFilename string, - baseClusterMasterIP string, - baseClusterUser string, - baseClusterSSHPrivateKey string, ) (*TestClusterResources, error) { // Stage 1: Load cluster configuration from YAML clusterDefinition, err := internalcluster.LoadClusterConfig(yamlConfigFilename) @@ -57,8 +55,13 @@ func CreateTestCluster( return nil, fmt.Errorf("failed to load cluster configuration: %w", err) } + // Get SSH credentials from environment variables + sshHost := config.SSHHost + sshUser := config.SSHUser + sshKeyPath := config.SSHKeyPath + // Stage 2: Establish SSH connection to base cluster - sshClient, err := ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) + sshClient, err := ssh.NewClient(sshUser, sshHost, sshKeyPath) if err != nil { return nil, fmt.Errorf("failed to create SSH client: %w", err) } @@ -70,9 +73,9 @@ func CreateTestCluster( kubeconfig, kubeconfigPath, err := internalcluster.GetKubeconfig( kubeconfigCtx, - baseClusterMasterIP, - baseClusterUser, - baseClusterSSHPrivateKey, + sshHost, + sshUser, + sshKeyPath, sshClient, ) if err != nil { diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 75707cc..6b233a0 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -318,7 +318,7 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace }, Provisioning: &v1alpha2.Provisioning{ Type: "UserData", - UserData: generateCloudInitUserData(vmName, node.Auth.SSHKey), + UserData: generateCloudInitUserData(vmName, config.VMSSHPublicKey), }, }, } diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index aba62e7..0e499bc 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -5,10 +5,6 @@ clusterDefinition: hostType: "vm" role: "master" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: # TODO move to envvars - method: "ssh-key" - 
user: "cloud" - sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 4 ram: 8 diskSize: 30 @@ -17,10 +13,6 @@ clusterDefinition: hostType: "vm" role: "worker" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: - method: "ssh-key" - user: "cloud" - sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 diskSize: 30 @@ -28,10 +20,6 @@ clusterDefinition: hostType: "vm" role: "worker" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: - method: "ssh-key" - user: "cloud" - sshKey: "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 diskSize: 30 @@ -41,7 +29,6 @@ clusterDefinition: podSubnetCIDR: "10.112.0.0/16" serviceSubnetCIDR: "10.225.0.0/16" clusterDomain: "cluster.local" - licenseKey: "" # TODO Move to envvars registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" namespace: "e2e-nested-1" # TODO Move to a higher level storageClass: "rsc-test-r2-local" # TODO Move to a higher level diff --git a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go index f730b1e..c56d687 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go @@ -26,9 +26,8 @@ import ( ) var _ = BeforeSuite(func() { - // Validate that CLUSTER_CREATE_MODE is set and has a valid value - err := config.ValidateClusterCreateMode() - Expect(err).NotTo(HaveOccurred(), "CLUSTER_CREATE_MODE environment variable must be set to either 'alwaysUseExisting' or 'alwaysCreateNew'") + err := config.ValidateEnvironment() + Expect(err).NotTo(HaveOccurred()) }) func TestIntegration(t *testing.T) { diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go 
index 0097de2..4cecec4 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -38,10 +38,10 @@ import ( var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( - yamlConfigFilename string = "cluster_config.yml" - baseClusterMasterIP string = "94.26.231.181" - baseClusterUser string = "a.yakubov" - baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" + yamlConfigFilename string = config.YAMLConfigFilename + baseClusterMasterIP string = config.SSHHost + baseClusterUser string = config.SSHUser + baseClusterSSHPrivateKey string = config.SSHKeyPath err error sshclient ssh.SSHClient diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml index b4ca43a..0e499bc 100644 --- a/tests/cluster-creation/cluster_config.yml +++ b/tests/cluster-creation/cluster_config.yml @@ -5,22 +5,14 @@ clusterDefinition: hostType: "vm" role: "master" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: - method: "ssh-key" - user: "user" - sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 4 ram: 8 diskSize: 30 - workers: # Worker nodes configuration + workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number 
of workes and masters with the same config. - hostname: "worker-1" hostType: "vm" role: "worker" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: - method: "ssh-key" - user: "user" - sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 diskSize: 30 @@ -28,10 +20,6 @@ clusterDefinition: hostType: "vm" role: "worker" osType: "Ubuntu 22.04 6.2.0-39-generic" - auth: - method: "ssh-key" - user: "user" - sshKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" cpu: 2 ram: 6 diskSize: 30 @@ -41,13 +29,12 @@ clusterDefinition: podSubnetCIDR: "10.112.0.0/16" 
serviceSubnetCIDR: "10.225.0.0/16" clusterDomain: "cluster.local" - licenseKey: "" registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" - namespace: "e2e-nested-1" - storageClass: "nfs-storage-class" + namespace: "e2e-nested-1" # TODO Move to a higher level + storageClass: "rsc-test-r2-local" # TODO Move to a higher level # Module configuration modules: - - name: "snapshot-controller" + - name: "snapshot-controller" # TODO add MPO version: 1 enabled: true dependencies: [] diff --git a/tests/cluster-creation/cluster_creation_suite_test.go b/tests/cluster-creation/cluster_creation_suite_test.go index 3d23e4b..e1cfd85 100644 --- a/tests/cluster-creation/cluster_creation_suite_test.go +++ b/tests/cluster-creation/cluster_creation_suite_test.go @@ -26,9 +26,8 @@ import ( ) var _ = BeforeSuite(func() { - // Validate that CLUSTER_CREATE_MODE is set and has a valid value - err := config.ValidateClusterCreateMode() - Expect(err).NotTo(HaveOccurred(), "CLUSTER_CREATE_MODE environment variable must be set to either 'alwaysUseExisting' or 'alwaysCreateNew'") + err := config.ValidateEnvironment() + Expect(err).NotTo(HaveOccurred()) }) func TestIntegration(t *testing.T) { diff --git a/tests/cluster-creation/cluster_creation_test.go b/tests/cluster-creation/cluster_creation_test.go index d857a51..daa8a71 100644 --- a/tests/cluster-creation/cluster_creation_test.go +++ b/tests/cluster-creation/cluster_creation_test.go @@ -22,15 +22,13 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/pkg/cluster" ) var _ = Describe("Cluster Creation Test", Ordered, func() { var ( - yamlConfigFilename string = "cluster_config.yml" - baseClusterMasterIP string = "10.0.0.181" - baseClusterUser string = "w-ansible" - baseClusterSSHPrivateKey string = "~/.ssh/id_rsa" + yamlConfigFilename string = config.YAMLConfigFilename testClusterResources *cluster.TestClusterResources ctx context.Context = context.Background() @@ -57,9 +55,6 @@ var _ = Describe("Cluster Creation Test", Ordered, func() { testClusterResources, err = cluster.CreateTestCluster( ctx, yamlConfigFilename, - baseClusterMasterIP, - baseClusterUser, - baseClusterSSHPrivateKey, ) Expect(err).NotTo(HaveOccurred(), "CreateTestCluster should succeed") Expect(testClusterResources).NotTo(BeNil(), "TestClusterResources should not be nil") From 52e4d84bcefba0a94e0c8f2f9411e903da42fb48 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 17:36:42 +0300 Subject: [PATCH 18/48] Refactor cluster creation tests to use direct configuration access for YAML filename and SSH parameters, improving readability and maintainability. 
--- .../cluster_creation_test.go | 18 +++++++----------- .../cluster-creation/cluster_creation_test.go | 3 +-- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 4cecec4..2eb373f 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -38,11 +38,6 @@ import ( var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( - yamlConfigFilename string = config.YAMLConfigFilename - baseClusterMasterIP string = config.SSHHost - baseClusterUser string = config.SSHUser - baseClusterSSHPrivateKey string = config.SSHKeyPath - err error sshclient ssh.SSHClient kubeconfig *rest.Config @@ -59,6 +54,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { + yamlConfigFilename := config.YAMLConfigFilename GinkgoWriter.Printf(" ▶️ Loading cluster configuration from: %s\n", yamlConfigFilename) clusterDefinition, err = internalcluster.LoadClusterConfig(yamlConfigFilename) Expect(err).NotTo(HaveOccurred()) @@ -114,9 +110,9 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) It("should establish ssh connection to the base cluster", func() { - By(fmt.Sprintf("Connecting to %s@%s using key %s", baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey), func() { - GinkgoWriter.Printf(" ▶️ Creating SSH client for %s@%s\n", baseClusterUser, baseClusterMasterIP) - sshclient, err = ssh.NewClient(baseClusterUser, baseClusterMasterIP, baseClusterSSHPrivateKey) + By(fmt.Sprintf("Connecting to %s@%s using key %s", config.SSHUser, config.SSHHost, config.SSHKeyPath), func() { + 
GinkgoWriter.Printf(" ▶️ Creating SSH client for %s@%s\n", config.SSHUser, config.SSHHost) + sshclient, err = ssh.NewClient(config.SSHUser, config.SSHHost, config.SSHKeyPath) Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ SSH connection established successfully\n") }) @@ -126,10 +122,10 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { It("should get kubeconfig from the base cluster", func() { By("Retrieving kubeconfig from base cluster", func() { - GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", baseClusterMasterIP) + GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", config.SSHHost) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - kubeconfig, kubeconfigPath, err = internalcluster.GetKubeconfig(ctx, baseClusterMasterIP, baseClusterUser, baseClusterSSHPrivateKey, sshclient) + kubeconfig, kubeconfigPath, err = internalcluster.GetKubeconfig(ctx, config.SSHHost, config.SSHUser, config.SSHKeyPath, sshclient) Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ Kubeconfig retrieved and saved to: %s\n", kubeconfigPath) }) @@ -139,7 +135,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { It("should establish ssh tunnel to the base cluster with port forwarding", func() { By("Setting up SSH tunnel with port forwarding", func() { - GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", baseClusterMasterIP) + GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", config.SSHHost) ctx := context.Background() tunnelinfo, err = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") Expect(err).NotTo(HaveOccurred()) diff --git a/tests/cluster-creation/cluster_creation_test.go b/tests/cluster-creation/cluster_creation_test.go index daa8a71..812b148 100644 --- a/tests/cluster-creation/cluster_creation_test.go +++ b/tests/cluster-creation/cluster_creation_test.go @@ -28,8 +28,6 @@ import ( var _ = Describe("Cluster 
Creation Test", Ordered, func() { var ( - yamlConfigFilename string = config.YAMLConfigFilename - testClusterResources *cluster.TestClusterResources ctx context.Context = context.Background() ) @@ -52,6 +50,7 @@ var _ = Describe("Cluster Creation Test", Ordered, func() { By("Creating test cluster connection", func() { GinkgoWriter.Printf(" ▶️ Creating test cluster connection\n") var err error + yamlConfigFilename := config.YAMLConfigFilename testClusterResources, err = cluster.CreateTestCluster( ctx, yamlConfigFilename, From e07adaff2bc9408494fc2aee1284d92e1f2fa45e Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 18:03:37 +0300 Subject: [PATCH 19/48] Refactor environment variable handling for test cluster configuration, introducing new variables for namespace, storage class, and cleanup options. Update related validation logic and adjust cluster creation tests accordingly. --- internal/config/env.go | 32 ++++++++++++++++--- internal/config/types.go | 6 ++-- pkg/cluster/vms.go | 10 +++--- .../cluster_config.yml | 3 +- .../cluster_creation_test.go | 4 +-- tests/cluster-creation/cluster_config.yml | 3 +- 6 files changed, 38 insertions(+), 20 deletions(-) diff --git a/internal/config/env.go b/internal/config/env.go index b8978e4..cbc1e57 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -33,13 +33,19 @@ var ( // the kubeconfig manually and set this environment variable. KubeConfigPath = os.Getenv("KUBE_CONFIG_PATH") - // ClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew" - ClusterCreateMode = os.Getenv("CLUSTER_CREATE_MODE") + // TestClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew" + TestClusterCreateMode = os.Getenv("TEST_CLUSTER_CREATE_MODE") // TestClusterCleanup specifies whether to remove the test cluster after tests complete. // Default is "false". 
If set to "true" or "True", the test cluster will be cleaned up after tests. TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") + // TestClusterNamespace specifies the namespace for DKP cluster deployment + TestClusterNamespace = os.Getenv("TEST_CLUSTER_NAMESPACE") + + // TestClusterStorageClass specifies the storage class for DKP cluster deployment + TestClusterStorageClass = os.Getenv("TEST_CLUSTER_STORAGE_CLASS") + // DKPLicenseKey specifies the DKP license key for cluster deployment DKPLicenseKey = os.Getenv("DKP_LICENSE_KEY") ) @@ -69,22 +75,38 @@ func ValidateEnvironment() error { if VMSSHPublicKey == "" { VMSSHPublicKey = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" } + if TestClusterNamespace == "" { + TestClusterNamespace = "e2e-test-cluster" + } + if TestClusterStorageClass == "" { + TestClusterStorageClass = "rsc-test-r2-local" + } + + if TestClusterCleanup == "" { + TestClusterCleanup = "false" + } + + if TestClusterCleanup != "true" && TestClusterCleanup != "True" { + TestClusterCleanup = "false" + } else { + TestClusterCleanup = "true" + } // There are no default values for these variables and they must be set! Otherwise, the test will fail. if DKPLicenseKey == "" { return fmt.Errorf("DKP_LICENSE_KEY environment variable is required but not set. 
") } - if ClusterCreateMode == "" { + if TestClusterCreateMode == "" { return fmt.Errorf("CLUSTER_CREATE_MODE environment variable is required but not set. "+ "Please set it to either '%s' or '%s'", ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) } - if ClusterCreateMode != ClusterCreateModeAlwaysUseExisting && ClusterCreateMode != ClusterCreateModeAlwaysCreateNew { + if TestClusterCreateMode != ClusterCreateModeAlwaysUseExisting && TestClusterCreateMode != ClusterCreateModeAlwaysCreateNew { return fmt.Errorf("CLUSTER_CREATE_MODE has invalid value '%s'. "+ "Must be either '%s' or '%s'", - ClusterCreateMode, ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) + TestClusterCreateMode, ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) } return nil diff --git a/internal/config/types.go b/internal/config/types.go index 7fbd9cf..b9a8533 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -69,8 +69,6 @@ type DKPParameters struct { ServiceSubnetCIDR string `yaml:"serviceSubnetCIDR"` ClusterDomain string `yaml:"clusterDomain"` RegistryRepo string `yaml:"registryRepo"` - Namespace string `yaml:"namespace"` - StorageClass string `yaml:"storageClass"` Modules []*ModuleConfig `yaml:"modules,omitempty"` } @@ -83,13 +81,13 @@ type ClusterDefinition struct { } // ModuleConfig defines a Deckhouse module configuration -type ModuleConfig struct { // TODO amarkov: I suggest allow user to specify ModulePullOverride version, to run tests on MR/PR during development process. +type ModuleConfig struct { Name string `yaml:"name"` Version int `yaml:"version"` Enabled bool `yaml:"enabled"` Settings map[string]any `yaml:"settings,omitempty"` Dependencies []string `yaml:"dependencies,omitempty"` // Names of modules that must be enabled before this one - ModulePullOverride string `yaml:"modulePullOverride,omitempty"` // Override the module pull branch or tag (e.g. 
"main", "pr123", "mr41") + ModulePullOverride string `yaml:"modulePullOverride,omitempty"` // Override the module pull branch or tag (e.g. "main", "pr123", "mr41"). Main is defailt value. } const ( diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 6b233a0..27f46e6 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -43,11 +43,11 @@ type VMResources struct { // and returns the list of VM names that were created along with resource tracking info. func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Client, clusterDef *config.ClusterDefinition) ([]string, *VMResources, error) { // Check CLUSTER_CREATE_MODE - if config.ClusterCreateMode != config.ClusterCreateModeAlwaysCreateNew { - return nil, nil, fmt.Errorf("CLUSTER_CREATE_MODE must be set to '%s'. Current value: '%s'. Using existing cluster currently is not supported", config.ClusterCreateModeAlwaysCreateNew, config.ClusterCreateMode) + if config.TestClusterCreateMode != config.ClusterCreateModeAlwaysCreateNew { + return nil, nil, fmt.Errorf("CLUSTER_CREATE_MODE must be set to '%s'. Current value: '%s'. 
Using existing cluster currently is not supported", config.ClusterCreateModeAlwaysCreateNew, config.TestClusterCreateMode) } - namespace := clusterDef.DKPParameters.Namespace + namespace := config.TestClusterNamespace // Get all VM nodes from cluster definition vmNodes := getVMNodes(clusterDef) @@ -81,11 +81,11 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien if len(conflicts.ClusterVirtualImages) > 0 { conflictMessages = append(conflictMessages, fmt.Sprintf("ClusterVirtualImages: %v", conflicts.ClusterVirtualImages)) } - return nil, nil, fmt.Errorf("the following VM-related resources already exist (CLUSTER_CREATE_MODE=%s): %s", config.ClusterCreateMode, strings.Join(conflictMessages, ", ")) + return nil, nil, fmt.Errorf("the following VM-related resources already exist (CLUSTER_CREATE_MODE=%s): %s", config.TestClusterCreateMode, strings.Join(conflictMessages, ", ")) } // Create all VMs - storageClass := clusterDef.DKPParameters.StorageClass + storageClass := config.TestClusterStorageClass for _, node := range vmNodes { cvmiName, err := createVM(ctx, virtClient, namespace, node, storageClass) if err != nil { diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index 0e499bc..fcae6ea 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -30,13 +30,12 @@ clusterDefinition: serviceSubnetCIDR: "10.225.0.0/16" clusterDomain: "cluster.local" registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" - namespace: "e2e-nested-1" # TODO Move to a higher level - storageClass: "rsc-test-r2-local" # TODO Move to a higher level # Module configuration modules: - name: "snapshot-controller" # TODO add MPO version: 1 enabled: true + modulePullOverride: "main" # imageTag for ModulePullOverride. Main is default value. Default value is used if not specified. 
dependencies: [] - name: "sds-local-volume" version: 1 diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 2eb373f..32e6dd2 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -164,7 +164,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - namespace := clusterDefinition.DKPParameters.Namespace + namespace := config.TestClusterNamespace GinkgoWriter.Printf(" ▶️ Ensuring namespace %s exists\n", namespace) ns, err := kubernetes.CreateNamespaceIfNotExists(ctx, kubeconfig, namespace) @@ -186,7 +186,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { Expect(virtClient).NotTo(BeNil()) GinkgoWriter.Printf(" ✅ Virtualization client initialized successfully\n") - namespace := clusterDefinition.DKPParameters.Namespace + namespace := config.TestClusterNamespace GinkgoWriter.Printf(" ▶️ Creating VMs in namespace: %s\n", namespace) // Create virtual machines diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml index 0e499bc..0bbd95d 100644 --- a/tests/cluster-creation/cluster_config.yml +++ b/tests/cluster-creation/cluster_config.yml @@ -30,13 +30,12 @@ clusterDefinition: serviceSubnetCIDR: "10.225.0.0/16" clusterDomain: "cluster.local" registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" - namespace: "e2e-nested-1" # TODO Move to a higher level - storageClass: "rsc-test-r2-local" # TODO Move to a higher level # Module configuration modules: - name: "snapshot-controller" # TODO add MPO version: 1 enabled: true + modulePullOverride: "main" # imageTag for ModulePullOverride dependencies: [] - name: "sds-local-volume" version: 1 From da33597f350f6548e45a22981e22f5d8e3dae955 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: 
Tue, 16 Dec 2025 18:10:52 +0300 Subject: [PATCH 20/48] Enhance environment variable validation in cluster creation tests by adding checks in BeforeAll hooks. Update documentation comments for KubeConfigPath and TestClusterCreateMode to indicate test failure conditions if not set. --- internal/config/env.go | 4 ++-- .../cluster_creation_suite_test.go | 7 ------- tests/cluster-creation-by-steps/cluster_creation_test.go | 7 ++++++- tests/cluster-creation/cluster_creation_suite_test.go | 7 ------- tests/cluster-creation/cluster_creation_test.go | 6 ++++++ 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/internal/config/env.go b/internal/config/env.go index cbc1e57..8ca65f6 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -30,10 +30,10 @@ var ( // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download - // the kubeconfig manually and set this environment variable. + // the kubeconfig manually and set this environment variable, test will fail. KubeConfigPath = os.Getenv("KUBE_CONFIG_PATH") - // TestClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew" + // TestClusterCreateMode specifies the cluster creation mode. Must be set to either "alwaysUseExisting" or "alwaysCreateNew". If not set, test will fail. TestClusterCreateMode = os.Getenv("TEST_CLUSTER_CREATE_MODE") // TestClusterCleanup specifies whether to remove the test cluster after tests complete. diff --git a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go index c56d687..4b6037b 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go @@ -21,15 +21,8 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - - "github.com/deckhouse/storage-e2e/internal/config" ) -var _ = BeforeSuite(func() { - err := config.ValidateEnvironment() - Expect(err).NotTo(HaveOccurred()) -}) - func TestIntegration(t *testing.T) { RegisterFailHandler(Fail) // Configure Ginkgo to show verbose output diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 32e6dd2..1f4faaf 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -50,7 +50,12 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { ) BeforeAll(func() { - var err error + By("Validating environment variables", func() { + GinkgoWriter.Printf(" ▶️ Validating environment variables\n") + err := config.ValidateEnvironment() + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") + }) // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { diff --git a/tests/cluster-creation/cluster_creation_suite_test.go b/tests/cluster-creation/cluster_creation_suite_test.go index e1cfd85..4b6037b 100644 --- a/tests/cluster-creation/cluster_creation_suite_test.go +++ b/tests/cluster-creation/cluster_creation_suite_test.go @@ -21,15 +21,8 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - - "github.com/deckhouse/storage-e2e/internal/config" // global config package ) -var _ = BeforeSuite(func() { - err := config.ValidateEnvironment() - Expect(err).NotTo(HaveOccurred()) -}) - func TestIntegration(t *testing.T) { RegisterFailHandler(Fail) // Configure Ginkgo to show verbose output diff --git a/tests/cluster-creation/cluster_creation_test.go b/tests/cluster-creation/cluster_creation_test.go index 812b148..9d9f729 100644 --- a/tests/cluster-creation/cluster_creation_test.go +++ b/tests/cluster-creation/cluster_creation_test.go @@ -33,6 +33,12 @@ var _ = Describe("Cluster Creation Test", Ordered, func() { ) BeforeAll(func() { + By("Validating environment variables", func() { + GinkgoWriter.Printf(" ▶️ Validating environment variables\n") + err := config.ValidateEnvironment() + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") + }) // DeferCleanup: Clean up all resources in reverse order of creation - analog of AfterAll() in Ginkgo DeferCleanup(func() { if testClusterResources != nil { From e2c3e4e57040a7537a3adc26b8771b08ab420e85 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 18:15:00 +0300 Subject: [PATCH 21/48] Update README.md to enhance documentation for E2E tests, including detailed descriptions of test types, required environment variables, and instructions for running tests. --- README.md | 76 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 130d32b..1694273 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,74 @@ -# E2E tests +# E2E Tests -## Quick start guide +End-to-end tests for Deckhouse storage components. -### Prerequisites +## Tests -#### Required exports +### cluster-creation +High-level test that creates a complete test cluster from a YAML configuration file. This test handles the entire cluster creation process in a single operation. 
+ +### cluster-creation-by-steps +Step-by-step test that creates a test cluster incrementally, validating each stage: +1. Environment validation +2. Cluster configuration loading +3. SSH connection establishment +4. Kubeconfig retrieval +5. SSH tunnel setup +6. Virtualization module readiness check +7. Namespace creation +8. Virtual machine creation and provisioning + +## Environment Variables + +### Required environment variables + +- **`TEST_CLUSTER_CREATE_MODE`** - Cluster creation mode. Must be set to either: + - `alwaysUseExisting` - Use existing cluster + - `alwaysCreateNew` - Create new cluster + +- **`DKP_LICENSE_KEY`** - DKP license key for cluster deployment + +### Optional (with defaults) + +- **`YAML_CONFIG_FILENAME`** - YAML configuration file name (default: `cluster_config.yml`) + +- **`SSH_USER`** - SSH username for base cluster connection (default: `a.yakubov`) +- **`SSH_HOST`** - SSH hostname/IP for base cluster (default: `94.26.231.181`) +- **`SSH_KEY_PATH`** - Path to SSH private key (default: `~/.ssh/id_rsa`) +- **`SSH_PASSPHRASE`** - Passphrase for SSH private key (no default) + +- **`SSH_VM_USER`** - SSH username for VM access (default: `cloud`) +- **`SSH_VM_PUBLIC_KEY`** - SSH public key to deploy to VMs (default: hardcoded key) + +- **`TEST_CLUSTER_NAMESPACE`** - Namespace for test cluster deployment (default: `e2e-test-cluster`) +- **`TEST_CLUSTER_STORAGE_CLASS`** - Storage class for test cluster (default: `rsc-test-r2-local`) +- **`TEST_CLUSTER_CLEANUP`** - Whether to cleanup test cluster after tests (default: `false`, set to `true` or `True` to enable) + +- **`KUBE_CONFIG_PATH`** - Fallback path to kubeconfig file if SSH retrieval fails (no default) + +## Running Tests + +### Run all tests in a test suite ```bash -# Passphrase of the private key used to connect to the base cluster -export SSH_PASSPHRASE='passphrase' +go test -v ./tests/cluster-creation-by-steps -count=1 +``` -# Cluster creation mode - must be set to either 
'alwaysUseExisting' or 'alwaysCreateNew' -export CLUSTER_CREATE_MODE='alwaysUseExisting' # Use existing cluster -# OR -export CLUSTER_CREATE_MODE='alwaysCreateNew' # Create new cluster +The `-count=1` flag prevents Go from using cached test results. -# Used in case if the code cannot obtain kubeconfig from master itself because e.g. password is required in sudo -export KUBE_CONFIG_PATH='/path/to/kubeconfig/file' +### Run a specific test + +```bash +go test -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" ``` -#### Running a test example +### Example with environment variables ```bash +export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' +export DKP_LICENSE_KEY='your-license-key' +export SSH_PASSPHRASE='your-passphrase' +export TEST_CLUSTER_CLEANUP='true' + go test -v ./tests/cluster-creation-by-steps -count=1 -# count=1 prevents go test from using cached test results ``` - From cf52d51b5c74cae2f3d81396f73db629ca3559b3 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 16 Dec 2025 18:20:56 +0300 Subject: [PATCH 22/48] Refactor TODO.md for clarity and completeness; remove redundant entries and finalize sections. Update cluster interface by commenting out unused VirtualizationClient. Correct typo in cluster_config.yml comments for worker nodes configuration. --- ARCHITECTURE.md | 2 +- TODO.md | 15 ++++++++------- internal/cluster/interface.go | 1 - tests/cluster-creation/cluster_config.yml | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 6e73212..263326d 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -154,7 +154,7 @@ type Cluster interface { Nodes() NodeClient Pods() PodClient Storage() StorageClient - Virtualization() VirtualizationClient // TODO asergunov: Is the VirtualizationClient this one? 
https://github.com/deckhouse/virtualization/blob/main/api/client/kubeclient/client.go#L53 + Virtualization() VirtualizationClient Deckhouse() DeckhouseClient // Lifecycle diff --git a/TODO.md b/TODO.md index e962d2c..5404558 100644 --- a/TODO.md +++ b/TODO.md @@ -18,7 +18,6 @@ 2.2.2. Если это воркер или не единственный мастер, при условии нормального подключения хотя бы к одному мастеру и его соответствия конфигу, мы удаляем эту ноду в кластере, удаляем виртуалку, переставляем ее с нуля и добавляем в кластер в той роли, в которой она была (Да), или падаем (Нет). Да/нет? Тут могут быть сложности в виде невыгоняемых подов с ноды - как быть в этом случае? > 2.3. Что делать если одна или несколько виртуалок выключены? Включаем и пробуем подключиться (Да), падаем (Нет). Да/нет? - alexandr.zimin привет. @@ -37,25 +36,27 @@ alwaysRecreate - если в ns есть уже хоть одна виртуал просто падаем, если не задан +=== Done! === + ## Новая репа Нужна ли? И в гитхабе ли? - сделать новую репу в фоксе, перетащить туда существующий код, убить в гитхаюе и пересоздать. -https://github.com/deckhouse/storage-e2e.git - Done! +https://github.com/deckhouse/storage-e2e.git - + +=== Done! === ## Как ревьюить и к кому с этим идти? Не ревьюить. -Done! - - +=== Done! === +## alwaysUseExisting - что проверять? Включен, доступен ДКП, включены модули? - да. +=== Done! === -alwaysUseExisting - что проверять? Включен, доступен ДКП, включены модули? - да. -alwaysCreate - если виртуалки такие уже есть в НС, то падаем, если виртуалок нет - создаем кластер. Нужно все просто. Если есть какие-то непонятные кейсы, падать с ошибкой, потом обсудим и пофиксим, есличо. 
diff --git a/internal/cluster/interface.go b/internal/cluster/interface.go index e5062a2..f2142f1 100644 --- a/internal/cluster/interface.go +++ b/internal/cluster/interface.go @@ -27,7 +27,6 @@ type Cluster interface { // Nodes() NodeClient // Pods() PodClient // Storage() StorageClient - // Virtualization() VirtualizationClient // Deckhouse() DeckhouseClient Close() error diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml index 0bbd95d..920a32c 100644 --- a/tests/cluster-creation/cluster_config.yml +++ b/tests/cluster-creation/cluster_config.yml @@ -8,7 +8,7 @@ clusterDefinition: cpu: 4 ram: 8 diskSize: 30 - workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number of workes and masters with the same config. + workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number of workers and masters with the same config. - hostname: "worker-1" hostType: "vm" role: "worker" From 0de0c1ce573f5d6cbd99944aac822aaca08a9e89 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 17 Dec 2025 13:09:16 +0300 Subject: [PATCH 23/48] Enhance SSH client functionality by introducing NewClientWithJumpHost for connections through a jump host. Update error messages in environment validation to reflect correct variable names. Improve VM name extraction logic to comply with RFC 1123 rules and add new functions for obtaining setup node and VM IP address. Update cluster configuration in tests to reflect new OS types and improve cleanup logic in test suite. 
--- internal/config/env.go | 4 +- internal/infrastructure/ssh/client.go | 264 ++++++++++++++++++ internal/infrastructure/ssh/tunnel.go | 3 +- pkg/cluster/vms.go | 48 +++- .../cluster_config.yml | 11 +- .../cluster_creation_suite_test.go | 9 + .../cluster_creation_test.go | 89 +++++- 7 files changed, 413 insertions(+), 15 deletions(-) diff --git a/internal/config/env.go b/internal/config/env.go index 8ca65f6..d055f08 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -98,13 +98,13 @@ func ValidateEnvironment() error { } if TestClusterCreateMode == "" { - return fmt.Errorf("CLUSTER_CREATE_MODE environment variable is required but not set. "+ + return fmt.Errorf("TEST_CLUSTER_CREATE_MODE environment variable is required but not set. "+ "Please set it to either '%s' or '%s'", ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) } if TestClusterCreateMode != ClusterCreateModeAlwaysUseExisting && TestClusterCreateMode != ClusterCreateModeAlwaysCreateNew { - return fmt.Errorf("CLUSTER_CREATE_MODE has invalid value '%s'. "+ + return fmt.Errorf("TEST_CLUSTER_CREATE_MODE has invalid value '%s'. 
"+ "Must be either '%s' or '%s'", TestClusterCreateMode, ClusterCreateModeAlwaysUseExisting, ClusterCreateModeAlwaysCreateNew) } diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 79f4e84..7f755ad 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -366,3 +366,267 @@ func NewClient(user, host, keyPath string) (SSHClient, error) { var c client return c.Create(user, host, keyPath) } + +// NewClientWithJumpHost creates a new SSH client that connects through a jump host +// It first connects to the jump host, then establishes a connection to the target host through it +func NewClientWithJumpHost(jumpUser, jumpHost, jumpKeyPath, targetUser, targetHost, targetKeyPath string) (SSHClient, error) { + // Create SSH config for jump host + jumpConfig, err := createSSHConfig(jumpUser, jumpKeyPath) + if err != nil { + return nil, fmt.Errorf("failed to create SSH config for jump host: %w", err) + } + + // Ensure jump host has port if not specified + jumpAddr := jumpHost + if !strings.Contains(jumpAddr, ":") { + jumpAddr = jumpAddr + ":22" + } + + // Connect to jump host + jumpClient, err := ssh.Dial("tcp", jumpAddr, jumpConfig) + if err != nil { + return nil, fmt.Errorf("failed to connect to jump host %s@%s: %w", jumpUser, jumpAddr, err) + } + + // Create SSH config for target host + targetConfig, err := createSSHConfig(targetUser, targetKeyPath) + if err != nil { + jumpClient.Close() + return nil, fmt.Errorf("failed to create SSH config for target host: %w", err) + } + + // Ensure target host has port if not specified + targetAddr := targetHost + if !strings.Contains(targetAddr, ":") { + targetAddr = targetAddr + ":22" + } + + // Connect to target host through jump host + targetConn, err := jumpClient.Dial("tcp", targetAddr) + if err != nil { + jumpClient.Close() + return nil, fmt.Errorf("failed to dial target host %s@%s through jump host: %w", targetUser, targetAddr, err) + } + + // 
Establish SSH connection over the forwarded connection + targetClientConn, targetChans, targetReqs, err := ssh.NewClientConn(targetConn, targetAddr, targetConfig) + if err != nil { + jumpClient.Close() + return nil, fmt.Errorf("failed to establish SSH connection to target host: %w", err) + } + + // Create SSH client for target host + targetClient := ssh.NewClient(targetClientConn, targetChans, targetReqs) + + // Return a client that wraps both connections + // When closing, we need to close both connections + return &jumpHostClient{ + jumpClient: jumpClient, + targetClient: targetClient, + }, nil +} + +// jumpHostClient wraps both jump host and target client connections +type jumpHostClient struct { + jumpClient *ssh.Client + targetClient *ssh.Client +} + +// Create creates a new SSH client (not used for jump host client) +func (c *jumpHostClient) Create(user, host, keyPath string) (SSHClient, error) { + return nil, fmt.Errorf("Create not supported for jump host client") +} + +// StartTunnel starts an SSH tunnel with port forwarding from local to remote +func (c *jumpHostClient) StartTunnel(ctx context.Context, localPort, remotePort string) (func() error, error) { + // Use the target client's StartTunnel method + // We need to access the underlying client's StartTunnel + // Since we can't directly call it, we'll implement it here + return startTunnelOnClient(ctx, c.targetClient, localPort, remotePort) +} + +// startTunnelOnClient starts a tunnel on a raw ssh.Client +func startTunnelOnClient(ctx context.Context, sshClient *ssh.Client, localPort, remotePort string) (func() error, error) { + // Check context before starting + if err := ctx.Err(); err != nil { + return nil, fmt.Errorf("context error before starting tunnel: %w", err) + } + + listener, err := net.Listen("tcp", "127.0.0.1:"+localPort) + if err != nil { + return nil, fmt.Errorf("failed to listen on local port %s: %w", localPort, err) + } + + stopChan := make(chan struct{}) + + go func() { + defer 
listener.Close() + for { + // Check context and stop channel + select { + case <-ctx.Done(): + return + case <-stopChan: + return + default: + } + + // Set deadline for Accept based on context deadline if available + if deadline, ok := ctx.Deadline(); ok { + if tcpListener, ok := listener.(*net.TCPListener); ok { + if err := tcpListener.SetDeadline(deadline); err != nil { + // If setting deadline fails, continue without it + } + } + } + + localConn, err := listener.Accept() + if err != nil { + // Listener closed or error occurred + select { + case <-ctx.Done(): + return + case <-stopChan: + return + default: + // Continue if not stopped + continue + } + } + + go func() { + defer localConn.Close() + remoteConn, err := sshClient.Dial("tcp", "127.0.0.1:"+remotePort) + if err != nil { + // Connection failed, just return - the error will be visible to the client + return + } + defer remoteConn.Close() + + // Copy data bidirectionally with context support + done := make(chan struct{}, 2) + go func() { + _, _ = copyWithContext(ctx, localConn, remoteConn) + done <- struct{}{} + }() + go func() { + _, _ = copyWithContext(ctx, remoteConn, localConn) + done <- struct{}{} + }() + + // Wait for either direction to finish or context cancellation + select { + case <-ctx.Done(): + return + case <-done: + // One direction finished, wait for the other + select { + case <-ctx.Done(): + return + case <-done: + // Both directions finished + } + } + }() + } + }() + + stop := func() error { + close(stopChan) + return listener.Close() + } + + return stop, nil +} + +// Exec executes a command on the remote host +func (c *jumpHostClient) Exec(ctx context.Context, cmd string) (string, error) { + // Check context before starting + if err := ctx.Err(); err != nil { + return "", fmt.Errorf("context error before execution: %w", err) + } + + session, err := c.targetClient.NewSession() + if err != nil { + return "", fmt.Errorf("failed to create SSH session: %w", err) + } + defer session.Close() + 
+ output, err := session.CombinedOutput(cmd) + if err != nil { + // Check if context was cancelled during execution + if ctx.Err() != nil { + return string(output), fmt.Errorf("context cancelled: %w", ctx.Err()) + } + return string(output), fmt.Errorf("command failed: %w", err) + } + + // Check context after execution + if err := ctx.Err(); err != nil { + return string(output), fmt.Errorf("context cancelled: %w", err) + } + + return string(output), nil +} + +// ExecFatal executes a command and returns error if it fails +func (c *jumpHostClient) ExecFatal(ctx context.Context, cmd string) string { + output, err := c.Exec(ctx, cmd) + if err != nil { + panic(fmt.Sprintf("ExecFatal failed for command '%s': %v\nOutput: %s", cmd, err, output)) + } + return output +} + +// Upload uploads a local file to the remote host +func (c *jumpHostClient) Upload(ctx context.Context, localPath, remotePath string) error { + // Check context before starting + if err := ctx.Err(); err != nil { + return fmt.Errorf("context error before upload: %w", err) + } + + sftpClient, err := sftp.NewClient(c.targetClient) + if err != nil { + return fmt.Errorf("failed to create SFTP client: %w", err) + } + defer sftpClient.Close() + + localFile, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("failed to open local file %s: %w", localPath, err) + } + defer localFile.Close() + + remoteFile, err := sftpClient.Create(remotePath) + if err != nil { + return fmt.Errorf("failed to create remote file %s: %w", remotePath, err) + } + defer remoteFile.Close() + + // Use context-aware copy + _, err = copyWithContext(ctx, remoteFile, localFile) + if err != nil { + return fmt.Errorf("failed to copy file: %w", err) + } + + return nil +} + +// Close closes both SSH connections +func (c *jumpHostClient) Close() error { + var errs []error + if c.targetClient != nil { + if err := c.targetClient.Close(); err != nil { + errs = append(errs, err) + } + } + if c.jumpClient != nil { + if err := 
c.jumpClient.Close(); err != nil { + errs = append(errs, err) + } + } + if len(errs) > 0 { + return fmt.Errorf("errors closing connections: %v", errs) + } + return nil +} diff --git a/internal/infrastructure/ssh/tunnel.go b/internal/infrastructure/ssh/tunnel.go index c556a47..f571c7d 100644 --- a/internal/infrastructure/ssh/tunnel.go +++ b/internal/infrastructure/ssh/tunnel.go @@ -126,7 +126,7 @@ func StartTunnel(ctx context.Context, sshClient *ssh.Client, localPort, remotePo return stop, nil } -// EstablishSSHTunnel establishes an SSH tunnel with port forwarding from the master node to the same port on the client +// EstablishSSHTunnel establishes an SSH tunnel with port forwarding from remote node to local port on the client // It uses the exact port specified in remotePort and fails immediately if the port is busy // Returns the tunnel info, local port and error if the tunnel fails to start func EstablishSSHTunnel(ctx context.Context, sshClient SSHClient, remotePort string) (*TunnelInfo, error) { @@ -137,6 +137,7 @@ func EstablishSSHTunnel(ctx context.Context, sshClient SSHClient, remotePort str } // Start the SSH tunnel with context + // --== NOTE! 
If sshClient was created with NewClientWithJumpHost, it already handles jump host routing ==-- stopFunc, err := sshClient.StartTunnel(ctx, remotePort, remotePort) if err != nil { return nil, fmt.Errorf("failed to start SSH tunnel on port %d (port may be busy): %w", remotePortInt, err) diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 27f46e6..0046d5d 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -333,6 +333,8 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace } // getCVMINameFromImageURL extracts a CVMI name from an image URL +// The name must follow RFC 1123 subdomain rules: lowercase alphanumeric, hyphens, dots +// Must start and end with alphanumeric character func getCVMINameFromImageURL(imageURL string) string { // Extract filename from URL and use it as base name parts := strings.Split(imageURL, "/") @@ -340,9 +342,22 @@ func getCVMINameFromImageURL(imageURL string) string { // Remove extension name := strings.TrimSuffix(filename, ".img") name = strings.TrimSuffix(name, ".qcow2") - // Make it Kubernetes-friendly (lowercase, replace dots with hyphens) + // Make it Kubernetes-friendly (lowercase, replace invalid characters) name = strings.ToLower(name) + // Replace underscores and dots with hyphens (Kubernetes allows hyphens but not underscores) + name = strings.ReplaceAll(name, "_", "-") name = strings.ReplaceAll(name, ".", "-") + // Remove any consecutive hyphens + for strings.Contains(name, "--") { + name = strings.ReplaceAll(name, "--", "-") + } + // Ensure it starts and ends with alphanumeric character (RFC 1123 requirement) + // Remove leading/trailing hyphens + name = strings.Trim(name, "-") + // If empty after trimming, use a default name + if name == "" { + name = "image" + } return name } @@ -448,3 +463,34 @@ func CleanupVMResources(ctx context.Context, resources *VMResources) error { return nil } + +// GetSetupNode returns the setup node from cluster definition, or the first worker if setup is 
not set +func GetSetupNode(clusterDef *config.ClusterDefinition) (*config.ClusterNode, error) { + // If setup node is explicitly set, return it + if clusterDef.Setup != nil { + return clusterDef.Setup, nil + } + + // Otherwise, return the first worker + if len(clusterDef.Workers) == 0 { + return nil, fmt.Errorf("no setup node specified and no workers available") + } + + return &clusterDef.Workers[0], nil +} + +// GetVMIPAddress gets the IP address of a VM by querying its status +// It waits for the VM to have an IP address assigned +func GetVMIPAddress(ctx context.Context, virtClient *virtualization.Client, namespace, vmName string) (string, error) { + vm, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) + if err != nil { + return "", fmt.Errorf("failed to get VM %s/%s: %w", namespace, vmName, err) + } + + // Get IP from VM status.IPAddress field + if vm.Status.IPAddress == "" { + return "", fmt.Errorf("VM %s/%s does not have an IP address in status yet", namespace, vmName) + } + + return vm.Status.IPAddress, nil +} diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index fcae6ea..7c3e8c4 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -12,14 +12,21 @@ clusterDefinition: - hostname: "worker-1" hostType: "vm" role: "worker" - osType: "Ubuntu 22.04 6.2.0-39-generic" + osType: "RedOS 8.0 6.6.26-1.red80.x86_64" cpu: 2 ram: 6 diskSize: 30 - hostname: "worker-2" hostType: "vm" role: "worker" - osType: "Ubuntu 22.04 6.2.0-39-generic" + osType: "RedOS 7.3.6 5.15.78-2.el7.3.x86_64" + cpu: 2 + ram: 6 + diskSize: 30 + - hostname: "worker-3" + hostType: "vm" + role: "worker" + osType: "Ubuntu 24.04 6.8.0-53-generic" cpu: 2 ram: 6 diskSize: 30 diff --git a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go index 4b6037b..197234f 100644 --- 
a/tests/cluster-creation-by-steps/cluster_creation_suite_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_suite_test.go @@ -23,6 +23,15 @@ import ( . "github.com/onsi/gomega" ) +// var _ = BeforeSuite(func() { +// By("Validating environment variables", func() { +// GinkgoWriter.Printf(" ▶️ Validating environment variables\n") +// err := config.ValidateEnvironment() +// Expect(err).NotTo(HaveOccurred()) +// GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") +// }) +// }) + func TestIntegration(t *testing.T) { RegisterFailHandler(Fail) // Configure Ginkgo to show verbose output diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 1f4faaf..1cf7204 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -40,9 +40,11 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( err error sshclient ssh.SSHClient + setupSSHClient ssh.SSHClient kubeconfig *rest.Config kubeconfigPath string tunnelinfo *ssh.TunnelInfo + setupTunnelInfo *ssh.TunnelInfo clusterDefinition *config.ClusterDefinition module *deckhouse.Module virtClient *virtualization.Client @@ -68,29 +70,51 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { - // Step 1: Stop SSH tunnel (must be done before closing SSH client) + // Step 1: Stop setup SSH tunnel (must be done before closing SSH client) + if setupTunnelInfo != nil && setupTunnelInfo.StopFunc != nil { + GinkgoWriter.Printf(" ▶️ Stopping setup SSH tunnel on local port %d...\n", setupTunnelInfo.LocalPort) + err := setupTunnelInfo.StopFunc() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop setup SSH tunnel: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Setup SSH tunnel 
stopped successfully\n") + } + } + + // Step 2: Close setup SSH client connection + if setupSSHClient != nil { + GinkgoWriter.Printf(" ▶️ Closing setup SSH client connection...\n") + err := setupSSHClient.Close() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to close setup SSH client: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Setup SSH client closed successfully\n") + } + } + + // Step 3: Stop base cluster SSH tunnel (must be done before closing SSH client) if tunnelinfo != nil && tunnelinfo.StopFunc != nil { - GinkgoWriter.Printf(" ▶️ Stopping SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) + GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) err := tunnelinfo.StopFunc() if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop SSH tunnel: %v\n", err) + GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop base cluster SSH tunnel: %v\n", err) } else { - GinkgoWriter.Printf(" ✅ SSH tunnel stopped successfully\n") + GinkgoWriter.Printf(" ✅ Base cluster SSH tunnel stopped successfully\n") } } - // Step 2: Close SSH client connection + // Step 4: Close base cluster SSH client connection if sshclient != nil { - GinkgoWriter.Printf(" ▶️ Closing SSH client connection...\n") + GinkgoWriter.Printf(" ▶️ Closing base cluster SSH client connection...\n") err := sshclient.Close() if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: Failed to close SSH client: %v\n", err) + GinkgoWriter.Printf(" ⚠️ Warning: Failed to close base cluster SSH client: %v\n", err) } else { - GinkgoWriter.Printf(" ✅ SSH client closed successfully\n") + GinkgoWriter.Printf(" ✅ Base cluster SSH client closed successfully\n") } } - // Step 3: Cleanup test cluster VMs if enabled + // Step 5: Cleanup test cluster VMs if enabled // Note: vmResources is set in the test below, so we capture it in the closure vmRes := vmResources if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { @@ -217,4 
+241,51 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + It("should establish SSH connection to setup node through base cluster master", func() { + By("Stopping current SSH tunnel to base cluster", func() { + if tunnelinfo != nil && tunnelinfo.StopFunc != nil { + GinkgoWriter.Printf(" ▶️ Stopping SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) + err := tunnelinfo.StopFunc() + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ SSH tunnel stopped successfully\n") + tunnelinfo = nil + } + }) + + By("Obtaining SSH client for setting up tunnel to setup-node through base cluster master", func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + namespace := config.TestClusterNamespace + setupNode, _ := cluster.GetSetupNode(clusterDefinition) + + // Get setup node IP address + setupNodeIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, setupNode.Hostname) + Expect(err).NotTo(HaveOccurred()) + Expect(setupNodeIP).NotTo(BeEmpty()) + + // Create SSH client with jump host (base cluster master) + GinkgoWriter.Printf(" ▶️ Creating SSH client to %s@%s through jump host %s@%s\n", + config.VMSSHUser, setupNodeIP, config.SSHUser, config.SSHHost) + setupSSHClient, err = ssh.NewClientWithJumpHost( + config.SSHUser, config.SSHHost, config.SSHKeyPath, // jump host + config.VMSSHUser, setupNodeIP, config.SSHKeyPath, // target host + ) + Expect(err).NotTo(HaveOccurred()) + Expect(setupSSHClient).NotTo(BeNil()) + GinkgoWriter.Printf(" ✅ SSH connection to setup node established successfully\n") + }) + + By("Establishing SSH tunnel with port forwarding from setup node", func() { + ctx := context.Background() + GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to setup node, forwarding port 6445\n") + // setupSSHClient already has jump host connection built in, so we can use EstablishSSHTunnel directly + setupTunnelInfo, err = ssh.EstablishSSHTunnel(ctx, setupSSHClient, "6445") + 
Expect(err).NotTo(HaveOccurred()) + Expect(setupTunnelInfo).NotTo(BeNil()) + Expect(setupTunnelInfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") + GinkgoWriter.Printf(" ✅ SSH tunnel established on local port: %d\n", setupTunnelInfo.LocalPort) + }) + }) + }) // Describe: Cluster Creation From 412d47aa950a97c115f9a5e03b41e0e1d4f6e276 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 17 Dec 2025 17:19:04 +0300 Subject: [PATCH 24/48] Setup VM is now added by default with Ubuntu, remove after the bootstrap. Docker installation works. --- internal/config/env.go | 78 +++++++----- pkg/cluster/setup.go | 113 ++++++++++++++++++ pkg/cluster/vms.go | 76 ++++++++---- .../cluster_creation_test.go | 44 +++---- 4 files changed, 235 insertions(+), 76 deletions(-) create mode 100644 pkg/cluster/setup.go diff --git a/internal/config/env.go b/internal/config/env.go index d055f08..fe8495c 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -15,18 +15,31 @@ const ( ) var ( + + // ENVIRONMENT VARIABLES DEFINITIONS + // YAMLConfigFilename is the filename of the YAML configuration file - YAMLConfigFilename = os.Getenv("YAML_CONFIG_FILENAME") + YAMLConfigFilename = os.Getenv("YAML_CONFIG_FILENAME") + YAMLConfigFilenameDefaultValue = "cluster_config.yml" // SSH credentials to connect to BASE cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") - SSHUser = os.Getenv("SSH_USER") - SSHKeyPath = os.Getenv("SSH_KEY_PATH") - SSHHost = os.Getenv("SSH_HOST") + + SSHUser = os.Getenv("SSH_USER") + SSHUserDefaultValue = "a.yakubov" + + SSHKeyPath = os.Getenv("SSH_KEY_PATH") + SSHKeyPathDefaultValue = "~/.ssh/id_rsa" + + SSHHost = os.Getenv("SSH_HOST") + SSHHostDefaultValue = "94.26.231.181" // SSH credentials to deploy to VM - VMSSHUser = os.Getenv("SSH_VM_USER") - VMSSHPublicKey = os.Getenv("SSH_VM_PUBLIC_KEY") + VMSSHUser = os.Getenv("SSH_VM_USER") + VMSSHUserDefaultValue = "cloud" + + VMSSHPublicKey = os.Getenv("SSH_VM_PUBLIC_KEY") + 
VMSSHPublicKeyDefaultValue = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download @@ -38,58 +51,65 @@ var ( // TestClusterCleanup specifies whether to remove the test cluster after tests complete. // Default is "false". If set to "true" or "True", the test cluster will be cleaned up after tests. 
- TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") + TestClusterCleanup = os.Getenv("TEST_CLUSTER_CLEANUP") + TestClusterCleanupDefaultValue = "false" // TestClusterNamespace specifies the namespace for DKP cluster deployment - TestClusterNamespace = os.Getenv("TEST_CLUSTER_NAMESPACE") + TestClusterNamespace = os.Getenv("TEST_CLUSTER_NAMESPACE") + TestClusterNamespaceDefaultValue = "e2e-test-cluster" // TestClusterStorageClass specifies the storage class for DKP cluster deployment - TestClusterStorageClass = os.Getenv("TEST_CLUSTER_STORAGE_CLASS") + TestClusterStorageClass = os.Getenv("TEST_CLUSTER_STORAGE_CLASS") + TestClusterStorageClassDefaultValue = "rsc-test-r2-local" // DKPLicenseKey specifies the DKP license key for cluster deployment DKPLicenseKey = os.Getenv("DKP_LICENSE_KEY") + + // CONFIGURATION VARIABLES DEFINITIONS + + // DefaultSetupVM is the default VM configuration of the node that is used for bootstrap of test cluster. + // This VM is always created separately and should be deleted after cluster bootstrap. 
+ DefaultSetupVM = ClusterNode{ + Hostname: "bootstrap-node-", + HostType: HostTypeVM, + Role: ClusterRoleSetup, + OSType: OSTypeMap["Ubuntu 22.04 6.2.0-39-generic"], + CPU: 2, + RAM: 4, + DiskSize: 20, + } ) func ValidateEnvironment() error { // Default values for environment variables if YAMLConfigFilename == "" { - YAMLConfigFilename = "cluster_config.yml" + YAMLConfigFilename = YAMLConfigFilenameDefaultValue } - if TestClusterCleanup != "true" && TestClusterCleanup != "True" { - TestClusterCleanup = "false" + if TestClusterCleanup == "" || TestClusterCleanup != "true" && TestClusterCleanup != "True" { + TestClusterCleanup = TestClusterCleanupDefaultValue } if SSHKeyPath == "" { - SSHKeyPath = "~/.ssh/id_rsa" + SSHKeyPath = SSHKeyPathDefaultValue } if SSHUser == "" { - SSHUser = "a.yakubov" + SSHUser = SSHUserDefaultValue } if SSHHost == "" { - SSHHost = "94.26.231.181" + SSHHost = SSHHostDefaultValue } if VMSSHUser == "" { - VMSSHUser = "cloud" + VMSSHUser = VMSSHUserDefaultValue } if VMSSHPublicKey == "" { - VMSSHPublicKey = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + VMSSHPublicKey = VMSSHPublicKeyDefaultValue } if TestClusterNamespace == "" { - TestClusterNamespace = "e2e-test-cluster" + TestClusterNamespace = TestClusterNamespaceDefaultValue } if 
TestClusterStorageClass == "" { - TestClusterStorageClass = "rsc-test-r2-local" - } - - if TestClusterCleanup == "" { - TestClusterCleanup = "false" - } - - if TestClusterCleanup != "true" && TestClusterCleanup != "True" { - TestClusterCleanup = "false" - } else { - TestClusterCleanup = "true" + TestClusterStorageClass = TestClusterStorageClassDefaultValue } // There are no default values for these variables and they must be set! Otherwise, the test will fail. diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go new file mode 100644 index 0000000..def8a26 --- /dev/null +++ b/pkg/cluster/setup.go @@ -0,0 +1,113 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cluster + +import ( + "context" + "fmt" + "strings" + + "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" +) + +// OSInfo represents detected operating system information +type OSInfo struct { + ID string // OS ID (e.g., "debian", "ubuntu", "centos", "redos", "astra", "altlinux") + IDLike string // OS ID_LIKE (e.g., "debian", "rhel fedora") + VersionID string // OS version ID + PrettyName string // OS pretty name + KernelVersion string // Kernel version (e.g., "5.15.0-91-generic") +} + +// GetOSInfo detects the operating system and kernel version on a remote host via SSH. +// This function reads /etc/os-release and runs uname -r to gather OS information. 
+func GetOSInfo(ctx context.Context, sshClient ssh.SSHClient) (*OSInfo, error) { + // Read /etc/os-release file + output, err := sshClient.Exec(ctx, "cat /etc/os-release") + if err != nil { + return nil, fmt.Errorf("failed to read /etc/os-release: %w", err) + } + + osInfo := &OSInfo{} + lines := strings.Split(output, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + continue + } + + key := strings.TrimSpace(parts[0]) + value := strings.Trim(strings.TrimSpace(parts[1]), "\"") + + switch key { + case "ID": + osInfo.ID = strings.ToLower(value) + case "ID_LIKE": + osInfo.IDLike = strings.ToLower(value) + case "VERSION_ID": + osInfo.VersionID = value + case "PRETTY_NAME": + osInfo.PrettyName = value + } + } + + if osInfo.ID == "" { + return nil, fmt.Errorf("failed to detect OS ID from /etc/os-release") + } + + // Detect kernel version + kernelOutput, err := sshClient.Exec(ctx, "uname -r") + if err != nil { + return nil, fmt.Errorf("failed to detect kernel version: %w", err) + } + osInfo.KernelVersion = strings.TrimSpace(kernelOutput) + + return osInfo, nil +} + +// InstallDocker installs Docker on the remote host via SSH. +// Since the setup node is always Ubuntu 22.04, this function uses apt to install docker.io. +// It runs: apt update && apt install docker.io -y, then starts docker and verifies with docker ps. 
+func InstallDocker(ctx context.Context, sshClient ssh.SSHClient) error { + // Update package list and install docker.io + cmd := "sudo apt-get update && sudo apt-get install -y docker.io" + output, err := sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to update packages and install docker.io: %w\nOutput: %s", err, output) + } + + // Start Docker service + cmd = "sudo systemctl start docker" + output, err = sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to start docker service: %w\nOutput: %s", err, output) + } + + // Verify Docker is working by running docker ps + cmd = "sudo docker ps" + output, err = sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to verify Docker installation (docker ps failed): %w\nOutput: %s", err, output) + } + + return nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 0046d5d..0ae3a35 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "strings" + "time" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" @@ -32,10 +33,11 @@ import ( // VMResources tracks VM-related resources created for a test cluster type VMResources struct { - VirtClient *virtualization.Client - Namespace string - VMNames []string - CVMINames []string + VirtClient *virtualization.Client + Namespace string + VMNames []string + CVMINames []string + SetupVMName string // Name of the setup VM (always created) } // CreateVirtualMachines creates virtual machines from cluster definition. 
@@ -55,6 +57,14 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien return nil, nil, fmt.Errorf("no VM nodes found in cluster definition") } + // Always add the default setup VM with a unique suffix + setupVM := config.DefaultSetupVM + // Generate unique suffix using timestamp + suffix := fmt.Sprintf("%d", time.Now().Unix()) + setupVM.Hostname = setupVM.Hostname + suffix + vmNodes = append(vmNodes, setupVM) + setupVMName := setupVM.Hostname // Store the generated name for later use + // Track CVMI names that we create or use cvmiNamesMap := make(map[string]bool) @@ -102,11 +112,14 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien cvmiNames = append(cvmiNames, name) } + // Track setup VM separately + // The setup VM is always created, so it will exist in vmNames resources := &VMResources{ - VirtClient: virtClient, - Namespace: namespace, - VMNames: vmNames, - CVMINames: cvmiNames, + VirtClient: virtClient, + Namespace: namespace, + VMNames: vmNames, + CVMINames: cvmiNames, + SetupVMName: setupVMName, // setupVMName was set above when creating setupVM } return vmNames, resources, nil @@ -464,19 +477,16 @@ func CleanupVMResources(ctx context.Context, resources *VMResources) error { return nil } -// GetSetupNode returns the setup node from cluster definition, or the first worker if setup is not set -func GetSetupNode(clusterDef *config.ClusterDefinition) (*config.ClusterNode, error) { - // If setup node is explicitly set, return it - if clusterDef.Setup != nil { - return clusterDef.Setup, nil - } - - // Otherwise, return the first worker - if len(clusterDef.Workers) == 0 { - return nil, fmt.Errorf("no setup node specified and no workers available") +// GetSetupNode returns the setup VM node from VMResources. +// The setup node is always a separate VM with a unique name (bootstrap-node-). 
+func GetSetupNode(vmResources *VMResources) (*config.ClusterNode, error) { + if vmResources == nil { + return nil, fmt.Errorf("VMResources cannot be nil") } - - return &clusterDef.Workers[0], nil + // Find the setup VM node by hostname + setupVM := config.DefaultSetupVM + setupVM.Hostname = vmResources.SetupVMName + return &setupVM, nil } // GetVMIPAddress gets the IP address of a VM by querying its status @@ -494,3 +504,29 @@ func GetVMIPAddress(ctx context.Context, virtClient *virtualization.Client, name return vm.Status.IPAddress, nil } + +// CleanupSetupVM deletes the setup VM and its associated resources. +// This should be called after the test cluster bootstrap is complete. +func CleanupSetupVM(ctx context.Context, resources *VMResources) error { + if resources == nil { + return fmt.Errorf("resources cannot be nil") + } + + namespace := resources.Namespace + setupVMName := resources.SetupVMName + + // Step 1: Delete the setup VM + err := resources.VirtClient.VirtualMachines().Delete(ctx, namespace, setupVMName) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete setup VM %s/%s: %w", namespace, setupVMName, err) + } + + // Step 2: Delete the setup VM's system disk + systemDiskName := fmt.Sprintf("%s-system", setupVMName) + err = resources.VirtClient.VirtualDisks().Delete(ctx, namespace, systemDiskName) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete setup VM system disk %s/%s: %w", namespace, systemDiskName, err) + } + + return nil +} diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 1cf7204..da193fb 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -44,7 +44,6 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { kubeconfig *rest.Config kubeconfigPath string tunnelinfo *ssh.TunnelInfo - setupTunnelInfo 
*ssh.TunnelInfo clusterDefinition *config.ClusterDefinition module *deckhouse.Module virtClient *virtualization.Client @@ -70,18 +69,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { - // Step 1: Stop setup SSH tunnel (must be done before closing SSH client) - if setupTunnelInfo != nil && setupTunnelInfo.StopFunc != nil { - GinkgoWriter.Printf(" ▶️ Stopping setup SSH tunnel on local port %d...\n", setupTunnelInfo.LocalPort) - err := setupTunnelInfo.StopFunc() - if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop setup SSH tunnel: %v\n", err) - } else { - GinkgoWriter.Printf(" ✅ Setup SSH tunnel stopped successfully\n") - } - } - - // Step 2: Close setup SSH client connection + // Step 1: Close setup SSH client connection if setupSSHClient != nil { GinkgoWriter.Printf(" ▶️ Closing setup SSH client connection...\n") err := setupSSHClient.Close() @@ -92,7 +80,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 3: Stop base cluster SSH tunnel (must be done before closing SSH client) + // Step 2: Stop base cluster SSH tunnel (must be done before closing SSH client) if tunnelinfo != nil && tunnelinfo.StopFunc != nil { GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) err := tunnelinfo.StopFunc() @@ -103,7 +91,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 4: Close base cluster SSH client connection + // Step 3: Close base cluster SSH client connection if sshclient != nil { GinkgoWriter.Printf(" ▶️ Closing base cluster SSH client connection...\n") err := sshclient.Close() @@ -114,7 +102,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 5: Cleanup test cluster VMs if enabled + // Step 4: Cleanup test cluster VMs if enabled // Note: 
vmResources is set in the test below, so we capture it in the closure vmRes := vmResources if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { @@ -252,12 +240,13 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } }) - By("Obtaining SSH client for setting up tunnel to setup-node through base cluster master", func() { + By("Obtaining SSH client to setup node through base cluster master", func() { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() namespace := config.TestClusterNamespace - setupNode, _ := cluster.GetSetupNode(clusterDefinition) + setupNode, err := cluster.GetSetupNode(vmResources) + Expect(err).NotTo(HaveOccurred()) // Get setup node IP address setupNodeIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, setupNode.Hostname) @@ -275,16 +264,17 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { Expect(setupSSHClient).NotTo(BeNil()) GinkgoWriter.Printf(" ✅ SSH connection to setup node established successfully\n") }) + }) - By("Establishing SSH tunnel with port forwarding from setup node", func() { - ctx := context.Background() - GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to setup node, forwarding port 6445\n") - // setupSSHClient already has jump host connection built in, so we can use EstablishSSHTunnel directly - setupTunnelInfo, err = ssh.EstablishSSHTunnel(ctx, setupSSHClient, "6445") - Expect(err).NotTo(HaveOccurred()) - Expect(setupTunnelInfo).NotTo(BeNil()) - Expect(setupTunnelInfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") - GinkgoWriter.Printf(" ✅ SSH tunnel established on local port: %d\n", setupTunnelInfo.LocalPort) + It("should ensure Docker is installed on the setup node", func() { + By("Installing Docker on setup node", func() { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + + GinkgoWriter.Printf(" ▶️ Installing Docker on setup node\n") + err 
:= cluster.InstallDocker(ctx, setupSSHClient) + Expect(err).NotTo(HaveOccurred(), "Failed to install Docker on setup node") + GinkgoWriter.Printf(" ✅ Docker installed and running successfully on setup node\n") }) }) From 0f13b67d1953b13c44bcdde36bdd0c123c5b47ed Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 18 Dec 2025 10:35:55 +0300 Subject: [PATCH 25/48] Update readme, refactored cleanup, setup node processing, VMs are created and deleted successfully --- README.md | 49 +++- files/bootstrap/config.yml.tpl | 98 ++++++++ internal/config/config.go | 36 +++ internal/config/env.go | 19 +- internal/infrastructure/ssh/client.go | 52 +++- internal/kubernetes/virtualization/client.go | 5 + .../virtualization/virtual_image.go | 85 +++++++ pkg/cluster/setup.go | 209 ++++++++++++++++ pkg/cluster/vms.go | 223 ++++++++++++------ .../cluster_creation_test.go | 161 ++++++++++--- 10 files changed, 804 insertions(+), 133 deletions(-) create mode 100644 files/bootstrap/config.yml.tpl create mode 100644 internal/config/config.go create mode 100644 internal/kubernetes/virtualization/virtual_image.go diff --git a/README.md b/README.md index 1694273..ab5d2ca 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,19 @@ High-level test that creates a complete test cluster from a YAML configuration f ### cluster-creation-by-steps Step-by-step test that creates a test cluster incrementally, validating each stage: -1. Environment validation -2. Cluster configuration loading -3. SSH connection establishment -4. Kubeconfig retrieval -5. SSH tunnel setup -6. Virtualization module readiness check -7. Namespace creation -8. Virtual machine creation and provisioning + +1. Environment validation - Validates required environment variables are set +2. Cluster configuration loading - Loads and parses cluster definition from YAML file +3. SSH connection establishment to base cluster - Connects to base cluster via SSH +4. 
Kubeconfig retrieval from base cluster - Fetches kubeconfig file from base cluster +5. SSH tunnel setup with port forwarding - Establishes tunnel to access Kubernetes API +6. Virtualization module readiness check - Verifies virtualization module is Ready +7. Test namespace creation - Creates test namespace if it doesn't exist +8. Virtual machine creation and provisioning - Creates VMs and waits for them to become Running +9. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host +10. Docker installation on setup node - Installs Docker (required for DKP bootstrap) +11. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values +12. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap ## Environment Variables @@ -26,7 +31,9 @@ Step-by-step test that creates a test cluster incrementally, validating each sta - `alwaysUseExisting` - Use existing cluster - `alwaysCreateNew` - Create new cluster -- **`DKP_LICENSE_KEY`** - DKP license key for cluster deployment +- **`DKP_LICENSE_KEY`** - DKP license key for cluster deployment (see license token at license.deckhouse.io) + +- **`REGISTRY_DOCKER_CFG`** - dockerRegistryCfg for downloading images from Deckhouse registry (see license.deckhouse.io) ### Optional (with defaults) @@ -46,12 +53,29 @@ Step-by-step test that creates a test cluster incrementally, validating each sta - **`KUBE_CONFIG_PATH`** - Fallback path to kubeconfig file if SSH retrieval fails (no default) +## Configuration Parameters + +These are code-level configuration constants defined in `internal/config/config.go`: + +- **`DefaultSetupVM`** - Default configuration for the setup/bootstrap VM node: + - Hostname prefix: `bootstrap-node-` + - Host type: VM + - Role: setup + - OS Type: Ubuntu 22.04 6.2.0-39-generic + - CPU: 2 cores + - RAM: 4 GB + - Disk size: 20 GB + +- **`VMsRunningTimeout`** - Timeout for 
waiting for all VMs to become Running state (default: `20 minutes`) + +**Note:** When running tests, use `-timeout` flag that is longer than `VMsRunningTimeout` to allow enough time for VM provisioning. For example, use `-timeout=25m` or `-timeout=60m` to ensure the test doesn't timeout prematurely. + ## Running Tests ### Run all tests in a test suite ```bash -go test -v ./tests/cluster-creation-by-steps -count=1 +go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 ``` The `-count=1` flag prevents Go from using cached test results. @@ -59,7 +83,7 @@ The `-count=1` flag prevents Go from using cached test results. ### Run a specific test ```bash -go test -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" +go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" ``` ### Example with environment variables @@ -67,8 +91,9 @@ go test -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should crea ```bash export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' export DKP_LICENSE_KEY='your-license-key' +export REGISTRY_DOCKER_CFG='base64-encoded-docker-config-json' export SSH_PASSPHRASE='your-passphrase' export TEST_CLUSTER_CLEANUP='true' -go test -v ./tests/cluster-creation-by-steps -count=1 +go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 ``` diff --git a/files/bootstrap/config.yml.tpl b/files/bootstrap/config.yml.tpl new file mode 100644 index 0000000..ac41278 --- /dev/null +++ b/files/bootstrap/config.yml.tpl @@ -0,0 +1,98 @@ +# Общие параметры кластера. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/installing/configuration.html#clusterconfiguration +apiVersion: deckhouse.io/v1 +kind: ClusterConfiguration +clusterType: Static +# Адресное пространство подов кластера. +podSubnetCIDR: {{ .PodSubnetCIDR }} +# Адресное пространство сети сервисов кластера. 
+serviceSubnetCIDR: {{ .ServiceSubnetCIDR }} +kubernetesVersion: "{{ .KubernetesVersion }}" # Можно указать нужную - 1.29/1.31 и пр. +# Домен кластера. +clusterDomain: "{{ .ClusterDomain }}" +--- +# Настройки первичной инициализации кластера Deckhouse. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/installing/configuration.html#initconfiguration +apiVersion: deckhouse.io/v1 +kind: InitConfiguration +deckhouse: + imagesRepo: {{ .ImagesRepo }} + registryDockerCfg: {{ .RegistryDockerCfg }} + devBranch : main +--- +# Настройки модуля deckhouse. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/modules/deckhouse/configuration.html +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: deckhouse +spec: + version: 1 + enabled: true + settings: + bundle: Default + # Канал обновлений Deckhouse. Канал Early Access достаточно стабилен, его можно использовать в продуктивных окружениях. + # Если планируется использовать несколько кластеров, то рекомендуется установить на них разные каналы обновлений. + # Подробнее: https://deckhouse.ru/products/kubernetes-platform/documentation/v1/deckhouse-release-channels.html + releaseChannel: EarlyAccess + logLevel: Info +--- +# Глобальные настройки Deckhouse. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/deckhouse-configure-global.html#%D0%BF%D0%B0%D1%80%D0%B0%D0%BC%D0%B5%D1%82%D1%80%D1%8B +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: global +spec: + version: 2 + settings: + modules: + # Шаблон, который будет использоваться для составления адресов системных приложений в кластере. + # Например, Grafana для %s.example.com будет доступна на домене 'grafana.example.com'. + # Домен НЕ ДОЛЖЕН совпадать с указанным в параметре clusterDomain ресурса ClusterConfiguration. + # Можете изменить на свой сразу, либо следовать шагам руководства и сменить его после установки. 
+ publicDomainTemplate: "{{ .PublicDomainTemplate }}" +--- +# Настройки модуля user-authn. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/modules/user-authn/configuration.html +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: user-authn +spec: + version: 2 + enabled: true + settings: + controlPlaneConfigurator: + dexCAMode: DoNotNeed + # Включение доступа к API-серверу Kubernetes через Ingress. + # https://deckhouse.ru/products/kubernetes-platform/documentation/v1/modules/user-authn/configuration.html#parameters-publishapi + publishAPI: + enabled: true + https: + mode: Global + global: + kubeconfigGeneratorMasterCA: "" +--- +# Настройки модуля cni-cilium. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/modules/cni-cilium/configuration.html +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: cni-cilium +spec: + version: 1 + # Включить модуль cni-cilium + enabled: true + settings: + # Настройки модуля cni-cilium + # https://deckhouse.ru/products/kubernetes-platform/documentation/v1/modules/cni-cilium/configuration.html + tunnelMode: VXLAN +--- +# Параметры статического кластера. +# https://deckhouse.ru/products/kubernetes-platform/documentation/v1/installing/configuration.html#staticclusterconfiguration +apiVersion: deckhouse.io/v1 +kind: StaticClusterConfiguration +internalNetworkCIDRs: +- {{ .InternalNetworkCIDR }} + diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..c80e332 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,36 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import "time" + +// Configuration parameters used in code + +// DefaultSetupVM is the default VM configuration of the node that is used for bootstrap of test cluster. +// This VM is always created separately and should be deleted after cluster bootstrap. +var DefaultSetupVM = ClusterNode{ + Hostname: "bootstrap-node-", + HostType: HostTypeVM, + Role: ClusterRoleSetup, + OSType: OSTypeMap["Ubuntu 22.04 6.2.0-39-generic"], + CPU: 2, + RAM: 4, + DiskSize: 20, +} + +// VMsRunningTimeout is the timeout for waiting for all VMs to become Running state +const VMsRunningTimeout = 20 * time.Minute diff --git a/internal/config/env.go b/internal/config/env.go index fe8495c..7824deb 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -65,19 +65,8 @@ var ( // DKPLicenseKey specifies the DKP license key for cluster deployment DKPLicenseKey = os.Getenv("DKP_LICENSE_KEY") - // CONFIGURATION VARIABLES DEFINITIONS - - // DefaultSetupVM is the default VM configuration of the node that is used for bootstrap of test cluster. - // This VM is always created separately and should be deleted after cluster bootstrap. - DefaultSetupVM = ClusterNode{ - Hostname: "bootstrap-node-", - HostType: HostTypeVM, - Role: ClusterRoleSetup, - OSType: OSTypeMap["Ubuntu 22.04 6.2.0-39-generic"], - CPU: 2, - RAM: 4, - DiskSize: 20, - } + // RegistryDockerCfg specifies the docker registry key to download images from Deckhouse registry. 
+ RegistryDockerCfg = os.Getenv("REGISTRY_DOCKER_CFG") ) func ValidateEnvironment() error { @@ -117,6 +106,10 @@ func ValidateEnvironment() error { return fmt.Errorf("DKP_LICENSE_KEY environment variable is required but not set. ") } + if RegistryDockerCfg == "" { + return fmt.Errorf("REGISTRY_DOCKER_CFG environment variable is required but not set.") + } + if TestClusterCreateMode == "" { return fmt.Errorf("TEST_CLUSTER_CREATE_MODE environment variable is required but not set. "+ "Please set it to either '%s' or '%s'", diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 7f755ad..037bf9a 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -25,7 +25,9 @@ import ( "os/user" "path/filepath" "strings" + "sync" "syscall" + "time" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" @@ -34,7 +36,10 @@ import ( // client implements Client interface type client struct { - sshClient *ssh.Client + sshClient *ssh.Client + keepaliveCtx context.Context + keepaliveCancel context.CancelFunc + keepaliveWg sync.WaitGroup } // copyWithContext copies data from src to dst with context cancellation support @@ -181,7 +186,45 @@ func (c *client) Create(user, host, keyPath string) (SSHClient, error) { return nil, fmt.Errorf("failed to connect to %s@%s: %w", user, addr, err) } - return &client{sshClient: sshClient}, nil + // Start keepalive mechanism (equivalent to ServerAliveInterval=60) + keepaliveCtx, keepaliveCancel := context.WithCancel(context.Background()) + newClient := &client{ + sshClient: sshClient, + keepaliveCtx: keepaliveCtx, + keepaliveCancel: keepaliveCancel, + } + newClient.startKeepalive() + + return newClient, nil +} + +// startKeepalive starts a goroutine that sends keepalive requests every 60 seconds +// This prevents SSH connections from timing out due to inactivity. 
+// Note: golang.org/x/crypto/ssh doesn't have a built-in keepalive parameter, +// so we implement it manually using SendRequest with "keepalive@openssh.com" +// (equivalent to ServerAliveInterval=60 in SSH config) +func (c *client) startKeepalive() { + c.keepaliveWg.Add(1) + go func() { + defer c.keepaliveWg.Done() + ticker := time.NewTicker(60 * time.Second) + defer ticker.Stop() + + for { + select { + case <-c.keepaliveCtx.Done(): + return + case <-ticker.C: + // Send keepalive request using standard OpenSSH keepalive request type + // This is equivalent to ServerAliveInterval in SSH config + _, _, err := c.sshClient.SendRequest("keepalive@openssh.com", true, nil) + if err != nil { + // Connection is closed, stop sending keepalives + return + } + } + } + }() } // StartTunnel starts an SSH tunnel with port forwarding from local to remote @@ -355,6 +398,11 @@ func (c *client) Upload(ctx context.Context, localPath, remotePath string) error // Close closes the SSH connection func (c *client) Close() error { + // Stop keepalive goroutine + if c.keepaliveCancel != nil { + c.keepaliveCancel() + c.keepaliveWg.Wait() + } if c.sshClient != nil { return c.sshClient.Close() } diff --git a/internal/kubernetes/virtualization/client.go b/internal/kubernetes/virtualization/client.go index a6c3e37..ff006f5 100644 --- a/internal/kubernetes/virtualization/client.go +++ b/internal/kubernetes/virtualization/client.go @@ -63,6 +63,11 @@ func (c *Client) ClusterVirtualImages() ClusterVirtualImageClient { return &clusterVirtualImageClient{client: c.client} } +// VirtualImages returns a VirtualImage client +func (c *Client) VirtualImages() VirtualImageClient { + return &virtualImageClient{client: c.client} +} + // VirtualMachineBlockDeviceAttachments returns a VMBD client func (c *Client) VirtualMachineBlockDeviceAttachments() VMBDClient { return &vmbdClient{client: c.client} diff --git a/internal/kubernetes/virtualization/virtual_image.go 
b/internal/kubernetes/virtualization/virtual_image.go new file mode 100644 index 0000000..cdfab93 --- /dev/null +++ b/internal/kubernetes/virtualization/virtual_image.go @@ -0,0 +1,85 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package virtualization + +import ( + "context" + "fmt" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// VirtualImageClient provides operations on VirtualImage resources +// Note: VirtualImage is a namespace-scoped resource +type VirtualImageClient interface { + Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualImage, error) + List(ctx context.Context, namespace string) ([]v1alpha2.VirtualImage, error) + Create(ctx context.Context, vi *v1alpha2.VirtualImage) error + Update(ctx context.Context, vi *v1alpha2.VirtualImage) error + Delete(ctx context.Context, namespace, name string) error +} + +type virtualImageClient struct { + client client.Client +} + +func (c *virtualImageClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualImage, error) { + vi := &v1alpha2.VirtualImage{} + key := client.ObjectKey{Namespace: namespace, Name: name} + if err := c.client.Get(ctx, key, vi); err != nil { + return nil, fmt.Errorf("failed to get VirtualImage %s/%s: %w", namespace, name, err) + } + return vi, nil +} + +func (c *virtualImageClient) List(ctx context.Context, namespace string) 
([]v1alpha2.VirtualImage, error) { + list := &v1alpha2.VirtualImageList{} + if err := c.client.List(ctx, list, client.InNamespace(namespace)); err != nil { + return nil, fmt.Errorf("failed to list VirtualImages in namespace %s: %w", namespace, err) + } + return list.Items, nil +} + +func (c *virtualImageClient) Create(ctx context.Context, vi *v1alpha2.VirtualImage) error { + if err := c.client.Create(ctx, vi); err != nil { + return fmt.Errorf("failed to create VirtualImage %s/%s: %w", vi.Namespace, vi.Name, err) + } + return nil +} + +func (c *virtualImageClient) Update(ctx context.Context, vi *v1alpha2.VirtualImage) error { + if err := c.client.Update(ctx, vi); err != nil { + return fmt.Errorf("failed to update VirtualImage %s/%s: %w", vi.Namespace, vi.Name, err) + } + return nil +} + +func (c *virtualImageClient) Delete(ctx context.Context, namespace, name string) error { + vi := &v1alpha2.VirtualImage{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + if err := c.client.Delete(ctx, vi); err != nil { + return fmt.Errorf("failed to delete VirtualImage %s/%s: %w", namespace, name, err) + } + return nil +} + diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index def8a26..33358ef 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -19,8 +19,14 @@ package cluster import ( "context" "fmt" + "net" + "os" + "path/filepath" + "runtime" "strings" + "text/template" + "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" ) @@ -111,3 +117,206 @@ func InstallDocker(ctx context.Context, sshClient ssh.SSHClient) error { return nil } + +// PrepareBootstrapConfig prepares the bootstrap configuration file from a template. +// It takes cluster definition, master IP address, and VM IP addresses to calculate the internal network CIDR. +// The function generates a config file and saves it to the temp/ directory. +// Returns the path to the generated config file. 
+func PrepareBootstrapConfig(clusterDef *config.ClusterDefinition, masterIP string, vmIPs []string) (string, error) { + if clusterDef == nil { + return "", fmt.Errorf("clusterDef cannot be nil") + } + if masterIP == "" { + return "", fmt.Errorf("masterIP cannot be empty") + } + if len(vmIPs) == 0 { + return "", fmt.Errorf("vmIPs cannot be empty") + } + + // Calculate internal network CIDR from VM IPs (assume /24 subnet) + internalNetworkCIDR, err := calculateNetworkCIDR(vmIPs) + if err != nil { + return "", fmt.Errorf("failed to calculate network CIDR: %w", err) + } + + // Format public domain template with master IP for sslip.io + // Format: %s.10.10.1.5.sslip.io (dots in IP are preserved) + publicDomainTemplate := fmt.Sprintf("%%s.%s.sslip.io", masterIP) + + // Prepare template data + templateData := struct { + PodSubnetCIDR string + ServiceSubnetCIDR string + KubernetesVersion string + ClusterDomain string + ImagesRepo string + RegistryDockerCfg string + PublicDomainTemplate string + InternalNetworkCIDR string + }{ + PodSubnetCIDR: clusterDef.DKPParameters.PodSubnetCIDR, + ServiceSubnetCIDR: clusterDef.DKPParameters.ServiceSubnetCIDR, + KubernetesVersion: clusterDef.DKPParameters.KubernetesVersion, + ClusterDomain: clusterDef.DKPParameters.ClusterDomain, + ImagesRepo: clusterDef.DKPParameters.RegistryRepo, + RegistryDockerCfg: config.RegistryDockerCfg, + PublicDomainTemplate: publicDomainTemplate, + InternalNetworkCIDR: internalNetworkCIDR, + } + + // Get the test file name from the caller + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + return "", fmt.Errorf("failed to get caller file information") + } + testFileName := strings.TrimSuffix(filepath.Base(callerFile), filepath.Ext(callerFile)) + + // Determine the temp directory path in the repo root + // callerFile is in tests/{test-dir}/, so we go up two levels to reach repo root + callerDir := filepath.Dir(callerFile) + repoRootPath := filepath.Join(callerDir, "..", "..") + // Resolve the .. 
parts to get absolute path + repoRoot, err := filepath.Abs(repoRootPath) + if err != nil { + return "", fmt.Errorf("failed to resolve repo root path: %w", err) + } + + // Template file path + templatePath := filepath.Join(repoRoot, "files", "bootstrap", "config.yml.tpl") + + // Read template file + templateContent, err := os.ReadFile(templatePath) + if err != nil { + return "", fmt.Errorf("failed to read template file %s: %w", templatePath, err) + } + + // Parse template + tmpl, err := template.New("bootstrap-config").Parse(string(templateContent)) + if err != nil { + return "", fmt.Errorf("failed to parse template: %w", err) + } + + // Determine temp directory path - same pattern as GetKubeconfig + tempDir := filepath.Join(repoRoot, "temp", testFileName) + + // Create temp directory if it doesn't exist + if err := os.MkdirAll(tempDir, 0755); err != nil { + return "", fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) + } + + // Output file path + outputPath := filepath.Join(tempDir, "config.yml") + + // Create output file + outputFile, err := os.Create(outputPath) + if err != nil { + return "", fmt.Errorf("failed to create output file %s: %w", outputPath, err) + } + defer outputFile.Close() + + // Execute template and write to file + if err := tmpl.Execute(outputFile, templateData); err != nil { + return "", fmt.Errorf("failed to execute template: %w", err) + } + + return outputPath, nil +} + +// calculateNetworkCIDR calculates the network CIDR that encompasses all VM IP addresses. +// It starts with a /24 network from the first IP and expands the network (reduces prefix length) +// until all IPs belong to the CIDR. 
+func calculateNetworkCIDR(vmIPs []string) (string, error) { + if len(vmIPs) == 0 { + return "", fmt.Errorf("vmIPs cannot be empty") + } + + // Parse all IP addresses + parsedIPs := make([]net.IP, 0, len(vmIPs)) + for _, ipStr := range vmIPs { + ip := net.ParseIP(ipStr) + if ip == nil { + return "", fmt.Errorf("invalid IP address: %s", ipStr) + } + // Convert to IPv4 if needed + ipv4 := ip.To4() + if ipv4 == nil { + return "", fmt.Errorf("IP address is not IPv4: %s", ipStr) + } + parsedIPs = append(parsedIPs, ipv4) + } + + // Start with /24 network from the first IP + // Replace last octet with 0 + firstIP := make(net.IP, len(parsedIPs[0])) + copy(firstIP, parsedIPs[0]) + firstIP[3] = 0 // Set last octet to 0 + + // Start with /24 and expand until all IPs fit + prefixLen := 24 + for prefixLen >= 16 { + // Create network with current prefix length + mask := net.CIDRMask(prefixLen, 32) + network := firstIP.Mask(mask) + cidrStr := fmt.Sprintf("%s/%d", network.String(), prefixLen) + + // Parse the CIDR to get network and mask + _, ipNet, err := net.ParseCIDR(cidrStr) + if err != nil { + return "", fmt.Errorf("failed to parse CIDR %s: %w", cidrStr, err) + } + + // Check if all IPs belong to this network + allInNetwork := true + for _, ip := range parsedIPs { + if !ipNet.Contains(ip) { + allInNetwork = false + break + } + } + + if allInNetwork { + return cidrStr, nil + } + + // Expand network by reducing prefix length + prefixLen-- + } + + return "", fmt.Errorf("failed to find a network CIDR that contains all IPs") +} + +// UploadBootstrapFiles uploads the private key and config.yml file to the setup node. +// The private key is uploaded to /home/cloud/.ssh/id_rsa with permissions 0600. +// The config.yml file is uploaded to /home/cloud/config.yml. 
+func UploadBootstrapFiles(ctx context.Context, sshClient ssh.SSHClient, privateKeyPath, configPath string) error { + if sshClient == nil { + return fmt.Errorf("sshClient cannot be nil") + } + if privateKeyPath == "" { + return fmt.Errorf("privateKeyPath cannot be empty") + } + if configPath == "" { + return fmt.Errorf("configPath cannot be empty") + } + + // Upload private key to /home/cloud/.ssh/id_rsa + remoteKeyPath := "/home/cloud/.ssh/id_rsa" + if err := sshClient.Upload(ctx, privateKeyPath, remoteKeyPath); err != nil { + return fmt.Errorf("failed to upload private key to %s: %w", remoteKeyPath, err) + } + + // Set permissions 0600 for the private key (no sudo needed, we own the file) + cmd := "chmod 600 /home/cloud/.ssh/id_rsa" + output, err := sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to set permissions for private key: %w\nOutput: %s", err, output) + } + + // Upload config.yml to /home/cloud/config.yml + remoteConfigPath := "/home/cloud/config.yml" + if err := sshClient.Upload(ctx, configPath, remoteConfigPath); err != nil { + return fmt.Errorf("failed to upload config.yml to %s: %w", remoteConfigPath, err) + } + + return nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 0ae3a35..74bbb1e 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -36,8 +36,8 @@ type VMResources struct { VirtClient *virtualization.Client Namespace string VMNames []string - CVMINames []string - SetupVMName string // Name of the setup VM (always created) + CVMINames []string // ClusterVirtualImage names (cluster-scoped) + SetupVMName string // Name of the setup VM (always created) } // CreateVirtualMachines creates virtual machines from cluster definition. @@ -415,62 +415,19 @@ runcmd: `, sshPubKey, hostname) } -// CleanupVMResources forcefully stops and deletes virtual machines, virtual disks, and cluster virtual images. 
-// If a ClusterVirtualImage is in use by other resources, it will be skipped but VMs and VDs will still be deleted. -func CleanupVMResources(ctx context.Context, resources *VMResources) error { +// RemoveAllVMs forcefully stops and deletes virtual machines, virtual disks, and virtual images. +// If a VirtualImage is in use by other resources, it will be skipped but VMs and VDs will still be deleted. +func RemoveAllVMs(ctx context.Context, resources *VMResources) error { if resources == nil { return fmt.Errorf("resources cannot be nil") } - // Step 1: Forcefully stop and delete Virtual Machines + // Delete all VMs using RemoveVM for _, vmName := range resources.VMNames { - // Try to stop the VM by updating RunPolicy to Manual or by deleting directly - // Deletion will stop the VM automatically - err := resources.VirtClient.VirtualMachines().Delete(ctx, resources.Namespace, vmName) - if err != nil && !errors.IsNotFound(err) { - // Log but continue - we'll try to clean up other resources - fmt.Printf("Warning: Failed to delete VM %s/%s: %v\n", resources.Namespace, vmName, err) - } - } - - // Step 2: Delete Virtual Disks - // Delete system disks for our VMs - for _, vmName := range resources.VMNames { - systemDiskName := fmt.Sprintf("%s-system", vmName) - err := resources.VirtClient.VirtualDisks().Delete(ctx, resources.Namespace, systemDiskName) - if err != nil && !errors.IsNotFound(err) { - fmt.Printf("Warning: Failed to delete VirtualDisk %s/%s: %v\n", resources.Namespace, systemDiskName, err) - } - } - - // Step 3: Check which ClusterVirtualImages are in use and delete those that aren't - // Get all VirtualDisks across all namespaces to check for CVMI usage - allVDisksAllNS, err := resources.VirtClient.VirtualDisks().List(ctx, "") - if err != nil { - fmt.Printf("Warning: Failed to list VirtualDisks across all namespaces: %v\n", err) - allVDisksAllNS = []v1alpha2.VirtualDisk{} - } - - // Build a map of CVMI names that are in use - cvmiInUse := make(map[string]bool) 
- for _, vd := range allVDisksAllNS { - if vd.Spec.DataSource != nil && vd.Spec.DataSource.ObjectRef != nil { - if vd.Spec.DataSource.ObjectRef.Kind == "ClusterVirtualImage" { - cvmiInUse[vd.Spec.DataSource.ObjectRef.Name] = true - } - } - } - - // Delete ClusterVirtualImages that are not in use - for _, cvmiName := range resources.CVMINames { - if cvmiInUse[cvmiName] { - fmt.Printf("Skipping deletion of ClusterVirtualImage %s: still in use by other resources\n", cvmiName) - continue - } - - err := resources.VirtClient.ClusterVirtualImages().Delete(ctx, cvmiName) - if err != nil && !errors.IsNotFound(err) { - fmt.Printf("Warning: Failed to delete ClusterVirtualImage %s: %v\n", cvmiName, err) + err := RemoveVM(ctx, resources.VirtClient, resources.Namespace, vmName) + if err != nil { + // Log but continue - we'll try to clean up other VMs + fmt.Printf("Warning: Failed to remove VM %s/%s: %v\n", resources.Namespace, vmName, err) } } @@ -505,8 +462,153 @@ func GetVMIPAddress(ctx context.Context, virtClient *virtualization.Client, name return vm.Status.IPAddress, nil } +// RemoveVM removes a VM and its associated VirtualDisks, then removes the ClusterVirtualImage if not used by other VMs. +// It removes resources in order: VM -> VirtualDisks -> ClusterVirtualImage (if unused). 
+func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, vmName string) error { + // Step 1: Get VM to find associated VirtualDisks + vm, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) + if err != nil { + if errors.IsNotFound(err) { + // VM doesn't exist, nothing to clean up + return nil + } + return fmt.Errorf("failed to get VM %s/%s: %w", namespace, vmName, err) + } + + // Collect VirtualDisk names from VM's BlockDeviceRefs + vdNames := make([]string, 0) + for _, bdRef := range vm.Spec.BlockDeviceRefs { + if bdRef.Kind == v1alpha2.DiskDevice { + vdNames = append(vdNames, bdRef.Name) + } + } + + // Step 2: Collect ClusterVirtualImage names from VirtualDisks before deleting them + cvmiNamesSet := make(map[string]bool) + for _, vdName := range vdNames { + vd, err := virtClient.VirtualDisks().Get(ctx, namespace, vdName) + if err != nil { + if errors.IsNotFound(err) { + continue // Already deleted + } + // Log but continue + fmt.Printf("Warning: Failed to get VirtualDisk %s/%s: %v\n", namespace, vdName, err) + continue + } + + if vd.Spec.DataSource != nil && vd.Spec.DataSource.ObjectRef != nil { + if vd.Spec.DataSource.ObjectRef.Kind == "ClusterVirtualImage" { + cvmiNamesSet[vd.Spec.DataSource.ObjectRef.Name] = true + } + } + } + + // Step 3: Delete the VM + err = virtClient.VirtualMachines().Delete(ctx, namespace, vmName) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete VM %s/%s: %w", namespace, vmName, err) + } + + // Step 3.5: Wait for VM to be fully deleted before deleting VirtualDisks + // Kubernetes deletion is asynchronous, so we need to wait until the VM is gone + for { + _, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) + if errors.IsNotFound(err) { + // VirtualMachine is fully deleted + break + } + if err != nil { + // Some other error occurred, log and break to avoid infinite loop + fmt.Printf("Warning: Error checking if VirtualMachine %s/%s is deleted: %v\n", 
namespace, vmName, err) + break + } + // Wait a bit before checking again + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled while waiting for VM %s/%s to be deleted: %w", namespace, vmName, ctx.Err()) + case <-time.After(5 * time.Second): + // Continue polling + } + } + + // Step 4: Delete all VirtualDisks associated with this VM + deletedVDNames := make(map[string]bool) + for _, vdName := range vdNames { + err := virtClient.VirtualDisks().Delete(ctx, namespace, vdName) + if err != nil && !errors.IsNotFound(err) { + fmt.Printf("Warning: Failed to delete VirtualDisk %s/%s: %v\n", namespace, vdName, err) + } else { + deletedVDNames[vdName] = true + } + } + + // Step 4.5: Wait for all VirtualDisks to be fully deleted before checking ClusterVirtualImage usage + // Poll until all VirtualDisks we deleted are no longer present + for len(deletedVDNames) > 0 { + allDeleted := true + for vdName := range deletedVDNames { + _, err := virtClient.VirtualDisks().Get(ctx, namespace, vdName) + if errors.IsNotFound(err) { + // VirtualDisk is fully deleted, remove from tracking + delete(deletedVDNames, vdName) + } else if err != nil { + // Some other error occurred, log and remove from tracking to avoid infinite loop + fmt.Printf("Warning: Error checking if VirtualDisk %s/%s is deleted: %v\n", namespace, vdName, err) + delete(deletedVDNames, vdName) + } else { + // VirtualDisk still exists + allDeleted = false + } + } + if allDeleted { + break + } + // Wait a bit before checking again + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled while waiting for VirtualDisks to be deleted: %w", ctx.Err()) + case <-time.After(5 * time.Second): + // Continue polling + } + } + + // Step 5: Check if ClusterVirtualImages are still in use by other VirtualDisks in the namespace and delete if not + // Note: Since CVMI is cluster-scoped, it could be used by VDs in other namespaces too, + // but for simplicity we only check within the current namespace + allVDs, 
err := virtClient.VirtualDisks().List(ctx, namespace) + if err != nil { + fmt.Printf("Warning: Failed to list VirtualDisks to check ClusterVirtualImage usage: %v\n", err) + allVDs = []v1alpha2.VirtualDisk{} + } + + // Build map of ClusterVirtualImages that are still in use + cvmiInUse := make(map[string]bool) + for _, vd := range allVDs { + if vd.Spec.DataSource != nil && vd.Spec.DataSource.ObjectRef != nil { + if vd.Spec.DataSource.ObjectRef.Kind == "ClusterVirtualImage" { + cvmiInUse[vd.Spec.DataSource.ObjectRef.Name] = true + } + } + } + + // Delete ClusterVirtualImages that are not in use (cluster-scoped, no namespace) + for cvmiName := range cvmiNamesSet { + if cvmiInUse[cvmiName] { + continue // Still in use, skip deletion + } + + err := virtClient.ClusterVirtualImages().Delete(ctx, cvmiName) + if err != nil && !errors.IsNotFound(err) { + fmt.Printf("Warning: Failed to delete ClusterVirtualImage %s: %v\n", cvmiName, err) + } + } + + return nil +} + // CleanupSetupVM deletes the setup VM and its associated resources. // This should be called after the test cluster bootstrap is complete. +// Deprecated: Use RemoveVM instead. 
func CleanupSetupVM(ctx context.Context, resources *VMResources) error { if resources == nil { return fmt.Errorf("resources cannot be nil") @@ -515,18 +617,5 @@ func CleanupSetupVM(ctx context.Context, resources *VMResources) error { namespace := resources.Namespace setupVMName := resources.SetupVMName - // Step 1: Delete the setup VM - err := resources.VirtClient.VirtualMachines().Delete(ctx, namespace, setupVMName) - if err != nil && !errors.IsNotFound(err) { - return fmt.Errorf("failed to delete setup VM %s/%s: %w", namespace, setupVMName, err) - } - - // Step 2: Delete the setup VM's system disk - systemDiskName := fmt.Sprintf("%s-system", setupVMName) - err = resources.VirtClient.VirtualDisks().Delete(ctx, namespace, systemDiskName) - if err != nil && !errors.IsNotFound(err) { - return fmt.Errorf("failed to delete setup VM system disk %s/%s: %w", namespace, systemDiskName, err) - } - - return nil + return RemoveVM(ctx, resources.VirtClient, namespace, setupVMName) } diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index da193fb..91fbd6b 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -19,6 +19,9 @@ package integration import ( "context" "fmt" + "os" + "path/filepath" + "strings" "time" "k8s.io/client-go/rest" @@ -48,6 +51,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { module *deckhouse.Module virtClient *virtualization.Client vmResources *cluster.VMResources + bootstrapConfig string ) BeforeAll(func() { @@ -69,7 +73,36 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { - // Step 1: Close setup SSH client connection + // Step 1: Cleanup setup VM (needs API access via SSH tunnel) + vmRes := vmResources + if vmRes != nil && 
vmRes.SetupVMName != "" { + GinkgoWriter.Printf(" ▶️ Removing setup VM %s...\n", vmRes.SetupVMName) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + err := cluster.RemoveVM(ctx, vmRes.VirtClient, vmRes.Namespace, vmRes.SetupVMName) + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to remove setup VM: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Setup VM removed successfully\n") + } + } + + // Step 2: Cleanup test cluster VMs if enabled + if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { + if vmRes != nil { + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster VMs...\n") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + err := cluster.RemoveAllVMs(ctx, vmRes) + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to cleanup test cluster VMs: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Test cluster VMs cleaned up successfully\n") + } + } + } + + // Step 3: Close setup SSH client connection (no longer needed after VM cleanup) if setupSSHClient != nil { GinkgoWriter.Printf(" ▶️ Closing setup SSH client connection...\n") err := setupSSHClient.Close() @@ -80,7 +113,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 2: Stop base cluster SSH tunnel (must be done before closing SSH client) + // Step 4: Stop base cluster SSH tunnel (must be done before closing SSH client) if tunnelinfo != nil && tunnelinfo.StopFunc != nil { GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) err := tunnelinfo.StopFunc() @@ -91,7 +124,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 3: Close base cluster SSH client connection + // Step 5: Close base cluster SSH client connection if sshclient != nil { GinkgoWriter.Printf(" ▶️ Closing base cluster SSH client connection...\n") err := sshclient.Close() @@ -101,31 +134,11 @@ 
var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ Base cluster SSH client closed successfully\n") } } - - // Step 4: Cleanup test cluster VMs if enabled - // Note: vmResources is set in the test below, so we capture it in the closure - vmRes := vmResources - if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { - if vmRes != nil { - GinkgoWriter.Printf(" ▶️ Cleaning up test cluster VMs...\n") - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - err := cluster.CleanupVMResources(ctx, vmRes) - if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: Failed to cleanup test cluster VMs: %v\n", err) - } else { - GinkgoWriter.Printf(" ✅ Test cluster VMs cleaned up successfully\n") - } - } - } - - // Note: kubeconfig and kubeconfigPath are just config/file paths, no cleanup needed - // The kubeconfig file is stored in temp/ directory and can be kept for debugging }) }) // BeforeAll - // Stage 2: Establish SSH connection to base cluster (reused for getting kubeconfig) + // Step 3: Establish SSH connection to base cluster (reused for getting kubeconfig) It("should establish ssh connection to the base cluster", func() { By(fmt.Sprintf("Connecting to %s@%s using key %s", config.SSHUser, config.SSHHost, config.SSHKeyPath), func() { GinkgoWriter.Printf(" ▶️ Creating SSH client for %s@%s\n", config.SSHUser, config.SSHHost) @@ -135,8 +148,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Stage 3: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) - + // Step 4: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) It("should get kubeconfig from the base cluster", func() { By("Retrieving kubeconfig from base cluster", func() { GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", config.SSHHost) @@ -148,8 +160,7 @@ var _ = Describe("Cluster 
Creation Step-by-Step Test", Ordered, func() { }) }) - // Stage 4: Establish SSH tunnel with port forwarding - + // Step 5: Establish SSH tunnel with port forwarding to access Kubernetes API It("should establish ssh tunnel to the base cluster with port forwarding", func() { By("Setting up SSH tunnel with port forwarding", func() { GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", config.SSHHost) @@ -163,6 +174,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + // Step 6: Verify virtualization module is Ready before creating VMs It("should make sure that virtualization module is Ready", func() { By("Checking if virtualization module is Ready", func() { GinkgoWriter.Printf(" ▶️ Getting module with timeout\n") @@ -176,6 +188,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + // Step 7: Create test namespace if it doesn't exist It("should ensure test namespace exists", func() { By("Checking and creating test namespace if needed", func() { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) @@ -191,6 +204,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + // Step 8: Create virtual machines and wait for them to become Running It("should create virtual machines from cluster definition", func() { By("Creating virtual machines", func() { ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) @@ -212,23 +226,33 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { Expect(err).NotTo(HaveOccurred(), "Failed to create virtual machines") GinkgoWriter.Printf(" ✅ Created %d virtual machines: %v\n", len(vmNames), vmNames) - // Wait for all VMs to become Running - GinkgoWriter.Printf(" ▶️ Waiting for VMs to become Running (timeout: 10 minutes)\n") - for _, vmName := range vmNames { - Eventually(func() (v1alpha2.MachinePhase, error) { + GinkgoWriter.Printf(" ▶️ Waiting for all %d VMs to 
become Running (total timeout: %v)\n", len(vmNames), config.VMsRunningTimeout) + loggedRunning := make(map[string]bool) + Eventually(func() (bool, error) { + allRunning := true + for _, vmName := range vmNames { vm, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) if err != nil { - return "", err + return false, fmt.Errorf("failed to get VM %s: %w", vmName, err) } - return vm.Status.Phase, nil - }).WithTimeout(10*time.Minute).WithPolling(10*time.Second).Should(Equal(v1alpha2.MachineRunning), - "VM %s should become Running within 10 minutes", vmName) - GinkgoWriter.Printf(" ✅ VM %s is Running\n", vmName) - } - GinkgoWriter.Printf(" ✅ All VMs are Running\n") + if vm.Status.Phase == v1alpha2.MachineRunning { + if !loggedRunning[vmName] { + GinkgoWriter.Printf(" ✅ VM %s is Running\n", vmName) + loggedRunning[vmName] = true + } + } else { + allRunning = false + } + } + return allRunning, nil + }).WithTimeout(config.VMsRunningTimeout).WithPolling(20*time.Second).Should(BeTrue(), + "All VMs should become Running within %v", config.VMsRunningTimeout) + + GinkgoWriter.Printf(" ✅ All %d VMs are Running\n", len(vmNames)) }) }) + // Step 9: Establish SSH connection to setup node through base cluster master (jump host) It("should establish SSH connection to setup node through base cluster master", func() { By("Stopping current SSH tunnel to base cluster", func() { if tunnelinfo != nil && tunnelinfo.StopFunc != nil { @@ -266,6 +290,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + // Step 10: Install Docker on setup node (required for DKP bootstrap) It("should ensure Docker is installed on the setup node", func() { By("Installing Docker on setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) @@ -278,4 +303,62 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) + // Step 11: Prepare bootstrap configuration file from template with cluster-specific values + 
It("should prepare bootstrap config for the setup node", func() { + By("Preparing bootstrap config for the setup node", func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + namespace := config.TestClusterNamespace + + // Get IPs for all VMs (masters, workers, and setup node) + var vmIPs []string + allVMNames := append([]string{}, vmResources.VMNames...) + allVMNames = append(allVMNames, vmResources.SetupVMName) + + GinkgoWriter.Printf(" ▶️ Getting IP addresses for all VMs\n") + for _, vmName := range allVMNames { + vmIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, vmName) + Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for VM %s", vmName) + Expect(vmIP).NotTo(BeEmpty(), "VM %s IP address should not be empty", vmName) + vmIPs = append(vmIPs, vmIP) + GinkgoWriter.Printf(" ✅ VM %s has IP: %s\n", vmName, vmIP) + } + + firstMasterHostname := clusterDefinition.Masters[0].Hostname + masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) + Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) + Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) + GinkgoWriter.Printf(" ✅ Master node %s has IP: %s\n", firstMasterHostname, masterIP) + + GinkgoWriter.Printf(" ▶️ Preparing bootstrap config for the setup node\n") + bootstrapConfig, err = cluster.PrepareBootstrapConfig(clusterDefinition, masterIP, vmIPs) + Expect(err).NotTo(HaveOccurred(), "Failed to prepare bootstrap config for the setup node") + GinkgoWriter.Printf(" ✅ Bootstrap config prepared successfully at: %s\n", bootstrapConfig) + }) + }) + + // Step 12: Upload private key and config.yml to setup node for DKP bootstrap + It("should upload bootstrap files to the setup node", func() { + By("Uploading private key and config.yml to setup node", func() { + ctx, cancel := context.WithTimeout(context.Background(), 
5*time.Minute) + defer cancel() + + // Expand SSH key path to handle ~ + keyPath := config.SSHKeyPath + if strings.HasPrefix(keyPath, "~") { + homeDir, err := os.UserHomeDir() + Expect(err).NotTo(HaveOccurred()) + keyPath = filepath.Join(homeDir, strings.TrimPrefix(keyPath, "~/")) + } + + GinkgoWriter.Printf(" ▶️ Uploading bootstrap files to setup node\n") + GinkgoWriter.Printf(" 📁 Private key: %s -> /home/cloud/.ssh/id_rsa\n", keyPath) + GinkgoWriter.Printf(" 📁 Config file: %s -> /home/cloud/config.yml\n", bootstrapConfig) + + err := cluster.UploadBootstrapFiles(ctx, setupSSHClient, keyPath, bootstrapConfig) + Expect(err).NotTo(HaveOccurred(), "Failed to upload bootstrap files to setup node") + GinkgoWriter.Printf(" ✅ Bootstrap files uploaded successfully\n") + }) + }) }) // Describe: Cluster Creation From cf3148d7014de647f13b5d4a976a5238348717cc Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 18 Dec 2025 16:11:07 +0300 Subject: [PATCH 26/48] Enhance ClusterNode configuration by adding CoreFraction field for CPU core allocation. Update related YAML parsing and VM creation logic to support this new field. Implement devBranch extraction from bootstrap config and add BootstrapCluster function for cluster initialization. Update test configurations to include coreFraction values and improve logging during cluster bootstrap process. --- internal/config/config.go | 15 +- internal/config/types.go | 27 ++-- pkg/cluster/setup.go | 145 ++++++++++++++++++ pkg/cluster/vms.go | 14 +- .../cluster_config.yml | 4 + .../cluster_creation_test.go | 65 ++++++++ 6 files changed, 247 insertions(+), 23 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index c80e332..0931f91 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -23,13 +23,14 @@ import "time" // DefaultSetupVM is the default VM configuration of the node that is used for bootstrap of test cluster. 
// This VM is always created separately and should be deleted after cluster bootstrap. var DefaultSetupVM = ClusterNode{ - Hostname: "bootstrap-node-", - HostType: HostTypeVM, - Role: ClusterRoleSetup, - OSType: OSTypeMap["Ubuntu 22.04 6.2.0-39-generic"], - CPU: 2, - RAM: 4, - DiskSize: 20, + Hostname: "bootstrap-node-", + HostType: HostTypeVM, + Role: ClusterRoleSetup, + OSType: OSTypeMap["Ubuntu 22.04 6.2.0-39-generic"], + CPU: 2, + CoreFraction: func() *int { v := 50; return &v }(), // 50% core fraction + RAM: 4, + DiskSize: 20, } // VMsRunningTimeout is the timeout for waiting for all VMs to become Running state diff --git a/internal/config/types.go b/internal/config/types.go index b9a8533..830b5cb 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -55,9 +55,10 @@ type ClusterNode struct { HostType HostType `yaml:"hostType"` Role ClusterRole `yaml:"role"` // VM-specific fields (only used when HostType == HostTypeVM) - CPU int `yaml:"cpu"` // Required for VM - RAM int `yaml:"ram"` // Required for VM, in GB - DiskSize int `yaml:"diskSize"` // Required for VM, in GB + CPU int `yaml:"cpu"` // Required for VM + CoreFraction *int `yaml:"coreFraction,omitempty"` // Optional for VM, CPU core fraction as percentage (e.g., 50 for 50%). Defaults to 100% if not specified. 
+ RAM int `yaml:"ram"` // Required for VM, in GB + DiskSize int `yaml:"diskSize"` // Required for VM, in GB // Bare-metal specific fields Prepared bool `yaml:"prepared,omitempty"` // Whether the node is already prepared for DKP installation } @@ -101,15 +102,16 @@ const ( func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { // Temporary struct with OSType as string for unmarshaling type clusterNodeTmp struct { - Hostname string `yaml:"hostname"` - IPAddress string `yaml:"ipAddress,omitempty"` - OSType string `yaml:"osType"` - HostType string `yaml:"hostType"` - Role string `yaml:"role"` - CPU int `yaml:"cpu"` - RAM int `yaml:"ram"` - DiskSize int `yaml:"diskSize"` - Prepared bool `yaml:"prepared,omitempty"` + Hostname string `yaml:"hostname"` + IPAddress string `yaml:"ipAddress,omitempty"` + OSType string `yaml:"osType"` + HostType string `yaml:"hostType"` + Role string `yaml:"role"` + CPU int `yaml:"cpu"` + CoreFraction *int `yaml:"coreFraction,omitempty"` + RAM int `yaml:"ram"` + DiskSize int `yaml:"diskSize"` + Prepared bool `yaml:"prepared,omitempty"` } var tmp clusterNodeTmp @@ -142,6 +144,7 @@ func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { n.HostType = hostType n.Role = role n.CPU = tmp.CPU + n.CoreFraction = tmp.CoreFraction n.RAM = tmp.RAM n.DiskSize = tmp.DiskSize n.Prepared = tmp.Prepared diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index 33358ef..942d65a 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -26,6 +26,8 @@ import ( "strings" "text/template" + "gopkg.in/yaml.v3" + "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" ) @@ -320,3 +322,146 @@ func UploadBootstrapFiles(ctx context.Context, sshClient ssh.SSHClient, privateK return nil } + +// getDevBranchFromConfig reads the devBranch value from the bootstrap config.yml file. +// It parses the YAML and extracts the devBranch from the InitConfiguration section. 
+func getDevBranchFromConfig(configPath string) (string, error) { + if configPath == "" { + return "", fmt.Errorf("configPath cannot be empty") + } + + data, err := os.ReadFile(configPath) + if err != nil { + return "", fmt.Errorf("failed to read config file %s: %w", configPath, err) + } + + // Parse YAML documents (the file contains multiple YAML documents separated by ---) + documents := strings.Split(string(data), "---") + for _, doc := range documents { + doc = strings.TrimSpace(doc) + if doc == "" { + continue + } + + var initConfig struct { + APIVersion string `yaml:"apiVersion"` + Kind string `yaml:"kind"` + Deckhouse struct { + DevBranch string `yaml:"devBranch"` + } `yaml:"deckhouse"` + } + + if err := yaml.Unmarshal([]byte(doc), &initConfig); err != nil { + continue // Skip documents that don't match + } + + // Check if this is an InitConfiguration + if initConfig.Kind == "InitConfiguration" && initConfig.Deckhouse.DevBranch != "" { + return initConfig.Deckhouse.DevBranch, nil + } + } + + return "", fmt.Errorf("devBranch not found in config file %s", configPath) +} + +// BootstrapCluster bootstraps a Kubernetes cluster from the setup node to the first master node. +// It performs the following steps: +// 1. Logs into the Docker registry using DKP_LICENSE_KEY from config +// 2. Runs the dhctl bootstrap command in a Docker container (can take up to 30 minutes) +// The function uses sudo to run commands as root on the setup node. +// It uses config.VMSSHUser and config.DKPLicenseKey from the config package. +// The install image is constructed from registryRepo and the devBranch read from configPath. 
+func BootstrapCluster(ctx context.Context, sshClient ssh.SSHClient, clusterDef *config.ClusterDefinition, masterIP string, configPath string) error { + if sshClient == nil { + return fmt.Errorf("sshClient cannot be nil") + } + if clusterDef == nil { + return fmt.Errorf("clusterDef cannot be nil") + } + if masterIP == "" { + return fmt.Errorf("masterIP cannot be empty") + } + if configPath == "" { + return fmt.Errorf("configPath cannot be empty") + } + if config.VMSSHUser == "" { + return fmt.Errorf("VMSSHUser cannot be empty in config") + } + if config.DKPLicenseKey == "" { + return fmt.Errorf("DKPLicenseKey cannot be empty in config") + } + + // Extract registry hostname from registry repo URL + // Example: "dev-registry.deckhouse.io/sys/deckhouse-oss" -> "dev-registry.deckhouse.io" + registryRepo := clusterDef.DKPParameters.RegistryRepo + if registryRepo == "" { + return fmt.Errorf("registryRepo cannot be empty in cluster definition") + } + registryHostname := strings.Split(registryRepo, "/")[0] + if registryHostname == "" { + return fmt.Errorf("failed to extract hostname from registry repo: %s", registryRepo) + } + + // Read devBranch from config file + // Example: "dev-registry.deckhouse.io/sys/deckhouse-oss" + "/install:" + "main" = "dev-registry.deckhouse.io/sys/deckhouse-oss/install:main" + devBranch, err := getDevBranchFromConfig(configPath) + if err != nil { + return fmt.Errorf("failed to get devBranch from config: %w", err) + } + + // Step 1: Login to Docker registry + // Command: echo "$DKP_LICENSE_KEY" | docker login -u license-token --password-stdin $REGISTRY_HOSTNAME + loginCmd := fmt.Sprintf("echo \"%s\" | sudo docker login -u license-token --password-stdin %s", config.DKPLicenseKey, registryHostname) + output, err := sshClient.Exec(ctx, loginCmd) + if err != nil { + return fmt.Errorf("failed to login to Docker registry %s: %w\nOutput: %s", registryHostname, err, output) + } + + // Determine log file path: configPath is in temp//config.yml, so log 
goes to temp//bootstrap.log + configDir := filepath.Dir(configPath) + logFilePath := filepath.Join(configDir, "bootstrap.log") + remoteLogPath := fmt.Sprintf("/tmp/bootstrap-%d.log", os.Getpid()) // Use unique name to avoid conflicts + + // Step 2: Run dhctl bootstrap command with output redirected to log file + // Note: Removed -it flags since output is redirected and we don't need interactive terminal + // Command: docker run --network=host --pull=always -v "/home/$VM_SSH_User/config.yml:/config.yml" -v "/home/$VM_SSH_User/.ssh:/tmp/.ssh" $REGISTRY_REPO/install:$DEV_BRANCH dhctl bootstrap --ssh-host=$IP_OF_MASTER_VM --ssh-user=$VM_SSH_User --ssh-agent-private-keys=/tmp/.ssh/id_rsa --config=/config.yml > $REMOTE_LOG_PATH 2>&1 + installImage := fmt.Sprintf("%s/install:%s", registryRepo, devBranch) + bootstrapCmd := fmt.Sprintf( + "sudo docker run --network=host --pull=always -v \"/home/%s/config.yml:/config.yml\" -v \"/home/%s/.ssh:/tmp/.ssh\" %s dhctl bootstrap --ssh-host=%s --ssh-user=%s --ssh-agent-private-keys=/tmp/.ssh/id_rsa --config=/config.yml > %s 2>&1", + config.VMSSHUser, config.VMSSHUser, installImage, masterIP, config.VMSSHUser, remoteLogPath, + ) + + // Run the bootstrap command (this can take up to 30 minutes) + // Output is redirected to remote log file, so output variable will be empty + output, err = sshClient.Exec(ctx, bootstrapCmd) + + // Always download log file from remote host (whether success or failure) + // Use sudo cat since the log file was created with sudo + logContent, logErr := sshClient.Exec(ctx, fmt.Sprintf("sudo cat %s 2>/dev/null || echo ''", remoteLogPath)) + + // Save log file locally + if logErr == nil && logContent != "" { + // Create local log file directory if it doesn't exist + if mkdirErr := os.MkdirAll(configDir, 0755); mkdirErr == nil { + // Write log content to local file + _ = os.WriteFile(logFilePath, []byte(logContent), 0644) + } + } + + // Clean up remote log file + _, _ = sshClient.Exec(ctx, fmt.Sprintf("sudo rm 
-f %s", remoteLogPath)) + + // If bootstrap failed, include log content in error + if err != nil { + baseErr := fmt.Errorf("failed to bootstrap cluster: %w", err) + if logContent != "" { + return fmt.Errorf("%w\n\nBootstrap log saved to: %s\n\nBootstrap log content:\n%s", baseErr, logFilePath, logContent) + } else if output != "" { + // Fallback to output if log file wasn't available + return fmt.Errorf("%w\n\nOutput: %s", baseErr, output) + } + return baseErr + } + + return nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 74bbb1e..2f14e95 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -316,10 +316,16 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace OsType: v1alpha2.OsType("Generic"), Bootloader: v1alpha2.BootloaderType("BIOS"), LiveMigrationPolicy: v1alpha2.LiveMigrationPolicy("PreferSafe"), - CPU: v1alpha2.CPUSpec{ - Cores: node.CPU, - CoreFraction: "100%", - }, + CPU: func() v1alpha2.CPUSpec { + coreFraction := "100%" // Default to 100% + if node.CoreFraction != nil { + coreFraction = fmt.Sprintf("%d%%", *node.CoreFraction) + } + return v1alpha2.CPUSpec{ + Cores: node.CPU, + CoreFraction: coreFraction, + } + }(), Memory: v1alpha2.MemorySpec{ Size: memoryQuantity, }, diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index 7c3e8c4..1e6fd65 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -6,6 +6,7 @@ clusterDefinition: role: "master" osType: "Ubuntu 22.04 6.2.0-39-generic" cpu: 4 + coreFraction: 50 ram: 8 diskSize: 30 workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number of workes and masters with the same config. 
@@ -14,6 +15,7 @@ clusterDefinition: role: "worker" osType: "RedOS 8.0 6.6.26-1.red80.x86_64" cpu: 2 + coreFraction: 50 ram: 6 diskSize: 30 - hostname: "worker-2" @@ -21,6 +23,7 @@ clusterDefinition: role: "worker" osType: "RedOS 7.3.6 5.15.78-2.el7.3.x86_64" cpu: 2 + coreFraction: 50 ram: 6 diskSize: 30 - hostname: "worker-3" @@ -28,6 +31,7 @@ clusterDefinition: role: "worker" osType: "Ubuntu 24.04 6.8.0-53-generic" cpu: 2 + coreFraction: 50 ram: 6 diskSize: 30 # DKP parameters diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 91fbd6b..3b574d5 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -62,6 +62,48 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") }) + By("Outputting environment variables without default values", func() { + GinkgoWriter.Printf(" 📋 Environment variables (without default values):\n") + + // Helper function to mask sensitive values + maskValue := func(value string, mask bool) string { + if mask && len(value) > 5 { + return value[:5] + "***" + } + return value + } + + // DKP_LICENSE_KEY - mask first 5 characters + if config.DKPLicenseKey != "" { + GinkgoWriter.Printf(" DKP_LICENSE_KEY: %s\n", maskValue(config.DKPLicenseKey, true)) + } + + // REGISTRY_DOCKER_CFG - mask first 5 characters + if config.RegistryDockerCfg != "" { + GinkgoWriter.Printf(" REGISTRY_DOCKER_CFG: %s\n", maskValue(config.RegistryDockerCfg, true)) + } + + // TEST_CLUSTER_CREATE_MODE - no masking + if config.TestClusterCreateMode != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_CREATE_MODE: %s\n", config.TestClusterCreateMode) + } + + // TEST_CLUSTER_CLEANUP - no masking + if config.TestClusterCleanup != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_CLEANUP: %s\n", config.TestClusterCleanup) + } + + // SSH_PASSPHRASE - 
no masking (optional, may be empty) + if config.SSHPassphrase != "" { + GinkgoWriter.Printf(" SSH_PASSPHRASE: \n") + } + + // KUBE_CONFIG_PATH - no masking (optional, may be empty) + if config.KubeConfigPath != "" { + GinkgoWriter.Printf(" KUBE_CONFIG_PATH: %s\n", config.KubeConfigPath) + } + }) + // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { yamlConfigFilename := config.YAMLConfigFilename @@ -361,4 +403,27 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ Bootstrap files uploaded successfully\n") }) }) + + // Step 13: Bootstrap cluster from setup node to first master node + It("should bootstrap cluster from setup node to first master", func() { + By("Bootstrapping cluster from setup node", func() { + ctx, cancel := context.WithTimeout(context.Background(), 35*time.Minute) + defer cancel() + + namespace := config.TestClusterNamespace + firstMasterHostname := clusterDefinition.Masters[0].Hostname + + // Get master IP address + masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) + Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) + Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) + + GinkgoWriter.Printf(" ▶️ Bootstrapping cluster from setup node to master %s (%s)\n", firstMasterHostname, masterIP) + GinkgoWriter.Printf(" ⏱️ This may take up to 30 minutes...\n") + + err = cluster.BootstrapCluster(ctx, setupSSHClient, clusterDefinition, masterIP, bootstrapConfig) + Expect(err).NotTo(HaveOccurred(), "Failed to bootstrap cluster") + GinkgoWriter.Printf(" ✅ Cluster bootstrap completed successfully\n") + }) + }) }) // Describe: Cluster Creation From 964b1cbc4b6783d0ecf73a48c6b27bd7f1503d18 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 18 Dec 2025 18:07:35 +0300 
Subject: [PATCH 27/48] Bootstrap worked --- pkg/cluster/setup.go | 73 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index 942d65a..aef9867 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -420,21 +420,82 @@ func BootstrapCluster(ctx context.Context, sshClient ssh.SSHClient, clusterDef * // Determine log file path: configPath is in temp//config.yml, so log goes to temp//bootstrap.log configDir := filepath.Dir(configPath) logFilePath := filepath.Join(configDir, "bootstrap.log") - remoteLogPath := fmt.Sprintf("/tmp/bootstrap-%d.log", os.Getpid()) // Use unique name to avoid conflicts + remoteLogPath := fmt.Sprintf("/tmp/bootstrap-%d.log", os.Getpid()) // Use unique name to avoid conflicts + agentSocketPath := fmt.Sprintf("/tmp/ssh-agent-%d.sock", os.Getpid()) // Unique agent socket path + + // Step 2: Setup ssh-agent and add the SSH key + // Create a temporary askpass script to provide the passphrase non-interactively + askpassScriptPath := fmt.Sprintf("/tmp/ssh-askpass-%d.sh", os.Getpid()) + askpassScript := fmt.Sprintf(`#!/bin/bash +echo "%s" +`, config.SSHPassphrase) + + // Create the askpass script file on the remote host + createAskpassCmd := fmt.Sprintf("sudo -u %s bash -c 'cat > %s << \"ASKPASS_EOF\"\n%sASKPASS_EOF\nchmod +x %s'", config.VMSSHUser, askpassScriptPath, askpassScript, askpassScriptPath) + _, err = sshClient.Exec(ctx, createAskpassCmd) + if err != nil { + return fmt.Errorf("failed to create askpass script: %w", err) + } + + // Setup ssh-agent and add the key + setupAgentScript := fmt.Sprintf(` + # Start ssh-agent with specified socket path + eval $(ssh-agent -a %s) > /dev/null 2>&1 + export SSH_AUTH_SOCK=%s + export SSH_AGENT_PID=$SSH_AGENT_PID + + # Add the SSH key to the agent using the askpass script + if [ -n "%s" ]; then + DISPLAY=:0 SSH_ASKPASS=%s ssh-add /home/%s/.ssh/id_rsa &1 + else + ssh-add /home/%s/.ssh/id_rsa &1 + fi 
+ + # Output the agent socket path for use in docker command + echo $SSH_AUTH_SOCK + `, agentSocketPath, agentSocketPath, config.SSHPassphrase, askpassScriptPath, config.VMSSHUser, config.VMSSHUser) + + // Run the agent setup script + agentOutput, err := sshClient.Exec(ctx, fmt.Sprintf("sudo -u %s bash -c %s", config.VMSSHUser, fmt.Sprintf("'%s'", setupAgentScript))) + if err != nil { + // Clean up askpass script on error + _, _ = sshClient.Exec(ctx, fmt.Sprintf("sudo rm -f %s", askpassScriptPath)) + return fmt.Errorf("failed to setup ssh-agent: %w\nOutput: %s", err, agentOutput) + } + + // Extract the actual SSH_AUTH_SOCK path from output (last line) + agentSocketLines := strings.Split(strings.TrimSpace(agentOutput), "\n") + actualAgentSocket := agentSocketPath // Default to our specified path + if len(agentSocketLines) > 0 { + lastLine := strings.TrimSpace(agentSocketLines[len(agentSocketLines)-1]) + if lastLine != "" && strings.HasPrefix(lastLine, "/") { + actualAgentSocket = lastLine + } + } + + // Make the socket readable by root (needed when docker runs with sudo) + // This allows the docker process (running as root) to access the socket + chmodCmd := fmt.Sprintf("sudo chmod 666 %s 2>/dev/null || true", actualAgentSocket) + _, _ = sshClient.Exec(ctx, chmodCmd) - // Step 2: Run dhctl bootstrap command with output redirected to log file - // Note: Removed -it flags since output is redirected and we don't need interactive terminal - // Command: docker run --network=host --pull=always -v "/home/$VM_SSH_User/config.yml:/config.yml" -v "/home/$VM_SSH_User/.ssh:/tmp/.ssh" $REGISTRY_REPO/install:$DEV_BRANCH dhctl bootstrap --ssh-host=$IP_OF_MASTER_VM --ssh-user=$VM_SSH_User --ssh-agent-private-keys=/tmp/.ssh/id_rsa --config=/config.yml > $REMOTE_LOG_PATH 2>&1 + // Step 3: Run dhctl bootstrap command with ssh-agent + // Mount SSH_AUTH_SOCK into the container and use it for authentication + // Note: We don't use --ssh-agent-private-keys anymore, dhctl will use 
SSH_AUTH_SOCK + // Docker needs to run with sudo for access to docker socket installImage := fmt.Sprintf("%s/install:%s", registryRepo, devBranch) bootstrapCmd := fmt.Sprintf( - "sudo docker run --network=host --pull=always -v \"/home/%s/config.yml:/config.yml\" -v \"/home/%s/.ssh:/tmp/.ssh\" %s dhctl bootstrap --ssh-host=%s --ssh-user=%s --ssh-agent-private-keys=/tmp/.ssh/id_rsa --config=/config.yml > %s 2>&1", - config.VMSSHUser, config.VMSSHUser, installImage, masterIP, config.VMSSHUser, remoteLogPath, + "sudo -u %s bash -c 'export SSH_AUTH_SOCK=%s; sudo docker run --network=host --pull=always -v \"/home/%s/config.yml:/config.yml\" -v \"%s:/tmp/ssh-agent.sock\" -e SSH_AUTH_SOCK=/tmp/ssh-agent.sock %s dhctl bootstrap --ssh-host=%s --ssh-user=%s --config=/config.yml > %s 2>&1'", + config.VMSSHUser, actualAgentSocket, config.VMSSHUser, actualAgentSocket, installImage, masterIP, config.VMSSHUser, remoteLogPath, ) // Run the bootstrap command (this can take up to 30 minutes) // Output is redirected to remote log file, so output variable will be empty output, err = sshClient.Exec(ctx, bootstrapCmd) + // Clean up ssh-agent and askpass script after bootstrap (whether success or failure) + cleanupAgentCmd := fmt.Sprintf("sudo -u %s bash -c 'SSH_AUTH_SOCK=%s ssh-agent -k 2>/dev/null || true; rm -f %s %s 2>/dev/null || true'", config.VMSSHUser, actualAgentSocket, actualAgentSocket, askpassScriptPath) + _, _ = sshClient.Exec(ctx, cleanupAgentCmd) + // Always download log file from remote host (whether success or failure) // Use sudo cat since the log file was created with sudo logContent, logErr := sshClient.Exec(ctx, fmt.Sprintf("sudo cat %s 2>/dev/null || echo ''", remoteLogPath)) From f14dba59ef8b3d70ddb9d4179133538a1cfebfc5 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 18 Dec 2025 18:13:29 +0300 Subject: [PATCH 28/48] Re-establishing ssh tunnel to remove bootstrap VM --- .../cluster_creation_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff 
--git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 3b574d5..7fdd546 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -115,6 +115,22 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { + // Step 0: Re-establish SSH tunnel if needed for VM cleanup + // The tunnel might have been stopped in Step 9, but we need it for VM cleanup + if tunnelinfo == nil && sshclient != nil { + GinkgoWriter.Printf(" ▶️ Re-establishing SSH tunnel for VM cleanup...\n") + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + var tunnelErr error + tunnelinfo, tunnelErr = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") + cancel() + if tunnelErr != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to re-establish SSH tunnel: %v\n", tunnelErr) + GinkgoWriter.Printf(" ⚠️ VM cleanup will be skipped due to missing tunnel\n") + } else { + GinkgoWriter.Printf(" ✅ SSH tunnel re-established on local port: %d\n", tunnelinfo.LocalPort) + } + } + // Step 1: Cleanup setup VM (needs API access via SSH tunnel) vmRes := vmResources if vmRes != nil && vmRes.SetupVMName != "" { From fcda22aa026db6a66427fa9ae5f26e810b1796eb Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 18 Dec 2025 18:40:44 +0300 Subject: [PATCH 29/48] Connection to test cluster established and simple health check implemented. 
--- internal/kubernetes/apps/deployment.go | 36 +++++- internal/kubernetes/core/pod.go | 61 ++++++++- pkg/cluster/cluster.go | 121 ++++++++++++++++++ .../cluster_creation_test.go | 37 ++++++ 4 files changed, 253 insertions(+), 2 deletions(-) diff --git a/internal/kubernetes/apps/deployment.go b/internal/kubernetes/apps/deployment.go index c7b48b7..c35a7d4 100644 --- a/internal/kubernetes/apps/deployment.go +++ b/internal/kubernetes/apps/deployment.go @@ -16,5 +16,39 @@ limitations under the License. package apps -// TODO: Implement deployment operations +import ( + "context" + "fmt" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// DeploymentClient provides operations on Deployment resources +type DeploymentClient interface { + Get(ctx context.Context, namespace, name string) (*appsv1.Deployment, error) +} + +type deploymentClient struct { + client kubernetes.Interface +} + +// NewDeploymentClient creates a new deployment client from a rest.Config +func NewDeploymentClient(config *rest.Config) (DeploymentClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &deploymentClient{client: clientset}, nil +} + +// Get retrieves a deployment by namespace and name +func (c *deploymentClient) Get(ctx context.Context, namespace, name string) (*appsv1.Deployment, error) { + deployment, err := c.client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get deployment %s/%s: %w", namespace, name, err) + } + return deployment, nil +} diff --git a/internal/kubernetes/core/pod.go b/internal/kubernetes/core/pod.go index f3e160c..a1f00de 100644 --- a/internal/kubernetes/core/pod.go +++ b/internal/kubernetes/core/pod.go @@ -16,5 +16,64 @@ limitations under the License. 
package core -// TODO: Implement pod operations +import ( + "context" + "fmt" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// PodClient provides operations on Pod resources +type PodClient interface { + ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PodList, error) + IsRunning(ctx context.Context, pod *corev1.Pod) bool + AllContainersReady(ctx context.Context, pod *corev1.Pod) bool +} + +type podClient struct { + client kubernetes.Interface +} + +// NewPodClient creates a new pod client from a rest.Config +func NewPodClient(config *rest.Config) (PodClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &podClient{client: clientset}, nil +} + +// ListByLabelSelector lists pods in a namespace matching the label selector +func (c *podClient) ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PodList, error) { + pods, err := c.client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return nil, fmt.Errorf("failed to list pods in namespace %s with selector %s: %w", namespace, labelSelector, err) + } + return pods, nil +} + +// IsRunning checks if a pod is in Running phase +func (c *podClient) IsRunning(ctx context.Context, pod *corev1.Pod) bool { + return pod.Status.Phase == corev1.PodRunning +} + +// AllContainersReady checks if all containers in a pod are ready +func (c *podClient) AllContainersReady(ctx context.Context, pod *corev1.Pod) bool { + if len(pod.Spec.Containers) == 0 { + return false + } + if len(pod.Status.ContainerStatuses) != len(pod.Spec.Containers) { + return false + } + for _, status := range pod.Status.ContainerStatuses { + if !status.Ready { + return false + } + } + return true +} diff --git 
a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 2da5a7d..2f95050 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -21,11 +21,15 @@ import ( "fmt" "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" internalcluster "github.com/deckhouse/storage-e2e/internal/cluster" "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" + "github.com/deckhouse/storage-e2e/internal/kubernetes/apps" + "github.com/deckhouse/storage-e2e/internal/kubernetes/core" ) // TestClusterResources holds all resources created for a test cluster connection @@ -123,3 +127,120 @@ func CleanupTestCluster(resources *TestClusterResources) error { return nil } + +// CheckClusterHealth checks if the deckhouse deployment pod is running with 2/2 ready replicas +// in the d8-system namespace. This function is widely used to check cluster health after certain steps. +func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { + namespace := "d8-system" + deploymentName := "deckhouse" + + // Create deployment client + deploymentClient, err := apps.NewDeploymentClient(kubeconfig) + if err != nil { + return fmt.Errorf("failed to create deployment client: %w", err) + } + + // Get the deployment + deployment, err := deploymentClient.Get(ctx, namespace, deploymentName) + if err != nil { + return fmt.Errorf("failed to get deployment %s/%s: %w", namespace, deploymentName, err) + } + + // Check if deployment has 2 ready replicas + if deployment.Status.ReadyReplicas != 2 { + return fmt.Errorf("deployment %s/%s has %d ready replicas, expected 2", namespace, deploymentName, deployment.Status.ReadyReplicas) + } + + // Create pod client + podClient, err := core.NewPodClient(kubeconfig) + if err != nil { + return fmt.Errorf("failed to create pod client: %w", err) + } + + // Get pods for the deployment using the deployment's selector + labelSelector := 
metav1.FormatLabelSelector(deployment.Spec.Selector) + pods, err := podClient.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + return fmt.Errorf("failed to list pods for deployment %s/%s: %w", namespace, deploymentName, err) + } + + // Check that we have exactly 2 pods and both are running + if len(pods.Items) != 1 { + return fmt.Errorf("expected 1 pods for deployment %s/%s, found %d", namespace, deploymentName, len(pods.Items)) + } + + // Check each pod is running and all containers are ready + for _, pod := range pods.Items { + if !podClient.IsRunning(ctx, &pod) { + return fmt.Errorf("pod %s/%s is not running (phase: %s)", namespace, pod.Name, pod.Status.Phase) + } + + if !podClient.AllContainersReady(ctx, &pod) { + return fmt.Errorf("pod %s/%s does not have all containers ready", namespace, pod.Name) + } + } + + return nil +} + +// ConnectToCluster establishes SSH connection to the test cluster master through the base cluster master, +// retrieves kubeconfig, and sets up port forwarding tunnel. +// The SSH tunnel remains active after this function returns (it's stored in the returned resources). +// Returns the test cluster resources including the tunnel that must be kept alive. +// Note: This function does NOT check cluster health - use CheckClusterHealth() for that. 
+func ConnectToCluster(ctx context.Context, baseSSHClient ssh.SSHClient, testClusterMasterIP string) (*TestClusterResources, error) { + if baseSSHClient == nil { + return nil, fmt.Errorf("baseSSHClient cannot be nil") + } + if testClusterMasterIP == "" { + return nil, fmt.Errorf("testClusterMasterIP cannot be empty") + } + + // Step 1: Create SSH client to test cluster master through base cluster master (jump host) + testSSHClient, err := ssh.NewClientWithJumpHost( + config.SSHUser, config.SSHHost, config.SSHKeyPath, // jump host (base cluster master) + config.VMSSHUser, testClusterMasterIP, config.SSHKeyPath, // target (test cluster master) + ) + if err != nil { + return nil, fmt.Errorf("failed to create SSH client to test cluster master: %w", err) + } + + // Step 2: Establish SSH tunnel with port forwarding 6445:127.0.0.1:6445 + tunnelInfo, err := ssh.EstablishSSHTunnel(ctx, testSSHClient, "6445") + if err != nil { + testSSHClient.Close() + return nil, fmt.Errorf("failed to establish SSH tunnel to test cluster: %w", err) + } + + // Step 3: Get kubeconfig from test cluster master + _, kubeconfigPath, err := internalcluster.GetKubeconfig(ctx, testClusterMasterIP, config.VMSSHUser, config.SSHKeyPath, testSSHClient) + if err != nil { + tunnelInfo.StopFunc() + testSSHClient.Close() + return nil, fmt.Errorf("failed to get kubeconfig from test cluster: %w", err) + } + + // Step 4: Update kubeconfig to use the tunnel port (6445) + if err := internalcluster.UpdateKubeconfigPort(kubeconfigPath, tunnelInfo.LocalPort); err != nil { + tunnelInfo.StopFunc() + testSSHClient.Close() + return nil, fmt.Errorf("failed to update kubeconfig port: %w", err) + } + + // Rebuild rest.Config from updated kubeconfig file + kubeconfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + tunnelInfo.StopFunc() + testSSHClient.Close() + return nil, fmt.Errorf("failed to rebuild kubeconfig from file: %w", err) + } + + // Return resources with active tunnel + // Note: 
The test will use Eventually to check cluster health with CheckClusterHealth + return &TestClusterResources{ + SSHClient: testSSHClient, + Kubeconfig: kubeconfig, + KubeconfigPath: kubeconfigPath, + TunnelInfo: tunnelInfo, + }, nil +} diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 7fdd546..61c9ba8 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -196,6 +196,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) // BeforeAll + // TODO: Steps 3-5 can be joined into one using cluster.ConnectToCluster function // Step 3: Establish SSH connection to base cluster (reused for getting kubeconfig) It("should establish ssh connection to the base cluster", func() { By(fmt.Sprintf("Connecting to %s@%s using key %s", config.SSHUser, config.SSHHost, config.SSHKeyPath), func() { @@ -442,4 +443,40 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ Cluster bootstrap completed successfully\n") }) }) + + // Step 14: Verify cluster is ready + It("should verify cluster is ready", func() { + By("Verifying cluster is ready", func() { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + namespace := config.TestClusterNamespace + firstMasterHostname := clusterDefinition.Masters[0].Hostname + + // Get master IP address + masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) + Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) + Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) + + GinkgoWriter.Printf(" ▶️ Verifying cluster readiness for master %s (%s)\n", firstMasterHostname, masterIP) + + // Step 1: Establish connection to test cluster (tunnel and kubeconfig) + 
testClusterResources, err := cluster.ConnectToCluster(ctx, sshclient, masterIP) + Expect(err).NotTo(HaveOccurred(), "Failed to establish connection to test cluster") + Expect(testClusterResources).NotTo(BeNil()) + Expect(testClusterResources.TunnelInfo).NotTo(BeNil(), "Tunnel must remain active") + + GinkgoWriter.Printf(" ✅ Connection established, kubeconfig saved to: %s\n", testClusterResources.KubeconfigPath) + GinkgoWriter.Printf(" ✅ SSH tunnel active on local port: %d\n", testClusterResources.TunnelInfo.LocalPort) + + // Step 2: Check cluster health with Eventually (wait up to 10 minutes for deckhouse to be ready) + GinkgoWriter.Printf(" ⏱️ Waiting for deckhouse deployment to become ready (2/2 pods running)...\n") + Eventually(func() error { + return cluster.CheckClusterHealth(ctx, testClusterResources.Kubeconfig) + }).WithTimeout(10*time.Minute).WithPolling(20*time.Second).Should(Succeed(), + "Deckhouse deployment should have 2/2 pods running within 10 minutes") + + GinkgoWriter.Printf(" ✅ Cluster is ready (deckhouse deployment: 2/2 pods running)\n") + }) + }) }) // Describe: Cluster Creation From 4cabc301d1c8795e72b8d0dd44b3323e0dce392d Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 19 Dec 2025 12:21:58 +0300 Subject: [PATCH 30/48] Removed unneded interfaces, added stress-tests and fix for CheckClusterHealth - 1 --- 3par_diagnostic_guide.md | 246 ++++ internal/kubernetes/apps/deployment.go | 12 +- internal/kubernetes/core/namespace.go | 15 +- internal/kubernetes/core/pod.go | 18 +- internal/kubernetes/storage/pvc.go | 217 ++- internal/kubernetes/storage/volumesnapshot.go | 181 +++ internal/kubernetes/virtualization/client.go | 20 +- .../virtualization/cluster_virtual_image.go | 25 +- .../kubernetes/virtualization/virtual_disk.go | 25 +- .../virtualization/virtual_image.go | 26 +- .../virtualization/virtual_machine.go | 25 +- .../virtualization/vm_block_device.go | 25 +- pkg/testkit/stress-tests.go | 1233 +++++++++++++++++ 
.../cluster_creation_test.go | 32 +- 14 files changed, 1974 insertions(+), 126 deletions(-) create mode 100644 3par_diagnostic_guide.md create mode 100644 internal/kubernetes/storage/volumesnapshot.go create mode 100644 pkg/testkit/stress-tests.go diff --git a/3par_diagnostic_guide.md b/3par_diagnostic_guide.md new file mode 100644 index 0000000..119ea74 --- /dev/null +++ b/3par_diagnostic_guide.md @@ -0,0 +1,246 @@ +# HPE 3PAR SAN Diagnostic Guide + +## Current System Status Analysis + +Based on your `showversion` and `showsys` output: + +**System Information:** +- Model: HPE 3PAR 8400 +- Serial: CZ3811SC9P +- Version: 3.3.1 (MU3) with patches P50-P128 +- Nodes: 2 nodes (Master: Node 0) +- Cluster LED: **Off** (this is normal when system is healthy) + +**Capacity Status:** +- Total Capacity: 14,639,104 MB (~14.6 TB) +- Allocated: 4,809,728 MB (~4.8 TB) +- Free: 9,829,376 MB (~9.8 TB) +- Failed: 0 MB ✅ (Good - no failed capacity) + +**Initial Assessment:** The system appears healthy based on basic metrics (no failed capacity, proper node count). + +## Diagnostic Commands to Run + +Run these commands in order to get a complete health picture: + +### 1. System Health and Status +```bash +# Overall system health +showsys -health + +# Detailed system status +showsys -d + +# System statistics +showsys -space + +# Check for any alerts or events +showeventlog -min 60 # Last 60 minutes +showeventlog -alert # All alerts +``` + +### 2. Node Status +```bash +# Check node health +shownode -d + +# Node statistics +shownode -stat + +# Check if all nodes are up and healthy +shownode -state +``` + +### 3. Disk and Cage Status +```bash +# Check all physical disks +showpd -d + +# Check for failed or degraded disks +showpd -failed +showpd -degraded + +# Check disk cage status +showcage -d + +# Check disk statistics +showpd -stat +``` + +### 4. 
CPG (Common Provisioning Group) Status +```bash +# List all CPGs +showcpg -d + +# Check CPG space usage +showcpg -space + +# Check CPG growth history +showcpg -hist +``` + +### 5. Volume Status +```bash +# List all volumes +showvv -d + +# Check volume statistics +showvv -stat + +# Check for degraded volumes +showvv -degraded + +# Check volume space usage +showvv -space +``` + +### 6. Network and Port Status +```bash +# Check FC/iSCSI ports +showport -d + +# Check port statistics +showport -stat + +# Check for failed ports +showport -failed +``` + +### 7. Service Processor (SP) Status +```bash +# Check service processor status +showsp -d + +# Check SP network connectivity +showsp -net +``` + +### 8. Performance and Statistics +```bash +# System statistics +statcpu +statport +statpd +statvv +statrcopy + +# Real-time statistics (run for 30-60 seconds) +statcpu -iter 1 -rw 1 +statport -iter 1 -rw 1 +``` + +### 9. Replication Status (if configured) +```bash +# Check remote copy status +showrcopy -d + +# Check remote copy groups +showrcopygroup -d +``` + +### 10. 
Check for Known Issues +```bash +# Check for any service issues +showservice -d + +# Check license status +showlicense + +# Check firmware versions +showversion -d +``` + +## Key Health Indicators to Check + +### ✅ Healthy Indicators: +- All nodes show "UP" status +- No failed physical disks (showpd -failed returns empty) +- No degraded volumes (showvv -degraded returns empty) +- All ports show "Ready" status +- No critical alerts in event log +- Cluster LED is Off (normal) +- Failed capacity is 0 MB + +### ⚠️ Warning Indicators: +- Any node showing "DOWN" or "DEGRADED" +- Failed or degraded physical disks +- Degraded volumes +- Ports showing "FAILED" or "DEGRADED" +- Critical alerts in event log +- High latency in statistics +- Unusual capacity growth patterns + +### 🔴 Critical Issues: +- Multiple nodes down +- Multiple failed disks +- System unresponsive +- Data unavailability +- Replication failures (if configured) + +## Quick Health Check Script + +Save this as a script and run it: + +```bash +#!/bin/bash +echo "=== 3PAR Health Check ===" +echo "" +echo "1. System Health:" +showsys -health +echo "" +echo "2. Node Status:" +shownode -d +echo "" +echo "3. Failed Disks:" +showpd -failed +echo "" +echo "4. Degraded Volumes:" +showvv -degraded +echo "" +echo "5. Recent Alerts (last hour):" +showeventlog -min 60 -alert +echo "" +echo "6. Port Status:" +showport -d | grep -E "(Port|State|Status)" +echo "" +echo "=== Health Check Complete ===" +``` + +## Next Steps + +1. **Run the diagnostic commands** above and collect output +2. **Review the event log** for any recent errors or warnings +3. **Check performance statistics** if you're experiencing performance issues +4. **Verify backups/replication** if configured +5. 
**Document baseline metrics** for future comparison + +## Common Issues and Solutions + +### Issue: High Latency +- Check port statistics: `statport` +- Check disk statistics: `statpd` +- Review CPG layout and disk types + +### Issue: Capacity Warnings +- Check CPG growth: `showcpg -hist` +- Review volume space: `showvv -space` +- Consider expanding or cleaning up unused volumes + +### Issue: Node Degradation +- Check node status: `shownode -d` +- Review event log: `showeventlog` +- May require node replacement or service call + +### Issue: Disk Failures +- Check failed disks: `showpd -failed` +- Verify spare disks are available +- Plan for disk replacement + +## Support Information + +If issues are found: +- Collect all diagnostic output +- Note the exact error messages +- Check HPE support portal for known issues +- Contact HPE support with system serial: CZ3811SC9P + diff --git a/internal/kubernetes/apps/deployment.go b/internal/kubernetes/apps/deployment.go index c35a7d4..7ad4865 100644 --- a/internal/kubernetes/apps/deployment.go +++ b/internal/kubernetes/apps/deployment.go @@ -27,25 +27,21 @@ import ( ) // DeploymentClient provides operations on Deployment resources -type DeploymentClient interface { - Get(ctx context.Context, namespace, name string) (*appsv1.Deployment, error) -} - -type deploymentClient struct { +type DeploymentClient struct { client kubernetes.Interface } // NewDeploymentClient creates a new deployment client from a rest.Config -func NewDeploymentClient(config *rest.Config) (DeploymentClient, error) { +func NewDeploymentClient(config *rest.Config) (*DeploymentClient, error) { clientset, err := kubernetes.NewForConfig(config) if err != nil { return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) } - return &deploymentClient{client: clientset}, nil + return &DeploymentClient{client: clientset}, nil } // Get retrieves a deployment by namespace and name -func (c *deploymentClient) Get(ctx context.Context, namespace, name 
string) (*appsv1.Deployment, error) { +func (c *DeploymentClient) Get(ctx context.Context, namespace, name string) (*appsv1.Deployment, error) { deployment, err := c.client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("failed to get deployment %s/%s: %w", namespace, name, err) diff --git a/internal/kubernetes/core/namespace.go b/internal/kubernetes/core/namespace.go index 8041b7c..73c5cdf 100644 --- a/internal/kubernetes/core/namespace.go +++ b/internal/kubernetes/core/namespace.go @@ -27,26 +27,21 @@ import ( ) // NamespaceClient provides operations on Namespace resources -type NamespaceClient interface { - Get(ctx context.Context, name string) (*corev1.Namespace, error) - Create(ctx context.Context, name string) (*corev1.Namespace, error) -} - -type namespaceClient struct { +type NamespaceClient struct { client kubernetes.Interface } // NewNamespaceClient creates a new namespace client from a rest.Config -func NewNamespaceClient(config *rest.Config) (NamespaceClient, error) { +func NewNamespaceClient(config *rest.Config) (*NamespaceClient, error) { clientset, err := kubernetes.NewForConfig(config) if err != nil { return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) } - return &namespaceClient{client: clientset}, nil + return &NamespaceClient{client: clientset}, nil } // Get retrieves a namespace by name -func (c *namespaceClient) Get(ctx context.Context, name string) (*corev1.Namespace, error) { +func (c *NamespaceClient) Get(ctx context.Context, name string) (*corev1.Namespace, error) { ns, err := c.client.CoreV1().Namespaces().Get(ctx, name, metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("failed to get namespace %s: %w", name, err) @@ -55,7 +50,7 @@ func (c *namespaceClient) Get(ctx context.Context, name string) (*corev1.Namespa } // Create creates a new namespace -func (c *namespaceClient) Create(ctx context.Context, name string) (*corev1.Namespace, error) { +func (c 
*NamespaceClient) Create(ctx context.Context, name string) (*corev1.Namespace, error) { ns := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: name, diff --git a/internal/kubernetes/core/pod.go b/internal/kubernetes/core/pod.go index a1f00de..2acb1e1 100644 --- a/internal/kubernetes/core/pod.go +++ b/internal/kubernetes/core/pod.go @@ -27,27 +27,21 @@ import ( ) // PodClient provides operations on Pod resources -type PodClient interface { - ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PodList, error) - IsRunning(ctx context.Context, pod *corev1.Pod) bool - AllContainersReady(ctx context.Context, pod *corev1.Pod) bool -} - -type podClient struct { +type PodClient struct { client kubernetes.Interface } // NewPodClient creates a new pod client from a rest.Config -func NewPodClient(config *rest.Config) (PodClient, error) { +func NewPodClient(config *rest.Config) (*PodClient, error) { clientset, err := kubernetes.NewForConfig(config) if err != nil { return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) } - return &podClient{client: clientset}, nil + return &PodClient{client: clientset}, nil } // ListByLabelSelector lists pods in a namespace matching the label selector -func (c *podClient) ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PodList, error) { +func (c *PodClient) ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PodList, error) { pods, err := c.client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ LabelSelector: labelSelector, }) @@ -58,12 +52,12 @@ func (c *podClient) ListByLabelSelector(ctx context.Context, namespace, labelSel } // IsRunning checks if a pod is in Running phase -func (c *podClient) IsRunning(ctx context.Context, pod *corev1.Pod) bool { +func (c *PodClient) IsRunning(ctx context.Context, pod *corev1.Pod) bool { return pod.Status.Phase == corev1.PodRunning } // AllContainersReady checks if all containers 
in a pod are ready -func (c *podClient) AllContainersReady(ctx context.Context, pod *corev1.Pod) bool { +func (c *PodClient) AllContainersReady(ctx context.Context, pod *corev1.Pod) bool { if len(pod.Spec.Containers) == 0 { return false } diff --git a/internal/kubernetes/storage/pvc.go b/internal/kubernetes/storage/pvc.go index 6412ff9..cb60c21 100644 --- a/internal/kubernetes/storage/pvc.go +++ b/internal/kubernetes/storage/pvc.go @@ -16,5 +16,220 @@ limitations under the License. package storage -// TODO: Implement PVC operations +import ( + "context" + "encoding/json" + "fmt" + "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// PVCClient provides operations on PersistentVolumeClaim resources +type PVCClient struct { + client kubernetes.Interface +} + +// NewPVCClient creates a new PVC client from a rest.Config +func NewPVCClient(config *rest.Config) (*PVCClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &PVCClient{client: clientset}, nil +} + +// Create creates a new PVC +func (c *PVCClient) Create(ctx context.Context, namespace string, pvc *corev1.PersistentVolumeClaim) (*corev1.PersistentVolumeClaim, error) { + created, err := c.client.CoreV1().PersistentVolumeClaims(namespace).Create(ctx, pvc, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create PVC %s/%s: %w", namespace, pvc.Name, err) + } + return created, nil +} + +// Get retrieves a PVC by namespace and name +func (c *PVCClient) Get(ctx context.Context, namespace, name string) (*corev1.PersistentVolumeClaim, error) { + pvc, err := c.client.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get PVC 
%s/%s: %w", namespace, name, err) + } + return pvc, nil +} + +// ListByLabelSelector lists PVCs in a namespace matching the label selector +func (c *PVCClient) ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*corev1.PersistentVolumeClaimList, error) { + pvcs, err := c.client.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return nil, fmt.Errorf("failed to list PVCs in namespace %s with selector %s: %w", namespace, labelSelector, err) + } + return pvcs, nil +} + +// Resize resizes a PVC to a new size +func (c *PVCClient) Resize(ctx context.Context, namespace, name, newSize string) error { + patch := []map[string]interface{}{ + { + "op": "replace", + "path": "/spec/resources/requests/storage", + "value": newSize, + }, + } + patchBytes, err := json.Marshal(patch) + if err != nil { + return fmt.Errorf("failed to marshal patch: %w", err) + } + + _, err = c.client.CoreV1().PersistentVolumeClaims(namespace).Patch( + ctx, + name, + types.JSONPatchType, + patchBytes, + metav1.PatchOptions{}, + ) + if err != nil { + return fmt.Errorf("failed to resize PVC %s/%s: %w", namespace, name, err) + } + return nil +} + +// ResizeList resizes multiple PVCs to a new size +func (c *PVCClient) ResizeList(ctx context.Context, namespace string, pvcNames []string, newSize string) error { + for _, name := range pvcNames { + if err := c.Resize(ctx, namespace, name, newSize); err != nil { + return fmt.Errorf("failed to resize PVC %s: %w", name, err) + } + } + return nil +} + +// WaitForBound waits for PVCs matching the label selector to be in Bound state +func (c *PVCClient) WaitForBound(ctx context.Context, namespace, labelSelector string, expectedCount int, maxAttempts int, interval time.Duration) error { + attempt := 0 + for { + pvcs, err := c.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + return err + } + + boundCount := 0 + for _, pvc := range pvcs.Items { + 
if pvc.Status.Phase == corev1.ClaimBound { + boundCount++ + } + } + + if boundCount >= expectedCount { + return nil + } + + if boundCount > 0 { + attempt++ + } + + if maxAttempts > 0 && attempt >= maxAttempts { + return fmt.Errorf("timeout waiting for PVCs to be bound: %d/%d bound after %d attempts", boundCount, expectedCount, maxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} + +// WaitForResize waits for PVCs to be resized to the target size +func (c *PVCClient) WaitForResize(ctx context.Context, namespace string, pvcNames []string, targetSize string, maxAttempts int, interval time.Duration) error { + attempt := 0 + targetQuantity, err := resource.ParseQuantity(targetSize) + if err != nil { + return fmt.Errorf("invalid target size %s: %w", targetSize, err) + } + + for { + resizedCount := 0 + for _, name := range pvcNames { + pvc, err := c.Get(ctx, namespace, name) + if err != nil { + return err + } + + if pvc.Status.Capacity != nil { + if currentSize, ok := pvc.Status.Capacity[corev1.ResourceStorage]; ok { + if currentSize.Equal(targetQuantity) { + resizedCount++ + } + } + } + } + + if resizedCount == len(pvcNames) { + return nil + } + + if resizedCount > 0 { + attempt++ + } + + if maxAttempts > 0 && attempt >= maxAttempts { + return fmt.Errorf("timeout waiting for PVCs to be resized: %d/%d resized after %d attempts", resizedCount, len(pvcNames), maxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} + +// Delete deletes a PVC +func (c *PVCClient) Delete(ctx context.Context, namespace, name string) error { + err := c.client.CoreV1().PersistentVolumeClaims(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("failed to delete PVC %s/%s: %w", namespace, name, err) + } + return nil +} + +// DeleteByLabelSelector deletes all PVCs matching the label selector +func (c *PVCClient) DeleteByLabelSelector(ctx 
context.Context, namespace, labelSelector string) error { + return c.client.CoreV1().PersistentVolumeClaims(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{ + LabelSelector: labelSelector, + }) +} + +// WaitForDeletion waits for PVCs matching the label selector to be deleted +func (c *PVCClient) WaitForDeletion(ctx context.Context, namespace, labelSelector string, maxAttempts int, interval time.Duration) error { + attempt := 0 + for { + pvcs, err := c.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + // If listing fails, assume PVCs are deleted + return nil + } + + if len(pvcs.Items) == 0 { + return nil + } + + attempt++ + if maxAttempts > 0 && attempt >= maxAttempts { + return fmt.Errorf("timeout waiting for PVCs to be deleted: %d remaining after %d attempts", len(pvcs.Items), maxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} diff --git a/internal/kubernetes/storage/volumesnapshot.go b/internal/kubernetes/storage/volumesnapshot.go new file mode 100644 index 0000000..99fd8d3 --- /dev/null +++ b/internal/kubernetes/storage/volumesnapshot.go @@ -0,0 +1,181 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package storage + +import ( + "context" + "fmt" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" +) + +var ( + // VolumeSnapshotGVR is the GroupVersionResource for VolumeSnapshot + VolumeSnapshotGVR = schema.GroupVersionResource{ + Group: "snapshot.storage.k8s.io", + Version: "v1", + Resource: "volumesnapshots", + } +) + +// VolumeSnapshotClient provides operations on VolumeSnapshot resources +type VolumeSnapshotClient struct { + client dynamic.Interface +} + +// NewVolumeSnapshotClient creates a new VolumeSnapshot client from a rest.Config +func NewVolumeSnapshotClient(config *rest.Config) (*VolumeSnapshotClient, error) { + client, err := dynamic.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + return &VolumeSnapshotClient{client: client}, nil +} + +// Create creates a new VolumeSnapshot +func (c *VolumeSnapshotClient) Create(ctx context.Context, namespace string, snapshot *unstructured.Unstructured) (*unstructured.Unstructured, error) { + created, err := c.client.Resource(VolumeSnapshotGVR).Namespace(namespace).Create(ctx, snapshot, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create VolumeSnapshot %s/%s: %w", namespace, snapshot.GetName(), err) + } + return created, nil +} + +// Get retrieves a VolumeSnapshot by namespace and name +func (c *VolumeSnapshotClient) Get(ctx context.Context, namespace, name string) (*unstructured.Unstructured, error) { + snapshot, err := c.client.Resource(VolumeSnapshotGVR).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get VolumeSnapshot %s/%s: %w", namespace, name, err) + } + return snapshot, nil +} + +// ListByLabelSelector lists VolumeSnapshots in a namespace matching the label selector +func (c 
*VolumeSnapshotClient) ListByLabelSelector(ctx context.Context, namespace, labelSelector string) (*unstructured.UnstructuredList, error) { + snapshots, err := c.client.Resource(VolumeSnapshotGVR).Namespace(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return nil, fmt.Errorf("failed to list VolumeSnapshots in namespace %s with selector %s: %w", namespace, labelSelector, err) + } + return snapshots, nil +} + +// IsReady checks if a VolumeSnapshot is ready to use +func (c *VolumeSnapshotClient) IsReady(snapshot *unstructured.Unstructured) bool { + status, found, err := unstructured.NestedMap(snapshot.Object, "status") + if !found || err != nil { + return false + } + readyToUse, found, err := unstructured.NestedBool(status, "readyToUse") + if !found || err != nil { + return false + } + return readyToUse +} + +// WaitForReady waits for VolumeSnapshots matching the label selector to be ready +func (c *VolumeSnapshotClient) WaitForReady(ctx context.Context, namespace, labelSelector string, expectedCount int, maxAttempts int, interval time.Duration) error { + attempt := 0 + for { + snapshots, err := c.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + return err + } + + readyCount := 0 + for _, snapshot := range snapshots.Items { + if c.IsReady(&snapshot) { + readyCount++ + } + } + + if readyCount >= expectedCount { + return nil + } + + if readyCount > 0 { + attempt++ + } + + if maxAttempts > 0 && attempt >= maxAttempts { + return fmt.Errorf("timeout waiting for VolumeSnapshots to be ready: %d/%d ready after %d attempts", readyCount, expectedCount, maxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} + +// Delete deletes a VolumeSnapshot +func (c *VolumeSnapshotClient) Delete(ctx context.Context, namespace, name string) error { + err := c.client.Resource(VolumeSnapshotGVR).Namespace(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err 
!= nil { + return fmt.Errorf("failed to delete VolumeSnapshot %s/%s: %w", namespace, name, err) + } + return nil +} + +// DeleteByLabelSelector deletes all VolumeSnapshots matching the label selector +func (c *VolumeSnapshotClient) DeleteByLabelSelector(ctx context.Context, namespace, labelSelector string) error { + snapshots, err := c.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + return err + } + + for _, snapshot := range snapshots.Items { + if err := c.Delete(ctx, namespace, snapshot.GetName()); err != nil { + return fmt.Errorf("failed to delete VolumeSnapshot %s: %w", snapshot.GetName(), err) + } + } + return nil +} + +// WaitForDeletion waits for VolumeSnapshots matching the label selector to be deleted +func (c *VolumeSnapshotClient) WaitForDeletion(ctx context.Context, namespace, labelSelector string, maxAttempts int, interval time.Duration) error { + attempt := 0 + for { + snapshots, err := c.ListByLabelSelector(ctx, namespace, labelSelector) + if err != nil { + // If listing fails, assume snapshots are deleted + return nil + } + + if len(snapshots.Items) == 0 { + return nil + } + + attempt++ + if maxAttempts > 0 && attempt >= maxAttempts { + return fmt.Errorf("timeout waiting for VolumeSnapshots to be deleted: %d remaining after %d attempts", len(snapshots.Items), maxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + } + } +} diff --git a/internal/kubernetes/virtualization/client.go b/internal/kubernetes/virtualization/client.go index ff006f5..ca1b137 100644 --- a/internal/kubernetes/virtualization/client.go +++ b/internal/kubernetes/virtualization/client.go @@ -49,26 +49,26 @@ func NewClient(ctx context.Context, config *rest.Config) (*Client, error) { } // VirtualMachines returns a VirtualMachine client -func (c *Client) VirtualMachines() VirtualMachineClient { - return &virtualMachineClient{client: c.client} +func (c *Client) VirtualMachines() *VirtualMachineClient { + return 
&VirtualMachineClient{client: c.client} } // VirtualDisks returns a VirtualDisk client -func (c *Client) VirtualDisks() VirtualDiskClient { - return &virtualDiskClient{client: c.client} +func (c *Client) VirtualDisks() *VirtualDiskClient { + return &VirtualDiskClient{client: c.client} } // ClusterVirtualImages returns a ClusterVirtualImage client -func (c *Client) ClusterVirtualImages() ClusterVirtualImageClient { - return &clusterVirtualImageClient{client: c.client} +func (c *Client) ClusterVirtualImages() *ClusterVirtualImageClient { + return &ClusterVirtualImageClient{client: c.client} } // VirtualImages returns a VirtualImage client -func (c *Client) VirtualImages() VirtualImageClient { - return &virtualImageClient{client: c.client} +func (c *Client) VirtualImages() *VirtualImageClient { + return &VirtualImageClient{client: c.client} } // VirtualMachineBlockDeviceAttachments returns a VMBD client -func (c *Client) VirtualMachineBlockDeviceAttachments() VMBDClient { - return &vmbdClient{client: c.client} +func (c *Client) VirtualMachineBlockDeviceAttachments() *VMBDClient { + return &VMBDClient{client: c.client} } diff --git a/internal/kubernetes/virtualization/cluster_virtual_image.go b/internal/kubernetes/virtualization/cluster_virtual_image.go index 74d4542..5655e9c 100644 --- a/internal/kubernetes/virtualization/cluster_virtual_image.go +++ b/internal/kubernetes/virtualization/cluster_virtual_image.go @@ -27,19 +27,12 @@ import ( // ClusterVirtualImageClient provides operations on ClusterVirtualImage resources // Note: ClusterVirtualImage is a cluster-scoped resource (no namespace) -type ClusterVirtualImageClient interface { - Get(ctx context.Context, name string) (*v1alpha2.ClusterVirtualImage, error) - List(ctx context.Context) ([]v1alpha2.ClusterVirtualImage, error) - Create(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error - Update(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error - Delete(ctx context.Context, name string) error 
-} - -type clusterVirtualImageClient struct { +type ClusterVirtualImageClient struct { client client.Client } -func (c *clusterVirtualImageClient) Get(ctx context.Context, name string) (*v1alpha2.ClusterVirtualImage, error) { +// Get retrieves a ClusterVirtualImage by name +func (c *ClusterVirtualImageClient) Get(ctx context.Context, name string) (*v1alpha2.ClusterVirtualImage, error) { cvmi := &v1alpha2.ClusterVirtualImage{} key := client.ObjectKey{Name: name} if err := c.client.Get(ctx, key, cvmi); err != nil { @@ -48,7 +41,8 @@ func (c *clusterVirtualImageClient) Get(ctx context.Context, name string) (*v1al return cvmi, nil } -func (c *clusterVirtualImageClient) List(ctx context.Context) ([]v1alpha2.ClusterVirtualImage, error) { +// List lists all ClusterVirtualImages +func (c *ClusterVirtualImageClient) List(ctx context.Context) ([]v1alpha2.ClusterVirtualImage, error) { list := &v1alpha2.ClusterVirtualImageList{} if err := c.client.List(ctx, list); err != nil { return nil, fmt.Errorf("failed to list ClusterVirtualImages: %w", err) @@ -56,21 +50,24 @@ func (c *clusterVirtualImageClient) List(ctx context.Context) ([]v1alpha2.Cluste return list.Items, nil } -func (c *clusterVirtualImageClient) Create(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { +// Create creates a new ClusterVirtualImage +func (c *ClusterVirtualImageClient) Create(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { if err := c.client.Create(ctx, cvmi); err != nil { return fmt.Errorf("failed to create ClusterVirtualImage %s: %w", cvmi.Name, err) } return nil } -func (c *clusterVirtualImageClient) Update(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { +// Update updates an existing ClusterVirtualImage +func (c *ClusterVirtualImageClient) Update(ctx context.Context, cvmi *v1alpha2.ClusterVirtualImage) error { if err := c.client.Update(ctx, cvmi); err != nil { return fmt.Errorf("failed to update ClusterVirtualImage %s: %w", cvmi.Name, err) } return 
nil } -func (c *clusterVirtualImageClient) Delete(ctx context.Context, name string) error { +// Delete deletes a ClusterVirtualImage by name +func (c *ClusterVirtualImageClient) Delete(ctx context.Context, name string) error { cvmi := &v1alpha2.ClusterVirtualImage{ ObjectMeta: metav1.ObjectMeta{ Name: name, diff --git a/internal/kubernetes/virtualization/virtual_disk.go b/internal/kubernetes/virtualization/virtual_disk.go index 96f307a..9589d5a 100644 --- a/internal/kubernetes/virtualization/virtual_disk.go +++ b/internal/kubernetes/virtualization/virtual_disk.go @@ -26,19 +26,12 @@ import ( ) // VirtualDiskClient provides operations on VirtualDisk resources -type VirtualDiskClient interface { - Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualDisk, error) - List(ctx context.Context, namespace string) ([]v1alpha2.VirtualDisk, error) - Create(ctx context.Context, vd *v1alpha2.VirtualDisk) error - Update(ctx context.Context, vd *v1alpha2.VirtualDisk) error - Delete(ctx context.Context, namespace, name string) error -} - -type virtualDiskClient struct { +type VirtualDiskClient struct { client client.Client } -func (c *virtualDiskClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualDisk, error) { +// Get retrieves a VirtualDisk by namespace and name +func (c *VirtualDiskClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualDisk, error) { vd := &v1alpha2.VirtualDisk{} key := client.ObjectKey{Namespace: namespace, Name: name} if err := c.client.Get(ctx, key, vd); err != nil { @@ -47,7 +40,8 @@ func (c *virtualDiskClient) Get(ctx context.Context, namespace, name string) (*v return vd, nil } -func (c *virtualDiskClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualDisk, error) { +// List lists VirtualDisks in a namespace +func (c *VirtualDiskClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualDisk, error) { list := &v1alpha2.VirtualDiskList{} opts := []client.ListOption{} if 
namespace != "" { @@ -59,21 +53,24 @@ func (c *virtualDiskClient) List(ctx context.Context, namespace string) ([]v1alp return list.Items, nil } -func (c *virtualDiskClient) Create(ctx context.Context, vd *v1alpha2.VirtualDisk) error { +// Create creates a new VirtualDisk +func (c *VirtualDiskClient) Create(ctx context.Context, vd *v1alpha2.VirtualDisk) error { if err := c.client.Create(ctx, vd); err != nil { return fmt.Errorf("failed to create VirtualDisk %s/%s: %w", vd.Namespace, vd.Name, err) } return nil } -func (c *virtualDiskClient) Update(ctx context.Context, vd *v1alpha2.VirtualDisk) error { +// Update updates an existing VirtualDisk +func (c *VirtualDiskClient) Update(ctx context.Context, vd *v1alpha2.VirtualDisk) error { if err := c.client.Update(ctx, vd); err != nil { return fmt.Errorf("failed to update VirtualDisk %s/%s: %w", vd.Namespace, vd.Name, err) } return nil } -func (c *virtualDiskClient) Delete(ctx context.Context, namespace, name string) error { +// Delete deletes a VirtualDisk by namespace and name +func (c *VirtualDiskClient) Delete(ctx context.Context, namespace, name string) error { vd := &v1alpha2.VirtualDisk{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, diff --git a/internal/kubernetes/virtualization/virtual_image.go b/internal/kubernetes/virtualization/virtual_image.go index cdfab93..a0a60a2 100644 --- a/internal/kubernetes/virtualization/virtual_image.go +++ b/internal/kubernetes/virtualization/virtual_image.go @@ -27,19 +27,12 @@ import ( // VirtualImageClient provides operations on VirtualImage resources // Note: VirtualImage is a namespace-scoped resource -type VirtualImageClient interface { - Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualImage, error) - List(ctx context.Context, namespace string) ([]v1alpha2.VirtualImage, error) - Create(ctx context.Context, vi *v1alpha2.VirtualImage) error - Update(ctx context.Context, vi *v1alpha2.VirtualImage) error - Delete(ctx context.Context, namespace, name 
string) error -} - -type virtualImageClient struct { +type VirtualImageClient struct { client client.Client } -func (c *virtualImageClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualImage, error) { +// Get retrieves a VirtualImage by namespace and name +func (c *VirtualImageClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualImage, error) { vi := &v1alpha2.VirtualImage{} key := client.ObjectKey{Namespace: namespace, Name: name} if err := c.client.Get(ctx, key, vi); err != nil { @@ -48,7 +41,8 @@ func (c *virtualImageClient) Get(ctx context.Context, namespace, name string) (* return vi, nil } -func (c *virtualImageClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualImage, error) { +// List lists VirtualImages in a namespace +func (c *VirtualImageClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualImage, error) { list := &v1alpha2.VirtualImageList{} if err := c.client.List(ctx, list, client.InNamespace(namespace)); err != nil { return nil, fmt.Errorf("failed to list VirtualImages in namespace %s: %w", namespace, err) @@ -56,21 +50,24 @@ func (c *virtualImageClient) List(ctx context.Context, namespace string) ([]v1al return list.Items, nil } -func (c *virtualImageClient) Create(ctx context.Context, vi *v1alpha2.VirtualImage) error { +// Create creates a new VirtualImage +func (c *VirtualImageClient) Create(ctx context.Context, vi *v1alpha2.VirtualImage) error { if err := c.client.Create(ctx, vi); err != nil { return fmt.Errorf("failed to create VirtualImage %s/%s: %w", vi.Namespace, vi.Name, err) } return nil } -func (c *virtualImageClient) Update(ctx context.Context, vi *v1alpha2.VirtualImage) error { +// Update updates an existing VirtualImage +func (c *VirtualImageClient) Update(ctx context.Context, vi *v1alpha2.VirtualImage) error { if err := c.client.Update(ctx, vi); err != nil { return fmt.Errorf("failed to update VirtualImage %s/%s: %w", vi.Namespace, vi.Name, err) } return 
nil } -func (c *virtualImageClient) Delete(ctx context.Context, namespace, name string) error { +// Delete deletes a VirtualImage by namespace and name +func (c *VirtualImageClient) Delete(ctx context.Context, namespace, name string) error { vi := &v1alpha2.VirtualImage{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -82,4 +79,3 @@ func (c *virtualImageClient) Delete(ctx context.Context, namespace, name string) } return nil } - diff --git a/internal/kubernetes/virtualization/virtual_machine.go b/internal/kubernetes/virtualization/virtual_machine.go index 5bdc97e..49cb552 100644 --- a/internal/kubernetes/virtualization/virtual_machine.go +++ b/internal/kubernetes/virtualization/virtual_machine.go @@ -26,19 +26,12 @@ import ( ) // VirtualMachineClient provides operations on VirtualMachine resources -type VirtualMachineClient interface { - Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachine, error) - List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachine, error) - Create(ctx context.Context, vm *v1alpha2.VirtualMachine) error - Update(ctx context.Context, vm *v1alpha2.VirtualMachine) error - Delete(ctx context.Context, namespace, name string) error -} - -type virtualMachineClient struct { +type VirtualMachineClient struct { client client.Client } -func (c *virtualMachineClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachine, error) { +// Get retrieves a VirtualMachine by namespace and name +func (c *VirtualMachineClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachine, error) { vm := &v1alpha2.VirtualMachine{} key := client.ObjectKey{Namespace: namespace, Name: name} if err := c.client.Get(ctx, key, vm); err != nil { @@ -47,7 +40,8 @@ func (c *virtualMachineClient) Get(ctx context.Context, namespace, name string) return vm, nil } -func (c *virtualMachineClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachine, error) { +// List lists VirtualMachines in a 
namespace +func (c *VirtualMachineClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachine, error) { list := &v1alpha2.VirtualMachineList{} opts := []client.ListOption{} if namespace != "" { @@ -59,21 +53,24 @@ func (c *virtualMachineClient) List(ctx context.Context, namespace string) ([]v1 return list.Items, nil } -func (c *virtualMachineClient) Create(ctx context.Context, vm *v1alpha2.VirtualMachine) error { +// Create creates a new VirtualMachine +func (c *VirtualMachineClient) Create(ctx context.Context, vm *v1alpha2.VirtualMachine) error { if err := c.client.Create(ctx, vm); err != nil { return fmt.Errorf("failed to create VirtualMachine %s/%s: %w", vm.Namespace, vm.Name, err) } return nil } -func (c *virtualMachineClient) Update(ctx context.Context, vm *v1alpha2.VirtualMachine) error { +// Update updates an existing VirtualMachine +func (c *VirtualMachineClient) Update(ctx context.Context, vm *v1alpha2.VirtualMachine) error { if err := c.client.Update(ctx, vm); err != nil { return fmt.Errorf("failed to update VirtualMachine %s/%s: %w", vm.Namespace, vm.Name, err) } return nil } -func (c *virtualMachineClient) Delete(ctx context.Context, namespace, name string) error { +// Delete deletes a VirtualMachine by namespace and name +func (c *VirtualMachineClient) Delete(ctx context.Context, namespace, name string) error { vm := &v1alpha2.VirtualMachine{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, diff --git a/internal/kubernetes/virtualization/vm_block_device.go b/internal/kubernetes/virtualization/vm_block_device.go index 6aea821..c21ba2b 100644 --- a/internal/kubernetes/virtualization/vm_block_device.go +++ b/internal/kubernetes/virtualization/vm_block_device.go @@ -26,19 +26,12 @@ import ( ) // VMBDClient provides operations on VirtualMachineBlockDeviceAttachment resources -type VMBDClient interface { - Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachineBlockDeviceAttachment, error) - List(ctx context.Context, 
namespace string) ([]v1alpha2.VirtualMachineBlockDeviceAttachment, error) - Create(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error - Update(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error - Delete(ctx context.Context, namespace, name string) error -} - -type vmbdClient struct { +type VMBDClient struct { client client.Client } -func (c *vmbdClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachineBlockDeviceAttachment, error) { +// Get retrieves a VirtualMachineBlockDeviceAttachment by namespace and name +func (c *VMBDClient) Get(ctx context.Context, namespace, name string) (*v1alpha2.VirtualMachineBlockDeviceAttachment, error) { vmbd := &v1alpha2.VirtualMachineBlockDeviceAttachment{} key := client.ObjectKey{Namespace: namespace, Name: name} if err := c.client.Get(ctx, key, vmbd); err != nil { @@ -47,7 +40,8 @@ func (c *vmbdClient) Get(ctx context.Context, namespace, name string) (*v1alpha2 return vmbd, nil } -func (c *vmbdClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachineBlockDeviceAttachment, error) { +// List lists VirtualMachineBlockDeviceAttachments in a namespace +func (c *VMBDClient) List(ctx context.Context, namespace string) ([]v1alpha2.VirtualMachineBlockDeviceAttachment, error) { list := &v1alpha2.VirtualMachineBlockDeviceAttachmentList{} opts := []client.ListOption{} if namespace != "" { @@ -59,21 +53,24 @@ func (c *vmbdClient) List(ctx context.Context, namespace string) ([]v1alpha2.Vir return list.Items, nil } -func (c *vmbdClient) Create(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { +// Create creates a new VirtualMachineBlockDeviceAttachment +func (c *VMBDClient) Create(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { if err := c.client.Create(ctx, vmbd); err != nil { return fmt.Errorf("failed to create VirtualMachineBlockDeviceAttachment %s/%s: %w", vmbd.Namespace, 
vmbd.Name, err) } return nil } -func (c *vmbdClient) Update(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { +// Update updates an existing VirtualMachineBlockDeviceAttachment +func (c *VMBDClient) Update(ctx context.Context, vmbd *v1alpha2.VirtualMachineBlockDeviceAttachment) error { if err := c.client.Update(ctx, vmbd); err != nil { return fmt.Errorf("failed to update VirtualMachineBlockDeviceAttachment %s/%s: %w", vmbd.Namespace, vmbd.Name, err) } return nil } -func (c *vmbdClient) Delete(ctx context.Context, namespace, name string) error { +// Delete deletes a VirtualMachineBlockDeviceAttachment by namespace and name +func (c *VMBDClient) Delete(ctx context.Context, namespace, name string) error { vmbd := &v1alpha2.VirtualMachineBlockDeviceAttachment{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, diff --git a/pkg/testkit/stress-tests.go b/pkg/testkit/stress-tests.go new file mode 100644 index 0000000..5bc8db3 --- /dev/null +++ b/pkg/testkit/stress-tests.go @@ -0,0 +1,1233 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package testkit + +import ( + "context" + "fmt" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/deckhouse/storage-e2e/internal/kubernetes/apps" + "github.com/deckhouse/storage-e2e/internal/kubernetes/core" + "github.com/deckhouse/storage-e2e/internal/kubernetes/storage" +) + +// TestMode represents the mode of stress test +type TestMode string + +const ( + ModeFlog TestMode = "flog" + ModeCheckFSOnly TestMode = "check_fs_only" + ModeCheckCloning TestMode = "check_cloning" + ModeCheckRestoringFromSnapshot TestMode = "check_restoring_from_snapshot" + ModeSnapshotResizeCloning TestMode = "snapshot_resize_cloning" +) + +// ResourceType represents the type of Kubernetes resource to create +type ResourceType string + +const ( + ResourceTypePod ResourceType = "pod" + ResourceTypeDeployment ResourceType = "deployment" + ResourceTypeStatefulSet ResourceType = "statefulset" +) + +// TestStep represents a step in snapshot_resize_cloning mode +type TestStep string + +const ( + StepRestoreFromSnapshot TestStep = "restore_from_snapshot" + StepResize TestStep = "resize" + StepClone TestStep = "clone" +) + +// Config holds the configuration for stress tests +type Config struct { + // Basic configuration + Namespace string + StorageClassName string + PVCSize string + PodsCount int + ParallelismCount int + SchedulerName string + ResourceType ResourceType + Mode TestMode + + // Resize configuration + PVCSizeAfterResize string + PVCSizeAfterResizeStage2 string + + // Snapshot configuration + SnapshotsPerPVC int + SnapshotName string // For check_restoring_from_snapshot mode + PVCForCloning string // For check_cloning mode + + // Test order for snapshot_resize_cloning mode + TestOrder []TestStep + + // Timeouts and retries + MaxAttempts 
int + Interval time.Duration + + // Cleanup + Cleanup bool + DeleteNamespace bool +} + +// DefaultConfig returns a config with sensible defaults +func DefaultConfig() *Config { + return &Config{ + SchedulerName: "default-scheduler", + ResourceType: ResourceTypePod, + Mode: ModeFlog, + SnapshotsPerPVC: 1, + MaxAttempts: 0, // 0 means infinite + Interval: 5 * time.Second, + Cleanup: false, + DeleteNamespace: false, + TestOrder: []TestStep{StepRestoreFromSnapshot, StepResize, StepClone}, + } +} + +// StressTestRunner runs stress tests +type StressTestRunner struct { + config *Config + kubeClient kubernetes.Interface + pvcClient *storage.PVCClient + snapshotClient *storage.VolumeSnapshotClient + podClient *core.PodClient + deployClient *apps.DeploymentClient + restConfig *rest.Config +} + +// NewStressTestRunner creates a new stress test runner +func NewStressTestRunner(config *Config, restConfig *rest.Config) (*StressTestRunner, error) { + kubeClient, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes client: %w", err) + } + + pvcClient, err := storage.NewPVCClient(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create PVC client: %w", err) + } + + snapshotClient, err := storage.NewVolumeSnapshotClient(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create VolumeSnapshot client: %w", err) + } + + podClient, err := core.NewPodClient(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create pod client: %w", err) + } + + deployClient, err := apps.NewDeploymentClient(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create deployment client: %w", err) + } + + return &StressTestRunner{ + config: config, + kubeClient: kubeClient, + pvcClient: pvcClient, + snapshotClient: snapshotClient, + podClient: podClient, + deployClient: deployClient, + restConfig: restConfig, + }, nil +} + +// Validate validates the configuration +func (c *Config) 
Validate() error { + if c.Namespace == "" { + return fmt.Errorf("namespace is required") + } + if c.StorageClassName == "" { + return fmt.Errorf("storage class name is required") + } + if c.PVCSize == "" { + return fmt.Errorf("PVC size is required") + } + if c.PodsCount <= 0 { + return fmt.Errorf("pods count must be > 0") + } + if c.ParallelismCount <= 0 { + return fmt.Errorf("parallelism count must be > 0") + } + if c.ParallelismCount > c.PodsCount { + return fmt.Errorf("parallelism count (%d) > pods count (%d)", c.ParallelismCount, c.PodsCount) + } + + switch c.Mode { + case ModeCheckCloning: + if c.PVCForCloning == "" { + return fmt.Errorf("PVC for cloning is required for check_cloning mode") + } + if c.ResourceType != ResourceTypePod { + return fmt.Errorf("check_cloning mode only supports pod resource type") + } + case ModeCheckRestoringFromSnapshot: + if c.SnapshotName == "" { + return fmt.Errorf("snapshot name is required for check_restoring_from_snapshot mode") + } + if c.ResourceType != ResourceTypePod { + return fmt.Errorf("check_restoring_from_snapshot mode only supports pod resource type") + } + case ModeSnapshotResizeCloning: + if c.ResourceType != ResourceTypePod { + return fmt.Errorf("snapshot_resize_cloning mode only supports pod resource type") + } + if c.SnapshotsPerPVC <= 0 { + return fmt.Errorf("snapshots per PVC must be > 0") + } + // Validate test order + for _, step := range c.TestOrder { + switch step { + case StepRestoreFromSnapshot, StepResize, StepClone: + // Valid steps + default: + return fmt.Errorf("invalid test step: %s (allowed: restore_from_snapshot, resize, clone)", step) + } + } + // Check required parameters for steps + hasResize := false + hasCloneOrRestore := false + for _, step := range c.TestOrder { + if step == StepResize { + hasResize = true + } + if step == StepClone || step == StepRestoreFromSnapshot { + hasCloneOrRestore = true + } + } + if hasResize && c.PVCSizeAfterResize == "" { + return fmt.Errorf("PVC size after 
resize is required when resize step is enabled") + } + if hasCloneOrRestore && c.PVCSizeAfterResizeStage2 == "" { + return fmt.Errorf("PVC size after resize stage2 is required when clone/restore steps are enabled") + } + } + + return nil +} + +// Run executes the stress test +func (r *StressTestRunner) Run(ctx context.Context) error { + if err := r.config.Validate(); err != nil { + return fmt.Errorf("invalid configuration: %w", err) + } + + // Ensure namespace exists + nsClient, err := core.NewNamespaceClient(r.restConfig) + if err != nil { + return fmt.Errorf("failed to create namespace client: %w", err) + } + _, err = nsClient.Get(ctx, r.config.Namespace) + if err != nil { + _, err = nsClient.Create(ctx, r.config.Namespace) + if err != nil { + return fmt.Errorf("failed to create namespace: %w", err) + } + } + + // Label namespace + _, err = r.kubeClient.CoreV1().Namespaces().Patch(ctx, r.config.Namespace, types.JSONPatchType, []byte(`[{"op": "add", "path": "/metadata/labels/load-test", "value": "true"}]`), metav1.PatchOptions{}) + if err != nil { + // Ignore if label already exists + } + + switch r.config.Mode { + case ModeFlog: + return r.runFlogMode(ctx) + case ModeCheckFSOnly: + return r.runCheckFSOnlyMode(ctx) + case ModeCheckCloning: + return r.runCheckCloningMode(ctx) + case ModeCheckRestoringFromSnapshot: + return r.runCheckRestoringFromSnapshotMode(ctx) + case ModeSnapshotResizeCloning: + return r.runSnapshotResizeCloningMode(ctx) + default: + return fmt.Errorf("unknown mode: %s", r.config.Mode) + } +} + +// createOriginalPodAndPVC creates a pod and PVC for snapshot_resize_cloning mode +func (r *StressTestRunner) createOriginalPodAndPVC(ctx context.Context, index int) error { + pvcName := fmt.Sprintf("pvc-test-%d", index) + podName := fmt.Sprintf("pod-test-%d", index) + + // Create PVC + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": 
"original", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(r.config.PVCSize), + }, + }, + StorageClassName: &r.config.StorageClassName, + VolumeMode: func() *corev1.PersistentVolumeMode { v := corev1.PersistentVolumeFilesystem; return &v }(), + }, + } + + _, err := r.pvcClient.Create(ctx, r.config.Namespace, pvc) + if err != nil { + return fmt.Errorf("failed to create PVC %s: %w", pvcName, err) + } + + // Create Pod with data preloader and writer + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": "original", + }, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + SchedulerName: r.config.SchedulerName, + Affinity: &corev1.Affinity{ + PodAntiAffinity: &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "load-test", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + }, + }, + InitContainers: []corev1.Container{ + { + Name: "data-preloader", + Image: "alpine", + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `set -e +dir="/usr/share/test-data" +mkdir -p "$dir" +echo "Preloading 5 files..." 
+for i in 1 2 3 4 5; do + fname="preload_file_${i}" + blocks=$(( ($RANDOM % 5120) + 1 )) + dd if=/dev/urandom of="${dir}/${fname}" bs=1024 count=${blocks} conv=fsync status=none || exit 1 + tmp_sum="${dir}/${fname}.sha256.tmp" + sha256sum "${dir}/${fname}" > "${tmp_sum}" || exit 1 + sync "${tmp_sum}" || true + mv "${tmp_sum}" "${dir}/${fname}.sha256" + echo "Created ${fname} (${blocks}KB)" +done`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "data-volume", + MountPath: "/usr/share/test-data", + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "data-writer", + Image: "alpine", + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `set -e +dir="/usr/share/test-data" +for sumf in "$dir"/*.sha256; do + [ -e "$sumf" ] || continue + case "$sumf" in *.tmp) continue ;; esac + [ -s "$sumf" ] || continue + sha256sum -c "$sumf" +done +echo "Data check passed" +trap 'exit 0' TERM INT +while true; do + blocks=$(( ($RANDOM % 5120) + 1 )) + fname="file_${RANDOM}_$(date +%s%N)" + if dd if=/dev/urandom of="${dir}/${fname}" bs=1024 count=${blocks} conv=fsync status=none 2>/dev/null; then + tmp_sum="${dir}/${fname}.sha256.tmp" + if sha256sum "${dir}/${fname}" > "${tmp_sum}" 2>/dev/null; then + sync "${tmp_sum}" 2>/dev/null || true + if [ -s "${tmp_sum}" ]; then + mv "${tmp_sum}" "${dir}/${fname}.sha256" + else + rm -f "${tmp_sum}" 2>/dev/null || true + fi + else + rm -f "${tmp_sum}" 2>/dev/null || true + fi + fi + sleep 1 +done`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "data-volume", + MountPath: "/usr/share/test-data", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data-volume", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + }, + } + + _, err = r.kubeClient.CoreV1().Pods(r.config.Namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create pod %s: %w", podName, err) + } + + 
return nil +} + +// createFlogPodAndPVC creates a pod and PVC for flog mode +func (r *StressTestRunner) createFlogPodAndPVC(ctx context.Context, index int, firstStart bool) error { + pvcName := fmt.Sprintf("pvc-test-%d", index) + podName := fmt.Sprintf("pod-test-%d", index) + + // Create PVC + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Labels: map[string]string{ + "load-test": "true", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(r.config.PVCSize), + }, + }, + StorageClassName: &r.config.StorageClassName, + VolumeMode: func() *corev1.PersistentVolumeMode { v := corev1.PersistentVolumeFilesystem; return &v }(), + }, + } + + _, err := r.pvcClient.Create(ctx, r.config.Namespace, pvc) + if err != nil { + return fmt.Errorf("failed to create PVC %s: %w", pvcName, err) + } + + firstStartStr := "false" + if firstStart { + firstStartStr = "true" + } + + // Create Pod + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "load-test": "true", + }, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + SchedulerName: r.config.SchedulerName, + Affinity: &corev1.Affinity{ + PodAntiAffinity: &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "load-test", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + }, + }, + Containers: []corev1.Container{ + { + Name: 
"flog-generator", + Image: "ex42zav/flog:0.4.3", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("20Mi"), + }, + }, + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `echo "Starting flog generator..." +ls -A /var/log/flog +folder_files=$(ls -A /var/log/flog 2>/dev/null | grep -v '^lost+found$') +echo "folder_files: $folder_files" +echo "FIRST_START: $FIRST_START" + +if [ -n "$folder_files" ] && [ "$FIRST_START" = "true" ]; then + echo "Error: leftover files found in /var/log/flog" >&2 + exit 1 +fi + +trap 'echo "Termination signal received, exiting..."; exit 0' TERM INT + +while true; do + /srv/flog/flog -b "${FLOG_BATCH_SIZE}" -f "${FLOG_LOG_FORMAT}" 2>&1 | tee -a /var/log/flog/fake.log + if ! touch /var/log/flog/fake.log; then + echo "Error: Unable to write to /var/log/flog/fake.log" >&2 + exit 1 + fi + sleep ${FLOG_TIME_INTERVAL} +done`, + }, + Env: []corev1.EnvVar{ + {Name: "FLOG_BATCH_SIZE", Value: "10700"}, + {Name: "FLOG_TIME_INTERVAL", Value: "1"}, + {Name: "FLOG_LOG_FORMAT", Value: "json"}, + {Name: "FIRST_START", Value: firstStartStr}, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "nginx-persistent-storage", + MountPath: "/var/log/flog", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "nginx-persistent-storage", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + }, + } + + _, err = r.kubeClient.CoreV1().Pods(r.config.Namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create pod %s: %w", podName, err) + } + + return nil +} + +// waitForPodsStatus waits for pods to reach a specific status +func (r *StressTestRunner) waitForPodsStatus(ctx context.Context, labelSelector, status string, expectedCount int) error { + attempt := 0 + for { + pods, err := 
r.podClient.ListByLabelSelector(ctx, r.config.Namespace, labelSelector) + if err != nil { + return err + } + + readyCount := 0 + for _, pod := range pods.Items { + if string(pod.Status.Phase) == status || (status == "Completed" && pod.Status.Phase == corev1.PodSucceeded) { + readyCount++ + } + } + + if readyCount >= expectedCount { + return nil + } + + if readyCount > 0 { + attempt++ + } + + if r.config.MaxAttempts > 0 && attempt >= r.config.MaxAttempts { + return fmt.Errorf("timeout waiting for pods status %s: %d/%d after %d attempts", status, readyCount, expectedCount, r.config.MaxAttempts) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(r.config.Interval): + } + } +} + +// runFlogMode runs the flog mode test +func (r *StressTestRunner) runFlogMode(ctx context.Context) error { + iterations := (r.config.PodsCount + r.config.ParallelismCount - 1) / r.config.ParallelismCount + + for i := 0; i < iterations; i++ { + start := i*r.config.ParallelismCount + 1 + end := start + r.config.ParallelismCount - 1 + if end > r.config.PodsCount { + end = r.config.PodsCount + } + + for j := start; j <= end; j++ { + firstStart := (i == 0 && j == start) + if err := r.createFlogPodAndPVC(ctx, j, firstStart); err != nil { + return err + } + } + } + + // Wait for PVCs to be bound + if err := r.pvcClient.WaitForBound(ctx, r.config.Namespace, "load-test=true", r.config.PodsCount, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + + // Wait for pods to be running + if err := r.waitForPodsStatus(ctx, "load-test=true", "Running", r.config.PodsCount); err != nil { + return err + } + + // Resize if configured + if r.config.PVCSizeAfterResize != "" { + pvcNames := make([]string, r.config.PodsCount) + for i := 1; i <= r.config.PodsCount; i++ { + pvcNames[i-1] = fmt.Sprintf("pvc-test-%d", i) + } + if err := r.pvcClient.ResizeList(ctx, r.config.Namespace, pvcNames, r.config.PVCSizeAfterResize); err != nil { + return err + } + if err := 
r.pvcClient.WaitForResize(ctx, r.config.Namespace, pvcNames, r.config.PVCSizeAfterResize, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + } + + // Cleanup if requested + if r.config.Cleanup { + return r.cleanup(ctx) + } + + return nil +} + +// runCheckFSOnlyMode runs the check_fs_only mode test +func (r *StressTestRunner) runCheckFSOnlyMode(ctx context.Context) error { + // Similar to flog mode but with different pod spec + // Implementation would be similar to flog mode but with nginx container checking filesystem + return fmt.Errorf("check_fs_only mode not yet implemented") +} + +// runCheckCloningMode runs the check_cloning mode test +func (r *StressTestRunner) runCheckCloningMode(ctx context.Context) error { + // Create PVCs cloned from the specified PVC + // Implementation would create PVCs with dataSource pointing to r.config.PVCForCloning + return fmt.Errorf("check_cloning mode not yet implemented") +} + +// runCheckRestoringFromSnapshotMode runs the check_restoring_from_snapshot mode test +func (r *StressTestRunner) runCheckRestoringFromSnapshotMode(ctx context.Context) error { + // Create PVCs restored from the specified snapshot + // Implementation would create PVCs with dataSource pointing to r.config.SnapshotName + return fmt.Errorf("check_restoring_from_snapshot mode not yet implemented") +} + +// runSnapshotResizeCloningMode runs the snapshot_resize_cloning mode test +func (r *StressTestRunner) runSnapshotResizeCloningMode(ctx context.Context) error { + // Create original pods and PVCs + iterations := (r.config.PodsCount + r.config.ParallelismCount - 1) / r.config.ParallelismCount + + for i := 0; i < iterations; i++ { + start := i*r.config.ParallelismCount + 1 + end := start + r.config.ParallelismCount - 1 + if end > r.config.PodsCount { + end = r.config.PodsCount + } + + for j := start; j <= end; j++ { + if err := r.createOriginalPodAndPVC(ctx, j); err != nil { + return err + } + } + } + + // Wait for PVCs to be bound + if 
err := r.pvcClient.WaitForBound(ctx, r.config.Namespace, "load-test=true,load-test-role=original", r.config.PodsCount, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + + // Wait for pods to be running + if err := r.waitForPodsStatus(ctx, "load-test=true,load-test-role=original", "Running", r.config.PodsCount); err != nil { + return err + } + + time.Sleep(5 * time.Second) + + var clonePVCNames []string + var restorePVCNames []string + originalPVCNames := make([]string, r.config.PodsCount) + for i := 1; i <= r.config.PodsCount; i++ { + originalPVCNames[i-1] = fmt.Sprintf("pvc-test-%d", i) + } + + // Execute test steps + for _, step := range r.config.TestOrder { + switch step { + case StepRestoreFromSnapshot: + if err := r.executeRestoreFromSnapshotStep(ctx, &restorePVCNames); err != nil { + return err + } + case StepResize: + if err := r.pvcClient.ResizeList(ctx, r.config.Namespace, originalPVCNames, r.config.PVCSizeAfterResize); err != nil { + return err + } + if err := r.pvcClient.WaitForResize(ctx, r.config.Namespace, originalPVCNames, r.config.PVCSizeAfterResize, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + case StepClone: + if err := r.executeCloneStep(ctx, &clonePVCNames); err != nil { + return err + } + } + } + + // Stage 2: flog pods and resize for clones/restored + stage2PVCs := append(clonePVCNames, restorePVCNames...) 
+ if len(stage2PVCs) > 0 { + if err := r.executeStage2(ctx, stage2PVCs); err != nil { + return err + } + } + + // Cleanup if requested + if r.config.Cleanup { + return r.cleanup(ctx) + } + + return nil +} + +// executeRestoreFromSnapshotStep executes the restore from snapshot step +func (r *StressTestRunner) executeRestoreFromSnapshotStep(ctx context.Context, restorePVCNames *[]string) error { + // Create snapshots + for batchStart := 1; batchStart <= r.config.PodsCount; batchStart += r.config.ParallelismCount { + batchEnd := batchStart + r.config.ParallelismCount - 1 + if batchEnd > r.config.PodsCount { + batchEnd = r.config.PodsCount + } + + for k := batchStart; k <= batchEnd; k++ { + for s := 1; s <= r.config.SnapshotsPerPVC; s++ { + snapshotName := fmt.Sprintf("snapshot-test-%d-%d", k, s) + if err := r.createVolumeSnapshot(ctx, k, snapshotName); err != nil { + return err + } + } + } + time.Sleep(5 * time.Second) + } + + totalSnapshots := r.config.PodsCount * r.config.SnapshotsPerPVC + if err := r.snapshotClient.WaitForReady(ctx, r.config.Namespace, "load-test-role=snapshot", totalSnapshots, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + + // Create restore PVCs and pods + for k := 1; k <= r.config.PodsCount; k++ { + for s := 1; s <= r.config.SnapshotsPerPVC; s++ { + snapshotName := fmt.Sprintf("snapshot-test-%d-%d", k, s) + pvcName := fmt.Sprintf("pvc-test-%d-restore-%d", k, s) + podName := fmt.Sprintf("pod-test-%d-restore-%d", k, s) + if err := r.createRestorePodAndPVC(ctx, snapshotName, pvcName, podName); err != nil { + return err + } + *restorePVCNames = append(*restorePVCNames, pvcName) + } + } + + totalRestore := r.config.PodsCount * r.config.SnapshotsPerPVC + if err := r.pvcClient.WaitForBound(ctx, r.config.Namespace, "load-test=true,load-test-role=restore", totalRestore, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + if err := r.waitForPodsStatus(ctx, "load-test=true,load-test-role=restore", 
"Completed", totalRestore); err != nil { + return err + } + + return nil +} + +// executeCloneStep executes the clone step +func (r *StressTestRunner) executeCloneStep(ctx context.Context, clonePVCNames *[]string) error { + for k := 1; k <= r.config.PodsCount; k++ { + // Get current size of original PVC + pvc, err := r.pvcClient.Get(ctx, r.config.Namespace, fmt.Sprintf("pvc-test-%d", k)) + if err != nil { + return err + } + currentSize := r.config.PVCSize + if pvc.Status.Capacity != nil { + if size, ok := pvc.Status.Capacity[corev1.ResourceStorage]; ok { + currentSize = size.String() + } + } + + pvcName := fmt.Sprintf("pvc-test-%d-clone", k) + podName := fmt.Sprintf("pod-test-%d-clone", k) + if err := r.createClonePodAndPVC(ctx, k, pvcName, podName, currentSize); err != nil { + return err + } + *clonePVCNames = append(*clonePVCNames, pvcName) + } + + if err := r.pvcClient.WaitForBound(ctx, r.config.Namespace, "load-test=true,load-test-role=clone", r.config.PodsCount, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + if err := r.waitForPodsStatus(ctx, "load-test=true,load-test-role=clone", "Completed", r.config.PodsCount); err != nil { + return err + } + + return nil +} + +// executeStage2 executes stage 2: flog pods and resize +func (r *StressTestRunner) executeStage2(ctx context.Context, pvcNames []string) error { + // Create flog pods for each PVC + for _, pvcName := range pvcNames { + podName := fmt.Sprintf("%s-flog", pvcName) + role := "clone-flog" + if len(pvcName) > 8 && pvcName[len(pvcName)-8:] == "-restore" { + role = "restore-flog" + } + if err := r.createFlogPodForPVC(ctx, podName, pvcName, role); err != nil { + return err + } + } + + // Wait for pods to be running + cloneCount := 0 + restoreCount := 0 + for _, pvcName := range pvcNames { + if len(pvcName) > 6 && pvcName[len(pvcName)-6:] == "-clone" { + cloneCount++ + } else { + restoreCount++ + } + } + + if cloneCount > 0 { + if err := r.waitForPodsStatus(ctx, 
"load-test=true,load-test-role=clone-flog", "Running", cloneCount); err != nil { + return err + } + } + if restoreCount > 0 { + if err := r.waitForPodsStatus(ctx, "load-test=true,load-test-role=restore-flog", "Running", restoreCount); err != nil { + return err + } + } + + // Resize PVCs + if err := r.pvcClient.ResizeList(ctx, r.config.Namespace, pvcNames, r.config.PVCSizeAfterResizeStage2); err != nil { + return err + } + if err := r.pvcClient.WaitForResize(ctx, r.config.Namespace, pvcNames, r.config.PVCSizeAfterResizeStage2, r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + + return nil +} + +// createVolumeSnapshot creates a VolumeSnapshot +func (r *StressTestRunner) createVolumeSnapshot(ctx context.Context, pvcIndex int, snapshotName string) error { + pvcName := fmt.Sprintf("pvc-test-%d", pvcIndex) + snapshot := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "snapshot.storage.k8s.io/v1", + "kind": "VolumeSnapshot", + "metadata": map[string]interface{}{ + "name": snapshotName, + "labels": map[string]interface{}{ + "load-test": "true", + "load-test-role": "snapshot", + }, + }, + "spec": map[string]interface{}{ + "source": map[string]interface{}{ + "persistentVolumeClaimName": pvcName, + }, + }, + }, + } + + _, err := r.snapshotClient.Create(ctx, r.config.Namespace, snapshot) + return err +} + +// createRestorePodAndPVC creates a pod and PVC restored from a snapshot +func (r *StressTestRunner) createRestorePodAndPVC(ctx context.Context, snapshotName, pvcName, podName string) error { + // Create PVC + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": "restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: 
resource.MustParse(r.config.PVCSize), + }, + }, + StorageClassName: &r.config.StorageClassName, + DataSource: &corev1.TypedLocalObjectReference{ + APIGroup: func() *string { s := "snapshot.storage.k8s.io"; return &s }(), + Kind: "VolumeSnapshot", + Name: snapshotName, + }, + }, + } + + _, err := r.pvcClient.Create(ctx, r.config.Namespace, pvc) + if err != nil { + return fmt.Errorf("failed to create restore PVC %s: %w", pvcName, err) + } + + // Create Pod + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": "restore", + }, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + SchedulerName: r.config.SchedulerName, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + }, + }, + Containers: []corev1.Container{ + { + Name: "data-checker", + Image: "alpine", + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `set -e +dir="/usr/share/test-data" +echo "Listing directory contents:" +ls -lah "$dir" || true +checked=0 +skipped=0 +for sumf in "$dir"/*.sha256; do + [ -e "$sumf" ] || continue + case "$sumf" in *.tmp) skipped=$((skipped+1)); continue ;; esac + if [ ! 
-s "$sumf" ]; then + echo "SKIP empty checksum: $sumf" + skipped=$((skipped+1)) + continue + fi + sha256sum -c "$sumf" + checked=$((checked+1)) +done +echo "Data check passed (checked: $checked, skipped: $skipped)"`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "data-volume", + MountPath: "/usr/share/test-data", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data-volume", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + }, + } + + _, err = r.kubeClient.CoreV1().Pods(r.config.Namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create restore pod %s: %w", podName, err) + } + + return nil +} + +// createClonePodAndPVC creates a pod and PVC cloned from an original PVC +func (r *StressTestRunner) createClonePodAndPVC(ctx context.Context, originalIndex int, pvcName, podName, cloneSize string) error { + originalPVCName := fmt.Sprintf("pvc-test-%d", originalIndex) + + // Create PVC + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": "clone", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(cloneSize), + }, + }, + StorageClassName: &r.config.StorageClassName, + DataSource: &corev1.TypedLocalObjectReference{ + Kind: "PersistentVolumeClaim", + Name: originalPVCName, + }, + }, + } + + _, err := r.pvcClient.Create(ctx, r.config.Namespace, pvc) + if err != nil { + return fmt.Errorf("failed to create clone PVC %s: %w", pvcName, err) + } + + // Create Pod (same as restore pod) + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "load-test": "true", + 
"load-test-role": "clone", + }, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + SchedulerName: r.config.SchedulerName, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + }, + }, + Containers: []corev1.Container{ + { + Name: "data-checker", + Image: "alpine", + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `set -e +dir="/usr/share/test-data" +echo "Listing directory contents:" +ls -lah "$dir" || true +checked=0 +skipped=0 +for sumf in "$dir"/*.sha256; do + [ -e "$sumf" ] || continue + case "$sumf" in *.tmp) skipped=$((skipped+1)); continue ;; esac + if [ ! -s "$sumf" ]; then + echo "SKIP empty checksum: $sumf" + skipped=$((skipped+1)) + continue + fi + sha256sum -c "$sumf" + checked=$((checked+1)) +done +echo "Data check passed (checked: $checked, skipped: $skipped)"`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "data-volume", + MountPath: "/usr/share/test-data", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "data-volume", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + }, + } + + _, err = r.kubeClient.CoreV1().Pods(r.config.Namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create clone pod %s: %w", podName, err) + } + + return nil +} + +// createFlogPodForPVC creates a flog pod for an existing PVC +func (r *StressTestRunner) createFlogPodForPVC(ctx context.Context, podName, pvcName, role string) error { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "load-test": "true", + "load-test-role": role, + }, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + SchedulerName: r.config.SchedulerName, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + }, + }, + Containers: []corev1.Container{ + { + Name: 
"flog-generator", + Image: "ex42zav/flog:0.4.3", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("20Mi"), + }, + }, + Command: []string{"/bin/sh", "-c"}, + Args: []string{ + `echo "Starting flog generator..." +trap 'echo "Termination signal received, exiting..."; exit 0' TERM INT +while true; do + /srv/flog/flog -b "${FLOG_BATCH_SIZE}" -f "${FLOG_LOG_FORMAT}" 2>&1 | tee -a /var/log/flog/fake.log + if ! touch /var/log/flog/fake.log; then + echo "Error: Unable to write to /var/log/flog/fake.log" >&2 + exit 1 + fi + sleep ${FLOG_TIME_INTERVAL} +done`, + }, + Env: []corev1.EnvVar{ + {Name: "FLOG_BATCH_SIZE", Value: "10700"}, + {Name: "FLOG_TIME_INTERVAL", Value: "1"}, + {Name: "FLOG_LOG_FORMAT", Value: "json"}, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "nginx-persistent-storage", + MountPath: "/var/log/flog", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "nginx-persistent-storage", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + }, + } + + _, err := r.kubeClient.CoreV1().Pods(r.config.Namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create flog pod %s: %w", podName, err) + } + + return nil +} + +// cleanup cleans up all resources created during the test +func (r *StressTestRunner) cleanup(ctx context.Context) error { + // Delete pods + if err := r.kubeClient.CoreV1().Pods(r.config.Namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{ + LabelSelector: "load-test=true", + }); err != nil { + return fmt.Errorf("failed to delete pods: %w", err) + } + + // Delete PVCs + if err := r.pvcClient.DeleteByLabelSelector(ctx, r.config.Namespace, "load-test=true"); err != nil { + return fmt.Errorf("failed to delete PVCs: %w", err) + } + + // Delete VolumeSnapshots + 
if err := r.snapshotClient.DeleteByLabelSelector(ctx, r.config.Namespace, "load-test=true"); err != nil { + return fmt.Errorf("failed to delete VolumeSnapshots: %w", err) + } + + // Wait for deletion + if err := r.pvcClient.WaitForDeletion(ctx, r.config.Namespace, "load-test=true", r.config.MaxAttempts, r.config.Interval); err != nil { + return err + } + + // Delete namespace if requested + if r.config.DeleteNamespace && r.config.Namespace != "default" { + if err := r.kubeClient.CoreV1().Namespaces().Delete(ctx, r.config.Namespace, metav1.DeleteOptions{}); err != nil { + return fmt.Errorf("failed to delete namespace: %w", err) + } + } + + return nil +} diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 61c9ba8..28dedeb 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -313,17 +313,10 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // Step 9: Establish SSH connection to setup node through base cluster master (jump host) It("should establish SSH connection to setup node through base cluster master", func() { - By("Stopping current SSH tunnel to base cluster", func() { - if tunnelinfo != nil && tunnelinfo.StopFunc != nil { - GinkgoWriter.Printf(" ▶️ Stopping SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) - err := tunnelinfo.StopFunc() - Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ SSH tunnel stopped successfully\n") - tunnelinfo = nil - } - }) - By("Obtaining SSH client to setup node through base cluster master", func() { + // Note: We don't need to stop the base cluster tunnel here. + // Jump host clients are just SSH connections and don't require port forwarding. + // The base cluster tunnel can stay active for virtClient operations. 
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -371,9 +364,9 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { namespace := config.TestClusterNamespace // Get IPs for all VMs (masters, workers, and setup node) + // Note: vmResources.VMNames already includes the setup VM, so we don't need to append it var vmIPs []string - allVMNames := append([]string{}, vmResources.VMNames...) - allVMNames = append(allVMNames, vmResources.SetupVMName) + allVMNames := vmResources.VMNames GinkgoWriter.Printf(" ▶️ Getting IP addresses for all VMs\n") for _, vmName := range allVMNames { @@ -453,14 +446,25 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { namespace := config.TestClusterNamespace firstMasterHostname := clusterDefinition.Masters[0].Hostname - // Get master IP address + // Get master IP address (base cluster tunnel should still be active from Step 5) masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) GinkgoWriter.Printf(" ▶️ Verifying cluster readiness for master %s (%s)\n", firstMasterHostname, masterIP) - // Step 1: Establish connection to test cluster (tunnel and kubeconfig) + // Step 1: Stop base cluster tunnel before creating test cluster tunnel + // Both tunnels use port 6445, so we can't have both active at the same time + if tunnelinfo != nil && tunnelinfo.StopFunc != nil { + GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel (port 6445 needed for test cluster tunnel)...\n") + err := tunnelinfo.StopFunc() + Expect(err).NotTo(HaveOccurred(), "Failed to stop base cluster SSH tunnel") + tunnelinfo = nil + GinkgoWriter.Printf(" ✅ Base cluster SSH tunnel stopped successfully\n") + } + + // Step 2: Establish connection to test 
cluster (tunnel and kubeconfig) + // This will create a new tunnel on port 6445 to the test cluster testClusterResources, err := cluster.ConnectToCluster(ctx, sshclient, masterIP) Expect(err).NotTo(HaveOccurred(), "Failed to establish connection to test cluster") Expect(testClusterResources).NotTo(BeNil()) From 206885214a74094e3c923347c367dab353e31933 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Fri, 19 Dec 2025 12:22:51 +0300 Subject: [PATCH 31/48] Removed 3par... --- 3par_diagnostic_guide.md | 246 --------------------------------------- 1 file changed, 246 deletions(-) delete mode 100644 3par_diagnostic_guide.md diff --git a/3par_diagnostic_guide.md b/3par_diagnostic_guide.md deleted file mode 100644 index 119ea74..0000000 --- a/3par_diagnostic_guide.md +++ /dev/null @@ -1,246 +0,0 @@ -# HPE 3PAR SAN Diagnostic Guide - -## Current System Status Analysis - -Based on your `showversion` and `showsys` output: - -**System Information:** -- Model: HPE 3PAR 8400 -- Serial: CZ3811SC9P -- Version: 3.3.1 (MU3) with patches P50-P128 -- Nodes: 2 nodes (Master: Node 0) -- Cluster LED: **Off** (this is normal when system is healthy) - -**Capacity Status:** -- Total Capacity: 14,639,104 MB (~14.6 TB) -- Allocated: 4,809,728 MB (~4.8 TB) -- Free: 9,829,376 MB (~9.8 TB) -- Failed: 0 MB ✅ (Good - no failed capacity) - -**Initial Assessment:** The system appears healthy based on basic metrics (no failed capacity, proper node count). - -## Diagnostic Commands to Run - -Run these commands in order to get a complete health picture: - -### 1. System Health and Status -```bash -# Overall system health -showsys -health - -# Detailed system status -showsys -d - -# System statistics -showsys -space - -# Check for any alerts or events -showeventlog -min 60 # Last 60 minutes -showeventlog -alert # All alerts -``` - -### 2. 
Node Status -```bash -# Check node health -shownode -d - -# Node statistics -shownode -stat - -# Check if all nodes are up and healthy -shownode -state -``` - -### 3. Disk and Cage Status -```bash -# Check all physical disks -showpd -d - -# Check for failed or degraded disks -showpd -failed -showpd -degraded - -# Check disk cage status -showcage -d - -# Check disk statistics -showpd -stat -``` - -### 4. CPG (Common Provisioning Group) Status -```bash -# List all CPGs -showcpg -d - -# Check CPG space usage -showcpg -space - -# Check CPG growth history -showcpg -hist -``` - -### 5. Volume Status -```bash -# List all volumes -showvv -d - -# Check volume statistics -showvv -stat - -# Check for degraded volumes -showvv -degraded - -# Check volume space usage -showvv -space -``` - -### 6. Network and Port Status -```bash -# Check FC/iSCSI ports -showport -d - -# Check port statistics -showport -stat - -# Check for failed ports -showport -failed -``` - -### 7. Service Processor (SP) Status -```bash -# Check service processor status -showsp -d - -# Check SP network connectivity -showsp -net -``` - -### 8. Performance and Statistics -```bash -# System statistics -statcpu -statport -statpd -statvv -statrcopy - -# Real-time statistics (run for 30-60 seconds) -statcpu -iter 1 -rw 1 -statport -iter 1 -rw 1 -``` - -### 9. Replication Status (if configured) -```bash -# Check remote copy status -showrcopy -d - -# Check remote copy groups -showrcopygroup -d -``` - -### 10. 
Check for Known Issues -```bash -# Check for any service issues -showservice -d - -# Check license status -showlicense - -# Check firmware versions -showversion -d -``` - -## Key Health Indicators to Check - -### ✅ Healthy Indicators: -- All nodes show "UP" status -- No failed physical disks (showpd -failed returns empty) -- No degraded volumes (showvv -degraded returns empty) -- All ports show "Ready" status -- No critical alerts in event log -- Cluster LED is Off (normal) -- Failed capacity is 0 MB - -### ⚠️ Warning Indicators: -- Any node showing "DOWN" or "DEGRADED" -- Failed or degraded physical disks -- Degraded volumes -- Ports showing "FAILED" or "DEGRADED" -- Critical alerts in event log -- High latency in statistics -- Unusual capacity growth patterns - -### 🔴 Critical Issues: -- Multiple nodes down -- Multiple failed disks -- System unresponsive -- Data unavailability -- Replication failures (if configured) - -## Quick Health Check Script - -Save this as a script and run it: - -```bash -#!/bin/bash -echo "=== 3PAR Health Check ===" -echo "" -echo "1. System Health:" -showsys -health -echo "" -echo "2. Node Status:" -shownode -d -echo "" -echo "3. Failed Disks:" -showpd -failed -echo "" -echo "4. Degraded Volumes:" -showvv -degraded -echo "" -echo "5. Recent Alerts (last hour):" -showeventlog -min 60 -alert -echo "" -echo "6. Port Status:" -showport -d | grep -E "(Port|State|Status)" -echo "" -echo "=== Health Check Complete ===" -``` - -## Next Steps - -1. **Run the diagnostic commands** above and collect output -2. **Review the event log** for any recent errors or warnings -3. **Check performance statistics** if you're experiencing performance issues -4. **Verify backups/replication** if configured -5. 
**Document baseline metrics** for future comparison - -## Common Issues and Solutions - -### Issue: High Latency -- Check port statistics: `statport` -- Check disk statistics: `statpd` -- Review CPG layout and disk types - -### Issue: Capacity Warnings -- Check CPG growth: `showcpg -hist` -- Review volume space: `showvv -space` -- Consider expanding or cleaning up unused volumes - -### Issue: Node Degradation -- Check node status: `shownode -d` -- Review event log: `showeventlog` -- May require node replacement or service call - -### Issue: Disk Failures -- Check failed disks: `showpd -failed` -- Verify spare disks are available -- Plan for disk replacement - -## Support Information - -If issues are found: -- Collect all diagnostic output -- Note the exact error messages -- Check HPE support portal for known issues -- Contact HPE support with system serial: CZ3811SC9P - From 21f4d5507e23977b409c264e0c0cc1278e2358b6 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Mon, 22 Dec 2025 12:16:04 +0300 Subject: [PATCH 32/48] Cluster is now deployed and checked. --- README.md | 23 ++- internal/infrastructure/ssh/client.go | 18 +- pkg/cluster/cluster.go | 168 ++++++++++++---- .../cluster_creation_test.go | 180 ++++++++++-------- 4 files changed, 258 insertions(+), 131 deletions(-) diff --git a/README.md b/README.md index ab5d2ca..90a7296 100644 --- a/README.md +++ b/README.md @@ -10,18 +10,21 @@ High-level test that creates a complete test cluster from a YAML configuration f ### cluster-creation-by-steps Step-by-step test that creates a test cluster incrementally, validating each stage: +**Setup (BeforeAll):** 1. Environment validation - Validates required environment variables are set 2. Cluster configuration loading - Loads and parses cluster definition from YAML file -3. SSH connection establishment to base cluster - Connects to base cluster via SSH -4. Kubeconfig retrieval from base cluster - Fetches kubeconfig file from base cluster -5. 
SSH tunnel setup with port forwarding - Establishes tunnel to access Kubernetes API -6. Virtualization module readiness check - Verifies virtualization module is Ready -7. Test namespace creation - Creates test namespace if it doesn't exist -8. Virtual machine creation and provisioning - Creates VMs and waits for them to become Running -9. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host -10. Docker installation on setup node - Installs Docker (required for DKP bootstrap) -11. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values -12. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap + +**Test Steps:** +1. Connect to base cluster - Establishes SSH connection, retrieves kubeconfig, and sets up port forwarding tunnel +2. Virtualization module readiness check - Verifies virtualization module is Ready +3. Test namespace creation - Creates test namespace if it doesn't exist +4. Virtual machine creation and provisioning - Creates VMs and waits for them to become Running +5. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host +6. Docker installation on setup node - Installs Docker (required for DKP bootstrap) +7. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values +8. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap +9. Cluster bootstrap - Bootstraps Kubernetes cluster from setup node to first master node +10. 
Cluster readiness verification - Verifies cluster is ready by checking deckhouse deployment ## Environment Variables diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 037bf9a..5a74faf 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -18,6 +18,7 @@ package ssh import ( "context" + "errors" "fmt" "io" "net" @@ -404,7 +405,12 @@ func (c *client) Close() error { c.keepaliveWg.Wait() } if c.sshClient != nil { - return c.sshClient.Close() + err := c.sshClient.Close() + // Ignore EOF errors - they just mean the connection was already closed + if err != nil && (errors.Is(err, io.EOF) || strings.Contains(err.Error(), "EOF")) { + return nil + } + return err } return nil } @@ -665,12 +671,18 @@ func (c *jumpHostClient) Close() error { var errs []error if c.targetClient != nil { if err := c.targetClient.Close(); err != nil { - errs = append(errs, err) + // Ignore EOF errors - they just mean the connection was already closed + if !errors.Is(err, io.EOF) && !strings.Contains(err.Error(), "EOF") { + errs = append(errs, err) + } } } if c.jumpClient != nil { if err := c.jumpClient.Close(); err != nil { - errs = append(errs, err) + // Ignore EOF errors - they just mean the connection was already closed + if !errors.Is(err, io.EOF) && !strings.Contains(err.Error(), "EOF") { + errs = append(errs, err) + } } } if len(errs) > 0 { diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 2f95050..bb528c1 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -128,7 +128,7 @@ func CleanupTestCluster(resources *TestClusterResources) error { return nil } -// CheckClusterHealth checks if the deckhouse deployment pod is running with 2/2 ready replicas +// CheckClusterHealth checks if the deckhouse deployment has 1 pod running with 2/2 containers ready // in the d8-system namespace. This function is widely used to check cluster health after certain steps. 
func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { namespace := "d8-system" @@ -146,9 +146,9 @@ func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { return fmt.Errorf("failed to get deployment %s/%s: %w", namespace, deploymentName, err) } - // Check if deployment has 2 ready replicas - if deployment.Status.ReadyReplicas != 2 { - return fmt.Errorf("deployment %s/%s has %d ready replicas, expected 2", namespace, deploymentName, deployment.Status.ReadyReplicas) + // Check if deployment has 1 ready replica (1 pod) + if deployment.Status.ReadyReplicas != 1 { + return fmt.Errorf("deployment %s/%s has %d ready replicas, expected 1", namespace, deploymentName, deployment.Status.ReadyReplicas) } // Create pod client @@ -164,66 +164,154 @@ func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { return fmt.Errorf("failed to list pods for deployment %s/%s: %w", namespace, deploymentName, err) } - // Check that we have exactly 2 pods and both are running + // Check that we have exactly 1 pod if len(pods.Items) != 1 { - return fmt.Errorf("expected 1 pods for deployment %s/%s, found %d", namespace, deploymentName, len(pods.Items)) + return fmt.Errorf("expected 1 pod for deployment %s/%s, found %d", namespace, deploymentName, len(pods.Items)) } - // Check each pod is running and all containers are ready - for _, pod := range pods.Items { - if !podClient.IsRunning(ctx, &pod) { - return fmt.Errorf("pod %s/%s is not running (phase: %s)", namespace, pod.Name, pod.Status.Phase) - } + // Check the pod is running and has 2/2 containers ready + pod := pods.Items[0] + if !podClient.IsRunning(ctx, &pod) { + return fmt.Errorf("pod %s/%s is not running (phase: %s)", namespace, pod.Name, pod.Status.Phase) + } - if !podClient.AllContainersReady(ctx, &pod) { - return fmt.Errorf("pod %s/%s does not have all containers ready", namespace, pod.Name) - } + // Verify the pod has exactly 2 containers + if len(pod.Spec.Containers) 
!= 2 { + return fmt.Errorf("pod %s/%s has %d containers, expected 2", namespace, pod.Name, len(pod.Spec.Containers)) + } + + // Check all containers are ready + if !podClient.AllContainersReady(ctx, &pod) { + return fmt.Errorf("pod %s/%s does not have all containers ready (expected 2/2 containers ready)", namespace, pod.Name) } return nil } -// ConnectToCluster establishes SSH connection to the test cluster master through the base cluster master, +// ConnectClusterOptions defines options for connecting to a cluster +type ConnectClusterOptions struct { + // Direct connection parameters (used when UseJumpHost is false) + SSHUser string + SSHHost string + SSHKeyPath string + + // Jump host parameters (used when UseJumpHost is true) + UseJumpHost bool + JumpHostUser string // Optional: defaults to SSHUser if empty + JumpHostHost string // Optional: defaults to SSHHost if empty + JumpHostKeyPath string // Optional: defaults to SSHKeyPath if empty + TargetUser string // Required when UseJumpHost is true + TargetHost string // Required when UseJumpHost is true (IP or hostname) + TargetKeyPath string // Optional: defaults to SSHKeyPath if empty +} + +// ConnectToCluster establishes SSH connection to a cluster (base or test), // retrieves kubeconfig, and sets up port forwarding tunnel. -// The SSH tunnel remains active after this function returns (it's stored in the returned resources). -// Returns the test cluster resources including the tunnel that must be kept alive. -// Note: This function does NOT check cluster health - use CheckClusterHealth() for that. 
-func ConnectToCluster(ctx context.Context, baseSSHClient ssh.SSHClient, testClusterMasterIP string) (*TestClusterResources, error) { - if baseSSHClient == nil { - return nil, fmt.Errorf("baseSSHClient cannot be nil") +func ConnectToCluster(ctx context.Context, opts ConnectClusterOptions) (*TestClusterResources, error) { + // Validate required parameters + if opts.SSHUser == "" { + return nil, fmt.Errorf("SSHUser cannot be empty") + } + if opts.SSHHost == "" { + return nil, fmt.Errorf("SSHHost cannot be empty") } - if testClusterMasterIP == "" { - return nil, fmt.Errorf("testClusterMasterIP cannot be empty") + if opts.SSHKeyPath == "" { + return nil, fmt.Errorf("SSHKeyPath cannot be empty") } - // Step 1: Create SSH client to test cluster master through base cluster master (jump host) - testSSHClient, err := ssh.NewClientWithJumpHost( - config.SSHUser, config.SSHHost, config.SSHKeyPath, // jump host (base cluster master) - config.VMSSHUser, testClusterMasterIP, config.SSHKeyPath, // target (test cluster master) - ) - if err != nil { - return nil, fmt.Errorf("failed to create SSH client to test cluster master: %w", err) + var sshClient ssh.SSHClient + var masterHost string // Host/IP to use for kubeconfig retrieval + var masterUser string // User to use for kubeconfig retrieval + + if opts.UseJumpHost { + // Validate jump host parameters + if opts.TargetHost == "" { + return nil, fmt.Errorf("TargetHost is required when UseJumpHost is true") + } + if opts.TargetUser == "" { + return nil, fmt.Errorf("TargetUser is required when UseJumpHost is true") + } + + // Set defaults for jump host parameters + jumpHostUser := opts.JumpHostUser + if jumpHostUser == "" { + jumpHostUser = opts.SSHUser + } + jumpHostHost := opts.JumpHostHost + if jumpHostHost == "" { + jumpHostHost = opts.SSHHost + } + jumpHostKeyPath := opts.JumpHostKeyPath + if jumpHostKeyPath == "" { + jumpHostKeyPath = opts.SSHKeyPath + } + targetKeyPath := opts.TargetKeyPath + if targetKeyPath == "" { + 
targetKeyPath = opts.SSHKeyPath + } + + // Create SSH client with jump host (retry with exponential backoff) + maxRetries := 3 + retryDelay := 2 * time.Second + var lastErr error + for attempt := 0; attempt < maxRetries; attempt++ { + if attempt > 0 { + // Wait before retry (exponential backoff) + select { + case <-ctx.Done(): + return nil, fmt.Errorf("context cancelled while retrying SSH connection: %w", ctx.Err()) + case <-time.After(retryDelay): + } + retryDelay *= 2 // Exponential backoff + } + + sshClient, lastErr = ssh.NewClientWithJumpHost( + jumpHostUser, jumpHostHost, jumpHostKeyPath, // jump host + opts.TargetUser, opts.TargetHost, targetKeyPath, // target + ) + if lastErr == nil { + break // Success + } + } + if lastErr != nil { + return nil, fmt.Errorf("failed to create SSH client with jump host after %d attempts: %w", maxRetries, lastErr) + } + + masterHost = opts.TargetHost + masterUser = opts.TargetUser + } else { + // Direct connection (no jump host) + var err error + sshClient, err = ssh.NewClient(opts.SSHUser, opts.SSHHost, opts.SSHKeyPath) + if err != nil { + return nil, fmt.Errorf("failed to create SSH client: %w", err) + } + + masterHost = opts.SSHHost + masterUser = opts.SSHUser } // Step 2: Establish SSH tunnel with port forwarding 6445:127.0.0.1:6445 - tunnelInfo, err := ssh.EstablishSSHTunnel(ctx, testSSHClient, "6445") + // Use context.Background() for the tunnel so it persists after the function returns + // The tunnel must remain active for subsequent operations + tunnelInfo, err := ssh.EstablishSSHTunnel(context.Background(), sshClient, "6445") if err != nil { - testSSHClient.Close() - return nil, fmt.Errorf("failed to establish SSH tunnel to test cluster: %w", err) + sshClient.Close() + return nil, fmt.Errorf("failed to establish SSH tunnel: %w", err) } - // Step 3: Get kubeconfig from test cluster master - _, kubeconfigPath, err := internalcluster.GetKubeconfig(ctx, testClusterMasterIP, config.VMSSHUser, config.SSHKeyPath, 
testSSHClient) + // Step 3: Get kubeconfig from cluster master + _, kubeconfigPath, err := internalcluster.GetKubeconfig(ctx, masterHost, masterUser, opts.SSHKeyPath, sshClient) if err != nil { tunnelInfo.StopFunc() - testSSHClient.Close() - return nil, fmt.Errorf("failed to get kubeconfig from test cluster: %w", err) + sshClient.Close() + return nil, fmt.Errorf("failed to get kubeconfig: %w", err) } // Step 4: Update kubeconfig to use the tunnel port (6445) if err := internalcluster.UpdateKubeconfigPort(kubeconfigPath, tunnelInfo.LocalPort); err != nil { tunnelInfo.StopFunc() - testSSHClient.Close() + sshClient.Close() return nil, fmt.Errorf("failed to update kubeconfig port: %w", err) } @@ -231,14 +319,14 @@ func ConnectToCluster(ctx context.Context, baseSSHClient ssh.SSHClient, testClus kubeconfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { tunnelInfo.StopFunc() - testSSHClient.Close() + sshClient.Close() return nil, fmt.Errorf("failed to rebuild kubeconfig from file: %w", err) } // Return resources with active tunnel // Note: The test will use Eventually to check cluster health with CheckClusterHealth return &TestClusterResources{ - SSHClient: testSSHClient, + SSHClient: sshClient, Kubeconfig: kubeconfig, KubeconfigPath: kubeconfigPath, TunnelInfo: tunnelInfo, diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 28dedeb..449a22f 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -41,17 +41,18 @@ import ( var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { var ( - err error - sshclient ssh.SSHClient - setupSSHClient ssh.SSHClient - kubeconfig *rest.Config - kubeconfigPath string - tunnelinfo *ssh.TunnelInfo - clusterDefinition *config.ClusterDefinition - module *deckhouse.Module - virtClient *virtualization.Client - vmResources 
*cluster.VMResources - bootstrapConfig string + err error + sshclient ssh.SSHClient + setupSSHClient ssh.SSHClient + kubeconfig *rest.Config + kubeconfigPath string + tunnelinfo *ssh.TunnelInfo + clusterDefinition *config.ClusterDefinition + module *deckhouse.Module + virtClient *virtualization.Client + vmResources *cluster.VMResources + bootstrapConfig string + testClusterResources *cluster.TestClusterResources ) BeforeAll(func() { @@ -115,14 +116,34 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { // DeferCleanup: Clean up all resources in reverse order of creation (it's a synonym for AfterAll) DeferCleanup(func() { - // Step 0: Re-establish SSH tunnel if needed for VM cleanup - // The tunnel might have been stopped in Step 9, but we need it for VM cleanup + // Step 0: Stop test cluster tunnel if it exists (it uses port 6445, blocking base cluster tunnel) + if testClusterResources != nil && testClusterResources.TunnelInfo != nil && testClusterResources.TunnelInfo.StopFunc != nil { + GinkgoWriter.Printf(" ▶️ Stopping test cluster SSH tunnel on local port %d...\n", testClusterResources.TunnelInfo.LocalPort) + err := testClusterResources.TunnelInfo.StopFunc() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to stop test cluster SSH tunnel: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Test cluster SSH tunnel stopped successfully\n") + } + } + + // Step 0.5: Close test cluster SSH client + if testClusterResources != nil && testClusterResources.SSHClient != nil { + GinkgoWriter.Printf(" ▶️ Closing test cluster SSH client connection...\n") + err := testClusterResources.SSHClient.Close() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to close test cluster SSH client: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Test cluster SSH client closed successfully\n") + } + } + + // Step 1: Re-establish SSH tunnel if needed for VM cleanup + // we need it for VM cleanup if tunnelinfo == nil && sshclient != nil { 
GinkgoWriter.Printf(" ▶️ Re-establishing SSH tunnel for VM cleanup...\n") - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) var tunnelErr error - tunnelinfo, tunnelErr = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") - cancel() + tunnelinfo, tunnelErr = ssh.EstablishSSHTunnel(context.Background(), sshclient, "6445") if tunnelErr != nil { GinkgoWriter.Printf(" ⚠️ Warning: Failed to re-establish SSH tunnel: %v\n", tunnelErr) GinkgoWriter.Printf(" ⚠️ VM cleanup will be skipped due to missing tunnel\n") @@ -131,7 +152,18 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 1: Cleanup setup VM (needs API access via SSH tunnel) + // Step 2: Close setup SSH client connection + if setupSSHClient != nil { + GinkgoWriter.Printf(" ▶️ Closing setup SSH client connection...\n") + err := setupSSHClient.Close() + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Failed to close setup SSH client: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Setup SSH client closed successfully\n") + } + } + + // Step 3: Cleanup setup VM (needs API access via SSH tunnel, but not SSH client) vmRes := vmResources if vmRes != nil && vmRes.SetupVMName != "" { GinkgoWriter.Printf(" ▶️ Removing setup VM %s...\n", vmRes.SetupVMName) @@ -145,7 +177,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 2: Cleanup test cluster VMs if enabled + // Step 4: Cleanup test cluster VMs if enabled if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { if vmRes != nil { GinkgoWriter.Printf(" ▶️ Cleaning up test cluster VMs...\n") @@ -160,18 +192,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 3: Close setup SSH client connection (no longer needed after VM cleanup) - if setupSSHClient != nil { - GinkgoWriter.Printf(" ▶️ Closing setup SSH client connection...\n") - err := setupSSHClient.Close() - if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: 
Failed to close setup SSH client: %v\n", err) - } else { - GinkgoWriter.Printf(" ✅ Setup SSH client closed successfully\n") - } - } - - // Step 4: Stop base cluster SSH tunnel (must be done before closing SSH client) + // Step 5: Stop base cluster SSH tunnel (must be done before closing SSH client) if tunnelinfo != nil && tunnelinfo.StopFunc != nil { GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel on local port %d...\n", tunnelinfo.LocalPort) err := tunnelinfo.StopFunc() @@ -182,7 +203,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } } - // Step 5: Close base cluster SSH client connection + // Step 6: Close base cluster SSH client connection if sshclient != nil { GinkgoWriter.Printf(" ▶️ Closing base cluster SSH client connection...\n") err := sshclient.Close() @@ -196,44 +217,40 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) // BeforeAll - // TODO: Steps 3-5 can be joined into one using cluster.ConnectToCluster function - // Step 3: Establish SSH connection to base cluster (reused for getting kubeconfig) - It("should establish ssh connection to the base cluster", func() { - By(fmt.Sprintf("Connecting to %s@%s using key %s", config.SSHUser, config.SSHHost, config.SSHKeyPath), func() { - GinkgoWriter.Printf(" ▶️ Creating SSH client for %s@%s\n", config.SSHUser, config.SSHHost) - sshclient, err = ssh.NewClient(config.SSHUser, config.SSHHost, config.SSHKeyPath) - Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ SSH connection established successfully\n") - }) - }) + // ---=== TEST BEGIN ===--- - // Step 4: Getting kubeconfig from base cluster (reusing SSH connection to avoid double passphrase prompt) - It("should get kubeconfig from the base cluster", func() { - By("Retrieving kubeconfig from base cluster", func() { - GinkgoWriter.Printf(" ▶️ Fetching kubeconfig from %s\n", config.SSHHost) - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + // Step 1: Connect 
to base cluster (SSH connection, kubeconfig, and tunnel) + It("should connect to the base cluster", func() { + By(fmt.Sprintf("Connecting to base cluster %s@%s", config.SSHUser, config.SSHHost), func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() - kubeconfig, kubeconfigPath, err = internalcluster.GetKubeconfig(ctx, config.SSHHost, config.SSHUser, config.SSHKeyPath, sshclient) - Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ Kubeconfig retrieved and saved to: %s\n", kubeconfigPath) - }) - }) - - // Step 5: Establish SSH tunnel with port forwarding to access Kubernetes API - It("should establish ssh tunnel to the base cluster with port forwarding", func() { - By("Setting up SSH tunnel with port forwarding", func() { - GinkgoWriter.Printf(" ▶️ Establishing SSH tunnel to %s, forwarding port 6445\n", config.SSHHost) - ctx := context.Background() - tunnelinfo, err = ssh.EstablishSSHTunnel(ctx, sshclient, "6445") - Expect(err).NotTo(HaveOccurred()) - Expect(tunnelinfo).NotTo(BeNil()) - Expect(tunnelinfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") - GinkgoWriter.Printf(" ✅ SSH tunnel established on local port: %d\n", tunnelinfo.LocalPort) + GinkgoWriter.Printf(" ▶️ Connecting to base cluster %s@%s\n", config.SSHUser, config.SSHHost) + baseClusterResources, err := cluster.ConnectToCluster(ctx, cluster.ConnectClusterOptions{ + SSHUser: config.SSHUser, + SSHHost: config.SSHHost, + SSHKeyPath: config.SSHKeyPath, + UseJumpHost: false, + }) + Expect(err).NotTo(HaveOccurred(), "Failed to connect to base cluster") + Expect(baseClusterResources).NotTo(BeNil()) + Expect(baseClusterResources.SSHClient).NotTo(BeNil()) + Expect(baseClusterResources.Kubeconfig).NotTo(BeNil()) + Expect(baseClusterResources.TunnelInfo).NotTo(BeNil()) + + // Extract resources for backward compatibility with rest of the test + sshclient = baseClusterResources.SSHClient + kubeconfig = baseClusterResources.Kubeconfig + 
kubeconfigPath = baseClusterResources.KubeconfigPath + tunnelinfo = baseClusterResources.TunnelInfo + + GinkgoWriter.Printf(" ✅ Base cluster connection established successfully\n") + GinkgoWriter.Printf(" ✅ Kubeconfig saved to: %s\n", kubeconfigPath) + GinkgoWriter.Printf(" ✅ SSH tunnel active on local port: %d\n", tunnelinfo.LocalPort) }) }) - // Step 6: Verify virtualization module is Ready before creating VMs + // Step 2: Verify virtualization module is Ready before creating VMs It("should make sure that virtualization module is Ready", func() { By("Checking if virtualization module is Ready", func() { GinkgoWriter.Printf(" ▶️ Getting module with timeout\n") @@ -247,7 +264,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 7: Create test namespace if it doesn't exist + // Step 3: Create test namespace if it doesn't exist It("should ensure test namespace exists", func() { By("Checking and creating test namespace if needed", func() { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) @@ -263,7 +280,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 8: Create virtual machines and wait for them to become Running + // Step 4: Create virtual machines and wait for them to become Running It("should create virtual machines from cluster definition", func() { By("Creating virtual machines", func() { ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) @@ -311,7 +328,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 9: Establish SSH connection to setup node through base cluster master (jump host) + // Step 5: Establish SSH connection to setup node through base cluster master (jump host) It("should establish SSH connection to setup node through base cluster master", func() { By("Obtaining SSH client to setup node through base cluster master", func() { // Note: We don't need to stop the base cluster tunnel 
here. @@ -342,7 +359,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 10: Install Docker on setup node (required for DKP bootstrap) + // Step 6: Install Docker on setup node (required for DKP bootstrap) It("should ensure Docker is installed on the setup node", func() { By("Installing Docker on setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) @@ -355,7 +372,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 11: Prepare bootstrap configuration file from template with cluster-specific values + // Step 7: Prepare bootstrap configuration file from template with cluster-specific values It("should prepare bootstrap config for the setup node", func() { By("Preparing bootstrap config for the setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) @@ -390,7 +407,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 12: Upload private key and config.yml to setup node for DKP bootstrap + // Step 8: Upload private key and config.yml to setup node for DKP bootstrap It("should upload bootstrap files to the setup node", func() { By("Uploading private key and config.yml to setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) @@ -414,7 +431,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 13: Bootstrap cluster from setup node to first master node + // Step 9: Bootstrap cluster from setup node to first master node It("should bootstrap cluster from setup node to first master", func() { By("Bootstrapping cluster from setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 35*time.Minute) @@ -437,7 +454,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 14: Verify cluster is ready + // Step 10: Verify cluster is ready It("should 
verify cluster is ready", func() { By("Verifying cluster is ready", func() { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) @@ -446,7 +463,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { namespace := config.TestClusterNamespace firstMasterHostname := clusterDefinition.Masters[0].Hostname - // Get master IP address (base cluster tunnel should still be active from Step 5) + // Get master IP address (base cluster tunnel should still be active from Step 1, stopped below) masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) @@ -463,9 +480,16 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ Base cluster SSH tunnel stopped successfully\n") } - // Step 2: Establish connection to test cluster (tunnel and kubeconfig) - // This will create a new tunnel on port 6445 to the test cluster - testClusterResources, err := cluster.ConnectToCluster(ctx, sshclient, masterIP) + GinkgoWriter.Printf(" ▶️ Connecting to test cluster master %s through jump host %s@%s\n", masterIP, config.SSHUser, config.SSHHost) + testClusterResources, err = cluster.ConnectToCluster(ctx, cluster.ConnectClusterOptions{ + SSHUser: config.SSHUser, + SSHHost: config.SSHHost, + SSHKeyPath: config.SSHKeyPath, + UseJumpHost: true, + TargetUser: config.VMSSHUser, + TargetHost: masterIP, + TargetKeyPath: config.SSHKeyPath, + }) Expect(err).NotTo(HaveOccurred(), "Failed to establish connection to test cluster") Expect(testClusterResources).NotTo(BeNil()) Expect(testClusterResources.TunnelInfo).NotTo(BeNil(), "Tunnel must remain active") @@ -474,13 +498,13 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ SSH tunnel active on 
local port: %d\n", testClusterResources.TunnelInfo.LocalPort) // Step 2: Check cluster health with Eventually (wait up to 10 minutes for deckhouse to be ready) - GinkgoWriter.Printf(" ⏱️ Waiting for deckhouse deployment to become ready (2/2 pods running)...\n") + GinkgoWriter.Printf(" ⏱️ Waiting for deckhouse deployment to become ready (1 pod with 2/2 containers ready)...\n") Eventually(func() error { return cluster.CheckClusterHealth(ctx, testClusterResources.Kubeconfig) }).WithTimeout(10*time.Minute).WithPolling(20*time.Second).Should(Succeed(), - "Deckhouse deployment should have 2/2 pods running within 10 minutes") + "Deckhouse deployment should have 1 pod with 2/2 containers ready within 10 minutes") - GinkgoWriter.Printf(" ✅ Cluster is ready (deckhouse deployment: 2/2 pods running)\n") + GinkgoWriter.Printf(" ✅ Cluster is ready (deckhouse deployment: 1 pod with 2/2 containers ready)\n") }) }) }) // Describe: Cluster Creation From 82b77d7d1f38db7db7f6a0d4baf2482b69be5602 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Mon, 22 Dec 2025 12:56:43 +0300 Subject: [PATCH 33/48] Migration to Deckhouse entirely --- go.mod | 21 +- go.sum | 61 ++- internal/kubernetes/deckhouse/client.go | 35 +- internal/kubernetes/deckhouse/modules.go | 427 +----------------- .../kubernetes/deckhouse/staticinstance.go | 20 - internal/kubernetes/deckhouse/types.go | 95 +--- 6 files changed, 105 insertions(+), 554 deletions(-) delete mode 100644 internal/kubernetes/deckhouse/staticinstance.go diff --git a/go.mod b/go.mod index e5060e5..a67a4fd 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,10 @@ go 1.24.6 toolchain go1.24.11 require ( + github.com/deckhouse/deckhouse v1.74.0 github.com/deckhouse/virtualization/api v1.0.0 - github.com/onsi/ginkgo/v2 v2.22.0 - github.com/onsi/gomega v1.36.1 + github.com/onsi/ginkgo/v2 v2.23.3 + github.com/onsi/gomega v1.37.0 github.com/pkg/sftp v1.13.10 golang.org/x/crypto v0.46.0 golang.org/x/term v0.38.0 @@ -19,19 +20,21 @@ require ( ) require ( + 
github.com/Masterminds/semver/v3 v3.3.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-openapi/jsonpointer v0.22.1 // indirect + github.com/go-openapi/jsonreference v0.21.2 // indirect github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-openapi/swag/jsonname v0.25.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect github.com/google/uuid v1.6.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -48,12 +51,12 @@ require ( go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/net v0.47.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.32.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.39.0 // indirect - google.golang.org/protobuf v1.36.5 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect k8s.io/apiextensions-apiserver v0.34.1 // indirect diff --git a/go.sum b/go.sum index b5e1a4b..1715c7c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ cloud.google.com/go v0.26.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= +github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= @@ -18,6 +20,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckhouse/deckhouse v1.74.0 h1:a/gEuLKutoV6ReWaBWMDJ+VLlOkkCwS4VMvR/sHQscw= +github.com/deckhouse/deckhouse v1.74.0/go.mod h1:qMuvDbP8AYghXkWmDjoFPc6r1w9uw/cWxl/hmvA0BzA= github.com/deckhouse/virtualization/api v1.0.0 h1:q4TvC74tpjk25k0byXJCYP4HjvRexBSeI0cC8QeCMTQ= github.com/deckhouse/virtualization/api v1.0.0/go.mod h1:meTeGulR+xwnvt0pTGsoI14YhGe0lHUVyAfhZsoQyeQ= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= @@ -42,26 +46,26 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7 github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 
h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk= +github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM= github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/jsonreference v0.21.2 h1:Wxjda4M/BBQllegefXrY/9aq1fxBA8sI5M/lFU6tSWU= +github.com/go-openapi/jsonreference v0.21.2/go.mod h1:pp3PEjIsJ9CZDGCNOyXIQxsNuroxm8FAJ/+quA0yKzQ= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.14/go.mod 
h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/swag/jsonname v0.25.1 h1:Sgx+qbwa4ej6AomWC6pEfXrA6uP2RkaNjA9BR8a1RJU= +github.com/go-openapi/swag/jsonname v0.25.1/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= @@ -92,8 +96,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -114,7 +118,6 @@ 
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -148,15 +151,15 @@ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108 github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= -github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= -github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0= +github.com/onsi/ginkgo/v2 v2.23.3/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs= -github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= -github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/onsi/gomega 
v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= +github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183 h1:t/CahSnpqY46sQR01SoS+Jt0jtjgmhgE6lFmRnO4q70= github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183/go.mod h1:4VWG+W22wrB4HfBL88P40DxLEpSOaiBVxUnfalfJo9k= github.com/openshift/custom-resource-status v1.1.2 h1:C3DL44LEbvlbItfd8mT5jWrqPfHnSOQoQf/sypqA6A4= @@ -165,8 +168,9 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU= github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -176,27 +180,22 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rogpeppe/go-internal v1.13.1 
h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -246,8 +245,8 @@ golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -291,8 +290,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 
h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -333,8 +332,8 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/kubernetes/deckhouse/client.go b/internal/kubernetes/deckhouse/client.go index af20937..c448a9b 100644 --- a/internal/kubernetes/deckhouse/client.go +++ b/internal/kubernetes/deckhouse/client.go @@ -16,5 +16,38 @@ limitations under the License. 
package deckhouse -// TODO: Implement Deckhouse client interface +import ( + "context" + deckhousev1alpha1 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha1" + deckhousev1alpha2 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha2" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Client provides access to deckhouse resources +type Client struct { + client client.Client +} + +// NewClient creates a new deckhouse client from a rest.Config +// It uses controller-runtime client which provides type-safe access to CRDs +func NewClient(ctx context.Context, config *rest.Config) (*Client, error) { + scheme := runtime.NewScheme() + + // Register deckhouse API types with the scheme + if err := deckhousev1alpha1.SchemeBuilder.AddToScheme(scheme); err != nil { + return nil, err + } + if err := deckhousev1alpha2.SchemeBuilder.AddToScheme(scheme); err != nil { + return nil, err + } + + cl, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return nil, err + } + + return &Client{client: cl}, nil +} diff --git a/internal/kubernetes/deckhouse/modules.go b/internal/kubernetes/deckhouse/modules.go index 99104a4..e5b0eb3 100644 --- a/internal/kubernetes/deckhouse/modules.go +++ b/internal/kubernetes/deckhouse/modules.go @@ -19,448 +19,57 @@ package deckhouse import ( "context" "fmt" - "time" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/client-go/dynamic" + deckhousev1alpha1 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha1" + deckhousev1alpha2 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha2" "k8s.io/client-go/rest" -) - -const ( - // ModuleGroupVersion is the API group and version for Module resources - ModuleGroupVersion = "deckhouse.io/v1alpha1" - // 
ModuleResource is the resource name for Module - ModuleResource = "modules" - // ModuleConfigGroupVersion is the API group and version for ModuleConfig resources - ModuleConfigGroupVersion = "deckhouse.io/v1alpha1" - // ModuleConfigResource is the resource name for ModuleConfig - ModuleConfigResource = "moduleconfigs" - // ModulePullOverrideGroupVersion is the API group and version for ModulePullOverride resources - ModulePullOverrideGroupVersion = "deckhouse.io/v1alpha2" - // ModulePullOverrideResource is the resource name for ModulePullOverride - ModulePullOverrideResource = "modulepulloverrides" + "sigs.k8s.io/controller-runtime/pkg/client" ) // GetModule retrieves detailed information about a single module by name func GetModule(ctx context.Context, config *rest.Config, moduleName string) (*Module, error) { - client, err := dynamic.NewForConfig(config) + cl, err := NewClient(ctx, config) if err != nil { - return nil, fmt.Errorf("failed to create dynamic client: %w", err) - } - - gvr := schema.GroupVersionResource{ - Group: "deckhouse.io", - Version: "v1alpha1", - Resource: ModuleResource, + return nil, fmt.Errorf("failed to create deckhouse client: %w", err) } - // Module is a cluster-scoped resource, so we use empty namespace - unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) - if err != nil { + module := &deckhousev1alpha1.Module{} + key := client.ObjectKey{Name: moduleName} + if err := cl.client.Get(ctx, key, module); err != nil { return nil, fmt.Errorf("failed to get module %s: %w", moduleName, err) } - module, err := unstructuredToModule(unstructuredObj) - if err != nil { - return nil, fmt.Errorf("failed to convert unstructured module %s: %w", moduleName, err) - } - return module, nil } -// unstructuredToModule converts an unstructured.Unstructured object to a Module struct -func unstructuredToModule(obj *unstructured.Unstructured) (*Module, error) { - module := &Module{} - - // Set TypeMeta - module.APIVersion = 
obj.GetAPIVersion() - module.Kind = obj.GetKind() - - // Set ObjectMeta - module.ObjectMeta = metav1.ObjectMeta{ - Name: obj.GetName(), - Namespace: obj.GetNamespace(), - UID: obj.GetUID(), - ResourceVersion: obj.GetResourceVersion(), - Generation: obj.GetGeneration(), - CreationTimestamp: obj.GetCreationTimestamp(), - Labels: obj.GetLabels(), - Annotations: obj.GetAnnotations(), - } - - // Extract properties - if properties, found, err := unstructured.NestedMap(obj.Object, "properties"); err != nil { - return nil, fmt.Errorf("failed to extract properties: %w", err) - } else if found { - if err := extractModuleProperties(properties, &module.Properties); err != nil { - return nil, fmt.Errorf("failed to extract module properties: %w", err) - } - } - - // Extract status - if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { - return nil, fmt.Errorf("failed to extract status: %w", err) - } else if found { - if err := extractModuleStatus(status, &module.Status); err != nil { - return nil, fmt.Errorf("failed to extract module status: %w", err) - } - } - - return module, nil -} - -// extractModuleProperties extracts ModuleProperties from a map -func extractModuleProperties(data map[string]interface{}, props *ModuleProperties) error { - if critical, found, err := unstructured.NestedBool(data, "critical"); err != nil { - return err - } else if found { - props.Critical = critical - } - - if disableOptions, found, err := unstructured.NestedMap(data, "disableOptions"); err != nil { - return err - } else if found && len(disableOptions) > 0 { - props.DisableOptions = &DisableOptions{} - if confirmation, found, err := unstructured.NestedBool(disableOptions, "confirmation"); err != nil { - return err - } else if found { - props.DisableOptions.Confirmation = confirmation - } - if message, found, err := unstructured.NestedString(disableOptions, "message"); err != nil { - return err - } else if found { - props.DisableOptions.Message = message - } - } - 
- if namespace, found, err := unstructured.NestedString(data, "namespace"); err != nil { - return err - } else if found { - props.Namespace = namespace - } - - if releaseChannel, found, err := unstructured.NestedString(data, "releaseChannel"); err != nil { - return err - } else if found { - props.ReleaseChannel = releaseChannel - } - - if source, found, err := unstructured.NestedString(data, "source"); err != nil { - return err - } else if found { - props.Source = source - } - - if stage, found, err := unstructured.NestedString(data, "stage"); err != nil { - return err - } else if found { - props.Stage = stage - } - - if subsystems, found, err := unstructured.NestedStringSlice(data, "subsystems"); err != nil { - return err - } else if found { - props.Subsystems = subsystems - } - - if version, found, err := unstructured.NestedString(data, "version"); err != nil { - return err - } else if found { - props.Version = version - } - - if weight, found, err := unstructured.NestedInt64(data, "weight"); err != nil { - return err - } else if found { - props.Weight = int(weight) - } - - return nil -} - -// extractModuleStatus extracts ModuleStatus from a map -func extractModuleStatus(data map[string]interface{}, status *ModuleStatus) error { - if conditions, found, err := unstructured.NestedSlice(data, "conditions"); err != nil { - return err - } else if found { - status.Conditions = make([]ModuleCondition, 0, len(conditions)) - for _, cond := range conditions { - condMap, ok := cond.(map[string]interface{}) - if !ok { - continue - } - condition := ModuleCondition{} - if lastProbeTime, found, err := unstructured.NestedString(condMap, "lastProbeTime"); err != nil { - return err - } else if found { - if t, err := time.Parse(time.RFC3339, lastProbeTime); err == nil { - condition.LastProbeTime = metav1.NewTime(t) - } - } - if lastTransitionTime, found, err := unstructured.NestedString(condMap, "lastTransitionTime"); err != nil { - return err - } else if found { - if t, err := 
time.Parse(time.RFC3339, lastTransitionTime); err == nil { - condition.LastTransitionTime = metav1.NewTime(t) - } - } - if statusStr, found, err := unstructured.NestedString(condMap, "status"); err != nil { - return err - } else if found { - condition.Status = statusStr - } - if typeStr, found, err := unstructured.NestedString(condMap, "type"); err != nil { - return err - } else if found { - condition.Type = typeStr - } - status.Conditions = append(status.Conditions, condition) - } - } - - if hooksState, found, err := unstructured.NestedString(data, "hooksState"); err != nil { - return err - } else if found { - status.HooksState = hooksState - } - - if phase, found, err := unstructured.NestedString(data, "phase"); err != nil { - return err - } else if found { - status.Phase = phase - } - - return nil -} - // GetModuleConfig retrieves detailed information about a ModuleConfig by name func GetModuleConfig(ctx context.Context, config *rest.Config, moduleName string) (*ModuleConfig, error) { - client, err := dynamic.NewForConfig(config) + cl, err := NewClient(ctx, config) if err != nil { - return nil, fmt.Errorf("failed to create dynamic client: %w", err) + return nil, fmt.Errorf("failed to create deckhouse client: %w", err) } - gvr := schema.GroupVersionResource{ - Group: "deckhouse.io", - Version: "v1alpha1", - Resource: ModuleConfigResource, - } - - // ModuleConfig is a cluster-scoped resource, so we use empty namespace - unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) - if err != nil { + moduleConfig := &deckhousev1alpha1.ModuleConfig{} + key := client.ObjectKey{Name: moduleName} + if err := cl.client.Get(ctx, key, moduleConfig); err != nil { return nil, fmt.Errorf("failed to get moduleconfig %s: %w", moduleName, err) } - moduleConfig, err := unstructuredToModuleConfig(unstructuredObj) - if err != nil { - return nil, fmt.Errorf("failed to convert unstructured moduleconfig %s: %w", moduleName, err) - } - return moduleConfig, 
nil } // GetModulePullOverride retrieves detailed information about a ModulePullOverride by name func GetModulePullOverride(ctx context.Context, config *rest.Config, moduleName string) (*ModulePullOverride, error) { - client, err := dynamic.NewForConfig(config) + cl, err := NewClient(ctx, config) if err != nil { - return nil, fmt.Errorf("failed to create dynamic client: %w", err) + return nil, fmt.Errorf("failed to create deckhouse client: %w", err) } - gvr := schema.GroupVersionResource{ - Group: "deckhouse.io", - Version: "v1alpha2", - Resource: ModulePullOverrideResource, - } - - // ModulePullOverride is a cluster-scoped resource, so we use empty namespace - unstructuredObj, err := client.Resource(gvr).Get(ctx, moduleName, metav1.GetOptions{}) - if err != nil { + modulePullOverride := &deckhousev1alpha2.ModulePullOverride{} + key := client.ObjectKey{Name: moduleName} + if err := cl.client.Get(ctx, key, modulePullOverride); err != nil { return nil, fmt.Errorf("failed to get modulepulloverride %s: %w", moduleName, err) } - modulePullOverride, err := unstructuredToModulePullOverride(unstructuredObj) - if err != nil { - return nil, fmt.Errorf("failed to convert unstructured modulepulloverride %s: %w", moduleName, err) - } - - return modulePullOverride, nil -} - -// unstructuredToModuleConfig converts an unstructured.Unstructured object to a ModuleConfig struct -func unstructuredToModuleConfig(obj *unstructured.Unstructured) (*ModuleConfig, error) { - moduleConfig := &ModuleConfig{} - - // Set TypeMeta - moduleConfig.APIVersion = obj.GetAPIVersion() - moduleConfig.Kind = obj.GetKind() - - // Set ObjectMeta - moduleConfig.ObjectMeta = metav1.ObjectMeta{ - Name: obj.GetName(), - Namespace: obj.GetNamespace(), - UID: obj.GetUID(), - ResourceVersion: obj.GetResourceVersion(), - Generation: obj.GetGeneration(), - CreationTimestamp: obj.GetCreationTimestamp(), - Labels: obj.GetLabels(), - Annotations: obj.GetAnnotations(), - Finalizers: obj.GetFinalizers(), - } - - // 
Extract spec - if spec, found, err := unstructured.NestedMap(obj.Object, "spec"); err != nil { - return nil, fmt.Errorf("failed to extract spec: %w", err) - } else if found { - if err := extractModuleConfigSpec(spec, &moduleConfig.Spec); err != nil { - return nil, fmt.Errorf("failed to extract moduleconfig spec: %w", err) - } - } - - // Extract status - if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { - return nil, fmt.Errorf("failed to extract status: %w", err) - } else if found { - if err := extractModuleConfigStatus(status, &moduleConfig.Status); err != nil { - return nil, fmt.Errorf("failed to extract moduleconfig status: %w", err) - } - } - - return moduleConfig, nil -} - -// extractModuleConfigSpec extracts ModuleConfigSpec from a map -func extractModuleConfigSpec(data map[string]interface{}, spec *ModuleConfigSpec) error { - if enabled, found, err := unstructured.NestedBool(data, "enabled"); err != nil { - return err - } else if found { - spec.Enabled = enabled - } - - if settings, found, err := unstructured.NestedMap(data, "settings"); err != nil { - return err - } else if found { - spec.Settings = settings - } - - if version, found, err := unstructured.NestedInt64(data, "version"); err != nil { - return err - } else if found { - spec.Version = int(version) - } - - return nil -} - -// extractModuleConfigStatus extracts ModuleConfigStatus from a map -func extractModuleConfigStatus(data map[string]interface{}, status *ModuleConfigStatus) error { - if message, found, err := unstructured.NestedString(data, "message"); err != nil { - return err - } else if found { - status.Message = message - } - - if version, found, err := unstructured.NestedString(data, "version"); err != nil { - return err - } else if found { - status.Version = version - } - - return nil -} - -// unstructuredToModulePullOverride converts an unstructured.Unstructured object to a ModulePullOverride struct -func unstructuredToModulePullOverride(obj 
*unstructured.Unstructured) (*ModulePullOverride, error) { - modulePullOverride := &ModulePullOverride{} - - // Set TypeMeta - modulePullOverride.APIVersion = obj.GetAPIVersion() - modulePullOverride.Kind = obj.GetKind() - - // Set ObjectMeta - modulePullOverride.ObjectMeta = metav1.ObjectMeta{ - Name: obj.GetName(), - Namespace: obj.GetNamespace(), - UID: obj.GetUID(), - ResourceVersion: obj.GetResourceVersion(), - Generation: obj.GetGeneration(), - CreationTimestamp: obj.GetCreationTimestamp(), - Labels: obj.GetLabels(), - Annotations: obj.GetAnnotations(), - Finalizers: obj.GetFinalizers(), - } - - // Extract spec - if spec, found, err := unstructured.NestedMap(obj.Object, "spec"); err != nil { - return nil, fmt.Errorf("failed to extract spec: %w", err) - } else if found { - if err := extractModulePullOverrideSpec(spec, &modulePullOverride.Spec); err != nil { - return nil, fmt.Errorf("failed to extract modulepulloverride spec: %w", err) - } - } - - // Extract status - if status, found, err := unstructured.NestedMap(obj.Object, "status"); err != nil { - return nil, fmt.Errorf("failed to extract status: %w", err) - } else if found { - if err := extractModulePullOverrideStatus(status, &modulePullOverride.Status); err != nil { - return nil, fmt.Errorf("failed to extract modulepulloverride status: %w", err) - } - } - return modulePullOverride, nil } - -// extractModulePullOverrideSpec extracts ModulePullOverrideSpec from a map -func extractModulePullOverrideSpec(data map[string]interface{}, spec *ModulePullOverrideSpec) error { - if imageTag, found, err := unstructured.NestedString(data, "imageTag"); err != nil { - return err - } else if found { - spec.ImageTag = imageTag - } - - if rollback, found, err := unstructured.NestedBool(data, "rollback"); err != nil { - return err - } else if found { - spec.Rollback = rollback - } - - if scanInterval, found, err := unstructured.NestedString(data, "scanInterval"); err != nil { - return err - } else if found { - 
spec.ScanInterval = scanInterval - } - - return nil -} - -// extractModulePullOverrideStatus extracts ModulePullOverrideStatus from a map -func extractModulePullOverrideStatus(data map[string]interface{}, status *ModulePullOverrideStatus) error { - if imageDigest, found, err := unstructured.NestedString(data, "imageDigest"); err != nil { - return err - } else if found { - status.ImageDigest = imageDigest - } - - if message, found, err := unstructured.NestedString(data, "message"); err != nil { - return err - } else if found { - status.Message = message - } - - if updatedAt, found, err := unstructured.NestedString(data, "updatedAt"); err != nil { - return err - } else if found { - status.UpdatedAt = updatedAt - } - - if weight, found, err := unstructured.NestedInt64(data, "weight"); err != nil { - return err - } else if found { - status.Weight = int(weight) - } - - return nil -} diff --git a/internal/kubernetes/deckhouse/staticinstance.go b/internal/kubernetes/deckhouse/staticinstance.go deleted file mode 100644 index a4d9df7..0000000 --- a/internal/kubernetes/deckhouse/staticinstance.go +++ /dev/null @@ -1,20 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package deckhouse - -// TODO: Implement static instance operations - diff --git a/internal/kubernetes/deckhouse/types.go b/internal/kubernetes/deckhouse/types.go index e08986b..ccf625a 100644 --- a/internal/kubernetes/deckhouse/types.go +++ b/internal/kubernetes/deckhouse/types.go @@ -17,91 +17,18 @@ limitations under the License. package deckhouse import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + deckhousev1alpha1 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha1" + deckhousev1alpha2 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha2" ) -// Module represents a Deckhouse Module custom resource -type Module struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - Properties ModuleProperties `json:"properties,omitzero"` - Status ModuleStatus `json:"status,omitzero"` -} +// Re-export deckhouse types for convenience +type ( + // Module represents a Deckhouse Module custom resource + Module = deckhousev1alpha1.Module -// ModuleProperties contains the properties of a Module -type ModuleProperties struct { - Critical bool `json:"critical,omitempty"` - DisableOptions *DisableOptions `json:"disableOptions,omitzero"` - Namespace string `json:"namespace,omitempty"` - ReleaseChannel string `json:"releaseChannel,omitempty"` - Source string `json:"source,omitempty"` - Stage string `json:"stage,omitempty"` - Subsystems []string `json:"subsystems,omitempty"` - Version string `json:"version,omitempty"` - Weight int `json:"weight,omitempty"` -} + // ModuleConfig represents a Deckhouse ModuleConfig custom resource + ModuleConfig = deckhousev1alpha1.ModuleConfig -// DisableOptions contains options for disabling a module -type DisableOptions struct { - Confirmation bool `json:"confirmation,omitempty"` - Message string `json:"message,omitempty"` -} - -// ModuleStatus contains the status of a Module -type ModuleStatus struct { - Conditions []ModuleCondition 
`json:"conditions,omitzero"` - HooksState string `json:"hooksState,omitempty"` - Phase string `json:"phase,omitempty"` -} - -// ModuleCondition represents a condition of a Module -type ModuleCondition struct { - LastProbeTime metav1.Time `json:"lastProbeTime,omitzero"` - LastTransitionTime metav1.Time `json:"lastTransitionTime,omitzero"` - Status string `json:"status,omitempty"` - Type string `json:"type,omitempty"` -} - -// ModuleConfig represents a Deckhouse ModuleConfig custom resource -type ModuleConfig struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - Spec ModuleConfigSpec `json:"spec,omitzero"` - Status ModuleConfigStatus `json:"status,omitzero"` -} - -// ModuleConfigSpec contains the specification of a ModuleConfig -type ModuleConfigSpec struct { - Enabled bool `json:"enabled,omitempty"` - Settings map[string]interface{} `json:"settings,omitempty"` - Version int `json:"version,omitempty"` -} - -// ModuleConfigStatus contains the status of a ModuleConfig -type ModuleConfigStatus struct { - Message string `json:"message,omitempty"` - Version string `json:"version,omitempty"` -} - -// ModulePullOverride represents a Deckhouse ModulePullOverride custom resource -type ModulePullOverride struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - Spec ModulePullOverrideSpec `json:"spec,omitzero"` - Status ModulePullOverrideStatus `json:"status,omitzero"` -} - -// ModulePullOverrideSpec contains the specification of a ModulePullOverride -type ModulePullOverrideSpec struct { - ImageTag string `json:"imageTag,omitempty"` - Rollback bool `json:"rollback,omitempty"` - ScanInterval string `json:"scanInterval,omitempty"` -} - -// ModulePullOverrideStatus contains the status of a ModulePullOverride -type ModulePullOverrideStatus struct { - ImageDigest string `json:"imageDigest,omitempty"` - Message string `json:"message,omitempty"` - UpdatedAt string `json:"updatedAt,omitempty"` - Weight int 
`json:"weight,omitempty"` -} + // ModulePullOverride represents a Deckhouse ModulePullOverride custom resource + ModulePullOverride = deckhousev1alpha2.ModulePullOverride +) From f209e5472408de7af465671d4af0b83bcee731f6 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 13:53:35 +0300 Subject: [PATCH 34/48] Nodes are added, modules are not enabled yet --- README.md | 73 +-- internal/config/config.go | 8 +- internal/config/env.go | 44 +- internal/config/types.go | 15 +- internal/infrastructure/ssh/client.go | 47 +- internal/kubernetes/core/node.go | 51 +- internal/kubernetes/core/secret.go | 76 +++ internal/kubernetes/deckhouse/modules.go | 117 ++++ internal/kubernetes/deckhouse/nodegroups.go | 63 ++- pkg/cluster/TODO.md | 92 +++ pkg/cluster/cluster.go | 54 +- pkg/cluster/modules.go | 523 ++++++++++++++++++ pkg/cluster/nodegroup.go | 46 ++ pkg/cluster/secrets.go | 105 ++++ pkg/cluster/setup.go | 416 +++++++++++++- pkg/cluster/vms.go | 111 +++- .../cluster_creation_test.go | 239 +++++--- tests/cluster-creation/cluster_config.yml | 56 -- .../cluster_creation_suite_test.go | 33 -- .../cluster-creation/cluster_creation_test.go | 81 --- 20 files changed, 1906 insertions(+), 344 deletions(-) create mode 100644 internal/kubernetes/core/secret.go create mode 100644 pkg/cluster/TODO.md create mode 100644 pkg/cluster/modules.go create mode 100644 pkg/cluster/nodegroup.go create mode 100644 pkg/cluster/secrets.go delete mode 100644 tests/cluster-creation/cluster_config.yml delete mode 100644 tests/cluster-creation/cluster_creation_suite_test.go delete mode 100644 tests/cluster-creation/cluster_creation_test.go diff --git a/README.md b/README.md index 90a7296..788b657 100644 --- a/README.md +++ b/README.md @@ -19,42 +19,47 @@ Step-by-step test that creates a test cluster incrementally, validating each sta 2. Virtualization module readiness check - Verifies virtualization module is Ready 3. Test namespace creation - Creates test namespace if it doesn't exist 4. 
Virtual machine creation and provisioning - Creates VMs and waits for them to become Running -5. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host -6. Docker installation on setup node - Installs Docker (required for DKP bootstrap) -7. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values -8. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap -9. Cluster bootstrap - Bootstraps Kubernetes cluster from setup node to first master node -10. Cluster readiness verification - Verifies cluster is ready by checking deckhouse deployment +5. VM information gathering - Gathers IP addresses and other information for all VMs +6. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host +7. Docker installation on setup node - Installs Docker (required for DKP bootstrap) +8. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values +9. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap +10. Cluster bootstrap - Bootstraps Kubernetes cluster from setup node to first master node +11. NodeGroup creation for workers - Creates static NodeGroup for worker nodes +12. Cluster readiness verification - Verifies cluster is ready by checking deckhouse deployment +13. Node addition to cluster - Adds remaining master nodes and all worker nodes to the cluster +14. Module enablement and configuration - Enables and configures modules from cluster definition +15. Module readiness verification - Waits for all modules to become Ready in the test cluster ## Environment Variables -### Required environment variables +### Ready-to-use setup script -- **`TEST_CLUSTER_CREATE_MODE`** - Cluster creation mode. 
Must be set to either: - - `alwaysUseExisting` - Use existing cluster - - `alwaysCreateNew` - Create new cluster +Copy and customize the following script with your values: -- **`DKP_LICENSE_KEY`** - DKP license key for cluster deployment (see license token at license.deckhouse.io) - -- **`REGISTRY_DOCKER_CFG`** - dockerRegistryCfg for downloading images from Deckhouse registry (see license.deckhouse.io) - -### Optional (with defaults) - -- **`YAML_CONFIG_FILENAME`** - YAML configuration file name (default: `cluster_config.yml`) - -- **`SSH_USER`** - SSH username for base cluster connection (default: `a.yakubov`) -- **`SSH_HOST`** - SSH hostname/IP for base cluster (default: `94.26.231.181`) -- **`SSH_KEY_PATH`** - Path to SSH private key (default: `~/.ssh/id_rsa`) -- **`SSH_PASSPHRASE`** - Passphrase for SSH private key (no default) - -- **`SSH_VM_USER`** - SSH username for VM access (default: `cloud`) -- **`SSH_VM_PUBLIC_KEY`** - SSH public key to deploy to VMs (default: hardcoded key) - -- **`TEST_CLUSTER_NAMESPACE`** - Namespace for test cluster deployment (default: `e2e-test-cluster`) -- **`TEST_CLUSTER_STORAGE_CLASS`** - Storage class for test cluster (default: `rsc-test-r2-local`) -- **`TEST_CLUSTER_CLEANUP`** - Whether to cleanup test cluster after tests (default: `false`, set to `true` or `True` to enable) +```bash +#!/bin/bash + +# Required environment variables (must be set) +export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' # or 'alwaysUseExisting' +export DKP_LICENSE_KEY='your-license-key-here' # Get from license.deckhouse.io +export REGISTRY_DOCKER_CFG='your-docker-registry-cfg-here' # Get from license.deckhouse.io +export SSH_USER='your-ssh-user' # SSH username for base cluster connection +export SSH_HOST='your-ssh-host' # SSH hostname/IP for base cluster + +# Optional environment variables with defaults (customize as needed) +export YAML_CONFIG_FILENAME='cluster_config.yml' # Default: cluster_config.yml +export SSH_KEY_PATH='~/.ssh/id_rsa' # Default: 
~/.ssh/id_rsa +export SSH_PASSPHRASE='' # Optional: passphrase for SSH private key +export SSH_VM_USER='cloud' # Default: cloud +export SSH_VM_PUBLIC_KEY='ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local' # Default: hardcoded key +export TEST_CLUSTER_NAMESPACE='e2e-test-cluster' # Default: e2e-test-cluster +export TEST_CLUSTER_STORAGE_CLASS='rsc-test-r2-local' # Default: rsc-test-r2-local +export TEST_CLUSTER_CLEANUP='false' # Default: false (set to 'true' or 'True' to enable cleanup) +export KUBE_CONFIG_PATH='' # Optional: fallback path to kubeconfig if SSH retrieval fails +``` -- **`KUBE_CONFIG_PATH`** - Fallback path to kubeconfig file if SSH retrieval fails (no default) +**Note:** The `SSH_VM_PUBLIC_KEY` default value is a hardcoded public key. You can replace it with your own SSH public key if needed. 
## Configuration Parameters @@ -92,9 +97,15 @@ go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus ### Example with environment variables ```bash +# Source the setup script (or copy the exports from above) +source setup_env.sh # if you saved the script above + +# Or set variables inline export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' export DKP_LICENSE_KEY='your-license-key' -export REGISTRY_DOCKER_CFG='base64-encoded-docker-config-json' +export REGISTRY_DOCKER_CFG='your-docker-registry-cfg' +export SSH_USER='your-ssh-user' +export SSH_HOST='your-ssh-host' export SSH_PASSPHRASE='your-passphrase' export TEST_CLUSTER_CLEANUP='true' diff --git a/internal/config/config.go b/internal/config/config.go index 0931f91..f98c6dd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -34,4 +34,10 @@ var DefaultSetupVM = ClusterNode{ } // VMsRunningTimeout is the timeout for waiting for all VMs to become Running state -const VMsRunningTimeout = 20 * time.Minute +const ( + VMsRunningTimeout = 20 * time.Minute + NodesReadyTimeout = 15 * time.Minute + DKPDeployTimeout = 30 * time.Minute // Timeout for DKP deployment + ModuleDeployTimeout = 20 * time.Minute // Timeout for module deployment + HostReadyTimeout = 10 * time.Minute // Timeout for hosts to be ready +) diff --git a/internal/config/env.go b/internal/config/env.go index 7824deb..b2a91b7 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -25,22 +25,26 @@ var ( // SSH credentials to connect to BASE cluster SSHPassphrase = os.Getenv("SSH_PASSPHRASE") - SSHUser = os.Getenv("SSH_USER") - SSHUserDefaultValue = "a.yakubov" + SSHUser = os.Getenv("SSH_USER") + //SSHUserDefaultValue = "a.yakubov" - SSHKeyPath = os.Getenv("SSH_KEY_PATH") - SSHKeyPathDefaultValue = "~/.ssh/id_rsa" + // Private key. Can be either path for a file or a base64 encoded string. 
+ SSHPrivateKey = os.Getenv("SSH_PRIVATE_KEY") + SSHPrivateKeyDefaultValue = "~/.ssh/id_rsa" - SSHHost = os.Getenv("SSH_HOST") - SSHHostDefaultValue = "94.26.231.181" + // Public key. Can be either path to a file or a plain-text string. + SSHPublicKey = os.Getenv("SSH_PUBLIC_KEY") + //VMSSHPublicKeyDefaultValue = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" + SSHPublicKeyDefaultValue = "~/.ssh/id_rsa.pub" + + // Base cluster SSH host + SSHHost = os.Getenv("SSH_HOST") + //SSHHostDefaultValue = "94.26.231.181" // SSH credentials to deploy to VM VMSSHUser = os.Getenv("SSH_VM_USER") VMSSHUserDefaultValue = "cloud" - VMSSHPublicKey = os.Getenv("SSH_VM_PUBLIC_KEY") - VMSSHPublicKeyDefaultValue = "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local" - // KubeConfigPath is the path to a kubeconfig file. If SSH retrieval fails (e.g., sudo requires password), // this path will be used as a fallback. If not set and SSH fails, the user will be notified to download // the kubeconfig manually and set this environment variable, test will fail. @@ -79,20 +83,14 @@ func ValidateEnvironment() error { TestClusterCleanup = TestClusterCleanupDefaultValue } - if SSHKeyPath == "" { - SSHKeyPath = SSHKeyPathDefaultValue - } - if SSHUser == "" { - SSHUser = SSHUserDefaultValue - } - if SSHHost == "" { - SSHHost = SSHHostDefaultValue + if SSHPrivateKey == "" { + SSHPrivateKey = SSHPrivateKeyDefaultValue } if VMSSHUser == "" { VMSSHUser = VMSSHUserDefaultValue } - if VMSSHPublicKey == "" { - VMSSHPublicKey = VMSSHPublicKeyDefaultValue + if SSHPublicKey == "" { + SSHPublicKey = SSHPublicKeyDefaultValue } if TestClusterNamespace == "" { TestClusterNamespace = TestClusterNamespaceDefaultValue @@ -102,6 +100,14 @@ func ValidateEnvironment() error { } // There are no default values for these variables and they must be set! Otherwise, the test will fail. 
+ if SSHUser == "" { + return fmt.Errorf("SSH_USER environment variable is required but not set.") + } + + if SSHHost == "" { + return fmt.Errorf("SSH_HOST environment variable is required but not set.") + } + if DKPLicenseKey == "" { return fmt.Errorf("DKP_LICENSE_KEY environment variable is required but not set. ") } diff --git a/internal/config/types.go b/internal/config/types.go index 830b5cb..73aca8a 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -18,7 +18,6 @@ package config import ( "fmt" - "time" "gopkg.in/yaml.v3" ) @@ -50,15 +49,15 @@ type OSType struct { // ClusterNode defines a single node in the cluster type ClusterNode struct { Hostname string `yaml:"hostname"` - IPAddress string `yaml:"ipAddress,omitempty"` // Required for bare-metal, optional for VM + IPAddress string `yaml:"ipAddress,omitempty"` // Required for bare-metal, filled in for VM when gathering VM info OSType OSType `yaml:"osType"` // Required for VM, optional for bare-metal (custom unmarshaler handles string -> OSType conversion) HostType HostType `yaml:"hostType"` Role ClusterRole `yaml:"role"` // VM-specific fields (only used when HostType == HostTypeVM) - CPU int `yaml:"cpu"` // Required for VM + CPU int `yaml:"cpu"` // Required for VM CoreFraction *int `yaml:"coreFraction,omitempty"` // Optional for VM, CPU core fraction as percentage (e.g., 50 for 50%). Defaults to 100% if not specified. - RAM int `yaml:"ram"` // Required for VM, in GB - DiskSize int `yaml:"diskSize"` // Required for VM, in GB + RAM int `yaml:"ram"` // Required for VM, in GB + DiskSize int `yaml:"diskSize"` // Required for VM, in GB // Bare-metal specific fields Prepared bool `yaml:"prepared,omitempty"` // Whether the node is already prepared for DKP installation } @@ -91,12 +90,6 @@ type ModuleConfig struct { ModulePullOverride string `yaml:"modulePullOverride,omitempty"` // Override the module pull branch or tag (e.g. "main", "pr123", "mr41"). Main is defailt value. 
} -const ( - HostReadyTimeout = 10 * time.Minute // Timeout for hosts to be ready - DKPDeployTimeout = 30 * time.Minute // Timeout for DKP deployment - ModuleDeployTimeout = 10 * time.Minute // Timeout for module deployment -) - // UnmarshalYAML implements custom YAML unmarshaling for ClusterNode // to handle OSType conversion from string key to OSType struct func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index 5a74faf..e900b93 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -18,6 +18,7 @@ package ssh import ( "context" + "encoding/base64" "errors" "fmt" "io" @@ -121,11 +122,51 @@ func expandPath(path string) (string, error) { return filepath.Join(usr.HomeDir, strings.TrimPrefix(path, "~/")), nil } +// getSSHPrivateKeyPath handles both file path and base64-encoded private key +// If keyPathOrBase64 is a base64 string, it decodes and writes to a temp file +// If it's a path, it expands ~ and returns the path +func getSSHPrivateKeyPath(keyPathOrBase64 string) (string, error) { + // Check if it looks like a file path (contains path separators or starts with ~) + looksLikePath := strings.Contains(keyPathOrBase64, "/") || strings.HasPrefix(keyPathOrBase64, "~") || strings.Contains(keyPathOrBase64, "\\") + + if !looksLikePath { + // Doesn't look like a path, try base64 decoding + decoded, err := base64.StdEncoding.DecodeString(keyPathOrBase64) + if err == nil && len(decoded) > 0 { + // Successfully decoded, write to temp file + tmpFile, err := os.CreateTemp("", "ssh_private_key_*") + if err != nil { + return "", fmt.Errorf("failed to create temp file for private key: %w", err) + } + defer tmpFile.Close() + + if _, err := tmpFile.Write(decoded); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("failed to write decoded private key to temp file: %w", err) + } + + // Set permissions to 0600 + if err := 
os.Chmod(tmpFile.Name(), 0600); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("failed to set permissions on temp private key file: %w", err) + } + + return tmpFile.Name(), nil + } + // If decoding failed, fall through to treat as path (might be a relative path without /) + } + + // Treat as file path + return expandPath(keyPathOrBase64) +} + // createSSHConfig creates SSH client config with support for passphrase-protected keys -func createSSHConfig(user, keyPath string) (*ssh.ClientConfig, error) { - expandedKeyPath, err := expandPath(keyPath) +func createSSHConfig(user, keyPathOrBase64 string) (*ssh.ClientConfig, error) { + // keyPathOrBase64 can be either a file path or a base64-encoded private key + // Use GetSSHPrivateKeyPath to handle both cases + expandedKeyPath, err := getSSHPrivateKeyPath(keyPathOrBase64) if err != nil { - return nil, fmt.Errorf("failed to expand key path: %w", err) + return nil, fmt.Errorf("failed to get private key path: %w", err) } key, err := os.ReadFile(expandedKeyPath) diff --git a/internal/kubernetes/core/node.go b/internal/kubernetes/core/node.go index ea7097c..d2271d4 100644 --- a/internal/kubernetes/core/node.go +++ b/internal/kubernetes/core/node.go @@ -16,5 +16,54 @@ limitations under the License. 
package core -// TODO: Implement node operations +import ( + "context" + "fmt" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// NodeClient provides operations on Node resources +type NodeClient struct { + client kubernetes.Interface +} + +// NewNodeClient creates a new node client from a rest.Config +func NewNodeClient(config *rest.Config) (*NodeClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &NodeClient{client: clientset}, nil +} + +// Get retrieves a Node by name +func (c *NodeClient) Get(ctx context.Context, name string) (*corev1.Node, error) { + node, err := c.client.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get node %s: %w", name, err) + } + return node, nil +} + +// List lists all Nodes +func (c *NodeClient) List(ctx context.Context) (*corev1.NodeList, error) { + nodes, err := c.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list nodes: %w", err) + } + return nodes, nil +} + +// IsReady checks if a node is in Ready condition +func (c *NodeClient) IsReady(ctx context.Context, node *corev1.Node) bool { + for _, condition := range node.Status.Conditions { + if condition.Type == corev1.NodeReady { + return condition.Status == corev1.ConditionTrue + } + } + return false +} diff --git a/internal/kubernetes/core/secret.go b/internal/kubernetes/core/secret.go new file mode 100644 index 0000000..47bd730 --- /dev/null +++ b/internal/kubernetes/core/secret.go @@ -0,0 +1,76 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// SecretClient provides operations on Secret resources +type SecretClient struct { + client kubernetes.Interface +} + +// NewSecretClient creates a new secret client from a rest.Config +func NewSecretClient(config *rest.Config) (*SecretClient, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + return &SecretClient{client: clientset}, nil +} + +// Get retrieves a Secret by namespace and name +func (c *SecretClient) Get(ctx context.Context, namespace, name string) (*corev1.Secret, error) { + secret, err := c.client.CoreV1().Secrets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get secret %s/%s: %w", namespace, name, err) + } + return secret, nil +} + +// List lists all Secrets in a namespace +func (c *SecretClient) List(ctx context.Context, namespace string) (*corev1.SecretList, error) { + secrets, err := c.client.CoreV1().Secrets(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list secrets in namespace %s: %w", namespace, err) + } + return secrets, nil +} + +// GetDataValue retrieves a specific data value from a secret +// Note: Kubernetes secret.Data is already base64 decoded, so we return it directly +func (c *SecretClient) GetDataValue(ctx context.Context, namespace, 
name, key string) (string, error) { + secret, err := c.Get(ctx, namespace, name) + if err != nil { + return "", err + } + + value, exists := secret.Data[key] + if !exists { + return "", fmt.Errorf("key %s not found in secret %s/%s", key, namespace, name) + } + + // Kubernetes secret.Data is already decoded from base64 + return string(value), nil +} diff --git a/internal/kubernetes/deckhouse/modules.go b/internal/kubernetes/deckhouse/modules.go index e5b0eb3..8828d7e 100644 --- a/internal/kubernetes/deckhouse/modules.go +++ b/internal/kubernetes/deckhouse/modules.go @@ -19,9 +19,12 @@ package deckhouse import ( "context" "fmt" + "time" deckhousev1alpha1 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha1" deckhousev1alpha2 "github.com/deckhouse/deckhouse/deckhouse-controller/pkg/apis/deckhouse.io/v1alpha2" + "github.com/deckhouse/deckhouse/go_lib/libapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -73,3 +76,117 @@ func GetModulePullOverride(ctx context.Context, config *rest.Config, moduleName return modulePullOverride, nil } + +// CreateModuleConfig creates a new ModuleConfig resource +func CreateModuleConfig(ctx context.Context, config *rest.Config, moduleName string, version int, enabled bool, settings map[string]interface{}) error { + cl, err := NewClient(ctx, config) + if err != nil { + return fmt.Errorf("failed to create deckhouse client: %w", err) + } + + moduleConfig := &deckhousev1alpha1.ModuleConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: moduleName, + }, + Spec: deckhousev1alpha1.ModuleConfigSpec{ + Version: version, + Enabled: &enabled, + Settings: deckhousev1alpha1.SettingsValues(settings), + }, + } + + if err := cl.client.Create(ctx, moduleConfig); err != nil { + return fmt.Errorf("failed to create moduleconfig %s: %w", moduleName, err) + } + + return nil +} + +// UpdateModuleConfig updates an existing ModuleConfig resource +func 
UpdateModuleConfig(ctx context.Context, config *rest.Config, moduleName string, version int, enabled bool, settings map[string]interface{}) error { + cl, err := NewClient(ctx, config) + if err != nil { + return fmt.Errorf("failed to create deckhouse client: %w", err) + } + + existing := &deckhousev1alpha1.ModuleConfig{} + key := client.ObjectKey{Name: moduleName} + if err := cl.client.Get(ctx, key, existing); err != nil { + return fmt.Errorf("failed to get moduleconfig %s: %w", moduleName, err) + } + + existing.Spec = deckhousev1alpha1.ModuleConfigSpec{ + Version: version, + Enabled: &enabled, + Settings: deckhousev1alpha1.SettingsValues(settings), + } + + if err := cl.client.Update(ctx, existing); err != nil { + return fmt.Errorf("failed to update moduleconfig %s: %w", moduleName, err) + } + + return nil +} + +// CreateModulePullOverride creates a new ModulePullOverride resource +func CreateModulePullOverride(ctx context.Context, config *rest.Config, moduleName string, imageTag string) error { + cl, err := NewClient(ctx, config) + if err != nil { + return fmt.Errorf("failed to create deckhouse client: %w", err) + } + + // Parse imageTag as Duration for ScanInterval (default: 1m) + scanInterval, err := time.ParseDuration("1m") + if err != nil { + return fmt.Errorf("failed to parse default scan interval: %w", err) + } + + modulePullOverride := &deckhousev1alpha2.ModulePullOverride{ + ObjectMeta: metav1.ObjectMeta{ + Name: moduleName, + }, + Spec: deckhousev1alpha2.ModulePullOverrideSpec{ + ImageTag: imageTag, + ScanInterval: libapi.Duration{Duration: scanInterval}, + Rollback: false, + }, + } + + if err := cl.client.Create(ctx, modulePullOverride); err != nil { + return fmt.Errorf("failed to create modulepulloverride %s: %w", moduleName, err) + } + + return nil +} + +// UpdateModulePullOverride updates an existing ModulePullOverride resource +func UpdateModulePullOverride(ctx context.Context, config *rest.Config, moduleName string, imageTag string) error { + cl, err 
:= NewClient(ctx, config) + if err != nil { + return fmt.Errorf("failed to create deckhouse client: %w", err) + } + + existing := &deckhousev1alpha2.ModulePullOverride{} + key := client.ObjectKey{Name: moduleName} + if err := cl.client.Get(ctx, key, existing); err != nil { + return fmt.Errorf("failed to get modulepulloverride %s: %w", moduleName, err) + } + + // Parse imageTag as Duration for ScanInterval (default: 1m) + scanInterval, err := time.ParseDuration("1m") + if err != nil { + return fmt.Errorf("failed to parse default scan interval: %w", err) + } + + existing.Spec = deckhousev1alpha2.ModulePullOverrideSpec{ + ImageTag: imageTag, + ScanInterval: libapi.Duration{Duration: scanInterval}, + Rollback: false, + } + + if err := cl.client.Update(ctx, existing); err != nil { + return fmt.Errorf("failed to update modulepulloverride %s: %w", moduleName, err) + } + + return nil +} diff --git a/internal/kubernetes/deckhouse/nodegroups.go b/internal/kubernetes/deckhouse/nodegroups.go index 275a1f4..ae62423 100644 --- a/internal/kubernetes/deckhouse/nodegroups.go +++ b/internal/kubernetes/deckhouse/nodegroups.go @@ -16,5 +16,66 @@ limitations under the License. 
package deckhouse -// TODO: Implement nodegroup operations +import ( + "context" + "fmt" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + // NodeGroupGroupVersion is the API group and version for NodeGroup resources + NodeGroupGroupVersion = "deckhouse.io/v1" + // NodeGroupResource is the resource name for NodeGroup + NodeGroupResource = "nodegroups" +) + +var ( + // NodeGroupGVK is the GroupVersionKind for NodeGroup + NodeGroupGVK = schema.GroupVersionKind{ + Group: "deckhouse.io", + Version: "v1", + Kind: "NodeGroup", + } +) + +// GetNodeGroup retrieves a NodeGroup by name +func GetNodeGroup(ctx context.Context, config *rest.Config, name string) (*unstructured.Unstructured, error) { + cl, err := NewClient(ctx, config) + if err != nil { + return nil, fmt.Errorf("failed to create deckhouse client: %w", err) + } + + nodeGroup := &unstructured.Unstructured{} + nodeGroup.SetGroupVersionKind(NodeGroupGVK) + key := client.ObjectKey{Name: name} + if err := cl.client.Get(ctx, key, nodeGroup); err != nil { + return nil, fmt.Errorf("failed to get nodegroup %s: %w", name, err) + } + + return nodeGroup, nil +} + +// CreateNodeGroup creates a NodeGroup resource +func CreateNodeGroup(ctx context.Context, config *rest.Config, name string, nodeType string) error { + cl, err := NewClient(ctx, config) + if err != nil { + return fmt.Errorf("failed to create deckhouse client: %w", err) + } + + nodeGroup := &unstructured.Unstructured{} + nodeGroup.SetGroupVersionKind(NodeGroupGVK) + nodeGroup.SetName(name) + nodeGroup.Object["spec"] = map[string]interface{}{ + "nodeType": nodeType, + } + + if err := cl.client.Create(ctx, nodeGroup); err != nil { + return fmt.Errorf("failed to create nodegroup %s: %w", name, err) + } + + return nil +} diff --git a/pkg/cluster/TODO.md b/pkg/cluster/TODO.md new file mode 100644 index 0000000..53424b6 --- /dev/null +++ 
b/pkg/cluster/TODO.md @@ -0,0 +1,92 @@ +# TODOs for the package + +## Error with module enablement + +### Error + +```bash +• [FAILED] [24.831 seconds] +Cluster Creation Step-by-Step Test [It] should enable and configure modules from cluster definition in test cluster +/Users/ayakubov/development/e2e/storage-e2e/tests/cluster-creation-by-steps/cluster_creation_test.go:553 + + [FAILED] Failed to enable and configure modules + Unexpected error: + <*fmt.wrapError | 0x14000112700>: + failed to create moduleconfig sds-replicated-volume: failed to create moduleconfig sds-replicated-volume: Internal error occurred: failed calling webhook "module-configs.deckhouse-webhook.deckhouse.io": failed to call webhook: Post "https://deckhouse.d8-system.svc:4223/validate/v1alpha1/module-configs?timeout=10s": dial tcp 10.225.43.103:4223: connect: connection refused + { + msg: "failed to create moduleconfig sds-replicated-volume: failed to create moduleconfig sds-replicated-volume: Internal error occurred: failed calling webhook \"module-configs.deckhouse-webhook.deckhouse.io\": failed to call webhook: Post \"https://deckhouse.d8-system.svc:4223/validate/v1alpha1/module-configs?timeout=10s\": dial tcp 10.225.43.103:4223: connect: connection refused", + err: <*fmt.wrapError | 0x140001126e0>{ + msg: "failed to create moduleconfig sds-replicated-volume: Internal error occurred: failed calling webhook \"module-configs.deckhouse-webhook.deckhouse.io\": failed to call webhook: Post \"https://deckhouse.d8-system.svc:4223/validate/v1alpha1/module-configs?timeout=10s\": dial tcp 10.225.43.103:4223: connect: connection refused", + err: <*errors.StatusError | 0x14000440aa0>{ + ErrStatus: { + TypeMeta: {Kind: "", APIVersion: ""}, + ListMeta: { + SelfLink: "", + ResourceVersion: "", + Continue: "", + RemainingItemCount: nil, + }, + Status: "Failure", + Message: "Internal error occurred: failed calling webhook \"module-configs.deckhouse-webhook.deckhouse.io\": failed to call webhook: Post 
\"https://deckhouse.d8-system.svc:4223/validate/v1alpha1/module-configs?timeout=10s\": dial tcp 10.225.43.103:4223: connect: connection refused", + Reason: "InternalError", + Details: { + Name: "", + Group: "", + Kind: "", + UID: "", + Causes: [ + { + Type: "", + Message: "failed calling webhook \"module-configs.deckhouse-webhook.deckhouse.io\": failed to call webhook: Post \"https://deckhouse.d8-system.svc:4223/validate/v1alpha1/module-configs?timeout=10s\": dial tcp 10.225.43.103:4223: connect: connection refused", + Field: "", + }, + ], + RetryAfterSeconds: 0, + }, + Code: 500, + }, + }, + }, + } + occurred + In [It] at: /Users/ayakubov/development/e2e/storage-e2e/tests/cluster-creation-by-steps/cluster_creation_test.go:563 @ 12/23/25 12:12:45.876 +------------------------------ +S [SKIPPED] [0.000 seconds] +Cluster Creation Step-by-Step Test [It] should wait for all modules to be ready in test cluster +/Users/ayakubov/development/e2e/storage-e2e/tests/cluster-creation-by-steps/cluster_creation_test.go:569 + + [SKIPPED] Spec skipped because an earlier spec in an ordered container failed + In [It] at: /Users/ayakubov/development/e2e/storage-e2e/tests/cluster-creation-by-steps/cluster_creation_test.go:569 @ 12/23/25 12:13:06.569 +------------------------------ + +Summarizing 1 Failure: + [FAIL] Cluster Creation Step-by-Step Test [It] should enable and configure modules from cluster definition in test cluster + /Users/ayakubov/development/e2e/storage-e2e/tests/cluster-creation-by-steps/cluster_creation_test.go:563 +``` + +### Code + +```go +// It retries on webhook connection errors to handle cases where the webhook service isn't ready yet +func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleConfig *config.ModuleConfig) error { + settings := make(map[string]interface{}) + if moduleConfig.Settings != nil { + settings = moduleConfig.Settings + } + // Check if ModuleConfig exists + _, err := deckhouse.GetModuleConfig(ctx, kubeconfig, 
moduleConfig.Name) + if err != nil { + // Resource doesn't exist, create it + if err := deckhouse.CreateModuleConfig(ctx, kubeconfig, moduleConfig.Name, moduleConfig.Version, moduleConfig.Enabled, settings); err != nil { + return fmt.Errorf("failed to create moduleconfig %s: %w", moduleConfig.Name, err) + } + } else { + // Resource exists, update it + if err := deckhouse.UpdateModuleConfig(ctx, kubeconfig, moduleConfig.Name, moduleConfig.Version, moduleConfig.Enabled, settings); err != nil { + return fmt.Errorf("failed to update moduleconfig %s: %w", moduleConfig.Name, err) + } + +``` + +Need to fix the issue without ssh! (Fix temporarily with kubectl apply -f via ssh. It's not a good approach!) + diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index bb528c1..c176acc 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -62,7 +62,10 @@ func CreateTestCluster( // Get SSH credentials from environment variables sshHost := config.SSHHost sshUser := config.SSHUser - sshKeyPath := config.SSHKeyPath + sshKeyPath, err := GetSSHPrivateKeyPath() + if err != nil { + return nil, fmt.Errorf("failed to get SSH private key path: %w", err) + } // Stage 2: Establish SSH connection to base cluster sshClient, err := ssh.NewClient(sshUser, sshHost, sshKeyPath) @@ -129,7 +132,8 @@ func CleanupTestCluster(resources *TestClusterResources) error { } // CheckClusterHealth checks if the deckhouse deployment has 1 pod running with 2/2 containers ready -// in the d8-system namespace. This function is widely used to check cluster health after certain steps. +// in the d8-system namespace, and verifies that bootstrap secrets are available. +// This function is widely used to check cluster health after certain steps. 
func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { namespace := "d8-system" deploymentName := "deckhouse" @@ -157,6 +161,12 @@ func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { return fmt.Errorf("failed to create pod client: %w", err) } + // Check that bootstrap secrets are available + secretNamespace := "d8-cloud-instance-manager" + if err := checkBootstrapSecrets(ctx, kubeconfig, secretNamespace); err != nil { + return fmt.Errorf("bootstrap secrets not ready: %w", err) + } + // Get pods for the deployment using the deployment's selector labelSelector := metav1.FormatLabelSelector(deployment.Spec.Selector) pods, err := podClient.ListByLabelSelector(ctx, namespace, labelSelector) @@ -188,6 +198,46 @@ func CheckClusterHealth(ctx context.Context, kubeconfig *rest.Config) error { return nil } +// checkBootstrapSecrets verifies that both bootstrap secrets are available +func checkBootstrapSecrets(ctx context.Context, kubeconfig *rest.Config, namespace string) error { + secretClient, err := core.NewSecretClient(kubeconfig) + if err != nil { + return fmt.Errorf("failed to create secret client: %w", err) + } + + // Check for worker bootstrap secret + _, err = secretClient.Get(ctx, namespace, "manual-bootstrap-for-worker") + if err != nil { + // List available secrets for debugging + secretList, listErr := secretClient.List(ctx, namespace) + if listErr == nil { + availableNames := make([]string, 0, len(secretList.Items)) + for _, s := range secretList.Items { + availableNames = append(availableNames, s.Name) + } + return fmt.Errorf("worker bootstrap secret not found: %w. 
Available secrets in namespace %s: %v", err, namespace, availableNames) + } + return fmt.Errorf("worker bootstrap secret not found: %w", err) + } + + // Check for master bootstrap secret + _, err = secretClient.Get(ctx, namespace, "manual-bootstrap-for-master") + if err != nil { + // List available secrets for debugging + secretList, listErr := secretClient.List(ctx, namespace) + if listErr == nil { + availableNames := make([]string, 0, len(secretList.Items)) + for _, s := range secretList.Items { + availableNames = append(availableNames, s.Name) + } + return fmt.Errorf("master bootstrap secret not found: %w. Available secrets in namespace %s: %v", err, namespace, availableNames) + } + return fmt.Errorf("master bootstrap secret not found: %w", err) + } + + return nil +} + // ConnectClusterOptions defines options for connecting to a cluster type ConnectClusterOptions struct { // Direct connection parameters (used when UseJumpHost is false) diff --git a/pkg/cluster/modules.go b/pkg/cluster/modules.go new file mode 100644 index 0000000..ffa7cec --- /dev/null +++ b/pkg/cluster/modules.go @@ -0,0 +1,523 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" + "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" + "gopkg.in/yaml.v3" + "k8s.io/client-go/rest" +) + +// moduleGraph represents the dependency graph structure +type moduleGraph struct { + modules map[string]*config.ModuleConfig // module name -> module config + dependencies map[string][]string // module name -> list of dependency names + reverseDeps map[string][]string // module name -> list of modules that depend on it +} + +// buildModuleGraph builds a dependency graph from module configurations +func buildModuleGraph(modules []*config.ModuleConfig) (*moduleGraph, error) { + graph := &moduleGraph{ + modules: make(map[string]*config.ModuleConfig), + dependencies: make(map[string][]string), + reverseDeps: make(map[string][]string), + } + + // Build module map and dependency lists + for _, module := range modules { + graph.modules[module.Name] = module + graph.dependencies[module.Name] = module.Dependencies + + // Build reverse dependencies (which modules depend on this one) + for _, depName := range module.Dependencies { + graph.reverseDeps[depName] = append(graph.reverseDeps[depName], module.Name) + } + } + + // Validate that all dependencies exist + for _, module := range modules { + for _, depName := range module.Dependencies { + if _, exists := graph.modules[depName]; !exists { + return nil, fmt.Errorf("dependency module %s not found for module %s", depName, module.Name) + } + } + } + + return graph, nil +} + +// topologicalSortLevels performs topological sort and returns modules organized by levels +// Level 0 contains modules with no dependencies, level 1 contains modules that only depend on level 0, etc. 
+func topologicalSortLevels(graph *moduleGraph) ([][]*config.ModuleConfig, error) { + // Calculate in-degrees (number of unresolved dependencies) + inDegree := make(map[string]int) + for name := range graph.modules { + inDegree[name] = len(graph.dependencies[name]) + } + + levels := [][]*config.ModuleConfig{} + + // Process levels until all modules are processed + for len(inDegree) > 0 { + // Find all modules with no remaining dependencies (current level) + currentLevel := []*config.ModuleConfig{} + for name, degree := range inDegree { + if degree == 0 { + currentLevel = append(currentLevel, graph.modules[name]) + } + } + + // If no modules found with degree 0, there's a cycle + if len(currentLevel) == 0 { + remaining := []string{} + for name := range inDegree { + remaining = append(remaining, name) + } + return nil, fmt.Errorf("circular dependency detected among modules: %v", remaining) + } + + // Add current level to result + levels = append(levels, currentLevel) + + // Remove processed modules and update in-degrees of dependent modules + for _, module := range currentLevel { + delete(inDegree, module.Name) + + // Decrease in-degree for all modules that depend on this one + for _, dependent := range graph.reverseDeps[module.Name] { + if _, exists := inDegree[dependent]; exists { + inDegree[dependent]-- + } + } + } + } + + return levels, nil +} + +// configureModuleConfig creates or updates a ModuleConfig resource +// It retries on webhook connection errors to handle cases where the webhook service isn't ready yet +func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleConfig *config.ModuleConfig) error { + settings := make(map[string]interface{}) + if moduleConfig.Settings != nil { + settings = moduleConfig.Settings + } + + // Retry logic for webhook connection errors + maxRetries := 10 + retryDelay := 2 * time.Second + var lastErr error + + for attempt := 0; attempt < maxRetries; attempt++ { + // Check if ModuleConfig exists + _, err := 
deckhouse.GetModuleConfig(ctx, kubeconfig, moduleConfig.Name) + if err != nil { + // Resource doesn't exist, create it + err = deckhouse.CreateModuleConfig(ctx, kubeconfig, moduleConfig.Name, moduleConfig.Version, moduleConfig.Enabled, settings) + if err != nil { + lastErr = err + // Check if it's a webhook connection error + if isWebhookConnectionError(err) { + if attempt < maxRetries-1 { + // Wait before retrying + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryDelay): + // Exponential backoff + retryDelay = time.Duration(float64(retryDelay) * 1.5) + continue + } + } + } + return fmt.Errorf("failed to create moduleconfig %s: %w", moduleConfig.Name, err) + } + return nil + } else { + // Resource exists, update it + err = deckhouse.UpdateModuleConfig(ctx, kubeconfig, moduleConfig.Name, moduleConfig.Version, moduleConfig.Enabled, settings) + if err != nil { + lastErr = err + // Check if it's a webhook connection error + if isWebhookConnectionError(err) { + if attempt < maxRetries-1 { + // Wait before retrying + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryDelay): + // Exponential backoff + retryDelay = time.Duration(float64(retryDelay) * 1.5) + continue + } + } + } + return fmt.Errorf("failed to update moduleconfig %s: %w", moduleConfig.Name, err) + } + return nil + } + } + + return fmt.Errorf("failed to configure moduleconfig %s after %d attempts: %w", moduleConfig.Name, maxRetries, lastErr) +} + +// findKubectlPath finds the kubectl binary path on the remote host +// It checks both as the user and as root (via sudo) to ensure kubectl is accessible +func findKubectlPath(ctx context.Context, sshClient ssh.SSHClient) (string, error) { + // First, try to find kubectl as the user + cmd := "command -v kubectl 2>/dev/null || which kubectl 2>/dev/null || echo ''" + output, err := sshClient.Exec(ctx, cmd) + if err == nil { + path := strings.TrimSpace(output) + if path != "" { + // Verify it's accessible with sudo + 
verifyCmd := fmt.Sprintf("sudo test -x %s && echo %s", path, path) + verifyOutput, verifyErr := sshClient.Exec(ctx, verifyCmd) + if verifyErr == nil && strings.TrimSpace(verifyOutput) != "" { + return path, nil + } + } + } + + // Try common kubectl installation paths (check as root via sudo) + kubectlPaths := []string{"/usr/local/bin/kubectl", "/usr/bin/kubectl", "/opt/bin/kubectl", "/snap/bin/kubectl"} + for _, path := range kubectlPaths { + // Check if file exists and is executable (as root) + checkCmd := fmt.Sprintf("sudo test -x %s && echo %s", path, path) + checkOutput, checkErr := sshClient.Exec(ctx, checkCmd) + if checkErr == nil { + foundPath := strings.TrimSpace(checkOutput) + if foundPath != "" { + return foundPath, nil + } + } + } + + return "", fmt.Errorf("kubectl not found on master node (checked user PATH and common locations)") +} + +// configureModuleConfigViaSSH creates or updates a ModuleConfig resource via kubectl over SSH +// This ensures the webhook is called from within the cluster network +func configureModuleConfigViaSSH(ctx context.Context, sshClient ssh.SSHClient, moduleConfig *config.ModuleConfig) error { + // Build ModuleConfig YAML + moduleConfigYAML := struct { + APIVersion string `yaml:"apiVersion"` + Kind string `yaml:"kind"` + Metadata struct { + Name string `yaml:"name"` + } `yaml:"metadata"` + Spec struct { + Version int `yaml:"version"` + Enabled *bool `yaml:"enabled"` + Settings map[string]interface{} `yaml:"settings,omitempty"` + } `yaml:"spec"` + }{ + APIVersion: "deckhouse.io/v1alpha1", + Kind: "ModuleConfig", + Metadata: struct { + Name string `yaml:"name"` + }{ + Name: moduleConfig.Name, + }, + Spec: struct { + Version int `yaml:"version"` + Enabled *bool `yaml:"enabled"` + Settings map[string]interface{} `yaml:"settings,omitempty"` + }{ + Version: moduleConfig.Version, + Enabled: &moduleConfig.Enabled, + Settings: moduleConfig.Settings, // nil or empty map will be omitted due to omitempty + }, + } + + yamlBytes, err := 
yaml.Marshal(moduleConfigYAML) + if err != nil { + return fmt.Errorf("failed to marshal ModuleConfig YAML: %w", err) + } + + // Find kubectl path + kubectlPath, err := findKubectlPath(ctx, sshClient) + if err != nil { + return fmt.Errorf("failed to find kubectl: %w", err) + } + + // Apply via kubectl over SSH using the found path + cmd := fmt.Sprintf("sudo %s apply -f - << 'MODULECONFIG_EOF'\n%sMODULECONFIG_EOF", kubectlPath, string(yamlBytes)) + output, err := sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to apply ModuleConfig %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) + } + + return nil +} + +// configureModulePullOverrideViaSSH creates or updates a ModulePullOverride resource via kubectl over SSH +func configureModulePullOverrideViaSSH(ctx context.Context, sshClient ssh.SSHClient, moduleConfig *config.ModuleConfig, registryRepo string) error { + // Determine ModulePullOverride imageTag + var imageTag string + shouldCreateMPO := false + + if strings.HasPrefix(registryRepo, "dev-") { + shouldCreateMPO = true + if moduleConfig.ModulePullOverride != "" { + imageTag = moduleConfig.ModulePullOverride + } else { + imageTag = "main" + } + } else { + shouldCreateMPO = false + } + + if !shouldCreateMPO { + return nil + } + + // Build ModulePullOverride YAML + modulePullOverrideYAML := struct { + APIVersion string `yaml:"apiVersion"` + Kind string `yaml:"kind"` + Metadata struct { + Name string `yaml:"name"` + } `yaml:"metadata"` + Spec struct { + ImageTag string `yaml:"imageTag"` + ScanInterval string `yaml:"scanInterval"` + Rollback bool `yaml:"rollback"` + } `yaml:"spec"` + }{ + APIVersion: "deckhouse.io/v1alpha2", + Kind: "ModulePullOverride", + Metadata: struct { + Name string `yaml:"name"` + }{ + Name: moduleConfig.Name, + }, + Spec: struct { + ImageTag string `yaml:"imageTag"` + ScanInterval string `yaml:"scanInterval"` + Rollback bool `yaml:"rollback"` + }{ + ImageTag: imageTag, + ScanInterval: "1m", + Rollback: false, + 
}, + } + + yamlBytes, err := yaml.Marshal(modulePullOverrideYAML) + if err != nil { + return fmt.Errorf("failed to marshal ModulePullOverride YAML: %w", err) + } + + // Find kubectl path + kubectlPath, err := findKubectlPath(ctx, sshClient) + if err != nil { + return fmt.Errorf("failed to find kubectl: %w", err) + } + + // Apply via kubectl over SSH using the found path + cmd := fmt.Sprintf("sudo %s apply -f - << 'MODULEPULLOVERRIDE_EOF'\n%sMODULEPULLOVERRIDE_EOF", kubectlPath, string(yamlBytes)) + output, err := sshClient.Exec(ctx, cmd) + if err != nil { + return fmt.Errorf("failed to apply ModulePullOverride %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) + } + + return nil +} + +// isWebhookConnectionError checks if the error is a webhook connection error +func isWebhookConnectionError(err error) bool { + if err == nil { + return false + } + errStr := err.Error() + // Check for common webhook connection error patterns + return strings.Contains(errStr, "connection refused") || + strings.Contains(errStr, "failed calling webhook") || + strings.Contains(errStr, "webhook") && strings.Contains(errStr, "timeout") +} + +// configureModulePullOverride creates or updates a ModulePullOverride resource if needed +func configureModulePullOverride(ctx context.Context, kubeconfig *rest.Config, moduleConfig *config.ModuleConfig, registryRepo string) error { + // Determine ModulePullOverride imageTag + // If registryRepo starts with "dev-", always create MPO: + // - Use moduleConfig.ModulePullOverride if specified (not empty) + // - Otherwise use "main" as default + // If registryRepo does NOT start with "dev-", we don't create MPO at all + var imageTag string + shouldCreateMPO := false + + if strings.HasPrefix(registryRepo, "dev-") { + // Always create MPO for dev registries + shouldCreateMPO = true + if moduleConfig.ModulePullOverride != "" { + imageTag = moduleConfig.ModulePullOverride + } else { + imageTag = "main" + } + } else { + // Don't create MPO for 
non-dev registries + shouldCreateMPO = false + } + + // Create or update ModulePullOverride if needed + if shouldCreateMPO { + _, err := deckhouse.GetModulePullOverride(ctx, kubeconfig, moduleConfig.Name) + if err != nil { + // Resource doesn't exist, create it + if err := deckhouse.CreateModulePullOverride(ctx, kubeconfig, moduleConfig.Name, imageTag); err != nil { + return fmt.Errorf("failed to create module pull override for %s: %w", moduleConfig.Name, err) + } + } else { + // Resource exists, update it + if err := deckhouse.UpdateModulePullOverride(ctx, kubeconfig, moduleConfig.Name, imageTag); err != nil { + return fmt.Errorf("failed to update module pull override for %s: %w", moduleConfig.Name, err) + } + } + } + + return nil +} + +// EnableAndConfigureModules enables and configures modules based on cluster definition +// It builds a dependency graph and processes modules level by level using topological sort +// If sshClient is provided, it uses kubectl via SSH (recommended for webhook access from within cluster) +// Otherwise, it falls back to using kubeconfig directly +func EnableAndConfigureModules(ctx context.Context, kubeconfig *rest.Config, clusterDef *config.ClusterDefinition, sshClient ssh.SSHClient) error { + if len(clusterDef.DKPParameters.Modules) == 0 { + return nil + } + + // Build dependency graph + graph, err := buildModuleGraph(clusterDef.DKPParameters.Modules) + if err != nil { + return fmt.Errorf("failed to build module graph: %w", err) + } + + // Perform topological sort to get modules organized by levels + levels, err := topologicalSortLevels(graph) + if err != nil { + return fmt.Errorf("failed to sort modules: %w", err) + } + + // Process modules level by level + for levelIndex, level := range levels { + for _, moduleConfig := range level { + // Configure ModuleConfig + if sshClient != nil { + if err := configureModuleConfigViaSSH(ctx, sshClient, moduleConfig); err != nil { + return err + } + } else { + if err := 
configureModuleConfig(ctx, kubeconfig, moduleConfig); err != nil { + return err + } + } + + // Configure ModulePullOverride + if sshClient != nil { + if err := configureModulePullOverrideViaSSH(ctx, sshClient, moduleConfig, clusterDef.DKPParameters.RegistryRepo); err != nil { + return err + } + } else { + if err := configureModulePullOverride(ctx, kubeconfig, moduleConfig, clusterDef.DKPParameters.RegistryRepo); err != nil { + return err + } + } + } + // All modules at this level are now configured + // Next level modules can be processed as their dependencies are satisfied + _ = levelIndex // Can be used for logging if needed + } + + return nil +} + +// WaitForModulesReady waits for all modules specified in cluster definition to be ready +// It builds a dependency graph and waits for modules level by level using topological sort +func WaitForModulesReady(ctx context.Context, kubeconfig *rest.Config, clusterDef *config.ClusterDefinition, timeout time.Duration) error { + if len(clusterDef.DKPParameters.Modules) == 0 { + return nil + } + + // Build dependency graph + graph, err := buildModuleGraph(clusterDef.DKPParameters.Modules) + if err != nil { + return fmt.Errorf("failed to build module graph: %w", err) + } + + // Perform topological sort to get modules organized by levels + levels, err := topologicalSortLevels(graph) + if err != nil { + return fmt.Errorf("failed to sort modules: %w", err) + } + + // Wait for modules level by level + for levelIndex, level := range levels { + for _, moduleConfig := range level { + // Only wait for enabled modules + if moduleConfig.Enabled { + if err := WaitForModuleReady(ctx, kubeconfig, moduleConfig.Name, timeout); err != nil { + return fmt.Errorf("failed to wait for module %s to be ready: %w", moduleConfig.Name, err) + } + } + } + // All modules at this level are now ready + // Next level modules can be waited for as their dependencies are satisfied + _ = levelIndex // Can be used for logging if needed + } + + return nil +} + 
+// WaitForModuleReady waits for a module to reach the Ready phase +func WaitForModuleReady(ctx context.Context, kubeconfig *rest.Config, moduleName string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + if time.Now().After(deadline) { + return fmt.Errorf("timeout waiting for module %s to be ready", moduleName) + } + + module, err := deckhouse.GetModule(ctx, kubeconfig, moduleName) + if err != nil { + // Module doesn't exist yet, continue waiting + continue + } + + if module.Status.Phase == "Ready" { + return nil + } + + if module.Status.Phase == "Error" { + return fmt.Errorf("module %s is in Error phase", moduleName) + } + } + } +} diff --git a/pkg/cluster/nodegroup.go b/pkg/cluster/nodegroup.go new file mode 100644 index 0000000..d68b277 --- /dev/null +++ b/pkg/cluster/nodegroup.go @@ -0,0 +1,46 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +import ( + "context" + "fmt" + + "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/rest" +) + +// CreateStaticNodeGroup creates a NodeGroup resource with Static nodeType +func CreateStaticNodeGroup(ctx context.Context, config *rest.Config, name string) error { + // Check if NodeGroup already exists + _, err := deckhouse.GetNodeGroup(ctx, config, name) + if err == nil { + // NodeGroup already exists, nothing to do + return nil + } + if !errors.IsNotFound(err) { + return fmt.Errorf("failed to check if nodegroup %s exists: %w", name, err) + } + + // Create NodeGroup with Static nodeType + if err := deckhouse.CreateNodeGroup(ctx, config, name, "Static"); err != nil { + return fmt.Errorf("failed to create nodegroup %s: %w", name, err) + } + + return nil +} diff --git a/pkg/cluster/secrets.go b/pkg/cluster/secrets.go new file mode 100644 index 0000000..46145f2 --- /dev/null +++ b/pkg/cluster/secrets.go @@ -0,0 +1,105 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cluster + +import ( + "context" + "fmt" + "strings" + + "github.com/deckhouse/storage-e2e/internal/kubernetes/core" + "k8s.io/client-go/rest" +) + +// FindSecretByName finds a secret by name, trying multiple matching strategies +// This helps with issues where secret names might have hidden Unicode characters +// 1. Exact match +// 2. Case-insensitive match +// 3. 
Fuzzy match (ignoring common Unicode issues like non-breaking spaces) +// Returns the actual secret name found (which may differ from the requested name due to Unicode issues) +func FindSecretByName(ctx context.Context, kubeconfig *rest.Config, namespace, name string) (string, error) { + secretClient, err := core.NewSecretClient(kubeconfig) + if err != nil { + return "", fmt.Errorf("failed to create secret client: %w", err) + } + + // First try exact match + secret, err := secretClient.Get(ctx, namespace, name) + if err == nil { + return secret.Name, nil + } + + // If exact match fails, list all secrets and try to find a match + secretList, err := secretClient.List(ctx, namespace) + if err != nil { + return "", fmt.Errorf("failed to list secrets: %w", err) + } + + // Normalize the search name: remove common problematic Unicode characters + normalizedName := normalizeSecretName(name) + + // Try case-insensitive and normalized matching + for i := range secretList.Items { + secretName := secretList.Items[i].Name + + // Try exact case-insensitive match + if strings.EqualFold(secretName, name) { + return secretName, nil + } + + // Try normalized match (handles hidden Unicode characters) + if normalizeSecretName(secretName) == normalizedName { + return secretName, nil + } + } + + // If still not found, return error with available secret names + availableNames := make([]string, 0, len(secretList.Items)) + for _, s := range secretList.Items { + availableNames = append(availableNames, s.Name) + } + return "", fmt.Errorf("secret %s/%s not found. 
Available secrets: %v", namespace, name, availableNames) +} + +// GetSecretDataValue retrieves a specific data value from a secret by name +// It uses FindSecretByName to handle potential Unicode character issues +func GetSecretDataValue(ctx context.Context, kubeconfig *rest.Config, namespace, name, key string) (string, error) { + actualName, err := FindSecretByName(ctx, kubeconfig, namespace, name) + if err != nil { + return "", err + } + + secretClient, err := core.NewSecretClient(kubeconfig) + if err != nil { + return "", fmt.Errorf("failed to create secret client: %w", err) + } + + return secretClient.GetDataValue(ctx, namespace, actualName, key) +} + +// normalizeSecretName normalizes a secret name by removing/replacing problematic Unicode characters +// This helps match secrets that have hidden Unicode characters (like non-breaking spaces) +func normalizeSecretName(name string) string { + // Replace common problematic Unicode characters with their ASCII equivalents + normalized := strings.ReplaceAll(name, "\u00A0", " ") // Non-breaking space -> regular space + normalized = strings.ReplaceAll(normalized, "\u200B", "") // Zero-width space -> empty + normalized = strings.ReplaceAll(normalized, "\uFEFF", "") // Zero-width no-break space -> empty + normalized = strings.ReplaceAll(normalized, "\u200C", "") // Zero-width non-joiner -> empty + normalized = strings.ReplaceAll(normalized, "\u200D", "") // Zero-width joiner -> empty + normalized = strings.ToLower(strings.TrimSpace(normalized)) + return normalized +} diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index aef9867..1b2534b 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -18,18 +18,23 @@ package cluster import ( "context" + "encoding/base64" "fmt" "net" "os" + "os/user" "path/filepath" "runtime" "strings" "text/template" + "time" "gopkg.in/yaml.v3" + "k8s.io/client-go/rest" "github.com/deckhouse/storage-e2e/internal/config" 
"github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" + "github.com/deckhouse/storage-e2e/internal/kubernetes/core" ) // OSInfo represents detected operating system information @@ -121,18 +126,40 @@ func InstallDocker(ctx context.Context, sshClient ssh.SSHClient) error { } // PrepareBootstrapConfig prepares the bootstrap configuration file from a template. -// It takes cluster definition, master IP address, and VM IP addresses to calculate the internal network CIDR. +// It takes cluster definition and extracts VM IP addresses to calculate the internal network CIDR. // The function generates a config file and saves it to the temp/ directory. // Returns the path to the generated config file. -func PrepareBootstrapConfig(clusterDef *config.ClusterDefinition, masterIP string, vmIPs []string) (string, error) { +// Note: clusterDef must have IPAddress fields filled in for all VM nodes (via GatherVMInfo) +func PrepareBootstrapConfig(clusterDef *config.ClusterDefinition) (string, error) { if clusterDef == nil { return "", fmt.Errorf("clusterDef cannot be nil") } - if masterIP == "" { - return "", fmt.Errorf("masterIP cannot be empty") + + // Extract VM IPs from cluster definition + var vmIPs []string + firstMasterIP := "" + for _, master := range clusterDef.Masters { + if master.HostType == config.HostTypeVM && master.IPAddress != "" { + vmIPs = append(vmIPs, master.IPAddress) + if firstMasterIP == "" { + firstMasterIP = master.IPAddress + } + } + } + for _, worker := range clusterDef.Workers { + if worker.HostType == config.HostTypeVM && worker.IPAddress != "" { + vmIPs = append(vmIPs, worker.IPAddress) + } + } + if clusterDef.Setup != nil && clusterDef.Setup.HostType == config.HostTypeVM && clusterDef.Setup.IPAddress != "" { + vmIPs = append(vmIPs, clusterDef.Setup.IPAddress) } + if len(vmIPs) == 0 { - return "", fmt.Errorf("vmIPs cannot be empty") + return "", fmt.Errorf("no VM IP addresses found in cluster definition (IPAddress fields must be filled via 
GatherVMInfo)") + } + if firstMasterIP == "" { + return "", fmt.Errorf("no master IP address found in cluster definition") } // Calculate internal network CIDR from VM IPs (assume /24 subnet) @@ -143,7 +170,7 @@ func PrepareBootstrapConfig(clusterDef *config.ClusterDefinition, masterIP strin // Format public domain template with master IP for sslip.io // Format: %s.10.10.1.5.sslip.io (dots in IP are preserved) - publicDomainTemplate := fmt.Sprintf("%%s.%s.sslip.io", masterIP) + publicDomainTemplate := fmt.Sprintf("%%s.%s.sslip.io", firstMasterIP) // Prepare template data templateData := struct { @@ -368,19 +395,22 @@ func getDevBranchFromConfig(configPath string) (string, error) { // It performs the following steps: // 1. Logs into the Docker registry using DKP_LICENSE_KEY from config // 2. Runs the dhctl bootstrap command in a Docker container (can take up to 30 minutes) -// The function uses sudo to run commands as root on the setup node. -// It uses config.VMSSHUser and config.DKPLicenseKey from the config package. -// The install image is constructed from registryRepo and the devBranch read from configPath. 
-func BootstrapCluster(ctx context.Context, sshClient ssh.SSHClient, clusterDef *config.ClusterDefinition, masterIP string, configPath string) error { +// Note: clusterDef must have IPAddress fields filled in for all VM nodes (via GatherVMInfo) +func BootstrapCluster(ctx context.Context, sshClient ssh.SSHClient, clusterDef *config.ClusterDefinition, configPath string) error { if sshClient == nil { return fmt.Errorf("sshClient cannot be nil") } if clusterDef == nil { return fmt.Errorf("clusterDef cannot be nil") } - if masterIP == "" { - return fmt.Errorf("masterIP cannot be empty") + if len(clusterDef.Masters) == 0 { + return fmt.Errorf("cluster definition must have at least one master") + } + firstMaster := clusterDef.Masters[0] + if firstMaster.IPAddress == "" { + return fmt.Errorf("first master IP address is not set (must be filled via GatherVMInfo)") } + masterIP := firstMaster.IPAddress if configPath == "" { return fmt.Errorf("configPath cannot be empty") } @@ -526,3 +556,365 @@ echo "%s" return nil } + +// AddNodesToCluster adds nodes to the cluster +// It performs the following steps: +// 1. Gets bootstrap scripts from secrets +// 2. 
Runs bootstrap scripts on each node via SSH +// Note: NodeGroup must be created before calling this function (secrets won't appear until NodeGroup exists) +// Note: clusterDef must have IPAddress fields filled in for all VM nodes (via GatherVMInfo) +func AddNodesToCluster(ctx context.Context, kubeconfig *rest.Config, clusterDef *config.ClusterDefinition, baseSSHUser, baseSSHHost, sshKeyPath string) error { + if kubeconfig == nil { + return fmt.Errorf("kubeconfig cannot be nil") + } + if clusterDef == nil { + return fmt.Errorf("clusterDef cannot be nil") + } + + // Step 1: Get bootstrap scripts from secrets + workerBootstrapScript, err := GetSecretDataValue(ctx, kubeconfig, "d8-cloud-instance-manager", "manual-bootstrap-for-worker", "bootstrap.sh") + if err != nil { + return fmt.Errorf("failed to get worker bootstrap script: %w", err) + } + + masterBootstrapScript, err := GetSecretDataValue(ctx, kubeconfig, "d8-cloud-instance-manager", "manual-bootstrap-for-master", "bootstrap.sh") + if err != nil { + return fmt.Errorf("failed to get master bootstrap script: %w", err) + } + + // Process additional masters (skip the first one) + masterCount := len(clusterDef.Masters) - 1 + if masterCount > 0 { + fmt.Printf(" ▶️ Adding %d additional master node(s) to the cluster\n", masterCount) + for i := 1; i < len(clusterDef.Masters); i++ { + masterNode := clusterDef.Masters[i] + if err := addNodeToCluster(ctx, masterNode, masterBootstrapScript, clusterDef, baseSSHUser, baseSSHHost, sshKeyPath); err != nil { + return fmt.Errorf("failed to add master node %s: %w", masterNode.Hostname, err) + } + } + } + + // Process all workers + workerCount := len(clusterDef.Workers) + if workerCount > 0 { + fmt.Printf(" ▶️ Adding %d worker node(s) to the cluster\n", workerCount) + for _, workerNode := range clusterDef.Workers { + if err := addNodeToCluster(ctx, workerNode, workerBootstrapScript, clusterDef, baseSSHUser, baseSSHHost, sshKeyPath); err != nil { + return fmt.Errorf("failed to add 
worker node %s: %w", workerNode.Hostname, err) + } + } + } + + return nil +} + +// addNodeToCluster adds a single node to the cluster by running the bootstrap script +func addNodeToCluster(ctx context.Context, node config.ClusterNode, bootstrapScript string, clusterDef *config.ClusterDefinition, baseSSHUser, baseSSHHost, sshKeyPath string) error { + // Get node IP address from cluster definition + nodeIP, err := GetNodeIPAddress(clusterDef, node.Hostname) + if err != nil { + return fmt.Errorf("failed to get IP address for node %s: %w", node.Hostname, err) + } + + // Log start of node addition + nodeType := "worker" + if node.Role == config.ClusterRoleMaster { + nodeType = "master" + } + fmt.Printf(" ▶️ Adding %s node %s (%s) to the cluster...\n", nodeType, node.Hostname, nodeIP) + + // Create SSH client to the node through jump host (base cluster master) + sshClient, err := ssh.NewClientWithJumpHost( + baseSSHUser, baseSSHHost, sshKeyPath, // jump host + config.VMSSHUser, nodeIP, sshKeyPath, // target host + ) + if err != nil { + fmt.Printf(" ❌ Failed to create SSH connection to node %s (%s): %v\n", node.Hostname, nodeIP, err) + return fmt.Errorf("failed to create SSH client to node %s (%s): %w", node.Hostname, nodeIP, err) + } + defer sshClient.Close() + + // Log that bootstrap script is starting + fmt.Printf(" ⏳ Running bootstrap script on node %s (%s)...\n", node.Hostname, nodeIP) + + // Run bootstrap script as root + // Note: The bootstrap script from secret is already decoded (Kubernetes API returns decoded data) + cmd := fmt.Sprintf("sudo bash << 'BOOTSTRAP_EOF'\n%s\nBOOTSTRAP_EOF", bootstrapScript) + + output, err := sshClient.Exec(ctx, cmd) + if err != nil { + fmt.Printf(" ❌ Bootstrap script failed on node %s (%s): %v\n", node.Hostname, nodeIP, err) + if output != "" { + fmt.Printf(" 📋 Bootstrap script output from node %s:\n%s\n", node.Hostname, output) + } + return fmt.Errorf("failed to run bootstrap script on node %s: %w\nOutput: %s", node.Hostname, err, 
output) + } + + // Log successful completion (output is only shown on failure) + fmt.Printf(" ✅ Bootstrap script completed successfully on node %s (%s)\n", node.Hostname, nodeIP) + + return nil +} + +// WaitForNodeReady waits for a node to become Ready +func WaitForNodeReady(ctx context.Context, kubeconfig *rest.Config, nodeName string, timeout time.Duration) error { + nodeClient, err := core.NewNodeClient(kubeconfig) + if err != nil { + return fmt.Errorf("failed to create node client: %w", err) + } + + deadline := time.Now().Add(timeout) + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + if time.Now().After(deadline) { + return fmt.Errorf("timeout waiting for node %s to be ready", nodeName) + } + + node, err := nodeClient.Get(ctx, nodeName) + if err != nil { + // Node doesn't exist yet, continue waiting + continue + } + + if nodeClient.IsReady(ctx, node) { + return nil + } + } + } +} + +// WaitForAllNodesReady waits for all expected nodes to become Ready +// It validates that: +// 1. All expected nodes are present in the cluster +// 2. 
All nodes are in Ready state +// Expected nodes: all masters (including the first one that was bootstrapped) + all workers +func WaitForAllNodesReady(ctx context.Context, kubeconfig *rest.Config, clusterDef *config.ClusterDefinition, timeout time.Duration) error { + if kubeconfig == nil { + return fmt.Errorf("kubeconfig cannot be nil") + } + if clusterDef == nil { + return fmt.Errorf("clusterDef cannot be nil") + } + + nodeClient, err := core.NewNodeClient(kubeconfig) + if err != nil { + return fmt.Errorf("failed to create node client: %w", err) + } + + // Build expected node names (all masters + all workers) + expectedNodeNames := make(map[string]bool) + for _, master := range clusterDef.Masters { + expectedNodeNames[master.Hostname] = true + } + for _, worker := range clusterDef.Workers { + expectedNodeNames[worker.Hostname] = true + } + + expectedCount := len(expectedNodeNames) + if expectedCount == 0 { + return fmt.Errorf("no nodes expected in cluster definition") + } + + deadline := time.Now().Add(timeout) + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + if time.Now().After(deadline) { + // Get current state for better error message + nodes, err := nodeClient.List(ctx) + if err != nil { + return fmt.Errorf("timeout waiting for nodes to be ready (failed to list nodes: %w)", err) + } + + expectedReadyCount := 0 + foundNodes := make(map[string]bool) + missingNodes := make([]string, 0) + for _, node := range nodes.Items { + foundNodes[node.Name] = true + // Only count ready nodes that are in our expected list + if expectedNodeNames[node.Name] && nodeClient.IsReady(ctx, &node) { + expectedReadyCount++ + } + } + + // Find missing expected nodes + for expectedName := range expectedNodeNames { + if !foundNodes[expectedName] { + missingNodes = append(missingNodes, expectedName) + } + } + + errorMsg := fmt.Sprintf("timeout waiting for nodes to be ready: expected %d nodes 
(%v), found %d nodes, %d expected nodes ready", + expectedCount, getNodeNamesList(expectedNodeNames), len(foundNodes), expectedReadyCount) + if len(missingNodes) > 0 { + errorMsg += fmt.Sprintf(". Missing nodes: %v", missingNodes) + } + errorMsg += fmt.Sprintf(". Found nodes: %v", getNodeNamesList(foundNodes)) + + return fmt.Errorf("%s", errorMsg) + } + + // List all nodes + nodes, err := nodeClient.List(ctx) + if err != nil { + // Continue waiting if we can't list nodes yet + continue + } + + // Check if we have all expected nodes and they are all ready + foundNodes := make(map[string]bool) + expectedReadyCount := 0 + for _, node := range nodes.Items { + foundNodes[node.Name] = true + // Only count ready nodes that are in our expected list + if expectedNodeNames[node.Name] && nodeClient.IsReady(ctx, &node) { + expectedReadyCount++ + } + } + + // Check if all expected nodes are present + allPresent := true + for expectedName := range expectedNodeNames { + if !foundNodes[expectedName] { + allPresent = false + break + } + } + + // If all expected nodes are present and all expected nodes are ready, we're done + if allPresent && expectedReadyCount == expectedCount { + return nil + } + } + } +} + +// getNodeNamesList converts a map of node names to a sorted list for error messages +func getNodeNamesList(nodeMap map[string]bool) []string { + names := make([]string, 0, len(nodeMap)) + for name := range nodeMap { + names = append(names, name) + } + // Simple sort by iterating (for consistent error messages) + // In production, you might want to use sort.Strings + return names +} + +// GetSSHPrivateKeyPath returns the path to the SSH private key file. +// If SSHPrivateKey is a file path, it returns the expanded path. +// If SSHPrivateKey is a base64-encoded string, it decodes it, writes to a temporary file in temp//, +// and returns that path. 
+func GetSSHPrivateKeyPath() (string, error) { + // Check if it looks like a file path (contains path separators or starts with ~) + looksLikePath := strings.Contains(config.SSHPrivateKey, "/") || strings.HasPrefix(config.SSHPrivateKey, "~") || strings.Contains(config.SSHPrivateKey, "\\") + + if !looksLikePath { + // Doesn't look like a path, try base64 decoding + decoded, err := base64.StdEncoding.DecodeString(config.SSHPrivateKey) + if err == nil && len(decoded) > 0 { + // Successfully decoded, write to temp file in temp// + // Get the test file name from the caller (same pattern as PrepareBootstrapConfig) + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + return "", fmt.Errorf("failed to get caller file information") + } + testFileName := strings.TrimSuffix(filepath.Base(callerFile), filepath.Ext(callerFile)) + + // Determine the temp directory path in the repo root + callerDir := filepath.Dir(callerFile) + repoRootPath := filepath.Join(callerDir, "..", "..") + repoRoot, err := filepath.Abs(repoRootPath) + if err != nil { + return "", fmt.Errorf("failed to resolve repo root path: %w", err) + } + + // Create temp directory if it doesn't exist + tempDir := filepath.Join(repoRoot, "temp", testFileName) + if err := os.MkdirAll(tempDir, 0755); err != nil { + return "", fmt.Errorf("failed to create temp directory %s: %w", tempDir, err) + } + + // Create temp file in temp// + tmpFile, err := os.CreateTemp(tempDir, "ssh_private_key_*") + if err != nil { + return "", fmt.Errorf("failed to create temp file for private key: %w", err) + } + defer tmpFile.Close() + + if _, err := tmpFile.Write(decoded); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("failed to write decoded private key to temp file: %w", err) + } + + // Set permissions to 0600 + if err := os.Chmod(tmpFile.Name(), 0600); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("failed to set permissions on temp private key file: %w", err) + } + + return tmpFile.Name(), nil + } 
+ // If decoding failed, fall through to treat as path (might be a relative path without /) + } + + // Treat as file path + return expandPath(config.SSHPrivateKey) +} + +// GetSSHPublicKeyContent returns the SSH public key content as a string. +// If SSHPublicKey is a file path, it reads and returns the file content. +// If SSHPublicKey is a plain-text string, it returns it directly. +func GetSSHPublicKeyContent() (string, error) { + if config.SSHPublicKey == "" { + return "", fmt.Errorf("SSH_PUBLIC_KEY is not set") + } + + // Check if it looks like a file path (contains / or ~) + if strings.Contains(config.SSHPublicKey, "/") || strings.HasPrefix(config.SSHPublicKey, "~") { + // Treat as file path + expandedPath, err := expandPath(config.SSHPublicKey) + if err != nil { + return "", fmt.Errorf("failed to expand public key path: %w", err) + } + + content, err := os.ReadFile(expandedPath) + if err != nil { + return "", fmt.Errorf("failed to read public key file %s: %w", expandedPath, err) + } + + // Trim whitespace (public key files often have trailing newlines) + return strings.TrimSpace(string(content)), nil + } + + // Treat as plain-text public key + return strings.TrimSpace(config.SSHPublicKey), nil +} + +// expandPath expands ~ to home directory +func expandPath(path string) (string, error) { + if !strings.HasPrefix(path, "~") { + return path, nil + } + + usr, err := user.Current() + if err != nil { + return "", fmt.Errorf("failed to get current user: %w", err) + } + + if path == "~" { + return usr.HomeDir, nil + } + + return filepath.Join(usr.HomeDir, strings.TrimPrefix(path, "~/")), nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 2f14e95..776c013 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -302,6 +302,12 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace return "", fmt.Errorf("failed to check VirtualMachine %s: %w", vmName, err) } // VirtualMachine doesn't exist, create it + // Get SSH public key 
content + sshPublicKey, err := GetSSHPublicKeyContent() + if err != nil { + return "", fmt.Errorf("failed to get SSH public key content: %w", err) + } + memoryQuantity := resource.MustParse(fmt.Sprintf("%dGi", node.RAM)) vm := &v1alpha2.VirtualMachine{ ObjectMeta: metav1.ObjectMeta{ @@ -337,7 +343,7 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace }, Provisioning: &v1alpha2.Provisioning{ Type: "UserData", - UserData: generateCloudInitUserData(vmName, config.VMSSHPublicKey), + UserData: generateCloudInitUserData(vmName, sshPublicKey), }, }, } @@ -440,20 +446,22 @@ func RemoveAllVMs(ctx context.Context, resources *VMResources) error { return nil } -// GetSetupNode returns the setup VM node from VMResources. +// GetSetupNode returns the setup VM node from ClusterDefinition. // The setup node is always a separate VM with a unique name (bootstrap-node-). -func GetSetupNode(vmResources *VMResources) (*config.ClusterNode, error) { - if vmResources == nil { - return nil, fmt.Errorf("VMResources cannot be nil") +// Note: clusterDef.Setup.Hostname must be set to the generated VM name (done by GatherVMInfo) +func GetSetupNode(clusterDef *config.ClusterDefinition) (*config.ClusterNode, error) { + if clusterDef == nil { + return nil, fmt.Errorf("clusterDef cannot be nil") } - // Find the setup VM node by hostname - setupVM := config.DefaultSetupVM - setupVM.Hostname = vmResources.SetupVMName - return &setupVM, nil + if clusterDef.Setup == nil { + return nil, fmt.Errorf("setup node is not defined in cluster definition") + } + return clusterDef.Setup, nil } // GetVMIPAddress gets the IP address of a VM by querying its status // It waits for the VM to have an IP address assigned +// DEPRECATED: Use GatherVMInfo to get all VM info at once, then use VMInfo.GetIPAddress func GetVMIPAddress(ctx context.Context, virtClient *virtualization.Client, namespace, vmName string) (string, error) { vm, err := virtClient.VirtualMachines().Get(ctx, namespace, 
vmName) if err != nil { @@ -468,6 +476,91 @@ func GetVMIPAddress(ctx context.Context, virtClient *virtualization.Client, name return vm.Status.IPAddress, nil } +// GatherVMInfo gathers IP addresses for all VMs in the cluster definition and fills them into ClusterDefinition. +// This should be called once while connected to the base cluster, before switching to test cluster. +// It modifies clusterDef in-place by setting IPAddress field for each VM node. +func GatherVMInfo(ctx context.Context, virtClient *virtualization.Client, namespace string, clusterDef *config.ClusterDefinition, vmResources *VMResources) error { + // Gather info for all masters + for i := range clusterDef.Masters { + master := &clusterDef.Masters[i] + if master.HostType == config.HostTypeVM { + ip, err := GetVMIPAddress(ctx, virtClient, namespace, master.Hostname) + if err != nil { + return fmt.Errorf("failed to get IP for master %s: %w", master.Hostname, err) + } + master.IPAddress = ip + } + } + + // Gather info for all workers + for i := range clusterDef.Workers { + worker := &clusterDef.Workers[i] + if worker.HostType == config.HostTypeVM { + ip, err := GetVMIPAddress(ctx, virtClient, namespace, worker.Hostname) + if err != nil { + return fmt.Errorf("failed to get IP for worker %s: %w", worker.Hostname, err) + } + worker.IPAddress = ip + } + } + + // Gather info for setup node + // The setup node is always created dynamically, so we need to create/update clusterDef.Setup + setupVMName := vmResources.SetupVMName + ip, err := GetVMIPAddress(ctx, virtClient, namespace, setupVMName) + if err != nil { + return fmt.Errorf("failed to get IP for setup node %s: %w", setupVMName, err) + } + + // Create or update clusterDef.Setup with the generated VM info + if clusterDef.Setup == nil { + // Create setup node from DefaultSetupVM template + setupNode := config.DefaultSetupVM + setupNode.Hostname = setupVMName + setupNode.IPAddress = ip + clusterDef.Setup = &setupNode + } else { + // Update existing 
setup node + clusterDef.Setup.Hostname = setupVMName + clusterDef.Setup.IPAddress = ip + } + + return nil +} + +// GetNodeIPAddress gets the IP address for a node by hostname from ClusterDefinition +func GetNodeIPAddress(clusterDef *config.ClusterDefinition, hostname string) (string, error) { + // Check masters + for _, master := range clusterDef.Masters { + if master.Hostname == hostname { + if master.IPAddress == "" { + return "", fmt.Errorf("IP address not set for master node %s", hostname) + } + return master.IPAddress, nil + } + } + + // Check workers + for _, worker := range clusterDef.Workers { + if worker.Hostname == hostname { + if worker.IPAddress == "" { + return "", fmt.Errorf("IP address not set for worker node %s", hostname) + } + return worker.IPAddress, nil + } + } + + // Check setup node + if clusterDef.Setup != nil && clusterDef.Setup.Hostname == hostname { + if clusterDef.Setup.IPAddress == "" { + return "", fmt.Errorf("IP address not set for setup node %s", hostname) + } + return clusterDef.Setup.IPAddress, nil + } + + return "", fmt.Errorf("node with hostname %s not found in cluster definition", hostname) +} + // RemoveVM removes a VM and its associated VirtualDisks, then removes the ClusterVirtualImage if not used by other VMs. // It removes resources in order: VM -> VirtualDisks -> ClusterVirtualImage (if unused). 
func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, vmName string) error { diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 449a22f..44392f3 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -19,9 +19,6 @@ package integration import ( "context" "fmt" - "os" - "path/filepath" - "strings" "time" "k8s.io/client-go/rest" @@ -53,6 +50,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { vmResources *cluster.VMResources bootstrapConfig string testClusterResources *cluster.TestClusterResources + sshKeyPath string ) BeforeAll(func() { @@ -105,6 +103,13 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { } }) + By("Getting SSH private key path", func() { + GinkgoWriter.Printf(" ▶️ Getting SSH private key path\n") + sshKeyPath, err = cluster.GetSSHPrivateKeyPath() + Expect(err).NotTo(HaveOccurred(), "Failed to get SSH private key path") + GinkgoWriter.Printf(" ✅ SSH private key path obtained successfully\n") + }) + // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { yamlConfigFilename := config.YAMLConfigFilename @@ -229,7 +234,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { baseClusterResources, err := cluster.ConnectToCluster(ctx, cluster.ConnectClusterOptions{ SSHUser: config.SSHUser, SSHHost: config.SSHHost, - SSHKeyPath: config.SSHKeyPath, + SSHKeyPath: sshKeyPath, UseJumpHost: false, }) Expect(err).NotTo(HaveOccurred(), "Failed to connect to base cluster") @@ -250,7 +255,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 2: Verify virtualization module is Ready before creating VMs + // Step 2: Verify virtualization module is Ready in base cluster before 
creating VMs It("should make sure that virtualization module is Ready", func() { By("Checking if virtualization module is Ready", func() { GinkgoWriter.Printf(" ▶️ Getting module with timeout\n") @@ -328,30 +333,62 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 5: Establish SSH connection to setup node through base cluster master (jump host) + // Step 5: Gather VM information (IPs, etc.) while still connected to base cluster + It("should gather VM information", func() { + By("Gathering VM information", func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + namespace := config.TestClusterNamespace + + GinkgoWriter.Printf(" ▶️ Gathering IP addresses and VM information for all VMs\n") + var err error + err = cluster.GatherVMInfo(ctx, virtClient, namespace, clusterDefinition, vmResources) + Expect(err).NotTo(HaveOccurred(), "Failed to gather VM information") + + // Log all gathered IPs + vmCount := 0 + for _, master := range clusterDefinition.Masters { + if master.HostType == config.HostTypeVM && master.IPAddress != "" { + GinkgoWriter.Printf(" ✅ VM %s has IP: %s\n", master.Hostname, master.IPAddress) + vmCount++ + } + } + for _, worker := range clusterDefinition.Workers { + if worker.HostType == config.HostTypeVM && worker.IPAddress != "" { + GinkgoWriter.Printf(" ✅ VM %s has IP: %s\n", worker.Hostname, worker.IPAddress) + vmCount++ + } + } + if clusterDefinition.Setup != nil && clusterDefinition.Setup.HostType == config.HostTypeVM && clusterDefinition.Setup.IPAddress != "" { + GinkgoWriter.Printf(" ✅ VM %s has IP: %s\n", clusterDefinition.Setup.Hostname, clusterDefinition.Setup.IPAddress) + vmCount++ + } + + GinkgoWriter.Printf(" ✅ Successfully gathered information for %d VMs\n", vmCount) + }) + }) + + // Step 6: Establish SSH connection to setup node through base cluster master (jump host) It("should establish SSH connection to setup node through base cluster master", 
func() { By("Obtaining SSH client to setup node through base cluster master", func() { // Note: We don't need to stop the base cluster tunnel here. // Jump host clients are just SSH connections and don't require port forwarding. // The base cluster tunnel can stay active for virtClient operations. - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - namespace := config.TestClusterNamespace - setupNode, err := cluster.GetSetupNode(vmResources) + setupNode, err := cluster.GetSetupNode(clusterDefinition) Expect(err).NotTo(HaveOccurred()) - // Get setup node IP address - setupNodeIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, setupNode.Hostname) - Expect(err).NotTo(HaveOccurred()) - Expect(setupNodeIP).NotTo(BeEmpty()) + // Get setup node IP address from cluster definition + setupNodeIP := setupNode.IPAddress + Expect(setupNodeIP).NotTo(BeEmpty(), "Setup node IP address should be set (gathered in Step 5)") // Create SSH client with jump host (base cluster master) GinkgoWriter.Printf(" ▶️ Creating SSH client to %s@%s through jump host %s@%s\n", config.VMSSHUser, setupNodeIP, config.SSHUser, config.SSHHost) setupSSHClient, err = ssh.NewClientWithJumpHost( - config.SSHUser, config.SSHHost, config.SSHKeyPath, // jump host - config.VMSSHUser, setupNodeIP, config.SSHKeyPath, // target host + config.SSHUser, config.SSHHost, sshKeyPath, // jump host + config.VMSSHUser, setupNodeIP, sshKeyPath, // target host ) Expect(err).NotTo(HaveOccurred()) Expect(setupSSHClient).NotTo(BeNil()) @@ -359,7 +396,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 6: Install Docker on setup node (required for DKP bootstrap) + // Step 7: Install Docker on setup node (required for DKP bootstrap) It("should ensure Docker is installed on the setup node", func() { By("Installing Docker on setup node", func() { ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) @@ -372,36 +409,11 @@ 
var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 7: Prepare bootstrap configuration file from template with cluster-specific values + // Step 8: Prepare bootstrap configuration file from template with cluster-specific values It("should prepare bootstrap config for the setup node", func() { By("Preparing bootstrap config for the setup node", func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - namespace := config.TestClusterNamespace - - // Get IPs for all VMs (masters, workers, and setup node) - // Note: vmResources.VMNames already includes the setup VM, so we don't need to append it - var vmIPs []string - allVMNames := vmResources.VMNames - - GinkgoWriter.Printf(" ▶️ Getting IP addresses for all VMs\n") - for _, vmName := range allVMNames { - vmIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, vmName) - Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for VM %s", vmName) - Expect(vmIP).NotTo(BeEmpty(), "VM %s IP address should not be empty", vmName) - vmIPs = append(vmIPs, vmIP) - GinkgoWriter.Printf(" ✅ VM %s has IP: %s\n", vmName, vmIP) - } - - firstMasterHostname := clusterDefinition.Masters[0].Hostname - masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) - Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) - Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) - GinkgoWriter.Printf(" ✅ Master node %s has IP: %s\n", firstMasterHostname, masterIP) - GinkgoWriter.Printf(" ▶️ Preparing bootstrap config for the setup node\n") - bootstrapConfig, err = cluster.PrepareBootstrapConfig(clusterDefinition, masterIP, vmIPs) + bootstrapConfig, err = cluster.PrepareBootstrapConfig(clusterDefinition) Expect(err).NotTo(HaveOccurred(), "Failed to prepare bootstrap config for the setup node") GinkgoWriter.Printf(" ✅ Bootstrap 
config prepared successfully at: %s\n", bootstrapConfig) }) @@ -413,19 +425,11 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() - // Expand SSH key path to handle ~ - keyPath := config.SSHKeyPath - if strings.HasPrefix(keyPath, "~") { - homeDir, err := os.UserHomeDir() - Expect(err).NotTo(HaveOccurred()) - keyPath = filepath.Join(homeDir, strings.TrimPrefix(keyPath, "~/")) - } - GinkgoWriter.Printf(" ▶️ Uploading bootstrap files to setup node\n") - GinkgoWriter.Printf(" 📁 Private key: %s -> /home/cloud/.ssh/id_rsa\n", keyPath) + GinkgoWriter.Printf(" 📁 Private key: %s -> /home/cloud/.ssh/id_rsa\n", sshKeyPath) GinkgoWriter.Printf(" 📁 Config file: %s -> /home/cloud/config.yml\n", bootstrapConfig) - err := cluster.UploadBootstrapFiles(ctx, setupSSHClient, keyPath, bootstrapConfig) + err = cluster.UploadBootstrapFiles(ctx, setupSSHClient, sshKeyPath, bootstrapConfig) Expect(err).NotTo(HaveOccurred(), "Failed to upload bootstrap files to setup node") GinkgoWriter.Printf(" ✅ Bootstrap files uploaded successfully\n") }) @@ -437,41 +441,31 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { ctx, cancel := context.WithTimeout(context.Background(), 35*time.Minute) defer cancel() - namespace := config.TestClusterNamespace firstMasterHostname := clusterDefinition.Masters[0].Hostname + firstMasterIP := clusterDefinition.Masters[0].IPAddress + Expect(firstMasterIP).NotTo(BeEmpty(), "Master node %s IP address should be set (gathered in Step 5)", firstMasterHostname) - // Get master IP address - masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) - Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) - Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) - - GinkgoWriter.Printf(" ▶️ Bootstrapping cluster from 
setup node to master %s (%s)\n", firstMasterHostname, masterIP) + GinkgoWriter.Printf(" ▶️ Bootstrapping cluster from setup node to master %s (%s)\n", firstMasterHostname, firstMasterIP) GinkgoWriter.Printf(" ⏱️ This may take up to 30 minutes...\n") - err = cluster.BootstrapCluster(ctx, setupSSHClient, clusterDefinition, masterIP, bootstrapConfig) + err = cluster.BootstrapCluster(ctx, setupSSHClient, clusterDefinition, bootstrapConfig) Expect(err).NotTo(HaveOccurred(), "Failed to bootstrap cluster") GinkgoWriter.Printf(" ✅ Cluster bootstrap completed successfully\n") }) }) - // Step 10: Verify cluster is ready - It("should verify cluster is ready", func() { - By("Verifying cluster is ready", func() { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + // Step 10: Create NodeGroup for workers + It("should create NodeGroup for workers", func() { + By("Creating NodeGroup for workers", func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() - namespace := config.TestClusterNamespace firstMasterHostname := clusterDefinition.Masters[0].Hostname + masterIP := clusterDefinition.Masters[0].IPAddress + Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should be set (gathered in Step 5)", firstMasterHostname) - // Get master IP address (base cluster tunnel should still be active from Step 1, stopped below) - masterIP, err := cluster.GetVMIPAddress(ctx, virtClient, namespace, firstMasterHostname) - Expect(err).NotTo(HaveOccurred(), "Failed to get IP address for master node %s", firstMasterHostname) - Expect(masterIP).NotTo(BeEmpty(), "Master node %s IP address should not be empty", firstMasterHostname) - - GinkgoWriter.Printf(" ▶️ Verifying cluster readiness for master %s (%s)\n", firstMasterHostname, masterIP) - - // Step 1: Stop base cluster tunnel before creating test cluster tunnel - // Both tunnels use port 6445, so we can't have both active at the same time + // Connect to test cluster to get 
kubeconfig (needed for NodeGroup creation) + // Note: We need to stop base cluster tunnel first as both use port 6445 if tunnelinfo != nil && tunnelinfo.StopFunc != nil { GinkgoWriter.Printf(" ▶️ Stopping base cluster SSH tunnel (port 6445 needed for test cluster tunnel)...\n") err := tunnelinfo.StopFunc() @@ -484,27 +478,104 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { testClusterResources, err = cluster.ConnectToCluster(ctx, cluster.ConnectClusterOptions{ SSHUser: config.SSHUser, SSHHost: config.SSHHost, - SSHKeyPath: config.SSHKeyPath, + SSHKeyPath: sshKeyPath, UseJumpHost: true, TargetUser: config.VMSSHUser, TargetHost: masterIP, - TargetKeyPath: config.SSHKeyPath, + TargetKeyPath: sshKeyPath, }) Expect(err).NotTo(HaveOccurred(), "Failed to establish connection to test cluster") Expect(testClusterResources).NotTo(BeNil()) - Expect(testClusterResources.TunnelInfo).NotTo(BeNil(), "Tunnel must remain active") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Test cluster kubeconfig must be available") GinkgoWriter.Printf(" ✅ Connection established, kubeconfig saved to: %s\n", testClusterResources.KubeconfigPath) GinkgoWriter.Printf(" ✅ SSH tunnel active on local port: %d\n", testClusterResources.TunnelInfo.LocalPort) - // Step 2: Check cluster health with Eventually (wait up to 10 minutes for deckhouse to be ready) - GinkgoWriter.Printf(" ⏱️ Waiting for deckhouse deployment to become ready (1 pod with 2/2 containers ready)...\n") + // Create NodeGroup for workers + GinkgoWriter.Printf(" ▶️ Creating NodeGroup for workers\n") + err = cluster.CreateStaticNodeGroup(ctx, testClusterResources.Kubeconfig, "worker") + Expect(err).NotTo(HaveOccurred(), "Failed to create worker NodeGroup") + GinkgoWriter.Printf(" ✅ NodeGroup for workers created successfully\n") + }) + }) + + // Step 11: Verify cluster is ready + It("should verify cluster is ready", func() { + By("Verifying cluster is ready", func() { + ctx, cancel := 
context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + + Expect(testClusterResources).NotTo(BeNil(), "Test cluster resources must be available from Step 10") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Test cluster kubeconfig must be available from Step 10") + + GinkgoWriter.Printf(" ▶️ Verifying cluster readiness\n") + + // Check cluster health with Eventually (wait up to 15 minutes for deckhouse to be ready and secrets to appear) + GinkgoWriter.Printf(" ⏱️ Waiting for deckhouse deployment to become ready (1 pod with 2/2 containers ready) and bootstrap secrets to appear...\n") Eventually(func() error { return cluster.CheckClusterHealth(ctx, testClusterResources.Kubeconfig) - }).WithTimeout(10*time.Minute).WithPolling(20*time.Second).Should(Succeed(), - "Deckhouse deployment should have 1 pod with 2/2 containers ready within 10 minutes") + }).WithTimeout(15*time.Minute).WithPolling(20*time.Second).Should(Succeed(), + "Deckhouse deployment should have 1 pod with 2/2 containers ready and bootstrap secrets should be available within 15 minutes") + + GinkgoWriter.Printf(" ✅ Cluster is ready (deckhouse deployment: 1 pod with 2/2 containers ready, bootstrap secrets available)\n") + }) + }) + + // Step 12: Add nodes to the cluster + It("should add all nodes to the cluster", func() { + By("Adding nodes to the cluster", func() { + ctx, cancel := context.WithTimeout(context.Background(), config.NodesReadyTimeout) + defer cancel() + + Expect(testClusterResources).NotTo(BeNil(), "Test cluster resources must be available from Step 10") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Test cluster kubeconfig must be available from Step 10") + + // Add all nodes to the cluster (skips first master, adds remaining masters and all workers) + GinkgoWriter.Printf(" ▶️ Adding nodes to the cluster (remaining masters and all workers)\n") + err = cluster.AddNodesToCluster(ctx, testClusterResources.Kubeconfig, clusterDefinition, config.SSHUser, 
config.SSHHost, sshKeyPath) + Expect(err).NotTo(HaveOccurred(), "Failed to add nodes to cluster") + GinkgoWriter.Printf(" ✅ All nodes added to cluster successfully\n") + + // Wait for all nodes to become Ready + GinkgoWriter.Printf(" ⏱️ Waiting for all nodes to become Ready (timeout: %v)...\n", config.NodesReadyTimeout) + Eventually(func() error { + return cluster.WaitForAllNodesReady(ctx, testClusterResources.Kubeconfig, clusterDefinition, config.NodesReadyTimeout) + }).WithTimeout(config.NodesReadyTimeout).WithPolling(10*time.Second).Should(Succeed(), + "All expected nodes should be present and Ready within %v", config.NodesReadyTimeout) + GinkgoWriter.Printf(" ✅ All nodes are Ready\n") + }) + }) + + // Step 13: Enable and configure modules from cluster definition in test cluster + It("should enable and configure modules from cluster definition in test cluster", func() { + By("Enabling and configuring modules in test cluster", func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + Expect(testClusterResources).NotTo(BeNil(), "Test cluster resources must be available") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Test cluster kubeconfig must be available") + + GinkgoWriter.Printf(" ▶️ Enabling and configuring modules from cluster definition in test cluster\n") + // Use SSH client to run kubectl commands from within the cluster (webhook needs to be accessible from cluster network) + err := cluster.EnableAndConfigureModules(ctx, testClusterResources.Kubeconfig, clusterDefinition, testClusterResources.SSHClient) + Expect(err).NotTo(HaveOccurred(), "Failed to enable and configure modules") + GinkgoWriter.Printf(" ✅ Modules enabled and configured successfully in test cluster\n") + }) + }) + + // Step 14: Wait for all modules to be ready in test cluster + It("should wait for all modules to be ready in test cluster", func() { + By("Waiting for modules to be ready in test cluster", func() { + ctx, cancel := 
context.WithTimeout(context.Background(), config.ModuleDeployTimeout) + defer cancel() + + Expect(testClusterResources).NotTo(BeNil(), "Test cluster resources must be available") + Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Test cluster kubeconfig must be available") - GinkgoWriter.Printf(" ✅ Cluster is ready (deckhouse deployment: 1 pod with 2/2 containers ready)\n") + GinkgoWriter.Printf(" ▶️ Waiting for modules to be ready in test cluster (timeout: %v)\n", config.ModuleDeployTimeout) + err := cluster.WaitForModulesReady(ctx, testClusterResources.Kubeconfig, clusterDefinition, config.ModuleDeployTimeout) + Expect(err).NotTo(HaveOccurred(), "Failed to wait for modules to be ready") + GinkgoWriter.Printf(" ✅ All modules are ready in test cluster\n") }) }) }) // Describe: Cluster Creation diff --git a/tests/cluster-creation/cluster_config.yml b/tests/cluster-creation/cluster_config.yml deleted file mode 100644 index 920a32c..0000000 --- a/tests/cluster-creation/cluster_config.yml +++ /dev/null @@ -1,56 +0,0 @@ -# Test nested cluster configuration -clusterDefinition: - masters: # Master nodes configuration - - hostname: "master-1" - hostType: "vm" - role: "master" - osType: "Ubuntu 22.04 6.2.0-39-generic" - cpu: 4 - ram: 8 - diskSize: 30 - workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number of workers and masters with the same config. 
- - hostname: "worker-1" - hostType: "vm" - role: "worker" - osType: "Ubuntu 22.04 6.2.0-39-generic" - cpu: 2 - ram: 6 - diskSize: 30 - - hostname: "worker-2" - hostType: "vm" - role: "worker" - osType: "Ubuntu 22.04 6.2.0-39-generic" - cpu: 2 - ram: 6 - diskSize: 30 - # DKP parameters - dkpParameters: - kubernetesVersion: "Automatic" - podSubnetCIDR: "10.112.0.0/16" - serviceSubnetCIDR: "10.225.0.0/16" - clusterDomain: "cluster.local" - registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" - # Module configuration - modules: - - name: "snapshot-controller" # TODO add MPO - version: 1 - enabled: true - modulePullOverride: "main" # imageTag for ModulePullOverride - dependencies: [] - - name: "sds-local-volume" - version: 1 - enabled: true - dependencies: - - "snapshot-controller" - - name: "sds-node-configurator" - version: 1 - enabled: true - settings: - enableThinProvisioning: true - dependencies: - - "sds-local-volume" - - name: "sds-replicated-volume" - version: 1 - enabled: true - dependencies: - - "sds-node-configurator" diff --git a/tests/cluster-creation/cluster_creation_suite_test.go b/tests/cluster-creation/cluster_creation_suite_test.go deleted file mode 100644 index 4b6037b..0000000 --- a/tests/cluster-creation/cluster_creation_suite_test.go +++ /dev/null @@ -1,33 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package integration - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" -) - -func TestIntegration(t *testing.T) { - RegisterFailHandler(Fail) - // Configure Ginkgo to show verbose output - suiteConfig, reporterConfig := GinkgoConfiguration() - reporterConfig.Verbose = true - reporterConfig.ShowNodeEvents = false - RunSpecs(t, "Integration Test Suite", suiteConfig, reporterConfig) -} diff --git a/tests/cluster-creation/cluster_creation_test.go b/tests/cluster-creation/cluster_creation_test.go deleted file mode 100644 index 9d9f729..0000000 --- a/tests/cluster-creation/cluster_creation_test.go +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright 2025 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package integration - -import ( - "context" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - - "github.com/deckhouse/storage-e2e/internal/config" - "github.com/deckhouse/storage-e2e/pkg/cluster" -) - -var _ = Describe("Cluster Creation Test", Ordered, func() { - var ( - testClusterResources *cluster.TestClusterResources - ctx context.Context = context.Background() - ) - - BeforeAll(func() { - By("Validating environment variables", func() { - GinkgoWriter.Printf(" ▶️ Validating environment variables\n") - err := config.ValidateEnvironment() - Expect(err).NotTo(HaveOccurred()) - GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") - }) - // DeferCleanup: Clean up all resources in reverse order of creation - analog of AfterAll() in Ginkgo - DeferCleanup(func() { - if testClusterResources != nil { - By("Cleaning up test cluster resources", func() { - GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources\n") - err := cluster.CleanupTestCluster(testClusterResources) - Expect(err).NotTo(HaveOccurred(), "CleanupTestCluster should succeed") - GinkgoWriter.Printf(" ✅ Test cluster resources cleaned up successfully\n") - }) - } - }) - }) - - It("should successfully create test cluster", func() { - By("Creating test cluster connection", func() { - GinkgoWriter.Printf(" ▶️ Creating test cluster connection\n") - var err error - yamlConfigFilename := config.YAMLConfigFilename - testClusterResources, err = cluster.CreateTestCluster( - ctx, - yamlConfigFilename, - ) - Expect(err).NotTo(HaveOccurred(), "CreateTestCluster should succeed") - Expect(testClusterResources).NotTo(BeNil(), "TestClusterResources should not be nil") - GinkgoWriter.Printf(" ✅ Test cluster connection created successfully\n") - }) - }) - - It("should get all test cluster resources", func() { - Expect(testClusterResources).NotTo(BeNil()) - Expect(testClusterResources.SSHClient).NotTo(BeNil(), "SSH client should be created") - Expect(testClusterResources.Kubeconfig).NotTo(BeNil(), "Kubeconfig should be created") - 
Expect(testClusterResources.KubeconfigPath).NotTo(BeEmpty(), "Kubeconfig path should be set") - Expect(testClusterResources.TunnelInfo).NotTo(BeNil(), "Tunnel info should be created") - Expect(testClusterResources.TunnelInfo.LocalPort).To(Equal(6445), "Local port should be exactly 6445") - Expect(testClusterResources.ClusterDefinition).NotTo(BeNil(), "Cluster definition should be loaded") - GinkgoWriter.Printf(" ✅ All test cluster resources verified successfully\n") - }) - -}) // Describe: Cluster Creation From 7b560197211afe8a832cd29c55ed717e295b795b Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 14:12:45 +0300 Subject: [PATCH 35/48] Storage class envvar is now required, readm updated --- README.md | 105 ++++++++++++++--------------------------- internal/config/env.go | 11 +++-- 2 files changed, 41 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 788b657..9a69173 100644 --- a/README.md +++ b/README.md @@ -2,40 +2,13 @@ End-to-end tests for Deckhouse storage components. -## Tests -### cluster-creation -High-level test that creates a complete test cluster from a YAML configuration file. This test handles the entire cluster creation process in a single operation. - -### cluster-creation-by-steps -Step-by-step test that creates a test cluster incrementally, validating each stage: - -**Setup (BeforeAll):** -1. Environment validation - Validates required environment variables are set -2. Cluster configuration loading - Loads and parses cluster definition from YAML file - -**Test Steps:** -1. Connect to base cluster - Establishes SSH connection, retrieves kubeconfig, and sets up port forwarding tunnel -2. Virtualization module readiness check - Verifies virtualization module is Ready -3. Test namespace creation - Creates test namespace if it doesn't exist -4. Virtual machine creation and provisioning - Creates VMs and waits for them to become Running -5. 
VM information gathering - Gathers IP addresses and other information for all VMs -6. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host -7. Docker installation on setup node - Installs Docker (required for DKP bootstrap) -8. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values -9. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap -10. Cluster bootstrap - Bootstraps Kubernetes cluster from setup node to first master node -11. NodeGroup creation for workers - Creates static NodeGroup for worker nodes -12. Cluster readiness verification - Verifies cluster is ready by checking deckhouse deployment -13. Node addition to cluster - Adds remaining master nodes and all worker nodes to the cluster -14. Module enablement and configuration - Enables and configures modules from cluster definition -15. Module readiness verification - Waits for all modules to become Ready in the test cluster ## Environment Variables ### Ready-to-use setup script -Copy and customize the following script with your values: +Copy and customize the following script with your values, put then to `/test_exports`, make executable and run: ```bash #!/bin/bash @@ -46,44 +19,26 @@ export DKP_LICENSE_KEY='your-license-key-here' # Get from license.deckhouse.io export REGISTRY_DOCKER_CFG='your-docker-registry-cfg-here' # Get from license.deckhouse.io export SSH_USER='your-ssh-user' # SSH username for base cluster connection export SSH_HOST='your-ssh-host' # SSH hostname/IP for base cluster +export TEST_CLUSTER_STORAGE_CLASS='your-storage-class' # Storage class for DVP cluster deployment +export KUBE_CONFIG_PATH='~/.kube/config' # Local path to kubeconfig for base cluster if SSH retrieval fails +export SSH_PASSPHRASE='' # Optional but required for non-interactive mode: passphrase for SSH private key # Optional environment variables with 
defaults (customize as needed) export YAML_CONFIG_FILENAME='cluster_config.yml' # Default: cluster_config.yml -export SSH_KEY_PATH='~/.ssh/id_rsa' # Default: ~/.ssh/id_rsa -export SSH_PASSPHRASE='' # Optional: passphrase for SSH private key +export SSH_PRIVATE_KEY='~/.ssh/id_rsa' # Default: ~/.ssh/id_rsa +export SSH_PUBLIC_KEY='~/.ssh/id_rsa.pub' export SSH_VM_USER='cloud' # Default: cloud -export SSH_VM_PUBLIC_KEY='ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8WyGvnBNQp+v6CUweF1QYCRtR7Do/IA8IA2uMd2HuBsddFrc5xYon2ZtEvypZC4Vm1CzgcgUm9UkHgxytKEB4zOOWkmqFP62OSLNyuWMaFEW1fb0EDenup6B5SrjnA8ckm4Hf2NSLvwW9yS98TfN3nqPOPJKfQsN+OTiCerTtNyXjca//ppuGKsQd99jG7SqE9aDQ3sYCXatM53SXqhxS2nTew82bmzVmKXDxcIzVrS9f+2WmXIdY2cKo2I352yKWOIp1Nk0uji8ozLPHFQGvbAG8DGG1KNVcBl2qYUcttmCpN+iXEcGqyn/atUVJJMnZXGtp0fiL1rMLqAd/bb6TFNzZFSsS+zqGesxqLePe32vLCQ3xursP3BRZkrScM+JzIqevfP63INHJEZfYlUf4Ic+gfliS2yA1LwhU7hD4LSVXMQynlF9WeGjuv6ZYxmO8hC6IWCqWnIUqKUiGtvBSPXwsZo7wgljBr4ykJgBzS9MjZ0fzz1JKe80tH6clpjIOn6ReBPwQBq2zmDDrpa5GVqqqjXhRQuA0AfpHdhs5UKxs1PBr7/PTLA7PI39xkOAE/Zj1TYQ2dmqvpskshi7AtBStjinQBAlLXysLSHBtO+3+PLAYcMZMVfb0bVqfGGludO2prvXrrWWTku0eOsA5IRahrRdGhv5zhKgFV7cwUQ== ayakubov@MacBook-Pro-Alexey.local' # Default: hardcoded key export TEST_CLUSTER_NAMESPACE='e2e-test-cluster' # Default: e2e-test-cluster -export TEST_CLUSTER_STORAGE_CLASS='rsc-test-r2-local' # Default: rsc-test-r2-local export TEST_CLUSTER_CLEANUP='false' # Default: false (set to 'true' or 'True' to enable cleanup) -export KUBE_CONFIG_PATH='' # Optional: fallback path to kubeconfig if SSH retrieval fails -``` - -**Note:** The `SSH_VM_PUBLIC_KEY` default value is a hardcoded public key. You can replace it with your own SSH public key if needed. 
- -## Configuration Parameters - -These are code-level configuration constants defined in `internal/config/config.go`: - -- **`DefaultSetupVM`** - Default configuration for the setup/bootstrap VM node: - - Hostname prefix: `bootstrap-node-` - - Host type: VM - - Role: setup - - OS Type: Ubuntu 22.04 6.2.0-39-generic - - CPU: 2 cores - - RAM: 4 GB - - Disk size: 20 GB -- **`VMsRunningTimeout`** - Timeout for waiting for all VMs to become Running state (default: `20 minutes`) - -**Note:** When running tests, use `-timeout` flag that is longer than `VMsRunningTimeout` to allow enough time for VM provisioning. For example, use `-timeout=25m` or `-timeout=60m` to ensure the test doesn't timeout prematurely. +``` ## Running Tests ### Run all tests in a test suite ```bash -go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 +go test -timeout=90m -v ./tests/cluster-creation-by-steps -count=1 ``` The `-count=1` flag prevents Go from using cached test results. @@ -91,23 +46,33 @@ The `-count=1` flag prevents Go from using cached test results. ### Run a specific test ```bash -go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" +go test -timeout=30m -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" ``` -### Example with environment variables +## Tests description + +### cluster-creation-by-steps +Step-by-step test that creates a test cluster incrementally, validating each stage: + +**Setup (BeforeAll):** +1. Environment validation - Validates required environment variables are set +2. Cluster configuration loading - Loads and parses cluster definition from YAML file + +**Test Steps:** +1. Connect to base cluster - Establishes SSH connection, retrieves kubeconfig, and sets up port forwarding tunnel +2. Virtualization module readiness check - Verifies virtualization module is Ready +3. Test namespace creation - Creates test namespace if it doesn't exist +4. 
Virtual machine creation and provisioning - Creates VMs and waits for them to become Running +5. VM information gathering - Gathers IP addresses and other information for all VMs +6. SSH connection establishment to setup node (through base cluster master) - Connects to setup node via jump host +7. Docker installation on setup node - Installs Docker (required for DKP bootstrap) +8. Bootstrap configuration preparation - Prepares bootstrap config from template with cluster-specific values +9. Bootstrap files upload (private key and config.yml) to setup node - Uploads files needed for DKP bootstrap +10. Cluster bootstrap - Bootstraps Kubernetes cluster from setup node to first master node +11. NodeGroup creation for workers - Creates static NodeGroup for worker nodes +12. Cluster readiness verification - Verifies cluster is ready by checking deckhouse deployment +13. Node addition to cluster - Adds remaining master nodes and all worker nodes to the cluster +14. Module enablement and configuration - Enables and configures modules from cluster definition +15. 
Module readiness verification - Waits for all modules to become Ready in the test cluster + -```bash -# Source the setup script (or copy the exports from above) -source setup_env.sh # if you saved the script above - -# Or set variables inline -export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' -export DKP_LICENSE_KEY='your-license-key' -export REGISTRY_DOCKER_CFG='your-docker-registry-cfg' -export SSH_USER='your-ssh-user' -export SSH_HOST='your-ssh-host' -export SSH_PASSPHRASE='your-passphrase' -export TEST_CLUSTER_CLEANUP='true' - -go test -timeout=60m -v ./tests/cluster-creation-by-steps -count=1 -``` diff --git a/internal/config/env.go b/internal/config/env.go index b2a91b7..34b267e 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -63,8 +63,8 @@ var ( TestClusterNamespaceDefaultValue = "e2e-test-cluster" // TestClusterStorageClass specifies the storage class for DKP cluster deployment - TestClusterStorageClass = os.Getenv("TEST_CLUSTER_STORAGE_CLASS") - TestClusterStorageClassDefaultValue = "rsc-test-r2-local" + TestClusterStorageClass = os.Getenv("TEST_CLUSTER_STORAGE_CLASS") + //TestClusterStorageClassDefaultValue = "rsc-test-r2-local" // DKPLicenseKey specifies the DKP license key for cluster deployment DKPLicenseKey = os.Getenv("DKP_LICENSE_KEY") @@ -95,9 +95,6 @@ func ValidateEnvironment() error { if TestClusterNamespace == "" { TestClusterNamespace = TestClusterNamespaceDefaultValue } - if TestClusterStorageClass == "" { - TestClusterStorageClass = TestClusterStorageClassDefaultValue - } // There are no default values for these variables and they must be set! Otherwise, the test will fail. 
if SSHUser == "" { @@ -108,6 +105,10 @@ func ValidateEnvironment() error { return fmt.Errorf("SSH_HOST environment variable is required but not set.") } + if TestClusterStorageClass == "" { + return fmt.Errorf("TEST_CLUSTER_STORAGE_CLASS environment variable is required but not set.") + } + if DKPLicenseKey == "" { return fmt.Errorf("DKP_LICENSE_KEY environment variable is required but not set. ") } From 0286ad49666c7e9d5247c47083c369098fb9bdb5 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 14:17:48 +0300 Subject: [PATCH 36/48] kubectl path problem fixed --- pkg/cluster/modules.go | 51 ++---------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/pkg/cluster/modules.go b/pkg/cluster/modules.go index ffa7cec..9db955d 100644 --- a/pkg/cluster/modules.go +++ b/pkg/cluster/modules.go @@ -182,41 +182,6 @@ func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleC return fmt.Errorf("failed to configure moduleconfig %s after %d attempts: %w", moduleConfig.Name, maxRetries, lastErr) } -// findKubectlPath finds the kubectl binary path on the remote host -// It checks both as the user and as root (via sudo) to ensure kubectl is accessible -func findKubectlPath(ctx context.Context, sshClient ssh.SSHClient) (string, error) { - // First, try to find kubectl as the user - cmd := "command -v kubectl 2>/dev/null || which kubectl 2>/dev/null || echo ''" - output, err := sshClient.Exec(ctx, cmd) - if err == nil { - path := strings.TrimSpace(output) - if path != "" { - // Verify it's accessible with sudo - verifyCmd := fmt.Sprintf("sudo test -x %s && echo %s", path, path) - verifyOutput, verifyErr := sshClient.Exec(ctx, verifyCmd) - if verifyErr == nil && strings.TrimSpace(verifyOutput) != "" { - return path, nil - } - } - } - - // Try common kubectl installation paths (check as root via sudo) - kubectlPaths := []string{"/usr/local/bin/kubectl", "/usr/bin/kubectl", "/opt/bin/kubectl", 
"/snap/bin/kubectl"} - for _, path := range kubectlPaths { - // Check if file exists and is executable (as root) - checkCmd := fmt.Sprintf("sudo test -x %s && echo %s", path, path) - checkOutput, checkErr := sshClient.Exec(ctx, checkCmd) - if checkErr == nil { - foundPath := strings.TrimSpace(checkOutput) - if foundPath != "" { - return foundPath, nil - } - } - } - - return "", fmt.Errorf("kubectl not found on master node (checked user PATH and common locations)") -} - // configureModuleConfigViaSSH creates or updates a ModuleConfig resource via kubectl over SSH // This ensures the webhook is called from within the cluster network func configureModuleConfigViaSSH(ctx context.Context, sshClient ssh.SSHClient, moduleConfig *config.ModuleConfig) error { @@ -256,14 +221,8 @@ func configureModuleConfigViaSSH(ctx context.Context, sshClient ssh.SSHClient, m return fmt.Errorf("failed to marshal ModuleConfig YAML: %w", err) } - // Find kubectl path - kubectlPath, err := findKubectlPath(ctx, sshClient) - if err != nil { - return fmt.Errorf("failed to find kubectl: %w", err) - } - // Apply via kubectl over SSH using the found path - cmd := fmt.Sprintf("sudo %s apply -f - << 'MODULECONFIG_EOF'\n%sMODULECONFIG_EOF", kubectlPath, string(yamlBytes)) + cmd := fmt.Sprintf("sudo /opt/deckhouse/bin/kubectl apply -f - << 'MODULECONFIG_EOF'\n%sMODULECONFIG_EOF", string(yamlBytes)) output, err := sshClient.Exec(ctx, cmd) if err != nil { return fmt.Errorf("failed to apply ModuleConfig %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) @@ -329,14 +288,8 @@ func configureModulePullOverrideViaSSH(ctx context.Context, sshClient ssh.SSHCli return fmt.Errorf("failed to marshal ModulePullOverride YAML: %w", err) } - // Find kubectl path - kubectlPath, err := findKubectlPath(ctx, sshClient) - if err != nil { - return fmt.Errorf("failed to find kubectl: %w", err) - } - // Apply via kubectl over SSH using the found path - cmd := fmt.Sprintf("sudo %s apply -f - << 
'MODULEPULLOVERRIDE_EOF'\n%sMODULEPULLOVERRIDE_EOF", kubectlPath, string(yamlBytes)) + cmd := fmt.Sprintf("sudo /opt/deckhouse/bin/kubectl apply -f - << 'MODULEPULLOVERRIDE_EOF'\n%sMODULEPULLOVERRIDE_EOF", string(yamlBytes)) output, err := sshClient.Exec(ctx, cmd) if err != nil { return fmt.Errorf("failed to apply ModulePullOverride %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) From 1fbd195d98b7c8b67e5de067a1558d8fd025a6bc Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 17:33:59 +0300 Subject: [PATCH 37/48] Cluster has been deployed successfully with modules enabled and configured! --- internal/config/config.go | 2 +- pkg/cluster/modules.go | 17 +++++++++++------ .../cluster_config.yml | 10 +++++----- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index f98c6dd..bfd0725 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -38,6 +38,6 @@ const ( VMsRunningTimeout = 20 * time.Minute NodesReadyTimeout = 15 * time.Minute DKPDeployTimeout = 30 * time.Minute // Timeout for DKP deployment - ModuleDeployTimeout = 20 * time.Minute // Timeout for module deployment + ModuleDeployTimeout = 30 * time.Minute // Timeout for module deployment HostReadyTimeout = 10 * time.Minute // Timeout for hosts to be ready ) diff --git a/pkg/cluster/modules.go b/pkg/cluster/modules.go index 9db955d..0e5b17d 100644 --- a/pkg/cluster/modules.go +++ b/pkg/cluster/modules.go @@ -444,6 +444,8 @@ func WaitForModulesReady(ctx context.Context, kubeconfig *rest.Config, clusterDe } // WaitForModuleReady waits for a module to reach the Ready phase +// It continues waiting even if the module is temporarily in Error phase, as modules can recover. +// Only fails if the timeout is exceeded and the module is still not Ready. 
func WaitForModuleReady(ctx context.Context, kubeconfig *rest.Config, moduleName string, timeout time.Duration) error { deadline := time.Now().Add(timeout) ticker := time.NewTicker(2 * time.Second) @@ -454,10 +456,6 @@ func WaitForModuleReady(ctx context.Context, kubeconfig *rest.Config, moduleName case <-ctx.Done(): return ctx.Err() case <-ticker.C: - if time.Now().After(deadline) { - return fmt.Errorf("timeout waiting for module %s to be ready", moduleName) - } - module, err := deckhouse.GetModule(ctx, kubeconfig, moduleName) if err != nil { // Module doesn't exist yet, continue waiting @@ -468,9 +466,16 @@ func WaitForModuleReady(ctx context.Context, kubeconfig *rest.Config, moduleName return nil } - if module.Status.Phase == "Error" { - return fmt.Errorf("module %s is in Error phase", moduleName) + // Check timeout only after checking the phase + // This ensures we wait the full timeout period even if module is in Error phase + if time.Now().After(deadline) { + if module.Status.Phase == "Error" { + return fmt.Errorf("timeout waiting for module %s to be ready: module is still in Error phase after %v", moduleName, timeout) + } + return fmt.Errorf("timeout waiting for module %s to be ready: module is in %s phase after %v", moduleName, module.Status.Phase, timeout) } + + // Continue waiting even if module is in Error phase - it may recover } } } diff --git a/tests/cluster-creation-by-steps/cluster_config.yml b/tests/cluster-creation-by-steps/cluster_config.yml index 1e6fd65..6506656 100644 --- a/tests/cluster-creation-by-steps/cluster_config.yml +++ b/tests/cluster-creation-by-steps/cluster_config.yml @@ -60,8 +60,8 @@ clusterDefinition: enableThinProvisioning: true dependencies: - "sds-local-volume" - - name: "sds-replicated-volume" - version: 1 - enabled: true - dependencies: - - "sds-node-configurator" + # - name: "sds-replicated-volume" + # version: 1 + # enabled: true + # dependencies: + # - "sds-node-configurator" From d98313782f99bee620c737f6755890137787c1d7 
Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 18:06:14 +0300 Subject: [PATCH 38/48] test-template has been created --- pkg/cluster/cluster.go | 421 +++++++++++++++++++-- tests/test-template/cluster_config.yml | 67 ++++ tests/test-template/template_suite_test.go | 33 ++ tests/test-template/template_test.go | 99 +++++ 4 files changed, 578 insertions(+), 42 deletions(-) create mode 100644 tests/test-template/cluster_config.yml create mode 100644 tests/test-template/template_suite_test.go create mode 100644 tests/test-template/template_test.go diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index c176acc..2c87518 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -30,30 +30,50 @@ import ( "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" "github.com/deckhouse/storage-e2e/internal/kubernetes/apps" "github.com/deckhouse/storage-e2e/internal/kubernetes/core" + "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" + "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" + "github.com/deckhouse/storage-e2e/pkg/kubernetes" + "github.com/deckhouse/virtualization/api/core/v1alpha2" ) // TestClusterResources holds all resources created for a test cluster connection type TestClusterResources struct { - SSHClient ssh.SSHClient - Kubeconfig *rest.Config - KubeconfigPath string - TunnelInfo *ssh.TunnelInfo - ClusterDefinition *config.ClusterDefinition + SSHClient ssh.SSHClient + Kubeconfig *rest.Config + KubeconfigPath string + TunnelInfo *ssh.TunnelInfo + ClusterDefinition *config.ClusterDefinition + VMResources *VMResources + BaseClusterClient ssh.SSHClient // Base cluster SSH client (for cleanup) + BaseKubeconfig *rest.Config // Base cluster kubeconfig (for cleanup) + BaseKubeconfigPath string // Base cluster kubeconfig path (for cleanup) + BaseTunnelInfo *ssh.TunnelInfo // Base cluster tunnel (for cleanup, may be nil if stopped) + SetupSSHClient ssh.SSHClient // Setup node SSH client 
(for cleanup) } -// CreateTestCluster establishes a connection to a test cluster by: +// CreateTestCluster creates a complete test cluster by performing all necessary steps: // 1. Loading cluster configuration from YAML -// 2. Establishing SSH connection to the base cluster -// 3. Retrieving kubeconfig from the base cluster -// 4. Establishing SSH tunnel with port forwarding +// 2. Connecting to base cluster +// 3. Verifying virtualization module is Ready +// 4. Creating test namespace +// 5. Creating virtual machines +// 6. Gathering VM information +// 7. Establishing SSH connection to setup node +// 8. Installing Docker on setup node +// 9. Preparing and uploading bootstrap config +// 10. Bootstrapping cluster +// 11. Creating NodeGroup for workers +// 12. Verifying cluster is ready +// 13. Adding nodes to cluster +// 14. Enabling and configuring modules // -// It returns all the resources needed to interact with the cluster. +// It returns all the resources needed to interact with the test cluster. // SSH credentials are obtained from environment variables via config functions. 
func CreateTestCluster( ctx context.Context, yamlConfigFilename string, ) (*TestClusterResources, error) { - // Stage 1: Load cluster configuration from YAML + // Step 1: Load cluster configuration from YAML clusterDefinition, err := internalcluster.LoadClusterConfig(yamlConfigFilename) if err != nil { return nil, fmt.Errorf("failed to load cluster configuration: %w", err) @@ -67,60 +87,377 @@ func CreateTestCluster( return nil, fmt.Errorf("failed to get SSH private key path: %w", err) } - // Stage 2: Establish SSH connection to base cluster - sshClient, err := ssh.NewClient(sshUser, sshHost, sshKeyPath) + // Step 2: Connect to base cluster + baseClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ + SSHUser: sshUser, + SSHHost: sshHost, + SSHKeyPath: sshKeyPath, + UseJumpHost: false, + }) if err != nil { - return nil, fmt.Errorf("failed to create SSH client: %w", err) + return nil, fmt.Errorf("failed to connect to base cluster: %w", err) } - // Stage 3: Get kubeconfig from base cluster - // Use a timeout context for kubeconfig retrieval - kubeconfigCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + // Step 3: Verify virtualization module is Ready + moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") + cancel() + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to get virtualization module: %w", err) + } + if module.Status.Phase != "Ready" { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) + } + + // Step 4: Create test namespace + namespaceCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + namespace := config.TestClusterNamespace + _, err = kubernetes.CreateNamespaceIfNotExists(namespaceCtx, 
baseClusterResources.Kubeconfig, namespace) + cancel() + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to create namespace: %w", err) + } + + // Step 5: Create virtualization client and virtual machines + virtCtx, cancel := context.WithTimeout(ctx, 25*time.Minute) + virtClient, err := virtualization.NewClient(virtCtx, baseClusterResources.Kubeconfig) + if err != nil { + cancel() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to create virtualization client: %w", err) + } + + vmNames, vmResources, err := CreateVirtualMachines(virtCtx, virtClient, clusterDefinition) + cancel() + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to create virtual machines: %w", err) + } + + // Wait for all VMs to become Running + vmWaitCtx, cancel := context.WithTimeout(ctx, config.VMsRunningTimeout) defer cancel() + for _, vmName := range vmNames { + vmReady := false + for !vmReady { + select { + case <-vmWaitCtx.Done(): + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("timeout waiting for VM %s to become Running", vmName) + case <-time.After(20 * time.Second): + vm, err := virtClient.VirtualMachines().Get(vmWaitCtx, namespace, vmName) + if err != nil { + continue + } + if vm.Status.Phase == v1alpha2.MachineRunning { + vmReady = true + } + } + } + } + + // Step 6: Gather VM information + gatherCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + err = GatherVMInfo(gatherCtx, virtClient, namespace, clusterDefinition, vmResources) + cancel() + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to gather VM information: %w", err) + } + + // Step 7: Establish SSH connection to setup node + 
setupNode, err := GetSetupNode(clusterDefinition) + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to get setup node: %w", err) + } + setupNodeIP := setupNode.IPAddress + if setupNodeIP == "" { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("setup node IP address is not set") + } - kubeconfig, kubeconfigPath, err := internalcluster.GetKubeconfig( - kubeconfigCtx, - sshHost, - sshUser, - sshKeyPath, - sshClient, + setupSSHClient, err := ssh.NewClientWithJumpHost( + sshUser, sshHost, sshKeyPath, // jump host + config.VMSSHUser, setupNodeIP, sshKeyPath, // target host ) if err != nil { - sshClient.Close() - return nil, fmt.Errorf("failed to get kubeconfig: %w", err) + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to create SSH client to setup node: %w", err) } - // Stage 4: Establish SSH tunnel with port forwarding - tunnelInfo, err := ssh.EstablishSSHTunnel(ctx, sshClient, "6445") + // Step 8: Install Docker on setup node + dockerCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) + err = InstallDocker(dockerCtx, setupSSHClient) + cancel() if err != nil { - sshClient.Close() - return nil, fmt.Errorf("failed to establish SSH tunnel: %w", err) + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to install Docker on setup node: %w", err) } - return &TestClusterResources{ - SSHClient: sshClient, - Kubeconfig: kubeconfig, - KubeconfigPath: kubeconfigPath, - TunnelInfo: tunnelInfo, - ClusterDefinition: clusterDefinition, - }, nil + // Step 9: Prepare bootstrap config + bootstrapConfig, err := PrepareBootstrapConfig(clusterDefinition) + if err != nil { + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() 
+ return nil, fmt.Errorf("failed to prepare bootstrap config: %w", err) + } + + // Step 10: Upload bootstrap files + uploadCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + err = UploadBootstrapFiles(uploadCtx, setupSSHClient, sshKeyPath, bootstrapConfig) + cancel() + if err != nil { + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to upload bootstrap files: %w", err) + } + + // Step 11: Bootstrap cluster + firstMasterIP := clusterDefinition.Masters[0].IPAddress + if firstMasterIP == "" { + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("first master IP address is not set") + } + + bootstrapCtx, cancel := context.WithTimeout(ctx, 35*time.Minute) + err = BootstrapCluster(bootstrapCtx, setupSSHClient, clusterDefinition, bootstrapConfig) + cancel() + if err != nil { + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to bootstrap cluster: %w", err) + } + + // Step 12: Store base cluster kubeconfig before stopping tunnel (needed for cleanup) + baseKubeconfig := baseClusterResources.Kubeconfig + baseKubeconfigPath := baseClusterResources.KubeconfigPath + + // Step 13: Stop base cluster tunnel (needed for test cluster tunnel) + if baseClusterResources.TunnelInfo != nil && baseClusterResources.TunnelInfo.StopFunc != nil { + baseClusterResources.TunnelInfo.StopFunc() + } + + // Step 14: Connect to test cluster + testClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ + SSHUser: sshUser, + SSHHost: sshHost, + SSHKeyPath: sshKeyPath, + UseJumpHost: true, + TargetUser: config.VMSSHUser, + TargetHost: firstMasterIP, + TargetKeyPath: sshKeyPath, + }) + if err != nil { + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to connect to test 
cluster: %w", err) + } + + // Step 14: Create NodeGroup for workers + nodegroupCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + err = CreateStaticNodeGroup(nodegroupCtx, testClusterResources.Kubeconfig, "worker") + cancel() + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to create worker NodeGroup: %w", err) + } + + // Step 15: Verify cluster is ready + healthCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) + err = CheckClusterHealth(healthCtx, testClusterResources.Kubeconfig) + cancel() + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("cluster is not ready: %w", err) + } + + // Step 16: Add nodes to cluster + nodesCtx, cancel := context.WithTimeout(ctx, config.NodesReadyTimeout) + err = AddNodesToCluster(nodesCtx, testClusterResources.Kubeconfig, clusterDefinition, sshUser, sshHost, sshKeyPath) + cancel() + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to add nodes to cluster: %w", err) + } + + // Wait for all nodes to become Ready + nodesReadyCtx, cancel := context.WithTimeout(ctx, config.NodesReadyTimeout) + err = WaitForAllNodesReady(nodesReadyCtx, testClusterResources.Kubeconfig, clusterDefinition, config.NodesReadyTimeout) + cancel() + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to wait for nodes to be ready: %w", err) + } + + // Step 17: Enable and configure modules + modulesCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + err = 
EnableAndConfigureModules(modulesCtx, testClusterResources.Kubeconfig, clusterDefinition, testClusterResources.SSHClient) + cancel() + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to enable and configure modules: %w", err) + } + + // Set cluster definition and VM resources + testClusterResources.ClusterDefinition = clusterDefinition + testClusterResources.VMResources = vmResources + testClusterResources.BaseClusterClient = baseClusterResources.SSHClient + testClusterResources.BaseKubeconfig = baseKubeconfig + testClusterResources.BaseKubeconfigPath = baseKubeconfigPath + testClusterResources.BaseTunnelInfo = nil // Tunnel was stopped, will be re-established if needed + testClusterResources.SetupSSHClient = setupSSHClient + + return testClusterResources, nil +} + +// WaitForTestClusterReady waits for all modules in the test cluster to become Ready. +// It uses the ModuleDeployTimeout from config. +func WaitForTestClusterReady(ctx context.Context, resources *TestClusterResources) error { + if resources == nil { + return fmt.Errorf("resources cannot be nil") + } + if resources.Kubeconfig == nil { + return fmt.Errorf("kubeconfig cannot be nil") + } + if resources.ClusterDefinition == nil { + return fmt.Errorf("cluster definition cannot be nil") + } + + return WaitForModulesReady(ctx, resources.Kubeconfig, resources.ClusterDefinition, config.ModuleDeployTimeout) } -// CleanupTestCluster cleans up all resources created by CreateTestCluster -func CleanupTestCluster(resources *TestClusterResources) error { +// CleanupTestCluster cleans up all resources created by CreateTestCluster. +// It performs cleanup in the following order: +// 1. Stop test cluster tunnel and close test cluster SSH client +// 2. Close setup SSH client +// 3. Re-establish base cluster tunnel if needed (for VM cleanup via API) +// 4. 
Remove setup VM (always removed) +// 5. Remove test cluster VMs if TEST_CLUSTER_CLEANUP is enabled +// 6. Stop base cluster tunnel and close base cluster SSH client +func CleanupTestCluster(ctx context.Context, resources *TestClusterResources) error { + if resources == nil { + return nil // Nothing to clean up + } + var errs []error - // Stop SSH tunnel first (must be done before closing SSH client) + // Step 1: Stop test cluster tunnel and close test cluster SSH client if resources.TunnelInfo != nil && resources.TunnelInfo.StopFunc != nil { if err := resources.TunnelInfo.StopFunc(); err != nil { - errs = append(errs, fmt.Errorf("failed to stop SSH tunnel: %w", err)) + errs = append(errs, fmt.Errorf("failed to stop test cluster SSH tunnel: %w", err)) } } - // Close SSH client connection if resources.SSHClient != nil { if err := resources.SSHClient.Close(); err != nil { - errs = append(errs, fmt.Errorf("failed to close SSH client: %w", err)) + errs = append(errs, fmt.Errorf("failed to close test cluster SSH client: %w", err)) + } + } + + // Step 2: Close setup SSH client + if resources.SetupSSHClient != nil { + if err := resources.SetupSSHClient.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close setup SSH client: %w", err)) + } + } + + // Step 3: Re-establish base cluster tunnel if needed for VM cleanup + // We need API access to remove VMs, so we need the tunnel + var baseTunnel *ssh.TunnelInfo + var cleanupKubeconfig *rest.Config + if resources.BaseClusterClient != nil && resources.VMResources != nil { + // Re-establish tunnel if it was stopped (BaseTunnelInfo is nil) + if resources.BaseTunnelInfo == nil { + var tunnelErr error + baseTunnel, tunnelErr = ssh.EstablishSSHTunnel(context.Background(), resources.BaseClusterClient, "6445") + if tunnelErr != nil { + errs = append(errs, fmt.Errorf("failed to re-establish base cluster tunnel for VM cleanup: %w", tunnelErr)) + } else { + // Update kubeconfig to use the tunnel port + if 
resources.BaseKubeconfigPath != "" { + if updateErr := internalcluster.UpdateKubeconfigPort(resources.BaseKubeconfigPath, baseTunnel.LocalPort); updateErr == nil { + // Rebuild kubeconfig + cleanupKubeconfig, _ = clientcmd.BuildConfigFromFlags("", resources.BaseKubeconfigPath) + } + } + } + } else { + // Tunnel already exists, use it + baseTunnel = resources.BaseTunnelInfo + cleanupKubeconfig = resources.BaseKubeconfig + } + + // Step 4 & 5: Remove VMs if we have a valid kubeconfig + if cleanupKubeconfig != nil { + // Create virtualization client for cleanup + virtClient, virtErr := virtualization.NewClient(ctx, cleanupKubeconfig) + if virtErr == nil { + // Step 4: Remove setup VM (always removed) + if resources.VMResources.SetupVMName != "" { + namespace := config.TestClusterNamespace + if removeErr := RemoveVM(ctx, virtClient, namespace, resources.VMResources.SetupVMName); removeErr != nil { + errs = append(errs, fmt.Errorf("failed to remove setup VM %s: %w", resources.VMResources.SetupVMName, removeErr)) + } + } + + // Step 5: Remove test cluster VMs if cleanup is enabled + if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { + if removeErr := RemoveAllVMs(ctx, resources.VMResources); removeErr != nil { + errs = append(errs, fmt.Errorf("failed to remove test cluster VMs: %w", removeErr)) + } + } + } else { + errs = append(errs, fmt.Errorf("failed to create virtualization client for cleanup: %w", virtErr)) + } + } + } + + // Step 6: Stop base cluster tunnel and close base cluster SSH client + if baseTunnel != nil && baseTunnel.StopFunc != nil { + if err := baseTunnel.StopFunc(); err != nil { + errs = append(errs, fmt.Errorf("failed to stop base cluster SSH tunnel: %w", err)) + } + } + + if resources.BaseClusterClient != nil { + if err := resources.BaseClusterClient.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close base cluster SSH client: %w", err)) } } diff --git a/tests/test-template/cluster_config.yml 
b/tests/test-template/cluster_config.yml new file mode 100644 index 0000000..86fdbd7 --- /dev/null +++ b/tests/test-template/cluster_config.yml @@ -0,0 +1,67 @@ +# Test nested cluster configuration +clusterDefinition: + masters: # Master nodes configuration + - hostname: "master-1" + hostType: "vm" + role: "master" + osType: "Ubuntu 22.04 6.2.0-39-generic" + cpu: 4 + coreFraction: 50 + ram: 8 + diskSize: 30 + workers: # Worker nodes configuration // TODO implement logic allowing to deploy different number of workes and masters with the same config. + - hostname: "worker-1" + hostType: "vm" + role: "worker" + osType: "RedOS 8.0 6.6.26-1.red80.x86_64" + cpu: 2 + coreFraction: 50 + ram: 6 + diskSize: 30 + - hostname: "worker-2" + hostType: "vm" + role: "worker" + osType: "RedOS 7.3.6 5.15.78-2.el7.3.x86_64" + cpu: 2 + coreFraction: 50 + ram: 6 + diskSize: 30 + - hostname: "worker-3" + hostType: "vm" + role: "worker" + osType: "Ubuntu 24.04 6.8.0-53-generic" + cpu: 2 + coreFraction: 50 + ram: 6 + diskSize: 30 + # DKP parameters + dkpParameters: + kubernetesVersion: "Automatic" + podSubnetCIDR: "10.112.0.0/16" + serviceSubnetCIDR: "10.225.0.0/16" + clusterDomain: "cluster.local" + registryRepo: "dev-registry.deckhouse.io/sys/deckhouse-oss" + # Module configuration + modules: + - name: "snapshot-controller" # TODO add MPO + version: 1 + enabled: true + modulePullOverride: "main" # imageTag for ModulePullOverride. Main is default value, used if not specified. 
+ dependencies: [] + - name: "sds-local-volume" + version: 1 + enabled: true + dependencies: + - "snapshot-controller" + - name: "sds-node-configurator" + version: 1 + enabled: true + settings: + enableThinProvisioning: true + dependencies: + - "sds-local-volume" + # - name: "sds-replicated-volume" + # version: 1 + # enabled: true + # dependencies: + # - "sds-node-configurator" diff --git a/tests/test-template/template_suite_test.go b/tests/test-template/template_suite_test.go new file mode 100644 index 0000000..bfb1cb0 --- /dev/null +++ b/tests/test-template/template_suite_test.go @@ -0,0 +1,33 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package test_template + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestTemplate(t *testing.T) { + RegisterFailHandler(Fail) + // Configure Ginkgo to show verbose output + suiteConfig, reporterConfig := GinkgoConfiguration() + reporterConfig.Verbose = true + reporterConfig.ShowNodeEvents = false + RunSpecs(t, "Template Test Suite", suiteConfig, reporterConfig) +} diff --git a/tests/test-template/template_test.go b/tests/test-template/template_test.go new file mode 100644 index 0000000..1878e6c --- /dev/null +++ b/tests/test-template/template_test.go @@ -0,0 +1,99 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package test_template + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/deckhouse/storage-e2e/internal/config" + "github.com/deckhouse/storage-e2e/pkg/cluster" +) + +var _ = Describe("Template Test", Ordered, func() { + var ( + testClusterResources *cluster.TestClusterResources + ) + + BeforeAll(func() { + By("Validating environment variables", func() { + GinkgoWriter.Printf(" ▶️ Validating environment variables\n") + err := config.ValidateEnvironment() + Expect(err).NotTo(HaveOccurred()) + GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") + }) + }) + + AfterAll(func() { + // Cleanup test cluster resources + if testClusterResources != nil { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources...\n") + err := cluster.CleanupTestCluster(ctx, testClusterResources) + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: Cleanup errors occurred: %v\n", err) + } else { + GinkgoWriter.Printf(" ✅ Test cluster resources cleaned up successfully\n") + } + } + }) + + // ---=== TEST CLUSTER IS CREATED AND GOT READY HERE ===--- // + + It("should create test cluster and wait for it to become ready", func() { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) + defer cancel() + + By("Creating test cluster", func() { + GinkgoWriter.Printf(" ▶️ Creating test cluster (this may take up to 60 minutes)...\n") + Eventually(func() error { + var err error + testClusterResources, err 
= cluster.CreateTestCluster(ctx, config.YAMLConfigFilename) + return err + }).WithTimeout(60*time.Minute).WithPolling(30*time.Second).Should(Succeed(), + "Test cluster should be created within 60 minutes") + GinkgoWriter.Printf(" ✅ Test cluster created successfully\n") + }) + + By("Waiting for test cluster to become ready", func() { + GinkgoWriter.Printf(" ▶️ Waiting for all modules to be ready in test cluster...\n") + Eventually(func() error { + return cluster.WaitForTestClusterReady(ctx, testClusterResources) + }).WithTimeout(60*time.Minute).WithPolling(30*time.Second).Should(Succeed(), + "Test cluster should become ready within 60 minutes") + GinkgoWriter.Printf(" ✅ Test cluster is ready (all modules are Ready)\n") + }) + }) // should create test cluster + + ///////////////////////////////////////////////////// ---=== TESTS START HERE ===--- ///////////////////////////////////////////////////// + + It("should perform a test", func() { + By("A test", func() { + GinkgoWriter.Printf(" ▶️ Performing a test...\n") + // TODO: Perform a test + GinkgoWriter.Printf(" ✅ Test performed successfully\n") + }) + }) // should perform a test + + ///////////////////////////////////////////////////// ---=== TESTS END HERE ===--- ///////////////////////////////////////////////////// + +}) // Describe: Template Test From c987d6081d58078d5bc3cbb73d61d1b4ca6fc008 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 18:12:32 +0300 Subject: [PATCH 39/48] README for test-template - added --- tests/README.md | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/README.md diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..a9d03e5 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,99 @@ +# Test Template Guide + +This guide explains how to use the `test-template` folder to create new E2E tests for Deckhouse storage components. 
+ +## Overview + +The test template provides a complete framework for creating and managing test clusters. It includes: +- Automatic test cluster creation and configuration +- Module enablement and readiness verification +- Automatic cleanup of resources +- A ready-to-use test structure + +## Quick Start + +### Step 1: Copy the Template Folder + +Copy the `test-template` folder to create your new test: + +```bash +cd tests/ +cp -r test-template your-test-name +``` + +Replace `your-test-name` with a descriptive name for your test (e.g., `storage-class-test`, `volume-test`, etc.). + +### Step 2: Update Package Names + +The template uses `test_template` as the package name. You need to update it to match your test folder name. + +#### Update `your-test-name_suite_test.go` + +1. Rename the file: + ```bash + cd your-test-name/ + mv template_suite_test.go your-test-name_suite_test.go + ``` + +2. Update the package name and test function: + ```go + package your_test_name // Use underscores, not hyphens + + func TestYourTestName(t *testing.T) { // Update function name + RegisterFailHandler(Fail) + suiteConfig, reporterConfig := GinkgoConfiguration() + reporterConfig.Verbose = true + reporterConfig.ShowNodeEvents = false + RunSpecs(t, "Your Test Name Suite", suiteConfig, reporterConfig) // Update suite name + } + ``` + +#### Update `your-test-name_test.go` + +1. Rename the file: + ```bash + mv template_test.go your-test-name_test.go + ``` + +2. Update the package name: + ```go + package your_test_name // Must match the suite file + ``` + +3. Update the Describe block name: + ```go + var _ = Describe("Your Test Name", Ordered, func() { + // ... rest of the code + }) + ``` + +### Step 3: Configure Environment Variables + +1. 
You can create here `test_exports` with your values - it's included in .gitignore: + ```bash + #!/bin/bash + + # Required environment variables + export TEST_CLUSTER_CREATE_MODE='alwaysCreateNew' + export DKP_LICENSE_KEY='your-license-key-here' + export REGISTRY_DOCKER_CFG='your-docker-registry-cfg-here' + export SSH_USER='your-ssh-user' + export SSH_HOST='your-ssh-host' + export TEST_CLUSTER_STORAGE_CLASS='your-storage-class' + export KUBE_CONFIG_PATH='~/.kube/config' + export SSH_PASSPHRASE='' # Optional but required for non-interactive mode + + # Optional environment variables with defaults + export YAML_CONFIG_FILENAME='cluster_config.yml' + export SSH_PRIVATE_KEY='~/.ssh/id_rsa' + export SSH_PUBLIC_KEY='~/.ssh/id_rsa.pub' + export SSH_VM_USER='cloud' + export TEST_CLUSTER_NAMESPACE='e2e-test-cluster' + export TEST_CLUSTER_CLEANUP='false' # Set to 'true' to enable cleanup + ``` + +2. Make it executable and run to export all the envvars: + ```bash + chmod +x test_exports + ``` + From 766ecf777f14cc2cab02a60dc949054e2e3e094d Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Tue, 23 Dec 2025 18:43:58 +0300 Subject: [PATCH 40/48] Update README.md to reflect changes in test suite instructions and provide a link to the tests README --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9a69173..989369c 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,14 @@ export TEST_CLUSTER_CLEANUP='false' # Default: false (set to 'true' or 'True' t ## Running Tests -### Run all tests in a test suite +### Make your own test suite from template + +See [tests/README.md](tests/README.md) + +### Run all tests in the test suite ```bash -go test -timeout=90m -v ./tests/cluster-creation-by-steps -count=1 +go test -timeout=90m -v ./tests/test-folder-name -count=1 ``` The `-count=1` flag prevents Go from using cached test results. @@ -46,7 +50,7 @@ The `-count=1` flag prevents Go from using cached test results. 
### Run a specific test ```bash -go test -timeout=30m -v ./tests/cluster-creation-by-steps -count=1 -ginkgo.focus="should create virtual machines" +go test -timeout=30m -v ./tests/test-folder-name -count=1 -ginkgo.focus="should create virtual machines" ``` ## Tests description From b006591da77ec8aee4d8bacaa3cbad635598c080 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 24 Dec 2025 11:56:21 +0300 Subject: [PATCH 41/48] test-template createcluster fix --- pkg/cluster/cluster.go | 151 ++++++++++++++++++++++++++- tests/test-template/template_test.go | 27 ++--- 2 files changed, 162 insertions(+), 16 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 2c87518..0f68edc 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -19,12 +19,16 @@ package cluster import ( "context" "fmt" + "path/filepath" + "runtime" "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "os" + internalcluster "github.com/deckhouse/storage-e2e/internal/cluster" "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" @@ -34,6 +38,7 @@ import ( "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" "github.com/deckhouse/storage-e2e/pkg/kubernetes" "github.com/deckhouse/virtualization/api/core/v1alpha2" + "gopkg.in/yaml.v3" ) // TestClusterResources holds all resources created for a test cluster connection @@ -51,6 +56,43 @@ type TestClusterResources struct { SetupSSHClient ssh.SSHClient // Setup node SSH client (for cleanup) } +// loadClusterConfigFromPath loads and validates a cluster configuration from a specific file path +func loadClusterConfigFromPath(configPath string) (*config.ClusterDefinition, error) { + // Read the YAML file + data, err := os.ReadFile(configPath) + if err != nil { + return nil, fmt.Errorf("failed to read config file %s: %w", configPath, err) + } + + // Parse YAML directly into 
ClusterDefinition (has custom UnmarshalYAML for root key) + var clusterDef config.ClusterDefinition + if err := yaml.Unmarshal(data, &clusterDef); err != nil { + return nil, fmt.Errorf("failed to parse YAML config: %w", err) + } + + // Validate the configuration (using the same validation logic as internal/cluster) + if len(clusterDef.Masters) == 0 { + return nil, fmt.Errorf("at least one master node is required") + } + + // Validate DKP parameters + dkpParams := clusterDef.DKPParameters + if dkpParams.PodSubnetCIDR == "" { + return nil, fmt.Errorf("dkpParameters.podSubnetCIDR is required") + } + if dkpParams.ServiceSubnetCIDR == "" { + return nil, fmt.Errorf("dkpParameters.serviceSubnetCIDR is required") + } + if dkpParams.ClusterDomain == "" { + return nil, fmt.Errorf("dkpParameters.clusterDomain is required") + } + if dkpParams.RegistryRepo == "" { + return nil, fmt.Errorf("dkpParameters.registryRepo is required") + } + + return &clusterDef, nil +} + // CreateTestCluster creates a complete test cluster by performing all necessary steps: // 1. Loading cluster configuration from YAML // 2. 
Connecting to base cluster @@ -73,11 +115,28 @@ func CreateTestCluster( ctx context.Context, yamlConfigFilename string, ) (*TestClusterResources, error) { + fmt.Printf(" ▶️ Step 1: Loading cluster configuration from %s\n", yamlConfigFilename) + + // Get the test file's directory (the caller of CreateTestCluster, which is the test file) + // runtime.Caller(1) gets the immediate caller (the test file that called CreateTestCluster) + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + return nil, fmt.Errorf("failed to determine test file path") + } + testDir := filepath.Dir(callerFile) + yamlConfigPath := filepath.Join(testDir, yamlConfigFilename) + + fmt.Printf(" 📁 Test file directory: %s\n", testDir) + fmt.Printf(" 📁 Config file path: %s\n", yamlConfigPath) + // Step 1: Load cluster configuration from YAML - clusterDefinition, err := internalcluster.LoadClusterConfig(yamlConfigFilename) + // LoadClusterConfig uses runtime.Caller(1) which would get this function, not the test file + // So we need to load it directly from the path + clusterDefinition, err := loadClusterConfigFromPath(yamlConfigPath) if err != nil { return nil, fmt.Errorf("failed to load cluster configuration: %w", err) } + fmt.Printf(" ✅ Step 1: Cluster configuration loaded successfully from %s\n", yamlConfigPath) // Get SSH credentials from environment variables sshHost := config.SSHHost @@ -87,6 +146,7 @@ func CreateTestCluster( return nil, fmt.Errorf("failed to get SSH private key path: %w", err) } + fmt.Printf(" ▶️ Step 2: Connecting to base cluster %s@%s\n", sshUser, sshHost) // Step 2: Connect to base cluster baseClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ SSHUser: sshUser, @@ -97,7 +157,9 @@ func CreateTestCluster( if err != nil { return nil, fmt.Errorf("failed to connect to base cluster: %w", err) } + fmt.Printf(" ✅ Step 2: Connected to base cluster successfully\n") + fmt.Printf(" ▶️ Step 3: Verifying virtualization module is Ready\n") // Step 3: Verify 
virtualization module is Ready moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") @@ -112,7 +174,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) } + fmt.Printf(" ✅ Step 3: Virtualization module is Ready\n") + fmt.Printf(" ▶️ Step 4: Creating test namespace %s\n", config.TestClusterNamespace) // Step 4: Create test namespace namespaceCtx, cancel := context.WithTimeout(ctx, 30*time.Second) namespace := config.TestClusterNamespace @@ -123,7 +187,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to create namespace: %w", err) } + fmt.Printf(" ✅ Step 4: Test namespace created\n") + fmt.Printf(" ▶️ Step 5: Creating virtual machines (this may take up to 25 minutes)\n") // Step 5: Create virtualization client and virtual machines virtCtx, cancel := context.WithTimeout(ctx, 25*time.Minute) virtClient, err := virtualization.NewClient(virtCtx, baseClusterResources.Kubeconfig) @@ -141,11 +207,14 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to create virtual machines: %w", err) } + fmt.Printf(" ✅ Step 5: Created %d virtual machines: %v\n", len(vmNames), vmNames) + fmt.Printf(" ▶️ Step 5.1: Waiting for all VMs to become Running (timeout: %v)\n", config.VMsRunningTimeout) // Wait for all VMs to become Running vmWaitCtx, cancel := context.WithTimeout(ctx, config.VMsRunningTimeout) defer cancel() - for _, vmName := range vmNames { + for i, vmName := range vmNames { + fmt.Printf(" ⏳ Waiting for VM %d/%d: %s\n", i+1, len(vmNames), vmName) vmReady := false for !vmReady { select { @@ -160,11 +229,14 @@ func CreateTestCluster( } if vm.Status.Phase == v1alpha2.MachineRunning { vmReady = true + fmt.Printf(" ✅ VM %s is Running\n", vmName) } } } } + fmt.Printf(" 
✅ Step 5.1: All VMs are Running\n") + fmt.Printf(" ▶️ Step 6: Gathering VM information\n") // Step 6: Gather VM information gatherCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) err = GatherVMInfo(gatherCtx, virtClient, namespace, clusterDefinition, vmResources) @@ -174,7 +246,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to gather VM information: %w", err) } + fmt.Printf(" ✅ Step 6: VM information gathered\n") + fmt.Printf(" ▶️ Step 7: Establishing SSH connection to setup node\n") // Step 7: Establish SSH connection to setup node setupNode, err := GetSetupNode(clusterDefinition) if err != nil { @@ -198,7 +272,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to create SSH client to setup node: %w", err) } + fmt.Printf(" ✅ Step 7: SSH connection to setup node established\n") + fmt.Printf(" ▶️ Step 8: Installing Docker on setup node (this may take up to 15 minutes)\n") // Step 8: Install Docker on setup node dockerCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) err = InstallDocker(dockerCtx, setupSSHClient) @@ -209,7 +285,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to install Docker on setup node: %w", err) } + fmt.Printf(" ✅ Step 8: Docker installed on setup node\n") + fmt.Printf(" ▶️ Step 9: Preparing bootstrap configuration\n") // Step 9: Prepare bootstrap config bootstrapConfig, err := PrepareBootstrapConfig(clusterDefinition) if err != nil { @@ -218,7 +296,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to prepare bootstrap config: %w", err) } + fmt.Printf(" ✅ Step 9: Bootstrap configuration prepared\n") + fmt.Printf(" ▶️ Step 10: Uploading bootstrap files to setup node\n") // Step 10: Upload bootstrap files uploadCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) err = UploadBootstrapFiles(uploadCtx, setupSSHClient, 
sshKeyPath, bootstrapConfig) @@ -229,7 +309,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to upload bootstrap files: %w", err) } + fmt.Printf(" ✅ Step 10: Bootstrap files uploaded\n") + fmt.Printf(" ▶️ Step 11: Bootstrapping cluster (this may take up to 35 minutes)\n") // Step 11: Bootstrap cluster firstMasterIP := clusterDefinition.Masters[0].IPAddress if firstMasterIP == "" { @@ -248,7 +330,9 @@ func CreateTestCluster( baseClusterResources.TunnelInfo.StopFunc() return nil, fmt.Errorf("failed to bootstrap cluster: %w", err) } + fmt.Printf(" ✅ Step 11: Cluster bootstrapped successfully\n") + fmt.Printf(" ▶️ Step 12: Stopping base cluster tunnel (needed for test cluster tunnel)\n") // Step 12: Store base cluster kubeconfig before stopping tunnel (needed for cleanup) baseKubeconfig := baseClusterResources.Kubeconfig baseKubeconfigPath := baseClusterResources.KubeconfigPath @@ -257,7 +341,9 @@ func CreateTestCluster( if baseClusterResources.TunnelInfo != nil && baseClusterResources.TunnelInfo.StopFunc != nil { baseClusterResources.TunnelInfo.StopFunc() } + fmt.Printf(" ✅ Step 12: Base cluster tunnel stopped\n") + fmt.Printf(" ▶️ Step 13: Connecting to test cluster master %s\n", firstMasterIP) // Step 14: Connect to test cluster testClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ SSHUser: sshUser, @@ -273,7 +359,9 @@ func CreateTestCluster( baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("failed to connect to test cluster: %w", err) } + fmt.Printf(" ✅ Step 13: Connected to test cluster\n") + fmt.Printf(" ▶️ Step 14: Creating NodeGroup for workers\n") // Step 14: Create NodeGroup for workers nodegroupCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) err = CreateStaticNodeGroup(nodegroupCtx, testClusterResources.Kubeconfig, "worker") @@ -285,7 +373,50 @@ func CreateTestCluster( baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("failed to create worker NodeGroup: 
%w", err) } + fmt.Printf(" ✅ Step 14: NodeGroup for workers created\n") + fmt.Printf(" ▶️ Step 14.1: Waiting for bootstrap secrets to appear (this may take a few minutes)\n") + // Step 14.1: Wait for bootstrap secrets to appear after NodeGroup creation + // The secrets are created by Deckhouse after the NodeGroup is created, so we need to wait + secretsWaitCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + secretNamespace := "d8-cloud-instance-manager" + secretClient, err := core.NewSecretClient(testClusterResources.Kubeconfig) + if err != nil { + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("failed to create secret client: %w", err) + } + + secretsReady := false + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + for !secretsReady { + select { + case <-secretsWaitCtx.Done(): + testClusterResources.SSHClient.Close() + testClusterResources.TunnelInfo.StopFunc() + setupSSHClient.Close() + baseClusterResources.SSHClient.Close() + return nil, fmt.Errorf("timeout waiting for bootstrap secrets to appear") + case <-ticker.C: + // Check for both secrets + _, workerErr := secretClient.Get(secretsWaitCtx, secretNamespace, "manual-bootstrap-for-worker") + _, masterErr := secretClient.Get(secretsWaitCtx, secretNamespace, "manual-bootstrap-for-master") + if workerErr == nil && masterErr == nil { + secretsReady = true + fmt.Printf(" ✅ Bootstrap secrets are available\n") + } else { + fmt.Printf(" ⏳ Waiting for bootstrap secrets... 
(worker: %v, master: %v)\n", + workerErr == nil, masterErr == nil) + } + } + } + fmt.Printf(" ✅ Step 14.1: Bootstrap secrets appeared\n") + + fmt.Printf(" ▶️ Step 15: Verifying cluster is ready (this may take up to 15 minutes)\n") // Step 15: Verify cluster is ready healthCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) err = CheckClusterHealth(healthCtx, testClusterResources.Kubeconfig) @@ -297,7 +428,9 @@ func CreateTestCluster( baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("cluster is not ready: %w", err) } + fmt.Printf(" ✅ Step 15: Cluster is ready\n") + fmt.Printf(" ▶️ Step 16: Adding nodes to cluster (timeout: %v)\n", config.NodesReadyTimeout) // Step 16: Add nodes to cluster nodesCtx, cancel := context.WithTimeout(ctx, config.NodesReadyTimeout) err = AddNodesToCluster(nodesCtx, testClusterResources.Kubeconfig, clusterDefinition, sshUser, sshHost, sshKeyPath) @@ -309,7 +442,9 @@ func CreateTestCluster( baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("failed to add nodes to cluster: %w", err) } + fmt.Printf(" ✅ Step 16: Nodes added to cluster\n") + fmt.Printf(" ▶️ Step 16.1: Waiting for all nodes to become Ready (timeout: %v)\n", config.NodesReadyTimeout) // Wait for all nodes to become Ready nodesReadyCtx, cancel := context.WithTimeout(ctx, config.NodesReadyTimeout) err = WaitForAllNodesReady(nodesReadyCtx, testClusterResources.Kubeconfig, clusterDefinition, config.NodesReadyTimeout) @@ -321,7 +456,9 @@ func CreateTestCluster( baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("failed to wait for nodes to be ready: %w", err) } + fmt.Printf(" ✅ Step 16.1: All nodes are Ready\n") + fmt.Printf(" ▶️ Step 17: Enabling and configuring modules\n") // Step 17: Enable and configure modules modulesCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) err = EnableAndConfigureModules(modulesCtx, testClusterResources.Kubeconfig, clusterDefinition, testClusterResources.SSHClient) @@ -333,6 +470,7 @@ func CreateTestCluster( 
baseClusterResources.SSHClient.Close() return nil, fmt.Errorf("failed to enable and configure modules: %w", err) } + fmt.Printf(" ✅ Step 17: Modules enabled and configured\n") // Set cluster definition and VM resources testClusterResources.ClusterDefinition = clusterDefinition @@ -359,7 +497,14 @@ func WaitForTestClusterReady(ctx context.Context, resources *TestClusterResource return fmt.Errorf("cluster definition cannot be nil") } - return WaitForModulesReady(ctx, resources.Kubeconfig, resources.ClusterDefinition, config.ModuleDeployTimeout) + fmt.Printf(" ▶️ Waiting for all modules to be ready (timeout: %v)\n", config.ModuleDeployTimeout) + err := WaitForModulesReady(ctx, resources.Kubeconfig, resources.ClusterDefinition, config.ModuleDeployTimeout) + if err != nil { + fmt.Printf(" ❌ Failed to wait for modules to be ready: %v\n", err) + return err + } + fmt.Printf(" ✅ All modules are ready\n") + return nil } // CleanupTestCluster cleans up all resources created by CreateTestCluster. diff --git a/tests/test-template/template_test.go b/tests/test-template/template_test.go index 1878e6c..e095ff7 100644 --- a/tests/test-template/template_test.go +++ b/tests/test-template/template_test.go @@ -60,26 +60,27 @@ var _ = Describe("Template Test", Ordered, func() { // ---=== TEST CLUSTER IS CREATED AND GOT READY HERE ===--- // It("should create test cluster and wait for it to become ready", func() { - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), 90*time.Minute) defer cancel() By("Creating test cluster", func() { - GinkgoWriter.Printf(" ▶️ Creating test cluster (this may take up to 60 minutes)...\n") - Eventually(func() error { - var err error - testClusterResources, err = cluster.CreateTestCluster(ctx, config.YAMLConfigFilename) - return err - }).WithTimeout(60*time.Minute).WithPolling(30*time.Second).Should(Succeed(), - "Test cluster should be created within 60 minutes") + 
GinkgoWriter.Printf(" ▶️ Creating test cluster (this may take up to 90 minutes)...\n") + var err error + testClusterResources, err = cluster.CreateTestCluster(ctx, config.YAMLConfigFilename) + if err != nil { + GinkgoWriter.Printf(" ❌ Failed to create test cluster: %v\n", err) + Expect(err).NotTo(HaveOccurred(), "Test cluster should be created successfully") + } GinkgoWriter.Printf(" ✅ Test cluster created successfully\n") }) By("Waiting for test cluster to become ready", func() { - GinkgoWriter.Printf(" ▶️ Waiting for all modules to be ready in test cluster...\n") - Eventually(func() error { - return cluster.WaitForTestClusterReady(ctx, testClusterResources) - }).WithTimeout(60*time.Minute).WithPolling(30*time.Second).Should(Succeed(), - "Test cluster should become ready within 60 minutes") + GinkgoWriter.Printf(" ▶️ Waiting for all modules to be ready in test cluster (timeout: %v)...\n", config.ModuleDeployTimeout) + err := cluster.WaitForTestClusterReady(ctx, testClusterResources) + if err != nil { + GinkgoWriter.Printf(" ❌ Failed to wait for test cluster to be ready: %v\n", err) + Expect(err).NotTo(HaveOccurred(), "Test cluster should become ready") + } GinkgoWriter.Printf(" ✅ Test cluster is ready (all modules are Ready)\n") }) }) // should create test cluster From 552b52a10d0efec524596327dff627ea48146b65 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 24 Dec 2025 12:46:55 +0300 Subject: [PATCH 42/48] Cleanup logging fix --- pkg/cluster/cluster.go | 45 ++++++++++++++++++++++++ pkg/cluster/vms.go | 51 +++++++++++++++++++++++----- tests/test-template/template_test.go | 9 ++++- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 0f68edc..f41218b 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -520,28 +520,40 @@ func CleanupTestCluster(ctx context.Context, resources *TestClusterResources) er return nil // Nothing to clean up } + fmt.Printf(" ▶️ Cleanup Step 1: Stopping 
test cluster tunnel and closing SSH client\n") var errs []error // Step 1: Stop test cluster tunnel and close test cluster SSH client if resources.TunnelInfo != nil && resources.TunnelInfo.StopFunc != nil { if err := resources.TunnelInfo.StopFunc(); err != nil { errs = append(errs, fmt.Errorf("failed to stop test cluster SSH tunnel: %w", err)) + fmt.Printf(" ❌ Failed to stop test cluster SSH tunnel: %v\n", err) + } else { + fmt.Printf(" ✅ Test cluster SSH tunnel stopped\n") } } if resources.SSHClient != nil { if err := resources.SSHClient.Close(); err != nil { errs = append(errs, fmt.Errorf("failed to close test cluster SSH client: %w", err)) + fmt.Printf(" ❌ Failed to close test cluster SSH client: %v\n", err) + } else { + fmt.Printf(" ✅ Test cluster SSH client closed\n") } } + fmt.Printf(" ▶️ Cleanup Step 2: Closing setup SSH client\n") // Step 2: Close setup SSH client if resources.SetupSSHClient != nil { if err := resources.SetupSSHClient.Close(); err != nil { errs = append(errs, fmt.Errorf("failed to close setup SSH client: %w", err)) + fmt.Printf(" ❌ Failed to close setup SSH client: %v\n", err) + } else { + fmt.Printf(" ✅ Setup SSH client closed\n") } } + fmt.Printf(" ▶️ Cleanup Step 3: Re-establishing base cluster tunnel for VM cleanup\n") // Step 3: Re-establish base cluster tunnel if needed for VM cleanup // We need API access to remove VMs, so we need the tunnel var baseTunnel *ssh.TunnelInfo @@ -549,11 +561,14 @@ func CleanupTestCluster(ctx context.Context, resources *TestClusterResources) er if resources.BaseClusterClient != nil && resources.VMResources != nil { // Re-establish tunnel if it was stopped (BaseTunnelInfo is nil) if resources.BaseTunnelInfo == nil { + fmt.Printf(" ⏳ Re-establishing base cluster tunnel...\n") var tunnelErr error baseTunnel, tunnelErr = ssh.EstablishSSHTunnel(context.Background(), resources.BaseClusterClient, "6445") if tunnelErr != nil { errs = append(errs, fmt.Errorf("failed to re-establish base cluster tunnel for VM 
cleanup: %w", tunnelErr)) + fmt.Printf(" ❌ Failed to re-establish base cluster tunnel: %v\n", tunnelErr) } else { + fmt.Printf(" ✅ Base cluster tunnel re-established on local port: %d\n", baseTunnel.LocalPort) // Update kubeconfig to use the tunnel port if resources.BaseKubeconfigPath != "" { if updateErr := internalcluster.UpdateKubeconfigPort(resources.BaseKubeconfigPath, baseTunnel.LocalPort); updateErr == nil { @@ -564,6 +579,7 @@ func CleanupTestCluster(ctx context.Context, resources *TestClusterResources) er } } else { // Tunnel already exists, use it + fmt.Printf(" ✅ Base cluster tunnel already exists\n") baseTunnel = resources.BaseTunnelInfo cleanupKubeconfig = resources.BaseKubeconfig } @@ -576,33 +592,62 @@ func CleanupTestCluster(ctx context.Context, resources *TestClusterResources) er // Step 4: Remove setup VM (always removed) if resources.VMResources.SetupVMName != "" { namespace := config.TestClusterNamespace + fmt.Printf(" ▶️ Cleanup Step 4: Removing setup VM %s\n", resources.VMResources.SetupVMName) if removeErr := RemoveVM(ctx, virtClient, namespace, resources.VMResources.SetupVMName); removeErr != nil { errs = append(errs, fmt.Errorf("failed to remove setup VM %s: %w", resources.VMResources.SetupVMName, removeErr)) + fmt.Printf(" ❌ Failed to remove setup VM %s: %v\n", resources.VMResources.SetupVMName, removeErr) + } else { + fmt.Printf(" ✅ Setup VM %s removed\n", resources.VMResources.SetupVMName) } } // Step 5: Remove test cluster VMs if cleanup is enabled if config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" { + fmt.Printf(" ▶️ Cleanup Step 5: Removing test cluster VMs (TEST_CLUSTER_CLEANUP is enabled)\n") + if resources.VMResources != nil && len(resources.VMResources.VMNames) > 0 { + fmt.Printf(" ⏳ Removing %d VMs: %v\n", len(resources.VMResources.VMNames), resources.VMResources.VMNames) + } if removeErr := RemoveAllVMs(ctx, resources.VMResources); removeErr != nil { errs = append(errs, fmt.Errorf("failed to remove 
test cluster VMs: %w", removeErr)) + fmt.Printf(" ❌ Failed to remove test cluster VMs: %v\n", removeErr) + } else { + fmt.Printf(" ✅ Test cluster VMs removed\n") } + } else { + fmt.Printf(" ⏭️ Cleanup Step 5: Skipping test cluster VM removal (TEST_CLUSTER_CLEANUP is not enabled)\n") } } else { errs = append(errs, fmt.Errorf("failed to create virtualization client for cleanup: %w", virtErr)) + fmt.Printf(" ❌ Failed to create virtualization client for cleanup: %v\n", virtErr) } + } else { + fmt.Printf(" ⚠️ Warning: Cannot remove VMs - no valid kubeconfig for cleanup\n") + } + } else { + if resources.VMResources == nil { + fmt.Printf(" ⏭️ Cleanup Step 3-5: Skipping VM cleanup (no VM resources to clean up)\n") + } else { + fmt.Printf(" ⚠️ Warning: Cannot remove VMs - base cluster client not available\n") } } + fmt.Printf(" ▶️ Cleanup Step 6: Stopping base cluster tunnel and closing SSH client\n") // Step 6: Stop base cluster tunnel and close base cluster SSH client if baseTunnel != nil && baseTunnel.StopFunc != nil { if err := baseTunnel.StopFunc(); err != nil { errs = append(errs, fmt.Errorf("failed to stop base cluster SSH tunnel: %w", err)) + fmt.Printf(" ❌ Failed to stop base cluster SSH tunnel: %v\n", err) + } else { + fmt.Printf(" ✅ Base cluster SSH tunnel stopped\n") } } if resources.BaseClusterClient != nil { if err := resources.BaseClusterClient.Close(); err != nil { errs = append(errs, fmt.Errorf("failed to close base cluster SSH client: %w", err)) + fmt.Printf(" ❌ Failed to close base cluster SSH client: %v\n", err) + } else { + fmt.Printf(" ✅ Base cluster SSH client closed\n") } } diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 776c013..b35a2c0 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -434,12 +434,20 @@ func RemoveAllVMs(ctx context.Context, resources *VMResources) error { return fmt.Errorf("resources cannot be nil") } + if len(resources.VMNames) == 0 { + fmt.Printf(" ⏭️ No VMs to remove\n") + return nil + } + // Delete all VMs 
using RemoveVM - for _, vmName := range resources.VMNames { + for i, vmName := range resources.VMNames { + fmt.Printf(" ⏳ Removing VM %d/%d: %s/%s\n", i+1, len(resources.VMNames), resources.Namespace, vmName) err := RemoveVM(ctx, resources.VirtClient, resources.Namespace, vmName) if err != nil { // Log but continue - we'll try to clean up other VMs - fmt.Printf("Warning: Failed to remove VM %s/%s: %v\n", resources.Namespace, vmName, err) + fmt.Printf(" ❌ Failed to remove VM %s/%s: %v\n", resources.Namespace, vmName, err) + } else { + fmt.Printf(" ✅ VM %s/%s removed successfully\n", resources.Namespace, vmName) } } @@ -569,6 +577,7 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, if err != nil { if errors.IsNotFound(err) { // VM doesn't exist, nothing to clean up + fmt.Printf(" ⏭️ VM %s/%s doesn't exist, skipping\n", namespace, vmName) return nil } return fmt.Errorf("failed to get VM %s/%s: %w", namespace, vmName, err) @@ -581,6 +590,9 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, vdNames = append(vdNames, bdRef.Name) } } + if len(vdNames) > 0 { + fmt.Printf(" 📋 Found %d VirtualDisk(s) associated with VM: %v\n", len(vdNames), vdNames) + } // Step 2: Collect ClusterVirtualImage names from VirtualDisks before deleting them cvmiNamesSet := make(map[string]bool) @@ -591,7 +603,7 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, continue // Already deleted } // Log but continue - fmt.Printf("Warning: Failed to get VirtualDisk %s/%s: %v\n", namespace, vdName, err) + fmt.Printf(" ⚠️ Warning: Failed to get VirtualDisk %s/%s: %v\n", namespace, vdName, err) continue } @@ -603,6 +615,7 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, } // Step 3: Delete the VM + fmt.Printf(" 🗑️ Deleting VirtualMachine %s/%s\n", namespace, vmName) err = virtClient.VirtualMachines().Delete(ctx, namespace, vmName) if err != nil && !errors.IsNotFound(err) { 
return fmt.Errorf("failed to delete VM %s/%s: %w", namespace, vmName, err) @@ -610,15 +623,17 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, // Step 3.5: Wait for VM to be fully deleted before deleting VirtualDisks // Kubernetes deletion is asynchronous, so we need to wait until the VM is gone + fmt.Printf(" ⏳ Waiting for VirtualMachine %s/%s to be fully deleted...\n", namespace, vmName) for { _, err := virtClient.VirtualMachines().Get(ctx, namespace, vmName) if errors.IsNotFound(err) { // VirtualMachine is fully deleted + fmt.Printf(" ✅ VirtualMachine %s/%s deleted\n", namespace, vmName) break } if err != nil { // Some other error occurred, log and break to avoid infinite loop - fmt.Printf("Warning: Error checking if VirtualMachine %s/%s is deleted: %v\n", namespace, vmName, err) + fmt.Printf(" ⚠️ Warning: Error checking if VirtualMachine %s/%s is deleted: %v\n", namespace, vmName, err) break } // Wait a bit before checking again @@ -631,11 +646,14 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, } // Step 4: Delete all VirtualDisks associated with this VM + if len(vdNames) > 0 { + fmt.Printf(" 🗑️ Deleting %d VirtualDisk(s)...\n", len(vdNames)) + } deletedVDNames := make(map[string]bool) for _, vdName := range vdNames { err := virtClient.VirtualDisks().Delete(ctx, namespace, vdName) if err != nil && !errors.IsNotFound(err) { - fmt.Printf("Warning: Failed to delete VirtualDisk %s/%s: %v\n", namespace, vdName, err) + fmt.Printf(" ❌ Failed to delete VirtualDisk %s/%s: %v\n", namespace, vdName, err) } else { deletedVDNames[vdName] = true } @@ -643,6 +661,9 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, // Step 4.5: Wait for all VirtualDisks to be fully deleted before checking ClusterVirtualImage usage // Poll until all VirtualDisks we deleted are no longer present + if len(deletedVDNames) > 0 { + fmt.Printf(" ⏳ Waiting for %d VirtualDisk(s) to be fully 
deleted...\n", len(deletedVDNames)) + } for len(deletedVDNames) > 0 { allDeleted := true for vdName := range deletedVDNames { @@ -652,7 +673,7 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, delete(deletedVDNames, vdName) } else if err != nil { // Some other error occurred, log and remove from tracking to avoid infinite loop - fmt.Printf("Warning: Error checking if VirtualDisk %s/%s is deleted: %v\n", namespace, vdName, err) + fmt.Printf(" ⚠️ Warning: Error checking if VirtualDisk %s/%s is deleted: %v\n", namespace, vdName, err) delete(deletedVDNames, vdName) } else { // VirtualDisk still exists @@ -660,6 +681,9 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, } } if allDeleted { + if len(vdNames) > 0 { + fmt.Printf(" ✅ All VirtualDisks deleted\n") + } break } // Wait a bit before checking again @@ -674,9 +698,12 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, // Step 5: Check if ClusterVirtualImages are still in use by other VirtualDisks in the namespace and delete if not // Note: Since CVMI is cluster-scoped, it could be used by VDs in other namespaces too, // but for simplicity we only check within the current namespace + if len(cvmiNamesSet) > 0 { + fmt.Printf(" 🔍 Checking ClusterVirtualImage usage (%d image(s))...\n", len(cvmiNamesSet)) + } allVDs, err := virtClient.VirtualDisks().List(ctx, namespace) if err != nil { - fmt.Printf("Warning: Failed to list VirtualDisks to check ClusterVirtualImage usage: %v\n", err) + fmt.Printf(" ⚠️ Warning: Failed to list VirtualDisks to check ClusterVirtualImage usage: %v\n", err) allVDs = []v1alpha2.VirtualDisk{} } @@ -691,16 +718,24 @@ func RemoveVM(ctx context.Context, virtClient *virtualization.Client, namespace, } // Delete ClusterVirtualImages that are not in use (cluster-scoped, no namespace) + deletedCVMICount := 0 for cvmiName := range cvmiNamesSet { if cvmiInUse[cvmiName] { + fmt.Printf(" ⏭️ ClusterVirtualImage 
%s is still in use, skipping deletion\n", cvmiName) continue // Still in use, skip deletion } + fmt.Printf(" 🗑️ Deleting ClusterVirtualImage %s\n", cvmiName) err := virtClient.ClusterVirtualImages().Delete(ctx, cvmiName) if err != nil && !errors.IsNotFound(err) { - fmt.Printf("Warning: Failed to delete ClusterVirtualImage %s: %v\n", cvmiName, err) + fmt.Printf(" ❌ Failed to delete ClusterVirtualImage %s: %v\n", cvmiName, err) + } else { + deletedCVMICount++ } } + if deletedCVMICount > 0 { + fmt.Printf(" ✅ Deleted %d ClusterVirtualImage(s)\n", deletedCVMICount) + } return nil } diff --git a/tests/test-template/template_test.go b/tests/test-template/template_test.go index e095ff7..045c536 100644 --- a/tests/test-template/template_test.go +++ b/tests/test-template/template_test.go @@ -43,11 +43,18 @@ var _ = Describe("Template Test", Ordered, func() { AfterAll(func() { // Cleanup test cluster resources + // Note: Bootstrap node (setup VM) is always removed. + // Test cluster VMs (masters and workers) are only removed if TEST_CLUSTER_CLEANUP='true' or 'True' if testClusterResources != nil { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) defer cancel() - GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources...\n") + cleanupEnabled := config.TestClusterCleanup == "true" || config.TestClusterCleanup == "True" + if cleanupEnabled { + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources (TEST_CLUSTER_CLEANUP is enabled - all VMs will be removed)...\n") + } else { + GinkgoWriter.Printf(" ▶️ Cleaning up test cluster resources (TEST_CLUSTER_CLEANUP is not enabled - only bootstrap node will be removed)...\n") + } err := cluster.CleanupTestCluster(ctx, testClusterResources) if err != nil { GinkgoWriter.Printf(" ⚠️ Warning: Cleanup errors occurred: %v\n", err) From ae80517b0b4ff253a223cdd1dbbf5b5769ac2b0c Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 24 Dec 2025 12:50:12 +0300 Subject: [PATCH 43/48] envvar output improved a bit 
--- .../cluster_creation_test.go | 10 ++++ tests/test-template/template_test.go | 52 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 44392f3..4ddb8bf 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -92,6 +92,16 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" TEST_CLUSTER_CLEANUP: %s\n", config.TestClusterCleanup) } + // TEST_CLUSTER_NAMESPACE - no masking + if config.TestClusterNamespace != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_NAMESPACE: %s\n", config.TestClusterNamespace) + } + + // TEST_CLUSTER_STORAGE_CLASS - no masking + if config.TestClusterStorageClass != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_STORAGE_CLASS: %s\n", config.TestClusterStorageClass) + } + // SSH_PASSPHRASE - no masking (optional, may be empty) if config.SSHPassphrase != "" { GinkgoWriter.Printf(" SSH_PASSPHRASE: \n") diff --git a/tests/test-template/template_test.go b/tests/test-template/template_test.go index 045c536..f36cf12 100644 --- a/tests/test-template/template_test.go +++ b/tests/test-template/template_test.go @@ -39,6 +39,58 @@ var _ = Describe("Template Test", Ordered, func() { Expect(err).NotTo(HaveOccurred()) GinkgoWriter.Printf(" ✅ Environment variables validated successfully\n") }) + + By("Outputting environment variables without default values", func() { + GinkgoWriter.Printf(" 📋 Environment variables (without default values):\n") + + // Helper function to mask sensitive values + maskValue := func(value string, mask bool) string { + if mask && len(value) > 5 { + return value[:5] + "***" + } + return value + } + + // DKP_LICENSE_KEY - mask first 5 characters + if config.DKPLicenseKey != "" { + GinkgoWriter.Printf(" DKP_LICENSE_KEY: %s\n", maskValue(config.DKPLicenseKey, true)) + } + + // 
REGISTRY_DOCKER_CFG - mask first 5 characters + if config.RegistryDockerCfg != "" { + GinkgoWriter.Printf(" REGISTRY_DOCKER_CFG: %s\n", maskValue(config.RegistryDockerCfg, true)) + } + + // TEST_CLUSTER_CREATE_MODE - no masking + if config.TestClusterCreateMode != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_CREATE_MODE: %s\n", config.TestClusterCreateMode) + } + + // TEST_CLUSTER_CLEANUP - no masking + if config.TestClusterCleanup != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_CLEANUP: %s\n", config.TestClusterCleanup) + } + + // TEST_CLUSTER_NAMESPACE - no masking + if config.TestClusterNamespace != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_NAMESPACE: %s\n", config.TestClusterNamespace) + } + + // TEST_CLUSTER_STORAGE_CLASS - no masking + if config.TestClusterStorageClass != "" { + GinkgoWriter.Printf(" TEST_CLUSTER_STORAGE_CLASS: %s\n", config.TestClusterStorageClass) + } + + // SSH_PASSPHRASE - no masking (optional, may be empty) + if config.SSHPassphrase != "" { + GinkgoWriter.Printf(" SSH_PASSPHRASE: \n") + } + + // KUBE_CONFIG_PATH - no masking (optional, may be empty) + if config.KubeConfigPath != "" { + GinkgoWriter.Printf(" KUBE_CONFIG_PATH: %s\n", config.KubeConfigPath) + } + }) }) AfterAll(func() { From 5515e41b08feced3455938ff7fbde1c4555c2270 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Thu, 25 Dec 2025 14:34:55 +0300 Subject: [PATCH 44/48] virtualization check is disabled --- pkg/cluster/cluster.go | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index f41218b..0f1c859 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -34,7 +34,6 @@ import ( "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" "github.com/deckhouse/storage-e2e/internal/kubernetes/apps" "github.com/deckhouse/storage-e2e/internal/kubernetes/core" - "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" 
"github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" "github.com/deckhouse/storage-e2e/pkg/kubernetes" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -159,22 +158,22 @@ func CreateTestCluster( } fmt.Printf(" ✅ Step 2: Connected to base cluster successfully\n") - fmt.Printf(" ▶️ Step 3: Verifying virtualization module is Ready\n") - // Step 3: Verify virtualization module is Ready - moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") - cancel() - if err != nil { - baseClusterResources.SSHClient.Close() - baseClusterResources.TunnelInfo.StopFunc() - return nil, fmt.Errorf("failed to get virtualization module: %w", err) - } - if module.Status.Phase != "Ready" { - baseClusterResources.SSHClient.Close() - baseClusterResources.TunnelInfo.StopFunc() - return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) - } - fmt.Printf(" ✅ Step 3: Virtualization module is Ready\n") + // fmt.Printf(" ▶️ Step 3: Verifying virtualization module is Ready\n") + // // Step 3: Verify virtualization module is Ready + // moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + // module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") + // cancel() + // if err != nil { + // baseClusterResources.SSHClient.Close() + // baseClusterResources.TunnelInfo.StopFunc() + // return nil, fmt.Errorf("failed to get virtualization module: %w", err) + // } + // if module.Status.Phase != "Ready" { + // baseClusterResources.SSHClient.Close() + // baseClusterResources.TunnelInfo.StopFunc() + // return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) + // } + // fmt.Printf(" ✅ Step 3: Virtualization module is Ready\n") fmt.Printf(" ▶️ Step 4: Creating test namespace %s\n", config.TestClusterNamespace) // Step 4: Create test namespace From 
d97c637fbbdf6bd2e4961ffdafa2c17eeded6f48 Mon Sep 17 00:00:00 2001 From: Ivan Ogurchenok Date: Fri, 26 Dec 2025 01:26:44 +0300 Subject: [PATCH 45/48] Improve VM creation workflow and add configuration options - Add TrustIfExists option for ClusterVirtualImages to reuse existing images - Add DataDiskSize option for VMs to create additional data disk - Add SKIP_VIRTUALIZATION_CHECK env to skip virtualization module check - Cache SSH passphrase after first input to avoid repeated prompts - Check all VMs in parallel during Step 5.1 instead of sequentially - Add documentation for new options in config files and README Signed-off-by: Ivan Ogurchenok --- internal/config/env.go | 5 ++ internal/config/images.go | 44 ++++++++++++- internal/config/types.go | 6 +- internal/infrastructure/ssh/client.go | 8 +++ pkg/cluster/cluster.go | 87 +++++++++++++++++--------- pkg/cluster/vms.go | 66 ++++++++++++++++--- tests/README.md | 1 + tests/test-template/cluster_config.yml | 5 ++ 8 files changed, 184 insertions(+), 38 deletions(-) diff --git a/internal/config/env.go b/internal/config/env.go index 34b267e..275e04a 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -5,6 +5,7 @@ package config import ( "fmt" "os" + "strings" ) const ( @@ -71,6 +72,10 @@ var ( // RegistryDockerCfg specifies the docker registry key to download images from Deckhouse registry. RegistryDockerCfg = os.Getenv("REGISTRY_DOCKER_CFG") + + // SkipVirtualizationCheck specifies whether to skip virtualization module readiness check. + // Default is false (check is performed). Set to "true" to skip the check. + SkipVirtualizationCheck = strings.ToLower(os.Getenv("SKIP_VIRTUALIZATION_CHECK")) == "true" ) func ValidateEnvironment() error { diff --git a/internal/config/images.go b/internal/config/images.go index 5ea7b1c..8973af7 100644 --- a/internal/config/images.go +++ b/internal/config/images.go @@ -16,23 +16,65 @@ limitations under the License. 
package config -// OSTypeMap maps OS type names to their definitions +// OSTypeMap maps OS type names to their definitions. +// +// TrustIfExists: If ClusterVirtualImage (CVI) already exists in the k8s cluster, reuse it +// instead of treating as a conflict. This allows sharing images across multiple test runs. +// +// CVI naming convention: The CVI name is derived from the image URL filename: +// 1. Extract filename from URL (e.g., "jammy-server-cloudimg-amd64.img") +// 2. Remove extension (.img, .qcow2) +// 3. Convert to lowercase +// 4. Replace underscores and dots with hyphens +// 5. Remove consecutive hyphens +// +// Examples: +// +// URL: https://cloud-images.ubuntu.com/.../jammy-server-cloudimg-amd64.img +// CVI name: jammy-server-cloudimg-amd64 +// +// URL: https://.../redos-8-1.x86_64.qcow2 +// CVI name: redos-8-1-x86-64 var OSTypeMap = map[string]OSType{ "Ubuntu 22.04 6.2.0-39-generic": { ImageURL: "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img", KernelVersion: "6.2.0-39-generic", + TrustIfExists: true, }, "Ubuntu 24.04 6.8.0-53-generic": { ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", KernelVersion: "6.8.0-53-generic", + TrustIfExists: true, }, "RedOS 8.0 6.6.26-1.red80.x86_64": { ImageURL: "https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/redos-8-1.x86_64.qcow2", KernelVersion: "6.6.26-1.red80.x86_64", + TrustIfExists: true, }, "RedOS 7.3.6 5.15.78-2.el7.3.x86_64": { ImageURL: "https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/redos/RO732_MIN-STD.qcow2", KernelVersion: "5.15.78-2.el7.3.x86_64", + TrustIfExists: true, + }, + "Debian 12 Bookworm": { + ImageURL: "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2", + KernelVersion: "6.2.0", + TrustIfExists: true, + }, + "Debian 13 Trixie": { + ImageURL: "https://cdimage.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2", + KernelVersion: "6.8.0", + 
TrustIfExists: true, + }, + "AltLinux 10.4": { + ImageURL: "https://ftp.altlinux.org/pub/distributions/ALTLinux/p10/images/cloud/x86_64/alt-server-10.4-p10-cloud-x86_64.qcow2", + KernelVersion: "6", + TrustIfExists: true, + }, + "AltLinux 11": { + ImageURL: "https://ftp.altlinux.org/pub/distributions/ALTLinux/p11/images/cloud/x86_64/alt-server-11.0-p11-cloud-x86_64.qcow2", + KernelVersion: "6", + TrustIfExists: true, }, } diff --git a/internal/config/types.go b/internal/config/types.go index 73aca8a..32db24e 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -44,6 +44,7 @@ type OSType struct { Name string ImageURL string KernelVersion string + TrustIfExists bool // If true, use existing CVMI instead of treating as conflict } // ClusterNode defines a single node in the cluster @@ -57,7 +58,8 @@ type ClusterNode struct { CPU int `yaml:"cpu"` // Required for VM CoreFraction *int `yaml:"coreFraction,omitempty"` // Optional for VM, CPU core fraction as percentage (e.g., 50 for 50%). Defaults to 100% if not specified. RAM int `yaml:"ram"` // Required for VM, in GB - DiskSize int `yaml:"diskSize"` // Required for VM, in GB + DiskSize int `yaml:"diskSize"` // Required for VM, system disk in GB + DataDiskSize *int `yaml:"dataDiskSize,omitempty"` // Optional for VM, additional data disk in GB. If empty/nil - data disk not created. 
// Bare-metal specific fields Prepared bool `yaml:"prepared,omitempty"` // Whether the node is already prepared for DKP installation } @@ -104,6 +106,7 @@ func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { CoreFraction *int `yaml:"coreFraction,omitempty"` RAM int `yaml:"ram"` DiskSize int `yaml:"diskSize"` + DataDiskSize *int `yaml:"dataDiskSize,omitempty"` Prepared bool `yaml:"prepared,omitempty"` } @@ -140,6 +143,7 @@ func (n *ClusterNode) UnmarshalYAML(value *yaml.Node) error { n.CoreFraction = tmp.CoreFraction n.RAM = tmp.RAM n.DiskSize = tmp.DiskSize + n.DataDiskSize = tmp.DataDiskSize n.Prepared = tmp.Prepared return nil diff --git a/internal/infrastructure/ssh/client.go b/internal/infrastructure/ssh/client.go index e900b93..34beb41 100644 --- a/internal/infrastructure/ssh/client.go +++ b/internal/infrastructure/ssh/client.go @@ -36,6 +36,9 @@ import ( "golang.org/x/term" ) +// cachedPassphrase stores the SSH key passphrase after first input to avoid repeated prompts +var cachedPassphrase []byte + // client implements Client interface type client struct { sshClient *ssh.Client @@ -186,6 +189,9 @@ func createSSHConfig(user, keyPathOrBase64 string) (*ssh.ClientConfig, error) { var pass []byte if envPass := os.Getenv("SSH_PASSPHRASE"); envPass != "" { pass = []byte(envPass) + } else if cachedPassphrase != nil { + // Use cached passphrase from previous input + pass = cachedPassphrase } else { // Try to read from terminal var readErr error @@ -193,6 +199,8 @@ func createSSHConfig(user, keyPathOrBase64 string) (*ssh.ClientConfig, error) { if readErr != nil { return nil, fmt.Errorf("SSH key '%s' is passphrase protected. 
Set SSH_PASSPHRASE environment variable: export SSH_PASSPHRASE='your-passphrase'\nOriginal error: %w", expandedKeyPath, readErr) } + // Cache the passphrase for future use + cachedPassphrase = pass } signer, err = ssh.ParsePrivateKeyWithPassphrase(key, pass) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 0f1c859..97fe7f5 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -34,6 +34,7 @@ import ( "github.com/deckhouse/storage-e2e/internal/infrastructure/ssh" "github.com/deckhouse/storage-e2e/internal/kubernetes/apps" "github.com/deckhouse/storage-e2e/internal/kubernetes/core" + "github.com/deckhouse/storage-e2e/internal/kubernetes/deckhouse" "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" "github.com/deckhouse/storage-e2e/pkg/kubernetes" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -158,22 +159,26 @@ func CreateTestCluster( } fmt.Printf(" ✅ Step 2: Connected to base cluster successfully\n") - // fmt.Printf(" ▶️ Step 3: Verifying virtualization module is Ready\n") - // // Step 3: Verify virtualization module is Ready - // moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - // module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") - // cancel() - // if err != nil { - // baseClusterResources.SSHClient.Close() - // baseClusterResources.TunnelInfo.StopFunc() - // return nil, fmt.Errorf("failed to get virtualization module: %w", err) - // } - // if module.Status.Phase != "Ready" { - // baseClusterResources.SSHClient.Close() - // baseClusterResources.TunnelInfo.StopFunc() - // return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) - // } - // fmt.Printf(" ✅ Step 3: Virtualization module is Ready\n") + // Step 3: Verify virtualization module is Ready (can be skipped with SKIP_VIRTUALIZATION_CHECK=true) + if !config.SkipVirtualizationCheck { + fmt.Printf(" ▶️ Step 3: Verifying virtualization module is Ready\n") + 
moduleCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + module, err := deckhouse.GetModule(moduleCtx, baseClusterResources.Kubeconfig, "virtualization") + cancel() + if err != nil { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("failed to get virtualization module: %w", err) + } + if module.Status.Phase != "Ready" { + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("virtualization module is not Ready (phase: %s)", module.Status.Phase) + } + fmt.Printf(" ✅ Step 3: Virtualization module is Ready\n") + } else { + fmt.Printf(" ⏭️ Step 3: Skipping virtualization module check (SKIP_VIRTUALIZATION_CHECK=true)\n") + } fmt.Printf(" ▶️ Step 4: Creating test namespace %s\n", config.TestClusterNamespace) // Step 4: Create test namespace @@ -209,28 +214,54 @@ func CreateTestCluster( fmt.Printf(" ✅ Step 5: Created %d virtual machines: %v\n", len(vmNames), vmNames) fmt.Printf(" ▶️ Step 5.1: Waiting for all VMs to become Running (timeout: %v)\n", config.VMsRunningTimeout) - // Wait for all VMs to become Running + // Wait for all VMs to become Running (check all VMs in parallel) vmWaitCtx, cancel := context.WithTimeout(ctx, config.VMsRunningTimeout) defer cancel() - for i, vmName := range vmNames { - fmt.Printf(" ⏳ Waiting for VM %d/%d: %s\n", i+1, len(vmNames), vmName) - vmReady := false - for !vmReady { - select { - case <-vmWaitCtx.Done(): - baseClusterResources.SSHClient.Close() - baseClusterResources.TunnelInfo.StopFunc() - return nil, fmt.Errorf("timeout waiting for VM %s to become Running", vmName) - case <-time.After(20 * time.Second): + + // Track which VMs are ready + vmStatus := make(map[string]bool) + for _, vmName := range vmNames { + vmStatus[vmName] = false + } + totalVMs := len(vmNames) + + allVMsReady := false + for !allVMsReady { + select { + case <-vmWaitCtx.Done(): + // List VMs that are not running + notRunning := make([]string, 
0) + for _, vmName := range vmNames { + if !vmStatus[vmName] { + notRunning = append(notRunning, vmName) + } + } + baseClusterResources.SSHClient.Close() + baseClusterResources.TunnelInfo.StopFunc() + return nil, fmt.Errorf("timeout waiting for VMs to become Running. Not ready: %v", notRunning) + + case <-time.After(20 * time.Second): + readyCount := 0 + for _, vmName := range vmNames { + if vmStatus[vmName] { + readyCount++ + continue + } vm, err := virtClient.VirtualMachines().Get(vmWaitCtx, namespace, vmName) if err != nil { continue } if vm.Status.Phase == v1alpha2.MachineRunning { - vmReady = true + vmStatus[vmName] = true + readyCount++ fmt.Printf(" ✅ VM %s is Running\n", vmName) } } + if readyCount == totalVMs { + allVMsReady = true + } else { + fmt.Printf(" ⏳ VMs ready: %d/%d\n", readyCount, totalVMs) + } } } fmt.Printf(" ✅ Step 5.1: All VMs are Running\n") diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index b35a2c0..fe90fd4 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -144,6 +144,8 @@ func checkResourceConflicts(ctx context.Context, virtClient *virtualization.Clie vmNames := make([]string, 0, len(vmNodes)) systemDiskNames := make([]string, 0, len(vmNodes)) cvmiNamesSet := make(map[string]bool) + // Track which CVMI names have TrustIfExists enabled + cvmiTrustIfExists := make(map[string]bool) for _, node := range vmNodes { vmName := node.Hostname @@ -154,6 +156,10 @@ func checkResourceConflicts(ctx context.Context, virtClient *virtualization.Clie // Get CVMI name from image URL cvmiName := getCVMINameFromImageURL(node.OSType.ImageURL) cvmiNamesSet[cvmiName] = true + // Track TrustIfExists setting for this CVMI + if node.OSType.TrustIfExists { + cvmiTrustIfExists[cvmiName] = true + } } // Check for conflicting VirtualMachines @@ -194,8 +200,11 @@ func checkResourceConflicts(ctx context.Context, virtClient *virtualization.Clie for _, cvmiName := range cvmiNames { _, err := virtClient.ClusterVirtualImages().Get(ctx, cvmiName) if err == 
nil { - // CVMI exists - conflicts.ClusterVirtualImages = append(conflicts.ClusterVirtualImages, cvmiName) + // CVMI exists - only report as conflict if TrustIfExists is not set + if !cvmiTrustIfExists[cvmiName] { + conflicts.ClusterVirtualImages = append(conflicts.ClusterVirtualImages, cvmiName) + } + // If TrustIfExists is true, we trust existing CVMI and skip conflict } else if !errors.IsNotFound(err) { // Some other error occurred return nil, fmt.Errorf("failed to check ClusterVirtualImage %s: %w", cvmiName, err) @@ -295,6 +304,37 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace } // If VirtualDisk already exists, we'll use it + // 2.5. Create data VirtualDisk if DataDiskSize is specified + var dataDiskName string + if node.DataDiskSize != nil && *node.DataDiskSize > 0 { + dataDiskName = fmt.Sprintf("%s-data", vmName) + _, err = virtClient.VirtualDisks().Get(ctx, namespace, dataDiskName) + if err != nil { + if !errors.IsNotFound(err) { + return "", fmt.Errorf("failed to check VirtualDisk %s: %w", dataDiskName, err) + } + // VirtualDisk doesn't exist, create it (blank disk, no data source) + dataDisk := &v1alpha2.VirtualDisk{ + ObjectMeta: metav1.ObjectMeta{ + Name: dataDiskName, + Namespace: namespace, + }, + Spec: v1alpha2.VirtualDiskSpec{ + PersistentVolumeClaim: v1alpha2.VirtualDiskPersistentVolumeClaim{ + Size: resource.NewQuantity(int64(*node.DataDiskSize)*1024*1024*1024, resource.BinarySI), + StorageClass: &storageClass, + }, + // No DataSource - creates empty/blank disk + }, + } + err = virtClient.VirtualDisks().Create(ctx, dataDisk) + if err != nil { + return "", fmt.Errorf("failed to create data VirtualDisk %s: %w", dataDiskName, err) + } + } + // If VirtualDisk already exists, we'll use it + } + // 3. 
Create VirtualMachine (check if it exists first) _, err = virtClient.VirtualMachines().Get(ctx, namespace, vmName) if err != nil { @@ -335,12 +375,22 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace Memory: v1alpha2.MemorySpec{ Size: memoryQuantity, }, - BlockDeviceRefs: []v1alpha2.BlockDeviceSpecRef{ - { - Kind: v1alpha2.DiskDevice, - Name: systemDiskName, - }, - }, + BlockDeviceRefs: func() []v1alpha2.BlockDeviceSpecRef { + refs := []v1alpha2.BlockDeviceSpecRef{ + { + Kind: v1alpha2.DiskDevice, + Name: systemDiskName, + }, + } + // Add data disk if created + if dataDiskName != "" { + refs = append(refs, v1alpha2.BlockDeviceSpecRef{ + Kind: v1alpha2.DiskDevice, + Name: dataDiskName, + }) + } + return refs + }(), Provisioning: &v1alpha2.Provisioning{ Type: "UserData", UserData: generateCloudInitUserData(vmName, sshPublicKey), diff --git a/tests/README.md b/tests/README.md index a9d03e5..a88ed95 100644 --- a/tests/README.md +++ b/tests/README.md @@ -90,6 +90,7 @@ The template uses `test_template` as the package name. You need to update it to export SSH_VM_USER='cloud' export TEST_CLUSTER_NAMESPACE='e2e-test-cluster' export TEST_CLUSTER_CLEANUP='false' # Set to 'true' to enable cleanup + export SKIP_VIRTUALIZATION_CHECK='true' # Default: false (set to 'true' to skip virtualization module check) ``` 2. 
Make it executable and run to export all the envvars: diff --git a/tests/test-template/cluster_config.yml b/tests/test-template/cluster_config.yml index 86fdbd7..035eb3e 100644 --- a/tests/test-template/cluster_config.yml +++ b/tests/test-template/cluster_config.yml @@ -1,4 +1,9 @@ # Test nested cluster configuration +# +# VM disk options: +# diskSize - system disk size in GB (required, created from OS image) +# dataDiskSize - additional data disk size in GB (optional, if not set - data disk not created) +# clusterDefinition: masters: # Master nodes configuration - hostname: "master-1" From aa4cb3e4d4bfe69b3e8dc19262e23e86a6e7c71b Mon Sep 17 00:00:00 2001 From: Ivan Ogurchenok Date: Fri, 26 Dec 2025 03:10:59 +0300 Subject: [PATCH 46/48] move cloud-init to K8s Secret, improve SSH key handling - Store cloud-init config in Secret (bypass 2048 byte userData limit) - Use user SSH key for VM connections, bootstrap key only for dhctl upload - Add VerifyVMConfig to check/fix VM hostname after creation - Add SecretClient for virtualization API - Rename secret to e2e-cloudinit-{namespace}-{config} for clarity - Update README with cloud-init secret documentation Signed-off-by: Ivan Ogurchenok --- .gitignore | 25 +++ internal/kubernetes/virtualization/client.go | 11 + internal/kubernetes/virtualization/secret.go | 52 +++++ pkg/cluster/cluster.go | 25 ++- pkg/cluster/setup.go | 71 ++++++- pkg/cluster/vms.go | 190 +++++++++++++++--- tests/README.md | 10 + .../cluster_creation_test.go | 32 ++- 8 files changed, 379 insertions(+), 37 deletions(-) create mode 100644 .gitignore create mode 100644 internal/kubernetes/virtualization/secret.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0d0d9f --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Test artifacts +temp/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Go +*.exe +*.exe~ +*.dll +*.so +*.dylib +*.test +*.out +go.work +go.work.sum + +# OS +.DS_Store +Thumbs.db + diff --git 
a/internal/kubernetes/virtualization/client.go b/internal/kubernetes/virtualization/client.go index ca1b137..9e84484 100644 --- a/internal/kubernetes/virtualization/client.go +++ b/internal/kubernetes/virtualization/client.go @@ -20,6 +20,7 @@ import ( "context" "github.com/deckhouse/virtualization/api/core/v1alpha2" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" @@ -40,6 +41,11 @@ func NewClient(ctx context.Context, config *rest.Config) (*Client, error) { return nil, err } + // Register core v1 types (for Secrets, etc.) + if err := corev1.AddToScheme(scheme); err != nil { + return nil, err + } + cl, err := client.New(config, client.Options{Scheme: scheme}) if err != nil { return nil, err @@ -72,3 +78,8 @@ func (c *Client) VirtualImages() *VirtualImageClient { func (c *Client) VirtualMachineBlockDeviceAttachments() *VMBDClient { return &VMBDClient{client: c.client} } + +// Secrets returns a Secret client for managing cloud-init secrets +func (c *Client) Secrets() *SecretClient { + return &SecretClient{client: c.client} +} diff --git a/internal/kubernetes/virtualization/secret.go b/internal/kubernetes/virtualization/secret.go new file mode 100644 index 0000000..59fb54b --- /dev/null +++ b/internal/kubernetes/virtualization/secret.go @@ -0,0 +1,52 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package virtualization + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// SecretClient provides operations on Secret resources for cloud-init +type SecretClient struct { + client client.Client +} + +// Get retrieves a Secret by namespace and name +func (c *SecretClient) Get(ctx context.Context, namespace, name string) (*corev1.Secret, error) { + secret := &corev1.Secret{} + err := c.client.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, secret) + if err != nil { + return nil, err + } + return secret, nil +} + +// Create creates a new Secret +func (c *SecretClient) Create(ctx context.Context, secret *corev1.Secret) error { + return c.client.Create(ctx, secret) +} + +// Delete deletes a Secret +func (c *SecretClient) Delete(ctx context.Context, namespace, name string) error { + secret := &corev1.Secret{} + secret.Name = name + secret.Namespace = namespace + return c.client.Delete(ctx, secret) +} diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 97fe7f5..d01ef2d 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -146,6 +146,12 @@ func CreateTestCluster( return nil, fmt.Errorf("failed to get SSH private key path: %w", err) } + // Get bootstrap SSH key (used for VM connections, has no passphrase) + bootstrapKeyPath, err := GetBootstrapSSHPrivateKeyPath() + if err != nil { + return nil, fmt.Errorf("failed to get bootstrap SSH private key path: %w", err) + } + fmt.Printf(" ▶️ Step 2: Connecting to base cluster %s@%s\n", sshUser, sshHost) // Step 2: Connect to base cluster baseClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ @@ -295,7 +301,7 @@ func CreateTestCluster( setupSSHClient, err := ssh.NewClientWithJumpHost( sshUser, sshHost, sshKeyPath, // jump host - config.VMSSHUser, setupNodeIP, sshKeyPath, // target host + config.VMSSHUser, setupNodeIP, sshKeyPath, // target host (user's key added via cloud-init) ) if err != nil { 
baseClusterResources.SSHClient.Close() @@ -304,6 +310,17 @@ func CreateTestCluster( } fmt.Printf(" ✅ Step 7: SSH connection to setup node established\n") + fmt.Printf(" ▶️ Step 7.5: Verifying VM configuration on setup node\n") + // Step 7.5: Verify VM config (hostname, etc.) + // NOTE: This step can potentially be removed if DVP correctly sets hostname from VM name + pkgCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + err = VerifyVMConfig(pkgCtx, setupSSHClient, "setup-node") + cancel() + if err != nil { + fmt.Printf(" ⚠️ Warning: VM configuration check failed on setup node: %v\n", err) + // Continue anyway - this is a verification step + } + fmt.Printf(" ▶️ Step 8: Installing Docker on setup node (this may take up to 15 minutes)\n") // Step 8: Install Docker on setup node dockerCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) @@ -329,9 +346,9 @@ func CreateTestCluster( fmt.Printf(" ✅ Step 9: Bootstrap configuration prepared\n") fmt.Printf(" ▶️ Step 10: Uploading bootstrap files to setup node\n") - // Step 10: Upload bootstrap files + // Step 10: Upload bootstrap files (using bootstrap key - no passphrase issues) uploadCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) - err = UploadBootstrapFiles(uploadCtx, setupSSHClient, sshKeyPath, bootstrapConfig) + err = UploadBootstrapFiles(uploadCtx, setupSSHClient, bootstrapKeyPath, bootstrapConfig) cancel() if err != nil { setupSSHClient.Close() @@ -374,7 +391,7 @@ func CreateTestCluster( fmt.Printf(" ✅ Step 12: Base cluster tunnel stopped\n") fmt.Printf(" ▶️ Step 13: Connecting to test cluster master %s\n", firstMasterIP) - // Step 14: Connect to test cluster + // Step 14: Connect to test cluster (user's key works - added via cloud-init) testClusterResources, err := ConnectToCluster(ctx, ConnectClusterOptions{ SSHUser: sshUser, SSHHost: sshHost, diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index 1b2534b..6f0d140 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -22,6 +22,7 @@ 
import ( "fmt" "net" "os" + "os/exec" "os/user" "path/filepath" "runtime" @@ -626,7 +627,7 @@ func addNodeToCluster(ctx context.Context, node config.ClusterNode, bootstrapScr // Create SSH client to the node through jump host (base cluster master) sshClient, err := ssh.NewClientWithJumpHost( baseSSHUser, baseSSHHost, sshKeyPath, // jump host - config.VMSSHUser, nodeIP, sshKeyPath, // target host + config.VMSSHUser, nodeIP, sshKeyPath, // target host (user's key added via cloud-init) ) if err != nil { fmt.Printf(" ❌ Failed to create SSH connection to node %s (%s): %v\n", node.Hostname, nodeIP, err) @@ -918,3 +919,71 @@ func expandPath(path string) (string, error) { return filepath.Join(usr.HomeDir, strings.TrimPrefix(path, "~/")), nil } + +// BootstrapSSHKeyDir is the directory where bootstrap SSH keys are stored +const BootstrapSSHKeyDir = "temp/bootstrap_ssh" + +// GetOrCreateBootstrapSSHKey returns paths to bootstrap SSH key pair. +// If keys don't exist, they are generated (without passphrase). +// Keys are stored in temp/bootstrap_ssh/ and reused between test runs. +// This avoids issues with user's SSH key passphrase during bootstrap. 
+func GetOrCreateBootstrapSSHKey() (privateKeyPath, publicKeyPath string, err error) { + // Get project root directory (where temp/ should be) + _, filename, _, ok := runtime.Caller(0) + if !ok { + return "", "", fmt.Errorf("failed to get caller info") + } + // Go up from pkg/cluster/setup.go to project root + projectRoot := filepath.Join(filepath.Dir(filename), "..", "..") + + keyDir := filepath.Join(projectRoot, BootstrapSSHKeyDir) + privateKeyPath = filepath.Join(keyDir, "id_rsa") + publicKeyPath = filepath.Join(keyDir, "id_rsa.pub") + + // Check if keys already exist + if _, err := os.Stat(privateKeyPath); err == nil { + if _, err := os.Stat(publicKeyPath); err == nil { + // Both keys exist, return them + return privateKeyPath, publicKeyPath, nil + } + } + + // Keys don't exist, create directory and generate them + if err := os.MkdirAll(keyDir, 0700); err != nil { + return "", "", fmt.Errorf("failed to create bootstrap SSH key directory: %w", err) + } + + // Generate key pair using ssh-keygen (no passphrase) + fmt.Printf(" 🔑 Generating bootstrap SSH key pair in %s\n", keyDir) + cmd := exec.Command("ssh-keygen", "-t", "rsa", "-b", "4096", "-f", privateKeyPath, "-N", "", "-q", "-C", "bootstrap-e2e-test") + if output, err := cmd.CombinedOutput(); err != nil { + return "", "", fmt.Errorf("failed to generate SSH key pair: %w\nOutput: %s", err, output) + } + + fmt.Printf(" ✅ Bootstrap SSH key pair generated\n") + return privateKeyPath, publicKeyPath, nil +} + +// GetBootstrapSSHPublicKeyContent returns the content of bootstrap SSH public key +func GetBootstrapSSHPublicKeyContent() (string, error) { + _, publicKeyPath, err := GetOrCreateBootstrapSSHKey() + if err != nil { + return "", fmt.Errorf("failed to get bootstrap SSH key: %w", err) + } + + content, err := os.ReadFile(publicKeyPath) + if err != nil { + return "", fmt.Errorf("failed to read bootstrap public key: %w", err) + } + + return strings.TrimSpace(string(content)), nil +} + +// 
GetBootstrapSSHPrivateKeyPath returns the path to bootstrap SSH private key +func GetBootstrapSSHPrivateKeyPath() (string, error) { + privateKeyPath, _, err := GetOrCreateBootstrapSSHKey() + if err != nil { + return "", fmt.Errorf("failed to get bootstrap SSH key: %w", err) + } + return privateKeyPath, nil +} diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index fe90fd4..2d997eb 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -22,22 +22,24 @@ import ( "strings" "time" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/deckhouse/storage-e2e/internal/config" "github.com/deckhouse/storage-e2e/internal/kubernetes/virtualization" "github.com/deckhouse/virtualization/api/core/v1alpha2" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // VMResources tracks VM-related resources created for a test cluster type VMResources struct { - VirtClient *virtualization.Client - Namespace string - VMNames []string - CVMINames []string // ClusterVirtualImage names (cluster-scoped) - SetupVMName string // Name of the setup VM (always created) + VirtClient *virtualization.Client + Namespace string + VMNames []string + CVMINames []string // ClusterVirtualImage names (cluster-scoped) + SetupVMName string // Name of the setup VM (always created) + CloudInitSecretName string // Cloud-init secret name (for cleanup) } // CreateVirtualMachines creates virtual machines from cluster definition. 
@@ -115,11 +117,12 @@ func CreateVirtualMachines(ctx context.Context, virtClient *virtualization.Clien // Track setup VM separately // The setup VM is always created, so it will exist in vmNames resources := &VMResources{ - VirtClient: virtClient, - Namespace: namespace, - VMNames: vmNames, - CVMINames: cvmiNames, - SetupVMName: setupVMName, // setupVMName was set above when creating setupVM + VirtClient: virtClient, + Namespace: namespace, + VMNames: vmNames, + CVMINames: cvmiNames, + SetupVMName: setupVMName, // setupVMName was set above when creating setupVM + CloudInitSecretName: getCloudInitSecretName(), // For cleanup } return vmNames, resources, nil @@ -335,19 +338,19 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace // If VirtualDisk already exists, we'll use it } - // 3. Create VirtualMachine (check if it exists first) + // 3. Ensure cloud-init secret exists (shared by all VMs) + cloudInitSecretName, err := getOrCreateCloudInitSecret(ctx, virtClient, namespace) + if err != nil { + return "", fmt.Errorf("failed to ensure cloud-init secret: %w", err) + } + + // 4. 
Create VirtualMachine (check if it exists first) _, err = virtClient.VirtualMachines().Get(ctx, namespace, vmName) if err != nil { if !errors.IsNotFound(err) { return "", fmt.Errorf("failed to check VirtualMachine %s: %w", vmName, err) } // VirtualMachine doesn't exist, create it - // Get SSH public key content - sshPublicKey, err := GetSSHPublicKeyContent() - if err != nil { - return "", fmt.Errorf("failed to get SSH public key content: %w", err) - } - memoryQuantity := resource.MustParse(fmt.Sprintf("%dGi", node.RAM)) vm := &v1alpha2.VirtualMachine{ ObjectMeta: metav1.ObjectMeta{ @@ -392,8 +395,11 @@ func createVM(ctx context.Context, virtClient *virtualization.Client, namespace return refs }(), Provisioning: &v1alpha2.Provisioning{ - Type: "UserData", - UserData: generateCloudInitUserData(vmName, sshPublicKey), + Type: "UserDataRef", + UserDataRef: &v1alpha2.UserDataRef{ + Kind: "Secret", + Name: cloudInitSecretName, + }, }, }, } @@ -436,8 +442,24 @@ func getCVMINameFromImageURL(imageURL string) string { return name } -// generateCloudInitUserData generates cloud-init user data for VM provisioning -func generateCloudInitUserData(hostname, sshPubKey string) string { +// getCloudInitSecretName returns unique cloud-init secret name based on config and namespace. +// Format: e2e-cloudinit-{namespace}-{config-name} (without .yml/.yaml extension) +func getCloudInitSecretName() string { + // Get config filename and remove extension + configName := strings.TrimSuffix(config.YAMLConfigFilename, ".yml") + configName = strings.TrimSuffix(configName, ".yaml") + // Sanitize for Kubernetes naming + configName = strings.ToLower(configName) + configName = strings.ReplaceAll(configName, "_", "-") + namespace := strings.ToLower(config.TestClusterNamespace) + namespace = strings.ReplaceAll(namespace, "_", "-") + return fmt.Sprintf("e2e-cloudinit-%s-%s", namespace, configName) +} + +// generateCloudInitConfig generates full cloud-init configuration. 
+// Uses Secret to avoid 2048 byte limit of inline userData. +// Hostname is set automatically by DVP from VM name. +func generateCloudInitConfig(userPubKey, bootstrapPubKey string) string { return fmt.Sprintf(`#cloud-config package_update: true packages: @@ -463,21 +485,118 @@ users: lock_passwd: false ssh_authorized_keys: - %s + - %s + write_files: - path: /etc/ssh/sshd_config.d/allow_tcp_forwarding.conf content: | - # Разрешить TCP forwarding + # Allow TCP forwarding for SSH jump host AllowTcpForwarding yes runcmd: - - systemctl restart ssh - - hostnamectl set-hostname %s - - systemctl daemon-reload - - systemctl enable --now qemu-guest-agent.service -`, sshPubKey, hostname) + - systemctl restart ssh 2>/dev/null || systemctl restart sshd 2>/dev/null || true + - systemctl enable --now qemu-guest-agent + +final_message: "The system is finally up, after $UPTIME seconds" +`, userPubKey, bootstrapPubKey) +} + +// getOrCreateCloudInitSecret ensures the cloud-init secret exists in the namespace. +// Creates it if not exists, returns the secret name. 
+func getOrCreateCloudInitSecret(ctx context.Context, virtClient *virtualization.Client, namespace string) (string, error) { + secretName := getCloudInitSecretName() + + // Check if secret already exists + _, err := virtClient.Secrets().Get(ctx, namespace, secretName) + if err == nil { + // Secret exists, return its name + return secretName, nil + } + if !errors.IsNotFound(err) { + return "", fmt.Errorf("failed to check cloud-init secret: %w", err) + } + + // Secret doesn't exist, create it + fmt.Printf(" 🔐 Creating cloud-init secret %s/%s\n", namespace, secretName) + + // Get SSH public keys + userPubKey, err := GetSSHPublicKeyContent() + if err != nil { + return "", fmt.Errorf("failed to get user SSH public key: %w", err) + } + + bootstrapPubKey, err := GetBootstrapSSHPublicKeyContent() + if err != nil { + return "", fmt.Errorf("failed to get bootstrap SSH public key: %w", err) + } + + // Generate cloud-init config + cloudInitConfig := generateCloudInitConfig(userPubKey, bootstrapPubKey) + + // Create secret with cloud-init data + // Note: Kubernetes Secret.Data expects raw bytes, the API handles base64 encoding + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: namespace, + }, + Type: corev1.SecretType("provisioning.virtualization.deckhouse.io/cloud-init"), + Data: map[string][]byte{ + "userData": []byte(cloudInitConfig), + }, + } + + err = virtClient.Secrets().Create(ctx, secret) + if err != nil { + return "", fmt.Errorf("failed to create cloud-init secret: %w", err) + } + + fmt.Printf(" ✅ Cloud-init secret created\n") + return secretName, nil } -// RemoveAllVMs forcefully stops and deletes virtual machines, virtual disks, and virtual images. +// VerifyVMConfig verifies VM configuration and fixes hostname if needed. +// Should be called after VM is running and before dhctl bootstrap. +// +// NOTE: This function can potentially be removed in the future if: +// 1. 
DVP correctly sets hostname from VM name via metadata +// 2. All packages are installed via cloud-init Secret (already done) +// For now, we keep it to verify and fix hostname if DVP doesn't set it correctly. +func VerifyVMConfig(ctx context.Context, sshClient interface { + Exec(ctx context.Context, cmd string) (string, error) +}, vmName string) error { + fmt.Printf(" 🔧 Verifying VM configuration on %s...\n", vmName) + + // Check current hostname + currentHostname, err := sshClient.Exec(ctx, "hostname") + if err != nil { + return fmt.Errorf("failed to get hostname on %s: %w", vmName, err) + } + currentHostname = strings.TrimSpace(currentHostname) + + // Fix hostname if it doesn't match VM name + // DVP should set hostname automatically from VM name, but we verify and fix if needed + if currentHostname != vmName { + fmt.Printf(" ⚠️ Hostname mismatch on %s: got '%s', expected '%s'. Fixing...\n", vmName, currentHostname, vmName) + _, err = sshClient.Exec(ctx, fmt.Sprintf("sudo hostnamectl set-hostname %s", vmName)) + if err != nil { + return fmt.Errorf("failed to set hostname on %s: %w", vmName, err) + } + fmt.Printf(" ✅ Hostname fixed on %s\n", vmName) + } else { + fmt.Printf(" ✅ Hostname correct on %s: %s\n", vmName, currentHostname) + } + + // Verify qemu-guest-agent is running (should be started by cloud-init) + _, err = sshClient.Exec(ctx, "sudo systemctl is-active qemu-guest-agent >/dev/null 2>&1 || sudo systemctl enable --now qemu-guest-agent 2>/dev/null || true") + if err != nil { + fmt.Printf(" ⚠️ Warning: qemu-guest-agent check failed on %s: %v\n", vmName, err) + } + + return nil +} + +// RemoveAllVMs forcefully stops and deletes virtual machines, virtual disks, cloud-init secret, and virtual images. // If a VirtualImage is in use by other resources, it will be skipped but VMs and VDs will still be deleted. 
func RemoveAllVMs(ctx context.Context, resources *VMResources) error { if resources == nil { @@ -501,6 +620,19 @@ func RemoveAllVMs(ctx context.Context, resources *VMResources) error { } } + // Delete cloud-init secret + if resources.CloudInitSecretName != "" { + fmt.Printf(" ⏳ Removing cloud-init secret %s/%s\n", resources.Namespace, resources.CloudInitSecretName) + err := resources.VirtClient.Secrets().Delete(ctx, resources.Namespace, resources.CloudInitSecretName) + if err != nil { + if !errors.IsNotFound(err) { + fmt.Printf(" ⚠️ Warning: Failed to delete cloud-init secret %s: %v\n", resources.CloudInitSecretName, err) + } + } else { + fmt.Printf(" ✅ Cloud-init secret %s deleted\n", resources.CloudInitSecretName) + } + } + return nil } diff --git a/tests/README.md b/tests/README.md index a88ed95..8a4ac81 100644 --- a/tests/README.md +++ b/tests/README.md @@ -98,3 +98,13 @@ The template uses `test_template` as the package name. You need to update it to chmod +x test_exports ``` +### Bootstrap SSH Key + +A temporary SSH key pair (without passphrase) is auto-generated in `temp/bootstrap_ssh/` for VM bootstrap. Both user's and bootstrap public keys are added to VMs for access. + +### Cloud-init Secret + +VM provisioning uses a Kubernetes Secret with cloud-init config (packages, SSH keys, etc.). 
+- **Name:** `e2e-cloudinit-{namespace}-{config}` (e.g., `e2e-cloudinit-e2e-test-cluster-cluster-config`) +- **Cleanup:** Automatically deleted with VMs when `TEST_CLUSTER_CLEANUP=true` + diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index 4ddb8bf..cb670ee 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -51,6 +51,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { bootstrapConfig string testClusterResources *cluster.TestClusterResources sshKeyPath string + bootstrapKeyPath string ) BeforeAll(func() { @@ -120,6 +121,13 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { GinkgoWriter.Printf(" ✅ SSH private key path obtained successfully\n") }) + By("Getting bootstrap SSH key path (for VM connections)", func() { + GinkgoWriter.Printf(" ▶️ Getting bootstrap SSH key path\n") + bootstrapKeyPath, err = cluster.GetBootstrapSSHPrivateKeyPath() + Expect(err).NotTo(HaveOccurred(), "Failed to get bootstrap SSH key path") + GinkgoWriter.Printf(" ✅ Bootstrap SSH key path: %s\n", bootstrapKeyPath) + }) + // Stage 1: LoadConfig - verifies and parses the config from yaml file By("LoadConfig: Loading and verifying cluster configuration from YAML", func() { yamlConfigFilename := config.YAMLConfigFilename @@ -398,7 +406,7 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { config.VMSSHUser, setupNodeIP, config.SSHUser, config.SSHHost) setupSSHClient, err = ssh.NewClientWithJumpHost( config.SSHUser, config.SSHHost, sshKeyPath, // jump host - config.VMSSHUser, setupNodeIP, sshKeyPath, // target host + config.VMSSHUser, setupNodeIP, sshKeyPath, // target host (user's key added via cloud-init) ) Expect(err).NotTo(HaveOccurred()) Expect(setupSSHClient).NotTo(BeNil()) @@ -406,6 +414,24 @@ var _ = Describe("Cluster Creation Step-by-Step Test", 
Ordered, func() { }) }) + // Step 6.5: Verify VM configuration (hostname, etc.) + // NOTE: This step can potentially be removed if DVP correctly sets hostname from VM name + It("should verify VM configuration on setup node", func() { + By("Verifying VM configuration on setup node", func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + GinkgoWriter.Printf(" ▶️ Verifying VM configuration on setup node\n") + err := cluster.VerifyVMConfig(ctx, setupSSHClient, "setup-node") + if err != nil { + GinkgoWriter.Printf(" ⚠️ Warning: VM configuration check failed: %v\n", err) + // Continue anyway - this is a verification step + } else { + GinkgoWriter.Printf(" ✅ VM configuration verified on setup node\n") + } + }) + }) + // Step 7: Install Docker on setup node (required for DKP bootstrap) It("should ensure Docker is installed on the setup node", func() { By("Installing Docker on setup node", func() { @@ -436,10 +462,10 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { defer cancel() GinkgoWriter.Printf(" ▶️ Uploading bootstrap files to setup node\n") - GinkgoWriter.Printf(" 📁 Private key: %s -> /home/cloud/.ssh/id_rsa\n", sshKeyPath) + GinkgoWriter.Printf(" 📁 Private key: %s -> /home/cloud/.ssh/id_rsa\n", bootstrapKeyPath) GinkgoWriter.Printf(" 📁 Config file: %s -> /home/cloud/config.yml\n", bootstrapConfig) - err = cluster.UploadBootstrapFiles(ctx, setupSSHClient, sshKeyPath, bootstrapConfig) + err = cluster.UploadBootstrapFiles(ctx, setupSSHClient, bootstrapKeyPath, bootstrapConfig) Expect(err).NotTo(HaveOccurred(), "Failed to upload bootstrap files to setup node") GinkgoWriter.Printf(" ✅ Bootstrap files uploaded successfully\n") }) From c57ad77a9d31a7be4d3b704c7760e2afbab7afe7 Mon Sep 17 00:00:00 2001 From: Ivan Ogurchenok Date: Fri, 26 Dec 2025 03:18:35 +0300 Subject: [PATCH 47/48] removed hostname check DVP set up is as expected. leave a comment about it. 
Signed-off-by: Ivan Ogurchenok --- pkg/cluster/cluster.go | 11 ----- pkg/cluster/vms.go | 46 ++----------------- .../cluster_creation_test.go | 18 -------- 3 files changed, 4 insertions(+), 71 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index d01ef2d..2421f57 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -310,17 +310,6 @@ func CreateTestCluster( } fmt.Printf(" ✅ Step 7: SSH connection to setup node established\n") - fmt.Printf(" ▶️ Step 7.5: Verifying VM configuration on setup node\n") - // Step 7.5: Verify VM config (hostname, etc.) - // NOTE: This step can potentially be removed if DVP correctly sets hostname from VM name - pkgCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) - err = VerifyVMConfig(pkgCtx, setupSSHClient, "setup-node") - cancel() - if err != nil { - fmt.Printf(" ⚠️ Warning: VM configuration check failed on setup node: %v\n", err) - // Continue anyway - this is a verification step - } - fmt.Printf(" ▶️ Step 8: Installing Docker on setup node (this may take up to 15 minutes)\n") // Step 8: Install Docker on setup node dockerCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index 2d997eb..c4280bc 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -458,7 +458,10 @@ func getCloudInitSecretName() string { // generateCloudInitConfig generates full cloud-init configuration. // Uses Secret to avoid 2048 byte limit of inline userData. -// Hostname is set automatically by DVP from VM name. +// +// NOTE: Hostname is NOT set in cloud-init. DVP automatically sets the hostname +// from the VirtualMachine name via cloud-init metadata. This was verified to work +// correctly for all VMs (masters, workers, bootstrap). Do not add "hostname:" here. 
func generateCloudInitConfig(userPubKey, bootstrapPubKey string) string { return fmt.Sprintf(`#cloud-config package_update: true @@ -555,47 +558,6 @@ func getOrCreateCloudInitSecret(ctx context.Context, virtClient *virtualization. return secretName, nil } -// VerifyVMConfig verifies VM configuration and fixes hostname if needed. -// Should be called after VM is running and before dhctl bootstrap. -// -// NOTE: This function can potentially be removed in the future if: -// 1. DVP correctly sets hostname from VM name via metadata -// 2. All packages are installed via cloud-init Secret (already done) -// For now, we keep it to verify and fix hostname if DVP doesn't set it correctly. -func VerifyVMConfig(ctx context.Context, sshClient interface { - Exec(ctx context.Context, cmd string) (string, error) -}, vmName string) error { - fmt.Printf(" 🔧 Verifying VM configuration on %s...\n", vmName) - - // Check current hostname - currentHostname, err := sshClient.Exec(ctx, "hostname") - if err != nil { - return fmt.Errorf("failed to get hostname on %s: %w", vmName, err) - } - currentHostname = strings.TrimSpace(currentHostname) - - // Fix hostname if it doesn't match VM name - // DVP should set hostname automatically from VM name, but we verify and fix if needed - if currentHostname != vmName { - fmt.Printf(" ⚠️ Hostname mismatch on %s: got '%s', expected '%s'. 
Fixing...\n", vmName, currentHostname, vmName) - _, err = sshClient.Exec(ctx, fmt.Sprintf("sudo hostnamectl set-hostname %s", vmName)) - if err != nil { - return fmt.Errorf("failed to set hostname on %s: %w", vmName, err) - } - fmt.Printf(" ✅ Hostname fixed on %s\n", vmName) - } else { - fmt.Printf(" ✅ Hostname correct on %s: %s\n", vmName, currentHostname) - } - - // Verify qemu-guest-agent is running (should be started by cloud-init) - _, err = sshClient.Exec(ctx, "sudo systemctl is-active qemu-guest-agent >/dev/null 2>&1 || sudo systemctl enable --now qemu-guest-agent 2>/dev/null || true") - if err != nil { - fmt.Printf(" ⚠️ Warning: qemu-guest-agent check failed on %s: %v\n", vmName, err) - } - - return nil -} - // RemoveAllVMs forcefully stops and deletes virtual machines, virtual disks, cloud-init secret, and virtual images. // If a VirtualImage is in use by other resources, it will be skipped but VMs and VDs will still be deleted. func RemoveAllVMs(ctx context.Context, resources *VMResources) error { diff --git a/tests/cluster-creation-by-steps/cluster_creation_test.go b/tests/cluster-creation-by-steps/cluster_creation_test.go index cb670ee..c184c15 100644 --- a/tests/cluster-creation-by-steps/cluster_creation_test.go +++ b/tests/cluster-creation-by-steps/cluster_creation_test.go @@ -414,24 +414,6 @@ var _ = Describe("Cluster Creation Step-by-Step Test", Ordered, func() { }) }) - // Step 6.5: Verify VM configuration (hostname, etc.) 
- // NOTE: This step can potentially be removed if DVP correctly sets hostname from VM name - It("should verify VM configuration on setup node", func() { - By("Verifying VM configuration on setup node", func() { - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - defer cancel() - - GinkgoWriter.Printf(" ▶️ Verifying VM configuration on setup node\n") - err := cluster.VerifyVMConfig(ctx, setupSSHClient, "setup-node") - if err != nil { - GinkgoWriter.Printf(" ⚠️ Warning: VM configuration check failed: %v\n", err) - // Continue anyway - this is a verification step - } else { - GinkgoWriter.Printf(" ✅ VM configuration verified on setup node\n") - } - }) - }) - // Step 7: Install Docker on setup node (required for DKP bootstrap) It("should ensure Docker is installed on the setup node", func() { By("Installing Docker on setup node", func() { From 36fce4091bb933232a5c669493757f9cbcf5b788 Mon Sep 17 00:00:00 2001 From: Ivan Ogurchenok Date: Fri, 26 Dec 2025 03:58:06 +0300 Subject: [PATCH 48/48] parallel worker bootstrap, webhook retry, kubectl aliases - Add parallel worker bootstrapping with mutex-protected progress output - Add execWithWebhookRetry with configurable retry constants - Add kubectl aliases (k=kubectl) to cloud-init via /etc/profile.d/ - Update kernel versions in images.go to match actual cluster values Signed-off-by: Ivan Ogurchenok --- internal/config/images.go | 12 ++++---- pkg/cluster/modules.go | 65 +++++++++++++++++++++++++++++++-------- pkg/cluster/setup.go | 46 ++++++++++++++++++++++++--- pkg/cluster/vms.go | 9 ++++++ 4 files changed, 109 insertions(+), 23 deletions(-) diff --git a/internal/config/images.go b/internal/config/images.go index 8973af7..95622d6 100644 --- a/internal/config/images.go +++ b/internal/config/images.go @@ -38,12 +38,12 @@ package config var OSTypeMap = map[string]OSType{ "Ubuntu 22.04 6.2.0-39-generic": { ImageURL: "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img", - 
KernelVersion: "6.2.0-39-generic", + KernelVersion: "5.15.0-164-generic", TrustIfExists: true, }, "Ubuntu 24.04 6.8.0-53-generic": { ImageURL: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img", - KernelVersion: "6.8.0-53-generic", + KernelVersion: "6.8.0-90-generic", TrustIfExists: true, }, "RedOS 8.0 6.6.26-1.red80.x86_64": { @@ -58,22 +58,22 @@ var OSTypeMap = map[string]OSType{ }, "Debian 12 Bookworm": { ImageURL: "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2", - KernelVersion: "6.2.0", + KernelVersion: "6.1.0-41-cloud-amd64", TrustIfExists: true, }, "Debian 13 Trixie": { ImageURL: "https://cdimage.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2", - KernelVersion: "6.8.0", + KernelVersion: "6.12.57+deb13-amd64", TrustIfExists: true, }, "AltLinux 10.4": { ImageURL: "https://ftp.altlinux.org/pub/distributions/ALTLinux/p10/images/cloud/x86_64/alt-server-10.4-p10-cloud-x86_64.qcow2", - KernelVersion: "6", + KernelVersion: "6.1.130-un-def-alt1", TrustIfExists: true, }, "AltLinux 11": { ImageURL: "https://ftp.altlinux.org/pub/distributions/ALTLinux/p11/images/cloud/x86_64/alt-server-11.0-p11-cloud-x86_64.qcow2", - KernelVersion: "6", + KernelVersion: "6.12.34-6.12-alt1", TrustIfExists: true, }, } diff --git a/pkg/cluster/modules.go b/pkg/cluster/modules.go index 0e5b17d..6d49884 100644 --- a/pkg/cluster/modules.go +++ b/pkg/cluster/modules.go @@ -29,6 +29,16 @@ import ( "k8s.io/client-go/rest" ) +// Webhook retry configuration +const ( + // WebhookRetryAttempts is the number of retry attempts for webhook connection errors + WebhookRetryAttempts = 10 + // WebhookRetryInitialDelay is the initial delay before first retry + WebhookRetryInitialDelay = 3 * time.Second + // WebhookRetryBackoffMultiplier is the multiplier for exponential backoff + WebhookRetryBackoffMultiplier = 1.5 +) + // moduleGraph represents the dependency graph structure type moduleGraph struct { modules 
map[string]*config.ModuleConfig // module name -> module config @@ -125,8 +135,8 @@ func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleC } // Retry logic for webhook connection errors - maxRetries := 10 - retryDelay := 2 * time.Second + maxRetries := WebhookRetryAttempts + retryDelay := WebhookRetryInitialDelay var lastErr error for attempt := 0; attempt < maxRetries; attempt++ { @@ -146,7 +156,7 @@ func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleC return ctx.Err() case <-time.After(retryDelay): // Exponential backoff - retryDelay = time.Duration(float64(retryDelay) * 1.5) + retryDelay = time.Duration(float64(retryDelay) * WebhookRetryBackoffMultiplier) continue } } @@ -168,7 +178,7 @@ func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleC return ctx.Err() case <-time.After(retryDelay): // Exponential backoff - retryDelay = time.Duration(float64(retryDelay) * 1.5) + retryDelay = time.Duration(float64(retryDelay) * WebhookRetryBackoffMultiplier) continue } } @@ -184,6 +194,7 @@ func configureModuleConfig(ctx context.Context, kubeconfig *rest.Config, moduleC // configureModuleConfigViaSSH creates or updates a ModuleConfig resource via kubectl over SSH // This ensures the webhook is called from within the cluster network +// It retries on webhook connection errors to handle cases where the webhook service isn't ready yet func configureModuleConfigViaSSH(ctx context.Context, sshClient ssh.SSHClient, moduleConfig *config.ModuleConfig) error { // Build ModuleConfig YAML moduleConfigYAML := struct { @@ -221,11 +232,9 @@ func configureModuleConfigViaSSH(ctx context.Context, sshClient ssh.SSHClient, m return fmt.Errorf("failed to marshal ModuleConfig YAML: %w", err) } - // Apply via kubectl over SSH using the found path cmd := fmt.Sprintf("sudo /opt/deckhouse/bin/kubectl apply -f - << 'MODULECONFIG_EOF'\n%sMODULECONFIG_EOF", string(yamlBytes)) - output, err := sshClient.Exec(ctx, cmd) - if 
err != nil { - return fmt.Errorf("failed to apply ModuleConfig %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) + if err := execWithWebhookRetry(ctx, sshClient, cmd, moduleConfig.Name); err != nil { + return fmt.Errorf("failed to apply ModuleConfig %s via SSH: %w", moduleConfig.Name, err) } return nil @@ -288,16 +297,48 @@ func configureModulePullOverrideViaSSH(ctx context.Context, sshClient ssh.SSHCli return fmt.Errorf("failed to marshal ModulePullOverride YAML: %w", err) } - // Apply via kubectl over SSH using the found path cmd := fmt.Sprintf("sudo /opt/deckhouse/bin/kubectl apply -f - << 'MODULEPULLOVERRIDE_EOF'\n%sMODULEPULLOVERRIDE_EOF", string(yamlBytes)) - output, err := sshClient.Exec(ctx, cmd) - if err != nil { - return fmt.Errorf("failed to apply ModulePullOverride %s via SSH: %w\nOutput: %s", moduleConfig.Name, err, output) + if err := execWithWebhookRetry(ctx, sshClient, cmd, moduleConfig.Name); err != nil { + return fmt.Errorf("failed to apply ModulePullOverride %s via SSH: %w", moduleConfig.Name, err) } return nil } +// execWithWebhookRetry executes a kubectl command via SSH with retry logic for webhook errors +func execWithWebhookRetry(ctx context.Context, sshClient ssh.SSHClient, cmd, resourceName string) error { + maxRetries := WebhookRetryAttempts + retryDelay := WebhookRetryInitialDelay + + var lastOutput string + for attempt := 0; attempt < maxRetries; attempt++ { + output, err := sshClient.Exec(ctx, cmd) + if err == nil { + return nil + } + lastOutput = output + + // Check if it's a webhook connection error (check both error and output) + combinedErr := fmt.Sprintf("%v %s", err, output) + if isWebhookConnectionError(fmt.Errorf("%s", combinedErr)) { + if attempt < maxRetries-1 { + fmt.Printf(" ⏳ Webhook not ready for %s, retrying in %v (attempt %d/%d)...\n", + resourceName, retryDelay, attempt+1, maxRetries) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryDelay): + retryDelay = 
time.Duration(float64(retryDelay) * WebhookRetryBackoffMultiplier) + continue + } + } + } + return fmt.Errorf("command failed: %w\nOutput: %s", err, output) + } + + return fmt.Errorf("command failed after %d attempts\nLast output: %s", maxRetries, lastOutput) +} + // isWebhookConnectionError checks if the error is a webhook connection error func isWebhookConnectionError(err error) bool { if err == nil { diff --git a/pkg/cluster/setup.go b/pkg/cluster/setup.go index 6f0d140..ca5ef2b 100644 --- a/pkg/cluster/setup.go +++ b/pkg/cluster/setup.go @@ -27,6 +27,7 @@ import ( "path/filepath" "runtime" "strings" + "sync" "text/template" "time" @@ -595,14 +596,49 @@ func AddNodesToCluster(ctx context.Context, kubeconfig *rest.Config, clusterDef } } - // Process all workers + // Process all workers in parallel workerCount := len(clusterDef.Workers) if workerCount > 0 { - fmt.Printf(" ▶️ Adding %d worker node(s) to the cluster\n", workerCount) + fmt.Printf(" ▶️ Adding %d worker node(s) to the cluster (parallel)\n", workerCount) + + var wg sync.WaitGroup + var mu sync.Mutex + errChan := make(chan error, workerCount) + completedCount := 0 + for _, workerNode := range clusterDef.Workers { - if err := addNodeToCluster(ctx, workerNode, workerBootstrapScript, clusterDef, baseSSHUser, baseSSHHost, sshKeyPath); err != nil { - return fmt.Errorf("failed to add worker node %s: %w", workerNode.Hostname, err) - } + wg.Add(1) + go func(node config.ClusterNode) { + defer wg.Done() + + mu.Lock() + fmt.Printf(" ⏳ Starting bootstrap on %s...\n", node.Hostname) + mu.Unlock() + + err := addNodeToCluster(ctx, node, workerBootstrapScript, clusterDef, baseSSHUser, baseSSHHost, sshKeyPath) + + mu.Lock() + if err != nil { + fmt.Printf(" ❌ Worker %s failed: %v\n", node.Hostname, err) + errChan <- fmt.Errorf("worker %s: %w", node.Hostname, err) + } else { + completedCount++ + fmt.Printf(" ✅ [%d/%d] Worker %s bootstrapped successfully\n", completedCount, workerCount, node.Hostname) + } + mu.Unlock() + 
}(workerNode) + } + + wg.Wait() + close(errChan) + + // Collect all errors + var errs []error + for err := range errChan { + errs = append(errs, err) + } + if len(errs) > 0 { + return fmt.Errorf("failed to add %d worker(s): %v", len(errs), errs) } } diff --git a/pkg/cluster/vms.go b/pkg/cluster/vms.go index c4280bc..8def5cd 100644 --- a/pkg/cluster/vms.go +++ b/pkg/cluster/vms.go @@ -495,6 +495,15 @@ write_files: content: | # Allow TCP forwarding for SSH jump host AllowTcpForwarding yes + - path: /etc/profile.d/kubectl-aliases.sh + permissions: '0644' + content: | + # kubectl aliases and completion + alias k=kubectl + if command -v kubectl &>/dev/null; then + source <(kubectl completion bash) + complete -o default -F __start_kubectl k + fi runcmd: - systemctl restart ssh 2>/dev/null || systemctl restart sshd 2>/dev/null || true