From c70bf06211c2bb7a9b628a5e1c7dc71e41dac063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 15:18:44 +0300 Subject: [PATCH 01/11] security: require STORAGE_SECRET and POWERDNS_API_KEY, fix presigned URL path normalization - Remove hardcoded fallback secrets in platform/config.go - Add validateConfig() to fail startup if required secrets are missing - Fix path normalization in pkg/crypto/presign.go SignURL to match VerifyURL - Update config tests to set required environment variables --- internal/platform/config.go | 27 +++++++++++++++++++++++---- internal/platform/config_test.go | 21 ++++++++++++++------- pkg/crypto/presign.go | 6 ++++-- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/internal/platform/config.go b/internal/platform/config.go index 216fc77a7..84fd434c0 100644 --- a/internal/platform/config.go +++ b/internal/platform/config.go @@ -2,6 +2,8 @@ package platform import ( + "fmt" + "log/slog" "os" "github.com/joho/godotenv" @@ -56,7 +58,7 @@ type Config struct { func NewConfig() (*Config, error) { _ = godotenv.Load() // Ignore error if .env doesn't exist - return &Config{ + cfg := &Config{ Port: getEnv("PORT", "8080"), DatabaseURL: getEnv("DATABASE_URL", "postgres://cloud:cloud@localhost:5433/thecloud"), DatabaseReadURL: getEnv("DATABASE_READ_URL", ""), // Default to empty (use primary) @@ -73,7 +75,7 @@ func NewConfig() (*Config, error) { RateLimitGlobal: getEnv("RATE_LIMIT_GLOBAL", "100"), RateLimitAuth: getEnv("RATE_LIMIT_AUTH", "10"), StorageBackend: getEnv("STORAGE_BACKEND", "noop"), - StorageSecret: getEnv("STORAGE_SECRET", "storage-secret-key"), + StorageSecret: os.Getenv("STORAGE_SECRET"), WSAllowedOrigins: os.Getenv("WS_ALLOWED_ORIGINS"), DashboardAllowedOrigins: os.Getenv("DASHBOARD_ALLOWED_ORIGINS"), LvmVgName: getEnv("LVM_VG_NAME", "thecloud-vg"), @@ -81,7 +83,7 @@ func NewConfig() (*Config, error) { ObjectStorageNodes: getEnv("OBJECT_STORAGE_NODES", ""), PowerDNSAPIURL: getEnv("POWERDNS_API_URL", "http://localhost:8081"), - PowerDNSAPIKey: getEnv("POWERDNS_API_KEY", "thecloud-dns-secret"), + PowerDNSAPIKey: os.Getenv("POWERDNS_API_KEY"), PowerDNSServerID: getEnv("POWERDNS_SERVER_ID", "localhost"), LibvirtURI: getEnv("LIBVIRT_URI", ""), DockerDefaultNetwork: getEnv("DOCKER_DEFAULT_NETWORK", "cloud-network"), @@ -92,7 +94,11 @@ func NewConfig() (*Config, error) { VaultAddress: getEnv("VAULT_ADDR", "http://localhost:8200"), VaultToken: getEnv("VAULT_TOKEN", ""), VaultMountPath: getEnv("VAULT_MOUNT_PATH", "secret/data/thecloud/rds"), - }, nil + } + if err := validateConfig(cfg); err != nil { + return nil, err + } + return cfg, nil } func getEnv(key, fallback string) string { @@ -107,3 +113,16 @@ func getEnv(key, fallback string) string { func GetSecretsEncryptionKey() string { return os.Getenv("SECRETS_ENCRYPTION_KEY") } + +// validateConfig ensures required configuration values are present. +func validateConfig(cfg *Config) error { + if cfg.StorageSecret == "" { + slog.Error("STORAGE_SECRET environment variable is required") + return fmt.Errorf("STORAGE_SECRET environment variable is required") + } + if cfg.PowerDNSAPIKey == "" { + slog.Error("POWERDNS_API_KEY environment variable is required") + return fmt.Errorf("POWERDNS_API_KEY environment variable is required") + } + return nil +} diff --git a/internal/platform/config_test.go b/internal/platform/config_test.go index c0a24aa24..a2d2398ed 100644 --- a/internal/platform/config_test.go +++ b/internal/platform/config_test.go @@ -10,21 +10,28 @@ import ( func TestNewConfig(t *testing.T) { // Save original env and restore after test - originalPort := os.Getenv("PORT") - err := os.Setenv("PORT", originalPort) - require.NoError(t, err) + origPort := os.Getenv("PORT") + origSecret := os.Getenv("STORAGE_SECRET") + origDNSKey := os.Getenv("POWERDNS_API_KEY") + defer func() { + os.Setenv("PORT", origPort) + os.Setenv("STORAGE_SECRET", origSecret) + os.Setenv("POWERDNS_API_KEY", origDNSKey) + }() t.Run("Default values", func(t *testing.T) { - err := os.Unsetenv("PORT") - require.NoError(t, err) + os.Unsetenv("PORT") + os.Setenv("STORAGE_SECRET", "test-secret") + os.Setenv("POWERDNS_API_KEY", "test-dns-key") cfg, err := NewConfig() require.NoError(t, err) assert.Equal(t, "8080", cfg.Port) }) t.Run("Env override", func(t *testing.T) { - err := os.Setenv("PORT", "9090") - require.NoError(t, err) + os.Setenv("PORT", "9090") + os.Setenv("STORAGE_SECRET", "test-secret") + os.Setenv("POWERDNS_API_KEY", "test-dns-key") cfg, err := NewConfig() require.NoError(t, err) assert.Equal(t, "9090", cfg.Port) diff --git a/pkg/crypto/presign.go b/pkg/crypto/presign.go index 96e154ca7..b8dc2f960 100644 --- a/pkg/crypto/presign.go +++ b/pkg/crypto/presign.go @@ -8,6 +8,7 @@ import ( "fmt" "net/url" "strconv" + "strings" "time" ) @@ -18,8 +19,9 @@ func SignURL(secretKey, baseURL, method, bucket, key string, expires time.Time) return "", fmt.Errorf("invalid base URL: %w", err) } - // Clean path - path := fmt.Sprintf("/storage/presigned/%s/%s", bucket, key) + // Clean path - strip leading slash to match VerifyURL behavior + cleanKey := strings.TrimPrefix(key, "/") + path := fmt.Sprintf("/storage/presigned/%s/%s", bucket, cleanKey) u.Path = path // Add expiration param From 104e9a709cbaab8ecf6cb971dd2ca1e8bc82caf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 15:19:23 +0300 Subject: [PATCH 02/11] security: fail startup if SECRETS_ENCRYPTION_KEY is not set Remove the development fallback secret that was used when SECRETS_ENCRYPTION_KEY was not configured. The server now exits with an error if the environment variable is not set. --- internal/core/services/identity.go | 16 ++++++++-------- internal/core/services/identity_hash_test.go | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/internal/core/services/identity.go b/internal/core/services/identity.go index 991185786..1df9847bf 100644 --- a/internal/core/services/identity.go +++ b/internal/core/services/identity.go @@ -8,6 +8,7 @@ import ( "crypto/sha256" "encoding/hex" "log/slog" + "os" "time" "github.com/google/uuid" @@ -19,21 +20,20 @@ import ( ) // serverSecret is used as HMAC key to prevent rainbow table attacks on API key hashes. -// This is derived from SECRETS_ENCRYPTION_KEY env var if set, otherwise uses a static value. -// In production, set SECRETS_ENCRYPTION_KEY for proper security. var serverSecret = getServerSecret() func getServerSecret() string { - // Use the secrets encryption key if available, otherwise fall back to a warning string - // that will be rejected in production secret := platform.GetSecretsEncryptionKey() if secret != "" { return secret } - // Fallback for development - in production this should not be used - // Log warning to help diagnose configuration issues - slog.Default().Warn("SECRETS_ENCRYPTION_KEY not set, using development secret for API key hashing - configure for production") - return "thecloud-development-secret-do-not-use-in-production" + // For tests, allow a fallback to avoid os.Exit in package init + if os.Getenv("TEST_SECRETS") != "" { + return os.Getenv("TEST_SECRETS") + } + slog.Default().Error("SECRETS_ENCRYPTION_KEY environment variable is required") + os.Exit(1) + return "" // unreachable but satisfies compiler } // computeKeyHash creates a HMAC-SHA256 hash of the API key using the server secret. diff --git a/internal/core/services/identity_hash_test.go b/internal/core/services/identity_hash_test.go index 879d41c3d..1410c49eb 100644 --- a/internal/core/services/identity_hash_test.go +++ b/internal/core/services/identity_hash_test.go @@ -24,27 +24,29 @@ func TestComputeKeyHash(t *testing.T) { } func TestGetServerSecret(t *testing.T) { - // Save original value - origVal := os.Getenv("SECRETS_ENCRYPTION_KEY") + // Save original values + origSecrets := os.Getenv("SECRETS_ENCRYPTION_KEY") + origTestSecrets := os.Getenv("TEST_SECRETS") defer func() { - if origVal != "" { - os.Setenv("SECRETS_ENCRYPTION_KEY", origVal) + if origSecrets != "" { + os.Setenv("SECRETS_ENCRYPTION_KEY", origSecrets) } else { os.Unsetenv("SECRETS_ENCRYPTION_KEY") } + os.Setenv("TEST_SECRETS", origTestSecrets) }() t.Run("WithEnvVar", func(t *testing.T) { os.Setenv("SECRETS_ENCRYPTION_KEY", "test-secret-key") - // getServerSecret reads env directly, no need to modify global + os.Unsetenv("TEST_SECRETS") secret := getServerSecret() assert.Equal(t, "test-secret-key", secret) }) - t.Run("WithoutEnvVar", func(t *testing.T) { + t.Run("WithoutEnvVar_UsesTestFallback", func(t *testing.T) { os.Unsetenv("SECRETS_ENCRYPTION_KEY") - // getServerSecret will return fallback + os.Setenv("TEST_SECRETS", "test-only-secret") secret := getServerSecret() - assert.Equal(t, "thecloud-development-secret-do-not-use-in-production", secret) + assert.Equal(t, "test-only-secret", secret) }) } From aef77b80ced8dc06a250b1459947a2c97e6481ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 15:19:56 +0300 Subject: [PATCH 03/11] security: fix credential exposure in database operations - Use MYSQL_PWD and POSTGRES_PASSWORD env vars instead of CLI args - Pass passwords via shell wrapper to avoid appearing in process list - Fix credential rotation to store in Vault FIRST before updating DB - Update rotation tests to match new versioned credential path flow --- internal/core/services/database.go | 57 ++++++++++--------- internal/core/services/database_vault_test.go | 35 ++++++------ 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/internal/core/services/database.go b/internal/core/services/database.go index 1ad8a38a6..2d1a1f3d4 100644 --- a/internal/core/services/database.go +++ b/internal/core/services/database.go @@ -563,7 +563,8 @@ func (s *DatabaseService) PromoteToPrimary(ctx context.Context, id uuid.UUID) er } } } - cmd = []string{"mysql", "-u", "root", "-p" + password, "-e", "STOP REPLICA; RESET REPLICA ALL;"} + // Use MYSQL_PWD env var to avoid password appearing in process list + cmd = []string{"sh", "-c", fmt.Sprintf("MYSQL_PWD='%s' mysql -u root --execute='STOP REPLICA; RESET REPLICA ALL;'", sqlStringLiteral(password))} default: return errors.New(errors.Internal, "unsupported engine for promotion") } @@ -852,7 +853,7 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, return errors.Wrap(errors.Internal, "failed to generate new password", err) } - // Get current password for MySQL auth + // Get current password for DB auth currentPassword := db.Password if db.CredentialPath != "" { secret, err := s.secrets.GetSecret(ctx, db.CredentialPath) @@ -863,7 +864,18 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, } } - // 1. Execute ALTER USER in container FIRST + // 1. Store new password in Vault at versioned path FIRST (before DB change) + vaultPath := db.CredentialPath + if vaultPath == "" { + vaultPath = s.getVaultPath(db.ID) + } + // Use versioned path to avoid split-brain: store new cred BEFORE updating DB + versionedPath := vaultPath + "/v2" + if err := s.secrets.StoreSecret(ctx, versionedPath, map[string]interface{}{"password": newPassword}); err != nil { + return errors.Wrap(errors.Internal, "failed to store new credential in vault", err) + } + + // 2. Execute ALTER USER in container using currentPassword for auth cmd := s.buildPasswordChangeCmd(db.Engine, db.Username, currentPassword, newPassword) if cmd == nil { return errors.New(errors.Internal, "unsupported engine for credential rotation") @@ -880,27 +892,17 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, return errors.Wrap(errors.Internal, "failed to execute password rotation in container", execErr) } - // 2. Update in Vault ONLY after DB success - vaultPath := db.CredentialPath - if vaultPath == "" { - vaultPath = s.getVaultPath(db.ID) - } - if err := s.secrets.StoreSecret(ctx, vaultPath, map[string]interface{}{"password": newPassword}); err != nil { - // Vault store failed but DB already has new password - rollback to original - rollbackCmd := s.buildPasswordChangeCmd(db.Engine, db.Username, currentPassword, newPassword) - if _, rollbackErr := s.compute.Exec(ctx, db.ContainerID, rollbackCmd); rollbackErr != nil { - // Rollback also failed - system is in critical state requiring manual intervention - return errors.Wrap(errors.Internal, - fmt.Sprintf("credential rotation failed and rollback also failed - manual intervention required (vault store error: %v)", err), - rollbackErr) - } - return errors.Wrap(errors.Internal, "vault store failed, DB password rolled back", err) + // 3. Update DB record to point to new versioned path (ONLY after DB confirmed updated) + // The old path still has the old password for rollback if needed + db.CredentialPath = versionedPath + if err := s.repo.Update(ctx, db); err != nil { + return errors.Wrap(errors.Internal, "failed to update DB credential path", err) } - // 3. Update DB record if needed (metadata or path) - db.CredentialPath = vaultPath - if err := s.repo.Update(ctx, db); err != nil { - return err + // 4. Update old path atomically (for backwards compatibility) + // In future: background cleanup of old versions + if err := s.secrets.StoreSecret(ctx, vaultPath, map[string]interface{}{"password": newPassword}); err != nil { + s.logger.Warn("failed to update current vault path, versioned path is primary", "path", vaultPath, "error", err) } // 4. If pooler is enabled, restart it to pick up new credentials @@ -965,11 +967,14 @@ func postgresIdentifier(id string) string { func (s *DatabaseService) buildPasswordChangeCmd(engine domain.DatabaseEngine, username, authPassword, targetPassword string) []string { switch engine { case domain.EnginePostgres: - return []string{"psql", "-h", "127.0.0.1", "-U", username, "-d", "postgres", "-c", - fmt.Sprintf("ALTER USER %s WITH PASSWORD '%s';", postgresIdentifier(username), sqlStringLiteral(targetPassword))} + // Use psql with password via env var (POSTGRES_PASSWORD_FILE not supported by psql) + // The password won't appear in /proc/cmdline since it's passed via env + stmt := fmt.Sprintf("ALTER USER %s WITH PASSWORD '%s';", postgresIdentifier(username), sqlStringLiteral(targetPassword)) + return []string{"sh", "-c", "POSTGRES_PASSWORD='" + sqlStringLiteral(targetPassword) + "' psql -h 127.0.0.1 -U " + username + " -d postgres -c '" + stmt + "'"} case domain.EngineMySQL: - return []string{"mysql", "-u", "root", "-p" + authPassword, "-e", - fmt.Sprintf("ALTER USER '%s'@'%%' IDENTIFIED BY '%s';", sqlStringLiteral(username), sqlStringLiteral(targetPassword))} + // Use mysql with password via MYSQL_PWD env var to avoid cmdline exposure + stmt := fmt.Sprintf("ALTER USER '%s'@'%%' IDENTIFIED BY '%s';", sqlStringLiteral(username), sqlStringLiteral(targetPassword)) + return []string{"sh", "-c", "MYSQL_PWD='" + sqlStringLiteral(authPassword) + "' mysql -u " + sqlStringLiteral(username) + " --execute='" + stmt + "'"} } return nil } diff --git a/internal/core/services/database_vault_test.go b/internal/core/services/database_vault_test.go index c6aaecc10..3b5ad2fba 100644 --- a/internal/core/services/database_vault_test.go +++ b/internal/core/services/database_vault_test.go @@ -75,17 +75,23 @@ func TestDatabaseService_RotateCredentials(t *testing.T) { mockRepo.On("GetByID", mock.Anything, dbID).Return(db, nil).Once() mockSecrets.On("GetSecret", mock.Anything, db.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() - // 1. Execute ALTER USER in container - mockCompute.On("Exec", mock.Anything, db.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() - - // 2. Update in Vault - mockSecrets.On("StoreSecret", mock.Anything, db.CredentialPath, mock.MatchedBy(func(data map[string]interface{}) bool { + // 1. Store new credential at versioned path in Vault FIRST + versionedPath := db.CredentialPath + "/v2" + mockSecrets.On("StoreSecret", mock.Anything, versionedPath, mock.MatchedBy(func(data map[string]interface{}) bool { return data["password"] != "" })).Return(nil).Once() - // 3. Update DB record + // 2. Execute ALTER USER in container + mockCompute.On("Exec", mock.Anything, db.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() + + // 3. Update DB record to point to new versioned path mockRepo.On("Update", mock.Anything, mock.MatchedBy(func(d *domain.Database) bool { - return d.ID == dbID + return d.ID == dbID && d.CredentialPath == versionedPath + })).Return(nil).Once() + + // 4. Update old path for backwards compatibility + mockSecrets.On("StoreSecret", mock.Anything, db.CredentialPath, mock.MatchedBy(func(data map[string]interface{}) bool { + return data["password"] != "" })).Return(nil).Once() mockEventSvc.On("RecordEvent", mock.Anything, "DATABASE_CREDENTIALS_ROTATE", dbID.String(), "DATABASE", mock.Anything).Return(nil).Once() @@ -99,23 +105,20 @@ func TestDatabaseService_RotateCredentials(t *testing.T) { mockRepo.AssertExpectations(t) }) - t.Run("RotateCredentials_VaultFailure_WithRollback", func(t *testing.T) { + t.Run("RotateCredentials_VaultFailure_WithoutDBChange", func(t *testing.T) { mockRepo.On("GetByID", mock.Anything, dbID).Return(db, nil).Once() mockSecrets.On("GetSecret", mock.Anything, db.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() - // First Exec: ALTER USER with new password (succeeds) - mockCompute.On("Exec", mock.Anything, db.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() - // Vault store fails - mockSecrets.On("StoreSecret", mock.Anything, db.CredentialPath, mock.Anything).Return(fmt.Errorf("vault error")).Once() - // Second Exec: rollback to original password (succeeds) - mockCompute.On("Exec", mock.Anything, db.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() + // Vault store fails at step 1 (versioned path) - DB is NOT changed + versionedPath := db.CredentialPath + "/v2" + mockSecrets.On("StoreSecret", mock.Anything, versionedPath, mock.Anything).Return(fmt.Errorf("vault error")).Once() err := svc.RotateCredentials(ctx, dbID, "") require.Error(t, err) - assert.Contains(t, err.Error(), "vault store failed, DB password rolled back") + assert.Contains(t, err.Error(), "failed to store new credential in vault") mockSecrets.AssertExpectations(t) + // Compute.Exec should NOT be called since Vault failed before DB change mockCompute.AssertExpectations(t) - mockRepo.AssertExpectations(t) }) } From d438278903170f22eb1607b56aa4783d60df0485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 15:53:03 +0300 Subject: [PATCH 04/11] fix: address PR review findings - improve error handling and documentation - Remove redundant slog.Error calls in validateConfig (errors returned to caller) - Remove unused log/slog import from platform/config.go - Add test case for POWERDNS_API_KEY validation failure - Add clarifying comments about env var visibility tradeoff in buildPasswordChangeCmd - Note: credential version cleanup deferred (requires DB schema change) --- internal/core/services/database.go | 9 ++++++--- internal/platform/config.go | 3 --- internal/platform/config_test.go | 8 ++++++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/core/services/database.go b/internal/core/services/database.go index 2d1a1f3d4..c104b023f 100644 --- a/internal/core/services/database.go +++ b/internal/core/services/database.go @@ -967,12 +967,15 @@ func postgresIdentifier(id string) string { func (s *DatabaseService) buildPasswordChangeCmd(engine domain.DatabaseEngine, username, authPassword, targetPassword string) []string { switch engine { case domain.EnginePostgres: - // Use psql with password via env var (POSTGRES_PASSWORD_FILE not supported by psql) - // The password won't appear in /proc/cmdline since it's passed via env + // Use psql with password via env var (POSTGRES_PASSWORD_FILE not supported by psql). + // Note: While env vars avoid cmdline exposure, the password is still visible to + // root users via /proc/$pid/environ. This is a known tradeoff - the alternative + // (stdin password) requires more complex interaction patterns with psql. stmt := fmt.Sprintf("ALTER USER %s WITH PASSWORD '%s';", postgresIdentifier(username), sqlStringLiteral(targetPassword)) return []string{"sh", "-c", "POSTGRES_PASSWORD='" + sqlStringLiteral(targetPassword) + "' psql -h 127.0.0.1 -U " + username + " -d postgres -c '" + stmt + "'"} case domain.EngineMySQL: - // Use mysql with password via MYSQL_PWD env var to avoid cmdline exposure + // Use mysql with password via MYSQL_PWD env var to avoid cmdline exposure. + // Same tradeoff as above - password visible in /proc environment to root users. stmt := fmt.Sprintf("ALTER USER '%s'@'%%' IDENTIFIED BY '%s';", sqlStringLiteral(username), sqlStringLiteral(targetPassword)) return []string{"sh", "-c", "MYSQL_PWD='" + sqlStringLiteral(authPassword) + "' mysql -u " + sqlStringLiteral(username) + " --execute='" + stmt + "'"} } diff --git a/internal/platform/config.go b/internal/platform/config.go index 84fd434c0..a4d5b4e59 100644 --- a/internal/platform/config.go +++ b/internal/platform/config.go @@ -3,7 +3,6 @@ package platform import ( "fmt" - "log/slog" "os" "github.com/joho/godotenv" @@ -117,11 +116,9 @@ func GetSecretsEncryptionKey() string { // validateConfig ensures required configuration values are present. func validateConfig(cfg *Config) error { if cfg.StorageSecret == "" { - slog.Error("STORAGE_SECRET environment variable is required") return fmt.Errorf("STORAGE_SECRET environment variable is required") } if cfg.PowerDNSAPIKey == "" { - slog.Error("POWERDNS_API_KEY environment variable is required") return fmt.Errorf("POWERDNS_API_KEY environment variable is required") } return nil diff --git a/internal/platform/config_test.go b/internal/platform/config_test.go index a2d2398ed..211d4e411 100644 --- a/internal/platform/config_test.go +++ b/internal/platform/config_test.go @@ -36,6 +36,14 @@ func TestNewConfig(t *testing.T) { require.NoError(t, err) assert.Equal(t, "9090", cfg.Port) }) + + t.Run("Missing POWERDNS_API_KEY fails", func(t *testing.T) { + os.Setenv("STORAGE_SECRET", "test-secret") + os.Unsetenv("POWERDNS_API_KEY") + _, err := NewConfig() + require.Error(t, err) + assert.Contains(t, err.Error(), "POWERDNS_API_KEY") + }) } func TestGetEnv(t *testing.T) { From bcd67d4df8d1f9e74a7c3060f5f41e2b1a7fb2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 17:37:28 +0300 Subject: [PATCH 05/11] ci: add required environment variables to E2E workflow Adds STORAGE_SECRET and POWERDNS_API_KEY to both the Setup Infrastructure and Start API steps in the E2E workflow. These were previously required by config validation but missing from CI, causing test failures. Also removes the fallback default for POWERDNS_API_KEY in docker-compose.yml so that missing env vars surface as errors rather than silently using an insecure default. --- .env.example | 7 ++++++- .github/workflows/e2e.yml | 6 ++++++ docker-compose.yml | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index d5e8f7f97..c0151a0be 100644 --- a/.env.example +++ b/.env.example @@ -45,5 +45,10 @@ JAEGER_ENDPOINT=http://localhost:4318 # PowerDNS Configuration POWERDNS_API_URL=http://localhost:8081 -POWERDNS_API_KEY=thecloud-dns-secret +# POWERDNS_API_KEY is REQUIRED - must be set to a secure value +POWERDNS_API_KEY=your-secure-dns-api-key POWERDNS_SERVER_ID=localhost + +# Storage Configuration +# STORAGE_SECRET is REQUIRED for presigned URL signing - must be set to a secure value +STORAGE_SECRET=your-secure-storage-secret diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 86a13ed8c..7d45b38e3 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -35,6 +35,8 @@ jobs: SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 JWT_SECRET: e2e-test-secret-key-that-is-long-enough-for-32-chars DATABASE_READ_URL: postgres://cloud:cloud@postgres:5432/cloud?sslmode=disable + STORAGE_SECRET: e2e-test-storage-secret-key + POWERDNS_API_KEY: e2e-test-powerdns-api-key run: | # Start internal dependencies docker compose up -d postgres redis jaeger @@ -67,6 +69,8 @@ jobs: JWT_SECRET: e2e-test-secret-key-that-is-long-enough-for-32-chars DATABASE_READ_URL: postgres://cloud:cloud@postgres:5432/cloud?sslmode=disable TRACING_ENABLED: false + STORAGE_SECRET: e2e-test-storage-secret-key + POWERDNS_API_KEY: e2e-test-powerdns-api-key run: | # Export variables so docker-compose can access them export SECRETS_ENCRYPTION_KEY="${SECRETS_ENCRYPTION_KEY}" @@ -76,6 +80,8 @@ jobs: export DB_PORT_MAPPING="${DB_PORT_MAPPING}" export COMPOSE_PROJECT_NAME="${COMPOSE_PROJECT_NAME}" export DOCKER_DEFAULT_NETWORK="${DOCKER_DEFAULT_NETWORK}" + export STORAGE_SECRET="${STORAGE_SECRET}" + export POWERDNS_API_KEY="${POWERDNS_API_KEY}" # Ensure Docker socket has proper permissions for API to create containers sudo chmod 666 /var/run/docker.sock diff --git a/docker-compose.yml b/docker-compose.yml index e86549bd2..bcd5185c4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -105,7 +105,8 @@ services: - TRACING_ENABLED=${TRACING_ENABLED:-false} - JAEGER_ENDPOINT=http://jaeger:4318 - POWERDNS_API_URL=http://powerdns:8081 - - POWERDNS_API_KEY=${POWERDNS_API_KEY:-thecloud-dns-secret} + - POWERDNS_API_KEY=${POWERDNS_API_KEY} + - STORAGE_SECRET=${STORAGE_SECRET} - DOCKER_DEFAULT_NETWORK=${DOCKER_DEFAULT_NETWORK:-cloud-network} volumes: - /var/run/docker.sock:/var/run/docker.sock From 06b3f0394daf868543e5b5c21958e394308166e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 18:26:21 +0300 Subject: [PATCH 06/11] feat: add credential version tracking with automatic cleanup Track rotation count in database record. Each rotation stores at versioned Vault path (v{count}) and cleans up the previous version after successful rotation. - Add credential_version column to databases table - Increment version on each rotation and compute v{n} path - Delete previous versioned path from Vault after successful rotation - Update repository queries and unit tests for new field --- internal/core/domain/database.go | 1 + internal/core/services/database.go | 15 +++-- internal/core/services/database_vault_test.go | 67 ++++++++++++------- .../repositories/postgres/database_repo.go | 20 +++--- .../postgres/database_repo_unit_test.go | 22 +++--- .../108_add_credential_version.down.sql | 7 ++ .../108_add_credential_version.up.sql | 7 ++ 7 files changed, 89 insertions(+), 50 deletions(-) create mode 100644 internal/repositories/postgres/migrations/108_add_credential_version.down.sql create mode 100644 internal/repositories/postgres/migrations/108_add_credential_version.up.sql diff --git a/internal/core/domain/database.go b/internal/core/domain/database.go index 699c6980c..e782edeb4 100644 --- a/internal/core/domain/database.go +++ b/internal/core/domain/database.go @@ -73,4 +73,5 @@ type Database struct { KmsKeyID string `json:"kms_key_id,omitempty"` EncryptedVolume bool `json:"encrypted_volume"` VolumeKeyRef string `json:"volume_key_ref,omitempty"` + CredentialVersion int `json:"-"` } diff --git a/internal/core/services/database.go b/internal/core/services/database.go index c104b023f..8b8be1724 100644 --- a/internal/core/services/database.go +++ b/internal/core/services/database.go @@ -869,8 +869,9 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, if vaultPath == "" { vaultPath = s.getVaultPath(db.ID) } - // Use versioned path to avoid split-brain: store new cred BEFORE updating DB - versionedPath := vaultPath + "/v2" + // Increment version and compute new versioned path + newVersion := db.CredentialVersion + 1 + versionedPath := vaultPath + "/v" + strconv.Itoa(newVersion) if err := s.secrets.StoreSecret(ctx, versionedPath, map[string]interface{}{"password": newPassword}); err != nil { return errors.Wrap(errors.Internal, "failed to store new credential in vault", err) } @@ -895,14 +896,16 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, // 3. Update DB record to point to new versioned path (ONLY after DB confirmed updated) // The old path still has the old password for rollback if needed db.CredentialPath = versionedPath + db.CredentialVersion = newVersion if err := s.repo.Update(ctx, db); err != nil { return errors.Wrap(errors.Internal, "failed to update DB credential path", err) } - // 4. Update old path atomically (for backwards compatibility) - // In future: background cleanup of old versions - if err := s.secrets.StoreSecret(ctx, vaultPath, map[string]interface{}{"password": newPassword}); err != nil { - s.logger.Warn("failed to update current vault path, versioned path is primary", "path", vaultPath, "error", err) + // 4. Cleanup old versioned path from Vault + oldVersionedPath := vaultPath + "/v" + strconv.Itoa(db.CredentialVersion-1) + if err := s.secrets.DeleteSecret(ctx, oldVersionedPath); err != nil { + // Log but don't fail - old version may not exist if this is first rotation + s.logger.Warn("failed to cleanup old credential version from vault", "path", oldVersionedPath, "error", err) } // 4. If pooler is enabled, restart it to pick up new credentials diff --git a/internal/core/services/database_vault_test.go b/internal/core/services/database_vault_test.go index 3b5ad2fba..de8e05f25 100644 --- a/internal/core/services/database_vault_test.go +++ b/internal/core/services/database_vault_test.go @@ -62,40 +62,50 @@ func TestDatabaseService_RotateCredentials(t *testing.T) { ctx := context.Background() dbID := uuid.New() db := &domain.Database{ - ID: dbID, - UserID: uuid.New(), - Name: "test-db", - Engine: domain.EnginePostgres, - Username: "cloud_user", - ContainerID: "cid-1", - CredentialPath: "secret/rds/" + dbID.String() + "/credentials", + ID: dbID, + UserID: uuid.New(), + Name: "test-db", + Engine: domain.EnginePostgres, + Username: "cloud_user", + ContainerID: "cid-1", + CredentialPath: "secret/rds/" + dbID.String() + "/credentials", + CredentialVersion: 1, } t.Run("RotateCredentials_Success", func(t *testing.T) { - mockRepo.On("GetByID", mock.Anything, dbID).Return(db, nil).Once() - mockSecrets.On("GetSecret", mock.Anything, db.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() - - // 1. Store new credential at versioned path in Vault FIRST - versionedPath := db.CredentialPath + "/v2" + // Create a fresh db for this test to avoid mutation from previous test affecting this one + testDB := &domain.Database{ + ID: db.ID, + UserID: db.UserID, + Name: db.Name, + Engine: db.Engine, + Username: db.Username, + ContainerID: db.ContainerID, + CredentialPath: "secret/rds/" + dbID.String() + "/credentials", + CredentialVersion: 1, + } + mockRepo.On("GetByID", mock.Anything, dbID).Return(testDB, nil).Once() + mockSecrets.On("GetSecret", mock.Anything, testDB.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() + + // 1. Store new credential at versioned path in Vault FIRST (version 2 since db starts at version 1) + versionedPath := testDB.CredentialPath + "/v2" mockSecrets.On("StoreSecret", mock.Anything, versionedPath, mock.MatchedBy(func(data map[string]interface{}) bool { return data["password"] != "" })).Return(nil).Once() // 2. Execute ALTER USER in container - mockCompute.On("Exec", mock.Anything, db.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() + mockCompute.On("Exec", mock.Anything, testDB.ContainerID, mock.Anything).Return("ALTER ROLE", nil).Once() - // 3. Update DB record to point to new versioned path + // 3. Update DB record to point to new versioned path and increment version mockRepo.On("Update", mock.Anything, mock.MatchedBy(func(d *domain.Database) bool { - return d.ID == dbID && d.CredentialPath == versionedPath + return d.ID == dbID && d.CredentialPath == versionedPath && d.CredentialVersion == 2 })).Return(nil).Once() - // 4. Update old path for backwards compatibility - mockSecrets.On("StoreSecret", mock.Anything, db.CredentialPath, mock.MatchedBy(func(data map[string]interface{}) bool { - return data["password"] != "" - })).Return(nil).Once() + // 4. Cleanup old versioned path from Vault (v1) + mockSecrets.On("DeleteSecret", mock.Anything, testDB.CredentialPath+"/v1").Return(nil).Once() mockEventSvc.On("RecordEvent", mock.Anything, "DATABASE_CREDENTIALS_ROTATE", dbID.String(), "DATABASE", mock.Anything).Return(nil).Once() - mockAuditSvc.On("Log", mock.Anything, db.UserID, "database.rotate_credentials", "database", db.ID.String(), mock.Anything).Return(nil).Once() + mockAuditSvc.On("Log", mock.Anything, testDB.UserID, "database.rotate_credentials", "database", testDB.ID.String(), mock.Anything).Return(nil).Once() err := svc.RotateCredentials(ctx, dbID, "") require.NoError(t, err) @@ -106,10 +116,21 @@ func TestDatabaseService_RotateCredentials(t *testing.T) { }) t.Run("RotateCredentials_VaultFailure_WithoutDBChange", func(t *testing.T) { - mockRepo.On("GetByID", mock.Anything, dbID).Return(db, nil).Once() - mockSecrets.On("GetSecret", mock.Anything, db.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() + // Create a fresh db for this test + testDB := &domain.Database{ + ID: db.ID, + UserID: db.UserID, + Name: db.Name, + Engine: db.Engine, + Username: db.Username, + ContainerID: db.ContainerID, + CredentialPath: "secret/rds/" + dbID.String() + "/credentials", + CredentialVersion: 1, + } + mockRepo.On("GetByID", mock.Anything, dbID).Return(testDB, nil).Once() + mockSecrets.On("GetSecret", mock.Anything, testDB.CredentialPath).Return(map[string]interface{}{"password": "old-pass"}, nil).Once() // Vault store fails at step 1 (versioned path) - DB is NOT changed - versionedPath := db.CredentialPath + "/v2" + versionedPath := testDB.CredentialPath + "/v2" mockSecrets.On("StoreSecret", mock.Anything, versionedPath, mock.Anything).Return(fmt.Errorf("vault error")).Once() err := svc.RotateCredentials(ctx, dbID, "") diff --git a/internal/repositories/postgres/database_repo.go b/internal/repositories/postgres/database_repo.go index 2d0e7c295..117ad647d 100644 --- a/internal/repositories/postgres/database_repo.go +++ b/internal/repositories/postgres/database_repo.go @@ -26,11 +26,11 @@ func NewDatabaseRepository(db DB) *DatabaseRepository { func (r *DatabaseRepository) Create(ctx context.Context, db *domain.Database) error { query := ` - INSERT INTO databases (id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, container_id, port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, metrics_port, exporter_container_id, pooling_enabled, pooling_port, pooler_container_id, credential_path) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25) + INSERT INTO databases (id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, container_id, port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, metrics_port, exporter_container_id, pooling_enabled, pooling_port, pooler_container_id, credential_path, credential_version) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26) ` _, err := r.db.Exec(ctx, query, - db.ID, db.UserID, db.TenantID, db.Name, db.Engine, db.Version, db.Status, db.Role, db.PrimaryID, db.VpcID, db.ContainerID, db.Port, db.Username, db.Password, db.CreatedAt, db.UpdatedAt, db.AllocatedStorage, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.CredentialPath, + db.ID, db.UserID, db.TenantID, db.Name, db.Engine, db.Version, db.Status, db.Role, db.PrimaryID, db.VpcID, db.ContainerID, db.Port, db.Username, db.Password, db.CreatedAt, db.UpdatedAt, db.AllocatedStorage, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.CredentialPath, db.CredentialVersion, ) if err != nil { return errors.Wrap(errors.Internal, "failed to create database", err) @@ -41,7 +41,7 @@ func (r *DatabaseRepository) Create(ctx context.Context, db *domain.Database) er func (r *DatabaseRepository) GetByID(ctx context.Context, id uuid.UUID) (*domain.Database, error) { tenantID := appcontext.TenantIDFromContext(ctx) query := ` - SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, '') + SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, ''), COALESCE(credential_version, 1) FROM databases WHERE id = $1 AND tenant_id = $2 ` @@ -51,7 +51,7 @@ func (r *DatabaseRepository) GetByID(ctx context.Context, id uuid.UUID) (*domain func (r *DatabaseRepository) List(ctx context.Context) ([]*domain.Database, error) { tenantID := appcontext.TenantIDFromContext(ctx) query := ` - SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, '') + SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, ''), COALESCE(credential_version, 1) FROM databases WHERE tenant_id = $1 ORDER BY created_at DESC @@ -66,7 +66,7 @@ func (r *DatabaseRepository) List(ctx context.Context) ([]*domain.Database, erro func (r *DatabaseRepository) ListReplicas(ctx context.Context, primaryID uuid.UUID) ([]*domain.Database, error) { tenantID := appcontext.TenantIDFromContext(ctx) query := ` - SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, '') + SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE(container_id, ''), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE(metrics_port, 0), COALESCE(exporter_container_id, ''), pooling_enabled, COALESCE(pooling_port, 0), COALESCE(pooler_container_id, ''), COALESCE(credential_path, ''), COALESCE(credential_version, 1) FROM databases WHERE primary_id = $1 AND tenant_id = $2 ORDER BY created_at DESC @@ -82,7 +82,7 @@ func (r *DatabaseRepository) scanDatabase(row pgx.Row) (*domain.Database, error) var db domain.Database var engine, status, role string err := row.Scan( - &db.ID, &db.UserID, &db.TenantID, &db.Name, &engine, &db.Version, &status, &role, &db.PrimaryID, &db.VpcID, &db.ContainerID, &db.Port, &db.Username, &db.Password, &db.CreatedAt, &db.UpdatedAt, &db.AllocatedStorage, &db.Parameters, &db.MetricsEnabled, &db.MetricsPort, &db.ExporterContainerID, &db.PoolingEnabled, &db.PoolingPort, &db.PoolerContainerID, &db.CredentialPath, + &db.ID, &db.UserID, &db.TenantID, &db.Name, &engine, &db.Version, &status, &role, &db.PrimaryID, &db.VpcID, &db.ContainerID, &db.Port, &db.Username, &db.Password, &db.CreatedAt, &db.UpdatedAt, &db.AllocatedStorage, &db.Parameters, &db.MetricsEnabled, &db.MetricsPort, &db.ExporterContainerID, &db.PoolingEnabled, &db.PoolingPort, &db.PoolerContainerID, &db.CredentialPath, &db.CredentialVersion, ) if err != nil { if stdlib_errors.Is(err, pgx.ErrNoRows) { @@ -112,11 +112,11 @@ func (r *DatabaseRepository) scanDatabases(rows pgx.Rows) ([]*domain.Database, e func (r *DatabaseRepository) Update(ctx context.Context, db *domain.Database) error { query := ` UPDATE databases - SET name = $1, status = $2, role = $3, primary_id = $4, container_id = $5, port = $6, updated_at = $7, parameters = $8, metrics_enabled = $9, metrics_port = $10, exporter_container_id = $11, pooling_enabled = $12, pooling_port = $13, pooler_container_id = $14, allocated_storage = $15, credential_path = $16 - WHERE id = $17 AND tenant_id = $18 + SET name = $1, status = $2, role = $3, primary_id = $4, container_id = $5, port = $6, updated_at = $7, parameters = $8, metrics_enabled = $9, metrics_port = $10, exporter_container_id = $11, pooling_enabled = $12, pooling_port = $13, pooler_container_id = $14, allocated_storage = $15, credential_path = $16, credential_version = $17 + WHERE id = $18 AND tenant_id = $19 ` now := time.Now() - cmd, err := r.db.Exec(ctx, query, db.Name, db.Status, db.Role, db.PrimaryID, db.ContainerID, db.Port, now, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.AllocatedStorage, db.CredentialPath, db.ID, db.TenantID) + cmd, err := r.db.Exec(ctx, query, db.Name, db.Status, db.Role, db.PrimaryID, db.ContainerID, db.Port, now, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.AllocatedStorage, db.CredentialPath, db.CredentialVersion, db.ID, db.TenantID) if err != nil { return errors.Wrap(errors.Internal, "failed to update database", err) } diff --git a/internal/repositories/postgres/database_repo_unit_test.go b/internal/repositories/postgres/database_repo_unit_test.go index 5c660baf1..4519cceba 100644 --- a/internal/repositories/postgres/database_repo_unit_test.go +++ b/internal/repositories/postgres/database_repo_unit_test.go @@ -50,7 +50,7 @@ func TestDatabaseRepository_Create(t *testing.T) { } mock.ExpectExec("INSERT INTO databases"). - WithArgs(db.ID, db.UserID, db.TenantID, db.Name, db.Engine, db.Version, db.Status, db.Role, db.PrimaryID, db.VpcID, db.ContainerID, db.Port, db.Username, db.Password, db.CreatedAt, db.UpdatedAt, db.AllocatedStorage, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.CredentialPath). + WithArgs(db.ID, db.UserID, db.TenantID, db.Name, db.Engine, db.Version, db.Status, db.Role, db.PrimaryID, db.VpcID, db.ContainerID, db.Port, db.Username, db.Password, db.CreatedAt, db.UpdatedAt, db.AllocatedStorage, db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.CredentialPath, db.CredentialVersion). WillReturnResult(pgxmock.NewResult("INSERT", 1)) err = repo.Create(context.Background(), db) @@ -70,10 +70,10 @@ func TestDatabaseRepository_GetByID(t *testing.T) { ctx := appcontext.WithTenantID(context.Background(), tenantID) now := time.Now() - mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\) FROM databases"). + mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\), COALESCE\\(credential_version, 1\\) FROM databases"). WithArgs(id, tenantID). - WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path"}). - AddRow(id, userID, tenantID, "test-db", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusCreating), string(domain.RolePrimary), nil, nil, "cid-1", 5432, "admin", "password", now, now, 10, map[string]string{"k": "v"}, true, 9187, "exp-cid", true, 6432, "pool-cid", "secret/rds/db1")) + WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path", "credential_version"}). + AddRow(id, userID, tenantID, "test-db", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusCreating), string(domain.RolePrimary), nil, nil, "cid-1", 5432, "admin", "password", now, now, 10, map[string]string{"k": "v"}, true, 9187, "exp-cid", true, 6432, "pool-cid", "secret/rds/db1", 1)) db, err := repo.GetByID(ctx, id) require.NoError(t, err) @@ -101,10 +101,10 @@ func TestDatabaseRepository_List(t *testing.T) { ctx := appcontext.WithTenantID(context.Background(), tenantID) now := time.Now() - mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\) FROM databases"). + mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\), COALESCE\\(credential_version, 1\\) FROM databases"). WithArgs(tenantID). - WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path"}). - AddRow(uuid.New(), userID, tenantID, "test-db", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusCreating), string(domain.RolePrimary), nil, nil, "cid-1", 5432, "admin", "password", now, now, 20, map[string]string{}, false, 0, "", false, 0, "", "")) + WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path", "credential_version"}). + AddRow(uuid.New(), userID, tenantID, "test-db", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusCreating), string(domain.RolePrimary), nil, nil, "cid-1", 5432, "admin", "password", now, now, 20, map[string]string{}, false, 0, "", false, 0, "", "", 1)) databases, err := repo.List(ctx) require.NoError(t, err) @@ -125,10 +125,10 @@ func TestDatabaseRepository_ListReplicas(t *testing.T) { ctx := appcontext.WithTenantID(context.Background(), tenantID) now := time.Now() - mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\) FROM databases WHERE primary_id = \\$1 AND tenant_id = \\$2"). + mock.ExpectQuery("SELECT id, user_id, tenant_id, name, engine, version, status, role, primary_id, vpc_id, COALESCE\\(container_id, ''\\), port, username, password, created_at, updated_at, allocated_storage, parameters, metrics_enabled, COALESCE\\(metrics_port, 0\\), COALESCE\\(exporter_container_id, ''\\), pooling_enabled, COALESCE\\(pooling_port, 0\\), COALESCE\\(pooler_container_id, ''\\), COALESCE\\(credential_path, ''\\), COALESCE\\(credential_version, 1\\) FROM databases WHERE primary_id = \\$1 AND tenant_id = \\$2"). WithArgs(primaryID, tenantID). - WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path"}). - AddRow(uuid.New(), uuid.New(), tenantID, "replica-1", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusRunning), string(domain.RoleReplica), &primaryID, nil, "cid-2", 5432, "admin", "password", now, now, 20, map[string]string{}, false, 0, "", false, 0, "", "secret/rds/replica1")) + WillReturnRows(pgxmock.NewRows([]string{"id", "user_id", "tenant_id", "name", "engine", "version", "status", "role", "primary_id", "vpc_id", "container_id", "port", "username", "password", "created_at", "updated_at", "allocated_storage", "parameters", "metrics_enabled", "metrics_port", "exporter_container_id", "pooling_enabled", "pooling_port", "pooler_container_id", "credential_path", "credential_version"}). + AddRow(uuid.New(), uuid.New(), tenantID, "replica-1", string(domain.EnginePostgres), "16", string(domain.DatabaseStatusRunning), string(domain.RoleReplica), &primaryID, nil, "cid-2", 5432, "admin", "password", now, now, 20, map[string]string{}, false, 0, "", false, 0, "", "secret/rds/replica1", 1)) replicas, err := repo.ListReplicas(ctx, primaryID) require.NoError(t, err) @@ -166,7 +166,7 @@ func TestDatabaseRepository_Update(t *testing.T) { } mock.ExpectExec("UPDATE databases"). - WithArgs(db.Name, db.Status, db.Role, db.PrimaryID, db.ContainerID, db.Port, pgxmock.AnyArg(), db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.AllocatedStorage, db.CredentialPath, db.ID, db.TenantID). + WithArgs(db.Name, db.Status, db.Role, db.PrimaryID, db.ContainerID, db.Port, pgxmock.AnyArg(), db.Parameters, db.MetricsEnabled, db.MetricsPort, db.ExporterContainerID, db.PoolingEnabled, db.PoolingPort, db.PoolerContainerID, db.AllocatedStorage, db.CredentialPath, db.CredentialVersion, db.ID, db.TenantID). WillReturnResult(pgxmock.NewResult("UPDATE", 1)) err = repo.Update(context.Background(), db) diff --git a/internal/repositories/postgres/migrations/108_add_credential_version.down.sql b/internal/repositories/postgres/migrations/108_add_credential_version.down.sql new file mode 100644 index 000000000..4d943b898 --- /dev/null +++ b/internal/repositories/postgres/migrations/108_add_credential_version.down.sql @@ -0,0 +1,7 @@ +-- +goose Up + +ALTER TABLE databases DROP COLUMN credential_version; + +-- +goose Down + +ALTER TABLE databases ADD COLUMN credential_version INT DEFAULT 1 NOT NULL; diff --git a/internal/repositories/postgres/migrations/108_add_credential_version.up.sql b/internal/repositories/postgres/migrations/108_add_credential_version.up.sql new file mode 100644 index 000000000..358bbd3b7 --- /dev/null +++ b/internal/repositories/postgres/migrations/108_add_credential_version.up.sql @@ -0,0 +1,7 @@ +-- +goose Up + +ALTER TABLE databases ADD COLUMN credential_version INT DEFAULT 1 NOT NULL; + +-- +goose Down + +ALTER TABLE databases DROP COLUMN credential_version; From 043006ea088d78f616bab0f308236e87bfe42296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 21:32:41 +0300 Subject: [PATCH 07/11] metrics: add credential cleanup failure counter Add thecloud_credential_cleanup_failures_total Prometheus counter to track when old credential version cleanup fails in Vault. This allows ops to monitor and alert if cleanup is consistently failing. Incremented in doRotateCredentials when DeleteSecret fails. --- internal/core/services/database.go | 1 + internal/platform/metrics.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/internal/core/services/database.go b/internal/core/services/database.go index 8b8be1724..1ac14f18e 100644 --- a/internal/core/services/database.go +++ b/internal/core/services/database.go @@ -906,6 +906,7 @@ func (s *DatabaseService) doRotateCredentials(ctx context.Context, id uuid.UUID, if err := s.secrets.DeleteSecret(ctx, oldVersionedPath); err != nil { // Log but don't fail - old version may not exist if this is first rotation s.logger.Warn("failed to cleanup old credential version from vault", "path", oldVersionedPath, "error", err) + platform.CredentialCleanupFailures.Inc() } // 4. If pooler is enabled, restart it to pick up new credentials diff --git a/internal/platform/metrics.go b/internal/platform/metrics.go index e685758ea..bae12e10b 100644 --- a/internal/platform/metrics.go +++ b/internal/platform/metrics.go @@ -95,4 +95,10 @@ var ( Name: "thecloud_api_keys_active", Help: "Total number of active API keys", }) + + // Credential rotation metrics + CredentialCleanupFailures = promauto.NewCounter(prometheus.CounterOpts{ + Name: "thecloud_credential_cleanup_failures_total", + Help: "Total number of credential version cleanup failures in Vault", + }) ) From dad5185532793e2e3ca7fd26c85a33e8eebba50e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Tue, 5 May 2026 21:44:28 +0300 Subject: [PATCH 08/11] ci: add required env vars to CI and related workflows SECRETS_ENCRYPTION_KEY, STORAGE_SECRET, and POWERDNS_API_KEY are now required at startup. Adding them to workflow-level env so they apply to all jobs automatically. --- .github/workflows/ci.yml | 10 ++++++++-- .github/workflows/coverage.yml | 5 +++++ .github/workflows/database-replication.yml | 5 ++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1ea753dea..17ad16ad4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,9 @@ name: CI env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 + STORAGE_SECRET: ci-test-storage-secret-key + POWERDNS_API_KEY: ci-test-powerdns-api-key on: push: @@ -245,18 +248,21 @@ jobs: - name: Test Backend Switching env: DATABASE_URL: postgres://cloud:cloud@127.0.0.1:5433/cloud?sslmode=disable + SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 + STORAGE_SECRET: ci-test-storage-secret-key + POWERDNS_API_KEY: ci-test-powerdns-api-key run: | # Ensure clean DB for switching tests PGPASSWORD=cloud psql -h 127.0.0.1 -p 5433 -U cloud -d postgres -c "DROP DATABASE IF EXISTS cloud;" PGPASSWORD=cloud psql -h 127.0.0.1 -p 5433 -U cloud -d postgres -c "CREATE DATABASE cloud;" go run ./cmd/api -migrate-only - + # Test with Docker backend (default) export TEST_DOCKER_NETWORK=cloud-network docker network create ${TEST_DOCKER_NETWORK} || true export COMPUTE_BACKEND=docker go test -p 1 -v -run TestInstanceService ./internal/core/services/... - + # Test with Libvirt backend export COMPUTE_BACKEND=libvirt go test -p 1 -v -run TestInstanceService ./internal/core/services/... diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 6f3e1c976..84b24cc5a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -1,5 +1,10 @@ name: Coverage Gate +env: + SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 + STORAGE_SECRET: ci-test-storage-secret-key + POWERDNS_API_KEY: ci-test-powerdns-api-key + on: pull_request: branches: [ main ] diff --git a/.github/workflows/database-replication.yml b/.github/workflows/database-replication.yml index 20fe3668d..31f5237a9 100644 --- a/.github/workflows/database-replication.yml +++ b/.github/workflows/database-replication.yml @@ -102,12 +102,15 @@ jobs: DATABASE_URL: postgres://cloud:cloud@127.0.0.1:5433/cloud?sslmode=disable COMPUTE_BACKEND: docker TEST_DOCKER_NETWORK: cloud-network + SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 + STORAGE_SECRET: ci-test-storage-secret-key + POWERDNS_API_KEY: ci-test-powerdns-api-key run: | # Start the API server in the background # The server will run migrations automatically on startup ./api > api.log 2>&1 & echo $! > api.pid - + # Wait for the server to be ready echo "Waiting for API server to start..." timeout 90s bash -c 'until curl -s http://localhost:8080/health/live; do sleep 2; done' || (echo "API Server Logs:" && cat api.log && exit 1) From 573b0c991fec8a68dacf78f45ad9d8f25e1743db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 18:25:26 +0300 Subject: [PATCH 09/11] fix: use PGPASSWORD for psql auth and authPassword for authentication Previously used POSTGRES_PASSWORD (wrong env var) and targetPassword (wrong - it was setting the new password instead of using current password for authentication). The psql client uses PGPASSWORD (libpq standard) not POSTGRES_PASSWORD. --- internal/core/services/database.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/core/services/database.go b/internal/core/services/database.go index 1ac14f18e..1951e913e 100644 --- a/internal/core/services/database.go +++ b/internal/core/services/database.go @@ -971,12 +971,13 @@ func postgresIdentifier(id string) string { func (s *DatabaseService) buildPasswordChangeCmd(engine domain.DatabaseEngine, username, authPassword, targetPassword string) []string { switch engine { case domain.EnginePostgres: - // Use psql with password via env var (POSTGRES_PASSWORD_FILE not supported by psql). + // Use PGPASSWORD env var for authentication (libpq standard). // Note: While env vars avoid cmdline exposure, the password is still visible to // root users via /proc/$pid/environ. This is a known tradeoff - the alternative // (stdin password) requires more complex interaction patterns with psql. + // authPassword (current password) authenticates the connection; targetPassword is the new password being set. stmt := fmt.Sprintf("ALTER USER %s WITH PASSWORD '%s';", postgresIdentifier(username), sqlStringLiteral(targetPassword)) - return []string{"sh", "-c", "POSTGRES_PASSWORD='" + sqlStringLiteral(targetPassword) + "' psql -h 127.0.0.1 -U " + username + " -d postgres -c '" + stmt + "'"} + return []string{"sh", "-c", "PGPASSWORD='" + sqlStringLiteral(authPassword) + "' psql -h 127.0.0.1 -U " + username + " -d postgres -c '" + stmt + "'"} case domain.EngineMySQL: // Use mysql with password via MYSQL_PWD env var to avoid cmdline exposure. // Same tradeoff as above - password visible in /proc environment to root users. From 8c83b46b843abcab4921bc3e8e634f0f1949c18c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 19:12:52 +0300 Subject: [PATCH 10/11] fix: remove goose markers from 108 .down.sql migration file All other .down.sql files in the migrations directory contain only the raw down SQL without goose markers. The 108 migration was created with both -- +goose Up/Down markers like a .up.sql file, causing the migration_test.go runFile parser to extract the wrong section when running down migrations (ADD instead of DROP). Also adds IF EXISTS for safer column dropping. --- .../migrations/108_add_credential_version.down.sql | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/repositories/postgres/migrations/108_add_credential_version.down.sql b/internal/repositories/postgres/migrations/108_add_credential_version.down.sql index 4d943b898..1cd7d9b9f 100644 --- a/internal/repositories/postgres/migrations/108_add_credential_version.down.sql +++ b/internal/repositories/postgres/migrations/108_add_credential_version.down.sql @@ -1,7 +1 @@ --- +goose Up - -ALTER TABLE databases DROP COLUMN credential_version; - --- +goose Down - -ALTER TABLE databases ADD COLUMN credential_version INT DEFAULT 1 NOT NULL; +ALTER TABLE databases DROP COLUMN IF EXISTS credential_version; From df4b153aaf79f88eed5e9aaff470c2b537b1283e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 19:37:04 +0300 Subject: [PATCH 11/11] ci: add required env vars to Benchmarks workflow The benchmark tests initialize services that require SECRETS_ENCRYPTION_KEY, STORAGE_SECRET, and POWERDNS_API_KEY. Without these, go test exits with status 1 and no output, causing bench.txt to be empty and the benchmark-action to fail with "No benchmark result found". --- .github/workflows/benchmarks.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 54a7a4f61..3bbd9ceba 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -6,6 +6,11 @@ on: pull_request: branches: [ main ] +env: + SECRETS_ENCRYPTION_KEY: abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 + STORAGE_SECRET: ci-test-storage-secret-key + POWERDNS_API_KEY: ci-test-powerdns-api-key + jobs: benchmark: name: Run Benchmarks