diff --git a/internal/config/config.go b/internal/config/config.go index 75c829e64..26c6ed81d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -157,6 +157,7 @@ func ResolveConfig() (*Config, error) { Features: viperInstance.GetStringSlice(FeaturesKey), Labels: resolveLabels(), LibDir: viperInstance.GetString(LibDirPathKey), + ExternalDataSource: resolveExternalDataSource(), SyslogServer: resolveSyslogServer(), } @@ -475,6 +476,7 @@ func registerFlags() { registerCollectorFlags(fs) registerClientFlags(fs) registerDataPlaneFlags(fs) + registerExternalDataSourceFlags(fs) fs.SetNormalizeFunc(normalizeFunc) @@ -489,6 +491,24 @@ func registerFlags() { }) } +func registerExternalDataSourceFlags(fs *flag.FlagSet) { + fs.String( + ExternalDataSourceProxyUrlKey, + DefExternalDataSourceProxyUrl, + "Url to the proxy service to fetch the external file.", + ) + fs.StringSlice( + ExternalDataSourceAllowDomainsKey, + []string{}, + "List of allowed domains for external data sources.", + ) + fs.Int64( + ExternalDataSourceMaxBytesKey, + DefExternalDataSourceMaxBytes, + "Maximum size in bytes for external data sources.", + ) +} + func registerDataPlaneFlags(fs *flag.FlagSet) { fs.Duration( NginxReloadMonitoringPeriodKey, @@ -628,6 +648,11 @@ func registerClientFlags(fs *flag.FlagSet) { DefMaxFileSize, "Max file size in bytes.", ) + fs.Duration( + ClientFileDownloadTimeoutKey, + DefClientFileDownloadTimeout, + "Timeout value in seconds, to downloading file for config apply.", + ) fs.Int( ClientGRPCMaxParallelFileOperationsKey, @@ -1120,6 +1145,7 @@ func resolveClient() *Client { RandomizationFactor: viperInstance.GetFloat64(ClientBackoffRandomizationFactorKey), Multiplier: viperInstance.GetFloat64(ClientBackoffMultiplierKey), }, + FileDownloadTimeout: viperInstance.GetDuration(ClientFileDownloadTimeoutKey), } } @@ -1560,3 +1586,35 @@ func areCommandServerProxyTLSSettingsSet() bool { viperInstance.IsSet(CommandServerProxyTLSSkipVerifyKey) || viperInstance.IsSet(CommandServerProxyTLSServerNameKey) } + +func resolveExternalDataSource() *ExternalDataSource { + proxyURLStruct := ProxyURL{ + URL: viperInstance.GetString(ExternalDataSourceProxyUrlKey), + } + externalDataSource := &ExternalDataSource{ + ProxyURL: proxyURLStruct, + AllowedDomains: viperInstance.GetStringSlice(ExternalDataSourceAllowDomainsKey), + MaxBytes: viperInstance.GetInt64(ExternalDataSourceMaxBytesKey), + } + + if err := validateAllowedDomains(externalDataSource.AllowedDomains); err != nil { + return nil + } + + return externalDataSource +} + +func validateAllowedDomains(domains []string) error { + if len(domains) == 0 { + return nil + } + + for _, domain := range domains { + if strings.ContainsAny(domain, "/\\ ") || domain == "" { + slog.Error("domain is not specified in allowed_domains") + return errors.New("invalid domain found in allowed_domains") + } + } + + return nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 5c4384ba8..b35818ebe 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1425,6 +1425,13 @@ func createConfig() *Config { config.FeatureCertificates, config.FeatureFileWatcher, config.FeatureMetrics, config.FeatureAPIAction, config.FeatureLogsNap, }, + ExternalDataSource: &ExternalDataSource{ + ProxyURL: ProxyURL{ + URL: "", + }, + AllowedDomains: nil, + MaxBytes: 0, + }, } } @@ -1569,3 +1576,73 @@ func TestValidateLabel(t *testing.T) { }) } } + +func TestValidateAllowedDomains(t *testing.T) { + tests := []struct { + name string + domains []string + wantErr bool + }{ + { + name: "Test 1: Success: Empty slice", + domains: []string{}, + wantErr: false, + }, + { + name: "Test 2: Success: Nil slice", + domains: nil, + wantErr: false, + }, + { + name: "Test 3: Success: Valid domains", + domains: []string{"example.com", "api.nginx.com", "sub.domain.io"}, + wantErr: false, + }, + { + name: "Test 4: Failure: Domain contains space", + domains: []string{"valid.com", "bad domain.com"}, + wantErr: true, + }, + { + name: "Test 5: Failure: Empty string domain", + domains: []string{"valid.com", ""}, + wantErr: true, + }, + { + name: "Test 6: Failure: Domain contains forward slash /", + domains: []string{"domain.com/path"}, + wantErr: true, + }, + { + name: "Test 7: Failure: Domain contains backward slash \\", + domains: []string{"domain.com\\path"}, + wantErr: true, + }, + { + name: "Test 8: Failure: Mixed valid and invalid (first is invalid)", + domains: []string{" only.com", "good.com"}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var logBuffer bytes.Buffer + logHandler := slog.NewTextHandler(&logBuffer, &slog.HandlerOptions{Level: slog.LevelError}) + + originalLogger := slog.Default() + slog.SetDefault(slog.New(logHandler)) + defer slog.SetDefault(originalLogger) + + actualErr := validateAllowedDomains(tt.domains) + + if tt.wantErr { + require.Error(t, actualErr, "Expected an error but got nil.") + assert.Contains(t, logBuffer.String(), "domain is not specified in allowed_domains", + "Expected the error log message to be present in the output.") + } else { + assert.NoError(t, actualErr, "Did not expect an error but got one: %v", actualErr) + } + }) + } +} diff --git a/internal/config/defaults.go b/internal/config/defaults.go index 0f1e08075..ed2cdbe90 100644 --- a/internal/config/defaults.go +++ b/internal/config/defaults.go @@ -82,6 +82,8 @@ const ( DefBackoffMaxInterval = 20 * time.Second DefBackoffMaxElapsedTime = 1 * time.Minute + DefClientFileDownloadTimeout = 60 * time.Second + // Watcher defaults DefInstanceWatcherMonitoringFrequency = 5 * time.Second DefInstanceHealthWatcherMonitoringFrequency = 5 * time.Second @@ -114,6 +116,9 @@ const ( // File defaults DefLibDir = "/var/lib/nginx-agent" + + DefExternalDataSourceProxyUrl = "" + DefExternalDataSourceMaxBytes = 100 * 1024 * 1024 ) func DefaultFeatures() []string { diff --git a/internal/config/flags.go b/internal/config/flags.go index d0f664540..135cacb71 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -25,6 +25,7 @@ const ( InstanceHealthWatcherMonitoringFrequencyKey = "watchers_instance_health_watcher_monitoring_frequency" FileWatcherKey = "watchers_file_watcher" LibDirPathKey = "lib_dir" + ExternalDataSourceRootKey = "external_data_source" ) var ( @@ -47,6 +48,7 @@ var ( ClientBackoffMaxElapsedTimeKey = pre(ClientRootKey) + "backoff_max_elapsed_time" ClientBackoffRandomizationFactorKey = pre(ClientRootKey) + "backoff_randomization_factor" ClientBackoffMultiplierKey = pre(ClientRootKey) + "backoff_multiplier" + ClientFileDownloadTimeoutKey = pre(ClientRootKey) + "file_download_timeout" CollectorConfigPathKey = pre(CollectorRootKey) + "config_path" CollectorAdditionalConfigPathsKey = pre(CollectorRootKey) + "additional_config_paths" @@ -141,6 +143,11 @@ var ( FileWatcherMonitoringFrequencyKey = pre(FileWatcherKey) + "monitoring_frequency" NginxExcludeFilesKey = pre(FileWatcherKey) + "exclude_files" + + ExternalDataSourceProxyKey = pre(ExternalDataSourceRootKey) + "proxy" + ExternalDataSourceProxyUrlKey = pre(ExternalDataSourceProxyKey) + "url" + ExternalDataSourceMaxBytesKey = pre(ExternalDataSourceRootKey) + "max_bytes" + ExternalDataSourceAllowDomainsKey = pre(ExternalDataSourceRootKey) + "allowed_domains" ) func pre(prefixes ...string) string { diff --git a/internal/config/types.go b/internal/config/types.go index 72eda1369..e553e0d40 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -36,21 +36,22 @@ func parseServerType(str string) (ServerType, bool) { type ( Config struct { - Command *Command `yaml:"command" mapstructure:"command"` - AuxiliaryCommand *Command `yaml:"auxiliary_command" mapstructure:"auxiliary_command"` - Log *Log `yaml:"log" mapstructure:"log"` - DataPlaneConfig *DataPlaneConfig `yaml:"data_plane_config" mapstructure:"data_plane_config"` - Client *Client `yaml:"client" mapstructure:"client"` - Collector *Collector `yaml:"collector" mapstructure:"collector"` - Watchers *Watchers `yaml:"watchers" mapstructure:"watchers"` - SyslogServer *SyslogServer `yaml:"syslog_server" mapstructure:"syslog_server"` - Labels map[string]any `yaml:"labels" mapstructure:"labels"` - Version string `yaml:"-"` - Path string `yaml:"-"` - UUID string `yaml:"-"` - LibDir string `yaml:"-"` - AllowedDirectories []string `yaml:"allowed_directories" mapstructure:"allowed_directories"` - Features []string `yaml:"features" mapstructure:"features"` + Command *Command `yaml:"command" mapstructure:"command"` + AuxiliaryCommand *Command `yaml:"auxiliary_command" mapstructure:"auxiliary_command"` + Log *Log `yaml:"log" mapstructure:"log"` + DataPlaneConfig *DataPlaneConfig `yaml:"data_plane_config" mapstructure:"data_plane_config"` + Client *Client `yaml:"client" mapstructure:"client"` + Collector *Collector `yaml:"collector" mapstructure:"collector"` + Watchers *Watchers `yaml:"watchers" mapstructure:"watchers"` + SyslogServer *SyslogServer `yaml:"syslog_server" mapstructure:"syslog_server"` + ExternalDataSource *ExternalDataSource `yaml:"external_data_source" mapstructure:"external_data_source"` + Labels map[string]any `yaml:"labels" mapstructure:"labels"` + Version string `yaml:"-"` + Path string `yaml:"-"` + UUID string `yaml:"-"` + LibDir string `yaml:"-"` + AllowedDirectories []string `yaml:"allowed_directories" mapstructure:"allowed_directories"` + Features []string `yaml:"features" mapstructure:"features"` } Log struct { @@ -74,9 +75,10 @@ type ( } Client struct { - HTTP *HTTP `yaml:"http" mapstructure:"http"` - Grpc *GRPC `yaml:"grpc" mapstructure:"grpc"` - Backoff *BackOff `yaml:"backoff" mapstructure:"backoff"` + HTTP *HTTP `yaml:"http" mapstructure:"http"` + Grpc *GRPC `yaml:"grpc" mapstructure:"grpc"` + Backoff *BackOff `yaml:"backoff" mapstructure:"backoff"` + FileDownloadTimeout time.Duration `yaml:"file_download_timeout" mapstructure:"file_download_timeout"` } HTTP struct { @@ -358,6 +360,16 @@ type ( Token string `yaml:"token,omitempty" mapstructure:"token"` Timeout time.Duration `yaml:"timeout" mapstructure:"timeout"` } + + ProxyURL struct { + URL string `yaml:"url" mapstructure:"url"` + } + + ExternalDataSource struct { + ProxyURL ProxyURL `yaml:"proxy" mapstructure:"proxy"` + AllowedDomains []string `yaml:"allowed_domains" mapstructure:"allowed_domains"` + MaxBytes int64 `yaml:"max_bytes" mapstructure:"max_bytes"` + } ) func (col *Collector) Validate(allowedDirectories []string) error { diff --git a/internal/file/file_manager_service.go b/internal/file/file_manager_service.go index af0e67a91..07f462378 100644 --- a/internal/file/file_manager_service.go +++ b/internal/file/file_manager_service.go @@ -11,10 +11,14 @@ import ( "encoding/json" "errors" "fmt" + "io" "log/slog" + "net/http" + "net/url" "os" "path/filepath" "strconv" + "strings" "sync" "golang.org/x/sync/errgroup" @@ -40,6 +44,11 @@ const ( executePerm = 0o111 ) +type DownloadHeaders struct { + ETag string + LastModified string +} + type ( fileOperator interface { Write(ctx context.Context, fileContent []byte, fileName, filePermissions string) error @@ -74,6 +83,7 @@ type ( ) error SetIsConnected(isConnected bool) RenameFile(ctx context.Context, hash, fileName, tempDir string) error + RenameExternalFile(ctx context.Context, fileName, tempDir string) error UpdateClient(ctx context.Context, fileServiceClient mpi.FileServiceClient) } @@ -104,26 +114,28 @@ type FileManagerService struct { // map of files and the actions performed on them during config apply fileActions map[string]*model.FileCache // key is file path // map of the files currently on disk, used to determine the file action during config apply - currentFilesOnDisk map[string]*mpi.File // key is file path - previousManifestFiles map[string]*model.ManifestFile - manifestFilePath string - rollbackManifest bool - filesMutex sync.RWMutex + currentFilesOnDisk map[string]*mpi.File // key is file path + previousManifestFiles map[string]*model.ManifestFile + newExternalFileHeaders map[string]DownloadHeaders + manifestFilePath string + rollbackManifest bool + filesMutex sync.RWMutex } func NewFileManagerService(fileServiceClient mpi.FileServiceClient, agentConfig *config.Config, manifestLock *sync.RWMutex, ) *FileManagerService { return &FileManagerService{ - agentConfig: agentConfig, - fileOperator: NewFileOperator(manifestLock), - fileServiceOperator: NewFileServiceOperator(agentConfig, fileServiceClient, manifestLock), - fileActions: make(map[string]*model.FileCache), - currentFilesOnDisk: make(map[string]*mpi.File), - previousManifestFiles: make(map[string]*model.ManifestFile), - rollbackManifest: true, - manifestFilePath: agentConfig.LibDir + "/manifest.json", - manifestLock: manifestLock, + agentConfig: agentConfig, + fileOperator: NewFileOperator(manifestLock), + fileServiceOperator: NewFileServiceOperator(agentConfig, fileServiceClient, manifestLock), + fileActions: make(map[string]*model.FileCache), + currentFilesOnDisk: make(map[string]*mpi.File), + previousManifestFiles: make(map[string]*model.ManifestFile), + newExternalFileHeaders: make(map[string]DownloadHeaders), + rollbackManifest: true, + manifestFilePath: agentConfig.LibDir + "/manifest.json", + manifestLock: manifestLock, } } @@ -233,7 +245,7 @@ func (fms *FileManagerService) Rollback(ctx context.Context, instanceID string) delete(fms.currentFilesOnDisk, fileAction.File.GetFileMeta().GetName()) continue - case model.Delete, model.Update: + case model.Delete, model.Update, model.ExternalFile: content, err := fms.restoreFiles(fileAction) if err != nil { return err @@ -396,13 +408,16 @@ func (fms *FileManagerService) DetermineFileActions( modifiedFile.Action = model.Add fileDiff[fileName] = modifiedFile - continue // if file currently exists and file hash is different, file has been updated // copy contents, set file action } else if ok && modifiedFile.File.GetFileMeta().GetHash() != currentFile.GetFileMeta().GetHash() { modifiedFile.Action = model.Update fileDiff[fileName] = modifiedFile } + if modifiedFile.File.GetExternalDataSource() != nil || currentFile.GetExternalDataSource() != nil { + modifiedFile.Action = model.ExternalFile + fileDiff[fileName] = modifiedFile + } } return fileDiff, nil @@ -581,7 +596,8 @@ func (fms *FileManagerService) executeFileActions(ctx context.Context) (actionEr func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(ctx context.Context) (updateError error) { var downloadFiles []*model.FileCache for _, fileAction := range fms.fileActions { - if fileAction.Action == model.Add || fileAction.Action == model.Update { + if fileAction.Action == model.ExternalFile || fileAction.Action == model.Add || + fileAction.Action == model.Update { downloadFiles = append(downloadFiles, fileAction) } } @@ -590,7 +606,6 @@ func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(ctx context.Co slog.DebugContext(ctx, "No updated files to download") return nil } - errGroup, errGroupCtx := errgroup.WithContext(ctx) errGroup.SetLimit(fms.agentConfig.Client.Grpc.MaxParallelFileOperations) @@ -598,13 +613,22 @@ func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(ctx context.Co errGroup.Go(func() error { tempFilePath := tempFilePath(fileAction.File.GetFileMeta().GetName()) - slog.DebugContext( - errGroupCtx, - "Downloading file to temp location", - "file", tempFilePath, - ) + switch fileAction.Action { + case model.ExternalFile: + return fms.handleExternalFileDownload(errGroupCtx, fileAction, tempFilePath) + case model.Add, model.Update: + slog.DebugContext( + errGroupCtx, + "Downloading file to temp location", + "file", tempFilePath, + ) - return fms.fileUpdate(errGroupCtx, fileAction.File, tempFilePath) + return fms.fileUpdate(errGroupCtx, fileAction.File, tempFilePath) + case model.Delete, model.Unchanged: // had to add for linter + return nil + default: + return nil + } }) } @@ -614,10 +638,13 @@ func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(ctx context.Co func (fms *FileManagerService) moveOrDeleteFiles(ctx context.Context, actionError error) error { actionsLoop: for _, fileAction := range fms.fileActions { + var err error + fileMeta := fileAction.File.GetFileMeta() + tempFilePath := tempFilePath(fileAction.File.GetFileMeta().GetName()) switch fileAction.Action { case model.Delete: slog.DebugContext(ctx, "Deleting file", "file", fileAction.File.GetFileMeta().GetName()) - if err := os.Remove(fileAction.File.GetFileMeta().GetName()); err != nil && !os.IsNotExist(err) { + if err = os.Remove(fileAction.File.GetFileMeta().GetName()); err != nil && !os.IsNotExist(err) { actionError = fmt.Errorf("error deleting file: %s error: %w", fileAction.File.GetFileMeta().GetName(), err) @@ -626,17 +653,16 @@ actionsLoop: continue case model.Add, model.Update: - fileMeta := fileAction.File.GetFileMeta() - tempFilePath := tempFilePath(fileAction.File.GetFileMeta().GetName()) - err := fms.fileServiceOperator.RenameFile(ctx, fileMeta.GetHash(), tempFilePath, fileMeta.GetName()) - if err != nil { - actionError = err - - break actionsLoop - } + err = fms.fileServiceOperator.RenameFile(ctx, fileMeta.GetHash(), tempFilePath, fileMeta.GetName()) + case model.ExternalFile: + err = fms.fileServiceOperator.RenameExternalFile(ctx, tempFilePath, fileMeta.GetName()) case model.Unchanged: slog.DebugContext(ctx, "File unchanged") } + if err != nil { + actionError = err + break actionsLoop + } } return actionError @@ -792,3 +818,217 @@ func tempBackupFilePath(fileName string) string { tempFileName := "." + filepath.Base(fileName) + ".agent.backup" return filepath.Join(filepath.Dir(fileName), tempFileName) } + +func (fms *FileManagerService) handleExternalFileDownload(ctx context.Context, fileAction *model.FileCache, + tempFilePath string, +) error { + location := fileAction.File.GetExternalDataSource().GetLocation() + slog.InfoContext(ctx, "Downloading external file from", "location", location) + + var contentToWrite []byte + var downloadErr, updateError error + var headers DownloadHeaders + + contentToWrite, headers, downloadErr = fms.downloadFileContent(ctx, fileAction.File) + + if downloadErr != nil { + updateError = fmt.Errorf("failed to download file %s from %s: %w", + fileAction.File.GetFileMeta().GetName(), location, downloadErr) + + return updateError + } + + if contentToWrite == nil { + slog.DebugContext(ctx, "External file unchanged (304), skipping disk write.", + "file", fileAction.File.GetFileMeta().GetName()) + + return nil + } + + fileName := fileAction.File.GetFileMeta().GetName() + fms.newExternalFileHeaders[fileName] = headers + + updateErr := fms.writeContentToTempFile(ctx, contentToWrite, tempFilePath) + + return updateErr +} + +func (fms *FileManagerService) writeContentToTempFile( + ctx context.Context, + content []byte, + path string, +) error { + writeErr := fms.fileOperator.Write( + ctx, + content, + path, + "0600", + ) + + if writeErr != nil { + return fmt.Errorf("failed to write downloaded content to temp file %s: %w", path, writeErr) + } + + return nil +} + +// downloadFileContent performs an HTTP GET request to the given URL and returns the file content as a byte slice. +func (fms *FileManagerService) downloadFileContent( + ctx context.Context, + file *mpi.File, +) (content []byte, headers DownloadHeaders, err error) { + fileName := file.GetFileMeta().GetName() + downloadURL := file.GetExternalDataSource().GetLocation() + externalConfig := fms.agentConfig.ExternalDataSource + + if !isDomainAllowed(downloadURL, externalConfig.AllowedDomains) { + return nil, DownloadHeaders{}, fmt.Errorf("download URL %s is not in the allowed domains list", downloadURL) + } + + httpClient, err := fms.setupHTTPClient(ctx, externalConfig.ProxyURL.URL) + if err != nil { + return nil, DownloadHeaders{}, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, downloadURL, nil) + if err != nil { + return nil, DownloadHeaders{}, fmt.Errorf("failed to create request for %s: %w", downloadURL, err) + } + + if externalConfig.ProxyURL.URL != "" { + fms.addConditionalHeaders(ctx, req, fileName) + } else { + slog.DebugContext(ctx, "No proxy configured; sending plain HTTP request without caching headers.") + } + + resp, err := httpClient.Do(req) + if err != nil { + return nil, DownloadHeaders{}, fmt.Errorf("failed to execute download request for %s: %w", downloadURL, err) + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusOK: + headers.ETag = resp.Header.Get("ETag") + headers.LastModified = resp.Header.Get("Last-Modified") + case http.StatusNotModified: + slog.InfoContext(ctx, "File content unchanged (304 Not Modified)", "file_name", fileName) + return nil, DownloadHeaders{}, nil + default: + return nil, DownloadHeaders{}, fmt.Errorf("download failed with status code %d", resp.StatusCode) + } + + reader := io.Reader(resp.Body) + if fms.agentConfig.ExternalDataSource.MaxBytes > 0 { + reader = io.LimitReader(resp.Body, fms.agentConfig.ExternalDataSource.MaxBytes) + } + + content, err = io.ReadAll(reader) + if err != nil { + return nil, DownloadHeaders{}, fmt.Errorf("failed to read content from response body: %w", err) + } + + slog.InfoContext(ctx, "Successfully downloaded file content", "file_name", fileName, "size", len(content)) + + return content, headers, nil +} + +func isDomainAllowed(downloadURL string, allowedDomains []string) bool { + u, err := url.Parse(downloadURL) + if err != nil { + slog.Debug("Failed to parse download URL for domain check", "url", downloadURL, "error", err) + return false + } + + hostname := u.Hostname() + if hostname == "" { + return false + } + + for _, pattern := range allowedDomains { + if pattern == "" { + continue + } + + if pattern == hostname { + return true + } + + if isWildcardMatch(hostname, pattern) { + return true + } + } + + return false +} + +func (fms *FileManagerService) setupHTTPClient(ctx context.Context, proxyURLString string) (*http.Client, error) { + var transport *http.Transport + + if proxyURLString != "" { + proxyURL, err := url.Parse(proxyURLString) + if err != nil { + return nil, fmt.Errorf("invalid proxy URL configured: %w", err) + } + slog.DebugContext(ctx, "Configuring HTTP client to use proxy", "proxy_url", proxyURLString) + transport = &http.Transport{ + Proxy: http.ProxyURL(proxyURL), + } + } else { + slog.DebugContext(ctx, "Configuring HTTP client for direct connection (no proxy)") + transport = &http.Transport{ + Proxy: nil, + } + } + + httpClient := &http.Client{ + Transport: transport, + Timeout: fms.agentConfig.Client.FileDownloadTimeout, + } + + return httpClient, nil +} + +func (fms *FileManagerService) addConditionalHeaders(ctx context.Context, req *http.Request, fileName string) { + slog.DebugContext(ctx, "Proxy configured; adding headers to GET request.") + + manifestFiles, _, manifestFileErr := fms.manifestFile() + + if manifestFileErr != nil && !errors.Is(manifestFileErr, os.ErrNotExist) { + slog.WarnContext(ctx, "Error reading manifest file for headers", "error", manifestFileErr) + } + + manifestFile, ok := manifestFiles[fileName] + + if ok && manifestFile != nil && manifestFile.ManifestFileMeta != nil { + fileMeta := manifestFile.ManifestFileMeta + + if fileMeta.ETag != "" { + req.Header.Set("If-None-Match", fileMeta.ETag) + } + if fileMeta.LastModified != "" { + req.Header.Set("If-Modified-Since", fileMeta.LastModified) + } + } else { + slog.DebugContext(ctx, "File not found in manifest or missing metadata; skipping conditional headers.", + "file", fileName) + } +} + +func isWildcardMatch(hostname, pattern string) bool { + if !strings.HasPrefix(pattern, "*.") { + return false + } + + baseDomain := pattern[2:] + if strings.HasSuffix(hostname, baseDomain) { + // Check to ensure it's a true subdomain match (e.g., must have a '.' + // before baseDomain unless it IS the baseDomain) + // This handles cases like preventing 'foo.com' matching '*.oo.com' + if hostname == baseDomain || hostname[len(hostname)-len(baseDomain)-1] == '.' { + return true + } + } + + return false +} diff --git a/internal/file/file_manager_service_test.go b/internal/file/file_manager_service_test.go index e42128063..b53aa3885 100644 --- a/internal/file/file_manager_service_test.go +++ b/internal/file/file_manager_service_test.go @@ -10,11 +10,17 @@ import ( "encoding/json" "errors" "fmt" + "net/http" + "net/http/httptest" + "net/url" "os" "path/filepath" + "strings" "sync" "testing" + "time" + "github.com/nginx/agent/v3/internal/config" "github.com/nginx/agent/v3/internal/model" "github.com/nginx/agent/v3/pkg/files" @@ -29,6 +35,39 @@ import ( "github.com/stretchr/testify/require" ) +type fakeFSO struct { + renameSrc, renameDst string + renameExternalCalled bool +} + +func (f *fakeFSO) File(ctx context.Context, file *mpi.File, tempFilePath, expectedHash string) error { + return nil +} + +func (f *fakeFSO) UpdateOverview(ctx context.Context, instanceID string, filesToUpdate []*mpi.File, configPath string, + iteration int, +) error { + return nil +} + +func (f *fakeFSO) ChunkedFile(ctx context.Context, file *mpi.File, tempFilePath, expectedHash string) error { + return nil +} +func (f *fakeFSO) IsConnected() bool { return true } +func (f *fakeFSO) UpdateFile(ctx context.Context, instanceID string, fileToUpdate *mpi.File) error { + return nil +} +func (f *fakeFSO) SetIsConnected(isConnected bool) {} +func (f *fakeFSO) RenameFile(ctx context.Context, hash, fileName, tempDir string) error { return nil } +func (f *fakeFSO) RenameExternalFile(ctx context.Context, fileName, tempDir string) error { + f.renameExternalCalled = true + f.renameSrc = fileName + f.renameDst = tempDir + + return nil +} +func (f *fakeFSO) UpdateClient(ctx context.Context, fileServiceClient mpi.FileServiceClient) {} + func TestFileManagerService_ConfigApply_Add(t *testing.T) { ctx := context.Background() tempDir := t.TempDir() @@ -1173,3 +1212,328 @@ rQHX6DP4w6IwZY8JB8LS }) } } + +func TestFileManagerService_DetermineFileActions_ExternalFile(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + fileName := filepath.Join(tempDir, "external.conf") + + modifiedFiles := map[string]*model.FileCache{ + fileName: { + File: &mpi.File{ + FileMeta: &mpi.FileMeta{ + Name: fileName, + }, + ExternalDataSource: &mpi.ExternalDataSource{Location: "http://example.com/file"}, + }, + }, + } + + fakeFileServiceClient := &v1fakes.FakeFileServiceClient{} + fileManagerService := NewFileManagerService(fakeFileServiceClient, types.AgentConfig(), &sync.RWMutex{}) + fileManagerService.agentConfig.AllowedDirectories = []string{tempDir} + + diff, err := fileManagerService.DetermineFileActions(ctx, make(map[string]*mpi.File), modifiedFiles) + require.NoError(t, err) + + fc, ok := diff[fileName] + require.True(t, ok, "expected file to be present in diff") + assert.Equal(t, model.ExternalFile, fc.Action) +} + +//nolint:gocognit,revive,govet // cognitive complexity is 22 +func TestFileManagerService_downloadExternalFiles_Cases(t *testing.T) { + type tc struct { + allowedDomains []string + expectContent []byte + name string + expectHeaderETag string + expectHeaderLastMod string + expectErrContains string + handler http.HandlerFunc + maxBytes int + expectError bool + expectTempFile bool + } + + tests := []tc{ + { + name: "Test 1: Success", + handler: func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("ETag", "test-etag") + w.Header().Set("Last-Modified", time.RFC1123) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("external file content")) + }, + allowedDomains: nil, + maxBytes: 0, + expectError: false, + expectTempFile: true, + expectContent: []byte("external file content"), + expectHeaderETag: "test-etag", + expectHeaderLastMod: time.RFC1123, + }, + { + name: "Test 2: NotModified", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotModified) + }, + allowedDomains: nil, + maxBytes: 0, + expectError: false, + expectTempFile: false, + expectContent: nil, + expectHeaderETag: "", + expectHeaderLastMod: "", + }, + { + name: "Test 3: NotAllowedDomain", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("external file content")) + }, + allowedDomains: []string{"not-the-host"}, + maxBytes: 0, + expectError: true, + expectErrContains: "not in the allowed domains", + expectTempFile: false, + }, + { + name: "Test 4: NotFound", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }, + allowedDomains: nil, + maxBytes: 0, + expectError: true, + expectErrContains: "status code 404", + expectTempFile: false, + }, + { + name: "Test 5: ProxyWithConditionalHeaders", + handler: func(w http.ResponseWriter, r *http.Request) { + // verify conditional headers from manifest are added + if r.Header.Get("If-None-Match") != "manifest-test-etag" { + http.Error(w, "missing If-None-Match", http.StatusBadRequest) + return + } + if r.Header.Get("If-Modified-Since") != time.RFC1123 { + http.Error(w, "missing If-Modified-Since", http.StatusBadRequest) + return + } + w.Header().Set("ETag", "resp-etag") + w.Header().Set("Last-Modified", time.RFC1123) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("external file via proxy")) + }, + allowedDomains: nil, + maxBytes: 0, + expectError: false, + expectTempFile: true, + expectContent: []byte("external file via proxy"), + expectHeaderETag: "resp-etag", + expectHeaderLastMod: time.RFC1123, + expectErrContains: "", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + fileName := filepath.Join(tempDir, "external.conf") + + ts := httptest.NewServer(test.handler) + defer ts.Close() + + u, err := url.Parse(ts.URL) + require.NoError(t, err) + host := u.Hostname() + + fakeFileServiceClient := &v1fakes.FakeFileServiceClient{} + fileManagerService := NewFileManagerService(fakeFileServiceClient, types.AgentConfig(), &sync.RWMutex{}) + + eds := &config.ExternalDataSource{ + ProxyURL: config.ProxyURL{URL: ""}, + AllowedDomains: []string{host}, + MaxBytes: int64(test.maxBytes), + } + + if test.allowedDomains != nil { + eds.AllowedDomains = test.allowedDomains + } + + if test.name == "Test 5: ProxyWithConditionalHeaders" { + manifestFiles := map[string]*model.ManifestFile{ + fileName: { + ManifestFileMeta: &model.ManifestFileMeta{ + Name: fileName, + ETag: "manifest-test-etag", + LastModified: time.RFC1123, + }, + }, + } + manifestJSON, mErr := json.MarshalIndent(manifestFiles, "", " ") + require.NoError(t, mErr) + + manifestFile, mErr := os.CreateTemp(tempDir, "manifest.json") + require.NoError(t, mErr) + _, mErr = manifestFile.Write(manifestJSON) + require.NoError(t, mErr) + _ = manifestFile.Close() + + fileManagerService.agentConfig.LibDir = tempDir + fileManagerService.manifestFilePath = manifestFile.Name() + + eds.ProxyURL = config.ProxyURL{URL: ts.URL} + } + + fileManagerService.agentConfig.ExternalDataSource = eds + + fileManagerService.fileActions = map[string]*model.FileCache{ + fileName: { + File: &mpi.File{ + FileMeta: &mpi.FileMeta{Name: fileName}, + ExternalDataSource: &mpi.ExternalDataSource{Location: ts.URL}, + }, + Action: model.ExternalFile, + }, + } + + err = fileManagerService.downloadUpdatedFilesToTempLocation(ctx) + + if test.expectError { + require.Error(t, err) + if test.expectErrContains != "" { + assert.Contains(t, err.Error(), test.expectErrContains) + } + _, statErr := os.Stat(tempFilePath(fileName)) + assert.True(t, os.IsNotExist(statErr)) + + return + } + + require.NoError(t, err) + + if test.expectTempFile { + b, readErr := os.ReadFile(tempFilePath(fileName)) + require.NoError(t, readErr) + assert.Equal(t, test.expectContent, b) + + h, ok := fileManagerService.newExternalFileHeaders[fileName] + require.True(t, ok) + assert.Equal(t, test.expectHeaderETag, h.ETag) + assert.Equal(t, test.expectHeaderLastMod, h.LastModified) + + _ = os.Remove(tempFilePath(fileName)) + } else { + _, statErr := os.Stat(tempFilePath(fileName)) + assert.True(t, os.IsNotExist(statErr)) + } + }) + } +} + +func TestMoveOrDeleteFiles_ExternalFileRenameCalled(t *testing.T) { + ctx := context.Background() + fms := NewFileManagerService(nil, types.AgentConfig(), &sync.RWMutex{}) + + fake := &fakeFSO{} + fms.fileServiceOperator = fake + + fileName := filepath.Join(t.TempDir(), "ext.conf") + fms.fileActions = map[string]*model.FileCache{ + fileName: { + File: &mpi.File{FileMeta: &mpi.FileMeta{Name: fileName}}, + Action: model.ExternalFile, + }, + } + + tempPath := tempFilePath(fileName) + reqDir := filepath.Dir(tempPath) + require.NoError(t, os.MkdirAll(reqDir, 0o755)) + require.NoError(t, os.WriteFile(tempPath, []byte("data"), 0o600)) + + err := fms.moveOrDeleteFiles(ctx, nil) + require.NoError(t, err) + assert.True(t, fake.renameExternalCalled) + assert.Equal(t, tempPath, fake.renameSrc) + assert.Equal(t, fileName, fake.renameDst) +} + +func TestDownloadFileContent_MaxBytesLimit(t *testing.T) { + ctx := context.Background() + fms := NewFileManagerService(nil, types.AgentConfig(), &sync.RWMutex{}) + + // test server returns 10 bytes, we set MaxBytes to 4 and expect only 4 bytes returned + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("ETag", "etag-1") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("0123456789")) + })) + defer ts.Close() + + u, err := url.Parse(ts.URL) + require.NoError(t, err) + + fms.agentConfig.ExternalDataSource = &config.ExternalDataSource{ + AllowedDomains: []string{u.Hostname()}, + MaxBytes: 4, + } + + fileName := filepath.Join(t.TempDir(), "external.conf") + file := &mpi.File{ + FileMeta: &mpi.FileMeta{Name: fileName}, + ExternalDataSource: &mpi.ExternalDataSource{Location: ts.URL}, + } + + content, headers, err := fms.downloadFileContent(ctx, file) + require.NoError(t, err) + assert.Len(t, content, 4) + assert.Equal(t, "etag-1", headers.ETag) +} + +func TestDownloadFileContent_InvalidProxyURL(t *testing.T) { + ctx := context.Background() + fms := NewFileManagerService(nil, types.AgentConfig(), &sync.RWMutex{}) + + downURL := "http://example.com/file" + fms.agentConfig.ExternalDataSource = &config.ExternalDataSource{ + AllowedDomains: []string{"example.com"}, + ProxyURL: config.ProxyURL{URL: "http://:"}, + } + + file := &mpi.File{ + FileMeta: &mpi.FileMeta{Name: "/tmp/file"}, + ExternalDataSource: &mpi.ExternalDataSource{Location: downURL}, + } + + _, _, err := fms.downloadFileContent(ctx, file) + require.Error(t, err) + if !strings.Contains(err.Error(), "invalid proxy URL configured") && + !strings.Contains(err.Error(), "failed to execute download request") && + !strings.Contains(err.Error(), "proxyconnect") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestIsDomainAllowed_EdgeCases(t *testing.T) { + ok := isDomainAllowed("http://%", []string{"example.com"}) + assert.False(t, ok) + + ok = isDomainAllowed("http://", []string{"example.com"}) + assert.False(t, ok) + + ok = isDomainAllowed("http://example.com/path", []string{""}) + assert.False(t, ok) + + ok = isDomainAllowed("http://example.com/path", []string{"example.com"}) + assert.True(t, ok) + + ok = isDomainAllowed("http://sub.example.com/path", []string{"*.example.com"}) + assert.True(t, ok) + + assert.True(t, isWildcardMatch("example.com", "*.example.com")) + assert.True(t, isWildcardMatch("sub.example.com", "*.example.com")) + assert.False(t, isWildcardMatch("badexample.com", "*.example.com")) +} diff --git a/internal/file/file_service_operator.go b/internal/file/file_service_operator.go index 19211c600..8dba2acf7 100644 --- a/internal/file/file_service_operator.go +++ b/internal/file/file_service_operator.go @@ -79,7 +79,10 @@ func (fso *FileServiceOperator) File( defer backoffCancel() getFile := func() (*mpi.GetFileResponse, error) { - return fso.fileServiceClient.GetFile(ctx, &mpi.GetFileRequest{ + grpcCtx, cancel := context.WithTimeout(ctx, fso.agentConfig.Client.FileDownloadTimeout) + defer cancel() + + return fso.fileServiceClient.GetFile(grpcCtx, &mpi.GetFileRequest{ MessageMeta: &mpi.MessageMeta{ MessageId: id.GenerateMessageID(), CorrelationId: logger.CorrelationID(ctx), @@ -225,7 +228,10 @@ func (fso *FileServiceOperator) ChunkedFile( ) error { slog.DebugContext(ctx, "Getting chunked file", "file", file.GetFileMeta().GetName()) - stream, err := fso.fileServiceClient.GetFileStream(ctx, &mpi.GetFileRequest{ + grpcCtx, cancel := context.WithTimeout(ctx, fso.agentConfig.Client.FileDownloadTimeout) + defer cancel() + + stream, err := fso.fileServiceClient.GetFileStream(grpcCtx, &mpi.GetFileRequest{ MessageMeta: &mpi.MessageMeta{ MessageId: id.GenerateMessageID(), CorrelationId: logger.CorrelationID(ctx), @@ -276,6 +282,23 @@ func (fso *FileServiceOperator) UpdateFile( return fso.sendUpdateFileStream(ctx, fileToUpdate, fso.agentConfig.Client.Grpc.FileChunkSize) } +func (fso *FileServiceOperator) RenameExternalFile( + ctx context.Context, source, desination string, +) error { + slog.DebugContext(ctx, fmt.Sprintf("Moving file %s to %s (no hash validation)", source, desination)) + + if err := os.MkdirAll(filepath.Dir(desination), dirPerm); err != nil { + return fmt.Errorf("failed to create directories for %s: %w", desination, err) + } + + moveErr := os.Rename(source, desination) + if moveErr != nil { + return fmt.Errorf("failed to move file: %w", moveErr) + } + + return nil +} + // renameFile, renames (moves) file from tempDir to new location to update file. func (fso *FileServiceOperator) RenameFile( ctx context.Context, hash, source, desination string, @@ -371,12 +394,15 @@ func (fso *FileServiceOperator) sendUpdateFileRequest( return nil, errors.New("CreateConnection rpc has not being called yet") } - response, updateError := fso.fileServiceClient.UpdateFile(ctx, request) + grpcCtx, cancel := context.WithTimeout(ctx, fso.agentConfig.Client.FileDownloadTimeout) + defer cancel() + + response, updateError := fso.fileServiceClient.UpdateFile(grpcCtx, request) validatedError := internalgrpc.ValidateGrpcError(updateError) if validatedError != nil { - slog.ErrorContext(ctx, "Failed to send update file", "error", validatedError) + slog.ErrorContext(grpcCtx, "Failed to send update file", "error", validatedError) return nil, validatedError } @@ -406,7 +432,10 @@ func (fso *FileServiceOperator) sendUpdateFileStream( return errors.New("file chunk size must be greater than zero") } - updateFileStreamClient, err := fso.fileServiceClient.UpdateFileStream(ctx) + grpcCtx, cancel := context.WithTimeout(ctx, fso.agentConfig.Client.FileDownloadTimeout) + defer cancel() + + updateFileStreamClient, err := fso.fileServiceClient.UpdateFileStream(grpcCtx) if err != nil { return err } diff --git a/internal/file/file_service_operator_test.go b/internal/file/file_service_operator_test.go index f8206e145..f176dbe2a 100644 --- a/internal/file/file_service_operator_test.go +++ b/internal/file/file_service_operator_test.go @@ -187,3 +187,86 @@ func TestFileManagerService_UpdateFile_LargeFile(t *testing.T) { helpers.RemoveFileWithErrorCheck(t, testFile.Name()) } + +func TestFileServiceOperator_RenameExternalFile(t *testing.T) { + tests := []struct { + prepare func(t *testing.T) (src, dst string) + name string + wantErrMsg string + wantErr bool + }{ + { + name: "Test 1: success", + prepare: func(t *testing.T) (string, string) { + t.Helper() + tmp := t.TempDir() + src := filepath.Join(tmp, "src.txt") + dst := filepath.Join(tmp, "subdir", "dest.txt") + content := []byte("hello world") + require.NoError(t, os.WriteFile(src, content, 0o600)) + + return src, dst + }, + wantErr: false, + }, + { + name: "Test 2: mkdirall_fail", + prepare: func(t *testing.T) (string, string) { + t.Helper() + tmp := t.TempDir() + parentFile := filepath.Join(tmp, "not_a_dir") + require.NoError(t, os.WriteFile(parentFile, []byte("block"), 0o600)) + dst := filepath.Join(parentFile, "dest.txt") + src := filepath.Join(tmp, "src.txt") + require.NoError(t, os.WriteFile(src, []byte("content"), 0o600)) + + return src, dst + }, + wantErr: true, + wantErrMsg: "failed to create directories for", + }, + { + name: "Test 3: rename_fail", + prepare: func(t *testing.T) (string, string) { + t.Helper() + tmp := t.TempDir() + src := filepath.Join(tmp, "does_not_exist.txt") + dst := filepath.Join(tmp, "subdir", "dest.txt") + + return src, dst + }, + wantErr: true, + wantErrMsg: "failed to move file", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + fso := NewFileServiceOperator(types.AgentConfig(), nil, &sync.RWMutex{}) + + src, dst := tc.prepare(t) + + err := fso.RenameExternalFile(ctx, src, dst) + if tc.wantErr { + require.Error(t, err) + if tc.wantErrMsg != "" { + require.Contains(t, err.Error(), tc.wantErrMsg) + } + + return + } + + require.NoError(t, err) + + dstContent, readErr := os.ReadFile(dst) + require.NoError(t, readErr) + if tc.name == "success" { + require.Equal(t, []byte("hello world"), dstContent) + } + + _, statErr := os.Stat(src) + require.True(t, os.IsNotExist(statErr)) + }) + } +} diff --git a/internal/model/config.go b/internal/model/config.go index d13c299fd..324a5ffd6 100644 --- a/internal/model/config.go +++ b/internal/model/config.go @@ -42,6 +42,10 @@ type ManifestFileMeta struct { Name string `json:"name"` // The hash of the file contents sha256, hex encoded Hash string `json:"hash"` + // ETag of the 3rd Party external file + ETag string `json:"etag"` + // Last modified time of the 3rd Party external file + LastModified string `json:"last_modified"` // The size of the file in bytes Size int64 `json:"size"` // File referenced in the NGINX config diff --git a/internal/model/file.go b/internal/model/file.go index fc6c5baca..671a4ff85 100644 --- a/internal/model/file.go +++ b/internal/model/file.go @@ -19,4 +19,5 @@ const ( Update Delete Unchanged + ExternalFile )