2 changes: 1 addition & 1 deletion go.mod
@@ -3,6 +3,7 @@ module github.com/stacklok/toolhive-registry-server
go 1.25.3

require (
github.com/fsnotify/fsnotify v1.9.0
github.com/go-chi/chi/v5 v5.2.3
github.com/go-git/go-billy/v5 v5.6.2
github.com/go-git/go-git/v5 v5.16.3
@@ -38,7 +39,6 @@ require (
github.com/emirpasic/gods v1.18.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/extism/go-sdk v1.7.0 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
14 changes: 7 additions & 7 deletions pkg/config/config.go
@@ -17,18 +17,17 @@ const (

// SourceTypeFile is the type for registry data stored in local files
SourceTypeFile = "file"
)

const (
// RegistryFormatToolHive is the native ToolHive registry format
SourceFormatToolHive = "toolhive"

// RegistryFormatUpstream is the upstream MCP registry format
SourceFormatUpstream = "upstream"
)

// ConfigLoader defines the interface for loading configuration
type ConfigLoader interface {
// LoadConfig reads and parses a configuration file from the given path.
// The file is only read, never modified.
LoadConfig(path string) (*Config, error)
}

@@ -118,18 +117,19 @@ func NewConfigLoader() ConfigLoader {
return &configLoader{}
}

// LoadConfig loads and parses configuration from a YAML file
// LoadConfig reads and parses a YAML configuration file.
// This is a read-only operation - the file is never modified.
func (c *configLoader) LoadConfig(path string) (*Config, error) {
// Read the entire file into memory
// Read-only file access
data, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("failed to read config file: %w", err)
}

// Parse YAML content
// Parse the YAML content
var config Config
if err := yaml.Unmarshal(data, &config); err != nil {
return nil, fmt.Errorf("failed to parse YAML config: %w", err)
return nil, fmt.Errorf("failed to parse YAML: %w", err)
}

return &config, nil
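For context, the loader above is the read-only entry point the rest of this PR builds on; a minimal usage sketch, with an illustrative config path:

// Illustrative only: load a registry config with the read-only loader.
package main

import (
	"fmt"

	"github.com/stacklok/toolhive-registry-server/pkg/config"
)

func main() {
	loader := config.NewConfigLoader()
	cfg, err := loader.LoadConfig("/etc/toolhive/registry.yaml") // placeholder path
	if err != nil {
		fmt.Println("load failed:", err)
		return
	}
	fmt.Printf("loaded config: %+v\n", cfg)
}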
2 changes: 1 addition & 1 deletion pkg/config/config_test.go
@@ -144,7 +144,7 @@ filter:
yamlContent: `source: [invalid yaml`,
wantConfig: nil,
wantErr: true,
errMsg: "failed to parse YAML config",
errMsg: "failed to parse YAML",
},
{
name: "file_not_found",
224 changes: 224 additions & 0 deletions pkg/config/manager.go
@@ -0,0 +1,224 @@
package config

import (
"context"
"fmt"
"sync"

"github.com/fsnotify/fsnotify"
"github.com/stacklok/toolhive/pkg/logger"
)

// ConfigManager provides thread-safe, read-only configuration management.
// Configuration files are never modified by the application - all updates come from
// external sources (Kubernetes ConfigMaps, Docker volume mounts, orchestration tools).
//
// Design for read-only operation:
// - No file locking needed - we only observe external changes
// - No write coordination required - we never modify files
// - Uses sync.RWMutex optimized for concurrent reads
// - Validates externally-updated configs before applying
// - Preserves last known good configuration on invalid updates
// - Optimized for container environments with atomic file updates
type ConfigManager interface {
// GetConfig safely retrieves the current configuration
GetConfig() *Config

// ReloadConfig reads the latest configuration from disk and applies it if valid.
// The file is only read, never written. Returns error if the new config is invalid.
ReloadConfig() error
Contributor

Do you think we /need/ the hot reloading? I'm concerned that this will force us to manage requests in flight, e.g. synchronization across different data sources, or sources that might have disappeared, etc.

Isn't it in the end just cleaner to force a cold reload?

Collaborator

By cold reload, do you mean an API like the POST /registry/sync that we already planned for manual sync?
In any case, even that solution would require some serialization of sync requests to avoid the issues you just mentioned, but the main goal here was to expose the same features as today, e.g. propagating changes to the MCPRegistry specs during the reconciliation cycle. Optimizations could be tracked in a separate issue.

Contributor

No, by cold reload I meant that the registry server only reads its configuration on startup. The sync is orthogonal: it syncs an existing registry. The part of the hot reload I was concerned about is when a new registry is added or, worse, an existing one is removed.

Collaborator

So how can we change the configuration in K8s?

Contributor Author

I believe the configuration will get updated automatically via the volumeMount; however, the registry server would have already started. I suspect we will have to "restart" the registry server pod when the ConfigMap has been updated?

Collaborator

So the controller has to restart the server's Deployment any time the spec changes?
If we agree, let's add a task under the thv-operator to implement this behavior.

Contributor

I think it would make for an easier and less error-prone implementation over the long term. It's also easier to start this way; hopefully, changes to the spec by the admin will be minimal after some initial back and forth during deployment. If there is a need for more complexity and on-the-fly config changes, I'd rather implement that later.
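(As context for the restart-on-spec-change idea discussed above: one common pattern is for the controller to stamp a digest of the rendered config onto the Deployment's pod template, so any config change rolls the pods. A minimal sketch follows; the annotation key and helper names are hypothetical and not part of this PR.)

// Sketch: force a rolling restart of the registry server Deployment when
// the rendered config changes. Annotation key and names are hypothetical.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
)

// stampConfigHash sets a digest of the serialized config as a pod-template
// annotation; changing the digest makes the Deployment controller roll the pods.
func stampConfigHash(dep *appsv1.Deployment, configYAML []byte) {
	sum := sha256.Sum256(configYAML)
	if dep.Spec.Template.Annotations == nil {
		dep.Spec.Template.Annotations = map[string]string{}
	}
	dep.Spec.Template.Annotations["toolhive.stacklok.dev/config-hash"] = hex.EncodeToString(sum[:])
}

func main() {
	dep := &appsv1.Deployment{}
	stampConfigHash(dep, []byte("<rendered registry config>"))
	fmt.Println(dep.Spec.Template.Annotations)
}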

Collaborator

@ChrisJBurns if this is decided, do you want to review this PR and just remove the ReloadConfig method, or should we keep the original design?
Also, I tracked #60 to postpone the hot reload functionality.

Contributor Author

Yep, when #59 has been merged, I'll rebase this PR and take the hot reload stuff out. This wasn't 100% done in the first place; I just like getting drafts up because (for me) reading the diffs in GitHub is easier than locally :D and it helps me keep track of the amount of changes I'm making, so I can keep them below 1k for easier review.

Collaborator

#59 is waiting for another approval, then we can merge.


// WatchConfig observes the configuration file for external changes.
// Automatically reloads when the file is updated by external systems.
// Blocks until context is cancelled.
WatchConfig(ctx context.Context) error

// Close releases the file watcher resources
Close() error
}

// ConfigValidator defines the interface for validating configurations
// This allows custom validation logic to be injected beyond the basic validation
type ConfigValidator interface {
Validate(config *Config) error
}

// defaultValidator uses the Config's built-in validation
type defaultValidator struct{}

// Validate delegates to the Config's own Validate method
func (v *defaultValidator) Validate(config *Config) error {
return config.Validate()
}

// configManager is the concrete implementation of ConfigManager
type configManager struct {
mu sync.RWMutex // Protects concurrent access to config
config *Config // Current active configuration
configPath string // Path to configuration file
loader ConfigLoader // Loader for reading config files
validator ConfigValidator // Validator for checking config validity
watcher *fsnotify.Watcher // File system watcher (nil if not watching)
watcherMu sync.Mutex // Protects watcher field
}

// ConfigManagerOption allows customizing ConfigManager behavior
type ConfigManagerOption func(*configManager)

// WithValidator sets a custom validator for the config manager
func WithValidator(validator ConfigValidator) ConfigManagerOption {
return func(cm *configManager) {
cm.validator = validator
}
}

// WithLoader sets a custom config loader for the config manager
func WithLoader(loader ConfigLoader) ConfigManagerOption {
return func(cm *configManager) {
cm.loader = loader
}
}

// NewConfigManager creates a new ConfigManager with the given configuration file path.
// It loads and validates the initial configuration.
// Returns error if initial load or validation fails.
func NewConfigManager(configPath string, opts ...ConfigManagerOption) (ConfigManager, error) {
cm := &configManager{
configPath: configPath,
loader: NewConfigLoader(),
validator: &defaultValidator{}, // Uses Config.Validate() by default
}

// Apply options
for _, opt := range opts {
opt(cm)
}

// Load initial configuration
if err := cm.ReloadConfig(); err != nil {
return nil, fmt.Errorf("failed to load initial configuration: %w", err)
}

return cm, nil
}

// GetConfig safely retrieves the current configuration
// Multiple goroutines can safely call this method concurrently
func (cm *configManager) GetConfig() *Config {
cm.mu.RLock()
defer cm.mu.RUnlock()

// Return a copy to prevent external modifications
// This is a shallow copy, which is sufficient for our use case
// as the Config struct fields are not modified in place
configCopy := *cm.config
return &configCopy
}

// ReloadConfig reads the configuration file and applies it if valid.
// The file is treated as read-only - we never modify it.
// If the new configuration is invalid, the previous configuration remains active.
func (cm *configManager) ReloadConfig() error {
// Read the configuration file (read-only operation)
newConfig, err := cm.loader.LoadConfig(cm.configPath)
if err != nil {
return fmt.Errorf("failed to read config file: %w", err)
}

// Validate before applying
if err := cm.validator.Validate(newConfig); err != nil {
return fmt.Errorf("invalid configuration: %w", err)
}

// Atomically update the in-memory configuration
cm.mu.Lock()
cm.config = newConfig
cm.mu.Unlock()

logger.Infof("Configuration reloaded from %s", cm.configPath)
return nil
}

// WatchConfig observes the configuration file for external changes.
// Since we never write to the file, all changes come from external sources:
// - Kubernetes ConfigMap updates (atomic via symlink swaps)
// - Docker volume mount updates
// - Configuration management tools
//
// This method blocks until the context is cancelled.
func (cm *configManager) WatchConfig(ctx context.Context) error {
cm.watcherMu.Lock()
if cm.watcher != nil {
cm.watcherMu.Unlock()
return fmt.Errorf("config watcher is already running")
}

watcher, err := fsnotify.NewWatcher()
if err != nil {
cm.watcherMu.Unlock()
return fmt.Errorf("failed to create file watcher: %w", err)
}
cm.watcher = watcher
cm.watcherMu.Unlock()

// Add config file to watcher
if err := watcher.Add(cm.configPath); err != nil {
return fmt.Errorf("failed to watch config file %s: %w", cm.configPath, err)
}

logger.Infof("Started watching configuration file: %s", cm.configPath)

// Watch loop
for {
select {
case <-ctx.Done():
logger.Info("Stopping config file watcher due to context cancellation")
return ctx.Err()

case event, ok := <-watcher.Events:
if !ok {
return fmt.Errorf("watcher event channel closed")
}

// Detect external file modifications
if event.Has(fsnotify.Write) || event.Has(fsnotify.Create) {
logger.Infof("External config update detected, reloading")

if err := cm.ReloadConfig(); err != nil {
logger.Errorf("Failed to reload config: %v", err)
// Continue observing - previous config remains active
}
}

// Handle K8s ConfigMap updates (may remove/recreate symlinks)
if event.Has(fsnotify.Remove) {
logger.Debugf("Config file removed (K8s update), re-watching")
_ = watcher.Add(cm.configPath)
}

case err, ok := <-watcher.Errors:
if !ok {
return fmt.Errorf("watcher error channel closed")
}
// Log error but continue watching
logger.Errorf("File watcher error: %v", err)
}
}
}

// Close releases resources held by the config manager
// Specifically, it closes the file watcher if active
func (cm *configManager) Close() error {
cm.watcherMu.Lock()
defer cm.watcherMu.Unlock()

if cm.watcher != nil {
if err := cm.watcher.Close(); err != nil {
return fmt.Errorf("failed to close file watcher: %w", err)
}
cm.watcher = nil
logger.Info("Config watcher closed")
}

return nil
}
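For reviewers, a minimal wiring sketch of the API added in this file; the config path and the shutdown handling are illustrative, not part of the change:

// Illustrative wiring of the ConfigManager added in this PR.
package main

import (
	"context"
	"fmt"
	"os/signal"
	"syscall"

	"github.com/stacklok/toolhive-registry-server/pkg/config"
)

func main() {
	// Load and validate the initial configuration (path is a placeholder).
	cm, err := config.NewConfigManager("/etc/toolhive/registry.yaml")
	if err != nil {
		fmt.Println("initial config load failed:", err)
		return
	}
	defer cm.Close()

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// Observe external updates (ConfigMap or volume-mount changes) until shutdown.
	go func() {
		if err := cm.WatchConfig(ctx); err != nil && ctx.Err() == nil {
			fmt.Println("config watcher stopped:", err)
		}
	}()

	// Request handlers read the current snapshot on demand.
	_ = cm.GetConfig()

	<-ctx.Done()
}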