-
Notifications
You must be signed in to change notification settings - Fork 22
Refactor worker management and introduce orchestrator for improved pr… #251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat/ha-plugin
Are you sure you want to change the base?
Changes from all commits
4eb1ca9
5cffe17
8f44c4b
7e05de9
112a280
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,5 +17,5 @@ | |
| !nginx.conf.template | ||
| !start.sh | ||
| !start-k8s.sh | ||
| !start-workers.sh | ||
| !worker_orchestrator.py | ||
| !Caddyfile | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| """ | ||
| Worker Orchestrator Package | ||
|
|
||
| This package provides a Python-based orchestration system for managing | ||
| Chronicle's worker processes, replacing the bash-based start-workers.sh script. | ||
|
|
||
| Components: | ||
| - config: Worker definitions and orchestrator configuration | ||
| - worker_registry: Build worker list with conditional logic | ||
| - process_manager: Process lifecycle management | ||
| - health_monitor: Health checks and self-healing | ||
| """ | ||
|
|
||
| from .config import WorkerDefinition, OrchestratorConfig, WorkerType | ||
| from .worker_registry import build_worker_definitions | ||
| from .process_manager import ManagedWorker, ProcessManager, WorkerState | ||
| from .health_monitor import HealthMonitor | ||
|
|
||
| __all__ = [ | ||
| "WorkerDefinition", | ||
| "OrchestratorConfig", | ||
| "WorkerType", | ||
| "build_worker_definitions", | ||
| "ManagedWorker", | ||
| "ProcessManager", | ||
| "WorkerState", | ||
| "HealthMonitor", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| """ | ||
| Worker Orchestrator Configuration | ||
|
|
||
| Defines data structures for worker definitions and orchestrator configuration. | ||
| """ | ||
|
|
||
| import os | ||
| from dataclasses import dataclass, field | ||
| from enum import Enum | ||
| from typing import Optional, Callable, List | ||
|
|
||
|
|
||
| class WorkerType(Enum): | ||
| """Type of worker process""" | ||
|
|
||
| RQ_WORKER = "rq_worker" # RQ queue worker | ||
| STREAM_CONSUMER = "stream_consumer" # Redis Streams consumer | ||
|
|
||
|
|
||
| @dataclass | ||
| class WorkerDefinition: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds like a worker model?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. worker definition and worker state should be in same place as all worker charateristings. This config file is in the orchestrator folder, so suggests its config for the orchestator not workers |
||
| """ | ||
| Definition of a single worker process. | ||
|
|
||
| Attributes: | ||
| name: Unique identifier for the worker | ||
| command: Full command to execute (as list for subprocess) | ||
| worker_type: Type of worker (RQ vs stream consumer) | ||
| queues: Queue names for RQ workers (empty for stream consumers) | ||
| enabled_check: Optional predicate function to determine if worker should start | ||
| restart_on_failure: Whether to automatically restart on failure | ||
| health_check: Optional custom health check function | ||
| """ | ||
|
|
||
| name: str | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider using a config file https://python-rq.org/docs/workers/#using-a-config-file |
||
| command: List[str] | ||
| worker_type: WorkerType = WorkerType.RQ_WORKER | ||
| queues: List[str] = field(default_factory=list) | ||
| enabled_check: Optional[Callable[[], bool]] = None | ||
| restart_on_failure: bool = True | ||
| health_check: Optional[Callable[[], bool]] = None | ||
|
|
||
| def is_enabled(self) -> bool: | ||
| """Check if this worker should be started""" | ||
| if self.enabled_check is None: | ||
| return True | ||
| return self.enabled_check() | ||
|
|
||
|
|
||
| @dataclass | ||
| class OrchestratorConfig: | ||
| """ | ||
| Global configuration for the worker orchestrator. | ||
|
|
||
| All settings can be overridden via environment variables. | ||
| """ | ||
|
|
||
| # Redis connection | ||
| redis_url: str = field( | ||
| default_factory=lambda: os.getenv("REDIS_URL", "redis://localhost:6379/0") | ||
| ) | ||
|
|
||
| # Health monitoring settings | ||
| check_interval: int = field( | ||
| default_factory=lambda: int(os.getenv("WORKER_CHECK_INTERVAL", "10")) | ||
| ) | ||
| min_rq_workers: int = field( | ||
| default_factory=lambda: int(os.getenv("MIN_RQ_WORKERS", "6")) | ||
| ) | ||
| startup_grace_period: int = field( | ||
| default_factory=lambda: int(os.getenv("WORKER_STARTUP_GRACE_PERIOD", "30")) | ||
| ) | ||
|
|
||
| # Shutdown settings | ||
| shutdown_timeout: int = field( | ||
| default_factory=lambda: int(os.getenv("WORKER_SHUTDOWN_TIMEOUT", "30")) | ||
| ) | ||
|
|
||
| # Logging | ||
| log_level: str = field(default_factory=lambda: os.getenv("LOG_LEVEL", "INFO")) | ||
|
|
||
| def __post_init__(self): | ||
| """Validate configuration after initialization""" | ||
| if self.check_interval <= 0: | ||
| raise ValueError("check_interval must be positive") | ||
| if self.min_rq_workers < 0: | ||
| raise ValueError("min_rq_workers must be non-negative") | ||
| if self.startup_grace_period < 0: | ||
| raise ValueError("startup_grace_period must be non-negative") | ||
| if self.shutdown_timeout <= 0: | ||
| raise ValueError("shutdown_timeout must be positive") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
small point: I would probably put the worker orchestrator as a service, and not in the workers folder.
This means that it's just the code in the workers folder that is used by seperate processes to the main server.