From c193298bb627ae879b7388b729d64d6411708522 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 12:34:12 -0500 Subject: [PATCH 01/19] Initial copy/paste from sentry This doesn't work at all yet, but I wanted to get the code imported in one go. --- clients/python/.python-version | 1 + clients/python/README.md | 0 clients/python/pyproject.toml | 76 ++ clients/python/src/__init__.py | 2 + clients/python/src/app.py | 0 clients/python/src/client/__init__.py | 0 clients/python/src/client/client.py | 0 .../src/client/inflight_task_activation.py | 0 .../python/src/client/processing_result.py | 13 + clients/python/src/constants.py | 61 ++ clients/python/src/py.typed | 0 clients/python/src/registry.py | 271 ++++++ clients/python/src/retry.py | 107 ++ clients/python/src/router.py | 5 + clients/python/src/scheduler/__init__.py | 0 clients/python/src/scheduler/config.py | 39 + clients/python/src/scheduler/scheduler.py | 292 ++++++ clients/python/src/scheduler/schedules.py | 195 ++++ clients/python/src/state.py | 42 + clients/python/src/task.py | 254 +++++ clients/python/src/worker/__init__.py | 0 clients/python/src/worker/worker.py | 0 clients/python/src/worker/workerchild.py | 470 +++++++++ clients/python/test/__init__.py | 0 clients/python/test/client/__init__.py | 0 clients/python/test/client/test_client.py | 916 ++++++++++++++++++ clients/python/test/scheduler/__init__.py | 0 clients/python/test/scheduler/test_runner.py | 470 +++++++++ .../python/test/scheduler/test_schedules.py | 200 ++++ clients/python/test/test_app.py | 37 + clients/python/test/test_registry.py | 370 +++++++ clients/python/test/test_retry.py | 129 +++ clients/python/test/test_task.py | 357 +++++++ clients/python/test/worker/__init__.py | 0 clients/python/test/worker/test_worker.py | 722 ++++++++++++++ 35 files changed, 5029 insertions(+) create mode 100644 clients/python/.python-version create mode 100644 clients/python/README.md create mode 100644 clients/python/pyproject.toml create mode 100644 clients/python/src/__init__.py create mode 100644 clients/python/src/app.py create mode 100644 clients/python/src/client/__init__.py create mode 100644 clients/python/src/client/client.py create mode 100644 clients/python/src/client/inflight_task_activation.py create mode 100644 clients/python/src/client/processing_result.py create mode 100644 clients/python/src/constants.py create mode 100644 clients/python/src/py.typed create mode 100644 clients/python/src/registry.py create mode 100644 clients/python/src/retry.py create mode 100644 clients/python/src/router.py create mode 100644 clients/python/src/scheduler/__init__.py create mode 100644 clients/python/src/scheduler/config.py create mode 100644 clients/python/src/scheduler/scheduler.py create mode 100644 clients/python/src/scheduler/schedules.py create mode 100644 clients/python/src/state.py create mode 100644 clients/python/src/task.py create mode 100644 clients/python/src/worker/__init__.py create mode 100644 clients/python/src/worker/worker.py create mode 100644 clients/python/src/worker/workerchild.py create mode 100644 clients/python/test/__init__.py create mode 100644 clients/python/test/client/__init__.py create mode 100644 clients/python/test/client/test_client.py create mode 100644 clients/python/test/scheduler/__init__.py create mode 100644 clients/python/test/scheduler/test_runner.py create mode 100644 clients/python/test/scheduler/test_schedules.py create mode 100644 clients/python/test/test_app.py create mode 100644 clients/python/test/test_registry.py 
create mode 100644 clients/python/test/test_retry.py create mode 100644 clients/python/test/test_task.py create mode 100644 clients/python/test/worker/__init__.py create mode 100644 clients/python/test/worker/test_worker.py diff --git a/clients/python/.python-version b/clients/python/.python-version new file mode 100644 index 00000000..7eebfafa --- /dev/null +++ b/clients/python/.python-version @@ -0,0 +1 @@ +3.12.11 diff --git a/clients/python/README.md b/clients/python/README.md new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml new file mode 100644 index 00000000..d740dc1d --- /dev/null +++ b/clients/python/pyproject.toml @@ -0,0 +1,76 @@ +[project] +name = "taskbroker-client" +version = "0.1.0" +description = "Taskbroker python client and worker runtime" +readme = "README.md" +requires-python = ">=3.12.11" +dependencies = [ + "sentry-protos>=0.2.0", + "confluent_kafka>=2.3.0", + "grpcio==1.66.1", + "orjson>=3.10.10", + "protobuf>=5.28.3", + "types-protobuf>=6.30.2.20250703", +] + +[dependency-groups] +dev = [ + "devservices>=1.2.1", + "sentry-devenv>=1.22.2", + "black==24.10.0", + "pre-commit>=4.2.0", + "pytest>=8.3.3", + "flake8>=7.3.0", + "isort>=5.13.2", + "mypy>=1.17.1", +] + +[build-system] +requires = ["uv_build>=0.8.2,<0.9.0"] +build-backend = "uv_build" + +[tool.uv] +environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"] + +[[tool.uv.index]] +url = "https://pypi.devinfra.sentry.io/simple" +default = true + +[tool.pytest.ini_options] +pythonpath = ["python"] +testpaths = ["test"] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.mypy] +mypy_path = "python" +explicit_package_bases = true +# minimal strictness settings +check_untyped_defs = true +no_implicit_reexport = true +warn_unreachable = true +warn_unused_configs = true +warn_unused_ignores = true +warn_redundant_casts = true +enable_error_code = ["ignore-without-code", "redundant-self"] +local_partial_types = true # compat with dmypy +disallow_any_generics = true +disallow_untyped_defs = true + +# begin: missing 3rd party stubs +[[tool.mypy.overrides]] +module = [ + "confluent_kafka.*", +] +ignore_missing_imports = true +# end: missing 3rd party stubs + +[tool.black] +# File filtering is taken care of in pre-commit. +line-length = 100 +target-version = ['py311'] + +[tool.isort] +profile = "black" +line_length = 100 +lines_between_sections = 1 diff --git a/clients/python/src/__init__.py b/clients/python/src/__init__.py new file mode 100644 index 00000000..ff768f4d --- /dev/null +++ b/clients/python/src/__init__.py @@ -0,0 +1,2 @@ +def hello() -> str: + return "Hello from python!" 
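For orientation, the modules introduced below compose roughly as follows. This is a minimal sketch of the intended API, not working code: the copied sources still import from sentry.* paths (the commit message notes nothing runs yet), and the `demo` namespace and `demo.add` task are hypothetical names used only for illustration.

    from sentry.taskworker.registry import taskregistry
    from sentry.taskworker.retry import LastAction, Retry

    # A namespace binds tasks to a topic and carries default retry behaviour.
    ns = taskregistry.create_namespace(
        "demo",
        retry=Retry(times=3, times_exceeded=LastAction.Discard),
    )

    # register() returns a Task wrapper that can still be called directly.
    @ns.register(name="demo.add")
    def add(a: int, b: int) -> int:
        return a + b

    add.delay(1, 2)  # serialize a TaskActivation and produce it to Kafka
    add.apply_async(args=[1, 2], countdown=30)  # same, delayed by 30 seconds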
diff --git a/clients/python/src/app.py b/clients/python/src/app.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/client/__init__.py b/clients/python/src/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/client/client.py b/clients/python/src/client/client.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/client/inflight_task_activation.py b/clients/python/src/client/inflight_task_activation.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/client/processing_result.py b/clients/python/src/client/processing_result.py new file mode 100644 index 00000000..8cdbb2b4 --- /dev/null +++ b/clients/python/src/client/processing_result.py @@ -0,0 +1,13 @@ +import dataclasses + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivationStatus + + +@dataclasses.dataclass +class ProcessingResult: + """Result structure from child processes to parent""" + + task_id: str + status: TaskActivationStatus.ValueType + host: str + receive_timestamp: float diff --git a/clients/python/src/constants.py b/clients/python/src/constants.py new file mode 100644 index 00000000..817e25ad --- /dev/null +++ b/clients/python/src/constants.py @@ -0,0 +1,61 @@ +from enum import Enum + +DEFAULT_PROCESSING_DEADLINE = 10 +""" +The fallback/default processing_deadline that tasks +will use if neither the TaskNamespace nor the Task defines a deadline +""" + +DEFAULT_REBALANCE_AFTER = 32 +""" +The number of tasks a worker will process before it +selects a new broker instance. +""" + +DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS = 3 +""" +The number of consecutive unavailable errors before the worker will +stop trying to connect to the broker and choose a new one. +""" + +DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT = 20 +""" +The number of seconds to wait before a host is considered available again. +""" + +DEFAULT_WORKER_QUEUE_SIZE = 5 +""" +The size of multiprocessing.Queue used to communicate +with child processes. +""" + +DEFAULT_CHILD_TASK_COUNT = 10000 +""" +The number of tasks a worker child process will process +before being restarted. +""" + +MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE = 20 +""" +The maximum number of seconds to wait before retrying RPCs when the host is unavailable. +""" + + +MAX_PARAMETER_BYTES_BEFORE_COMPRESSION = 3000000 # 3MB +""" +The maximum number of bytes before a task parameter is compressed. +""" + +DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH = 1.0 +""" +The minimum number of seconds between touches of the health check file +""" + + +class CompressionType(Enum): + """ + The type of compression used for task parameters.
+ """ + + ZSTD = "zstd" + PLAINTEXT = "plaintext" diff --git a/clients/python/src/py.typed b/clients/python/src/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/registry.py b/clients/python/src/registry.py new file mode 100644 index 00000000..d06f3b56 --- /dev/null +++ b/clients/python/src/registry.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import datetime +import logging +from collections.abc import Callable +from concurrent import futures +from typing import Any + +import sentry_sdk +from arroyo.backends.kafka import KafkaPayload, KafkaProducer +from arroyo.types import BrokerValue +from arroyo.types import Topic as ArroyoTopic +from django.conf import settings +from sentry.conf.types.kafka_definition import Topic +from sentry.silo.base import SiloMode +from sentry.taskworker.constants import DEFAULT_PROCESSING_DEADLINE, CompressionType +from sentry.taskworker.retry import Retry +from sentry.taskworker.router import TaskRouter +from sentry.taskworker.silolimiter import TaskSiloLimit +from sentry.taskworker.task import P, R, Task +from sentry.utils import metrics +from sentry.utils.arroyo_producer import SingletonProducer, get_arroyo_producer +from sentry.utils.imports import import_string +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation +from sentry_sdk.consts import OP, SPANDATA + +logger = logging.getLogger(__name__) + +ProducerFuture = futures.Future[BrokerValue[KafkaPayload]] + + +class TaskNamespace: + """ + Task namespaces link topics, config and default retry mechanics together + All tasks within a namespace are stored in the same topic and run by shared + worker pool. + """ + + def __init__( + self, + name: str, + router: TaskRouter, + retry: Retry | None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE, + app_feature: str | None = None, + ): + self.name = name + self.router = router + self.default_retry = retry + self.default_expires = expires # seconds + self.default_processing_deadline_duration = processing_deadline_duration # seconds + self.app_feature = app_feature or name + self._registered_tasks: dict[str, Task[Any, Any]] = {} + self._producers: dict[Topic, SingletonProducer] = {} + + def get(self, name: str) -> Task[Any, Any]: + """ + Get a registered task by name + + Raises KeyError when an unknown task is provided. + """ + if name not in self._registered_tasks: + raise KeyError(f"No task registered with the name {name}. Check your imports") + return self._registered_tasks[name] + + def contains(self, name: str) -> bool: + """ + Check if a task name has been registered + """ + return name in self._registered_tasks + + @property + def topic(self) -> Topic: + return self.router.route_namespace(self.name) + + def register( + self, + *, + name: str, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int | datetime.timedelta | None = None, + at_most_once: bool = False, + wait_for_delivery: bool = False, + compression_type: CompressionType = CompressionType.PLAINTEXT, + silo_mode: SiloMode | None = None, + ) -> Callable[[Callable[P, R]], Task[P, R]]: + """ + Register a task. + + Applied as a decorator to functions to enable them to be run + asynchronously via taskworkers. + + Parameters + ---------- + + name: str + The name of the task. This is serialized and must be stable across deploys. + retry: Retry | None + The retry policy for the task. 
If none and at_most_once is not enabled + the Task namespace default retry policy will be used. + expires: int | datetime.timedelta + The number of seconds a task activation is valid for. After this + duration the activation will be discarded and not executed. + at_most_once : bool + Enable at-most-once execution. Tasks with `at_most_once` cannot + define retry policies, and use a worker side idempotency key to + prevent processing deadline based retries. + wait_for_delivery: bool + If true, the task will wait for the delivery report to be received + before returning. + compression_type: CompressionType + The compression type to use to compress the task parameters. + """ + + def wrapped(func: Callable[P, R]) -> Task[P, R]: + task_retry = retry + if not at_most_once: + task_retry = retry or self.default_retry + task = Task( + name=name, + func=func, + namespace=self, + retry=task_retry, + expires=expires or self.default_expires, + processing_deadline_duration=( + processing_deadline_duration or self.default_processing_deadline_duration + ), + at_most_once=at_most_once, + wait_for_delivery=wait_for_delivery, + compression_type=compression_type, + ) + if silo_mode: + silo_limiter = TaskSiloLimit(silo_mode) + task = silo_limiter(task) + # TODO(taskworker) tasks should be registered into the registry + # so that we can ensure task names are globally unique + self._registered_tasks[name] = task + return task + + return wrapped + + def _handle_produce_future(self, future: ProducerFuture, tags: dict[str, str]) -> None: + if future.cancelled(): + metrics.incr("taskworker.registry.send_task.cancelled", tags=tags) + elif future.exception(1): + # this does not block since this callback only gets run when the future is finished and exception is set + metrics.incr("taskworker.registry.send_task.failed", tags=tags) + else: + metrics.incr("taskworker.registry.send_task.success", tags=tags) + + def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) -> None: + topic = self.router.route_namespace(self.name) + + with sentry_sdk.start_span( + op=OP.QUEUE_PUBLISH, + name=activation.taskname, + origin="taskworker", + ) as span: + span.set_data(SPANDATA.MESSAGING_DESTINATION_NAME, activation.namespace) + span.set_data(SPANDATA.MESSAGING_MESSAGE_ID, activation.id) + span.set_data(SPANDATA.MESSAGING_SYSTEM, "taskworker") + + produce_future = self._producer(topic).produce( + ArroyoTopic(name=topic.value), + KafkaPayload(key=None, value=activation.SerializeToString(), headers=[]), + ) + + metrics.incr( + "taskworker.registry.send_task.scheduled", + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "topic": topic.value, + }, + ) + # We know this type is futures.Future, but cannot assert so, + # because it is also mock.Mock in tests. 
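+ # Design note: delivery metrics are recorded from a done-callback so that + # send_task() itself never blocks on the produce future; only the + # wait_for_delivery=True branch below waits on it.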
+ produce_future.add_done_callback( # type:ignore[union-attr] + lambda future: self._handle_produce_future( + future=future, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "topic": topic.value, + }, + ) + ) + if wait_for_delivery: + try: + produce_future.result(timeout=10) + except Exception: + logger.exception("Failed to wait for delivery") + + def _producer(self, topic: Topic) -> SingletonProducer: + if topic not in self._producers: + + def factory() -> KafkaProducer: + return get_arroyo_producer(f"sentry.taskworker.{topic.value}", topic) + + self._producers[topic] = SingletonProducer(factory, max_futures=1000) + return self._producers[topic] + + +class TaskRegistry: + """ + Registry of all namespaces. + + The TaskRegistry is responsible for handling namespace -> topic resolution + during startup. + """ + + def __init__(self) -> None: + self._namespaces: dict[str, TaskNamespace] = {} + self._router = self._build_router() + + def _build_router(self) -> TaskRouter: + router_name: str = settings.TASKWORKER_ROUTER + router_class = import_string(router_name) + router = router_class() + assert hasattr(router, "route_namespace") + + return router + + def contains(self, name: str) -> bool: + return name in self._namespaces + + def get(self, name: str) -> TaskNamespace: + """Fetch a namespace by name.""" + if name not in self._namespaces: + raise KeyError(f"No task namespace with the name {name}") + return self._namespaces[name] + + def get_task(self, namespace: str, task: str) -> Task[Any, Any]: + """Fetch a task by namespace and name.""" + return self.get(namespace).get(task) + + def create_namespace( + self, + name: str, + *, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE, + app_feature: str | None = None, + ) -> TaskNamespace: + """ + Create a task namespace. + + Namespaces are mapped onto topics through the configured router allowing + infrastructure to be scaled based on a region's requirements. + + Namespaces can define default behavior for tasks defined within a namespace. + """ + if name in self._namespaces: + raise ValueError(f"Task namespace with name {name} already exists.") + namespace = TaskNamespace( + name=name, + router=self._router, + retry=retry, + expires=expires, + processing_deadline_duration=processing_deadline_duration, + app_feature=app_feature, + ) + self._namespaces[name] = namespace + + return namespace + + +taskregistry = TaskRegistry() diff --git a/clients/python/src/retry.py b/clients/python/src/retry.py new file mode 100644 index 00000000..68d3f210 --- /dev/null +++ b/clients/python/src/retry.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from enum import Enum +from multiprocessing.context import TimeoutError + +from sentry.taskworker.state import current_task +from sentry.utils import metrics +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, + OnAttemptsExceeded, + RetryState, +) + + +class RetryTaskError(Exception): + """ + Exception that tasks can raise to indicate that the current task activation + should be retried. + """ + + +class NoRetriesRemainingError(RetryTaskError): + """ + Exception that is raised by retry helper methods to signal to tasks that + the current attempt is terminal and there won't be any further retries. 
+ """ + + +class LastAction(Enum): + Deadletter = 1 + Discard = 2 + + def to_proto(self) -> OnAttemptsExceeded.ValueType: + if self == LastAction.Deadletter: + return ON_ATTEMPTS_EXCEEDED_DEADLETTER + if self == LastAction.Discard: + return ON_ATTEMPTS_EXCEEDED_DISCARD + raise ValueError(f"Unknown LastAction: {self}") + + +def retry_task(exc: Exception | None = None, raise_on_no_retries: bool = True) -> None: + """ + Helper for triggering retry errors. + If all retries have been consumed, this will raise a + sentry.taskworker.retry.NoRetriesRemaining + """ + current = current_task() + if current and not current.retries_remaining: + metrics.incr("taskworker.retry.no_retries_remaining") + if raise_on_no_retries: + raise NoRetriesRemainingError() + else: + return + raise RetryTaskError() + + +class Retry: + """Used with tasks to define the retry policy for a task""" + + def __init__( + self, + *, + times: int = 1, + on: tuple[type[BaseException], ...] | None = None, + ignore: tuple[type[BaseException], ...] | None = None, + times_exceeded: LastAction = LastAction.Discard, + delay: int | None = None, + ): + self._times = times + self._allowed_exception_types: tuple[type[BaseException], ...] = on or () + self._denied_exception_types: tuple[type[BaseException], ...] = ignore or () + self._times_exceeded = times_exceeded + self._delay = delay + + def max_attempts_reached(self, state: RetryState) -> bool: + # We subtract one, as attempts starts at 0, but `times` + # starts at 1. + return state.attempts >= (self._times - 1) + + def should_retry(self, state: RetryState, exc: Exception) -> bool: + # If there are no retries remaining we should not retry + if self.max_attempts_reached(state): + return False + + # Explicit RetryTaskError with attempts left. + if isinstance(exc, RetryTaskError): + return True + + # No retries for types on the ignore list + if isinstance(exc, self._denied_exception_types): + return False + + # In the retry allow list or processing deadline is exceeded + # When processing deadline is exceeded, the subprocess raises a TimeoutError + if isinstance(exc, (TimeoutError, self._allowed_exception_types)): + return True + + return False + + def initial_state(self) -> RetryState: + return RetryState( + attempts=0, + max_attempts=self._times, + on_attempts_exceeded=self._times_exceeded.to_proto(), + delay_on_retry=self._delay, + ) diff --git a/clients/python/src/router.py b/clients/python/src/router.py new file mode 100644 index 00000000..02ed1474 --- /dev/null +++ b/clients/python/src/router.py @@ -0,0 +1,5 @@ +from typing import Protocol + + +class TaskRouter(Protocol): + def route_namespace(self, name: str) -> str: ... diff --git a/clients/python/src/scheduler/__init__.py b/clients/python/src/scheduler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/scheduler/config.py b/clients/python/src/scheduler/config.py new file mode 100644 index 00000000..d09096b5 --- /dev/null +++ b/clients/python/src/scheduler/config.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import dataclasses +from collections.abc import Mapping +from datetime import timedelta +from typing import TypedDict + + +@dataclasses.dataclass +class crontab: + """ + crontab schedule value object + + Used in configuration to define a task schedule. + + :see sentry.taskworker.scheduler.schedules.CrontabSchedule for more details. 
+ """ + + minute: str = "*" + hour: str = "*" + day_of_week: str = "*" + day_of_month: str = "*" + month_of_year: str = "*" + + def __str__(self) -> str: + return ( + f"{self.minute} {self.hour} {self.day_of_month} {self.month_of_year} {self.day_of_week}" + ) + + +class ScheduleConfig(TypedDict): + """The schedule definition for an individual task.""" + + task: str + schedule: timedelta | crontab + + +ScheduleConfigMap = Mapping[str, ScheduleConfig] +"""A collection of schedule configuration, usually defined in application configuration""" diff --git a/clients/python/src/scheduler/scheduler.py b/clients/python/src/scheduler/scheduler.py new file mode 100644 index 00000000..201b1ca6 --- /dev/null +++ b/clients/python/src/scheduler/scheduler.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +import heapq +import logging +from collections.abc import Mapping +from datetime import datetime, timedelta +from typing import TYPE_CHECKING, Any + +from django.utils import timezone +from redis.client import StrictRedis +from rediscluster import RedisCluster +from sentry.conf.types.taskworker import ScheduleConfig, crontab +from sentry.taskworker.app import TaskworkerApp +from sentry.taskworker.scheduler.schedules import CrontabSchedule, Schedule, TimedeltaSchedule +from sentry.taskworker.task import Task +from sentry.utils import metrics +from sentry_sdk import capture_exception +from sentry_sdk.crons import MonitorStatus, capture_checkin + +logger = logging.getLogger("taskworker.scheduler") + +if TYPE_CHECKING: + from sentry_sdk._types import MonitorConfig + + +class RunStorage: + """ + Storage interface for tracking the last run time of tasks. + This is split out from `ScheduleRunner` to allow us to change storage + in the future, or adapt taskworkers for other applications should we need to. + """ + + def __init__(self, redis: RedisCluster[str] | StrictRedis[str]) -> None: + self._redis = redis + + def _make_key(self, taskname: str) -> str: + return f"tw:scheduler:{taskname}" + + def set(self, taskname: str, next_runtime: datetime) -> bool: + """ + Record a spawn time for a task. + The next_runtime parameter indicates when the record should expire, + and a task can be spawned again. + + Returns False when the key is set and a task should not be spawned. + """ + now = timezone.now() + # next_runtime & now could be the same second, and redis gets sad if ex=0 + duration = max(int((next_runtime - now).total_seconds()), 1) + + result = self._redis.set(self._make_key(taskname), now.isoformat(), ex=duration, nx=True) + return bool(result) + + def read(self, taskname: str) -> datetime | None: + """ + Retrieve the last run time of a task + Returns None if last run time has expired or is unknown. 
+ """ + result = self._redis.get(self._make_key(taskname)) + if result: + return datetime.fromisoformat(result) + + metrics.incr("taskworker.scheduler.run_storage.read.miss", tags={"taskname": taskname}) + return None + + def read_many(self, tasknames: list[str]) -> Mapping[str, datetime | None]: + """ + Retreive last run times in bulk + """ + values = self._redis.mget([self._make_key(taskname) for taskname in tasknames]) + run_times = { + taskname: datetime.fromisoformat(value) if value else None + for taskname, value in zip(tasknames, values) + } + return run_times + + def delete(self, taskname: str) -> None: + """remove a task key - mostly for testing.""" + self._redis.delete(self._make_key(taskname)) + + +class ScheduleEntry: + """An individual task that can be scheduled to be run.""" + + def __init__(self, *, key: str, task: Task[Any, Any], schedule: timedelta | crontab) -> None: + self._key = key + self._task = task + scheduler: Schedule + if isinstance(schedule, crontab): + scheduler = CrontabSchedule(task.fullname, schedule) + else: + scheduler = TimedeltaSchedule(schedule) + self._schedule = scheduler + self._last_run: datetime | None = None + + def __lt__(self, other: ScheduleEntry) -> bool: + # Secondary sorting for heapq when remaining time is the same + return self.fullname < other.fullname + + def __repr__(self) -> str: + last_run = self._last_run.isoformat() if self._last_run else None + remaining_seconds = self.remaining_seconds() + + return f"" + + @property + def fullname(self) -> str: + return self._task.fullname + + @property + def namespace(self) -> str: + return self._task.namespace.name + + @property + def taskname(self) -> str: + return self._task.name + + def set_last_run(self, last_run: datetime | None) -> None: + self._last_run = last_run + + def is_due(self) -> bool: + return self._schedule.is_due(self._last_run) + + def remaining_seconds(self) -> int: + return self._schedule.remaining_seconds(self._last_run) + + def runtime_after(self, start: datetime) -> datetime: + return self._schedule.runtime_after(start) + + def delay_task(self) -> None: + monitor_config = self.monitor_config() + headers: dict[str, Any] = {} + if monitor_config: + check_in_id = capture_checkin( + monitor_slug=self._key, + monitor_config=monitor_config, + status=MonitorStatus.IN_PROGRESS, + ) + headers = { + "sentry-monitor-check-in-id": check_in_id, + "sentry-monitor-slug": self._key, + } + + # We don't need every task linked back to the scheduler trace + headers["sentry-propagate-traces"] = False + + self._task.apply_async(headers=headers) + + def monitor_config(self) -> MonitorConfig | None: + checkin_config: MonitorConfig = { + "schedule": {}, + "timezone": timezone.get_current_timezone_name(), + } + if isinstance(self._schedule, CrontabSchedule): + checkin_config["schedule"]["type"] = "crontab" + checkin_config["schedule"]["value"] = self._schedule.monitor_value() + elif isinstance(self._schedule, TimedeltaSchedule): + (interval_value, interval_units) = self._schedule.monitor_interval() + # Monitors does not support intervals less than 1 minute. + if interval_units == "second": + return None + + checkin_config["schedule"]["type"] = "interval" + checkin_config["schedule"]["value"] = interval_value + checkin_config["schedule"]["unit"] = interval_units + + return checkin_config + + +class ScheduleRunner: + """ + A task scheduler that a command run process can use to spawn tasks + based on their schedules. 
+ + Contains a collection of ScheduleEntry objects which are composed + using `ScheduleRunner.add()`. Once the scheduler is built, `tick()` + is used in a while loop to spawn tasks and sleep. + """ + + def __init__(self, app: TaskworkerApp, run_storage: RunStorage) -> None: + self._entries: list[ScheduleEntry] = [] + self._app = app + self._run_storage = run_storage + self._heap: list[tuple[int, ScheduleEntry]] = [] + + def add(self, key: str, task_config: ScheduleConfig) -> None: + """Add a scheduled task to the runner.""" + try: + (namespace, taskname) = task_config["task"].split(":") + except ValueError: + raise ValueError("Invalid task name. Must be in the format namespace:taskname") + + task = self._app.taskregistry.get_task(namespace, taskname) + entry = ScheduleEntry(key=key, task=task, schedule=task_config["schedule"]) + self._entries.append(entry) + self._heap = [] + + def log_startup(self) -> None: + task_names = [entry.fullname for entry in self._entries] + logger.info("taskworker.scheduler.startup", extra={"tasks": task_names}) + + def tick(self) -> float: + """ + Check if any tasks are due to run at current_time, and spawn them. + + Returns the number of seconds to sleep until the next task is due. + """ + self._update_heap() + + if not self._heap: + logger.warning("taskworker.scheduler.no_heap") + return 60 + + while True: + # Peek at the top, and if it is due, pop, spawn and update last run time + _, entry = self._heap[0] + if entry.is_due(): + heapq.heappop(self._heap) + try: + self._try_spawn(entry) + except Exception as e: + # Trap errors from spawning/update state so that the heap stays consistent. + capture_exception(e) + heapq.heappush(self._heap, (entry.remaining_seconds(), entry)) + continue + else: + # The top of the heap isn't ready, break for sleep + break + + return self._heap[0][0] + + def _try_spawn(self, entry: ScheduleEntry) -> None: + now = timezone.now() + next_runtime = entry.runtime_after(now) + if self._run_storage.set(entry.fullname, next_runtime): + entry.delay_task() + entry.set_last_run(now) + + logger.debug("taskworker.scheduler.delay_task", extra={"fullname": entry.fullname}) + metrics.incr( + "taskworker.scheduler.delay_task", + tags={ + "taskname": entry.taskname, + "namespace": entry.namespace, + }, + sample_rate=1.0, + ) + else: + # We were not able to set a key, load last run from storage. + run_state = self._run_storage.read(entry.fullname) + entry.set_last_run(run_state) + + logger.info( + "taskworker.scheduler.sync_with_storage", + extra={ + "taskname": entry.taskname, + "namespace": entry.namespace, + "last_runtime": run_state.isoformat() if run_state else None, + }, + ) + metrics.incr( + "taskworker.scheduler.sync_with_storage", + tags={"taskname": entry.taskname, "namespace": entry.namespace}, + ) + + def _update_heap(self) -> None: + """update the heap to reflect current remaining time""" + if not self._heap: + self._load_last_run() + + heap_items = [(item.remaining_seconds(), item) for item in self._entries] + heapq.heapify(heap_items) + self._heap = heap_items + + def _load_last_run(self) -> None: + """ + load last_run state from storage + + We synchronize each time the schedule set is modified and + then incrementally as tasks spawn attempts are made. 
+ """ + last_run_times = self._run_storage.read_many([item.fullname for item in self._entries]) + for item in self._entries: + last_run = last_run_times.get(item.fullname, None) + item.set_last_run(last_run) + logger.info( + "taskworker.scheduler.load_last_run", + extra={ + "entry_count": len(self._entries), + "loaded_count": len(last_run_times), + }, + ) diff --git a/clients/python/src/scheduler/schedules.py b/clients/python/src/scheduler/schedules.py new file mode 100644 index 00000000..5470a556 --- /dev/null +++ b/clients/python/src/scheduler/schedules.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import abc +import logging +from datetime import datetime, timedelta +from typing import TYPE_CHECKING + +from cronsim import CronSim, CronSimError +from django.utils import timezone +from sentry.conf.types.taskworker import crontab + +if TYPE_CHECKING: + from sentry_sdk._types import MonitorConfigScheduleUnit + +logger = logging.getLogger("taskworker.scheduler") + + +class Schedule(metaclass=abc.ABCMeta): + """Interface for scheduling tasks to run at specific times.""" + + @abc.abstractmethod + def is_due(self, last_run: datetime | None = None) -> bool: + """ + Check if the schedule is due to run again based on last_run. + """ + + @abc.abstractmethod + def remaining_seconds(self, last_run: datetime | None = None) -> int: + """ + Get the remaining seconds until the schedule should run again. + """ + + @abc.abstractmethod + def runtime_after(self, start: datetime) -> datetime: + """ + Get the next scheduled time after `start` + """ + + +class TimedeltaSchedule(Schedule): + """ + Task schedules defined as `datetime.timedelta` intervals + + If a timedelta interval loses it's last_run state, it will assume + that at least one interval has been missed, and it will become due immediately. + + After the first spawn, the schedule will align to to the interval's duration. + """ + + def __init__(self, delta: timedelta) -> None: + self._delta = delta + if delta.microseconds: + raise ValueError("microseconds are not supported") + if delta.total_seconds() < 0: + raise ValueError("interval must be at least one second") + + def monitor_interval(self) -> tuple[int, MonitorConfigScheduleUnit]: + time_units: tuple[tuple[MonitorConfigScheduleUnit, float], ...] = ( + ("day", 60 * 60 * 24.0), + ("hour", 60 * 60.0), + ("minute", 60.0), + ) + + seconds = self._delta.total_seconds() + for unit, divider in time_units: + if seconds >= divider: + interval = int(seconds / divider) + return (interval, unit) + + return (int(seconds), "second") + + def is_due(self, last_run: datetime | None = None) -> bool: + """Check if the schedule is due to run again based on last_run.""" + if last_run is None: + return True + remaining = self.remaining_seconds(last_run) + return remaining <= 0 + + def remaining_seconds(self, last_run: datetime | None = None) -> int: + """The number of seconds remaining until the next task should spawn""" + if last_run is None: + return 0 + # floor to timestamp as microseconds are not relevant + now = int(timezone.now().timestamp()) + last_run_ts = int(last_run.timestamp()) + + seconds_remaining = self._delta.total_seconds() - (now - last_run_ts) + return max(int(seconds_remaining), 0) + + def runtime_after(self, start: datetime) -> datetime: + """Get the next time a task should run after start""" + return start + self._delta + + +class CrontabSchedule(Schedule): + """ + Task schedules defined as crontab expressions. + + crontab expressions naturally align to clock intervals. 
For example + an interval of `crontab(minute="*/2")` will spawn on the even numbered minutes. + + If a crontab schedule loses its last_run state, it will assume that + one or more intervals have been missed, and it will align to the next + interval window. Missed intervals will not be recovered. + + For tasks with very long intervals, you should consider the impact of a deploy + or scheduler restart causing a missed window. Consider a more frequent interval + to help spread load out and reduce the impacts of missed intervals. + """ + + def __init__(self, name: str, crontab: crontab) -> None: + self._crontab = crontab + self._name = name + try: + self._cronsim = CronSim(str(crontab), timezone.now()) + except CronSimError as e: + raise ValueError(f"crontab expression {self._crontab} is invalid") from e + + def monitor_value(self) -> str: + """Get the crontab expression as a string""" + return str(self._crontab) + + def is_due(self, last_run: datetime | None = None) -> bool: + """Check if the schedule is due to run again based on last_run.""" + if last_run is None: + last_run = timezone.now() - timedelta(minutes=1) + remaining = self.remaining_seconds(last_run) + return remaining <= 0 + + def remaining_seconds(self, last_run: datetime | None = None) -> int: + """ + Get the number of seconds until this schedule is due again + + Use the current time to find the next schedule time + """ + if last_run is None: + last_run = timezone.now() - timedelta(minutes=1) + + # This could result in missed beats, or increased load on redis. + last_run = last_run.replace(second=0, microsecond=0) + now = timezone.now().replace(second=0, microsecond=0) + + # A future last_run means we should wait until the + # next scheduled time, and then we can try again. + # we could be competing with another scheduler, or + # missing beats. + if last_run > now: + logger.warning( + "taskworker.scheduler.future_value", + extra={ + "task": self._name, + "last_run": last_run, + "now": now, + }, + ) + next_run = self._advance(last_run + timedelta(minutes=1)) + return int(next_run.timestamp() - now.timestamp()) + + # If last run is in the past, see if the next runtime + # is in the future. 
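+ # Worked example (illustrative): with crontab(minute="*/5"), last_run=12:00 + # and now=12:03, _advance(12:01) yields 12:05, so 120 seconds remain.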
+ if last_run < now: + next_run = self._advance(last_run + timedelta(minutes=1)) + # Our next runtime is in the future, or now + if next_run >= now: + return int(next_run.timestamp() - now.timestamp()) + + # still in the past, we missed an interval :( + missed = next_run + next_run = self._advance(now) + logger.warning( + "taskworker.scheduler.missed_interval", + extra={ + "task": self._name, + "last_run": last_run.isoformat(), + "missed": missed.isoformat(), + "now": now.isoformat(), + "next_run": next_run.isoformat(), + }, + ) + return int(next_run.timestamp() - now.timestamp()) + + # last_run == now, we are on the beat, find the next interval + next_run = self._advance(now + timedelta(minutes=1)) + + return int(next_run.timestamp() - now.timestamp()) + + def _advance(self, dt: datetime) -> datetime: + self._cronsim.dt = dt + self._cronsim.advance() + return self._cronsim.dt + + def runtime_after(self, start: datetime) -> datetime: + """Get the next time a task should be spawned after `start`""" + start = start.replace(second=0, microsecond=0) + timedelta(minutes=1) + return self._advance(start) diff --git a/clients/python/src/state.py b/clients/python/src/state.py new file mode 100644 index 00000000..9217262c --- /dev/null +++ b/clients/python/src/state.py @@ -0,0 +1,42 @@ +import dataclasses +import threading + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + +_current_state = threading.local() + + +@dataclasses.dataclass +class CurrentTaskState: + id: str + namespace: str + taskname: str + attempt: int + processing_deadline_duration: int + retries_remaining: bool + + +def current_task() -> CurrentTaskState | None: + if not hasattr(_current_state, "state"): + _current_state.state = None + + return _current_state.state + + +def set_current_task(activation: TaskActivation) -> None: + retry_state = activation.retry_state + state = CurrentTaskState( + id=activation.id, + namespace=activation.namespace, + taskname=activation.taskname, + attempt=activation.retry_state.attempts, + # We subtract one, as attempts starts at 0, but `max_attempts` + # starts at 1. 
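+ # For example, with max_attempts=3: attempts 0 and 1 still have retries + # remaining, while attempt 2 is the final attempt.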
+ retries_remaining=(retry_state.attempts < (retry_state.max_attempts - 1)), + processing_deadline_duration=activation.processing_deadline_duration, + ) + _current_state.state = state + + +def clear_current_task() -> None: + _current_state.state = None diff --git a/clients/python/src/task.py b/clients/python/src/task.py new file mode 100644 index 00000000..293c0eae --- /dev/null +++ b/clients/python/src/task.py @@ -0,0 +1,254 @@ +from __future__ import annotations + +import base64 +import datetime +import time +from collections.abc import Callable, Collection, Mapping, MutableMapping +from functools import update_wrapper +from typing import TYPE_CHECKING, Any, Generic, ParamSpec, TypeVar +from uuid import uuid4 + +import orjson +import sentry_sdk +import zstandard as zstd +from django.conf import settings +from django.utils import timezone +from google.protobuf.timestamp_pb2 import Timestamp +from sentry.taskworker.constants import ( + DEFAULT_PROCESSING_DEADLINE, + MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, + CompressionType, +) +from sentry.taskworker.retry import Retry +from sentry.utils import metrics +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DISCARD, + RetryState, + TaskActivation, +) + +if TYPE_CHECKING: + from sentry.taskworker.registry import TaskNamespace + + +P = ParamSpec("P") +R = TypeVar("R") + + +class Task(Generic[P, R]): + def __init__( + self, + name: str, + func: Callable[P, R], + namespace: TaskNamespace, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int | datetime.timedelta | None = None, + at_most_once: bool = False, + wait_for_delivery: bool = False, + compression_type: CompressionType = CompressionType.PLAINTEXT, + ): + self.name = name + self._func = func + self._namespace = namespace + self._expires = expires + self._processing_deadline_duration = ( + processing_deadline_duration or DEFAULT_PROCESSING_DEADLINE + ) + if at_most_once and retry: + raise AssertionError( + """ + You cannot enable at_most_once and have retries defined. + Having retries enabled means that a task supports being executed + multiple times and thus cannot be idempotent. + """ + ) + self._retry = retry + self.at_most_once = at_most_once + self.wait_for_delivery = wait_for_delivery + self.compression_type = compression_type + update_wrapper(self, func) + + @property + def fullname(self) -> str: + return f"{self._namespace.name}:{self.name}" + + @property + def namespace(self) -> TaskNamespace: + return self._namespace + + @property + def retry(self) -> Retry | None: + return self._retry + + def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R: + """ + Call the task function immediately. + """ + return self._func(*args, **kwargs) + + def delay(self, *args: P.args, **kwargs: P.kwargs) -> None: + """ + Schedule a task to run later with a set of arguments. + + The provided parameters will be JSON encoded and stored within + a `TaskActivation` protobuf that is appended to kafka + """ + self.apply_async(args=args, kwargs=kwargs) + + def apply_async( + self, + args: Any | None = None, + kwargs: Any | None = None, + headers: MutableMapping[str, Any] | None = None, + expires: int | datetime.timedelta | None = None, + countdown: int | datetime.timedelta | None = None, + **options: Any, + ) -> None: + """ + Schedule a task to run later with a set of arguments. + + The provided parameters will be JSON encoded and stored within + a `TaskActivation` protobuf that is appended to kafka. 
+ """ + if args is None: + args = [] + if kwargs is None: + kwargs = {} + + self._signal_send(task=self, args=args, kwargs=kwargs) + + # Generate an activation even if we're in immediate mode to + # catch serialization errors in tests. + activation = self.create_activation( + args=args, kwargs=kwargs, headers=headers, expires=expires, countdown=countdown + ) + if settings.TASKWORKER_ALWAYS_EAGER: + self._func(*args, **kwargs) + else: + # TODO(taskworker) promote parameters to headers + self._namespace.send_task( + activation, + wait_for_delivery=self.wait_for_delivery, + ) + + def _signal_send(self, task: Task[Any, Any], args: Any, kwargs: Any) -> None: + """ + This method is a stub that sentry.testutils.task_runner.BurstRunner or other testing + hooks can monkeypatch to capture tasks that are being produced. + """ + pass + + def create_activation( + self, + args: Collection[Any], + kwargs: Mapping[Any, Any], + headers: MutableMapping[str, Any] | None = None, + expires: int | datetime.timedelta | None = None, + countdown: int | datetime.timedelta | None = None, + ) -> TaskActivation: + received_at = Timestamp() + received_at.FromDatetime(timezone.now()) + + processing_deadline = self._processing_deadline_duration + if isinstance(processing_deadline, datetime.timedelta): + processing_deadline = int(processing_deadline.total_seconds()) + + if expires is None: + expires = self._expires + if isinstance(expires, datetime.timedelta): + expires = int(expires.total_seconds()) + + if isinstance(countdown, datetime.timedelta): + countdown = int(countdown.total_seconds()) + + if not headers: + headers = {} + + if headers.get("sentry-propagate-traces", True): + headers = { + "sentry-trace": sentry_sdk.get_traceparent() or "", + "baggage": sentry_sdk.get_baggage() or "", + **headers, + } + + # Monitor config is patched in by the sentry_sdk + # however, taskworkers do not support the nested object, + # nor do they use it when creating checkins. + if "sentry-monitor-config" in headers: + del headers["sentry-monitor-config"] + + for key, value in headers.items(): + if value is None or isinstance(value, (str, bytes, int, bool, float)): + headers[key] = str(value) + else: + raise ValueError( + "Only scalar header values are supported. 
" + f"The `{key}` header value is of type {type(value)}" + ) + + parameters_json = orjson.dumps({"args": args, "kwargs": kwargs}) + if ( + len(parameters_json) > MAX_PARAMETER_BYTES_BEFORE_COMPRESSION + or self.compression_type == CompressionType.ZSTD + ): + # Worker uses this header to determine if the parameters are decompressed + headers["compression-type"] = CompressionType.ZSTD.value + start_time = time.perf_counter() + parameters_str = base64.b64encode(zstd.compress(parameters_json)).decode("utf8") + end_time = time.perf_counter() + + metrics.distribution( + "taskworker.producer.compressed_parameters_size", + len(parameters_str), + tags={ + "namespace": self._namespace.name, + "taskname": self.name, + "topic": self._namespace.topic.value, + }, + ) + metrics.distribution( + "taskworker.producer.compression_time", + end_time - start_time, + tags={ + "namespace": self._namespace.name, + "taskname": self.name, + "topic": self._namespace.topic.value, + }, + ) + else: + parameters_str = parameters_json.decode("utf8") + + return TaskActivation( + id=uuid4().hex, + namespace=self._namespace.name, + taskname=self.name, + headers=headers, + parameters=parameters_str, + retry_state=self._create_retry_state(), + received_at=received_at, + processing_deadline_duration=processing_deadline, + expires=expires, + delay=countdown, + ) + + def _create_retry_state(self) -> RetryState: + retry = self.retry or self._namespace.default_retry or None + if not retry or self.at_most_once: + # If the task and namespace have no retry policy, + # or can only be attempted once make a single + # attempt and then discard the task. + return RetryState( + attempts=0, + max_attempts=1, + on_attempts_exceeded=ON_ATTEMPTS_EXCEEDED_DISCARD, + at_most_once=self.at_most_once, + ) + return retry.initial_state() + + def should_retry(self, state: RetryState, exc: Exception) -> bool: + # No retry policy means no retries. 
+ retry = self.retry + if not retry: + return False + return retry.should_retry(state, exc) diff --git a/clients/python/src/worker/__init__.py b/clients/python/src/worker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/worker/worker.py b/clients/python/src/worker/worker.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/worker/workerchild.py b/clients/python/src/worker/workerchild.py new file mode 100644 index 00000000..d4cf1309 --- /dev/null +++ b/clients/python/src/worker/workerchild.py @@ -0,0 +1,470 @@ +from __future__ import annotations + +import base64 +import contextlib +import logging +import queue +import signal +import time +from collections.abc import Callable, Generator +from multiprocessing.synchronize import Event +from types import FrameType +from typing import Any + +# XXX: Don't import any modules that will import django here, do those within child_process +import orjson +import sentry_sdk +import zstandard as zstd +from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation +from sentry.taskworker.client.processing_result import ProcessingResult +from sentry.taskworker.constants import CompressionType +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + TASK_ACTIVATION_STATUS_COMPLETE, + TASK_ACTIVATION_STATUS_FAILURE, + TASK_ACTIVATION_STATUS_RETRY, + TaskActivation, + TaskActivationStatus, +) +from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS +from sentry_sdk.crons import MonitorStatus, capture_checkin + +logger = logging.getLogger("sentry.taskworker.worker") + + +class ProcessingDeadlineExceeded(BaseException): + pass + + +def child_worker_init(process_type: str) -> None: + """ + Configure django and load task modules for workers + Child worker processes are spawned and don't inherit db + connections or configuration from the parent process. + """ + from sentry.runner import configure + + if process_type == "spawn": + configure() + + +@contextlib.contextmanager +def timeout_alarm( + seconds: int, handler: Callable[[int, FrameType | None], None] +) -> Generator[None]: + """ + Context manager to handle SIGALRM handlers + + To prevent tasks from consuming a worker forever, we set a timeout + alarm that will interrupt tasks that run longer than + their processing_deadline. + """ + original = signal.signal(signal.SIGALRM, handler) + try: + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, original) + + +def load_parameters(data: str, headers: dict[str, str]) -> dict[str, Any]: + compression_type = headers.get("compression-type", None) + if not compression_type or compression_type == CompressionType.PLAINTEXT.value: + return orjson.loads(data) + elif compression_type == CompressionType.ZSTD.value: + return orjson.loads(zstd.decompress(base64.b64decode(data))) + else: + logger.error( + "Unsupported compression type: %s. 
Continuing with plaintext.", compression_type + ) + return orjson.loads(data) + + +def status_name(status: TaskActivationStatus.ValueType) -> str: + """Convert a TaskActivationStatus to a human readable name""" + if status == TASK_ACTIVATION_STATUS_COMPLETE: + return "complete" + if status == TASK_ACTIVATION_STATUS_FAILURE: + return "failure" + if status == TASK_ACTIVATION_STATUS_RETRY: + return "retry" + return f"unknown-{status}" + + +def child_process( + app_module: str, + child_tasks: queue.Queue[InflightTaskActivation], + processed_tasks: queue.Queue[ProcessingResult], + shutdown_event: Event, + max_task_count: int | None, + processing_pool_name: str, + process_type: str, +) -> None: + """ + The entrypoint for spawned worker children. + + Any import that could pull in django needs to be put inside this function + and not the module root. If modules that include django are imported at + the module level the wrong django settings will be used. + """ + child_worker_init(process_type) + + from sentry.taskworker.app import import_app + from sentry.taskworker.retry import NoRetriesRemainingError + from sentry.taskworker.state import clear_current_task, current_task, set_current_task + from sentry.taskworker.task import Task + from sentry.utils import metrics + from sentry.utils.memory import track_memory_usage + + app = import_app(app_module) + app.load_modules() + taskregistry = app.taskregistry + + def _get_known_task(activation: TaskActivation) -> Task[Any, Any] | None: + if not taskregistry.contains(activation.namespace): + logger.error( + "taskworker.invalid_namespace", + extra={"namespace": activation.namespace, "taskname": activation.taskname}, + ) + return None + + namespace = taskregistry.get(activation.namespace) + if not namespace.contains(activation.taskname): + logger.error( + "taskworker.invalid_taskname", + extra={"namespace": activation.namespace, "taskname": activation.taskname}, + ) + return None + return namespace.get(activation.taskname) + + def run_worker( + child_tasks: queue.Queue[InflightTaskActivation], + processed_tasks: queue.Queue[ProcessingResult], + shutdown_event: Event, + max_task_count: int | None, + processing_pool_name: str, + process_type: str, + ) -> None: + processed_task_count = 0 + + def handle_alarm(signum: int, frame: FrameType | None) -> None: + """ + Handle SIGALRM + + If we hit an alarm in a child, we need to push a result + and terminate the child.
+ """ + deadline = -1 + current = current_task() + taskname = "unknown" + if current: + taskname = current.taskname + deadline = current.processing_deadline_duration + raise ProcessingDeadlineExceeded( + f"execution deadline of {deadline} seconds exceeded by {taskname}" + ) + + while not shutdown_event.is_set(): + if max_task_count and processed_task_count >= max_task_count: + metrics.incr( + "taskworker.worker.max_task_count_reached", + tags={"count": processed_task_count, "processing_pool": processing_pool_name}, + ) + logger.info( + "taskworker.max_task_count_reached", extra={"count": processed_task_count} + ) + break + + try: + inflight = child_tasks.get(timeout=1.0) + except queue.Empty: + metrics.incr( + "taskworker.worker.child_task_queue_empty", + tags={"processing_pool": processing_pool_name}, + ) + continue + + task_func = _get_known_task(inflight.activation) + if not task_func: + metrics.incr( + "taskworker.worker.unknown_task", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + sample_rate=1.0, + ) + with sentry_sdk.isolation_scope() as scope: + scope.set_tag("taskname", inflight.activation.taskname) + scope.set_tag("namespace", inflight.activation.namespace) + scope.set_tag("processing_pool", processing_pool_name) + scope.set_extra("activation", str(inflight.activation)) + scope.capture_message( + f"Unregistered task {inflight.activation.taskname} was not executed" + ) + + processed_tasks.put( + ProcessingResult( + task_id=inflight.activation.id, + status=TASK_ACTIVATION_STATUS_FAILURE, + host=inflight.host, + receive_timestamp=inflight.receive_timestamp, + ) + ) + continue + + if task_func.at_most_once: + if app.should_attempt_at_most_once(inflight.activation): + metrics.incr( + "taskworker.task.at_most_once.executed", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + ) + else: + metrics.incr( + "taskworker.worker.at_most_once.skipped", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + ) + continue + + set_current_task(inflight.activation) + + next_state = TASK_ACTIVATION_STATUS_FAILURE + # Use time.time() so we can measure against activation.received_at + execution_start_time = time.time() + try: + with timeout_alarm(inflight.activation.processing_deadline_duration, handle_alarm): + _execute_activation(task_func, inflight.activation) + next_state = TASK_ACTIVATION_STATUS_COMPLETE + except ProcessingDeadlineExceeded as err: + with sentry_sdk.isolation_scope() as scope: + scope.fingerprint = [ + "taskworker.processing_deadline_exceeded", + inflight.activation.namespace, + inflight.activation.taskname, + ] + scope.set_transaction_name(inflight.activation.taskname) + sentry_sdk.capture_exception(err) + metrics.incr( + "taskworker.worker.processing_deadline_exceeded", + tags={ + "processing_pool": processing_pool_name, + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + }, + ) + next_state = TASK_ACTIVATION_STATUS_FAILURE + except Exception as err: + retry = task_func.retry + captured_error = False + if retry: + if retry.should_retry(inflight.activation.retry_state, err): + logger.info( + "taskworker.task.retry", + extra={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": 
processing_pool_name, + "error": str(err), + }, + ) + next_state = TASK_ACTIVATION_STATUS_RETRY + elif retry.max_attempts_reached(inflight.activation.retry_state): + with sentry_sdk.isolation_scope() as scope: + retry_error = NoRetriesRemainingError( + f"{inflight.activation.taskname} has consumed all of its retries" + ) + retry_error.__cause__ = err + scope.fingerprint = [ + "taskworker.no_retries_remaining", + inflight.activation.namespace, + inflight.activation.taskname, + ] + scope.set_transaction_name(inflight.activation.taskname) + sentry_sdk.capture_exception(retry_error) + captured_error = True + + if not captured_error and next_state != TASK_ACTIVATION_STATUS_RETRY: + sentry_sdk.capture_exception(err) + + clear_current_task() + processed_task_count += 1 + + # Get completion time before pushing to queue, so we can measure queue append time + execution_complete_time = time.time() + with metrics.timer( + "taskworker.worker.processed_tasks.put.duration", + tags={ + "processing_pool": processing_pool_name, + }, + ): + processed_tasks.put( + ProcessingResult( + task_id=inflight.activation.id, + status=next_state, + host=inflight.host, + receive_timestamp=inflight.receive_timestamp, + ) + ) + + record_task_execution( + inflight.activation, + next_state, + execution_start_time, + execution_complete_time, + processing_pool_name, + inflight.host, + ) + + def _execute_activation(task_func: Task[Any, Any], activation: TaskActivation) -> None: + """Invoke a task function with the activation parameters.""" + headers = {k: v for k, v in activation.headers.items()} + parameters = load_parameters(activation.parameters, headers) + + args = parameters.get("args", []) + kwargs = parameters.get("kwargs", {}) + + transaction = sentry_sdk.continue_trace( + environ_or_headers=headers, + op="queue.task.taskworker", + name=activation.taskname, + origin="taskworker", + ) + sampling_context = { + "taskworker": { + "task": activation.taskname, + } + } + with ( + track_memory_usage( + "taskworker.worker.memory_change", + tags={"namespace": activation.namespace, "taskname": activation.taskname}, + ), + sentry_sdk.isolation_scope(), + sentry_sdk.start_transaction(transaction, custom_sampling_context=sampling_context), + ): + transaction.set_data( + "taskworker-task", {"args": args, "kwargs": kwargs, "id": activation.id} + ) + task_added_time = activation.received_at.ToDatetime().timestamp() + # latency attribute needs to be in milliseconds + latency = (time.time() - task_added_time) * 1000 + + with sentry_sdk.start_span( + op=OP.QUEUE_PROCESS, + name=activation.taskname, + origin="taskworker", + ) as span: + span.set_data(SPANDATA.MESSAGING_DESTINATION_NAME, activation.namespace) + span.set_data(SPANDATA.MESSAGING_MESSAGE_ID, activation.id) + span.set_data(SPANDATA.MESSAGING_MESSAGE_RECEIVE_LATENCY, latency) + span.set_data( + SPANDATA.MESSAGING_MESSAGE_RETRY_COUNT, activation.retry_state.attempts + ) + span.set_data(SPANDATA.MESSAGING_SYSTEM, "taskworker") + + # TODO(taskworker) remove this when doing cleanup + # The `__start_time` parameter is spliced into task parameters by + # sentry.celery.SentryTask._add_metadata and needs to be removed + # from kwargs like sentry.tasks.base.instrumented_task does. 
+ if "__start_time" in kwargs: + kwargs.pop("__start_time") + + try: + task_func(*args, **kwargs) + transaction.set_status(SPANSTATUS.OK) + except Exception: + transaction.set_status(SPANSTATUS.INTERNAL_ERROR) + raise + + def record_task_execution( + activation: TaskActivation, + status: TaskActivationStatus.ValueType, + start_time: float, + completion_time: float, + processing_pool_name: str, + taskbroker_host: str, + ) -> None: + task_added_time = activation.received_at.ToDatetime().timestamp() + execution_duration = completion_time - start_time + execution_latency = completion_time - task_added_time + + logger.debug( + "taskworker.task_execution", + extra={ + "taskname": activation.taskname, + "execution_duration": execution_duration, + "execution_latency": execution_latency, + "status": status_name(status), + }, + ) + metrics.incr( + "taskworker.worker.execute_task", + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "status": status_name(status), + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + metrics.distribution( + "taskworker.worker.execution_duration", + execution_duration, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + metrics.distribution( + "taskworker.worker.execution_latency", + execution_latency, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + + namespace = taskregistry.get(activation.namespace) + metrics.incr( + "taskworker.cogs.usage", + amount=int(execution_duration * 1000), + tags={"feature": namespace.app_feature}, + ) + + if ( + "sentry-monitor-check-in-id" in activation.headers + and "sentry-monitor-slug" in activation.headers + ): + monitor_status = MonitorStatus.ERROR + if status == TASK_ACTIVATION_STATUS_COMPLETE: + monitor_status = MonitorStatus.OK + + capture_checkin( + monitor_slug=activation.headers["sentry-monitor-slug"], + check_in_id=activation.headers["sentry-monitor-check-in-id"], + duration=execution_duration, + status=monitor_status, + ) + + # Run the worker loop + run_worker( + child_tasks, + processed_tasks, + shutdown_event, + max_task_count, + processing_pool_name, + process_type, + ) diff --git a/clients/python/test/__init__.py b/clients/python/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/test/client/__init__.py b/clients/python/test/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/test/client/test_client.py b/clients/python/test/client/test_client.py new file mode 100644 index 00000000..2a40e06f --- /dev/null +++ b/clients/python/test/client/test_client.py @@ -0,0 +1,916 @@ +import dataclasses +import random +import string +import time +from collections import defaultdict +from collections.abc import Callable +from pathlib import Path +from typing import Any +from unittest.mock import Mock, patch + +import grpc +import pytest +from google.protobuf.message import Message +from sentry.taskworker.client.client import ( + HealthCheckSettings, + HostTemporarilyUnavailable, + TaskworkerClient, + make_broker_hosts, +) +from sentry.taskworker.client.processing_result import ProcessingResult +from sentry.taskworker.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH +from sentry.testutils.pytest.fixtures import django_db_all +from 
sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+ TASK_ACTIVATION_STATUS_COMPLETE,
+ TASK_ACTIVATION_STATUS_RETRY,
+ FetchNextTask,
+ GetTaskResponse,
+ SetTaskStatusResponse,
+ TaskActivation,
+)
+
+
+@dataclasses.dataclass
+class MockServiceCall:
+ response: Any
+ metadata: tuple[tuple[str, str | bytes], ...] | None = None
+
+
+class MockServiceMethod:
+ """Stub for grpc service methods"""
+
+ def __init__(
+ self,
+ path: str,
+ responses: list[Any],
+ request_serializer: Callable,
+ response_deserializer: Callable,
+ ):
+ self.path = path
+ self.request_serializer = request_serializer
+ self.response_deserializer = response_deserializer
+ self.responses = responses
+
+ def __call__(self, *args, **kwargs):
+ """Capture calls and use registered mocks"""
+ # move the head to the tail
+ res = self.responses[0]
+ tail = self.responses[1:]
+ self.responses = tail + [res]
+
+ if isinstance(res.response, Exception):
+ raise res.response
+ return res.response
+
+ def with_call(self, *args, **kwargs):
+ res = self.responses[0]
+ if res.metadata:
+ assert res.metadata == kwargs.get("metadata"), "Metadata mismatch"
+ if isinstance(res.response, Exception):
+ raise res.response
+ return (res.response, None)
+
+
+class MockChannel:
+ def __init__(self):
+ self._responses = defaultdict(list)
+
+ def unary_unary(
+ self,
+ path: str,
+ request_serializer: Callable,
+ response_deserializer: Callable,
+ *args,
+ **kwargs,
+ ):
+ return MockServiceMethod(
+ path, self._responses.get(path, []), request_serializer, response_deserializer
+ )
+
+ def add_response(
+ self,
+ path: str,
+ resp: Message | Exception,
+ metadata: tuple[tuple[str, str | bytes], ...] | None = None,
+ ):
+ self._responses[path].append(MockServiceCall(response=resp, metadata=metadata))
+
+
+class MockGrpcError(grpc.RpcError):
+ """gRPC errors are elusive; this mock simulates the interface in the mypy stubs"""
+
+ def __init__(self, code, message):
+ self._code = code
+ self._message = message
+
+ def code(self) -> grpc.StatusCode:
+ return self._code
+
+ def details(self) -> str:
+ return self._message
+
+ def result(self):
+ raise self
+
+
+def test_make_broker_hosts() -> None:
+ hosts = make_broker_hosts(host_prefix="broker:50051", num_brokers=3)
+ assert len(hosts) == 3
+ assert hosts == ["broker-0:50051", "broker-1:50051", "broker-2:50051"]
+
+ hosts = make_broker_hosts(
+ host_prefix="",
+ num_brokers=None,
+ host_list="broker:50051, broker-a:50051 , , broker-b:50051",
+ )
+ assert len(hosts) == 3
+ assert hosts == ["broker:50051", "broker-a:50051", "broker-b:50051"]
+
+
+@django_db_all
+def test_init_no_hosts() -> None:
+ with pytest.raises(AssertionError) as err:
+ TaskworkerClient(hosts=[])
+ assert "You must provide at least one RPC host" in str(err)
+
+
+@django_db_all
+def test_health_check_is_debounced() -> None:
+ channel = MockChannel()
+ channel.add_response(
+ "/sentry_protos.taskbroker.v1.ConsumerService/GetTask",
+ GetTaskResponse(
+ task=TaskActivation(
+ id="abc123",
+ namespace="testing",
+ taskname="do_thing",
+ parameters="",
+ headers={},
+ processing_deadline_duration=10,
+ )
+ ),
+ )
+ channel.add_response(
+ "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus",
+ SetTaskStatusResponse(
+ task=TaskActivation(
+ id="abc123",
+ namespace="testing",
+ taskname="do_thing",
+ parameters="",
+ headers={},
+ processing_deadline_duration=10,
+ )
+ ),
+ )
+ with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel:
+ mock_channel.return_value = channel
+ health_check_path = 
Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") + client = TaskworkerClient( + ["localhost-0:50051"], + health_check_settings=HealthCheckSettings(health_check_path, 1), + ) + client._health_check_settings.file_path = Mock() # type: ignore[union-attr] + + _ = client.get_task() + _ = client.get_task() + assert client._health_check_settings.file_path.touch.call_count == 1 # type: ignore[union-attr] + + with patch("sentry.taskworker.client.client.time") as mock_time: + mock_time.time.return_value = time.time() + 1 + _ = client.get_task() + assert client._health_check_settings.file_path.touch.call_count == 2 # type: ignore[union-attr] + + +@django_db_all +def test_get_task_ok() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost-0:50051"]) + result = client.get_task() + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id + assert result.activation.namespace == "testing" + + +@django_db_all +def test_get_task_writes_to_health_check_file() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") + client = TaskworkerClient( + ["localhost-0:50051"], + health_check_settings=HealthCheckSettings(health_check_path, 3), + ) + _ = client.get_task() + assert health_check_path.exists() + + +@django_db_all +def test_get_task_with_interceptor() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + metadata=( + ( + "sentry-signature", + "3202702605c1b65055c28e7c78a5835e760830cff3e9f995eb7ad5f837130b1f", + ), + ), + ) + secret = '["a long secret value","notused"]' + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost-0:50051"], rpc_secret=secret) + result = client.get_task() + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id + assert result.activation.namespace == "testing" + + +@django_db_all +def test_get_task_with_namespace() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = 
TaskworkerClient(hosts=make_broker_hosts("localhost:50051", num_brokers=1)) + result = client.get_task(namespace="testing") + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id + assert result.activation.namespace == "testing" + + +@django_db_all +def test_get_task_not_found() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost:50051"]) + result = client.get_task() + + assert result is None + + +@django_db_all +def test_get_task_failure() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.INTERNAL, "something bad"), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost:50051"]) + with pytest.raises(grpc.RpcError): + client.get_task() + + +@django_db_all +def test_update_task_writes_to_health_check_file() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") + client = TaskworkerClient( + make_broker_hosts("localhost:50051", num_brokers=1), + health_check_settings=HealthCheckSettings( + health_check_path, DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH + ), + ) + _ = client.update_task( + ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), + FetchNextTask(namespace=None), + ) + assert health_check_path.exists() + + +@django_db_all +def test_update_task_ok_with_next() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + assert set(client._host_to_stubs.keys()) == {"localhost-0:50051"} + result = client.update_task( + ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), + FetchNextTask(namespace=None), + ) + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id == "abc123" + + +@django_db_all +def test_update_task_ok_with_next_namespace() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + 
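# Each insecure_channel() call returns the shared mock channel.
+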
mock_channel.return_value = channel + client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + result = client.update_task( + ProcessingResult( + task_id="id", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace="testing"), + ) + assert result + assert result.activation.id == "abc123" + assert result.activation.namespace == "testing" + + +@django_db_all +def test_update_task_ok_no_next() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", SetTaskStatusResponse() + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + result = client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert result is None + + +@django_db_all +def test_update_task_not_found() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending tasks found"), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost-0:50051"]) + result = client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert result is None + + +@django_db_all +def test_update_task_unavailable_retain_task_to_host() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "broker down"), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost-0:50051"]) + with pytest.raises(MockGrpcError) as err: + client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert "broker down" in str(err.value) + + +@django_db_all +def test_client_loadbalance() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="0", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_2 = MockChannel() + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="2", + namespace="testing", + taskname="do_thing", + 
parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_3 = MockChannel() + channel_3.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="3", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_3.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1, channel_2, channel_3] + with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + "localhost-2:50051", + "localhost-3:50051", + ] + client = TaskworkerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=4), + max_tasks_before_rebalance=1, + ) + + task_0 = client.get_task() + assert task_0 is not None and task_0.activation.id == "0" + task_1 = client.get_task() + assert task_1 is not None and task_1.activation.id == "1" + task_2 = client.get_task() + assert task_2 is not None and task_2.activation.id == "2" + task_3 = client.get_task() + assert task_3 is not None and task_3.activation.id == "3" + + client.update_task( + ProcessingResult( + task_0.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_0.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_1.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_1.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_2.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_2.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_3.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_3.host, 0 + ), + None, + ) + + +@django_db_all +def test_client_loadbalance_on_notfound() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + + channel_2 = MockChannel() + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="2", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1, channel_2] + with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + "localhost-2:50051", + ] + client = TaskworkerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=3), + max_tasks_before_rebalance=30, + ) + + # Fetch from the first channel, it should 
return notfound + task_0 = client.get_task() + assert task_0 is None + + # Fetch again, this time from channel_1 + task_1 = client.get_task() + assert task_1 and task_1.activation.id == "1" + + res = client.update_task( + ProcessingResult( + task_1.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_1.host, 0 + ), + None, + ) + assert res is None + + # Because SetStatus on channel_1 returned notfound the client + # should switch brokers. + task_2 = client.get_task() + assert task_2 and task_2.activation.id == "2" + + +@django_db_all +def test_client_loadbalance_on_unavailable() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1] + with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + ] + client = TaskworkerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=2), + max_consecutive_unavailable_errors=3, + ) + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 2 + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + + # Should rebalance to the second host and receive task + task = client.get_task() + assert task and task.activation.id == "1" + assert client._num_consecutive_unavailable_errors == 0 + + +@django_db_all +def test_client_single_host_unavailable() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with (patch("sentry.taskworker.client.client.grpc.insecure_channel") as 
mock_channel,): + mock_channel.return_value = channel + client = TaskworkerClient( + hosts=["localhost-0:50051"], + max_consecutive_unavailable_errors=3, + temporary_unavailable_host_timeout=2, + ) + + for _ in range(3): + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + + # Verify host was marked as temporarily unavailable + assert "localhost-0:50051" in client._temporary_unavailable_hosts + assert isinstance(client._temporary_unavailable_hosts["localhost-0:50051"], float) + + client.get_task() + assert client._cur_host == "localhost-0:50051" + + +@django_db_all +def test_client_reset_errors_after_success() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskworkerClient(["localhost:50051"], max_consecutive_unavailable_errors=3) + + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + task = client.get_task() + assert task and task.activation.id == "1" + assert client._num_consecutive_unavailable_errors == 0 + + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + +@django_db_all +def test_client_update_task_host_unavailable() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + current_time = 1000.0 + + def mock_time(): + return current_time + + with ( + patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel, + patch("sentry.taskworker.client.client.time.time", side_effect=mock_time), + ): + mock_channel.return_value = channel + client = TaskworkerClient( + ["localhost:50051"], + max_consecutive_unavailable_errors=3, + temporary_unavailable_host_timeout=10, + ) + + # Get a task to establish the host mapping + task = client.get_task() + assert task and task.activation.id == "1" + host = task.host + + # Make the host temporarily unavailable + for _ in range(3): + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + assert host 
in client._temporary_unavailable_hosts + + # Try to update the task + with pytest.raises( + HostTemporarilyUnavailable, match=f"Host: {host} is temporarily unavailable" + ): + client.update_task( + ProcessingResult( + task_id="1", + status=TASK_ACTIVATION_STATUS_COMPLETE, + host=host, + receive_timestamp=0, + ), + fetch_next_task=None, + ) diff --git a/clients/python/test/scheduler/__init__.py b/clients/python/test/scheduler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/test/scheduler/test_runner.py b/clients/python/test/scheduler/test_runner.py new file mode 100644 index 00000000..70c16cf9 --- /dev/null +++ b/clients/python/test/scheduler/test_runner.py @@ -0,0 +1,470 @@ +from datetime import UTC, datetime, timedelta +from unittest.mock import Mock, patch + +import pytest +from django.utils import timezone +from sentry.conf.types.taskworker import crontab +from sentry.silo.base import SiloMode +from sentry.taskworker.app import TaskworkerApp +from sentry.taskworker.scheduler.runner import RunStorage, ScheduleRunner +from sentry.testutils.helpers.datetime import freeze_time +from sentry.testutils.thread_leaks.pytest import thread_leak_allowlist +from sentry.utils.redis import redis_clusters + + +@pytest.fixture +def task_app() -> TaskworkerApp: + app = TaskworkerApp() + namespace = app.taskregistry.create_namespace("test") + + @namespace.register(name="valid") + def test_func() -> None: + pass + + @namespace.register(name="second") + def second_func() -> None: + pass + + return app + + +@pytest.fixture +def run_storage() -> RunStorage: + redis = redis_clusters.get("default") + redis.flushdb() + return RunStorage(redis) + + +def test_runstorage_zero_duration(run_storage: RunStorage) -> None: + with freeze_time("2025-07-19 14:25:00"): + now = timezone.now() + result = run_storage.set("test:do_stuff", now) + assert result is True + + +def test_runstorage_double_set(run_storage: RunStorage) -> None: + with freeze_time("2025-07-19 14:25:00"): + now = timezone.now() + first = run_storage.set("test:do_stuff", now) + second = run_storage.set("test:do_stuff", now) + + assert first is True, "initial set should return true" + assert second is False, "writing a key that exists should fail" + + +@pytest.mark.django_db +def test_schedulerunner_add_invalid(task_app) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + with pytest.raises(ValueError) as err: + schedule_set.add( + "invalid", + { + "task": "invalid", + "schedule": timedelta(minutes=5), + }, + ) + assert "Invalid task name" in str(err) + + with pytest.raises(KeyError) as key_err: + schedule_set.add( + "invalid", + { + "task": "test:invalid", + "schedule": timedelta(minutes=5), + }, + ) + assert "No task registered" in str(key_err) + + with pytest.raises(ValueError) as err: + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(microseconds=99), + }, + ) + assert "microseconds" in str(err) + + +@pytest.mark.django_db +def test_schedulerunner_tick_no_tasks(task_app: TaskworkerApp, run_storage: RunStorage) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + with freeze_time("2025-01-24 14:25:00"): + sleep_time = schedule_set.tick() + assert sleep_time == 60 + + +@pytest.mark.django_db +def test_schedulerunner_tick_one_task_time_remaining( + task_app: TaskworkerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + 
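# Register an interval schedule that should fire every five minutes.
+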
schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + # Last run was two minutes ago. + with freeze_time("2025-01-24 14:23:00"): + run_storage.set("test:valid", datetime(2025, 1, 24, 14, 28, 0, tzinfo=UTC)) + + namespace = task_app.taskregistry.get("test") + with freeze_time("2025-01-24 14:25:00"), patch.object(namespace, "send_task") as mock_send: + sleep_time = schedule_set.tick() + assert sleep_time == 180 + assert mock_send.call_count == 0 + + last_run = run_storage.read("test:valid") + assert last_run == datetime(2025, 1, 24, 14, 23, 0, tzinfo=UTC) + + +@pytest.mark.django_db +def test_schedulerunner_tick_one_task_spawned( + task_app: TaskworkerApp, run_storage: RunStorage +) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + + # Last run was 5 minutes from the freeze_time below + run_storage.read_many.return_value = { + "test:valid": datetime(2025, 1, 24, 14, 19, 55), + } + run_storage.set.return_value = True + + namespace = task_app.taskregistry.get("test") + with freeze_time("2025-01-24 14:25:00"), patch.object(namespace, "send_task") as mock_send: + sleep_time = schedule_set.tick() + assert sleep_time == 300 + assert mock_send.call_count == 1 + + # scheduled tasks should not continue the scheduler trace + send_args = mock_send.call_args + assert send_args.args[0].headers["sentry-propagate-traces"] == "False" + assert "sentry-trace" not in send_args.args[0].headers + + assert run_storage.set.call_count == 1 + # set() is called with the correct next_run time + run_storage.set.assert_called_with("test:valid", datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC)) + + +@pytest.mark.django_db +@patch("sentry.taskworker.scheduler.runner.capture_checkin") +def test_schedulerunner_tick_create_checkin( + mock_capture_checkin: Mock, task_app: TaskworkerApp, run_storage: RunStorage +) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "important-task", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + + # Last run was 5 minutes from the freeze_time below + run_storage.read_many.return_value = { + "test:valid": datetime(2025, 1, 24, 14, 19, 55), + } + run_storage.set.return_value = True + mock_capture_checkin.return_value = "checkin-id" + + namespace = task_app.taskregistry.get("test") + with ( + freeze_time("2025-01-24 14:25:00"), + patch.object(namespace, "send_task") as mock_send, + ): + sleep_time = schedule_set.tick() + assert sleep_time == 300 + + assert mock_send.call_count == 1 + + # assert that the activation had the correct headers + send_args = mock_send.call_args + assert "sentry-monitor-check-in-id" in send_args.args[0].headers + assert send_args.args[0].headers["sentry-monitor-slug"] == "important-task" + assert send_args.args[0].headers["sentry-propagate-traces"] == "False" + assert "sentry-trace" not in send_args.args[0].headers + + # Ensure a checkin was created + assert mock_capture_checkin.call_count == 1 + mock_capture_checkin.assert_called_with( + monitor_slug="important-task", + monitor_config={ + "schedule": { + "type": "interval", + "unit": "minute", + "value": 5, + }, + "timezone": "UTC", + }, + status="in_progress", + ) + + +@pytest.mark.django_db +def test_schedulerunner_tick_key_exists_no_spawn( + task_app: TaskworkerApp, run_storage: 
RunStorage
+) -> None:
+ schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage)
+ schedule_set.add(
+ "valid",
+ {
+ "task": "test:valid",
+ "schedule": timedelta(minutes=5),
+ },
+ )
+
+ namespace = task_app.taskregistry.get("test")
+ with patch.object(namespace, "send_task") as mock_send, freeze_time("2025-01-24 14:25:00"):
+ # Run tick() to initialize state in the scheduler. This will write a key to run_storage.
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 300
+ assert mock_send.call_count == 1
+
+ with freeze_time("2025-01-24 14:30:00"):
+ # Set a key into run_storage to simulate another scheduler running
+ run_storage.delete("test:valid")
+ assert run_storage.set("test:valid", timezone.now() + timedelta(minutes=2))
+
+ # Our scheduler would wake up and tick again.
+ # The key exists in run_storage so we should not spawn a task.
+ # last_run time should synchronize with run_storage state, and count down from 14:30
+ with freeze_time("2025-01-24 14:30:02"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 298
+ assert mock_send.call_count == 1
+
+
+@pytest.mark.django_db
+@thread_leak_allowlist(reason="taskworker", issue=97034)
+def test_schedulerunner_tick_one_task_multiple_ticks(
+ task_app: TaskworkerApp, run_storage: RunStorage
+) -> None:
+ schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage)
+ schedule_set.add(
+ "valid",
+ {
+ "task": "test:valid",
+ "schedule": timedelta(minutes=5),
+ },
+ )
+
+ with freeze_time("2025-01-24 14:25:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 300
+
+ with freeze_time("2025-01-24 14:26:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 240
+
+ with freeze_time("2025-01-24 14:28:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 120
+
+
+@pytest.mark.django_db
+def test_schedulerunner_tick_one_task_multiple_ticks_crontab(
+ task_app: TaskworkerApp, run_storage: RunStorage
+) -> None:
+ schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage)
+ schedule_set.add(
+ "valid",
+ {
+ "task": "test:valid",
+ "schedule": crontab(minute="*/2"),
+ },
+ )
+
+ namespace = task_app.taskregistry.get("test")
+ with patch.object(namespace, "send_task") as mock_send:
+ with freeze_time("2025-01-24 14:24:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 120
+ assert mock_send.call_count == 1
+
+ with freeze_time("2025-01-24 14:25:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 60
+
+ # Remove key to simulate expiration
+ run_storage.delete("test:valid")
+ with freeze_time("2025-01-24 14:26:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 120
+ assert mock_send.call_count == 2
+
+
+@pytest.mark.django_db
+def test_schedulerunner_tick_multiple_tasks(
+ task_app: TaskworkerApp, run_storage: RunStorage
+) -> None:
+ schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage)
+ schedule_set.add(
+ "valid",
+ {
+ "task": "test:valid",
+ "schedule": timedelta(minutes=5),
+ },
+ )
+ schedule_set.add(
+ "second",
+ {
+ "task": "test:second",
+ "schedule": timedelta(minutes=2),
+ },
+ )
+
+ namespace = task_app.taskregistry.get("test")
+ with patch.object(namespace, "send_task") as mock_send:
+ with freeze_time("2025-01-24 14:25:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 120
+
+ assert mock_send.call_count == 2
+
+ with freeze_time("2025-01-24 14:26:00"):
+ sleep_time = schedule_set.tick()
+ assert sleep_time == 60
+
+ assert mock_send.call_count == 2
+
+ # Remove the redis key, as the ttl in 
redis doesn't respect freeze_time() + run_storage.delete("test:second") + with freeze_time("2025-01-24 14:27:01"): + sleep_time = schedule_set.tick() + # two minutes left on the 5 min task + assert sleep_time == 120 + + assert mock_send.call_count == 3 + + +@pytest.mark.django_db +def test_schedulerunner_tick_fast_and_slow( + task_app: TaskworkerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(seconds=30), + }, + ) + schedule_set.add( + "second", + { + "task": "test:second", + "schedule": crontab(minute="*/2"), + }, + ) + + namespace = task_app.taskregistry.get("test") + with patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:25:00"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:25:30"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:26:00"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid", "second", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:26:30"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid", "second", "valid", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:27:00"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + assert run_storage.read("test:valid") + called = extract_sent_tasks(mock_send) + assert called == [ + "valid", + "valid", + "second", + "valid", + "valid", + "valid", + ] + + +def extract_sent_tasks(mock: Mock) -> list[str]: + return [call[0][0].taskname for call in mock.call_args_list] + + +@pytest.mark.django_db +def test_schedulerunner_silo_limited_task_has_task_properties() -> None: + app = TaskworkerApp() + namespace = app.taskregistry.create_namespace("test") + + @namespace.register( + name="region_task", + at_most_once=True, + wait_for_delivery=True, + silo_mode=SiloMode.REGION, + ) + def region_task() -> None: + pass + + for attr in region_task.__dict__.keys(): + if attr.startswith("_") and not attr.startswith("__"): + continue + assert hasattr(region_task, attr) + + assert region_task.fullname == "test:region_task" + assert region_task.namespace.name == "test" + assert region_task.name == "region_task" + assert region_task.at_most_once is True + assert region_task.wait_for_delivery is True + + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=app, run_storage=run_storage) + schedule_set.add( + "region-task", + { + "task": "test:region_task", + "schedule": timedelta(minutes=5), + }, + ) + + schedule_set.log_startup() + + assert len(schedule_set._entries) == 1 + entry = schedule_set._entries[0] + assert entry.fullname == "test:region_task" + assert entry.namespace == "test" + assert entry.taskname == "region_task" diff --git a/clients/python/test/scheduler/test_schedules.py b/clients/python/test/scheduler/test_schedules.py new file mode 100644 index 00000000..1f597f20 --- /dev/null +++ b/clients/python/test/scheduler/test_schedules.py @@ -0,0 +1,200 @@ +from datetime import UTC, 
datetime, timedelta + +import pytest +from django.utils import timezone +from sentry.conf.types.taskworker import crontab +from sentry.taskworker.scheduler.schedules import CrontabSchedule, TimedeltaSchedule +from sentry.testutils.helpers.datetime import freeze_time + + +def test_timedeltaschedule_invalid() -> None: + with pytest.raises(ValueError): + TimedeltaSchedule(timedelta(microseconds=5)) + + with pytest.raises(ValueError): + TimedeltaSchedule(timedelta(seconds=-1)) + + +@freeze_time("2025-01-24 14:25:00") +def test_timedeltaschedule_is_due() -> None: + now = timezone.now() + schedule = TimedeltaSchedule(timedelta(minutes=5)) + + assert not schedule.is_due(now) + + four_min_ago = now - timedelta(minutes=4, seconds=59) + assert not schedule.is_due(four_min_ago) + + five_min_ago = now - timedelta(minutes=5) + assert schedule.is_due(five_min_ago) + + six_min_ago = now - timedelta(minutes=6) + assert schedule.is_due(six_min_ago) + + +def test_timedeltaschedule_monitor_interval() -> None: + schedule = TimedeltaSchedule(timedelta(seconds=10)) + assert schedule.monitor_interval() == (10, "second") + + schedule = TimedeltaSchedule(timedelta(minutes=5)) + assert schedule.monitor_interval() == (5, "minute") + + schedule = TimedeltaSchedule(timedelta(minutes=5, seconds=10)) + assert schedule.monitor_interval() == (5, "minute") + + schedule = TimedeltaSchedule(timedelta(hours=1)) + assert schedule.monitor_interval() == (1, "hour") + + +@freeze_time("2025-01-24 14:25:00") +def test_timedeltaschedule_remaining_seconds() -> None: + now = timezone.now() + delta = timedelta(minutes=5) + schedule = TimedeltaSchedule(delta) + + assert schedule.remaining_seconds(None) == 0 + assert schedule.remaining_seconds(now) == 300 + + four_min_ago = now - timedelta(minutes=4, seconds=59) + assert schedule.remaining_seconds(four_min_ago) == 1 + + five_min_ago = now - timedelta(minutes=5) + assert schedule.remaining_seconds(five_min_ago) == 0 + + ten_min_ago = now - timedelta(minutes=10) + assert schedule.remaining_seconds(ten_min_ago) == 0 + + +def test_crontabschedule_invalid() -> None: + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(hour="99")) + + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(hour="25")) + + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(day_of_week="25")) + + +def test_crontabschedule_is_due() -> None: + schedule = CrontabSchedule("test", crontab(minute="*/5")) + + # no last_run and not time to spawn + with freeze_time("2025-01-24 14:23:00"): + now = timezone.now() + assert not schedule.is_due(None) + assert not schedule.is_due(now) + + with freeze_time("2025-01-24 14:25:00"): + now = timezone.now() + assert schedule.is_due(None) + assert not schedule.is_due(now) + + # last run was 14:20, current time is 14:22 = not due + with freeze_time("2025-01-24 14:22:00"): + two_twenty = timezone.now() - timedelta(minutes=2) + assert not schedule.is_due(two_twenty) + + # last run was 14:20, current time is 14:25 = due + with freeze_time("2025-01-24 14:25:00"): + two_twenty = timezone.now() - timedelta(minutes=5) + assert schedule.is_due(two_twenty) + + # last run was 14:15, current time is 14:25 = due as we missed an interval + with freeze_time("2025-01-24 14:25:00"): + two_fifteen = timezone.now() - timedelta(minutes=10) + assert schedule.is_due(two_fifteen) + + # last run was 14:26 (the future) current time is 14:25 = not due + with freeze_time("2025-01-24 14:25:00"): + future = timezone.now() + timedelta(minutes=1) + assert not 
schedule.is_due(future)
+
+
+def test_crontabschedule_remaining_seconds() -> None:
+ schedule = CrontabSchedule("test", crontab(minute="*/5"))
+
+ # no last_run, but due in one minute
+ with freeze_time("2025-01-24 14:24:00"):
+ assert not schedule.is_due(None)
+ assert schedule.remaining_seconds(None) == 60
+
+ # no last_run, but due now
+ with freeze_time("2025-01-24 14:25:00"):
+ assert schedule.is_due(None)
+ assert schedule.remaining_seconds(None) == 0
+
+ # last run was late (14:21), next spawn is at 14:25
+ with freeze_time("2025-01-24 14:25:00"):
+ four_min_ago = timezone.now() - timedelta(minutes=4)
+ assert schedule.remaining_seconds(four_min_ago) == 0
+
+ # last run was 5 min ago, right on schedule
+ with freeze_time("2025-01-24 14:25:00"):
+ five_min_ago = timezone.now() - timedelta(minutes=5)
+ assert schedule.remaining_seconds(five_min_ago) == 0
+
+ # last run was mere seconds ago. 5 min remaining
+ with freeze_time("2025-01-24 14:25:10"):
+ five_min_ago = timezone.now()
+ assert schedule.remaining_seconds(five_min_ago) == 300
+
+ # Later in the minute. crontabs only have minute precision.
+ with freeze_time("2025-01-24 14:25:59"):
+ five_min_ago = timezone.now()
+ assert schedule.remaining_seconds(five_min_ago) == 300
+
+ # It isn't time yet, as we're mid-interval
+ with freeze_time("2025-01-24 14:23:10"):
+ three_min_ago = timezone.now() - timedelta(minutes=3)
+ assert schedule.remaining_seconds(three_min_ago) == 120
+
+ # 14:19 was 1 min late; we missed a beat but we're currently on time.
+ with freeze_time("2025-01-24 14:25:10"):
+ six_min_ago = timezone.now() - timedelta(minutes=6)
+ assert schedule.remaining_seconds(six_min_ago) == 0
+
+ # We have missed a few intervals; try to get back on schedule for the next beat
+ with freeze_time("2025-01-24 14:23:00"):
+ twenty_two_min_ago = timezone.now() - timedelta(minutes=22)
+ assert schedule.remaining_seconds(twenty_two_min_ago) == 120
+
+ # We have encountered a value from the future.
+ # Our clock could be wrong, or we are competing with another scheduler.
+ # Advance to the next tick 14:30.
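+ # At 14:24 the 14:30 tick is 360 seconds away.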
+ with freeze_time("2025-01-24 14:24:00"): + future_two = timezone.now() + timedelta(minutes=2) + assert schedule.remaining_seconds(future_two) == 360 + + +@freeze_time("2025-01-24 14:25:00") +def test_crontabschedule_runtime_after() -> None: + schedule = CrontabSchedule("test", crontab(minute="*/15")) + + now = timezone.now() + assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC) + + last_run = datetime(2025, 1, 24, 14, 29, 15, tzinfo=UTC) + assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC) + + last_run = datetime(2025, 1, 24, 14, 38, 23, tzinfo=UTC) + assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 14, 45, 0, tzinfo=UTC) + + schedule = CrontabSchedule("test", crontab(minute="1", hour="*/6")) + last_run = datetime(2025, 1, 24, 14, 29, 15, tzinfo=UTC) + assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 18, 1, 0, tzinfo=UTC) + + schedule = CrontabSchedule("test", crontab(minute="*/1")) + now = timezone.now() + assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 26, 0, tzinfo=UTC) + + +def test_crontabschedule_monitor_value() -> None: + schedule = CrontabSchedule("test", crontab(minute="*/5")) + assert schedule.monitor_value() == "*/5 * * * *" + + schedule = CrontabSchedule("test", crontab(minute="*/10", hour="*/2")) + assert schedule.monitor_value() == "*/10 */2 * * *" + + schedule = CrontabSchedule("test", crontab(minute="*/10", day_of_week="1")) + assert schedule.monitor_value() == "*/10 * * * 1" diff --git a/clients/python/test/test_app.py b/clients/python/test/test_app.py new file mode 100644 index 00000000..056f3b7b --- /dev/null +++ b/clients/python/test/test_app.py @@ -0,0 +1,37 @@ +import pytest +from django.core.cache import cache +from sentry.taskworker.app import TaskworkerApp +from sentry.taskworker.registry import TaskRegistry +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + + +@pytest.fixture +def clear_cache(): + cache.clear() + + +def test_taskregistry_param_and_property(): + registry = TaskRegistry() + app = TaskworkerApp(taskregistry=registry) + assert app.taskregistry == registry + + +def test_set_config(): + app = TaskworkerApp() + app.set_config({"rpc_secret": "testing", "ignored": "key"}) + assert app.config["rpc_secret"] == "testing" + assert "ignored" not in app.config + + +def test_should_attempt_at_most_once(clear_cache): + activation = TaskActivation( + id="111", + taskname="examples.simple_task", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ) + app = TaskworkerApp() + app.at_most_once_store(cache) + assert app.should_attempt_at_most_once(activation) + assert not app.should_attempt_at_most_once(activation) diff --git a/clients/python/test/test_registry.py b/clients/python/test/test_registry.py new file mode 100644 index 00000000..a1b63fab --- /dev/null +++ b/clients/python/test/test_registry.py @@ -0,0 +1,370 @@ +import base64 +from concurrent.futures import Future +from unittest.mock import Mock + +import orjson +import pytest +import zstandard as zstd +from django.test.utils import override_settings +from sentry.conf.types.kafka_definition import Topic +from sentry.taskworker.constants import MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, CompressionType +from sentry.taskworker.registry import TaskNamespace, TaskRegistry +from sentry.taskworker.retry import LastAction, Retry +from sentry.taskworker.router import DefaultRouter +from sentry.taskworker.task import Task +from 
sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + + +def test_namespace_register_task() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + ) + + @namespace.register(name="tests.simple_task") + def simple_task() -> None: + raise NotImplementedError + + assert namespace.default_retry is None + assert namespace.contains("tests.simple_task") + assert not namespace.contains("nope") + + task = namespace.get("tests.simple_task") + assert task + assert task.name == "tests.simple_task" + + +def test_namespace_register_inherits_default_retry() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=Retry(times=5, on=(RuntimeError,)), + ) + + @namespace.register(name="test.no_retry_param") + def no_retry_param() -> None: + raise NotImplementedError + + retry = Retry(times=2, times_exceeded=LastAction.Deadletter) + + @namespace.register(name="test.with_retry_param", retry=retry) + def with_retry_param() -> None: + raise NotImplementedError + + with_retry = namespace.get("test.with_retry_param") + assert with_retry.retry == retry + + @namespace.register(name="test.retry_none", retry=None) + def retry_none_param() -> None: + raise NotImplementedError + + with_retry = namespace.get("test.retry_none") + assert with_retry.retry == namespace.default_retry + + +def test_register_inherits_default_expires_processing_deadline() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + expires=10 * 60, + processing_deadline_duration=5, + ) + + @namespace.register(name="test.no_expires") + def no_expires() -> None: + raise NotImplementedError + + @namespace.register(name="test.with_expires", expires=30 * 60, processing_deadline_duration=10) + def with_expires() -> None: + raise NotImplementedError + + no_expires_task = namespace.get("test.no_expires") + activation = no_expires_task.create_activation([], {}) + assert activation.expires == 10 * 60 + assert activation.processing_deadline_duration == 5 + + with_expires_task = namespace.get("test.with_expires") + activation = with_expires_task.create_activation([], {}) + assert activation.expires == 30 * 60 + assert activation.processing_deadline_duration == 10 + + +def test_namespace_get_unknown() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + ) + + with pytest.raises(KeyError) as err: + namespace.get("nope") + assert "No task registered" in str(err) + + +@pytest.mark.django_db +def test_namespace_send_task_no_retry() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + ) + + @namespace.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + mock_producer = Mock() + namespace._producers[Topic.TASKWORKER] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskworker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +@pytest.mark.django_db +def test_namespace_send_task_with_compression() -> None: + namespace = TaskNamespace( + name="tests", + 
router=DefaultRouter(), + retry=None, + ) + + @namespace.register(name="test.compression_task", compression_type=CompressionType.ZSTD) + def simple_task_with_compression(param: str) -> None: + raise NotImplementedError + + activation = simple_task_with_compression.create_activation( + args=["test_arg"], kwargs={"test_key": "test_value"} + ) + + assert activation.headers.get("compression-type") == CompressionType.ZSTD.value + + expected_params = {"args": ["test_arg"], "kwargs": {"test_key": "test_value"}} + + decoded_data = base64.b64decode(activation.parameters.encode("utf-8")) + decompressed_data = zstd.decompress(decoded_data) + actual_params = orjson.loads(decompressed_data) + + assert actual_params == expected_params + + +@pytest.mark.django_db +def test_namespace_send_task_with_auto_compression() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + ) + + @namespace.register(name="test.compression_task") + def simple_task_with_compression(param: str) -> None: + raise NotImplementedError + + big_args = ["x" * (MAX_PARAMETER_BYTES_BEFORE_COMPRESSION + 1)] + activation = simple_task_with_compression.create_activation( + args=big_args, kwargs={"test_key": "test_value"} + ) + + assert activation.headers.get("compression-type") == CompressionType.ZSTD.value + + expected_params = {"args": big_args, "kwargs": {"test_key": "test_value"}} + + decoded_data = base64.b64decode(activation.parameters.encode("utf-8")) + decompressed_data = zstd.decompress(decoded_data) + actual_params = orjson.loads(decompressed_data) + + assert actual_params == expected_params + + +@pytest.mark.django_db +def test_namespace_send_task_with_retry() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=None, + ) + + @namespace.register( + name="test.simpletask", retry=Retry(times=3, times_exceeded=LastAction.Deadletter) + ) + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + mock_producer = Mock() + namespace._producers[Topic.TASKWORKER] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +@pytest.mark.django_db +def test_namespace_with_retry_send_task() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + retry=Retry(times=3), + ) + + @namespace.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + mock_producer = Mock() + namespace._producers[Topic.TASKWORKER] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskworker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +@pytest.mark.django_db +def test_namespace_with_wait_for_delivery_send_task() -> None: + namespace = TaskNamespace( + name="tests", + router=DefaultRouter(), + 
retry=Retry(times=3), + ) + + @namespace.register(name="test.simpletask", wait_for_delivery=True) + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + + mock_producer = Mock() + namespace._producers[Topic.TASKWORKER] = mock_producer + + ret_value: Future[None] = Future() + ret_value.set_result(None) + mock_producer.produce.return_value = ret_value + namespace.send_task(activation, wait_for_delivery=True) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskworker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +@pytest.mark.django_db +def test_registry_get() -> None: + registry = TaskRegistry() + ns = registry.create_namespace(name="tests") + + assert isinstance(ns, TaskNamespace) + assert ns.name == "tests" + assert ns.router + assert ns == registry.get("tests") + + with pytest.raises(KeyError): + registry.get("derp") + + assert registry.contains("derp") is False + assert registry.contains("tests") + + +@pytest.mark.django_db +def test_registry_get_task() -> None: + registry = TaskRegistry() + ns = registry.create_namespace(name="tests") + + @ns.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + task = registry.get_task(ns.name, "test.simpletask") + assert isinstance(task, Task) + + with pytest.raises(KeyError): + registry.get_task("nope", "test.simpletask") + + with pytest.raises(KeyError): + registry.get_task(ns.name, "nope") + + +@pytest.mark.django_db +def test_registry_create_namespace_simple() -> None: + registry = TaskRegistry() + ns = registry.create_namespace(name="tests") + assert ns.default_retry is None + assert ns.default_expires is None + assert ns.default_processing_deadline_duration == 10 + assert ns.name == "tests" + assert ns.topic == Topic.TASKWORKER + assert ns.app_feature == "tests" + + retry = Retry(times=3) + ns = registry.create_namespace( + "test-two", + retry=retry, + expires=60 * 10, + processing_deadline_duration=60, + app_feature="anvils", + ) + assert ns.default_retry == retry + assert ns.default_processing_deadline_duration == 60 + assert ns.default_expires == 60 * 10 + assert ns.name == "test-two" + assert ns.topic == Topic.TASKWORKER + assert ns.app_feature == "anvils" + + +@pytest.mark.django_db +def test_registry_create_namespace_duplicate() -> None: + registry = TaskRegistry() + registry.create_namespace(name="tests") + with pytest.raises(ValueError, match="tests already exists"): + registry.create_namespace(name="tests") + + +@pytest.mark.django_db +def test_registry_create_namespace_route_setting() -> None: + with override_settings(TASKWORKER_ROUTES='{"profiling":"profiles", "lol":"nope"}'): + registry = TaskRegistry() + + # namespaces without routes resolve to the default topic. 
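+ # TASKWORKER_ROUTES is a JSON object mapping namespace names to topic names.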
+ tests = registry.create_namespace(name="tests") + assert tests.topic == Topic.TASKWORKER + + profiling = registry.create_namespace(name="profiling") + assert profiling.topic == Topic.PROFILES + + with pytest.raises(ValueError): + ns = registry.create_namespace(name="lol") + # Should raise as the name is routed to an invalid topic + ns.topic diff --git a/clients/python/test/test_retry.py b/clients/python/test/test_retry.py new file mode 100644 index 00000000..49258c24 --- /dev/null +++ b/clients/python/test/test_retry.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +from multiprocessing.context import TimeoutError + +from sentry.taskworker.retry import LastAction, Retry, RetryTaskError +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + + +class RuntimeChildError(RuntimeError): + """Dummy exception for isinstance checks""" + + +def test_initial_state__defaults() -> None: + retry = Retry(times=2) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 2 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_initial_state__discard() -> None: + retry = Retry(times=1, times_exceeded=LastAction.Discard) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 1 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_initial_state__deadletter() -> None: + retry = Retry(times=5, times_exceeded=LastAction.Deadletter) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 5 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + +def test_initial_state__delay_on_retry() -> None: + retry = Retry(times=5, delay=1) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.delay_on_retry == 1 + + +def test_should_retry_no_matching_error() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + err = Exception("something bad") + assert not retry.should_retry(state, err) + + state.attempts = 5 + assert not retry.should_retry(state, err) + + +def test_should_retry_retryerror() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + err = RetryTaskError("something bad") + assert retry.should_retry(state, err) + + state.attempts = 4 + assert not retry.should_retry(state, err) + + +def test_should_retry_multiprocessing_timeout() -> None: + retry = Retry(times=3) + state = retry.initial_state() + + timeout = TimeoutError("timeouts should retry if there are attempts left") + assert retry.should_retry(state, timeout) + + state.attempts = 1 + assert retry.should_retry(state, timeout) + + # attempt = 2 is actually the third attempt.
+ state.attempts = 2 + assert not retry.should_retry(state, timeout) + + state.attempts = 3 + assert not retry.should_retry(state, timeout) + + +def test_should_retry_error_allow_list() -> None: + retry = Retry(times=3, on=(RuntimeError, KeyError)) + state = retry.initial_state() + + err = RuntimeError("should retry") + assert retry.should_retry(state, err) + + key_err = KeyError("should retry") + assert retry.should_retry(state, key_err) + + err_child = RuntimeChildError("subclasses are retried") + assert retry.should_retry(state, err_child) + + value_err = ValueError("no retry") + assert not retry.should_retry(state, value_err) + + +def test_max_attempts_reached() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + assert not retry.max_attempts_reached(state) + + state.attempts = 4 + assert retry.max_attempts_reached(state) + + +def test_should_retry_allow_list_ignore_parent() -> None: + retry = Retry(times=3, on=(Exception,), ignore=(RuntimeError,)) + state = retry.initial_state() + + runtime_err = RuntimeError("no retry for ignored") + assert not retry.should_retry(state, runtime_err) + + runtime_child = RuntimeChildError("no retry for subclasses of ignored") + assert not retry.should_retry(state, runtime_child) + + val_err = ValueError("other exceptions are retried") + assert retry.should_retry(state, val_err) diff --git a/clients/python/test/test_task.py b/clients/python/test/test_task.py new file mode 100644 index 00000000..ba68706a --- /dev/null +++ b/clients/python/test/test_task.py @@ -0,0 +1,357 @@ +import datetime +from typing import Any +from unittest.mock import patch + +import pytest +import sentry_sdk +from sentry.taskworker.registry import TaskNamespace +from sentry.taskworker.retry import LastAction, Retry, RetryTaskError +from sentry.taskworker.router import DefaultRouter +from sentry.taskworker.task import Task +from sentry.testutils.helpers.task_runner import TaskRunner +from sentry.utils import json +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + + +def do_things() -> None: + raise NotImplementedError + + +@pytest.fixture +def task_namespace() -> TaskNamespace: + return TaskNamespace(name="tests", router=DefaultRouter(), retry=None) + + +def test_define_task_defaults(task_namespace: TaskNamespace) -> None: + task = Task(name="test.do_things", func=do_things, namespace=task_namespace) + assert task.retry is None + assert task.name == "test.do_things" + assert task.namespace == task_namespace + + +def test_define_task_retry(task_namespace: TaskNamespace) -> None: + retry = Retry(times=3, times_exceeded=LastAction.Deadletter) + task = Task(name="test.do_things", func=do_things, namespace=task_namespace, retry=retry) + assert task.retry == retry + + +def test_define_task_at_most_once_with_retry(task_namespace: TaskNamespace) -> None: + with pytest.raises(AssertionError) as err: + Task( + name="test.do_things", + func=do_things, + namespace=task_namespace, + at_most_once=True, + retry=Retry(times=3), + ) + assert "You cannot enable at_most_once and have retries" in str(err) + + +def test_apply_async_expires(task_namespace: TaskNamespace) -> None: + def test_func(*args, **kwargs) -> None: + pass + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + with patch.object(task_namespace, "send_task") as mock_send: + task.apply_async(args=["arg2"], kwargs={"org_id": 2}, expires=10, producer=None) + assert mock_send.call_count == 1 + 
call_params = mock_send.call_args + + activation = call_params.args[0] + assert activation.expires == 10 + assert activation.parameters == json.dumps({"args": ["arg2"], "kwargs": {"org_id": 2}}) + + +def test_apply_async_countdown(task_namespace: TaskNamespace) -> None: + def test_func(*args, **kwargs) -> None: + pass + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + with patch.object(task_namespace, "send_task") as mock_send: + task.apply_async(args=["arg2"], kwargs={"org_id": 2}, countdown=600, producer=None) + assert mock_send.call_count == 1 + call_params = mock_send.call_args + + activation = call_params.args[0] + assert activation.delay == 600 + assert activation.parameters == json.dumps({"args": ["arg2"], "kwargs": {"org_id": 2}}) + + +def test_delay_taskrunner_immediate_mode(task_namespace: TaskNamespace) -> None: + calls = [] + + def test_func(*args, **kwargs) -> None: + calls.append({"args": args, "kwargs": kwargs}) + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + # Within a TaskRunner context tasks should run immediately. + with TaskRunner(): + task.delay("arg", org_id=1) + task.apply_async(args=["arg2"], kwargs={"org_id": 2}) + task.apply_async() + + assert len(calls) == 3 + assert calls[0] == {"args": ("arg",), "kwargs": {"org_id": 1}} + assert calls[1] == {"args": ("arg2",), "kwargs": {"org_id": 2}} + assert calls[2] == {"args": tuple(), "kwargs": {}} + + +def test_delay_taskrunner_immediate_validate_activation(task_namespace: TaskNamespace) -> None: + calls = [] + + def test_func(mixed: Any) -> None: + calls.append({"mixed": mixed}) + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + + with TaskRunner(): + task.delay(mixed=None) + task.delay(mixed="str") + + with pytest.raises(TypeError) as err: + task.delay(mixed=datetime.timedelta(days=1)) + assert "not JSON serializable" in str(err) + + assert len(calls) == 2 + assert calls[0] == {"mixed": None} + assert calls[1] == {"mixed": "str"} + + +def test_should_retry(task_namespace: TaskNamespace) -> None: + retry = Retry(times=3, times_exceeded=LastAction.Deadletter) + state = retry.initial_state() + + task = Task( + name="test.do_things", + func=do_things, + namespace=task_namespace, + retry=retry, + ) + err = RetryTaskError("try again plz") + assert task.should_retry(state, err) + + state.attempts = 3 + assert not task.should_retry(state, err) + + no_retry = Task( + name="test.no_retry", + func=do_things, + namespace=task_namespace, + retry=None, + ) + assert not no_retry.should_retry(state, err) + + +def test_create_activation(task_namespace: TaskNamespace) -> None: + no_retry_task = Task( + name="test.no_retry", + func=do_things, + namespace=task_namespace, + retry=None, + ) + + retry = Retry(times=3, times_exceeded=LastAction.Deadletter) + retry_task = Task( + name="test.with_retry", + func=do_things, + namespace=task_namespace, + retry=retry, + ) + + timedelta_expiry_task = Task( + name="test.with_timedelta_expires", + func=do_things, + namespace=task_namespace, + expires=datetime.timedelta(minutes=5), + processing_deadline_duration=datetime.timedelta(seconds=30), + ) + int_expiry_task = Task( + name="test.with_int_expires", + func=do_things, + namespace=task_namespace, + expires=5 * 60, + processing_deadline_duration=30, + ) + + at_most_once_task = Task( + name="test.at_most_once", + func=do_things, + namespace=task_namespace, + at_most_once=True, + ) + # No retries will be made as there is no 
retry policy on the task or namespace. + activation = no_retry_task.create_activation([], {}) + assert activation.taskname == "test.no_retry" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + activation = retry_task.create_activation([], {}) + assert activation.taskname == "test.with_retry" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + activation = timedelta_expiry_task.create_activation([], {}) + assert activation.taskname == "test.with_timedelta_expires" + assert activation.expires == 300 + assert activation.processing_deadline_duration == 30 + + activation = int_expiry_task.create_activation([], {}) + assert activation.taskname == "test.with_int_expires" + assert activation.expires == 300 + assert activation.processing_deadline_duration == 30 + + activation = int_expiry_task.create_activation([], {}, expires=600) + assert activation.taskname == "test.with_int_expires" + assert activation.expires == 600 + assert activation.processing_deadline_duration == 30 + + activation = at_most_once_task.create_activation([], {}) + assert activation.taskname == "test.at_most_once" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.at_most_once is True + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_create_activation_parameters(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}) + params = json.loads(activation.parameters) + assert params["args"] + assert params["args"] == ["one", 22] + assert params["kwargs"] == {"org_id": 99} + + +def test_create_activation_tracing(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}) + + headers = activation.headers + assert headers["sentry-trace"] + assert "baggage" in headers + + +def test_create_activation_tracing_headers(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation( + ["one", 22], {"org_id": 99}, {"key": "value"} + ) + + headers = activation.headers + assert headers["sentry-trace"] + assert "baggage" in headers + assert headers["key"] == "value" + + +def test_create_activation_tracing_disable(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with 
sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation( + ["one", 22], {"org_id": 99}, {"sentry-propagate-traces": False} + ) + + headers = activation.headers + assert "sentry-trace" not in headers + assert "baggage" not in headers + + +def test_create_activation_headers_scalars(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "str": "value", + "int": 22, + "float": 3.14, + "bool": False, + "none": None, + } + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + assert activation.headers["str"] == "value" + assert activation.headers["int"] == "22" + assert activation.headers["float"] == "3.14" + assert activation.headers["bool"] == "False" + assert activation.headers["none"] == "None" + + +def test_create_activation_headers_nested(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "key": "value", + "nested": { + "name": "sentry", + }, + } + with pytest.raises(ValueError) as err: + with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + assert "Only scalar header values are supported" in str(err) + assert "The `nested` header value is of type " in str(err) + + +def test_create_activation_headers_monitor_config_treatment(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "key": "value", + "sentry-monitor-config": { + "schedule": {"type": "crontab", "value": "*/15 * * * *"}, + "timezone": "UTC", + }, + "sentry-monitor-slug": "delete-stuff", + "sentry-monitor-check-in-id": "abc123", + } + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + + result = activation.headers + assert result + assert result["key"] == "value" + assert "sentry-monitor-config" not in result + assert "sentry-monitor-slug" in result + assert "sentry-monitor-check-in-id" in result diff --git a/clients/python/test/worker/__init__.py b/clients/python/test/worker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/test/worker/test_worker.py b/clients/python/test/worker/test_worker.py new file mode 100644 index 00000000..75128c73 --- /dev/null +++ b/clients/python/test/worker/test_worker.py @@ -0,0 +1,722 @@ +import base64 +import queue +import time +from multiprocessing import Event +from unittest import mock + +import grpc +import orjson +import pytest +import zstandard as zstd +from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation +from sentry.taskworker.client.processing_result import ProcessingResult +from sentry.taskworker.constants import CompressionType +from sentry.taskworker.retry import NoRetriesRemainingError +from sentry.taskworker.state import current_task +from sentry.taskworker.worker import TaskWorker +from sentry.taskworker.workerchild import ProcessingDeadlineExceeded, child_process +from sentry.testutils.cases import TestCase +from sentry.testutils.helpers.options import override_options +from sentry.testutils.thread_leaks.pytest import thread_leak_allowlist +from sentry.utils.redis import redis_clusters +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + 
ON_ATTEMPTS_EXCEEDED_DISCARD, + TASK_ACTIVATION_STATUS_COMPLETE, + TASK_ACTIVATION_STATUS_FAILURE, + TASK_ACTIVATION_STATUS_RETRY, + RetryState, + TaskActivation, +) +from sentry_sdk.crons import MonitorStatus + +SIMPLE_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="111", + taskname="examples.simple_task", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ), +) + +RETRY_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="222", + taskname="examples.retry_task", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ), +) + +FAIL_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="333", + taskname="examples.fail_task", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ), +) + +UNDEFINED_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="444", + taskname="total.rubbish", + namespace="lolnope", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ), +) + +AT_MOST_ONCE_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="555", + taskname="examples.at_most_once", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + ), +) + +RETRY_STATE_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="654", + taskname="examples.retry_state", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + retry_state=RetryState( + # no more attempts left + attempts=1, + max_attempts=2, + on_attempts_exceeded=ON_ATTEMPTS_EXCEEDED_DISCARD, + ), + ), +) + +SCHEDULED_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="111", + taskname="examples.simple_task", + namespace="examples", + parameters='{"args": [], "kwargs": {}}', + processing_deadline_duration=2, + headers={ + "sentry-monitor-slug": "simple-task", + "sentry-monitor-check-in-id": "abc123", + }, + ), +) + +COMPRESSED_TASK = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="compressed_task_123", + taskname="examples.simple_task", + namespace="examples", + parameters=base64.b64encode( + zstd.compress( + orjson.dumps( + { + "args": ["test_arg1", "test_arg2"], + "kwargs": {"test_key": "test_value", "number": 42}, + } + ) + ) + ).decode("utf8"), + headers={ + "compression-type": CompressionType.ZSTD.value, + }, + processing_deadline_duration=2, + ), +) + + +@pytest.mark.django_db +@thread_leak_allowlist(reason="taskworker", issue=97034) +class TestTaskWorker(TestCase): + def test_tasks_exist(self) -> None: + import sentry.taskworker.tasks.examples as example_tasks + + assert example_tasks.simple_task + assert example_tasks.retry_task + assert example_tasks.at_most_once_task + + def test_fetch_task(self) -> None: + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=100, + process_type="fork", + ) + with mock.patch.object(taskworker.client, "get_task") as mock_get: + mock_get.return_value = SIMPLE_TASK + + task = 
taskworker.fetch_task() + mock_get.assert_called_once() + + assert task + assert task.activation.id == SIMPLE_TASK.activation.id + + def test_fetch_no_task(self) -> None: + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=100, + process_type="fork", + ) + with mock.patch.object(taskworker.client, "get_task") as mock_get: + mock_get.return_value = None + task = taskworker.fetch_task() + + mock_get.assert_called_once() + assert task is None + + def test_run_once_no_next_task(self) -> None: + max_runtime = 5 + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + mock_client.get_task.return_value = SIMPLE_TASK + # No next_task returned + mock_client.update_task.return_value = None + + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.called: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 1 + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + assert mock_client.update_task.call_args.args[1] is None + + def test_run_once_with_next_task(self) -> None: + # Cover the scenario where update_task returns the next task which should + # be processed. 
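+ # Returning the next task from update_task saves the worker a get_task round trip.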
+ max_runtime = 5 + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args, **kwargs): + if mock_client.update_task.call_count >= 1: + return None + return SIMPLE_TASK + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.return_value = SIMPLE_TASK + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until two tasks have been processed + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 2: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 2 + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + assert mock_client.update_task.call_args.args[1] is None + + @override_options({"taskworker.fetch_next.disabled_pools": ["testing"]}) + def test_run_once_with_fetch_next_disabled(self) -> None: + # Cover the scenario where taskworker.fetch_next.disabled_pools is defined + max_runtime = 5 + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + processing_pool_name="testing", + ) + with mock.patch.object(taskworker, "client") as mock_client: + mock_client.update_task.return_value = None + mock_client.get_task.return_value = SIMPLE_TASK + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until two tasks have been processed + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 2: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 2 + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + assert mock_client.update_task.call_args.args[1] is None + + def test_run_once_with_update_failure(self) -> None: + # Cover the scenario where update_task fails a few times in a row + # We should retain the result until RPC succeeds. + max_runtime = 5 + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args, **kwargs): + if mock_client.update_task.call_count <= 2: + # Use setattr() because internally grpc uses _InactiveRpcError + # but it isn't exported.
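+ # The first two update_task calls fail with UNAVAILABLE; the worker holds + # the result and retries until the third call succeeds.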
+ err = grpc.RpcError("update task failed") + setattr(err, "code", lambda: grpc.StatusCode.UNAVAILABLE) + raise err + return None + + def get_task_response(*args, **kwargs): + # Only one task that fails to update + if mock_client.get_task.call_count == 1: + return SIMPLE_TASK + return None + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.side_effect = get_task_response + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until the update has 'completed' + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 3: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 3 + + def test_run_once_current_task_state(self) -> None: + # Run a task that uses retry_task() helper + # to raise and catch a NoRetriesRemainingError + max_runtime = 5 + taskworker = TaskWorker( + app_module="sentry.taskworker.runtime:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args, **kwargs): + return None + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.return_value = RETRY_STATE_TASK + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until the task has been processed + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 1: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 1 + # status is complete, as retry_state task handles the NoRetriesRemainingError + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert ( + mock_client.update_task.call_args.args[0].task_id == RETRY_STATE_TASK.activation.id + ) + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + + redis = redis_clusters.get("default") + assert current_task() is None, "should clear current task on completion" + assert redis.get("no-retries-remaining"), "key should exist if except block was hit" + redis.delete("no-retries-remaining") + + +@pytest.mark.django_db +@mock.patch("sentry.taskworker.workerchild.capture_checkin") +def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(SIMPLE_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + assert mock_capture_checkin.call_count == 0 + + +@pytest.mark.django_db +def test_child_process_remove_start_time_kwargs() -> None: + activation = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="6789", + taskname="examples.will_retry", + namespace="examples", + parameters='{"args":
["stuff"], "kwargs": {"__start_time": 123}}', + processing_deadline_duration=100000, + ), + ) + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(activation) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == activation.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +@pytest.mark.django_db +def test_child_process_retry_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(RETRY_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == RETRY_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_RETRY + + +@mock.patch("sentry.taskworker.workerchild.sentry_sdk.capture_exception") +@pytest.mark.django_db +def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: + # Create an activation that is on its final attempt and + # will raise an error again. + activation = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="6789", + taskname="examples.will_retry", + namespace="examples", + parameters='{"args": ["raise"], "kwargs": {}}', + processing_deadline_duration=100000, + retry_state=RetryState( + attempts=2, + max_attempts=3, + ), + ), + ) + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(activation) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == activation.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + assert mock_capture.call_count == 1 + capture_call = mock_capture.call_args[0] + # Error type and chained error should be captured. 
+ assert isinstance(capture_call[0], NoRetriesRemainingError) + assert isinstance(capture_call[0].__cause__, RuntimeError) + + +@pytest.mark.django_db +def test_child_process_failure_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(FAIL_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == FAIL_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + +@pytest.mark.django_db +def test_child_process_shutdown() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + shutdown.set() + + todo.put(SIMPLE_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + # When shutdown has been set, the child should not process more tasks. + assert todo.qsize() == 1 + assert processed.qsize() == 0 + + +@pytest.mark.django_db +def test_child_process_unknown_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(UNDEFINED_TASK) + todo.put(SIMPLE_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + result = processed.get() + assert result.task_id == UNDEFINED_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + result = processed.get() + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +@pytest.mark.django_db +def test_child_process_at_most_once() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(AT_MOST_ONCE_TASK) + todo.put(AT_MOST_ONCE_TASK) + todo.put(SIMPLE_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=2, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get(block=False) + assert result.task_id == AT_MOST_ONCE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + result = processed.get(block=False) + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +@pytest.mark.django_db +@mock.patch("sentry.taskworker.workerchild.capture_checkin") +def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(SCHEDULED_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + assert mock_capture_checkin.call_count == 1 + mock_capture_checkin.assert_called_with( + monitor_slug="simple-task", + 
check_in_id="abc123", + duration=mock.ANY, + status=MonitorStatus.OK, + ) + + +@pytest.mark.django_db +@mock.patch("sentry.taskworker.workerchild.sentry_sdk.capture_exception") +def test_child_process_terminate_task(mock_capture: mock.Mock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + sleepy = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="111", + taskname="examples.timed", + namespace="examples", + parameters='{"args": [3], "kwargs": {}}', + processing_deadline_duration=1, + ), + ) + + todo.put(sleepy) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get(block=False) + assert result.task_id == sleepy.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + assert mock_capture.call_count == 1 + assert type(mock_capture.call_args.args[0]) is ProcessingDeadlineExceeded + + +@pytest.mark.django_db +@mock.patch("sentry.taskworker.workerchild.capture_checkin") +def test_child_process_decompression(mock_capture_checkin: mock.MagicMock) -> None: + + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(COMPRESSED_TASK) + child_process( + "sentry.taskworker.runtime:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == COMPRESSED_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + assert mock_capture_checkin.call_count == 0 From d17246a8705ab9a37f14664308a80fc986fe96e5 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 13:28:14 -0500 Subject: [PATCH 02/19] Move code around and get the first test passing. 
--- clients/python/pyproject.toml | 5 +- clients/python/src/app.py | 0 clients/python/src/router.py | 5 - clients/python/src/taskbroker_client/TODO | 9 + .../src/{ => taskbroker_client}/__init__.py | 0 clients/python/src/taskbroker_client/app.py | 84 +++ .../client/__init__.py | 0 .../{ => taskbroker_client}/client/client.py | 0 .../client/inflight_task_activation.py | 0 .../client/processing_result.py | 0 .../src/{ => taskbroker_client}/constants.py | 0 .../python/src/taskbroker_client/imports.py | 30 + .../src/{ => taskbroker_client}/py.typed | 0 .../src/{ => taskbroker_client}/registry.py | 38 +- .../src/{ => taskbroker_client}/retry.py | 5 +- .../python/src/taskbroker_client/router.py | 18 + .../scheduler/__init__.py | 0 .../scheduler/config.py | 0 .../scheduler/scheduler.py | 0 .../scheduler/schedules.py | 0 .../src/{ => taskbroker_client}/state.py | 0 .../src/{ => taskbroker_client}/task.py | 23 +- .../worker/__init__.py | 0 .../{ => taskbroker_client}/worker/worker.py | 0 .../worker/workerchild.py | 0 clients/python/{test => tests}/__init__.py | 0 .../python/{test => tests}/client/__init__.py | 0 .../{test => tests}/client/test_client.py | 0 .../{test => tests}/scheduler/__init__.py | 0 .../{test => tests}/scheduler/test_runner.py | 0 .../scheduler/test_schedules.py | 0 clients/python/{test => tests}/test_app.py | 28 +- .../python/{test => tests}/test_registry.py | 0 clients/python/{test => tests}/test_retry.py | 3 +- clients/python/{test => tests}/test_task.py | 0 .../python/{test => tests}/worker/__init__.py | 0 .../{test => tests}/worker/test_worker.py | 0 clients/python/uv.lock | 572 ++++++++++++++++++ 38 files changed, 769 insertions(+), 51 deletions(-) delete mode 100644 clients/python/src/app.py delete mode 100644 clients/python/src/router.py create mode 100644 clients/python/src/taskbroker_client/TODO rename clients/python/src/{ => taskbroker_client}/__init__.py (100%) create mode 100644 clients/python/src/taskbroker_client/app.py rename clients/python/src/{ => taskbroker_client}/client/__init__.py (100%) rename clients/python/src/{ => taskbroker_client}/client/client.py (100%) rename clients/python/src/{ => taskbroker_client}/client/inflight_task_activation.py (100%) rename clients/python/src/{ => taskbroker_client}/client/processing_result.py (100%) rename clients/python/src/{ => taskbroker_client}/constants.py (100%) create mode 100644 clients/python/src/taskbroker_client/imports.py rename clients/python/src/{ => taskbroker_client}/py.typed (100%) rename clients/python/src/{ => taskbroker_client}/registry.py (91%) rename clients/python/src/{ => taskbroker_client}/retry.py (97%) create mode 100644 clients/python/src/taskbroker_client/router.py rename clients/python/src/{ => taskbroker_client}/scheduler/__init__.py (100%) rename clients/python/src/{ => taskbroker_client}/scheduler/config.py (100%) rename clients/python/src/{ => taskbroker_client}/scheduler/scheduler.py (100%) rename clients/python/src/{ => taskbroker_client}/scheduler/schedules.py (100%) rename clients/python/src/{ => taskbroker_client}/state.py (100%) rename clients/python/src/{ => taskbroker_client}/task.py (97%) rename clients/python/src/{ => taskbroker_client}/worker/__init__.py (100%) rename clients/python/src/{ => taskbroker_client}/worker/worker.py (100%) rename clients/python/src/{ => taskbroker_client}/worker/workerchild.py (100%) rename clients/python/{test => tests}/__init__.py (100%) rename clients/python/{test => tests}/client/__init__.py (100%) rename clients/python/{test => 
tests}/client/test_client.py (100%) rename clients/python/{test => tests}/scheduler/__init__.py (100%) rename clients/python/{test => tests}/scheduler/test_runner.py (100%) rename clients/python/{test => tests}/scheduler/test_schedules.py (100%) rename clients/python/{test => tests}/test_app.py (54%) rename clients/python/{test => tests}/test_registry.py (100%) rename clients/python/{test => tests}/test_retry.py (98%) rename clients/python/{test => tests}/test_task.py (100%) rename clients/python/{test => tests}/worker/__init__.py (100%) rename clients/python/{test => tests}/worker/test_worker.py (100%) create mode 100644 clients/python/uv.lock diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index d740dc1d..079aa56e 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -5,12 +5,15 @@ description = "Taskbroker python client and worker runtime" readme = "README.md" requires-python = ">=3.12.11" dependencies = [ + "sentry-arroyo>=2.33.1", + "sentry-sdk[http2]>=2.43.0", "sentry-protos>=0.2.0", "confluent_kafka>=2.3.0", "grpcio==1.66.1", "orjson>=3.10.10", "protobuf>=5.28.3", "types-protobuf>=6.30.2.20250703", + "zstandard>=0.18.0", ] [dependency-groups] @@ -38,7 +41,7 @@ default = true [tool.pytest.ini_options] pythonpath = ["python"] -testpaths = ["test"] +testpaths = ["tests"] python_files = ["test_*.py"] python_functions = ["test_*"] diff --git a/clients/python/src/app.py b/clients/python/src/app.py deleted file mode 100644 index e69de29b..00000000 diff --git a/clients/python/src/router.py b/clients/python/src/router.py deleted file mode 100644 index 02ed1474..00000000 --- a/clients/python/src/router.py +++ /dev/null @@ -1,5 +0,0 @@ -from typing import Protocol - - -class TaskRouter(Protocol): - def route_namespace(self, name: str) -> str: ... diff --git a/clients/python/src/taskbroker_client/TODO b/clients/python/src/taskbroker_client/TODO new file mode 100644 index 00000000..56311def --- /dev/null +++ b/clients/python/src/taskbroker_client/TODO @@ -0,0 +1,9 @@ +Fix ups +- metrics - How will that even work? + Maybe attach a metrics backend to the app, and import the app all over the place? +- Kafka producers? - Use arroyo? How to get at the singletons? Perhaps have a protocol for getting a producer based on topic? + + +Application improvements +- need a way to inject a router object. +- need a way to provide a producer factory. diff --git a/clients/python/src/__init__.py b/clients/python/src/taskbroker_client/__init__.py similarity index 100% rename from clients/python/src/__init__.py rename to clients/python/src/taskbroker_client/__init__.py diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py new file mode 100644 index 00000000..df33117f --- /dev/null +++ b/clients/python/src/taskbroker_client/app.py @@ -0,0 +1,84 @@ +import importlib +from collections.abc import Iterable +from typing import Any, Protocol + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + +from taskbroker_client.registry import TaskRegistry + + +class AtMostOnceStore(Protocol): + def add(self, key: str, value: str, timeout: int) -> bool: ... + + +class TaskworkerApp: + """ + Container for an application's task setup and configuration.
+ """ + + def __init__(self, taskregistry: TaskRegistry | None = None) -> None: + self._config = { + "rpc_secret": None, + "at_most_once_timeout": None, + } + self._modules: Iterable[str] = [] + self._taskregistry = taskregistry or TaskRegistry() + self._at_most_once_store: AtMostOnceStore | None = None + + @property + def taskregistry(self) -> TaskRegistry: + """Get the TaskRegistry instance from this app""" + return self._taskregistry + + @property + def config(self) -> dict[str, Any]: + """Get the config data""" + return self._config + + def set_config(self, config: dict[str, Any]) -> None: + """Update configuration data""" + for key, value in config.items(): + if key in self._config: + self._config[key] = value + + def set_modules(self, modules: Iterable[str]) -> None: + """ + Set the list of modules containing tasks to be loaded by workers and schedulers. + """ + self._modules = modules + + def load_modules(self) -> None: + """Load all of the configured modules""" + for mod in self._modules: + __import__(mod) + + def at_most_once_store(self, backend: AtMostOnceStore) -> None: + """ + Set the backend store for `at_most_once` tasks. + The storage implementation should support atomic operations + to avoid races with at_most_once tasks. + """ + self._at_most_once_store = backend + + def should_attempt_at_most_once(self, activation: TaskActivation) -> bool: + if not self._at_most_once_store: + return True + key = get_at_most_once_key(activation.namespace, activation.taskname, activation.id) + return self._at_most_once_store.add( + key, "1", timeout=self._config["at_most_once_timeout"] or 60 + ) + + +def get_at_most_once_key(namespace: str, taskname: str, task_id: str) -> str: + # tw:amo -> taskworker:at_most_once + return f"tw:amo:{namespace}:{taskname}:{task_id}" + + +def import_app(app_module: str) -> TaskworkerApp: + """ + Resolve an application path like `acme.worker.runtime:app` + into the `app` symbol defined in the module. 
+ """ + module_name, name = app_module.split(":") + module = importlib.import_module(module_name) + return getattr(module, name) diff --git a/clients/python/src/client/__init__.py b/clients/python/src/taskbroker_client/client/__init__.py similarity index 100% rename from clients/python/src/client/__init__.py rename to clients/python/src/taskbroker_client/client/__init__.py diff --git a/clients/python/src/client/client.py b/clients/python/src/taskbroker_client/client/client.py similarity index 100% rename from clients/python/src/client/client.py rename to clients/python/src/taskbroker_client/client/client.py diff --git a/clients/python/src/client/inflight_task_activation.py b/clients/python/src/taskbroker_client/client/inflight_task_activation.py similarity index 100% rename from clients/python/src/client/inflight_task_activation.py rename to clients/python/src/taskbroker_client/client/inflight_task_activation.py diff --git a/clients/python/src/client/processing_result.py b/clients/python/src/taskbroker_client/client/processing_result.py similarity index 100% rename from clients/python/src/client/processing_result.py rename to clients/python/src/taskbroker_client/client/processing_result.py diff --git a/clients/python/src/constants.py b/clients/python/src/taskbroker_client/constants.py similarity index 100% rename from clients/python/src/constants.py rename to clients/python/src/taskbroker_client/constants.py diff --git a/clients/python/src/taskbroker_client/imports.py b/clients/python/src/taskbroker_client/imports.py new file mode 100644 index 00000000..590266d9 --- /dev/null +++ b/clients/python/src/taskbroker_client/imports.py @@ -0,0 +1,30 @@ +from typing import Any + + +class ModuleProxyCache(dict[str, object]): + def __missing__(self, key: str) -> object: + if "." 
not in key: + return __import__(key) + + module_name, class_name = key.rsplit(".", 1) + + module = __import__(module_name, {}, {}, [class_name]) + handler = getattr(module, class_name) + + # Cache the resolved symbol to avoid repeated lookups + self[key] = handler + + return handler + + +_cache = ModuleProxyCache() + + +def import_string(path: str) -> Any: + """ + Path must be module.path.ClassName + + >>> cls = import_string('sentry.models.Group') + """ + result = _cache[path] + return result diff --git a/clients/python/src/py.typed b/clients/python/src/taskbroker_client/py.typed similarity index 100% rename from clients/python/src/py.typed rename to clients/python/src/taskbroker_client/py.typed diff --git a/clients/python/src/registry.py b/clients/python/src/taskbroker_client/registry.py similarity index 91% rename from clients/python/src/registry.py rename to clients/python/src/taskbroker_client/registry.py index d06f3b56..53d10475 100644 --- a/clients/python/src/registry.py +++ b/clients/python/src/taskbroker_client/registry.py @@ -8,22 +8,22 @@ import sentry_sdk from arroyo.backends.kafka import KafkaPayload, KafkaProducer -from arroyo.types import BrokerValue -from arroyo.types import Topic as ArroyoTopic -from django.conf import settings -from sentry.conf.types.kafka_definition import Topic -from sentry.silo.base import SiloMode -from sentry.taskworker.constants import DEFAULT_PROCESSING_DEADLINE, CompressionType -from sentry.taskworker.retry import Retry -from sentry.taskworker.router import TaskRouter -from sentry.taskworker.silolimiter import TaskSiloLimit -from sentry.taskworker.task import P, R, Task -from sentry.utils import metrics -from sentry.utils.arroyo_producer import SingletonProducer, get_arroyo_producer -from sentry.utils.imports import import_string +from arroyo.types import BrokerValue, Topic from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation from sentry_sdk.consts import OP, SPANDATA +# from django.conf import settings +# from sentry.conf.types.kafka_definition import Topic +# from sentry.silo.base import SiloMode +from taskbroker_client.constants import DEFAULT_PROCESSING_DEADLINE, CompressionType + +# from sentry.utils import metrics +# from sentry.utils.arroyo_producer import SingletonProducer, get_arroyo_producer +from taskbroker_client.imports import import_string +from taskbroker_client.retry import Retry +from taskbroker_client.router import TaskRouter +from taskbroker_client.task import P, R, Task + logger = logging.getLogger(__name__) ProducerFuture = futures.Future[BrokerValue[KafkaPayload]] @@ -45,6 +45,7 @@ def __init__( processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE, app_feature: str | None = None, ): + # TODO Figure out how to get producers here.
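+ # One possible shape (sketch, producer_factory is an invented name): accept a + # callable mapping a topic name to an arroyo KafkaProducer, so the host + # application controls producer construction.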
self.name = name self.router = router self.default_retry = retry @@ -52,7 +53,7 @@ def __init__( self.default_processing_deadline_duration = processing_deadline_duration # seconds self.app_feature = app_feature or name self._registered_tasks: dict[str, Task[Any, Any]] = {} - self._producers: dict[Topic, SingletonProducer] = {} + # self._producers: dict[Topic, SingletonProducer] = {} def get(self, name: str) -> Task[Any, Any]: """ @@ -131,9 +132,6 @@ def wrapped(func: Callable[P, R]) -> Task[P, R]: wait_for_delivery=wait_for_delivery, compression_type=compression_type, ) - if silo_mode: - silo_limiter = TaskSiloLimit(silo_mode) - task = silo_limiter(task) # TODO(taskworker) tasks should be registered into the registry # so that we can ensure task names are globally unique self._registered_tasks[name] = task @@ -216,7 +214,8 @@ def __init__(self) -> None: self._router = self._build_router() def _build_router(self) -> TaskRouter: - router_name: str = settings.TASKWORKER_ROUTER + # TODO add setting for this + router_name = "taskbroker_client.router.DefaultRouter" router_class = import_string(router_name) router = router_class() assert hasattr(router, "route_namespace") @@ -266,6 +265,3 @@ def create_namespace( self._namespaces[name] = namespace return namespace - - -taskregistry = TaskRegistry() diff --git a/clients/python/src/retry.py b/clients/python/src/taskbroker_client/retry.py similarity index 97% rename from clients/python/src/retry.py rename to clients/python/src/taskbroker_client/retry.py index 68d3f210..7235a864 100644 --- a/clients/python/src/retry.py +++ b/clients/python/src/taskbroker_client/retry.py @@ -3,8 +3,7 @@ from enum import Enum from multiprocessing.context import TimeoutError -from sentry.taskworker.state import current_task -from sentry.utils import metrics +# from sentry.utils import metrics from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DEADLETTER, ON_ATTEMPTS_EXCEEDED_DISCARD, @@ -12,6 +11,8 @@ RetryState, ) +from taskbroker_client.state import current_task + class RetryTaskError(Exception): + """ + diff --git a/clients/python/src/taskbroker_client/router.py b/clients/python/src/taskbroker_client/router.py new file mode 100644 index 00000000..51347546 --- /dev/null +++ b/clients/python/src/taskbroker_client/router.py @@ -0,0 +1,18 @@ +from typing import Protocol + + +class TaskRouter(Protocol): + """ + Resolves task namespaces to topic names. + """ + + def route_namespace(self, name: str) -> str: ...
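+ + +# A host application could satisfy this protocol with a dict-backed router. +# Illustrative sketch (MappingRouter is an invented name, not part of this +# module): +# +# class MappingRouter: +# def __init__(self, routes: dict[str, str], default: str = "default") -> None: +# self._routes = routes +# self._default = default +# +# def route_namespace(self, name: str) -> str: +# return self._routes.get(name, self._default)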
+ + +class DefaultRouter(TaskRouter): + """ + Stub router that resolves all namespaces to the default topic + """ + + def route_namespace(self, name: str) -> str: + return "default" diff --git a/clients/python/src/scheduler/__init__.py b/clients/python/src/taskbroker_client/scheduler/__init__.py similarity index 100% rename from clients/python/src/scheduler/__init__.py rename to clients/python/src/taskbroker_client/scheduler/__init__.py diff --git a/clients/python/src/scheduler/config.py b/clients/python/src/taskbroker_client/scheduler/config.py similarity index 100% rename from clients/python/src/scheduler/config.py rename to clients/python/src/taskbroker_client/scheduler/config.py diff --git a/clients/python/src/scheduler/scheduler.py b/clients/python/src/taskbroker_client/scheduler/scheduler.py similarity index 100% rename from clients/python/src/scheduler/scheduler.py rename to clients/python/src/taskbroker_client/scheduler/scheduler.py diff --git a/clients/python/src/scheduler/schedules.py b/clients/python/src/taskbroker_client/scheduler/schedules.py similarity index 100% rename from clients/python/src/scheduler/schedules.py rename to clients/python/src/taskbroker_client/scheduler/schedules.py diff --git a/clients/python/src/state.py b/clients/python/src/taskbroker_client/state.py similarity index 100% rename from clients/python/src/state.py rename to clients/python/src/taskbroker_client/state.py diff --git a/clients/python/src/task.py b/clients/python/src/taskbroker_client/task.py similarity index 97% rename from clients/python/src/task.py rename to clients/python/src/taskbroker_client/task.py index 293c0eae..55616a5e 100644 --- a/clients/python/src/task.py +++ b/clients/python/src/taskbroker_client/task.py @@ -11,24 +11,27 @@ import orjson import sentry_sdk import zstandard as zstd -from django.conf import settings -from django.utils import timezone + +# from django.conf import settings +# from django.utils import timezone from google.protobuf.timestamp_pb2 import Timestamp -from sentry.taskworker.constants import ( - DEFAULT_PROCESSING_DEADLINE, - MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, - CompressionType, -) -from sentry.taskworker.retry import Retry -from sentry.utils import metrics + +# from sentry.utils import metrics from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DISCARD, RetryState, TaskActivation, ) +from taskbroker_client.constants import ( + DEFAULT_PROCESSING_DEADLINE, + MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, + CompressionType, +) +from taskbroker_client.retry import Retry + if TYPE_CHECKING: - from sentry.taskworker.registry import TaskNamespace + from taskbroker_client.registry import TaskNamespace P = ParamSpec("P") diff --git a/clients/python/src/worker/__init__.py b/clients/python/src/taskbroker_client/worker/__init__.py similarity index 100% rename from clients/python/src/worker/__init__.py rename to clients/python/src/taskbroker_client/worker/__init__.py diff --git a/clients/python/src/worker/worker.py b/clients/python/src/taskbroker_client/worker/worker.py similarity index 100% rename from clients/python/src/worker/worker.py rename to clients/python/src/taskbroker_client/worker/worker.py diff --git a/clients/python/src/worker/workerchild.py b/clients/python/src/taskbroker_client/worker/workerchild.py similarity index 100% rename from clients/python/src/worker/workerchild.py rename to clients/python/src/taskbroker_client/worker/workerchild.py diff --git a/clients/python/test/__init__.py b/clients/python/tests/__init__.py similarity 
index 100% rename from clients/python/test/__init__.py rename to clients/python/tests/__init__.py diff --git a/clients/python/test/client/__init__.py b/clients/python/tests/client/__init__.py similarity index 100% rename from clients/python/test/client/__init__.py rename to clients/python/tests/client/__init__.py diff --git a/clients/python/test/client/test_client.py b/clients/python/tests/client/test_client.py similarity index 100% rename from clients/python/test/client/test_client.py rename to clients/python/tests/client/test_client.py diff --git a/clients/python/test/scheduler/__init__.py b/clients/python/tests/scheduler/__init__.py similarity index 100% rename from clients/python/test/scheduler/__init__.py rename to clients/python/tests/scheduler/__init__.py diff --git a/clients/python/test/scheduler/test_runner.py b/clients/python/tests/scheduler/test_runner.py similarity index 100% rename from clients/python/test/scheduler/test_runner.py rename to clients/python/tests/scheduler/test_runner.py diff --git a/clients/python/test/scheduler/test_schedules.py b/clients/python/tests/scheduler/test_schedules.py similarity index 100% rename from clients/python/test/scheduler/test_schedules.py rename to clients/python/tests/scheduler/test_schedules.py diff --git a/clients/python/test/test_app.py b/clients/python/tests/test_app.py similarity index 54% rename from clients/python/test/test_app.py rename to clients/python/tests/test_app.py index 056f3b7b..d06fbd31 100644 --- a/clients/python/test/test_app.py +++ b/clients/python/tests/test_app.py @@ -1,29 +1,34 @@ -import pytest -from django.core.cache import cache -from sentry.taskworker.app import TaskworkerApp -from sentry.taskworker.registry import TaskRegistry from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation +from taskbroker_client.app import AtMostOnceStore, TaskworkerApp +from taskbroker_client.registry import TaskRegistry -@pytest.fixture -def clear_cache(): - cache.clear() +class StubAtMostOnce(AtMostOnceStore): + def __init__(self) -> None: + self._keys: dict[str, str] = {} -def test_taskregistry_param_and_property(): + def add(self, key: str, value: str, timeout: int) -> bool: + if key in self._keys: + return False + self._keys[key] = value + return True + + +def test_taskregistry_param_and_property() -> None: registry = TaskRegistry() app = TaskworkerApp(taskregistry=registry) assert app.taskregistry == registry -def test_set_config(): +def test_set_config() -> None: app = TaskworkerApp() app.set_config({"rpc_secret": "testing", "ignored": "key"}) assert app.config["rpc_secret"] == "testing" assert "ignored" not in app.config -def test_should_attempt_at_most_once(clear_cache): +def test_should_attempt_at_most_once() -> None: activation = TaskActivation( id="111", taskname="examples.simple_task", @@ -31,7 +36,8 @@ def test_should_attempt_at_most_once(clear_cache): parameters='{"args": [], "kwargs": {}}', processing_deadline_duration=2, ) + at_most = StubAtMostOnce() app = TaskworkerApp() - app.at_most_once_store(cache) + app.at_most_once_store(at_most) assert app.should_attempt_at_most_once(activation) assert not app.should_attempt_at_most_once(activation) diff --git a/clients/python/test/test_registry.py b/clients/python/tests/test_registry.py similarity index 100% rename from clients/python/test/test_registry.py rename to clients/python/tests/test_registry.py diff --git a/clients/python/test/test_retry.py b/clients/python/tests/test_retry.py similarity index 98% rename from clients/python/test/test_retry.py rename 
to clients/python/tests/test_retry.py index 49258c24..624efa96 100644 --- a/clients/python/test/test_retry.py +++ b/clients/python/tests/test_retry.py @@ -2,12 +2,13 @@ from multiprocessing.context import TimeoutError -from sentry.taskworker.retry import LastAction, Retry, RetryTaskError from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DEADLETTER, ON_ATTEMPTS_EXCEEDED_DISCARD, ) +from taskbroker_client.retry import LastAction, Retry, RetryTaskError + class RuntimeChildError(RuntimeError): """Dummy exception for instanceof tests""" diff --git a/clients/python/test/test_task.py b/clients/python/tests/test_task.py similarity index 100% rename from clients/python/test/test_task.py rename to clients/python/tests/test_task.py diff --git a/clients/python/test/worker/__init__.py b/clients/python/tests/worker/__init__.py similarity index 100% rename from clients/python/test/worker/__init__.py rename to clients/python/tests/worker/__init__.py diff --git a/clients/python/test/worker/test_worker.py b/clients/python/tests/worker/test_worker.py similarity index 100% rename from clients/python/test/worker/test_worker.py rename to clients/python/tests/worker/test_worker.py diff --git a/clients/python/uv.lock b/clients/python/uv.lock new file mode 100644 index 00000000..0e88d3b7 --- /dev/null +++ b/clients/python/uv.lock @@ -0,0 +1,572 @@ +version = 1 +revision = 2 +requires-python = ">=3.12.11" +resolution-markers = [ + "sys_platform == 'darwin' or sys_platform == 'linux'", +] +supported-markers = [ + "sys_platform == 'darwin' or sys_platform == 'linux'", +] + +[[package]] +name = "black" +version = "24.10.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pathspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d" }, +] + +[[package]] +name = "certifi" +version = "2025.8.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and 
sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26" }, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" }, +] + +[[package]] +name = "click" +version = "8.3.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc" }, +] + +[[package]] +name = "confluent-kafka" +version = "2.9.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:537713346e4f561341fd49e1859892e85916b43f730a3a7ebf7b4fa66457e742" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1990db1569a174eb8187ed2555b793da223317363798eb3884f88a3b501c3c15" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d8d8d4475dedd7a0883ca14cbd1b78dc6119139bb07d2c953b9043b1b8826782" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:5920cc984047f950ce7f3050407cb816813dc434ceb0ca0156ab56303d69245e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:c594238f7a9615f812de2b2c5d3a7b91788cdb74ece7d88ed736faa87e571715" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:aa26072388f6021f95e41762338565e66a2d96f0538fdc72650bc154a0e547eb" }, + { url = 
"https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:bc00d0fdd4d85d7e3fcb3e9238092dba439ffcf35e7a3960e42d3eb0a41b4ce1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b21e4a4ba88374a1487a9353debcddc994dae385f89d6bc45f08ab372e238756" }, +] + +[[package]] +name = "devservices" +version = "1.2.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-sdk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "supervisor", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/devservices-1.2.3-py3-none-any.whl", hash = "sha256:19beb1dabb533c5dcbd021d6a34e3f357e5c868670f0dfe8945911d3965a6494" }, +] + +[[package]] +name = "distlib" +version = "0.3.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87" }, +] + +[[package]] +name = "filelock" +version = "3.19.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d" }, +] + +[[package]] +name = "flake8" +version = "7.3.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "mccabe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pycodestyle", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyflakes", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e" }, +] + +[[package]] +name = "grpc-stubs" +version = "1.53.0.6" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/grpc_stubs-1.53.0.6-py3-none-any.whl", hash = "sha256:3ffc5a6b5bd84ac46f3d84e2434e97936c1262b47b71b462bdedc43caaf227e1" }, +] + +[[package]] +name = "grpcio" +version = "1.66.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fdb14bad0835914f325349ed34a51940bc2ad965142eb3090081593c6e347be9" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f03a5884c56256e08fd9e262e11b5cfacf1af96e2ce78dc095d2c41ccae2c80d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ca1be089fb4446490dd1135828bd42a7c7f8421e74fa581611f7afdf7ab761" }, +] + +[[package]] +name = "h11" +version = 
"0.16.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" }, +] + +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5" }, +] + +[[package]] +name = "identify" +version = "2.6.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150" }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" }, +] + +[[package]] +name = "isort" +version = "6.0.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615" }, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" }, +] + +[[package]] +name = "mypy" +version = "1.18.2" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "mypy-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pathspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform 
== 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505" }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9" }, +] + +[[package]] +name = "orjson" +version = "3.10.10" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:44bffae68c291f94ff5a9b4149fe9d1bdd4cd0ff0fb575bcea8351d48db629a1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e27b4c6437315df3024f0835887127dac2a0a3ff643500ec27088d2588fa5ae1" 
}, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca84df16d6b49325a4084fd8b2fe2229cb415e15c46c529f868c3387bb1339d" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484" }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08" }, +] + +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4" }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" }, +] + +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "cfgv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "identify", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "nodeenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "virtualenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd" }, +] + +[[package]] +name = "protobuf" +version = "5.29.5" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e38c5add5a311f2a6eb0340716ef9b039c1dfa428b28f25a7838ac329204a671" }, + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:fa18533a299d7ab6c55a238bf8629311439995f2e7eca5caaff08663606e9015" }, + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:63848923da3325e1bf7e9003d680ce6e14b07e55d0473253a690c3a8b8fd6e61" }, +] + +[[package]] +name = "pycodestyle" +version = "2.14.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" }, +] + +[[package]] +name = "pycparser" +version = "2.23" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934" }, +] + +[[package]] +name = "pyflakes" 
+version = "3.4.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f" }, +] + +[[package]] +name = "pytest" +version = "8.3.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "iniconfig", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6" }, +] + +[[package]] +name = "sentry-arroyo" +version = "2.33.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_arroyo-2.33.1-py3-none-any.whl", hash = "sha256:10d05f81a06bd7f9ee28fe7d7a628c868c3ccbdb5987bece6d9860930e1654af" }, +] + +[[package]] +name = "sentry-devenv" +version = "1.25.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "sentry-sdk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_devenv-1.25.0-py3-none-any.whl", hash = 
"sha256:fc143542d555af05e4003052b8d2f336ac69361432e430ed92c22b9a3df5bd3d" }, +] + +[[package]] +name = "sentry-protos" +version = "0.4.7" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "grpc-stubs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_protos-0.4.7-py3-none-any.whl", hash = "sha256:bd14e782eceac28ed4f2ea4e36a2b2e189cdbd83a7f78ce9ac8aeab01a4242b8" }, +] + +[[package]] +name = "sentry-sdk" +version = "2.46.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "httpcore", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922" }, +] + +[[package]] +name = "supervisor" +version = "4.2.5" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/supervisor-4.2.5-py2.py3-none-any.whl", hash = "sha256:2ecaede32fc25af814696374b79e42644ecaba5c09494c51016ffda9602d0f08" }, +] + +[[package]] +name = "taskbroker-client" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "orjson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-arroyo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-sdk", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.metadata] +requires-dist = [ + { name = "confluent-kafka", specifier = ">=2.3.0" }, + { name = "grpcio", specifier = "==1.66.1" }, + { name = "orjson", specifier = ">=3.10.10" }, + { name = "protobuf", specifier = ">=5.28.3" }, + { name = "sentry-arroyo", specifier = ">=2.33.1" }, + { name = "sentry-protos", specifier = ">=0.2.0" }, + { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.43.0" }, + { name = "types-protobuf", specifier = ">=6.30.2.20250703" }, + { name = "zstandard", specifier = ">=0.18.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = "==24.10.0" }, + { name = "devservices", specifier = ">=1.2.1" }, + { name = "flake8", specifier = ">=7.3.0" }, + { name = "isort", specifier = ">=5.13.2" }, + { name = "mypy", specifier = ">=1.17.1" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "sentry-devenv", specifier = ">=1.22.2" }, +] + +[[package]] +name = "types-protobuf" +version = "6.30.2.20250703" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/types_protobuf-6.30.2.20250703-py3-none-any.whl", hash = "sha256:fa5aff9036e9ef432d703abbdd801b436a249b6802e4df5ef74513e272434e57" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc" }, +] + +[[package]] 
+name = "virtualenv" +version = "20.29.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "distlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170" }, +] + +[[package]] +name = "zstandard" +version = "0.18.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "cffi", marker = "(platform_python_implementation == 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation == 'PyPy' and sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2eab9516bc4352fc9763d96047c815879f3efb1dfb5dfe2f775b2e22c0289cb6" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e1f98ffd138d172efd202cd078e746af80492c6942004b080bf627c5f826da5" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07a72264613c75fe6eb64f07ab553d3cfab7a421c8733e067a8718ef69c642a7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee14cc6b8b40733a707b467ddc192592cab941babf82b3e6f700673e050b4bda" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:032ddaf24458986a31ff49d2fa86a4003e1e1c34c38976bedd06805350eaeddc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d68ff7c3a4c35400d807efbfa793767c2d4866a7017770b424e65749a70e958e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1ef5b96f0e90855ea13d06b7213a75a77a23946d8bb186ff38578dd1ff5efd4" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:754256fb4080a36f8992983b2f65f23719d275c9a350bcf18d76344ed64efa19" }, +] From b94b53f053ad40c87089a1f1485aad956b903b7f Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 13:47:19 -0500 Subject: [PATCH 03/19] Refactor router creation so it doesn't need global state. 
--- clients/python/src/taskbroker_client/app.py | 26 ++++++++++++++++--- .../python/src/taskbroker_client/registry.py | 15 +++-------- clients/python/tests/test_app.py | 20 ++++++++++++--- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py index df33117f..3b2456e5 100644 --- a/clients/python/src/taskbroker_client/app.py +++ b/clients/python/src/taskbroker_client/app.py @@ -5,6 +5,8 @@ from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation from taskbroker_client.registry import TaskRegistry +from taskbroker_client.router import TaskRouter +from taskbroker_client.imports import import_string class AtMostOnceStore(Protocol): @@ -16,14 +18,32 @@ class TaskworkerApp: Container for an application's task setup and configuration. """ - def __init__(self, taskregistry: TaskRegistry | None = None) -> None: + def __init__( + self, + router_class: str | TaskRouter = "taskbroker_client.router.DefaultRouter", + at_most_once_store: AtMostOnceStore | None = None, + ) -> None: self._config = { "rpc_secret": None, "at_most_once_timeout": None, } self._modules: Iterable[str] = [] - self._taskregistry = taskregistry or TaskRegistry() - self._at_most_once_store: AtMostOnceStore | None = None + self._taskregistry = TaskRegistry( + router=self._build_router(router_class) + ) + self._at_most_once_store: AtMostOnceStore | None = None + if at_most_once_store: + self.at_most_once_store(at_most_once_store) + + def _build_router(self, router_name: str | TaskRouter) -> TaskRouter: + if isinstance(router_name, str): + router_class = import_string(router_name) + router = router_class() + else: + router = router_name + assert hasattr(router, "route_namespace") + + return router @property def taskregistry(self) -> TaskRegistry: diff --git a/clients/python/src/taskbroker_client/registry.py b/clients/python/src/taskbroker_client/registry.py index 53d10475..c0ad8e66 100644 --- a/clients/python/src/taskbroker_client/registry.py +++ b/clients/python/src/taskbroker_client/registry.py @@ -72,7 +72,7 @@ def contains(self, name: str) -> bool: return name in self._registered_tasks @property - def topic(self) -> Topic: + def topic(self) -> str: return self.router.route_namespace(self.name) def register( @@ -209,18 +209,9 @@ class TaskRegistry: during startup.
""" - def __init__(self) -> None: + def __init__(self, router: TaskRouter) -> None: self._namespaces: dict[str, TaskNamespace] = {} - self._router = self._build_router() - - def _build_router(self) -> TaskRouter: - # TODO add setting for this - router_name = "taskbroker_client.router.DefaultRouter" - router_class = import_string(router_name) - router = router_class() - assert hasattr(router, "route_namespace") - - return router + self._router = router def contains(self, name: str) -> bool: return name in self._namespaces diff --git a/clients/python/tests/test_app.py b/clients/python/tests/test_app.py index d06fbd31..bbf0c354 100644 --- a/clients/python/tests/test_app.py +++ b/clients/python/tests/test_app.py @@ -1,6 +1,7 @@ from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation from taskbroker_client.app import AtMostOnceStore, TaskworkerApp +from taskbroker_client.router import TaskRouter from taskbroker_client.registry import TaskRegistry @@ -15,10 +16,21 @@ def add(self, key: str, value: str, timeout: int) -> bool: return True -def test_taskregistry_param_and_property() -> None: - registry = TaskRegistry() - app = TaskworkerApp(taskregistry=registry) - assert app.taskregistry == registry +class StubRouter(TaskRouter): + def route_namespace(self, name: str) -> str: + return "honk" + + +def test_taskregistry_router_object() -> None: + app = TaskworkerApp(router_class=StubRouter()) + ns = app.taskregistry.create_namespace("test") + assert ns.topic == "honk" + + +def test_taskregistry_router_str() -> None: + app = TaskworkerApp(router_class="taskbroker_client.router.DefaultRouter") + ns = app.taskregistry.create_namespace("test") + assert ns.topic == "default" def test_set_config() -> None: From 65660bcdfbd0f7b3973278d828b161e080f3827b Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 13:57:58 -0500 Subject: [PATCH 04/19] Add code that was lost in a move --- .../src/taskbroker_client/client/client.py | 344 +++++++++++++++ .../client/inflight_task_activation.py | 10 + .../src/taskbroker_client/worker/worker.py | 402 ++++++++++++++++++ 3 files changed, 756 insertions(+) diff --git a/clients/python/src/taskbroker_client/client/client.py b/clients/python/src/taskbroker_client/client/client.py index e69de29b..dd34684d 100644 --- a/clients/python/src/taskbroker_client/client/client.py +++ b/clients/python/src/taskbroker_client/client/client.py @@ -0,0 +1,344 @@ +import hashlib +import hmac +import logging +import random +import threading +import time +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import grpc +from google.protobuf.message import Message +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + FetchNextTask, + GetTaskRequest, + SetTaskStatusRequest, +) +from sentry_protos.taskbroker.v1.taskbroker_pb2_grpc import ConsumerServiceStub + +from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation +from sentry.taskworker.client.processing_result import ProcessingResult +from sentry.taskworker.constants import ( + DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS, + DEFAULT_REBALANCE_AFTER, + DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT, +) +from sentry.utils import json, metrics + +logger = logging.getLogger("sentry.taskworker.client") + +MAX_ACTIVATION_SIZE = 1024 * 1024 * 10 +"""Max payload size we will process.""" + + +def make_broker_hosts( + host_prefix: str, + num_brokers: int | None, + host_list: str | None = None, +) -> list[str]: + """ + 
Handle RPC host CLI options and create a list of broker host:ports + """ + if host_list: + stripped = map(lambda x: x.strip(), host_list.split(",")) + return list(filter(lambda x: len(x), stripped)) + if not num_brokers: + return [host_prefix] + domain, port = host_prefix.split(":") + return [f"{domain}-{i}:{port}" for i in range(0, num_brokers)] + + +class ClientCallDetails(grpc.ClientCallDetails): + """ + Subclass of grpc.ClientCallDetails that allows metadata to be updated + """ + + def __init__( + self, + method: str, + timeout: float | None, + metadata: tuple[tuple[str, str | bytes], ...] | None, + credentials: grpc.CallCredentials | None, + ): + self.timeout = timeout + self.method = method + self.metadata = metadata + self.credentials = credentials + + +# Type alias based on grpc-stubs +ContinuationType = Callable[[ClientCallDetails, Message], Any] + + +if TYPE_CHECKING: + InterceptorBase = grpc.UnaryUnaryClientInterceptor[Message, Message] + CallFuture = grpc.CallFuture[Message] +else: + InterceptorBase = grpc.UnaryUnaryClientInterceptor + CallFuture = Any + + +class RequestSignatureInterceptor(InterceptorBase): + def __init__(self, shared_secret: list[str]): + self._secret = shared_secret[0].encode("utf-8") + + def intercept_unary_unary( + self, + continuation: ContinuationType, + client_call_details: grpc.ClientCallDetails, + request: Message, + ) -> CallFuture: + request_body = request.SerializeToString() + method = client_call_details.method.encode("utf-8") + + signing_payload = method + b":" + request_body + signature = hmac.new(self._secret, signing_payload, hashlib.sha256).hexdigest() + + metadata = list(client_call_details.metadata) if client_call_details.metadata else [] + metadata.append(("sentry-signature", signature)) + + call_details_with_meta = ClientCallDetails( + client_call_details.method, + client_call_details.timeout, + tuple(metadata), + client_call_details.credentials, + ) + return continuation(call_details_with_meta, request) + + +class HostTemporarilyUnavailable(Exception): + """Raised when a host is temporarily unavailable and should be retried later.""" + + pass + + +@dataclass +class HealthCheckSettings: + file_path: Path + touch_interval_sec: float + + +class TaskworkerClient: + """ + Taskworker RPC client wrapper + + When multiple hosts are provided, the client will connect to all brokers + and choose a new broker to pair with randomly every max_tasks_before_rebalance tasks.
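+
+    For example (illustrative numbers): with hosts=["broker-0:50051",
+    "broker-1:50051"] and max_tasks_before_rebalance=32, the client processes
+    up to 32 tasks against one randomly chosen broker before selecting another.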
+ """ + + def __init__( + self, + hosts: list[str], + max_tasks_before_rebalance: int = DEFAULT_REBALANCE_AFTER, + max_consecutive_unavailable_errors: int = DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS, + temporary_unavailable_host_timeout: int = DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT, + health_check_settings: HealthCheckSettings | None = None, + rpc_secret: str | None = None, + grpc_config: str | None = None, + ) -> None: + assert len(hosts) > 0, "You must provide at least one RPC host to connect to" + self._hosts = hosts + self._rpc_secret = rpc_secret + + self._grpc_options: list[tuple[str, Any]] = [ + ("grpc.max_receive_message_length", MAX_ACTIVATION_SIZE) + ] + if grpc_config: + self._grpc_options.append(("grpc.service_config", grpc_config)) + + logger.info( + "taskworker.client.start", extra={"hosts": hosts, "options": self._grpc_options} + ) + + self._cur_host = random.choice(self._hosts) + self._host_to_stubs: dict[str, ConsumerServiceStub] = { + self._cur_host: self._connect_to_host(self._cur_host) + } + + self._max_tasks_before_rebalance = max_tasks_before_rebalance + self._num_tasks_before_rebalance = max_tasks_before_rebalance + + self._max_consecutive_unavailable_errors = max_consecutive_unavailable_errors + self._num_consecutive_unavailable_errors = 0 + + self._temporary_unavailable_hosts: dict[str, float] = {} + self._temporary_unavailable_host_timeout = temporary_unavailable_host_timeout + + self._health_check_settings = health_check_settings + self._timestamp_since_touch_lock = threading.Lock() + self._timestamp_since_touch = 0.0 + + def _emit_health_check(self) -> None: + if self._health_check_settings is None: + return + + with self._timestamp_since_touch_lock: + cur_time = time.time() + if ( + cur_time - self._timestamp_since_touch + < self._health_check_settings.touch_interval_sec + ): + return + + self._health_check_settings.file_path.touch() + metrics.incr( + "taskworker.client.health_check.touched", + ) + self._timestamp_since_touch = cur_time + + def _connect_to_host(self, host: str) -> ConsumerServiceStub: + logger.info("taskworker.client.connect", extra={"host": host}) + channel = grpc.insecure_channel(host, options=self._grpc_options) + if self._rpc_secret: + secrets = json.loads(self._rpc_secret) + channel = grpc.intercept_channel(channel, RequestSignatureInterceptor(secrets)) + return ConsumerServiceStub(channel) + + def _check_consecutive_unavailable_errors(self) -> None: + if self._num_consecutive_unavailable_errors >= self._max_consecutive_unavailable_errors: + self._temporary_unavailable_hosts[self._cur_host] = ( + time.time() + self._temporary_unavailable_host_timeout + ) + + def _clear_temporary_unavailable_hosts(self) -> None: + hosts_to_remove = [] + for host, timeout in self._temporary_unavailable_hosts.items(): + if time.time() >= timeout: + hosts_to_remove.append(host) + + for host in hosts_to_remove: + self._temporary_unavailable_hosts.pop(host) + + def _get_cur_stub(self) -> tuple[str, ConsumerServiceStub]: + self._clear_temporary_unavailable_hosts() + available_hosts = [h for h in self._hosts if h not in self._temporary_unavailable_hosts] + if not available_hosts: + # If all hosts are temporarily unavailable, wait for the shortest timeout + current_time = time.time() + shortest_timeout = min(self._temporary_unavailable_hosts.values()) + logger.info( + "taskworker.client.no_available_hosts", + extra={"sleeping for": shortest_timeout - current_time}, + ) + time.sleep(shortest_timeout - current_time) + return self._get_cur_stub() # try again + + if 
self._cur_host in self._temporary_unavailable_hosts: + self._cur_host = random.choice(available_hosts) + self._num_tasks_before_rebalance = self._max_tasks_before_rebalance + self._num_consecutive_unavailable_errors = 0 + metrics.incr( + "taskworker.client.loadbalancer.rebalance", + tags={"reason": "unavailable_count_reached"}, + ) + elif self._num_tasks_before_rebalance == 0: + self._cur_host = random.choice(available_hosts) + self._num_tasks_before_rebalance = self._max_tasks_before_rebalance + self._num_consecutive_unavailable_errors = 0 + metrics.incr( + "taskworker.client.loadbalancer.rebalance", + tags={"reason": "max_tasks_reached"}, + ) + + if self._cur_host not in self._host_to_stubs: + self._host_to_stubs[self._cur_host] = self._connect_to_host(self._cur_host) + + self._num_tasks_before_rebalance -= 1 + return self._cur_host, self._host_to_stubs[self._cur_host] + + def get_task(self, namespace: str | None = None) -> InflightTaskActivation | None: + """ + Fetch a pending task. + + If a namespace is provided, only tasks for that namespace will be fetched. + This will return None if there are no tasks to fetch. + """ + self._emit_health_check() + + request = GetTaskRequest(namespace=namespace) + try: + host, stub = self._get_cur_stub() + with metrics.timer("taskworker.get_task.rpc", tags={"host": host}): + response = stub.GetTask(request) + except grpc.RpcError as err: + metrics.incr( + "taskworker.client.rpc_error", tags={"method": "GetTask", "status": err.code().name} + ) + if err.code() == grpc.StatusCode.NOT_FOUND: + # Because our current broker doesn't have any tasks, try rebalancing. + self._num_tasks_before_rebalance = 0 + return None + if err.code() == grpc.StatusCode.UNAVAILABLE: + self._num_consecutive_unavailable_errors += 1 + self._check_consecutive_unavailable_errors() + raise + self._num_consecutive_unavailable_errors = 0 + self._temporary_unavailable_hosts.pop(host, None) + if response.HasField("task"): + metrics.incr( + "taskworker.client.get_task", + tags={"namespace": response.task.namespace}, + ) + return InflightTaskActivation( + activation=response.task, host=host, receive_timestamp=time.monotonic() + ) + return None + + def update_task( + self, + processing_result: ProcessingResult, + fetch_next_task: FetchNextTask | None = None, + ) -> InflightTaskActivation | None: + """ + Update the status for a given task activation. + + The return value is the next task that should be executed. + """ + self._emit_health_check() + + metrics.incr("taskworker.client.fetch_next", tags={"next": fetch_next_task is not None}) + self._clear_temporary_unavailable_hosts() + request = SetTaskStatusRequest( + id=processing_result.task_id, + status=processing_result.status, + fetch_next_task=fetch_next_task, + ) + + try: + if processing_result.host in self._temporary_unavailable_hosts: + metrics.incr( + "taskworker.client.skipping_set_task_due_to_unavailable_host", + tags={"broker_host": processing_result.host}, + ) + raise HostTemporarilyUnavailable( + f"Host: {processing_result.host} is temporarily unavailable" + ) + + with metrics.timer("taskworker.update_task.rpc", tags={"host": processing_result.host}): + response = self._host_to_stubs[processing_result.host].SetTaskStatus(request) + except grpc.RpcError as err: + metrics.incr( + "taskworker.client.rpc_error", + tags={"method": "SetTaskStatus", "status": err.code().name}, + ) + if err.code() == grpc.StatusCode.NOT_FOUND: + # The current broker is empty, switch. 
+ self._num_tasks_before_rebalance = 0 + + return None + if err.code() == grpc.StatusCode.UNAVAILABLE: + self._num_consecutive_unavailable_errors += 1 + self._check_consecutive_unavailable_errors() + raise + + self._num_consecutive_unavailable_errors = 0 + self._temporary_unavailable_hosts.pop(processing_result.host, None) + if response.HasField("task"): + return InflightTaskActivation( + activation=response.task, + host=processing_result.host, + receive_timestamp=time.monotonic(), + ) + return None diff --git a/clients/python/src/taskbroker_client/client/inflight_task_activation.py b/clients/python/src/taskbroker_client/client/inflight_task_activation.py index e69de29b..523a3257 100644 --- a/clients/python/src/taskbroker_client/client/inflight_task_activation.py +++ b/clients/python/src/taskbroker_client/client/inflight_task_activation.py @@ -0,0 +1,10 @@ +import dataclasses + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + + +@dataclasses.dataclass +class InflightTaskActivation: + activation: TaskActivation + host: str + receive_timestamp: float diff --git a/clients/python/src/taskbroker_client/worker/worker.py b/clients/python/src/taskbroker_client/worker/worker.py index e69de29b..16e13255 100644 --- a/clients/python/src/taskbroker_client/worker/worker.py +++ b/clients/python/src/taskbroker_client/worker/worker.py @@ -0,0 +1,402 @@ +from __future__ import annotations + +import logging +import multiprocessing +import queue +import signal +import threading +import time +from concurrent.futures import ThreadPoolExecutor +from multiprocessing.context import ForkContext, SpawnContext +from multiprocessing.process import BaseProcess +from pathlib import Path +from typing import Any + +import grpc +from sentry_protos.taskbroker.v1.taskbroker_pb2 import FetchNextTask + +from sentry import options +from sentry.taskworker.app import import_app +from sentry.taskworker.client.client import ( + HealthCheckSettings, + HostTemporarilyUnavailable, + TaskworkerClient, +) +from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation +from sentry.taskworker.client.processing_result import ProcessingResult +from sentry.taskworker.constants import ( + DEFAULT_REBALANCE_AFTER, + DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH, + DEFAULT_WORKER_QUEUE_SIZE, + MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE, +) +from sentry.taskworker.workerchild import child_process +from sentry.utils import metrics + +logger = logging.getLogger("sentry.taskworker.worker") + + +class TaskWorker: + """ + A TaskWorker fetches tasks from a taskworker RPC host and handles executing task activations. + + Tasks are executed in a forked process so that processing timeouts can be enforced. + As tasks are completed status changes will be sent back to the RPC host and new tasks + will be fetched. 
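+
+    Internally (as implemented below), a spawn-children thread keeps
+    ``concurrency`` child processes alive, activations flow to children
+    through the ``_child_tasks`` queue, and results come back on the
+    ``_processed_tasks`` queue, which a result thread drains back to the broker.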
+ + Taskworkers can be run with `sentry run taskworker` + """ + + mp_context: ForkContext | SpawnContext + + def __init__( + self, + app_module: str, + broker_hosts: list[str], + max_child_task_count: int | None = None, + namespace: str | None = None, + concurrency: int = 1, + child_tasks_queue_maxsize: int = DEFAULT_WORKER_QUEUE_SIZE, + result_queue_maxsize: int = DEFAULT_WORKER_QUEUE_SIZE, + rebalance_after: int = DEFAULT_REBALANCE_AFTER, + processing_pool_name: str | None = None, + process_type: str = "spawn", + health_check_file_path: str | None = None, + health_check_sec_per_touch: float = DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH, + **kwargs: dict[str, Any], + ) -> None: + self.options = kwargs + self._app_module = app_module + self._max_child_task_count = max_child_task_count + self._namespace = namespace + self._concurrency = concurrency + app = import_app(app_module) + + self.client = TaskworkerClient( + hosts=broker_hosts, + max_tasks_before_rebalance=rebalance_after, + health_check_settings=( + None + if health_check_file_path is None + else HealthCheckSettings(Path(health_check_file_path), health_check_sec_per_touch) + ), + rpc_secret=app.config["rpc_secret"], + grpc_config=options.get("taskworker.grpc_service_config"), + ) + if process_type == "fork": + self.mp_context = multiprocessing.get_context("fork") + elif process_type == "spawn": + self.mp_context = multiprocessing.get_context("spawn") + else: + raise ValueError(f"Invalid process type: {process_type}") + self._process_type = process_type + + self._child_tasks: multiprocessing.Queue[InflightTaskActivation] = self.mp_context.Queue( + maxsize=child_tasks_queue_maxsize + ) + self._processed_tasks: multiprocessing.Queue[ProcessingResult] = self.mp_context.Queue( + maxsize=result_queue_maxsize + ) + self._children: list[BaseProcess] = [] + self._shutdown_event = self.mp_context.Event() + self._result_thread: threading.Thread | None = None + self._spawn_children_thread: threading.Thread | None = None + + self._gettask_backoff_seconds = 0 + self._setstatus_backoff_seconds = 0 + + self._processing_pool_name: str = processing_pool_name or "unknown" + + def start(self) -> int: + """ + Run the worker main loop + + Once started, a Worker will loop until it is killed or it + completes its max_child_task_count, at which point it shuts down. + """ + self.start_result_thread() + self.start_spawn_children_thread() + + # Convert signals into KeyboardInterrupt. + # Running shutdown() within the signal handler can lead to deadlocks + def signal_handler(*args: Any) -> None: + raise KeyboardInterrupt() + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + self.run_once() + except KeyboardInterrupt: + self.shutdown() + raise + + def run_once(self) -> None: + """Access point for tests to run a single worker loop""" + self._add_task() + + def shutdown(self) -> None: + """ + Shutdown cleanly. + Activate the shutdown event and drain results before terminating children. + """ + logger.info("taskworker.worker.shutdown.start") + self._shutdown_event.set() + + logger.info("taskworker.worker.shutdown.spawn_children") + if self._spawn_children_thread: + self._spawn_children_thread.join() + + logger.info("taskworker.worker.shutdown.children") + for child in self._children: + child.terminate() + for child in self._children: + child.join() + + logger.info("taskworker.worker.shutdown.result") + if self._result_thread: + # Use a timeout as sometimes this thread can deadlock on the Event.
+            self._result_thread.join(timeout=5)
+
+        # Drain any remaining results synchronously
+        while True:
+            try:
+                result = self._processed_tasks.get_nowait()
+                self._send_result(result, fetch=False)
+            except queue.Empty:
+                break
+
+        logger.info("taskworker.worker.shutdown.complete")
+
+    def _add_task(self) -> bool:
+        """
+        Add a task to the child tasks queue. Returns False if no new task was fetched.
+        """
+        if self._child_tasks.full():
+            # I want to see how this differs between pools that operate well,
+            # and those that are not as effective. I suspect that with a consistent
+            # load of slowish tasks (like 5-15 seconds) this will happen
+            # infrequently, resulting in the child tasks queue being full and
+            # causing processing deadline expiration.
+            # Whereas in pools that have consistent short tasks, this happens
+            # more frequently, allowing workers to run more smoothly.
+            metrics.incr(
+                "taskworker.worker.add_tasks.child_tasks_full",
+                tags={"processing_pool": self._processing_pool_name},
+            )
+            # If we weren't able to add a task, back off for a bit
+            time.sleep(0.1)
+            return False
+
+        inflight = self.fetch_task()
+        if inflight:
+            try:
+                start_time = time.monotonic()
+                self._child_tasks.put(inflight)
+                metrics.distribution(
+                    "taskworker.worker.child_task.put.duration",
+                    time.monotonic() - start_time,
+                    tags={"processing_pool": self._processing_pool_name},
+                )
+            except queue.Full:
+                metrics.incr(
+                    "taskworker.worker.child_tasks.put.full",
+                    tags={"processing_pool": self._processing_pool_name},
+                )
+                logger.warning(
+                    "taskworker.add_task.child_task_queue_full",
+                    extra={
+                        "task_id": inflight.activation.id,
+                        "processing_pool": self._processing_pool_name,
+                    },
+                )
+            return True
+        else:
+            return False
+
+    def start_result_thread(self) -> None:
+        """
+        Start a thread that delivers results and fetches new tasks.
+        We need to ship results in a thread because the RPC calls block for 20-50ms,
+        and many tasks execute more quickly than that.
+
+        Without additional threads, we end up publishing results too slowly
+        and tasks accumulate in the `processed_tasks` queues and can cross
+        their processing deadline.
+ """ + + def result_thread() -> None: + logger.debug("taskworker.worker.result_thread.started") + iopool = ThreadPoolExecutor(max_workers=self._concurrency) + with iopool as executor: + while not self._shutdown_event.is_set(): + fetch_next = self._processing_pool_name not in options.get( + "taskworker.fetch_next.disabled_pools" + ) + + try: + result = self._processed_tasks.get(timeout=1.0) + executor.submit(self._send_result, result, fetch_next) + except queue.Empty: + metrics.incr( + "taskworker.worker.result_thread.queue_empty", + tags={"processing_pool": self._processing_pool_name}, + ) + continue + + self._result_thread = threading.Thread( + name="send-result", target=result_thread, daemon=True + ) + self._result_thread.start() + + def _send_result(self, result: ProcessingResult, fetch: bool = True) -> bool: + """ + Send a result to the broker and conditionally fetch an additional task + + Run in a thread to avoid blocking the process, and during shutdown/ + See `start_result_thread` + """ + metrics.distribution( + "taskworker.worker.complete_duration", + time.monotonic() - result.receive_timestamp, + tags={"processing_pool": self._processing_pool_name}, + ) + + if fetch: + fetch_next = None + if not self._child_tasks.full(): + fetch_next = FetchNextTask(namespace=self._namespace) + + next = self._send_update_task(result, fetch_next) + if next: + try: + start_time = time.monotonic() + self._child_tasks.put(next) + metrics.distribution( + "taskworker.worker.child_task.put.duration", + time.monotonic() - start_time, + tags={"processing_pool": self._processing_pool_name}, + ) + except queue.Full: + logger.warning( + "taskworker.send_result.child_task_queue_full", + extra={ + "task_id": next.activation.id, + "processing_pool": self._processing_pool_name, + }, + ) + return True + + self._send_update_task(result, fetch_next=None) + return True + + def _send_update_task( + self, result: ProcessingResult, fetch_next: FetchNextTask | None + ) -> InflightTaskActivation | None: + """ + Do the RPC call to this worker's taskbroker, and handle errors + """ + logger.debug( + "taskworker.workers._send_result", + extra={ + "task_id": result.task_id, + "next": fetch_next is not None, + "processing_pool": self._processing_pool_name, + }, + ) + # Use the shutdown_event as a sleep mechanism + self._shutdown_event.wait(self._setstatus_backoff_seconds) + + try: + next_task = self.client.update_task(result, fetch_next) + self._setstatus_backoff_seconds = 0 + return next_task + except grpc.RpcError as e: + self._setstatus_backoff_seconds = min(self._setstatus_backoff_seconds + 1, 10) + if e.code() == grpc.StatusCode.UNAVAILABLE: + self._processed_tasks.put(result) + logger.warning( + "taskworker.send_update_task.failed", + extra={"task_id": result.task_id, "error": e}, + ) + return None + except HostTemporarilyUnavailable as e: + self._setstatus_backoff_seconds = min( + self._setstatus_backoff_seconds + 4, MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE + ) + logger.info( + "taskworker.send_update_task.temporarily_unavailable", + extra={"task_id": result.task_id, "error": str(e)}, + ) + self._processed_tasks.put(result) + return None + + def start_spawn_children_thread(self) -> None: + def spawn_children_thread() -> None: + logger.debug("taskworker.worker.spawn_children_thread.started") + while not self._shutdown_event.is_set(): + self._children = [child for child in self._children if child.is_alive()] + if len(self._children) >= self._concurrency: + time.sleep(0.1) + continue + for i in range(self._concurrency - 
len(self._children)): + process = self.mp_context.Process( + name=f"taskworker-child-{i}", + target=child_process, + args=( + self._app_module, + self._child_tasks, + self._processed_tasks, + self._shutdown_event, + self._max_child_task_count, + self._processing_pool_name, + self._process_type, + ), + ) + process.start() + self._children.append(process) + logger.info( + "taskworker.spawn_child", + extra={"pid": process.pid, "processing_pool": self._processing_pool_name}, + ) + metrics.incr( + "taskworker.worker.spawn_child", + tags={"processing_pool": self._processing_pool_name}, + ) + + self._spawn_children_thread = threading.Thread( + name="spawn-children", target=spawn_children_thread, daemon=True + ) + self._spawn_children_thread.start() + + def fetch_task(self) -> InflightTaskActivation | None: + # Use the shutdown_event as a sleep mechanism + self._shutdown_event.wait(self._gettask_backoff_seconds) + try: + activation = self.client.get_task(self._namespace) + except grpc.RpcError as e: + logger.info( + "taskworker.fetch_task.failed", + extra={"error": e, "processing_pool": self._processing_pool_name}, + ) + + self._gettask_backoff_seconds = min( + self._gettask_backoff_seconds + 4, MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE + ) + return None + + if not activation: + metrics.incr( + "taskworker.worker.fetch_task.not_found", + tags={"processing_pool": self._processing_pool_name}, + ) + logger.debug( + "taskworker.fetch_task.not_found", + extra={"processing_pool": self._processing_pool_name}, + ) + self._gettask_backoff_seconds = min(self._gettask_backoff_seconds + 1, 5) + return None + + self._gettask_backoff_seconds = 0 + return activation From f0d5c5f6f45ae4427d624b959dba299bfb8df6a0 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 14:52:05 -0500 Subject: [PATCH 05/19] Add metrics and conftest and get more tests passing - Add a MetricsBackend like we have in arroyo and our other client libraries. - Attach metrics to the namespace so that it can be read in tasks. 
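
For reference, wiring these pieces together looks roughly like the sketch
below. This is illustrative only: the statsd-style `statsd_client` object
and the broker address are assumptions for the example, not part of this
change.

    from typing import Any

    from arroyo.backends.kafka import KafkaProducer

    from taskbroker_client.app import TaskbrokerApp
    from taskbroker_client.metrics import MetricsBackend, Tags


    def producer_factory(topic: str) -> KafkaProducer:
        # Resolve a topic name into a producer that can publish to it.
        return KafkaProducer({"bootstrap.servers": "127.0.0.1:9092"})


    class StatsdMetricsBackend(MetricsBackend):
        """Forward client metrics to a statsd-style client (assumed API)."""

        def __init__(self, client: Any) -> None:
            self._client = client

        def incr(self, name: str, value: int | float = 1, tags: Tags | None = None) -> None:
            self._client.incr(name, value)

        def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None:
            self._client.gauge(name, value)

        def distribution(
            self, name: str, value: int | float, tags: Tags | None = None, unit: str | None = None
        ) -> None:
            self._client.timing(name, value)


    app = TaskbrokerApp(
        producer_factory=producer_factory,
        metrics_class=StatsdMetricsBackend(statsd_client),
    )
    # Namespaces share the app's metrics backend, so code that holds a
    # namespace (including tasks) can record metrics through it.
    namespace = app.taskregistry.create_namespace("example")
    namespace.metrics.incr("example.task_enqueued", tags={"namespace": namespace.name})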
--- clients/python/src/taskbroker_client/app.py | 25 ++-- .../python/src/taskbroker_client/constants.py | 7 ++ .../python/src/taskbroker_client/metrics.py | 72 ++++++++++++ .../python/src/taskbroker_client/registry.py | 50 ++++---- .../python/src/taskbroker_client/router.py | 4 +- clients/python/src/taskbroker_client/task.py | 26 +++-- clients/python/src/taskbroker_client/types.py | 17 +++ clients/python/tests/conftest.py | 9 ++ clients/python/tests/test_app.py | 20 +++- clients/python/tests/test_registry.py | 108 ++++++++++-------- 10 files changed, 238 insertions(+), 100 deletions(-) create mode 100644 clients/python/src/taskbroker_client/metrics.py create mode 100644 clients/python/src/taskbroker_client/types.py create mode 100644 clients/python/tests/conftest.py diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py index 3b2456e5..d9471ce1 100644 --- a/clients/python/src/taskbroker_client/app.py +++ b/clients/python/src/taskbroker_client/app.py @@ -1,26 +1,26 @@ import importlib from collections.abc import Iterable -from typing import Any, Protocol +from typing import Any from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation +from taskbroker_client.metrics import MetricsBackend from taskbroker_client.registry import TaskRegistry from taskbroker_client.router import TaskRouter from taskbroker_client.imports import import_string +from taskbroker_client.types import AtMostOnceStore, ProducerFactory -class AtMostOnceStore(Protocol): - def add(self, key: str, value: str, timeout: int) -> bool: ... - - -class TaskworkerApp: +class TaskbrokerApp: """ Container for an application's task setup and configuration. """ def __init__( self, + producer_factory: ProducerFactory, router_class: str | TaskRouter = "taskbroker_client.router.DefaultRouter", + metrics_class: str | MetricsBackend = "taskbroker_client.metrics.NoOpMetricsBackend", at_most_once_store: AtMostOnceStore | None = None, ) -> None: self._config = { @@ -28,8 +28,11 @@ def __init__( "at_most_once_timeout": None, } self._modules: Iterable[str] = [] + self._metrics = self._build_metrics(metrics_class) self._taskregistry = TaskRegistry( - router=self._build_router(router_class) + producer_factory=producer_factory, + router=self._build_router(router_class), + metrics=self._metrics, ) if at_most_once_store: self.at_most_once_store(at_most_once_store) @@ -44,6 +47,12 @@ def _build_router(self, router_name: str | TaskRouter) -> TaskRouter: return router + def _build_metrics(self, backend_name: str | MetricsBackend) -> MetricsBackend: + if isinstance(backend_name, str): + metrics_class = import_string(backend_name) + return metrics_class() + return backend_name + @property def taskregistry(self) -> TaskRegistry: """Get the TaskRegistry instance from this app""" @@ -93,7 +102,7 @@ def get_at_most_once_key(namespace: str, taskname: str, task_id: str) -> str: return f"tw:amo:{namespace}:{taskname}:{task_id}" -def import_app(app_module: str) -> TaskworkerApp: +def import_app(app_module: str) -> TaskbrokerApp: """ Resolve an application path like `acme.worker.runtime:app` into the `app` symbol defined in the module. 
diff --git a/clients/python/src/taskbroker_client/constants.py b/clients/python/src/taskbroker_client/constants.py index 817e25ad..36074a01 100644 --- a/clients/python/src/taskbroker_client/constants.py +++ b/clients/python/src/taskbroker_client/constants.py @@ -52,6 +52,13 @@ """ +ALWAYS_EAGER = False +""" +Whether or not tasks should be invoked eagerly (synchronously) +This can be mutated by application test harnesses to run tasks without Kafka. +""" + + class CompressionType(Enum): """ The type of compression used for task parameters. diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py new file mode 100644 index 00000000..52d55098 --- /dev/null +++ b/clients/python/src/taskbroker_client/metrics.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from abc import abstractmethod +from collections.abc import Mapping +from typing import Protocol, runtime_checkable + +Tags = Mapping[str, str] + + +@runtime_checkable +class MetricsBackend(Protocol): + """ + An abstract class that defines the interface for metrics backends. + """ + + @abstractmethod + def incr( + self, + name: str, + value: int | float = 1, + tags: Tags | None = None, + ) -> None: + """ + Increments a counter metric by a given value. + """ + raise NotImplementedError + + @abstractmethod + def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None: + """ + Sets a gauge metric to the given value. + """ + raise NotImplementedError + + @abstractmethod + def distribution( + self, + name: str, + value: int | float, + tags: Tags | None = None, + unit: str | None = None, + ) -> None: + """ + Records a distribution metric. + """ + raise NotImplementedError + + +class NoOpMetricsBackend(MetricsBackend): + """ + Default metrics backend that does not record anything. 
+ """ + + def incr( + self, + name: str, + value: int | float = 1, + tags: Tags | None = None, + ) -> None: + pass + + def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None: + pass + + def distribution( + self, + name: str, + value: int | float, + tags: Tags | None = None, + unit: str | None = None, + ) -> None: + pass diff --git a/clients/python/src/taskbroker_client/registry.py b/clients/python/src/taskbroker_client/registry.py index c0ad8e66..eedfbd47 100644 --- a/clients/python/src/taskbroker_client/registry.py +++ b/clients/python/src/taskbroker_client/registry.py @@ -12,17 +12,13 @@ from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation from sentry_sdk.consts import OP, SPANDATA -# from django.conf import settings -# from sentry.conf.types.kafka_definition import Topic -# from sentry.silo.base import SiloMode from taskbroker_client.constants import DEFAULT_PROCESSING_DEADLINE, CompressionType -# from sentry.utils import metrics -# from sentry.utils.arroyo_producer import SingletonProducer, get_arroyo_producer -from taskbroker_client.imports import import_string +from taskbroker_client.metrics import MetricsBackend from taskbroker_client.retry import Retry from taskbroker_client.router import TaskRouter from taskbroker_client.task import P, R, Task +from taskbroker_client.types import ProducerFactory logger = logging.getLogger(__name__) @@ -39,13 +35,14 @@ class TaskNamespace: def __init__( self, name: str, + producer_factory: ProducerFactory, router: TaskRouter, + metrics: MetricsBackend, retry: Retry | None, expires: int | datetime.timedelta | None = None, processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE, app_feature: str | None = None, ): - # TODO Figure out how to get producers here. self.name = name self.router = router self.default_retry = retry @@ -53,7 +50,9 @@ def __init__( self.default_processing_deadline_duration = processing_deadline_duration # seconds self.app_feature = app_feature or name self._registered_tasks: dict[str, Task[Any, Any]] = {} - # self._producers: dict[Topic, SingletonProducer] = {} + self._producers: dict[str, KafkaProducer] = {} + self._producer_factory = producer_factory + self.metrics = metrics def get(self, name: str) -> Task[Any, Any]: """ @@ -85,7 +84,6 @@ def register( at_most_once: bool = False, wait_for_delivery: bool = False, compression_type: CompressionType = CompressionType.PLAINTEXT, - silo_mode: SiloMode | None = None, ) -> Callable[[Callable[P, R]], Task[P, R]]: """ Register a task. 
@@ -141,12 +139,12 @@ def wrapped(func: Callable[P, R]) -> Task[P, R]: def _handle_produce_future(self, future: ProducerFuture, tags: dict[str, str]) -> None: if future.cancelled(): - metrics.incr("taskworker.registry.send_task.cancelled", tags=tags) + self.metrics.incr("taskworker.registry.send_task.cancelled", tags=tags) elif future.exception(1): # this does not block since this callback only gets run when the future is finished and exception is set - metrics.incr("taskworker.registry.send_task.failed", tags=tags) + self.metrics.incr("taskworker.registry.send_task.failed", tags=tags) else: - metrics.incr("taskworker.registry.send_task.success", tags=tags) + self.metrics.incr("taskworker.registry.send_task.success", tags=tags) def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) -> None: topic = self.router.route_namespace(self.name) @@ -161,16 +159,16 @@ def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) span.set_data(SPANDATA.MESSAGING_SYSTEM, "taskworker") produce_future = self._producer(topic).produce( - ArroyoTopic(name=topic.value), + Topic(name=topic), KafkaPayload(key=None, value=activation.SerializeToString(), headers=[]), ) - metrics.incr( + self.metrics.incr( "taskworker.registry.send_task.scheduled", tags={ "namespace": activation.namespace, "taskname": activation.taskname, - "topic": topic.value, + "topic": topic, }, ) # We know this type is futures.Future, but cannot assert so, @@ -181,7 +179,7 @@ def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) tags={ "namespace": activation.namespace, "taskname": activation.taskname, - "topic": topic.value, + "topic": topic, }, ) ) @@ -191,16 +189,13 @@ def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) except Exception: logger.exception("Failed to wait for delivery") - def _producer(self, topic: Topic) -> SingletonProducer: + def _producer(self, topic: str) -> KafkaProducer: if topic not in self._producers: - - def factory() -> KafkaProducer: - return get_arroyo_producer(f"sentry.taskworker.{topic.value}", topic) - - self._producers[topic] = SingletonProducer(factory, max_futures=1000) + self._producers[topic] = self._producer_factory(topic) return self._producers[topic] +# TODO(mark) All of TaskRegistry could be folded into TaskworkerApp later. class TaskRegistry: """ Registry of all namespaces. @@ -209,9 +204,16 @@ class TaskRegistry: during startup. """ - def __init__(self, router: TaskRouter) -> None: + def __init__( + self, + producer_factory: ProducerFactory, + router: TaskRouter, + metrics: MetricsBackend, + ) -> None: self._namespaces: dict[str, TaskNamespace] = {} + self._producer_factory = producer_factory self._router = router + self._metrics = metrics def contains(self, name: str) -> bool: return name in self._namespaces @@ -248,6 +250,8 @@ def create_namespace( namespace = TaskNamespace( name=name, router=self._router, + metrics=self._metrics, + producer_factory=self._producer_factory, retry=retry, expires=expires, processing_deadline_duration=processing_deadline_duration, diff --git a/clients/python/src/taskbroker_client/router.py b/clients/python/src/taskbroker_client/router.py index 51347546..657cc12c 100644 --- a/clients/python/src/taskbroker_client/router.py +++ b/clients/python/src/taskbroker_client/router.py @@ -11,8 +11,8 @@ def route_namespace(self, name: str) -> str: ... 
class DefaultRouter(TaskRouter): """ - Stub router that resolves all namespaces to the default topic + Stub router that resolves all namespaces to a default topic """ def route_namespace(self, name: str) -> str: - return "default" + return "taskbroker" diff --git a/clients/python/src/taskbroker_client/task.py b/clients/python/src/taskbroker_client/task.py index 55616a5e..44edb385 100644 --- a/clients/python/src/taskbroker_client/task.py +++ b/clients/python/src/taskbroker_client/task.py @@ -12,11 +12,8 @@ import sentry_sdk import zstandard as zstd -# from django.conf import settings -# from django.utils import timezone from google.protobuf.timestamp_pb2 import Timestamp -# from sentry.utils import metrics from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DISCARD, RetryState, @@ -34,6 +31,12 @@ from taskbroker_client.registry import TaskNamespace +ALWAYS_EAGER = False +""" +Whether or not tasks should be invoked eagerly (synchronously) +This can be mutated by application test harnesses to run tasks without Kafka. +""" + P = ParamSpec("P") R = TypeVar("R") @@ -126,10 +129,9 @@ def apply_async( activation = self.create_activation( args=args, kwargs=kwargs, headers=headers, expires=expires, countdown=countdown ) - if settings.TASKWORKER_ALWAYS_EAGER: + if ALWAYS_EAGER: self._func(*args, **kwargs) else: - # TODO(taskworker) promote parameters to headers self._namespace.send_task( activation, wait_for_delivery=self.wait_for_delivery, @@ -137,8 +139,8 @@ def apply_async( def _signal_send(self, task: Task[Any, Any], args: Any, kwargs: Any) -> None: """ - This method is a stub that sentry.testutils.task_runner.BurstRunner or other testing - hooks can monkeypatch to capture tasks that are being produced. + This method is a stub that test harnesses can monkey patch to capture tasks that + are being produced. """ pass @@ -151,7 +153,7 @@ def create_activation( countdown: int | datetime.timedelta | None = None, ) -> TaskActivation: received_at = Timestamp() - received_at.FromDatetime(timezone.now()) + received_at.FromDatetime(datetime.datetime.now(tz=datetime.UTC)) processing_deadline = self._processing_deadline_duration if isinstance(processing_deadline, datetime.timedelta): @@ -201,22 +203,22 @@ def create_activation( parameters_str = base64.b64encode(zstd.compress(parameters_json)).decode("utf8") end_time = time.perf_counter() - metrics.distribution( + self.namespace.metrics.distribution( "taskworker.producer.compressed_parameters_size", len(parameters_str), tags={ "namespace": self._namespace.name, "taskname": self.name, - "topic": self._namespace.topic.value, + "topic": self._namespace.topic, }, ) - metrics.distribution( + self.namespace.metrics.distribution( "taskworker.producer.compression_time", end_time - start_time, tags={ "namespace": self._namespace.name, "taskname": self.name, - "topic": self._namespace.topic.value, + "topic": self._namespace.topic }, ) else: diff --git a/clients/python/src/taskbroker_client/types.py b/clients/python/src/taskbroker_client/types.py new file mode 100644 index 00000000..65c35772 --- /dev/null +++ b/clients/python/src/taskbroker_client/types.py @@ -0,0 +1,17 @@ +from typing import Callable, Protocol + +from arroyo.backends.kafka import KafkaProducer + + +class AtMostOnceStore(Protocol): + """ + Interface for the at_most_once store used for idempotent task execution. + """ + def add(self, key: str, value: str, timeout: int) -> bool: ... 
+
+
+ProducerFactory = Callable[[str], KafkaProducer]
+"""
+A factory interface for resolving topics into a KafkaProducer
+that can produce on the provided topic.
+"""
diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py
new file mode 100644
index 00000000..38a202de
--- /dev/null
+++ b/clients/python/tests/conftest.py
@@ -0,0 +1,9 @@
+from arroyo.backends.kafka import KafkaProducer
+
+def producer_factory(topic: str) -> KafkaProducer:
+    config = {
+        "bootstrap.servers": "127.0.0.1:9092",
+        "compression.type": "lz4",
+        "message.max.bytes": 50000000,  # 50MB
+    }
+    return KafkaProducer(config)
diff --git a/clients/python/tests/test_app.py b/clients/python/tests/test_app.py
index bbf0c354..14203925 100644
--- a/clients/python/tests/test_app.py
+++ b/clients/python/tests/test_app.py
@@ -1,8 +1,9 @@
 from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation
 
-from taskbroker_client.app import AtMostOnceStore, TaskworkerApp
+from taskbroker_client.app import TaskbrokerApp
 from taskbroker_client.router import TaskRouter
-from taskbroker_client.registry import TaskRegistry
+from taskbroker_client.types import AtMostOnceStore
+from .conftest import producer_factory
 
 
 class StubAtMostOnce(AtMostOnceStore):
@@ -21,20 +22,27 @@
         return "honk"
 
 
+
 def test_taskregistry_router_object() -> None:
-    app = TaskworkerApp(router_class=StubRouter())
+    app = TaskbrokerApp(
+        producer_factory=producer_factory,
+        router_class=StubRouter()
+    )
     ns = app.taskregistry.create_namespace("test")
     assert ns.topic == "honk"
 
 
 def test_taskregistry_router_str() -> None:
-    app = TaskworkerApp(router_class="taskbroker_client.router.DefaultRouter")
+    app = TaskbrokerApp(
+        producer_factory=producer_factory,
+        router_class="taskbroker_client.router.DefaultRouter",
+    )
     ns = app.taskregistry.create_namespace("test")
-    assert ns.topic == "default"
+    assert ns.topic == "taskbroker"
 
 
 def test_set_config() -> None:
-    app = TaskworkerApp()
+    app = TaskbrokerApp(producer_factory=producer_factory)
     app.set_config({"rpc_secret": "testing", "ignored": "key"})
     assert app.config["rpc_secret"] == "testing"
     assert "ignored" not in app.config
@@ -49,7 +57,7 @@
         processing_deadline_duration=2,
     )
     at_most = StubAtMostOnce()
-    app = TaskworkerApp()
+    app = TaskbrokerApp(producer_factory=producer_factory)
     app.at_most_once_store(at_most)
     assert app.should_attempt_at_most_once(activation)
     assert not app.should_attempt_at_most_once(activation)
diff --git a/clients/python/tests/test_registry.py b/clients/python/tests/test_registry.py
index a1b63fab..fb676f27 100644
--- a/clients/python/tests/test_registry.py
+++ b/clients/python/tests/test_registry.py
@@ -1,27 +1,31 @@
 import base64
 from concurrent.futures import Future
-from unittest.mock import Mock
+from unittest.mock import Mock, patch
 
 import orjson
 import pytest
 import zstandard as zstd
-from django.test.utils import override_settings
-from sentry.conf.types.kafka_definition import Topic
-from sentry.taskworker.constants import MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, CompressionType
-from sentry.taskworker.registry import TaskNamespace, TaskRegistry
-from sentry.taskworker.retry import LastAction, Retry
-from sentry.taskworker.router import DefaultRouter
-from sentry.taskworker.task import Task
+# from django.test.utils import override_settings
+# from sentry.conf.types.kafka_definition import Topic
+from taskbroker_client.constants import MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, CompressionType
+from
taskbroker_client.registry import TaskNamespace, TaskRegistry +from taskbroker_client.retry import LastAction, Retry +from taskbroker_client.router import DefaultRouter +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.task import Task from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DEADLETTER, ON_ATTEMPTS_EXCEEDED_DISCARD, ) +from .conftest import producer_factory def test_namespace_register_task() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -41,7 +45,9 @@ def simple_task() -> None: def test_namespace_register_inherits_default_retry() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=Retry(times=5, on=(RuntimeError,)), ) @@ -69,7 +75,9 @@ def retry_none_param() -> None: def test_register_inherits_default_expires_processing_deadline() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, expires=10 * 60, processing_deadline_duration=5, @@ -97,7 +105,9 @@ def with_expires() -> None: def test_namespace_get_unknown() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -106,11 +116,12 @@ def test_namespace_get_unknown() -> None: assert "No task registered" in str(err) -@pytest.mark.django_db def test_namespace_send_task_no_retry() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -124,23 +135,24 @@ def simple_task() -> None: assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD mock_producer = Mock() - namespace._producers[Topic.TASKWORKER] = mock_producer + namespace._producers["taskbroker"] = mock_producer namespace.send_task(activation) assert mock_producer.produce.call_count == 1 mock_call = mock_producer.produce.call_args - assert mock_call[0][0].name == "taskworker" + assert mock_call[0][0].name == "taskbroker" proto_message = mock_call[0][1].value assert proto_message == activation.SerializeToString() -@pytest.mark.django_db def test_namespace_send_task_with_compression() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -163,11 +175,12 @@ def simple_task_with_compression(param: str) -> None: assert actual_params == expected_params -@pytest.mark.django_db def test_namespace_send_task_with_auto_compression() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -191,11 +204,12 @@ def simple_task_with_compression(param: str) -> None: assert actual_params == expected_params -@pytest.mark.django_db def test_namespace_send_task_with_retry() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=None, ) @@ -211,7 +225,7 @@ def simple_task() -> None: assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER mock_producer = Mock() - namespace._producers[Topic.TASKWORKER] = mock_producer + namespace._producers["taskbroker"] = mock_producer 
namespace.send_task(activation) assert mock_producer.produce.call_count == 1 @@ -221,11 +235,12 @@ def simple_task() -> None: assert proto_message == activation.SerializeToString() -@pytest.mark.django_db def test_namespace_with_retry_send_task() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=Retry(times=3), ) @@ -239,23 +254,24 @@ def simple_task() -> None: assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD mock_producer = Mock() - namespace._producers[Topic.TASKWORKER] = mock_producer + namespace._producers["taskbroker"] = mock_producer namespace.send_task(activation) assert mock_producer.produce.call_count == 1 mock_call = mock_producer.produce.call_args - assert mock_call[0][0].name == "taskworker" + assert mock_call[0][0].name == "taskbroker" proto_message = mock_call[0][1].value assert proto_message == activation.SerializeToString() -@pytest.mark.django_db def test_namespace_with_wait_for_delivery_send_task() -> None: namespace = TaskNamespace( name="tests", + producer_factory=producer_factory, router=DefaultRouter(), + metrics=NoOpMetricsBackend(), retry=Retry(times=3), ) @@ -266,7 +282,7 @@ def simple_task() -> None: activation = simple_task.create_activation([], {}) mock_producer = Mock() - namespace._producers[Topic.TASKWORKER] = mock_producer + namespace._producers["taskbroker"] = mock_producer ret_value: Future[None] = Future() ret_value.set_result(None) @@ -275,15 +291,18 @@ def simple_task() -> None: assert mock_producer.produce.call_count == 1 mock_call = mock_producer.produce.call_args - assert mock_call[0][0].name == "taskworker" + assert mock_call[0][0].name == "taskbroker" proto_message = mock_call[0][1].value assert proto_message == activation.SerializeToString() -@pytest.mark.django_db def test_registry_get() -> None: - registry = TaskRegistry() + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) ns = registry.create_namespace(name="tests") assert isinstance(ns, TaskNamespace) @@ -298,9 +317,12 @@ def test_registry_get() -> None: assert registry.contains("tests") -@pytest.mark.django_db def test_registry_get_task() -> None: - registry = TaskRegistry() + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) ns = registry.create_namespace(name="tests") @ns.register(name="test.simpletask") @@ -317,15 +339,18 @@ def simple_task() -> None: registry.get_task(ns.name, "nope") -@pytest.mark.django_db def test_registry_create_namespace_simple() -> None: - registry = TaskRegistry() + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) ns = registry.create_namespace(name="tests") assert ns.default_retry is None assert ns.default_expires is None assert ns.default_processing_deadline_duration == 10 assert ns.name == "tests" - assert ns.topic == Topic.TASKWORKER + assert ns.topic == "taskbroker" assert ns.app_feature == "tests" retry = Retry(times=3) @@ -340,31 +365,16 @@ def test_registry_create_namespace_simple() -> None: assert ns.default_processing_deadline_duration == 60 assert ns.default_expires == 60 * 10 assert ns.name == "test-two" - assert ns.topic == Topic.TASKWORKER + assert ns.topic == "taskbroker" assert ns.app_feature == "anvils" -@pytest.mark.django_db def test_registry_create_namespace_duplicate() -> None: - registry = 
TaskRegistry() + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) registry.create_namespace(name="tests") with pytest.raises(ValueError, match="tests already exists"): registry.create_namespace(name="tests") - - -@pytest.mark.django_db -def test_registry_create_namespace_route_setting() -> None: - with override_settings(TASKWORKER_ROUTES='{"profiling":"profiles", "lol":"nope"}'): - registry = TaskRegistry() - - # namespaces without routes resolve to the default topic. - tests = registry.create_namespace(name="tests") - assert tests.topic == Topic.TASKWORKER - - profiling = registry.create_namespace(name="profiling") - assert profiling.topic == Topic.PROFILES - - with pytest.raises(ValueError): - ns = registry.create_namespace(name="lol") - # Should raise as the name is routed to an invalid topic - ns.topic From c638f196c160f3b009f6223fe55ba5b2f371f848 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 15:00:34 -0500 Subject: [PATCH 06/19] Get more tests passing --- clients/python/tests/test_task.py | 43 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/clients/python/tests/test_task.py b/clients/python/tests/test_task.py index ba68706a..3d767e42 100644 --- a/clients/python/tests/test_task.py +++ b/clients/python/tests/test_task.py @@ -2,19 +2,22 @@ from typing import Any from unittest.mock import patch +import orjson import pytest import sentry_sdk -from sentry.taskworker.registry import TaskNamespace -from sentry.taskworker.retry import LastAction, Retry, RetryTaskError -from sentry.taskworker.router import DefaultRouter -from sentry.taskworker.task import Task -from sentry.testutils.helpers.task_runner import TaskRunner -from sentry.utils import json from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DEADLETTER, ON_ATTEMPTS_EXCEEDED_DISCARD, ) +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.registry import TaskNamespace +from taskbroker_client.retry import LastAction, Retry, RetryTaskError +from taskbroker_client.router import DefaultRouter +from taskbroker_client.task import Task + +from .conftest import producer_factory + def do_things() -> None: raise NotImplementedError @@ -22,7 +25,13 @@ def do_things() -> None: @pytest.fixture def task_namespace() -> TaskNamespace: - return TaskNamespace(name="tests", router=DefaultRouter(), retry=None) + return TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) def test_define_task_defaults(task_namespace: TaskNamespace) -> None: @@ -66,7 +75,9 @@ def test_func(*args, **kwargs) -> None: activation = call_params.args[0] assert activation.expires == 10 - assert activation.parameters == json.dumps({"args": ["arg2"], "kwargs": {"org_id": 2}}) + assert activation.parameters == orjson.dumps( + {"args": ["arg2"], "kwargs": {"org_id": 2}} + ).decode("utf-8") def test_apply_async_countdown(task_namespace: TaskNamespace) -> None: @@ -85,10 +96,12 @@ def test_func(*args, **kwargs) -> None: activation = call_params.args[0] assert activation.delay == 600 - assert activation.parameters == json.dumps({"args": ["arg2"], "kwargs": {"org_id": 2}}) + assert activation.parameters == orjson.dumps( + {"args": ["arg2"], "kwargs": {"org_id": 2}} + ).decode("utf-8") -def test_delay_taskrunner_immediate_mode(task_namespace: TaskNamespace) -> None: +def 
test_delay_immediate_mode(task_namespace: TaskNamespace) -> None: calls = [] def test_func(*args, **kwargs) -> None: @@ -99,8 +112,8 @@ def test_func(*args, **kwargs) -> None: func=test_func, namespace=task_namespace, ) - # Within a TaskRunner context tasks should run immediately. - with TaskRunner(): + # Patch the constant that controls eager execution + with patch("taskbroker_client.task.ALWAYS_EAGER", True): task.delay("arg", org_id=1) task.apply_async(args=["arg2"], kwargs={"org_id": 2}) task.apply_async() @@ -111,7 +124,7 @@ def test_func(*args, **kwargs) -> None: assert calls[2] == {"args": tuple(), "kwargs": {}} -def test_delay_taskrunner_immediate_validate_activation(task_namespace: TaskNamespace) -> None: +def test_delay_immediate_validate_activation(task_namespace: TaskNamespace) -> None: calls = [] def test_func(mixed: Any) -> None: @@ -123,7 +136,7 @@ def test_func(mixed: Any) -> None: namespace=task_namespace, ) - with TaskRunner(): + with patch("taskbroker_client.task.ALWAYS_EAGER", True): task.delay(mixed=None) task.delay(mixed="str") @@ -246,7 +259,7 @@ def with_parameters(one: str, two: int, org_id: int) -> None: raise NotImplementedError activation = with_parameters.create_activation(["one", 22], {"org_id": 99}) - params = json.loads(activation.parameters) + params = orjson.loads(activation.parameters) assert params["args"] assert params["args"] == ["one", 22] assert params["kwargs"] == {"org_id": 99} From 7dae286c71e486bac70816d5a72d04e4c2af4d43 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 15:04:14 -0500 Subject: [PATCH 07/19] Move structs out of separate modules Combine small modules together now that import cycles are gone --- .../src/taskbroker_client/client/__init__.py | 0 .../client/inflight_task_activation.py | 10 --------- .../client/processing_result.py | 13 ----------- clients/python/src/taskbroker_client/types.py | 22 +++++++++++++++++++ .../{client => worker}/client.py | 0 clients/python/tests/client/__init__.py | 0 .../tests/{client => worker}/test_client.py | 17 +++++++------- 7 files changed, 31 insertions(+), 31 deletions(-) delete mode 100644 clients/python/src/taskbroker_client/client/__init__.py delete mode 100644 clients/python/src/taskbroker_client/client/inflight_task_activation.py delete mode 100644 clients/python/src/taskbroker_client/client/processing_result.py rename clients/python/src/taskbroker_client/{client => worker}/client.py (100%) delete mode 100644 clients/python/tests/client/__init__.py rename clients/python/tests/{client => worker}/test_client.py (99%) diff --git a/clients/python/src/taskbroker_client/client/__init__.py b/clients/python/src/taskbroker_client/client/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/clients/python/src/taskbroker_client/client/inflight_task_activation.py b/clients/python/src/taskbroker_client/client/inflight_task_activation.py deleted file mode 100644 index 523a3257..00000000 --- a/clients/python/src/taskbroker_client/client/inflight_task_activation.py +++ /dev/null @@ -1,10 +0,0 @@ -import dataclasses - -from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation - - -@dataclasses.dataclass -class InflightTaskActivation: - activation: TaskActivation - host: str - receive_timestamp: float diff --git a/clients/python/src/taskbroker_client/client/processing_result.py b/clients/python/src/taskbroker_client/client/processing_result.py deleted file mode 100644 index 8cdbb2b4..00000000 --- a/clients/python/src/taskbroker_client/client/processing_result.py 
+++ /dev/null
@@ -1,13 +0,0 @@
-import dataclasses
-
-from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivationStatus
-
-
-@dataclasses.dataclass
-class ProcessingResult:
-    """Result structure from child processess to parent"""
-
-    task_id: str
-    status: TaskActivationStatus.ValueType
-    host: str
-    receive_timestamp: float
diff --git a/clients/python/src/taskbroker_client/types.py b/clients/python/src/taskbroker_client/types.py
index 65c35772..081ee650 100644
--- a/clients/python/src/taskbroker_client/types.py
+++ b/clients/python/src/taskbroker_client/types.py
@@ -1,6 +1,8 @@
+import dataclasses
 from typing import Callable, Protocol
 
 from arroyo.backends.kafka import KafkaProducer
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation, TaskActivationStatus
 
 
 class AtMostOnceStore(Protocol):
@@ -15,3 +17,23 @@ def add(self, key: str, value: str, timeout: int) -> bool: ...
 A factory interface for resolving topics into a KafkaProducer
 that can produce on the provided topic.
 """
+
+
+@dataclasses.dataclass
+class InflightTaskActivation:
+    """
+    A TaskActivation with metadata used within workers.
+    """
+    activation: TaskActivation
+    host: str
+    receive_timestamp: float
+
+
+@dataclasses.dataclass
+class ProcessingResult:
+    """Result structure from child processes to parent"""
+
+    task_id: str
+    status: TaskActivationStatus.ValueType
+    host: str
+    receive_timestamp: float
diff --git a/clients/python/src/taskbroker_client/client/client.py b/clients/python/src/taskbroker_client/worker/client.py
similarity index 100%
rename from clients/python/src/taskbroker_client/client/client.py
rename to clients/python/src/taskbroker_client/worker/client.py
diff --git a/clients/python/tests/client/__init__.py b/clients/python/tests/client/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/clients/python/tests/client/test_client.py b/clients/python/tests/worker/test_client.py
similarity index 99%
rename from clients/python/tests/client/test_client.py
rename to clients/python/tests/worker/test_client.py
index 2a40e06f..dac1abb8 100644
--- a/clients/python/tests/client/test_client.py
+++ b/clients/python/tests/worker/test_client.py
@@ -11,6 +11,15 @@
 import grpc
 import pytest
 from google.protobuf.message import Message
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    TASK_ACTIVATION_STATUS_COMPLETE,
+    TASK_ACTIVATION_STATUS_RETRY,
+    FetchNextTask,
+    GetTaskResponse,
+    SetTaskStatusResponse,
+    TaskActivation,
+)
+
 from sentry.taskworker.client.client import (
     HealthCheckSettings,
     HostTemporarilyUnavailable,
@@ -20,14 +29,6 @@
 from sentry.taskworker.client.processing_result import ProcessingResult
 from sentry.taskworker.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH
 from sentry.testutils.pytest.fixtures import django_db_all
-from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
-    TASK_ACTIVATION_STATUS_COMPLETE,
-    TASK_ACTIVATION_STATUS_RETRY,
-    FetchNextTask,
-    GetTaskResponse,
-    SetTaskStatusResponse,
-    TaskActivation,
-)
 
 
 @dataclasses.dataclass
From 51d6b646b84aecf5f88e14a30b07e4f34723ec1f Mon Sep 17 00:00:00 2001
From: Mark Story
Date: Fri, 28 Nov 2025 15:31:58 -0500
Subject: [PATCH 08/19] Get schedules tests passing

Replace django's timezone.now() with datetime.now(tz=UTC)
---
 clients/python/pyproject.toml                 |  4 ++
 .../taskbroker_client/scheduler/scheduler.py  |  2 +-
 .../taskbroker_client/scheduler/schedules.py  | 15 +++----
 clients/python/tests/conftest.py              |  9 ++++
 .../python/tests/scheduler/test_schedules.py  | 45 +++++++++----------
 5
files changed, 43 insertions(+), 32 deletions(-) diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 079aa56e..3e2325a8 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -9,10 +9,13 @@ dependencies = [ "sentry-sdk[http2]>=2.43.0", "sentry-protos>=0.2.0", "confluent_kafka>=2.3.0", + "cronsim>=2.6", "grpcio==1.66.1", "orjson>=3.10.10", "protobuf>=5.28.3", "types-protobuf>=6.30.2.20250703", + "redis>=3.4.1", + "redis-py-cluster>=2.1.0", "zstandard>=0.18.0", ] @@ -26,6 +29,7 @@ dev = [ "flake8>=7.3.0", "isort>=5.13.2", "mypy>=1.17.1", + "time-machine>=2.16.0", ] [build-system] diff --git a/clients/python/src/taskbroker_client/scheduler/scheduler.py b/clients/python/src/taskbroker_client/scheduler/scheduler.py index 201b1ca6..7f29c05c 100644 --- a/clients/python/src/taskbroker_client/scheduler/scheduler.py +++ b/clients/python/src/taskbroker_client/scheduler/scheduler.py @@ -9,7 +9,7 @@ from django.utils import timezone from redis.client import StrictRedis from rediscluster import RedisCluster -from sentry.conf.types.taskworker import ScheduleConfig, crontab +from taskbroker_client.scheduler.config import ScheduleConfig, crontab from sentry.taskworker.app import TaskworkerApp from sentry.taskworker.scheduler.schedules import CrontabSchedule, Schedule, TimedeltaSchedule from sentry.taskworker.task import Task diff --git a/clients/python/src/taskbroker_client/scheduler/schedules.py b/clients/python/src/taskbroker_client/scheduler/schedules.py index 5470a556..09a2715e 100644 --- a/clients/python/src/taskbroker_client/scheduler/schedules.py +++ b/clients/python/src/taskbroker_client/scheduler/schedules.py @@ -2,12 +2,11 @@ import abc import logging -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from typing import TYPE_CHECKING from cronsim import CronSim, CronSimError -from django.utils import timezone -from sentry.conf.types.taskworker import crontab +from taskbroker_client.scheduler.config import crontab if TYPE_CHECKING: from sentry_sdk._types import MonitorConfigScheduleUnit @@ -81,7 +80,7 @@ def remaining_seconds(self, last_run: datetime | None = None) -> int: if last_run is None: return 0 # floor to timestamp as microseconds are not relevant - now = int(timezone.now().timestamp()) + now = int(datetime.now(tz=UTC).timestamp()) last_run_ts = int(last_run.timestamp()) seconds_remaining = self._delta.total_seconds() - (now - last_run_ts) @@ -112,7 +111,7 @@ def __init__(self, name: str, crontab: crontab) -> None: self._crontab = crontab self._name = name try: - self._cronsim = CronSim(str(crontab), timezone.now()) + self._cronsim = CronSim(str(crontab), datetime.now(tz=UTC)) except CronSimError as e: raise ValueError(f"crontab expression {self._crontab} is invalid") from e @@ -123,7 +122,7 @@ def monitor_value(self) -> str: def is_due(self, last_run: datetime | None = None) -> bool: """Check if the schedule is due to run again based on last_run.""" if last_run is None: - last_run = timezone.now() - timedelta(minutes=1) + last_run = datetime.now(tz=UTC) - timedelta(minutes=1) remaining = self.remaining_seconds(last_run) return remaining <= 0 @@ -134,11 +133,11 @@ def remaining_seconds(self, last_run: datetime | None = None) -> int: Use the current time to find the next schedule time """ if last_run is None: - last_run = timezone.now() - timedelta(minutes=1) + last_run = datetime.now(tz=UTC) - timedelta(minutes=1) # This could result in missed beats, or increased load on redis. 
last_run = last_run.replace(second=0, microsecond=0) - now = timezone.now().replace(second=0, microsecond=0) + now = datetime.now(tz=UTC).replace(second=0, microsecond=0) # A future last_run means we should wait until the # next scheduled time, and then we can try again. diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py index 38a202de..91c6308d 100644 --- a/clients/python/tests/conftest.py +++ b/clients/python/tests/conftest.py @@ -1,5 +1,8 @@ +from datetime import UTC, datetime from arroyo.backends.kafka import KafkaProducer +import time_machine + def producer_factory(topic: str) -> KafkaProducer: config = { "bootstrap.servers": "127.0.0.1:9092", @@ -7,3 +10,9 @@ def producer_factory(topic: str) -> KafkaProducer: "message.max.bytes": 50000000, # 50MB } return KafkaProducer(config) + + +def freeze_time(t: str | datetime | None = None) -> time_machine.travel: + if t is None: + t = datetime.now(UTC) + return time_machine.travel(t, tick=False) diff --git a/clients/python/tests/scheduler/test_schedules.py b/clients/python/tests/scheduler/test_schedules.py index 1f597f20..a83428d7 100644 --- a/clients/python/tests/scheduler/test_schedules.py +++ b/clients/python/tests/scheduler/test_schedules.py @@ -1,10 +1,9 @@ from datetime import UTC, datetime, timedelta import pytest -from django.utils import timezone -from sentry.conf.types.taskworker import crontab -from sentry.taskworker.scheduler.schedules import CrontabSchedule, TimedeltaSchedule -from sentry.testutils.helpers.datetime import freeze_time +from taskbroker_client.scheduler.config import crontab +from taskbroker_client.scheduler.schedules import CrontabSchedule, TimedeltaSchedule +from ..conftest import freeze_time def test_timedeltaschedule_invalid() -> None: @@ -17,7 +16,7 @@ def test_timedeltaschedule_invalid() -> None: @freeze_time("2025-01-24 14:25:00") def test_timedeltaschedule_is_due() -> None: - now = timezone.now() + now = datetime.now(tz=UTC) schedule = TimedeltaSchedule(timedelta(minutes=5)) assert not schedule.is_due(now) @@ -48,7 +47,7 @@ def test_timedeltaschedule_monitor_interval() -> None: @freeze_time("2025-01-24 14:25:00") def test_timedeltaschedule_remaining_seconds() -> None: - now = timezone.now() + now = datetime.now(tz=UTC) delta = timedelta(minutes=5) schedule = TimedeltaSchedule(delta) @@ -81,33 +80,33 @@ def test_crontabschedule_is_due() -> None: # no last_run and not time to spawn with freeze_time("2025-01-24 14:23:00"): - now = timezone.now() + now = datetime.now(tz=UTC) assert not schedule.is_due(None) assert not schedule.is_due(now) with freeze_time("2025-01-24 14:25:00"): - now = timezone.now() + now = datetime.now(tz=UTC) assert schedule.is_due(None) assert not schedule.is_due(now) # last run was 14:20, current time is 14:22 = not due with freeze_time("2025-01-24 14:22:00"): - two_twenty = timezone.now() - timedelta(minutes=2) + two_twenty = datetime.now(tz=UTC) - timedelta(minutes=2) assert not schedule.is_due(two_twenty) # last run was 14:20, current time is 14:25 = due with freeze_time("2025-01-24 14:25:00"): - two_twenty = timezone.now() - timedelta(minutes=5) + two_twenty = datetime.now(tz=UTC) - timedelta(minutes=5) assert schedule.is_due(two_twenty) # last run was 14:15, current time is 14:25 = due as we missed an interval with freeze_time("2025-01-24 14:25:00"): - two_fifteen = timezone.now() - timedelta(minutes=10) + two_fifteen = datetime.now(tz=UTC) - timedelta(minutes=10) assert schedule.is_due(two_fifteen) # last run was 14:26 (the future) current time is 14:25 = 
not due with freeze_time("2025-01-24 14:25:00"): - future = timezone.now() + timedelta(minutes=1) + future = datetime.now(tz=UTC) + timedelta(minutes=1) assert not schedule.is_due(future) @@ -126,52 +125,52 @@ def test_crontabschedule_remaining_seconds() -> None: # last run was late (14:21), next spawn is at 14:25 with freeze_time("2025-01-24 14:25:00"): - four_min_ago = timezone.now() - timedelta(minutes=4) + four_min_ago = datetime.now(tz=UTC) - timedelta(minutes=4) assert schedule.remaining_seconds(four_min_ago) == 0 # last run was 5 min ago, right on schedule with freeze_time("2025-01-24 14:25:00"): - five_min_ago = timezone.now() - timedelta(minutes=5) + five_min_ago = datetime.now(tz=UTC) - timedelta(minutes=5) assert schedule.remaining_seconds(five_min_ago) == 0 # last run was mere seconds ago. 5 min remaining with freeze_time("2025-01-24 14:25:10"): - five_min_ago = timezone.now() + five_min_ago = datetime.now(tz=UTC) assert schedule.remaining_seconds(five_min_ago) == 300 # Later in the minute. crontabs only have minute precision. with freeze_time("2025-01-24 14:25:59"): - five_min_ago = timezone.now() + five_min_ago = datetime.now(tz=UTC) assert schedule.remaining_seconds(five_min_ago) == 300 # It isn't time yet, as we're mid interval with freeze_time("2025-01-24 14:23:10"): - three_min_ago = timezone.now() - timedelta(minutes=3) + three_min_ago = datetime.now(tz=UTC) - timedelta(minutes=3) assert schedule.remaining_seconds(three_min_ago) == 120 # 14:19 was 1 min late, we missed a beat but we're currently on time. with freeze_time("2025-01-24 14:25:10"): - six_min_ago = timezone.now() - timedelta(minutes=6) + six_min_ago = datetime.now(tz=UTC) - timedelta(minutes=6) assert schedule.remaining_seconds(six_min_ago) == 0 # We have missed a few intervals, try to get back on schedule for the next beat with freeze_time("2025-01-24 14:23:00"): - twenty_two_min_ago = timezone.now() - timedelta(minutes=22) + twenty_two_min_ago = datetime.now(tz=UTC) - timedelta(minutes=22) assert schedule.remaining_seconds(twenty_two_min_ago) == 120 # We have encountered a value from the future. # Our clock could be wrong, or we competing with another scheduler. # Advance to the next tick 14:30. 
with freeze_time("2025-01-24 14:24:00"): - future_two = timezone.now() + timedelta(minutes=2) + future_two = datetime.now(tz=UTC) + timedelta(minutes=2) assert schedule.remaining_seconds(future_two) == 360 -@freeze_time("2025-01-24 14:25:00") +@freeze_time(datetime(2025, 1, 24, 14, 25, 0, tzinfo=UTC)) def test_crontabschedule_runtime_after() -> None: schedule = CrontabSchedule("test", crontab(minute="*/15")) - now = timezone.now() + now = datetime.now(tz=UTC) assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC) last_run = datetime(2025, 1, 24, 14, 29, 15, tzinfo=UTC) @@ -185,7 +184,7 @@ def test_crontabschedule_runtime_after() -> None: assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 18, 1, 0, tzinfo=UTC) schedule = CrontabSchedule("test", crontab(minute="*/1")) - now = timezone.now() + now = datetime.now(tz=UTC) assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 26, 0, tzinfo=UTC) From 6247b758390da17f899d7336e99f192385f7c9c8 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 16:03:29 -0500 Subject: [PATCH 09/19] Get scheduler tests passing Needed to add UTC to lots of datetime logic to resolve timezone deltas between frozen times and datetime.now(UTC) --- clients/python/src/taskbroker_client/app.py | 6 +- .../python/src/taskbroker_client/metrics.py | 14 +- .../scheduler/{scheduler.py => runner.py} | 35 ++-- clients/python/tests/scheduler/test_runner.py | 154 ++++++------------ 4 files changed, 77 insertions(+), 132 deletions(-) rename clients/python/src/taskbroker_client/scheduler/{scheduler.py => runner.py} (92%) diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py index d9471ce1..adb34c21 100644 --- a/clients/python/src/taskbroker_client/app.py +++ b/clients/python/src/taskbroker_client/app.py @@ -4,10 +4,10 @@ from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation +from taskbroker_client.imports import import_string from taskbroker_client.metrics import MetricsBackend from taskbroker_client.registry import TaskRegistry from taskbroker_client.router import TaskRouter -from taskbroker_client.imports import import_string from taskbroker_client.types import AtMostOnceStore, ProducerFactory @@ -23,16 +23,16 @@ def __init__( metrics_class: str | MetricsBackend = "taskbroker_client.metrics.NoOpMetricsBackend", at_most_once_store: AtMostOnceStore | None = None, ) -> None: + self.metrics = self._build_metrics(metrics_class) self._config = { "rpc_secret": None, "at_most_once_timeout": None, } self._modules: Iterable[str] = [] - self._metrics = self._build_metrics(metrics_class) self._taskregistry = TaskRegistry( producer_factory=producer_factory, router=self._build_router(router_class), - metrics=self._metrics, + metrics=self.metrics, ) if at_most_once_store: self.at_most_once_store(at_most_once_store) diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py index 52d55098..962ae878 100644 --- a/clients/python/src/taskbroker_client/metrics.py +++ b/clients/python/src/taskbroker_client/metrics.py @@ -19,19 +19,13 @@ def incr( name: str, value: int | float = 1, tags: Tags | None = None, + sample_rate: float | None = None, ) -> None: """ Increments a counter metric by a given value. """ raise NotImplementedError - @abstractmethod - def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None: - """ - Sets a gauge metric to the given value. 
- """ - raise NotImplementedError - @abstractmethod def distribution( self, @@ -39,6 +33,7 @@ def distribution( value: int | float, tags: Tags | None = None, unit: str | None = None, + sample_rate: float | None = None, ) -> None: """ Records a distribution metric. @@ -56,17 +51,16 @@ def incr( name: str, value: int | float = 1, tags: Tags | None = None, + sample_rate: float | None = None, ) -> None: pass - def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None: - pass - def distribution( self, name: str, value: int | float, tags: Tags | None = None, unit: str | None = None, + sample_rate: float | None = None, ) -> None: pass diff --git a/clients/python/src/taskbroker_client/scheduler/scheduler.py b/clients/python/src/taskbroker_client/scheduler/runner.py similarity index 92% rename from clients/python/src/taskbroker_client/scheduler/scheduler.py rename to clients/python/src/taskbroker_client/scheduler/runner.py index 7f29c05c..eee4abd2 100644 --- a/clients/python/src/taskbroker_client/scheduler/scheduler.py +++ b/clients/python/src/taskbroker_client/scheduler/runner.py @@ -3,20 +3,20 @@ import heapq import logging from collections.abc import Mapping -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from typing import TYPE_CHECKING, Any -from django.utils import timezone from redis.client import StrictRedis from rediscluster import RedisCluster -from taskbroker_client.scheduler.config import ScheduleConfig, crontab -from sentry.taskworker.app import TaskworkerApp -from sentry.taskworker.scheduler.schedules import CrontabSchedule, Schedule, TimedeltaSchedule -from sentry.taskworker.task import Task -from sentry.utils import metrics from sentry_sdk import capture_exception from sentry_sdk.crons import MonitorStatus, capture_checkin +from taskbroker_client.app import TaskbrokerApp +from taskbroker_client.metrics import MetricsBackend +from taskbroker_client.scheduler.config import ScheduleConfig, crontab +from taskbroker_client.scheduler.schedules import CrontabSchedule, Schedule, TimedeltaSchedule +from taskbroker_client.task import Task + logger = logging.getLogger("taskworker.scheduler") if TYPE_CHECKING: @@ -30,8 +30,11 @@ class RunStorage: in the future, or adapt taskworkers for other applications should we need to. """ - def __init__(self, redis: RedisCluster[str] | StrictRedis[str]) -> None: + def __init__( + self, metrics: MetricsBackend, redis: RedisCluster[str] | StrictRedis[str] + ) -> None: self._redis = redis + self._metrics = metrics def _make_key(self, taskname: str) -> str: return f"tw:scheduler:{taskname}" @@ -44,7 +47,7 @@ def set(self, taskname: str, next_runtime: datetime) -> bool: Returns False when the key is set and a task should not be spawned. 
""" - now = timezone.now() + now = datetime.now(tz=UTC) # next_runtime & now could be the same second, and redis gets sad if ex=0 duration = max(int((next_runtime - now).total_seconds()), 1) @@ -60,7 +63,9 @@ def read(self, taskname: str) -> datetime | None: if result: return datetime.fromisoformat(result) - metrics.incr("taskworker.scheduler.run_storage.read.miss", tags={"taskname": taskname}) + self._metrics.incr( + "taskworker.scheduler.run_storage.read.miss", tags={"taskname": taskname} + ) return None def read_many(self, tasknames: list[str]) -> Mapping[str, datetime | None]: @@ -149,7 +154,7 @@ def delay_task(self) -> None: def monitor_config(self) -> MonitorConfig | None: checkin_config: MonitorConfig = { "schedule": {}, - "timezone": timezone.get_current_timezone_name(), + "timezone": "UTC", } if isinstance(self._schedule, CrontabSchedule): checkin_config["schedule"]["type"] = "crontab" @@ -177,7 +182,7 @@ class ScheduleRunner: is used in a while loop to spawn tasks and sleep. """ - def __init__(self, app: TaskworkerApp, run_storage: RunStorage) -> None: + def __init__(self, app: TaskbrokerApp, run_storage: RunStorage) -> None: self._entries: list[ScheduleEntry] = [] self._app = app self._run_storage = run_storage @@ -230,14 +235,14 @@ def tick(self) -> float: return self._heap[0][0] def _try_spawn(self, entry: ScheduleEntry) -> None: - now = timezone.now() + now = datetime.now(tz=UTC) next_runtime = entry.runtime_after(now) if self._run_storage.set(entry.fullname, next_runtime): entry.delay_task() entry.set_last_run(now) logger.debug("taskworker.scheduler.delay_task", extra={"fullname": entry.fullname}) - metrics.incr( + self._app.metrics.incr( "taskworker.scheduler.delay_task", tags={ "taskname": entry.taskname, @@ -258,7 +263,7 @@ def _try_spawn(self, entry: ScheduleEntry) -> None: "last_runtime": run_state.isoformat() if run_state else None, }, ) - metrics.incr( + self._app.metrics.incr( "taskworker.scheduler.sync_with_storage", tags={"taskname": entry.taskname, "namespace": entry.namespace}, ) diff --git a/clients/python/tests/scheduler/test_runner.py b/clients/python/tests/scheduler/test_runner.py index 70c16cf9..0e83bb3e 100644 --- a/clients/python/tests/scheduler/test_runner.py +++ b/clients/python/tests/scheduler/test_runner.py @@ -2,19 +2,19 @@ from unittest.mock import Mock, patch import pytest -from django.utils import timezone -from sentry.conf.types.taskworker import crontab -from sentry.silo.base import SiloMode -from sentry.taskworker.app import TaskworkerApp -from sentry.taskworker.scheduler.runner import RunStorage, ScheduleRunner -from sentry.testutils.helpers.datetime import freeze_time -from sentry.testutils.thread_leaks.pytest import thread_leak_allowlist -from sentry.utils.redis import redis_clusters +from redis import StrictRedis + +from taskbroker_client.app import TaskbrokerApp +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.scheduler.config import crontab +from taskbroker_client.scheduler.runner import RunStorage, ScheduleRunner + +from ..conftest import freeze_time, producer_factory @pytest.fixture -def task_app() -> TaskworkerApp: - app = TaskworkerApp() +def task_app() -> TaskbrokerApp: + app = TaskbrokerApp(producer_factory=producer_factory) namespace = app.taskregistry.create_namespace("test") @namespace.register(name="valid") @@ -30,21 +30,22 @@ def second_func() -> None: @pytest.fixture def run_storage() -> RunStorage: - redis = redis_clusters.get("default") + # TODO use env vars for redis port. 
+ redis = StrictRedis(host="localhost", port=6379, decode_responses=True) redis.flushdb() - return RunStorage(redis) + return RunStorage(metrics=NoOpMetricsBackend(), redis=redis) def test_runstorage_zero_duration(run_storage: RunStorage) -> None: with freeze_time("2025-07-19 14:25:00"): - now = timezone.now() + now = datetime.now(tz=UTC) result = run_storage.set("test:do_stuff", now) assert result is True def test_runstorage_double_set(run_storage: RunStorage) -> None: with freeze_time("2025-07-19 14:25:00"): - now = timezone.now() + now = datetime.now(tz=UTC) first = run_storage.set("test:do_stuff", now) second = run_storage.set("test:do_stuff", now) @@ -52,7 +53,6 @@ def test_runstorage_double_set(run_storage: RunStorage) -> None: assert second is False, "writing a key that exists should fail" -@pytest.mark.django_db def test_schedulerunner_add_invalid(task_app) -> None: run_storage = Mock(spec=RunStorage) schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) @@ -88,18 +88,16 @@ def test_schedulerunner_add_invalid(task_app) -> None: assert "microseconds" in str(err) -@pytest.mark.django_db -def test_schedulerunner_tick_no_tasks(task_app: TaskworkerApp, run_storage: RunStorage) -> None: +def test_schedulerunner_tick_no_tasks(task_app: TaskbrokerApp, run_storage: RunStorage) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) - with freeze_time("2025-01-24 14:25:00"): + with freeze_time("2025-01-24 14:25:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 60 -@pytest.mark.django_db def test_schedulerunner_tick_one_task_time_remaining( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) @@ -111,11 +109,11 @@ def test_schedulerunner_tick_one_task_time_remaining( }, ) # Last run was two minutes ago. 
- with freeze_time("2025-01-24 14:23:00"): + with freeze_time("2025-01-24 14:23:00 UTC"): run_storage.set("test:valid", datetime(2025, 1, 24, 14, 28, 0, tzinfo=UTC)) namespace = task_app.taskregistry.get("test") - with freeze_time("2025-01-24 14:25:00"), patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:25:00 UTC"), patch.object(namespace, "send_task") as mock_send: sleep_time = schedule_set.tick() assert sleep_time == 180 assert mock_send.call_count == 0 @@ -124,9 +122,8 @@ def test_schedulerunner_tick_one_task_time_remaining( assert last_run == datetime(2025, 1, 24, 14, 23, 0, tzinfo=UTC) -@pytest.mark.django_db def test_schedulerunner_tick_one_task_spawned( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: run_storage = Mock(spec=RunStorage) schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) @@ -140,12 +137,12 @@ def test_schedulerunner_tick_one_task_spawned( # Last run was 5 minutes from the freeze_time below run_storage.read_many.return_value = { - "test:valid": datetime(2025, 1, 24, 14, 19, 55), + "test:valid": datetime(2025, 1, 24, 14, 19, 55, tzinfo=UTC), } run_storage.set.return_value = True namespace = task_app.taskregistry.get("test") - with freeze_time("2025-01-24 14:25:00"), patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:25:00 UTC"), patch.object(namespace, "send_task") as mock_send: sleep_time = schedule_set.tick() assert sleep_time == 300 assert mock_send.call_count == 1 @@ -160,10 +157,9 @@ def test_schedulerunner_tick_one_task_spawned( run_storage.set.assert_called_with("test:valid", datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC)) -@pytest.mark.django_db -@patch("sentry.taskworker.scheduler.runner.capture_checkin") +@patch("taskbroker_client.scheduler.runner.capture_checkin") def test_schedulerunner_tick_create_checkin( - mock_capture_checkin: Mock, task_app: TaskworkerApp, run_storage: RunStorage + mock_capture_checkin: Mock, task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: run_storage = Mock(spec=RunStorage) schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) @@ -177,14 +173,14 @@ def test_schedulerunner_tick_create_checkin( # Last run was 5 minutes from the freeze_time below run_storage.read_many.return_value = { - "test:valid": datetime(2025, 1, 24, 14, 19, 55), + "test:valid": datetime(2025, 1, 24, 14, 19, 55, tzinfo=UTC), } run_storage.set.return_value = True mock_capture_checkin.return_value = "checkin-id" namespace = task_app.taskregistry.get("test") with ( - freeze_time("2025-01-24 14:25:00"), + freeze_time("2025-01-24 14:25:00 UTC"), patch.object(namespace, "send_task") as mock_send, ): sleep_time = schedule_set.tick() @@ -215,9 +211,8 @@ def test_schedulerunner_tick_create_checkin( ) -@pytest.mark.django_db def test_schedulerunner_tick_key_exists_no_spawn( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) schedule_set.add( @@ -229,30 +224,28 @@ def test_schedulerunner_tick_key_exists_no_spawn( ) namespace = task_app.taskregistry.get("test") - with patch.object(namespace, "send_task") as mock_send, freeze_time("2025-01-24 14:25:00"): + with patch.object(namespace, "send_task") as mock_send, freeze_time("2025-01-24 14:25:00 UTC"): # Run tick() to initialize state in the scheduler. This will write a key to run_storage. 
sleep_time = schedule_set.tick() assert sleep_time == 300 assert mock_send.call_count == 1 - with freeze_time("2025-01-24 14:30:00"): + with freeze_time("2025-01-24 14:30:00 UTC"): # Set a key into run_storage to simulate another scheduler running run_storage.delete("test:valid") - assert run_storage.set("test:valid", timezone.now() + timedelta(minutes=2)) + assert run_storage.set("test:valid", datetime.now(tz=UTC) + timedelta(minutes=2)) # Our scheduler would wakeup and tick again. # The key exists in run_storage so we should not spawn a task. # last_run time should synchronize with run_storage state, and count down from 14:30 - with freeze_time("2025-01-24 14:30:02"): + with freeze_time("2025-01-24 14:30:02 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 298 assert mock_send.call_count == 1 -@pytest.mark.django_db -@thread_leak_allowlist(reason="taskworker", issue=97034) def test_schedulerunner_tick_one_task_multiple_ticks( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) schedule_set.add( @@ -263,22 +256,21 @@ def test_schedulerunner_tick_one_task_multiple_ticks( }, ) - with freeze_time("2025-01-24 14:25:00"): + with freeze_time("2025-01-24 14:25:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 300 - with freeze_time("2025-01-24 14:26:00"): + with freeze_time("2025-01-24 14:26:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 240 - with freeze_time("2025-01-24 14:28:00"): + with freeze_time("2025-01-24 14:28:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 120 -@pytest.mark.django_db def test_schedulerunner_tick_one_task_multiple_ticks_crontab( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) schedule_set.add( @@ -291,26 +283,25 @@ def test_schedulerunner_tick_one_task_multiple_ticks_crontab( namespace = task_app.taskregistry.get("test") with patch.object(namespace, "send_task") as mock_send: - with freeze_time("2025-01-24 14:24:00"): + with freeze_time("2025-01-24 14:24:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 120 assert mock_send.call_count == 1 - with freeze_time("2025-01-24 14:25:00"): + with freeze_time("2025-01-24 14:25:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 60 # Remove key to simulate expiration run_storage.delete("test:valid") - with freeze_time("2025-01-24 14:26:00"): + with freeze_time("2025-01-24 14:26:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 120 assert mock_send.call_count == 2 -@pytest.mark.django_db def test_schedulerunner_tick_multiple_tasks( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) schedule_set.add( @@ -330,13 +321,13 @@ def test_schedulerunner_tick_multiple_tasks( namespace = task_app.taskregistry.get("test") with patch.object(namespace, "send_task") as mock_send: - with freeze_time("2025-01-24 14:25:00"): + with freeze_time("2025-01-24 14:25:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 120 assert mock_send.call_count == 2 - with freeze_time("2025-01-24 14:26:00"): + with freeze_time("2025-01-24 14:26:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 60 @@ -344,7 +335,7 @@ def 
test_schedulerunner_tick_multiple_tasks( # Remove the redis key, as the ttl in redis doesn't respect freeze_time() run_storage.delete("test:second") - with freeze_time("2025-01-24 14:27:01"): + with freeze_time("2025-01-24 14:27:01 UTC"): sleep_time = schedule_set.tick() # two minutes left on the 5 min task assert sleep_time == 120 @@ -352,9 +343,8 @@ def test_schedulerunner_tick_multiple_tasks( assert mock_send.call_count == 3 -@pytest.mark.django_db def test_schedulerunner_tick_fast_and_slow( - task_app: TaskworkerApp, run_storage: RunStorage + task_app: TaskbrokerApp, run_storage: RunStorage ) -> None: schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) schedule_set.add( @@ -374,7 +364,7 @@ def test_schedulerunner_tick_fast_and_slow( namespace = task_app.taskregistry.get("test") with patch.object(namespace, "send_task") as mock_send: - with freeze_time("2025-01-24 14:25:00"): + with freeze_time("2025-01-24 14:25:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 30 @@ -382,7 +372,7 @@ def test_schedulerunner_tick_fast_and_slow( assert called == ["valid"] run_storage.delete("test:valid") - with freeze_time("2025-01-24 14:25:30"): + with freeze_time("2025-01-24 14:25:30 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 30 @@ -390,7 +380,7 @@ def test_schedulerunner_tick_fast_and_slow( assert called == ["valid", "valid"] run_storage.delete("test:valid") - with freeze_time("2025-01-24 14:26:00"): + with freeze_time("2025-01-24 14:26:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 30 @@ -398,7 +388,7 @@ def test_schedulerunner_tick_fast_and_slow( assert called == ["valid", "valid", "second", "valid"] run_storage.delete("test:valid") - with freeze_time("2025-01-24 14:26:30"): + with freeze_time("2025-01-24 14:26:30 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 30 @@ -406,7 +396,7 @@ def test_schedulerunner_tick_fast_and_slow( assert called == ["valid", "valid", "second", "valid", "valid"] run_storage.delete("test:valid") - with freeze_time("2025-01-24 14:27:00"): + with freeze_time("2025-01-24 14:27:00 UTC"): sleep_time = schedule_set.tick() assert sleep_time == 30 @@ -424,47 +414,3 @@ def test_schedulerunner_tick_fast_and_slow( def extract_sent_tasks(mock: Mock) -> list[str]: return [call[0][0].taskname for call in mock.call_args_list] - - -@pytest.mark.django_db -def test_schedulerunner_silo_limited_task_has_task_properties() -> None: - app = TaskworkerApp() - namespace = app.taskregistry.create_namespace("test") - - @namespace.register( - name="region_task", - at_most_once=True, - wait_for_delivery=True, - silo_mode=SiloMode.REGION, - ) - def region_task() -> None: - pass - - for attr in region_task.__dict__.keys(): - if attr.startswith("_") and not attr.startswith("__"): - continue - assert hasattr(region_task, attr) - - assert region_task.fullname == "test:region_task" - assert region_task.namespace.name == "test" - assert region_task.name == "region_task" - assert region_task.at_most_once is True - assert region_task.wait_for_delivery is True - - run_storage = Mock(spec=RunStorage) - schedule_set = ScheduleRunner(app=app, run_storage=run_storage) - schedule_set.add( - "region-task", - { - "task": "test:region_task", - "schedule": timedelta(minutes=5), - }, - ) - - schedule_set.log_startup() - - assert len(schedule_set._entries) == 1 - entry = schedule_set._entries[0] - assert entry.fullname == "test:region_task" - assert entry.namespace == "test" - assert entry.taskname == "region_task" From 
e6eceb437b1308e8365e15c0c8070155bfc528b8 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 16:36:39 -0500 Subject: [PATCH 10/19] Get tests passing for TaskbrokerClient --- .../python/src/taskbroker_client/metrics.py | 28 +++- .../src/taskbroker_client/worker/client.py | 34 ++-- .../src/taskbroker_client/worker/worker.py | 49 +++--- clients/python/tests/worker/test_client.py | 146 ++++++++---------- 4 files changed, 137 insertions(+), 120 deletions(-) diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py index 962ae878..64a025d7 100644 --- a/clients/python/src/taskbroker_client/metrics.py +++ b/clients/python/src/taskbroker_client/metrics.py @@ -2,7 +2,8 @@ from abc import abstractmethod from collections.abc import Mapping -from typing import Protocol, runtime_checkable +from contextlib import contextmanager +from typing import Generator, Protocol, runtime_checkable Tags = Mapping[str, str] @runtime_checkable @@ -40,6 +41,20 @@ def distribution( """ raise NotImplementedError + @contextmanager + def timer( + self, + key: str, + instance: str | None = None, + tags: Tags | None = None, + sample_rate: float | None = None, + stacklevel: int = 0, + ) -> Generator[None]: + """ + Records a distribution metric with a context manager. + """ + raise NotImplementedError + class NoOpMetricsBackend(MetricsBackend): """ @@ -64,3 +79,14 @@ def distribution( sample_rate: float | None = None, ) -> None: pass + + @contextmanager + def timer( + self, + key: str, + instance: str | None = None, + tags: Tags | None = None, + sample_rate: float | None = None, + stacklevel: int = 0, + ) -> Generator[None]: + yield None diff --git a/clients/python/src/taskbroker_client/worker/client.py b/clients/python/src/taskbroker_client/worker/client.py index dd34684d..ae0a1497 100644 --- a/clients/python/src/taskbroker_client/worker/client.py +++ b/clients/python/src/taskbroker_client/worker/client.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any import grpc +import orjson from google.protobuf.message import Message from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( FetchNextTask, @@ -18,14 +19,13 @@ ) from sentry_protos.taskbroker.v1.taskbroker_pb2_grpc import ConsumerServiceStub -from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation -from sentry.taskworker.client.processing_result import ProcessingResult -from sentry.taskworker.constants import ( +from taskbroker_client.constants import ( DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS, DEFAULT_REBALANCE_AFTER, DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT, ) -from sentry.utils import json, metrics +from taskbroker_client.metrics import MetricsBackend +from taskbroker_client.types import InflightTaskActivation, ProcessingResult logger = logging.getLogger("sentry.taskworker.client") @@ -120,7 +120,7 @@ class HealthCheckSettings: touch_interval_sec: float -class TaskworkerClient: +class TaskbrokerClient: """ Taskworker RPC client wrapper @@ -131,6 +131,7 @@ def __init__( self, hosts: list[str], + metrics: MetricsBackend, max_tasks_before_rebalance: int = DEFAULT_REBALANCE_AFTER, max_consecutive_unavailable_errors: int = DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS,
temporary_unavailable_host_timeout: int = DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT, @@ -141,6 +142,7 @@ def __init__( assert len(hosts) > 0, "You must provide at least one RPC host to connect to" self._hosts = hosts self._rpc_secret = rpc_secret + self._metrics = metrics self._grpc_options: list[tuple[str, Any]] = [ ("grpc.max_receive_message_length", MAX_ACTIVATION_SIZE) @@ -183,7 +185,7 @@ def _emit_health_check(self) -> None: return self._health_check_settings.file_path.touch() - metrics.incr( + self._metrics.incr( "taskworker.client.health_check.touched", ) self._timestamp_since_touch = cur_time @@ -192,7 +194,7 @@ def _connect_to_host(self, host: str) -> ConsumerServiceStub: logger.info("taskworker.client.connect", extra={"host": host}) channel = grpc.insecure_channel(host, options=self._grpc_options) if self._rpc_secret: - secrets = json.loads(self._rpc_secret) + secrets = orjson.loads(self._rpc_secret) channel = grpc.intercept_channel(channel, RequestSignatureInterceptor(secrets)) return ConsumerServiceStub(channel) @@ -229,7 +231,7 @@ def _get_cur_stub(self) -> tuple[str, ConsumerServiceStub]: self._cur_host = random.choice(available_hosts) self._num_tasks_before_rebalance = self._max_tasks_before_rebalance self._num_consecutive_unavailable_errors = 0 - metrics.incr( + self._metrics.incr( "taskworker.client.loadbalancer.rebalance", tags={"reason": "unavailable_count_reached"}, ) @@ -237,7 +239,7 @@ def _get_cur_stub(self) -> tuple[str, ConsumerServiceStub]: self._cur_host = random.choice(available_hosts) self._num_tasks_before_rebalance = self._max_tasks_before_rebalance self._num_consecutive_unavailable_errors = 0 - metrics.incr( + self._metrics.incr( "taskworker.client.loadbalancer.rebalance", tags={"reason": "max_tasks_reached"}, ) @@ -260,10 +262,10 @@ def get_task(self, namespace: str | None = None) -> InflightTaskActivation | Non request = GetTaskRequest(namespace=namespace) try: host, stub = self._get_cur_stub() - with metrics.timer("taskworker.get_task.rpc", tags={"host": host}): + with self._metrics.timer("taskworker.get_task.rpc", tags={"host": host}): response = stub.GetTask(request) except grpc.RpcError as err: - metrics.incr( + self._metrics.incr( "taskworker.client.rpc_error", tags={"method": "GetTask", "status": err.code().name} ) if err.code() == grpc.StatusCode.NOT_FOUND: @@ -277,7 +279,7 @@ def get_task(self, namespace: str | None = None) -> InflightTaskActivation | Non self._num_consecutive_unavailable_errors = 0 self._temporary_unavailable_hosts.pop(host, None) if response.HasField("task"): - metrics.incr( + self._metrics.incr( "taskworker.client.get_task", tags={"namespace": response.task.namespace}, ) @@ -298,7 +300,7 @@ def update_task( """ self._emit_health_check() - metrics.incr("taskworker.client.fetch_next", tags={"next": fetch_next_task is not None}) + self._metrics.incr("taskworker.client.fetch_next", tags={"next": fetch_next_task is not None}) self._clear_temporary_unavailable_hosts() request = SetTaskStatusRequest( id=processing_result.task_id, @@ -308,7 +310,7 @@ def update_task( try: if processing_result.host in self._temporary_unavailable_hosts: - metrics.incr( + self._metrics.incr( "taskworker.client.skipping_set_task_due_to_unavailable_host", tags={"broker_host": processing_result.host}, ) @@ -316,10 +318,10 @@ def update_task( f"Host: {processing_result.host} is temporarily unavailable" ) - with metrics.timer("taskworker.update_task.rpc", tags={"host": processing_result.host}): + with self._metrics.timer("taskworker.update_task.rpc", 
tags={"host": processing_result.host}): response = self._host_to_stubs[processing_result.host].SetTaskStatus(request) except grpc.RpcError as err: - metrics.incr( + self._metrics.incr( "taskworker.client.rpc_error", tags={"method": "SetTaskStatus", "status": err.code().name}, ) diff --git a/clients/python/src/taskbroker_client/worker/worker.py b/clients/python/src/taskbroker_client/worker/worker.py index 16e13255..8bf950e5 100644 --- a/clients/python/src/taskbroker_client/worker/worker.py +++ b/clients/python/src/taskbroker_client/worker/worker.py @@ -15,23 +15,20 @@ import grpc from sentry_protos.taskbroker.v1.taskbroker_pb2 import FetchNextTask -from sentry import options -from sentry.taskworker.app import import_app -from sentry.taskworker.client.client import ( +from taskbroker_client.app import import_app +from taskbroker_client.worker.client import ( HealthCheckSettings, HostTemporarilyUnavailable, - TaskworkerClient, + TaskbrokerClient, ) -from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation -from sentry.taskworker.client.processing_result import ProcessingResult -from sentry.taskworker.constants import ( +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.constants import ( DEFAULT_REBALANCE_AFTER, DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH, DEFAULT_WORKER_QUEUE_SIZE, MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE, ) -from sentry.taskworker.workerchild import child_process -from sentry.utils import metrics +from taskbroker_client.worker.workerchild import child_process logger = logging.getLogger("sentry.taskworker.worker") @@ -72,8 +69,9 @@ def __init__( self._concurrency = concurrency app = import_app(app_module) - self.client = TaskworkerClient( + self.client = TaskbrokerClient( hosts=broker_hosts, + metrics=app.metrics, max_tasks_before_rebalance=rebalance_after, health_check_settings=( None @@ -81,8 +79,10 @@ def __init__( else HealthCheckSettings(Path(health_check_file_path), health_check_sec_per_touch) ), rpc_secret=app.config["rpc_secret"], - grpc_config=options.get("taskworker.grpc_service_config"), + grpc_config=app.config["grpc_config"], ) + self._metrics = app.metrics + if process_type == "fork": self.mp_context = multiprocessing.get_context("fork") elif process_type == "spawn": @@ -181,7 +181,7 @@ def _add_task(self) -> bool: # causing processing deadline expiration. # Whereas in pools that have consistent short tasks, this happens # more frequently, allowing workers to run more smoothly. - metrics.incr( + self._metrics.incr( "taskworker.worker.add_tasks.child_tasks_full", tags={"processing_pool": self._processing_pool_name}, ) @@ -194,13 +194,13 @@ def _add_task(self) -> bool: try: start_time = time.monotonic() self._child_tasks.put(inflight) - metrics.distribution( + self._metrics.distribution( "taskworker.worker.child_task.put.duration", time.monotonic() - start_time, tags={"processing_pool": self._processing_pool_name}, ) except queue.Full: - metrics.incr( + self._metrics.incr( "taskworker.worker.child_tasks.put.full", tags={"processing_pool": self._processing_pool_name}, ) @@ -231,15 +231,16 @@ def result_thread() -> None: iopool = ThreadPoolExecutor(max_workers=self._concurrency) with iopool as executor: while not self._shutdown_event.is_set(): - fetch_next = self._processing_pool_name not in options.get( - "taskworker.fetch_next.disabled_pools" - ) - + # TODO We should remove fetch_next = False from sentry as it couldn't be rolled + # out everywhere. 
+ # fetch_next = self._processing_pool_name not in options.get( + # "taskworker.fetch_next.disabled_pools" + # ) try: result = self._processed_tasks.get(timeout=1.0) - executor.submit(self._send_result, result, fetch_next) + executor.submit(self._send_result, result, fetch=True) except queue.Empty: - metrics.incr( + self._metrics.incr( "taskworker.worker.result_thread.queue_empty", tags={"processing_pool": self._processing_pool_name}, ) @@ -257,7 +258,7 @@ def _send_result(self, result: ProcessingResult, fetch: bool = True) -> bool: Run in a thread to avoid blocking the process, and during shutdown/ See `start_result_thread` """ - metrics.distribution( + self._metrics.distribution( "taskworker.worker.complete_duration", time.monotonic() - result.receive_timestamp, tags={"processing_pool": self._processing_pool_name}, @@ -273,7 +274,7 @@ def _send_result(self, result: ProcessingResult, fetch: bool = True) -> bool: try: start_time = time.monotonic() self._child_tasks.put(next) - metrics.distribution( + self._metrics.distribution( "taskworker.worker.child_task.put.duration", time.monotonic() - start_time, tags={"processing_pool": self._processing_pool_name}, @@ -360,7 +361,7 @@ def spawn_children_thread() -> None: "taskworker.spawn_child", extra={"pid": process.pid, "processing_pool": self._processing_pool_name}, ) - metrics.incr( + self._metrics.incr( "taskworker.worker.spawn_child", tags={"processing_pool": self._processing_pool_name}, ) @@ -387,7 +388,7 @@ def fetch_task(self) -> InflightTaskActivation | None: return None if not activation: - metrics.incr( + self._metrics.incr( "taskworker.worker.fetch_task.not_found", tags={"processing_pool": self._processing_pool_name}, ) diff --git a/clients/python/tests/worker/test_client.py b/clients/python/tests/worker/test_client.py index dac1abb8..24bb91d6 100644 --- a/clients/python/tests/worker/test_client.py +++ b/clients/python/tests/worker/test_client.py @@ -20,15 +20,15 @@ TaskActivation, ) -from sentry.taskworker.client.client import ( +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.worker.client import ( HealthCheckSettings, HostTemporarilyUnavailable, - TaskworkerClient, + TaskbrokerClient, make_broker_hosts, ) -from sentry.taskworker.client.processing_result import ProcessingResult -from sentry.taskworker.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH -from sentry.testutils.pytest.fixtures import django_db_all +from taskbroker_client.types import ProcessingResult +from taskbroker_client.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH @dataclasses.dataclass @@ -52,7 +52,7 @@ def __init__( self.response_deserializer = response_deserializer self.responses = responses - def __call__(self, *args, **kwargs): + def __call__(self, *args: Any, **kwargs: Any) -> Any: """Capture calls and use registered mocks""" # move the head to the tail res = self.responses[0] @@ -63,7 +63,7 @@ def __call__(self, *args, **kwargs): raise res.response return res.response - def with_call(self, *args, **kwargs): + def with_call(self, *args: Any, **kwargs: Any) -> Any: res = self.responses[0] if res.metadata: assert res.metadata == kwargs.get("metadata"), "Metadata mismatch" @@ -73,7 +73,7 @@ def with_call(self, *args, **kwargs): class MockChannel: - def __init__(self): + def __init__(self) -> None: self._responses = defaultdict(list) def unary_unary( @@ -81,9 +81,9 @@ def unary_unary( path: str, request_serializer: Callable, response_deserializer: Callable, - *args, - **kwargs, - ): + *args: Any, + **kwargs: 
Any, + ) -> MockServiceMethod: return MockServiceMethod( path, self._responses.get(path, []), request_serializer, response_deserializer ) @@ -93,14 +93,14 @@ def add_response( path: str, resp: Message | Exception, metadata: tuple[tuple[str, str | bytes], ...] | None = None, - ): + ) -> None: self._responses[path].append(MockServiceCall(response=resp, metadata=metadata)) class MockGrpcError(grpc.RpcError): """Grpc error are elusive and this mock simulates the interface in mypy stubs""" - def __init__(self, code, message): + def __init__(self, code: int, message: str) -> None: self._code = code self._message = message @@ -110,7 +110,7 @@ def code(self) -> grpc.StatusCode: def details(self) -> str: return self._message - def result(self): + def result(self) -> None: raise self @@ -128,14 +128,12 @@ def test_make_broker_hosts() -> None: assert hosts == ["broker:50051", "broker-a:50051", "broker-b:50051"] -@django_db_all def test_init_no_hosts() -> None: with pytest.raises(AssertionError) as err: - TaskworkerClient(hosts=[]) + TaskbrokerClient(hosts=[], metrics=NoOpMetricsBackend()) assert "You must provide at least one RPC host" in str(err) -@django_db_all def test_health_check_is_debounced() -> None: channel = MockChannel() channel.add_response( @@ -164,11 +162,12 @@ def test_health_check_is_debounced() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") - client = TaskworkerClient( - ["localhost-0:50051"], + client = TaskbrokerClient( + hosts=["localhost-0:50051"], + metrics=NoOpMetricsBackend(), health_check_settings=HealthCheckSettings(health_check_path, 1), ) client._health_check_settings.file_path = Mock() # type: ignore[union-attr] @@ -177,13 +176,12 @@ def test_health_check_is_debounced() -> None: _ = client.get_task() assert client._health_check_settings.file_path.touch.call_count == 1 # type: ignore[union-attr] - with patch("sentry.taskworker.client.client.time") as mock_time: + with patch("taskbroker_client.worker.client.time") as mock_time: mock_time.time.return_value = time.time() + 1 _ = client.get_task() assert client._health_check_settings.file_path.touch.call_count == 2 # type: ignore[union-attr] -@django_db_all def test_get_task_ok() -> None: channel = MockChannel() channel.add_response( @@ -199,9 +197,9 @@ def test_get_task_ok() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost-0:50051"]) + client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend()) result = client.get_task() assert result @@ -210,7 +208,6 @@ def test_get_task_ok() -> None: assert result.activation.namespace == "testing" -@django_db_all def test_get_task_writes_to_health_check_file() -> None: channel = MockChannel() channel.add_response( @@ -227,18 +224,18 @@ def test_get_task_writes_to_health_check_file() -> None: ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") - client 
= TaskworkerClient( + client = TaskbrokerClient( ["localhost-0:50051"], + metrics=NoOpMetricsBackend(), health_check_settings=HealthCheckSettings(health_check_path, 3), ) _ = client.get_task() assert health_check_path.exists() -@django_db_all def test_get_task_with_interceptor() -> None: channel = MockChannel() channel.add_response( @@ -261,9 +258,9 @@ def test_get_task_with_interceptor() -> None: ), ) secret = '["a long secret value","notused"]' - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost-0:50051"], rpc_secret=secret) + client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend(), rpc_secret=secret) result = client.get_task() assert result @@ -272,7 +269,6 @@ def test_get_task_with_interceptor() -> None: assert result.activation.namespace == "testing" -@django_db_all def test_get_task_with_namespace() -> None: channel = MockChannel() channel.add_response( @@ -288,9 +284,9 @@ def test_get_task_with_namespace() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(hosts=make_broker_hosts("localhost:50051", num_brokers=1)) + client = TaskbrokerClient(hosts=make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) result = client.get_task(namespace="testing") assert result @@ -299,36 +295,33 @@ def test_get_task_with_namespace() -> None: assert result.activation.namespace == "testing" -@django_db_all def test_get_task_not_found() -> None: channel = MockChannel() channel.add_response( "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost:50051"]) + client = TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend()) result = client.get_task() assert result is None -@django_db_all def test_get_task_failure() -> None: channel = MockChannel() channel.add_response( "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", MockGrpcError(grpc.StatusCode.INTERNAL, "something bad"), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost:50051"]) + client = TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend()) with pytest.raises(grpc.RpcError): client.get_task() -@django_db_all def test_update_task_writes_to_health_check_file() -> None: channel = MockChannel() channel.add_response( @@ -344,11 +337,12 @@ def test_update_task_writes_to_health_check_file() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") - client = TaskworkerClient( + client = TaskbrokerClient( 
make_broker_hosts("localhost:50051", num_brokers=1), + metrics=NoOpMetricsBackend(), health_check_settings=HealthCheckSettings( health_check_path, DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH ), @@ -360,7 +354,6 @@ def test_update_task_writes_to_health_check_file() -> None: assert health_check_path.exists() -@django_db_all def test_update_task_ok_with_next() -> None: channel = MockChannel() channel.add_response( @@ -376,9 +369,9 @@ def test_update_task_ok_with_next() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) assert set(client._host_to_stubs.keys()) == {"localhost-0:50051"} result = client.update_task( ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), @@ -390,7 +383,6 @@ def test_update_task_ok_with_next() -> None: assert result.activation.id == "abc123" -@django_db_all def test_update_task_ok_with_next_namespace() -> None: channel = MockChannel() channel.add_response( @@ -406,9 +398,9 @@ def test_update_task_ok_with_next_namespace() -> None: ) ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) result = client.update_task( ProcessingResult( task_id="id", @@ -423,15 +415,14 @@ def test_update_task_ok_with_next_namespace() -> None: assert result.activation.namespace == "testing" -@django_db_all def test_update_task_ok_no_next() -> None: channel = MockChannel() channel.add_response( "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", SetTaskStatusResponse() ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(make_broker_hosts("localhost:50051", num_brokers=1)) + client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) result = client.update_task( ProcessingResult( task_id="abc123", @@ -444,16 +435,15 @@ def test_update_task_ok_no_next() -> None: assert result is None -@django_db_all def test_update_task_not_found() -> None: channel = MockChannel() channel.add_response( "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending tasks found"), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost-0:50051"]) + client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend()) result = client.update_task( ProcessingResult( task_id="abc123", @@ -466,16 +456,15 @@ def test_update_task_not_found() -> None: assert result is None -@django_db_all def test_update_task_unavailable_retain_task_to_host() -> None: channel = MockChannel() 
channel.add_response( "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", MockGrpcError(grpc.StatusCode.UNAVAILABLE, "broker down"), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost-0:50051"]) + client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend()) with pytest.raises(MockGrpcError) as err: client.update_task( ProcessingResult( @@ -489,7 +478,6 @@ def test_update_task_unavailable_retain_task_to_host() -> None: assert "broker down" in str(err.value) -@django_db_all def test_client_loadbalance() -> None: channel_0 = MockChannel() channel_0.add_response( @@ -563,17 +551,18 @@ def test_client_loadbalance() -> None: "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", SetTaskStatusResponse(task=None), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.side_effect = [channel_0, channel_1, channel_2, channel_3] - with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: mock_randchoice.side_effect = [ "localhost-0:50051", "localhost-1:50051", "localhost-2:50051", "localhost-3:50051", ] - client = TaskworkerClient( + client = TaskbrokerClient( hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=4), + metrics=NoOpMetricsBackend(), max_tasks_before_rebalance=1, ) @@ -612,7 +601,6 @@ def test_client_loadbalance() -> None: ) -@django_db_all def test_client_loadbalance_on_notfound() -> None: channel_0 = MockChannel() channel_0.add_response( @@ -654,16 +642,17 @@ def test_client_loadbalance_on_notfound() -> None: ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.side_effect = [channel_0, channel_1, channel_2] - with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: mock_randchoice.side_effect = [ "localhost-0:50051", "localhost-1:50051", "localhost-2:50051", ] - client = TaskworkerClient( + client = TaskbrokerClient( hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=3), + metrics=NoOpMetricsBackend(), max_tasks_before_rebalance=30, ) @@ -689,7 +678,6 @@ def test_client_loadbalance_on_notfound() -> None: assert task_2 and task_2.activation.id == "2" -@django_db_all def test_client_loadbalance_on_unavailable() -> None: channel_0 = MockChannel() channel_0.add_response( @@ -720,15 +708,16 @@ def test_client_loadbalance_on_unavailable() -> None: ), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.side_effect = [channel_0, channel_1] - with patch("sentry.taskworker.client.client.random.choice") as mock_randchoice: + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: mock_randchoice.side_effect = [ "localhost-0:50051", "localhost-1:50051", ] - client = TaskworkerClient( + client = TaskbrokerClient( hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=2), + 
metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3, ) @@ -753,7 +742,6 @@ def test_client_loadbalance_on_unavailable() -> None: assert client._num_consecutive_unavailable_errors == 0 -@django_db_all def test_client_single_host_unavailable() -> None: channel = MockChannel() channel.add_response( @@ -782,10 +770,11 @@ def test_client_single_host_unavailable() -> None: ), ) - with (patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel,): + with (patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel,): mock_channel.return_value = channel - client = TaskworkerClient( + client = TaskbrokerClient( hosts=["localhost-0:50051"], + metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3, temporary_unavailable_host_timeout=2, ) @@ -803,7 +792,6 @@ def test_client_single_host_unavailable() -> None: assert client._cur_host == "localhost-0:50051" -@django_db_all def test_client_reset_errors_after_success() -> None: channel = MockChannel() channel.add_response( @@ -828,9 +816,9 @@ def test_client_reset_errors_after_success() -> None: MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), ) - with patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel: + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskworkerClient(["localhost:50051"], max_consecutive_unavailable_errors=3) + client = TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3) with pytest.raises(grpc.RpcError, match="host is unavailable"): client.get_task() @@ -845,7 +833,6 @@ def test_client_reset_errors_after_success() -> None: assert client._num_consecutive_unavailable_errors == 1 -@django_db_all def test_client_update_task_host_unavailable() -> None: channel = MockChannel() channel.add_response( @@ -880,12 +867,13 @@ def mock_time(): return current_time with ( - patch("sentry.taskworker.client.client.grpc.insecure_channel") as mock_channel, - patch("sentry.taskworker.client.client.time.time", side_effect=mock_time), + patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel, + patch("taskbroker_client.worker.client.time.time", side_effect=mock_time), ): mock_channel.return_value = channel - client = TaskworkerClient( + client = TaskbrokerClient( ["localhost:50051"], + metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3, temporary_unavailable_host_timeout=10, ) From cd4d8172edcc4ed5f66e9adb5f3aa3663cd9f55b Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 18:15:50 -0500 Subject: [PATCH 11/19] Get worker tests passing. 
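Worker tests now boot a real importable example application instead of sentry's task modules. As a rough sketch of the pattern the tests rely on (the kwargs mirror the ones used in TestTaskWorker):

    # The worker resolves its app from a "module:attribute" path, so the
    # example app must be importable from both the parent process and the
    # child processes that execute tasks.
    from taskbroker_client.worker.worker import TaskWorker

    worker = TaskWorker(
        app_module="examples.example_app:app",
        broker_hosts=["127.0.0.1:50051"],
        max_child_task_count=100,
        process_type="fork",
    )

Child processes re-import the app with import_app(app_module) and call app.load_modules() themselves, so task registration happens inside the child instead of being inherited from the parent.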
--- clients/python/src/examples/__init__.py | 0 clients/python/src/examples/example_app.py | 89 +++++++++++++ clients/python/src/examples/store.py | 11 ++ clients/python/src/taskbroker_client/app.py | 6 +- .../python/src/taskbroker_client/metrics.py | 27 +++- clients/python/src/taskbroker_client/retry.py | 8 +- .../taskbroker_client/worker/workerchild.py | 36 ++--- clients/python/tests/test_app.py | 15 +-- clients/python/tests/worker/test_worker.py | 126 +++++------------- 9 files changed, 179 insertions(+), 139 deletions(-) create mode 100644 clients/python/src/examples/__init__.py create mode 100644 clients/python/src/examples/example_app.py create mode 100644 clients/python/src/examples/store.py diff --git a/clients/python/src/examples/__init__.py b/clients/python/src/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/examples/example_app.py b/clients/python/src/examples/example_app.py new file mode 100644 index 00000000..28e6c9f6 --- /dev/null +++ b/clients/python/src/examples/example_app.py @@ -0,0 +1,89 @@ +""" +Example taskbroker application with tasks + +Used in tests for the worker. +""" + +import logging +from time import sleep +from typing import Any +from redis import StrictRedis +from arroyo.backends.kafka import KafkaProducer + +from taskbroker_client.app import TaskbrokerApp +from taskbroker_client.retry import LastAction, NoRetriesRemainingError, Retry, RetryTaskError +from taskbroker_client.retry import retry_task as retry_task_helper +from examples.store import StubAtMostOnce + +logger = logging.getLogger(__name__) + +def producer_factory(topic: str) -> KafkaProducer: + # TODO use env vars for kafka host/port + config = { + "bootstrap.servers": "127.0.0.1:9092", + "compression.type": "lz4", + "message.max.bytes": 50000000, # 50MB + } + return KafkaProducer(config) + + +app = TaskbrokerApp( + producer_factory=producer_factory, + at_most_once_store=StubAtMostOnce(), +) + +# Create a namespace and register tasks +exampletasks = app.taskregistry.create_namespace("examples") + + +@exampletasks.register(name="examples.simple_task") +def simple_task(*args: list[Any], **kwargs: dict[str, Any]) -> None: + sleep(0.1) + logger.debug("simple_task complete") + + +@exampletasks.register(name="examples.retry_task", retry=Retry(times=2)) +def retry_task() -> None: + raise RetryTaskError + + +@exampletasks.register(name="examples.fail_task") +def fail_task() -> None: + raise ValueError("nope") + + +@exampletasks.register(name="examples.at_most_once", at_most_once=True) +def at_most_once_task() -> None: + pass + + +@exampletasks.register( + name="examples.retry_state", retry=Retry(times=2, times_exceeded=LastAction.Deadletter) +) +def retry_state() -> None: + try: + retry_task_helper() + except NoRetriesRemainingError: + # TODO read host from env vars + redis = StrictRedis(host="localhost", port=6379, decode_responses=True) + redis.set("no-retries-remaining", 1) + + +@exampletasks.register( + name="examples.will_retry", + retry=Retry(times=3, on=(RuntimeError,), times_exceeded=LastAction.Discard), +) +def will_retry(failure: str) -> None: + if failure == "retry": + logger.debug("going to retry with explicit retry error") + raise RetryTaskError + if failure == "raise": + logger.debug("raising runtimeerror") + raise RuntimeError("oh no") + logger.debug("got %s", failure) + + +@exampletasks.register(name="examples.timed") +def timed_task(sleep_seconds: float | str, *args: list[Any], **kwargs: dict[str, Any]) -> None: + 
sleep(float(sleep_seconds)) + logger.debug("timed_task complete") diff --git a/clients/python/src/examples/store.py b/clients/python/src/examples/store.py new file mode 100644 index 00000000..ee0c48dc --- /dev/null +++ b/clients/python/src/examples/store.py @@ -0,0 +1,11 @@ +from taskbroker_client.types import AtMostOnceStore + +class StubAtMostOnce(AtMostOnceStore): + def __init__(self) -> None: + self._keys: dict[str, str] = {} + + def add(self, key: str, value: str, timeout: int) -> bool: + if key in self._keys: + return False + self._keys[key] = value + return True diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py index adb34c21..328d84bf 100644 --- a/clients/python/src/taskbroker_client/app.py +++ b/clients/python/src/taskbroker_client/app.py @@ -26,6 +26,7 @@ def __init__( self.metrics = self._build_metrics(metrics_class) self._config = { "rpc_secret": None, + "grpc_config": None, "at_most_once_timeout": None, } self._modules: Iterable[str] = [] @@ -34,8 +35,7 @@ def __init__( router=self._build_router(router_class), metrics=self.metrics, ) - if at_most_once_store: - self.at_most_once_store(at_most_once_store) + self.at_most_once_store(at_most_once_store) def _build_router(self, router_name: str | TaskRouter) -> TaskRouter: if isinstance(router_name, str): @@ -80,7 +80,7 @@ def load_modules(self) -> None: for mod in self._modules: __import__(mod) - def at_most_once_store(self, backend: AtMostOnceStore) -> None: + def at_most_once_store(self, backend: AtMostOnceStore | None) -> None: """ Set the backend store for `at_most_once` tasks. The storage implementation should support atomic operations diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py index 64a025d7..93246d01 100644 --- a/clients/python/src/taskbroker_client/metrics.py +++ b/clients/python/src/taskbroker_client/metrics.py @@ -5,7 +5,7 @@ from contextlib import contextmanager from typing import Generator, Protocol, runtime_checkable -Tags = Mapping[str, str] +Tags = Mapping[str, str | int | float] @runtime_checkable @@ -45,7 +44,6 @@ def distribution( def timer( self, key: str, - instance: str | None = None, tags: Tags | None = None, sample_rate: float | None = None, stacklevel: int = 0, @@ -55,6 +54,17 @@ def timer( """ raise NotImplementedError + @contextmanager + def track_memory_usage( + self, + key: str, + tags: Tags | None = None, + ) -> Generator[None]: + """ + Records the change in RSS memory usage across the context manager as a distribution metric. + """ + raise NotImplementedError + class NoOpMetricsBackend(MetricsBackend): """ @@ -84,9 +94,20 @@ def distribution( def timer( self, key: str, - instance: str | None = None, tags: Tags | None = None, sample_rate: float | None = None, stacklevel: int = 0, ) -> Generator[None]: yield None + + @contextmanager + def track_memory_usage( + self, + key: str, + tags: Tags | None = None, + ) -> Generator[None]: + """ + Records a distribution metric that tracks the delta + of rss_usage between the context manager opening and closing.
+ """ + yield None diff --git a/clients/python/src/taskbroker_client/retry.py b/clients/python/src/taskbroker_client/retry.py index 7235a864..09951b17 100644 --- a/clients/python/src/taskbroker_client/retry.py +++ b/clients/python/src/taskbroker_client/retry.py @@ -1,9 +1,9 @@ from __future__ import annotations +import logging from enum import Enum from multiprocessing.context import TimeoutError -# from sentry.utils import metrics from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DEADLETTER, ON_ATTEMPTS_EXCEEDED_DISCARD, @@ -13,6 +13,8 @@ from taskbroker_client.state import current_task +logger = logging.getLogger(__name__) + class RetryTaskError(Exception): """ @@ -48,7 +50,9 @@ def retry_task(exc: Exception | None = None, raise_on_no_retries: bool = True) - """ current = current_task() if current and not current.retries_remaining: - metrics.incr("taskworker.retry.no_retries_remaining") + logger.info("taskworker.retry.no_retries_remaining", extra={ + "taskname": current.taskname + }) if raise_on_no_retries: raise NoRetriesRemainingError() else: diff --git a/clients/python/src/taskbroker_client/worker/workerchild.py b/clients/python/src/taskbroker_client/worker/workerchild.py index d4cf1309..53338949 100644 --- a/clients/python/src/taskbroker_client/worker/workerchild.py +++ b/clients/python/src/taskbroker_client/worker/workerchild.py @@ -15,9 +15,13 @@ import orjson import sentry_sdk import zstandard as zstd -from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation -from sentry.taskworker.client.processing_result import ProcessingResult -from sentry.taskworker.constants import CompressionType + +from taskbroker_client.app import import_app +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.constants import CompressionType +from taskbroker_client.retry import NoRetriesRemainingError +from taskbroker_client.state import clear_current_task, current_task, set_current_task +from taskbroker_client.task import Task from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( TASK_ACTIVATION_STATUS_COMPLETE, TASK_ACTIVATION_STATUS_FAILURE, @@ -35,18 +39,6 @@ class ProcessingDeadlineExceeded(BaseException): pass -def child_worker_init(process_type: str) -> None: - """ - Configure django and load task modules for workers - Child worker processes are spawned and don't inherit db - connections or configuration from the parent process. - """ - from sentry.runner import configure - - if process_type == "spawn": - configure() - - @contextlib.contextmanager def timeout_alarm( seconds: int, handler: Callable[[int, FrameType | None], None] @@ -107,18 +99,10 @@ def child_process( and not the module root. If modules that include django are imported at the module level the wrong django settings will be used. 
""" - child_worker_init(process_type) - - from sentry.taskworker.app import import_app - from sentry.taskworker.retry import NoRetriesRemainingError - from sentry.taskworker.state import clear_current_task, current_task, set_current_task - from sentry.taskworker.task import Task - from sentry.utils import metrics - from sentry.utils.memory import track_memory_usage - app = import_app(app_module) app.load_modules() taskregistry = app.taskregistry + metrics = app.metrics def _get_known_task(activation: TaskActivation) -> Task[Any, Any] | None: if not taskregistry.contains(activation.namespace): @@ -344,7 +328,7 @@ def _execute_activation(task_func: Task[Any, Any], activation: TaskActivation) - } } with ( - track_memory_usage( + metrics.track_memory_usage( "taskworker.worker.memory_change", tags={"namespace": activation.namespace, "taskname": activation.taskname}, ), @@ -440,7 +424,7 @@ def record_task_execution( namespace = taskregistry.get(activation.namespace) metrics.incr( "taskworker.cogs.usage", - amount=int(execution_duration * 1000), + value=int(execution_duration * 1000), tags={"feature": namespace.app_feature}, ) diff --git a/clients/python/tests/test_app.py b/clients/python/tests/test_app.py index 14203925..74337fdf 100644 --- a/clients/python/tests/test_app.py +++ b/clients/python/tests/test_app.py @@ -2,21 +2,10 @@ from taskbroker_client.app import TaskbrokerApp from taskbroker_client.router import TaskRouter -from taskbroker_client.types import AtMostOnceStore +from examples.store import StubAtMostOnce from .conftest import producer_factory -class StubAtMostOnce(AtMostOnceStore): - def __init__(self) -> None: - self._keys: dict[str, str] = {} - - def add(self, key: str, value: str, timeout: int) -> bool: - if key in self._keys: - return False - self._keys[key] = value - return True - - class StubRouter(TaskRouter): def route_namespace(self, name: str) -> str: return "honk" @@ -38,7 +27,7 @@ def test_taskregistry_router_str() -> None: router_class="taskbroker_client.router.DefaultRouter", ) ns = app.taskregistry.create_namespace("test") - assert ns.topic == "default" + assert ns.topic == "taskbroker" def test_set_config() -> None: diff --git a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py index 75128c73..ab2d1f24 100644 --- a/clients/python/tests/worker/test_worker.py +++ b/clients/python/tests/worker/test_worker.py @@ -1,24 +1,21 @@ import base64 import queue import time +from redis import StrictRedis from multiprocessing import Event -from unittest import mock +from unittest import mock, TestCase import grpc import orjson import pytest import zstandard as zstd -from sentry.taskworker.client.inflight_task_activation import InflightTaskActivation -from sentry.taskworker.client.processing_result import ProcessingResult -from sentry.taskworker.constants import CompressionType -from sentry.taskworker.retry import NoRetriesRemainingError -from sentry.taskworker.state import current_task -from sentry.taskworker.worker import TaskWorker -from sentry.taskworker.workerchild import ProcessingDeadlineExceeded, child_process -from sentry.testutils.cases import TestCase -from sentry.testutils.helpers.options import override_options -from sentry.testutils.thread_leaks.pytest import thread_leak_allowlist -from sentry.utils.redis import redis_clusters +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.constants import CompressionType +from taskbroker_client.retry import NoRetriesRemainingError +from 
taskbroker_client.state import current_task +from taskbroker_client.worker.worker import TaskWorker +from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded, child_process +# from sentry.utils.redis import redis_clusters from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DISCARD, TASK_ACTIVATION_STATUS_COMPLETE, @@ -28,6 +25,7 @@ TaskActivation, ) from sentry_sdk.crons import MonitorStatus +from ..example_app import exampletasks SIMPLE_TASK = InflightTaskActivation( host="localhost:50051", @@ -148,19 +146,10 @@ ) -@pytest.mark.django_db -@thread_leak_allowlist(reason="taskworker", issue=97034) class TestTaskWorker(TestCase): - def test_tasks_exist(self) -> None: - import sentry.taskworker.tasks.examples as example_tasks - - assert example_tasks.simple_task - assert example_tasks.retry_task - assert example_tasks.at_most_once_task - def test_fetch_task(self) -> None: taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=100, process_type="fork", @@ -176,7 +165,7 @@ def test_fetch_task(self) -> None: def test_fetch_no_task(self) -> None: taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=100, process_type="fork", @@ -191,7 +180,7 @@ def test_fetch_no_task(self) -> None: def test_run_once_no_next_task(self) -> None: max_runtime = 5 taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -227,7 +216,7 @@ def test_run_once_with_next_task(self) -> None: # be processed. 
max_runtime = 5 taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -264,49 +253,12 @@ def update_task_response(*args, **kwargs): ) assert mock_client.update_task.call_args.args[1] is None - @override_options({"taskworker.fetch_next.disabled_pools": ["testing"]}) - def test_run_once_with_fetch_next_disabled(self) -> None: - # Cover the scenario where taskworker.fetch_next.disabled_pools is defined - max_runtime = 5 - taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", - broker_hosts=["127.0.0.1:50051"], - max_child_task_count=1, - process_type="fork", - processing_pool_name="testing", - ) - with mock.patch.object(taskworker, "client") as mock_client: - mock_client.update_task.return_value = None - mock_client.get_task.return_value = SIMPLE_TASK - taskworker.start_result_thread() - taskworker.start_spawn_children_thread() - - # Run until two tasks have been processed - start = time.time() - while True: - taskworker.run_once() - if mock_client.update_task.call_count >= 2: - break - if time.time() - start > max_runtime: - taskworker.shutdown() - raise AssertionError("Timeout waiting for update_task to be called") - - taskworker.shutdown() - assert mock_client.get_task.called - assert mock_client.update_task.call_count == 2 - assert mock_client.update_task.call_args.args[0].host == "localhost:50051" - assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id - assert ( - mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE - ) - assert mock_client.update_task.call_args.args[1] is None - def test_run_once_with_update_failure(self) -> None: # Cover the scenario where update_task fails a few times in a row # We should retain the result until RPC succeeds. 
max_runtime = 5 taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -352,7 +304,7 @@ def test_run_once_current_task_state(self) -> None: # to raise and catch a NoRetriesRemainingError max_runtime = 5 taskworker = TaskWorker( - app_module="sentry.taskworker.runtime:app", + app_module="examples.example_app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -389,14 +341,14 @@ def update_task_response(*args, **kwargs): mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE ) - redis = redis_clusters.get("default") + # TODO read host from env vars + redis = StrictRedis(host="localhost", port=6379, decode_responses=True) assert current_task() is None, "should clear current task on completion" assert redis.get("no-retries-remaining"), "key should exist if except block was hit" redis.delete("no-retries-remaining") -@pytest.mark.django_db -@mock.patch("sentry.taskworker.workerchild.capture_checkin") +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -404,7 +356,7 @@ def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: todo.put(SIMPLE_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -420,7 +372,6 @@ def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: assert mock_capture_checkin.call_count == 0 -@pytest.mark.django_db def test_child_process_remove_start_time_kwargs() -> None: activation = InflightTaskActivation( host="localhost:50051", @@ -439,7 +390,7 @@ def test_child_process_remove_start_time_kwargs() -> None: todo.put(activation) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -454,7 +405,6 @@ def test_child_process_remove_start_time_kwargs() -> None: assert result.status == TASK_ACTIVATION_STATUS_COMPLETE -@pytest.mark.django_db def test_child_process_retry_task() -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -462,7 +412,7 @@ def test_child_process_retry_task() -> None: todo.put(RETRY_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -477,8 +427,7 @@ def test_child_process_retry_task() -> None: assert result.status == TASK_ACTIVATION_STATUS_RETRY -@mock.patch("sentry.taskworker.workerchild.sentry_sdk.capture_exception") -@pytest.mark.django_db +@mock.patch("taskbroker_client.worker.workerchild.sentry_sdk.capture_exception") def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: # Create an activation that is on its final attempt and # will raise an error again. 
@@ -503,7 +452,7 @@ def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: todo.put(activation) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -524,7 +473,6 @@ def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: assert isinstance(capture_call[0].__cause__, RuntimeError) -@pytest.mark.django_db def test_child_process_failure_task() -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -532,7 +480,7 @@ def test_child_process_failure_task() -> None: todo.put(FAIL_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -547,7 +495,6 @@ def test_child_process_failure_task() -> None: assert result.status == TASK_ACTIVATION_STATUS_FAILURE -@pytest.mark.django_db def test_child_process_shutdown() -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -556,7 +503,7 @@ def test_child_process_shutdown() -> None: todo.put(SIMPLE_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -570,7 +517,6 @@ def test_child_process_shutdown() -> None: assert processed.qsize() == 0 -@pytest.mark.django_db def test_child_process_unknown_task() -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -579,7 +525,7 @@ def test_child_process_unknown_task() -> None: todo.put(UNDEFINED_TASK) todo.put(SIMPLE_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -597,7 +543,6 @@ def test_child_process_unknown_task() -> None: assert result.status == TASK_ACTIVATION_STATUS_COMPLETE -@pytest.mark.django_db def test_child_process_at_most_once() -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -607,7 +552,7 @@ def test_child_process_at_most_once() -> None: todo.put(AT_MOST_ONCE_TASK) todo.put(SIMPLE_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -626,8 +571,7 @@ def test_child_process_at_most_once() -> None: assert result.status == TASK_ACTIVATION_STATUS_COMPLETE -@pytest.mark.django_db -@mock.patch("sentry.taskworker.workerchild.capture_checkin") +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -635,7 +579,7 @@ def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: todo.put(SCHEDULED_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -658,8 +602,7 @@ def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: ) -@pytest.mark.django_db -@mock.patch("sentry.taskworker.workerchild.sentry_sdk.capture_exception") +@mock.patch("taskbroker_client.worker.workerchild.sentry_sdk.capture_exception") def test_child_process_terminate_task(mock_capture: mock.Mock) -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() processed: queue.Queue[ProcessingResult] = queue.Queue() @@ -679,7 +622,7 @@ def test_child_process_terminate_task(mock_capture: 
mock.Mock) -> None: todo.put(sleepy) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, @@ -696,8 +639,7 @@ def test_child_process_terminate_task(mock_capture: mock.Mock) -> None: assert type(mock_capture.call_args.args[0]) is ProcessingDeadlineExceeded -@pytest.mark.django_db -@mock.patch("sentry.taskworker.workerchild.capture_checkin") +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") def test_child_process_decompression(mock_capture_checkin: mock.MagicMock) -> None: todo: queue.Queue[InflightTaskActivation] = queue.Queue() @@ -706,7 +648,7 @@ def test_child_process_decompression(mock_capture_checkin: mock.MagicMock) -> No todo.put(COMPRESSED_TASK) child_process( - "sentry.taskworker.runtime:app", + "examples.example_app:app", todo, processed, shutdown, From 276d5aad52a40d493b1bbd23001c06b6e1c40574 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 18:18:26 -0500 Subject: [PATCH 12/19] Remove TODO list --- clients/python/src/taskbroker_client/TODO | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 clients/python/src/taskbroker_client/TODO diff --git a/clients/python/src/taskbroker_client/TODO b/clients/python/src/taskbroker_client/TODO deleted file mode 100644 index 56311def..00000000 --- a/clients/python/src/taskbroker_client/TODO +++ /dev/null @@ -1,9 +0,0 @@ -Fix ups -- metrics - How will that even work? - Maybe attach a metrics backend to the app, and import the app all over the place? -- Kafka producers? - Use arroyo? How to get at the singletons's? Perhaps have a protocol for getting a producer based on topic? - - -Application improvements -- need a way to inject a router object. -- need a way to provide a producer factory. From 1211fdec488a900daed9991db2efbb44d62aa9b0 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 28 Nov 2025 18:19:49 -0500 Subject: [PATCH 13/19] Cleanup --- clients/python/src/taskbroker_client/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/clients/python/src/taskbroker_client/__init__.py b/clients/python/src/taskbroker_client/__init__.py index ff768f4d..e69de29b 100644 --- a/clients/python/src/taskbroker_client/__init__.py +++ b/clients/python/src/taskbroker_client/__init__.py @@ -1,2 +0,0 @@ -def hello() -> str: - return "Hello from python!" From cccdd95cd8f416b2b9b9048a47c4cd5af081b8ae Mon Sep 17 00:00:00 2001 From: Mark Story Date: Sat, 29 Nov 2025 13:44:13 -0500 Subject: [PATCH 14/19] Move integration tests up a directory Fewer directories now.
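With integration_tests/ now one level closer to the repository root, the helpers.py hunk below drops one .parent from TASKBROKER_ROOT. A minimal sketch of that path arithmetic (the repo/ prefix is a hypothetical example, not part of the patch):

    from pathlib import Path

    # Before the move: python/integration_tests/helpers.py sits three levels below the root.
    old = Path("repo/python/integration_tests/helpers.py")
    assert old.parent.parent.parent == Path("repo")

    # After the move: integration_tests/helpers.py sits two levels below the root.
    new = Path("repo/integration_tests/helpers.py")
    assert new.parent.parent == Path("repo")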
--- Makefile | 24 +++---- {python => integration_tests}/__init__.py | 0 .../helpers.py | 2 +- integration_tests/pyproject.toml | 71 +++++++++++++++++++ .../runner.py | 0 .../test_consumer_rebalancing.py | 1 + .../test_failed_tasks.py | 11 +-- .../test_task_worker_processing.py | 1 + .../test_upkeep_delay.py | 3 +- .../test_upkeep_expiry.py | 11 +-- .../test_upkeep_retry.py | 1 + .../worker.py | 14 ++-- pyproject.toml | 8 +-- python/integration_tests/__init__.py | 0 14 files changed, 111 insertions(+), 36 deletions(-) rename {python => integration_tests}/__init__.py (100%) rename {python/integration_tests => integration_tests}/helpers.py (99%) create mode 100644 integration_tests/pyproject.toml rename {python/integration_tests => integration_tests}/runner.py (100%) rename {python/integration_tests => integration_tests}/test_consumer_rebalancing.py (99%) rename {python/integration_tests => integration_tests}/test_failed_tasks.py (99%) rename {python/integration_tests => integration_tests}/test_task_worker_processing.py (99%) rename {python/integration_tests => integration_tests}/test_upkeep_delay.py (99%) rename {python/integration_tests => integration_tests}/test_upkeep_expiry.py (99%) rename {python/integration_tests => integration_tests}/test_upkeep_retry.py (99%) rename {python/integration_tests => integration_tests}/worker.py (100%) delete mode 100644 python/integration_tests/__init__.py diff --git a/Makefile b/Makefile index 1bdac199..7fd7f181 100644 --- a/Makefile +++ b/Makefile @@ -46,33 +46,33 @@ reset-kafka: setup ## Reset kafka .PHONY: reset-kafka test-rebalance: build reset-kafka ## Run the rebalance integration test - python -m pytest python/integration_tests/test_consumer_rebalancing.py -s - rm -r python/integration_tests/.tests_output/test_consumer_rebalancing + python -m pytest integration_tests/test_consumer_rebalancing.py -s + rm -r integration_tests/.tests_output/test_consumer_rebalancing .PHONY: test-rebalance test-worker-processing: build reset-kafka ## Run the worker processing integration test - python -m pytest python/integration_tests/test_task_worker_processing.py -s - rm -r python/integration_tests/.tests_output/test_task_worker_processing + python -m pytest integration_tests/test_task_worker_processing.py -s + rm -r integration_tests/.tests_output/test_task_worker_processing .PHONY: test-worker-processing test-upkeep-retry: build reset-kafka ## Run the upkeep retry integration test - python -m pytest python/integration_tests/test_upkeep_retry.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_retry + python -m pytest integration_tests/test_upkeep_retry.py -s + rm -r integration_tests/.tests_output/test_upkeep_retry .PHONY: test-upkeep-retry test-upkeep-expiry: build reset-kafka ## Run the upkeep expiry integration test - python -m pytest python/integration_tests/test_upkeep_expiry.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_expiry + python -m pytest integration_tests/test_upkeep_expiry.py -s + rm -r integration_tests/.tests_output/test_upkeep_expiry .PHONY: test-upkeep-expiry test-upkeep-delay: build reset-kafka ## Run the upkeep delay integration test - python -m pytest python/integration_tests/test_upkeep_delay.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_delay + python -m pytest integration_tests/test_upkeep_delay.py -s + rm -r integration_tests/.tests_output/test_upkeep_delay .PHONY: test-upkeep-delay test-failed-tasks: build reset-kafka ## Run the failed tasks integration test - python -m pytest 
python/integration_tests/test_failed_tasks.py -s - rm -r python/integration_tests/.tests_output/test_failed_tasks + python -m pytest integration_tests/test_failed_tasks.py -s + rm -r integration_tests/.tests_output/test_failed_tasks .PHONY: test-failed-tasks integration-test: test-rebalance test-worker-processing test-upkeep-retry test-upkeep-expiry test-upkeep-delay test-failed-tasks ## Run all integration tests diff --git a/python/__init__.py b/integration_tests/__init__.py similarity index 100% rename from python/__init__.py rename to integration_tests/__init__.py diff --git a/python/integration_tests/helpers.py b/integration_tests/helpers.py similarity index 99% rename from python/integration_tests/helpers.py rename to integration_tests/helpers.py index e371223a..cf1a1ba3 100644 --- a/python/integration_tests/helpers.py +++ b/integration_tests/helpers.py @@ -15,7 +15,7 @@ TaskActivation, ) -TASKBROKER_ROOT = Path(__file__).parent.parent.parent +TASKBROKER_ROOT = Path(__file__).parent.parent TASKBROKER_BIN = TASKBROKER_ROOT / "target/debug/taskbroker" TESTS_OUTPUT_ROOT = Path(__file__).parent / ".tests_output" TEST_PRODUCER_CONFIG = { diff --git a/integration_tests/pyproject.toml b/integration_tests/pyproject.toml new file mode 100644 index 00000000..7ff79914 --- /dev/null +++ b/integration_tests/pyproject.toml @@ -0,0 +1,71 @@ +[project] +name = "taskbroker-integration-tests" +# we only have this here to make uv happy +# we use uv for dependency management, not packaging +version = "0.0.0" + +[dependency-groups] +dev = [ + "black==24.10.0", + "devservices>=1.2.1", + "pre-commit>=4.2.0", + "pytest>=8.3.3", + "sentry-devenv>=1.22.2", + "confluent_kafka>=2.3.0", + "grpcio==1.66.1", + "orjson>=3.10.10", + "protobuf>=5.28.3", + "pyyaml>=6.0.2", + "sentry-protos>=0.2.0", + "flake8>=7.3.0", + "isort>=5.13.2", + "mypy>=1.17.1", + "types-pyyaml>=6.0.12.20241230", + "types-protobuf>=6.30.2.20250703", +] + +[tool.uv] +environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"] + +[[tool.uv.index]] +url = "https://pypi.devinfra.sentry.io/simple" +default = true + +[tool.pytest.ini_options] +pythonpath = ["python"] +testpaths = ["."] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.mypy] +mypy_path = "python" +explicit_package_bases = true +# minimal strictness settings +check_untyped_defs = true +no_implicit_reexport = true +warn_unreachable = true +warn_unused_configs = true +warn_unused_ignores = true +warn_redundant_casts = true +enable_error_code = ["ignore-without-code", "redundant-self"] +local_partial_types = true # compat with dmypy +disallow_any_generics = true +disallow_untyped_defs = true + +# begin: missing 3rd party stubs +[[tool.mypy.overrides]] +module = [ + "confluent_kafka.*", +] +ignore_missing_imports = true +# end: missing 3rd party stubs + +[tool.black] +# File filtering is taken care of in pre-commit. 
+line-length = 100 +target-version = ['py311'] + +[tool.isort] +profile = "black" +line_length = 100 +lines_between_sections = 1 diff --git a/python/integration_tests/runner.py b/integration_tests/runner.py similarity index 100% rename from python/integration_tests/runner.py rename to integration_tests/runner.py diff --git a/python/integration_tests/test_consumer_rebalancing.py b/integration_tests/test_consumer_rebalancing.py similarity index 99% rename from python/integration_tests/test_consumer_rebalancing.py rename to integration_tests/test_consumer_rebalancing.py index 8d2dd342..f4380d1e 100644 --- a/python/integration_tests/test_consumer_rebalancing.py +++ b/integration_tests/test_consumer_rebalancing.py @@ -7,6 +7,7 @@ from threading import Thread import yaml + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, diff --git a/python/integration_tests/test_failed_tasks.py b/integration_tests/test_failed_tasks.py similarity index 99% rename from python/integration_tests/test_failed_tasks.py rename to integration_tests/test_failed_tasks.py index 4a914ae2..e2b481e0 100644 --- a/python/integration_tests/test_failed_tasks.py +++ b/integration_tests/test_failed_tasks.py @@ -8,6 +8,12 @@ import pytest import yaml from google.protobuf.timestamp_pb2 import Timestamp +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + OnAttemptsExceeded, + RetryState, + TaskActivation, +) + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, @@ -18,11 +24,6 @@ send_custom_messages_to_topic, ) from integration_tests.worker import ConfigurableTaskWorker, TaskWorkerClient -from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( - OnAttemptsExceeded, - RetryState, - TaskActivation, -) TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_failed_tasks" diff --git a/python/integration_tests/test_task_worker_processing.py b/integration_tests/test_task_worker_processing.py similarity index 99% rename from python/integration_tests/test_task_worker_processing.py rename to integration_tests/test_task_worker_processing.py index 0bc35a25..b8a9f4ca 100644 --- a/python/integration_tests/test_task_worker_processing.py +++ b/integration_tests/test_task_worker_processing.py @@ -6,6 +6,7 @@ import pytest import yaml + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, diff --git a/python/integration_tests/test_upkeep_delay.py b/integration_tests/test_upkeep_delay.py similarity index 99% rename from python/integration_tests/test_upkeep_delay.py rename to integration_tests/test_upkeep_delay.py index 9d30c620..e1eea1e1 100644 --- a/python/integration_tests/test_upkeep_delay.py +++ b/integration_tests/test_upkeep_delay.py @@ -9,6 +9,8 @@ import orjson import yaml from google.protobuf.timestamp_pb2 import Timestamp +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, @@ -18,7 +20,6 @@ get_num_tasks_in_sqlite, send_custom_messages_to_topic, ) -from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_upkeep_delay" diff --git a/python/integration_tests/test_upkeep_expiry.py b/integration_tests/test_upkeep_expiry.py similarity index 99% rename from python/integration_tests/test_upkeep_expiry.py rename to integration_tests/test_upkeep_expiry.py index 16a1ff37..73ea7195 100644 --- a/python/integration_tests/test_upkeep_expiry.py +++ b/integration_tests/test_upkeep_expiry.py @@ -7,6 +7,12 @@ import orjson import yaml from 
google.protobuf.timestamp_pb2 import Timestamp +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + OnAttemptsExceeded, + RetryState, + TaskActivation, +) + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, @@ -15,11 +21,6 @@ get_num_tasks_in_sqlite, send_custom_messages_to_topic, ) -from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( - OnAttemptsExceeded, - RetryState, - TaskActivation, -) TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_upkeep_expiry" diff --git a/python/integration_tests/test_upkeep_retry.py b/integration_tests/test_upkeep_retry.py similarity index 99% rename from python/integration_tests/test_upkeep_retry.py rename to integration_tests/test_upkeep_retry.py index b758a8b4..4f3892d7 100644 --- a/python/integration_tests/test_upkeep_retry.py +++ b/integration_tests/test_upkeep_retry.py @@ -6,6 +6,7 @@ import pytest import yaml + from integration_tests.helpers import ( TASKBROKER_BIN, TESTS_OUTPUT_ROOT, diff --git a/python/integration_tests/worker.py b/integration_tests/worker.py similarity index 100% rename from python/integration_tests/worker.py rename to integration_tests/worker.py index e42115fc..63125c5f 100644 --- a/python/integration_tests/worker.py +++ b/integration_tests/worker.py @@ -1,17 +1,17 @@ -import grpc -import time -import random import logging +import random +import time +import grpc from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( - TaskActivation, + TASK_ACTIVATION_STATUS_COMPLETE, + TASK_ACTIVATION_STATUS_FAILURE, + TASK_ACTIVATION_STATUS_RETRY, FetchNextTask, GetTaskRequest, SetTaskStatusRequest, + TaskActivation, TaskActivationStatus, - TASK_ACTIVATION_STATUS_COMPLETE, - TASK_ACTIVATION_STATUS_FAILURE, - TASK_ACTIVATION_STATUS_RETRY, ) from sentry_protos.taskbroker.v1.taskbroker_pb2_grpc import ConsumerServiceStub diff --git a/pyproject.toml b/pyproject.toml index 4a6985d7..809e69c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,7 @@ name = "taskbroker" # we only have this here to make uv happy # we use uv for dependency management, not packaging version = "0.0.0" +requires-python = ">=3.11" [dependency-groups] dev = [ @@ -31,11 +32,8 @@ environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"] url = "https://pypi.devinfra.sentry.io/simple" default = true -[tool.pytest.ini_options] -pythonpath = ["python"] -testpaths = ["python/integration_tests"] -python_files = ["test_*.py"] -python_functions = ["test_*"] +[tool.uv.workspace] +members = ["integration_tests", "clients/python"] [tool.mypy] mypy_path = "python" diff --git a/python/integration_tests/__init__.py b/python/integration_tests/__init__.py deleted file mode 100644 index e69de29b..00000000 From d1cca672155bde2c231ac4ab29f837941aea7c2e Mon Sep 17 00:00:00 2001 From: Mark Story Date: Sat, 29 Nov 2025 14:06:42 -0500 Subject: [PATCH 15/19] Update imports --- clients/python/pyproject.toml | 1 - clients/python/tests/worker/test_worker.py | 20 +++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 3e2325a8..42ffb3a5 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -3,7 +3,6 @@ name = "taskbroker-client" version = "0.1.0" description = "Taskbroker python client and worker runtime" readme = "README.md" -requires-python = ">=3.12.11" dependencies = [ "sentry-arroyo>=2.33.1", "sentry-sdk[http2]>=2.43.0", diff --git a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py index 
ab2d1f24..a5f35d77 100644 --- a/clients/python/tests/worker/test_worker.py +++ b/clients/python/tests/worker/test_worker.py @@ -1,20 +1,15 @@ import base64 import queue import time -from redis import StrictRedis from multiprocessing import Event -from unittest import mock, TestCase +from unittest import TestCase, mock import grpc import orjson import pytest import zstandard as zstd -from taskbroker_client.types import InflightTaskActivation, ProcessingResult -from taskbroker_client.constants import CompressionType -from taskbroker_client.retry import NoRetriesRemainingError -from taskbroker_client.state import current_task -from taskbroker_client.worker.worker import TaskWorker -from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded, child_process +from redis import StrictRedis + # from sentry.utils.redis import redis_clusters from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( ON_ATTEMPTS_EXCEEDED_DISCARD, @@ -25,7 +20,14 @@ TaskActivation, ) from sentry_sdk.crons import MonitorStatus -from ..example_app import exampletasks + +from examples.example_app import exampletasks +from taskbroker_client.constants import CompressionType +from taskbroker_client.retry import NoRetriesRemainingError +from taskbroker_client.state import current_task +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.worker.worker import TaskWorker +from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded, child_process SIMPLE_TASK = InflightTaskActivation( host="localhost:50051", From 3c6ac7282c66ab05bf9c337df23386901fc42db0 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Sat, 29 Nov 2025 14:24:07 -0500 Subject: [PATCH 16/19] Update uv.lock --- uv.lock | 331 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 328 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 7c2fd104..a1048f7f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,6 @@ version = 1 revision = 2 -requires-python = ">=3.12" +requires-python = ">=3.11" resolution-markers = [ "sys_platform == 'darwin' or sys_platform == 'linux'", ] @@ -8,6 +8,13 @@ supported-markers = [ "sys_platform == 'darwin' or sys_platform == 'linux'", ] +[manifest] +members = [ + "taskbroker", + "taskbroker-client", + "taskbroker-integration-tests", +] + [[package]] name = "black" version = "24.10.0" @@ -20,6 +27,9 @@ dependencies = [ { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392" }, { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3" }, { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65" }, { url = 
"https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f" }, @@ -34,6 +44,28 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26" }, +] + [[package]] name = "cfgv" version = "3.4.0" @@ -55,6 +87,10 @@ name = "confluent-kafka" version = "2.8.0" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5de7ab587ecdc153a029d992e7d470fc68ab943e38931b18fc4a01074afd5c5c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:52a87d1a73ad91d4f81e35a8e6e961a5ad0c49ecdb198e47bd106262e968253e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f03b12d009cfb16649b0e51c06514312d5cbbbe9b06e71cf4ad781b378f8b79f" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1a01feeac7f27bff079ad1a29f1cf1b149235a975d67d7de20c1935f44b14293" }, { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:80bf43c098df04008dd6a517a9f745b67885af9c35c09d220f4d19661ae4d647" }, { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f5e5b18c7acf50777545e817e563b0fa9c74badbabf30474665c03ae8ddcc23" }, { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c540935d89acf1bc173fddd0b9b978ece348345f5a0fccf549ea8663cfa5152c" }, @@ -65,6 +101,14 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e75230b51456de5cfaefe94c35f3de5101864d8c21518f114d5cd9dd1d7d43b1" }, ] +[[package]] +name = "cronsim" +version = "2.6" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cronsim-2.6-py3-none-any.whl", hash = "sha256:a3a823ea834c29100a17ab1d4af6179c3149612d28d8e0dec8044057570246be" }, +] + [[package]] name = "devservices" version = "1.2.1" @@ -125,11 +169,67 @@ name = "grpcio" version = "1.66.1" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8a1e224ce6f740dbb6b24c58f885422deebd7eb724aff0671a847f8951857c26" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a66fe4dc35d2330c185cfbb42959f57ad36f257e0cc4557d11d9f0a3f14311df" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4573608e23f7e091acfbe3e84ac2045680b69751d8d67685ffa193a4429fedb1" }, { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fdb14bad0835914f325349ed34a51940bc2ad965142eb3090081593c6e347be9" }, { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f03a5884c56256e08fd9e262e11b5cfacf1af96e2ce78dc095d2c41ccae2c80d" }, { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ca1be089fb4446490dd1135828bd42a7c7f8421e74fa581611f7afdf7ab761" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" }, +] + +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = 
"https://pypi.devinfra.sentry.io/wheels/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5" }, +] + [[package]] name = "identify" version = "2.6.9" @@ -172,6 +272,10 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b" }, { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb" }, { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403" }, { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056" }, @@ -203,6 +307,9 @@ name = "orjson" version = "3.10.10" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:879e99486c0fbb256266c7c6a67ff84f46035e4f8749ac6317cc83dacd7f993a" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:019481fa9ea5ff13b5d5d95e6fd5ab25ded0810c80b150c2c7b1cc8660b662a7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbf3c20c6a7db69df58672a0d5815647ecf78c8e62a4d9bd284e8621c1fe5ccb" }, { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05" }, { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9" }, { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999" }, @@ -276,6 +383,14 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" }, ] +[[package]] +name = "pycparser" +version = "2.23" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934" }, +] + [[package]] name = "pyflakes" version = "3.4.0" @@ -297,11 +412,26 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" }, +] + [[package]] name = "pyyaml" version = "6.0.2" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774" }, + { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee" }, + { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85" }, { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab" }, { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725" }, { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5" }, @@ -312,6 +442,36 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5" }, ] +[[package]] +name = "redis" +version = "3.5.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" }, +] + +[[package]] +name = "redis-py-cluster" +version = "2.1.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/redis_py_cluster-2.1.3-py2.py3-none-any.whl", hash = "sha256:38f08850fde469ffd76bced7309721114acc487e52b76f374a0502c34c69b4ec" }, +] + +[[package]] +name = "sentry-arroyo" +version = "2.33.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_arroyo-2.33.1-py3-none-any.whl", hash = "sha256:10d05f81a06bd7f9ee28fe7d7a628c868c3ccbdb5987bece6d9860930e1654af" }, +] + [[package]] name = "sentry-devenv" version = "1.22.2" @@ -339,14 +499,19 @@ wheels = [ [[package]] name = "sentry-sdk" -version = "2.35.1" +version = "2.46.0" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.35.1-py2.py3-none-any.whl", hash = "sha256:13b6d6cfdae65d61fe1396a061cf9113b20f0ec1bcb257f3826b88f01bb55720" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "httpcore", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] [[package]] @@ -357,6 +522,14 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/setuptools-78.1.1-py3-none-any.whl", hash = "sha256:c3a9c4211ff4c309edb8b8c4f1cbfa7ae324c4ba9f91ff254e3d305b9fd54561" }, ] +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274" }, +] + [[package]] name = "supervisor" version = "4.2.5" @@ -415,6 +588,136 @@ dev = [ { name = "types-pyyaml", specifier = ">=6.0.12.20241230" }, ] +[[package]] +name = "taskbroker-client" +version = "0.1.0" +source = { editable = "clients/python" } +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "cronsim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "orjson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name 
= "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "redis-py-cluster", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-arroyo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-sdk", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "time-machine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.metadata] +requires-dist = [ + { name = "confluent-kafka", specifier = ">=2.3.0" }, + { name = "cronsim", specifier = ">=2.6" }, + { name = "grpcio", specifier = "==1.66.1" }, + { name = "orjson", specifier = ">=3.10.10" }, + { name = "protobuf", specifier = ">=5.28.3" }, + { name = "redis", specifier = ">=3.4.1" }, + { name = "redis-py-cluster", specifier = ">=2.1.0" }, + { name = "sentry-arroyo", specifier = ">=2.33.1" }, + { name = "sentry-protos", specifier = ">=0.2.0" }, + { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.43.0" }, + { name = "types-protobuf", specifier = ">=6.30.2.20250703" }, + { name = "zstandard", specifier = ">=0.18.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = "==24.10.0" }, + { name = "devservices", specifier = ">=1.2.1" }, + { name = "flake8", specifier = ">=7.3.0" }, + { name = "isort", specifier = ">=5.13.2" }, + { name = "mypy", specifier = ">=1.17.1" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "sentry-devenv", specifier = ">=1.22.2" }, + { name = "time-machine", specifier = ">=2.16.0" }, +] + +[[package]] +name = "taskbroker-integration-tests" +version = "0.0.0" +source = { virtual = "integration_tests" } + +[package.dev-dependencies] +dev = [ + { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "orjson", 
marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "types-pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = "==24.10.0" }, + { name = "confluent-kafka", specifier = ">=2.3.0" }, + { name = "devservices", specifier = ">=1.2.1" }, + { name = "flake8", specifier = ">=7.3.0" }, + { name = "grpcio", specifier = "==1.66.1" }, + { name = "isort", specifier = ">=5.13.2" }, + { name = "mypy", specifier = ">=1.17.1" }, + { name = "orjson", specifier = ">=3.10.10" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "protobuf", specifier = ">=5.28.3" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "sentry-devenv", specifier = ">=1.22.2" }, + { name = "sentry-protos", specifier = ">=0.2.0" }, + { name = "types-protobuf", specifier = ">=6.30.2.20250703" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20241230" }, +] + +[[package]] +name = "time-machine" +version = "2.16.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8f936566ef9f09136a3d5db305961ef6d897b76b240c9ff4199144aed6dd4fe5" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5886e23ede3478ca2a3e0a641f5d09dd784dfa9e48c96e8e5e31fc4fe77b6dc0" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c76caf539fa4941e1817b7c482c87c65c52a1903fea761e84525955c6106fafb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391ae9c484736850bb44ef125cbad52fe2d1b69e42c95dc88c43af8ead2cc7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:84788f4d62a8b1bf5e499bb9b0e23ceceea21c415ad6030be6267ce3d639842f" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:15ec236b6571730236a193d9d6c11d472432fc6ab54e85eac1c16d98ddcd71bf" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cedc989717c8b44a3881ac3d68ab5a95820448796c550de6a2149ed1525157f0" }, + { url = 
"https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317b68b56a9c3731e0cf8886e0f94230727159e375988b36c60edce0ddbcb44a" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7751bf745d54e9e8b358c0afa332815da9b8a6194b26d0fd62876ab6c4d5c9c0" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1784edf173ca840ba154de6eed000b5727f65ab92972c2f88cec5c4d6349c5f2" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f5876a5682ce1f517e55d7ace2383432627889f6f7e338b961f99d684fd9e8d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:667b150fedb54acdca2a4bea5bf6da837b43e6dd12857301b48191f8803ba93f" }, +] + [[package]] name = "types-protobuf" version = "6.30.2.20250703" @@ -459,3 +762,25 @@ dependencies = [ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170" }, ] + +[[package]] +name = "zstandard" +version = "0.18.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "cffi", marker = "(platform_python_implementation == 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation == 'PyPy' and sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f9d32f509b84b7158d46ba673f1c5123a80062652517e9e56240ded7df3d744e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:ed3937c3d703c7f74f341fb530c9523b012923897e7979565ac0f3cb4f808d98" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:05552e29b1b580543cc22ae7ca9fb833e136a1843ef660a96679d246e666bbeb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbe2bd4cab395a157c61f059f60ec4e099ef207cd970d66f0ba184f9c2e25d37" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2eab9516bc4352fc9763d96047c815879f3efb1dfb5dfe2f775b2e22c0289cb6" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e1f98ffd138d172efd202cd078e746af80492c6942004b080bf627c5f826da5" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07a72264613c75fe6eb64f07ab553d3cfab7a421c8733e067a8718ef69c642a7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee14cc6b8b40733a707b467ddc192592cab941babf82b3e6f700673e050b4bda" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = 
"sha256:032ddaf24458986a31ff49d2fa86a4003e1e1c34c38976bedd06805350eaeddc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d68ff7c3a4c35400d807efbfa793767c2d4866a7017770b424e65749a70e958e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1ef5b96f0e90855ea13d06b7213a75a77a23946d8bb186ff38578dd1ff5efd4" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:754256fb4080a36f8992983b2f65f23719d275c9a350bcf18d76344ed64efa19" }, +] From 8e9cb4ae4238f046ba52284d3464ef8cc35019b8 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Sat, 29 Nov 2025 14:26:43 -0500 Subject: [PATCH 17/19] Fix formatting --- clients/python/src/examples/example_app.py | 6 ++++-- clients/python/src/examples/store.py | 1 + clients/python/src/taskbroker_client/metrics.py | 2 +- clients/python/tests/conftest.py | 3 ++- clients/python/tests/test_app.py | 9 +++------ clients/python/tests/test_registry.py | 14 ++++++++------ clients/python/tests/worker/test_worker.py | 2 -- 7 files changed, 19 insertions(+), 18 deletions(-) diff --git a/clients/python/src/examples/example_app.py b/clients/python/src/examples/example_app.py index 28e6c9f6..14c43ef7 100644 --- a/clients/python/src/examples/example_app.py +++ b/clients/python/src/examples/example_app.py @@ -7,16 +7,18 @@ import logging from time import sleep from typing import Any -from redis import StrictRedis + from arroyo.backends.kafka import KafkaProducer +from redis import StrictRedis +from examples.store import StubAtMostOnce from taskbroker_client.app import TaskbrokerApp from taskbroker_client.retry import LastAction, NoRetriesRemainingError, Retry, RetryTaskError from taskbroker_client.retry import retry_task as retry_task_helper -from examples.store import StubAtMostOnce logger = logging.getLogger(__name__) + def producer_factory(topic: str) -> KafkaProducer: # TODO use env vars for kafka host/port config = { diff --git a/clients/python/src/examples/store.py b/clients/python/src/examples/store.py index ee0c48dc..3e7fc996 100644 --- a/clients/python/src/examples/store.py +++ b/clients/python/src/examples/store.py @@ -1,5 +1,6 @@ from taskbroker_client.types import AtMostOnceStore + class StubAtMostOnce(AtMostOnceStore): def __init__(self) -> None: self._keys: dict[str, str] = {} diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py index 93246d01..f48fdf27 100644 --- a/clients/python/src/taskbroker_client/metrics.py +++ b/clients/python/src/taskbroker_client/metrics.py @@ -107,7 +107,7 @@ def track_memory_usage( tags: Tags | None = None, ) -> Generator[None]: """ - Records a distrubtion metric that tracks the delta + Records a distrubtion metric that tracks the delta of rss_usage between the context manager opening and closing. 
""" yield None diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py index 91c6308d..e53088b7 100644 --- a/clients/python/tests/conftest.py +++ b/clients/python/tests/conftest.py @@ -1,7 +1,8 @@ from datetime import UTC, datetime -from arroyo.backends.kafka import KafkaProducer import time_machine +from arroyo.backends.kafka import KafkaProducer + def producer_factory(topic: str) -> KafkaProducer: config = { diff --git a/clients/python/tests/test_app.py b/clients/python/tests/test_app.py index 74337fdf..41a66936 100644 --- a/clients/python/tests/test_app.py +++ b/clients/python/tests/test_app.py @@ -1,8 +1,9 @@ from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation +from examples.store import StubAtMostOnce from taskbroker_client.app import TaskbrokerApp from taskbroker_client.router import TaskRouter -from examples.store import StubAtMostOnce + from .conftest import producer_factory @@ -11,12 +12,8 @@ def route_namespace(self, name: str) -> str: return "honk" - def test_taskregistry_router_object() -> None: - app = TaskbrokerApp( - producer_factory=producer_factory, - router_class=StubRouter() - ) + app = TaskbrokerApp(producer_factory=producer_factory, router_class=StubRouter()) ns = app.taskregistry.create_namespace("test") assert ns.topic == "honk" diff --git a/clients/python/tests/test_registry.py b/clients/python/tests/test_registry.py index fb676f27..0b6c1d8b 100644 --- a/clients/python/tests/test_registry.py +++ b/clients/python/tests/test_registry.py @@ -1,22 +1,24 @@ import base64 from concurrent.futures import Future -from unittest.mock import Mock, patch +from unittest.mock import Mock import orjson import pytest import zstandard as zstd +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + # from django.test.utils import override_settings # from sentry.conf.types.kafka_definition import Topic from taskbroker_client.constants import MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, CompressionType +from taskbroker_client.metrics import NoOpMetricsBackend from taskbroker_client.registry import TaskNamespace, TaskRegistry from taskbroker_client.retry import LastAction, Retry from taskbroker_client.router import DefaultRouter -from taskbroker_client.metrics import NoOpMetricsBackend from taskbroker_client.task import Task -from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( - ON_ATTEMPTS_EXCEEDED_DEADLETTER, - ON_ATTEMPTS_EXCEEDED_DISCARD, -) + from .conftest import producer_factory diff --git a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py index a5f35d77..2d6397f9 100644 --- a/clients/python/tests/worker/test_worker.py +++ b/clients/python/tests/worker/test_worker.py @@ -6,7 +6,6 @@ import grpc import orjson -import pytest import zstandard as zstd from redis import StrictRedis @@ -21,7 +20,6 @@ ) from sentry_sdk.crons import MonitorStatus -from examples.example_app import exampletasks from taskbroker_client.constants import CompressionType from taskbroker_client.retry import NoRetriesRemainingError from taskbroker_client.state import current_task From 17698f3b3c3f5d3033f5ed9738a6c56695b61c0e Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 5 Dec 2025 17:57:55 -0500 Subject: [PATCH 18/19] Make example app more representative of real usage. 
--- clients/python/src/examples/app.py | 21 ++++++++++++ clients/python/src/examples/py.typed | 0 .../src/examples/{example_app.py => tasks.py} | 19 +---------- clients/python/tests/worker/test_worker.py | 34 +++++++++---------- 4 files changed, 39 insertions(+), 35 deletions(-) create mode 100644 clients/python/src/examples/app.py create mode 100644 clients/python/src/examples/py.typed rename clients/python/src/examples/{example_app.py => tasks.py} (79%) diff --git a/clients/python/src/examples/app.py b/clients/python/src/examples/app.py new file mode 100644 index 00000000..0828ca5a --- /dev/null +++ b/clients/python/src/examples/app.py @@ -0,0 +1,21 @@ +from arroyo.backends.kafka import KafkaProducer + +from examples.store import StubAtMostOnce +from taskbroker_client.app import TaskbrokerApp + + +def producer_factory(topic: str) -> KafkaProducer: + # TODO use env vars for kafka host/port + config = { + "bootstrap.servers": "127.0.0.1:9092", + "compression.type": "lz4", + "message.max.bytes": 50000000, # 50MB + } + return KafkaProducer(config) + + +app = TaskbrokerApp( + producer_factory=producer_factory, + at_most_once_store=StubAtMostOnce(), +) +app.set_modules(["examples.tasks"]) diff --git a/clients/python/src/examples/py.typed b/clients/python/src/examples/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/examples/example_app.py b/clients/python/src/examples/tasks.py similarity index 79% rename from clients/python/src/examples/example_app.py rename to clients/python/src/examples/tasks.py index 14c43ef7..8e87c9e3 100644 --- a/clients/python/src/examples/example_app.py +++ b/clients/python/src/examples/tasks.py @@ -8,32 +8,15 @@ from time import sleep from typing import Any -from arroyo.backends.kafka import KafkaProducer from redis import StrictRedis -from examples.store import StubAtMostOnce -from taskbroker_client.app import TaskbrokerApp +from examples.app import app from taskbroker_client.retry import LastAction, NoRetriesRemainingError, Retry, RetryTaskError from taskbroker_client.retry import retry_task as retry_task_helper logger = logging.getLogger(__name__) -def producer_factory(topic: str) -> KafkaProducer: - # TODO use env vars for kafka host/port - config = { - "bootstrap.servers": "127.0.0.1:9092", - "compression.type": "lz4", - "message.max.bytes": 50000000, # 50MB - } - return KafkaProducer(config) - - -app = TaskbrokerApp( - producer_factory=producer_factory, - at_most_once_store=StubAtMostOnce(), -) - # Create a namespace and register tasks exampletasks = app.taskregistry.create_namespace("examples") diff --git a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py index 2d6397f9..f9727bc9 100644 --- a/clients/python/tests/worker/test_worker.py +++ b/clients/python/tests/worker/test_worker.py @@ -149,7 +149,7 @@ class TestTaskWorker(TestCase): def test_fetch_task(self) -> None: taskworker = TaskWorker( - app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=100, process_type="fork", @@ -165,7 +165,7 @@ def test_fetch_task(self) -> None: def test_fetch_no_task(self) -> None: taskworker = TaskWorker( - app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=100, process_type="fork", @@ -180,7 +180,7 @@ def test_fetch_no_task(self) -> None: def test_run_once_no_next_task(self) -> None: max_runtime = 5 taskworker = TaskWorker( - 
app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -216,7 +216,7 @@ def test_run_once_with_next_task(self) -> None: # be processed. max_runtime = 5 taskworker = TaskWorker( - app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -258,7 +258,7 @@ def test_run_once_with_update_failure(self) -> None: # We should retain the result until RPC succeeds. max_runtime = 5 taskworker = TaskWorker( - app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -304,7 +304,7 @@ def test_run_once_current_task_state(self) -> None: # to raise and catch a NoRetriesRemainingError max_runtime = 5 taskworker = TaskWorker( - app_module="examples.example_app:app", + app_module="examples.app:app", broker_hosts=["127.0.0.1:50051"], max_child_task_count=1, process_type="fork", @@ -356,7 +356,7 @@ def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: todo.put(SIMPLE_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -390,7 +390,7 @@ def test_child_process_remove_start_time_kwargs() -> None: todo.put(activation) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -412,7 +412,7 @@ def test_child_process_retry_task() -> None: todo.put(RETRY_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -452,7 +452,7 @@ def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: todo.put(activation) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -480,7 +480,7 @@ def test_child_process_failure_task() -> None: todo.put(FAIL_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -503,7 +503,7 @@ def test_child_process_shutdown() -> None: todo.put(SIMPLE_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -525,7 +525,7 @@ def test_child_process_unknown_task() -> None: todo.put(UNDEFINED_TASK) todo.put(SIMPLE_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -552,7 +552,7 @@ def test_child_process_at_most_once() -> None: todo.put(AT_MOST_ONCE_TASK) todo.put(SIMPLE_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -579,7 +579,7 @@ def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: todo.put(SCHEDULED_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -622,7 +622,7 @@ def test_child_process_terminate_task(mock_capture: mock.Mock) -> None: todo.put(sleepy) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, @@ -648,7 +648,7 @@ def test_child_process_decompression(mock_capture_checkin: mock.MagicMock) -> No todo.put(COMPRESSED_TASK) child_process( - "examples.example_app:app", + "examples.app:app", todo, processed, shutdown, From 3770c6afd94db48dff5b4a1fd8b165f50fe0f850 Mon Sep 17 00:00:00 2001 From: Mark Story Date: Fri, 5 Dec 2025 18:16:19 -0500 Subject: [PATCH 19/19] Fix mypy for the most part Still need to sort out pre-commit --- clients/python/pyproject.toml | 6 ++- 
clients/python/tests/scheduler/test_runner.py | 2 +- clients/python/tests/test_task.py | 6 +-- clients/python/tests/worker/test_client.py | 42 ++++++++++++------- clients/python/tests/worker/test_worker.py | 9 ++-- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 42ffb3a5..84f07d94 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -49,7 +49,8 @@ python_files = ["test_*.py"] python_functions = ["test_*"] [tool.mypy] -mypy_path = "python" +files = ["."] +mypy_path = ["src"] explicit_package_bases = true # minimal strictness settings check_untyped_defs = true @@ -66,6 +67,9 @@ disallow_untyped_defs = true # begin: missing 3rd party stubs [[tool.mypy.overrides]] module = [ + ".conftest", + "redis", + "rediscluster.*", "confluent_kafka.*", ] ignore_missing_imports = true diff --git a/clients/python/tests/scheduler/test_runner.py b/clients/python/tests/scheduler/test_runner.py index 0e83bb3e..10480be5 100644 --- a/clients/python/tests/scheduler/test_runner.py +++ b/clients/python/tests/scheduler/test_runner.py @@ -53,7 +53,7 @@ def test_runstorage_double_set(run_storage: RunStorage) -> None: assert second is False, "writing a key that exists should fail" -def test_schedulerunner_add_invalid(task_app) -> None: +def test_schedulerunner_add_invalid(task_app: TaskbrokerApp) -> None: run_storage = Mock(spec=RunStorage) schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) diff --git a/clients/python/tests/test_task.py b/clients/python/tests/test_task.py index 3d767e42..1e0771eb 100644 --- a/clients/python/tests/test_task.py +++ b/clients/python/tests/test_task.py @@ -60,7 +60,7 @@ def test_define_task_at_most_once_with_retry(task_namespace: TaskNamespace) -> N def test_apply_async_expires(task_namespace: TaskNamespace) -> None: - def test_func(*args, **kwargs) -> None: + def test_func(*args: Any, **kwargs: Any) -> None: pass task = Task( @@ -81,7 +81,7 @@ def test_func(*args, **kwargs) -> None: def test_apply_async_countdown(task_namespace: TaskNamespace) -> None: - def test_func(*args, **kwargs) -> None: + def test_func(*args: Any, **kwargs: Any) -> None: pass task = Task( @@ -104,7 +104,7 @@ def test_func(*args, **kwargs) -> None: def test_delay_immediate_mode(task_namespace: TaskNamespace) -> None: calls = [] - def test_func(*args, **kwargs) -> None: + def test_func(*args: Any, **kwargs: Any) -> None: calls.append({"args": args, "kwargs": kwargs}) task = Task( diff --git a/clients/python/tests/worker/test_client.py b/clients/python/tests/worker/test_client.py index 24bb91d6..61d83a83 100644 --- a/clients/python/tests/worker/test_client.py +++ b/clients/python/tests/worker/test_client.py @@ -20,15 +20,15 @@ TaskActivation, ) +from taskbroker_client.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.types import ProcessingResult from taskbroker_client.worker.client import ( HealthCheckSettings, HostTemporarilyUnavailable, TaskbrokerClient, make_broker_hosts, ) -from taskbroker_client.types import ProcessingResult -from taskbroker_client.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH @dataclasses.dataclass @@ -44,8 +44,8 @@ def __init__( self, path: str, responses: list[Any], - request_serializer: Callable, - response_deserializer: Callable, + request_serializer: Callable[..., Any], + response_deserializer: Callable[..., Any], ): self.path = path self.request_serializer = 
request_serializer @@ -74,13 +74,13 @@ def with_call(self, *args: Any, **kwargs: Any) -> Any: class MockChannel: def __init__(self) -> None: - self._responses = defaultdict(list) + self._responses: dict[str, list[Any]] = defaultdict(list) def unary_unary( self, path: str, - request_serializer: Callable, - response_deserializer: Callable, + request_serializer: Callable[..., Any], + response_deserializer: Callable[..., Any], *args: Any, **kwargs: Any, ) -> MockServiceMethod: @@ -100,7 +100,7 @@ def add_response( class MockGrpcError(grpc.RpcError): """Grpc error are elusive and this mock simulates the interface in mypy stubs""" - def __init__(self, code: int, message: str) -> None: + def __init__(self, code: grpc.StatusCode, message: str) -> None: self._code = code self._message = message @@ -260,7 +260,9 @@ def test_get_task_with_interceptor() -> None: secret = '["a long secret value","notused"]' with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend(), rpc_secret=secret) + client = TaskbrokerClient( + ["localhost-0:50051"], metrics=NoOpMetricsBackend(), rpc_secret=secret + ) result = client.get_task() assert result @@ -286,7 +288,9 @@ def test_get_task_with_namespace() -> None: ) with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskbrokerClient(hosts=make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) + client = TaskbrokerClient( + hosts=make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) result = client.get_task(namespace="testing") assert result @@ -371,7 +375,9 @@ def test_update_task_ok_with_next() -> None: ) with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) assert set(client._host_to_stubs.keys()) == {"localhost-0:50051"} result = client.update_task( ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), @@ -400,7 +406,9 @@ def test_update_task_ok_with_next_namespace() -> None: ) with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) result = client.update_task( ProcessingResult( task_id="id", @@ -422,7 +430,9 @@ def test_update_task_ok_no_next() -> None: ) with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = TaskbrokerClient(make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend()) + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) result = client.update_task( ProcessingResult( task_id="abc123", @@ -818,7 +828,9 @@ def test_client_reset_errors_after_success() -> None: with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: mock_channel.return_value = channel - client = 
TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3) + client = TaskbrokerClient( + ["localhost:50051"], metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3 + ) with pytest.raises(grpc.RpcError, match="host is unavailable"): client.get_task() @@ -863,7 +875,7 @@ def test_client_update_task_host_unavailable() -> None: current_time = 1000.0 - def mock_time(): + def mock_time() -> float: return current_time with ( diff --git a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py index f9727bc9..8bfb35b2 100644 --- a/clients/python/tests/worker/test_worker.py +++ b/clients/python/tests/worker/test_worker.py @@ -2,6 +2,7 @@ import queue import time from multiprocessing import Event +from typing import Any from unittest import TestCase, mock import grpc @@ -223,7 +224,7 @@ def test_run_once_with_next_task(self) -> None: ) with mock.patch.object(taskworker, "client") as mock_client: - def update_task_response(*args, **kwargs): + def update_task_response(*args: Any, **kwargs: Any) -> InflightTaskActivation | None: if mock_client.update_task.call_count >= 1: return None return SIMPLE_TASK @@ -265,7 +266,7 @@ def test_run_once_with_update_failure(self) -> None: ) with mock.patch.object(taskworker, "client") as mock_client: - def update_task_response(*args, **kwargs): + def update_task_response(*args: Any, **kwargs: Any) -> None: if mock_client.update_task.call_count <= 2: # Use setattr() because internally grpc uses _InactiveRpcError # but it isn't exported. @@ -274,7 +275,7 @@ def update_task_response(*args, **kwargs): raise err return None - def get_task_response(*args, **kwargs): + def get_task_response(*args: Any, **kwargs: Any) -> InflightTaskActivation | None: # Only one task that fails to update if mock_client.get_task.call_count == 1: return SIMPLE_TASK @@ -311,7 +312,7 @@ def test_run_once_current_task_state(self) -> None: ) with mock.patch.object(taskworker, "client") as mock_client: - def update_task_response(*args, **kwargs): + def update_task_response(*args: Any, **kwargs: Any) -> None: return None mock_client.update_task.side_effect = update_task_response
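
For context, the fetch/update cycle these client tests exercise looks roughly
like the sketch below. The broker address and namespace are illustrative, and
ProcessingResult is built positionally to mirror the calls in test_client.py;
the exact field names and the trailing 0 are assumptions based on those tests:

    from sentry_protos.taskbroker.v1.taskbroker_pb2 import TASK_ACTIVATION_STATUS_COMPLETE

    from taskbroker_client.metrics import NoOpMetricsBackend
    from taskbroker_client.types import ProcessingResult
    from taskbroker_client.worker.client import TaskbrokerClient, make_broker_hosts

    client = TaskbrokerClient(
        make_broker_hosts("localhost:50051", num_brokers=1),
        metrics=NoOpMetricsBackend(),
    )

    # Fetch an activation, execute it, then report the outcome. As the worker
    # tests above suggest, update_task() can hand back the next activation so
    # a worker avoids an extra get_task() round trip.
    inflight = client.get_task(namespace="examples")
    if inflight:
        next_task = client.update_task(
            # Positional fields follow the ProcessingResult calls in the tests:
            # task id, status, host, and a final numeric field shown there as 0.
            ProcessingResult(inflight.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, inflight.host, 0)
        )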