diff --git a/Makefile b/Makefile index 1bdac199..7fd7f181 100644 --- a/Makefile +++ b/Makefile @@ -46,33 +46,33 @@ reset-kafka: setup ## Reset kafka .PHONY: reset-kafka test-rebalance: build reset-kafka ## Run the rebalance integration test - python -m pytest python/integration_tests/test_consumer_rebalancing.py -s - rm -r python/integration_tests/.tests_output/test_consumer_rebalancing + python -m pytest integration_tests/test_consumer_rebalancing.py -s + rm -r integration_tests/.tests_output/test_consumer_rebalancing .PHONY: test-rebalance test-worker-processing: build reset-kafka ## Run the worker processing integration test - python -m pytest python/integration_tests/test_task_worker_processing.py -s - rm -r python/integration_tests/.tests_output/test_task_worker_processing + python -m pytest integration_tests/test_task_worker_processing.py -s + rm -r integration_tests/.tests_output/test_task_worker_processing .PHONY: test-worker-processing test-upkeep-retry: build reset-kafka ## Run the upkeep retry integration test - python -m pytest python/integration_tests/test_upkeep_retry.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_retry + python -m pytest integration_tests/test_upkeep_retry.py -s + rm -r integration_tests/.tests_output/test_upkeep_retry .PHONY: test-upkeep-retry test-upkeep-expiry: build reset-kafka ## Run the upkeep expiry integration test - python -m pytest python/integration_tests/test_upkeep_expiry.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_expiry + python -m pytest integration_tests/test_upkeep_expiry.py -s + rm -r integration_tests/.tests_output/test_upkeep_expiry .PHONY: test-upkeep-expiry test-upkeep-delay: build reset-kafka ## Run the upkeep delay integration test - python -m pytest python/integration_tests/test_upkeep_delay.py -s - rm -r python/integration_tests/.tests_output/test_upkeep_delay + python -m pytest integration_tests/test_upkeep_delay.py -s + rm -r integration_tests/.tests_output/test_upkeep_delay .PHONY: test-upkeep-delay test-failed-tasks: build reset-kafka ## Run the failed tasks integration test - python -m pytest python/integration_tests/test_failed_tasks.py -s - rm -r python/integration_tests/.tests_output/test_failed_tasks + python -m pytest integration_tests/test_failed_tasks.py -s + rm -r integration_tests/.tests_output/test_failed_tasks .PHONY: test-failed-tasks integration-test: test-rebalance test-worker-processing test-upkeep-retry test-upkeep-expiry test-upkeep-delay test-failed-tasks ## Run all integration tests diff --git a/clients/python/.python-version b/clients/python/.python-version new file mode 100644 index 00000000..7eebfafa --- /dev/null +++ b/clients/python/.python-version @@ -0,0 +1 @@ +3.12.11 diff --git a/python/__init__.py b/clients/python/README.md similarity index 100% rename from python/__init__.py rename to clients/python/README.md diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml new file mode 100644 index 00000000..84f07d94 --- /dev/null +++ b/clients/python/pyproject.toml @@ -0,0 +1,86 @@ +[project] +name = "taskbroker-client" +version = "0.1.0" +description = "Taskbroker python client and worker runtime" +readme = "README.md" +dependencies = [ + "sentry-arroyo>=2.33.1", + "sentry-sdk[http2]>=2.43.0", + "sentry-protos>=0.2.0", + "confluent_kafka>=2.3.0", + "cronsim>=2.6", + "grpcio==1.66.1", + "orjson>=3.10.10", + "protobuf>=5.28.3", + "types-protobuf>=6.30.2.20250703", + "redis>=3.4.1", + "redis-py-cluster>=2.1.0", + "zstandard>=0.18.0", +] + 
+[dependency-groups] +dev = [ + "devservices>=1.2.1", + "sentry-devenv>=1.22.2", + "black==24.10.0", + "pre-commit>=4.2.0", + "pytest>=8.3.3", + "flake8>=7.3.0", + "isort>=5.13.2", + "mypy>=1.17.1", + "time-machine>=2.16.0", +] + +[build-system] +requires = ["uv_build>=0.8.2,<0.9.0"] +build-backend = "uv_build" + +[tool.uv] +environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"] + +[[tool.uv.index]] +url = "https://pypi.devinfra.sentry.io/simple" +default = true + +[tool.pytest.ini_options] +pythonpath = ["python"] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.mypy] +files = ["."] +mypy_path = ["src"] +explicit_package_bases = true +# minimal strictness settings +check_untyped_defs = true +no_implicit_reexport = true +warn_unreachable = true +warn_unused_configs = true +warn_unused_ignores = true +warn_redundant_casts = true +enable_error_code = ["ignore-without-code", "redundant-self"] +local_partial_types = true # compat with dmypy +disallow_any_generics = true +disallow_untyped_defs = true + +# begin: missing 3rd party stubs +[[tool.mypy.overrides]] +module = [ + ".conftest", + "redis", + "rediscluster.*", + "confluent_kafka.*", +] +ignore_missing_imports = true +# end: missing 3rd party stubs + +[tool.black] +# File filtering is taken care of in pre-commit. +line-length = 100 +target-version = ['py311'] + +[tool.isort] +profile = "black" +line_length = 100 +lines_between_sections = 1 diff --git a/python/integration_tests/__init__.py b/clients/python/src/examples/__init__.py similarity index 100% rename from python/integration_tests/__init__.py rename to clients/python/src/examples/__init__.py diff --git a/clients/python/src/examples/app.py b/clients/python/src/examples/app.py new file mode 100644 index 00000000..0828ca5a --- /dev/null +++ b/clients/python/src/examples/app.py @@ -0,0 +1,21 @@ +from arroyo.backends.kafka import KafkaProducer + +from examples.store import StubAtMostOnce +from taskbroker_client.app import TaskbrokerApp + + +def producer_factory(topic: str) -> KafkaProducer: + # TODO use env vars for kafka host/port + config = { + "bootstrap.servers": "127.0.0.1:9092", + "compression.type": "lz4", + "message.max.bytes": 50000000, # 50MB + } + return KafkaProducer(config) + + +app = TaskbrokerApp( + producer_factory=producer_factory, + at_most_once_store=StubAtMostOnce(), +) +app.set_modules(["examples.tasks"]) diff --git a/clients/python/src/examples/py.typed b/clients/python/src/examples/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/examples/store.py b/clients/python/src/examples/store.py new file mode 100644 index 00000000..3e7fc996 --- /dev/null +++ b/clients/python/src/examples/store.py @@ -0,0 +1,12 @@ +from taskbroker_client.types import AtMostOnceStore + + +class StubAtMostOnce(AtMostOnceStore): + def __init__(self) -> None: + self._keys: dict[str, str] = {} + + def add(self, key: str, value: str, timeout: int) -> bool: + if key in self._keys: + return False + self._keys[key] = value + return True diff --git a/clients/python/src/examples/tasks.py b/clients/python/src/examples/tasks.py new file mode 100644 index 00000000..8e87c9e3 --- /dev/null +++ b/clients/python/src/examples/tasks.py @@ -0,0 +1,74 @@ +""" +Example taskbroker application with tasks + +Used in tests for the worker. 
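+
+A task defined here can be enqueued by calling ``delay`` on it, for example
+(illustrative)::
+
+    from examples.tasks import simple_task
+
+    simple_task.delay()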
+""" + +import logging +from time import sleep +from typing import Any + +from redis import StrictRedis + +from examples.app import app +from taskbroker_client.retry import LastAction, NoRetriesRemainingError, Retry, RetryTaskError +from taskbroker_client.retry import retry_task as retry_task_helper + +logger = logging.getLogger(__name__) + + +# Create a namespace and register tasks +exampletasks = app.taskregistry.create_namespace("examples") + + +@exampletasks.register(name="examples.simple_task") +def simple_task(*args: list[Any], **kwargs: dict[str, Any]) -> None: + sleep(0.1) + logger.debug("simple_task complete") + + +@exampletasks.register(name="examples.retry_task", retry=Retry(times=2)) +def retry_task() -> None: + raise RetryTaskError + + +@exampletasks.register(name="examples.fail_task") +def fail_task() -> None: + raise ValueError("nope") + + +@exampletasks.register(name="examples.at_most_once", at_most_once=True) +def at_most_once_task() -> None: + pass + + +@exampletasks.register( + name="examples.retry_state", retry=Retry(times=2, times_exceeded=LastAction.Deadletter) +) +def retry_state() -> None: + try: + retry_task_helper() + except NoRetriesRemainingError: + # TODO read host from env vars + redis = StrictRedis(host="localhost", port=6379, decode_responses=True) + redis.set("no-retries-remaining", 1) + + +@exampletasks.register( + name="examples.will_retry", + retry=Retry(times=3, on=(RuntimeError,), times_exceeded=LastAction.Discard), +) +def will_retry(failure: str) -> None: + if failure == "retry": + logger.debug("going to retry with explicit retry error") + raise RetryTaskError + if failure == "raise": + logger.debug("raising runtimeerror") + raise RuntimeError("oh no") + logger.debug("got %s", failure) + + +@exampletasks.register(name="examples.timed") +def timed_task(sleep_seconds: float | str, *args: list[Any], **kwargs: dict[str, Any]) -> None: + sleep(float(sleep_seconds)) + logger.debug("timed_task complete") diff --git a/clients/python/src/taskbroker_client/__init__.py b/clients/python/src/taskbroker_client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/src/taskbroker_client/app.py b/clients/python/src/taskbroker_client/app.py new file mode 100644 index 00000000..328d84bf --- /dev/null +++ b/clients/python/src/taskbroker_client/app.py @@ -0,0 +1,112 @@ +import importlib +from collections.abc import Iterable +from typing import Any + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + +from taskbroker_client.imports import import_string +from taskbroker_client.metrics import MetricsBackend +from taskbroker_client.registry import TaskRegistry +from taskbroker_client.router import TaskRouter +from taskbroker_client.types import AtMostOnceStore, ProducerFactory + + +class TaskbrokerApp: + """ + Container for an application's task setup and configuration. 
+ """ + + def __init__( + self, + producer_factory: ProducerFactory, + router_class: str | TaskRouter = "taskbroker_client.router.DefaultRouter", + metrics_class: str | MetricsBackend = "taskbroker_client.metrics.NoOpMetricsBackend", + at_most_once_store: AtMostOnceStore | None = None, + ) -> None: + self.metrics = self._build_metrics(metrics_class) + self._config = { + "rpc_secret": None, + "grpc_config": None, + "at_most_once_timeout": None, + } + self._modules: Iterable[str] = [] + self._taskregistry = TaskRegistry( + producer_factory=producer_factory, + router=self._build_router(router_class), + metrics=self.metrics, + ) + self.at_most_once_store(at_most_once_store) + + def _build_router(self, router_name: str | TaskRouter) -> TaskRouter: + if isinstance(router_name, str): + router_class = import_string(router_name) + router = router_class() + else: + router = router_name + assert hasattr(router, "route_namespace") + + return router + + def _build_metrics(self, backend_name: str | MetricsBackend) -> MetricsBackend: + if isinstance(backend_name, str): + metrics_class = import_string(backend_name) + return metrics_class() + return backend_name + + @property + def taskregistry(self) -> TaskRegistry: + """Get the TaskRegistry instance from this app""" + return self._taskregistry + + @property + def config(self) -> dict[str, Any]: + """Get the config data""" + return self._config + + def set_config(self, config: dict[str, Any]) -> None: + """Update configuration data""" + for key, value in config.items(): + if key in self._config: + self._config[key] = value + + def set_modules(self, modules: Iterable[str]) -> None: + """ + Set the list of modules containing tasks to be loaded by workers and schedulers. + """ + self._modules = modules + + def load_modules(self) -> None: + """Load all of the configured modules""" + for mod in self._modules: + __import__(mod) + + def at_most_once_store(self, backend: AtMostOnceStore | None) -> None: + """ + Set the backend store for `at_most_once` tasks. + The storage implementation should support atomic operations + to avoid races with at_most_once tasks. + """ + self._at_most_once_store = backend + + def should_attempt_at_most_once(self, activation: TaskActivation) -> bool: + if not self._at_most_once_store: + return True + key = get_at_most_once_key(activation.namespace, activation.taskname, activation.id) + return self._at_most_once_store.add( + key, "1", timeout=self._config["at_most_once_timeout"] or 60 + ) + + +def get_at_most_once_key(namespace: str, taskname: str, task_id: str) -> str: + # tw:amo -> taskworker:at_most_once + return f"tw:amo:{namespace}:{taskname}:{task_id}" + + +def import_app(app_module: str) -> TaskbrokerApp: + """ + Resolve an application path like `acme.worker.runtime:app` + into the `app` symbol defined in the module. + """ + module_name, name = app_module.split(":") + module = importlib.import_module(module_name) + return getattr(module, name) diff --git a/clients/python/src/taskbroker_client/constants.py b/clients/python/src/taskbroker_client/constants.py new file mode 100644 index 00000000..36074a01 --- /dev/null +++ b/clients/python/src/taskbroker_client/constants.py @@ -0,0 +1,68 @@ +from enum import Enum + +DEFAULT_PROCESSING_DEADLINE = 10 +""" +The fallback/default processing_deadline that tasks +will use if neither the TaskNamespace or Task define a deadline +""" + +DEFAULT_REBALANCE_AFTER = 32 +""" +The number of tasks a worker will process before it +selects a new broker instance. 
+""" + +DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS = 3 +""" +The number of consecutive unavailable errors before the worker will +stop trying to connect to the broker and choose a new one. +""" + +DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT = 20 +""" +The number of seconds to wait before a host is considered available again. +""" + +DEFAULT_WORKER_QUEUE_SIZE = 5 +""" +The size of multiprocessing.Queue used to communicate +with child processes. +""" + +DEFAULT_CHILD_TASK_COUNT = 10000 +""" +The number of tasks a worker child process will process +before being restarted. +""" + +MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE = 20 +""" +The maximum number of seconds to wait before retrying RPCs when the host is unavailable. +""" + + +MAX_PARAMETER_BYTES_BEFORE_COMPRESSION = 3000000 # 3MB +""" +The maximum number of bytes before a task parameter is compressed. +""" + +DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH = 1.0 +""" +The number of gRPC requests before touching the health check file +""" + + +ALWAYS_EAGER = False +""" +Whether or not tasks should be invoked eagerly (synchronously) +This can be mutated by application test harnesses to run tasks without Kafka. +""" + + +class CompressionType(Enum): + """ + The type of compression used for task parameters. + """ + + ZSTD = "zstd" + PLAINTEXT = "plaintext" diff --git a/clients/python/src/taskbroker_client/imports.py b/clients/python/src/taskbroker_client/imports.py new file mode 100644 index 00000000..590266d9 --- /dev/null +++ b/clients/python/src/taskbroker_client/imports.py @@ -0,0 +1,30 @@ +from typing import Any + + +class ModuleProxyCache(dict[str, object]): + def __missing__(self, key: str) -> object: + if "." not in key: + return __import__(key) + + module_name, class_name = key.rsplit(".", 1) + + module = __import__(module_name, {}, {}, [class_name]) + handler = getattr(module, class_name) + + # We cache a NoneType for missing imports to avoid repeated lookups + self[key] = handler + + return handler + + +_cache = ModuleProxyCache() + + +def import_string(path: str) -> Any: + """ + Path must be module.path.ClassName + + >>> cls = import_string('sentry.models.Group') + """ + result = _cache[path] + return result diff --git a/clients/python/src/taskbroker_client/metrics.py b/clients/python/src/taskbroker_client/metrics.py new file mode 100644 index 00000000..f48fdf27 --- /dev/null +++ b/clients/python/src/taskbroker_client/metrics.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from abc import abstractmethod +from collections.abc import Mapping +from contextlib import contextmanager +from typing import Generator, Protocol, runtime_checkable + +Tags = Mapping[str, str | int | float] + + +@runtime_checkable +class MetricsBackend(Protocol): + """ + An abstract class that defines the interface for metrics backends. + """ + + @abstractmethod + def incr( + self, + name: str, + value: int | float = 1, + tags: Tags | None = None, + sample_rate: float | None = None, + ) -> None: + """ + Increments a counter metric by a given value. + """ + raise NotImplementedError + + @abstractmethod + def distribution( + self, + name: str, + value: int | float, + tags: Tags | None = None, + unit: str | None = None, + sample_rate: float | None = None, + ) -> None: + """ + Records a distribution metric. + """ + raise NotImplementedError + + @contextmanager + def timer( + self, + key: str, + tags: Tags | None = None, + sample_rate: float | None = None, + stacklevel: int = 0, + ) -> Generator[None]: + """ + Records a distribution metric with a context manager. 
+        """
+        raise NotImplementedError
+
+    @contextmanager
+    def track_memory_usage(
+        self,
+        key: str,
+        tags: Tags | None = None,
+    ) -> Generator[None]:
+        """
+        Records the change in RSS memory usage as a distribution metric.
+        """
+        raise NotImplementedError
+
+
+class NoOpMetricsBackend(MetricsBackend):
+    """
+    Default metrics backend that does not record anything.
+    """
+
+    def incr(
+        self,
+        name: str,
+        value: int | float = 1,
+        tags: Tags | None = None,
+        sample_rate: float | None = None,
+    ) -> None:
+        pass
+
+    def distribution(
+        self,
+        name: str,
+        value: int | float,
+        tags: Tags | None = None,
+        unit: str | None = None,
+        sample_rate: float | None = None,
+    ) -> None:
+        pass
+
+    @contextmanager
+    def timer(
+        self,
+        key: str,
+        tags: Tags | None = None,
+        sample_rate: float | None = None,
+        stacklevel: int = 0,
+    ) -> Generator[None]:
+        yield None
+
+    @contextmanager
+    def track_memory_usage(
+        self,
+        key: str,
+        tags: Tags | None = None,
+    ) -> Generator[None]:
+        """
+        Records a distribution metric that tracks the delta
+        of rss_usage between the context manager opening and closing.
+        """
+        yield None
diff --git a/clients/python/src/taskbroker_client/py.typed b/clients/python/src/taskbroker_client/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/clients/python/src/taskbroker_client/registry.py b/clients/python/src/taskbroker_client/registry.py
new file mode 100644
index 00000000..eedfbd47
--- /dev/null
+++ b/clients/python/src/taskbroker_client/registry.py
@@ -0,0 +1,262 @@
+from __future__ import annotations
+
+import datetime
+import logging
+from collections.abc import Callable
+from concurrent import futures
+from typing import Any
+
+import sentry_sdk
+from arroyo.backends.kafka import KafkaPayload, KafkaProducer
+from arroyo.types import BrokerValue, Topic
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation
+from sentry_sdk.consts import OP, SPANDATA
+
+from taskbroker_client.constants import DEFAULT_PROCESSING_DEADLINE, CompressionType
+
+from taskbroker_client.metrics import MetricsBackend
+from taskbroker_client.retry import Retry
+from taskbroker_client.router import TaskRouter
+from taskbroker_client.task import P, R, Task
+from taskbroker_client.types import ProducerFactory
+
+logger = logging.getLogger(__name__)
+
+ProducerFuture = futures.Future[BrokerValue[KafkaPayload]]
+
+
+class TaskNamespace:
+    """
+    Task namespaces link topics, config and default retry mechanics together.
+    All tasks within a namespace are stored in the same topic and run by a
+    shared worker pool.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        producer_factory: ProducerFactory,
+        router: TaskRouter,
+        metrics: MetricsBackend,
+        retry: Retry | None,
+        expires: int | datetime.timedelta | None = None,
+        processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE,
+        app_feature: str | None = None,
+    ):
+        self.name = name
+        self.router = router
+        self.default_retry = retry
+        self.default_expires = expires  # seconds
+        self.default_processing_deadline_duration = processing_deadline_duration  # seconds
+        self.app_feature = app_feature or name
+        self._registered_tasks: dict[str, Task[Any, Any]] = {}
+        self._producers: dict[str, KafkaProducer] = {}
+        self._producer_factory = producer_factory
+        self.metrics = metrics
+
+    def get(self, name: str) -> Task[Any, Any]:
+        """
+        Get a registered task by name.
+
+        Raises KeyError when an unknown task is provided.
+        """
+        if name not in self._registered_tasks:
+            raise KeyError(f"No task registered with the name {name}. 
Check your imports") + return self._registered_tasks[name] + + def contains(self, name: str) -> bool: + """ + Check if a task name has been registered + """ + return name in self._registered_tasks + + @property + def topic(self) -> str: + return self.router.route_namespace(self.name) + + def register( + self, + *, + name: str, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int | datetime.timedelta | None = None, + at_most_once: bool = False, + wait_for_delivery: bool = False, + compression_type: CompressionType = CompressionType.PLAINTEXT, + ) -> Callable[[Callable[P, R]], Task[P, R]]: + """ + Register a task. + + Applied as a decorator to functions to enable them to be run + asynchronously via taskworkers. + + Parameters + ---------- + + name: str + The name of the task. This is serialized and must be stable across deploys. + retry: Retry | None + The retry policy for the task. If none and at_most_once is not enabled + the Task namespace default retry policy will be used. + expires: int | datetime.timedelta + The number of seconds a task activation is valid for. After this + duration the activation will be discarded and not executed. + at_most_once : bool + Enable at-most-once execution. Tasks with `at_most_once` cannot + define retry policies, and use a worker side idempotency key to + prevent processing deadline based retries. + wait_for_delivery: bool + If true, the task will wait for the delivery report to be received + before returning. + compression_type: CompressionType + The compression type to use to compress the task parameters. + """ + + def wrapped(func: Callable[P, R]) -> Task[P, R]: + task_retry = retry + if not at_most_once: + task_retry = retry or self.default_retry + task = Task( + name=name, + func=func, + namespace=self, + retry=task_retry, + expires=expires or self.default_expires, + processing_deadline_duration=( + processing_deadline_duration or self.default_processing_deadline_duration + ), + at_most_once=at_most_once, + wait_for_delivery=wait_for_delivery, + compression_type=compression_type, + ) + # TODO(taskworker) tasks should be registered into the registry + # so that we can ensure task names are globally unique + self._registered_tasks[name] = task + return task + + return wrapped + + def _handle_produce_future(self, future: ProducerFuture, tags: dict[str, str]) -> None: + if future.cancelled(): + self.metrics.incr("taskworker.registry.send_task.cancelled", tags=tags) + elif future.exception(1): + # this does not block since this callback only gets run when the future is finished and exception is set + self.metrics.incr("taskworker.registry.send_task.failed", tags=tags) + else: + self.metrics.incr("taskworker.registry.send_task.success", tags=tags) + + def send_task(self, activation: TaskActivation, wait_for_delivery: bool = False) -> None: + topic = self.router.route_namespace(self.name) + + with sentry_sdk.start_span( + op=OP.QUEUE_PUBLISH, + name=activation.taskname, + origin="taskworker", + ) as span: + span.set_data(SPANDATA.MESSAGING_DESTINATION_NAME, activation.namespace) + span.set_data(SPANDATA.MESSAGING_MESSAGE_ID, activation.id) + span.set_data(SPANDATA.MESSAGING_SYSTEM, "taskworker") + + produce_future = self._producer(topic).produce( + Topic(name=topic), + KafkaPayload(key=None, value=activation.SerializeToString(), headers=[]), + ) + + self.metrics.incr( + "taskworker.registry.send_task.scheduled", + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + 
"topic": topic, + }, + ) + # We know this type is futures.Future, but cannot assert so, + # because it is also mock.Mock in tests. + produce_future.add_done_callback( # type:ignore[union-attr] + lambda future: self._handle_produce_future( + future=future, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "topic": topic, + }, + ) + ) + if wait_for_delivery: + try: + produce_future.result(timeout=10) + except Exception: + logger.exception("Failed to wait for delivery") + + def _producer(self, topic: str) -> KafkaProducer: + if topic not in self._producers: + self._producers[topic] = self._producer_factory(topic) + return self._producers[topic] + + +# TODO(mark) All of TaskRegistry could be folded into TaskworkerApp later. +class TaskRegistry: + """ + Registry of all namespaces. + + The TaskRegistry is responsible for handling namespace -> topic resolution + during startup. + """ + + def __init__( + self, + producer_factory: ProducerFactory, + router: TaskRouter, + metrics: MetricsBackend, + ) -> None: + self._namespaces: dict[str, TaskNamespace] = {} + self._producer_factory = producer_factory + self._router = router + self._metrics = metrics + + def contains(self, name: str) -> bool: + return name in self._namespaces + + def get(self, name: str) -> TaskNamespace: + """Fetch a namespace by name.""" + if name not in self._namespaces: + raise KeyError(f"No task namespace with the name {name}") + return self._namespaces[name] + + def get_task(self, namespace: str, task: str) -> Task[Any, Any]: + """Fetch a task by namespace and name.""" + return self.get(namespace).get(task) + + def create_namespace( + self, + name: str, + *, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int = DEFAULT_PROCESSING_DEADLINE, + app_feature: str | None = None, + ) -> TaskNamespace: + """ + Create a task namespace. + + Namespaces are mapped onto topics through the configured router allowing + infrastructure to be scaled based on a region's requirements. + + Namespaces can define default behavior for tasks defined within a namespace. + """ + if name in self._namespaces: + raise ValueError(f"Task namespace with name {name} already exists.") + namespace = TaskNamespace( + name=name, + router=self._router, + metrics=self._metrics, + producer_factory=self._producer_factory, + retry=retry, + expires=expires, + processing_deadline_duration=processing_deadline_duration, + app_feature=app_feature, + ) + self._namespaces[name] = namespace + + return namespace diff --git a/clients/python/src/taskbroker_client/retry.py b/clients/python/src/taskbroker_client/retry.py new file mode 100644 index 00000000..09951b17 --- /dev/null +++ b/clients/python/src/taskbroker_client/retry.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import logging +from enum import Enum +from multiprocessing.context import TimeoutError + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, + OnAttemptsExceeded, + RetryState, +) + +from taskbroker_client.state import current_task + +logger = logging.getLogger(__name__) + + +class RetryTaskError(Exception): + """ + Exception that tasks can raise to indicate that the current task activation + should be retried. + """ + + +class NoRetriesRemainingError(RetryTaskError): + """ + Exception that is raised by retry helper methods to signal to tasks that + the current attempt is terminal and there won't be any further retries. 
+    """
+
+
+class LastAction(Enum):
+    Deadletter = 1
+    Discard = 2
+
+    def to_proto(self) -> OnAttemptsExceeded.ValueType:
+        if self == LastAction.Deadletter:
+            return ON_ATTEMPTS_EXCEEDED_DEADLETTER
+        if self == LastAction.Discard:
+            return ON_ATTEMPTS_EXCEEDED_DISCARD
+        raise ValueError(f"Unknown LastAction: {self}")
+
+
+def retry_task(exc: Exception | None = None, raise_on_no_retries: bool = True) -> None:
+    """
+    Helper for triggering retry errors.
+    If all retries have been consumed, this raises
+    `taskbroker_client.retry.NoRetriesRemainingError`.
+    """
+    current = current_task()
+    if current and not current.retries_remaining:
+        logger.info("taskworker.retry.no_retries_remaining", extra={
+            "taskname": current.taskname
+        })
+        if raise_on_no_retries:
+            raise NoRetriesRemainingError()
+        else:
+            return
+    raise RetryTaskError()
+
+
+class Retry:
+    """Used with tasks to define the retry policy for a task"""
+
+    def __init__(
+        self,
+        *,
+        times: int = 1,
+        on: tuple[type[BaseException], ...] | None = None,
+        ignore: tuple[type[BaseException], ...] | None = None,
+        times_exceeded: LastAction = LastAction.Discard,
+        delay: int | None = None,
+    ):
+        self._times = times
+        self._allowed_exception_types: tuple[type[BaseException], ...] = on or ()
+        self._denied_exception_types: tuple[type[BaseException], ...] = ignore or ()
+        self._times_exceeded = times_exceeded
+        self._delay = delay
+
+    def max_attempts_reached(self, state: RetryState) -> bool:
+        # We subtract one, as attempts starts at 0, but `times`
+        # starts at 1.
+        return state.attempts >= (self._times - 1)
+
+    def should_retry(self, state: RetryState, exc: Exception) -> bool:
+        # If there are no retries remaining we should not retry
+        if self.max_attempts_reached(state):
+            return False
+
+        # Explicit RetryTaskError with attempts left.
+        if isinstance(exc, RetryTaskError):
+            return True
+
+        # No retries for types on the ignore list
+        if isinstance(exc, self._denied_exception_types):
+            return False
+
+        # In the retry allow list, or the processing deadline was exceeded.
+        # When the processing deadline is exceeded, the subprocess raises a TimeoutError.
+        if isinstance(exc, (TimeoutError, self._allowed_exception_types)):
+            return True
+
+        return False
+
+    def initial_state(self) -> RetryState:
+        return RetryState(
+            attempts=0,
+            max_attempts=self._times,
+            on_attempts_exceeded=self._times_exceeded.to_proto(),
+            delay_on_retry=self._delay,
+        )
diff --git a/clients/python/src/taskbroker_client/router.py b/clients/python/src/taskbroker_client/router.py
new file mode 100644
index 00000000..657cc12c
--- /dev/null
+++ b/clients/python/src/taskbroker_client/router.py
@@ -0,0 +1,18 @@
+from typing import Protocol
+
+
+class TaskRouter(Protocol):
+    """
+    Resolves task namespaces to topic names.
+    """
+
+    def route_namespace(self, name: str) -> str: ...
+
+
+class DefaultRouter(TaskRouter):
+    """
+    Stub router that resolves all namespaces to a default topic.
+    """
+
+    def route_namespace(self, name: str) -> str:
+        return "taskbroker"
diff --git a/clients/python/src/taskbroker_client/scheduler/__init__.py b/clients/python/src/taskbroker_client/scheduler/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/clients/python/src/taskbroker_client/scheduler/config.py b/clients/python/src/taskbroker_client/scheduler/config.py
new file mode 100644
index 00000000..d09096b5
--- /dev/null
+++ b/clients/python/src/taskbroker_client/scheduler/config.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+import dataclasses
+from collections.abc import Mapping
+from datetime import timedelta
+from typing import TypedDict
+
+
+@dataclasses.dataclass
+class crontab:
+    """
+    crontab schedule value object
+
+    Used in configuration to define a task schedule.
+
+    :see taskbroker_client.scheduler.schedules.CrontabSchedule for more details.
+    """
+
+    minute: str = "*"
+    hour: str = "*"
+    day_of_week: str = "*"
+    day_of_month: str = "*"
+    month_of_year: str = "*"
+
+    def __str__(self) -> str:
+        return (
+            f"{self.minute} {self.hour} {self.day_of_month} {self.month_of_year} {self.day_of_week}"
+        )
+
+
+class ScheduleConfig(TypedDict):
+    """The schedule definition for an individual task."""
+
+    task: str
+    schedule: timedelta | crontab
+
+
+ScheduleConfigMap = Mapping[str, ScheduleConfig]
+"""A collection of schedule configuration, usually defined in application configuration"""
diff --git a/clients/python/src/taskbroker_client/scheduler/runner.py b/clients/python/src/taskbroker_client/scheduler/runner.py
new file mode 100644
index 00000000..eee4abd2
--- /dev/null
+++ b/clients/python/src/taskbroker_client/scheduler/runner.py
@@ -0,0 +1,297 @@
+from __future__ import annotations
+
+import heapq
+import logging
+from collections.abc import Mapping
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING, Any
+
+from redis.client import StrictRedis
+from rediscluster import RedisCluster
+from sentry_sdk import capture_exception
+from sentry_sdk.crons import MonitorStatus, capture_checkin
+
+from taskbroker_client.app import TaskbrokerApp
+from taskbroker_client.metrics import MetricsBackend
+from taskbroker_client.scheduler.config import ScheduleConfig, crontab
+from taskbroker_client.scheduler.schedules import CrontabSchedule, Schedule, TimedeltaSchedule
+from taskbroker_client.task import Task
+
+logger = logging.getLogger("taskworker.scheduler")
+
+if TYPE_CHECKING:
+    from sentry_sdk._types import MonitorConfig
+
+
+class RunStorage:
+    """
+    Storage interface for tracking the last run time of tasks.
+    This is split out from `ScheduleRunner` to allow us to change storage
+    in the future, or adapt taskworkers for other applications should we need to.
+    """
+
+    def __init__(
+        self, metrics: MetricsBackend, redis: RedisCluster[str] | StrictRedis[str]
+    ) -> None:
+        self._redis = redis
+        self._metrics = metrics
+
+    def _make_key(self, taskname: str) -> str:
+        return f"tw:scheduler:{taskname}"
+
+    def set(self, taskname: str, next_runtime: datetime) -> bool:
+        """
+        Record a spawn time for a task.
+        The next_runtime parameter indicates when the record should expire,
+        and a task can be spawned again.
+
+        Returns False when the key already exists and a task should not be spawned.
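+
+        Usage sketch, mirroring ``ScheduleRunner._try_spawn``::
+
+            if run_storage.set(entry.fullname, next_runtime):
+                entry.delay_task()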
+        """
+        now = datetime.now(tz=UTC)
+        # next_runtime & now could be the same second, and redis gets sad if ex=0
+        duration = max(int((next_runtime - now).total_seconds()), 1)
+
+        result = self._redis.set(self._make_key(taskname), now.isoformat(), ex=duration, nx=True)
+        return bool(result)
+
+    def read(self, taskname: str) -> datetime | None:
+        """
+        Retrieve the last run time of a task.
+        Returns None if last run time has expired or is unknown.
+        """
+        result = self._redis.get(self._make_key(taskname))
+        if result:
+            return datetime.fromisoformat(result)
+
+        self._metrics.incr(
+            "taskworker.scheduler.run_storage.read.miss", tags={"taskname": taskname}
+        )
+        return None
+
+    def read_many(self, tasknames: list[str]) -> Mapping[str, datetime | None]:
+        """
+        Retrieve last run times in bulk.
+        """
+        values = self._redis.mget([self._make_key(taskname) for taskname in tasknames])
+        run_times = {
+            taskname: datetime.fromisoformat(value) if value else None
+            for taskname, value in zip(tasknames, values)
+        }
+        return run_times
+
+    def delete(self, taskname: str) -> None:
+        """Remove a task key - mostly for testing."""
+        self._redis.delete(self._make_key(taskname))
+
+
+class ScheduleEntry:
+    """An individual task that can be scheduled to be run."""
+
+    def __init__(self, *, key: str, task: Task[Any, Any], schedule: timedelta | crontab) -> None:
+        self._key = key
+        self._task = task
+        scheduler: Schedule
+        if isinstance(schedule, crontab):
+            scheduler = CrontabSchedule(task.fullname, schedule)
+        else:
+            scheduler = TimedeltaSchedule(schedule)
+        self._schedule = scheduler
+        self._last_run: datetime | None = None
+
+    def __lt__(self, other: ScheduleEntry) -> bool:
+        # Secondary sorting for heapq when remaining time is the same
+        return self.fullname < other.fullname
+
+    def __repr__(self) -> str:
+        last_run = self._last_run.isoformat() if self._last_run else None
+        remaining_seconds = self.remaining_seconds()
+
+        return f"<ScheduleEntry fullname={self.fullname} last_run={last_run} remaining_seconds={remaining_seconds}>"
+
+    @property
+    def fullname(self) -> str:
+        return self._task.fullname
+
+    @property
+    def namespace(self) -> str:
+        return self._task.namespace.name
+
+    @property
+    def taskname(self) -> str:
+        return self._task.name
+
+    def set_last_run(self, last_run: datetime | None) -> None:
+        self._last_run = last_run
+
+    def is_due(self) -> bool:
+        return self._schedule.is_due(self._last_run)
+
+    def remaining_seconds(self) -> int:
+        return self._schedule.remaining_seconds(self._last_run)
+
+    def runtime_after(self, start: datetime) -> datetime:
+        return self._schedule.runtime_after(start)
+
+    def delay_task(self) -> None:
+        monitor_config = self.monitor_config()
+        headers: dict[str, Any] = {}
+        if monitor_config:
+            check_in_id = capture_checkin(
+                monitor_slug=self._key,
+                monitor_config=monitor_config,
+                status=MonitorStatus.IN_PROGRESS,
+            )
+            headers = {
+                "sentry-monitor-check-in-id": check_in_id,
+                "sentry-monitor-slug": self._key,
+            }
+
+        # We don't need every task linked back to the scheduler trace
+        headers["sentry-propagate-traces"] = False
+
+        self._task.apply_async(headers=headers)
+
+    def monitor_config(self) -> MonitorConfig | None:
+        checkin_config: MonitorConfig = {
+            "schedule": {},
+            "timezone": "UTC",
+        }
+        if isinstance(self._schedule, CrontabSchedule):
+            checkin_config["schedule"]["type"] = "crontab"
+            checkin_config["schedule"]["value"] = self._schedule.monitor_value()
+        elif isinstance(self._schedule, TimedeltaSchedule):
+            (interval_value, interval_units) = self._schedule.monitor_interval()
+            # Monitors do not support intervals shorter than 1 minute.
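+            # e.g. timedelta(seconds=30) yields (30, "second"), which cannot be monitored.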
+ if interval_units == "second": + return None + + checkin_config["schedule"]["type"] = "interval" + checkin_config["schedule"]["value"] = interval_value + checkin_config["schedule"]["unit"] = interval_units + + return checkin_config + + +class ScheduleRunner: + """ + A task scheduler that a command run process can use to spawn tasks + based on their schedules. + + Contains a collection of ScheduleEntry objects which are composed + using `ScheduleRunner.add()`. Once the scheduler is built, `tick()` + is used in a while loop to spawn tasks and sleep. + """ + + def __init__(self, app: TaskbrokerApp, run_storage: RunStorage) -> None: + self._entries: list[ScheduleEntry] = [] + self._app = app + self._run_storage = run_storage + self._heap: list[tuple[int, ScheduleEntry]] = [] + + def add(self, key: str, task_config: ScheduleConfig) -> None: + """Add a scheduled task to the runner.""" + try: + (namespace, taskname) = task_config["task"].split(":") + except ValueError: + raise ValueError("Invalid task name. Must be in the format namespace:taskname") + + task = self._app.taskregistry.get_task(namespace, taskname) + entry = ScheduleEntry(key=key, task=task, schedule=task_config["schedule"]) + self._entries.append(entry) + self._heap = [] + + def log_startup(self) -> None: + task_names = [entry.fullname for entry in self._entries] + logger.info("taskworker.scheduler.startup", extra={"tasks": task_names}) + + def tick(self) -> float: + """ + Check if any tasks are due to run at current_time, and spawn them. + + Returns the number of seconds to sleep until the next task is due. + """ + self._update_heap() + + if not self._heap: + logger.warning("taskworker.scheduler.no_heap") + return 60 + + while True: + # Peek at the top, and if it is due, pop, spawn and update last run time + _, entry = self._heap[0] + if entry.is_due(): + heapq.heappop(self._heap) + try: + self._try_spawn(entry) + except Exception as e: + # Trap errors from spawning/update state so that the heap stays consistent. + capture_exception(e) + heapq.heappush(self._heap, (entry.remaining_seconds(), entry)) + continue + else: + # The top of the heap isn't ready, break for sleep + break + + return self._heap[0][0] + + def _try_spawn(self, entry: ScheduleEntry) -> None: + now = datetime.now(tz=UTC) + next_runtime = entry.runtime_after(now) + if self._run_storage.set(entry.fullname, next_runtime): + entry.delay_task() + entry.set_last_run(now) + + logger.debug("taskworker.scheduler.delay_task", extra={"fullname": entry.fullname}) + self._app.metrics.incr( + "taskworker.scheduler.delay_task", + tags={ + "taskname": entry.taskname, + "namespace": entry.namespace, + }, + sample_rate=1.0, + ) + else: + # We were not able to set a key, load last run from storage. 
+            run_state = self._run_storage.read(entry.fullname)
+            entry.set_last_run(run_state)
+
+            logger.info(
+                "taskworker.scheduler.sync_with_storage",
+                extra={
+                    "taskname": entry.taskname,
+                    "namespace": entry.namespace,
+                    "last_runtime": run_state.isoformat() if run_state else None,
+                },
+            )
+            self._app.metrics.incr(
+                "taskworker.scheduler.sync_with_storage",
+                tags={"taskname": entry.taskname, "namespace": entry.namespace},
+            )
+
+    def _update_heap(self) -> None:
+        """Update the heap to reflect current remaining time."""
+        if not self._heap:
+            self._load_last_run()
+
+        heap_items = [(item.remaining_seconds(), item) for item in self._entries]
+        heapq.heapify(heap_items)
+        self._heap = heap_items
+
+    def _load_last_run(self) -> None:
+        """
+        Load last_run state from storage.
+
+        We synchronize each time the schedule set is modified and
+        then incrementally as task spawn attempts are made.
+        """
+        last_run_times = self._run_storage.read_many([item.fullname for item in self._entries])
+        for item in self._entries:
+            last_run = last_run_times.get(item.fullname, None)
+            item.set_last_run(last_run)
+        logger.info(
+            "taskworker.scheduler.load_last_run",
+            extra={
+                "entry_count": len(self._entries),
+                "loaded_count": len(last_run_times),
+            },
+        )
diff --git a/clients/python/src/taskbroker_client/scheduler/schedules.py b/clients/python/src/taskbroker_client/scheduler/schedules.py
new file mode 100644
index 00000000..09a2715e
--- /dev/null
+++ b/clients/python/src/taskbroker_client/scheduler/schedules.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+import abc
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from cronsim import CronSim, CronSimError
+from taskbroker_client.scheduler.config import crontab
+
+if TYPE_CHECKING:
+    from sentry_sdk._types import MonitorConfigScheduleUnit
+
+logger = logging.getLogger("taskworker.scheduler")
+
+
+class Schedule(metaclass=abc.ABCMeta):
+    """Interface for scheduling tasks to run at specific times."""
+
+    @abc.abstractmethod
+    def is_due(self, last_run: datetime | None = None) -> bool:
+        """
+        Check if the schedule is due to run again based on last_run.
+        """
+
+    @abc.abstractmethod
+    def remaining_seconds(self, last_run: datetime | None = None) -> int:
+        """
+        Get the remaining seconds until the schedule should run again.
+        """
+
+    @abc.abstractmethod
+    def runtime_after(self, start: datetime) -> datetime:
+        """
+        Get the next scheduled time after `start`
+        """
+
+
+class TimedeltaSchedule(Schedule):
+    """
+    Task schedules defined as `datetime.timedelta` intervals
+
+    If a timedelta interval loses its last_run state, it will assume
+    that at least one interval has been missed, and it will become due immediately.
+
+    After the first spawn, the schedule will align to the interval's duration.
+    """
+
+    def __init__(self, delta: timedelta) -> None:
+        self._delta = delta
+        if delta.microseconds:
+            raise ValueError("microseconds are not supported")
+        if delta.total_seconds() < 1:
+            raise ValueError("interval must be at least one second")
+
+    def monitor_interval(self) -> tuple[int, MonitorConfigScheduleUnit]:
+        time_units: tuple[tuple[MonitorConfigScheduleUnit, float], ...]
= ( + ("day", 60 * 60 * 24.0), + ("hour", 60 * 60.0), + ("minute", 60.0), + ) + + seconds = self._delta.total_seconds() + for unit, divider in time_units: + if seconds >= divider: + interval = int(seconds / divider) + return (interval, unit) + + return (int(seconds), "second") + + def is_due(self, last_run: datetime | None = None) -> bool: + """Check if the schedule is due to run again based on last_run.""" + if last_run is None: + return True + remaining = self.remaining_seconds(last_run) + return remaining <= 0 + + def remaining_seconds(self, last_run: datetime | None = None) -> int: + """The number of seconds remaining until the next task should spawn""" + if last_run is None: + return 0 + # floor to timestamp as microseconds are not relevant + now = int(datetime.now(tz=UTC).timestamp()) + last_run_ts = int(last_run.timestamp()) + + seconds_remaining = self._delta.total_seconds() - (now - last_run_ts) + return max(int(seconds_remaining), 0) + + def runtime_after(self, start: datetime) -> datetime: + """Get the next time a task should run after start""" + return start + self._delta + + +class CrontabSchedule(Schedule): + """ + Task schedules defined as crontab expressions. + + crontab expressions naturally align to clock intervals. For example + an interval of `crontab(minute="*/2")` will spawn on the even numbered minutes. + + If a crontab schedule loses its last_run state, it will assume that + one or more intervals have been missed, and it will align to the next + interval window. Missed intervals will not be recovered. + + For tasks with very long intervals, you should consider the impact of a deploy + or scheduler restart causing a missed window. Consider a more frequent interval + to help spread load out and reduce the impacts of missed intervals. + """ + + def __init__(self, name: str, crontab: crontab) -> None: + self._crontab = crontab + self._name = name + try: + self._cronsim = CronSim(str(crontab), datetime.now(tz=UTC)) + except CronSimError as e: + raise ValueError(f"crontab expression {self._crontab} is invalid") from e + + def monitor_value(self) -> str: + """Get the crontab expression as a string""" + return str(self._crontab) + + def is_due(self, last_run: datetime | None = None) -> bool: + """Check if the schedule is due to run again based on last_run.""" + if last_run is None: + last_run = datetime.now(tz=UTC) - timedelta(minutes=1) + remaining = self.remaining_seconds(last_run) + return remaining <= 0 + + def remaining_seconds(self, last_run: datetime | None = None) -> int: + """ + Get the number of seconds until this schedule is due again + + Use the current time to find the next schedule time + """ + if last_run is None: + last_run = datetime.now(tz=UTC) - timedelta(minutes=1) + + # This could result in missed beats, or increased load on redis. + last_run = last_run.replace(second=0, microsecond=0) + now = datetime.now(tz=UTC).replace(second=0, microsecond=0) + + # A future last_run means we should wait until the + # next scheduled time, and then we can try again. + # we could be competing with another scheduler, or + # missing beats. + if last_run > now: + logger.warning( + "taskworker.scheduler.future_value", + extra={ + "task": self._name, + "last_run": last_run, + "now": now, + }, + ) + next_run = self._advance(last_run + timedelta(minutes=1)) + return int(next_run.timestamp() - now.timestamp()) + + # If last run is in the past, see if the next runtime + # is in the future. 
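+        # e.g. with minute="*/2": last_run=12:00 and now=12:05 give next_run=12:02,
+        # which is already in the past, so an interval was missed.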
+ if last_run < now: + next_run = self._advance(last_run + timedelta(minutes=1)) + # Our next runtime is in the future, or now + if next_run >= now: + return int(next_run.timestamp() - now.timestamp()) + + # still in the past, we missed an interval :( + missed = next_run + next_run = self._advance(now) + logger.warning( + "taskworker.scheduler.missed_interval", + extra={ + "task": self._name, + "last_run": last_run.isoformat(), + "missed": missed.isoformat(), + "now": now.isoformat(), + "next_run": next_run.isoformat(), + }, + ) + return int(next_run.timestamp() - now.timestamp()) + + # last_run == now, we are on the beat, find the next interval + next_run = self._advance(now + timedelta(minutes=1)) + + return int(next_run.timestamp() - now.timestamp()) + + def _advance(self, dt: datetime) -> datetime: + self._cronsim.dt = dt + self._cronsim.advance() + return self._cronsim.dt + + def runtime_after(self, start: datetime) -> datetime: + """Get the next time a task should be spawned after `start`""" + start = start.replace(second=0, microsecond=0) + timedelta(minutes=1) + return self._advance(start) diff --git a/clients/python/src/taskbroker_client/state.py b/clients/python/src/taskbroker_client/state.py new file mode 100644 index 00000000..9217262c --- /dev/null +++ b/clients/python/src/taskbroker_client/state.py @@ -0,0 +1,42 @@ +import dataclasses +import threading + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation + +_current_state = threading.local() + + +@dataclasses.dataclass +class CurrentTaskState: + id: str + namespace: str + taskname: str + attempt: int + processing_deadline_duration: int + retries_remaining: bool + + +def current_task() -> CurrentTaskState | None: + if not hasattr(_current_state, "state"): + _current_state.state = None + + return _current_state.state + + +def set_current_task(activation: TaskActivation) -> None: + retry_state = activation.retry_state + state = CurrentTaskState( + id=activation.id, + namespace=activation.namespace, + taskname=activation.taskname, + attempt=activation.retry_state.attempts, + # We subtract one, as attempts starts at 0, but `max_attempts` + # starts at 1. 
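+        # e.g. with max_attempts=3, attempts 0 and 1 leave retries remaining; attempt 2 does not.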
+ retries_remaining=(retry_state.attempts < (retry_state.max_attempts - 1)), + processing_deadline_duration=activation.processing_deadline_duration, + ) + _current_state.state = state + + +def clear_current_task() -> None: + _current_state.state = None diff --git a/clients/python/src/taskbroker_client/task.py b/clients/python/src/taskbroker_client/task.py new file mode 100644 index 00000000..44edb385 --- /dev/null +++ b/clients/python/src/taskbroker_client/task.py @@ -0,0 +1,259 @@ +from __future__ import annotations + +import base64 +import datetime +import time +from collections.abc import Callable, Collection, Mapping, MutableMapping +from functools import update_wrapper +from typing import TYPE_CHECKING, Any, Generic, ParamSpec, TypeVar +from uuid import uuid4 + +import orjson +import sentry_sdk +import zstandard as zstd + +from google.protobuf.timestamp_pb2 import Timestamp + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DISCARD, + RetryState, + TaskActivation, +) + +from taskbroker_client.constants import ( + DEFAULT_PROCESSING_DEADLINE, + MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, + CompressionType, +) +from taskbroker_client.retry import Retry + +if TYPE_CHECKING: + from taskbroker_client.registry import TaskNamespace + + +ALWAYS_EAGER = False +""" +Whether or not tasks should be invoked eagerly (synchronously) +This can be mutated by application test harnesses to run tasks without Kafka. +""" + +P = ParamSpec("P") +R = TypeVar("R") + + +class Task(Generic[P, R]): + def __init__( + self, + name: str, + func: Callable[P, R], + namespace: TaskNamespace, + retry: Retry | None = None, + expires: int | datetime.timedelta | None = None, + processing_deadline_duration: int | datetime.timedelta | None = None, + at_most_once: bool = False, + wait_for_delivery: bool = False, + compression_type: CompressionType = CompressionType.PLAINTEXT, + ): + self.name = name + self._func = func + self._namespace = namespace + self._expires = expires + self._processing_deadline_duration = ( + processing_deadline_duration or DEFAULT_PROCESSING_DEADLINE + ) + if at_most_once and retry: + raise AssertionError( + """ + You cannot enable at_most_once and have retries defined. + Having retries enabled means that a task supports being executed + multiple times and thus cannot be idempotent. + """ + ) + self._retry = retry + self.at_most_once = at_most_once + self.wait_for_delivery = wait_for_delivery + self.compression_type = compression_type + update_wrapper(self, func) + + @property + def fullname(self) -> str: + return f"{self._namespace.name}:{self.name}" + + @property + def namespace(self) -> TaskNamespace: + return self._namespace + + @property + def retry(self) -> Retry | None: + return self._retry + + def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R: + """ + Call the task function immediately. + """ + return self._func(*args, **kwargs) + + def delay(self, *args: P.args, **kwargs: P.kwargs) -> None: + """ + Schedule a task to run later with a set of arguments. + + The provided parameters will be JSON encoded and stored within + a `TaskActivation` protobuf that is appended to kafka + """ + self.apply_async(args=args, kwargs=kwargs) + + def apply_async( + self, + args: Any | None = None, + kwargs: Any | None = None, + headers: MutableMapping[str, Any] | None = None, + expires: int | datetime.timedelta | None = None, + countdown: int | datetime.timedelta | None = None, + **options: Any, + ) -> None: + """ + Schedule a task to run later with a set of arguments. 
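+
+        For example (``simple_task`` is an illustrative registered task)::
+
+            simple_task.apply_async(countdown=30)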
+ + The provided parameters will be JSON encoded and stored within + a `TaskActivation` protobuf that is appended to kafka. + """ + if args is None: + args = [] + if kwargs is None: + kwargs = {} + + self._signal_send(task=self, args=args, kwargs=kwargs) + + # Generate an activation even if we're in immediate mode to + # catch serialization errors in tests. + activation = self.create_activation( + args=args, kwargs=kwargs, headers=headers, expires=expires, countdown=countdown + ) + if ALWAYS_EAGER: + self._func(*args, **kwargs) + else: + self._namespace.send_task( + activation, + wait_for_delivery=self.wait_for_delivery, + ) + + def _signal_send(self, task: Task[Any, Any], args: Any, kwargs: Any) -> None: + """ + This method is a stub that test harnesses can monkey patch to capture tasks that + are being produced. + """ + pass + + def create_activation( + self, + args: Collection[Any], + kwargs: Mapping[Any, Any], + headers: MutableMapping[str, Any] | None = None, + expires: int | datetime.timedelta | None = None, + countdown: int | datetime.timedelta | None = None, + ) -> TaskActivation: + received_at = Timestamp() + received_at.FromDatetime(datetime.datetime.now(tz=datetime.UTC)) + + processing_deadline = self._processing_deadline_duration + if isinstance(processing_deadline, datetime.timedelta): + processing_deadline = int(processing_deadline.total_seconds()) + + if expires is None: + expires = self._expires + if isinstance(expires, datetime.timedelta): + expires = int(expires.total_seconds()) + + if isinstance(countdown, datetime.timedelta): + countdown = int(countdown.total_seconds()) + + if not headers: + headers = {} + + if headers.get("sentry-propagate-traces", True): + headers = { + "sentry-trace": sentry_sdk.get_traceparent() or "", + "baggage": sentry_sdk.get_baggage() or "", + **headers, + } + + # Monitor config is patched in by the sentry_sdk + # however, taskworkers do not support the nested object, + # nor do they use it when creating checkins. + if "sentry-monitor-config" in headers: + del headers["sentry-monitor-config"] + + for key, value in headers.items(): + if value is None or isinstance(value, (str, bytes, int, bool, float)): + headers[key] = str(value) + else: + raise ValueError( + "Only scalar header values are supported. 
" + f"The `{key}` header value is of type {type(value)}" + ) + + parameters_json = orjson.dumps({"args": args, "kwargs": kwargs}) + if ( + len(parameters_json) > MAX_PARAMETER_BYTES_BEFORE_COMPRESSION + or self.compression_type == CompressionType.ZSTD + ): + # Worker uses this header to determine if the parameters are decompressed + headers["compression-type"] = CompressionType.ZSTD.value + start_time = time.perf_counter() + parameters_str = base64.b64encode(zstd.compress(parameters_json)).decode("utf8") + end_time = time.perf_counter() + + self.namespace.metrics.distribution( + "taskworker.producer.compressed_parameters_size", + len(parameters_str), + tags={ + "namespace": self._namespace.name, + "taskname": self.name, + "topic": self._namespace.topic, + }, + ) + self.namespace.metrics.distribution( + "taskworker.producer.compression_time", + end_time - start_time, + tags={ + "namespace": self._namespace.name, + "taskname": self.name, + "topic": self._namespace.topic + }, + ) + else: + parameters_str = parameters_json.decode("utf8") + + return TaskActivation( + id=uuid4().hex, + namespace=self._namespace.name, + taskname=self.name, + headers=headers, + parameters=parameters_str, + retry_state=self._create_retry_state(), + received_at=received_at, + processing_deadline_duration=processing_deadline, + expires=expires, + delay=countdown, + ) + + def _create_retry_state(self) -> RetryState: + retry = self.retry or self._namespace.default_retry or None + if not retry or self.at_most_once: + # If the task and namespace have no retry policy, + # or can only be attempted once make a single + # attempt and then discard the task. + return RetryState( + attempts=0, + max_attempts=1, + on_attempts_exceeded=ON_ATTEMPTS_EXCEEDED_DISCARD, + at_most_once=self.at_most_once, + ) + return retry.initial_state() + + def should_retry(self, state: RetryState, exc: Exception) -> bool: + # No retry policy means no retries. + retry = self.retry + if not retry: + return False + return retry.should_retry(state, exc) diff --git a/clients/python/src/taskbroker_client/types.py b/clients/python/src/taskbroker_client/types.py new file mode 100644 index 00000000..081ee650 --- /dev/null +++ b/clients/python/src/taskbroker_client/types.py @@ -0,0 +1,39 @@ +import dataclasses +from typing import Callable, Protocol + +from arroyo.backends.kafka import KafkaProducer +from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation, TaskActivationStatus + + +class AtMostOnceStore(Protocol): + """ + Interface for the at_most_once store used for idempotent task execution. + """ + def add(self, key: str, value: str, timeout: int) -> bool: ... + + +ProducerFactory = Callable[[str], KafkaProducer] +""" +A factory interface for resolving topics into a KafkaProducer +that can produce on the provided topic. +""" + + +@dataclasses.dataclass +class InflightTaskActivation: + """ + A TaskActivation with Metadata used within workers. 
+    """
+    activation: TaskActivation
+    host: str
+    receive_timestamp: float
+
+
+@dataclasses.dataclass
+class ProcessingResult:
+    """Result structure sent from child processes to the parent"""
+
+    task_id: str
+    status: TaskActivationStatus.ValueType
+    host: str
+    receive_timestamp: float
diff --git a/clients/python/src/taskbroker_client/worker/__init__.py b/clients/python/src/taskbroker_client/worker/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/clients/python/src/taskbroker_client/worker/client.py b/clients/python/src/taskbroker_client/worker/client.py
new file mode 100644
index 00000000..ae0a1497
--- /dev/null
+++ b/clients/python/src/taskbroker_client/worker/client.py
@@ -0,0 +1,346 @@
+import hashlib
+import hmac
+import logging
+import random
+import threading
+import time
+from collections.abc import Callable
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import grpc
+import orjson
+from google.protobuf.message import Message
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    FetchNextTask,
+    GetTaskRequest,
+    SetTaskStatusRequest,
+)
+from sentry_protos.taskbroker.v1.taskbroker_pb2_grpc import ConsumerServiceStub
+
+from taskbroker_client.constants import (
+    DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS,
+    DEFAULT_REBALANCE_AFTER,
+    DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT,
+)
+from taskbroker_client.metrics import MetricsBackend
+from taskbroker_client.types import InflightTaskActivation, ProcessingResult
+
+logger = logging.getLogger("sentry.taskworker.client")
+
+MAX_ACTIVATION_SIZE = 1024 * 1024 * 10
+"""Max payload size we will process."""
+
+
+def make_broker_hosts(
+    host_prefix: str,
+    num_brokers: int | None,
+    host_list: str | None = None,
+) -> list[str]:
+    """
+    Handle RPC host CLI options and create a list of broker host:ports
+    """
+    if host_list:
+        stripped = map(lambda x: x.strip(), host_list.split(","))
+        return list(filter(lambda x: len(x), stripped))
+    if not num_brokers:
+        return [host_prefix]
+    domain, port = host_prefix.split(":")
+    return [f"{domain}-{i}:{port}" for i in range(0, num_brokers)]
+
+
+class ClientCallDetails(grpc.ClientCallDetails):
+    """
+    Subclass of grpc.ClientCallDetails that allows metadata to be updated
+    """
+
+    def __init__(
+        self,
+        method: str,
+        timeout: float | None,
+        metadata: tuple[tuple[str, str | bytes], ...]
| None, + credentials: grpc.CallCredentials | None, + ): + self.timeout = timeout + self.method = method + self.metadata = metadata + self.credentials = credentials + + +# Type alias based on grpc-stubs +ContinuationType = Callable[[ClientCallDetails, Message], Any] + + +if TYPE_CHECKING: + InterceptorBase = grpc.UnaryUnaryClientInterceptor[Message, Message] + CallFuture = grpc.CallFuture[Message] +else: + InterceptorBase = grpc.UnaryUnaryClientInterceptor + CallFuture = Any + + +class RequestSignatureInterceptor(InterceptorBase): + def __init__(self, shared_secret: list[str]): + self._secret = shared_secret[0].encode("utf-8") + + def intercept_unary_unary( + self, + continuation: ContinuationType, + client_call_details: grpc.ClientCallDetails, + request: Message, + ) -> CallFuture: + request_body = request.SerializeToString() + method = client_call_details.method.encode("utf-8") + + signing_payload = method + b":" + request_body + signature = hmac.new(self._secret, signing_payload, hashlib.sha256).hexdigest() + + metadata = list(client_call_details.metadata) if client_call_details.metadata else [] + metadata.append(("sentry-signature", signature)) + + call_details_with_meta = ClientCallDetails( + client_call_details.method, + client_call_details.timeout, + tuple(metadata), + client_call_details.credentials, + ) + return continuation(call_details_with_meta, request) + + +class HostTemporarilyUnavailable(Exception): + """Raised when a host is temporarily unavailable and should be retried later.""" + + pass + + +@dataclass +class HealthCheckSettings: + file_path: Path + touch_interval_sec: float + + +class TaskbrokerClient: + """ + Taskworker RPC client wrapper + + When num_brokers is provided, the client will connect to all brokers + and choose a new broker to pair with randomly every max_tasks_before_rebalance tasks. 
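+
+    A rough usage sketch (the broker address and namespace are illustrative,
+    not defaults shipped by this module)::
+
+        from taskbroker_client.metrics import NoOpMetricsBackend
+        from taskbroker_client.worker.client import TaskbrokerClient, make_broker_hosts
+
+        # "taskbroker:50051" with num_brokers=3 expands to
+        # ["taskbroker-0:50051", "taskbroker-1:50051", "taskbroker-2:50051"]
+        hosts = make_broker_hosts("taskbroker:50051", num_brokers=3)
+        client = TaskbrokerClient(hosts=hosts, metrics=NoOpMetricsBackend())
+        inflight = client.get_task(namespace="examples")
+        if inflight is not None:
+            print(inflight.activation.taskname, "from", inflight.host)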
+ """ + + def __init__( + self, + hosts: list[str], + metrics: MetricsBackend, + max_tasks_before_rebalance: int = DEFAULT_REBALANCE_AFTER, + max_consecutive_unavailable_errors: int = DEFAULT_CONSECUTIVE_UNAVAILABLE_ERRORS, + temporary_unavailable_host_timeout: int = DEFAULT_TEMPORARY_UNAVAILABLE_HOST_TIMEOUT, + health_check_settings: HealthCheckSettings | None = None, + rpc_secret: str | None = None, + grpc_config: str | None = None, + ) -> None: + assert len(hosts) > 0, "You must provide at least one RPC host to connect to" + self._hosts = hosts + self._rpc_secret = rpc_secret + self._metrics = metrics + + self._grpc_options: list[tuple[str, Any]] = [ + ("grpc.max_receive_message_length", MAX_ACTIVATION_SIZE) + ] + if grpc_config: + self._grpc_options.append(("grpc.service_config", grpc_config)) + + logger.info( + "taskworker.client.start", extra={"hosts": hosts, "options": self._grpc_options} + ) + + self._cur_host = random.choice(self._hosts) + self._host_to_stubs: dict[str, ConsumerServiceStub] = { + self._cur_host: self._connect_to_host(self._cur_host) + } + + self._max_tasks_before_rebalance = max_tasks_before_rebalance + self._num_tasks_before_rebalance = max_tasks_before_rebalance + + self._max_consecutive_unavailable_errors = max_consecutive_unavailable_errors + self._num_consecutive_unavailable_errors = 0 + + self._temporary_unavailable_hosts: dict[str, float] = {} + self._temporary_unavailable_host_timeout = temporary_unavailable_host_timeout + + self._health_check_settings = health_check_settings + self._timestamp_since_touch_lock = threading.Lock() + self._timestamp_since_touch = 0.0 + + def _emit_health_check(self) -> None: + if self._health_check_settings is None: + return + + with self._timestamp_since_touch_lock: + cur_time = time.time() + if ( + cur_time - self._timestamp_since_touch + < self._health_check_settings.touch_interval_sec + ): + return + + self._health_check_settings.file_path.touch() + self._metrics.incr( + "taskworker.client.health_check.touched", + ) + self._timestamp_since_touch = cur_time + + def _connect_to_host(self, host: str) -> ConsumerServiceStub: + logger.info("taskworker.client.connect", extra={"host": host}) + channel = grpc.insecure_channel(host, options=self._grpc_options) + if self._rpc_secret: + secrets = orjson.loads(self._rpc_secret) + channel = grpc.intercept_channel(channel, RequestSignatureInterceptor(secrets)) + return ConsumerServiceStub(channel) + + def _check_consecutive_unavailable_errors(self) -> None: + if self._num_consecutive_unavailable_errors >= self._max_consecutive_unavailable_errors: + self._temporary_unavailable_hosts[self._cur_host] = ( + time.time() + self._temporary_unavailable_host_timeout + ) + + def _clear_temporary_unavailable_hosts(self) -> None: + hosts_to_remove = [] + for host, timeout in self._temporary_unavailable_hosts.items(): + if time.time() >= timeout: + hosts_to_remove.append(host) + + for host in hosts_to_remove: + self._temporary_unavailable_hosts.pop(host) + + def _get_cur_stub(self) -> tuple[str, ConsumerServiceStub]: + self._clear_temporary_unavailable_hosts() + available_hosts = [h for h in self._hosts if h not in self._temporary_unavailable_hosts] + if not available_hosts: + # If all hosts are temporarily unavailable, wait for the shortest timeout + current_time = time.time() + shortest_timeout = min(self._temporary_unavailable_hosts.values()) + logger.info( + "taskworker.client.no_available_hosts", + extra={"sleeping for": shortest_timeout - current_time}, + ) + time.sleep(shortest_timeout - 
current_time) + return self._get_cur_stub() # try again + + if self._cur_host in self._temporary_unavailable_hosts: + self._cur_host = random.choice(available_hosts) + self._num_tasks_before_rebalance = self._max_tasks_before_rebalance + self._num_consecutive_unavailable_errors = 0 + self._metrics.incr( + "taskworker.client.loadbalancer.rebalance", + tags={"reason": "unavailable_count_reached"}, + ) + elif self._num_tasks_before_rebalance == 0: + self._cur_host = random.choice(available_hosts) + self._num_tasks_before_rebalance = self._max_tasks_before_rebalance + self._num_consecutive_unavailable_errors = 0 + self._metrics.incr( + "taskworker.client.loadbalancer.rebalance", + tags={"reason": "max_tasks_reached"}, + ) + + if self._cur_host not in self._host_to_stubs: + self._host_to_stubs[self._cur_host] = self._connect_to_host(self._cur_host) + + self._num_tasks_before_rebalance -= 1 + return self._cur_host, self._host_to_stubs[self._cur_host] + + def get_task(self, namespace: str | None = None) -> InflightTaskActivation | None: + """ + Fetch a pending task. + + If a namespace is provided, only tasks for that namespace will be fetched. + This will return None if there are no tasks to fetch. + """ + self._emit_health_check() + + request = GetTaskRequest(namespace=namespace) + try: + host, stub = self._get_cur_stub() + with self._metrics.timer("taskworker.get_task.rpc", tags={"host": host}): + response = stub.GetTask(request) + except grpc.RpcError as err: + self._metrics.incr( + "taskworker.client.rpc_error", tags={"method": "GetTask", "status": err.code().name} + ) + if err.code() == grpc.StatusCode.NOT_FOUND: + # Because our current broker doesn't have any tasks, try rebalancing. + self._num_tasks_before_rebalance = 0 + return None + if err.code() == grpc.StatusCode.UNAVAILABLE: + self._num_consecutive_unavailable_errors += 1 + self._check_consecutive_unavailable_errors() + raise + self._num_consecutive_unavailable_errors = 0 + self._temporary_unavailable_hosts.pop(host, None) + if response.HasField("task"): + self._metrics.incr( + "taskworker.client.get_task", + tags={"namespace": response.task.namespace}, + ) + return InflightTaskActivation( + activation=response.task, host=host, receive_timestamp=time.monotonic() + ) + return None + + def update_task( + self, + processing_result: ProcessingResult, + fetch_next_task: FetchNextTask | None = None, + ) -> InflightTaskActivation | None: + """ + Update the status for a given task activation. + + The return value is the next task that should be executed. 
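+
+        A sketch of the fetch/execute/report loop this enables, with the
+        actual task execution elided::
+
+            from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+                TASK_ACTIVATION_STATUS_COMPLETE,
+                FetchNextTask,
+            )
+            from taskbroker_client.types import ProcessingResult
+
+            inflight = client.get_task()
+            while inflight is not None:
+                # ... execute inflight.activation here ...
+                result = ProcessingResult(
+                    task_id=inflight.activation.id,
+                    status=TASK_ACTIVATION_STATUS_COMPLETE,
+                    host=inflight.host,
+                    receive_timestamp=inflight.receive_timestamp,
+                )
+                # Chaining a FetchNextTask onto the update saves a GetTask round trip.
+                inflight = client.update_task(result, FetchNextTask())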
+ """ + self._emit_health_check() + + self._metrics.incr("taskworker.client.fetch_next", tags={"next": fetch_next_task is not None}) + self._clear_temporary_unavailable_hosts() + request = SetTaskStatusRequest( + id=processing_result.task_id, + status=processing_result.status, + fetch_next_task=fetch_next_task, + ) + + try: + if processing_result.host in self._temporary_unavailable_hosts: + self._metrics.incr( + "taskworker.client.skipping_set_task_due_to_unavailable_host", + tags={"broker_host": processing_result.host}, + ) + raise HostTemporarilyUnavailable( + f"Host: {processing_result.host} is temporarily unavailable" + ) + + with self._metrics.timer("taskworker.update_task.rpc", tags={"host": processing_result.host}): + response = self._host_to_stubs[processing_result.host].SetTaskStatus(request) + except grpc.RpcError as err: + self._metrics.incr( + "taskworker.client.rpc_error", + tags={"method": "SetTaskStatus", "status": err.code().name}, + ) + if err.code() == grpc.StatusCode.NOT_FOUND: + # The current broker is empty, switch. + self._num_tasks_before_rebalance = 0 + + return None + if err.code() == grpc.StatusCode.UNAVAILABLE: + self._num_consecutive_unavailable_errors += 1 + self._check_consecutive_unavailable_errors() + raise + + self._num_consecutive_unavailable_errors = 0 + self._temporary_unavailable_hosts.pop(processing_result.host, None) + if response.HasField("task"): + return InflightTaskActivation( + activation=response.task, + host=processing_result.host, + receive_timestamp=time.monotonic(), + ) + return None diff --git a/clients/python/src/taskbroker_client/worker/worker.py b/clients/python/src/taskbroker_client/worker/worker.py new file mode 100644 index 00000000..8bf950e5 --- /dev/null +++ b/clients/python/src/taskbroker_client/worker/worker.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import logging +import multiprocessing +import queue +import signal +import threading +import time +from concurrent.futures import ThreadPoolExecutor +from multiprocessing.context import ForkContext, SpawnContext +from multiprocessing.process import BaseProcess +from pathlib import Path +from typing import Any + +import grpc +from sentry_protos.taskbroker.v1.taskbroker_pb2 import FetchNextTask + +from taskbroker_client.app import import_app +from taskbroker_client.worker.client import ( + HealthCheckSettings, + HostTemporarilyUnavailable, + TaskbrokerClient, +) +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.constants import ( + DEFAULT_REBALANCE_AFTER, + DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH, + DEFAULT_WORKER_QUEUE_SIZE, + MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE, +) +from taskbroker_client.worker.workerchild import child_process + +logger = logging.getLogger("sentry.taskworker.worker") + + +class TaskWorker: + """ + A TaskWorker fetches tasks from a taskworker RPC host and handles executing task activations. + + Tasks are executed in a forked process so that processing timeouts can be enforced. + As tasks are completed status changes will be sent back to the RPC host and new tasks + will be fetched. 
+
+    Taskworkers can be run with `sentry run taskworker`
+    """
+
+    mp_context: ForkContext | SpawnContext
+
+    def __init__(
+        self,
+        app_module: str,
+        broker_hosts: list[str],
+        max_child_task_count: int | None = None,
+        namespace: str | None = None,
+        concurrency: int = 1,
+        child_tasks_queue_maxsize: int = DEFAULT_WORKER_QUEUE_SIZE,
+        result_queue_maxsize: int = DEFAULT_WORKER_QUEUE_SIZE,
+        rebalance_after: int = DEFAULT_REBALANCE_AFTER,
+        processing_pool_name: str | None = None,
+        process_type: str = "spawn",
+        health_check_file_path: str | None = None,
+        health_check_sec_per_touch: float = DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH,
+        **kwargs: dict[str, Any],
+    ) -> None:
+        self.options = kwargs
+        self._app_module = app_module
+        self._max_child_task_count = max_child_task_count
+        self._namespace = namespace
+        self._concurrency = concurrency
+        app = import_app(app_module)
+
+        self.client = TaskbrokerClient(
+            hosts=broker_hosts,
+            metrics=app.metrics,
+            max_tasks_before_rebalance=rebalance_after,
+            health_check_settings=(
+                None
+                if health_check_file_path is None
+                else HealthCheckSettings(Path(health_check_file_path), health_check_sec_per_touch)
+            ),
+            rpc_secret=app.config["rpc_secret"],
+            grpc_config=app.config["grpc_config"],
+        )
+        self._metrics = app.metrics
+
+        if process_type == "fork":
+            self.mp_context = multiprocessing.get_context("fork")
+        elif process_type == "spawn":
+            self.mp_context = multiprocessing.get_context("spawn")
+        else:
+            raise ValueError(f"Invalid process type: {process_type}")
+        self._process_type = process_type
+
+        self._child_tasks: multiprocessing.Queue[InflightTaskActivation] = self.mp_context.Queue(
+            maxsize=child_tasks_queue_maxsize
+        )
+        self._processed_tasks: multiprocessing.Queue[ProcessingResult] = self.mp_context.Queue(
+            maxsize=result_queue_maxsize
+        )
+        self._children: list[BaseProcess] = []
+        self._shutdown_event = self.mp_context.Event()
+        self._result_thread: threading.Thread | None = None
+        self._spawn_children_thread: threading.Thread | None = None
+
+        self._gettask_backoff_seconds = 0
+        self._setstatus_backoff_seconds = 0
+
+        self._processing_pool_name: str = processing_pool_name or "unknown"
+
+    def start(self) -> int:
+        """
+        Run the worker main loop
+
+        Once started, a Worker will loop until it is killed, or until it has
+        completed max_child_task_count tasks, at which point it shuts down.
+        """
+        self.start_result_thread()
+        self.start_spawn_children_thread()
+
+        # Convert signals into KeyboardInterrupt.
+        # Running shutdown() within the signal handler can lead to deadlocks
+        def signal_handler(*args: Any) -> None:
+            raise KeyboardInterrupt()
+
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+
+        try:
+            while True:
+                self.run_once()
+        except KeyboardInterrupt:
+            self.shutdown()
+            raise
+
+    def run_once(self) -> None:
+        """Access point for tests to run a single worker loop"""
+        self._add_task()
+
+    def shutdown(self) -> None:
+        """
+        Shut down cleanly.
+
+        Activate the shutdown event and drain results before terminating children.
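+
+        A supervisor can trigger this path externally with SIGTERM, which
+        start() converts into KeyboardInterrupt so that shutdown() runs
+        outside of signal-handler context::
+
+            import os
+            import signal
+
+            os.kill(worker_pid, signal.SIGTERM)  # worker_pid is illustrative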
+ """ + logger.info("taskworker.worker.shutdown.start") + self._shutdown_event.set() + + logger.info("taskworker.worker.shutdown.spawn_children") + if self._spawn_children_thread: + self._spawn_children_thread.join() + + logger.info("taskworker.worker.shutdown.children") + for child in self._children: + child.terminate() + for child in self._children: + child.join() + + logger.info("taskworker.worker.shutdown.result") + if self._result_thread: + # Use a timeout as sometimes this thread can deadlock on the Event. + self._result_thread.join(timeout=5) + + # Drain any remaining results synchronously + while True: + try: + result = self._processed_tasks.get_nowait() + self._send_result(result, fetch=False) + except queue.Empty: + break + + logger.info("taskworker.worker.shutdown.complete") + + def _add_task(self) -> bool: + """ + Add a task to child tasks queue. Returns False if no new task was fetched. + """ + if self._child_tasks.full(): + # I want to see how this differs between pools that operate well, + # and those that are not as effective. I suspect that with a consistent + # load of slowish tasks (like 5-15 seconds) that this will happen + # infrequently, resulting in the child tasks queue being full + # causing processing deadline expiration. + # Whereas in pools that have consistent short tasks, this happens + # more frequently, allowing workers to run more smoothly. + self._metrics.incr( + "taskworker.worker.add_tasks.child_tasks_full", + tags={"processing_pool": self._processing_pool_name}, + ) + # If we weren't able to add a task, backoff for a bit + time.sleep(0.1) + return False + + inflight = self.fetch_task() + if inflight: + try: + start_time = time.monotonic() + self._child_tasks.put(inflight) + self._metrics.distribution( + "taskworker.worker.child_task.put.duration", + time.monotonic() - start_time, + tags={"processing_pool": self._processing_pool_name}, + ) + except queue.Full: + self._metrics.incr( + "taskworker.worker.child_tasks.put.full", + tags={"processing_pool": self._processing_pool_name}, + ) + logger.warning( + "taskworker.add_task.child_task_queue_full", + extra={ + "task_id": inflight.activation.id, + "processing_pool": self._processing_pool_name, + }, + ) + return True + else: + return False + + def start_result_thread(self) -> None: + """ + Start a thread that delivers results and fetches new tasks. + We need to ship results in a thread because the RPC calls block for 20-50ms, + and many tasks execute more quickly than that. + + Without additional threads, we end up publishing results too slowly + and tasks accumulate in the `processed_tasks` queues and can cross + their processing deadline. + """ + + def result_thread() -> None: + logger.debug("taskworker.worker.result_thread.started") + iopool = ThreadPoolExecutor(max_workers=self._concurrency) + with iopool as executor: + while not self._shutdown_event.is_set(): + # TODO We should remove fetch_next = False from sentry as it couldn't be rolled + # out everywhere. 
+ # fetch_next = self._processing_pool_name not in options.get( + # "taskworker.fetch_next.disabled_pools" + # ) + try: + result = self._processed_tasks.get(timeout=1.0) + executor.submit(self._send_result, result, fetch=True) + except queue.Empty: + self._metrics.incr( + "taskworker.worker.result_thread.queue_empty", + tags={"processing_pool": self._processing_pool_name}, + ) + continue + + self._result_thread = threading.Thread( + name="send-result", target=result_thread, daemon=True + ) + self._result_thread.start() + + def _send_result(self, result: ProcessingResult, fetch: bool = True) -> bool: + """ + Send a result to the broker and conditionally fetch an additional task + + Run in a thread to avoid blocking the process, and during shutdown/ + See `start_result_thread` + """ + self._metrics.distribution( + "taskworker.worker.complete_duration", + time.monotonic() - result.receive_timestamp, + tags={"processing_pool": self._processing_pool_name}, + ) + + if fetch: + fetch_next = None + if not self._child_tasks.full(): + fetch_next = FetchNextTask(namespace=self._namespace) + + next = self._send_update_task(result, fetch_next) + if next: + try: + start_time = time.monotonic() + self._child_tasks.put(next) + self._metrics.distribution( + "taskworker.worker.child_task.put.duration", + time.monotonic() - start_time, + tags={"processing_pool": self._processing_pool_name}, + ) + except queue.Full: + logger.warning( + "taskworker.send_result.child_task_queue_full", + extra={ + "task_id": next.activation.id, + "processing_pool": self._processing_pool_name, + }, + ) + return True + + self._send_update_task(result, fetch_next=None) + return True + + def _send_update_task( + self, result: ProcessingResult, fetch_next: FetchNextTask | None + ) -> InflightTaskActivation | None: + """ + Do the RPC call to this worker's taskbroker, and handle errors + """ + logger.debug( + "taskworker.workers._send_result", + extra={ + "task_id": result.task_id, + "next": fetch_next is not None, + "processing_pool": self._processing_pool_name, + }, + ) + # Use the shutdown_event as a sleep mechanism + self._shutdown_event.wait(self._setstatus_backoff_seconds) + + try: + next_task = self.client.update_task(result, fetch_next) + self._setstatus_backoff_seconds = 0 + return next_task + except grpc.RpcError as e: + self._setstatus_backoff_seconds = min(self._setstatus_backoff_seconds + 1, 10) + if e.code() == grpc.StatusCode.UNAVAILABLE: + self._processed_tasks.put(result) + logger.warning( + "taskworker.send_update_task.failed", + extra={"task_id": result.task_id, "error": e}, + ) + return None + except HostTemporarilyUnavailable as e: + self._setstatus_backoff_seconds = min( + self._setstatus_backoff_seconds + 4, MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE + ) + logger.info( + "taskworker.send_update_task.temporarily_unavailable", + extra={"task_id": result.task_id, "error": str(e)}, + ) + self._processed_tasks.put(result) + return None + + def start_spawn_children_thread(self) -> None: + def spawn_children_thread() -> None: + logger.debug("taskworker.worker.spawn_children_thread.started") + while not self._shutdown_event.is_set(): + self._children = [child for child in self._children if child.is_alive()] + if len(self._children) >= self._concurrency: + time.sleep(0.1) + continue + for i in range(self._concurrency - len(self._children)): + process = self.mp_context.Process( + name=f"taskworker-child-{i}", + target=child_process, + args=( + self._app_module, + self._child_tasks, + self._processed_tasks, + 
self._shutdown_event, + self._max_child_task_count, + self._processing_pool_name, + self._process_type, + ), + ) + process.start() + self._children.append(process) + logger.info( + "taskworker.spawn_child", + extra={"pid": process.pid, "processing_pool": self._processing_pool_name}, + ) + self._metrics.incr( + "taskworker.worker.spawn_child", + tags={"processing_pool": self._processing_pool_name}, + ) + + self._spawn_children_thread = threading.Thread( + name="spawn-children", target=spawn_children_thread, daemon=True + ) + self._spawn_children_thread.start() + + def fetch_task(self) -> InflightTaskActivation | None: + # Use the shutdown_event as a sleep mechanism + self._shutdown_event.wait(self._gettask_backoff_seconds) + try: + activation = self.client.get_task(self._namespace) + except grpc.RpcError as e: + logger.info( + "taskworker.fetch_task.failed", + extra={"error": e, "processing_pool": self._processing_pool_name}, + ) + + self._gettask_backoff_seconds = min( + self._gettask_backoff_seconds + 4, MAX_BACKOFF_SECONDS_WHEN_HOST_UNAVAILABLE + ) + return None + + if not activation: + self._metrics.incr( + "taskworker.worker.fetch_task.not_found", + tags={"processing_pool": self._processing_pool_name}, + ) + logger.debug( + "taskworker.fetch_task.not_found", + extra={"processing_pool": self._processing_pool_name}, + ) + self._gettask_backoff_seconds = min(self._gettask_backoff_seconds + 1, 5) + return None + + self._gettask_backoff_seconds = 0 + return activation diff --git a/clients/python/src/taskbroker_client/worker/workerchild.py b/clients/python/src/taskbroker_client/worker/workerchild.py new file mode 100644 index 00000000..53338949 --- /dev/null +++ b/clients/python/src/taskbroker_client/worker/workerchild.py @@ -0,0 +1,454 @@ +from __future__ import annotations + +import base64 +import contextlib +import logging +import queue +import signal +import time +from collections.abc import Callable, Generator +from multiprocessing.synchronize import Event +from types import FrameType +from typing import Any + +# XXX: Don't import any modules that will import django here, do those within child_process +import orjson +import sentry_sdk +import zstandard as zstd + +from taskbroker_client.app import import_app +from taskbroker_client.types import InflightTaskActivation, ProcessingResult +from taskbroker_client.constants import CompressionType +from taskbroker_client.retry import NoRetriesRemainingError +from taskbroker_client.state import clear_current_task, current_task, set_current_task +from taskbroker_client.task import Task +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + TASK_ACTIVATION_STATUS_COMPLETE, + TASK_ACTIVATION_STATUS_FAILURE, + TASK_ACTIVATION_STATUS_RETRY, + TaskActivation, + TaskActivationStatus, +) +from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS +from sentry_sdk.crons import MonitorStatus, capture_checkin + +logger = logging.getLogger("sentry.taskworker.worker") + + +class ProcessingDeadlineExceeded(BaseException): + pass + + +@contextlib.contextmanager +def timeout_alarm( + seconds: int, handler: Callable[[int, FrameType | None], None] +) -> Generator[None]: + """ + Context manager to handle SIGALRM handlers + + To prevent tasks from consuming a worker forever, we set a timeout + alarm that will interrupt tasks that run longer than + their processing_deadline. 
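+
+    A usage sketch (the handler mirrors what run_worker installs below)::
+
+        import time
+        from types import FrameType
+
+        def on_deadline(signum: int, frame: FrameType | None) -> None:
+            raise ProcessingDeadlineExceeded("deadline exceeded")
+
+        with timeout_alarm(2, on_deadline):
+            time.sleep(1)  # finishes before the alarm fires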
+    """
+    original = signal.signal(signal.SIGALRM, handler)
+    try:
+        signal.alarm(seconds)
+        yield
+    finally:
+        signal.alarm(0)
+        signal.signal(signal.SIGALRM, original)
+
+
+def load_parameters(data: str, headers: dict[str, str]) -> dict[str, Any]:
+    compression_type = headers.get("compression-type", None)
+    if not compression_type or compression_type == CompressionType.PLAINTEXT.value:
+        return orjson.loads(data)
+    elif compression_type == CompressionType.ZSTD.value:
+        return orjson.loads(zstd.decompress(base64.b64decode(data)))
+    else:
+        logger.error(
+            "Unsupported compression type: %s. Continuing with plaintext.", compression_type
+        )
+        return orjson.loads(data)
+
+
+def status_name(status: TaskActivationStatus.ValueType) -> str:
+    """Convert a TaskActivationStatus to a human-readable name"""
+    if status == TASK_ACTIVATION_STATUS_COMPLETE:
+        return "complete"
+    if status == TASK_ACTIVATION_STATUS_FAILURE:
+        return "failure"
+    if status == TASK_ACTIVATION_STATUS_RETRY:
+        return "retry"
+    return f"unknown-{status}"
+
+
+def child_process(
+    app_module: str,
+    child_tasks: queue.Queue[InflightTaskActivation],
+    processed_tasks: queue.Queue[ProcessingResult],
+    shutdown_event: Event,
+    max_task_count: int | None,
+    processing_pool_name: str,
+    process_type: str,
+) -> None:
+    """
+    The entrypoint for spawned worker children.
+
+    Any import that could pull in django needs to be put inside this function
+    and not the module root. If modules that include django are imported at
+    the module level, the wrong django settings will be used.
+    """
+    app = import_app(app_module)
+    app.load_modules()
+    taskregistry = app.taskregistry
+    metrics = app.metrics
+
+    def _get_known_task(activation: TaskActivation) -> Task[Any, Any] | None:
+        if not taskregistry.contains(activation.namespace):
+            logger.error(
+                "taskworker.invalid_namespace",
+                extra={"namespace": activation.namespace, "taskname": activation.taskname},
+            )
+            return None
+
+        namespace = taskregistry.get(activation.namespace)
+        if not namespace.contains(activation.taskname):
+            logger.error(
+                "taskworker.invalid_taskname",
+                extra={"namespace": activation.namespace, "taskname": activation.taskname},
+            )
+            return None
+        return namespace.get(activation.taskname)
+
+    def run_worker(
+        child_tasks: queue.Queue[InflightTaskActivation],
+        processed_tasks: queue.Queue[ProcessingResult],
+        shutdown_event: Event,
+        max_task_count: int | None,
+        processing_pool_name: str,
+        process_type: str,
+    ) -> None:
+        processed_task_count = 0
+
+        def handle_alarm(signum: int, frame: FrameType | None) -> None:
+            """
+            Handle SIGALRM
+
+            If we hit an alarm in a child, we need to push a result
+            and terminate the child.
+ """ + deadline = -1 + current = current_task() + taskname = "unknown" + if current: + taskname = current.taskname + deadline = current.processing_deadline_duration + raise ProcessingDeadlineExceeded( + f"execution deadline of {deadline} seconds exceeded by {taskname}" + ) + + while not shutdown_event.is_set(): + if max_task_count and processed_task_count >= max_task_count: + metrics.incr( + "taskworker.worker.max_task_count_reached", + tags={"count": processed_task_count, "processing_pool": processing_pool_name}, + ) + logger.info( + "taskworker.max_task_count_reached", extra={"count": processed_task_count} + ) + break + + try: + inflight = child_tasks.get(timeout=1.0) + except queue.Empty: + metrics.incr( + "taskworker.worker.child_task_queue_empty", + tags={"processing_pool": processing_pool_name}, + ) + continue + + task_func = _get_known_task(inflight.activation) + if not task_func: + metrics.incr( + "taskworker.worker.unknown_task", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + sample_rate=1.0, + ) + with sentry_sdk.isolation_scope() as scope: + scope.set_tag("taskname", inflight.activation.taskname) + scope.set_tag("namespace", inflight.activation.namespace) + scope.set_tag("processing_pool", processing_pool_name) + scope.set_extra("activation", str(inflight.activation)) + scope.capture_message( + f"Unregistered task {inflight.activation.taskname} was not executed" + ) + + processed_tasks.put( + ProcessingResult( + task_id=inflight.activation.id, + status=TASK_ACTIVATION_STATUS_FAILURE, + host=inflight.host, + receive_timestamp=inflight.receive_timestamp, + ) + ) + continue + + if task_func.at_most_once: + if app.should_attempt_at_most_once(inflight.activation): + metrics.incr( + "taskworker.task.at_most_once.executed", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + ) + else: + metrics.incr( + "taskworker.worker.at_most_once.skipped", + tags={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": processing_pool_name, + }, + ) + continue + + set_current_task(inflight.activation) + + next_state = TASK_ACTIVATION_STATUS_FAILURE + # Use time.time() so we can measure against activation.received_at + execution_start_time = time.time() + try: + with timeout_alarm(inflight.activation.processing_deadline_duration, handle_alarm): + _execute_activation(task_func, inflight.activation) + next_state = TASK_ACTIVATION_STATUS_COMPLETE + except ProcessingDeadlineExceeded as err: + with sentry_sdk.isolation_scope() as scope: + scope.fingerprint = [ + "taskworker.processing_deadline_exceeded", + inflight.activation.namespace, + inflight.activation.taskname, + ] + scope.set_transaction_name(inflight.activation.taskname) + sentry_sdk.capture_exception(err) + metrics.incr( + "taskworker.worker.processing_deadline_exceeded", + tags={ + "processing_pool": processing_pool_name, + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + }, + ) + next_state = TASK_ACTIVATION_STATUS_FAILURE + except Exception as err: + retry = task_func.retry + captured_error = False + if retry: + if retry.should_retry(inflight.activation.retry_state, err): + logger.info( + "taskworker.task.retry", + extra={ + "namespace": inflight.activation.namespace, + "taskname": inflight.activation.taskname, + "processing_pool": 
processing_pool_name, + "error": str(err), + }, + ) + next_state = TASK_ACTIVATION_STATUS_RETRY + elif retry.max_attempts_reached(inflight.activation.retry_state): + with sentry_sdk.isolation_scope() as scope: + retry_error = NoRetriesRemainingError( + f"{inflight.activation.taskname} has consumed all of its retries" + ) + retry_error.__cause__ = err + scope.fingerprint = [ + "taskworker.no_retries_remaining", + inflight.activation.namespace, + inflight.activation.taskname, + ] + scope.set_transaction_name(inflight.activation.taskname) + sentry_sdk.capture_exception(retry_error) + captured_error = True + + if not captured_error and next_state != TASK_ACTIVATION_STATUS_RETRY: + sentry_sdk.capture_exception(err) + + clear_current_task() + processed_task_count += 1 + + # Get completion time before pushing to queue, so we can measure queue append time + execution_complete_time = time.time() + with metrics.timer( + "taskworker.worker.processed_tasks.put.duration", + tags={ + "processing_pool": processing_pool_name, + }, + ): + processed_tasks.put( + ProcessingResult( + task_id=inflight.activation.id, + status=next_state, + host=inflight.host, + receive_timestamp=inflight.receive_timestamp, + ) + ) + + record_task_execution( + inflight.activation, + next_state, + execution_start_time, + execution_complete_time, + processing_pool_name, + inflight.host, + ) + + def _execute_activation(task_func: Task[Any, Any], activation: TaskActivation) -> None: + """Invoke a task function with the activation parameters.""" + headers = {k: v for k, v in activation.headers.items()} + parameters = load_parameters(activation.parameters, headers) + + args = parameters.get("args", []) + kwargs = parameters.get("kwargs", {}) + + transaction = sentry_sdk.continue_trace( + environ_or_headers=headers, + op="queue.task.taskworker", + name=activation.taskname, + origin="taskworker", + ) + sampling_context = { + "taskworker": { + "task": activation.taskname, + } + } + with ( + metrics.track_memory_usage( + "taskworker.worker.memory_change", + tags={"namespace": activation.namespace, "taskname": activation.taskname}, + ), + sentry_sdk.isolation_scope(), + sentry_sdk.start_transaction(transaction, custom_sampling_context=sampling_context), + ): + transaction.set_data( + "taskworker-task", {"args": args, "kwargs": kwargs, "id": activation.id} + ) + task_added_time = activation.received_at.ToDatetime().timestamp() + # latency attribute needs to be in milliseconds + latency = (time.time() - task_added_time) * 1000 + + with sentry_sdk.start_span( + op=OP.QUEUE_PROCESS, + name=activation.taskname, + origin="taskworker", + ) as span: + span.set_data(SPANDATA.MESSAGING_DESTINATION_NAME, activation.namespace) + span.set_data(SPANDATA.MESSAGING_MESSAGE_ID, activation.id) + span.set_data(SPANDATA.MESSAGING_MESSAGE_RECEIVE_LATENCY, latency) + span.set_data( + SPANDATA.MESSAGING_MESSAGE_RETRY_COUNT, activation.retry_state.attempts + ) + span.set_data(SPANDATA.MESSAGING_SYSTEM, "taskworker") + + # TODO(taskworker) remove this when doing cleanup + # The `__start_time` parameter is spliced into task parameters by + # sentry.celery.SentryTask._add_metadata and needs to be removed + # from kwargs like sentry.tasks.base.instrumented_task does. 
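+                # For example, a legacy payload's kwargs may look like
+                # {"org_id": 1, "__start_time": 1730000000.0} (values are
+                # illustrative); only the "__start_time" key is stripped.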
+ if "__start_time" in kwargs: + kwargs.pop("__start_time") + + try: + task_func(*args, **kwargs) + transaction.set_status(SPANSTATUS.OK) + except Exception: + transaction.set_status(SPANSTATUS.INTERNAL_ERROR) + raise + + def record_task_execution( + activation: TaskActivation, + status: TaskActivationStatus.ValueType, + start_time: float, + completion_time: float, + processing_pool_name: str, + taskbroker_host: str, + ) -> None: + task_added_time = activation.received_at.ToDatetime().timestamp() + execution_duration = completion_time - start_time + execution_latency = completion_time - task_added_time + + logger.debug( + "taskworker.task_execution", + extra={ + "taskname": activation.taskname, + "execution_duration": execution_duration, + "execution_latency": execution_latency, + "status": status_name(status), + }, + ) + metrics.incr( + "taskworker.worker.execute_task", + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "status": status_name(status), + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + metrics.distribution( + "taskworker.worker.execution_duration", + execution_duration, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + metrics.distribution( + "taskworker.worker.execution_latency", + execution_latency, + tags={ + "namespace": activation.namespace, + "taskname": activation.taskname, + "processing_pool": processing_pool_name, + "taskbroker_host": taskbroker_host, + }, + ) + + namespace = taskregistry.get(activation.namespace) + metrics.incr( + "taskworker.cogs.usage", + value=int(execution_duration * 1000), + tags={"feature": namespace.app_feature}, + ) + + if ( + "sentry-monitor-check-in-id" in activation.headers + and "sentry-monitor-slug" in activation.headers + ): + monitor_status = MonitorStatus.ERROR + if status == TASK_ACTIVATION_STATUS_COMPLETE: + monitor_status = MonitorStatus.OK + + capture_checkin( + monitor_slug=activation.headers["sentry-monitor-slug"], + check_in_id=activation.headers["sentry-monitor-check-in-id"], + duration=execution_duration, + status=monitor_status, + ) + + # Run the worker loop + run_worker( + child_tasks, + processed_tasks, + shutdown_event, + max_task_count, + processing_pool_name, + process_type, + ) diff --git a/clients/python/tests/__init__.py b/clients/python/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py new file mode 100644 index 00000000..e53088b7 --- /dev/null +++ b/clients/python/tests/conftest.py @@ -0,0 +1,19 @@ +from datetime import UTC, datetime + +import time_machine +from arroyo.backends.kafka import KafkaProducer + + +def producer_factory(topic: str) -> KafkaProducer: + config = { + "bootstrap.servers": "127.0.0.1:9092", + "compression.type": "lz4", + "message.max.bytes": 50000000, # 50MB + } + return KafkaProducer(config) + + +def freeze_time(t: str | datetime | None = None) -> time_machine.travel: + if t is None: + t = datetime.now(UTC) + return time_machine.travel(t, tick=False) diff --git a/clients/python/tests/scheduler/__init__.py b/clients/python/tests/scheduler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/tests/scheduler/test_runner.py b/clients/python/tests/scheduler/test_runner.py new file mode 100644 index 00000000..10480be5 --- /dev/null +++ 
b/clients/python/tests/scheduler/test_runner.py @@ -0,0 +1,416 @@ +from datetime import UTC, datetime, timedelta +from unittest.mock import Mock, patch + +import pytest +from redis import StrictRedis + +from taskbroker_client.app import TaskbrokerApp +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.scheduler.config import crontab +from taskbroker_client.scheduler.runner import RunStorage, ScheduleRunner + +from ..conftest import freeze_time, producer_factory + + +@pytest.fixture +def task_app() -> TaskbrokerApp: + app = TaskbrokerApp(producer_factory=producer_factory) + namespace = app.taskregistry.create_namespace("test") + + @namespace.register(name="valid") + def test_func() -> None: + pass + + @namespace.register(name="second") + def second_func() -> None: + pass + + return app + + +@pytest.fixture +def run_storage() -> RunStorage: + # TODO use env vars for redis port. + redis = StrictRedis(host="localhost", port=6379, decode_responses=True) + redis.flushdb() + return RunStorage(metrics=NoOpMetricsBackend(), redis=redis) + + +def test_runstorage_zero_duration(run_storage: RunStorage) -> None: + with freeze_time("2025-07-19 14:25:00"): + now = datetime.now(tz=UTC) + result = run_storage.set("test:do_stuff", now) + assert result is True + + +def test_runstorage_double_set(run_storage: RunStorage) -> None: + with freeze_time("2025-07-19 14:25:00"): + now = datetime.now(tz=UTC) + first = run_storage.set("test:do_stuff", now) + second = run_storage.set("test:do_stuff", now) + + assert first is True, "initial set should return true" + assert second is False, "writing a key that exists should fail" + + +def test_schedulerunner_add_invalid(task_app: TaskbrokerApp) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + with pytest.raises(ValueError) as err: + schedule_set.add( + "invalid", + { + "task": "invalid", + "schedule": timedelta(minutes=5), + }, + ) + assert "Invalid task name" in str(err) + + with pytest.raises(KeyError) as key_err: + schedule_set.add( + "invalid", + { + "task": "test:invalid", + "schedule": timedelta(minutes=5), + }, + ) + assert "No task registered" in str(key_err) + + with pytest.raises(ValueError) as err: + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(microseconds=99), + }, + ) + assert "microseconds" in str(err) + + +def test_schedulerunner_tick_no_tasks(task_app: TaskbrokerApp, run_storage: RunStorage) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + with freeze_time("2025-01-24 14:25:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 60 + + +def test_schedulerunner_tick_one_task_time_remaining( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + # Last run was two minutes ago. 
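+    # (RunStorage.set(key, next_run) appears to record "now" as the last-run
+    # time and keep the key until next_run; the read below returns 14:23.)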
+ with freeze_time("2025-01-24 14:23:00 UTC"): + run_storage.set("test:valid", datetime(2025, 1, 24, 14, 28, 0, tzinfo=UTC)) + + namespace = task_app.taskregistry.get("test") + with freeze_time("2025-01-24 14:25:00 UTC"), patch.object(namespace, "send_task") as mock_send: + sleep_time = schedule_set.tick() + assert sleep_time == 180 + assert mock_send.call_count == 0 + + last_run = run_storage.read("test:valid") + assert last_run == datetime(2025, 1, 24, 14, 23, 0, tzinfo=UTC) + + +def test_schedulerunner_tick_one_task_spawned( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + + # Last run was 5 minutes from the freeze_time below + run_storage.read_many.return_value = { + "test:valid": datetime(2025, 1, 24, 14, 19, 55, tzinfo=UTC), + } + run_storage.set.return_value = True + + namespace = task_app.taskregistry.get("test") + with freeze_time("2025-01-24 14:25:00 UTC"), patch.object(namespace, "send_task") as mock_send: + sleep_time = schedule_set.tick() + assert sleep_time == 300 + assert mock_send.call_count == 1 + + # scheduled tasks should not continue the scheduler trace + send_args = mock_send.call_args + assert send_args.args[0].headers["sentry-propagate-traces"] == "False" + assert "sentry-trace" not in send_args.args[0].headers + + assert run_storage.set.call_count == 1 + # set() is called with the correct next_run time + run_storage.set.assert_called_with("test:valid", datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC)) + + +@patch("taskbroker_client.scheduler.runner.capture_checkin") +def test_schedulerunner_tick_create_checkin( + mock_capture_checkin: Mock, task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + run_storage = Mock(spec=RunStorage) + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "important-task", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + + # Last run was 5 minutes from the freeze_time below + run_storage.read_many.return_value = { + "test:valid": datetime(2025, 1, 24, 14, 19, 55, tzinfo=UTC), + } + run_storage.set.return_value = True + mock_capture_checkin.return_value = "checkin-id" + + namespace = task_app.taskregistry.get("test") + with ( + freeze_time("2025-01-24 14:25:00 UTC"), + patch.object(namespace, "send_task") as mock_send, + ): + sleep_time = schedule_set.tick() + assert sleep_time == 300 + + assert mock_send.call_count == 1 + + # assert that the activation had the correct headers + send_args = mock_send.call_args + assert "sentry-monitor-check-in-id" in send_args.args[0].headers + assert send_args.args[0].headers["sentry-monitor-slug"] == "important-task" + assert send_args.args[0].headers["sentry-propagate-traces"] == "False" + assert "sentry-trace" not in send_args.args[0].headers + + # Ensure a checkin was created + assert mock_capture_checkin.call_count == 1 + mock_capture_checkin.assert_called_with( + monitor_slug="important-task", + monitor_config={ + "schedule": { + "type": "interval", + "unit": "minute", + "value": 5, + }, + "timezone": "UTC", + }, + status="in_progress", + ) + + +def test_schedulerunner_tick_key_exists_no_spawn( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": 
timedelta(minutes=5), + }, + ) + + namespace = task_app.taskregistry.get("test") + with patch.object(namespace, "send_task") as mock_send, freeze_time("2025-01-24 14:25:00 UTC"): + # Run tick() to initialize state in the scheduler. This will write a key to run_storage. + sleep_time = schedule_set.tick() + assert sleep_time == 300 + assert mock_send.call_count == 1 + + with freeze_time("2025-01-24 14:30:00 UTC"): + # Set a key into run_storage to simulate another scheduler running + run_storage.delete("test:valid") + assert run_storage.set("test:valid", datetime.now(tz=UTC) + timedelta(minutes=2)) + + # Our scheduler would wakeup and tick again. + # The key exists in run_storage so we should not spawn a task. + # last_run time should synchronize with run_storage state, and count down from 14:30 + with freeze_time("2025-01-24 14:30:02 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 298 + assert mock_send.call_count == 1 + + +def test_schedulerunner_tick_one_task_multiple_ticks( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + + with freeze_time("2025-01-24 14:25:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 300 + + with freeze_time("2025-01-24 14:26:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 240 + + with freeze_time("2025-01-24 14:28:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 120 + + +def test_schedulerunner_tick_one_task_multiple_ticks_crontab( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": crontab(minute="*/2"), + }, + ) + + namespace = task_app.taskregistry.get("test") + with patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:24:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 120 + assert mock_send.call_count == 1 + + with freeze_time("2025-01-24 14:25:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 60 + + # Remove key to simulate expiration + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:26:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 120 + assert mock_send.call_count == 2 + + +def test_schedulerunner_tick_multiple_tasks( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(minutes=5), + }, + ) + schedule_set.add( + "second", + { + "task": "test:second", + "schedule": timedelta(minutes=2), + }, + ) + + namespace = task_app.taskregistry.get("test") + with patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:25:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 120 + + assert mock_send.call_count == 2 + + with freeze_time("2025-01-24 14:26:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 60 + + assert mock_send.call_count == 2 + + # Remove the redis key, as the ttl in redis doesn't respect freeze_time() + run_storage.delete("test:second") + with freeze_time("2025-01-24 14:27:01 UTC"): + sleep_time = schedule_set.tick() + # two minutes left on the 5 min task + assert sleep_time == 120 + + assert mock_send.call_count 
== 3 + + +def test_schedulerunner_tick_fast_and_slow( + task_app: TaskbrokerApp, run_storage: RunStorage +) -> None: + schedule_set = ScheduleRunner(app=task_app, run_storage=run_storage) + schedule_set.add( + "valid", + { + "task": "test:valid", + "schedule": timedelta(seconds=30), + }, + ) + schedule_set.add( + "second", + { + "task": "test:second", + "schedule": crontab(minute="*/2"), + }, + ) + + namespace = task_app.taskregistry.get("test") + with patch.object(namespace, "send_task") as mock_send: + with freeze_time("2025-01-24 14:25:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:25:30 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:26:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid", "second", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:26:30 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + called = extract_sent_tasks(mock_send) + assert called == ["valid", "valid", "second", "valid", "valid"] + + run_storage.delete("test:valid") + with freeze_time("2025-01-24 14:27:00 UTC"): + sleep_time = schedule_set.tick() + assert sleep_time == 30 + + assert run_storage.read("test:valid") + called = extract_sent_tasks(mock_send) + assert called == [ + "valid", + "valid", + "second", + "valid", + "valid", + "valid", + ] + + +def extract_sent_tasks(mock: Mock) -> list[str]: + return [call[0][0].taskname for call in mock.call_args_list] diff --git a/clients/python/tests/scheduler/test_schedules.py b/clients/python/tests/scheduler/test_schedules.py new file mode 100644 index 00000000..a83428d7 --- /dev/null +++ b/clients/python/tests/scheduler/test_schedules.py @@ -0,0 +1,199 @@ +from datetime import UTC, datetime, timedelta + +import pytest +from taskbroker_client.scheduler.config import crontab +from taskbroker_client.scheduler.schedules import CrontabSchedule, TimedeltaSchedule +from ..conftest import freeze_time + + +def test_timedeltaschedule_invalid() -> None: + with pytest.raises(ValueError): + TimedeltaSchedule(timedelta(microseconds=5)) + + with pytest.raises(ValueError): + TimedeltaSchedule(timedelta(seconds=-1)) + + +@freeze_time("2025-01-24 14:25:00") +def test_timedeltaschedule_is_due() -> None: + now = datetime.now(tz=UTC) + schedule = TimedeltaSchedule(timedelta(minutes=5)) + + assert not schedule.is_due(now) + + four_min_ago = now - timedelta(minutes=4, seconds=59) + assert not schedule.is_due(four_min_ago) + + five_min_ago = now - timedelta(minutes=5) + assert schedule.is_due(five_min_ago) + + six_min_ago = now - timedelta(minutes=6) + assert schedule.is_due(six_min_ago) + + +def test_timedeltaschedule_monitor_interval() -> None: + schedule = TimedeltaSchedule(timedelta(seconds=10)) + assert schedule.monitor_interval() == (10, "second") + + schedule = TimedeltaSchedule(timedelta(minutes=5)) + assert schedule.monitor_interval() == (5, "minute") + + schedule = TimedeltaSchedule(timedelta(minutes=5, seconds=10)) + assert schedule.monitor_interval() == (5, "minute") + + schedule = TimedeltaSchedule(timedelta(hours=1)) + assert schedule.monitor_interval() == (1, "hour") + + 
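+# A sketch of how these schedule classes are consumed in practice:
+# ScheduleRunner (exercised in test_runner.py above) drives them through
+# is_due()/remaining_seconds(), roughly:
+#
+#     runner = ScheduleRunner(app=task_app, run_storage=run_storage)
+#     runner.add("valid", {"task": "test:valid", "schedule": crontab(minute="*/5")})
+#     while True:
+#         time.sleep(runner.tick())  # seconds until the next task is due
+
+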
+@freeze_time("2025-01-24 14:25:00") +def test_timedeltaschedule_remaining_seconds() -> None: + now = datetime.now(tz=UTC) + delta = timedelta(minutes=5) + schedule = TimedeltaSchedule(delta) + + assert schedule.remaining_seconds(None) == 0 + assert schedule.remaining_seconds(now) == 300 + + four_min_ago = now - timedelta(minutes=4, seconds=59) + assert schedule.remaining_seconds(four_min_ago) == 1 + + five_min_ago = now - timedelta(minutes=5) + assert schedule.remaining_seconds(five_min_ago) == 0 + + ten_min_ago = now - timedelta(minutes=10) + assert schedule.remaining_seconds(ten_min_ago) == 0 + + +def test_crontabschedule_invalid() -> None: + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(hour="99")) + + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(hour="25")) + + with pytest.raises(ValueError): + CrontabSchedule("test", crontab(day_of_week="25")) + + +def test_crontabschedule_is_due() -> None: + schedule = CrontabSchedule("test", crontab(minute="*/5")) + + # no last_run and not time to spawn + with freeze_time("2025-01-24 14:23:00"): + now = datetime.now(tz=UTC) + assert not schedule.is_due(None) + assert not schedule.is_due(now) + + with freeze_time("2025-01-24 14:25:00"): + now = datetime.now(tz=UTC) + assert schedule.is_due(None) + assert not schedule.is_due(now) + + # last run was 14:20, current time is 14:22 = not due + with freeze_time("2025-01-24 14:22:00"): + two_twenty = datetime.now(tz=UTC) - timedelta(minutes=2) + assert not schedule.is_due(two_twenty) + + # last run was 14:20, current time is 14:25 = due + with freeze_time("2025-01-24 14:25:00"): + two_twenty = datetime.now(tz=UTC) - timedelta(minutes=5) + assert schedule.is_due(two_twenty) + + # last run was 14:15, current time is 14:25 = due as we missed an interval + with freeze_time("2025-01-24 14:25:00"): + two_fifteen = datetime.now(tz=UTC) - timedelta(minutes=10) + assert schedule.is_due(two_fifteen) + + # last run was 14:26 (the future) current time is 14:25 = not due + with freeze_time("2025-01-24 14:25:00"): + future = datetime.now(tz=UTC) + timedelta(minutes=1) + assert not schedule.is_due(future) + + +def test_crontabschedule_remaining_seconds() -> None: + schedule = CrontabSchedule("test", crontab(minute="*/5")) + + # no last_run, but due in one minute + with freeze_time("2025-01-24 14:24:00"): + assert not schedule.is_due(None) + assert schedule.remaining_seconds(None) == 60 + + # no last_run, but due now + with freeze_time("2025-01-24 14:25:00"): + assert schedule.is_due(None) + assert schedule.remaining_seconds(None) == 0 + + # last run was late (14:21), next spawn is at 14:25 + with freeze_time("2025-01-24 14:25:00"): + four_min_ago = datetime.now(tz=UTC) - timedelta(minutes=4) + assert schedule.remaining_seconds(four_min_ago) == 0 + + # last run was 5 min ago, right on schedule + with freeze_time("2025-01-24 14:25:00"): + five_min_ago = datetime.now(tz=UTC) - timedelta(minutes=5) + assert schedule.remaining_seconds(five_min_ago) == 0 + + # last run was mere seconds ago. 5 min remaining + with freeze_time("2025-01-24 14:25:10"): + five_min_ago = datetime.now(tz=UTC) + assert schedule.remaining_seconds(five_min_ago) == 300 + + # Later in the minute. crontabs only have minute precision. 
+    with freeze_time("2025-01-24 14:25:59"):
+        five_min_ago = datetime.now(tz=UTC)
+        assert schedule.remaining_seconds(five_min_ago) == 300
+
+    # It isn't time yet, as we're mid-interval
+    with freeze_time("2025-01-24 14:23:10"):
+        three_min_ago = datetime.now(tz=UTC) - timedelta(minutes=3)
+        assert schedule.remaining_seconds(three_min_ago) == 120
+
+    # 14:19 was 1 min late, we missed a beat but we're currently on time.
+    with freeze_time("2025-01-24 14:25:10"):
+        six_min_ago = datetime.now(tz=UTC) - timedelta(minutes=6)
+        assert schedule.remaining_seconds(six_min_ago) == 0
+
+    # We have missed a few intervals, try to get back on schedule for the next beat
+    with freeze_time("2025-01-24 14:23:00"):
+        twenty_two_min_ago = datetime.now(tz=UTC) - timedelta(minutes=22)
+        assert schedule.remaining_seconds(twenty_two_min_ago) == 120
+
+    # We have encountered a value from the future.
+    # Our clock could be wrong, or we are competing with another scheduler.
+    # Advance to the next tick at 14:30.
+    with freeze_time("2025-01-24 14:24:00"):
+        future_two = datetime.now(tz=UTC) + timedelta(minutes=2)
+        assert schedule.remaining_seconds(future_two) == 360
+
+
+@freeze_time(datetime(2025, 1, 24, 14, 25, 0, tzinfo=UTC))
+def test_crontabschedule_runtime_after() -> None:
+    schedule = CrontabSchedule("test", crontab(minute="*/15"))
+
+    now = datetime.now(tz=UTC)
+    assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC)
+
+    last_run = datetime(2025, 1, 24, 14, 29, 15, tzinfo=UTC)
+    assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 14, 30, 0, tzinfo=UTC)
+
+    last_run = datetime(2025, 1, 24, 14, 38, 23, tzinfo=UTC)
+    assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 14, 45, 0, tzinfo=UTC)
+
+    schedule = CrontabSchedule("test", crontab(minute="1", hour="*/6"))
+    last_run = datetime(2025, 1, 24, 14, 29, 15, tzinfo=UTC)
+    assert schedule.runtime_after(last_run) == datetime(2025, 1, 24, 18, 1, 0, tzinfo=UTC)
+
+    schedule = CrontabSchedule("test", crontab(minute="*/1"))
+    now = datetime.now(tz=UTC)
+    assert schedule.runtime_after(now) == datetime(2025, 1, 24, 14, 26, 0, tzinfo=UTC)
+
+
+def test_crontabschedule_monitor_value() -> None:
+    schedule = CrontabSchedule("test", crontab(minute="*/5"))
+    assert schedule.monitor_value() == "*/5 * * * *"
+
+    schedule = CrontabSchedule("test", crontab(minute="*/10", hour="*/2"))
+    assert schedule.monitor_value() == "*/10 */2 * * *"
+
+    schedule = CrontabSchedule("test", crontab(minute="*/10", day_of_week="1"))
+    assert schedule.monitor_value() == "*/10 * * * 1"
diff --git a/clients/python/tests/test_app.py b/clients/python/tests/test_app.py
new file mode 100644
index 00000000..41a66936
--- /dev/null
+++ b/clients/python/tests/test_app.py
@@ -0,0 +1,49 @@
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation
+
+from examples.store import StubAtMostOnce
+from taskbroker_client.app import TaskbrokerApp
+from taskbroker_client.router import TaskRouter
+
+from .conftest import producer_factory
+
+
+class StubRouter(TaskRouter):
+    def route_namespace(self, name: str) -> str:
+        return "honk"
+
+
+def test_taskregistry_router_object() -> None:
+    app = TaskbrokerApp(producer_factory=producer_factory, router_class=StubRouter())
+    ns = app.taskregistry.create_namespace("test")
+    assert ns.topic == "honk"
+
+
+def test_taskregistry_router_str() -> None:
+    app = TaskbrokerApp(
+        producer_factory=producer_factory,
+        router_class="taskbroker_client.router.DefaultRouter",
+    )
+    ns = app.taskregistry.create_namespace("test")
+    
assert ns.topic == "taskbroker"
+
+
+def test_set_config() -> None:
+    app = TaskbrokerApp(producer_factory=producer_factory)
+    app.set_config({"rpc_secret": "testing", "ignored": "key"})
+    assert app.config["rpc_secret"] == "testing"
+    assert "ignored" not in app.config
+
+
+def test_should_attempt_at_most_once() -> None:
+    activation = TaskActivation(
+        id="111",
+        taskname="examples.simple_task",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    )
+    at_most = StubAtMostOnce()
+    app = TaskbrokerApp(producer_factory=producer_factory)
+    app.at_most_once_store(at_most)
+    assert app.should_attempt_at_most_once(activation)
+    assert not app.should_attempt_at_most_once(activation)
diff --git a/clients/python/tests/test_registry.py b/clients/python/tests/test_registry.py
new file mode 100644
index 00000000..0b6c1d8b
--- /dev/null
+++ b/clients/python/tests/test_registry.py
@@ -0,0 +1,380 @@
+import base64
+from concurrent.futures import Future
+from unittest.mock import Mock
+
+import orjson
+import pytest
+import zstandard as zstd
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    ON_ATTEMPTS_EXCEEDED_DEADLETTER,
+    ON_ATTEMPTS_EXCEEDED_DISCARD,
+)
+
+from taskbroker_client.constants import MAX_PARAMETER_BYTES_BEFORE_COMPRESSION, CompressionType
+from taskbroker_client.metrics import NoOpMetricsBackend
+from taskbroker_client.registry import TaskNamespace, TaskRegistry
+from taskbroker_client.retry import LastAction, Retry
+from taskbroker_client.router import DefaultRouter
+from taskbroker_client.task import Task
+
+from .conftest import producer_factory
+
+
+def test_namespace_register_task() -> None:
+    namespace = TaskNamespace(
+        name="tests",
+        producer_factory=producer_factory,
+        router=DefaultRouter(),
+        metrics=NoOpMetricsBackend(),
+        retry=None,
+    )
+
+    @namespace.register(name="tests.simple_task")
+    def simple_task() -> None:
+        raise NotImplementedError
+
+    assert namespace.default_retry is None
+    assert namespace.contains("tests.simple_task")
+    assert not namespace.contains("nope")
+
+    task = namespace.get("tests.simple_task")
+    assert task
+    assert task.name == "tests.simple_task"
+
+
+def test_namespace_register_inherits_default_retry() -> None:
+    namespace = TaskNamespace(
+        name="tests",
+        producer_factory=producer_factory,
+        router=DefaultRouter(),
+        metrics=NoOpMetricsBackend(),
+        retry=Retry(times=5, on=(RuntimeError,)),
+    )
+
+    @namespace.register(name="test.no_retry_param")
+    def no_retry_param() -> None:
+        raise NotImplementedError
+
+    retry = Retry(times=2, times_exceeded=LastAction.Deadletter)
+
+    @namespace.register(name="test.with_retry_param", retry=retry)
+    def with_retry_param() -> None:
+        raise NotImplementedError
+
+    with_retry = namespace.get("test.with_retry_param")
+    assert with_retry.retry == retry
+
+    @namespace.register(name="test.retry_none", retry=None)
+    def retry_none_param() -> None:
+        raise NotImplementedError
+
+    with_retry = namespace.get("test.retry_none")
+    assert with_retry.retry == namespace.default_retry
+
+
+def test_register_inherits_default_expires_processing_deadline() -> None:
+    namespace = TaskNamespace(
+        name="tests",
+        producer_factory=producer_factory,
+        router=DefaultRouter(),
+        metrics=NoOpMetricsBackend(),
+        retry=None,
+        expires=10 * 60,
+        processing_deadline_duration=5,
+    )
+
+    @namespace.register(name="test.no_expires")
+    def no_expires() -> None:
+        raise
NotImplementedError + + @namespace.register(name="test.with_expires", expires=30 * 60, processing_deadline_duration=10) + def with_expires() -> None: + raise NotImplementedError + + no_expires_task = namespace.get("test.no_expires") + activation = no_expires_task.create_activation([], {}) + assert activation.expires == 10 * 60 + assert activation.processing_deadline_duration == 5 + + with_expires_task = namespace.get("test.with_expires") + activation = with_expires_task.create_activation([], {}) + assert activation.expires == 30 * 60 + assert activation.processing_deadline_duration == 10 + + +def test_namespace_get_unknown() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + with pytest.raises(KeyError) as err: + namespace.get("nope") + assert "No task registered" in str(err) + + +def test_namespace_send_task_no_retry() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + @namespace.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + mock_producer = Mock() + namespace._producers["taskbroker"] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskbroker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +def test_namespace_send_task_with_compression() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + @namespace.register(name="test.compression_task", compression_type=CompressionType.ZSTD) + def simple_task_with_compression(param: str) -> None: + raise NotImplementedError + + activation = simple_task_with_compression.create_activation( + args=["test_arg"], kwargs={"test_key": "test_value"} + ) + + assert activation.headers.get("compression-type") == CompressionType.ZSTD.value + + expected_params = {"args": ["test_arg"], "kwargs": {"test_key": "test_value"}} + + decoded_data = base64.b64decode(activation.parameters.encode("utf-8")) + decompressed_data = zstd.decompress(decoded_data) + actual_params = orjson.loads(decompressed_data) + + assert actual_params == expected_params + + +def test_namespace_send_task_with_auto_compression() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + @namespace.register(name="test.compression_task") + def simple_task_with_compression(param: str) -> None: + raise NotImplementedError + + big_args = ["x" * (MAX_PARAMETER_BYTES_BEFORE_COMPRESSION + 1)] + activation = simple_task_with_compression.create_activation( + args=big_args, kwargs={"test_key": "test_value"} + ) + + assert activation.headers.get("compression-type") == CompressionType.ZSTD.value + + expected_params = {"args": big_args, "kwargs": {"test_key": "test_value"}} + + decoded_data = base64.b64decode(activation.parameters.encode("utf-8")) + decompressed_data = 
zstd.decompress(decoded_data) + actual_params = orjson.loads(decompressed_data) + + assert actual_params == expected_params + + +def test_namespace_send_task_with_retry() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + @namespace.register( + name="test.simpletask", retry=Retry(times=3, times_exceeded=LastAction.Deadletter) + ) + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + mock_producer = Mock() + namespace._producers["taskbroker"] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +def test_namespace_with_retry_send_task() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=Retry(times=3), + ) + + @namespace.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + mock_producer = Mock() + namespace._producers["taskbroker"] = mock_producer + + namespace.send_task(activation) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskbroker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +def test_namespace_with_wait_for_delivery_send_task() -> None: + namespace = TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=Retry(times=3), + ) + + @namespace.register(name="test.simpletask", wait_for_delivery=True) + def simple_task() -> None: + raise NotImplementedError + + activation = simple_task.create_activation([], {}) + + mock_producer = Mock() + namespace._producers["taskbroker"] = mock_producer + + ret_value: Future[None] = Future() + ret_value.set_result(None) + mock_producer.produce.return_value = ret_value + namespace.send_task(activation, wait_for_delivery=True) + assert mock_producer.produce.call_count == 1 + + mock_call = mock_producer.produce.call_args + assert mock_call[0][0].name == "taskbroker" + + proto_message = mock_call[0][1].value + assert proto_message == activation.SerializeToString() + + +def test_registry_get() -> None: + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) + ns = registry.create_namespace(name="tests") + + assert isinstance(ns, TaskNamespace) + assert ns.name == "tests" + assert ns.router + assert ns == registry.get("tests") + + with pytest.raises(KeyError): + registry.get("derp") + + assert registry.contains("derp") is False + assert registry.contains("tests") + + +def test_registry_get_task() -> None: + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) + ns = 
registry.create_namespace(name="tests") + + @ns.register(name="test.simpletask") + def simple_task() -> None: + raise NotImplementedError + + task = registry.get_task(ns.name, "test.simpletask") + assert isinstance(task, Task) + + with pytest.raises(KeyError): + registry.get_task("nope", "test.simpletask") + + with pytest.raises(KeyError): + registry.get_task(ns.name, "nope") + + +def test_registry_create_namespace_simple() -> None: + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) + ns = registry.create_namespace(name="tests") + assert ns.default_retry is None + assert ns.default_expires is None + assert ns.default_processing_deadline_duration == 10 + assert ns.name == "tests" + assert ns.topic == "taskbroker" + assert ns.app_feature == "tests" + + retry = Retry(times=3) + ns = registry.create_namespace( + "test-two", + retry=retry, + expires=60 * 10, + processing_deadline_duration=60, + app_feature="anvils", + ) + assert ns.default_retry == retry + assert ns.default_processing_deadline_duration == 60 + assert ns.default_expires == 60 * 10 + assert ns.name == "test-two" + assert ns.topic == "taskbroker" + assert ns.app_feature == "anvils" + + +def test_registry_create_namespace_duplicate() -> None: + registry = TaskRegistry( + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + ) + registry.create_namespace(name="tests") + with pytest.raises(ValueError, match="tests already exists"): + registry.create_namespace(name="tests") diff --git a/clients/python/tests/test_retry.py b/clients/python/tests/test_retry.py new file mode 100644 index 00000000..624efa96 --- /dev/null +++ b/clients/python/tests/test_retry.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +from multiprocessing.context import TimeoutError + +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + +from taskbroker_client.retry import LastAction, Retry, RetryTaskError + + +class RuntimeChildError(RuntimeError): + """Dummy exception for instanceof tests""" + + +def test_initial_state__defaults() -> None: + retry = Retry(times=2) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 2 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_initial_state__discard() -> None: + retry = Retry(times=1, times_exceeded=LastAction.Discard) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 1 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_initial_state__deadletter() -> None: + retry = Retry(times=5, times_exceeded=LastAction.Deadletter) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.max_attempts == 5 + assert proto.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + +def test_initial_state__delay_on_retry() -> None: + retry = Retry(times=5, delay=1) + proto = retry.initial_state() + + assert proto.attempts == 0 + assert proto.delay_on_retry == 1 + + +def test_should_retry_no_matching_error() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + err = Exception("something bad") + assert not retry.should_retry(state, err) + + state.attempts = 5 + assert not retry.should_retry(state, err) + + +def test_should_retry_retryerror() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + err = RetryTaskError("something bad") + assert 
retry.should_retry(state, err) + + state.attempts = 4 + assert not retry.should_retry(state, err) + + +def test_should_retry_multiprocessing_timeout() -> None: + retry = Retry(times=3) + state = retry.initial_state() + + timeout = TimeoutError("timeouts should retry if there are attempts left") + assert retry.should_retry(state, timeout) + + state.attempts = 1 + assert retry.should_retry(state, timeout) + + # attempt = 2 is actually the third attempt. + state.attempts = 2 + assert not retry.should_retry(state, timeout) + + state.attempts = 3 + assert not retry.should_retry(state, timeout) + + +def test_should_retry_error_allow_list() -> None: + retry = Retry(times=3, on=(RuntimeError, KeyError)) + state = retry.initial_state() + + err = RuntimeError("should retry") + assert retry.should_retry(state, err) + + key_err = KeyError("should retry") + assert retry.should_retry(state, key_err) + + err_child = RuntimeChildError("subclasses are retried") + assert retry.should_retry(state, err_child) + + value_err = ValueError("no retry") + assert not retry.should_retry(state, value_err) + + +def test_max_attempts_reached() -> None: + retry = Retry(times=5) + state = retry.initial_state() + + assert not retry.max_attempts_reached(state) + + state.attempts = 4 + assert retry.max_attempts_reached(state) + + +def test_should_retry_allow_list_ignore_parent() -> None: + retry = Retry(times=3, on=(Exception,), ignore=(RuntimeError,)) + state = retry.initial_state() + + runtime_err = RuntimeError("no retry for ignored") + assert not retry.should_retry(state, runtime_err) + + runtime_child = RuntimeChildError("no retry for subclasses of ignored") + assert not retry.should_retry(state, runtime_child) + + val_err = ValueError("other exceptions are retried") + assert retry.should_retry(state, val_err) diff --git a/clients/python/tests/test_task.py b/clients/python/tests/test_task.py new file mode 100644 index 00000000..1e0771eb --- /dev/null +++ b/clients/python/tests/test_task.py @@ -0,0 +1,370 @@ +import datetime +from typing import Any +from unittest.mock import patch + +import orjson +import pytest +import sentry_sdk +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + ON_ATTEMPTS_EXCEEDED_DEADLETTER, + ON_ATTEMPTS_EXCEEDED_DISCARD, +) + +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.registry import TaskNamespace +from taskbroker_client.retry import LastAction, Retry, RetryTaskError +from taskbroker_client.router import DefaultRouter +from taskbroker_client.task import Task + +from .conftest import producer_factory + + +def do_things() -> None: + raise NotImplementedError + + +@pytest.fixture +def task_namespace() -> TaskNamespace: + return TaskNamespace( + name="tests", + producer_factory=producer_factory, + router=DefaultRouter(), + metrics=NoOpMetricsBackend(), + retry=None, + ) + + +def test_define_task_defaults(task_namespace: TaskNamespace) -> None: + task = Task(name="test.do_things", func=do_things, namespace=task_namespace) + assert task.retry is None + assert task.name == "test.do_things" + assert task.namespace == task_namespace + + +def test_define_task_retry(task_namespace: TaskNamespace) -> None: + retry = Retry(times=3, times_exceeded=LastAction.Deadletter) + task = Task(name="test.do_things", func=do_things, namespace=task_namespace, retry=retry) + assert task.retry == retry + + +def test_define_task_at_most_once_with_retry(task_namespace: TaskNamespace) -> None: + with pytest.raises(AssertionError) as err: + Task( + name="test.do_things", + 
func=do_things, + namespace=task_namespace, + at_most_once=True, + retry=Retry(times=3), + ) + assert "You cannot enable at_most_once and have retries" in str(err) + + +def test_apply_async_expires(task_namespace: TaskNamespace) -> None: + def test_func(*args: Any, **kwargs: Any) -> None: + pass + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + with patch.object(task_namespace, "send_task") as mock_send: + task.apply_async(args=["arg2"], kwargs={"org_id": 2}, expires=10, producer=None) + assert mock_send.call_count == 1 + call_params = mock_send.call_args + + activation = call_params.args[0] + assert activation.expires == 10 + assert activation.parameters == orjson.dumps( + {"args": ["arg2"], "kwargs": {"org_id": 2}} + ).decode("utf-8") + + +def test_apply_async_countdown(task_namespace: TaskNamespace) -> None: + def test_func(*args: Any, **kwargs: Any) -> None: + pass + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + with patch.object(task_namespace, "send_task") as mock_send: + task.apply_async(args=["arg2"], kwargs={"org_id": 2}, countdown=600, producer=None) + assert mock_send.call_count == 1 + call_params = mock_send.call_args + + activation = call_params.args[0] + assert activation.delay == 600 + assert activation.parameters == orjson.dumps( + {"args": ["arg2"], "kwargs": {"org_id": 2}} + ).decode("utf-8") + + +def test_delay_immediate_mode(task_namespace: TaskNamespace) -> None: + calls = [] + + def test_func(*args: Any, **kwargs: Any) -> None: + calls.append({"args": args, "kwargs": kwargs}) + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + # Patch the constant that controls eager execution + with patch("taskbroker_client.task.ALWAYS_EAGER", True): + task.delay("arg", org_id=1) + task.apply_async(args=["arg2"], kwargs={"org_id": 2}) + task.apply_async() + + assert len(calls) == 3 + assert calls[0] == {"args": ("arg",), "kwargs": {"org_id": 1}} + assert calls[1] == {"args": ("arg2",), "kwargs": {"org_id": 2}} + assert calls[2] == {"args": tuple(), "kwargs": {}} + + +def test_delay_immediate_validate_activation(task_namespace: TaskNamespace) -> None: + calls = [] + + def test_func(mixed: Any) -> None: + calls.append({"mixed": mixed}) + + task = Task( + name="test.test_func", + func=test_func, + namespace=task_namespace, + ) + + with patch("taskbroker_client.task.ALWAYS_EAGER", True): + task.delay(mixed=None) + task.delay(mixed="str") + + with pytest.raises(TypeError) as err: + task.delay(mixed=datetime.timedelta(days=1)) + assert "not JSON serializable" in str(err) + + assert len(calls) == 2 + assert calls[0] == {"mixed": None} + assert calls[1] == {"mixed": "str"} + + +def test_should_retry(task_namespace: TaskNamespace) -> None: + retry = Retry(times=3, times_exceeded=LastAction.Deadletter) + state = retry.initial_state() + + task = Task( + name="test.do_things", + func=do_things, + namespace=task_namespace, + retry=retry, + ) + err = RetryTaskError("try again plz") + assert task.should_retry(state, err) + + state.attempts = 3 + assert not task.should_retry(state, err) + + no_retry = Task( + name="test.no_retry", + func=do_things, + namespace=task_namespace, + retry=None, + ) + assert not no_retry.should_retry(state, err) + + +def test_create_activation(task_namespace: TaskNamespace) -> None: + no_retry_task = Task( + name="test.no_retry", + func=do_things, + namespace=task_namespace, + retry=None, + ) + + retry = Retry(times=3, 
times_exceeded=LastAction.Deadletter) + retry_task = Task( + name="test.with_retry", + func=do_things, + namespace=task_namespace, + retry=retry, + ) + + timedelta_expiry_task = Task( + name="test.with_timedelta_expires", + func=do_things, + namespace=task_namespace, + expires=datetime.timedelta(minutes=5), + processing_deadline_duration=datetime.timedelta(seconds=30), + ) + int_expiry_task = Task( + name="test.with_int_expires", + func=do_things, + namespace=task_namespace, + expires=5 * 60, + processing_deadline_duration=30, + ) + + at_most_once_task = Task( + name="test.at_most_once", + func=do_things, + namespace=task_namespace, + at_most_once=True, + ) + # No retries will be made as there is no retry policy on the task or namespace. + activation = no_retry_task.create_activation([], {}) + assert activation.taskname == "test.no_retry" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + activation = retry_task.create_activation([], {}) + assert activation.taskname == "test.with_retry" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 3 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DEADLETTER + + activation = timedelta_expiry_task.create_activation([], {}) + assert activation.taskname == "test.with_timedelta_expires" + assert activation.expires == 300 + assert activation.processing_deadline_duration == 30 + + activation = int_expiry_task.create_activation([], {}) + assert activation.taskname == "test.with_int_expires" + assert activation.expires == 300 + assert activation.processing_deadline_duration == 30 + + activation = int_expiry_task.create_activation([], {}, expires=600) + assert activation.taskname == "test.with_int_expires" + assert activation.expires == 600 + assert activation.processing_deadline_duration == 30 + + activation = at_most_once_task.create_activation([], {}) + assert activation.taskname == "test.at_most_once" + assert activation.namespace == task_namespace.name + assert activation.retry_state + assert activation.retry_state.at_most_once is True + assert activation.retry_state.attempts == 0 + assert activation.retry_state.max_attempts == 1 + assert activation.retry_state.on_attempts_exceeded == ON_ATTEMPTS_EXCEEDED_DISCARD + + +def test_create_activation_parameters(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}) + params = orjson.loads(activation.parameters) + assert params["args"] + assert params["args"] == ["one", 22] + assert params["kwargs"] == {"org_id": 99} + + +def test_create_activation_tracing(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}) + + headers = activation.headers + assert headers["sentry-trace"] + assert "baggage" in headers + + +def test_create_activation_tracing_headers(task_namespace: TaskNamespace) 
-> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation( + ["one", 22], {"org_id": 99}, {"key": "value"} + ) + + headers = activation.headers + assert headers["sentry-trace"] + assert "baggage" in headers + assert headers["key"] == "value" + + +def test_create_activation_tracing_disable(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + with sentry_sdk.start_transaction(op="test.task"): + activation = with_parameters.create_activation( + ["one", 22], {"org_id": 99}, {"sentry-propagate-traces": False} + ) + + headers = activation.headers + assert "sentry-trace" not in headers + assert "baggage" not in headers + + +def test_create_activation_headers_scalars(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "str": "value", + "int": 22, + "float": 3.14, + "bool": False, + "none": None, + } + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + assert activation.headers["str"] == "value" + assert activation.headers["int"] == "22" + assert activation.headers["float"] == "3.14" + assert activation.headers["bool"] == "False" + assert activation.headers["none"] == "None" + + +def test_create_activation_headers_nested(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "key": "value", + "nested": { + "name": "sentry", + }, + } + with pytest.raises(ValueError) as err: + with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + assert "Only scalar header values are supported" in str(err) + assert "The `nested` header value is of type " in str(err) + + +def test_create_activation_headers_monitor_config_treatment(task_namespace: TaskNamespace) -> None: + @task_namespace.register(name="test.parameters") + def with_parameters(one: str, two: int, org_id: int) -> None: + raise NotImplementedError + + headers = { + "key": "value", + "sentry-monitor-config": { + "schedule": {"type": "crontab", "value": "*/15 * * * *"}, + "timezone": "UTC", + }, + "sentry-monitor-slug": "delete-stuff", + "sentry-monitor-check-in-id": "abc123", + } + activation = with_parameters.create_activation(["one", 22], {"org_id": 99}, headers) + + result = activation.headers + assert result + assert result["key"] == "value" + assert "sentry-monitor-config" not in result + assert "sentry-monitor-slug" in result + assert "sentry-monitor-check-in-id" in result diff --git a/clients/python/tests/worker/__init__.py b/clients/python/tests/worker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/clients/python/tests/worker/test_client.py b/clients/python/tests/worker/test_client.py new file mode 100644 index 00000000..61d83a83 --- /dev/null +++ b/clients/python/tests/worker/test_client.py @@ -0,0 +1,917 @@ +import dataclasses +import random +import string +import time +from collections import defaultdict +from collections.abc import Callable +from pathlib import Path +from typing import Any +from unittest.mock import Mock, patch + +import grpc +import 
pytest +from google.protobuf.message import Message +from sentry_protos.taskbroker.v1.taskbroker_pb2 import ( + TASK_ACTIVATION_STATUS_COMPLETE, + TASK_ACTIVATION_STATUS_RETRY, + FetchNextTask, + GetTaskResponse, + SetTaskStatusResponse, + TaskActivation, +) + +from taskbroker_client.constants import DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH +from taskbroker_client.metrics import NoOpMetricsBackend +from taskbroker_client.types import ProcessingResult +from taskbroker_client.worker.client import ( + HealthCheckSettings, + HostTemporarilyUnavailable, + TaskbrokerClient, + make_broker_hosts, +) + + +@dataclasses.dataclass +class MockServiceCall: + response: Any + metadata: tuple[tuple[str, str | bytes], ...] | None = None + + +class MockServiceMethod: + """Stub for grpc service methods""" + + def __init__( + self, + path: str, + responses: list[Any], + request_serializer: Callable[..., Any], + response_deserializer: Callable[..., Any], + ): + self.path = path + self.request_serializer = request_serializer + self.response_deserializer = response_deserializer + self.responses = responses + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + """Capture calls and use registered mocks""" + # move the head to the tail + res = self.responses[0] + tail = self.responses[1:] + self.responses = tail + [res] + + if isinstance(res.response, Exception): + raise res.response + return res.response + + def with_call(self, *args: Any, **kwargs: Any) -> Any: + res = self.responses[0] + if res.metadata: + assert res.metadata == kwargs.get("metadata"), "Metadata mismatch" + if isinstance(res.response, Exception): + raise res.response + return (res.response, None) + + +class MockChannel: + def __init__(self) -> None: + self._responses: dict[str, list[Any]] = defaultdict(list) + + def unary_unary( + self, + path: str, + request_serializer: Callable[..., Any], + response_deserializer: Callable[..., Any], + *args: Any, + **kwargs: Any, + ) -> MockServiceMethod: + return MockServiceMethod( + path, self._responses.get(path, []), request_serializer, response_deserializer + ) + + def add_response( + self, + path: str, + resp: Message | Exception, + metadata: tuple[tuple[str, str | bytes], ...] 
| None = None,
+    ) -> None:
+        self._responses[path].append(MockServiceCall(response=resp, metadata=metadata))
+
+
+class MockGrpcError(grpc.RpcError):
+    """gRPC errors are elusive and this mock simulates the interface in mypy stubs"""
+
+    def __init__(self, code: grpc.StatusCode, message: str) -> None:
+        self._code = code
+        self._message = message
+
+    def code(self) -> grpc.StatusCode:
+        return self._code
+
+    def details(self) -> str:
+        return self._message
+
+    def result(self) -> None:
+        raise self
+
+
+def test_make_broker_hosts() -> None:
+    hosts = make_broker_hosts(host_prefix="broker:50051", num_brokers=3)
+    assert len(hosts) == 3
+    assert hosts == ["broker-0:50051", "broker-1:50051", "broker-2:50051"]
+
+    hosts = make_broker_hosts(
+        host_prefix="",
+        num_brokers=None,
+        host_list="broker:50051, broker-a:50051 , , broker-b:50051",
+    )
+    assert len(hosts) == 3
+    assert hosts == ["broker:50051", "broker-a:50051", "broker-b:50051"]
+
+
+def test_init_no_hosts() -> None:
+    with pytest.raises(AssertionError) as err:
+        TaskbrokerClient(hosts=[], metrics=NoOpMetricsBackend())
+    assert "You must provide at least one RPC host" in str(err)
+
+
+def test_health_check_is_debounced() -> None:
+    channel = MockChannel()
+    channel.add_response(
+        "/sentry_protos.taskbroker.v1.ConsumerService/GetTask",
+        GetTaskResponse(
+            task=TaskActivation(
+                id="abc123",
+                namespace="testing",
+                taskname="do_thing",
+                parameters="",
+                headers={},
+                processing_deadline_duration=10,
+            )
+        ),
+    )
+    channel.add_response(
+        "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus",
+        SetTaskStatusResponse(
+            task=TaskActivation(
+                id="abc123",
+                namespace="testing",
+                taskname="do_thing",
+                parameters="",
+                headers={},
+                processing_deadline_duration=10,
+            )
+        ),
+    )
+    with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel:
+        mock_channel.return_value = channel
+        health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}")
+        client = TaskbrokerClient(
+            hosts=["localhost-0:50051"],
+            metrics=NoOpMetricsBackend(),
+            health_check_settings=HealthCheckSettings(health_check_path, 1),
+        )
+        client._health_check_settings.file_path = Mock()  # type: ignore[union-attr]
+
+        _ = client.get_task()
+        _ = client.get_task()
+        assert client._health_check_settings.file_path.touch.call_count == 1  # type: ignore[union-attr]
+
+        with patch("taskbroker_client.worker.client.time") as mock_time:
+            mock_time.time.return_value = time.time() + 1
+            _ = client.get_task()
+            assert client._health_check_settings.file_path.touch.call_count == 2  # type: ignore[union-attr]
+
+
+def test_get_task_ok() -> None:
+    channel = MockChannel()
+    channel.add_response(
+        "/sentry_protos.taskbroker.v1.ConsumerService/GetTask",
+        GetTaskResponse(
+            task=TaskActivation(
+                id="abc123",
+                namespace="testing",
+                taskname="do_thing",
+                parameters="",
+                headers={},
+                processing_deadline_duration=10,
+            )
+        ),
+    )
+    with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel:
+        mock_channel.return_value = channel
+        client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend())
+        result = client.get_task()
+
+    assert result
+    assert result.host == "localhost-0:50051"
+    assert result.activation.id
+    assert result.activation.namespace == "testing"
+
+
+def test_get_task_writes_to_health_check_file() -> None:
+    channel = MockChannel()
+    channel.add_response(
+        "/sentry_protos.taskbroker.v1.ConsumerService/GetTask",
+        GetTaskResponse(
+            task=TaskActivation(
+                id="abc123",
+
namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") + client = TaskbrokerClient( + ["localhost-0:50051"], + metrics=NoOpMetricsBackend(), + health_check_settings=HealthCheckSettings(health_check_path, 3), + ) + _ = client.get_task() + assert health_check_path.exists() + + +def test_get_task_with_interceptor() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + metadata=( + ( + "sentry-signature", + "3202702605c1b65055c28e7c78a5835e760830cff3e9f995eb7ad5f837130b1f", + ), + ), + ) + secret = '["a long secret value","notused"]' + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + ["localhost-0:50051"], metrics=NoOpMetricsBackend(), rpc_secret=secret + ) + result = client.get_task() + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id + assert result.activation.namespace == "testing" + + +def test_get_task_with_namespace() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + hosts=make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) + result = client.get_task(namespace="testing") + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id + assert result.activation.namespace == "testing" + + +def test_get_task_not_found() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend()) + result = client.get_task() + + assert result is None + + +def test_get_task_failure() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.INTERNAL, "something bad"), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient(["localhost:50051"], metrics=NoOpMetricsBackend()) + with pytest.raises(grpc.RpcError): + client.get_task() + + +def test_update_task_writes_to_health_check_file() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + 
processing_deadline_duration=10, + ) + ), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + health_check_path = Path(f"/tmp/{''.join(random.choices(string.ascii_letters, k=16))}") + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), + metrics=NoOpMetricsBackend(), + health_check_settings=HealthCheckSettings( + health_check_path, DEFAULT_WORKER_HEALTH_CHECK_SEC_PER_TOUCH + ), + ) + _ = client.update_task( + ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), + FetchNextTask(namespace=None), + ) + assert health_check_path.exists() + + +def test_update_task_ok_with_next() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) + assert set(client._host_to_stubs.keys()) == {"localhost-0:50051"} + result = client.update_task( + ProcessingResult("abc123", TASK_ACTIVATION_STATUS_RETRY, "localhost-0:50051", 0), + FetchNextTask(namespace=None), + ) + + assert result + assert result.host == "localhost-0:50051" + assert result.activation.id == "abc123" + + +def test_update_task_ok_with_next_namespace() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse( + task=TaskActivation( + id="abc123", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) + result = client.update_task( + ProcessingResult( + task_id="id", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace="testing"), + ) + assert result + assert result.activation.id == "abc123" + assert result.activation.namespace == "testing" + + +def test_update_task_ok_no_next() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", SetTaskStatusResponse() + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + make_broker_hosts("localhost:50051", num_brokers=1), metrics=NoOpMetricsBackend() + ) + result = client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert result is None + + +def test_update_task_not_found() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending tasks found"), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = 
TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend()) + result = client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert result is None + + +def test_update_task_unavailable_retain_task_to_host() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "broker down"), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient(["localhost-0:50051"], metrics=NoOpMetricsBackend()) + with pytest.raises(MockGrpcError) as err: + client.update_task( + ProcessingResult( + task_id="abc123", + status=TASK_ACTIVATION_STATUS_RETRY, + host="localhost-0:50051", + receive_timestamp=0, + ), + FetchNextTask(namespace=None), + ) + assert "broker down" in str(err.value) + + +def test_client_loadbalance() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="0", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_2 = MockChannel() + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="2", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + channel_3 = MockChannel() + channel_3.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="3", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_3.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + SetTaskStatusResponse(task=None), + ) + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1, channel_2, channel_3] + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + "localhost-2:50051", + "localhost-3:50051", + ] + client = TaskbrokerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=4), + metrics=NoOpMetricsBackend(), + max_tasks_before_rebalance=1, + ) + + task_0 = client.get_task() + assert task_0 is not None and task_0.activation.id == "0" + task_1 = client.get_task() + assert task_1 is not None and task_1.activation.id == "1" + task_2 = client.get_task() + assert task_2 is not None and task_2.activation.id 
== "2" + task_3 = client.get_task() + assert task_3 is not None and task_3.activation.id == "3" + + client.update_task( + ProcessingResult( + task_0.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_0.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_1.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_1.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_2.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_2.host, 0 + ), + None, + ) + client.update_task( + ProcessingResult( + task_3.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_3.host, 0 + ), + None, + ) + + +def test_client_loadbalance_on_notfound() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/SetTaskStatus", + MockGrpcError(grpc.StatusCode.NOT_FOUND, "no pending task found"), + ) + + channel_2 = MockChannel() + channel_2.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="2", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1, channel_2] + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + "localhost-2:50051", + ] + client = TaskbrokerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=3), + metrics=NoOpMetricsBackend(), + max_tasks_before_rebalance=30, + ) + + # Fetch from the first channel, it should return notfound + task_0 = client.get_task() + assert task_0 is None + + # Fetch again, this time from channel_1 + task_1 = client.get_task() + assert task_1 and task_1.activation.id == "1" + + res = client.update_task( + ProcessingResult( + task_1.activation.id, TASK_ACTIVATION_STATUS_COMPLETE, task_1.host, 0 + ), + None, + ) + assert res is None + + # Because SetStatus on channel_1 returned notfound the client + # should switch brokers. 
+ task_2 = client.get_task() + assert task_2 and task_2.activation.id == "2" + + +def test_client_loadbalance_on_unavailable() -> None: + channel_0 = MockChannel() + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel_0.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + channel_1 = MockChannel() + channel_1.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.side_effect = [channel_0, channel_1] + with patch("taskbroker_client.worker.client.random.choice") as mock_randchoice: + mock_randchoice.side_effect = [ + "localhost-0:50051", + "localhost-1:50051", + ] + client = TaskbrokerClient( + hosts=make_broker_hosts(host_prefix="localhost:50051", num_brokers=2), + metrics=NoOpMetricsBackend(), + max_consecutive_unavailable_errors=3, + ) + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 2 + + # Fetch from the first channel, host should be unavailable + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + + # Should rebalance to the second host and receive task + task = client.get_task() + assert task and task.activation.id == "1" + assert client._num_consecutive_unavailable_errors == 0 + + +def test_client_single_host_unavailable() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + + with (patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel,): + mock_channel.return_value = channel + client = TaskbrokerClient( + hosts=["localhost-0:50051"], + metrics=NoOpMetricsBackend(), + max_consecutive_unavailable_errors=3, + temporary_unavailable_host_timeout=2, + ) + + for _ in range(3): + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + + # Verify host was marked as temporarily unavailable + 
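+        # (The float value is presumably the timestamp after which the host may
+        # be retried, governed by temporary_unavailable_host_timeout=2 above.)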
assert "localhost-0:50051" in client._temporary_unavailable_hosts + assert isinstance(client._temporary_unavailable_hosts["localhost-0:50051"], float) + + client.get_task() + assert client._cur_host == "localhost-0:50051" + + +def test_client_reset_errors_after_success() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + with patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel: + mock_channel.return_value = channel + client = TaskbrokerClient( + ["localhost:50051"], metrics=NoOpMetricsBackend(), max_consecutive_unavailable_errors=3 + ) + + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + task = client.get_task() + assert task and task.activation.id == "1" + assert client._num_consecutive_unavailable_errors == 0 + + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 1 + + +def test_client_update_task_host_unavailable() -> None: + channel = MockChannel() + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + GetTaskResponse( + task=TaskActivation( + id="1", + namespace="testing", + taskname="do_thing", + parameters="", + headers={}, + processing_deadline_duration=10, + ) + ), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + channel.add_response( + "/sentry_protos.taskbroker.v1.ConsumerService/GetTask", + MockGrpcError(grpc.StatusCode.UNAVAILABLE, "host is unavailable"), + ) + + current_time = 1000.0 + + def mock_time() -> float: + return current_time + + with ( + patch("taskbroker_client.worker.client.grpc.insecure_channel") as mock_channel, + patch("taskbroker_client.worker.client.time.time", side_effect=mock_time), + ): + mock_channel.return_value = channel + client = TaskbrokerClient( + ["localhost:50051"], + metrics=NoOpMetricsBackend(), + max_consecutive_unavailable_errors=3, + temporary_unavailable_host_timeout=10, + ) + + # Get a task to establish the host mapping + task = client.get_task() + assert task and task.activation.id == "1" + host = task.host + + # Make the host temporarily unavailable + for _ in range(3): + with pytest.raises(grpc.RpcError, match="host is unavailable"): + client.get_task() + assert client._num_consecutive_unavailable_errors == 3 + assert host in client._temporary_unavailable_hosts + + # Try to update the task + with pytest.raises( + HostTemporarilyUnavailable, match=f"Host: {host} is temporarily unavailable" + ): + client.update_task( + ProcessingResult( + task_id="1", + status=TASK_ACTIVATION_STATUS_COMPLETE, + host=host, + receive_timestamp=0, + ), + fetch_next_task=None, + ) diff --git 
a/clients/python/tests/worker/test_worker.py b/clients/python/tests/worker/test_worker.py
new file mode 100644
index 00000000..8bfb35b2
--- /dev/null
+++ b/clients/python/tests/worker/test_worker.py
@@ -0,0 +1,664 @@
+import base64
+import queue
+import time
+from multiprocessing import Event
+from typing import Any
+from unittest import TestCase, mock
+
+import grpc
+import orjson
+import zstandard as zstd
+from redis import StrictRedis
+
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    ON_ATTEMPTS_EXCEEDED_DISCARD,
+    TASK_ACTIVATION_STATUS_COMPLETE,
+    TASK_ACTIVATION_STATUS_FAILURE,
+    TASK_ACTIVATION_STATUS_RETRY,
+    RetryState,
+    TaskActivation,
+)
+from sentry_sdk.crons import MonitorStatus
+
+from taskbroker_client.constants import CompressionType
+from taskbroker_client.retry import NoRetriesRemainingError
+from taskbroker_client.state import current_task
+from taskbroker_client.types import InflightTaskActivation, ProcessingResult
+from taskbroker_client.worker.worker import TaskWorker
+from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded, child_process
+
+SIMPLE_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="111",
+        taskname="examples.simple_task",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    ),
+)
+
+RETRY_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="222",
+        taskname="examples.retry_task",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    ),
+)
+
+FAIL_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="333",
+        taskname="examples.fail_task",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    ),
+)
+
+UNDEFINED_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="444",
+        taskname="total.rubbish",
+        namespace="lolnope",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    ),
+)
+
+AT_MOST_ONCE_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="555",
+        taskname="examples.at_most_once",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+    ),
+)
+
+RETRY_STATE_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="654",
+        taskname="examples.retry_state",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+        retry_state=RetryState(
+            # no more attempts left
+            attempts=1,
+            max_attempts=2,
+            on_attempts_exceeded=ON_ATTEMPTS_EXCEEDED_DISCARD,
+        ),
+    ),
+)
+
+SCHEDULED_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="111",
+        taskname="examples.simple_task",
+        namespace="examples",
+        parameters='{"args": [], "kwargs": {}}',
+        processing_deadline_duration=2,
+        headers={
+            "sentry-monitor-slug": "simple-task",
+            "sentry-monitor-check-in-id": "abc123",
+        },
+    ),
+)
+
+COMPRESSED_TASK = InflightTaskActivation(
+    host="localhost:50051",
+    receive_timestamp=0,
+    activation=TaskActivation(
+        id="compressed_task_123",
+        taskname="examples.simple_task",
+        namespace="examples",
+
parameters=base64.b64encode( + zstd.compress( + orjson.dumps( + { + "args": ["test_arg1", "test_arg2"], + "kwargs": {"test_key": "test_value", "number": 42}, + } + ) + ) + ).decode("utf8"), + headers={ + "compression-type": CompressionType.ZSTD.value, + }, + processing_deadline_duration=2, + ), +) + + +class TestTaskWorker(TestCase): + def test_fetch_task(self) -> None: + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=100, + process_type="fork", + ) + with mock.patch.object(taskworker.client, "get_task") as mock_get: + mock_get.return_value = SIMPLE_TASK + + task = taskworker.fetch_task() + mock_get.assert_called_once() + + assert task + assert task.activation.id == SIMPLE_TASK.activation.id + + def test_fetch_no_task(self) -> None: + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=100, + process_type="fork", + ) + with mock.patch.object(taskworker.client, "get_task") as mock_get: + mock_get.return_value = None + task = taskworker.fetch_task() + + mock_get.assert_called_once() + assert task is None + + def test_run_once_no_next_task(self) -> None: + max_runtime = 5 + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + mock_client.get_task.return_value = SIMPLE_TASK + # No next_task returned + mock_client.update_task.return_value = None + + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.called: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 1 + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + assert mock_client.update_task.call_args.args[1] is None + + def test_run_once_with_next_task(self) -> None: + # Cover the scenario where update_task returns the next task which should + # be processed.
+ max_runtime = 5 + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args: Any, **kwargs: Any) -> InflightTaskActivation | None: + if mock_client.update_task.call_count >= 1: + return None + return SIMPLE_TASK + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.return_value = SIMPLE_TASK + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until two tasks have been processed + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 2: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 2 + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert mock_client.update_task.call_args.args[0].task_id == SIMPLE_TASK.activation.id + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + assert mock_client.update_task.call_args.args[1] is None + + def test_run_once_with_update_failure(self) -> None: + # Cover the scenario where update_task fails a few times in a row. + # We should retain the result until the RPC succeeds. + max_runtime = 5 + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args: Any, **kwargs: Any) -> None: + if mock_client.update_task.call_count <= 2: + # Use setattr() because internally grpc uses _InactiveRpcError + # but it isn't exported.
+ err = grpc.RpcError("update task failed") + setattr(err, "code", lambda: grpc.StatusCode.UNAVAILABLE) + raise err + return None + + def get_task_response(*args: Any, **kwargs: Any) -> InflightTaskActivation | None: + # Only one task that fails to update + if mock_client.get_task.call_count == 1: + return SIMPLE_TASK + return None + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.side_effect = get_task_response + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until the update has 'completed' + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 3: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 3 + + def test_run_once_current_task_state(self) -> None: + # Run a task that uses retry_task() helper + # to raise and catch a NoRetriesRemainingError + max_runtime = 5 + taskworker = TaskWorker( + app_module="examples.app:app", + broker_hosts=["127.0.0.1:50051"], + max_child_task_count=1, + process_type="fork", + ) + with mock.patch.object(taskworker, "client") as mock_client: + + def update_task_response(*args: Any, **kwargs: Any) -> None: + return None + + mock_client.update_task.side_effect = update_task_response + mock_client.get_task.return_value = RETRY_STATE_TASK + taskworker.start_result_thread() + taskworker.start_spawn_children_thread() + + # Run until one task has been processed + start = time.time() + while True: + taskworker.run_once() + if mock_client.update_task.call_count >= 1: + break + if time.time() - start > max_runtime: + taskworker.shutdown() + raise AssertionError("Timeout waiting for update_task to be called") + + taskworker.shutdown() + assert mock_client.get_task.called + assert mock_client.update_task.call_count == 1 + # status is complete, as retry_state task handles the NoRetriesRemainingError + assert mock_client.update_task.call_args.args[0].host == "localhost:50051" + assert ( + mock_client.update_task.call_args.args[0].task_id == RETRY_STATE_TASK.activation.id + ) + assert ( + mock_client.update_task.call_args.args[0].status == TASK_ACTIVATION_STATUS_COMPLETE + ) + + # TODO read host from env vars + redis = StrictRedis(host="localhost", port=6379, decode_responses=True) + assert current_task() is None, "should clear current task on completion" + assert redis.get("no-retries-remaining"), "key should exist if except block was hit" + redis.delete("no-retries-remaining") + + +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") +def test_child_process_complete(mock_capture_checkin: mock.MagicMock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(SIMPLE_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + assert mock_capture_checkin.call_count == 0 + + +def test_child_process_remove_start_time_kwargs() -> None: + activation = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="6789", +
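# kwargs carry an internal __start_time marker, which should be stripped before the task runs +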
taskname="examples.will_retry", + namespace="examples", + parameters='{"args": ["stuff"], "kwargs": {"__start_time": 123}}', + processing_deadline_duration=100000, + ), + ) + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(activation) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == activation.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +def test_child_process_retry_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(RETRY_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == RETRY_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_RETRY + + +@mock.patch("taskbroker_client.worker.workerchild.sentry_sdk.capture_exception") +def test_child_process_retry_task_max_attempts(mock_capture: mock.Mock) -> None: + # Create an activation that is on its final attempt and + # will raise an error again. + activation = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="6789", + taskname="examples.will_retry", + namespace="examples", + parameters='{"args": ["raise"], "kwargs": {}}', + processing_deadline_duration=100000, + retry_state=RetryState( + attempts=2, + max_attempts=3, + ), + ), + ) + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(activation) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == activation.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + assert mock_capture.call_count == 1 + capture_call = mock_capture.call_args[0] + # Error type and chained error should be captured. + assert isinstance(capture_call[0], NoRetriesRemainingError) + assert isinstance(capture_call[0].__cause__, RuntimeError) + + +def test_child_process_failure_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(FAIL_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == FAIL_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + +def test_child_process_shutdown() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + shutdown.set() + + todo.put(SIMPLE_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + # When shutdown has been set, the child should not process more tasks. 
+ assert todo.qsize() == 1 + assert processed.qsize() == 0 + + +def test_child_process_unknown_task() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(UNDEFINED_TASK) + todo.put(SIMPLE_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + result = processed.get() + assert result.task_id == UNDEFINED_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + + result = processed.get() + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +def test_child_process_at_most_once() -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(AT_MOST_ONCE_TASK) + todo.put(AT_MOST_ONCE_TASK) + todo.put(SIMPLE_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=2, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get(block=False) + assert result.task_id == AT_MOST_ONCE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + result = processed.get(block=False) + assert result.task_id == SIMPLE_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") +def test_child_process_record_checkin(mock_capture_checkin: mock.Mock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(SCHEDULED_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == SCHEDULED_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + + assert mock_capture_checkin.call_count == 1 + mock_capture_checkin.assert_called_with( + monitor_slug="simple-task", + check_in_id="abc123", + duration=mock.ANY, + status=MonitorStatus.OK, + ) + + +@mock.patch("taskbroker_client.worker.workerchild.sentry_sdk.capture_exception") +def test_child_process_terminate_task(mock_capture: mock.Mock) -> None: + todo: queue.Queue[InflightTaskActivation] = queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + sleepy = InflightTaskActivation( + host="localhost:50051", + receive_timestamp=0, + activation=TaskActivation( + id="111", + taskname="examples.timed", + namespace="examples", + parameters='{"args": [3], "kwargs": {}}', + processing_deadline_duration=1, + ), + ) + + todo.put(sleepy) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get(block=False) + assert result.task_id == sleepy.activation.id + assert result.status == TASK_ACTIVATION_STATUS_FAILURE + assert mock_capture.call_count == 1 + assert type(mock_capture.call_args.args[0]) is ProcessingDeadlineExceeded + + +@mock.patch("taskbroker_client.worker.workerchild.capture_checkin") +def test_child_process_decompression(mock_capture_checkin: mock.MagicMock) -> None: + todo: queue.Queue[InflightTaskActivation] =
queue.Queue() + processed: queue.Queue[ProcessingResult] = queue.Queue() + shutdown = Event() + + todo.put(COMPRESSED_TASK) + child_process( + "examples.app:app", + todo, + processed, + shutdown, + max_task_count=1, + processing_pool_name="test", + process_type="fork", + ) + + assert todo.empty() + result = processed.get() + assert result.task_id == COMPRESSED_TASK.activation.id + assert result.status == TASK_ACTIVATION_STATUS_COMPLETE + assert mock_capture_checkin.call_count == 0 diff --git a/clients/python/uv.lock b/clients/python/uv.lock new file mode 100644 index 00000000..0e88d3b7 --- /dev/null +++ b/clients/python/uv.lock @@ -0,0 +1,572 @@ +version = 1 +revision = 2 +requires-python = ">=3.12.11" +resolution-markers = [ + "sys_platform == 'darwin' or sys_platform == 'linux'", +] +supported-markers = [ + "sys_platform == 'darwin' or sys_platform == 'linux'", +] + +[[package]] +name = "black" +version = "24.10.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pathspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f" }, + { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d" }, +] + +[[package]] +name = "certifi" +version = "2025.8.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062" }, + { url = 
"https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26" }, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" }, +] + +[[package]] +name = "click" +version = "8.3.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc" }, +] + +[[package]] +name = "confluent-kafka" +version = "2.9.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:537713346e4f561341fd49e1859892e85916b43f730a3a7ebf7b4fa66457e742" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1990db1569a174eb8187ed2555b793da223317363798eb3884f88a3b501c3c15" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d8d8d4475dedd7a0883ca14cbd1b78dc6119139bb07d2c953b9043b1b8826782" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:5920cc984047f950ce7f3050407cb816813dc434ceb0ca0156ab56303d69245e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:c594238f7a9615f812de2b2c5d3a7b91788cdb74ece7d88ed736faa87e571715" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:aa26072388f6021f95e41762338565e66a2d96f0538fdc72650bc154a0e547eb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:bc00d0fdd4d85d7e3fcb3e9238092dba439ffcf35e7a3960e42d3eb0a41b4ce1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b21e4a4ba88374a1487a9353debcddc994dae385f89d6bc45f08ab372e238756" }, +] + +[[package]] +name = "devservices" +version = "1.2.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' 
or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-sdk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "supervisor", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/devservices-1.2.3-py3-none-any.whl", hash = "sha256:19beb1dabb533c5dcbd021d6a34e3f357e5c868670f0dfe8945911d3965a6494" }, +] + +[[package]] +name = "distlib" +version = "0.3.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87" }, +] + +[[package]] +name = "filelock" +version = "3.19.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d" }, +] + +[[package]] +name = "flake8" +version = "7.3.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "mccabe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pycodestyle", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyflakes", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e" }, +] + +[[package]] +name = "grpc-stubs" +version = "1.53.0.6" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/grpc_stubs-1.53.0.6-py3-none-any.whl", hash = "sha256:3ffc5a6b5bd84ac46f3d84e2434e97936c1262b47b71b462bdedc43caaf227e1" }, +] + +[[package]] +name = "grpcio" +version = "1.66.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fdb14bad0835914f325349ed34a51940bc2ad965142eb3090081593c6e347be9" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f03a5884c56256e08fd9e262e11b5cfacf1af96e2ce78dc095d2c41ccae2c80d" }, + { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ca1be089fb4446490dd1135828bd42a7c7f8421e74fa581611f7afdf7ab761" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" }, +] + +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/h2-4.2.0-py3-none-any.whl", hash = 
"sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5" }, +] + +[[package]] +name = "identify" +version = "2.6.9" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150" }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" }, +] + +[[package]] +name = "isort" +version = "6.0.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615" }, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" }, +] + +[[package]] +name = "mypy" +version = "1.18.2" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "mypy-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pathspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893" }, + { url 
= "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986" }, + { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505" }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9" }, +] + +[[package]] +name = "orjson" +version = "3.10.10" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:44bffae68c291f94ff5a9b4149fe9d1bdd4cd0ff0fb575bcea8351d48db629a1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e27b4c6437315df3024f0835887127dac2a0a3ff643500ec27088d2588fa5ae1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca84df16d6b49325a4084fd8b2fe2229cb415e15c46c529f868c3387bb1339d" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484" }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = 
"https://pypi.devinfra.sentry.io/wheels/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08" }, +] + +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4" }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" }, +] + +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "cfgv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "identify", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "nodeenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "virtualenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd" }, +] + +[[package]] +name = "protobuf" +version = "5.29.5" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e38c5add5a311f2a6eb0340716ef9b039c1dfa428b28f25a7838ac329204a671" }, + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:fa18533a299d7ab6c55a238bf8629311439995f2e7eca5caaff08663606e9015" }, + { url = "https://pypi.devinfra.sentry.io/wheels/protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:63848923da3325e1bf7e9003d680ce6e14b07e55d0473253a690c3a8b8fd6e61" }, +] + +[[package]] +name = "pycodestyle" +version = "2.14.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" }, +] + +[[package]] +name = "pycparser" +version = "2.23" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934" }, +] + +[[package]] +name = "pyflakes" +version = "3.4.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f" }, +] + +[[package]] +name = "pytest" +version = "8.3.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "iniconfig", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 
'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6" }, +] + +[[package]] +name = "sentry-arroyo" +version = "2.33.1" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_arroyo-2.33.1-py3-none-any.whl", hash = "sha256:10d05f81a06bd7f9ee28fe7d7a628c868c3ccbdb5987bece6d9860930e1654af" }, +] + +[[package]] +name = "sentry-devenv" +version = "1.25.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "sentry-sdk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_devenv-1.25.0-py3-none-any.whl", hash = "sha256:fc143542d555af05e4003052b8d2f336ac69361432e430ed92c22b9a3df5bd3d" }, +] + +[[package]] +name = "sentry-protos" +version = "0.4.7" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "grpc-stubs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_protos-0.4.7-py3-none-any.whl", hash = "sha256:bd14e782eceac28ed4f2ea4e36a2b2e189cdbd83a7f78ce9ac8aeab01a4242b8" }, +] + 
+[[package]] +name = "sentry-sdk" +version = "2.46.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "httpcore", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922" }, +] + +[[package]] +name = "supervisor" +version = "4.2.5" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/supervisor-4.2.5-py2.py3-none-any.whl", hash = "sha256:2ecaede32fc25af814696374b79e42644ecaba5c09494c51016ffda9602d0f08" }, +] + +[[package]] +name = "taskbroker-client" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "orjson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-arroyo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-sdk", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[package.metadata] +requires-dist = [ + { name = "confluent-kafka", specifier = ">=2.3.0" }, + { name = "grpcio", specifier = "==1.66.1" }, + { name = "orjson", specifier = ">=3.10.10" }, + { name = "protobuf", specifier = ">=5.28.3" }, + { name = "sentry-arroyo", specifier = ">=2.33.1" }, + { name = "sentry-protos", specifier = ">=0.2.0" }, + { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.43.0" }, + { name = "types-protobuf", specifier 
= ">=6.30.2.20250703" }, + { name = "zstandard", specifier = ">=0.18.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = "==24.10.0" }, + { name = "devservices", specifier = ">=1.2.1" }, + { name = "flake8", specifier = ">=7.3.0" }, + { name = "isort", specifier = ">=5.13.2" }, + { name = "mypy", specifier = ">=1.17.1" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "sentry-devenv", specifier = ">=1.22.2" }, +] + +[[package]] +name = "types-protobuf" +version = "6.30.2.20250703" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/types_protobuf-6.30.2.20250703-py3-none-any.whl", hash = "sha256:fa5aff9036e9ef432d703abbdd801b436a249b6802e4df5ef74513e272434e57" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc" }, +] + +[[package]] +name = "virtualenv" +version = "20.29.3" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "distlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170" }, +] + +[[package]] +name = "zstandard" +version = "0.18.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +dependencies = [ + { name = "cffi", marker = "(platform_python_implementation == 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation == 'PyPy' and sys_platform == 'linux')" }, +] +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2eab9516bc4352fc9763d96047c815879f3efb1dfb5dfe2f775b2e22c0289cb6" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e1f98ffd138d172efd202cd078e746af80492c6942004b080bf627c5f826da5" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07a72264613c75fe6eb64f07ab553d3cfab7a421c8733e067a8718ef69c642a7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee14cc6b8b40733a707b467ddc192592cab941babf82b3e6f700673e050b4bda" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:032ddaf24458986a31ff49d2fa86a4003e1e1c34c38976bedd06805350eaeddc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:d68ff7c3a4c35400d807efbfa793767c2d4866a7017770b424e65749a70e958e" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1ef5b96f0e90855ea13d06b7213a75a77a23946d8bb186ff38578dd1ff5efd4" }, + { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:754256fb4080a36f8992983b2f65f23719d275c9a350bcf18d76344ed64efa19" }, +] diff --git a/integration_tests/__init__.py b/integration_tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/integration_tests/helpers.py b/integration_tests/helpers.py similarity index 99% rename from python/integration_tests/helpers.py rename to integration_tests/helpers.py index e371223a..cf1a1ba3 100644 --- a/python/integration_tests/helpers.py +++ b/integration_tests/helpers.py @@ -15,7 +15,7 @@ TaskActivation, ) -TASKBROKER_ROOT = Path(__file__).parent.parent.parent +TASKBROKER_ROOT = Path(__file__).parent.parent TASKBROKER_BIN = TASKBROKER_ROOT / "target/debug/taskbroker" TESTS_OUTPUT_ROOT = Path(__file__).parent / ".tests_output" TEST_PRODUCER_CONFIG = { diff --git a/integration_tests/pyproject.toml b/integration_tests/pyproject.toml new file mode 100644 index 00000000..7ff79914 --- /dev/null +++ b/integration_tests/pyproject.toml @@ -0,0 +1,71 @@ +[project] +name = "taskbroker-integration-tests" +# we only have this here to make uv happy +# we use uv for dependency management, not packaging +version = "0.0.0" + +[dependency-groups] +dev = [ + "black==24.10.0", + "devservices>=1.2.1", + "pre-commit>=4.2.0", + "pytest>=8.3.3", + "sentry-devenv>=1.22.2", + "confluent_kafka>=2.3.0", + "grpcio==1.66.1", + "orjson>=3.10.10", + "protobuf>=5.28.3", + "pyyaml>=6.0.2", + "sentry-protos>=0.2.0", + "flake8>=7.3.0", + "isort>=5.13.2", + "mypy>=1.17.1", + "types-pyyaml>=6.0.12.20241230", + "types-protobuf>=6.30.2.20250703", +] + +[tool.uv] +environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"] + +[[tool.uv.index]] +url = "https://pypi.devinfra.sentry.io/simple" +default = true + +[tool.pytest.ini_options] +pythonpath = ["python"] +testpaths = ["."] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.mypy] +mypy_path = "python" +explicit_package_bases = true +# minimal strictness settings +check_untyped_defs = true +no_implicit_reexport = true +warn_unreachable = true +warn_unused_configs = true +warn_unused_ignores = true +warn_redundant_casts = true +enable_error_code = ["ignore-without-code", "redundant-self"] +local_partial_types = true # compat with dmypy +disallow_any_generics = true +disallow_untyped_defs = true + +# begin: missing 3rd party stubs +[[tool.mypy.overrides]] +module = [ + "confluent_kafka.*", +] +ignore_missing_imports = true +# end: missing 3rd party stubs + +[tool.black] +# File filtering is taken care of in pre-commit. 
+line-length = 100
+target-version = ['py311']
+
+[tool.isort]
+profile = "black"
+line_length = 100
+lines_between_sections = 1
diff --git a/python/integration_tests/runner.py b/integration_tests/runner.py
similarity index 100%
rename from python/integration_tests/runner.py
rename to integration_tests/runner.py
diff --git a/python/integration_tests/test_consumer_rebalancing.py b/integration_tests/test_consumer_rebalancing.py
similarity index 99%
rename from python/integration_tests/test_consumer_rebalancing.py
rename to integration_tests/test_consumer_rebalancing.py
index 8d2dd342..f4380d1e 100644
--- a/python/integration_tests/test_consumer_rebalancing.py
+++ b/integration_tests/test_consumer_rebalancing.py
@@ -7,6 +7,7 @@
 from threading import Thread
 
 import yaml
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
diff --git a/python/integration_tests/test_failed_tasks.py b/integration_tests/test_failed_tasks.py
similarity index 99%
rename from python/integration_tests/test_failed_tasks.py
rename to integration_tests/test_failed_tasks.py
index 4a914ae2..e2b481e0 100644
--- a/python/integration_tests/test_failed_tasks.py
+++ b/integration_tests/test_failed_tasks.py
@@ -8,6 +8,12 @@
 import pytest
 import yaml
 from google.protobuf.timestamp_pb2 import Timestamp
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    OnAttemptsExceeded,
+    RetryState,
+    TaskActivation,
+)
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
@@ -18,11 +24,6 @@
     send_custom_messages_to_topic,
 )
 from integration_tests.worker import ConfigurableTaskWorker, TaskWorkerClient
-from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
-    OnAttemptsExceeded,
-    RetryState,
-    TaskActivation,
-)
 
 
 TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_failed_tasks"
diff --git a/python/integration_tests/test_task_worker_processing.py b/integration_tests/test_task_worker_processing.py
similarity index 99%
rename from python/integration_tests/test_task_worker_processing.py
rename to integration_tests/test_task_worker_processing.py
index 0bc35a25..b8a9f4ca 100644
--- a/python/integration_tests/test_task_worker_processing.py
+++ b/integration_tests/test_task_worker_processing.py
@@ -6,6 +6,7 @@
 
 import pytest
 import yaml
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
diff --git a/python/integration_tests/test_upkeep_delay.py b/integration_tests/test_upkeep_delay.py
similarity index 99%
rename from python/integration_tests/test_upkeep_delay.py
rename to integration_tests/test_upkeep_delay.py
index 9d30c620..e1eea1e1 100644
--- a/python/integration_tests/test_upkeep_delay.py
+++ b/integration_tests/test_upkeep_delay.py
@@ -9,6 +9,8 @@
 import orjson
 import yaml
 from google.protobuf.timestamp_pb2 import Timestamp
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
@@ -18,7 +20,6 @@
     get_num_tasks_in_sqlite,
     send_custom_messages_to_topic,
 )
-from sentry_protos.taskbroker.v1.taskbroker_pb2 import TaskActivation
 
 
 TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_upkeep_delay"
diff --git a/python/integration_tests/test_upkeep_expiry.py b/integration_tests/test_upkeep_expiry.py
similarity index 99%
rename from python/integration_tests/test_upkeep_expiry.py
rename to integration_tests/test_upkeep_expiry.py
index 16a1ff37..73ea7195 100644
--- a/python/integration_tests/test_upkeep_expiry.py
+++ b/integration_tests/test_upkeep_expiry.py
@@ -7,6 +7,12 @@
 import orjson
 import yaml
 from google.protobuf.timestamp_pb2 import Timestamp
+from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
+    OnAttemptsExceeded,
+    RetryState,
+    TaskActivation,
+)
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
@@ -15,11 +21,6 @@
     get_num_tasks_in_sqlite,
     send_custom_messages_to_topic,
 )
-from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
-    OnAttemptsExceeded,
-    RetryState,
-    TaskActivation,
-)
 
 
 TEST_OUTPUT_PATH = TESTS_OUTPUT_ROOT / "test_upkeep_expiry"
diff --git a/python/integration_tests/test_upkeep_retry.py b/integration_tests/test_upkeep_retry.py
similarity index 99%
rename from python/integration_tests/test_upkeep_retry.py
rename to integration_tests/test_upkeep_retry.py
index b758a8b4..4f3892d7 100644
--- a/python/integration_tests/test_upkeep_retry.py
+++ b/integration_tests/test_upkeep_retry.py
@@ -6,6 +6,7 @@
 
 import pytest
 import yaml
+
 from integration_tests.helpers import (
     TASKBROKER_BIN,
     TESTS_OUTPUT_ROOT,
diff --git a/python/integration_tests/worker.py b/integration_tests/worker.py
similarity index 100%
rename from python/integration_tests/worker.py
rename to integration_tests/worker.py
index e42115fc..63125c5f 100644
--- a/python/integration_tests/worker.py
+++ b/integration_tests/worker.py
@@ -1,17 +1,17 @@
-import grpc
-import time
-import random
 import logging
+import random
+import time
+
+import grpc
 from sentry_protos.taskbroker.v1.taskbroker_pb2 import (
-    TaskActivation,
+    TASK_ACTIVATION_STATUS_COMPLETE,
+    TASK_ACTIVATION_STATUS_FAILURE,
+    TASK_ACTIVATION_STATUS_RETRY,
     FetchNextTask,
     GetTaskRequest,
     SetTaskStatusRequest,
+    TaskActivation,
     TaskActivationStatus,
-    TASK_ACTIVATION_STATUS_COMPLETE,
-    TASK_ACTIVATION_STATUS_FAILURE,
-    TASK_ACTIVATION_STATUS_RETRY,
 )
 from sentry_protos.taskbroker.v1.taskbroker_pb2_grpc import ConsumerServiceStub
diff --git a/pyproject.toml b/pyproject.toml
index 4a6985d7..809e69c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,6 +3,7 @@ name = "taskbroker"
 # we only have this here to make uv happy
 # we use uv for dependency management, not packaging
 version = "0.0.0"
+requires-python = ">=3.11"
 
 [dependency-groups]
 dev = [
@@ -31,11 +32,8 @@ environments = ["sys_platform == 'darwin' or sys_platform == 'linux'"]
 url = "https://pypi.devinfra.sentry.io/simple"
 default = true
 
-[tool.pytest.ini_options]
-pythonpath = ["python"]
-testpaths = ["python/integration_tests"]
-python_files = ["test_*.py"]
-python_functions = ["test_*"]
+[tool.uv.workspace]
+members = ["integration_tests", "clients/python"]
 
 [tool.mypy]
 mypy_path = "python"
diff --git a/uv.lock b/uv.lock
index 7c2fd104..a1048f7f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,6 +1,6 @@
 version = 1
 revision = 2
-requires-python = ">=3.12"
+requires-python = ">=3.11"
 resolution-markers = [
     "sys_platform == 'darwin' or sys_platform == 'linux'",
 ]
@@ -8,6 +8,13 @@ supported-markers = [
     "sys_platform == 'darwin' or sys_platform == 'linux'",
 ]
 
+[manifest]
+members = [
+    "taskbroker",
+    "taskbroker-client",
+    "taskbroker-integration-tests",
+]
+
 [[package]]
 name = "black"
 version = "24.10.0"
@@ -20,6 +27,9 @@ dependencies = [
     { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392" },
     { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3" },
     { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65" },
     { url = "https://pypi.devinfra.sentry.io/wheels/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f" },
@@ -34,6 +44,28 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2" },
 ]
 
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux')" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26" },
+]
+
 [[package]]
 name = "cfgv"
 version = "3.4.0"
@@ -55,6 +87,10 @@ name = "confluent-kafka"
 version = "2.8.0"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5de7ab587ecdc153a029d992e7d470fc68ab943e38931b18fc4a01074afd5c5c" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:52a87d1a73ad91d4f81e35a8e6e961a5ad0c49ecdb198e47bd106262e968253e" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f03b12d009cfb16649b0e51c06514312d5cbbbe9b06e71cf4ad781b378f8b79f" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1a01feeac7f27bff079ad1a29f1cf1b149235a975d67d7de20c1935f44b14293" },
     { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:80bf43c098df04008dd6a517a9f745b67885af9c35c09d220f4d19661ae4d647" },
     { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f5e5b18c7acf50777545e817e563b0fa9c74badbabf30474665c03ae8ddcc23" },
     { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c540935d89acf1bc173fddd0b9b978ece348345f5a0fccf549ea8663cfa5152c" },
@@ -65,6 +101,14 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/confluent_kafka-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e75230b51456de5cfaefe94c35f3de5101864d8c21518f114d5cd9dd1d7d43b1" },
 ]
 
+[[package]]
+name = "cronsim"
+version = "2.6"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/cronsim-2.6-py3-none-any.whl", hash = "sha256:a3a823ea834c29100a17ab1d4af6179c3149612d28d8e0dec8044057570246be" },
+]
+
 [[package]]
 name = "devservices"
 version = "1.2.1"
@@ -125,11 +169,67 @@ name = "grpcio"
 version = "1.66.1"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8a1e224ce6f740dbb6b24c58f885422deebd7eb724aff0671a847f8951857c26" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a66fe4dc35d2330c185cfbb42959f57ad36f257e0cc4557d11d9f0a3f14311df" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4573608e23f7e091acfbe3e84ac2045680b69751d8d67685ffa193a4429fedb1" },
     { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fdb14bad0835914f325349ed34a51940bc2ad965142eb3090081593c6e347be9" },
     { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f03a5884c56256e08fd9e262e11b5cfacf1af96e2ce78dc095d2c41ccae2c80d" },
     { url = "https://pypi.devinfra.sentry.io/wheels/grpcio-1.66.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ca1be089fb4446490dd1135828bd42a7c7f8421e74fa581611f7afdf7ab761" },
 ]
 
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" },
+]
+
+[[package]]
+name = "h2"
+version = "4.2.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0" },
+]
+
+[[package]]
+name = "hpack"
+version = "4.1.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55" },
+]
+
+[package.optional-dependencies]
+http2 = [
+    { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[[package]]
+name = "hyperframe"
+version = "6.1.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5" },
+]
+
 [[package]]
 name = "identify"
 version = "2.6.9"
@@ -172,6 +272,10 @@ dependencies = [
     { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b" },
     { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb" },
     { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403" },
     { url = "https://pypi.devinfra.sentry.io/wheels/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056" },
@@ -203,6 +307,9 @@ name = "orjson"
 version = "3.10.10"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:879e99486c0fbb256266c7c6a67ff84f46035e4f8749ac6317cc83dacd7f993a" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:019481fa9ea5ff13b5d5d95e6fd5ab25ded0810c80b150c2c7b1cc8660b662a7" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbf3c20c6a7db69df58672a0d5815647ecf78c8e62a4d9bd284e8621c1fe5ccb" },
     { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05" },
     { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9" },
     { url = "https://pypi.devinfra.sentry.io/wheels/orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999" },
@@ -276,6 +383,14 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" },
 ]
 
+[[package]]
+name = "pycparser"
+version = "2.23"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934" },
+]
+
 [[package]]
 name = "pyflakes"
 version = "3.4.0"
@@ -297,11 +412,26 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" },
 ]
 
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" },
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.2"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85" },
     { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab" },
     { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725" },
     { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5" },
@@ -312,6 +442,36 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5" },
 ]
 
+[[package]]
+name = "redis"
+version = "3.5.3"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" },
+]
+
+[[package]]
+name = "redis-py-cluster"
+version = "2.1.3"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/redis_py_cluster-2.1.3-py2.py3-none-any.whl", hash = "sha256:38f08850fde469ffd76bced7309721114acc487e52b76f374a0502c34c69b4ec" },
+]
+
+[[package]]
+name = "sentry-arroyo"
+version = "2.33.1"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_arroyo-2.33.1-py3-none-any.whl", hash = "sha256:10d05f81a06bd7f9ee28fe7d7a628c868c3ccbdb5987bece6d9860930e1654af" },
+]
+
 [[package]]
 name = "sentry-devenv"
 version = "1.22.2"
@@ -339,14 +499,19 @@ wheels = [
 
 [[package]]
 name = "sentry-sdk"
-version = "2.35.1"
+version = "2.46.0"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 dependencies = [
     { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.35.1-py2.py3-none-any.whl", hash = "sha256:13b6d6cfdae65d61fe1396a061cf9113b20f0ec1bcb257f3826b88f01bb55720" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1" },
+]
+
+[package.optional-dependencies]
+http2 = [
+    { name = "httpcore", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 
 [[package]]
@@ -357,6 +522,14 @@ wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/setuptools-78.1.1-py3-none-any.whl", hash = "sha256:c3a9c4211ff4c309edb8b8c4f1cbfa7ae324c4ba9f91ff254e3d305b9fd54561" },
 ]
 
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274" },
+]
+
 [[package]]
 name = "supervisor"
 version = "4.2.5"
@@ -415,6 +588,136 @@ dev = [
     { name = "types-pyyaml", specifier = ">=6.0.12.20241230" },
 ]
 
+[[package]]
+name = "taskbroker-client"
+version = "0.1.0"
+source = { editable = "clients/python" }
+dependencies = [
+    { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "cronsim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "orjson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "redis-py-cluster", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-arroyo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-sdk", extra = ["http2"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[package.dev-dependencies]
+dev = [
+    { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "time-machine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "confluent-kafka", specifier = ">=2.3.0" },
+    { name = "cronsim", specifier = ">=2.6" },
+    { name = "grpcio", specifier = "==1.66.1" },
+    { name = "orjson", specifier = ">=3.10.10" },
+    { name = "protobuf", specifier = ">=5.28.3" },
+    { name = "redis", specifier = ">=3.4.1" },
+    { name = "redis-py-cluster", specifier = ">=2.1.0" },
+    { name = "sentry-arroyo", specifier = ">=2.33.1" },
+    { name = "sentry-protos", specifier = ">=0.2.0" },
+    { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.43.0" },
+    { name = "types-protobuf", specifier = ">=6.30.2.20250703" },
+    { name = "zstandard", specifier = ">=0.18.0" },
+]
+
+[package.metadata.requires-dev]
+dev = [
+    { name = "black", specifier = "==24.10.0" },
+    { name = "devservices", specifier = ">=1.2.1" },
+    { name = "flake8", specifier = ">=7.3.0" },
+    { name = "isort", specifier = ">=5.13.2" },
+    { name = "mypy", specifier = ">=1.17.1" },
+    { name = "pre-commit", specifier = ">=4.2.0" },
+    { name = "pytest", specifier = ">=8.3.3" },
+    { name = "sentry-devenv", specifier = ">=1.22.2" },
+    { name = "time-machine", specifier = ">=2.16.0" },
+]
+
+[[package]]
+name = "taskbroker-integration-tests"
+version = "0.0.0"
+source = { virtual = "integration_tests" }
+
+[package.dev-dependencies]
+dev = [
+    { name = "black", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "confluent-kafka", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "devservices", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "flake8", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "grpcio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "isort", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mypy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "orjson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pre-commit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-devenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sentry-protos", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "types-protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "types-pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[package.metadata]
+
+[package.metadata.requires-dev]
+dev = [
+    { name = "black", specifier = "==24.10.0" },
+    { name = "confluent-kafka", specifier = ">=2.3.0" },
+    { name = "devservices", specifier = ">=1.2.1" },
+    { name = "flake8", specifier = ">=7.3.0" },
+    { name = "grpcio", specifier = "==1.66.1" },
+    { name = "isort", specifier = ">=5.13.2" },
+    { name = "mypy", specifier = ">=1.17.1" },
+    { name = "orjson", specifier = ">=3.10.10" },
+    { name = "pre-commit", specifier = ">=4.2.0" },
+    { name = "protobuf", specifier = ">=5.28.3" },
+    { name = "pytest", specifier = ">=8.3.3" },
+    { name = "pyyaml", specifier = ">=6.0.2" },
+    { name = "sentry-devenv", specifier = ">=1.22.2" },
+    { name = "sentry-protos", specifier = ">=0.2.0" },
+    { name = "types-protobuf", specifier = ">=6.30.2.20250703" },
+    { name = "types-pyyaml", specifier = ">=6.0.12.20241230" },
+]
+
+[[package]]
+name = "time-machine"
+version = "2.16.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8f936566ef9f09136a3d5db305961ef6d897b76b240c9ff4199144aed6dd4fe5" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5886e23ede3478ca2a3e0a641f5d09dd784dfa9e48c96e8e5e31fc4fe77b6dc0" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c76caf539fa4941e1817b7c482c87c65c52a1903fea761e84525955c6106fafb" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391ae9c484736850bb44ef125cbad52fe2d1b69e42c95dc88c43af8ead2cc7" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:84788f4d62a8b1bf5e499bb9b0e23ceceea21c415ad6030be6267ce3d639842f" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:15ec236b6571730236a193d9d6c11d472432fc6ab54e85eac1c16d98ddcd71bf" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cedc989717c8b44a3881ac3d68ab5a95820448796c550de6a2149ed1525157f0" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317b68b56a9c3731e0cf8886e0f94230727159e375988b36c60edce0ddbcb44a" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7751bf745d54e9e8b358c0afa332815da9b8a6194b26d0fd62876ab6c4d5c9c0" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1784edf173ca840ba154de6eed000b5727f65ab92972c2f88cec5c4d6349c5f2" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f5876a5682ce1f517e55d7ace2383432627889f6f7e338b961f99d684fd9e8d" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/time_machine-2.16.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:667b150fedb54acdca2a4bea5bf6da837b43e6dd12857301b48191f8803ba93f" },
+]
+
 [[package]]
 name = "types-protobuf"
 version = "6.30.2.20250703"
@@ -459,3 +762,25 @@ dependencies = [
 wheels = [
     { url = "https://pypi.devinfra.sentry.io/wheels/virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170" },
 ]
+
+[[package]]
+name = "zstandard"
+version = "0.18.0"
+source = { registry = "https://pypi.devinfra.sentry.io/simple" }
+dependencies = [
+    { name = "cffi", marker = "(platform_python_implementation == 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation == 'PyPy' and sys_platform == 'linux')" },
+]
+wheels = [
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f9d32f509b84b7158d46ba673f1c5123a80062652517e9e56240ded7df3d744e" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:ed3937c3d703c7f74f341fb530c9523b012923897e7979565ac0f3cb4f808d98" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:05552e29b1b580543cc22ae7ca9fb833e136a1843ef660a96679d246e666bbeb" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbe2bd4cab395a157c61f059f60ec4e099ef207cd970d66f0ba184f9c2e25d37" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2eab9516bc4352fc9763d96047c815879f3efb1dfb5dfe2f775b2e22c0289cb6" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e1f98ffd138d172efd202cd078e746af80492c6942004b080bf627c5f826da5" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07a72264613c75fe6eb64f07ab553d3cfab7a421c8733e067a8718ef69c642a7" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee14cc6b8b40733a707b467ddc192592cab941babf82b3e6f700673e050b4bda" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:032ddaf24458986a31ff49d2fa86a4003e1e1c34c38976bedd06805350eaeddc" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d68ff7c3a4c35400d807efbfa793767c2d4866a7017770b424e65749a70e958e" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1ef5b96f0e90855ea13d06b7213a75a77a23946d8bb186ff38578dd1ff5efd4" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/zstandard-0.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:754256fb4080a36f8992983b2f65f23719d275c9a350bcf18d76344ed64efa19" },
+]