diff --git a/README.md b/README.md index 86513ce6..32a1f5f1 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,13 @@ The following health checks are bundled with this project: - AWS S3 storage - Celery task queue - Celery ping +- Celery Beat Health Check (via `django_celery_beat`) - RabbitMQ - Migrations - Database Heartbeat (Lightweight version of `health_check.db`) - email (SMTP) +View usage instructions of contrib health checks in `docs/contrib.rst` and `docs/settings.rst` Writing your own custom health checks is also very quick and easy. We also like contributions, so don't be afraid to make a pull request. @@ -71,6 +73,7 @@ Add the `health_check` applications to your `INSTALLED_APPS`: 'health_check.contrib.migrations', 'health_check.contrib.celery', # requires celery 'health_check.contrib.celery_ping', # requires celery + 'health_check.contrib.beat_health_check', # requires django_celery_beat 'health_check.contrib.psutil', # disk and memory utilization; requires psutil 'health_check.contrib.s3boto3_storage', # requires boto3 and S3BotoStorage backend 'health_check.contrib.rabbitmq', # requires RabbitMQ broker diff --git a/docs/contrib.rst b/docs/contrib.rst index df64f651..afa17053 100644 --- a/docs/contrib.rst +++ b/docs/contrib.rst @@ -1,19 +1,19 @@ contrib ======= -`psutil` --------- +``psutil`` +---------- Full disks and out-of-memory conditions are common causes of service outages. These situations can be averted by checking disk and memory utilization via the -`psutil` package: +``psutil`` package: .. code:: pip install psutil Once that dependency has been installed, make sure that the corresponding Django -app has been added to `INSTALLED_APPS`: +app has been added to ``INSTALLED_APPS``: .. code:: python @@ -36,14 +36,13 @@ to disable any of these checks, set its value to ``None``. 'MEMORY_MIN' = 100, # in MB } -`celery` --------- +``celery`` +---------- If you are using Celery you may choose between two different Celery checks. `health_check.contrib.celery` sends a task to the queue and it expects that task to be executed in `HEALTHCHECK_CELERY_TIMEOUT` seconds which by default is three seconds. -The task is sent with a priority of `HEALTHCHECK_CELERY_PRIORITY` (default priority by default). You may override that in your Django settings module. This check is suitable for use cases which require that tasks can be processed frequently all the time. @@ -55,5 +54,15 @@ which don't require that tasks are executed almost instantly but require that th to be executed in sometime the future i.e. that the worker process is alive and processing tasks all the time. -You may also use both of them. To use these checks add them to `INSTALLED_APPS` in your -Django settings module. +`health_check.contrib.beat_health_check` Checks for overdue tasks in a celery beat scheduler. +Uses the scheduler module dotted path that is specified in settings.py with `CELERY_BEAT_SCHEDULER`. +If not specified, defaults to `django_celery_beat`'s `django_celery_beat.schedulers.DatabaseScheduler`. +Allows a custom buffer to be set using `BEAT_HEALTH_CHECK_BUFFER_SECONDS` in settings.py. The buffer +defaults to 30 seconds if not defined. The buffer will offset the scheduler interval for when due +tasks are processed. Using a buffer avoids false positives, such as the case where a task is +technically due according to the scheduler, but that's only because the scheduler has not hit its +interval to check and process due tasks. + +To use any of the above checks, add the full dotted path to `INSTALLED_APPS` in your +Django settings module. Example: add `health_check.contrib.beat_health_check` to `INSTALLED_APPS` +to use `beat_health_check`. diff --git a/docs/settings.rst b/docs/settings.rst index 63f0cd73..52f0b7f4 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -118,6 +118,27 @@ Using `django.settings` you may exert more fine-grained control over the behavio - `None` - Specifies the healthcheck task priority. +Beat Health Check +---------------------- +Use `django.settings` to customize the target scheduler and buffer of the celery beat health check + +.. list-table:: Additional Settings + :widths: 25 10 10 55 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - `CELERY_BEAT_SCHEDULER` + - String + - `django_celery_beat.schedulers.DatabaseScheduler` + - The Scheduler module dotted path + * - `BEAT_HEALTH_CHECK_BUFFER_SECONDS` + - Number + - 30 + - The number of seconds a task needs to be overdue for the heath check to fail + Threading ------------------- diff --git a/health_check/contrib/beat_health_check/__init__.py b/health_check/contrib/beat_health_check/__init__.py new file mode 100644 index 00000000..6d51797b --- /dev/null +++ b/health_check/contrib/beat_health_check/__init__.py @@ -0,0 +1 @@ +default_app_config = "contrib.beat_health_check.apps.HealthchecksConfig" diff --git a/health_check/contrib/beat_health_check/apps.py b/health_check/contrib/beat_health_check/apps.py new file mode 100644 index 00000000..a758131d --- /dev/null +++ b/health_check/contrib/beat_health_check/apps.py @@ -0,0 +1,11 @@ +from django.apps import AppConfig +from health_check.plugins import plugin_dir + + +class HealthchecksConfig(AppConfig): + name = "contrib.beat_health_check" + + def ready(self): + from .backends import CeleryBeatHealthCheck + + plugin_dir.register(CeleryBeatHealthCheck) diff --git a/health_check/contrib/beat_health_check/backends.py b/health_check/contrib/beat_health_check/backends.py new file mode 100644 index 00000000..a99c4b0b --- /dev/null +++ b/health_check/contrib/beat_health_check/backends.py @@ -0,0 +1,97 @@ +from datetime import timedelta +from typing import Any, Dict, List, Union + +from celery.beat import Service +from celery.schedules import crontab, solar, schedule +from django.conf import settings +from django.utils.module_loading import import_string +from django_celery_beat.models import CrontabSchedule, IntervalSchedule, SolarSchedule +from django_celery_beat.schedulers import ModelEntry +from health_check.backends import BaseHealthCheckBackend +from health_check.exceptions import ServiceReturnedUnexpectedResult, ServiceUnavailable + + +class CeleryBeatHealthCheck(BaseHealthCheckBackend): + def check_status(self): + """ + Checks for overdue tasks in a celery beat scheduler. + Uses the scheduler module dotted path that is specified in settings.py with `CELERY_BEAT_SCHEDULER`. + If not specified, defaults to `django_celery_beat`'s `django_celery_beat.schedulers.DatabaseScheduler`. + Allows a custom buffer to be set using `BEAT_HEALTH_CHECK_BUFFER_SECONDS` in settings.py. The buffer + defaults to 30 seconds if not defined. The buffer will offset the scheduler interval for when due + tasks are processed. Using a buffer avoids false positives, such as the case where a task is + technically due according to the scheduler, but that's only because the scheduler has not hit its + interval to check and process due tasks. + """ + from celery.app import default_app as celery_app + + # Dotted path to the Celery beat scheduler. Uses django_celery_beat scheduler at default. + scheduler_module_path = getattr(settings, "CELERY_BEAT_SCHEDULER", "django_celery_beat.schedulers.DatabaseScheduler") + scheduler = import_string(scheduler_module_path) + try: + # Get the celery scheduler for the current app and scheduler class via a beat Service + schedule: Dict = ( + Service(app=celery_app, scheduler_cls=scheduler).get_scheduler().schedule + ) + + schedule_tasks: Union[ModelEntry, Any] = schedule.values() + tasks_due: List[Union[ModelEntry, Any]] = [] + + for task in schedule_tasks: + if self.is_overdue(task): + tasks_due.append(task) + + if tasks_due: + self.add_error( + ServiceReturnedUnexpectedResult( + f"{len(tasks_due)} task(s) due:{[task.name for task in tasks_due]} " + ) + ) + except BaseException as e: + self.add_error( + ServiceUnavailable("Encountered unexpected error while checking Beat Tasks"), e + ) + + @staticmethod + def is_overdue(task: ModelEntry) -> bool: + """Determines if a task is overdue by checking if a task is overdue more than x seconds. + Use `BEAT_HEALTH_CHECK_BUFFER_SECONDS` (defaults to 30 seconds) when checking if a task should + be considered overdue. + Uses the `ScheduleEntry.last_run_at`, and the task's schedule in seconds to calculate the + next time the task is due. If the time that the task is due is less than the current time + plus the buffer, we say it's overdue. Otherwise, it's not. + + See celery.schedules.schedule.is_due for celery's implementation of determining when a + entry is due. + + Args: + task: ScheduleEntry + + Returns: + bool: If the task is overdue by at least the buffer. + """ + EXPECTED_SCHEDULE_CLASSES = [solar, crontab, schedule] + DEFAULT_DUE_BUFFER_SECONDS = 30 + buffer_seconds = getattr( + settings, "BEAT_HEALTH_CHECK_BUFFER_SECONDS", DEFAULT_DUE_BUFFER_SECONDS + ) + # django_celery_beat.schedulers.ModelEntry.to_model_schedule returns a Tuple: model_schedule, model_field + task_schedule: Union[ + CrontabSchedule, IntervalSchedule, SolarSchedule + ] = task.to_model_schedule(task.schedule)[0] + + # Get the celery schedule from the Modelentry. + celery_scheduler = task_schedule.schedule + if celery_scheduler.__class__ not in EXPECTED_SCHEDULE_CLASSES: + raise ServiceUnavailable( + f"Encountered unexpected celery schedule class: {celery_scheduler}" + ) + try: + celery_schedule: Union[solar, crontab, schedule] = task_schedule.schedule + due_in = celery_schedule.remaining_estimate(task.last_run_at) + except BaseException: + raise ServiceUnavailable("Encountered an error when determining if a task is overdue.") + + next_due_buffered_seconds = (due_in + timedelta(seconds=buffer_seconds)).total_seconds() + # If the task is due in the past, even with the buffer, we consider it due. + return next_due_buffered_seconds < 0