Skip to content

chore(crons): Move monitor update logic to be entirely inside the serializer take 2 #97943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 10 additions & 116 deletions src/sentry/monitors/endpoints/base_monitor_details.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,25 @@
from __future__ import annotations

from django.db import router, transaction
from django.db.models import F, QuerySet
from django.db.models.functions import TruncMinute
from django.db.models import QuerySet
from django.utils.crypto import get_random_string
from rest_framework import serializers
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import audit_log, quotas
from sentry.api.base import BaseEndpointMixin
from sentry.api.exceptions import ParameterValidationError
from sentry.api.helpers.environments import get_environments
from sentry.api.serializers import serialize
from sentry.constants import ObjectStatus
from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion
from sentry.models.environment import Environment
from sentry.models.project import Project
from sentry.models.rule import Rule, RuleActivity, RuleActivityType
from sentry.monitors.models import (
CheckInStatus,
Monitor,
MonitorCheckIn,
MonitorEnvironment,
MonitorStatus,
)
from sentry.monitors.models import Monitor, MonitorEnvironment, MonitorStatus
from sentry.monitors.serializers import MonitorSerializer
from sentry.monitors.utils import (
create_issue_alert_rule,
get_checkin_margin,
get_max_runtime,
update_issue_alert_rule,
)
from sentry.monitors.validators import MonitorValidator
from sentry.utils.auth import AuthenticatedHttpRequest
from sentry.utils.outcomes import Outcome


class MonitorDetailsMixin(BaseEndpointMixin):
Expand All @@ -57,118 +43,26 @@ def update_monitor(
"""
Update a monitor.
"""
# set existing values as validator will overwrite
existing_config = monitor.config
existing_margin = existing_config.get("checkin_margin")
existing_max_runtime = existing_config.get("max_runtime")

validator = MonitorValidator(
data=request.data,
partial=True,
instance={
"name": monitor.name,
"slug": monitor.slug,
"status": monitor.status,
"config": monitor.config,
"project": project,
},
instance=monitor,
context={
"organization": project.organization,
"access": request.access,
"request": request,
"monitor": monitor,
},
)
if not validator.is_valid():
return self.respond(validator.errors, status=400)

result = validator.save()

params = {}
if "name" in result:
params["name"] = result["name"]
if "slug" in result:
params["slug"] = result["slug"]
if "status" in result:
params["status"] = result["status"]
if "is_muted" in result:
params["is_muted"] = result["is_muted"]
if "owner" in result:
owner = result["owner"]
params["owner_user_id"] = None
params["owner_team_id"] = None
if owner and owner.is_user:
params["owner_user_id"] = owner.id
elif owner and owner.is_team:
params["owner_team_id"] = owner.id
if "config" in result:
params["config"] = result["config"]

# update timeouts + expected next check-in, as appropriate
checkin_margin = result["config"].get("checkin_margin")
if checkin_margin != existing_margin:
MonitorEnvironment.objects.filter(monitor_id=monitor.id).update(
next_checkin_latest=F("next_checkin") + get_checkin_margin(checkin_margin)
)

max_runtime = result["config"].get("max_runtime")
if max_runtime != existing_max_runtime:
MonitorCheckIn.objects.filter(
monitor_id=monitor.id, status=CheckInStatus.IN_PROGRESS
).update(timeout_at=TruncMinute(F("date_added")) + get_max_runtime(max_runtime))

if "project" in result and result["project"].id != monitor.project_id:
raise ParameterValidationError("existing monitors may not be moved between projects")

# Attempt to assign a monitor seat
if params["status"] == ObjectStatus.ACTIVE and monitor.status != ObjectStatus.ACTIVE:
outcome = quotas.backend.assign_monitor_seat(monitor)
# The MonitorValidator checks if a seat assignment is available.
# This protects against a race condition
if outcome != Outcome.ACCEPTED:
raise ParameterValidationError("Failed to enable monitor, please try again")

# Attempt to unassign the monitor seat
if params["status"] == ObjectStatus.DISABLED and monitor.status != ObjectStatus.DISABLED:
quotas.backend.disable_monitor_seat(monitor)

# Update monitor slug in billing
if "slug" in result:
quotas.backend.update_monitor_slug(monitor.slug, params["slug"], monitor.project_id)

if params:
monitor.update(**params)
self.create_audit_entry(
request=request,
organization=project.organization,
target_object=monitor.id,
event=audit_log.get_event_id("MONITOR_EDIT"),
data=monitor.get_audit_log_data(),
)

# Update alert rule after in case slug or name changed
if "alert_rule" in result:
# Check to see if rule exists
issue_alert_rule = monitor.get_issue_alert_rule()
# If rule exists, update as necessary
if issue_alert_rule:
issue_alert_rule_id = update_issue_alert_rule(
request, project, monitor, issue_alert_rule, result["alert_rule"]
)
# If rule does not exist, create
else:
issue_alert_rule_id = create_issue_alert_rule(
request, project, monitor, result["alert_rule"]
)

if issue_alert_rule_id:
# If config is not sent, use existing config to update alert_rule_id
if "config" not in params:
params["config"] = monitor.config

params["config"]["alert_rule_id"] = issue_alert_rule_id
monitor.update(**params)
try:
updated_monitor = validator.save()
except serializers.ValidationError as e:
return self.respond(e.detail, status=400)

return self.respond(serialize(monitor, request.user))
return self.respond(serialize(updated_monitor, request.user))

def delete_monitor(self, request: Request, project: Project, monitor: Monitor) -> Response:
"""
Expand Down
158 changes: 144 additions & 14 deletions src/sentry/monitors/validators.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import re
from typing import Literal
from typing import Any, Literal

import jsonschema
import sentry_sdk
from cronsim import CronSimError
from django.core.exceptions import ValidationError
from django.db.models import F
from django.db.models.functions import TruncMinute
from django.utils import timezone
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field, extend_schema_serializer
from rest_framework import serializers

from sentry import quotas
from sentry import audit_log, quotas
from sentry.api.fields.actor import ActorField
from sentry.api.fields.empty_integer import EmptyIntegerField
from sentry.api.fields.sentry_slug import SentrySerializerSlugField
Expand All @@ -18,11 +21,26 @@
from sentry.constants import ObjectStatus
from sentry.db.models import BoundedPositiveIntegerField
from sentry.db.models.fields.slug import DEFAULT_SLUG_MAX_LENGTH
from sentry.models.project import Project
from sentry.monitors.constants import MAX_MARGIN, MAX_THRESHOLD, MAX_TIMEOUT
from sentry.monitors.models import CheckInStatus, Monitor, ScheduleType
from sentry.monitors.models import (
MONITOR_CONFIG,
CheckInStatus,
Monitor,
MonitorCheckIn,
MonitorEnvironment,
ScheduleType,
)
from sentry.monitors.schedule import get_next_schedule, get_prev_schedule
from sentry.monitors.types import CrontabSchedule, slugify_monitor_slug
from sentry.monitors.utils import create_issue_alert_rule, signal_monitor_created
from sentry.monitors.utils import (
create_issue_alert_rule,
get_checkin_margin,
get_max_runtime,
signal_monitor_created,
update_issue_alert_rule,
)
from sentry.utils.audit import create_audit_entry
from sentry.utils.dates import AVAILABLE_TIMEZONES
from sentry.utils.outcomes import Outcome

Expand Down Expand Up @@ -162,7 +180,7 @@ def bind(self, *args, **kwargs):
super().bind(*args, **kwargs)
# Inherit instance data when used as a nested serializer
if self.parent.instance:
self.instance = self.parent.instance.get("config")
self.instance = self.parent.instance.config
self.partial = self.parent.partial

def validate_schedule_type(self, value):
Expand Down Expand Up @@ -300,7 +318,7 @@ def validate_slug(self, value):

value = slugify_monitor_slug(value)
# Ignore if slug is equal to current value
if self.instance and value == self.instance.get("slug"):
if self.instance and value == self.instance.slug:
return value

if Monitor.objects.filter(
Expand All @@ -309,14 +327,6 @@ def validate_slug(self, value):
raise ValidationError(f'The slug "{value}" is already in use.')
return value

def update(self, instance, validated_data):
config = instance.get("config", {})
config.update(validated_data.get("config", {}))
instance.update(validated_data)
if "config" in instance or "config" in validated_data:
instance["config"] = config
return instance

def create(self, validated_data):
project = validated_data.get("project", self.context.get("project"))
organization = self.context["organization"]
Expand Down Expand Up @@ -361,6 +371,126 @@ def create(self, validated_data):
monitor.update(config=config)
return monitor

def update(self, instance, validated_data):
"""
Update an existing Monitor instance.
"""
if "project" in validated_data and validated_data["project"].id != instance.project_id:
raise serializers.ValidationError(
{"detail": {"message": "existing monitors may not be moved between projects"}}
)

existing_config = instance.config.copy()
existing_margin = existing_config.get("checkin_margin")
existing_max_runtime = existing_config.get("max_runtime")
existing_slug = instance.slug

params: dict[str, Any] = {}
if "owner" in validated_data:
owner = validated_data["owner"]
params["owner_user_id"] = None
params["owner_team_id"] = None
if owner and owner.is_user:
params["owner_user_id"] = owner.id
elif owner and owner.is_team:
params["owner_team_id"] = owner.id

if "name" in validated_data:
params["name"] = validated_data["name"]
if "slug" in validated_data:
params["slug"] = validated_data["slug"]
if "status" in validated_data:
params["status"] = validated_data["status"]
if "is_muted" in validated_data:
params["is_muted"] = validated_data["is_muted"]
if "config" in validated_data:
merged_config = instance.config.copy()
merged_config.update(validated_data["config"])

try:
jsonschema.validate(merged_config, MONITOR_CONFIG)
except jsonschema.ValidationError as e:
raise serializers.ValidationError({"config": f"Invalid config: {e.message}"})

params["config"] = merged_config

if "status" in params:
# Attempt to assign a monitor seat
if params["status"] == ObjectStatus.ACTIVE and instance.status != ObjectStatus.ACTIVE:
outcome = quotas.backend.assign_monitor_seat(instance)
# The MonitorValidator checks if a seat assignment is available.
# This protects against a race condition
if outcome != Outcome.ACCEPTED:
raise serializers.ValidationError(
{"status": "Failed to enable monitor due to quota limits"}
)

# Attempt to unassign the monitor seat
if (
params["status"] == ObjectStatus.DISABLED
and instance.status != ObjectStatus.DISABLED
):
quotas.backend.disable_monitor_seat(instance)

if params:
instance.update(**params)
create_audit_entry(
request=self.context["request"],
organization_id=instance.organization_id,
target_object=instance.id,
event=audit_log.get_event_id("MONITOR_EDIT"),
data=instance.get_audit_log_data(),
)

# Update monitor slug in billing
if "slug" in params:
quotas.backend.update_monitor_slug(existing_slug, params["slug"], instance.project_id)

if "config" in validated_data:
new_config = validated_data["config"]
checkin_margin = new_config.get("checkin_margin")
if checkin_margin != existing_margin:
MonitorEnvironment.objects.filter(monitor_id=instance.id).update(
next_checkin_latest=F("next_checkin") + get_checkin_margin(checkin_margin)
)

max_runtime = new_config.get("max_runtime")
if max_runtime != existing_max_runtime:
MonitorCheckIn.objects.filter(
monitor_id=instance.id, status=CheckInStatus.IN_PROGRESS
).update(timeout_at=TruncMinute(F("date_added")) + get_max_runtime(max_runtime))
Comment on lines +449 to +461
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential bug: The timeout update logic in MonitorValidator uses a partial config, causing custom timeouts to be reset to default values during unrelated partial updates.
  • Description: When a monitor's configuration is partially updated (e.g., only changing the schedule), the timeout update logic incorrectly uses the partial validated_data["config"]. This dictionary lacks keys like checkin_margin or max_runtime if they were not part of the update. The code then compares None (from new_config.get("checkin_margin")) with the existing value, which evaluates to true and triggers an update. This incorrectly resets the monitor's checkin_margin and max_runtime to their default values, causing users to lose their custom timeout settings and potentially leading to incorrect monitor alerting.

  • Suggested fix: The timeout update logic should check if checkin_margin and max_runtime were present in the incoming validated_data["config"] before comparing and updating their values. This ensures that only explicitly changed values are updated and prevents accidental resets.
    severity: 0.7, confidence: 0.95

Did we get this right? 👍 / 👎 to inform future reviews.


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Serializer Logic Mismatch Causes Unnecessary Updates

In MonitorSerializer.update, the logic for checkin_margin and max_runtime updates compares against validated_data["config"]. This partial data can incorrectly trigger updates to MonitorEnvironment and MonitorCheckIn objects, even when these values haven't changed, potentially passing None to update functions.

Fix in Cursor Fix in Web

# Update alert rule after in case slug or name changed
if "alert_rule" in validated_data:
alert_rule_data = validated_data["alert_rule"]
request = self.context.get("request")
if not request:
return instance

project = Project.objects.get(id=instance.project_id)

# Check to see if rule exists
issue_alert_rule = instance.get_issue_alert_rule()
# If rule exists, update as necessary
if issue_alert_rule:
issue_alert_rule_id = update_issue_alert_rule(
request, project, instance, issue_alert_rule, alert_rule_data
)
# If rule does not exist, create
else:
# Type assertion for mypy - create_issue_alert_rule expects AuthenticatedHttpRequest
# but in tests we might have a regular Request object
issue_alert_rule_id = create_issue_alert_rule(
request, project, instance, alert_rule_data
)

if issue_alert_rule_id:
# If config is not sent, use existing config to update alert_rule_id
instance.config["alert_rule_id"] = issue_alert_rule_id
instance.update(config=instance.config)

return instance


class TraceContextValidator(serializers.Serializer):
trace_id = serializers.UUIDField(format="hex")
Expand Down
Loading
Loading