Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 40 additions & 20 deletions src/sentry/integrations/utils/commit_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from collections.abc import Mapping, Sequence
from dataclasses import asdict
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta
from typing import Any

from django.utils.datastructures import OrderedSet
Expand Down Expand Up @@ -36,13 +36,17 @@
logger = logging.getLogger("sentry.tasks.process_commit_context")


MAX_COMMIT_AGE_DAYS = 60


def find_commit_context_for_event_all_frames(
code_mappings: Sequence[RepositoryProjectPathConfig],
frames: Sequence[Mapping[str, Any]],
organization_id: int,
project_id: int,
platform: str,
sdk_name: str | None,
group_first_seen: datetime,
extra: dict[str, Any],
) -> tuple[FileBlameInfo | None, IntegrationInstallation | None]:
"""
Expand Down Expand Up @@ -79,14 +83,23 @@ def find_commit_context_for_event_all_frames(
extra=extra,
)

most_recent_blame = max(file_blames, key=lambda blame: blame.commit.committedDate, default=None)
# Only return suspect commits that are less than 2 months old
selected_blame = (
most_recent_blame
if most_recent_blame
and is_date_less_than_two_months(most_recent_blame.commit.committedDate)
else None
)
# We want the most recent commit that is within MAX_COMMIT_AGE_DAYS of group_first_seen
earliest_valid_date = group_first_seen - timedelta(days=MAX_COMMIT_AGE_DAYS)
selected_blame = None
selected_date = earliest_valid_date
has_too_old_blames = False
has_too_recent_blames = False
for blame in file_blames:
commit_date = blame.commit.committedDate
# Track outside-of-window commits for analytics
if commit_date < earliest_valid_date:
has_too_old_blames = True
elif commit_date > group_first_seen:
has_too_recent_blames = True
# Select best valid commit
if selected_date < commit_date < group_first_seen:
selected_blame = blame
selected_date = commit_date
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Commit Boundary Logic Excludes Relevant Commits

The commit selection logic in find_commit_context_for_event_all_frames uses strict inequalities, which means commits exactly 60 days old or at the group_first_seen timestamp are incorrectly excluded. This might cause us to miss relevant suspect commits at these boundaries.

Fix in Cursor Fix in Web


selected_install, selected_provider = (
integration_to_install_mapping[selected_blame.code_mapping.organization_integration_id]
Expand All @@ -96,7 +109,6 @@ def find_commit_context_for_event_all_frames(

_record_commit_context_all_frames_analytics(
selected_blame=selected_blame,
most_recent_blame=most_recent_blame,
organization_id=organization_id,
project_id=project_id,
extra=extra,
Expand All @@ -106,15 +118,13 @@ def find_commit_context_for_event_all_frames(
selected_provider=selected_provider,
platform=platform,
sdk_name=sdk_name,
has_too_old_blames=has_too_old_blames,
has_too_recent_blames=has_too_recent_blames,
)

return (selected_blame, selected_install)


def is_date_less_than_two_months(date: datetime) -> bool:
return date > datetime.now(tz=timezone.utc) - timedelta(days=60)


def get_or_create_commit_from_blame(
blame: FileBlameInfo, organization_id: int, extra: Mapping[str, str | int]
) -> Commit:
Expand Down Expand Up @@ -338,7 +348,6 @@ def _get_blames_from_all_integrations(

def _record_commit_context_all_frames_analytics(
selected_blame: FileBlameInfo | None,
most_recent_blame: FileBlameInfo | None,
organization_id: int,
project_id: int,
extra: Mapping[str, Any],
Expand All @@ -348,11 +357,14 @@ def _record_commit_context_all_frames_analytics(
selected_provider: str | None,
platform: str,
sdk_name: str | None,
has_too_old_blames: bool,
has_too_recent_blames: bool,
) -> None:
if not selected_blame:
reason = _get_failure_reason(
num_successfully_mapped_frames=num_successfully_mapped_frames,
has_old_blames=most_recent_blame is not None and not selected_blame,
has_too_old_blames=has_too_old_blames,
has_too_recent_blames=has_too_recent_blames,
)
metrics.incr(
"tasks.process_commit_context_all_frames.aborted",
Expand Down Expand Up @@ -415,9 +427,17 @@ def _record_commit_context_all_frames_analytics(
)


def _get_failure_reason(num_successfully_mapped_frames: int, has_old_blames: bool) -> str:
def _get_failure_reason(
num_successfully_mapped_frames: int,
has_too_old_blames: bool,
has_too_recent_blames: bool,
) -> str:
if num_successfully_mapped_frames < 1:
return "no_successful_code_mapping"
if has_old_blames:
return "commit_too_old"
return "no_commit_found"
if has_too_old_blames and has_too_recent_blames:
return "no_valid_commits"
if has_too_recent_blames:
return "commits_too_recent"
if has_too_old_blames:
return "commits_too_old"
return "no_commits_found"
3 changes: 3 additions & 0 deletions src/sentry/tasks/commit_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sentry.locks import locks
from sentry.models.commit import Commit
from sentry.models.commitauthor import CommitAuthor
from sentry.models.group import Group
from sentry.models.groupowner import GroupOwner, GroupOwnerType, SuspectCommitStrategy
from sentry.models.project import Project
from sentry.models.projectownership import ProjectOwnership
Expand Down Expand Up @@ -92,6 +93,7 @@ def process_commit_context(
set_current_event_project(project_id)

project = Project.objects.get_from_cache(id=project_id)
group = Group.objects.get_from_cache(id=group_id)
set_tag("organization.slug", project.organization.slug)
basic_logging_details = {
"event": event_id,
Expand Down Expand Up @@ -135,6 +137,7 @@ def process_commit_context(
project_id=project_id,
platform=event_platform,
sdk_name=sdk_name,
group_first_seen=group.first_seen,
extra=basic_logging_details,
)
except ApiError:
Expand Down
149 changes: 105 additions & 44 deletions tests/sentry/tasks/test_commit_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def setUp(self) -> None:
lineno=30,
commit=CommitInfo(
commitId="commit-id-old",
committedDate=datetime.now(tz=datetime_timezone.utc) - timedelta(days=62),
committedDate=datetime.now(tz=datetime_timezone.utc) - timedelta(days=70),
commitMessage="old commit message",
commitAuthorName=None,
commitAuthorEmail="old@localhost",
Expand Down Expand Up @@ -626,7 +626,7 @@ def test_failure_no_blames(
event_id=self.event.event_id,
num_frames=1,
num_successfully_mapped_frames=1,
reason="no_commit_found",
reason="no_commits_found",
),
)

Expand All @@ -637,63 +637,124 @@ def test_failure_no_blames(
"group": self.event.group_id,
"event": self.event.event_id,
"project_id": self.project.id,
"reason": "no_commit_found",
"reason": "no_commits_found",
"num_frames": 1,
},
)

@patch("sentry.integrations.utils.commit_context.logger.info")
@patch("sentry.tasks.groupowner.process_suspect_commits.delay")
@patch("sentry.analytics.record")
@patch(
"sentry.integrations.github.integration.GitHubIntegration.get_commit_context_all_frames",
)
def test_failure_old_blame(
self, mock_get_commit_context, mock_record, mock_process_suspect_commits, mock_logger_info
def test_time_threshold_filtering(
self, mock_get_commit_context, mock_record, mock_process_suspect_commits
):
"""
A failure case where the only blame returned is past the time threshold. We bail out.
A hacky way to run a parameterized test in TestCase.
Tests the logic that filters commits by age relative to issue first_seen.
"""
mock_get_commit_context.return_value = [self.blame_too_old]
with self.tasks():
assert not GroupOwner.objects.filter(group=self.event.group).exists()
event_frames = get_frame_paths(self.event)
process_commit_context(
event_id=self.event.event_id,
event_platform=self.event.platform,
event_frames=event_frames,
group_id=self.event.group_id,
project_id=self.event.project_id,
sdk_name="sentry.python",
)

assert not GroupOwner.objects.filter(group=self.event.group).exists()
mock_process_suspect_commits.assert_not_called()
# Test cases: each tuple contains (test_case_name, blames_setup, group_first_seen_days_ago, expected_group_owner_exists, expected_commit_id)
test_cases = [
("all_commits_too_young", ["blame_recent"], 3, False, None), # All commits too young
(
"all_outside_range",
["blame_recent", "blame_too_old"],
10,
False,
None,
), # All outside range
(
"skip_young_find_valid",
["blame_recent", "blame_existing_commit"],
5,
True,
"existing-commit",
), # Skip young, find valid
(
"all_valid_picks_most_recent",
["blame_existing_commit", "blame_no_existing_commit"],
5,
True,
"existing-commit",
), # All valid, picks most recent
("only_old_commits", ["blame_too_old"], 10, False, None), # All commits too old
("empty_blames", [], 10, False, None), # No blames
]

assert_any_analytics_event(
mock_record,
IntegrationsFailedToFetchCommitContextAllFrames(
organization_id=self.organization.id,
project_id=self.project.id,
group_id=self.event.group_id,
event_id=self.event.event_id,
num_frames=1,
num_successfully_mapped_frames=1,
reason="commit_too_old",
),
existing_commit = self.create_commit(
project=self.project,
repo=self.repo,
author=self.commit_author,
key="existing-commit",
)
existing_commit.update(message="")

# Map blame names to actual blame objects
blame_mapping = {
"blame_recent": self.blame_recent,
"blame_too_old": self.blame_too_old,
"blame_existing_commit": self.blame_existing_commit,
"blame_no_existing_commit": self.blame_no_existing_commit,
}

mock_logger_info.assert_any_call(
"process_commit_context_all_frames.find_commit_context_failed",
extra={
"organization": self.organization.id,
"group": self.event.group_id,
"event": self.event.event_id,
"project_id": self.project.id,
"reason": "commit_too_old",
"num_frames": 1,
},
)
for (
case_name,
blames_setup,
group_first_seen_days_ago,
expected_group_owner_exists,
expected_commit_id,
) in test_cases:
with self.subTest(case=case_name):
# Reset mocks for each test case
mock_get_commit_context.reset_mock()
mock_record.reset_mock()

# Clean up any existing GroupOwners from previous test cases
GroupOwner.objects.filter(group=self.event.group).delete()

# Setup group first_seen
group_first_seen = datetime.now(tz=datetime_timezone.utc) - timedelta(
days=group_first_seen_days_ago
)
self.event.group.first_seen = group_first_seen
self.event.group.save()

# Build the mock return value
mock_blames = [blame_mapping[blame_name] for blame_name in blames_setup]
mock_get_commit_context.return_value = mock_blames

with self.tasks():
assert not GroupOwner.objects.filter(group=self.event.group).exists()
event_frames = get_frame_paths(self.event)

with self.options({"issues.suspect-commit-strategy": True}):
process_commit_context(
event_id=self.event.event_id,
event_platform=self.event.platform,
event_frames=event_frames,
group_id=self.event.group_id,
project_id=self.event.project_id,
sdk_name="sentry.python",
)

# Assert GroupOwner existence
actual_exists = GroupOwner.objects.filter(group=self.event.group).exists()
self.assertEqual(
actual_exists,
expected_group_owner_exists,
f"GroupOwner existence assertion failed for case: {case_name}",
)

# Assert correct commit selected
if expected_group_owner_exists and expected_commit_id:
created_commit = Commit.objects.get(key=expected_commit_id)
group_owner = GroupOwner.objects.get(group=self.event.group)
self.assertEqual(
group_owner.context["commitId"],
created_commit.id,
f"Wrong commit selected for case: {case_name}",
)

@patch("sentry.tasks.groupowner.process_suspect_commits.delay")
@patch(
Expand Down
Loading