Skip to content

Commit 9724eea

Browse files
committed
Add time-based commit filtering for suspect commit detection
1 parent 8b01cbc commit 9724eea

File tree

3 files changed

+149
-62
lines changed

3 files changed

+149
-62
lines changed

src/sentry/integrations/utils/commit_context.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
from collections.abc import Mapping, Sequence
55
from dataclasses import asdict
6-
from datetime import datetime, timedelta, timezone
6+
from datetime import datetime, timedelta
77
from typing import Any
88

99
from django.utils.datastructures import OrderedSet
@@ -36,13 +36,17 @@
3636
logger = logging.getLogger("sentry.tasks.process_commit_context")
3737

3838

39+
MAX_COMMIT_AGE_DAYS = 60
40+
41+
3942
def find_commit_context_for_event_all_frames(
4043
code_mappings: Sequence[RepositoryProjectPathConfig],
4144
frames: Sequence[Mapping[str, Any]],
4245
organization_id: int,
4346
project_id: int,
4447
platform: str,
4548
sdk_name: str | None,
49+
group_first_seen: datetime,
4650
extra: dict[str, Any],
4751
) -> tuple[FileBlameInfo | None, IntegrationInstallation | None]:
4852
"""
@@ -78,14 +82,23 @@ def find_commit_context_for_event_all_frames(
7882
extra=extra,
7983
)
8084

81-
most_recent_blame = max(file_blames, key=lambda blame: blame.commit.committedDate, default=None)
82-
# Only return suspect commits that are less than 2 months old
83-
selected_blame = (
84-
most_recent_blame
85-
if most_recent_blame
86-
and is_date_less_than_two_months(most_recent_blame.commit.committedDate)
87-
else None
88-
)
85+
# Sort blames by commit date (most recent first)
86+
sorted_blames = sorted(file_blames, key=lambda blame: blame.commit.committedDate, reverse=True)
87+
88+
selected_blame = None
89+
for blame in sorted_blames:
90+
commit_date = blame.commit.committedDate
91+
92+
if is_too_young(group_first_seen, commit_date):
93+
# Skip commits that happened after the issue was first seen
94+
continue
95+
elif is_too_old(group_first_seen, commit_date):
96+
# Stop checking - all remaining commits will be even older
97+
break
98+
else:
99+
# Found a valid commit within the time window
100+
selected_blame = blame
101+
break
89102

90103
selected_install, selected_provider = (
91104
integration_to_install_mapping[selected_blame.code_mapping.organization_integration_id]
@@ -95,7 +108,7 @@ def find_commit_context_for_event_all_frames(
95108

96109
_record_commit_context_all_frames_analytics(
97110
selected_blame=selected_blame,
98-
most_recent_blame=most_recent_blame,
111+
most_recent_blame=sorted_blames[0] if sorted_blames else None,
99112
organization_id=organization_id,
100113
project_id=project_id,
101114
extra=extra,
@@ -110,8 +123,23 @@ def find_commit_context_for_event_all_frames(
110123
return (selected_blame, selected_install)
111124

112125

113-
def is_date_less_than_two_months(date: datetime) -> bool:
114-
return date > datetime.now(tz=timezone.utc) - timedelta(days=60)
126+
def is_too_young(group_first_seen: datetime, commit_date: datetime) -> bool:
127+
"""
128+
Returns True if the commit happened after the issue was first seen.
129+
These commits cannot have caused the issue.
130+
"""
131+
return commit_date > group_first_seen
132+
133+
134+
def is_too_old(
135+
group_first_seen: datetime, commit_date: datetime, max_age_days: int = MAX_COMMIT_AGE_DAYS
136+
) -> bool:
137+
"""
138+
Returns True if the commit is too old to be considered relevant.
139+
Commits older than max_age_days before the issue was first seen are ignored.
140+
"""
141+
time_threshold = group_first_seen - timedelta(days=max_age_days)
142+
return commit_date < time_threshold
115143

116144

117145
def get_or_create_commit_from_blame(

src/sentry/tasks/commit_context.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from sentry.locks import locks
2626
from sentry.models.commit import Commit
2727
from sentry.models.commitauthor import CommitAuthor
28+
from sentry.models.group import Group
2829
from sentry.models.groupowner import GroupOwner, GroupOwnerType, SuspectCommitStrategy
2930
from sentry.models.project import Project
3031
from sentry.models.projectownership import ProjectOwnership
@@ -75,7 +76,7 @@ def process_commit_context(
7576
sdk_name: str | None = None,
7677
) -> None:
7778
"""
78-
For a given event, look at the first in_app frame, and if we can find who modified the line, we can then update who is assigned to the issue.
79+
For a given event, look at ALL in_app frames, and if we can find who modified the line, we can then update who is assigned to the issue.
7980
"""
8081
lock = locks.get(
8182
f"process-commit-context:{group_id}", duration=10, name="process_commit_context"
@@ -87,6 +88,7 @@ def process_commit_context(
8788
set_current_event_project(project_id)
8889

8990
project = Project.objects.get_from_cache(id=project_id)
91+
group = Group.objects.get_from_cache(id=group_id)
9092
set_tag("organization.slug", project.organization.slug)
9193

9294
basic_logging_details = {
@@ -150,6 +152,7 @@ def process_commit_context(
150152
project_id=project_id,
151153
platform=event_platform,
152154
sdk_name=sdk_name,
155+
group_first_seen=group.first_seen,
153156
extra=basic_logging_details,
154157
)
155158
except ApiError:

tests/sentry/tasks/test_commit_context.py

Lines changed: 105 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def setUp(self) -> None:
132132
lineno=30,
133133
commit=CommitInfo(
134134
commitId="commit-id-old",
135-
committedDate=datetime.now(tz=datetime_timezone.utc) - timedelta(days=62),
135+
committedDate=datetime.now(tz=datetime_timezone.utc) - timedelta(days=70),
136136
commitMessage="old commit message",
137137
commitAuthorName=None,
138138
commitAuthorEmail="old@localhost",
@@ -658,66 +658,122 @@ def test_failure_no_blames(
658658
},
659659
)
660660

661-
@patch("sentry.integrations.utils.commit_context.logger.info")
662661
@patch("sentry.tasks.groupowner.process_suspect_commits.delay")
663662
@patch("sentry.analytics.record")
664663
@patch(
665664
"sentry.integrations.github.integration.GitHubIntegration.get_commit_context_all_frames",
666665
)
667-
def test_failure_old_blame(
668-
self, mock_get_commit_context, mock_record, mock_process_suspect_commits, mock_logger_info
666+
def test_time_threshold_filtering(
667+
self, mock_get_commit_context, mock_record, mock_process_suspect_commits
669668
):
670669
"""
671-
A failure case where the only blame returned is past the time threshold. We bail out and fall back
672-
to the release-based suspect commits.
670+
A hacky way to run a parameterized test in TestCase.
671+
Tests the logic that filters commits by age relative to issue first_seen.
673672
"""
674-
mock_get_commit_context.return_value = [self.blame_too_old]
675-
with self.tasks():
676-
assert not GroupOwner.objects.filter(group=self.event.group).exists()
677-
event_frames = get_frame_paths(self.event)
678-
process_commit_context(
679-
event_id=self.event.event_id,
680-
event_platform=self.event.platform,
681-
event_frames=event_frames,
682-
group_id=self.event.group_id,
683-
project_id=self.event.project_id,
684-
sdk_name="sentry.python",
685-
)
673+
# Test cases: each tuple contains (test_case_name, blames_setup, group_first_seen_days_ago, expected_group_owner_exists, expected_commit_id)
674+
test_cases = [
675+
# All commits too young
676+
("all_commits_too_young", ["blame_recent"], 3, False, None),
677+
# Early exit on too old
678+
("early_exit_too_old", ["blame_too_old", "blame_existing_commit"], 10, False, None),
679+
# All outside threshold
680+
("mixed_young_then_old", ["blame_recent", "blame_too_old"], 10, False, None),
681+
# Skip young, find valid
682+
(
683+
"skip_young_find_valid",
684+
["blame_recent", "blame_existing_commit"],
685+
5,
686+
True,
687+
"existing-commit",
688+
),
689+
# All valid, picks most recent
690+
(
691+
"all_valid_picks_most_recent",
692+
["blame_existing_commit", "blame_no_existing_commit"],
693+
5,
694+
True,
695+
"existing-commit",
696+
),
697+
# All commits too old
698+
("only_old_commits", ["blame_too_old"], 10, False, None),
699+
# Empty blames
700+
("empty_blames", [], 10, False, None),
701+
]
686702

687-
assert not GroupOwner.objects.filter(group=self.event.group).exists()
688-
mock_process_suspect_commits.assert_called_once_with(
689-
event_id=self.event.event_id,
690-
event_platform=self.event.platform,
691-
event_frames=event_frames,
692-
group_id=self.event.group_id,
693-
project_id=self.event.project_id,
694-
sdk_name="sentry.python",
703+
existing_commit = self.create_commit(
704+
project=self.project,
705+
repo=self.repo,
706+
author=self.commit_author,
707+
key="existing-commit",
695708
)
709+
existing_commit.update(message="")
710+
711+
# Map blame names to actual blame objects
712+
blame_mapping = {
713+
"blame_recent": self.blame_recent,
714+
"blame_too_old": self.blame_too_old,
715+
"blame_existing_commit": self.blame_existing_commit,
716+
"blame_no_existing_commit": self.blame_no_existing_commit,
717+
}
696718

697-
assert_any_analytics_event(
698-
mock_record,
699-
IntegrationsFailedToFetchCommitContextAllFrames(
700-
organization_id=self.organization.id,
701-
project_id=self.project.id,
702-
group_id=self.event.group_id,
703-
event_id=self.event.event_id,
704-
num_frames=1,
705-
num_successfully_mapped_frames=1,
706-
reason="commit_too_old",
707-
),
708-
)
719+
for (
720+
case_name,
721+
blames_setup,
722+
group_first_seen_days_ago,
723+
expected_group_owner_exists,
724+
expected_commit_id,
725+
) in test_cases:
726+
with self.subTest(case=case_name):
727+
# Reset mocks for each test case
728+
mock_get_commit_context.reset_mock()
729+
mock_record.reset_mock()
730+
731+
# Clean up any existing GroupOwners from previous test cases
732+
GroupOwner.objects.filter(group=self.event.group).delete()
733+
734+
# Setup group first_seen
735+
group_first_seen = datetime.now(tz=datetime_timezone.utc) - timedelta(
736+
days=group_first_seen_days_ago
737+
)
738+
self.event.group.first_seen = group_first_seen
739+
self.event.group.save()
740+
741+
# Build the mock return value
742+
mock_blames = [blame_mapping[blame_name] for blame_name in blames_setup]
743+
mock_get_commit_context.return_value = mock_blames
744+
745+
with self.tasks():
746+
assert not GroupOwner.objects.filter(group=self.event.group).exists()
747+
event_frames = get_frame_paths(self.event)
748+
749+
with self.options({"issues.suspect-commit-strategy": True}):
750+
process_commit_context(
751+
event_id=self.event.event_id,
752+
event_platform=self.event.platform,
753+
event_frames=event_frames,
754+
group_id=self.event.group_id,
755+
project_id=self.event.project_id,
756+
sdk_name="sentry.python",
757+
)
758+
759+
# Assert GroupOwner existence
760+
group_owners = GroupOwner.objects.filter(group=self.event.group)
761+
actual_exists = group_owners.exists()
762+
self.assertEqual(
763+
actual_exists,
764+
expected_group_owner_exists,
765+
f"GroupOwner existence assertion failed for case: {case_name}",
766+
)
709767

710-
mock_logger_info.assert_any_call(
711-
"process_commit_context_all_frames.find_commit_context_failed",
712-
extra={
713-
"organization": self.organization.id,
714-
"group": self.event.group_id,
715-
"event": self.event.event_id,
716-
"project_id": self.project.id,
717-
"reason": "commit_too_old",
718-
"num_frames": 1,
719-
},
720-
)
768+
# Assert correct commit selected
769+
if expected_group_owner_exists and expected_commit_id:
770+
created_commit = Commit.objects.get(key=expected_commit_id)
771+
group_owner = group_owners.first()
772+
self.assertEqual(
773+
group_owner.context["commitId"],
774+
created_commit.id,
775+
f"Wrong commit selected for case: {case_name}",
776+
)
721777

722778
@patch("sentry.tasks.groupowner.process_suspect_commits.delay")
723779
@patch(

0 commit comments

Comments
 (0)