Skip to content

Commit b97b6a0

Browse files
authored
feat(deletions): Schedule task to delete pending deletions groups (#103820)
Sometimes groups do not get deleted and end up in limbo. This task handles groups that are pending deletion and were last seen more than a day ago but still within the 90-day retention period.
1 parent faf77a3 commit b97b6a0

File tree

3 files changed

+257
-0
lines changed

3 files changed

+257
-0
lines changed

src/sentry/conf/server.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
893893
"sentry.tasks.collect_project_platforms",
894894
"sentry.tasks.commit_context",
895895
"sentry.tasks.commits",
896+
"sentry.tasks.delete_pending_groups",
896897
"sentry.tasks.delete_seer_grouping_records",
897898
"sentry.tasks.digests",
898899
"sentry.tasks.email",
@@ -1006,6 +1007,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
10061007
"task": "deletions:sentry.deletions.tasks.reattempt_deletions",
10071008
"schedule": task_crontab("0", "*/2", "*", "*", "*"),
10081009
},
1010+
"delete-pending-groups": {
1011+
"task": "deletions:sentry.tasks.delete_pending_groups",
1012+
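# Runs every 2 hours between 9am and 5pm Eastern Standard Time (EST: UTC-5); the hours are fixed in UTC, so during daylight saving time (EDT: UTC-4) this is 10am-6pm local time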
1013+
# 9am, 11am, 1pm, 3pm, 5pm EST = 14:00, 16:00, 18:00, 20:00, 22:00 UTC
1014+
"schedule": task_crontab("0", "14,16,18,20,22", "*", "*", "*"),
1015+
},
10091016
"schedule-weekly-organization-reports-new": {
10101017
"task": "reports:sentry.tasks.summaries.weekly_reports.schedule_organizations",
10111018
"schedule": task_crontab("0", "12", "sat", "*", "*"),
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import logging
2+
from collections import defaultdict
3+
from datetime import timedelta
4+
from uuid import uuid4
5+
6+
from django.utils import timezone
7+
8+
from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE
9+
from sentry.deletions.tasks.groups import delete_groups_for_project
10+
from sentry.models.group import Group, GroupStatus
11+
from sentry.silo.base import SiloMode
12+
from sentry.tasks.base import instrumented_task
13+
from sentry.taskworker.namespaces import deletion_tasks
14+
from sentry.taskworker.retry import Retry
15+
from sentry.utils import metrics
16+
17+
logger = logging.getLogger(__name__)
18+
19+
BATCH_LIMIT = 1000
20+
MAX_LAST_SEEN_DAYS = 90
21+
MIN_LAST_SEEN_DAYS = 1
22+
23+
24+
@instrumented_task(
    name="sentry.tasks.delete_pending_groups",
    namespace=deletion_tasks,
    processing_deadline_duration=10 * 60,
    retry=Retry(times=3, delay=60),
    silo_mode=SiloMode.REGION,
)
def delete_pending_groups() -> None:
    """
    Scheduled task that cleans up groups left in a pending deletion state.

    The task is triggered several times a day by the ``delete-pending-groups``
    schedule entry. Each run selects up to BATCH_LIMIT groups with status
    PENDING_DELETION or DELETION_IN_PROGRESS and schedules
    ``delete_groups_for_project`` tasks for them. Groups are batched by project,
    and each scheduled task receives at most GROUP_CHUNK_SIZE group ids.

    Only groups whose last_seen is between MIN_LAST_SEEN_DAYS and
    MAX_LAST_SEEN_DAYS ago (both bounds inclusive, measured from midnight of the
    current day) are processed. This skips very recent groups (safety window)
    and very old stuck groups.
    """
    statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]

    # Round to midnight so that every run on the same day uses the same window
    now = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS)
    max_last_seen = now - timedelta(days=MIN_LAST_SEEN_DAYS)

    # Apply the last_seen window in the query, *before* the BATCH_LIMIT slice.
    # Slicing first and filtering in Python afterwards lets groups outside the
    # window (e.g. stuck groups older than MAX_LAST_SEEN_DAYS) fill the whole
    # batch on every run, so eligible groups would never be scheduled.
    # XXX: There is no composite index on status and last_seen. If this query
    # times out, add a partial index with the status and last_seen fields.
    groups = list(
        Group.objects.filter(
            status__in=statuses_to_delete,
            last_seen__gte=min_last_seen,
            last_seen__lte=max_last_seen,
        ).values_list("id", "project_id")[:BATCH_LIMIT]
    )

    if not groups:
        logger.info("delete_pending_groups.no_groups_found")
        return

    # Group by project_id to ensure all groups in a batch belong to the same project
    groups_by_project: dict[int, list[int]] = defaultdict(list)
    for group_id, project_id in groups:
        groups_by_project[project_id].append(group_id)

    total_groups = len(groups)
    total_tasks = 0

    logger.info(
        "delete_pending_groups.started",
        extra={"total_groups": total_groups, "projects_count": len(groups_by_project)},
    )

    for project_id, group_ids in groups_by_project.items():
        # Schedule deletion tasks in chunks of GROUP_CHUNK_SIZE
        for i in range(0, len(group_ids), GROUP_CHUNK_SIZE):
            chunk = group_ids[i : i + GROUP_CHUNK_SIZE]
            # Each scheduled task gets its own transaction id
            transaction_id = str(uuid4())

            delete_groups_for_project.apply_async(
                kwargs={
                    "project_id": project_id,
                    "object_ids": chunk,
                    "transaction_id": transaction_id,
                }
            )
            total_tasks += 1

    metrics.incr("delete_pending_groups.groups_scheduled", amount=total_groups, sample_rate=1.0)
    metrics.incr("delete_pending_groups.tasks_scheduled", amount=total_tasks, sample_rate=1.0)

    logger.info("delete_pending_groups.completed")
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
from __future__ import annotations
2+
3+
from datetime import datetime, timedelta
4+
from unittest.mock import MagicMock, patch
5+
6+
from django.utils import timezone
7+
8+
from sentry.models.group import Group, GroupStatus
9+
from sentry.tasks.delete_pending_groups import (
10+
MAX_LAST_SEEN_DAYS,
11+
MIN_LAST_SEEN_DAYS,
12+
delete_pending_groups,
13+
)
14+
from sentry.testutils.cases import TestCase
15+
from sentry.types.group import GroupSubStatus
16+
17+
18+
class DeletePendingGroupsTest(TestCase):
    """Exercises the scheduled ``delete_pending_groups`` task."""

    def _count_groups_in_deletion_status(self) -> int:
        """Return how many deletion-status groups have last_seen inside the valid window."""
        window_start = self._days_ago(MAX_LAST_SEEN_DAYS)
        window_end = self._days_ago(MIN_LAST_SEEN_DAYS)
        deletion_statuses = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]
        in_window = Group.objects.filter(
            status__in=deletion_statuses,
            last_seen__gte=window_start,
            last_seen__lte=window_end,
        )
        return in_window.count()

    def _days_ago(self, days: int) -> datetime:
        """Return the current time shifted ``days`` days into the past."""
        offset = timedelta(days=days)
        return timezone.now() - offset

    def test_schedules_only_groups_within_valid_date_range(self) -> None:
        """Only groups last seen between 24h and 90d ago are scheduled for deletion."""
        project = self.create_project()

        # Seen just now: still inside the 24 hour safety window, must be skipped
        too_recent = self.create_group(
            project=project,
            status=GroupStatus.PENDING_DELETION,
            last_seen=self._days_ago(0),
        )
        # Seen two days ago: inside the window, must be scheduled
        valid_group = self.create_group(
            project=project,
            status=GroupStatus.PENDING_DELETION,
            last_seen=self._days_ago(2),
        )
        # Seen 91 days ago: past the 90 day limit, must be skipped
        too_old = self.create_group(
            project=project,
            status=GroupStatus.DELETION_IN_PROGRESS,
            last_seen=self._days_ago(91),
        )
        # Inside the window but not in a deletion status, must be skipped
        wrong_status = self.create_group(
            project=project,
            status=GroupStatus.UNRESOLVED,
            substatus=GroupSubStatus.NEW,
            last_seen=self._days_ago(5),
        )

        task_path = "sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async"
        with patch(task_path) as mock_delete_task:
            delete_pending_groups()

        # Exactly one deletion task, carrying only the valid group
        mock_delete_task.assert_called_once()
        scheduled_kwargs = mock_delete_task.call_args.kwargs["kwargs"]
        assert scheduled_kwargs["object_ids"] == [valid_group.id]
        assert scheduled_kwargs["project_id"] == project.id

        # Run again with the deletion task executing for real this time
        assert self._count_groups_in_deletion_status() != 0
        with self.tasks():
            delete_pending_groups()
        assert self._count_groups_in_deletion_status() == 0

        surviving_ids = list(Group.objects.order_by("id").values_list("id", flat=True))
        assert surviving_ids == [too_recent.id, too_old.id, wrong_status.id]

    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
        """Groups are batched per project when deletion tasks are scheduled."""
        project1 = self.create_project()
        project2 = self.create_project()

        pending = {"status": GroupStatus.PENDING_DELETION}
        group1 = self.create_group(project=project1, last_seen=self._days_ago(2), **pending)
        group2 = self.create_group(project=project1, last_seen=self._days_ago(2), **pending)
        group3 = self.create_group(project=project2, last_seen=self._days_ago(2), **pending)

        delete_pending_groups()

        # One task per project, each holding exactly that project's groups
        assert mock_delete_task.call_count == 2
        scheduled_by_project = {
            scheduled.kwargs["kwargs"]["project_id"]: set(scheduled.kwargs["kwargs"]["object_ids"])
            for scheduled in mock_delete_task.call_args_list
        }
        assert scheduled_by_project == {
            project1.id: {group1.id, group2.id},
            project2.id: {group3.id},
        }

    @patch("sentry.tasks.delete_pending_groups.GROUP_CHUNK_SIZE", new=10)
    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    @patch("sentry.tasks.delete_pending_groups.metrics.incr")
    def test_chunks_large_batches(
        self,
        mock_metrics_incr: MagicMock,
        mock_delete_task: MagicMock,
    ) -> None:
        """Groups are split into chunks of GROUP_CHUNK_SIZE when deletion is scheduled."""
        chunk_size = 10  # mirrors the patched GROUP_CHUNK_SIZE
        overflow = 5
        num_groups = chunk_size + overflow
        project = self.create_project()

        # Create more groups than fit into a single chunk
        for _ in range(num_groups):
            self.create_group(
                project=project,
                status=GroupStatus.PENDING_DELETION,
                last_seen=self._days_ago(2),
            )

        delete_pending_groups()

        # Two tasks: a full chunk followed by the remainder
        assert mock_delete_task.call_count == 2
        chunk_lengths = [
            len(scheduled.kwargs["kwargs"]["object_ids"])
            for scheduled in mock_delete_task.call_args_list
        ]
        assert chunk_lengths == [chunk_size, overflow]

        # Both metrics must be emitted with the expected totals; only the first
        # recorded call for each metric name is inspected
        expected_amounts = {
            "delete_pending_groups.groups_scheduled": num_groups,
            "delete_pending_groups.tasks_scheduled": 2,
        }
        first_amounts: dict[str, int] = {}
        for incr_call in mock_metrics_incr.call_args_list:
            metric_name = incr_call.args[0]
            if metric_name in expected_amounts and metric_name not in first_amounts:
                first_amounts[metric_name] = incr_call.kwargs["amount"]
        assert first_amounts == expected_amounts

0 commit comments

Comments
 (0)