Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions src/sentry/tasks/seer/night_shift/agentic_triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,14 @@ def _triage_candidates(
for v in triage_response.verdicts:
if v.group_id not in fixability_by_group_id:
continue
fixability = fixability_by_group_id[v.group_id]
attributes: dict[str, str] = {"action": v.action}
if fixability is not None:
attributes["threshold_action"] = TriageAction.from_fixability_score(fixability)
sentry_sdk.metrics.count(
"night_shift.triage_action",
1,
attributes={
"action": v.action,
"threshold_action": TriageAction.from_fixability_score(
fixability_by_group_id[v.group_id]
),
},
attributes=attributes,
)

for v in triage_response.verdicts:
Expand Down Expand Up @@ -274,7 +273,7 @@ def _build_triage_prompt(
candidates_block = "\n".join(
f"- group_id={c.group.id} | title={c.group.title or 'Unknown error'!r} "
f"| culprit={c.group.culprit or 'unknown'!r} "
f"| fixability={c.fixability:.2f} | times_seen={c.times_seen} "
f"| fixability={f'{c.fixability:.2f}' if c.fixability is not None else 'not scored'} | times_seen={c.times_seen} "
f"| first_seen={c.group.first_seen.isoformat()} "
f"| priority={priority_label(c.group.priority) or 'unknown'}"
for c in candidates
Expand Down Expand Up @@ -332,8 +331,10 @@ def _build_triage_prompt(
config not provisioned, data corruption)

The "fixability" score in the candidate data is a prior estimate of how likely
the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). Use it as
a signal but verify with your own investigation.
the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). Issues
marked "not scored" have not been evaluated yet — treat them neutrally rather
than assuming they are unfixable. Use the score as a signal but verify with
your own investigation.

For each verdict, fill the `reason` field. For `autofix` and `root_cause_only`
verdicts, the `reason` is handed off as context to the downstream autofix agent
Expand Down
53 changes: 22 additions & 31 deletions src/sentry/tasks/seer/night_shift/simple_triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
from sentry.models.group import GroupStatus
from sentry.models.project import Project
from sentry.seer.autofix.constants import FixabilityScoreThresholds
from sentry.seer.autofix.utils import is_issue_category_eligible
from sentry.snuba.referrer import Referrer
from sentry.tasks.seer.night_shift.models import TriageAction, TriageResult
Expand All @@ -19,41 +20,27 @@
logger = logging.getLogger("sentry.tasks.seer.night_shift")

NIGHT_SHIFT_ISSUE_FETCH_LIMIT = 100

# Weights for candidate scoring. Set to 0 to disable a signal.
WEIGHT_FIXABILITY = 1.0
WEIGHT_SEVERITY = 0.0
WEIGHT_TIMES_SEEN = 0.0
Comment thread
cursor[bot] marked this conversation as resolved.
FIXABILITY_SCORE_THRESHOLD = FixabilityScoreThresholds.MEDIUM.value


@dataclass
class ScoredCandidate(TriageResult):
"""A candidate issue with raw signals for ranking."""

fixability: float = 0.0
fixability: float | None = None
times_seen: int = 0
severity: float = 0.0
action: TriageAction = TriageAction.AUTOFIX

@property
def score(self) -> float:
return (
WEIGHT_FIXABILITY * self.fixability
+ WEIGHT_SEVERITY * self.severity
+ WEIGHT_TIMES_SEEN * min(self.times_seen / 1000.0, 1.0)
)

def __lt__(self, other: ScoredCandidate) -> bool:
return self.score < other.score


def fixability_score_strategy(
projects: Sequence[Project],
max_candidates: int,
) -> list[ScoredCandidate]:
"""
Fetch top recommended unresolved issues that haven't been triaged by Seer yet,
then re-rank by fixability score. Doesn't require any additional LLM calls.
Fetch top recommended unresolved issues that haven't been triaged by Seer yet.
Issues with a fixability score above the threshold are taken first (sorted by
fixability), then backfilled with unscored issues in their original recommended
sort order.
"""
result = search.backend.query(
projects=projects,
Expand All @@ -78,27 +65,31 @@ def fixability_score_strategy(
},
)

candidates: list[ScoredCandidate] = []
scored: list[ScoredCandidate] = []
unscored: list[ScoredCandidate] = []
for group in result.results:
if group.id in skipped_ids:
continue
if not is_issue_category_eligible(group):
continue

candidates.append(
ScoredCandidate(
group=group,
fixability=group.seer_fixability_score or 0.0,
times_seen=group.times_seen,
severity=(group.priority or 0) / PriorityLevel.HIGH,
)
candidate = ScoredCandidate(
group=group,
fixability=group.seer_fixability_score,
times_seen=group.times_seen,
)

candidates.sort(reverse=True)
selected = candidates[:max_candidates]
if candidate.fixability is None:
unscored.append(candidate)
elif candidate.fixability >= FIXABILITY_SCORE_THRESHOLD:
scored.append(candidate)

scored.sort(key=lambda c: c.fixability or 0.0, reverse=True)
selected = (scored + unscored)[:max_candidates]

for c in selected:
sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability)
if c.fixability is not None:
sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability)

return selected

Expand Down
24 changes: 15 additions & 9 deletions tests/sentry/tasks/seer/test_night_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_selects_candidates_and_skips_triggered(self) -> None:
project, "high-fix", seer_fixability_score=0.9, times_seen=5
)
low_fix = self._store_event_and_update_group(
project, "low-fix", seer_fixability_score=0.2, times_seen=100
project, "low-fix", seer_fixability_score=0.5, times_seen=100
)
# Already triggered — should be excluded from triage.
self._store_event_and_update_group(
Expand Down Expand Up @@ -781,26 +781,32 @@ def _store_event_and_update_group(self, project, fingerprint, **group_attrs):
Group.objects.filter(id=event.group_id).update(**group_attrs)
return Group.objects.get(id=event.group_id)

def test_ranks_and_captures_signals(self) -> None:
def test_ranks_scored_above_threshold_first_then_preserves_recommended_order(self) -> None:
project = self.create_project()
high = self._store_event_and_update_group(
project, "high", seer_fixability_score=0.9, times_seen=5, priority=75
)
low = self._store_event_and_update_group(
medium = self._store_event_and_update_group(
project, "medium", seer_fixability_score=0.5, times_seen=50
)
self._store_event_and_update_group(
project, "low", seer_fixability_score=0.2, times_seen=500
)
for i in range(3):
self._store_event_and_update_group(
project, f"null-{i}", seer_fixability_score=None, times_seen=100
)
null = self._store_event_and_update_group(
project, "null", seer_fixability_score=None, times_seen=100
)

result = fixability_score_strategy([project], max_candidates=10)

result_ids = [c.group.id for c in result]

assert result[0].group.id == high.id
assert result[0].fixability == 0.9
assert result[0].times_seen == 5
assert result[0].severity == 1.0
assert result[1].group.id == low.id
assert medium.id in result_ids
assert null.id in result_ids
# Low-scored issue (below threshold) is excluded entirely
assert len(result) == 3


class TestTriageActionFromFixabilityScore:
Expand Down
Loading