diff --git a/src/sentry/tasks/seer/night_shift/agentic_triage.py b/src/sentry/tasks/seer/night_shift/agentic_triage.py index 9ff7fe507a84..4973c4aaf878 100644 --- a/src/sentry/tasks/seer/night_shift/agentic_triage.py +++ b/src/sentry/tasks/seer/night_shift/agentic_triage.py @@ -181,15 +181,14 @@ def _triage_candidates( for v in triage_response.verdicts: if v.group_id not in fixability_by_group_id: continue + fixability = fixability_by_group_id[v.group_id] + attributes: dict[str, str] = {"action": v.action} + if fixability is not None: + attributes["threshold_action"] = TriageAction.from_fixability_score(fixability) sentry_sdk.metrics.count( "night_shift.triage_action", 1, - attributes={ - "action": v.action, - "threshold_action": TriageAction.from_fixability_score( - fixability_by_group_id[v.group_id] - ), - }, + attributes=attributes, ) for v in triage_response.verdicts: @@ -274,7 +273,7 @@ def _build_triage_prompt( candidates_block = "\n".join( f"- group_id={c.group.id} | title={c.group.title or 'Unknown error'!r} " f"| culprit={c.group.culprit or 'unknown'!r} " - f"| fixability={c.fixability:.2f} | times_seen={c.times_seen} " + f"| fixability={f'{c.fixability:.2f}' if c.fixability is not None else 'not scored'} | times_seen={c.times_seen} " f"| first_seen={c.group.first_seen.isoformat()} " f"| priority={priority_label(c.group.priority) or 'unknown'}" for c in candidates @@ -332,8 +331,10 @@ def _build_triage_prompt( config not provisioned, data corruption) The "fixability" score in the candidate data is a prior estimate of how likely - the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). Use it as - a signal but verify with your own investigation. + the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). Issues + marked "not scored" have not been evaluated yet — treat them neutrally rather + than assuming they are unfixable. Use the score as a signal but verify with + your own investigation. For each verdict, fill the `reason` field. For `autofix` and `root_cause_only` verdicts, the `reason` is handed off as context to the downstream autofix agent diff --git a/src/sentry/tasks/seer/night_shift/simple_triage.py b/src/sentry/tasks/seer/night_shift/simple_triage.py index d746e64af2e8..6a1830524e3d 100644 --- a/src/sentry/tasks/seer/night_shift/simple_triage.py +++ b/src/sentry/tasks/seer/night_shift/simple_triage.py @@ -10,6 +10,7 @@ from sentry.api.event_search import SearchFilter, SearchKey, SearchValue from sentry.models.group import GroupStatus from sentry.models.project import Project +from sentry.seer.autofix.constants import FixabilityScoreThresholds from sentry.seer.autofix.utils import is_issue_category_eligible from sentry.snuba.referrer import Referrer from sentry.tasks.seer.night_shift.models import TriageAction, TriageResult @@ -19,41 +20,27 @@ logger = logging.getLogger("sentry.tasks.seer.night_shift") NIGHT_SHIFT_ISSUE_FETCH_LIMIT = 100 - -# Weights for candidate scoring. Set to 0 to disable a signal. -WEIGHT_FIXABILITY = 1.0 -WEIGHT_SEVERITY = 0.0 -WEIGHT_TIMES_SEEN = 0.0 +FIXABILITY_SCORE_THRESHOLD = FixabilityScoreThresholds.MEDIUM.value @dataclass class ScoredCandidate(TriageResult): """A candidate issue with raw signals for ranking.""" - fixability: float = 0.0 + fixability: float | None = None times_seen: int = 0 - severity: float = 0.0 action: TriageAction = TriageAction.AUTOFIX - @property - def score(self) -> float: - return ( - WEIGHT_FIXABILITY * self.fixability - + WEIGHT_SEVERITY * self.severity - + WEIGHT_TIMES_SEEN * min(self.times_seen / 1000.0, 1.0) - ) - - def __lt__(self, other: ScoredCandidate) -> bool: - return self.score < other.score - def fixability_score_strategy( projects: Sequence[Project], max_candidates: int, ) -> list[ScoredCandidate]: """ - Fetch top recommended unresolved issues that haven't been triaged by Seer yet, - then re-rank by fixability score. Doesn't require any additional LLM calls. + Fetch top recommended unresolved issues that haven't been triaged by Seer yet. + Issues with a fixability score above the threshold are taken first (sorted by + fixability), then backfilled with unscored issues in their original recommended + sort order. """ result = search.backend.query( projects=projects, @@ -78,27 +65,31 @@ def fixability_score_strategy( }, ) - candidates: list[ScoredCandidate] = [] + scored: list[ScoredCandidate] = [] + unscored: list[ScoredCandidate] = [] for group in result.results: if group.id in skipped_ids: continue if not is_issue_category_eligible(group): continue - candidates.append( - ScoredCandidate( - group=group, - fixability=group.seer_fixability_score or 0.0, - times_seen=group.times_seen, - severity=(group.priority or 0) / PriorityLevel.HIGH, - ) + candidate = ScoredCandidate( + group=group, + fixability=group.seer_fixability_score, + times_seen=group.times_seen, ) - candidates.sort(reverse=True) - selected = candidates[:max_candidates] + if candidate.fixability is None: + unscored.append(candidate) + elif candidate.fixability >= FIXABILITY_SCORE_THRESHOLD: + scored.append(candidate) + + scored.sort(key=lambda c: c.fixability or 0.0, reverse=True) + selected = (scored + unscored)[:max_candidates] for c in selected: - sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability) + if c.fixability is not None: + sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability) return selected diff --git a/tests/sentry/tasks/seer/test_night_shift.py b/tests/sentry/tasks/seer/test_night_shift.py index f3b8a24ddb69..14e57800534a 100644 --- a/tests/sentry/tasks/seer/test_night_shift.py +++ b/tests/sentry/tasks/seer/test_night_shift.py @@ -337,7 +337,7 @@ def test_selects_candidates_and_skips_triggered(self) -> None: project, "high-fix", seer_fixability_score=0.9, times_seen=5 ) low_fix = self._store_event_and_update_group( - project, "low-fix", seer_fixability_score=0.2, times_seen=100 + project, "low-fix", seer_fixability_score=0.5, times_seen=100 ) # Already triggered — should be excluded from triage. self._store_event_and_update_group( @@ -781,26 +781,32 @@ def _store_event_and_update_group(self, project, fingerprint, **group_attrs): Group.objects.filter(id=event.group_id).update(**group_attrs) return Group.objects.get(id=event.group_id) - def test_ranks_and_captures_signals(self) -> None: + def test_ranks_scored_above_threshold_first_then_preserves_recommended_order(self) -> None: project = self.create_project() high = self._store_event_and_update_group( project, "high", seer_fixability_score=0.9, times_seen=5, priority=75 ) - low = self._store_event_and_update_group( + medium = self._store_event_and_update_group( + project, "medium", seer_fixability_score=0.5, times_seen=50 + ) + self._store_event_and_update_group( project, "low", seer_fixability_score=0.2, times_seen=500 ) - for i in range(3): - self._store_event_and_update_group( - project, f"null-{i}", seer_fixability_score=None, times_seen=100 - ) + null = self._store_event_and_update_group( + project, "null", seer_fixability_score=None, times_seen=100 + ) result = fixability_score_strategy([project], max_candidates=10) + result_ids = [c.group.id for c in result] + assert result[0].group.id == high.id assert result[0].fixability == 0.9 assert result[0].times_seen == 5 - assert result[0].severity == 1.0 - assert result[1].group.id == low.id + assert medium.id in result_ids + assert null.id in result_ids + # Low-scored issue (below threshold) is excluded entirely + assert len(result) == 3 class TestTriageActionFromFixabilityScore: