From af4ce7927e7eebb4339be74255dc5003e9b8e59b Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Tue, 12 May 2026 18:08:21 -0400 Subject: [PATCH 1/5] fix(seer): Preserve recommended sort order for unscored issues in night shift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, all candidates were re-ranked by a weighted fixability score, which collapsed unscored issues (null fixability) to 0.0 and disrupted the recommended sort order from Snuba. Now candidates are split into three buckets: 1. Scored above threshold (>= MEDIUM) — sorted by fixability, taken first 2. Unscored (null) — preserves original recommended sort order as backfill 3. Scored below threshold — dropped entirely (already evaluated, not actionable) Co-Authored-By: Claude --- .../tasks/seer/night_shift/simple_triage.py | 48 ++++++++----------- tests/sentry/tasks/seer/test_night_shift.py | 21 +++++--- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/src/sentry/tasks/seer/night_shift/simple_triage.py b/src/sentry/tasks/seer/night_shift/simple_triage.py index d746e64af2e8f9..ce23a013b72ca4 100644 --- a/src/sentry/tasks/seer/night_shift/simple_triage.py +++ b/src/sentry/tasks/seer/night_shift/simple_triage.py @@ -10,6 +10,7 @@ from sentry.api.event_search import SearchFilter, SearchKey, SearchValue from sentry.models.group import GroupStatus from sentry.models.project import Project +from sentry.seer.autofix.constants import FixabilityScoreThresholds from sentry.seer.autofix.utils import is_issue_category_eligible from sentry.snuba.referrer import Referrer from sentry.tasks.seer.night_shift.models import TriageAction, TriageResult @@ -19,11 +20,7 @@ logger = logging.getLogger("sentry.tasks.seer.night_shift") NIGHT_SHIFT_ISSUE_FETCH_LIMIT = 100 - -# Weights for candidate scoring. Set to 0 to disable a signal. 
-WEIGHT_FIXABILITY = 1.0 -WEIGHT_SEVERITY = 0.0 -WEIGHT_TIMES_SEEN = 0.0 +FIXABILITY_SCORE_THRESHOLD = FixabilityScoreThresholds.MEDIUM.value @dataclass @@ -35,25 +32,16 @@ class ScoredCandidate(TriageResult): severity: float = 0.0 action: TriageAction = TriageAction.AUTOFIX - @property - def score(self) -> float: - return ( - WEIGHT_FIXABILITY * self.fixability - + WEIGHT_SEVERITY * self.severity - + WEIGHT_TIMES_SEEN * min(self.times_seen / 1000.0, 1.0) - ) - - def __lt__(self, other: ScoredCandidate) -> bool: - return self.score < other.score - def fixability_score_strategy( projects: Sequence[Project], max_candidates: int, ) -> list[ScoredCandidate]: """ - Fetch top recommended unresolved issues that haven't been triaged by Seer yet, - then re-rank by fixability score. Doesn't require any additional LLM calls. + Fetch top recommended unresolved issues that haven't been triaged by Seer yet. + Issues with a fixability score above the threshold are taken first (sorted by + fixability), then backfilled with unscored issues in their original recommended + sort order. 
""" result = search.backend.query( projects=projects, @@ -78,24 +66,28 @@ def fixability_score_strategy( }, ) - candidates: list[ScoredCandidate] = [] + scored: list[ScoredCandidate] = [] + unscored: list[ScoredCandidate] = [] for group in result.results: if group.id in skipped_ids: continue if not is_issue_category_eligible(group): continue - candidates.append( - ScoredCandidate( - group=group, - fixability=group.seer_fixability_score or 0.0, - times_seen=group.times_seen, - severity=(group.priority or 0) / PriorityLevel.HIGH, - ) + candidate = ScoredCandidate( + group=group, + fixability=group.seer_fixability_score or 0.0, + times_seen=group.times_seen, + severity=(group.priority or 0) / PriorityLevel.HIGH, ) - candidates.sort(reverse=True) - selected = candidates[:max_candidates] + if group.seer_fixability_score is None: + unscored.append(candidate) + elif candidate.fixability >= FIXABILITY_SCORE_THRESHOLD: + scored.append(candidate) + + scored.sort(key=lambda c: c.fixability, reverse=True) + selected = (scored + unscored)[:max_candidates] for c in selected: sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability) diff --git a/tests/sentry/tasks/seer/test_night_shift.py b/tests/sentry/tasks/seer/test_night_shift.py index f3b8a24ddb69a8..3756ffc83cb3df 100644 --- a/tests/sentry/tasks/seer/test_night_shift.py +++ b/tests/sentry/tasks/seer/test_night_shift.py @@ -781,26 +781,33 @@ def _store_event_and_update_group(self, project, fingerprint, **group_attrs): Group.objects.filter(id=event.group_id).update(**group_attrs) return Group.objects.get(id=event.group_id) - def test_ranks_and_captures_signals(self) -> None: + def test_ranks_scored_above_threshold_first_then_preserves_recommended_order(self) -> None: project = self.create_project() high = self._store_event_and_update_group( project, "high", seer_fixability_score=0.9, times_seen=5, priority=75 ) - low = self._store_event_and_update_group( + medium = self._store_event_and_update_group( + 
project, "medium", seer_fixability_score=0.5, times_seen=50 + ) + self._store_event_and_update_group( project, "low", seer_fixability_score=0.2, times_seen=500 ) - for i in range(3): - self._store_event_and_update_group( - project, f"null-{i}", seer_fixability_score=None, times_seen=100 - ) + null = self._store_event_and_update_group( + project, "null", seer_fixability_score=None, times_seen=100 + ) result = fixability_score_strategy([project], max_candidates=10) + result_ids = [c.group.id for c in result] + assert result[0].group.id == high.id assert result[0].fixability == 0.9 assert result[0].times_seen == 5 assert result[0].severity == 1.0 - assert result[1].group.id == low.id + assert medium.id in result_ids + assert null.id in result_ids + # Low-scored issue (below threshold) is excluded entirely + assert len(result) == 3 class TestTriageActionFromFixabilityScore: From 056aadf8bcfef209f8d950f7695442110e7bc2fa Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Tue, 12 May 2026 18:15:55 -0400 Subject: [PATCH 2/5] fix(tests): Bump low_fix score above threshold in night shift test The test_selects_candidates_and_skips_triggered test used a fixability score of 0.2, which now falls below the MEDIUM threshold and gets excluded. Bump to 0.5 so the test still exercises two-candidate selection. 
Co-Authored-By: Claude --- tests/sentry/tasks/seer/test_night_shift.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/tasks/seer/test_night_shift.py b/tests/sentry/tasks/seer/test_night_shift.py index 3756ffc83cb3df..5ee14ae2f6bbfc 100644 --- a/tests/sentry/tasks/seer/test_night_shift.py +++ b/tests/sentry/tasks/seer/test_night_shift.py @@ -337,7 +337,7 @@ def test_selects_candidates_and_skips_triggered(self) -> None: project, "high-fix", seer_fixability_score=0.9, times_seen=5 ) low_fix = self._store_event_and_update_group( - project, "low-fix", seer_fixability_score=0.2, times_seen=100 + project, "low-fix", seer_fixability_score=0.5, times_seen=100 ) # Already triggered — should be excluded from triage. self._store_event_and_update_group( From 715ad2ca3868bf7fe789d39456342a1921f44d55 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Tue, 12 May 2026 18:27:57 -0400 Subject: [PATCH 3/5] ref(seer): Remove dead severity field from ScoredCandidate The severity field was only consumed by the score property, which was removed earlier in this series (PATCH 1/5). times_seen is kept since it's still used in triage logging. 
Co-Authored-By: Claude --- src/sentry/tasks/seer/night_shift/simple_triage.py | 2 -- tests/sentry/tasks/seer/test_night_shift.py | 1 - 2 files changed, 3 deletions(-) diff --git a/src/sentry/tasks/seer/night_shift/simple_triage.py b/src/sentry/tasks/seer/night_shift/simple_triage.py index ce23a013b72ca4..9b2d0ad3233577 100644 --- a/src/sentry/tasks/seer/night_shift/simple_triage.py +++ b/src/sentry/tasks/seer/night_shift/simple_triage.py @@ -29,7 +29,6 @@ class ScoredCandidate(TriageResult): fixability: float = 0.0 times_seen: int = 0 - severity: float = 0.0 action: TriageAction = TriageAction.AUTOFIX @@ -78,7 +77,6 @@ def fixability_score_strategy( group=group, fixability=group.seer_fixability_score or 0.0, times_seen=group.times_seen, - severity=(group.priority or 0) / PriorityLevel.HIGH, ) if group.seer_fixability_score is None: diff --git a/tests/sentry/tasks/seer/test_night_shift.py b/tests/sentry/tasks/seer/test_night_shift.py index 5ee14ae2f6bbfc..14e57800534af6 100644 --- a/tests/sentry/tasks/seer/test_night_shift.py +++ b/tests/sentry/tasks/seer/test_night_shift.py @@ -803,7 +803,6 @@ def test_ranks_scored_above_threshold_first_then_preserves_recommended_order(sel assert result[0].group.id == high.id assert result[0].fixability == 0.9 assert result[0].times_seen == 5 - assert result[0].severity == 1.0 assert medium.id in result_ids assert null.id in result_ids # Low-scored issue (below threshold) is excluded entirely From 76fee49e57f90c0df2a6e3a2f07be0420169ae5d Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Tue, 12 May 2026 18:51:06 -0400 Subject: [PATCH 4/5] fix(seer): Represent unscored fixability as None instead of 0.0 Unscored issues (null fixability) were being presented to the triage agent as fixability=0.00, which the prompt describes as "not fixable". Now fixability is None for unscored issues, rendered as "not scored" in the triage prompt, and the prompt instructs the agent to treat them neutrally. 
Co-Authored-By: Claude --- .../tasks/seer/night_shift/agentic_triage.py | 15 ++++++++------- .../tasks/seer/night_shift/simple_triage.py | 11 ++++++----- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/sentry/tasks/seer/night_shift/agentic_triage.py b/src/sentry/tasks/seer/night_shift/agentic_triage.py index 9ff7fe507a8419..1fdd3dc56c5fa5 100644 --- a/src/sentry/tasks/seer/night_shift/agentic_triage.py +++ b/src/sentry/tasks/seer/night_shift/agentic_triage.py @@ -179,16 +179,15 @@ def _triage_candidates( fixability_by_group_id = {c.group.id: c.fixability for c in candidates} for v in triage_response.verdicts: - if v.group_id not in fixability_by_group_id: + fixability = fixability_by_group_id.get(v.group_id) + if fixability is None: continue sentry_sdk.metrics.count( "night_shift.triage_action", 1, attributes={ "action": v.action, - "threshold_action": TriageAction.from_fixability_score( - fixability_by_group_id[v.group_id] - ), + "threshold_action": TriageAction.from_fixability_score(fixability), }, ) @@ -274,7 +273,7 @@ def _build_triage_prompt( candidates_block = "\n".join( f"- group_id={c.group.id} | title={c.group.title or 'Unknown error'!r} " f"| culprit={c.group.culprit or 'unknown'!r} " - f"| fixability={c.fixability:.2f} | times_seen={c.times_seen} " + f"| fixability={f'{c.fixability:.2f}' if c.fixability is not None else 'not scored'} | times_seen={c.times_seen} " f"| first_seen={c.group.first_seen.isoformat()} " f"| priority={priority_label(c.group.priority) or 'unknown'}" for c in candidates @@ -332,8 +331,10 @@ def _build_triage_prompt( config not provisioned, data corruption) The "fixability" score in the candidate data is a prior estimate of how likely - the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). Use it as - a signal but verify with your own investigation. + the issue is to be fixable (0.0 = not fixable, 1.0 = very fixable). 
Issues + marked "not scored" have not been evaluated yet — treat them neutrally rather + than assuming they are unfixable. Use the score as a signal but verify with + your own investigation. For each verdict, fill the `reason` field. For `autofix` and `root_cause_only` verdicts, the `reason` is handed off as context to the downstream autofix agent diff --git a/src/sentry/tasks/seer/night_shift/simple_triage.py b/src/sentry/tasks/seer/night_shift/simple_triage.py index 9b2d0ad3233577..6a1830524e3d7f 100644 --- a/src/sentry/tasks/seer/night_shift/simple_triage.py +++ b/src/sentry/tasks/seer/night_shift/simple_triage.py @@ -27,7 +27,7 @@ class ScoredCandidate(TriageResult): """A candidate issue with raw signals for ranking.""" - fixability: float = 0.0 + fixability: float | None = None times_seen: int = 0 action: TriageAction = TriageAction.AUTOFIX @@ -75,20 +75,21 @@ def fixability_score_strategy( candidate = ScoredCandidate( group=group, - fixability=group.seer_fixability_score or 0.0, + fixability=group.seer_fixability_score, times_seen=group.times_seen, ) - if group.seer_fixability_score is None: + if candidate.fixability is None: unscored.append(candidate) elif candidate.fixability >= FIXABILITY_SCORE_THRESHOLD: scored.append(candidate) - scored.sort(key=lambda c: c.fixability, reverse=True) + scored.sort(key=lambda c: c.fixability or 0.0, reverse=True) selected = (scored + unscored)[:max_candidates] for c in selected: - sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability) + if c.fixability is not None: + sentry_sdk.metrics.distribution("night_shift.fixability_score", c.fixability) return selected From ba58873a50b746c2c143ad26b66df6f64de780fc Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Tue, 12 May 2026 19:34:55 -0400 Subject: [PATCH 5/5] fix(seer): Still emit triage_action metric for unscored issues The previous commit skipped the metric entirely when fixability was None. 
Restore the original unknown-group-id guard and only omit the threshold_action attribute when the score is missing. Co-Authored-By: Claude --- src/sentry/tasks/seer/night_shift/agentic_triage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sentry/tasks/seer/night_shift/agentic_triage.py b/src/sentry/tasks/seer/night_shift/agentic_triage.py index 1fdd3dc56c5fa5..4973c4aaf87882 100644 --- a/src/sentry/tasks/seer/night_shift/agentic_triage.py +++ b/src/sentry/tasks/seer/night_shift/agentic_triage.py @@ -179,16 +179,16 @@ def _triage_candidates( fixability_by_group_id = {c.group.id: c.fixability for c in candidates} for v in triage_response.verdicts: - fixability = fixability_by_group_id.get(v.group_id) - if fixability is None: + if v.group_id not in fixability_by_group_id: continue + fixability = fixability_by_group_id[v.group_id] + attributes: dict[str, str] = {"action": v.action} + if fixability is not None: + attributes["threshold_action"] = TriageAction.from_fixability_score(fixability) sentry_sdk.metrics.count( "night_shift.triage_action", 1, - attributes={ - "action": v.action, - "threshold_action": TriageAction.from_fixability_score(fixability), - }, + attributes=attributes, ) for v in triage_response.verdicts: