From cca5102f15b1e3a9eb5b5c092770d98882493daf Mon Sep 17 00:00:00 2001
From: unraid <local@unraid.local>
Date: Mon, 18 May 2026 21:51:54 +0800
Subject: [PATCH 1/6] =?UTF-8?q?fix(autofix-pr):=20=E4=BF=AE=E5=A4=8D=20tas?=
 =?UTF-8?q?kId=20=E4=B8=8D=E4=B8=80=E8=87=B4=E5=AF=BC=E8=87=B4=20monitor?=
 =?UTF-8?q?=20lock=20dangling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

问题:createAutofixTeammate 生成 teammate UUID 作为 monitor lock 的 key,
但 registerRemoteAgentTask 内部生成的 framework taskId 是另一个 UUID。
CCR session 自然完成时框架调 clearActiveMonitor(frameworkTaskId)
guard 失败,lock 永不释放,导致后续 /autofix-pr 报 "already monitoring"。

修复(Phase 1 of remote-agent completion loop):
- monitorState 新增 updateActiveMonitor(partial) 原子更新
- callAutofixPr 在 register 后 swap lock 的 taskId 到 framework 分配的 id
- RemoteAgentTask 引入 registerCompletionHook 注册式 API(参考已有的
  registerCompletionChecker 模式),在 5 个完成路径调 runCompletionHook
- autofix-pr 命令模块自己注册 cleanup hook,避免 framework 反向依赖
  command 模块

测试:
- monitorState 新增 4 个测试(updateActiveMonitor 行为 + bug 复现/修复)
- launchAutofixPr 新增 3 个端到端回归测试(taskId swap + hook 触发 +
  subsequent launch 不报 already monitoring)

完整分析与 Phase 2/3 改造方案见
docs/features/remote-agent-completion-analysis.md。
---
 .../__tests__/launchAutofixPr.test.ts         | 63 ++++++++++++++++++-
 .../autofix-pr/__tests__/monitorState.test.ts | 38 +++++++++++
 src/commands/autofix-pr/launchAutofixPr.ts    | 22 ++++++-
 src/commands/autofix-pr/monitorState.ts       | 14 +++++
 src/tasks/RemoteAgentTask/RemoteAgentTask.tsx | 37 +++++++++++
 5 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index c6df04ff9..7fbd34a54 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -46,7 +46,7 @@ mock.module('src/utils/teleport.js', () => ({
 }))
 
 const registerMock = mock(() => ({
-  taskId: 'task-abc',
+  taskId: 'framework-task-id',
   sessionId: 'session-123',
   cleanup: () => {},
 }))
@@ -56,10 +56,14 @@ const checkEligibilityMock = mock(() =>
 const getSessionUrlMock = mock(
   (id: string) => `https://claude.ai/session/${id}`,
 )
+const registerCompletionHookMock = mock<
+  (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void
+>(() => {})
 
 mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({
   checkRemoteAgentEligibility: checkEligibilityMock,
   registerRemoteAgentTask: registerMock,
+  registerCompletionHook: registerCompletionHookMock,
   getRemoteTaskSessionUrl: getSessionUrlMock,
   formatPreconditionError: (e: { type: string }) => e.type,
 }))
@@ -375,6 +379,63 @@ describe('callAutofixPr', () => {
   })
 })
 
+// Regression suite for the taskId-mismatch latent bug + completion hook wiring.
+// Before this fix, createAutofixTeammate generated a teammate UUID, that UUID
+// was used to acquire the singleton monitor lock, and registerRemoteAgentTask
+// generated a *different* framework taskId. When the framework eventually
+// called clearActiveMonitor(frameworkTaskId) on natural completion, the guard
+// failed (active.taskId !== frameworkTaskId) and the lock stayed acquired,
+// blocking any subsequent /autofix-pr invocations in the same process.
+describe('callAutofixPr · completion hook wiring (taskId mismatch regression)', () => {
+  test('updateActiveMonitor swaps lock taskId to framework-assigned id after register', async () => {
+    await callAutofixPr(onDone, makeContext(), '42')
+    const monitor = getActiveMonitor() as { taskId: string } | null
+    expect(monitor).not.toBeNull()
+    // registerMock returns 'framework-task-id'; before the fix this would be
+    // a teammate-generated random UUID instead.
+    expect(monitor?.taskId).toBe('framework-task-id')
+  })
+
+  test('framework hook → clearActiveMonitor releases lock on natural completion', async () => {
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(getActiveMonitor()).not.toBeNull()
+
+    // Find the hook the module registered at import time. We grab the last
+    // call so re-imports across tests don't break this — only the most recent
+    // registration is what the framework would invoke now.
+    const calls = registerCompletionHookMock.mock.calls
+    expect(calls.length).toBeGreaterThan(0)
+    const lastCall = calls[calls.length - 1]
+    expect(lastCall?.[0]).toBe('autofix-pr')
+    const hook = lastCall?.[1] as (id: string, metadata?: unknown) => void
+    expect(typeof hook).toBe('function')
+
+    // Simulate the framework invoking the hook with the framework taskId
+    // after a terminal transition. Before the fix this would no-op against
+    // a lock keyed by the teammate UUID.
+    hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 })
+    expect(getActiveMonitor()).toBeNull()
+  })
+
+  test('subsequent /autofix-pr succeeds after framework hook clears the lock', async () => {
+    await callAutofixPr(onDone, makeContext(), '42')
+    // Simulate natural completion via the registered hook
+    const calls = registerCompletionHookMock.mock.calls
+    const hook = calls[calls.length - 1]?.[1] as (
+      id: string,
+      metadata?: unknown,
+    ) => void
+    hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 })
+
+    onDone.mockClear()
+    await callAutofixPr(onDone, makeContext(), '99')
+    const firstArg = onDone.mock.calls[0]?.[0] as string
+    // Should be the success path, not "already monitoring"
+    expect(firstArg).not.toMatch(/already monitoring/i)
+    expect(firstArg).toMatch(/Autofix launched/)
+  })
+})
+
 // Cover ../index.ts load() — placed in this test file so all the heavy mocks
 // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics /
 // skillDetect) are already registered when load() dynamically imports
diff --git a/src/commands/autofix-pr/__tests__/monitorState.test.ts b/src/commands/autofix-pr/__tests__/monitorState.test.ts
index 43ce2f091..f97fcd972 100644
--- a/src/commands/autofix-pr/__tests__/monitorState.test.ts
+++ b/src/commands/autofix-pr/__tests__/monitorState.test.ts
@@ -5,6 +5,7 @@ import {
   isMonitoring,
   setActiveMonitor,
   trySetActiveMonitor,
+  updateActiveMonitor,
 } from '../monitorState.js'
 
 function makeState(
@@ -76,4 +77,41 @@ describe('monitorState', () => {
     // First state remains
     expect(getActiveMonitor()?.prNumber).toBe(1)
   })
+
+  test('updateActiveMonitor returns false when no active monitor', () => {
+    expect(updateActiveMonitor({ taskId: 'task-x' })).toBe(false)
+    expect(getActiveMonitor()).toBeNull()
+  })
+
+  test('updateActiveMonitor merges partial fields into the active monitor', () => {
+    setActiveMonitor(makeState({ taskId: 'tentative-uuid' }))
+    expect(updateActiveMonitor({ taskId: 'framework-task-id' })).toBe(true)
+    const after = getActiveMonitor()
+    expect(after?.taskId).toBe('framework-task-id')
+    // Other fields untouched
+    expect(after?.owner).toBe('acme')
+    expect(after?.repo).toBe('myrepo')
+    expect(after?.prNumber).toBe(42)
+  })
+
+  test('updateActiveMonitor with new taskId makes clearActiveMonitor recognise framework taskId', () => {
+    // Reproduce the latent bug scenario: lock acquired with one taskId,
+    // framework assigns a different one. Before the fix, the framework's
+    // clearActiveMonitor(frameworkTaskId) would no-op because guard fails.
+    setActiveMonitor(makeState({ taskId: 'teammate-uuid' }))
+    // Framework cleanup using its own taskId — would fail guard before the fix
+    clearActiveMonitor('framework-uuid')
+    expect(getActiveMonitor()).not.toBeNull()
+    // After updateActiveMonitor swaps the taskId, framework cleanup works
+    updateActiveMonitor({ taskId: 'framework-uuid' })
+    clearActiveMonitor('framework-uuid')
+    expect(getActiveMonitor()).toBeNull()
+  })
+
+  test('updateActiveMonitor does not change abortController identity', () => {
+    const ac = new AbortController()
+    setActiveMonitor(makeState({ abortController: ac, taskId: 'tentative' }))
+    updateActiveMonitor({ taskId: 'updated' })
+    expect(getActiveMonitor()?.abortController).toBe(ac)
+  })
 })
diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts
index cb4eb87f8..892448d75 100644
--- a/src/commands/autofix-pr/launchAutofixPr.ts
+++ b/src/commands/autofix-pr/launchAutofixPr.ts
@@ -13,6 +13,7 @@ import {
   checkRemoteAgentEligibility,
   formatPreconditionError,
   getRemoteTaskSessionUrl,
+  registerCompletionHook,
   registerRemoteAgentTask,
   type BackgroundRemoteSessionPrecondition,
 } from '../../tasks/RemoteAgentTask/RemoteAgentTask.js'
@@ -26,10 +27,21 @@ import {
   getActiveMonitor,
   isMonitoring,
   trySetActiveMonitor,
+  updateActiveMonitor,
 } from './monitorState.js'
 import { parseAutofixArgs } from './parseArgs.js'
 import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js'
 
+// Release the singleton monitor lock when the framework transitions the
+// autofix task to a terminal state. Without this, the lock — keyed by the
+// framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap)
+// — would dangle past natural completion, blocking subsequent /autofix-pr
+// invocations until the process restarts. Registered at module load; the
+// framework's runCompletionHook invokes it once per terminal transition.
+registerCompletionHook('autofix-pr', taskId => {
+  clearActiveMonitor(taskId)
+})
+
 function makeErrorText(message: string, code: string): string {
   logEvent('tengu_autofix_pr_result', {
     result:
@@ -277,8 +289,15 @@ export const callAutofixPr: LocalJSXCommandCall = async (
     // 4.9 register task. If this throws, release the lock so the user can
     // retry — the remote CCR session is already created so we surface a
     // dedicated error code.
+    //
+    // After registration succeeds, swap the lock's taskId from the tentative
+    // teammate UUID (used to acquire the lock atomically before teleport) to
+    // the framework-assigned taskId. Without this swap, the framework's own
+    // cleanup path (clearActiveMonitor(frameworkTaskId) on natural completion)
+    // would no-op against a lock keyed by teammate.taskId, leaving the
+    // singleton lock dangling and blocking future /autofix-pr invocations.
     try {
-      registerRemoteAgentTask({
+      const { taskId: frameworkTaskId } = registerRemoteAgentTask({
         remoteTaskType: 'autofix-pr',
         session,
         command: `/autofix-pr ${prNumber}`,
@@ -286,6 +305,7 @@ export const callAutofixPr: LocalJSXCommandCall = async (
         isLongRunning: true,
         remoteTaskMetadata: { owner, repo, prNumber },
       })
+      updateActiveMonitor({ taskId: frameworkTaskId })
     } catch (regErr: unknown) {
       clearActiveMonitor(teammate.taskId)
       const regMsg = regErr instanceof Error ? regErr.message : String(regErr)
diff --git a/src/commands/autofix-pr/monitorState.ts b/src/commands/autofix-pr/monitorState.ts
index df74292f1..273f0a90c 100644
--- a/src/commands/autofix-pr/monitorState.ts
+++ b/src/commands/autofix-pr/monitorState.ts
@@ -46,6 +46,20 @@ export function clearActiveMonitor(taskId?: string): void {
   active = null
 }
 
+/**
+ * Atomically merges partial updates into the active monitor. Returns true if
+ * applied, false if no active monitor. Used when the caller needs to swap the
+ * lock's taskId after the framework assigns a different one than the
+ * tentative one used to acquire the lock — without this the framework's
+ * cleanup (clearActiveMonitor with the framework taskId) would no-op against
+ * a lock keyed by the caller's tentative id.
+ */
+export function updateActiveMonitor(partial: Partial<MonitorState>): boolean {
+  if (!active) return false
+  active = { ...active, ...partial }
+  return true
+}
+
 export function isMonitoring(
   owner: string,
   repo: string,
diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
index 641c1c461..881046322 100644
--- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
+++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
@@ -114,6 +114,38 @@ export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checke
   completionCheckers.set(remoteTaskType, checker);
 }
 
+/**
+ * Called after the task transitions to a terminal state and the notification
+ * has been enqueued. Used by command modules to release singleton locks,
+ * clear cached state, or perform other cleanup the framework cannot see.
+ * Hooks must be synchronous and best-effort — errors are logged but never
+ * propagate.
+ */
+export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void;
+
+const completionHooks = new Map<RemoteTaskType, RemoteTaskCompletionHook>();
+
+/**
+ * Register a completion hook for a remote task type. Invoked once after the
+ * task reaches a terminal state in any of the framework's completion branches
+ * (archived session, completionChecker, stableIdle, result). Use this to
+ * release command-module state (e.g. singleton locks) without forcing the
+ * framework to reverse-import from the command package.
+ */
+export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void {
+  completionHooks.set(remoteTaskType, hook);
+}
+
+function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void {
+  const hook = completionHooks.get(task.remoteTaskType);
+  if (!hook) return;
+  try {
+    hook(taskId, task.remoteTaskMetadata);
+  } catch (e) {
+    logError(e);
+  }
+}
+
 /**
  * Persist a remote-agent metadata entry to the session sidecar.
  * Fire-and-forget — persistence failures must not block task registration.
@@ -681,6 +713,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
         enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
         void evictTaskOutput(taskId);
         void removeRemoteAgentMetadata(taskId);
+        runCompletionHook(taskId, task);
         return;
       }
 
@@ -694,6 +727,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
           enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
           void evictTaskOutput(taskId);
           void removeRemoteAgentMetadata(taskId);
+          runCompletionHook(taskId, task);
           return;
         }
       }
@@ -853,6 +887,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
             enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState);
             void evictTaskOutput(taskId);
             void removeRemoteAgentMetadata(taskId);
+            runCompletionHook(taskId, task);
             return; // Stop polling
           }
 
@@ -870,12 +905,14 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
           enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState);
           void evictTaskOutput(taskId);
           void removeRemoteAgentMetadata(taskId);
+          runCompletionHook(taskId, task);
           return; // Stop polling
         }
 
         enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
         void evictTaskOutput(taskId);
         void removeRemoteAgentMetadata(taskId);
+        runCompletionHook(taskId, task);
         return; // Stop polling
       }
     } catch (error) {

From 31433ac958f7e659d1c1567326b424c8c651b29d Mon Sep 17 00:00:00 2001
From: unraid <local@unraid.local>
Date: Mon, 18 May 2026 22:00:32 +0800
Subject: [PATCH 2/6] =?UTF-8?q?feat(autofix-pr):=20=E6=B3=A8=E5=86=8C=20co?=
 =?UTF-8?q?mpletionChecker=20=E7=94=A8=20gh=20CLI=20=E6=8E=A2=E6=B5=8B=20P?=
 =?UTF-8?q?R=20=E5=AE=8C=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 of remote-agent completion loop。Phase 1 修了 monitor lock
dangling,但完成信号仍然只能等 CCR session 自然 archive(timing 不可
预测,且不知道 PR 究竟有没有被修好)。Phase 2 加上主动完成探测。

实现:
- 新增 prOutcomeCheck.ts(纯决策矩阵):summariseAutofixOutcome 给定
  PR 快照 + 基线 SHA 返回 completed/summary。8 个决策分支单元测试。
- 新增 prFetch.ts(spawn 层):runGhPrView 调 gh CLI,fetchPrHeadSha
  在 launch 时捕获基线 SHA,checkPrAutofixOutcome 组合两者。
- AutofixPrRemoteTaskMetadata 加 initialHeadSha?: string 字段,survive
  --resume。
- launchAutofixPr.ts 模块顶部 registerCompletionChecker('autofix-pr',
  ...),5s throttle 防 gh CLI 调用爆。callAutofixPr 启动时调
  fetchPrHeadSha 传入 metadata。

决策矩阵:
  MERGED                  → done(merged)
  CLOSED 未 merge          → done(closed without fix)
  OPEN 无 baseline        → 继续轮询
  OPEN head 未变           → 继续轮询(agent 还没 push)
  OPEN head 变 + CI pending → 继续轮询
  OPEN head 变 + CI failure → done(surface red,user 决定 retry)
  OPEN head 变 + CI success → done(clean fix)

设计:
- gh CLI 而非 Octokit:复用用户已有 auth,不引入 token 管理
- 决策与 spawn 分文件:prOutcomeCheck 纯函数易测,prFetch 单独 mock
  避免 Bun mock.module 进程级污染(已在 launchAutofixPr.test 注释说明)
- 5s throttle:framework 每 1s 轮询,gh CLI subprocess 太重不能跟上
- 失败兜底:fetchPrHeadSha/checkPrAutofixOutcome 失败均不抛,returns
  null/false,framework 继续走原路径

测试:
- prOutcomeCheck 9 个单测覆盖决策矩阵
- launchAutofixPr 5 个新测试:checker 注册 / fetchPrHeadSha 调用 /
  initialHeadSha 传 metadata / SHA 失败仍能 launch / SHA null 处理

完整方案见 docs/features/remote-agent-completion-analysis.md。
---
 .../__tests__/launchAutofixPr.test.ts         |  89 ++++++++++
 .../__tests__/prOutcomeCheck.test.ts          | 158 ++++++++++++++++++
 src/commands/autofix-pr/launchAutofixPr.ts    |  52 +++++-
 src/commands/autofix-pr/prFetch.ts            | 155 +++++++++++++++++
 src/commands/autofix-pr/prOutcomeCheck.ts     | 123 ++++++++++++++
 src/tasks/RemoteAgentTask/RemoteAgentTask.tsx |   8 +
 6 files changed, 583 insertions(+), 2 deletions(-)
 create mode 100644 src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
 create mode 100644 src/commands/autofix-pr/prFetch.ts
 create mode 100644 src/commands/autofix-pr/prOutcomeCheck.ts

diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index 7fbd34a54..c4fcc3468 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -59,15 +59,34 @@ const getSessionUrlMock = mock(
 const registerCompletionHookMock = mock<
   (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void
 >(() => {})
+const registerCompletionCheckerMock = mock<
+  (
+    taskType: string,
+    checker: (metadata?: unknown) => Promise<string | null>,
+  ) => void
+>(() => {})
 
 mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({
   checkRemoteAgentEligibility: checkEligibilityMock,
   registerRemoteAgentTask: registerMock,
   registerCompletionHook: registerCompletionHookMock,
+  registerCompletionChecker: registerCompletionCheckerMock,
   getRemoteTaskSessionUrl: getSessionUrlMock,
   formatPreconditionError: (e: { type: string }) => e.type,
 }))
 
+const fetchPrHeadShaMock = mock<
+  (owner: string, repo: string, prNumber: number) => Promise<string | null>
+>(() => Promise.resolve('sha-baseline-abc123'))
+
+// Mock prFetch.ts (gh CLI spawn layer) — keeping the pure decision matrix
+// in prOutcomeCheck.ts unmocked so its tests are unaffected by this file's
+// process-global mock.module pollution.
+mock.module('src/commands/autofix-pr/prFetch.js', () => ({
+  fetchPrHeadSha: fetchPrHeadShaMock,
+  checkPrAutofixOutcome: mock(() => Promise.resolve({ completed: false })),
+}))
+
 const detectRepoMock = mock(() =>
   Promise.resolve({ host: 'github.com', owner: 'acme', name: 'myrepo' }),
 )
@@ -436,6 +455,76 @@ describe('callAutofixPr · completion hook wiring (taskId mismatch regression)',
   })
 })
 
+// Phase 2: completionChecker wiring + initialHeadSha capture
+describe('callAutofixPr · Phase 2 completionChecker integration', () => {
+  test('completionChecker is registered at module load with autofix-pr type', () => {
+    // The registration happens during the beforeAll dynamic import; just
+    // verify the mock recorded a call. Filter by task type so any future
+    // additional registrations elsewhere don't break this assertion.
+    const calls = registerCompletionCheckerMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    expect(calls.length).toBeGreaterThan(0)
+    const hook = calls[calls.length - 1]?.[1]
+    expect(typeof hook).toBe('function')
+  })
+
+  test('callAutofixPr captures initialHeadSha via fetchPrHeadSha', async () => {
+    fetchPrHeadShaMock.mockClear()
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(fetchPrHeadShaMock).toHaveBeenCalledWith('acme', 'myrepo', 42)
+  })
+
+  test('initialHeadSha is passed into remoteTaskMetadata on register', async () => {
+    fetchPrHeadShaMock.mockImplementationOnce(() =>
+      Promise.resolve('sha-from-launch'),
+    )
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(registerMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        remoteTaskMetadata: expect.objectContaining({
+          owner: 'acme',
+          repo: 'myrepo',
+          prNumber: 42,
+          initialHeadSha: 'sha-from-launch',
+        }),
+      }),
+    )
+  })
+
+  test('fetchPrHeadSha failure → metadata initialHeadSha undefined, launch still succeeds', async () => {
+    fetchPrHeadShaMock.mockImplementationOnce(() =>
+      Promise.reject(new Error('gh not installed')),
+    )
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(registerMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        remoteTaskMetadata: expect.objectContaining({
+          owner: 'acme',
+          repo: 'myrepo',
+          prNumber: 42,
+          initialHeadSha: undefined,
+        }),
+      }),
+    )
+    // Launch must NOT fail just because SHA capture failed
+    const firstArg = onDone.mock.calls[0]?.[0] as string
+    expect(firstArg).toMatch(/Autofix launched/)
+  })
+
+  test('fetchPrHeadSha returning null → metadata initialHeadSha undefined', async () => {
+    fetchPrHeadShaMock.mockImplementationOnce(() => Promise.resolve(null))
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(registerMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        remoteTaskMetadata: expect.objectContaining({
+          initialHeadSha: undefined,
+        }),
+      }),
+    )
+  })
+})
+
 // Cover ../index.ts load() — placed in this test file so all the heavy mocks
 // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics /
 // skillDetect) are already registered when load() dynamically imports
diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
new file mode 100644
index 000000000..940c33423
--- /dev/null
+++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
@@ -0,0 +1,158 @@
+import { describe, expect, test } from 'bun:test'
+import {
+  type PrViewPayload,
+  summariseAutofixOutcome,
+} from '../prOutcomeCheck.js'
+
+function basePayload(overrides: Partial<PrViewPayload> = {}): PrViewPayload {
+  return {
+    headRefOid: 'sha-baseline',
+    state: 'OPEN',
+    statusCheckRollup: [],
+    ...overrides,
+  }
+}
+
+const identity = (overrides: Partial<{ initialHeadSha: string }> = {}) => ({
+  owner: 'acme',
+  repo: 'myrepo',
+  prNumber: 42,
+  initialHeadSha: 'sha-baseline',
+  ...overrides,
+})
+
+describe('summariseAutofixOutcome · terminal PR states', () => {
+  test('MERGED → completed regardless of head SHA / CI', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({ state: 'MERGED', headRefOid: 'sha-baseline' }),
+      identity(),
+    )
+    expect(result).toEqual({
+      completed: true,
+      summary: 'acme/myrepo#42 merged. Autofix monitoring complete.',
+    })
+  })
+
+  test('CLOSED → completed regardless of head SHA / CI', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({ state: 'CLOSED' }),
+      identity(),
+    )
+    expect(result).toEqual({
+      completed: true,
+      summary:
+        'acme/myrepo#42 closed without merge. Autofix monitoring complete.',
+    })
+  })
+})
+
+describe('summariseAutofixOutcome · OPEN PR without push', () => {
+  test('no initialHeadSha baseline → not completed (cannot detect push)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({ state: 'OPEN' }),
+      identity({ initialHeadSha: undefined as unknown as string }),
+    )
+    expect(result).toEqual({ completed: false })
+  })
+
+  test('headRefOid unchanged → not completed (autofix has not pushed yet)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({ state: 'OPEN', headRefOid: 'sha-baseline' }),
+      identity(),
+    )
+    expect(result).toEqual({ completed: false })
+  })
+})
+
+describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => {
+  test('push detected + no checks configured → completed (success)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [],
+      }),
+      identity(),
+    )
+    expect(result).toEqual({
+      completed: true,
+      summary: 'Autofix pushed commits to acme/myrepo#42, CI green.',
+    })
+  })
+
+  test('push detected + CI pending → not completed (wait for CI)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [
+          { status: 'IN_PROGRESS', conclusion: null, name: 'ci' },
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+        ],
+      }),
+      identity(),
+    )
+    expect(result).toEqual({ completed: false })
+  })
+
+  test('push detected + CI all green → completed (success summary)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' },
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+        ],
+      }),
+      identity(),
+    )
+    expect(result.completed).toBe(true)
+    if (result.completed) {
+      expect(result.summary).toContain('CI green')
+      expect(result.summary).toContain('acme/myrepo#42')
+    }
+  })
+
+  test('push detected + CI red → completed (failure summary surfaces the red)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [
+          { status: 'COMPLETED', conclusion: 'FAILURE', name: 'ci' },
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+        ],
+      }),
+      identity(),
+    )
+    expect(result.completed).toBe(true)
+    if (result.completed) {
+      expect(result.summary).toContain('CI is failing')
+      expect(result.summary).toContain('1/2 checks failing')
+    }
+  })
+
+  test('neutral / skipped conclusions count as success (not failure)', () => {
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [
+          {
+            status: 'COMPLETED',
+            conclusion: 'NEUTRAL',
+            name: 'optional-check',
+          },
+          { status: 'COMPLETED', conclusion: 'SKIPPED', name: 'docs-check' },
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' },
+        ],
+      }),
+      identity(),
+    )
+    expect(result.completed).toBe(true)
+    if (result.completed) {
+      expect(result.summary).toContain('CI green')
+    }
+  })
+})
diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts
index 892448d75..9b5109ff5 100644
--- a/src/commands/autofix-pr/launchAutofixPr.ts
+++ b/src/commands/autofix-pr/launchAutofixPr.ts
@@ -13,8 +13,10 @@ import {
   checkRemoteAgentEligibility,
   formatPreconditionError,
   getRemoteTaskSessionUrl,
+  registerCompletionChecker,
   registerCompletionHook,
   registerRemoteAgentTask,
+  type AutofixPrRemoteTaskMetadata,
   type BackgroundRemoteSessionPrecondition,
 } from '../../tasks/RemoteAgentTask/RemoteAgentTask.js'
 import type { LocalJSXCommandCall } from '../../types/command.js'
@@ -30,16 +32,53 @@ import {
   updateActiveMonitor,
 } from './monitorState.js'
 import { parseAutofixArgs } from './parseArgs.js'
+import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js'
 import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js'
 
+// Throttle map for the completionChecker: gh CLI is called at most once per
+// PR per CHECK_INTERVAL_MS, regardless of the framework's 1s poll cadence.
+// Key is `${owner}/${repo}#${prNumber}`. Cleared when the completion hook
+// fires so a re-launched monitor starts with a fresh budget.
+const lastCheckAt = new Map<string, number>()
+const CHECK_INTERVAL_MS = 5_000
+
+function throttleKey(meta: AutofixPrRemoteTaskMetadata): string {
+  return `${meta.owner}/${meta.repo}#${meta.prNumber}`
+}
+
+// Register the completionChecker once at module load. The framework calls it
+// on every poll tick for tasks with remoteTaskType==='autofix-pr'; throttle
+// inside so we don't fire gh CLI 60×/min. Returns the summary string on
+// completion (becomes the task-notification body) or null to keep polling.
+registerCompletionChecker('autofix-pr', async metadata => {
+  const meta = metadata as AutofixPrRemoteTaskMetadata | undefined
+  if (!meta) return null
+
+  const key = throttleKey(meta)
+  const now = Date.now()
+  if (now - (lastCheckAt.get(key) ?? 0) < CHECK_INTERVAL_MS) return null
+  lastCheckAt.set(key, now)
+
+  const result = await checkPrAutofixOutcome({
+    owner: meta.owner,
+    repo: meta.repo,
+    prNumber: meta.prNumber,
+    initialHeadSha: meta.initialHeadSha,
+  })
+  return result.completed ? result.summary : null
+})
+
 // Release the singleton monitor lock when the framework transitions the
 // autofix task to a terminal state. Without this, the lock — keyed by the
 // framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap)
 // — would dangle past natural completion, blocking subsequent /autofix-pr
 // invocations until the process restarts. Registered at module load; the
 // framework's runCompletionHook invokes it once per terminal transition.
-registerCompletionHook('autofix-pr', taskId => {
+// Also clear the per-PR throttle entry so a re-launch starts fresh.
+registerCompletionHook('autofix-pr', (taskId, metadata) => {
   clearActiveMonitor(taskId)
+  const meta = metadata as AutofixPrRemoteTaskMetadata | undefined
+  if (meta) lastCheckAt.delete(throttleKey(meta))
 })
 
 function makeErrorText(message: string, code: string): string {
@@ -286,6 +325,15 @@ export const callAutofixPr: LocalJSXCommandCall = async (
       return null
     }
 
+    // 4.8b capture PR head SHA before registering so the completionChecker
+    // can detect when the agent has pushed new commits. Best-effort — if gh
+    // is unavailable or the call fails, leave initialHeadSha undefined and
+    // the checker falls back to terminal-state-only completion (closed /
+    // merged). Don't block on this; teleport succeeded already.
+    const initialHeadSha =
+      (await fetchPrHeadSha(owner, repo, prNumber).catch(() => null)) ??
+      undefined
+
     // 4.9 register task. If this throws, release the lock so the user can
     // retry — the remote CCR session is already created so we surface a
     // dedicated error code.
@@ -303,7 +351,7 @@ export const callAutofixPr: LocalJSXCommandCall = async (
         command: `/autofix-pr ${prNumber}`,
         context,
         isLongRunning: true,
-        remoteTaskMetadata: { owner, repo, prNumber },
+        remoteTaskMetadata: { owner, repo, prNumber, initialHeadSha },
       })
       updateActiveMonitor({ taskId: frameworkTaskId })
     } catch (regErr: unknown) {
diff --git a/src/commands/autofix-pr/prFetch.ts b/src/commands/autofix-pr/prFetch.ts
new file mode 100644
index 000000000..63245c7ae
--- /dev/null
+++ b/src/commands/autofix-pr/prFetch.ts
@@ -0,0 +1,155 @@
+// gh CLI integration for autofix-pr: fetches PR snapshots and feeds them
+// through the pure decision matrix in prOutcomeCheck.ts. Kept separate so
+// tests of the decision matrix never have to mock node:child_process — and
+// tests of callAutofixPr can mock this module without polluting the pure
+// decision matrix module (Bun mock.module is process-global).
+
+import { spawn } from 'node:child_process'
+import {
+  type AutofixOutcomeProbeResult,
+  type PrViewPayload,
+  summariseAutofixOutcome,
+} from './prOutcomeCheck.js'
+
+export interface AutofixOutcomeProbeInput {
+  owner: string
+  repo: string
+  prNumber: number
+  /**
+   * Head commit SHA captured at /autofix-pr launch. When this differs from
+   * the current head, autofix has pushed at least one commit.
+   */
+  initialHeadSha?: string
+  /**
+   * Timeout for the gh CLI invocation. Caller is the framework's per-tick
+   * poller, so failures must be bounded — a hung gh process would stall
+   * the entire poll loop.
+   */
+  timeoutMs?: number
+}
+
+const DEFAULT_TIMEOUT_MS = 5_000
+
+/**
+ * Fetch the PR's current head SHA, state, and CI rollup, and decide whether
+ * autofix has finished. Returns `{ completed: true, summary }` if so;
+ * otherwise `{ completed: false }`. Never throws.
+ */
+export async function checkPrAutofixOutcome(
+  input: AutofixOutcomeProbeInput,
+): Promise<AutofixOutcomeProbeResult> {
+  const { owner, repo, prNumber, initialHeadSha, timeoutMs } = input
+
+  let payload: PrViewPayload
+  try {
+    payload = await runGhPrView(
+      owner,
+      repo,
+      prNumber,
+      timeoutMs ?? DEFAULT_TIMEOUT_MS,
+    )
+  } catch {
+    return { completed: false }
+  }
+
+  return summariseAutofixOutcome(payload, {
+    owner,
+    repo,
+    prNumber,
+    initialHeadSha,
+  })
+}
+
+/**
+ * Resolve the PR's current head commit SHA. Used at /autofix-pr launch to
+ * capture a baseline; later compared against the live SHA to detect pushes.
+ * Returns null on any failure (network, missing gh, permissions) — the
+ * caller treats null as "no baseline" and falls back to terminal-state-only
+ * completion detection.
+ */
+export async function fetchPrHeadSha(
+  owner: string,
+  repo: string,
+  prNumber: number,
+  timeoutMs = DEFAULT_TIMEOUT_MS,
+): Promise<string | null> {
+  try {
+    const payload = await runGhPrView(owner, repo, prNumber, timeoutMs)
+    return payload.headRefOid || null
+  } catch {
+    return null
+  }
+}
+
+interface SpawnError extends Error {
+  code?: string
+}
+
+/**
+ * Spawn `gh pr view {n} --repo {owner}/{repo} --json ...` and parse the
+ * result. Rejects on non-zero exit, timeout, or JSON parse failure.
+ */
+function runGhPrView(
+  owner: string,
+  repo: string,
+  prNumber: number,
+  timeoutMs: number,
+): Promise<PrViewPayload> {
+  return new Promise((resolve, reject) => {
+    const proc = spawn(
+      'gh',
+      [
+        'pr',
+        'view',
+        String(prNumber),
+        '--repo',
+        `${owner}/${repo}`,
+        '--json',
+        'headRefOid,state,statusCheckRollup',
+      ],
+      { stdio: ['ignore', 'pipe', 'pipe'] },
+    )
+    const stdoutChunks: Buffer[] = []
+    const stderrChunks: Buffer[] = []
+    let settled = false
+
+    const timer = setTimeout(() => {
+      if (settled) return
+      settled = true
+      proc.kill('SIGKILL')
+      reject(new Error(`gh pr view timed out after ${timeoutMs}ms`))
+    }, timeoutMs)
+
+    proc.stdout.on('data', chunk => stdoutChunks.push(chunk as Buffer))
+    proc.stderr.on('data', chunk => stderrChunks.push(chunk as Buffer))
+
+    proc.on('error', (err: SpawnError) => {
+      if (settled) return
+      settled = true
+      clearTimeout(timer)
+      reject(err)
+    })
+
+    proc.on('close', code => {
+      if (settled) return
+      settled = true
+      clearTimeout(timer)
+      if (code !== 0) {
+        const stderr = Buffer.concat(stderrChunks).toString('utf8').trim()
+        reject(
+          new Error(`gh pr view exited ${code}: ${stderr || '<no stderr>'}`),
+        )
+        return
+      }
+      const stdout = Buffer.concat(stdoutChunks).toString('utf8').trim()
+      try {
+        const parsed = JSON.parse(stdout) as PrViewPayload
+        resolve(parsed)
+      } catch (e) {
+        reject(
+          new Error(`gh pr view JSON parse failed: ${(e as Error).message}`),
+        )
+      }
+    })
+  })
+}
diff --git a/src/commands/autofix-pr/prOutcomeCheck.ts b/src/commands/autofix-pr/prOutcomeCheck.ts
new file mode 100644
index 000000000..4d77ade4f
--- /dev/null
+++ b/src/commands/autofix-pr/prOutcomeCheck.ts
@@ -0,0 +1,123 @@
+// Pure decision matrix for autofix-pr completion detection.
+//
+// Given a snapshot of the PR (state, head SHA, CI rollup) and a baseline
+// head SHA captured at /autofix-pr launch, decide whether autofix has
+// finished. No side effects — extracted from the gh CLI invocation in
+// prFetch.ts so unit tests can exercise every branch without spawning
+// subprocesses.
+
+export type AutofixOutcomeProbeResult =
+  | { completed: true; summary: string }
+  | { completed: false }
+
+export interface PrViewPayload {
+  headRefOid: string
+  state: 'OPEN' | 'CLOSED' | 'MERGED'
+  statusCheckRollup?: Array<{
+    conclusion?: string | null
+    status?: string | null
+    name?: string
+  }>
+}
+
+export interface AutofixOutcomeIdentity {
+  owner: string
+  repo: string
+  prNumber: number
+  /**
+   * Head commit SHA captured at /autofix-pr launch. When this differs from
+   * the current head, autofix has pushed at least one commit. Optional —
+   * absence means we can only finish on terminal PR states (merged/closed).
+   */
+  initialHeadSha?: string
+}
+
+/**
+ * Pure judgement of whether autofix has finished, given a PR snapshot and
+ * the baseline head SHA. Decision matrix:
+ *   - MERGED                         → done (merged)
+ *   - CLOSED (not merged)            → done (closed without fix)
+ *   - OPEN, no baseline              → keep polling
+ *   - OPEN, head unchanged           → keep polling (agent hasn't pushed)
+ *   - OPEN, head changed, CI pending → keep polling (wait for CI)
+ *   - OPEN, head changed, CI failure → done (surface red so user can retry)
+ *   - OPEN, head changed, CI success → done (clean fix)
+ */
+export function summariseAutofixOutcome(
+  payload: PrViewPayload,
+  identity: AutofixOutcomeIdentity,
+): AutofixOutcomeProbeResult {
+  const { owner, repo, prNumber, initialHeadSha } = identity
+
+  if (payload.state === 'MERGED') {
+    return {
+      completed: true,
+      summary: `${owner}/${repo}#${prNumber} merged. Autofix monitoring complete.`,
+    }
+  }
+  if (payload.state === 'CLOSED') {
+    return {
+      completed: true,
+      summary: `${owner}/${repo}#${prNumber} closed without merge. Autofix monitoring complete.`,
+    }
+  }
+
+  if (!initialHeadSha) return { completed: false }
+  if (payload.headRefOid === initialHeadSha) return { completed: false }
+
+  const ciState = summariseCiRollup(payload.statusCheckRollup)
+  if (ciState.state === 'pending') return { completed: false }
+  if (ciState.state === 'failure') {
+    return {
+      completed: true,
+      summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber} but CI is failing (${ciState.detail}).`,
+    }
+  }
+  return {
+    completed: true,
+    summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber}, CI green.`,
+  }
+}
+
+interface CiSummary {
+  state: 'success' | 'pending' | 'failure'
+  detail: string
+}
+
+function summariseCiRollup(
+  rollup: PrViewPayload['statusCheckRollup'],
+): CiSummary {
+  if (!rollup || rollup.length === 0) {
+    // No checks configured on this repo — treat as success so completion
+    // can fire on push alone. PRs without CI are perfectly valid.
+    return { state: 'success', detail: 'no checks configured' }
+  }
+  let pending = 0
+  let failed = 0
+  const total = rollup.length
+  for (const check of rollup) {
+    const status = (check.status ?? '').toUpperCase()
+    const conclusion = (check.conclusion ?? '').toUpperCase()
+    if (status && status !== 'COMPLETED') {
+      pending++
+      continue
+    }
+    if (
+      conclusion === 'SUCCESS' ||
+      conclusion === 'NEUTRAL' ||
+      conclusion === 'SKIPPED'
+    ) {
+      continue
+    }
+    if (conclusion === '') {
+      pending++
+      continue
+    }
+    failed++
+  }
+  if (pending > 0)
+    return { state: 'pending', detail: `${pending}/${total} checks pending` }
+  if (failed > 0)
+    return { state: 'failure', detail: `${failed}/${total} checks failing` }
+  return { state: 'success', detail: `${total}/${total} checks passing` }
+}
diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
index 881046322..712fc61f5 100644
--- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
+++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
@@ -91,6 +91,14 @@ export type AutofixPrRemoteTaskMetadata = {
   owner: string;
   repo: string;
   prNumber: number;
+  /**
+   * PR head commit SHA captured at /autofix-pr launch. The completionChecker
+   * compares this against the live head to detect when the agent has pushed
+   * new commits. Optional because gh CLI may be unavailable at launch — in
+   * that case the checker falls back to terminal-state-only completion.
+   * Survives --resume via the session sidecar.
+   */
+  initialHeadSha?: string;
 };
 
 export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata;

From 873f0a03acb2fe19e1bf5440948e18719d3c0989 Mon Sep 17 00:00:00 2001
From: unraid <local@unraid.local>
Date: Mon, 18 May 2026 22:06:18 +0800
Subject: [PATCH 3/6] =?UTF-8?q?feat(autofix-pr):=20=E5=86=85=E5=AE=B9?=
 =?UTF-8?q?=E5=9B=9E=E6=B5=81=E8=AE=A9=E6=9C=AC=E5=9C=B0=E6=A8=A1=E5=9E=8B?=
 =?UTF-8?q?=E8=AF=BB=E5=88=B0=20PR=20=E4=BF=AE=E5=A4=8D=E7=BB=93=E6=9E=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 3 of remote-agent completion loop。Phase 2 注册了 completionChecker
让框架能在 PR 合并/关闭/有 push+CI 绿时主动完成 task,但 task-notification
仍然只携带 generic 文本(""${owner}/${repo}#42 merged"")。Phase 3 让本地
模型读到远端 agent 自己产出的结构化结果(commits 列表、files 列表、CI
状态、人类可读 summary)。

实现:
- 新增 extractAutofixResultFromLog (src/commands/autofix-pr/
  extractAutofixResult.ts):从 SDKMessage[] 中扫 <autofix-result> tag,
  优先 hook stdout 后 fallback assistant text,latest-wins。10 个单测。
- RemoteAgentTask 新增 registerContentExtractor 注册式 API + 私有
  enqueueRichRemoteNotification(参考 enqueueRemoteReviewNotification),
  在 3 个 generic 完成路径(archived / completionChecker / result-driven)
  先尝试 tryExtractRichContent,有内容用 rich 变体,没有走 generic。
  isRemoteReview 路径不变(它走自己的 enqueueRemoteReviewNotification)。
- launchAutofixPr.ts 模块顶部 registerContentExtractor('autofix-pr',
  extractAutofixResultFromLog)。initialMessage 加 <autofix-result> 输出
  指令(pr-number / commits-pushed / files-changed / ci-status / summary)。

设计:
- 注册式 API(同 Phase 1 hook + Phase 2 checker):framework 不反向依赖
  命令模块,所有 PR-specific 逻辑在 autofix-pr/
- latest-wins:agent 重试时只取最新 tag,旧 tag 不会污染
- truncated tag → null:开 tag 无对应闭 tag 视为不完整,走 generic
  fallback
- 跨 message 不拼接:开 tag 和闭 tag 在不同 message 视为不完整(避免
  误拼字符串)
- 字符串 content 不解析:assistant.message.content 为 string(非 block
  array)的少见路径直接 skip,不 crash

测试:
- extractAutofixResultFromLog 10 个单测(空 log / 无 tag / hook stdout /
  assistant text / hook_response subtype / 多 tag latest-wins / 截断 /
  hook 后于 assistant 的优先级 / 跨 message 不拼接 / 字符串 content
  graceful)
- launchAutofixPr 3 个新测试(extractor 注册 / initialMessage 含 tag
  schema / extractor 真实行为)

完整方案见 docs/features/remote-agent-completion-analysis.md 第 5.3 节。
---
 .../__tests__/extractAutofixResult.test.ts    | 123 ++++++++++++++++++
 .../__tests__/launchAutofixPr.test.ts         |  58 +++++++++
 .../autofix-pr/extractAutofixResult.ts        |  84 ++++++++++++
 src/commands/autofix-pr/launchAutofixPr.ts    |  27 +++-
 src/tasks/RemoteAgentTask/RemoteAgentTask.tsx | 112 +++++++++++++++-
 5 files changed, 400 insertions(+), 4 deletions(-)
 create mode 100644 src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
 create mode 100644 src/commands/autofix-pr/extractAutofixResult.ts

diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
new file mode 100644
index 000000000..475ea1ad5
--- /dev/null
+++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
@@ -0,0 +1,123 @@
+import { describe, expect, test } from 'bun:test'
+import type { SDKMessage } from '../../../entrypoints/agentSdkTypes.js'
+import {
+  AUTOFIX_RESULT_TAG,
+  extractAutofixResultFromLog,
+} from '../extractAutofixResult.js'
+
+function hookProgressMessage(stdout: string): SDKMessage {
+  return {
+    type: 'system',
+    subtype: 'hook_progress',
+    stdout,
+  } as unknown as SDKMessage
+}
+
+function assistantTextMessage(text: string): SDKMessage {
+  return {
+    type: 'assistant',
+    message: {
+      content: [{ type: 'text', text }],
+    },
+  } as unknown as SDKMessage
+}
+
+const sampleTag = (summary: string): string =>
+  `<${AUTOFIX_RESULT_TAG}>
+  <pr-number>42</pr-number>
+  <commits-pushed>
+    <commit sha="abc123">${summary}</commit>
+  </commits-pushed>
+  <ci-status>green</ci-status>
+  <summary>${summary}</summary>
+</${AUTOFIX_RESULT_TAG}>`
+
+describe('extractAutofixResultFromLog', () => {
+  test('returns null on empty log', () => {
+    expect(extractAutofixResultFromLog([])).toBeNull()
+  })
+
+  test('returns null when no tag present', () => {
+    const log = [
+      assistantTextMessage('just some normal text without the tag'),
+      hookProgressMessage('hook output without tag'),
+    ]
+    expect(extractAutofixResultFromLog(log)).toBeNull()
+  })
+
+  test('extracts from hook stdout', () => {
+    const tag = sampleTag('fixed lint error')
+    const log = [hookProgressMessage(`prefix\n${tag}\nsuffix`)]
+    const result = extractAutofixResultFromLog(log)
+    expect(result).toBe(tag)
+  })
+
+  test('extracts from assistant text', () => {
+    const tag = sampleTag('typecheck fixed')
+    const log = [assistantTextMessage(`Done!\n${tag}`)]
+    expect(extractAutofixResultFromLog(log)).toBe(tag)
+  })
+
+  test('extracts from hook_response subtype too', () => {
+    const tag = sampleTag('via hook_response')
+    const log = [
+      {
+        type: 'system',
+        subtype: 'hook_response',
+        stdout: tag,
+      } as unknown as SDKMessage,
+    ]
+    expect(extractAutofixResultFromLog(log)).toBe(tag)
+  })
+
+  test('returns the latest tag when multiple appear in different messages', () => {
+    const older = sampleTag('older attempt')
+    const newer = sampleTag('newer attempt')
+    const log = [
+      assistantTextMessage(`first try\n${older}`),
+      assistantTextMessage(`retry\n${newer}`),
+    ]
+    expect(extractAutofixResultFromLog(log)).toBe(newer)
+  })
+
+  test('returns null when open tag exists but close tag is missing (truncated)', () => {
+    const log = [
+      assistantTextMessage(
+        `<${AUTOFIX_RESULT_TAG}>\n<summary>got cut off mid-write...`,
+      ),
+    ]
+    expect(extractAutofixResultFromLog(log)).toBeNull()
+  })
+
+  test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => {
+    const earlier = sampleTag('via assistant first')
+    const later = sampleTag('via hook later')
+    const log = [
+      assistantTextMessage(`some output\n${earlier}`),
+      hookProgressMessage(later),
+    ]
+    expect(extractAutofixResultFromLog(log)).toBe(later)
+  })
+
+  test('ignores tag-shaped strings that span across messages (no concatenation)', () => {
+    // Open tag in one message, close tag in another — should NOT be stitched.
+    const log = [
+      assistantTextMessage(`<${AUTOFIX_RESULT_TAG}>\n<summary>part 1`),
+      assistantTextMessage(`part 2</summary>\n</${AUTOFIX_RESULT_TAG}>`),
+    ]
+    expect(extractAutofixResultFromLog(log)).toBeNull()
+  })
+
+  test('extracts when assistant content is a string (not block array)', () => {
+    // Some SDK paths emit assistant content as a raw string instead of
+    // a content-block array. Current implementation skips those — verify
+    // graceful no-op rather than crash.
+    const log = [
+      {
+        type: 'assistant',
+        message: { content: sampleTag('string content') },
+      } as unknown as SDKMessage,
+    ]
+    expect(extractAutofixResultFromLog(log)).toBeNull()
+  })
+})
diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index c4fcc3468..73b183b78 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -65,12 +65,16 @@ const registerCompletionCheckerMock = mock<
     checker: (metadata?: unknown) => Promise<string | null>,
   ) => void
 >(() => {})
+const registerContentExtractorMock = mock<
+  (taskType: string, extractor: (log: unknown[]) => string | null) => void
+>(() => {})
 
 mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({
   checkRemoteAgentEligibility: checkEligibilityMock,
   registerRemoteAgentTask: registerMock,
   registerCompletionHook: registerCompletionHookMock,
   registerCompletionChecker: registerCompletionCheckerMock,
+  registerContentExtractor: registerContentExtractorMock,
   getRemoteTaskSessionUrl: getSessionUrlMock,
   formatPreconditionError: (e: { type: string }) => e.type,
 }))
@@ -525,6 +529,60 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => {
   })
 })
 
+// Phase 3: content extractor wiring + initialMessage tag instruction
+describe('callAutofixPr · Phase 3 content extractor integration', () => {
+  test('registerContentExtractor is called at module load with autofix-pr type', () => {
+    const calls = registerContentExtractorMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    expect(calls.length).toBeGreaterThan(0)
+    const extractor = calls[calls.length - 1]?.[1]
+    expect(typeof extractor).toBe('function')
+  })
+
+  test('initialMessage instructs the remote agent to emit an <autofix-result> tag', async () => {
+    await callAutofixPr(onDone, makeContext(), '42')
+    // teleportMock's typed signature has no args, so calls[0] is a
+    // zero-length tuple. We know teleportToRemote is invoked with one
+    // options object, so double-cast through unknown to read the args.
+    const calls = teleportMock.mock.calls as unknown as Array<
+      [{ initialMessage?: string }]
+    >
+    const teleportArgs = calls[0]?.[0]
+    expect(teleportArgs?.initialMessage).toContain('<autofix-result>')
+    expect(teleportArgs?.initialMessage).toContain('</autofix-result>')
+    expect(teleportArgs?.initialMessage).toContain('<ci-status>')
+    expect(teleportArgs?.initialMessage).toContain('<summary>')
+  })
+
+  test('registered extractor returns string for valid log and null for empty', () => {
+    const calls = registerContentExtractorMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    const extractor = calls[calls.length - 1]?.[1] as
+      | ((log: unknown[]) => string | null)
+      | undefined
+    expect(extractor).toBeDefined()
+    // Empty log → null
+    expect(extractor?.([])).toBeNull()
+    // Log with assistant text containing tag → returns it
+    const logWithTag = [
+      {
+        type: 'assistant',
+        message: {
+          content: [
+            {
+              type: 'text',
+              text: 'done\n<autofix-result><summary>x</summary></autofix-result>',
+            },
+          ],
+        },
+      },
+    ]
+    expect(extractor?.(logWithTag)).toContain('<autofix-result>')
+  })
+})
+
 // Cover ../index.ts load() — placed in this test file so all the heavy mocks
 // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics /
 // skillDetect) are already registered when load() dynamically imports
diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts
new file mode 100644
index 000000000..7fb8301e3
--- /dev/null
+++ b/src/commands/autofix-pr/extractAutofixResult.ts
@@ -0,0 +1,84 @@
+// Extract the <autofix-result> tag from a remote autofix-pr session log.
+//
+// The remote agent emits a structured XML block as its final message
+// (initialMessage in launchAutofixPr.ts instructs it to). The tag carries
+// PR-specific outcome data — commits pushed, files changed, CI status,
+// summary — that the framework's generic "task completed" notification
+// can't convey. We surface it to the local model by injecting the tag
+// verbatim into the message queue (analogous to <remote-review> handling).
+//
+// Resilient to two production realities:
+//   1. The tag may appear in either an assistant text block or a hook
+//      stdout (some autofix skills wrap the final report in a hook).
+//   2. The tag may not appear at all (older agents, truncated runs) —
+//      caller falls back to generic completion notification.
+
+import type {
+  SDKAssistantMessage,
+  SDKMessage,
+} from '../../entrypoints/agentSdkTypes.js'
+
+export const AUTOFIX_RESULT_TAG = 'autofix-result'
+
+const TAG_OPEN = `<${AUTOFIX_RESULT_TAG}>`
+const TAG_CLOSE = `</${AUTOFIX_RESULT_TAG}>`
+
+/**
+ * Walk the session log for an <autofix-result> tag. Returns the full tag
+ * (including delimiters) so the caller can inject it as-is into the
+ * notification; returns null if no tag is present.
+ *
+ * Search order:
+ *   1. Latest hook_progress / hook_response stdout (autofix skills that
+ *      use hooks to format the report write here first).
+ *   2. Latest assistant text block (agents that don't use hooks write the
+ *      tag inline in their final message).
+ *
+ * Latest-wins so re-tries within the same session don't surface stale
+ * earlier results.
+ */
+export function extractAutofixResultFromLog(log: SDKMessage[]): string | null {
+  // Walk backwards so we hit the most recent tag first.
+  for (let i = log.length - 1; i >= 0; i--) {
+    const msg = log[i]
+    if (!msg) continue
+
+    // Hook stdout (system messages of subtype hook_progress / hook_response).
+    if (
+      msg.type === 'system' &&
+      (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')
+    ) {
+      const stdout = (msg as { stdout?: unknown }).stdout
+      if (typeof stdout === 'string') {
+        const extracted = extractBetween(stdout, TAG_OPEN, TAG_CLOSE)
+        if (extracted) return extracted
+      }
+      continue
+    }
+
+    // Assistant text blocks.
+    if (msg.type === 'assistant') {
+      const content = (msg as SDKAssistantMessage).message?.content
+      if (!content || typeof content === 'string') continue
+      for (const block of content as Array<{ type: string; text?: string }>) {
+        if (block.type !== 'text' || typeof block.text !== 'string') continue
+        if (!block.text.includes(TAG_OPEN)) continue
+        const extracted = extractBetween(block.text, TAG_OPEN, TAG_CLOSE)
+        if (extracted) return extracted
+      }
+    }
+  }
+  return null
+}
+
+function extractBetween(
+  text: string,
+  open: string,
+  close: string,
+): string | null {
+  const start = text.lastIndexOf(open)
+  if (start === -1) return null
+  const end = text.indexOf(close, start + open.length)
+  if (end === -1) return null
+  return text.slice(start, end + close.length)
+}
diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts
index 9b5109ff5..f33c3e94f 100644
--- a/src/commands/autofix-pr/launchAutofixPr.ts
+++ b/src/commands/autofix-pr/launchAutofixPr.ts
@@ -15,6 +15,7 @@ import {
   getRemoteTaskSessionUrl,
   registerCompletionChecker,
   registerCompletionHook,
+  registerContentExtractor,
   registerRemoteAgentTask,
   type AutofixPrRemoteTaskMetadata,
   type BackgroundRemoteSessionPrecondition,
@@ -31,6 +32,7 @@ import {
   trySetActiveMonitor,
   updateActiveMonitor,
 } from './monitorState.js'
+import { extractAutofixResultFromLog } from './extractAutofixResult.js'
 import { parseAutofixArgs } from './parseArgs.js'
 import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js'
 import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js'
@@ -81,6 +83,13 @@ registerCompletionHook('autofix-pr', (taskId, metadata) => {
   if (meta) lastCheckAt.delete(throttleKey(meta))
 })
 
+// Phase 3 content return: extract the <autofix-result> tag from the session
+// log so the local model sees the agent's structured outcome (commits
+// pushed, files changed, CI status) inline in the completion task-
+// notification — instead of just a file-path pointer. The framework falls
+// back to the generic notification if extraction returns null.
+registerContentExtractor('autofix-pr', log => extractAutofixResultFromLog(log))
+
 function makeErrorText(message: string, code: string): string {
   logEvent('tengu_autofix_pr_result', {
     result:
@@ -249,7 +258,23 @@ export const callAutofixPr: LocalJSXCommandCall = async (
     // 4.5 compose message
     const target = `${owner}/${repo}#${prNumber}`
     const branchName = `refs/pull/${prNumber}/head`
-    const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}`
+    const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}
+
+When you finish (or hit a blocker you can't recover from), output the following XML tag as your final message so the local user gets a structured summary:
+
+<autofix-result>
+  <pr-number>${prNumber}</pr-number>
+  <commits-pushed>
+    <commit sha="...">commit message</commit>
+  </commits-pushed>
+  <files-changed>
+    <file path="...">N changes</file>
+  </files-changed>
+  <ci-status>green | red | pending | unknown</ci-status>
+  <summary>One-sentence summary of what was fixed or why it could not be fixed.</summary>
+</autofix-result>
+
+If no fix was needed, omit <commits-pushed> and <files-changed> and explain in <summary>. If you only attempted partial work, list the commits you did push and explain the remainder in <summary>.`
 
     // 4.6 in-process teammate
     const teammate = createAutofixTeammate(initialMessage, target)
diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
index 712fc61f5..1acffe20f 100644
--- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
+++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
@@ -133,6 +133,39 @@ export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: Remo
 
 const completionHooks = new Map<RemoteTaskType, RemoteTaskCompletionHook>();
 
+/**
+ * Inspect a completed remote task's accumulated log and return an XML fragment
+ * to inject inline into the completion task-notification. Returning null falls
+ * back to the framework's generic "task completed" notification (file-path
+ * pointer only). Used by command modules whose remote agents emit structured
+ * outcome tags the local model should read directly.
+ */
+export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null;
+
+const contentExtractors = new Map<RemoteTaskType, RemoteTaskContentExtractor>();
+
+/**
+ * Register a content extractor for a remote task type. Called once per
+ * completion in the generic completion branches (archived, completionChecker,
+ * result-driven). isRemoteReview tasks have their own bespoke path and skip
+ * extractors entirely. Errors propagate to the framework which logs and falls
+ * back to generic notification.
+ */
+export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void {
+  contentExtractors.set(remoteTaskType, extractor);
+}
+
+function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null {
+  const extractor = contentExtractors.get(task.remoteTaskType);
+  if (!extractor) return null;
+  try {
+    return extractor(log);
+  } catch (e) {
+    logError(e);
+    return null;
+  }
+}
+
 /**
  * Register a completion hook for a remote task type. Invoked once after the
  * task reaches a terminal state in any of the framework's completion branches
@@ -253,6 +286,41 @@ function enqueueRemoteNotification(
   enqueuePendingNotification({ value: message, mode: 'task-notification' });
 }
 
+/**
+ * Same as enqueueRemoteNotification but inlines a structured XML fragment
+ * (returned by a registered RemoteTaskContentExtractor) so the local model
+ * reads the remote agent's outcome directly instead of having to follow a
+ * file-path pointer. Mode is still 'task-notification' — the framing XML is
+ * the same, only the body differs.
+ */
+function enqueueRichRemoteNotification(
+  taskId: string,
+  title: string,
+  status: 'completed' | 'failed' | 'killed',
+  richContent: string,
+  setAppState: SetAppState,
+  toolUseId?: string,
+): void {
+  if (!markTaskNotified(taskId, setAppState)) return;
+
+  const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped';
+  const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}</${TOOL_USE_ID_TAG}>` : '';
+  const outputPath = getTaskOutputPath(taskId);
+
+  const message = `<${TASK_NOTIFICATION_TAG}>
+<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>${toolUseIdLine}
+<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
+<${OUTPUT_FILE_TAG}>${outputPath}</${OUTPUT_FILE_TAG}>
+<${STATUS_TAG}>${status}</${STATUS_TAG}>
+<${SUMMARY_TAG}>Remote task "${title}" ${statusText}</${SUMMARY_TAG}>
+</${TASK_NOTIFICATION_TAG}>
+The remote agent produced the following structured outcome. Summarize the key changes for the user:
+
+${richContent}`;
+
+  enqueuePendingNotification({ value: message, mode: 'task-notification' });
+}
+
 /**
  * Atomically mark a task as notified. Returns true if this call flipped the
  * flag (caller should enqueue), false if already notified (caller should skip).
@@ -718,7 +786,19 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
         updateTaskState<RemoteAgentTaskState>(taskId, context.setAppState, t =>
           t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
         );
-        enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
+        const richContent = tryExtractRichContent(task, accumulatedLog);
+        if (richContent) {
+          enqueueRichRemoteNotification(
+            taskId,
+            task.title,
+            'completed',
+            richContent,
+            context.setAppState,
+            task.toolUseId,
+          );
+        } else {
+          enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
+        }
         void evictTaskOutput(taskId);
         void removeRemoteAgentMetadata(taskId);
         runCompletionHook(taskId, task);
@@ -732,7 +812,19 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
           updateTaskState<RemoteAgentTaskState>(taskId, context.setAppState, t =>
             t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
           );
-          enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
+          const richContent = tryExtractRichContent(task, accumulatedLog);
+          if (richContent) {
+            enqueueRichRemoteNotification(
+              taskId,
+              completionResult,
+              'completed',
+              richContent,
+              context.setAppState,
+              task.toolUseId,
+            );
+          } else {
+            enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
+          }
           void evictTaskOutput(taskId);
           void removeRemoteAgentMetadata(taskId);
           runCompletionHook(taskId, task);
@@ -917,7 +1009,21 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
           return; // Stop polling
         }
 
-        enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
+        // finalStatus is 'completed' | 'failed' on this path — kill is a
+        // separate code path (RemoteAgentTask.kill) and never reaches here.
+        const richContent = tryExtractRichContent(task, accumulatedLog);
+        if (richContent) {
+          enqueueRichRemoteNotification(
+            taskId,
+            task.title,
+            finalStatus,
+            richContent,
+            context.setAppState,
+            task.toolUseId,
+          );
+        } else {
+          enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
+        }
         void evictTaskOutput(taskId);
         void removeRemoteAgentMetadata(taskId);
         runCompletionHook(taskId, task);

From ae7f3e22324b25d383f6dfded121af442367a68f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 18 May 2026 15:00:58 +0000
Subject: [PATCH 4/6] =?UTF-8?q?fix(autofix-pr):=20extractBetween=20?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81=20latest=20tag=20=E6=88=AA=E6=96=AD=E6=97=B6?=
 =?UTF-8?q?=E5=9B=9E=E6=BA=AF=E5=88=B0=E6=9B=B4=E6=97=A9=E5=AE=8C=E6=95=B4?=
 =?UTF-8?q?=E5=AF=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

如果远端 agent 重试时写了完整 <autofix-result> 后又开了一个被截断的
第二个 tag, 旧实现只看 lastIndexOf(open) 然后找不到 close 就返回 null,
导致前面那个完整结果被丢弃。改为从尾向首遍历所有 open tag, 返回第一个
能配对的 open/close 对。

附带:
- docs/features/remote-agent-completion-analysis.md: 9 处裸 fenced block
  补 language tag (text/http), 修复 markdownlint MD040 警告
- 同文件: 两处"三选项" → "三个选项" 符合中文量词习惯
---
 .../__tests__/extractAutofixResult.test.ts     | 10 ++++++++++
 .../autofix-pr/extractAutofixResult.ts         | 18 +++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
index 475ea1ad5..8be9472aa 100644
--- a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
+++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
@@ -89,6 +89,16 @@ describe('extractAutofixResultFromLog', () => {
     expect(extractAutofixResultFromLog(log)).toBeNull()
   })
 
+  test('returns earlier complete tag when latest open tag is truncated within the same block', () => {
+    // Retry scenario: a full result was emitted, then a second result tag
+    // started but got cut off. We should surface the earlier complete pair
+    // rather than dropping the whole block.
+    const complete = sampleTag('earlier complete result')
+    const truncated = `<${AUTOFIX_RESULT_TAG}>\n<summary>truncated retry...`
+    const log = [assistantTextMessage(`${complete}\n${truncated}`)]
+    expect(extractAutofixResultFromLog(log)).toBe(complete)
+  })
+
   test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => {
     const earlier = sampleTag('via assistant first')
     const later = sampleTag('via hook later')
diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts
index 7fb8301e3..c1ec80e22 100644
--- a/src/commands/autofix-pr/extractAutofixResult.ts
+++ b/src/commands/autofix-pr/extractAutofixResult.ts
@@ -71,14 +71,22 @@ export function extractAutofixResultFromLog(log: SDKMessage[]): string | null {
   return null
 }
 
+// Walks open tags from latest to earliest, returning the first complete
+// open/close pair. Guards against a truncated final tag shadowing an
+// earlier complete pair within the same text block (e.g., a retry wrote a
+// full result, then the model started a second tag that got cut off).
 function extractBetween(
   text: string,
   open: string,
   close: string,
 ): string | null {
-  const start = text.lastIndexOf(open)
-  if (start === -1) return null
-  const end = text.indexOf(close, start + open.length)
-  if (end === -1) return null
-  return text.slice(start, end + close.length)
+  let searchFrom = text.length
+  while (searchFrom >= 0) {
+    const start = text.lastIndexOf(open, searchFrom)
+    if (start === -1) return null
+    const end = text.indexOf(close, start + open.length)
+    if (end !== -1) return text.slice(start, end + close.length)
+    searchFrom = start - 1
+  }
+  return null
 }

From fcdf1f22227ce3cd28e30a71a51316bab122025a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 18 May 2026 15:25:00 +0000
Subject: [PATCH 5/6] =?UTF-8?q?test(autofix-pr):=20=E8=A1=A5=E9=BD=90=20co?=
 =?UTF-8?q?mpletionChecker=20/=20=E8=BE=B9=E7=95=8C=20CI=20=E6=A3=80?=
 =?UTF-8?q?=E6=9F=A5=E8=A6=86=E7=9B=96=E7=8E=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

针对 codecov patch coverage gap, 补足三块此前未走到的代码路径:

prOutcomeCheck.ts (原 96.92%, 2 lines missing):
- statusCheckRollup === undefined 路径 (与空数组分支不同, GitHub 在无
  checks 配置的 PR 上直接省略字段)
- COMPLETED 状态但 conclusion 为 null/空 的 in-flight 检查归为 pending

launchAutofixPr.ts (原 58.33%, 15 lines missing):
- registerCompletionChecker arrow body: metadata 缺失早返回 / 节流窗口内
  返回 null / completed=false 返回 null / completed=true 返回 summary /
  initialHeadSha 透传到 checkPrAutofixOutcome
- registerCompletionHook 的 if(meta) 短路两侧: 有 metadata 时清空节流条目,
  无 metadata 时仍释放 active monitor lock

所有新测试沿用现有 mock.module 与 registerXxxMock.mock.calls 拉取注册
回调的模式, 无新增依赖。prOutcomeCheck 11/11 本地通过。
---
 .../__tests__/launchAutofixPr.test.ts         | 141 ++++++++++++++++++
 .../__tests__/prOutcomeCheck.test.ts          |  35 +++++
 2 files changed, 176 insertions(+)

diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index 73b183b78..83fb13792 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -529,6 +529,147 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => {
   })
 })
 
+// Phase 2 (cont.): exercise the registered completionChecker arrow body
+// directly. The earlier suite verifies it was registered but never invokes
+// the arrow itself, leaving the throttle / metadata-guard / gh-CLI dispatch
+// branches uncovered.
+describe('callAutofixPr · Phase 2 completionChecker arrow body', () => {
+  // Pull the most recent registered checker — beforeAll registers once at
+  // module load; nothing else re-registers across this file's tests.
+  function getChecker(): (
+    metadata?: unknown,
+  ) => Promise<string | null> {
+    const calls = registerCompletionCheckerMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    const fn = calls[calls.length - 1]?.[1]
+    if (typeof fn !== 'function') {
+      throw new Error('completionChecker not registered')
+    }
+    return fn
+  }
+
+  test('returns null when metadata is undefined (early guard)', async () => {
+    const checker = getChecker()
+    expect(await checker(undefined)).toBeNull()
+  })
+
+  test('returns null when checkPrAutofixOutcome reports not completed', async () => {
+    const { checkPrAutofixOutcome } = await import('../prFetch.js')
+    ;(checkPrAutofixOutcome as ReturnType<typeof mock>).mockImplementationOnce(
+      () => Promise.resolve({ completed: false }),
+    )
+    const checker = getChecker()
+    // Distinct PR number to dodge the in-process throttle map carried over
+    // from earlier tests.
+    const result = await checker({
+      owner: 'acme',
+      repo: 'myrepo',
+      prNumber: 1001,
+    })
+    expect(result).toBeNull()
+  })
+
+  test('returns the summary string when checkPrAutofixOutcome reports completed', async () => {
+    const { checkPrAutofixOutcome } = await import('../prFetch.js')
+    ;(checkPrAutofixOutcome as ReturnType<typeof mock>).mockImplementationOnce(
+      () =>
+        Promise.resolve({
+          completed: true,
+          summary: 'acme/myrepo#1002 merged. Autofix monitoring complete.',
+        }),
+    )
+    const checker = getChecker()
+    const result = await checker({
+      owner: 'acme',
+      repo: 'myrepo',
+      prNumber: 1002,
+    })
+    expect(result).toBe(
+      'acme/myrepo#1002 merged. Autofix monitoring complete.',
+    )
+  })
+
+  test('passes initialHeadSha through to checkPrAutofixOutcome', async () => {
+    const { checkPrAutofixOutcome } = await import('../prFetch.js')
+    const checkMock = checkPrAutofixOutcome as ReturnType<typeof mock>
+    checkMock.mockClear()
+    checkMock.mockImplementationOnce(() => Promise.resolve({ completed: false }))
+    const checker = getChecker()
+    await checker({
+      owner: 'acme',
+      repo: 'myrepo',
+      prNumber: 1003,
+      initialHeadSha: 'sha-baseline-xyz',
+    })
+    expect(checkMock).toHaveBeenCalledWith({
+      owner: 'acme',
+      repo: 'myrepo',
+      prNumber: 1003,
+      initialHeadSha: 'sha-baseline-xyz',
+    })
+  })
+
+  test('throttles back-to-back calls for the same PR within CHECK_INTERVAL_MS', async () => {
+    const { checkPrAutofixOutcome } = await import('../prFetch.js')
+    const checkMock = checkPrAutofixOutcome as ReturnType<typeof mock>
+    checkMock.mockClear()
+    checkMock.mockImplementation(() => Promise.resolve({ completed: false }))
+    const checker = getChecker()
+    const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1004 }
+    await checker(meta)
+    // Second call within the 5s throttle window must short-circuit to null
+    // without invoking the gh CLI layer again.
+    const callCountAfterFirst = checkMock.mock.calls.length
+    const result = await checker(meta)
+    expect(result).toBeNull()
+    expect(checkMock.mock.calls.length).toBe(callCountAfterFirst)
+  })
+
+  test('completionHook with metadata clears the throttle entry (re-launch can re-check immediately)', async () => {
+    const { checkPrAutofixOutcome } = await import('../prFetch.js')
+    const checkMock = checkPrAutofixOutcome as ReturnType<typeof mock>
+    checkMock.mockClear()
+    checkMock.mockImplementation(() => Promise.resolve({ completed: false }))
+    const checker = getChecker()
+    const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1005 }
+    await checker(meta) // populate throttle map
+
+    // Invoke the registered completion hook with the same metadata so the
+    // throttle entry is wiped, then verify the next checker call dispatches
+    // gh CLI again instead of short-circuiting.
+    const hookCalls = registerCompletionHookMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    const hook = hookCalls[hookCalls.length - 1]?.[1] as (
+      id: string,
+      metadata?: unknown,
+    ) => void
+    hook('any-task-id', meta)
+
+    const callCountBefore = checkMock.mock.calls.length
+    await checker(meta)
+    expect(checkMock.mock.calls.length).toBe(callCountBefore + 1)
+  })
+
+  test('completionHook without metadata still clears the active monitor lock', async () => {
+    // Lock is set via callAutofixPr; hook then invoked with undefined metadata
+    // to exercise the `if (meta)` short-circuit branch (the lock-clear half
+    // still has to run regardless of metadata presence).
+    await callAutofixPr(onDone, makeContext(), '42')
+    expect(getActiveMonitor()).not.toBeNull()
+    const hookCalls = registerCompletionHookMock.mock.calls.filter(
+      c => c[0] === 'autofix-pr',
+    )
+    const hook = hookCalls[hookCalls.length - 1]?.[1] as (
+      id: string,
+      metadata?: unknown,
+    ) => void
+    hook('framework-task-id', undefined)
+    expect(getActiveMonitor()).toBeNull()
+  })
+})
+
 // Phase 3: content extractor wiring + initialMessage tag instruction
 describe('callAutofixPr · Phase 3 content extractor integration', () => {
   test('registerContentExtractor is called at module load with autofix-pr type', () => {
diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
index 940c33423..32be7bade 100644
--- a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
+++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
@@ -133,6 +133,41 @@ describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => {
     }
   })
 
+  test('statusCheckRollup undefined → treated as no checks configured (success)', () => {
+    // Distinct from empty-array: GitHub omits the field entirely on PRs
+    // without any configured checks. The !rollup branch covers undefined.
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: undefined,
+      }),
+      identity(),
+    )
+    expect(result.completed).toBe(true)
+    if (result.completed) {
+      expect(result.summary).toContain('CI green')
+    }
+  })
+
+  test('check with COMPLETED status but empty conclusion → counted as pending', () => {
+    // Edge case: GitHub sometimes reports a check as COMPLETED with a null/
+    // missing conclusion (in-flight result mid-write). The defensive branch
+    // treats empty conclusion after a passed status check as pending.
+    const result = summariseAutofixOutcome(
+      basePayload({
+        state: 'OPEN',
+        headRefOid: 'sha-new',
+        statusCheckRollup: [
+          { status: 'COMPLETED', conclusion: null, name: 'ci-in-flight' },
+          { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+        ],
+      }),
+      identity(),
+    )
+    expect(result).toEqual({ completed: false })
+  })
+
   test('neutral / skipped conclusions count as success (not failure)', () => {
     const result = summariseAutofixOutcome(
       basePayload({

From 0e001a77d391b66ef814a757e019182f81d7f241 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 18 May 2026 15:26:33 +0000
Subject: [PATCH 6/6] =?UTF-8?q?style:=20biome=20check=20--fix=20=E6=95=B4?=
 =?UTF-8?q?=E5=BD=A2=20launchAutofixPr.test=20=E6=96=B0=E5=A2=9E=E6=AE=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../autofix-pr/__tests__/launchAutofixPr.test.ts     | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index 83fb13792..c34a2c3a0 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -536,9 +536,7 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => {
 describe('callAutofixPr · Phase 2 completionChecker arrow body', () => {
   // Pull the most recent registered checker — beforeAll registers once at
   // module load; nothing else re-registers across this file's tests.
-  function getChecker(): (
-    metadata?: unknown,
-  ) => Promise<string | null> {
+  function getChecker(): (metadata?: unknown) => Promise<string | null> {
     const calls = registerCompletionCheckerMock.mock.calls.filter(
       c => c[0] === 'autofix-pr',
     )
@@ -585,16 +583,16 @@ describe('callAutofixPr · Phase 2 completionChecker arrow body', () => {
       repo: 'myrepo',
       prNumber: 1002,
     })
-    expect(result).toBe(
-      'acme/myrepo#1002 merged. Autofix monitoring complete.',
-    )
+    expect(result).toBe('acme/myrepo#1002 merged. Autofix monitoring complete.')
   })
 
   test('passes initialHeadSha through to checkPrAutofixOutcome', async () => {
     const { checkPrAutofixOutcome } = await import('../prFetch.js')
     const checkMock = checkPrAutofixOutcome as ReturnType<typeof mock>
     checkMock.mockClear()
-    checkMock.mockImplementationOnce(() => Promise.resolve({ completed: false }))
+    checkMock.mockImplementationOnce(() =>
+      Promise.resolve({ completed: false }),
+    )
     const checker = getChecker()
     await checker({
       owner: 'acme',