From cca5102f15b1e3a9eb5b5c092770d98882493daf Mon Sep 17 00:00:00 2001 From: unraid Date: Mon, 18 May 2026 21:51:54 +0800 Subject: [PATCH 1/6] =?UTF-8?q?fix(autofix-pr):=20=E4=BF=AE=E5=A4=8D=20tas?= =?UTF-8?q?kId=20=E4=B8=8D=E4=B8=80=E8=87=B4=E5=AF=BC=E8=87=B4=20monitor?= =?UTF-8?q?=20lock=20dangling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 问题:createAutofixTeammate 生成 teammate UUID 作为 monitor lock 的 key, 但 registerRemoteAgentTask 内部生成的 framework taskId 是另一个 UUID。 CCR session 自然完成时框架调 clearActiveMonitor(frameworkTaskId) guard 失败,lock 永不释放,导致后续 /autofix-pr 报 "already monitoring"。 修复(Phase 1 of remote-agent completion loop): - monitorState 新增 updateActiveMonitor(partial) 原子更新 - callAutofixPr 在 register 后 swap lock 的 taskId 到 framework 分配的 id - RemoteAgentTask 引入 registerCompletionHook 注册式 API(参考已有的 registerCompletionChecker 模式),在 5 个完成路径调 runCompletionHook - autofix-pr 命令模块自己注册 cleanup hook,避免 framework 反向依赖 command 模块 测试: - monitorState 新增 4 个测试(updateActiveMonitor 行为 + bug 复现/修复) - launchAutofixPr 新增 3 个端到端回归测试(taskId swap + hook 触发 + subsequent launch 不报 already monitoring) 完整分析与 Phase 2/3 改造方案见 docs/features/remote-agent-completion-analysis.md。 --- .../__tests__/launchAutofixPr.test.ts | 63 ++++++++++++++++++- .../autofix-pr/__tests__/monitorState.test.ts | 38 +++++++++++ src/commands/autofix-pr/launchAutofixPr.ts | 22 ++++++- src/commands/autofix-pr/monitorState.ts | 14 +++++ src/tasks/RemoteAgentTask/RemoteAgentTask.tsx | 37 +++++++++++ 5 files changed, 172 insertions(+), 2 deletions(-) diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index c6df04ff9..7fbd34a54 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -46,7 +46,7 @@ mock.module('src/utils/teleport.js', () => ({ })) const registerMock = mock(() => ({ - taskId: 'task-abc', + taskId: 'framework-task-id', sessionId: 'session-123', cleanup: () => {}, })) @@ -56,10 +56,14 @@ const checkEligibilityMock = mock(() => const getSessionUrlMock = mock( (id: string) => `https://claude.ai/session/${id}`, ) +const registerCompletionHookMock = mock< + (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void +>(() => {}) mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({ checkRemoteAgentEligibility: checkEligibilityMock, registerRemoteAgentTask: registerMock, + registerCompletionHook: registerCompletionHookMock, getRemoteTaskSessionUrl: getSessionUrlMock, formatPreconditionError: (e: { type: string }) => e.type, })) @@ -375,6 +379,63 @@ describe('callAutofixPr', () => { }) }) +// Regression suite for the taskId-mismatch latent bug + completion hook wiring. +// Before this fix, createAutofixTeammate generated a teammate UUID, that UUID +// was used to acquire the singleton monitor lock, and registerRemoteAgentTask +// generated a *different* framework taskId. When the framework eventually +// called clearActiveMonitor(frameworkTaskId) on natural completion, the guard +// failed (active.taskId !== frameworkTaskId) and the lock stayed acquired, +// blocking any subsequent /autofix-pr invocations in the same process. +describe('callAutofixPr · completion hook wiring (taskId mismatch regression)', () => { + test('updateActiveMonitor swaps lock taskId to framework-assigned id after register', async () => { + await callAutofixPr(onDone, makeContext(), '42') + const monitor = getActiveMonitor() as { taskId: string } | null + expect(monitor).not.toBeNull() + // registerMock returns 'framework-task-id'; before the fix this would be + // a teammate-generated random UUID instead. + expect(monitor?.taskId).toBe('framework-task-id') + }) + + test('framework hook → clearActiveMonitor releases lock on natural completion', async () => { + await callAutofixPr(onDone, makeContext(), '42') + expect(getActiveMonitor()).not.toBeNull() + + // Find the hook the module registered at import time. We grab the last + // call so re-imports across tests don't break this — only the most recent + // registration is what the framework would invoke now. + const calls = registerCompletionHookMock.mock.calls + expect(calls.length).toBeGreaterThan(0) + const lastCall = calls[calls.length - 1] + expect(lastCall?.[0]).toBe('autofix-pr') + const hook = lastCall?.[1] as (id: string, metadata?: unknown) => void + expect(typeof hook).toBe('function') + + // Simulate the framework invoking the hook with the framework taskId + // after a terminal transition. Before the fix this would no-op against + // a lock keyed by the teammate UUID. + hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 }) + expect(getActiveMonitor()).toBeNull() + }) + + test('subsequent /autofix-pr succeeds after framework hook clears the lock', async () => { + await callAutofixPr(onDone, makeContext(), '42') + // Simulate natural completion via the registered hook + const calls = registerCompletionHookMock.mock.calls + const hook = calls[calls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 }) + + onDone.mockClear() + await callAutofixPr(onDone, makeContext(), '99') + const firstArg = onDone.mock.calls[0]?.[0] as string + // Should be the success path, not "already monitoring" + expect(firstArg).not.toMatch(/already monitoring/i) + expect(firstArg).toMatch(/Autofix launched/) + }) +}) + // Cover ../index.ts load() — placed in this test file so all the heavy mocks // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics / // skillDetect) are already registered when load() dynamically imports diff --git a/src/commands/autofix-pr/__tests__/monitorState.test.ts b/src/commands/autofix-pr/__tests__/monitorState.test.ts index 43ce2f091..f97fcd972 100644 --- a/src/commands/autofix-pr/__tests__/monitorState.test.ts +++ b/src/commands/autofix-pr/__tests__/monitorState.test.ts @@ -5,6 +5,7 @@ import { isMonitoring, setActiveMonitor, trySetActiveMonitor, + updateActiveMonitor, } from '../monitorState.js' function makeState( @@ -76,4 +77,41 @@ describe('monitorState', () => { // First state remains expect(getActiveMonitor()?.prNumber).toBe(1) }) + + test('updateActiveMonitor returns false when no active monitor', () => { + expect(updateActiveMonitor({ taskId: 'task-x' })).toBe(false) + expect(getActiveMonitor()).toBeNull() + }) + + test('updateActiveMonitor merges partial fields into the active monitor', () => { + setActiveMonitor(makeState({ taskId: 'tentative-uuid' })) + expect(updateActiveMonitor({ taskId: 'framework-task-id' })).toBe(true) + const after = getActiveMonitor() + expect(after?.taskId).toBe('framework-task-id') + // Other fields untouched + expect(after?.owner).toBe('acme') + expect(after?.repo).toBe('myrepo') + expect(after?.prNumber).toBe(42) + }) + + test('updateActiveMonitor with new taskId makes clearActiveMonitor recognise framework taskId', () => { + // Reproduce the latent bug scenario: lock acquired with one taskId, + // framework assigns a different one. Before the fix, the framework's + // clearActiveMonitor(frameworkTaskId) would no-op because guard fails. + setActiveMonitor(makeState({ taskId: 'teammate-uuid' })) + // Framework cleanup using its own taskId — would fail guard before the fix + clearActiveMonitor('framework-uuid') + expect(getActiveMonitor()).not.toBeNull() + // After updateActiveMonitor swaps the taskId, framework cleanup works + updateActiveMonitor({ taskId: 'framework-uuid' }) + clearActiveMonitor('framework-uuid') + expect(getActiveMonitor()).toBeNull() + }) + + test('updateActiveMonitor does not change abortController identity', () => { + const ac = new AbortController() + setActiveMonitor(makeState({ abortController: ac, taskId: 'tentative' })) + updateActiveMonitor({ taskId: 'updated' }) + expect(getActiveMonitor()?.abortController).toBe(ac) + }) }) diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts index cb4eb87f8..892448d75 100644 --- a/src/commands/autofix-pr/launchAutofixPr.ts +++ b/src/commands/autofix-pr/launchAutofixPr.ts @@ -13,6 +13,7 @@ import { checkRemoteAgentEligibility, formatPreconditionError, getRemoteTaskSessionUrl, + registerCompletionHook, registerRemoteAgentTask, type BackgroundRemoteSessionPrecondition, } from '../../tasks/RemoteAgentTask/RemoteAgentTask.js' @@ -26,10 +27,21 @@ import { getActiveMonitor, isMonitoring, trySetActiveMonitor, + updateActiveMonitor, } from './monitorState.js' import { parseAutofixArgs } from './parseArgs.js' import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js' +// Release the singleton monitor lock when the framework transitions the +// autofix task to a terminal state. Without this, the lock — keyed by the +// framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap) +// — would dangle past natural completion, blocking subsequent /autofix-pr +// invocations until the process restarts. Registered at module load; the +// framework's runCompletionHook invokes it once per terminal transition. +registerCompletionHook('autofix-pr', taskId => { + clearActiveMonitor(taskId) +}) + function makeErrorText(message: string, code: string): string { logEvent('tengu_autofix_pr_result', { result: @@ -277,8 +289,15 @@ export const callAutofixPr: LocalJSXCommandCall = async ( // 4.9 register task. If this throws, release the lock so the user can // retry — the remote CCR session is already created so we surface a // dedicated error code. + // + // After registration succeeds, swap the lock's taskId from the tentative + // teammate UUID (used to acquire the lock atomically before teleport) to + // the framework-assigned taskId. Without this swap, the framework's own + // cleanup path (clearActiveMonitor(frameworkTaskId) on natural completion) + // would no-op against a lock keyed by teammate.taskId, leaving the + // singleton lock dangling and blocking future /autofix-pr invocations. try { - registerRemoteAgentTask({ + const { taskId: frameworkTaskId } = registerRemoteAgentTask({ remoteTaskType: 'autofix-pr', session, command: `/autofix-pr ${prNumber}`, @@ -286,6 +305,7 @@ export const callAutofixPr: LocalJSXCommandCall = async ( isLongRunning: true, remoteTaskMetadata: { owner, repo, prNumber }, }) + updateActiveMonitor({ taskId: frameworkTaskId }) } catch (regErr: unknown) { clearActiveMonitor(teammate.taskId) const regMsg = regErr instanceof Error ? regErr.message : String(regErr) diff --git a/src/commands/autofix-pr/monitorState.ts b/src/commands/autofix-pr/monitorState.ts index df74292f1..273f0a90c 100644 --- a/src/commands/autofix-pr/monitorState.ts +++ b/src/commands/autofix-pr/monitorState.ts @@ -46,6 +46,20 @@ export function clearActiveMonitor(taskId?: string): void { active = null } +/** + * Atomically merges partial updates into the active monitor. Returns true if + * applied, false if no active monitor. Used when the caller needs to swap the + * lock's taskId after the framework assigns a different one than the + * tentative one used to acquire the lock — without this the framework's + * cleanup (clearActiveMonitor with the framework taskId) would no-op against + * a lock keyed by the caller's tentative id. + */ +export function updateActiveMonitor(partial: Partial): boolean { + if (!active) return false + active = { ...active, ...partial } + return true +} + export function isMonitoring( owner: string, repo: string, diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx index 641c1c461..881046322 100644 --- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx +++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx @@ -114,6 +114,38 @@ export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checke completionCheckers.set(remoteTaskType, checker); } +/** + * Called after the task transitions to a terminal state and the notification + * has been enqueued. Used by command modules to release singleton locks, + * clear cached state, or perform other cleanup the framework cannot see. + * Hooks must be synchronous and best-effort — errors are logged but never + * propagate. + */ +export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void; + +const completionHooks = new Map(); + +/** + * Register a completion hook for a remote task type. Invoked once after the + * task reaches a terminal state in any of the framework's completion branches + * (archived session, completionChecker, stableIdle, result). Use this to + * release command-module state (e.g. singleton locks) without forcing the + * framework to reverse-import from the command package. + */ +export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void { + completionHooks.set(remoteTaskType, hook); +} + +function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void { + const hook = completionHooks.get(task.remoteTaskType); + if (!hook) return; + try { + hook(taskId, task.remoteTaskMetadata); + } catch (e) { + logError(e); + } +} + /** * Persist a remote-agent metadata entry to the session sidecar. * Fire-and-forget — persistence failures must not block task registration. @@ -681,6 +713,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; } @@ -694,6 +727,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; } } @@ -853,6 +887,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } @@ -870,12 +905,14 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } } catch (error) { From 31433ac958f7e659d1c1567326b424c8c651b29d Mon Sep 17 00:00:00 2001 From: unraid Date: Mon, 18 May 2026 22:00:32 +0800 Subject: [PATCH 2/6] =?UTF-8?q?feat(autofix-pr):=20=E6=B3=A8=E5=86=8C=20co?= =?UTF-8?q?mpletionChecker=20=E7=94=A8=20gh=20CLI=20=E6=8E=A2=E6=B5=8B=20P?= =?UTF-8?q?R=20=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of remote-agent completion loop。Phase 1 修了 monitor lock dangling,但完成信号仍然只能等 CCR session 自然 archive(timing 不可 预测,且不知道 PR 究竟有没有被修好)。Phase 2 加上主动完成探测。 实现: - 新增 prOutcomeCheck.ts(纯决策矩阵):summariseAutofixOutcome 给定 PR 快照 + 基线 SHA 返回 completed/summary。8 个决策分支单元测试。 - 新增 prFetch.ts(spawn 层):runGhPrView 调 gh CLI,fetchPrHeadSha 在 launch 时捕获基线 SHA,checkPrAutofixOutcome 组合两者。 - AutofixPrRemoteTaskMetadata 加 initialHeadSha?: string 字段,survive --resume。 - launchAutofixPr.ts 模块顶部 registerCompletionChecker('autofix-pr', ...),5s throttle 防 gh CLI 调用爆。callAutofixPr 启动时调 fetchPrHeadSha 传入 metadata。 决策矩阵: MERGED → done(merged) CLOSED 未 merge → done(closed without fix) OPEN 无 baseline → 继续轮询 OPEN head 未变 → 继续轮询(agent 还没 push) OPEN head 变 + CI pending → 继续轮询 OPEN head 变 + CI failure → done(surface red,user 决定 retry) OPEN head 变 + CI success → done(clean fix) 设计: - gh CLI 而非 Octokit:复用用户已有 auth,不引入 token 管理 - 决策与 spawn 分文件:prOutcomeCheck 纯函数易测,prFetch 单独 mock 避免 Bun mock.module 进程级污染(已在 launchAutofixPr.test 注释说明) - 5s throttle:framework 每 1s 轮询,gh CLI subprocess 太重不能跟上 - 失败兜底:fetchPrHeadSha/checkPrAutofixOutcome 失败均不抛,returns null/false,framework 继续走原路径 测试: - prOutcomeCheck 9 个单测覆盖决策矩阵 - launchAutofixPr 5 个新测试:checker 注册 / fetchPrHeadSha 调用 / initialHeadSha 传 metadata / SHA 失败仍能 launch / SHA null 处理 完整方案见 docs/features/remote-agent-completion-analysis.md。 --- .../__tests__/launchAutofixPr.test.ts | 89 ++++++++++ .../__tests__/prOutcomeCheck.test.ts | 158 ++++++++++++++++++ src/commands/autofix-pr/launchAutofixPr.ts | 52 +++++- src/commands/autofix-pr/prFetch.ts | 155 +++++++++++++++++ src/commands/autofix-pr/prOutcomeCheck.ts | 123 ++++++++++++++ src/tasks/RemoteAgentTask/RemoteAgentTask.tsx | 8 + 6 files changed, 583 insertions(+), 2 deletions(-) create mode 100644 src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts create mode 100644 src/commands/autofix-pr/prFetch.ts create mode 100644 src/commands/autofix-pr/prOutcomeCheck.ts diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index 7fbd34a54..c4fcc3468 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -59,15 +59,34 @@ const getSessionUrlMock = mock( const registerCompletionHookMock = mock< (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void >(() => {}) +const registerCompletionCheckerMock = mock< + ( + taskType: string, + checker: (metadata?: unknown) => Promise, + ) => void +>(() => {}) mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({ checkRemoteAgentEligibility: checkEligibilityMock, registerRemoteAgentTask: registerMock, registerCompletionHook: registerCompletionHookMock, + registerCompletionChecker: registerCompletionCheckerMock, getRemoteTaskSessionUrl: getSessionUrlMock, formatPreconditionError: (e: { type: string }) => e.type, })) +const fetchPrHeadShaMock = mock< + (owner: string, repo: string, prNumber: number) => Promise +>(() => Promise.resolve('sha-baseline-abc123')) + +// Mock prFetch.ts (gh CLI spawn layer) — keeping the pure decision matrix +// in prOutcomeCheck.ts unmocked so its tests are unaffected by this file's +// process-global mock.module pollution. +mock.module('src/commands/autofix-pr/prFetch.js', () => ({ + fetchPrHeadSha: fetchPrHeadShaMock, + checkPrAutofixOutcome: mock(() => Promise.resolve({ completed: false })), +})) + const detectRepoMock = mock(() => Promise.resolve({ host: 'github.com', owner: 'acme', name: 'myrepo' }), ) @@ -436,6 +455,76 @@ describe('callAutofixPr · completion hook wiring (taskId mismatch regression)', }) }) +// Phase 2: completionChecker wiring + initialHeadSha capture +describe('callAutofixPr · Phase 2 completionChecker integration', () => { + test('completionChecker is registered at module load with autofix-pr type', () => { + // The registration happens during the beforeAll dynamic import; just + // verify the mock recorded a call. Filter by task type so any future + // additional registrations elsewhere don't break this assertion. + const calls = registerCompletionCheckerMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + expect(calls.length).toBeGreaterThan(0) + const hook = calls[calls.length - 1]?.[1] + expect(typeof hook).toBe('function') + }) + + test('callAutofixPr captures initialHeadSha via fetchPrHeadSha', async () => { + fetchPrHeadShaMock.mockClear() + await callAutofixPr(onDone, makeContext(), '42') + expect(fetchPrHeadShaMock).toHaveBeenCalledWith('acme', 'myrepo', 42) + }) + + test('initialHeadSha is passed into remoteTaskMetadata on register', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => + Promise.resolve('sha-from-launch'), + ) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: 'sha-from-launch', + }), + }), + ) + }) + + test('fetchPrHeadSha failure → metadata initialHeadSha undefined, launch still succeeds', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => + Promise.reject(new Error('gh not installed')), + ) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: undefined, + }), + }), + ) + // Launch must NOT fail just because SHA capture failed + const firstArg = onDone.mock.calls[0]?.[0] as string + expect(firstArg).toMatch(/Autofix launched/) + }) + + test('fetchPrHeadSha returning null → metadata initialHeadSha undefined', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => Promise.resolve(null)) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + initialHeadSha: undefined, + }), + }), + ) + }) +}) + // Cover ../index.ts load() — placed in this test file so all the heavy mocks // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics / // skillDetect) are already registered when load() dynamically imports diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts new file mode 100644 index 000000000..940c33423 --- /dev/null +++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts @@ -0,0 +1,158 @@ +import { describe, expect, test } from 'bun:test' +import { + type PrViewPayload, + summariseAutofixOutcome, +} from '../prOutcomeCheck.js' + +function basePayload(overrides: Partial = {}): PrViewPayload { + return { + headRefOid: 'sha-baseline', + state: 'OPEN', + statusCheckRollup: [], + ...overrides, + } +} + +const identity = (overrides: Partial<{ initialHeadSha: string }> = {}) => ({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: 'sha-baseline', + ...overrides, +}) + +describe('summariseAutofixOutcome · terminal PR states', () => { + test('MERGED → completed regardless of head SHA / CI', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'MERGED', headRefOid: 'sha-baseline' }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: 'acme/myrepo#42 merged. Autofix monitoring complete.', + }) + }) + + test('CLOSED → completed regardless of head SHA / CI', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'CLOSED' }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: + 'acme/myrepo#42 closed without merge. Autofix monitoring complete.', + }) + }) +}) + +describe('summariseAutofixOutcome · OPEN PR without push', () => { + test('no initialHeadSha baseline → not completed (cannot detect push)', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'OPEN' }), + identity({ initialHeadSha: undefined as unknown as string }), + ) + expect(result).toEqual({ completed: false }) + }) + + test('headRefOid unchanged → not completed (autofix has not pushed yet)', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'OPEN', headRefOid: 'sha-baseline' }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) +}) + +describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => { + test('push detected + no checks configured → completed (success)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [], + }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: 'Autofix pushed commits to acme/myrepo#42, CI green.', + }) + }) + + test('push detected + CI pending → not completed (wait for CI)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'IN_PROGRESS', conclusion: null, name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) + + test('push detected + CI all green → completed (success summary)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + expect(result.summary).toContain('acme/myrepo#42') + } + }) + + test('push detected + CI red → completed (failure summary surfaces the red)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: 'FAILURE', name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI is failing') + expect(result.summary).toContain('1/2 checks failing') + } + }) + + test('neutral / skipped conclusions count as success (not failure)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { + status: 'COMPLETED', + conclusion: 'NEUTRAL', + name: 'optional-check', + }, + { status: 'COMPLETED', conclusion: 'SKIPPED', name: 'docs-check' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + } + }) +}) diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts index 892448d75..9b5109ff5 100644 --- a/src/commands/autofix-pr/launchAutofixPr.ts +++ b/src/commands/autofix-pr/launchAutofixPr.ts @@ -13,8 +13,10 @@ import { checkRemoteAgentEligibility, formatPreconditionError, getRemoteTaskSessionUrl, + registerCompletionChecker, registerCompletionHook, registerRemoteAgentTask, + type AutofixPrRemoteTaskMetadata, type BackgroundRemoteSessionPrecondition, } from '../../tasks/RemoteAgentTask/RemoteAgentTask.js' import type { LocalJSXCommandCall } from '../../types/command.js' @@ -30,16 +32,53 @@ import { updateActiveMonitor, } from './monitorState.js' import { parseAutofixArgs } from './parseArgs.js' +import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js' import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js' +// Throttle map for the completionChecker: gh CLI is called at most once per +// PR per CHECK_INTERVAL_MS, regardless of the framework's 1s poll cadence. +// Key is `${owner}/${repo}#${prNumber}`. Cleared when the completion hook +// fires so a re-launched monitor starts with a fresh budget. +const lastCheckAt = new Map() +const CHECK_INTERVAL_MS = 5_000 + +function throttleKey(meta: AutofixPrRemoteTaskMetadata): string { + return `${meta.owner}/${meta.repo}#${meta.prNumber}` +} + +// Register the completionChecker once at module load. The framework calls it +// on every poll tick for tasks with remoteTaskType==='autofix-pr'; throttle +// inside so we don't fire gh CLI 60×/min. Returns the summary string on +// completion (becomes the task-notification body) or null to keep polling. +registerCompletionChecker('autofix-pr', async metadata => { + const meta = metadata as AutofixPrRemoteTaskMetadata | undefined + if (!meta) return null + + const key = throttleKey(meta) + const now = Date.now() + if (now - (lastCheckAt.get(key) ?? 0) < CHECK_INTERVAL_MS) return null + lastCheckAt.set(key, now) + + const result = await checkPrAutofixOutcome({ + owner: meta.owner, + repo: meta.repo, + prNumber: meta.prNumber, + initialHeadSha: meta.initialHeadSha, + }) + return result.completed ? result.summary : null +}) + // Release the singleton monitor lock when the framework transitions the // autofix task to a terminal state. Without this, the lock — keyed by the // framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap) // — would dangle past natural completion, blocking subsequent /autofix-pr // invocations until the process restarts. Registered at module load; the // framework's runCompletionHook invokes it once per terminal transition. -registerCompletionHook('autofix-pr', taskId => { +// Also clear the per-PR throttle entry so a re-launch starts fresh. +registerCompletionHook('autofix-pr', (taskId, metadata) => { clearActiveMonitor(taskId) + const meta = metadata as AutofixPrRemoteTaskMetadata | undefined + if (meta) lastCheckAt.delete(throttleKey(meta)) }) function makeErrorText(message: string, code: string): string { @@ -286,6 +325,15 @@ export const callAutofixPr: LocalJSXCommandCall = async ( return null } + // 4.8b capture PR head SHA before registering so the completionChecker + // can detect when the agent has pushed new commits. Best-effort — if gh + // is unavailable or the call fails, leave initialHeadSha undefined and + // the checker falls back to terminal-state-only completion (closed / + // merged). Don't block on this; teleport succeeded already. + const initialHeadSha = + (await fetchPrHeadSha(owner, repo, prNumber).catch(() => null)) ?? + undefined + // 4.9 register task. If this throws, release the lock so the user can // retry — the remote CCR session is already created so we surface a // dedicated error code. @@ -303,7 +351,7 @@ export const callAutofixPr: LocalJSXCommandCall = async ( command: `/autofix-pr ${prNumber}`, context, isLongRunning: true, - remoteTaskMetadata: { owner, repo, prNumber }, + remoteTaskMetadata: { owner, repo, prNumber, initialHeadSha }, }) updateActiveMonitor({ taskId: frameworkTaskId }) } catch (regErr: unknown) { diff --git a/src/commands/autofix-pr/prFetch.ts b/src/commands/autofix-pr/prFetch.ts new file mode 100644 index 000000000..63245c7ae --- /dev/null +++ b/src/commands/autofix-pr/prFetch.ts @@ -0,0 +1,155 @@ +// gh CLI integration for autofix-pr: fetches PR snapshots and feeds them +// through the pure decision matrix in prOutcomeCheck.ts. Kept separate so +// tests of the decision matrix never have to mock node:child_process — and +// tests of callAutofixPr can mock this module without polluting the pure +// decision matrix module (Bun mock.module is process-global). + +import { spawn } from 'node:child_process' +import { + type AutofixOutcomeProbeResult, + type PrViewPayload, + summariseAutofixOutcome, +} from './prOutcomeCheck.js' + +export interface AutofixOutcomeProbeInput { + owner: string + repo: string + prNumber: number + /** + * Head commit SHA captured at /autofix-pr launch. When this differs from + * the current head, autofix has pushed at least one commit. + */ + initialHeadSha?: string + /** + * Timeout for the gh CLI invocation. Caller is the framework's per-tick + * poller, so failures must be bounded — a hung gh process would stall + * the entire poll loop. + */ + timeoutMs?: number +} + +const DEFAULT_TIMEOUT_MS = 5_000 + +/** + * Fetch the PR's current head SHA, state, and CI rollup, and decide whether + * autofix has finished. Returns `{ completed: true, summary }` if so; + * otherwise `{ completed: false }`. Never throws. + */ +export async function checkPrAutofixOutcome( + input: AutofixOutcomeProbeInput, +): Promise { + const { owner, repo, prNumber, initialHeadSha, timeoutMs } = input + + let payload: PrViewPayload + try { + payload = await runGhPrView( + owner, + repo, + prNumber, + timeoutMs ?? DEFAULT_TIMEOUT_MS, + ) + } catch { + return { completed: false } + } + + return summariseAutofixOutcome(payload, { + owner, + repo, + prNumber, + initialHeadSha, + }) +} + +/** + * Resolve the PR's current head commit SHA. Used at /autofix-pr launch to + * capture a baseline; later compared against the live SHA to detect pushes. + * Returns null on any failure (network, missing gh, permissions) — the + * caller treats null as "no baseline" and falls back to terminal-state-only + * completion detection. + */ +export async function fetchPrHeadSha( + owner: string, + repo: string, + prNumber: number, + timeoutMs = DEFAULT_TIMEOUT_MS, +): Promise { + try { + const payload = await runGhPrView(owner, repo, prNumber, timeoutMs) + return payload.headRefOid || null + } catch { + return null + } +} + +interface SpawnError extends Error { + code?: string +} + +/** + * Spawn `gh pr view {n} --repo {owner}/{repo} --json ...` and parse the + * result. Rejects on non-zero exit, timeout, or JSON parse failure. + */ +function runGhPrView( + owner: string, + repo: string, + prNumber: number, + timeoutMs: number, +): Promise { + return new Promise((resolve, reject) => { + const proc = spawn( + 'gh', + [ + 'pr', + 'view', + String(prNumber), + '--repo', + `${owner}/${repo}`, + '--json', + 'headRefOid,state,statusCheckRollup', + ], + { stdio: ['ignore', 'pipe', 'pipe'] }, + ) + const stdoutChunks: Buffer[] = [] + const stderrChunks: Buffer[] = [] + let settled = false + + const timer = setTimeout(() => { + if (settled) return + settled = true + proc.kill('SIGKILL') + reject(new Error(`gh pr view timed out after ${timeoutMs}ms`)) + }, timeoutMs) + + proc.stdout.on('data', chunk => stdoutChunks.push(chunk as Buffer)) + proc.stderr.on('data', chunk => stderrChunks.push(chunk as Buffer)) + + proc.on('error', (err: SpawnError) => { + if (settled) return + settled = true + clearTimeout(timer) + reject(err) + }) + + proc.on('close', code => { + if (settled) return + settled = true + clearTimeout(timer) + if (code !== 0) { + const stderr = Buffer.concat(stderrChunks).toString('utf8').trim() + reject( + new Error(`gh pr view exited ${code}: ${stderr || ''}`), + ) + return + } + const stdout = Buffer.concat(stdoutChunks).toString('utf8').trim() + try { + const parsed = JSON.parse(stdout) as PrViewPayload + resolve(parsed) + } catch (e) { + reject( + new Error(`gh pr view JSON parse failed: ${(e as Error).message}`), + ) + } + }) + }) +} diff --git a/src/commands/autofix-pr/prOutcomeCheck.ts b/src/commands/autofix-pr/prOutcomeCheck.ts new file mode 100644 index 000000000..4d77ade4f --- /dev/null +++ b/src/commands/autofix-pr/prOutcomeCheck.ts @@ -0,0 +1,123 @@ +// Pure decision matrix for autofix-pr completion detection. +// +// Given a snapshot of the PR (state, head SHA, CI rollup) and a baseline +// head SHA captured at /autofix-pr launch, decide whether autofix has +// finished. No side effects — extracted from the gh CLI invocation in +// prFetch.ts so unit tests can exercise every branch without spawning +// subprocesses. + +export type AutofixOutcomeProbeResult = + | { completed: true; summary: string } + | { completed: false } + +export interface PrViewPayload { + headRefOid: string + state: 'OPEN' | 'CLOSED' | 'MERGED' + statusCheckRollup?: Array<{ + conclusion?: string | null + status?: string | null + name?: string + }> +} + +export interface AutofixOutcomeIdentity { + owner: string + repo: string + prNumber: number + /** + * Head commit SHA captured at /autofix-pr launch. When this differs from + * the current head, autofix has pushed at least one commit. Optional — + * absence means we can only finish on terminal PR states (merged/closed). + */ + initialHeadSha?: string +} + +/** + * Pure judgement of whether autofix has finished, given a PR snapshot and + * the baseline head SHA. Decision matrix: + * - MERGED → done (merged) + * - CLOSED (not merged) → done (closed without fix) + * - OPEN, no baseline → keep polling + * - OPEN, head unchanged → keep polling (agent hasn't pushed) + * - OPEN, head changed, CI pending → keep polling (wait for CI) + * - OPEN, head changed, CI failure → done (surface red so user can retry) + * - OPEN, head changed, CI success → done (clean fix) + */ +export function summariseAutofixOutcome( + payload: PrViewPayload, + identity: AutofixOutcomeIdentity, +): AutofixOutcomeProbeResult { + const { owner, repo, prNumber, initialHeadSha } = identity + + if (payload.state === 'MERGED') { + return { + completed: true, + summary: `${owner}/${repo}#${prNumber} merged. Autofix monitoring complete.`, + } + } + if (payload.state === 'CLOSED') { + return { + completed: true, + summary: `${owner}/${repo}#${prNumber} closed without merge. Autofix monitoring complete.`, + } + } + + if (!initialHeadSha) return { completed: false } + if (payload.headRefOid === initialHeadSha) return { completed: false } + + const ciState = summariseCiRollup(payload.statusCheckRollup) + if (ciState.state === 'pending') return { completed: false } + if (ciState.state === 'failure') { + return { + completed: true, + summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber} but CI is failing (${ciState.detail}).`, + } + } + return { + completed: true, + summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber}, CI green.`, + } +} + +interface CiSummary { + state: 'success' | 'pending' | 'failure' + detail: string +} + +function summariseCiRollup( + rollup: PrViewPayload['statusCheckRollup'], +): CiSummary { + if (!rollup || rollup.length === 0) { + // No checks configured on this repo — treat as success so completion + // can fire on push alone. PRs without CI are perfectly valid. + return { state: 'success', detail: 'no checks configured' } + } + let pending = 0 + let failed = 0 + const total = rollup.length + for (const check of rollup) { + const status = (check.status ?? '').toUpperCase() + const conclusion = (check.conclusion ?? '').toUpperCase() + if (status && status !== 'COMPLETED') { + pending++ + continue + } + if ( + conclusion === 'SUCCESS' || + conclusion === 'NEUTRAL' || + conclusion === 'SKIPPED' + ) { + continue + } + if (conclusion === '') { + pending++ + continue + } + failed++ + } + if (pending > 0) + return { state: 'pending', detail: `${pending}/${total} checks pending` } + if (failed > 0) + return { state: 'failure', detail: `${failed}/${total} checks failing` } + return { state: 'success', detail: `${total}/${total} checks passing` } +} diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx index 881046322..712fc61f5 100644 --- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx +++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx @@ -91,6 +91,14 @@ export type AutofixPrRemoteTaskMetadata = { owner: string; repo: string; prNumber: number; + /** + * PR head commit SHA captured at /autofix-pr launch. The completionChecker + * compares this against the live head to detect when the agent has pushed + * new commits. Optional because gh CLI may be unavailable at launch — in + * that case the checker falls back to terminal-state-only completion. + * Survives --resume via the session sidecar. + */ + initialHeadSha?: string; }; export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata; From 873f0a03acb2fe19e1bf5440948e18719d3c0989 Mon Sep 17 00:00:00 2001 From: unraid Date: Mon, 18 May 2026 22:06:18 +0800 Subject: [PATCH 3/6] =?UTF-8?q?feat(autofix-pr):=20=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E5=9B=9E=E6=B5=81=E8=AE=A9=E6=9C=AC=E5=9C=B0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E8=AF=BB=E5=88=B0=20PR=20=E4=BF=AE=E5=A4=8D=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 of remote-agent completion loop。Phase 2 注册了 completionChecker 让框架能在 PR 合并/关闭/有 push+CI 绿时主动完成 task,但 task-notification 仍然只携带 generic 文本(""${owner}/${repo}#42 merged"")。Phase 3 让本地 模型读到远端 agent 自己产出的结构化结果(commits 列表、files 列表、CI 状态、人类可读 summary)。 实现: - 新增 extractAutofixResultFromLog (src/commands/autofix-pr/ extractAutofixResult.ts):从 SDKMessage[] 中扫 tag, 优先 hook stdout 后 fallback assistant text,latest-wins。10 个单测。 - RemoteAgentTask 新增 registerContentExtractor 注册式 API + 私有 enqueueRichRemoteNotification(参考 enqueueRemoteReviewNotification), 在 3 个 generic 完成路径(archived / completionChecker / result-driven) 先尝试 tryExtractRichContent,有内容用 rich 变体,没有走 generic。 isRemoteReview 路径不变(它走自己的 enqueueRemoteReviewNotification)。 - launchAutofixPr.ts 模块顶部 registerContentExtractor('autofix-pr', extractAutofixResultFromLog)。initialMessage 加 输出 指令(pr-number / commits-pushed / files-changed / ci-status / summary)。 设计: - 注册式 API(同 Phase 1 hook + Phase 2 checker):framework 不反向依赖 命令模块,所有 PR-specific 逻辑在 autofix-pr/ - latest-wins:agent 重试时只取最新 tag,旧 tag 不会污染 - truncated tag → null:开 tag 无对应闭 tag 视为不完整,走 generic fallback - 跨 message 不拼接:开 tag 和闭 tag 在不同 message 视为不完整(避免 误拼字符串) - 字符串 content 不解析:assistant.message.content 为 string(非 block array)的少见路径直接 skip,不 crash 测试: - extractAutofixResultFromLog 10 个单测(空 log / 无 tag / hook stdout / assistant text / hook_response subtype / 多 tag latest-wins / 截断 / hook 后于 assistant 的优先级 / 跨 message 不拼接 / 字符串 content graceful) - launchAutofixPr 3 个新测试(extractor 注册 / initialMessage 含 tag schema / extractor 真实行为) 完整方案见 docs/features/remote-agent-completion-analysis.md 第 5.3 节。 --- .../__tests__/extractAutofixResult.test.ts | 123 ++++++++++++++++++ .../__tests__/launchAutofixPr.test.ts | 58 +++++++++ .../autofix-pr/extractAutofixResult.ts | 84 ++++++++++++ src/commands/autofix-pr/launchAutofixPr.ts | 27 +++- src/tasks/RemoteAgentTask/RemoteAgentTask.tsx | 112 +++++++++++++++- 5 files changed, 400 insertions(+), 4 deletions(-) create mode 100644 src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts create mode 100644 src/commands/autofix-pr/extractAutofixResult.ts diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts new file mode 100644 index 000000000..475ea1ad5 --- /dev/null +++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts @@ -0,0 +1,123 @@ +import { describe, expect, test } from 'bun:test' +import type { SDKMessage } from '../../../entrypoints/agentSdkTypes.js' +import { + AUTOFIX_RESULT_TAG, + extractAutofixResultFromLog, +} from '../extractAutofixResult.js' + +function hookProgressMessage(stdout: string): SDKMessage { + return { + type: 'system', + subtype: 'hook_progress', + stdout, + } as unknown as SDKMessage +} + +function assistantTextMessage(text: string): SDKMessage { + return { + type: 'assistant', + message: { + content: [{ type: 'text', text }], + }, + } as unknown as SDKMessage +} + +const sampleTag = (summary: string): string => + `<${AUTOFIX_RESULT_TAG}> + 42 + + ${summary} + + green + ${summary} +` + +describe('extractAutofixResultFromLog', () => { + test('returns null on empty log', () => { + expect(extractAutofixResultFromLog([])).toBeNull() + }) + + test('returns null when no tag present', () => { + const log = [ + assistantTextMessage('just some normal text without the tag'), + hookProgressMessage('hook output without tag'), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('extracts from hook stdout', () => { + const tag = sampleTag('fixed lint error') + const log = [hookProgressMessage(`prefix\n${tag}\nsuffix`)] + const result = extractAutofixResultFromLog(log) + expect(result).toBe(tag) + }) + + test('extracts from assistant text', () => { + const tag = sampleTag('typecheck fixed') + const log = [assistantTextMessage(`Done!\n${tag}`)] + expect(extractAutofixResultFromLog(log)).toBe(tag) + }) + + test('extracts from hook_response subtype too', () => { + const tag = sampleTag('via hook_response') + const log = [ + { + type: 'system', + subtype: 'hook_response', + stdout: tag, + } as unknown as SDKMessage, + ] + expect(extractAutofixResultFromLog(log)).toBe(tag) + }) + + test('returns the latest tag when multiple appear in different messages', () => { + const older = sampleTag('older attempt') + const newer = sampleTag('newer attempt') + const log = [ + assistantTextMessage(`first try\n${older}`), + assistantTextMessage(`retry\n${newer}`), + ] + expect(extractAutofixResultFromLog(log)).toBe(newer) + }) + + test('returns null when open tag exists but close tag is missing (truncated)', () => { + const log = [ + assistantTextMessage( + `<${AUTOFIX_RESULT_TAG}>\ngot cut off mid-write...`, + ), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => { + const earlier = sampleTag('via assistant first') + const later = sampleTag('via hook later') + const log = [ + assistantTextMessage(`some output\n${earlier}`), + hookProgressMessage(later), + ] + expect(extractAutofixResultFromLog(log)).toBe(later) + }) + + test('ignores tag-shaped strings that span across messages (no concatenation)', () => { + // Open tag in one message, close tag in another — should NOT be stitched. + const log = [ + assistantTextMessage(`<${AUTOFIX_RESULT_TAG}>\npart 1`), + assistantTextMessage(`part 2\n`), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('extracts when assistant content is a string (not block array)', () => { + // Some SDK paths emit assistant content as a raw string instead of + // a content-block array. Current implementation skips those — verify + // graceful no-op rather than crash. + const log = [ + { + type: 'assistant', + message: { content: sampleTag('string content') }, + } as unknown as SDKMessage, + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) +}) diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index c4fcc3468..73b183b78 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -65,12 +65,16 @@ const registerCompletionCheckerMock = mock< checker: (metadata?: unknown) => Promise, ) => void >(() => {}) +const registerContentExtractorMock = mock< + (taskType: string, extractor: (log: unknown[]) => string | null) => void +>(() => {}) mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({ checkRemoteAgentEligibility: checkEligibilityMock, registerRemoteAgentTask: registerMock, registerCompletionHook: registerCompletionHookMock, registerCompletionChecker: registerCompletionCheckerMock, + registerContentExtractor: registerContentExtractorMock, getRemoteTaskSessionUrl: getSessionUrlMock, formatPreconditionError: (e: { type: string }) => e.type, })) @@ -525,6 +529,60 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => { }) }) +// Phase 3: content extractor wiring + initialMessage tag instruction +describe('callAutofixPr · Phase 3 content extractor integration', () => { + test('registerContentExtractor is called at module load with autofix-pr type', () => { + const calls = registerContentExtractorMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + expect(calls.length).toBeGreaterThan(0) + const extractor = calls[calls.length - 1]?.[1] + expect(typeof extractor).toBe('function') + }) + + test('initialMessage instructs the remote agent to emit an tag', async () => { + await callAutofixPr(onDone, makeContext(), '42') + // teleportMock's typed signature has no args, so calls[0] is a + // zero-length tuple. We know teleportToRemote is invoked with one + // options object, so double-cast through unknown to read the args. + const calls = teleportMock.mock.calls as unknown as Array< + [{ initialMessage?: string }] + > + const teleportArgs = calls[0]?.[0] + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + }) + + test('registered extractor returns string for valid log and null for empty', () => { + const calls = registerContentExtractorMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const extractor = calls[calls.length - 1]?.[1] as + | ((log: unknown[]) => string | null) + | undefined + expect(extractor).toBeDefined() + // Empty log → null + expect(extractor?.([])).toBeNull() + // Log with assistant text containing tag → returns it + const logWithTag = [ + { + type: 'assistant', + message: { + content: [ + { + type: 'text', + text: 'done\nx', + }, + ], + }, + }, + ] + expect(extractor?.(logWithTag)).toContain('') + }) +}) + // Cover ../index.ts load() — placed in this test file so all the heavy mocks // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics / // skillDetect) are already registered when load() dynamically imports diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts new file mode 100644 index 000000000..7fb8301e3 --- /dev/null +++ b/src/commands/autofix-pr/extractAutofixResult.ts @@ -0,0 +1,84 @@ +// Extract the tag from a remote autofix-pr session log. +// +// The remote agent emits a structured XML block as its final message +// (initialMessage in launchAutofixPr.ts instructs it to). The tag carries +// PR-specific outcome data — commits pushed, files changed, CI status, +// summary — that the framework's generic "task completed" notification +// can't convey. We surface it to the local model by injecting the tag +// verbatim into the message queue (analogous to handling). +// +// Resilient to two production realities: +// 1. The tag may appear in either an assistant text block or a hook +// stdout (some autofix skills wrap the final report in a hook). +// 2. The tag may not appear at all (older agents, truncated runs) — +// caller falls back to generic completion notification. + +import type { + SDKAssistantMessage, + SDKMessage, +} from '../../entrypoints/agentSdkTypes.js' + +export const AUTOFIX_RESULT_TAG = 'autofix-result' + +const TAG_OPEN = `<${AUTOFIX_RESULT_TAG}>` +const TAG_CLOSE = `` + +/** + * Walk the session log for an tag. Returns the full tag + * (including delimiters) so the caller can inject it as-is into the + * notification; returns null if no tag is present. + * + * Search order: + * 1. Latest hook_progress / hook_response stdout (autofix skills that + * use hooks to format the report write here first). + * 2. Latest assistant text block (agents that don't use hooks write the + * tag inline in their final message). + * + * Latest-wins so re-tries within the same session don't surface stale + * earlier results. + */ +export function extractAutofixResultFromLog(log: SDKMessage[]): string | null { + // Walk backwards so we hit the most recent tag first. + for (let i = log.length - 1; i >= 0; i--) { + const msg = log[i] + if (!msg) continue + + // Hook stdout (system messages of subtype hook_progress / hook_response). + if ( + msg.type === 'system' && + (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response') + ) { + const stdout = (msg as { stdout?: unknown }).stdout + if (typeof stdout === 'string') { + const extracted = extractBetween(stdout, TAG_OPEN, TAG_CLOSE) + if (extracted) return extracted + } + continue + } + + // Assistant text blocks. + if (msg.type === 'assistant') { + const content = (msg as SDKAssistantMessage).message?.content + if (!content || typeof content === 'string') continue + for (const block of content as Array<{ type: string; text?: string }>) { + if (block.type !== 'text' || typeof block.text !== 'string') continue + if (!block.text.includes(TAG_OPEN)) continue + const extracted = extractBetween(block.text, TAG_OPEN, TAG_CLOSE) + if (extracted) return extracted + } + } + } + return null +} + +function extractBetween( + text: string, + open: string, + close: string, +): string | null { + const start = text.lastIndexOf(open) + if (start === -1) return null + const end = text.indexOf(close, start + open.length) + if (end === -1) return null + return text.slice(start, end + close.length) +} diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts index 9b5109ff5..f33c3e94f 100644 --- a/src/commands/autofix-pr/launchAutofixPr.ts +++ b/src/commands/autofix-pr/launchAutofixPr.ts @@ -15,6 +15,7 @@ import { getRemoteTaskSessionUrl, registerCompletionChecker, registerCompletionHook, + registerContentExtractor, registerRemoteAgentTask, type AutofixPrRemoteTaskMetadata, type BackgroundRemoteSessionPrecondition, @@ -31,6 +32,7 @@ import { trySetActiveMonitor, updateActiveMonitor, } from './monitorState.js' +import { extractAutofixResultFromLog } from './extractAutofixResult.js' import { parseAutofixArgs } from './parseArgs.js' import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js' import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js' @@ -81,6 +83,13 @@ registerCompletionHook('autofix-pr', (taskId, metadata) => { if (meta) lastCheckAt.delete(throttleKey(meta)) }) +// Phase 3 content return: extract the tag from the session +// log so the local model sees the agent's structured outcome (commits +// pushed, files changed, CI status) inline in the completion task- +// notification — instead of just a file-path pointer. The framework falls +// back to the generic notification if extraction returns null. +registerContentExtractor('autofix-pr', log => extractAutofixResultFromLog(log)) + function makeErrorText(message: string, code: string): string { logEvent('tengu_autofix_pr_result', { result: @@ -249,7 +258,23 @@ export const callAutofixPr: LocalJSXCommandCall = async ( // 4.5 compose message const target = `${owner}/${repo}#${prNumber}` const branchName = `refs/pull/${prNumber}/head` - const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}` + const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint} + +When you finish (or hit a blocker you can't recover from), output the following XML tag as your final message so the local user gets a structured summary: + + + ${prNumber} + + commit message + + + N changes + + green | red | pending | unknown + One-sentence summary of what was fixed or why it could not be fixed. + + +If no fix was needed, omit and and explain in . If you only attempted partial work, list the commits you did push and explain the remainder in .` // 4.6 in-process teammate const teammate = createAutofixTeammate(initialMessage, target) diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx index 712fc61f5..1acffe20f 100644 --- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx +++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx @@ -133,6 +133,39 @@ export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: Remo const completionHooks = new Map(); +/** + * Inspect a completed remote task's accumulated log and return an XML fragment + * to inject inline into the completion task-notification. Returning null falls + * back to the framework's generic "task completed" notification (file-path + * pointer only). Used by command modules whose remote agents emit structured + * outcome tags the local model should read directly. + */ +export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null; + +const contentExtractors = new Map(); + +/** + * Register a content extractor for a remote task type. Called once per + * completion in the generic completion branches (archived, completionChecker, + * result-driven). isRemoteReview tasks have their own bespoke path and skip + * extractors entirely. Errors propagate to the framework which logs and falls + * back to generic notification. + */ +export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void { + contentExtractors.set(remoteTaskType, extractor); +} + +function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null { + const extractor = contentExtractors.get(task.remoteTaskType); + if (!extractor) return null; + try { + return extractor(log); + } catch (e) { + logError(e); + return null; + } +} + /** * Register a completion hook for a remote task type. Invoked once after the * task reaches a terminal state in any of the framework's completion branches @@ -253,6 +286,41 @@ function enqueueRemoteNotification( enqueuePendingNotification({ value: message, mode: 'task-notification' }); } +/** + * Same as enqueueRemoteNotification but inlines a structured XML fragment + * (returned by a registered RemoteTaskContentExtractor) so the local model + * reads the remote agent's outcome directly instead of having to follow a + * file-path pointer. Mode is still 'task-notification' — the framing XML is + * the same, only the body differs. + */ +function enqueueRichRemoteNotification( + taskId: string, + title: string, + status: 'completed' | 'failed' | 'killed', + richContent: string, + setAppState: SetAppState, + toolUseId?: string, +): void { + if (!markTaskNotified(taskId, setAppState)) return; + + const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped'; + const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}` : ''; + const outputPath = getTaskOutputPath(taskId); + + const message = `<${TASK_NOTIFICATION_TAG}> +<${TASK_ID_TAG}>${taskId}${toolUseIdLine} +<${TASK_TYPE_TAG}>remote_agent +<${OUTPUT_FILE_TAG}>${outputPath} +<${STATUS_TAG}>${status} +<${SUMMARY_TAG}>Remote task "${title}" ${statusText} + +The remote agent produced the following structured outcome. Summarize the key changes for the user: + +${richContent}`; + + enqueuePendingNotification({ value: message, mode: 'task-notification' }); +} + /** * Atomically mark a task as notified. Returns true if this call flipped the * flag (caller should enqueue), false if already notified (caller should skip). @@ -718,7 +786,19 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); - enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + task.title, + 'completed', + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); @@ -732,7 +812,19 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); - enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + completionResult, + 'completed', + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); @@ -917,7 +1009,21 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => return; // Stop polling } - enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); + // finalStatus is 'completed' | 'failed' on this path — kill is a + // separate code path (RemoteAgentTask.kill) and never reaches here. + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + task.title, + finalStatus, + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); From ae7f3e22324b25d383f6dfded121af442367a68f Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 18 May 2026 15:00:58 +0000 Subject: [PATCH 4/6] =?UTF-8?q?fix(autofix-pr):=20extractBetween=20?= =?UTF-8?q?=E6=94=AF=E6=8C=81=20latest=20tag=20=E6=88=AA=E6=96=AD=E6=97=B6?= =?UTF-8?q?=E5=9B=9E=E6=BA=AF=E5=88=B0=E6=9B=B4=E6=97=A9=E5=AE=8C=E6=95=B4?= =?UTF-8?q?=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 如果远端 agent 重试时写了完整 后又开了一个被截断的 第二个 tag, 旧实现只看 lastIndexOf(open) 然后找不到 close 就返回 null, 导致前面那个完整结果被丢弃。改为从尾向首遍历所有 open tag, 返回第一个 能配对的 open/close 对。 附带: - docs/features/remote-agent-completion-analysis.md: 9 处裸 fenced block 补 language tag (text/http), 修复 markdownlint MD040 警告 - 同文件: 两处"三选项" → "三个选项" 符合中文量词习惯 --- .../__tests__/extractAutofixResult.test.ts | 10 ++++++++++ .../autofix-pr/extractAutofixResult.ts | 18 +++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts index 475ea1ad5..8be9472aa 100644 --- a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts +++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts @@ -89,6 +89,16 @@ describe('extractAutofixResultFromLog', () => { expect(extractAutofixResultFromLog(log)).toBeNull() }) + test('returns earlier complete tag when latest open tag is truncated within the same block', () => { + // Retry scenario: a full result was emitted, then a second result tag + // started but got cut off. We should surface the earlier complete pair + // rather than dropping the whole block. + const complete = sampleTag('earlier complete result') + const truncated = `<${AUTOFIX_RESULT_TAG}>\ntruncated retry...` + const log = [assistantTextMessage(`${complete}\n${truncated}`)] + expect(extractAutofixResultFromLog(log)).toBe(complete) + }) + test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => { const earlier = sampleTag('via assistant first') const later = sampleTag('via hook later') diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts index 7fb8301e3..c1ec80e22 100644 --- a/src/commands/autofix-pr/extractAutofixResult.ts +++ b/src/commands/autofix-pr/extractAutofixResult.ts @@ -71,14 +71,22 @@ export function extractAutofixResultFromLog(log: SDKMessage[]): string | null { return null } +// Walks open tags from latest to earliest, returning the first complete +// open/close pair. Guards against a truncated final tag shadowing an +// earlier complete pair within the same text block (e.g., a retry wrote a +// full result, then the model started a second tag that got cut off). function extractBetween( text: string, open: string, close: string, ): string | null { - const start = text.lastIndexOf(open) - if (start === -1) return null - const end = text.indexOf(close, start + open.length) - if (end === -1) return null - return text.slice(start, end + close.length) + let searchFrom = text.length + while (searchFrom >= 0) { + const start = text.lastIndexOf(open, searchFrom) + if (start === -1) return null + const end = text.indexOf(close, start + open.length) + if (end !== -1) return text.slice(start, end + close.length) + searchFrom = start - 1 + } + return null } From fcdf1f22227ce3cd28e30a71a51316bab122025a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 18 May 2026 15:25:00 +0000 Subject: [PATCH 5/6] =?UTF-8?q?test(autofix-pr):=20=E8=A1=A5=E9=BD=90=20co?= =?UTF-8?q?mpletionChecker=20/=20=E8=BE=B9=E7=95=8C=20CI=20=E6=A3=80?= =?UTF-8?q?=E6=9F=A5=E8=A6=86=E7=9B=96=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 针对 codecov patch coverage gap, 补足三块此前未走到的代码路径: prOutcomeCheck.ts (原 96.92%, 2 lines missing): - statusCheckRollup === undefined 路径 (与空数组分支不同, GitHub 在无 checks 配置的 PR 上直接省略字段) - COMPLETED 状态但 conclusion 为 null/空 的 in-flight 检查归为 pending launchAutofixPr.ts (原 58.33%, 15 lines missing): - registerCompletionChecker arrow body: metadata 缺失早返回 / 节流窗口内 返回 null / completed=false 返回 null / completed=true 返回 summary / initialHeadSha 透传到 checkPrAutofixOutcome - registerCompletionHook 的 if(meta) 短路两侧: 有 metadata 时清空节流条目, 无 metadata 时仍释放 active monitor lock 所有新测试沿用现有 mock.module 与 registerXxxMock.mock.calls 拉取注册 回调的模式, 无新增依赖。prOutcomeCheck 11/11 本地通过。 --- .../__tests__/launchAutofixPr.test.ts | 141 ++++++++++++++++++ .../__tests__/prOutcomeCheck.test.ts | 35 +++++ 2 files changed, 176 insertions(+) diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index 73b183b78..83fb13792 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -529,6 +529,147 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => { }) }) +// Phase 2 (cont.): exercise the registered completionChecker arrow body +// directly. The earlier suite verifies it was registered but never invokes +// the arrow itself, leaving the throttle / metadata-guard / gh-CLI dispatch +// branches uncovered. +describe('callAutofixPr · Phase 2 completionChecker arrow body', () => { + // Pull the most recent registered checker — beforeAll registers once at + // module load; nothing else re-registers across this file's tests. + function getChecker(): ( + metadata?: unknown, + ) => Promise { + const calls = registerCompletionCheckerMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const fn = calls[calls.length - 1]?.[1] + if (typeof fn !== 'function') { + throw new Error('completionChecker not registered') + } + return fn + } + + test('returns null when metadata is undefined (early guard)', async () => { + const checker = getChecker() + expect(await checker(undefined)).toBeNull() + }) + + test('returns null when checkPrAutofixOutcome reports not completed', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce( + () => Promise.resolve({ completed: false }), + ) + const checker = getChecker() + // Distinct PR number to dodge the in-process throttle map carried over + // from earlier tests. + const result = await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1001, + }) + expect(result).toBeNull() + }) + + test('returns the summary string when checkPrAutofixOutcome reports completed', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce( + () => + Promise.resolve({ + completed: true, + summary: 'acme/myrepo#1002 merged. Autofix monitoring complete.', + }), + ) + const checker = getChecker() + const result = await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1002, + }) + expect(result).toBe( + 'acme/myrepo#1002 merged. Autofix monitoring complete.', + ) + }) + + test('passes initialHeadSha through to checkPrAutofixOutcome', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementationOnce(() => Promise.resolve({ completed: false })) + const checker = getChecker() + await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1003, + initialHeadSha: 'sha-baseline-xyz', + }) + expect(checkMock).toHaveBeenCalledWith({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1003, + initialHeadSha: 'sha-baseline-xyz', + }) + }) + + test('throttles back-to-back calls for the same PR within CHECK_INTERVAL_MS', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementation(() => Promise.resolve({ completed: false })) + const checker = getChecker() + const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1004 } + await checker(meta) + // Second call within the 5s throttle window must short-circuit to null + // without invoking the gh CLI layer again. + const callCountAfterFirst = checkMock.mock.calls.length + const result = await checker(meta) + expect(result).toBeNull() + expect(checkMock.mock.calls.length).toBe(callCountAfterFirst) + }) + + test('completionHook with metadata clears the throttle entry (re-launch can re-check immediately)', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementation(() => Promise.resolve({ completed: false })) + const checker = getChecker() + const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1005 } + await checker(meta) // populate throttle map + + // Invoke the registered completion hook with the same metadata so the + // throttle entry is wiped, then verify the next checker call dispatches + // gh CLI again instead of short-circuiting. + const hookCalls = registerCompletionHookMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const hook = hookCalls[hookCalls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('any-task-id', meta) + + const callCountBefore = checkMock.mock.calls.length + await checker(meta) + expect(checkMock.mock.calls.length).toBe(callCountBefore + 1) + }) + + test('completionHook without metadata still clears the active monitor lock', async () => { + // Lock is set via callAutofixPr; hook then invoked with undefined metadata + // to exercise the `if (meta)` short-circuit branch (the lock-clear half + // still has to run regardless of metadata presence). + await callAutofixPr(onDone, makeContext(), '42') + expect(getActiveMonitor()).not.toBeNull() + const hookCalls = registerCompletionHookMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const hook = hookCalls[hookCalls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('framework-task-id', undefined) + expect(getActiveMonitor()).toBeNull() + }) +}) + // Phase 3: content extractor wiring + initialMessage tag instruction describe('callAutofixPr · Phase 3 content extractor integration', () => { test('registerContentExtractor is called at module load with autofix-pr type', () => { diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts index 940c33423..32be7bade 100644 --- a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts +++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts @@ -133,6 +133,41 @@ describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => { } }) + test('statusCheckRollup undefined → treated as no checks configured (success)', () => { + // Distinct from empty-array: GitHub omits the field entirely on PRs + // without any configured checks. The !rollup branch covers undefined. + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: undefined, + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + } + }) + + test('check with COMPLETED status but empty conclusion → counted as pending', () => { + // Edge case: GitHub sometimes reports a check as COMPLETED with a null/ + // missing conclusion (in-flight result mid-write). The defensive branch + // treats empty conclusion after a passed status check as pending. + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: null, name: 'ci-in-flight' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) + test('neutral / skipped conclusions count as success (not failure)', () => { const result = summariseAutofixOutcome( basePayload({ From 0e001a77d391b66ef814a757e019182f81d7f241 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 18 May 2026 15:26:33 +0000 Subject: [PATCH 6/6] =?UTF-8?q?style:=20biome=20check=20--fix=20=E6=95=B4?= =?UTF-8?q?=E5=BD=A2=20launchAutofixPr.test=20=E6=96=B0=E5=A2=9E=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../autofix-pr/__tests__/launchAutofixPr.test.ts | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index 83fb13792..c34a2c3a0 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -536,9 +536,7 @@ describe('callAutofixPr · Phase 2 completionChecker integration', () => { describe('callAutofixPr · Phase 2 completionChecker arrow body', () => { // Pull the most recent registered checker — beforeAll registers once at // module load; nothing else re-registers across this file's tests. - function getChecker(): ( - metadata?: unknown, - ) => Promise { + function getChecker(): (metadata?: unknown) => Promise { const calls = registerCompletionCheckerMock.mock.calls.filter( c => c[0] === 'autofix-pr', ) @@ -585,16 +583,16 @@ describe('callAutofixPr · Phase 2 completionChecker arrow body', () => { repo: 'myrepo', prNumber: 1002, }) - expect(result).toBe( - 'acme/myrepo#1002 merged. Autofix monitoring complete.', - ) + expect(result).toBe('acme/myrepo#1002 merged. Autofix monitoring complete.') }) test('passes initialHeadSha through to checkPrAutofixOutcome', async () => { const { checkPrAutofixOutcome } = await import('../prFetch.js') const checkMock = checkPrAutofixOutcome as ReturnType checkMock.mockClear() - checkMock.mockImplementationOnce(() => Promise.resolve({ completed: false })) + checkMock.mockImplementationOnce(() => + Promise.resolve({ completed: false }), + ) const checker = getChecker() await checker({ owner: 'acme',