diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
new file mode 100644
index 000000000..8be9472aa
--- /dev/null
+++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts
@@ -0,0 +1,133 @@
+import { describe, expect, test } from 'bun:test'
+import type { SDKMessage } from '../../../entrypoints/agentSdkTypes.js'
+import {
+ AUTOFIX_RESULT_TAG,
+ extractAutofixResultFromLog,
+} from '../extractAutofixResult.js'
+
+function hookProgressMessage(stdout: string): SDKMessage {
+ return {
+ type: 'system',
+ subtype: 'hook_progress',
+ stdout,
+ } as unknown as SDKMessage
+}
+
+function assistantTextMessage(text: string): SDKMessage {
+ return {
+ type: 'assistant',
+ message: {
+ content: [{ type: 'text', text }],
+ },
+ } as unknown as SDKMessage
+}
+
+const sampleTag = (summary: string): string =>
+ `<${AUTOFIX_RESULT_TAG}>
+ 42
+
+ ${summary}
+
+ green
+ ${summary}
+${AUTOFIX_RESULT_TAG}>`
+
+describe('extractAutofixResultFromLog', () => {
+ test('returns null on empty log', () => {
+ expect(extractAutofixResultFromLog([])).toBeNull()
+ })
+
+ test('returns null when no tag present', () => {
+ const log = [
+ assistantTextMessage('just some normal text without the tag'),
+ hookProgressMessage('hook output without tag'),
+ ]
+ expect(extractAutofixResultFromLog(log)).toBeNull()
+ })
+
+ test('extracts from hook stdout', () => {
+ const tag = sampleTag('fixed lint error')
+ const log = [hookProgressMessage(`prefix\n${tag}\nsuffix`)]
+ const result = extractAutofixResultFromLog(log)
+ expect(result).toBe(tag)
+ })
+
+ test('extracts from assistant text', () => {
+ const tag = sampleTag('typecheck fixed')
+ const log = [assistantTextMessage(`Done!\n${tag}`)]
+ expect(extractAutofixResultFromLog(log)).toBe(tag)
+ })
+
+ test('extracts from hook_response subtype too', () => {
+ const tag = sampleTag('via hook_response')
+ const log = [
+ {
+ type: 'system',
+ subtype: 'hook_response',
+ stdout: tag,
+ } as unknown as SDKMessage,
+ ]
+ expect(extractAutofixResultFromLog(log)).toBe(tag)
+ })
+
+ test('returns the latest tag when multiple appear in different messages', () => {
+ const older = sampleTag('older attempt')
+ const newer = sampleTag('newer attempt')
+ const log = [
+ assistantTextMessage(`first try\n${older}`),
+ assistantTextMessage(`retry\n${newer}`),
+ ]
+ expect(extractAutofixResultFromLog(log)).toBe(newer)
+ })
+
+ test('returns null when open tag exists but close tag is missing (truncated)', () => {
+ const log = [
+ assistantTextMessage(
+ `<${AUTOFIX_RESULT_TAG}>\ngot cut off mid-write...`,
+ ),
+ ]
+ expect(extractAutofixResultFromLog(log)).toBeNull()
+ })
+
+ test('returns earlier complete tag when latest open tag is truncated within the same block', () => {
+ // Retry scenario: a full result was emitted, then a second result tag
+ // started but got cut off. We should surface the earlier complete pair
+ // rather than dropping the whole block.
+ const complete = sampleTag('earlier complete result')
+ const truncated = `<${AUTOFIX_RESULT_TAG}>\ntruncated retry...`
+ const log = [assistantTextMessage(`${complete}\n${truncated}`)]
+ expect(extractAutofixResultFromLog(log)).toBe(complete)
+ })
+
+ test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => {
+ const earlier = sampleTag('via assistant first')
+ const later = sampleTag('via hook later')
+ const log = [
+ assistantTextMessage(`some output\n${earlier}`),
+ hookProgressMessage(later),
+ ]
+ expect(extractAutofixResultFromLog(log)).toBe(later)
+ })
+
+ test('ignores tag-shaped strings that span across messages (no concatenation)', () => {
+ // Open tag in one message, close tag in another — should NOT be stitched.
+ const log = [
+ assistantTextMessage(`<${AUTOFIX_RESULT_TAG}>\npart 1`),
+ assistantTextMessage(`part 2\n${AUTOFIX_RESULT_TAG}>`),
+ ]
+ expect(extractAutofixResultFromLog(log)).toBeNull()
+ })
+
+ test('extracts when assistant content is a string (not block array)', () => {
+ // Some SDK paths emit assistant content as a raw string instead of
+ // a content-block array. Current implementation skips those — verify
+ // graceful no-op rather than crash.
+ const log = [
+ {
+ type: 'assistant',
+ message: { content: sampleTag('string content') },
+ } as unknown as SDKMessage,
+ ]
+ expect(extractAutofixResultFromLog(log)).toBeNull()
+ })
+})
diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
index c6df04ff9..c34a2c3a0 100644
--- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
+++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts
@@ -46,7 +46,7 @@ mock.module('src/utils/teleport.js', () => ({
}))
const registerMock = mock(() => ({
- taskId: 'task-abc',
+ taskId: 'framework-task-id',
sessionId: 'session-123',
cleanup: () => {},
}))
@@ -56,14 +56,41 @@ const checkEligibilityMock = mock(() =>
const getSessionUrlMock = mock(
(id: string) => `https://claude.ai/session/${id}`,
)
+const registerCompletionHookMock = mock<
+ (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void
+>(() => {})
+const registerCompletionCheckerMock = mock<
+ (
+ taskType: string,
+ checker: (metadata?: unknown) => Promise,
+ ) => void
+>(() => {})
+const registerContentExtractorMock = mock<
+ (taskType: string, extractor: (log: unknown[]) => string | null) => void
+>(() => {})
mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({
checkRemoteAgentEligibility: checkEligibilityMock,
registerRemoteAgentTask: registerMock,
+ registerCompletionHook: registerCompletionHookMock,
+ registerCompletionChecker: registerCompletionCheckerMock,
+ registerContentExtractor: registerContentExtractorMock,
getRemoteTaskSessionUrl: getSessionUrlMock,
formatPreconditionError: (e: { type: string }) => e.type,
}))
+const fetchPrHeadShaMock = mock<
+ (owner: string, repo: string, prNumber: number) => Promise
+>(() => Promise.resolve('sha-baseline-abc123'))
+
+// Mock prFetch.ts (gh CLI spawn layer) — keeping the pure decision matrix
+// in prOutcomeCheck.ts unmocked so its tests are unaffected by this file's
+// process-global mock.module pollution.
+mock.module('src/commands/autofix-pr/prFetch.js', () => ({
+ fetchPrHeadSha: fetchPrHeadShaMock,
+ checkPrAutofixOutcome: mock(() => Promise.resolve({ completed: false })),
+}))
+
const detectRepoMock = mock(() =>
Promise.resolve({ host: 'github.com', owner: 'acme', name: 'myrepo' }),
)
@@ -375,6 +402,326 @@ describe('callAutofixPr', () => {
})
})
+// Regression suite for the taskId-mismatch latent bug + completion hook wiring.
+// Before this fix, createAutofixTeammate generated a teammate UUID, that UUID
+// was used to acquire the singleton monitor lock, and registerRemoteAgentTask
+// generated a *different* framework taskId. When the framework eventually
+// called clearActiveMonitor(frameworkTaskId) on natural completion, the guard
+// failed (active.taskId !== frameworkTaskId) and the lock stayed acquired,
+// blocking any subsequent /autofix-pr invocations in the same process.
+describe('callAutofixPr · completion hook wiring (taskId mismatch regression)', () => {
+ test('updateActiveMonitor swaps lock taskId to framework-assigned id after register', async () => {
+ await callAutofixPr(onDone, makeContext(), '42')
+ const monitor = getActiveMonitor() as { taskId: string } | null
+ expect(monitor).not.toBeNull()
+ // registerMock returns 'framework-task-id'; before the fix this would be
+ // a teammate-generated random UUID instead.
+ expect(monitor?.taskId).toBe('framework-task-id')
+ })
+
+ test('framework hook → clearActiveMonitor releases lock on natural completion', async () => {
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(getActiveMonitor()).not.toBeNull()
+
+ // Find the hook the module registered at import time. We grab the last
+ // call so re-imports across tests don't break this — only the most recent
+ // registration is what the framework would invoke now.
+ const calls = registerCompletionHookMock.mock.calls
+ expect(calls.length).toBeGreaterThan(0)
+ const lastCall = calls[calls.length - 1]
+ expect(lastCall?.[0]).toBe('autofix-pr')
+ const hook = lastCall?.[1] as (id: string, metadata?: unknown) => void
+ expect(typeof hook).toBe('function')
+
+ // Simulate the framework invoking the hook with the framework taskId
+ // after a terminal transition. Before the fix this would no-op against
+ // a lock keyed by the teammate UUID.
+ hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 })
+ expect(getActiveMonitor()).toBeNull()
+ })
+
+ test('subsequent /autofix-pr succeeds after framework hook clears the lock', async () => {
+ await callAutofixPr(onDone, makeContext(), '42')
+ // Simulate natural completion via the registered hook
+ const calls = registerCompletionHookMock.mock.calls
+ const hook = calls[calls.length - 1]?.[1] as (
+ id: string,
+ metadata?: unknown,
+ ) => void
+ hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 })
+
+ onDone.mockClear()
+ await callAutofixPr(onDone, makeContext(), '99')
+ const firstArg = onDone.mock.calls[0]?.[0] as string
+ // Should be the success path, not "already monitoring"
+ expect(firstArg).not.toMatch(/already monitoring/i)
+ expect(firstArg).toMatch(/Autofix launched/)
+ })
+})
+
+// Phase 2: completionChecker wiring + initialHeadSha capture
+describe('callAutofixPr · Phase 2 completionChecker integration', () => {
+ test('completionChecker is registered at module load with autofix-pr type', () => {
+ // The registration happens during the beforeAll dynamic import; just
+ // verify the mock recorded a call. Filter by task type so any future
+ // additional registrations elsewhere don't break this assertion.
+ const calls = registerCompletionCheckerMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ expect(calls.length).toBeGreaterThan(0)
+ const hook = calls[calls.length - 1]?.[1]
+ expect(typeof hook).toBe('function')
+ })
+
+ test('callAutofixPr captures initialHeadSha via fetchPrHeadSha', async () => {
+ fetchPrHeadShaMock.mockClear()
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(fetchPrHeadShaMock).toHaveBeenCalledWith('acme', 'myrepo', 42)
+ })
+
+ test('initialHeadSha is passed into remoteTaskMetadata on register', async () => {
+ fetchPrHeadShaMock.mockImplementationOnce(() =>
+ Promise.resolve('sha-from-launch'),
+ )
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(registerMock).toHaveBeenCalledWith(
+ expect.objectContaining({
+ remoteTaskMetadata: expect.objectContaining({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 42,
+ initialHeadSha: 'sha-from-launch',
+ }),
+ }),
+ )
+ })
+
+ test('fetchPrHeadSha failure → metadata initialHeadSha undefined, launch still succeeds', async () => {
+ fetchPrHeadShaMock.mockImplementationOnce(() =>
+ Promise.reject(new Error('gh not installed')),
+ )
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(registerMock).toHaveBeenCalledWith(
+ expect.objectContaining({
+ remoteTaskMetadata: expect.objectContaining({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 42,
+ initialHeadSha: undefined,
+ }),
+ }),
+ )
+ // Launch must NOT fail just because SHA capture failed
+ const firstArg = onDone.mock.calls[0]?.[0] as string
+ expect(firstArg).toMatch(/Autofix launched/)
+ })
+
+ test('fetchPrHeadSha returning null → metadata initialHeadSha undefined', async () => {
+ fetchPrHeadShaMock.mockImplementationOnce(() => Promise.resolve(null))
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(registerMock).toHaveBeenCalledWith(
+ expect.objectContaining({
+ remoteTaskMetadata: expect.objectContaining({
+ initialHeadSha: undefined,
+ }),
+ }),
+ )
+ })
+})
+
+// Phase 2 (cont.): exercise the registered completionChecker arrow body
+// directly. The earlier suite verifies it was registered but never invokes
+// the arrow itself, leaving the throttle / metadata-guard / gh-CLI dispatch
+// branches uncovered.
+describe('callAutofixPr · Phase 2 completionChecker arrow body', () => {
+ // Pull the most recent registered checker — beforeAll registers once at
+ // module load; nothing else re-registers across this file's tests.
+ function getChecker(): (metadata?: unknown) => Promise {
+ const calls = registerCompletionCheckerMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ const fn = calls[calls.length - 1]?.[1]
+ if (typeof fn !== 'function') {
+ throw new Error('completionChecker not registered')
+ }
+ return fn
+ }
+
+ test('returns null when metadata is undefined (early guard)', async () => {
+ const checker = getChecker()
+ expect(await checker(undefined)).toBeNull()
+ })
+
+ test('returns null when checkPrAutofixOutcome reports not completed', async () => {
+ const { checkPrAutofixOutcome } = await import('../prFetch.js')
+ ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce(
+ () => Promise.resolve({ completed: false }),
+ )
+ const checker = getChecker()
+ // Distinct PR number to dodge the in-process throttle map carried over
+ // from earlier tests.
+ const result = await checker({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 1001,
+ })
+ expect(result).toBeNull()
+ })
+
+ test('returns the summary string when checkPrAutofixOutcome reports completed', async () => {
+ const { checkPrAutofixOutcome } = await import('../prFetch.js')
+ ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce(
+ () =>
+ Promise.resolve({
+ completed: true,
+ summary: 'acme/myrepo#1002 merged. Autofix monitoring complete.',
+ }),
+ )
+ const checker = getChecker()
+ const result = await checker({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 1002,
+ })
+ expect(result).toBe('acme/myrepo#1002 merged. Autofix monitoring complete.')
+ })
+
+ test('passes initialHeadSha through to checkPrAutofixOutcome', async () => {
+ const { checkPrAutofixOutcome } = await import('../prFetch.js')
+ const checkMock = checkPrAutofixOutcome as ReturnType
+ checkMock.mockClear()
+ checkMock.mockImplementationOnce(() =>
+ Promise.resolve({ completed: false }),
+ )
+ const checker = getChecker()
+ await checker({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 1003,
+ initialHeadSha: 'sha-baseline-xyz',
+ })
+ expect(checkMock).toHaveBeenCalledWith({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 1003,
+ initialHeadSha: 'sha-baseline-xyz',
+ })
+ })
+
+ test('throttles back-to-back calls for the same PR within CHECK_INTERVAL_MS', async () => {
+ const { checkPrAutofixOutcome } = await import('../prFetch.js')
+ const checkMock = checkPrAutofixOutcome as ReturnType
+ checkMock.mockClear()
+ checkMock.mockImplementation(() => Promise.resolve({ completed: false }))
+ const checker = getChecker()
+ const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1004 }
+ await checker(meta)
+ // Second call within the 5s throttle window must short-circuit to null
+ // without invoking the gh CLI layer again.
+ const callCountAfterFirst = checkMock.mock.calls.length
+ const result = await checker(meta)
+ expect(result).toBeNull()
+ expect(checkMock.mock.calls.length).toBe(callCountAfterFirst)
+ })
+
+ test('completionHook with metadata clears the throttle entry (re-launch can re-check immediately)', async () => {
+ const { checkPrAutofixOutcome } = await import('../prFetch.js')
+ const checkMock = checkPrAutofixOutcome as ReturnType
+ checkMock.mockClear()
+ checkMock.mockImplementation(() => Promise.resolve({ completed: false }))
+ const checker = getChecker()
+ const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1005 }
+ await checker(meta) // populate throttle map
+
+ // Invoke the registered completion hook with the same metadata so the
+ // throttle entry is wiped, then verify the next checker call dispatches
+ // gh CLI again instead of short-circuiting.
+ const hookCalls = registerCompletionHookMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ const hook = hookCalls[hookCalls.length - 1]?.[1] as (
+ id: string,
+ metadata?: unknown,
+ ) => void
+ hook('any-task-id', meta)
+
+ const callCountBefore = checkMock.mock.calls.length
+ await checker(meta)
+ expect(checkMock.mock.calls.length).toBe(callCountBefore + 1)
+ })
+
+ test('completionHook without metadata still clears the active monitor lock', async () => {
+ // Lock is set via callAutofixPr; hook then invoked with undefined metadata
+ // to exercise the `if (meta)` short-circuit branch (the lock-clear half
+ // still has to run regardless of metadata presence).
+ await callAutofixPr(onDone, makeContext(), '42')
+ expect(getActiveMonitor()).not.toBeNull()
+ const hookCalls = registerCompletionHookMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ const hook = hookCalls[hookCalls.length - 1]?.[1] as (
+ id: string,
+ metadata?: unknown,
+ ) => void
+ hook('framework-task-id', undefined)
+ expect(getActiveMonitor()).toBeNull()
+ })
+})
+
+// Phase 3: content extractor wiring + initialMessage tag instruction
+describe('callAutofixPr · Phase 3 content extractor integration', () => {
+ test('registerContentExtractor is called at module load with autofix-pr type', () => {
+ const calls = registerContentExtractorMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ expect(calls.length).toBeGreaterThan(0)
+ const extractor = calls[calls.length - 1]?.[1]
+ expect(typeof extractor).toBe('function')
+ })
+
+ test('initialMessage instructs the remote agent to emit an tag', async () => {
+ await callAutofixPr(onDone, makeContext(), '42')
+ // teleportMock's typed signature has no args, so calls[0] is a
+ // zero-length tuple. We know teleportToRemote is invoked with one
+ // options object, so double-cast through unknown to read the args.
+ const calls = teleportMock.mock.calls as unknown as Array<
+ [{ initialMessage?: string }]
+ >
+ const teleportArgs = calls[0]?.[0]
+ expect(teleportArgs?.initialMessage).toContain('')
+ expect(teleportArgs?.initialMessage).toContain('')
+ expect(teleportArgs?.initialMessage).toContain('')
+ expect(teleportArgs?.initialMessage).toContain('')
+ })
+
+ test('registered extractor returns string for valid log and null for empty', () => {
+ const calls = registerContentExtractorMock.mock.calls.filter(
+ c => c[0] === 'autofix-pr',
+ )
+ const extractor = calls[calls.length - 1]?.[1] as
+ | ((log: unknown[]) => string | null)
+ | undefined
+ expect(extractor).toBeDefined()
+ // Empty log → null
+ expect(extractor?.([])).toBeNull()
+ // Log with assistant text containing tag → returns it
+ const logWithTag = [
+ {
+ type: 'assistant',
+ message: {
+ content: [
+ {
+ type: 'text',
+ text: 'done\nx',
+ },
+ ],
+ },
+ },
+ ]
+ expect(extractor?.(logWithTag)).toContain('')
+ })
+})
+
// Cover ../index.ts load() — placed in this test file so all the heavy mocks
// (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics /
// skillDetect) are already registered when load() dynamically imports
diff --git a/src/commands/autofix-pr/__tests__/monitorState.test.ts b/src/commands/autofix-pr/__tests__/monitorState.test.ts
index 43ce2f091..f97fcd972 100644
--- a/src/commands/autofix-pr/__tests__/monitorState.test.ts
+++ b/src/commands/autofix-pr/__tests__/monitorState.test.ts
@@ -5,6 +5,7 @@ import {
isMonitoring,
setActiveMonitor,
trySetActiveMonitor,
+ updateActiveMonitor,
} from '../monitorState.js'
function makeState(
@@ -76,4 +77,41 @@ describe('monitorState', () => {
// First state remains
expect(getActiveMonitor()?.prNumber).toBe(1)
})
+
+ test('updateActiveMonitor returns false when no active monitor', () => {
+ expect(updateActiveMonitor({ taskId: 'task-x' })).toBe(false)
+ expect(getActiveMonitor()).toBeNull()
+ })
+
+ test('updateActiveMonitor merges partial fields into the active monitor', () => {
+ setActiveMonitor(makeState({ taskId: 'tentative-uuid' }))
+ expect(updateActiveMonitor({ taskId: 'framework-task-id' })).toBe(true)
+ const after = getActiveMonitor()
+ expect(after?.taskId).toBe('framework-task-id')
+ // Other fields untouched
+ expect(after?.owner).toBe('acme')
+ expect(after?.repo).toBe('myrepo')
+ expect(after?.prNumber).toBe(42)
+ })
+
+ test('updateActiveMonitor with new taskId makes clearActiveMonitor recognise framework taskId', () => {
+ // Reproduce the latent bug scenario: lock acquired with one taskId,
+ // framework assigns a different one. Before the fix, the framework's
+ // clearActiveMonitor(frameworkTaskId) would no-op because guard fails.
+ setActiveMonitor(makeState({ taskId: 'teammate-uuid' }))
+ // Framework cleanup using its own taskId — would fail guard before the fix
+ clearActiveMonitor('framework-uuid')
+ expect(getActiveMonitor()).not.toBeNull()
+ // After updateActiveMonitor swaps the taskId, framework cleanup works
+ updateActiveMonitor({ taskId: 'framework-uuid' })
+ clearActiveMonitor('framework-uuid')
+ expect(getActiveMonitor()).toBeNull()
+ })
+
+ test('updateActiveMonitor does not change abortController identity', () => {
+ const ac = new AbortController()
+ setActiveMonitor(makeState({ abortController: ac, taskId: 'tentative' }))
+ updateActiveMonitor({ taskId: 'updated' })
+ expect(getActiveMonitor()?.abortController).toBe(ac)
+ })
})
diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
new file mode 100644
index 000000000..32be7bade
--- /dev/null
+++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts
@@ -0,0 +1,193 @@
+import { describe, expect, test } from 'bun:test'
+import {
+ type PrViewPayload,
+ summariseAutofixOutcome,
+} from '../prOutcomeCheck.js'
+
+function basePayload(overrides: Partial = {}): PrViewPayload {
+ return {
+ headRefOid: 'sha-baseline',
+ state: 'OPEN',
+ statusCheckRollup: [],
+ ...overrides,
+ }
+}
+
+const identity = (overrides: Partial<{ initialHeadSha: string }> = {}) => ({
+ owner: 'acme',
+ repo: 'myrepo',
+ prNumber: 42,
+ initialHeadSha: 'sha-baseline',
+ ...overrides,
+})
+
+describe('summariseAutofixOutcome · terminal PR states', () => {
+ test('MERGED → completed regardless of head SHA / CI', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({ state: 'MERGED', headRefOid: 'sha-baseline' }),
+ identity(),
+ )
+ expect(result).toEqual({
+ completed: true,
+ summary: 'acme/myrepo#42 merged. Autofix monitoring complete.',
+ })
+ })
+
+ test('CLOSED → completed regardless of head SHA / CI', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({ state: 'CLOSED' }),
+ identity(),
+ )
+ expect(result).toEqual({
+ completed: true,
+ summary:
+ 'acme/myrepo#42 closed without merge. Autofix monitoring complete.',
+ })
+ })
+})
+
+describe('summariseAutofixOutcome · OPEN PR without push', () => {
+ test('no initialHeadSha baseline → not completed (cannot detect push)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({ state: 'OPEN' }),
+ identity({ initialHeadSha: undefined as unknown as string }),
+ )
+ expect(result).toEqual({ completed: false })
+ })
+
+ test('headRefOid unchanged → not completed (autofix has not pushed yet)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({ state: 'OPEN', headRefOid: 'sha-baseline' }),
+ identity(),
+ )
+ expect(result).toEqual({ completed: false })
+ })
+})
+
+describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => {
+ test('push detected + no checks configured → completed (success)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [],
+ }),
+ identity(),
+ )
+ expect(result).toEqual({
+ completed: true,
+ summary: 'Autofix pushed commits to acme/myrepo#42, CI green.',
+ })
+ })
+
+ test('push detected + CI pending → not completed (wait for CI)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [
+ { status: 'IN_PROGRESS', conclusion: null, name: 'ci' },
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+ ],
+ }),
+ identity(),
+ )
+ expect(result).toEqual({ completed: false })
+ })
+
+ test('push detected + CI all green → completed (success summary)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' },
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+ ],
+ }),
+ identity(),
+ )
+ expect(result.completed).toBe(true)
+ if (result.completed) {
+ expect(result.summary).toContain('CI green')
+ expect(result.summary).toContain('acme/myrepo#42')
+ }
+ })
+
+ test('push detected + CI red → completed (failure summary surfaces the red)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [
+ { status: 'COMPLETED', conclusion: 'FAILURE', name: 'ci' },
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+ ],
+ }),
+ identity(),
+ )
+ expect(result.completed).toBe(true)
+ if (result.completed) {
+ expect(result.summary).toContain('CI is failing')
+ expect(result.summary).toContain('1/2 checks failing')
+ }
+ })
+
+ test('statusCheckRollup undefined → treated as no checks configured (success)', () => {
+ // Distinct from empty-array: GitHub omits the field entirely on PRs
+ // without any configured checks. The !rollup branch covers undefined.
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: undefined,
+ }),
+ identity(),
+ )
+ expect(result.completed).toBe(true)
+ if (result.completed) {
+ expect(result.summary).toContain('CI green')
+ }
+ })
+
+ test('check with COMPLETED status but empty conclusion → counted as pending', () => {
+ // Edge case: GitHub sometimes reports a check as COMPLETED with a null/
+ // missing conclusion (in-flight result mid-write). The defensive branch
+ // treats empty conclusion after a passed status check as pending.
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [
+ { status: 'COMPLETED', conclusion: null, name: 'ci-in-flight' },
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' },
+ ],
+ }),
+ identity(),
+ )
+ expect(result).toEqual({ completed: false })
+ })
+
+ test('neutral / skipped conclusions count as success (not failure)', () => {
+ const result = summariseAutofixOutcome(
+ basePayload({
+ state: 'OPEN',
+ headRefOid: 'sha-new',
+ statusCheckRollup: [
+ {
+ status: 'COMPLETED',
+ conclusion: 'NEUTRAL',
+ name: 'optional-check',
+ },
+ { status: 'COMPLETED', conclusion: 'SKIPPED', name: 'docs-check' },
+ { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' },
+ ],
+ }),
+ identity(),
+ )
+ expect(result.completed).toBe(true)
+ if (result.completed) {
+ expect(result.summary).toContain('CI green')
+ }
+ })
+})
diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts
new file mode 100644
index 000000000..c1ec80e22
--- /dev/null
+++ b/src/commands/autofix-pr/extractAutofixResult.ts
@@ -0,0 +1,92 @@
+// Extract the tag from a remote autofix-pr session log.
+//
+// The remote agent emits a structured XML block as its final message
+// (initialMessage in launchAutofixPr.ts instructs it to). The tag carries
+// PR-specific outcome data — commits pushed, files changed, CI status,
+// summary — that the framework's generic "task completed" notification
+// can't convey. We surface it to the local model by injecting the tag
+// verbatim into the message queue (analogous to handling).
+//
+// Resilient to two production realities:
+// 1. The tag may appear in either an assistant text block or a hook
+// stdout (some autofix skills wrap the final report in a hook).
+// 2. The tag may not appear at all (older agents, truncated runs) —
+// caller falls back to generic completion notification.
+
+import type {
+ SDKAssistantMessage,
+ SDKMessage,
+} from '../../entrypoints/agentSdkTypes.js'
+
+export const AUTOFIX_RESULT_TAG = 'autofix-result'
+
+const TAG_OPEN = `<${AUTOFIX_RESULT_TAG}>`
+const TAG_CLOSE = `${AUTOFIX_RESULT_TAG}>`
+
+/**
+ * Walk the session log for an tag. Returns the full tag
+ * (including delimiters) so the caller can inject it as-is into the
+ * notification; returns null if no tag is present.
+ *
+ * Search order:
+ * 1. Latest hook_progress / hook_response stdout (autofix skills that
+ * use hooks to format the report write here first).
+ * 2. Latest assistant text block (agents that don't use hooks write the
+ * tag inline in their final message).
+ *
+ * Latest-wins so re-tries within the same session don't surface stale
+ * earlier results.
+ */
+export function extractAutofixResultFromLog(log: SDKMessage[]): string | null {
+ // Walk backwards so we hit the most recent tag first.
+ for (let i = log.length - 1; i >= 0; i--) {
+ const msg = log[i]
+ if (!msg) continue
+
+ // Hook stdout (system messages of subtype hook_progress / hook_response).
+ if (
+ msg.type === 'system' &&
+ (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')
+ ) {
+ const stdout = (msg as { stdout?: unknown }).stdout
+ if (typeof stdout === 'string') {
+ const extracted = extractBetween(stdout, TAG_OPEN, TAG_CLOSE)
+ if (extracted) return extracted
+ }
+ continue
+ }
+
+ // Assistant text blocks.
+ if (msg.type === 'assistant') {
+ const content = (msg as SDKAssistantMessage).message?.content
+ if (!content || typeof content === 'string') continue
+ for (const block of content as Array<{ type: string; text?: string }>) {
+ if (block.type !== 'text' || typeof block.text !== 'string') continue
+ if (!block.text.includes(TAG_OPEN)) continue
+ const extracted = extractBetween(block.text, TAG_OPEN, TAG_CLOSE)
+ if (extracted) return extracted
+ }
+ }
+ }
+ return null
+}
+
+// Walks open tags from latest to earliest, returning the first complete
+// open/close pair. Guards against a truncated final tag shadowing an
+// earlier complete pair within the same text block (e.g., a retry wrote a
+// full result, then the model started a second tag that got cut off).
+function extractBetween(
+ text: string,
+ open: string,
+ close: string,
+): string | null {
+ let searchFrom = text.length
+ while (searchFrom >= 0) {
+ const start = text.lastIndexOf(open, searchFrom)
+ if (start === -1) return null
+ const end = text.indexOf(close, start + open.length)
+ if (end !== -1) return text.slice(start, end + close.length)
+ searchFrom = start - 1
+ }
+ return null
+}
diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts
index cb4eb87f8..f33c3e94f 100644
--- a/src/commands/autofix-pr/launchAutofixPr.ts
+++ b/src/commands/autofix-pr/launchAutofixPr.ts
@@ -13,7 +13,11 @@ import {
checkRemoteAgentEligibility,
formatPreconditionError,
getRemoteTaskSessionUrl,
+ registerCompletionChecker,
+ registerCompletionHook,
+ registerContentExtractor,
registerRemoteAgentTask,
+ type AutofixPrRemoteTaskMetadata,
type BackgroundRemoteSessionPrecondition,
} from '../../tasks/RemoteAgentTask/RemoteAgentTask.js'
import type { LocalJSXCommandCall } from '../../types/command.js'
@@ -26,10 +30,66 @@ import {
getActiveMonitor,
isMonitoring,
trySetActiveMonitor,
+ updateActiveMonitor,
} from './monitorState.js'
+import { extractAutofixResultFromLog } from './extractAutofixResult.js'
import { parseAutofixArgs } from './parseArgs.js'
+import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js'
import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js'
+// Throttle map for the completionChecker: gh CLI is called at most once per
+// PR per CHECK_INTERVAL_MS, regardless of the framework's 1s poll cadence.
+// Key is `${owner}/${repo}#${prNumber}`. Cleared when the completion hook
+// fires so a re-launched monitor starts with a fresh budget.
+const lastCheckAt = new Map()
+const CHECK_INTERVAL_MS = 5_000
+
+function throttleKey(meta: AutofixPrRemoteTaskMetadata): string {
+ return `${meta.owner}/${meta.repo}#${meta.prNumber}`
+}
+
+// Register the completionChecker once at module load. The framework calls it
+// on every poll tick for tasks with remoteTaskType==='autofix-pr'; throttle
+// inside so we don't fire gh CLI 60×/min. Returns the summary string on
+// completion (becomes the task-notification body) or null to keep polling.
+registerCompletionChecker('autofix-pr', async metadata => {
+ const meta = metadata as AutofixPrRemoteTaskMetadata | undefined
+ if (!meta) return null
+
+ const key = throttleKey(meta)
+ const now = Date.now()
+ if (now - (lastCheckAt.get(key) ?? 0) < CHECK_INTERVAL_MS) return null
+ lastCheckAt.set(key, now)
+
+ const result = await checkPrAutofixOutcome({
+ owner: meta.owner,
+ repo: meta.repo,
+ prNumber: meta.prNumber,
+ initialHeadSha: meta.initialHeadSha,
+ })
+ return result.completed ? result.summary : null
+})
+
+// Release the singleton monitor lock when the framework transitions the
+// autofix task to a terminal state. Without this, the lock — keyed by the
+// framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap)
+// — would dangle past natural completion, blocking subsequent /autofix-pr
+// invocations until the process restarts. Registered at module load; the
+// framework's runCompletionHook invokes it once per terminal transition.
+// Also clear the per-PR throttle entry so a re-launch starts fresh.
+registerCompletionHook('autofix-pr', (taskId, metadata) => {
+ clearActiveMonitor(taskId)
+ const meta = metadata as AutofixPrRemoteTaskMetadata | undefined
+ if (meta) lastCheckAt.delete(throttleKey(meta))
+})
+
+// Phase 3 content return: extract the tag from the session
+// log so the local model sees the agent's structured outcome (commits
+// pushed, files changed, CI status) inline in the completion task-
+// notification — instead of just a file-path pointer. The framework falls
+// back to the generic notification if extraction returns null.
+registerContentExtractor('autofix-pr', log => extractAutofixResultFromLog(log))
+
function makeErrorText(message: string, code: string): string {
logEvent('tengu_autofix_pr_result', {
result:
@@ -198,7 +258,23 @@ export const callAutofixPr: LocalJSXCommandCall = async (
// 4.5 compose message
const target = `${owner}/${repo}#${prNumber}`
const branchName = `refs/pull/${prNumber}/head`
- const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}`
+ const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}
+
+When you finish (or hit a blocker you can't recover from), output the following XML tag as your final message so the local user gets a structured summary:
+
+
+ ${prNumber}
+
+ commit message
+
+
+ N changes
+
+ green | red | pending | unknown
+ One-sentence summary of what was fixed or why it could not be fixed.
+
+
+If no fix was needed, omit and and explain in . If you only attempted partial work, list the commits you did push and explain the remainder in .`
// 4.6 in-process teammate
const teammate = createAutofixTeammate(initialMessage, target)
@@ -274,18 +350,35 @@ export const callAutofixPr: LocalJSXCommandCall = async (
return null
}
+ // 4.8b capture PR head SHA before registering so the completionChecker
+ // can detect when the agent has pushed new commits. Best-effort — if gh
+ // is unavailable or the call fails, leave initialHeadSha undefined and
+ // the checker falls back to terminal-state-only completion (closed /
+ // merged). Don't block on this; teleport succeeded already.
+ const initialHeadSha =
+ (await fetchPrHeadSha(owner, repo, prNumber).catch(() => null)) ??
+ undefined
+
// 4.9 register task. If this throws, release the lock so the user can
// retry — the remote CCR session is already created so we surface a
// dedicated error code.
+ //
+ // After registration succeeds, swap the lock's taskId from the tentative
+ // teammate UUID (used to acquire the lock atomically before teleport) to
+ // the framework-assigned taskId. Without this swap, the framework's own
+ // cleanup path (clearActiveMonitor(frameworkTaskId) on natural completion)
+ // would no-op against a lock keyed by teammate.taskId, leaving the
+ // singleton lock dangling and blocking future /autofix-pr invocations.
try {
- registerRemoteAgentTask({
+ const { taskId: frameworkTaskId } = registerRemoteAgentTask({
remoteTaskType: 'autofix-pr',
session,
command: `/autofix-pr ${prNumber}`,
context,
isLongRunning: true,
- remoteTaskMetadata: { owner, repo, prNumber },
+ remoteTaskMetadata: { owner, repo, prNumber, initialHeadSha },
})
+ updateActiveMonitor({ taskId: frameworkTaskId })
} catch (regErr: unknown) {
clearActiveMonitor(teammate.taskId)
const regMsg = regErr instanceof Error ? regErr.message : String(regErr)
diff --git a/src/commands/autofix-pr/monitorState.ts b/src/commands/autofix-pr/monitorState.ts
index df74292f1..273f0a90c 100644
--- a/src/commands/autofix-pr/monitorState.ts
+++ b/src/commands/autofix-pr/monitorState.ts
@@ -46,6 +46,20 @@ export function clearActiveMonitor(taskId?: string): void {
active = null
}
+/**
+ * Atomically merges partial updates into the active monitor. Returns true if
+ * applied, false if no active monitor. Used when the caller needs to swap the
+ * lock's taskId after the framework assigns a different one than the
+ * tentative one used to acquire the lock — without this the framework's
+ * cleanup (clearActiveMonitor with the framework taskId) would no-op against
+ * a lock keyed by the caller's tentative id.
+ */
+export function updateActiveMonitor(partial: Partial): boolean {
+ if (!active) return false
+ active = { ...active, ...partial }
+ return true
+}
+
export function isMonitoring(
owner: string,
repo: string,
diff --git a/src/commands/autofix-pr/prFetch.ts b/src/commands/autofix-pr/prFetch.ts
new file mode 100644
index 000000000..63245c7ae
--- /dev/null
+++ b/src/commands/autofix-pr/prFetch.ts
@@ -0,0 +1,155 @@
+// gh CLI integration for autofix-pr: fetches PR snapshots and feeds them
+// through the pure decision matrix in prOutcomeCheck.ts. Kept separate so
+// tests of the decision matrix never have to mock node:child_process — and
+// tests of callAutofixPr can mock this module without polluting the pure
+// decision matrix module (Bun mock.module is process-global).
+
+import { spawn } from 'node:child_process'
+import {
+ type AutofixOutcomeProbeResult,
+ type PrViewPayload,
+ summariseAutofixOutcome,
+} from './prOutcomeCheck.js'
+
+export interface AutofixOutcomeProbeInput {
+ owner: string
+ repo: string
+ prNumber: number
+ /**
+ * Head commit SHA captured at /autofix-pr launch. When this differs from
+ * the current head, autofix has pushed at least one commit.
+ */
+ initialHeadSha?: string
+ /**
+ * Timeout for the gh CLI invocation. Caller is the framework's per-tick
+ * poller, so failures must be bounded — a hung gh process would stall
+ * the entire poll loop.
+ */
+ timeoutMs?: number
+}
+
+const DEFAULT_TIMEOUT_MS = 5_000
+
+/**
+ * Fetch the PR's current head SHA, state, and CI rollup, and decide whether
+ * autofix has finished. Returns `{ completed: true, summary }` if so;
+ * otherwise `{ completed: false }`. Never throws.
+ */
+export async function checkPrAutofixOutcome(
+ input: AutofixOutcomeProbeInput,
+): Promise {
+ const { owner, repo, prNumber, initialHeadSha, timeoutMs } = input
+
+ let payload: PrViewPayload
+ try {
+ payload = await runGhPrView(
+ owner,
+ repo,
+ prNumber,
+ timeoutMs ?? DEFAULT_TIMEOUT_MS,
+ )
+ } catch {
+ return { completed: false }
+ }
+
+ return summariseAutofixOutcome(payload, {
+ owner,
+ repo,
+ prNumber,
+ initialHeadSha,
+ })
+}
+
+/**
+ * Resolve the PR's current head commit SHA. Used at /autofix-pr launch to
+ * capture a baseline; later compared against the live SHA to detect pushes.
+ * Returns null on any failure (network, missing gh, permissions) — the
+ * caller treats null as "no baseline" and falls back to terminal-state-only
+ * completion detection.
+ */
+export async function fetchPrHeadSha(
+ owner: string,
+ repo: string,
+ prNumber: number,
+ timeoutMs = DEFAULT_TIMEOUT_MS,
+): Promise {
+ try {
+ const payload = await runGhPrView(owner, repo, prNumber, timeoutMs)
+ return payload.headRefOid || null
+ } catch {
+ return null
+ }
+}
+
+interface SpawnError extends Error {
+ code?: string
+}
+
+/**
+ * Spawn `gh pr view {n} --repo {owner}/{repo} --json ...` and parse the
+ * result. Rejects on non-zero exit, timeout, or JSON parse failure.
+ */
+function runGhPrView(
+ owner: string,
+ repo: string,
+ prNumber: number,
+ timeoutMs: number,
+): Promise {
+ return new Promise((resolve, reject) => {
+ const proc = spawn(
+ 'gh',
+ [
+ 'pr',
+ 'view',
+ String(prNumber),
+ '--repo',
+ `${owner}/${repo}`,
+ '--json',
+ 'headRefOid,state,statusCheckRollup',
+ ],
+ { stdio: ['ignore', 'pipe', 'pipe'] },
+ )
+ const stdoutChunks: Buffer[] = []
+ const stderrChunks: Buffer[] = []
+ let settled = false
+
+ const timer = setTimeout(() => {
+ if (settled) return
+ settled = true
+ proc.kill('SIGKILL')
+ reject(new Error(`gh pr view timed out after ${timeoutMs}ms`))
+ }, timeoutMs)
+
+ proc.stdout.on('data', chunk => stdoutChunks.push(chunk as Buffer))
+ proc.stderr.on('data', chunk => stderrChunks.push(chunk as Buffer))
+
+ proc.on('error', (err: SpawnError) => {
+ if (settled) return
+ settled = true
+ clearTimeout(timer)
+ reject(err)
+ })
+
+ proc.on('close', code => {
+ if (settled) return
+ settled = true
+ clearTimeout(timer)
+ if (code !== 0) {
+ const stderr = Buffer.concat(stderrChunks).toString('utf8').trim()
+ reject(
+ new Error(`gh pr view exited ${code}: ${stderr || ''}`),
+ )
+ return
+ }
+ const stdout = Buffer.concat(stdoutChunks).toString('utf8').trim()
+ try {
+ const parsed = JSON.parse(stdout) as PrViewPayload
+ resolve(parsed)
+ } catch (e) {
+ reject(
+ new Error(`gh pr view JSON parse failed: ${(e as Error).message}`),
+ )
+ }
+ })
+ })
+}
diff --git a/src/commands/autofix-pr/prOutcomeCheck.ts b/src/commands/autofix-pr/prOutcomeCheck.ts
new file mode 100644
index 000000000..4d77ade4f
--- /dev/null
+++ b/src/commands/autofix-pr/prOutcomeCheck.ts
@@ -0,0 +1,123 @@
+// Pure decision matrix for autofix-pr completion detection.
+//
+// Given a snapshot of the PR (state, head SHA, CI rollup) and a baseline
+// head SHA captured at /autofix-pr launch, decide whether autofix has
+// finished. No side effects — extracted from the gh CLI invocation in
+// prFetch.ts so unit tests can exercise every branch without spawning
+// subprocesses.
+
+export type AutofixOutcomeProbeResult =
+ | { completed: true; summary: string }
+ | { completed: false }
+
+export interface PrViewPayload {
+ headRefOid: string
+ state: 'OPEN' | 'CLOSED' | 'MERGED'
+ statusCheckRollup?: Array<{
+ conclusion?: string | null
+ status?: string | null
+ name?: string
+ }>
+}
+
+export interface AutofixOutcomeIdentity {
+ owner: string
+ repo: string
+ prNumber: number
+ /**
+ * Head commit SHA captured at /autofix-pr launch. When this differs from
+ * the current head, autofix has pushed at least one commit. Optional —
+ * absence means we can only finish on terminal PR states (merged/closed).
+ */
+ initialHeadSha?: string
+}
+
+/**
+ * Pure judgement of whether autofix has finished, given a PR snapshot and
+ * the baseline head SHA. Decision matrix:
+ * - MERGED → done (merged)
+ * - CLOSED (not merged) → done (closed without fix)
+ * - OPEN, no baseline → keep polling
+ * - OPEN, head unchanged → keep polling (agent hasn't pushed)
+ * - OPEN, head changed, CI pending → keep polling (wait for CI)
+ * - OPEN, head changed, CI failure → done (surface red so user can retry)
+ * - OPEN, head changed, CI success → done (clean fix)
+ */
+export function summariseAutofixOutcome(
+ payload: PrViewPayload,
+ identity: AutofixOutcomeIdentity,
+): AutofixOutcomeProbeResult {
+ const { owner, repo, prNumber, initialHeadSha } = identity
+
+ if (payload.state === 'MERGED') {
+ return {
+ completed: true,
+ summary: `${owner}/${repo}#${prNumber} merged. Autofix monitoring complete.`,
+ }
+ }
+ if (payload.state === 'CLOSED') {
+ return {
+ completed: true,
+ summary: `${owner}/${repo}#${prNumber} closed without merge. Autofix monitoring complete.`,
+ }
+ }
+
+ if (!initialHeadSha) return { completed: false }
+ if (payload.headRefOid === initialHeadSha) return { completed: false }
+
+ const ciState = summariseCiRollup(payload.statusCheckRollup)
+ if (ciState.state === 'pending') return { completed: false }
+ if (ciState.state === 'failure') {
+ return {
+ completed: true,
+ summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber} but CI is failing (${ciState.detail}).`,
+ }
+ }
+ return {
+ completed: true,
+ summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber}, CI green.`,
+ }
+}
+
+interface CiSummary {
+ state: 'success' | 'pending' | 'failure'
+ detail: string
+}
+
+function summariseCiRollup(
+ rollup: PrViewPayload['statusCheckRollup'],
+): CiSummary {
+ if (!rollup || rollup.length === 0) {
+ // No checks configured on this repo — treat as success so completion
+ // can fire on push alone. PRs without CI are perfectly valid.
+ return { state: 'success', detail: 'no checks configured' }
+ }
+ let pending = 0
+ let failed = 0
+ const total = rollup.length
+ for (const check of rollup) {
+ const status = (check.status ?? '').toUpperCase()
+ const conclusion = (check.conclusion ?? '').toUpperCase()
+ if (status && status !== 'COMPLETED') {
+ pending++
+ continue
+ }
+ if (
+ conclusion === 'SUCCESS' ||
+ conclusion === 'NEUTRAL' ||
+ conclusion === 'SKIPPED'
+ ) {
+ continue
+ }
+ if (conclusion === '') {
+ pending++
+ continue
+ }
+ failed++
+ }
+ if (pending > 0)
+ return { state: 'pending', detail: `${pending}/${total} checks pending` }
+ if (failed > 0)
+ return { state: 'failure', detail: `${failed}/${total} checks failing` }
+ return { state: 'success', detail: `${total}/${total} checks passing` }
+}
diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
index 641c1c461..1acffe20f 100644
--- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
+++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx
@@ -91,6 +91,14 @@ export type AutofixPrRemoteTaskMetadata = {
owner: string;
repo: string;
prNumber: number;
+ /**
+ * PR head commit SHA captured at /autofix-pr launch. The completionChecker
+ * compares this against the live head to detect when the agent has pushed
+ * new commits. Optional because gh CLI may be unavailable at launch — in
+ * that case the checker falls back to terminal-state-only completion.
+ * Survives --resume via the session sidecar.
+ */
+ initialHeadSha?: string;
};
export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata;
@@ -114,6 +122,71 @@ export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checke
completionCheckers.set(remoteTaskType, checker);
}
+/**
+ * Called after the task transitions to a terminal state and the notification
+ * has been enqueued. Used by command modules to release singleton locks,
+ * clear cached state, or perform other cleanup the framework cannot see.
+ * Hooks must be synchronous and best-effort — errors are logged but never
+ * propagate.
+ */
+export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void;
+
+const completionHooks = new Map();
+
+/**
+ * Inspect a completed remote task's accumulated log and return an XML fragment
+ * to inject inline into the completion task-notification. Returning null falls
+ * back to the framework's generic "task completed" notification (file-path
+ * pointer only). Used by command modules whose remote agents emit structured
+ * outcome tags the local model should read directly.
+ */
+export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null;
+
+const contentExtractors = new Map();
+
+/**
+ * Register a content extractor for a remote task type. Called once per
+ * completion in the generic completion branches (archived, completionChecker,
+ * result-driven). isRemoteReview tasks have their own bespoke path and skip
+ * extractors entirely. Errors propagate to the framework which logs and falls
+ * back to generic notification.
+ */
+export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void {
+ contentExtractors.set(remoteTaskType, extractor);
+}
+
+function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null {
+ const extractor = contentExtractors.get(task.remoteTaskType);
+ if (!extractor) return null;
+ try {
+ return extractor(log);
+ } catch (e) {
+ logError(e);
+ return null;
+ }
+}
+
+/**
+ * Register a completion hook for a remote task type. Invoked once after the
+ * task reaches a terminal state in any of the framework's completion branches
+ * (archived session, completionChecker, stableIdle, result). Use this to
+ * release command-module state (e.g. singleton locks) without forcing the
+ * framework to reverse-import from the command package.
+ */
+export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void {
+ completionHooks.set(remoteTaskType, hook);
+}
+
+function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void {
+ const hook = completionHooks.get(task.remoteTaskType);
+ if (!hook) return;
+ try {
+ hook(taskId, task.remoteTaskMetadata);
+ } catch (e) {
+ logError(e);
+ }
+}
+
/**
* Persist a remote-agent metadata entry to the session sidecar.
* Fire-and-forget — persistence failures must not block task registration.
@@ -213,6 +286,41 @@ function enqueueRemoteNotification(
enqueuePendingNotification({ value: message, mode: 'task-notification' });
}
+/**
+ * Same as enqueueRemoteNotification but inlines a structured XML fragment
+ * (returned by a registered RemoteTaskContentExtractor) so the local model
+ * reads the remote agent's outcome directly instead of having to follow a
+ * file-path pointer. Mode is still 'task-notification' — the framing XML is
+ * the same, only the body differs.
+ */
+function enqueueRichRemoteNotification(
+ taskId: string,
+ title: string,
+ status: 'completed' | 'failed' | 'killed',
+ richContent: string,
+ setAppState: SetAppState,
+ toolUseId?: string,
+): void {
+ if (!markTaskNotified(taskId, setAppState)) return;
+
+ const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped';
+ const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}${TOOL_USE_ID_TAG}>` : '';
+ const outputPath = getTaskOutputPath(taskId);
+
+ const message = `<${TASK_NOTIFICATION_TAG}>
+<${TASK_ID_TAG}>${taskId}${TASK_ID_TAG}>${toolUseIdLine}
+<${TASK_TYPE_TAG}>remote_agent${TASK_TYPE_TAG}>
+<${OUTPUT_FILE_TAG}>${outputPath}${OUTPUT_FILE_TAG}>
+<${STATUS_TAG}>${status}${STATUS_TAG}>
+<${SUMMARY_TAG}>Remote task "${title}" ${statusText}${SUMMARY_TAG}>
+${TASK_NOTIFICATION_TAG}>
+The remote agent produced the following structured outcome. Summarize the key changes for the user:
+
+${richContent}`;
+
+ enqueuePendingNotification({ value: message, mode: 'task-notification' });
+}
+
/**
* Atomically mark a task as notified. Returns true if this call flipped the
* flag (caller should enqueue), false if already notified (caller should skip).
@@ -678,9 +786,22 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
updateTaskState(taskId, context.setAppState, t =>
t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
);
- enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
+ const richContent = tryExtractRichContent(task, accumulatedLog);
+ if (richContent) {
+ enqueueRichRemoteNotification(
+ taskId,
+ task.title,
+ 'completed',
+ richContent,
+ context.setAppState,
+ task.toolUseId,
+ );
+ } else {
+ enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
+ }
void evictTaskOutput(taskId);
void removeRemoteAgentMetadata(taskId);
+ runCompletionHook(taskId, task);
return;
}
@@ -691,9 +812,22 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
updateTaskState(taskId, context.setAppState, t =>
t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
);
- enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
+ const richContent = tryExtractRichContent(task, accumulatedLog);
+ if (richContent) {
+ enqueueRichRemoteNotification(
+ taskId,
+ completionResult,
+ 'completed',
+ richContent,
+ context.setAppState,
+ task.toolUseId,
+ );
+ } else {
+ enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
+ }
void evictTaskOutput(taskId);
void removeRemoteAgentMetadata(taskId);
+ runCompletionHook(taskId, task);
return;
}
}
@@ -853,6 +987,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState);
void evictTaskOutput(taskId);
void removeRemoteAgentMetadata(taskId);
+ runCompletionHook(taskId, task);
return; // Stop polling
}
@@ -870,12 +1005,28 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () =>
enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState);
void evictTaskOutput(taskId);
void removeRemoteAgentMetadata(taskId);
+ runCompletionHook(taskId, task);
return; // Stop polling
}
- enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
+ // finalStatus is 'completed' | 'failed' on this path — kill is a
+ // separate code path (RemoteAgentTask.kill) and never reaches here.
+ const richContent = tryExtractRichContent(task, accumulatedLog);
+ if (richContent) {
+ enqueueRichRemoteNotification(
+ taskId,
+ task.title,
+ finalStatus,
+ richContent,
+ context.setAppState,
+ task.toolUseId,
+ );
+ } else {
+ enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
+ }
void evictTaskOutput(taskId);
void removeRemoteAgentMetadata(taskId);
+ runCompletionHook(taskId, task);
return; // Stop polling
}
} catch (error) {