diff --git a/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts new file mode 100644 index 000000000..8be9472aa --- /dev/null +++ b/src/commands/autofix-pr/__tests__/extractAutofixResult.test.ts @@ -0,0 +1,133 @@ +import { describe, expect, test } from 'bun:test' +import type { SDKMessage } from '../../../entrypoints/agentSdkTypes.js' +import { + AUTOFIX_RESULT_TAG, + extractAutofixResultFromLog, +} from '../extractAutofixResult.js' + +function hookProgressMessage(stdout: string): SDKMessage { + return { + type: 'system', + subtype: 'hook_progress', + stdout, + } as unknown as SDKMessage +} + +function assistantTextMessage(text: string): SDKMessage { + return { + type: 'assistant', + message: { + content: [{ type: 'text', text }], + }, + } as unknown as SDKMessage +} + +const sampleTag = (summary: string): string => + `<${AUTOFIX_RESULT_TAG}> + 42 + + ${summary} + + green + ${summary} +` + +describe('extractAutofixResultFromLog', () => { + test('returns null on empty log', () => { + expect(extractAutofixResultFromLog([])).toBeNull() + }) + + test('returns null when no tag present', () => { + const log = [ + assistantTextMessage('just some normal text without the tag'), + hookProgressMessage('hook output without tag'), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('extracts from hook stdout', () => { + const tag = sampleTag('fixed lint error') + const log = [hookProgressMessage(`prefix\n${tag}\nsuffix`)] + const result = extractAutofixResultFromLog(log) + expect(result).toBe(tag) + }) + + test('extracts from assistant text', () => { + const tag = sampleTag('typecheck fixed') + const log = [assistantTextMessage(`Done!\n${tag}`)] + expect(extractAutofixResultFromLog(log)).toBe(tag) + }) + + test('extracts from hook_response subtype too', () => { + const tag = sampleTag('via hook_response') + const log = [ + { + type: 'system', + subtype: 'hook_response', + stdout: tag, + } as unknown as SDKMessage, + ] + expect(extractAutofixResultFromLog(log)).toBe(tag) + }) + + test('returns the latest tag when multiple appear in different messages', () => { + const older = sampleTag('older attempt') + const newer = sampleTag('newer attempt') + const log = [ + assistantTextMessage(`first try\n${older}`), + assistantTextMessage(`retry\n${newer}`), + ] + expect(extractAutofixResultFromLog(log)).toBe(newer) + }) + + test('returns null when open tag exists but close tag is missing (truncated)', () => { + const log = [ + assistantTextMessage( + `<${AUTOFIX_RESULT_TAG}>\ngot cut off mid-write...`, + ), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('returns earlier complete tag when latest open tag is truncated within the same block', () => { + // Retry scenario: a full result was emitted, then a second result tag + // started but got cut off. We should surface the earlier complete pair + // rather than dropping the whole block. + const complete = sampleTag('earlier complete result') + const truncated = `<${AUTOFIX_RESULT_TAG}>\ntruncated retry...` + const log = [assistantTextMessage(`${complete}\n${truncated}`)] + expect(extractAutofixResultFromLog(log)).toBe(complete) + }) + + test('walks backwards so hook stdout from later in log wins over earlier assistant text', () => { + const earlier = sampleTag('via assistant first') + const later = sampleTag('via hook later') + const log = [ + assistantTextMessage(`some output\n${earlier}`), + hookProgressMessage(later), + ] + expect(extractAutofixResultFromLog(log)).toBe(later) + }) + + test('ignores tag-shaped strings that span across messages (no concatenation)', () => { + // Open tag in one message, close tag in another — should NOT be stitched. + const log = [ + assistantTextMessage(`<${AUTOFIX_RESULT_TAG}>\npart 1`), + assistantTextMessage(`part 2\n`), + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) + + test('extracts when assistant content is a string (not block array)', () => { + // Some SDK paths emit assistant content as a raw string instead of + // a content-block array. Current implementation skips those — verify + // graceful no-op rather than crash. + const log = [ + { + type: 'assistant', + message: { content: sampleTag('string content') }, + } as unknown as SDKMessage, + ] + expect(extractAutofixResultFromLog(log)).toBeNull() + }) +}) diff --git a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts index c6df04ff9..c34a2c3a0 100644 --- a/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts +++ b/src/commands/autofix-pr/__tests__/launchAutofixPr.test.ts @@ -46,7 +46,7 @@ mock.module('src/utils/teleport.js', () => ({ })) const registerMock = mock(() => ({ - taskId: 'task-abc', + taskId: 'framework-task-id', sessionId: 'session-123', cleanup: () => {}, })) @@ -56,14 +56,41 @@ const checkEligibilityMock = mock(() => const getSessionUrlMock = mock( (id: string) => `https://claude.ai/session/${id}`, ) +const registerCompletionHookMock = mock< + (taskType: string, hook: (taskId: string, metadata?: unknown) => void) => void +>(() => {}) +const registerCompletionCheckerMock = mock< + ( + taskType: string, + checker: (metadata?: unknown) => Promise, + ) => void +>(() => {}) +const registerContentExtractorMock = mock< + (taskType: string, extractor: (log: unknown[]) => string | null) => void +>(() => {}) mock.module('src/tasks/RemoteAgentTask/RemoteAgentTask.js', () => ({ checkRemoteAgentEligibility: checkEligibilityMock, registerRemoteAgentTask: registerMock, + registerCompletionHook: registerCompletionHookMock, + registerCompletionChecker: registerCompletionCheckerMock, + registerContentExtractor: registerContentExtractorMock, getRemoteTaskSessionUrl: getSessionUrlMock, formatPreconditionError: (e: { type: string }) => e.type, })) +const fetchPrHeadShaMock = mock< + (owner: string, repo: string, prNumber: number) => Promise +>(() => Promise.resolve('sha-baseline-abc123')) + +// Mock prFetch.ts (gh CLI spawn layer) — keeping the pure decision matrix +// in prOutcomeCheck.ts unmocked so its tests are unaffected by this file's +// process-global mock.module pollution. +mock.module('src/commands/autofix-pr/prFetch.js', () => ({ + fetchPrHeadSha: fetchPrHeadShaMock, + checkPrAutofixOutcome: mock(() => Promise.resolve({ completed: false })), +})) + const detectRepoMock = mock(() => Promise.resolve({ host: 'github.com', owner: 'acme', name: 'myrepo' }), ) @@ -375,6 +402,326 @@ describe('callAutofixPr', () => { }) }) +// Regression suite for the taskId-mismatch latent bug + completion hook wiring. +// Before this fix, createAutofixTeammate generated a teammate UUID, that UUID +// was used to acquire the singleton monitor lock, and registerRemoteAgentTask +// generated a *different* framework taskId. When the framework eventually +// called clearActiveMonitor(frameworkTaskId) on natural completion, the guard +// failed (active.taskId !== frameworkTaskId) and the lock stayed acquired, +// blocking any subsequent /autofix-pr invocations in the same process. +describe('callAutofixPr · completion hook wiring (taskId mismatch regression)', () => { + test('updateActiveMonitor swaps lock taskId to framework-assigned id after register', async () => { + await callAutofixPr(onDone, makeContext(), '42') + const monitor = getActiveMonitor() as { taskId: string } | null + expect(monitor).not.toBeNull() + // registerMock returns 'framework-task-id'; before the fix this would be + // a teammate-generated random UUID instead. + expect(monitor?.taskId).toBe('framework-task-id') + }) + + test('framework hook → clearActiveMonitor releases lock on natural completion', async () => { + await callAutofixPr(onDone, makeContext(), '42') + expect(getActiveMonitor()).not.toBeNull() + + // Find the hook the module registered at import time. We grab the last + // call so re-imports across tests don't break this — only the most recent + // registration is what the framework would invoke now. + const calls = registerCompletionHookMock.mock.calls + expect(calls.length).toBeGreaterThan(0) + const lastCall = calls[calls.length - 1] + expect(lastCall?.[0]).toBe('autofix-pr') + const hook = lastCall?.[1] as (id: string, metadata?: unknown) => void + expect(typeof hook).toBe('function') + + // Simulate the framework invoking the hook with the framework taskId + // after a terminal transition. Before the fix this would no-op against + // a lock keyed by the teammate UUID. + hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 }) + expect(getActiveMonitor()).toBeNull() + }) + + test('subsequent /autofix-pr succeeds after framework hook clears the lock', async () => { + await callAutofixPr(onDone, makeContext(), '42') + // Simulate natural completion via the registered hook + const calls = registerCompletionHookMock.mock.calls + const hook = calls[calls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('framework-task-id', { owner: 'acme', repo: 'myrepo', prNumber: 42 }) + + onDone.mockClear() + await callAutofixPr(onDone, makeContext(), '99') + const firstArg = onDone.mock.calls[0]?.[0] as string + // Should be the success path, not "already monitoring" + expect(firstArg).not.toMatch(/already monitoring/i) + expect(firstArg).toMatch(/Autofix launched/) + }) +}) + +// Phase 2: completionChecker wiring + initialHeadSha capture +describe('callAutofixPr · Phase 2 completionChecker integration', () => { + test('completionChecker is registered at module load with autofix-pr type', () => { + // The registration happens during the beforeAll dynamic import; just + // verify the mock recorded a call. Filter by task type so any future + // additional registrations elsewhere don't break this assertion. + const calls = registerCompletionCheckerMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + expect(calls.length).toBeGreaterThan(0) + const hook = calls[calls.length - 1]?.[1] + expect(typeof hook).toBe('function') + }) + + test('callAutofixPr captures initialHeadSha via fetchPrHeadSha', async () => { + fetchPrHeadShaMock.mockClear() + await callAutofixPr(onDone, makeContext(), '42') + expect(fetchPrHeadShaMock).toHaveBeenCalledWith('acme', 'myrepo', 42) + }) + + test('initialHeadSha is passed into remoteTaskMetadata on register', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => + Promise.resolve('sha-from-launch'), + ) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: 'sha-from-launch', + }), + }), + ) + }) + + test('fetchPrHeadSha failure → metadata initialHeadSha undefined, launch still succeeds', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => + Promise.reject(new Error('gh not installed')), + ) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: undefined, + }), + }), + ) + // Launch must NOT fail just because SHA capture failed + const firstArg = onDone.mock.calls[0]?.[0] as string + expect(firstArg).toMatch(/Autofix launched/) + }) + + test('fetchPrHeadSha returning null → metadata initialHeadSha undefined', async () => { + fetchPrHeadShaMock.mockImplementationOnce(() => Promise.resolve(null)) + await callAutofixPr(onDone, makeContext(), '42') + expect(registerMock).toHaveBeenCalledWith( + expect.objectContaining({ + remoteTaskMetadata: expect.objectContaining({ + initialHeadSha: undefined, + }), + }), + ) + }) +}) + +// Phase 2 (cont.): exercise the registered completionChecker arrow body +// directly. The earlier suite verifies it was registered but never invokes +// the arrow itself, leaving the throttle / metadata-guard / gh-CLI dispatch +// branches uncovered. +describe('callAutofixPr · Phase 2 completionChecker arrow body', () => { + // Pull the most recent registered checker — beforeAll registers once at + // module load; nothing else re-registers across this file's tests. + function getChecker(): (metadata?: unknown) => Promise { + const calls = registerCompletionCheckerMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const fn = calls[calls.length - 1]?.[1] + if (typeof fn !== 'function') { + throw new Error('completionChecker not registered') + } + return fn + } + + test('returns null when metadata is undefined (early guard)', async () => { + const checker = getChecker() + expect(await checker(undefined)).toBeNull() + }) + + test('returns null when checkPrAutofixOutcome reports not completed', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce( + () => Promise.resolve({ completed: false }), + ) + const checker = getChecker() + // Distinct PR number to dodge the in-process throttle map carried over + // from earlier tests. + const result = await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1001, + }) + expect(result).toBeNull() + }) + + test('returns the summary string when checkPrAutofixOutcome reports completed', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + ;(checkPrAutofixOutcome as ReturnType).mockImplementationOnce( + () => + Promise.resolve({ + completed: true, + summary: 'acme/myrepo#1002 merged. Autofix monitoring complete.', + }), + ) + const checker = getChecker() + const result = await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1002, + }) + expect(result).toBe('acme/myrepo#1002 merged. Autofix monitoring complete.') + }) + + test('passes initialHeadSha through to checkPrAutofixOutcome', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementationOnce(() => + Promise.resolve({ completed: false }), + ) + const checker = getChecker() + await checker({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1003, + initialHeadSha: 'sha-baseline-xyz', + }) + expect(checkMock).toHaveBeenCalledWith({ + owner: 'acme', + repo: 'myrepo', + prNumber: 1003, + initialHeadSha: 'sha-baseline-xyz', + }) + }) + + test('throttles back-to-back calls for the same PR within CHECK_INTERVAL_MS', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementation(() => Promise.resolve({ completed: false })) + const checker = getChecker() + const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1004 } + await checker(meta) + // Second call within the 5s throttle window must short-circuit to null + // without invoking the gh CLI layer again. + const callCountAfterFirst = checkMock.mock.calls.length + const result = await checker(meta) + expect(result).toBeNull() + expect(checkMock.mock.calls.length).toBe(callCountAfterFirst) + }) + + test('completionHook with metadata clears the throttle entry (re-launch can re-check immediately)', async () => { + const { checkPrAutofixOutcome } = await import('../prFetch.js') + const checkMock = checkPrAutofixOutcome as ReturnType + checkMock.mockClear() + checkMock.mockImplementation(() => Promise.resolve({ completed: false })) + const checker = getChecker() + const meta = { owner: 'acme', repo: 'myrepo', prNumber: 1005 } + await checker(meta) // populate throttle map + + // Invoke the registered completion hook with the same metadata so the + // throttle entry is wiped, then verify the next checker call dispatches + // gh CLI again instead of short-circuiting. + const hookCalls = registerCompletionHookMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const hook = hookCalls[hookCalls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('any-task-id', meta) + + const callCountBefore = checkMock.mock.calls.length + await checker(meta) + expect(checkMock.mock.calls.length).toBe(callCountBefore + 1) + }) + + test('completionHook without metadata still clears the active monitor lock', async () => { + // Lock is set via callAutofixPr; hook then invoked with undefined metadata + // to exercise the `if (meta)` short-circuit branch (the lock-clear half + // still has to run regardless of metadata presence). + await callAutofixPr(onDone, makeContext(), '42') + expect(getActiveMonitor()).not.toBeNull() + const hookCalls = registerCompletionHookMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const hook = hookCalls[hookCalls.length - 1]?.[1] as ( + id: string, + metadata?: unknown, + ) => void + hook('framework-task-id', undefined) + expect(getActiveMonitor()).toBeNull() + }) +}) + +// Phase 3: content extractor wiring + initialMessage tag instruction +describe('callAutofixPr · Phase 3 content extractor integration', () => { + test('registerContentExtractor is called at module load with autofix-pr type', () => { + const calls = registerContentExtractorMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + expect(calls.length).toBeGreaterThan(0) + const extractor = calls[calls.length - 1]?.[1] + expect(typeof extractor).toBe('function') + }) + + test('initialMessage instructs the remote agent to emit an tag', async () => { + await callAutofixPr(onDone, makeContext(), '42') + // teleportMock's typed signature has no args, so calls[0] is a + // zero-length tuple. We know teleportToRemote is invoked with one + // options object, so double-cast through unknown to read the args. + const calls = teleportMock.mock.calls as unknown as Array< + [{ initialMessage?: string }] + > + const teleportArgs = calls[0]?.[0] + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + expect(teleportArgs?.initialMessage).toContain('') + }) + + test('registered extractor returns string for valid log and null for empty', () => { + const calls = registerContentExtractorMock.mock.calls.filter( + c => c[0] === 'autofix-pr', + ) + const extractor = calls[calls.length - 1]?.[1] as + | ((log: unknown[]) => string | null) + | undefined + expect(extractor).toBeDefined() + // Empty log → null + expect(extractor?.([])).toBeNull() + // Log with assistant text containing tag → returns it + const logWithTag = [ + { + type: 'assistant', + message: { + content: [ + { + type: 'text', + text: 'done\nx', + }, + ], + }, + }, + ] + expect(extractor?.(logWithTag)).toContain('') + }) +}) + // Cover ../index.ts load() — placed in this test file so all the heavy mocks // (teleport / detectRepository / RemoteAgentTask / bootstrap-state / analytics / // skillDetect) are already registered when load() dynamically imports diff --git a/src/commands/autofix-pr/__tests__/monitorState.test.ts b/src/commands/autofix-pr/__tests__/monitorState.test.ts index 43ce2f091..f97fcd972 100644 --- a/src/commands/autofix-pr/__tests__/monitorState.test.ts +++ b/src/commands/autofix-pr/__tests__/monitorState.test.ts @@ -5,6 +5,7 @@ import { isMonitoring, setActiveMonitor, trySetActiveMonitor, + updateActiveMonitor, } from '../monitorState.js' function makeState( @@ -76,4 +77,41 @@ describe('monitorState', () => { // First state remains expect(getActiveMonitor()?.prNumber).toBe(1) }) + + test('updateActiveMonitor returns false when no active monitor', () => { + expect(updateActiveMonitor({ taskId: 'task-x' })).toBe(false) + expect(getActiveMonitor()).toBeNull() + }) + + test('updateActiveMonitor merges partial fields into the active monitor', () => { + setActiveMonitor(makeState({ taskId: 'tentative-uuid' })) + expect(updateActiveMonitor({ taskId: 'framework-task-id' })).toBe(true) + const after = getActiveMonitor() + expect(after?.taskId).toBe('framework-task-id') + // Other fields untouched + expect(after?.owner).toBe('acme') + expect(after?.repo).toBe('myrepo') + expect(after?.prNumber).toBe(42) + }) + + test('updateActiveMonitor with new taskId makes clearActiveMonitor recognise framework taskId', () => { + // Reproduce the latent bug scenario: lock acquired with one taskId, + // framework assigns a different one. Before the fix, the framework's + // clearActiveMonitor(frameworkTaskId) would no-op because guard fails. + setActiveMonitor(makeState({ taskId: 'teammate-uuid' })) + // Framework cleanup using its own taskId — would fail guard before the fix + clearActiveMonitor('framework-uuid') + expect(getActiveMonitor()).not.toBeNull() + // After updateActiveMonitor swaps the taskId, framework cleanup works + updateActiveMonitor({ taskId: 'framework-uuid' }) + clearActiveMonitor('framework-uuid') + expect(getActiveMonitor()).toBeNull() + }) + + test('updateActiveMonitor does not change abortController identity', () => { + const ac = new AbortController() + setActiveMonitor(makeState({ abortController: ac, taskId: 'tentative' })) + updateActiveMonitor({ taskId: 'updated' }) + expect(getActiveMonitor()?.abortController).toBe(ac) + }) }) diff --git a/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts new file mode 100644 index 000000000..32be7bade --- /dev/null +++ b/src/commands/autofix-pr/__tests__/prOutcomeCheck.test.ts @@ -0,0 +1,193 @@ +import { describe, expect, test } from 'bun:test' +import { + type PrViewPayload, + summariseAutofixOutcome, +} from '../prOutcomeCheck.js' + +function basePayload(overrides: Partial = {}): PrViewPayload { + return { + headRefOid: 'sha-baseline', + state: 'OPEN', + statusCheckRollup: [], + ...overrides, + } +} + +const identity = (overrides: Partial<{ initialHeadSha: string }> = {}) => ({ + owner: 'acme', + repo: 'myrepo', + prNumber: 42, + initialHeadSha: 'sha-baseline', + ...overrides, +}) + +describe('summariseAutofixOutcome · terminal PR states', () => { + test('MERGED → completed regardless of head SHA / CI', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'MERGED', headRefOid: 'sha-baseline' }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: 'acme/myrepo#42 merged. Autofix monitoring complete.', + }) + }) + + test('CLOSED → completed regardless of head SHA / CI', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'CLOSED' }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: + 'acme/myrepo#42 closed without merge. Autofix monitoring complete.', + }) + }) +}) + +describe('summariseAutofixOutcome · OPEN PR without push', () => { + test('no initialHeadSha baseline → not completed (cannot detect push)', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'OPEN' }), + identity({ initialHeadSha: undefined as unknown as string }), + ) + expect(result).toEqual({ completed: false }) + }) + + test('headRefOid unchanged → not completed (autofix has not pushed yet)', () => { + const result = summariseAutofixOutcome( + basePayload({ state: 'OPEN', headRefOid: 'sha-baseline' }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) +}) + +describe('summariseAutofixOutcome · OPEN PR with push, CI variations', () => { + test('push detected + no checks configured → completed (success)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [], + }), + identity(), + ) + expect(result).toEqual({ + completed: true, + summary: 'Autofix pushed commits to acme/myrepo#42, CI green.', + }) + }) + + test('push detected + CI pending → not completed (wait for CI)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'IN_PROGRESS', conclusion: null, name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) + + test('push detected + CI all green → completed (success summary)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + expect(result.summary).toContain('acme/myrepo#42') + } + }) + + test('push detected + CI red → completed (failure summary surfaces the red)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: 'FAILURE', name: 'ci' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI is failing') + expect(result.summary).toContain('1/2 checks failing') + } + }) + + test('statusCheckRollup undefined → treated as no checks configured (success)', () => { + // Distinct from empty-array: GitHub omits the field entirely on PRs + // without any configured checks. The !rollup branch covers undefined. + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: undefined, + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + } + }) + + test('check with COMPLETED status but empty conclusion → counted as pending', () => { + // Edge case: GitHub sometimes reports a check as COMPLETED with a null/ + // missing conclusion (in-flight result mid-write). The defensive branch + // treats empty conclusion after a passed status check as pending. + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { status: 'COMPLETED', conclusion: null, name: 'ci-in-flight' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'lint' }, + ], + }), + identity(), + ) + expect(result).toEqual({ completed: false }) + }) + + test('neutral / skipped conclusions count as success (not failure)', () => { + const result = summariseAutofixOutcome( + basePayload({ + state: 'OPEN', + headRefOid: 'sha-new', + statusCheckRollup: [ + { + status: 'COMPLETED', + conclusion: 'NEUTRAL', + name: 'optional-check', + }, + { status: 'COMPLETED', conclusion: 'SKIPPED', name: 'docs-check' }, + { status: 'COMPLETED', conclusion: 'SUCCESS', name: 'ci' }, + ], + }), + identity(), + ) + expect(result.completed).toBe(true) + if (result.completed) { + expect(result.summary).toContain('CI green') + } + }) +}) diff --git a/src/commands/autofix-pr/extractAutofixResult.ts b/src/commands/autofix-pr/extractAutofixResult.ts new file mode 100644 index 000000000..c1ec80e22 --- /dev/null +++ b/src/commands/autofix-pr/extractAutofixResult.ts @@ -0,0 +1,92 @@ +// Extract the tag from a remote autofix-pr session log. +// +// The remote agent emits a structured XML block as its final message +// (initialMessage in launchAutofixPr.ts instructs it to). The tag carries +// PR-specific outcome data — commits pushed, files changed, CI status, +// summary — that the framework's generic "task completed" notification +// can't convey. We surface it to the local model by injecting the tag +// verbatim into the message queue (analogous to handling). +// +// Resilient to two production realities: +// 1. The tag may appear in either an assistant text block or a hook +// stdout (some autofix skills wrap the final report in a hook). +// 2. The tag may not appear at all (older agents, truncated runs) — +// caller falls back to generic completion notification. + +import type { + SDKAssistantMessage, + SDKMessage, +} from '../../entrypoints/agentSdkTypes.js' + +export const AUTOFIX_RESULT_TAG = 'autofix-result' + +const TAG_OPEN = `<${AUTOFIX_RESULT_TAG}>` +const TAG_CLOSE = `` + +/** + * Walk the session log for an tag. Returns the full tag + * (including delimiters) so the caller can inject it as-is into the + * notification; returns null if no tag is present. + * + * Search order: + * 1. Latest hook_progress / hook_response stdout (autofix skills that + * use hooks to format the report write here first). + * 2. Latest assistant text block (agents that don't use hooks write the + * tag inline in their final message). + * + * Latest-wins so re-tries within the same session don't surface stale + * earlier results. + */ +export function extractAutofixResultFromLog(log: SDKMessage[]): string | null { + // Walk backwards so we hit the most recent tag first. + for (let i = log.length - 1; i >= 0; i--) { + const msg = log[i] + if (!msg) continue + + // Hook stdout (system messages of subtype hook_progress / hook_response). + if ( + msg.type === 'system' && + (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response') + ) { + const stdout = (msg as { stdout?: unknown }).stdout + if (typeof stdout === 'string') { + const extracted = extractBetween(stdout, TAG_OPEN, TAG_CLOSE) + if (extracted) return extracted + } + continue + } + + // Assistant text blocks. + if (msg.type === 'assistant') { + const content = (msg as SDKAssistantMessage).message?.content + if (!content || typeof content === 'string') continue + for (const block of content as Array<{ type: string; text?: string }>) { + if (block.type !== 'text' || typeof block.text !== 'string') continue + if (!block.text.includes(TAG_OPEN)) continue + const extracted = extractBetween(block.text, TAG_OPEN, TAG_CLOSE) + if (extracted) return extracted + } + } + } + return null +} + +// Walks open tags from latest to earliest, returning the first complete +// open/close pair. Guards against a truncated final tag shadowing an +// earlier complete pair within the same text block (e.g., a retry wrote a +// full result, then the model started a second tag that got cut off). +function extractBetween( + text: string, + open: string, + close: string, +): string | null { + let searchFrom = text.length + while (searchFrom >= 0) { + const start = text.lastIndexOf(open, searchFrom) + if (start === -1) return null + const end = text.indexOf(close, start + open.length) + if (end !== -1) return text.slice(start, end + close.length) + searchFrom = start - 1 + } + return null +} diff --git a/src/commands/autofix-pr/launchAutofixPr.ts b/src/commands/autofix-pr/launchAutofixPr.ts index cb4eb87f8..f33c3e94f 100644 --- a/src/commands/autofix-pr/launchAutofixPr.ts +++ b/src/commands/autofix-pr/launchAutofixPr.ts @@ -13,7 +13,11 @@ import { checkRemoteAgentEligibility, formatPreconditionError, getRemoteTaskSessionUrl, + registerCompletionChecker, + registerCompletionHook, + registerContentExtractor, registerRemoteAgentTask, + type AutofixPrRemoteTaskMetadata, type BackgroundRemoteSessionPrecondition, } from '../../tasks/RemoteAgentTask/RemoteAgentTask.js' import type { LocalJSXCommandCall } from '../../types/command.js' @@ -26,10 +30,66 @@ import { getActiveMonitor, isMonitoring, trySetActiveMonitor, + updateActiveMonitor, } from './monitorState.js' +import { extractAutofixResultFromLog } from './extractAutofixResult.js' import { parseAutofixArgs } from './parseArgs.js' +import { checkPrAutofixOutcome, fetchPrHeadSha } from './prFetch.js' import { detectAutofixSkills, formatSkillsHint } from './skillDetect.js' +// Throttle map for the completionChecker: gh CLI is called at most once per +// PR per CHECK_INTERVAL_MS, regardless of the framework's 1s poll cadence. +// Key is `${owner}/${repo}#${prNumber}`. Cleared when the completion hook +// fires so a re-launched monitor starts with a fresh budget. +const lastCheckAt = new Map() +const CHECK_INTERVAL_MS = 5_000 + +function throttleKey(meta: AutofixPrRemoteTaskMetadata): string { + return `${meta.owner}/${meta.repo}#${meta.prNumber}` +} + +// Register the completionChecker once at module load. The framework calls it +// on every poll tick for tasks with remoteTaskType==='autofix-pr'; throttle +// inside so we don't fire gh CLI 60×/min. Returns the summary string on +// completion (becomes the task-notification body) or null to keep polling. +registerCompletionChecker('autofix-pr', async metadata => { + const meta = metadata as AutofixPrRemoteTaskMetadata | undefined + if (!meta) return null + + const key = throttleKey(meta) + const now = Date.now() + if (now - (lastCheckAt.get(key) ?? 0) < CHECK_INTERVAL_MS) return null + lastCheckAt.set(key, now) + + const result = await checkPrAutofixOutcome({ + owner: meta.owner, + repo: meta.repo, + prNumber: meta.prNumber, + initialHeadSha: meta.initialHeadSha, + }) + return result.completed ? result.summary : null +}) + +// Release the singleton monitor lock when the framework transitions the +// autofix task to a terminal state. Without this, the lock — keyed by the +// framework-assigned taskId (after callAutofixPr's updateActiveMonitor swap) +// — would dangle past natural completion, blocking subsequent /autofix-pr +// invocations until the process restarts. Registered at module load; the +// framework's runCompletionHook invokes it once per terminal transition. +// Also clear the per-PR throttle entry so a re-launch starts fresh. +registerCompletionHook('autofix-pr', (taskId, metadata) => { + clearActiveMonitor(taskId) + const meta = metadata as AutofixPrRemoteTaskMetadata | undefined + if (meta) lastCheckAt.delete(throttleKey(meta)) +}) + +// Phase 3 content return: extract the tag from the session +// log so the local model sees the agent's structured outcome (commits +// pushed, files changed, CI status) inline in the completion task- +// notification — instead of just a file-path pointer. The framework falls +// back to the generic notification if extraction returns null. +registerContentExtractor('autofix-pr', log => extractAutofixResultFromLog(log)) + function makeErrorText(message: string, code: string): string { logEvent('tengu_autofix_pr_result', { result: @@ -198,7 +258,23 @@ export const callAutofixPr: LocalJSXCommandCall = async ( // 4.5 compose message const target = `${owner}/${repo}#${prNumber}` const branchName = `refs/pull/${prNumber}/head` - const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint}` + const initialMessage = `Auto-fix failing CI checks on PR #${prNumber} in ${owner}/${repo}.${skillsHint} + +When you finish (or hit a blocker you can't recover from), output the following XML tag as your final message so the local user gets a structured summary: + + + ${prNumber} + + commit message + + + N changes + + green | red | pending | unknown + One-sentence summary of what was fixed or why it could not be fixed. + + +If no fix was needed, omit and and explain in . If you only attempted partial work, list the commits you did push and explain the remainder in .` // 4.6 in-process teammate const teammate = createAutofixTeammate(initialMessage, target) @@ -274,18 +350,35 @@ export const callAutofixPr: LocalJSXCommandCall = async ( return null } + // 4.8b capture PR head SHA before registering so the completionChecker + // can detect when the agent has pushed new commits. Best-effort — if gh + // is unavailable or the call fails, leave initialHeadSha undefined and + // the checker falls back to terminal-state-only completion (closed / + // merged). Don't block on this; teleport succeeded already. + const initialHeadSha = + (await fetchPrHeadSha(owner, repo, prNumber).catch(() => null)) ?? + undefined + // 4.9 register task. If this throws, release the lock so the user can // retry — the remote CCR session is already created so we surface a // dedicated error code. + // + // After registration succeeds, swap the lock's taskId from the tentative + // teammate UUID (used to acquire the lock atomically before teleport) to + // the framework-assigned taskId. Without this swap, the framework's own + // cleanup path (clearActiveMonitor(frameworkTaskId) on natural completion) + // would no-op against a lock keyed by teammate.taskId, leaving the + // singleton lock dangling and blocking future /autofix-pr invocations. try { - registerRemoteAgentTask({ + const { taskId: frameworkTaskId } = registerRemoteAgentTask({ remoteTaskType: 'autofix-pr', session, command: `/autofix-pr ${prNumber}`, context, isLongRunning: true, - remoteTaskMetadata: { owner, repo, prNumber }, + remoteTaskMetadata: { owner, repo, prNumber, initialHeadSha }, }) + updateActiveMonitor({ taskId: frameworkTaskId }) } catch (regErr: unknown) { clearActiveMonitor(teammate.taskId) const regMsg = regErr instanceof Error ? regErr.message : String(regErr) diff --git a/src/commands/autofix-pr/monitorState.ts b/src/commands/autofix-pr/monitorState.ts index df74292f1..273f0a90c 100644 --- a/src/commands/autofix-pr/monitorState.ts +++ b/src/commands/autofix-pr/monitorState.ts @@ -46,6 +46,20 @@ export function clearActiveMonitor(taskId?: string): void { active = null } +/** + * Atomically merges partial updates into the active monitor. Returns true if + * applied, false if no active monitor. Used when the caller needs to swap the + * lock's taskId after the framework assigns a different one than the + * tentative one used to acquire the lock — without this the framework's + * cleanup (clearActiveMonitor with the framework taskId) would no-op against + * a lock keyed by the caller's tentative id. + */ +export function updateActiveMonitor(partial: Partial): boolean { + if (!active) return false + active = { ...active, ...partial } + return true +} + export function isMonitoring( owner: string, repo: string, diff --git a/src/commands/autofix-pr/prFetch.ts b/src/commands/autofix-pr/prFetch.ts new file mode 100644 index 000000000..63245c7ae --- /dev/null +++ b/src/commands/autofix-pr/prFetch.ts @@ -0,0 +1,155 @@ +// gh CLI integration for autofix-pr: fetches PR snapshots and feeds them +// through the pure decision matrix in prOutcomeCheck.ts. Kept separate so +// tests of the decision matrix never have to mock node:child_process — and +// tests of callAutofixPr can mock this module without polluting the pure +// decision matrix module (Bun mock.module is process-global). + +import { spawn } from 'node:child_process' +import { + type AutofixOutcomeProbeResult, + type PrViewPayload, + summariseAutofixOutcome, +} from './prOutcomeCheck.js' + +export interface AutofixOutcomeProbeInput { + owner: string + repo: string + prNumber: number + /** + * Head commit SHA captured at /autofix-pr launch. When this differs from + * the current head, autofix has pushed at least one commit. + */ + initialHeadSha?: string + /** + * Timeout for the gh CLI invocation. Caller is the framework's per-tick + * poller, so failures must be bounded — a hung gh process would stall + * the entire poll loop. + */ + timeoutMs?: number +} + +const DEFAULT_TIMEOUT_MS = 5_000 + +/** + * Fetch the PR's current head SHA, state, and CI rollup, and decide whether + * autofix has finished. Returns `{ completed: true, summary }` if so; + * otherwise `{ completed: false }`. Never throws. + */ +export async function checkPrAutofixOutcome( + input: AutofixOutcomeProbeInput, +): Promise { + const { owner, repo, prNumber, initialHeadSha, timeoutMs } = input + + let payload: PrViewPayload + try { + payload = await runGhPrView( + owner, + repo, + prNumber, + timeoutMs ?? DEFAULT_TIMEOUT_MS, + ) + } catch { + return { completed: false } + } + + return summariseAutofixOutcome(payload, { + owner, + repo, + prNumber, + initialHeadSha, + }) +} + +/** + * Resolve the PR's current head commit SHA. Used at /autofix-pr launch to + * capture a baseline; later compared against the live SHA to detect pushes. + * Returns null on any failure (network, missing gh, permissions) — the + * caller treats null as "no baseline" and falls back to terminal-state-only + * completion detection. + */ +export async function fetchPrHeadSha( + owner: string, + repo: string, + prNumber: number, + timeoutMs = DEFAULT_TIMEOUT_MS, +): Promise { + try { + const payload = await runGhPrView(owner, repo, prNumber, timeoutMs) + return payload.headRefOid || null + } catch { + return null + } +} + +interface SpawnError extends Error { + code?: string +} + +/** + * Spawn `gh pr view {n} --repo {owner}/{repo} --json ...` and parse the + * result. Rejects on non-zero exit, timeout, or JSON parse failure. + */ +function runGhPrView( + owner: string, + repo: string, + prNumber: number, + timeoutMs: number, +): Promise { + return new Promise((resolve, reject) => { + const proc = spawn( + 'gh', + [ + 'pr', + 'view', + String(prNumber), + '--repo', + `${owner}/${repo}`, + '--json', + 'headRefOid,state,statusCheckRollup', + ], + { stdio: ['ignore', 'pipe', 'pipe'] }, + ) + const stdoutChunks: Buffer[] = [] + const stderrChunks: Buffer[] = [] + let settled = false + + const timer = setTimeout(() => { + if (settled) return + settled = true + proc.kill('SIGKILL') + reject(new Error(`gh pr view timed out after ${timeoutMs}ms`)) + }, timeoutMs) + + proc.stdout.on('data', chunk => stdoutChunks.push(chunk as Buffer)) + proc.stderr.on('data', chunk => stderrChunks.push(chunk as Buffer)) + + proc.on('error', (err: SpawnError) => { + if (settled) return + settled = true + clearTimeout(timer) + reject(err) + }) + + proc.on('close', code => { + if (settled) return + settled = true + clearTimeout(timer) + if (code !== 0) { + const stderr = Buffer.concat(stderrChunks).toString('utf8').trim() + reject( + new Error(`gh pr view exited ${code}: ${stderr || ''}`), + ) + return + } + const stdout = Buffer.concat(stdoutChunks).toString('utf8').trim() + try { + const parsed = JSON.parse(stdout) as PrViewPayload + resolve(parsed) + } catch (e) { + reject( + new Error(`gh pr view JSON parse failed: ${(e as Error).message}`), + ) + } + }) + }) +} diff --git a/src/commands/autofix-pr/prOutcomeCheck.ts b/src/commands/autofix-pr/prOutcomeCheck.ts new file mode 100644 index 000000000..4d77ade4f --- /dev/null +++ b/src/commands/autofix-pr/prOutcomeCheck.ts @@ -0,0 +1,123 @@ +// Pure decision matrix for autofix-pr completion detection. +// +// Given a snapshot of the PR (state, head SHA, CI rollup) and a baseline +// head SHA captured at /autofix-pr launch, decide whether autofix has +// finished. No side effects — extracted from the gh CLI invocation in +// prFetch.ts so unit tests can exercise every branch without spawning +// subprocesses. + +export type AutofixOutcomeProbeResult = + | { completed: true; summary: string } + | { completed: false } + +export interface PrViewPayload { + headRefOid: string + state: 'OPEN' | 'CLOSED' | 'MERGED' + statusCheckRollup?: Array<{ + conclusion?: string | null + status?: string | null + name?: string + }> +} + +export interface AutofixOutcomeIdentity { + owner: string + repo: string + prNumber: number + /** + * Head commit SHA captured at /autofix-pr launch. When this differs from + * the current head, autofix has pushed at least one commit. Optional — + * absence means we can only finish on terminal PR states (merged/closed). + */ + initialHeadSha?: string +} + +/** + * Pure judgement of whether autofix has finished, given a PR snapshot and + * the baseline head SHA. Decision matrix: + * - MERGED → done (merged) + * - CLOSED (not merged) → done (closed without fix) + * - OPEN, no baseline → keep polling + * - OPEN, head unchanged → keep polling (agent hasn't pushed) + * - OPEN, head changed, CI pending → keep polling (wait for CI) + * - OPEN, head changed, CI failure → done (surface red so user can retry) + * - OPEN, head changed, CI success → done (clean fix) + */ +export function summariseAutofixOutcome( + payload: PrViewPayload, + identity: AutofixOutcomeIdentity, +): AutofixOutcomeProbeResult { + const { owner, repo, prNumber, initialHeadSha } = identity + + if (payload.state === 'MERGED') { + return { + completed: true, + summary: `${owner}/${repo}#${prNumber} merged. Autofix monitoring complete.`, + } + } + if (payload.state === 'CLOSED') { + return { + completed: true, + summary: `${owner}/${repo}#${prNumber} closed without merge. Autofix monitoring complete.`, + } + } + + if (!initialHeadSha) return { completed: false } + if (payload.headRefOid === initialHeadSha) return { completed: false } + + const ciState = summariseCiRollup(payload.statusCheckRollup) + if (ciState.state === 'pending') return { completed: false } + if (ciState.state === 'failure') { + return { + completed: true, + summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber} but CI is failing (${ciState.detail}).`, + } + } + return { + completed: true, + summary: `Autofix pushed commits to ${owner}/${repo}#${prNumber}, CI green.`, + } +} + +interface CiSummary { + state: 'success' | 'pending' | 'failure' + detail: string +} + +function summariseCiRollup( + rollup: PrViewPayload['statusCheckRollup'], +): CiSummary { + if (!rollup || rollup.length === 0) { + // No checks configured on this repo — treat as success so completion + // can fire on push alone. PRs without CI are perfectly valid. + return { state: 'success', detail: 'no checks configured' } + } + let pending = 0 + let failed = 0 + const total = rollup.length + for (const check of rollup) { + const status = (check.status ?? '').toUpperCase() + const conclusion = (check.conclusion ?? '').toUpperCase() + if (status && status !== 'COMPLETED') { + pending++ + continue + } + if ( + conclusion === 'SUCCESS' || + conclusion === 'NEUTRAL' || + conclusion === 'SKIPPED' + ) { + continue + } + if (conclusion === '') { + pending++ + continue + } + failed++ + } + if (pending > 0) + return { state: 'pending', detail: `${pending}/${total} checks pending` } + if (failed > 0) + return { state: 'failure', detail: `${failed}/${total} checks failing` } + return { state: 'success', detail: `${total}/${total} checks passing` } +} diff --git a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx index 641c1c461..1acffe20f 100644 --- a/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx +++ b/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx @@ -91,6 +91,14 @@ export type AutofixPrRemoteTaskMetadata = { owner: string; repo: string; prNumber: number; + /** + * PR head commit SHA captured at /autofix-pr launch. The completionChecker + * compares this against the live head to detect when the agent has pushed + * new commits. Optional because gh CLI may be unavailable at launch — in + * that case the checker falls back to terminal-state-only completion. + * Survives --resume via the session sidecar. + */ + initialHeadSha?: string; }; export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata; @@ -114,6 +122,71 @@ export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checke completionCheckers.set(remoteTaskType, checker); } +/** + * Called after the task transitions to a terminal state and the notification + * has been enqueued. Used by command modules to release singleton locks, + * clear cached state, or perform other cleanup the framework cannot see. + * Hooks must be synchronous and best-effort — errors are logged but never + * propagate. + */ +export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void; + +const completionHooks = new Map(); + +/** + * Inspect a completed remote task's accumulated log and return an XML fragment + * to inject inline into the completion task-notification. Returning null falls + * back to the framework's generic "task completed" notification (file-path + * pointer only). Used by command modules whose remote agents emit structured + * outcome tags the local model should read directly. + */ +export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null; + +const contentExtractors = new Map(); + +/** + * Register a content extractor for a remote task type. Called once per + * completion in the generic completion branches (archived, completionChecker, + * result-driven). isRemoteReview tasks have their own bespoke path and skip + * extractors entirely. Errors propagate to the framework which logs and falls + * back to generic notification. + */ +export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void { + contentExtractors.set(remoteTaskType, extractor); +} + +function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null { + const extractor = contentExtractors.get(task.remoteTaskType); + if (!extractor) return null; + try { + return extractor(log); + } catch (e) { + logError(e); + return null; + } +} + +/** + * Register a completion hook for a remote task type. Invoked once after the + * task reaches a terminal state in any of the framework's completion branches + * (archived session, completionChecker, stableIdle, result). Use this to + * release command-module state (e.g. singleton locks) without forcing the + * framework to reverse-import from the command package. + */ +export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void { + completionHooks.set(remoteTaskType, hook); +} + +function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void { + const hook = completionHooks.get(task.remoteTaskType); + if (!hook) return; + try { + hook(taskId, task.remoteTaskMetadata); + } catch (e) { + logError(e); + } +} + /** * Persist a remote-agent metadata entry to the session sidecar. * Fire-and-forget — persistence failures must not block task registration. @@ -213,6 +286,41 @@ function enqueueRemoteNotification( enqueuePendingNotification({ value: message, mode: 'task-notification' }); } +/** + * Same as enqueueRemoteNotification but inlines a structured XML fragment + * (returned by a registered RemoteTaskContentExtractor) so the local model + * reads the remote agent's outcome directly instead of having to follow a + * file-path pointer. Mode is still 'task-notification' — the framing XML is + * the same, only the body differs. + */ +function enqueueRichRemoteNotification( + taskId: string, + title: string, + status: 'completed' | 'failed' | 'killed', + richContent: string, + setAppState: SetAppState, + toolUseId?: string, +): void { + if (!markTaskNotified(taskId, setAppState)) return; + + const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped'; + const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}` : ''; + const outputPath = getTaskOutputPath(taskId); + + const message = `<${TASK_NOTIFICATION_TAG}> +<${TASK_ID_TAG}>${taskId}${toolUseIdLine} +<${TASK_TYPE_TAG}>remote_agent +<${OUTPUT_FILE_TAG}>${outputPath} +<${STATUS_TAG}>${status} +<${SUMMARY_TAG}>Remote task "${title}" ${statusText} + +The remote agent produced the following structured outcome. Summarize the key changes for the user: + +${richContent}`; + + enqueuePendingNotification({ value: message, mode: 'task-notification' }); +} + /** * Atomically mark a task as notified. Returns true if this call flipped the * flag (caller should enqueue), false if already notified (caller should skip). @@ -678,9 +786,22 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); - enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + task.title, + 'completed', + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; } @@ -691,9 +812,22 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); - enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + completionResult, + 'completed', + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; } } @@ -853,6 +987,7 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } @@ -870,12 +1005,28 @@ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } - enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); + // finalStatus is 'completed' | 'failed' on this path — kill is a + // separate code path (RemoteAgentTask.kill) and never reaches here. + const richContent = tryExtractRichContent(task, accumulatedLog); + if (richContent) { + enqueueRichRemoteNotification( + taskId, + task.title, + finalStatus, + richContent, + context.setAppState, + task.toolUseId, + ); + } else { + enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); + } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); + runCompletionHook(taskId, task); return; // Stop polling } } catch (error) {