diff --git a/src/browser/components/ChatPane/ChatPane.tsx b/src/browser/components/ChatPane/ChatPane.tsx index 8b31d89ffd..6f943dcfff 100644 --- a/src/browser/components/ChatPane/ChatPane.tsx +++ b/src/browser/components/ChatPane/ChatPane.tsx @@ -572,7 +572,8 @@ export const ChatPane: React.FC = (props) => { workspaceState.messages, workspaceState.pendingStreamStartTime, workspaceState.runtimeStatus, - workspaceState.lastAbortReason + workspaceState.lastAbortReason, + workspaceState.awaitingUserQuestion ) : null; @@ -860,7 +861,11 @@ export const ChatPane: React.FC = (props) => { /> )} {isAtCutoff && } - {shouldShowInterruptedBarrier(msg) && } + {shouldShowInterruptedBarrier( + msg, + deferredMessages, + workspaceState.awaitingUserQuestion + ) && } ); })} diff --git a/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts b/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts index f99d1f3b7d..ca28ee7658 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.status.test.ts @@ -93,6 +93,689 @@ describe("ask_user_question waiting state", () => { expect(aggregator.hasAwaitingUserQuestion()).toBe(true); }); + + it("keeps awaiting input when sibling input-available tools follow ask_user_question", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { + type: "dynamic-tool" as const, + toolCallId: "call-todo-1", + toolName: "todo_write", + state: "input-available" as const, + input: { todos: [{ content: "Waiting for answers", status: "in_progress" }] }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(true); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + + const todoRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "todo_write"); + if (todoRow?.type !== "tool") { + throw new Error("Expected todo_write tool row"); + } + expect(todoRow.status).toBe("pending"); + }); + + it("keeps every pending ask_user_question row answerable in the same turn", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take first?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-2", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Verification", + question: "Need anything else before we continue?", + options: [ + { label: "No", description: "Continue" }, + { label: "Yes", description: "Add more checks" }, + ], + multiSelect: false, + }, + ], + }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(true); + + const askRows = aggregator + .getDisplayedMessages() + .filter((message) => message.type === "tool" && message.toolName === "ask_user_question"); + + expect(askRows).toHaveLength(2); + for (const askRow of askRows) { + if (askRow.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + } + }); + + it("clears awaiting input when completed tool output follows ask_user_question", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { + type: "dynamic-tool" as const, + toolCallId: "call-todo-1", + toolName: "todo_write", + state: "output-available" as const, + input: { todos: [{ content: "Waiting for answers", status: "in_progress" }] }, + output: { success: true }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + }); + + it("clears awaiting input when a later tool result fails after the question", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { + type: "dynamic-tool" as const, + toolCallId: "call-todo-1", + toolName: "todo_write", + state: "output-available" as const, + input: { todos: [{ content: "Waiting for answers", status: "in_progress" }] }, + output: { success: false, error: "write failed" }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + }); + + it("clears awaiting input when a later failed redacted tool follows the question", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { + type: "dynamic-tool" as const, + toolCallId: "call-bash-1", + toolName: "bash", + state: "output-redacted" as const, + input: { script: "exit 1" }, + failed: true, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + }); + + it("clears awaiting input when a later partial text segment follows the question", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + { type: "text" as const, text: "Continuing with unrelated output..." }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + }); + it("does not treat older question turns as awaiting after chat moves on", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + { + id: "user-2", + role: "user" as const, + parts: [{ type: "text" as const, text: "Skipping this and moving on" }], + metadata: { + timestamp: 2000, + historySequence: 2, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + }); + + it("does not pin ask_user_question rows from older turns during truncation", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + const trailingToolParts = Array.from({ length: 80 }, (_, index) => ({ + type: "dynamic-tool" as const, + toolCallId: `call-todo-${index}`, + toolName: "todo_write", + state: "output-available" as const, + input: { todos: [{ content: `Task ${index}`, status: "in_progress" }] }, + output: { success: true }, + })); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + { + id: "user-2", + role: "user" as const, + parts: [{ type: "text" as const, text: "Move on" }], + metadata: { + timestamp: 2000, + historySequence: 2, + }, + }, + { + id: "assistant-3", + role: "assistant" as const, + parts: trailingToolParts, + metadata: { + timestamp: 3000, + historySequence: 3, + partial: true, + }, + }, + ]); + + const displayed = aggregator.getDisplayedMessages(); + expect( + displayed.some( + (message) => message.type === "tool" && message.toolName === "ask_user_question" + ) + ).toBe(false); + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + }); + + it("keeps awaiting input and keeps ask_user_question visible with pending sibling tools", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + const trailingToolParts = Array.from({ length: 80 }, (_, index) => ({ + type: "dynamic-tool" as const, + toolCallId: `call-todo-${index}`, + toolName: "todo_write", + state: "input-available" as const, + input: { todos: [{ content: `Task ${index}`, status: "in_progress" }] }, + })); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ...trailingToolParts, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + const displayed = aggregator.getDisplayedMessages(); + const askRow = displayed.find( + (message) => message.type === "tool" && message.toolName === "ask_user_question" + ); + + expect(askRow).toBeDefined(); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + expect(aggregator.hasAwaitingUserQuestion()).toBe(true); + }); + + it("clears awaiting input when truncation keeps ask_user_question visible behind resolved tool tails", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + const trailingToolParts = Array.from({ length: 80 }, (_, index) => ({ + type: "dynamic-tool" as const, + toolCallId: `call-todo-${index}`, + toolName: "todo_write", + state: "output-available" as const, + input: { todos: [{ content: `Task ${index}`, status: "in_progress" }] }, + output: { success: true }, + })); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ...trailingToolParts, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + ]); + + const displayed = aggregator.getDisplayedMessages(); + const askRow = displayed.find( + (message) => message.type === "tool" && message.toolName === "ask_user_question" + ); + + expect(askRow).toBeDefined(); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + }); + + it("does not report awaiting input when latest assistant turn has stream error metadata", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + error: "Connection dropped", + errorType: "network", + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(false); + + const askRow = aggregator + .getDisplayedMessages() + .find((message) => message.type === "tool" && message.toolName === "ask_user_question"); + if (askRow?.type !== "tool") { + throw new Error("Expected ask_user_question tool row"); + } + expect(askRow.status).toBe("executing"); + }); + + it("ignores plan-display rows when inferring awaiting input", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + aggregator.loadHistoricalMessages([ + { + id: "assistant-1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "call-ask-1", + toolName: "ask_user_question", + state: "input-available" as const, + input: { + questions: [ + { + header: "Approach", + question: "Which approach should we take?", + options: [ + { label: "A", description: "Approach A" }, + { label: "B", description: "Approach B" }, + ], + multiSelect: false, + }, + ], + }, + }, + ], + metadata: { + timestamp: 1000, + historySequence: 1, + partial: true, + }, + }, + { + id: "plan-display-1", + role: "assistant" as const, + parts: [{ type: "text" as const, text: "# Plan\n\n- Draft" }], + metadata: { + timestamp: 2000, + historySequence: Number.MAX_SAFE_INTEGER, + muxMetadata: { + type: "plan-display", + path: "/tmp/plan.md", + }, + }, + }, + ]); + + expect(aggregator.hasAwaitingUserQuestion()).toBe(true); + }); }); describe("StreamingMessageAggregator - Agent Status", () => { diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts index 75785a453d..4e825bb533 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.ts @@ -168,6 +168,180 @@ function hasFailureResult(result: unknown): boolean { return false; } +interface AskUserQuestionResolutionOptions { + suppressForMessageError: boolean; + suppressForLaterToolPart: boolean; + suppressForLaterTextOrReasoning: boolean; +} + +/** + * Returns the toolCallId of the latest ask_user_question in this assistant turn + * that should remain answerable in the UI, or null when it should be treated as + * an interruption/retry tail. + */ +function resolveAskUserQuestionToolCallId( + message: MuxMessage, + options: AskUserQuestionResolutionOptions +): string | null { + if (message.role !== "assistant") { + return null; + } + + if (options.suppressForMessageError && message.metadata?.error != null) { + // Error metadata means this turn ended in failure; surface retry/error state + // instead of presenting the turn as awaiting user input. + return null; + } + + let latestPendingQuestionIndex = -1; + let latestPendingQuestionToolCallId: string | null = null; + for (let partIndex = 0; partIndex < message.parts.length; partIndex++) { + const part = message.parts[partIndex]; + if ( + isDynamicToolPart(part) && + part.toolName === "ask_user_question" && + part.state === "input-available" + ) { + latestPendingQuestionIndex = partIndex; + latestPendingQuestionToolCallId = part.toolCallId; + } + } + + if (latestPendingQuestionIndex === -1 || latestPendingQuestionToolCallId === null) { + return null; + } + + // Provider-planned sibling tools can remain input-available after the question; + // keep ask_user_question answerable in that case. Only resolved tool output + // tails should suppress the awaiting-input workspace signal. + const hasLaterResolvedToolPart = message.parts.some( + (part, partIndex) => + partIndex > latestPendingQuestionIndex && + isDynamicToolPart(part) && + (part.state === "output-available" || part.state === "output-redacted") + ); + if (options.suppressForLaterToolPart && hasLaterResolvedToolPart) { + return null; + } + + if (options.suppressForLaterTextOrReasoning && message.metadata?.partial === true) { + const hasLaterTextOrReasoning = message.parts.some((part, partIndex) => { + if (partIndex <= latestPendingQuestionIndex) { + return false; + } + + return ( + (part.type === "text" && part.text.length > 0) || + (part.type === "reasoning" && part.text.length > 0) + ); + }); + + if (hasLaterTextOrReasoning) { + return null; + } + } + + return latestPendingQuestionToolCallId; +} + +/** + * Awaiting-input workspace state should clear when later failed tools appear so + * retry/interruption affordances can be shown. + */ +function getAwaitingAskUserQuestionToolCallId(message: MuxMessage): string | null { + return resolveAskUserQuestionToolCallId(message, { + suppressForMessageError: true, + suppressForLaterToolPart: true, + suppressForLaterTextOrReasoning: true, + }); +} + +/** + * Keep every pending ask_user_question row answerable after restart, even when + * later partial output exists. answerAskUserQuestion resolves by toolCallId, + * so each still-pending tool call must remain executable in replayed UI. + */ +function getAnswerableAskUserQuestionToolCallIds(message: MuxMessage): Set { + const answerableToolCallIds = new Set(); + + if (message.role !== "assistant") { + return answerableToolCallIds; + } + + for (const part of message.parts) { + if ( + isDynamicToolPart(part) && + part.toolName === "ask_user_question" && + part.state === "input-available" + ) { + answerableToolCallIds.add(part.toolCallId); + } + } + + return answerableToolCallIds; +} + +function getInputAvailableToolCallIdsBlockedByAwaitingQuestion(message: MuxMessage): Set { + const blockedToolCallIds = new Set(); + + if (message.role !== "assistant") { + return blockedToolCallIds; + } + + const awaitingToolCallId = getAwaitingAskUserQuestionToolCallId(message); + if (awaitingToolCallId === null) { + return blockedToolCallIds; + } + + let hasReachedAwaitingQuestion = false; + for (const part of message.parts) { + if (!hasReachedAwaitingQuestion) { + hasReachedAwaitingQuestion = + isDynamicToolPart(part) && + part.toolName === "ask_user_question" && + part.state === "input-available" && + part.toolCallId === awaitingToolCallId; + continue; + } + + if ( + isDynamicToolPart(part) && + part.state === "input-available" && + part.toolName !== "ask_user_question" + ) { + blockedToolCallIds.add(part.toolCallId); + } + } + + return blockedToolCallIds; +} + +function getLatestAnswerableAskUserQuestionMessageId( + allMessages: MuxMessage[], + showSyntheticMessages: boolean +): string | null { + for (let i = allMessages.length - 1; i >= 0; i--) { + const message = allMessages[i]; + const isSynthetic = message.metadata?.synthetic === true; + const isUiVisibleSynthetic = message.metadata?.uiVisible === true; + if (isSynthetic && !showSyntheticMessages && !isUiVisibleSynthetic) { + continue; + } + + if (message.metadata?.muxMetadata?.type === "plan-display") { + continue; + } + + if (message.role !== "assistant") { + return null; + } + + return getAnswerableAskUserQuestionToolCallIds(message).size > 0 ? message.id : null; + } + + return null; +} + function resolveRouteProvider( routeProvider: string | undefined, routedThroughGateway: boolean | undefined @@ -716,19 +890,35 @@ export class StreamingMessageAggregator { * Used to show "Awaiting your input" instead of "streaming..." in the UI. */ hasAwaitingUserQuestion(): boolean { - // Only treat the workspace as "awaiting input" when the *latest* displayed - // message is an executing ask_user_question tool. - // - // This avoids false positives from stale historical partials if the user - // continued the chat after skipping/canceling the questions. - const displayed = this.getDisplayedMessages(); - const last = displayed[displayed.length - 1]; + const showSyntheticMessages = + typeof window !== "undefined" && window.api?.debugLlmRequest === true; - if (last?.type !== "tool") { - return false; + // Start from untruncated history so we identify the latest assistant turn + // even when recent transcript rows include structural markers. + const allMessages = this.getAllMessages(); + + for (let i = allMessages.length - 1; i >= 0; i--) { + const message = allMessages[i]; + const isSynthetic = message.metadata?.synthetic === true; + const isUiVisibleSynthetic = message.metadata?.uiVisible === true; + if (isSynthetic && !showSyntheticMessages && !isUiVisibleSynthetic) { + continue; + } + + // Ignore ephemeral /plan transcript rows when determining whether the + // underlying assistant turn is still waiting for ask_user_question input. + if (message.metadata?.muxMetadata?.type === "plan-display") { + continue; + } + + if (message.role !== "assistant") { + return false; + } + + return getAwaitingAskUserQuestionToolCallId(message) !== null; } - return last.toolName === "ask_user_question" && last.status === "executing"; + return false; } /** @@ -2636,6 +2826,10 @@ export class StreamingMessageAggregator { // Merge adjacent text/reasoning parts for display const mergedParts = mergeAdjacentParts(message.parts); + const answerableAskUserQuestionToolCallIds = getAnswerableAskUserQuestionToolCallIds(message); + const inputAvailableToolCallIdsBlockedByAwaitingQuestion = + getInputAvailableToolCallIdsBlockedByAwaitingQuestion(message); + // Find the last part that will produce a DisplayedMessage // (reasoning, text parts with content, OR tool parts) let lastPartIndex = -1; @@ -2720,7 +2914,16 @@ export class StreamingMessageAggregator { // so after restart we should keep it answerable ("executing") instead of // showing retry/auto-resume UX. if (part.toolName === "ask_user_question") { - status = "executing"; + status = answerableAskUserQuestionToolCallIds.has(part.toolCallId) + ? "executing" + : isPartial + ? "interrupted" + : "executing"; + } else if ( + isPartial && + inputAvailableToolCallIdsBlockedByAwaitingQuestion.has(part.toolCallId) + ) { + status = "pending"; } else if (isPartial) { status = "interrupted"; } else { @@ -2843,6 +3046,11 @@ export class StreamingMessageAggregator { const showSyntheticMessages = typeof window !== "undefined" && window.api?.debugLlmRequest === true; + const latestAnswerableAskUserQuestionMessageId = getLatestAnswerableAskUserQuestionMessageId( + allMessages, + showSyntheticMessages + ); + // Synthetic agent-skill snapshot messages are hidden from the transcript unless // debugLlmRequest is enabled. We still want to surface their content in the UI by // attaching the resolved snapshot (frontmatterYaml + body) to the *subsequent* @@ -2908,10 +3116,23 @@ export class StreamingMessageAggregator { // and materialize omission runs as explicit history-hidden marker rows. // Full history is still maintained internally for token counting. if (!this.showAllMessages && displayedMessages.length > MAX_DISPLAYED_MESSAGES) { + const alwaysKeepMessageIds = new Set( + displayedMessages + .filter( + (message) => + message.type === "tool" && + message.toolName === "ask_user_question" && + message.status === "executing" && + message.historyId === latestAnswerableAskUserQuestionMessageId + ) + .map((message) => message.id) + ); + const truncationPlan = buildTranscriptTruncationPlan({ displayedMessages, maxDisplayedMessages: MAX_DISPLAYED_MESSAGES, alwaysKeepMessageTypes: ALWAYS_KEEP_MESSAGE_TYPES, + alwaysKeepMessageIds, }); resultMessages = diff --git a/src/browser/utils/messages/messageUtils.test.ts b/src/browser/utils/messages/messageUtils.test.ts index adb754c52e..8ae1c6fb56 100644 --- a/src/browser/utils/messages/messageUtils.test.ts +++ b/src/browser/utils/messages/messageUtils.test.ts @@ -28,6 +28,115 @@ describe("shouldShowInterruptedBarrier", () => { expect(shouldShowInterruptedBarrier(msg)).toBe(false); }); + it("returns false for executing ask_user_question when the turn also has trailing stream-error", () => { + const askRow: DisplayedMessage = { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: true, + }; + + const streamError: DisplayedMessage = { + type: "stream-error", + id: "stream-error-1", + historyId: "assistant-1", + error: "Connection dropped", + errorType: "network", + historySequence: 2, + }; + + expect(shouldShowInterruptedBarrier(askRow, [askRow, streamError])).toBe(false); + }); + + it("returns true for trailing partial rows when fallback inference sees later unfinished output", () => { + const questionTool: DisplayedMessage = { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }; + + const trailingPartialText: DisplayedMessage = { + type: "assistant", + id: "assistant-tail", + historyId: "assistant-1", + content: "Please answer above.", + historySequence: 2, + streamSequence: 1, + isStreaming: false, + isPartial: true, + isLastPartOfMessage: true, + isCompacted: false, + isIdleCompacted: false, + }; + + const messages = [questionTool, trailingPartialText]; + + expect(shouldShowInterruptedBarrier(trailingPartialText, messages)).toBe(true); + }); + + it("returns false when authoritative awaitingUserQuestion flag is true", () => { + const trailingPartialText: DisplayedMessage = { + type: "assistant", + id: "assistant-tail", + historyId: "assistant-1", + content: "Please answer above.", + historySequence: 2, + streamSequence: 1, + isStreaming: false, + isPartial: true, + isLastPartOfMessage: true, + isCompacted: false, + isIdleCompacted: false, + }; + + expect(shouldShowInterruptedBarrier(trailingPartialText, [trailingPartialText], true)).toBe( + false + ); + }); + + it("ignores trailing plan-display rows when awaitingUserQuestion is true", () => { + const trailingPartialText: DisplayedMessage = { + type: "assistant", + id: "assistant-tail", + historyId: "assistant-1", + content: "Please answer above.", + historySequence: 2, + streamSequence: 1, + isStreaming: false, + isPartial: true, + isLastPartOfMessage: true, + isCompacted: false, + isIdleCompacted: false, + }; + + const planDisplay: DisplayedMessage = { + type: "plan-display", + id: "plan-display-1", + historyId: "plan-display-1", + content: "# Plan", + path: "/tmp/plan.md", + historySequence: Number.MAX_SAFE_INTEGER, + }; + + expect( + shouldShowInterruptedBarrier(trailingPartialText, [trailingPartialText, planDisplay], true) + ).toBe(false); + }); it("returns false for decorative compaction boundary rows", () => { const msg: DisplayedMessage = { type: "compaction-boundary", diff --git a/src/browser/utils/messages/messageUtils.ts b/src/browser/utils/messages/messageUtils.ts index 2495d7ae3e..e07a11d499 100644 --- a/src/browser/utils/messages/messageUtils.ts +++ b/src/browser/utils/messages/messageUtils.ts @@ -1,6 +1,10 @@ import type { DisplayedMessage } from "@/common/types/message"; import { formatReviewForModel } from "@/common/types/review"; import type { BashOutputToolArgs } from "@/common/types/tools"; +import { + getLastNonDecorativeMessage, + hasExecutingAskUserQuestionInLatestTurn, +} from "@/common/utils/messages/retryEligibility"; /** * Returns the text that should be placed into the ChatInput when editing a user message. @@ -72,7 +76,11 @@ export interface BashOutputGroupInfo { * - Message was interrupted (isPartial) AND not currently streaming * - For multi-part messages, only show on the last part */ -export function shouldShowInterruptedBarrier(msg: DisplayedMessage): boolean { +export function shouldShowInterruptedBarrier( + msg: DisplayedMessage, + allMessages: DisplayedMessage[] = [msg], + awaitingUserQuestion = false +): boolean { if ( msg.type === "user" || msg.type === "stream-error" || @@ -83,9 +91,53 @@ export function shouldShowInterruptedBarrier(msg: DisplayedMessage): boolean { ) return false; + const lastMessage = (() => { + const latest = getLastNonDecorativeMessage(allMessages); + if (latest?.type !== "plan-display") { + return latest; + } + + // /plan previews are ephemeral transcript rows and should not redefine the + // latest actionable assistant turn for interruption UI. + for (let i = allMessages.length - 1; i >= 0; i--) { + const candidate = allMessages[i]; + if ( + candidate.type === "plan-display" || + candidate.type === "history-hidden" || + candidate.type === "workspace-init" || + candidate.type === "compaction-boundary" + ) { + continue; + } + return candidate; + } + + return undefined; + })(); + + const isLatestTurnRow = + lastMessage != null && + "historyId" in msg && + "historyId" in lastMessage && + msg.historyId === lastMessage.historyId; + // ask_user_question is intentionally a "waiting for input" state. Even if the - // underlying message is a persisted partial (e.g. after app restart), we keep - // it answerable instead of showing "Interrupted". + // question row is truncated in the displayed transcript, the authoritative + // awaitingUserQuestion workspace state should still suppress interrupted UI for + // the latest turn. + if (isLatestTurnRow && awaitingUserQuestion) { + return false; + } + + // Fallback for callers that don't provide the authoritative awaiting flag: + // infer from displayed rows only. + if (isLatestTurnRow && hasExecutingAskUserQuestionInLatestTurn(allMessages)) { + return false; + } + + // Keep executing ask_user_question rows free of interruption markers even when + // the same turn has a trailing stream-error row; those questions remain + // answerable and should not show contradictory "interrupted" affordances. if (msg.type === "tool" && msg.toolName === "ask_user_question" && msg.status === "executing") { return false; } diff --git a/src/browser/utils/messages/transcriptTruncationPlan.test.ts b/src/browser/utils/messages/transcriptTruncationPlan.test.ts index 3facf942a2..4e40484d1b 100644 --- a/src/browser/utils/messages/transcriptTruncationPlan.test.ts +++ b/src/browser/utils/messages/transcriptTruncationPlan.test.ts @@ -133,6 +133,28 @@ describe("buildTranscriptTruncationPlan", () => { expect(plan.rows[trailingMarkerIndex + 1]?.id).toBe("a2"); }); + test("preserves explicitly pinned message IDs in truncated history", () => { + const displayedMessages: DisplayedMessage[] = [ + user("u0", 0), + assistant("a0", 1), + user("u1", 2), + assistant("a1", 3), + tool("tool-1", 4), + assistant("a2", 5), + user("u2", 6), + assistant("a3", 7), + ]; + + const plan = buildTranscriptTruncationPlan({ + displayedMessages, + maxDisplayedMessages: 3, + alwaysKeepMessageTypes: ALWAYS_KEEP_MESSAGE_TYPES, + alwaysKeepMessageIds: new Set(["a1"]), + }); + + expect(plan.rows.some((message) => message.id === "a1")).toBe(true); + }); + test("caps omission markers by merging older runs", () => { const displayedMessages: DisplayedMessage[] = []; for (let i = 0; i < 20; i++) { diff --git a/src/browser/utils/messages/transcriptTruncationPlan.ts b/src/browser/utils/messages/transcriptTruncationPlan.ts index a5e59dc8cd..5ba5246ab0 100644 --- a/src/browser/utils/messages/transcriptTruncationPlan.ts +++ b/src/browser/utils/messages/transcriptTruncationPlan.ts @@ -30,6 +30,7 @@ export interface BuildTranscriptTruncationPlanArgs { displayedMessages: DisplayedMessage[]; maxDisplayedMessages: number; alwaysKeepMessageTypes: Set; + alwaysKeepMessageIds?: Set; maxHiddenSegments?: number; } @@ -47,7 +48,8 @@ interface OmissionRunState { function collectOmissions( oldMessages: DisplayedMessage[], - alwaysKeepMessageTypes: Set + alwaysKeepMessageTypes: Set, + alwaysKeepMessageIds: Set ): CollectedOmissions { const keptOldMessages: DisplayedMessage[] = []; const segments: OmissionSegment[] = []; @@ -55,7 +57,7 @@ function collectOmissions( let activeRun: OmissionRunState | null = null; for (const message of oldMessages) { - if (alwaysKeepMessageTypes.has(message.type)) { + if (alwaysKeepMessageTypes.has(message.type) || alwaysKeepMessageIds.has(message.id)) { if (activeRun !== null) { segments.push(activeRun); activeRun = null; @@ -192,7 +194,11 @@ export function buildTranscriptTruncationPlan( const recentMessages = args.displayedMessages.slice(-args.maxDisplayedMessages); const oldMessages = args.displayedMessages.slice(0, -args.maxDisplayedMessages); - const omissionCollection = collectOmissions(oldMessages, args.alwaysKeepMessageTypes); + const omissionCollection = collectOmissions( + oldMessages, + args.alwaysKeepMessageTypes, + args.alwaysKeepMessageIds ?? new Set() + ); if (omissionCollection.hiddenCount === 0) { return { rows: [...omissionCollection.keptOldMessages, ...recentMessages], diff --git a/src/common/utils/messages/retryEligibility.test.ts b/src/common/utils/messages/retryEligibility.test.ts index a391280bcc..3378acdcc9 100644 --- a/src/common/utils/messages/retryEligibility.test.ts +++ b/src/common/utils/messages/retryEligibility.test.ts @@ -31,7 +31,6 @@ describe("getLastNonDecorativeMessage", () => { const lastMessage = getLastNonDecorativeMessage(messages); expect(lastMessage?.id).toBe("error-1"); }); - it("returns undefined when all rows are decorative", () => { const messages: DisplayedMessage[] = [ { @@ -170,6 +169,208 @@ describe("hasInterruptedStream", () => { expect(hasInterruptedStream(messages)).toBe(false); }); + + it("returns true when later unfinished parts follow executing ask_user_question", () => { + const messages: DisplayedMessage[] = [ + { + type: "user", + id: "user-1", + historyId: "user-1", + content: "Hello", + historySequence: 1, + }, + { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }, + { + type: "tool", + id: "tool-todo", + historyId: "assistant-1", + toolName: "todo_write", + toolCallId: "call-todo", + args: { todos: [] }, + status: "completed", + isPartial: true, + historySequence: 2, + streamSequence: 1, + isLastPartOfMessage: false, + }, + { + type: "assistant", + id: "assistant-tail", + historyId: "assistant-1", + content: "Please answer above.", + historySequence: 2, + streamSequence: 2, + isStreaming: false, + isPartial: true, + isLastPartOfMessage: true, + isCompacted: false, + isIdleCompacted: false, + }, + ]; + + expect(hasInterruptedStream(messages)).toBe(true); + }); + + it("returns true when a failed tool follows executing ask_user_question", () => { + const messages: DisplayedMessage[] = [ + { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }, + { + type: "tool", + id: "tool-todo", + historyId: "assistant-1", + toolName: "todo_write", + toolCallId: "call-todo", + args: { todos: [] }, + result: { success: false, error: "write failed" }, + status: "failed", + isPartial: true, + historySequence: 2, + streamSequence: 1, + isLastPartOfMessage: true, + }, + ]; + + expect(hasInterruptedStream(messages)).toBe(true); + }); + + it("returns true when a redacted tool follows executing ask_user_question", () => { + const messages: DisplayedMessage[] = [ + { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }, + { + type: "tool", + id: "tool-bash", + historyId: "assistant-1", + toolName: "bash", + toolCallId: "call-bash", + args: { script: "echo hi" }, + status: "redacted", + isPartial: true, + historySequence: 2, + streamSequence: 1, + isLastPartOfMessage: true, + }, + ]; + + expect(hasInterruptedStream(messages)).toBe(true); + }); + + it("returns true when completed tool output follows ask_user_question", () => { + const messages: DisplayedMessage[] = [ + { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }, + { + type: "tool", + id: "tool-todo", + historyId: "assistant-1", + toolName: "todo_write", + toolCallId: "call-todo", + args: { todos: [] }, + result: { success: true }, + status: "completed", + isPartial: true, + historySequence: 2, + streamSequence: 1, + isLastPartOfMessage: true, + }, + ]; + + expect(hasInterruptedStream(messages)).toBe(true); + }); + it("returns true when latest row is stream-error even if turn includes executing ask_user_question", () => { + const messages: DisplayedMessage[] = [ + { + type: "tool", + id: "tool-ask", + historyId: "assistant-1", + toolName: "ask_user_question", + toolCallId: "call-ask", + args: { questions: [] }, + status: "executing", + isPartial: true, + historySequence: 2, + streamSequence: 0, + isLastPartOfMessage: false, + }, + { + type: "stream-error", + id: "assistant-1-error", + historyId: "assistant-1", + error: "Connection dropped", + errorType: "network", + historySequence: 2, + }, + ]; + + expect(hasInterruptedStream(messages)).toBe(true); + }); + + it("returns false when authoritative awaitingUserQuestion flag is true", () => { + const messages: DisplayedMessage[] = [ + { + type: "assistant", + id: "assistant-tail", + historyId: "assistant-1", + content: "Please answer above.", + historySequence: 2, + streamSequence: 0, + isStreaming: false, + isPartial: true, + isLastPartOfMessage: true, + isCompacted: false, + isIdleCompacted: false, + }, + ]; + + expect(hasInterruptedStream(messages, null, null, null, true)).toBe(false); + expect(isEligibleForAutoRetry(messages, null, null, null, true)).toBe(false); + }); it("returns true for partial tool message", () => { const messages: DisplayedMessage[] = [ { diff --git a/src/common/utils/messages/retryEligibility.ts b/src/common/utils/messages/retryEligibility.ts index bb1ac38e99..139f796f8e 100644 --- a/src/common/utils/messages/retryEligibility.ts +++ b/src/common/utils/messages/retryEligibility.ts @@ -114,6 +114,105 @@ export function getLastNonDecorativeMessage( return undefined; } +/** + * Check whether the latest non-decorative turn is intentionally waiting on + * ask_user_question input. + * + * We scope this to the latest historyId turn so stale historical questions do + * not suppress interruption/retry UI once conversation has moved on. + */ +export function hasExecutingAskUserQuestionInLatestTurn(messages: DisplayedMessage[]): boolean { + const lastMessage = (() => { + const latest = getLastNonDecorativeMessage(messages); + if (latest?.type !== "plan-display") { + return latest; + } + + // /plan previews are ephemeral transcript rows and should not redefine the + // latest actionable turn when inferring pending ask_user_question state. + for (let i = messages.length - 1; i >= 0; i--) { + const candidate = messages[i]; + if ( + candidate.type === "plan-display" || + candidate.type === "history-hidden" || + candidate.type === "workspace-init" || + candidate.type === "compaction-boundary" + ) { + continue; + } + return candidate; + } + + return undefined; + })(); + + if (!lastMessage || !("historyId" in lastMessage)) { + return false; + } + + // If the latest visible row is a stream error, preserve interruption/retry UX. + if (lastMessage.type === "stream-error") { + return false; + } + + const latestHistoryId = lastMessage.historyId; + const isLatestTurnProgressRow = (message: DisplayedMessage): boolean => { + switch (message.type) { + case "tool": + return ( + message.status === "executing" || + message.status === "pending" || + message.status === "interrupted" || + message.status === "failed" || + message.status === "completed" || + message.status === "redacted" + ); + case "assistant": + case "reasoning": + return message.isPartial === true; + default: + return false; + } + }; + + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if ( + message.type === "plan-display" || + message.type === "history-hidden" || + message.type === "workspace-init" || + message.type === "compaction-boundary" + ) { + continue; + } + + if (!("historyId" in message)) { + continue; + } + + if (message.historyId !== latestHistoryId) { + break; + } + + // Latest error should keep retry/interruption affordances visible. + if (message.type === "stream-error") { + return false; + } + + if (!isLatestTurnProgressRow(message)) { + continue; + } + + return ( + message.type === "tool" && + message.toolName === "ask_user_question" && + message.status === "executing" + ); + } + + return false; +} + /** * Check if messages contain an interrupted stream * @@ -132,7 +231,8 @@ export function getLastNonDecorativeMessage( function computeHasInterruptedStream( messages: DisplayedMessage[], pendingStreamStartTime: number | null = null, - runtimeStatus: RuntimeStatusEvent | null = null + runtimeStatus: RuntimeStatusEvent | null = null, + awaitingUserQuestion = false ): boolean { if (messages.length === 0) return false; @@ -166,16 +266,20 @@ function computeHasInterruptedStream( return false; } + // WorkspaceStore derives awaitingUserQuestion from untruncated history. When + // provided, trust that authoritative signal so display truncation cannot + // re-enable interruption/retry UI for intentionally pending questions. + if (awaitingUserQuestion) { + return false; + } + // ask_user_question is a special case: an unfinished tool call represents an // intentional "waiting for user input" state, not a stream interruption. // - // Treating it as interrupted causes RetryBarrier + auto-resume to fire on app - // restart, which re-runs the LLM call and re-asks the questions. - if ( - lastMessage.type === "tool" && - lastMessage.toolName === "ask_user_question" && - lastMessage.status === "executing" - ) { + // We suppress interruption/retry for the entire latest turn when it contains + // an executing ask_user_question call, including cases where later parts in + // the same turn were emitted after the question. + if (hasExecutingAskUserQuestionInLatestTurn(messages)) { return false; } @@ -201,12 +305,14 @@ export function getInterruptionContext( messages: DisplayedMessage[], pendingStreamStartTime: number | null = null, runtimeStatus: RuntimeStatusEvent | null = null, - lastAbortReason: StreamAbortReasonSnapshot | null = null + lastAbortReason: StreamAbortReasonSnapshot | null = null, + awaitingUserQuestion = false ): InterruptionContext { const hasInterrupted = computeHasInterruptedStream( messages, pendingStreamStartTime, - runtimeStatus + runtimeStatus, + awaitingUserQuestion ); if (!hasInterrupted) { @@ -243,10 +349,16 @@ export function hasInterruptedStream( messages: DisplayedMessage[], pendingStreamStartTime: number | null = null, runtimeStatus: RuntimeStatusEvent | null = null, - lastAbortReason: StreamAbortReasonSnapshot | null = null + lastAbortReason: StreamAbortReasonSnapshot | null = null, + awaitingUserQuestion = false ): boolean { - return getInterruptionContext(messages, pendingStreamStartTime, runtimeStatus, lastAbortReason) - .hasInterruptedStream; + return getInterruptionContext( + messages, + pendingStreamStartTime, + runtimeStatus, + lastAbortReason, + awaitingUserQuestion + ).hasInterruptedStream; } /** @@ -264,8 +376,14 @@ export function isEligibleForAutoRetry( messages: DisplayedMessage[], pendingStreamStartTime: number | null = null, runtimeStatus: RuntimeStatusEvent | null = null, - lastAbortReason: StreamAbortReasonSnapshot | null = null + lastAbortReason: StreamAbortReasonSnapshot | null = null, + awaitingUserQuestion = false ): boolean { - return getInterruptionContext(messages, pendingStreamStartTime, runtimeStatus, lastAbortReason) - .isEligibleForAutoRetry; + return getInterruptionContext( + messages, + pendingStreamStartTime, + runtimeStatus, + lastAbortReason, + awaitingUserQuestion + ).isEligibleForAutoRetry; } diff --git a/src/node/services/agentSession.startupAutoRetry.test.ts b/src/node/services/agentSession.startupAutoRetry.test.ts index dbe13b44b7..0b16269fe3 100644 --- a/src/node/services/agentSession.startupAutoRetry.test.ts +++ b/src/node/services/agentSession.startupAutoRetry.test.ts @@ -1018,4 +1018,290 @@ describe("AgentSession startup auto-retry recovery", () => { session.dispose(); }); + + test("schedules startup auto-retry when pending ask_user_question turn has error metadata", async () => { + const workspaceId = "startup-retry-ask-user-with-error"; + const { session, historyService, events, cleanup } = await createSessionBundle(workspaceId); + cleanups.push(cleanup); + + const writePartialResult = await historyService.writePartial( + workspaceId, + createMuxMessage( + "assistant-1", + "assistant", + "", + { + timestamp: Date.now(), + model: "anthropic:claude-sonnet-4-5", + partial: true, + agentId: "exec", + error: "Connection dropped", + errorType: "network", + }, + [ + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-ask", + toolName: "ask_user_question", + input: { question: "Name?" }, + }, + ] + ) + ); + expect(writePartialResult.success).toBe(true); + + const startupRetryModelHint = await session.getStartupAutoRetryModelHint(); + expect(startupRetryModelHint).toBe("anthropic:claude-sonnet-4-5"); + + session.ensureStartupAutoRetryCheck(); + + const startupCheckPromise = ( + session as unknown as { startupAutoRetryCheckPromise: Promise | null } + ).startupAutoRetryCheckPromise; + await startupCheckPromise; + + expect(events.some((event) => event.type === "auto-retry-scheduled")).toBe(true); + + session.dispose(); + }); + + test("schedules startup auto-retry when a failed tool follows ask_user_question", async () => { + const workspaceId = "startup-retry-ask-user-failed-tail"; + const { session, historyService, events, cleanup } = await createSessionBundle(workspaceId); + cleanups.push(cleanup); + + const appendUserResult = await historyService.appendToHistory( + workspaceId, + createMuxMessage("user-1", "user", "Hello", { + timestamp: Date.now(), + }) + ); + expect(appendUserResult.success).toBe(true); + + const writePartialResult = await historyService.writePartial( + workspaceId, + createMuxMessage( + "assistant-1", + "assistant", + "", + { + timestamp: Date.now(), + model: "anthropic:claude-sonnet-4-5", + partial: true, + agentId: "exec", + }, + [ + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-ask", + toolName: "ask_user_question", + input: { question: "Name?" }, + }, + { + type: "dynamic-tool", + state: "output-available", + toolCallId: "tool-todo", + toolName: "todo_write", + input: { todos: [] }, + output: { success: false, error: "write failed" }, + }, + ] + ) + ); + expect(writePartialResult.success).toBe(true); + + const startupRetryModelHint = await session.getStartupAutoRetryModelHint(); + expect(startupRetryModelHint).toBe("anthropic:claude-sonnet-4-5"); + + session.ensureStartupAutoRetryCheck(); + + const startupCheckPromise = ( + session as unknown as { startupAutoRetryCheckPromise: Promise | null } + ).startupAutoRetryCheckPromise; + await startupCheckPromise; + + expect(events.some((event) => event.type === "auto-retry-scheduled")).toBe(true); + + session.dispose(); + }); + + test("does not schedule startup auto-retry when sibling input-available tool follows ask_user_question", async () => { + const workspaceId = "startup-retry-ask-user-sibling-pending"; + const { session, historyService, events, cleanup } = await createSessionBundle(workspaceId); + cleanups.push(cleanup); + + const appendUserResult = await historyService.appendToHistory( + workspaceId, + createMuxMessage("user-1", "user", "Hello", { + timestamp: Date.now(), + }) + ); + expect(appendUserResult.success).toBe(true); + + const writePartialResult = await historyService.writePartial( + workspaceId, + createMuxMessage( + "assistant-1", + "assistant", + "", + { + timestamp: Date.now(), + model: "anthropic:claude-sonnet-4-5", + partial: true, + agentId: "exec", + }, + [ + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-ask", + toolName: "ask_user_question", + input: { question: "Name?" }, + }, + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-todo", + toolName: "todo_write", + input: { todos: [] }, + }, + ] + ) + ); + expect(writePartialResult.success).toBe(true); + + const startupRetryModelHint = await session.getStartupAutoRetryModelHint(); + expect(startupRetryModelHint).toBeNull(); + + session.ensureStartupAutoRetryCheck(); + + const startupCheckPromise = ( + session as unknown as { startupAutoRetryCheckPromise: Promise | null } + ).startupAutoRetryCheckPromise; + await startupCheckPromise; + + expect(events.some((event) => event.type === "auto-retry-scheduled")).toBe(false); + + session.dispose(); + }); + + test("schedules startup auto-retry when completed tool output follows ask_user_question", async () => { + const workspaceId = "startup-retry-ask-user-completed-tail"; + const { session, historyService, events, cleanup } = await createSessionBundle(workspaceId); + cleanups.push(cleanup); + + const appendUserResult = await historyService.appendToHistory( + workspaceId, + createMuxMessage("user-1", "user", "Hello", { + timestamp: Date.now(), + }) + ); + expect(appendUserResult.success).toBe(true); + + const writePartialResult = await historyService.writePartial( + workspaceId, + createMuxMessage( + "assistant-1", + "assistant", + "", + { + timestamp: Date.now(), + model: "anthropic:claude-sonnet-4-5", + partial: true, + agentId: "exec", + }, + [ + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-ask", + toolName: "ask_user_question", + input: { question: "Name?" }, + }, + { + type: "dynamic-tool", + state: "output-available", + toolCallId: "tool-todo", + toolName: "todo_write", + input: { todos: [] }, + output: { success: true }, + }, + ] + ) + ); + expect(writePartialResult.success).toBe(true); + + const startupRetryModelHint = await session.getStartupAutoRetryModelHint(); + expect(startupRetryModelHint).toBe("anthropic:claude-sonnet-4-5"); + + session.ensureStartupAutoRetryCheck(); + + const startupCheckPromise = ( + session as unknown as { startupAutoRetryCheckPromise: Promise | null } + ).startupAutoRetryCheckPromise; + await startupCheckPromise; + + expect(events.some((event) => event.type === "auto-retry-scheduled")).toBe(true); + + session.dispose(); + }); + + test("schedules startup auto-retry when text follows ask_user_question", async () => { + const workspaceId = "startup-retry-ask-user-text-tail"; + const { session, historyService, events, cleanup } = await createSessionBundle(workspaceId); + cleanups.push(cleanup); + + const appendUserResult = await historyService.appendToHistory( + workspaceId, + createMuxMessage("user-1", "user", "Hello", { + timestamp: Date.now(), + }) + ); + expect(appendUserResult.success).toBe(true); + + const writePartialResult = await historyService.writePartial( + workspaceId, + createMuxMessage( + "assistant-1", + "assistant", + "", + { + timestamp: Date.now(), + model: "anthropic:claude-sonnet-4-5", + partial: true, + agentId: "exec", + }, + [ + { + type: "dynamic-tool", + state: "input-available", + toolCallId: "tool-ask", + toolName: "ask_user_question", + input: { question: "Name?" }, + }, + { + type: "text", + text: "Continuing with interrupted output", + }, + ] + ) + ); + expect(writePartialResult.success).toBe(true); + + const startupRetryModelHint = await session.getStartupAutoRetryModelHint(); + expect(startupRetryModelHint).toBe("anthropic:claude-sonnet-4-5"); + + session.ensureStartupAutoRetryCheck(); + + const startupCheckPromise = ( + session as unknown as { startupAutoRetryCheckPromise: Promise | null } + ).startupAutoRetryCheckPromise; + await startupCheckPromise; + + expect(events.some((event) => event.type === "auto-retry-scheduled")).toBe(true); + + session.dispose(); + }); }); diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index 2e8e5d6f78..f07681bf92 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -943,12 +943,54 @@ export class AgentSession { return false; } - return message.parts.some( - (part) => + if (message.metadata?.error != null) { + return false; + } + + let latestPendingQuestionIndex = -1; + for (let partIndex = 0; partIndex < message.parts.length; partIndex += 1) { + const part = message.parts[partIndex]; + if ( part.type === "dynamic-tool" && part.toolName === "ask_user_question" && part.state === "input-available" + ) { + latestPendingQuestionIndex = partIndex; + } + } + + if (latestPendingQuestionIndex === -1) { + return false; + } + + const hasLaterResolvedToolPart = message.parts.some( + (part, partIndex) => + partIndex > latestPendingQuestionIndex && + part.type === "dynamic-tool" && + (part.state === "output-available" || part.state === "output-redacted") ); + if (hasLaterResolvedToolPart) { + return false; + } + + if (message.metadata?.partial === true) { + const hasLaterTextOrReasoning = message.parts.some((part, partIndex) => { + if (partIndex <= latestPendingQuestionIndex) { + return false; + } + + return ( + (part.type === "text" && part.text.length > 0) || + (part.type === "reasoning" && part.text.length > 0) + ); + }); + + if (hasLaterTextOrReasoning) { + return false; + } + } + + return true; } private isSyntheticSnapshotUserMessage(message: MuxMessage): boolean {