From 351c772ec0aaf951ea8acc9b21ea7d0132f11edd Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 19 Mar 2026 09:56:13 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20remove=20plan=20suba?= =?UTF-8?q?gent=20auto-handoff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove plan-mode subagent auto-handoff after propose_plan, drop the executor-routing settings and backend router, reject plan-like task creation, and update related docs/stories/tests. --- _Generated with `mux` • Model: `openai:gpt-5.4` • Thinking: `high` • Cost: `$2.78`_ --- docs/AGENTS.md | 11 +- docs/agents/index.mdx | 26 +- .../components/icons/EmojiIcon/EmojiIcon.tsx | 1 - .../Settings/Sections/TasksSection.tsx | 41 -- .../ProposePlanToolCall.stories.tsx | 83 --- src/common/config/schemas/appConfigOnDisk.ts | 4 +- src/common/config/schemas/taskSettings.ts | 6 - src/common/constants/planAutoRoutingStatus.ts | 4 - src/common/types/tasks.test.ts | 30 +- src/common/types/tasks.ts | 49 +- src/node/builtinAgents/orchestrator.md | 24 +- src/node/builtinAgents/plan.md | 2 +- .../builtInAgentContent.generated.ts | 4 +- .../builtInSkillContent.generated.ts | 37 +- src/node/services/planExecutorRouter.test.ts | 107 --- src/node/services/planExecutorRouter.ts | 147 ---- src/node/services/taskService.test.ts | 635 +++++------------ src/node/services/taskService.ts | 652 ++++-------------- .../ipc/streaming/sendMessage.context.test.ts | 132 ++-- 19 files changed, 444 insertions(+), 1551 deletions(-) delete mode 100644 src/common/constants/planAutoRoutingStatus.ts delete mode 100644 src/node/services/planExecutorRouter.test.ts delete mode 100644 src/node/services/planExecutorRouter.ts diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 835e971cc0..946024c7e9 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -59,6 +59,7 @@ description: Agent instructions for AI assistants working on the Mux codebase Use `agent-browser` for web automation. 
Run `agent-browser --help` for all commands. Core workflow: + 1. `agent-browser open ` - Navigate to page 2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2) 3. `agent-browser click @e1` / `fill @e2 "text"` - Interact using refs @@ -68,8 +69,8 @@ Core workflow: - If a PR has Codex review comments, address + resolve them, then re-request review by commenting `@codex review` on the PR. - Prefer `gh` CLI for GitHub interactions over manual web/curl flows. -- In Orchestrator mode, delegate implementation/verification commands to `exec` or `explore` sub-agents and integrate their patches; do not bypass delegation with direct local edits. -- In Orchestrator mode, route higher-complexity implementation tasks to `plan` sub-agents so they can research and produce a precise plan before auto-handoff to implementation. +- When delegation is required by the active mode, use `exec` or `explore` sub-agents as directed and integrate their patches; do not bypass delegation with direct local edits. +- Keep implementation tasks on `exec` sub-agents; use a top-level plan workspace when you need a separate planning phase before delegation. - User preference: when work is already on an open PR, push branch updates at the end of each completed change set so the PR stays current. - **PR creation gate:** Do **not** open/create a pull request unless the user explicitly asks (e.g., "open a PR", "create PR", "submit this"). By default, complete local validation, commit/push branch updates as requested, and let the user review before deciding whether to open a PR. @@ -81,11 +82,11 @@ Core workflow: When a PR exists, you MUST remain in this loop until the PR is fully ready: 1. Push your latest fixes. -2. Run local validation (`make static-check` and targeted tests as needed); in Orchestrator mode, delegate command execution to sub-agents. +2. 
Run local validation (`make static-check` and targeted tests as needed); delegate command execution to sub-agents when the active mode requires it. 3. Request review with `@codex review`. 4. Run `./scripts/wait_pr_ready.sh ` (which must execute `./scripts/wait_pr_checks.sh --once` while checks are pending). -5. If Codex leaves comments, address them (delegate fixes in Orchestrator mode), resolve threads with `./scripts/resolve_pr_comment.sh `, push, and repeat. -6. If checks/mergeability fail, fix issues locally (delegate fixes in Orchestrator mode), push, and repeat. +5. If Codex leaves comments, address them (delegating fixes when required by the active mode), resolve threads with `./scripts/resolve_pr_comment.sh `, push, and repeat. +6. If checks/mergeability fail, fix issues locally (delegating fixes when required by the active mode), push, and repeat. The only early-stop exception is when the reviewer is clearly misunderstanding the intended change and further churn would be counterproductive. In that case, leave a clarifying PR comment and pause for human direction. diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index c8f25d66db..df3714e24a 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -436,7 +436,7 @@ When a plan is present (default): - Treat the accepted plan as the source of truth. Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable. - Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). This produces better implementation task briefs. 
- Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user. -- Convert the plan into concrete implementation subtasks and start delegation (`exec` for low complexity, `plan` for higher complexity). +- Convert the plan into concrete implementation subtasks and start delegation with `exec` sub-agents. What you are allowed to do directly in this workspace: @@ -452,8 +452,8 @@ Hard rules (delegate-first): - Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence. - For correctness claims, an `explore` sub-agent report counts as having read the referenced files. - **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination). -- **Do not implement features/bugfixes directly here.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end. -- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec`/`plan` sub-agents instead of implementing changes here. +- **Do not implement features/bugfixes directly here.** Spawn `exec` sub-agents and have them complete the work end-to-end. +- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. 
If direct checks fail due to code issues, delegate fixes to `exec` sub-agents instead of implementing changes here. - **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches. Delegation guide: @@ -474,12 +474,10 @@ Delegation guide: Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory. If starting points + acceptance are already clear, skip initial explore and only explore when blocked. - Create one or more git commits before `agent_report`. -- Use `plan` for higher-complexity subtasks that touch multiple files/locations, require non-trivial investigation, or have an unclear implementation approach. - - Default to `plan` when a subtask needs coordinated updates across multiple locations, unless the edits are mechanical and already fully specified. - - For higher-complexity implementation work, prefer `plan` over `exec` so the sub-agent can do targeted research and produce a precise plan before implementation begins. +- Use `exec` for implementation subtasks, including higher-complexity work. + - For higher-complexity work, do a small amount of parent-side framing first so the `exec` brief includes the goal, constraints, sequencing, and key files. - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery. - - Plan subtasks automatically hand off to implementation after a successful `propose_plan`; expect the usual task completion output once implementation finishes. - - For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions. 
+ - If the implementation approach is still unclear after targeted exploration, switch to a top-level plan workspace before continuing delegation instead of spawning a plan sub-agent. - Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). The desktop agent enforces a grounding discipline that keeps visual context local. Recommended Orchestrator → Exec task brief template: @@ -505,7 +503,7 @@ Recommended Orchestrator → Exec task brief template: If starting points + acceptance are already clear, skip initial explore and only explore when blocked. - Create one or more git commits before `agent_report`. -Dependency analysis (required before spawning implementation tasks — `exec` or `plan`): +Dependency analysis (required before spawning implementation tasks): - For each candidate subtask, write: - Outputs: files/targets/artifacts introduced/renamed/generated @@ -526,9 +524,9 @@ Example dependency chain (schema download → generation): Patch integration loop (default): 1. Identify a batch of independent subtasks. -2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity). +2. Spawn one `exec` implementation sub-agent task per subtask with `run_in_background: true`. 3. Await the batch via `task_await`. -4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches one at a time: +4. For each successful implementation task, integrate patches one at a time: - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`. - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results. 
- Dry-run apply: `task_apply_git_patch` with `dry_run: true`. @@ -544,11 +542,11 @@ Patch integration loop (default): - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely. - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks. - PASS: summary-only (no long logs). - - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify. + - FAIL: include the failing command + key error lines; then delegate a fix to `exec` and re-verify. Sequential protocol (only for dependency chains): -1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`. +1. Spawn the prerequisite implementation task with `agentId: "exec"` and `run_in_background: false`. 2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application. 3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress). 4. Only after the patch is applied, spawn the dependent implementation task. 
@@ -579,7 +577,7 @@ description: Create a plan before coding ui: color: var(--color-plan-mode) subagent: - runnable: true + runnable: false tools: add: # Allow all tools by default (includes MCP tools which have dynamic names) diff --git a/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx b/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx index b9da8229ea..6d5488f5ee 100644 --- a/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx +++ b/src/browser/components/icons/EmojiIcon/EmojiIcon.tsx @@ -48,7 +48,6 @@ const EMOJI_TO_ICON: Record = { "🔗": Link, "🔄": RefreshCw, "🧪": Beaker, - // Used by auto-handoff routing status while selecting the executor. "🤔": CircleHelp, // Directions diff --git a/src/browser/features/Settings/Sections/TasksSection.tsx b/src/browser/features/Settings/Sections/TasksSection.tsx index fbd691197e..cde49e8b6d 100644 --- a/src/browser/features/Settings/Sections/TasksSection.tsx +++ b/src/browser/features/Settings/Sections/TasksSection.tsx @@ -34,9 +34,7 @@ import { import { DEFAULT_TASK_SETTINGS, TASK_SETTINGS_LIMITS, - isPlanSubagentExecutorRouting, normalizeTaskSettings, - type PlanSubagentExecutorRouting, type TaskSettings, } from "@/common/types/tasks"; import { getThinkingOptionLabel, type ThinkingLevel } from "@/common/types/thinking"; @@ -173,8 +171,6 @@ function areTaskSettingsEqual(a: TaskSettings, b: TaskSettings): boolean { a.maxParallelAgentTasks === b.maxParallelAgentTasks && a.maxTaskNestingDepth === b.maxTaskNestingDepth && a.proposePlanImplementReplacesChatHistory === b.proposePlanImplementReplacesChatHistory && - a.planSubagentExecutorRouting === b.planSubagentExecutorRouting && - a.planSubagentDefaultsToOrchestrator === b.planSubagentDefaultsToOrchestrator && a.bashOutputCompactionMinLines === b.bashOutputCompactionMinLines && a.bashOutputCompactionMinTotalBytes === b.bashOutputCompactionMinTotalBytes && a.bashOutputCompactionMaxKeptLines === b.bashOutputCompactionMaxKeptLines && @@ -499,25 +495,10 @@ export function 
TasksSection() { ); }; - const setPlanSubagentExecutorRouting = (value: string) => { - if (!isPlanSubagentExecutorRouting(value)) { - return; - } - - setTaskSettings((prev) => - normalizeTaskSettings({ - ...prev, - planSubagentExecutorRouting: value, - }) - ); - }; const setNewWorkspaceDefaultAgentId = (agentId: string) => { setGlobalDefaultAgentIdRaw(coerceAgentId(agentId)); }; - const planSubagentExecutorRouting: PlanSubagentExecutorRouting = - taskSettings.planSubagentExecutorRouting ?? "exec"; - const setAgentModel = (agentId: string, value: string) => { setAgentAiDefaults((prev) => updateAgentDefaultEntry(prev, agentId, (updated) => { @@ -917,28 +898,6 @@ export function TasksSection() { aria-label="Toggle plan Implement replaces conversation with plan" /> - -
-
-
Plan sub-agents: executor routing
-
- Choose how plan sub-agent tasks route after propose_plan. -
-
- -
{saveError ?
{saveError}
: null} diff --git a/src/browser/features/Tools/ProposePlan/ProposePlanToolCall.stories.tsx b/src/browser/features/Tools/ProposePlan/ProposePlanToolCall.stories.tsx index 643877b558..b210f5ea8e 100644 --- a/src/browser/features/Tools/ProposePlan/ProposePlanToolCall.stories.tsx +++ b/src/browser/features/Tools/ProposePlan/ProposePlanToolCall.stories.tsx @@ -6,12 +6,7 @@ import { createUserMessage, createAssistantMessage, createProposePlanTool, - createStatusTool, } from "@/browser/stories/mockFactory"; -import { - PLAN_AUTO_ROUTING_STATUS_EMOJI, - PLAN_AUTO_ROUTING_STATUS_MESSAGE, -} from "@/common/constants/planAutoRoutingStatus"; const meta = { ...appMeta, title: "App/Chat/Tools/ProposePlan" }; export default meta; @@ -167,84 +162,6 @@ graph TD }, }; -/** - * Captures the handoff pause after a plan is presented and before the executor stream starts. - * - * This reproduces the visual state where the sidebar shows "Deciding execution strategy…" - * while the proposed plan remains visible in the conversation. - */ -export const ProposePlanAutoRoutingDecisionGap: AppStory = { - render: () => ( - - setupSimpleChatStory({ - workspaceId: "ws-plan-auto-routing-gap", - workspaceName: "feature/plan-auto-routing", - messages: [ - createUserMessage( - "msg-1", - "Plan and implement a safe migration rollout for auth tokens.", - { - historySequence: 1, - timestamp: STABLE_TIMESTAMP - 240000, - } - ), - createAssistantMessage("msg-2", "Here is the implementation plan.", { - historySequence: 2, - timestamp: STABLE_TIMESTAMP - 230000, - toolCalls: [ - createProposePlanTool( - "call-plan-1", - `# Auth Token Migration Rollout - -## Goals - -- Migrate token validation to the new signing service. -- Maintain compatibility during rollout. -- Keep rollback simple and low risk. - -## Steps - -1. Add dual-read token validation behind a feature flag. -2. Ship telemetry for token verification outcomes. -3. Enable new validator for 10% of traffic. -4. Ramp to 100% after stability checks. 
-5. Remove legacy validator once metrics stay healthy. - -## Rollback - -- Disable the rollout flag to return to legacy validation immediately. -- Keep telemetry running to confirm recovery.` - ), - ], - }), - createAssistantMessage("msg-3", "Selecting the right executor for this plan.", { - historySequence: 3, - timestamp: STABLE_TIMESTAMP - 220000, - toolCalls: [ - createStatusTool( - "call-status-1", - PLAN_AUTO_ROUTING_STATUS_EMOJI, - PLAN_AUTO_ROUTING_STATUS_MESSAGE - ), - ], - }), - ], - }) - } - /> - ), - parameters: { - docs: { - description: { - story: - "Chromatic regression story for the plan auto-routing gap: after `propose_plan` succeeds, " + - "the sidebar stays in a working state with a 'Deciding execution strategy…' status before executor kickoff.", - }, - }, - }, -}; - /** * Mobile viewport version of ProposePlan. * diff --git a/src/common/config/schemas/appConfigOnDisk.ts b/src/common/config/schemas/appConfigOnDisk.ts index ed65a8208a..b061bf87a1 100644 --- a/src/common/config/schemas/appConfigOnDisk.ts +++ b/src/common/config/schemas/appConfigOnDisk.ts @@ -8,8 +8,8 @@ import { TaskSettingsSchema } from "./taskSettings"; export { RuntimeEnablementOverridesSchema } from "../../schemas/runtimeEnablement"; export type { RuntimeEnablementOverrides } from "../../schemas/runtimeEnablement"; -export { PlanSubagentExecutorRoutingSchema, TaskSettingsSchema } from "./taskSettings"; -export type { PlanSubagentExecutorRouting, TaskSettings } from "./taskSettings"; +export { TaskSettingsSchema } from "./taskSettings"; +export type { TaskSettings } from "./taskSettings"; export const AgentAiDefaultsEntrySchema = z.object({ modelString: z.string().optional(), diff --git a/src/common/config/schemas/taskSettings.ts b/src/common/config/schemas/taskSettings.ts index 6158ea238e..122ab5a6e0 100644 --- a/src/common/config/schemas/taskSettings.ts +++ b/src/common/config/schemas/taskSettings.ts @@ -12,10 +12,6 @@ export const SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS = { 
bashOutputCompactionTimeoutMs: { min: 1_000, max: 120_000, default: 5_000 }, } as const; -export const PlanSubagentExecutorRoutingSchema = z.enum(["exec", "orchestrator", "auto"]); - -export type PlanSubagentExecutorRouting = z.infer; - export const TaskSettingsSchema = z.object({ maxParallelAgentTasks: z .number() @@ -30,8 +26,6 @@ export const TaskSettingsSchema = z.object({ .max(TASK_SETTINGS_LIMITS.maxTaskNestingDepth.max) .optional(), proposePlanImplementReplacesChatHistory: z.boolean().optional(), - planSubagentExecutorRouting: PlanSubagentExecutorRoutingSchema.optional(), - planSubagentDefaultsToOrchestrator: z.boolean().optional(), bashOutputCompactionMinLines: z .number() .int() diff --git a/src/common/constants/planAutoRoutingStatus.ts b/src/common/constants/planAutoRoutingStatus.ts deleted file mode 100644 index f23d34cbb5..0000000000 --- a/src/common/constants/planAutoRoutingStatus.ts +++ /dev/null @@ -1,4 +0,0 @@ -// Auto plan->executor routing can spend up to the router timeout selecting an executor. -// We surface this as a transient sidebar status so users know the handoff is still progressing. 
-export const PLAN_AUTO_ROUTING_STATUS_EMOJI = "🤔"; -export const PLAN_AUTO_ROUTING_STATUS_MESSAGE = "Deciding execution strategy…"; diff --git a/src/common/types/tasks.test.ts b/src/common/types/tasks.test.ts index b06a7ec172..015e02c2f4 100644 --- a/src/common/types/tasks.test.ts +++ b/src/common/types/tasks.test.ts @@ -53,36 +53,12 @@ describe("normalizeTaskSettings", () => { expect(normalized).toEqual(DEFAULT_TASK_SETTINGS); }); - test("preserves explicit planSubagentExecutorRouting values", () => { + test("ignores removed plan subagent handoff settings", () => { const normalized = normalizeTaskSettings({ - planSubagentExecutorRouting: "auto", - }); - - expect(normalized.planSubagentExecutorRouting).toBe("auto"); - expect(normalized.planSubagentDefaultsToOrchestrator).toBe(false); - }); - - test("migrates deprecated planSubagentDefaultsToOrchestrator when routing is unset", () => { - expect( - normalizeTaskSettings({ - planSubagentDefaultsToOrchestrator: true, - }).planSubagentExecutorRouting - ).toBe("orchestrator"); - - expect( - normalizeTaskSettings({ - planSubagentDefaultsToOrchestrator: false, - }).planSubagentExecutorRouting - ).toBe("exec"); - }); - - test("prefers planSubagentExecutorRouting when both new and deprecated fields are set", () => { - const normalized = normalizeTaskSettings({ - planSubagentExecutorRouting: "exec", + planSubagentExecutorRouting: "orchestrator", planSubagentDefaultsToOrchestrator: true, }); - expect(normalized.planSubagentExecutorRouting).toBe("exec"); - expect(normalized.planSubagentDefaultsToOrchestrator).toBe(false); + expect(normalized).toEqual(DEFAULT_TASK_SETTINGS); }); }); diff --git a/src/common/types/tasks.ts b/src/common/types/tasks.ts index 4868b25ad1..49d51e654f 100644 --- a/src/common/types/tasks.ts +++ b/src/common/types/tasks.ts @@ -1,7 +1,4 @@ -import type { - PlanSubagentExecutorRouting, - TaskSettings as TaskSettingsOnDisk, -} from "@/common/config/schemas/taskSettings"; +import type { TaskSettings as 
TaskSettingsOnDisk } from "@/common/config/schemas/taskSettings"; import { SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, TASK_SETTINGS_LIMITS, @@ -13,7 +10,7 @@ import type { import assert from "@/common/utils/assert"; import { coerceThinkingLevel, type ThinkingLevel } from "./thinking"; -export type { PlanSubagentExecutorRouting, SubagentAiDefaults, SubagentAiDefaultsEntry }; +export type { SubagentAiDefaults, SubagentAiDefaultsEntry }; export { SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS, TASK_SETTINGS_LIMITS, @@ -29,8 +26,6 @@ export const DEFAULT_TASK_SETTINGS: TaskSettings = { maxParallelAgentTasks: TASK_SETTINGS_LIMITS.maxParallelAgentTasks.default, maxTaskNestingDepth: TASK_SETTINGS_LIMITS.maxTaskNestingDepth.default, proposePlanImplementReplacesChatHistory: false, - planSubagentExecutorRouting: "auto", - planSubagentDefaultsToOrchestrator: false, bashOutputCompactionMinLines: SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default, @@ -84,12 +79,6 @@ function clampInt(value: unknown, fallback: number, min: number, max: number): n return rounded; } -export function isPlanSubagentExecutorRouting( - value: unknown -): value is PlanSubagentExecutorRouting { - return value === "exec" || value === "orchestrator" || value === "auto"; -} - export function normalizeTaskSettings(raw: unknown): TaskSettings { const record = raw && typeof raw === "object" ? (raw as Record) : ({} as const); @@ -111,28 +100,6 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { ? record.proposePlanImplementReplacesChatHistory : (DEFAULT_TASK_SETTINGS.proposePlanImplementReplacesChatHistory ?? false); - const normalizedPlanSubagentExecutorRouting = isPlanSubagentExecutorRouting( - record.planSubagentExecutorRouting - ) - ? record.planSubagentExecutorRouting - : undefined; - - const migratedPlanSubagentExecutorRouting = - normalizedPlanSubagentExecutorRouting ?? - (typeof record.planSubagentDefaultsToOrchestrator === "boolean" - ? 
record.planSubagentDefaultsToOrchestrator - ? "orchestrator" - : "exec" - : undefined); - - const planSubagentExecutorRouting = - migratedPlanSubagentExecutorRouting ?? - DEFAULT_TASK_SETTINGS.planSubagentExecutorRouting ?? - "exec"; - - // Keep the deprecated boolean in sync for downgrade compatibility. - const planSubagentDefaultsToOrchestrator = planSubagentExecutorRouting === "orchestrator"; - const bashOutputCompactionMinLines = clampInt( record.bashOutputCompactionMinLines, SYSTEM1_BASH_OUTPUT_COMPACTION_LIMITS.bashOutputCompactionMinLines.default, @@ -168,8 +135,6 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { maxParallelAgentTasks, maxTaskNestingDepth, proposePlanImplementReplacesChatHistory, - planSubagentExecutorRouting, - planSubagentDefaultsToOrchestrator, bashOutputCompactionMinLines, bashOutputCompactionMinTotalBytes, bashOutputCompactionMaxKeptLines, @@ -191,16 +156,6 @@ export function normalizeTaskSettings(raw: unknown): TaskSettings { "normalizeTaskSettings: proposePlanImplementReplacesChatHistory must be a boolean" ); - assert( - isPlanSubagentExecutorRouting(planSubagentExecutorRouting), - "normalizeTaskSettings: planSubagentExecutorRouting must be exec, orchestrator, or auto" - ); - - assert( - typeof planSubagentDefaultsToOrchestrator === "boolean", - "normalizeTaskSettings: planSubagentDefaultsToOrchestrator must be a boolean" - ); - assert( Number.isInteger(bashOutputCompactionMinLines), "normalizeTaskSettings: bashOutputCompactionMinLines must be an integer" diff --git a/src/node/builtinAgents/orchestrator.md b/src/node/builtinAgents/orchestrator.md index 94f2cef1ca..77f5a20b46 100644 --- a/src/node/builtinAgents/orchestrator.md +++ b/src/node/builtinAgents/orchestrator.md @@ -36,7 +36,7 @@ When a plan is present (default): - Treat the accepted plan as the source of truth. Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. 
Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable. - Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). This produces better implementation task briefs. - Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user. -- Convert the plan into concrete implementation subtasks and start delegation (`exec` for low complexity, `plan` for higher complexity). +- Convert the plan into concrete implementation subtasks and start delegation with `exec` sub-agents. What you are allowed to do directly in this workspace: @@ -52,8 +52,8 @@ Hard rules (delegate-first): - Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence. - For correctness claims, an `explore` sub-agent report counts as having read the referenced files. - **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination). -- **Do not implement features/bugfixes directly here.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end. -- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec`/`plan` sub-agents instead of implementing changes here. 
+- **Do not implement features/bugfixes directly here.** Spawn `exec` sub-agents and have them complete the work end-to-end. +- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec` sub-agents instead of implementing changes here. - **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches. Delegation guide: @@ -74,12 +74,10 @@ Delegation guide: Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory. If starting points + acceptance are already clear, skip initial explore and only explore when blocked. - Create one or more git commits before `agent_report`. -- Use `plan` for higher-complexity subtasks that touch multiple files/locations, require non-trivial investigation, or have an unclear implementation approach. - - Default to `plan` when a subtask needs coordinated updates across multiple locations, unless the edits are mechanical and already fully specified. - - For higher-complexity implementation work, prefer `plan` over `exec` so the sub-agent can do targeted research and produce a precise plan before implementation begins. +- Use `exec` for implementation subtasks, including higher-complexity work. + - For higher-complexity work, do a small amount of parent-side framing first so the `exec` brief includes the goal, constraints, sequencing, and key files. - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery. 
- - Plan subtasks automatically hand off to implementation after a successful `propose_plan`; expect the usual task completion output once implementation finishes. - - For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions. + - If the implementation approach is still unclear after targeted exploration, switch to a top-level plan workspace before continuing delegation instead of spawning a plan sub-agent. - Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). The desktop agent enforces a grounding discipline that keeps visual context local. Recommended Orchestrator → Exec task brief template: @@ -105,7 +103,7 @@ Recommended Orchestrator → Exec task brief template: If starting points + acceptance are already clear, skip initial explore and only explore when blocked. - Create one or more git commits before `agent_report`. -Dependency analysis (required before spawning implementation tasks — `exec` or `plan`): +Dependency analysis (required before spawning implementation tasks): - For each candidate subtask, write: - Outputs: files/targets/artifacts introduced/renamed/generated @@ -126,9 +124,9 @@ Example dependency chain (schema download → generation): Patch integration loop (default): 1. Identify a batch of independent subtasks. -2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity). +2. Spawn one `exec` implementation sub-agent task per subtask with `run_in_background: true`. 3. Await the batch via `task_await`. -4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches one at a time: +4. 
For each successful implementation task, integrate patches one at a time: - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`. - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results. - Dry-run apply: `task_apply_git_patch` with `dry_run: true`. @@ -144,11 +142,11 @@ Patch integration loop (default): - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely. - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks. - PASS: summary-only (no long logs). - - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify. + - FAIL: include the failing command + key error lines; then delegate a fix to `exec` and re-verify. Sequential protocol (only for dependency chains): -1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`. +1. Spawn the prerequisite implementation task with `agentId: "exec"` and `run_in_background: false`. 2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application. 3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress). 4. 
Only after the patch is applied, spawn the dependent implementation task. diff --git a/src/node/builtinAgents/plan.md b/src/node/builtinAgents/plan.md index 6b56b4c7a7..f520883894 100644 --- a/src/node/builtinAgents/plan.md +++ b/src/node/builtinAgents/plan.md @@ -4,7 +4,7 @@ description: Create a plan before coding ui: color: var(--color-plan-mode) subagent: - runnable: true + runnable: false tools: add: # Allow all tools by default (includes MCP tools which have dynamic names) diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index b906aa8448..e1dcc69acc 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -11,7 +11,7 @@ export const BUILTIN_AGENT_CONTENT = { "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent; skill tools are kept)\n remove:\n - file_edit_.*\n - task\n - task_apply_git_patch\n - task_.*\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT manually create, edit, delete, move, copy, or rename tracked files.\n- You MUST NOT stage/commit or otherwise modify git state.\n- You MUST NOT use redirect operators (>, >>) or heredocs to write to files.\n - 
Pipes are allowed for processing, but MUST NOT be used to write to files (for example via `tee`).\n- You MUST NOT run commands that are explicitly about modifying the filesystem or repo state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- You MAY run verification commands (fmt-check/lint/typecheck/test) even if they create build artifacts/caches, but they MUST NOT modify tracked files.\n - After running verification, check `git status --porcelain` and report if it is non-empty.\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash for read-only operations (rg, ls, git diff/show/log, etc.) and verification commands.\n", "mux": "---\nname: Chat With Mux\ndescription: Configure Mux settings, skills, and agent instructions\nui:\n hidden: true\n routable: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_agents_read\n - mux_agents_write\n - mux_config_read\n - mux_config_write\n - agent_skill_read\n - agent_skill_read_file\n - agent_skill_list\n - agent_skill_write\n - agent_skill_delete\n - skills_catalog_search\n - skills_catalog_read\n - ask_user_question\n - todo_read\n - todo_write\n - status_set\n - notify\n - analytics_query\n---\n\nYou are the **Mux system assistant**.\n\nYour tools are **context-aware** — they automatically target the right scope:\n\n**In a project workspace** (routed via Auto):\n\n- **Project skills**: Create, update, list, and delete project skills (`.mux/skills/`)\n- **Project instructions**: Edit the project's `AGENTS.md`\n\n**In the system workspace** (Chat with Mux):\n\n- **Global skills**: Create, update, list, and delete global skills (`~/.mux/skills/`)\n- **Global instructions**: Edit the mux-wide `~/.mux/AGENTS.md`\n\n**Always global** (regardless of context):\n\n- **App config**: Read and write Mux configuration (`~/.mux/config.json`)\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before 
writing AGENTS.md, you must:\n 1. Read the current file (`mux_agents_read`).\n 2. Propose the exact change (show the new content or a concise diff).\n 3. Ask for explicit confirmation via `ask_user_question`.\n 4. Only then call `mux_agents_write` with `confirm: true`.\n- Before writing a skill, show the proposed `SKILL.md` content and confirm.\n\nIf the user declines, do not write anything.\n", "name_workspace": "---\nname: Name Workspace\ndescription: Generate workspace name and title from user message\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n require:\n - propose_name\n---\n\nYou are a workspace naming assistant. Your only job is to call the `propose_name` tool with a suitable name and title.\n\nDo not emit text responses. Call the `propose_name` tool immediately.\n", - "orchestrator": "---\nname: Orchestrator\ndescription: Coordinate sub-agent implementation and apply patches\nbase: exec\nsubagent:\n runnable: false\n append_prompt: |\n You are running as a sub-agent orchestrator in a child workspace.\n\n - Your parent workspace handles all PR management.\n Do NOT create pull requests, push to remote branches, or run any\n `gh pr` / `git push` commands. 
This applies even if AGENTS.md or\n other instructions say otherwise — those PR instructions target the\n top-level workspace only.\n - Orchestrate your delegated subtasks (spawn, await, apply patches,\n verify locally), then call `agent_report` exactly once with:\n - What changed (paths / key details)\n - What you ran (tests, typecheck, lint)\n - Any follow-ups / risks\n - Do not expand scope beyond the delegated task.\ntools:\n add:\n - ask_user_question\n remove:\n - propose_plan\n # Keep Orchestrator focused on coordination: no direct file edits.\n - file_edit_.*\n---\n\nYou are an internal Orchestrator agent running in Exec mode.\n\n**Mission:** coordinate implementation by delegating investigation + coding to sub-agents, then integrating their patches into this workspace.\n\nWhen a plan is present (default):\n\n- Treat the accepted plan as the source of truth. Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable.\n- Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). 
This produces better implementation task briefs.\n- Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user.\n- Convert the plan into concrete implementation subtasks and start delegation (`exec` for low complexity, `plan` for higher complexity).\n\nWhat you are allowed to do directly in this workspace:\n\n- Spawn/await/manage sub-agent tasks (`task`, `task_await`, `task_list`, `task_terminate`).\n- Apply patches (`task_apply_git_patch`).\n- Use `bash` for orchestration workflows: repo coordination via `git`/`gh`, targeted post-apply verification runs, and waiting on review/CI completion after PR updates (for example: `git push`, `gh pr comment`, `gh pr view`, `gh pr checks --watch`). Only run `gh pr create` when the user explicitly asks you to open a PR.\n- Ask clarifying questions with `ask_user_question` when blocked.\n- Coordinate targeted verification after integrating patches by running focused checks directly (when appropriate) or delegating runs to `explore`/`exec`.\n- Delegate patch-conflict reconciliation to `exec` sub-agents.\n\nHard rules (delegate-first):\n\n- Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). 
Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.\n- For correctness claims, an `explore` sub-agent report counts as having read the referenced files.\n- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination).\n- **Do not implement features/bugfixes directly here.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end.\n- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec`/`plan` sub-agents instead of implementing changes here.\n- **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches.\n\nDelegation guide:\n\n- Use `explore` for narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). 
Avoid \"scan the repo\" prompts.\n- Use `exec` for straightforward, low-complexity work where the implementation path is obvious from the task brief.\n - Good fit: single-file edits, localized wiring to existing helpers, straightforward command execution, or narrowly scoped follow-ups with clear acceptance.\n - Provide a compact task brief (so the sub-agent can act without reading the full plan) with:\n - Task: one sentence\n - Background (why this matters): 1–3 bullets\n - Scope / non-goals: what to change, and what not to change\n - Starting points: relevant files/symbols/paths (from prior exploration)\n - Acceptance: bullets / checks\n - Deliverables: commits + verification commands to run\n - Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n- Use `plan` for higher-complexity subtasks that touch multiple files/locations, require non-trivial investigation, or have an unclear implementation approach.\n - Default to `plan` when a subtask needs coordinated updates across multiple locations, unless the edits are mechanical and already fully specified.\n - For higher-complexity implementation work, prefer `plan` over `exec` so the sub-agent can do targeted research and produce a precise plan before implementation begins.\n - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery.\n - Plan subtasks automatically hand off to implementation after a successful `propose_plan`; expect the usual task completion output once implementation finishes.\n - For `plan` briefs, prioritize goal + constraints + acceptance criteria 
over file-by-file diff instructions.\n- Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). The desktop agent enforces a grounding discipline that keeps visual context local.\n\nRecommended Orchestrator → Exec task brief template:\n\n- Task: \n- Background (why this matters):\n - \n- Scope / non-goals:\n - Scope: \n - Non-goals: \n- Starting points: \n- Dependencies / assumptions:\n - Assumes: \n - If unmet: stop and report back; do not expand scope to create prerequisites.\n- Acceptance: \n- Deliverables:\n - Commits: \n - Verification: \n- Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n\nDependency analysis (required before spawning implementation tasks — `exec` or `plan`):\n\n- For each candidate subtask, write:\n - Outputs: files/targets/artifacts introduced/renamed/generated\n - Inputs / prerequisites (including for verification): what must already exist\n- A subtask is \"independent\" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch.\n- Parallelism is the default: maximize the size of each independent batch and run it in parallel.\n Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs.\n- If task B depends on outputs from task A:\n - Do not spawn B until A has completed and A's patch is applied in the parent workspace.\n - If the dependency chain is tight (download → generate → wire-up), prefer one `exec` task rather than 
splitting.\n\nExample dependency chain (schema download → generation):\n\n- Task A outputs: a new download target + new schema files.\n- Task B inputs: those schema files; verifies by running generation.\n- Therefore: run Task A (await + apply patch) before spawning Task B.\n\nPatch integration loop (default):\n\n1. Identify a batch of independent subtasks.\n2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity).\n3. Await the batch via `task_await`.\n4. For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches one at a time:\n - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`.\n - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results.\n - Dry-run apply: `task_apply_git_patch` with `dry_run: true`.\n - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`.\n - Do not assume an inline `status: completed` result means the child changes are already present in this workspace.\n - If dry-run fails, treat it as a patch conflict and delegate reconciliation:\n 1. Do not attempt a real apply for that patch in this workspace.\n 2. Spawn a dedicated `exec` task. In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly.\n - If real apply fails unexpectedly:\n 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue.\n 2. 
Then follow the same delegated reconciliation flow above.\n5. Verify + review:\n - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely.\n - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks.\n - PASS: summary-only (no long logs).\n - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify.\n\nSequential protocol (only for dependency chains):\n\n1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`.\n2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application.\n3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress).\n4. Only after the patch is applied, spawn the dependent implementation task.\n5. Repeat until the dependency chain is complete.\n\nNote: child workspaces are created at spawn time. 
Spawning dependents too early means they work from the wrong repo snapshot and get forced into scope expansion.\n\nKeep context minimal:\n\n- Do not request, paste, or restate large plans.\n- Prefer short, actionable prompts, but include enough context that the sub-agent does not need your plan file.\n - Child workspaces do not automatically have access to the parent's plan file; summarize just the relevant slice or provide file pointers.\n- Prefer file paths/symbols over long prose.\n", - "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: true\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Plan should not apply sub-agent patches.\n - task_apply_git_patch\n # Global config tools are restricted to the mux agent\n - mux_agents_.*\n - agent_skill_write\n - agent_skill_delete\n - mux_config_read\n - mux_config_write\n - skills_catalog_.*\n - analytics_query\n require:\n - propose_plan\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan.\n- Match the plan's size and structure to the problem.\n- Keep the plan self-contained and scannable.\n- Assume the user wants the completed plan, not a description of how you would make one.\n\n## Investigate only what you need\n\nBefore proposing a plan, figure out what you need to verify and gather that evidence.\n\n- When delegation is available, use Explore sub-agents for repo investigation. In Plan Mode, only\n spawn `agentId: \"explore\"` tasks.\n- Give each Explore task specific deliverables, and parallelize them when that helps.\n- Trust completed Explore reports for repo facts. 
Do not re-investigate just to second-guess them.\n If something is missing, ambiguous, or conflicting, spawn another focused Explore task.\n- If task delegation is unavailable, do the narrowest read-only investigation yourself.\n- Reserve `file_read` for the plan file itself, user-provided text already in this conversation,\n and that narrow fallback. When reading the plan file, prefer `file_read` over `bash cat` so long\n plans do not get compacted.\n- Wait for any spawned Explore tasks before calling `propose_plan`.\n\n## Write the plan\n\n- Use whatever structure best fits the problem: a few bullets, phases, workstreams, risks, or\n decision points are all fine.\n- Include the context, constraints, evidence, and concrete path forward somewhere in that\n structure.\n- Name the files, symbols, or subsystems that matter, and order the work so an implementer can\n follow it.\n- Keep uncertainty brief and local to the relevant step. Use `ask_user_question` when you need the\n user to decide something.\n- Include small code snippets only when they materially reduce ambiguity.\n- Put long rationale or background into
`<details>/<summary>` blocks.\n\n## Questions and handoff\n\n- If you need clarification from the user, use `ask_user_question` instead of asking in chat or\n adding an \"Open Questions\" section to the plan.\n- Ask up to 4 questions at a time (2–4 options each; \"Other\" remains available for free-form\n input).\n- After you get answers, update the plan and then call `propose_plan` when it is ready for review.\n- After calling `propose_plan`, do not paste the plan into chat or mention the plan file path.\n- If the user wants edits to other files, ask them to switch to Exec mode.\n\nWorkspace-specific runtime instructions (plan file path, edit restrictions, nesting warnings) are\nprovided separately.\n", + "orchestrator": "---\nname: Orchestrator\ndescription: Coordinate sub-agent implementation and apply patches\nbase: exec\nsubagent:\n runnable: false\n append_prompt: |\n You are running as a sub-agent orchestrator in a child workspace.\n\n - Your parent workspace handles all PR management.\n Do NOT create pull requests, push to remote branches, or run any\n `gh pr` / `git push` commands. This applies even if AGENTS.md or\n other instructions say otherwise — those PR instructions target the\n top-level workspace only.\n - Orchestrate your delegated subtasks (spawn, await, apply patches,\n verify locally), then call `agent_report` exactly once with:\n - What changed (paths / key details)\n - What you ran (tests, typecheck, lint)\n - Any follow-ups / risks\n - Do not expand scope beyond the delegated task.\ntools:\n add:\n - ask_user_question\n remove:\n - propose_plan\n # Keep Orchestrator focused on coordination: no direct file edits.\n - file_edit_.*\n---\n\nYou are an internal Orchestrator agent running in Exec mode.\n\n**Mission:** coordinate implementation by delegating investigation + coding to sub-agents, then integrating their patches into this workspace.\n\nWhen a plan is present (default):\n\n- Treat the accepted plan as the source of truth. 
Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable.\n- Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). This produces better implementation task briefs.\n- Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user.\n- Convert the plan into concrete implementation subtasks and start delegation with `exec` sub-agents.\n\nWhat you are allowed to do directly in this workspace:\n\n- Spawn/await/manage sub-agent tasks (`task`, `task_await`, `task_list`, `task_terminate`).\n- Apply patches (`task_apply_git_patch`).\n- Use `bash` for orchestration workflows: repo coordination via `git`/`gh`, targeted post-apply verification runs, and waiting on review/CI completion after PR updates (for example: `git push`, `gh pr comment`, `gh pr view`, `gh pr checks --watch`). Only run `gh pr create` when the user explicitly asks you to open a PR.\n- Ask clarifying questions with `ask_user_question` when blocked.\n- Coordinate targeted verification after integrating patches by running focused checks directly (when appropriate) or delegating runs to `explore`/`exec`.\n- Delegate patch-conflict reconciliation to `exec` sub-agents.\n\nHard rules (delegate-first):\n\n- Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). 
Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.\n- For correctness claims, an `explore` sub-agent report counts as having read the referenced files.\n- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination).\n- **Do not implement features/bugfixes directly here.** Spawn `exec` sub-agents and have them complete the work end-to-end.\n- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec` sub-agents instead of implementing changes here.\n- **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches.\n\nDelegation guide:\n\n- Use `explore` for narrowly-scoped read-only questions (confirm an assumption, locate a symbol/callsite, find relevant tests). 
Avoid \"scan the repo\" prompts.\n- Use `exec` for straightforward, low-complexity work where the implementation path is obvious from the task brief.\n - Good fit: single-file edits, localized wiring to existing helpers, straightforward command execution, or narrowly scoped follow-ups with clear acceptance.\n - Provide a compact task brief (so the sub-agent can act without reading the full plan) with:\n - Task: one sentence\n - Background (why this matters): 1–3 bullets\n - Scope / non-goals: what to change, and what not to change\n - Starting points: relevant files/symbols/paths (from prior exploration)\n - Acceptance: bullets / checks\n - Deliverables: commits + verification commands to run\n - Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n- Use `exec` for implementation subtasks, including higher-complexity work.\n - For higher-complexity work, do a small amount of parent-side framing first so the `exec` brief includes the goal, constraints, sequencing, and key files.\n - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery.\n - If the implementation approach is still unclear after targeted exploration, switch to a top-level plan workspace before continuing delegation instead of spawning a plan sub-agent.\n- Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). 
The desktop agent enforces a grounding discipline that keeps visual context local.\n\nRecommended Orchestrator → Exec task brief template:\n\n- Task: \n- Background (why this matters):\n - \n- Scope / non-goals:\n - Scope: \n - Non-goals: \n- Starting points: \n- Dependencies / assumptions:\n - Assumes: \n - If unmet: stop and report back; do not expand scope to create prerequisites.\n- Acceptance: \n- Deliverables:\n - Commits: \n - Verification: \n- Constraints:\n - Do not expand scope.\n - Prefer `explore` tasks for repo investigation (paths/symbols/tests/patterns) to preserve your context window for implementation.\n Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.\n If starting points + acceptance are already clear, skip initial explore and only explore when blocked.\n - Create one or more git commits before `agent_report`.\n\nDependency analysis (required before spawning implementation tasks):\n\n- For each candidate subtask, write:\n - Outputs: files/targets/artifacts introduced/renamed/generated\n - Inputs / prerequisites (including for verification): what must already exist\n- A subtask is \"independent\" only if its patch can be applied + verified on the current parent workspace HEAD, without any other pending patch.\n- Parallelism is the default: maximize the size of each independent batch and run it in parallel.\n Use the sequential protocol only when a subtask has a concrete prerequisite on another subtask's outputs.\n- If task B depends on outputs from task A:\n - Do not spawn B until A has completed and A's patch is applied in the parent workspace.\n - If the dependency chain is tight (download → generate → wire-up), prefer one `exec` task rather than splitting.\n\nExample dependency chain (schema download → generation):\n\n- Task A outputs: a new download target + new schema files.\n- Task B inputs: those schema files; verifies by running generation.\n- Therefore: run Task A (await + apply patch) before 
spawning Task B.\n\nPatch integration loop (default):\n\n1. Identify a batch of independent subtasks.\n2. Spawn one `exec` implementation sub-agent task per subtask with `run_in_background: true`.\n3. Await the batch via `task_await`.\n4. For each successful implementation task, integrate patches one at a time:\n - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`.\n - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results.\n - Dry-run apply: `task_apply_git_patch` with `dry_run: true`.\n - If dry-run succeeds, immediately apply for real: `task_apply_git_patch` with `dry_run: false`.\n - Do not assume an inline `status: completed` result means the child changes are already present in this workspace.\n - If dry-run fails, treat it as a patch conflict and delegate reconciliation:\n 1. Do not attempt a real apply for that patch in this workspace.\n 2. Spawn a dedicated `exec` task. In the brief, include the original failing `task_id` and instruct the sub-agent to replay that patch via `task_apply_git_patch`, resolve conflicts in its own workspace, run `git am --continue`, commit the resolved result, and report back with a new patch to apply cleanly.\n - If real apply fails unexpectedly:\n 1. Restore a clean working tree before delegating: run `git am --abort` via `bash` only when a git-am session is in progress; if abort reports no operation in progress, continue.\n 2. Then follow the same delegated reconciliation flow above.\n5. 
Verify + review:\n - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely.\n - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks.\n - PASS: summary-only (no long logs).\n - FAIL: include the failing command + key error lines; then delegate a fix to `exec` and re-verify.\n\nSequential protocol (only for dependency chains):\n\n1. Spawn the prerequisite implementation task with `agentId: \"exec\"` and `run_in_background: false`.\n2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application.\n3. Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress).\n4. Only after the patch is applied, spawn the dependent implementation task.\n5. Repeat until the dependency chain is complete.\n\nNote: child workspaces are created at spawn time. 
Spawning dependents too early means they work from the wrong repo snapshot and get forced into scope expansion.\n\nKeep context minimal:\n\n- Do not request, paste, or restate large plans.\n- Prefer short, actionable prompts, but include enough context that the sub-agent does not need your plan file.\n - Child workspaces do not automatically have access to the parent's plan file; summarize just the relevant slice or provide file pointers.\n- Prefer file paths/symbols over long prose.\n", + "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Plan should not apply sub-agent patches.\n - task_apply_git_patch\n # Global config tools are restricted to the mux agent\n - mux_agents_.*\n - agent_skill_write\n - agent_skill_delete\n - mux_config_read\n - mux_config_write\n - skills_catalog_.*\n - analytics_query\n require:\n - propose_plan\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan.\n- Match the plan's size and structure to the problem.\n- Keep the plan self-contained and scannable.\n- Assume the user wants the completed plan, not a description of how you would make one.\n\n## Investigate only what you need\n\nBefore proposing a plan, figure out what you need to verify and gather that evidence.\n\n- When delegation is available, use Explore sub-agents for repo investigation. In Plan Mode, only\n spawn `agentId: \"explore\"` tasks.\n- Give each Explore task specific deliverables, and parallelize them when that helps.\n- Trust completed Explore reports for repo facts. 
Do not re-investigate just to second-guess them.\n If something is missing, ambiguous, or conflicting, spawn another focused Explore task.\n- If task delegation is unavailable, do the narrowest read-only investigation yourself.\n- Reserve `file_read` for the plan file itself, user-provided text already in this conversation,\n and that narrow fallback. When reading the plan file, prefer `file_read` over `bash cat` so long\n plans do not get compacted.\n- Wait for any spawned Explore tasks before calling `propose_plan`.\n\n## Write the plan\n\n- Use whatever structure best fits the problem: a few bullets, phases, workstreams, risks, or\n decision points are all fine.\n- Include the context, constraints, evidence, and concrete path forward somewhere in that\n structure.\n- Name the files, symbols, or subsystems that matter, and order the work so an implementer can\n follow it.\n- Keep uncertainty brief and local to the relevant step. Use `ask_user_question` when you need the\n user to decide something.\n- Include small code snippets only when they materially reduce ambiguity.\n- Put long rationale or background into `
/` blocks.\n\n## Questions and handoff\n\n- If you need clarification from the user, use `ask_user_question` instead of asking in chat or\n adding an \"Open Questions\" section to the plan.\n- Ask up to 4 questions at a time (2–4 options each; \"Other\" remains available for free-form\n input).\n- After you get answers, update the plan and then call `propose_plan` when it is ready for review.\n- After calling `propose_plan`, do not paste the plan into chat or mention the plan file path.\n- If the user wants edits to other files, ask them to switch to Exec mode.\n\nWorkspace-specific runtime instructions (plan file path, edit restrictions, nesting warnings) are\nprovided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. 
`head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). 
If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", }; diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index e1e0f468fb..3c9cc2039f 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -2544,6 +2544,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "Use `agent-browser` for web automation. Run `agent-browser --help` for all commands.", "", "Core workflow:", + "", "1. `agent-browser open ` - Navigate to page", "2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)", '3. 
`agent-browser click @e1` / `fill @e2 "text"` - Interact using refs', @@ -2553,8 +2554,8 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "- If a PR has Codex review comments, address + resolve them, then re-request review by commenting `@codex review` on the PR.", "- Prefer `gh` CLI for GitHub interactions over manual web/curl flows.", - "- In Orchestrator mode, delegate implementation/verification commands to `exec` or `explore` sub-agents and integrate their patches; do not bypass delegation with direct local edits.", - "- In Orchestrator mode, route higher-complexity implementation tasks to `plan` sub-agents so they can research and produce a precise plan before auto-handoff to implementation.", + "- When delegation is required by the active mode, use `exec` or `explore` sub-agents as directed and integrate their patches; do not bypass delegation with direct local edits.", + "- Keep implementation tasks on `exec` sub-agents; use a top-level plan workspace when you need a separate planning phase before delegation.", "", "- User preference: when work is already on an open PR, push branch updates at the end of each completed change set so the PR stays current.", '- **PR creation gate:** Do **not** open/create a pull request unless the user explicitly asks (e.g., "open a PR", "create PR", "submit this"). By default, complete local validation, commit/push branch updates as requested, and let the user review before deciding whether to open a PR.', @@ -2566,11 +2567,11 @@ export const BUILTIN_SKILL_FILES: Record> = { "When a PR exists, you MUST remain in this loop until the PR is fully ready:", "", "1. Push your latest fixes.", - "2. Run local validation (`make static-check` and targeted tests as needed); in Orchestrator mode, delegate command execution to sub-agents.", + "2. Run local validation (`make static-check` and targeted tests as needed); delegate command execution to sub-agents when the active mode requires it.", "3. 
Request review with `@codex review`.", "4. Run `./scripts/wait_pr_ready.sh ` (which must execute `./scripts/wait_pr_checks.sh --once` while checks are pending).", - "5. If Codex leaves comments, address them (delegate fixes in Orchestrator mode), resolve threads with `./scripts/resolve_pr_comment.sh `, push, and repeat.", - "6. If checks/mergeability fail, fix issues locally (delegate fixes in Orchestrator mode), push, and repeat.", + "5. If Codex leaves comments, address them (delegating fixes when required by the active mode), resolve threads with `./scripts/resolve_pr_comment.sh `, push, and repeat.", + "6. If checks/mergeability fail, fix issues locally (delegating fixes when required by the active mode), push, and repeat.", "", "The only early-stop exception is when the reviewer is clearly misunderstanding the intended change and further churn would be counterproductive. In that case, leave a clarifying PR comment and pause for human direction.", "", @@ -3423,7 +3424,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "- Treat the accepted plan as the source of truth. Its file paths, symbols, and structure were validated during planning — do not routinely spawn `explore` to re-confirm them. Exception: if the plan references stale paths or appears to have been authored/edited by the user without planner validation, a single targeted `explore` to sanity-check critical paths is acceptable.", "- Spawning `explore` to gather _additional_ context beyond what the plan provides is encouraged (e.g., checking whether a helper already exists, locating test files not mentioned in the plan, discovering existing patterns to match). 
This produces better implementation task briefs.", "- Do not spawn `explore` just to verify that a planner-generated plan is correct — that is the planner's job, and the plan was accepted by the user.", - "- Convert the plan into concrete implementation subtasks and start delegation (`exec` for low complexity, `plan` for higher complexity).", + "- Convert the plan into concrete implementation subtasks and start delegation with `exec` sub-agents.", "", "What you are allowed to do directly in this workspace:", "", @@ -3439,8 +3440,8 @@ export const BUILTIN_SKILL_FILES: Record> = { "- Trust `explore` sub-agent reports as authoritative for repo facts (paths/symbols/callsites). Do not redo the same investigation yourself; only re-check if the report is ambiguous or contradicts other evidence.", "- For correctness claims, an `explore` sub-agent report counts as having read the referenced files.", "- **Do not do broad repo investigation here.** If you need context, spawn an `explore` sub-agent with a narrow prompt (keeps this agent focused on coordination).", - "- **Do not implement features/bugfixes directly here.** Spawn `exec` (simple) or `plan` (complex) sub-agents and have them complete the work end-to-end.", - "- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. 
If direct checks fail due to code issues, delegate fixes to `exec`/`plan` sub-agents instead of implementing changes here.", + "- **Do not implement features/bugfixes directly here.** Spawn `exec` sub-agents and have them complete the work end-to-end.", + "- **Do not use `bash` for file reads/writes, manual code editing, or broad repo exploration.** `bash` in this workspace is for orchestration-only operations: `git`/`gh` repo management, targeted post-apply verification checks, and waiting for PR review/CI outcomes. If direct checks fail due to code issues, delegate fixes to `exec` sub-agents instead of implementing changes here.", "- **Never read or scan session storage.** This includes `~/.mux/sessions/**` and `~/.mux/sessions/subagent-patches/**`. Treat session storage as an internal implementation detail; do not shell out to locate patch artifacts on disk. Only use `task_apply_git_patch` to access patches.", "", "Delegation guide:", @@ -3461,12 +3462,10 @@ export const BUILTIN_SKILL_FILES: Record> = { " Trust Explore reports as authoritative; do not re-verify unless ambiguous/contradictory.", " If starting points + acceptance are already clear, skip initial explore and only explore when blocked.", " - Create one or more git commits before `agent_report`.", - "- Use `plan` for higher-complexity subtasks that touch multiple files/locations, require non-trivial investigation, or have an unclear implementation approach.", - " - Default to `plan` when a subtask needs coordinated updates across multiple locations, unless the edits are mechanical and already fully specified.", - " - For higher-complexity implementation work, prefer `plan` over `exec` so the sub-agent can do targeted research and produce a precise plan before implementation begins.", + "- Use `exec` for implementation subtasks, including higher-complexity work.", + " - For higher-complexity work, do a small amount of parent-side framing first so the `exec` brief includes the goal, constraints, 
sequencing, and key files.", " - Good fit: multi-file refactors, cross-module behavior changes, unfamiliar subsystems, or work where sequencing/dependencies need discovery.", - " - Plan subtasks automatically hand off to implementation after a successful `propose_plan`; expect the usual task completion output once implementation finishes.", - " - For `plan` briefs, prioritize goal + constraints + acceptance criteria over file-by-file diff instructions.", + " - If the implementation approach is still unclear after targeted exploration, switch to a top-level plan workspace before continuing delegation instead of spawning a plan sub-agent.", "- Use `desktop` for GUI-heavy desktop automation that requires repeated screenshot → act → verify loops (for example, interacting with application windows, clicking through UI flows, or visual verification). The desktop agent enforces a grounding discipline that keeps visual context local.", "", "Recommended Orchestrator → Exec task brief template:", @@ -3492,7 +3491,7 @@ export const BUILTIN_SKILL_FILES: Record> = { " If starting points + acceptance are already clear, skip initial explore and only explore when blocked.", " - Create one or more git commits before `agent_report`.", "", - "Dependency analysis (required before spawning implementation tasks — `exec` or `plan`):", + "Dependency analysis (required before spawning implementation tasks):", "", "- For each candidate subtask, write:", " - Outputs: files/targets/artifacts introduced/renamed/generated", @@ -3513,9 +3512,9 @@ export const BUILTIN_SKILL_FILES: Record> = { "Patch integration loop (default):", "", "1. Identify a batch of independent subtasks.", - "2. Spawn one implementation sub-agent task per subtask with `run_in_background: true` (`exec` for low complexity, `plan` for higher complexity).", + "2. Spawn one `exec` implementation sub-agent task per subtask with `run_in_background: true`.", "3. Await the batch via `task_await`.", - "4. 
For each successful implementation task (`exec` directly, or `plan` after auto-handoff to implementation), integrate patches one at a time:", + "4. For each successful implementation task, integrate patches one at a time:", " - Treat every successful child task with a `taskId` as pending patch integration, whether the completion arrived inline from `task` or later from `task_await`.", " - Complete each dry-run + real-apply pair before starting the next patch. Applying one patch changes `HEAD`, which can invalidate later dry-run results.", " - Dry-run apply: `task_apply_git_patch` with `dry_run: true`.", @@ -3531,11 +3530,11 @@ export const BUILTIN_SKILL_FILES: Record> = { " - Run focused verification directly with `bash` when practical (for example: targeted tests or the repo's standard full-validation command), or delegate verification to `explore`/`exec` when investigation/fixes are likely.", " - Use `git`/`gh` directly for PR orchestration when a PR already exists (pushes, review-request comments, replies to review remarks, and CI/check-status waiting loops). Create a new PR only when the user explicitly asks.", " - PASS: summary-only (no long logs).", - " - FAIL: include the failing command + key error lines; then delegate a fix to `exec`/`plan` and re-verify.", + " - FAIL: include the failing command + key error lines; then delegate a fix to `exec` and re-verify.", "", "Sequential protocol (only for dependency chains):", "", - "1. Spawn the prerequisite implementation task (`exec` or `plan`, based on complexity) with `run_in_background: false`.", + '1. Spawn the prerequisite implementation task with `agentId: "exec"` and `run_in_background: false`.', "2. If step 1 returns `queued`/`running` without a completed report, call `task_await` with the returned `taskId` before attempting any patch apply. If step 1 returns `status: completed` inline, that same `taskId` still requires patch application.", "3. 
Dry-run apply its patch (`dry_run: true`); then apply for real (`dry_run: false`). If either step fails, follow the conflict playbook above (including `git am --abort` only when a real apply leaves a git-am session in progress).", "4. Only after the patch is applied, spawn the dependent implementation task.", @@ -3566,7 +3565,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "ui:", " color: var(--color-plan-mode)", "subagent:", - " runnable: true", + " runnable: false", "tools:", " add:", " # Allow all tools by default (includes MCP tools which have dynamic names)", diff --git a/src/node/services/planExecutorRouter.test.ts b/src/node/services/planExecutorRouter.test.ts deleted file mode 100644 index 32939c0251..0000000000 --- a/src/node/services/planExecutorRouter.test.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { LanguageModel } from "ai"; - -import { routePlanToExecutor } from "./planExecutorRouter"; - -describe("planExecutorRouter", () => { - it("returns orchestrator when select_executor chooses orchestrator", async () => { - let calls = 0; - - const decision = await routePlanToExecutor({ - model: {} as unknown as LanguageModel, - planContent: "Update backend, frontend, and tests in parallel.", - timeoutMs: 5_000, - generateTextImpl: async (args) => { - calls += 1; - - expect((args as { toolChoice?: unknown }).toolChoice).toEqual({ - type: "tool", - toolName: "select_executor", - }); - - const tools = (args as { tools?: unknown }).tools as Record; - const selectExecutorTool = tools.select_executor as { - execute: (input: unknown, options: unknown) => Promise; - }; - - await selectExecutorTool.execute( - { - target: "orchestrator", - reasoning: "Plan spans independent workstreams.", - }, - {} - ); - - return { finishReason: "stop" }; - }, - }); - - expect(calls).toBe(1); - expect(decision).toEqual({ - target: "orchestrator", - reasoning: "Plan spans independent workstreams.", - }); - }); - - it("retries once 
with a reminder when no tool call is produced", async () => { - let calls = 0; - - const decision = await routePlanToExecutor({ - model: {} as unknown as LanguageModel, - planContent: "Single-file refactor.", - timeoutMs: 5_000, - generateTextImpl: async (args) => { - calls += 1; - - const messages = (args as { messages?: unknown }).messages as - | Array<{ content?: unknown }> - | undefined; - expect(Array.isArray(messages)).toBe(true); - - if (calls === 1) { - expect(messages?.length).toBe(1); - return { finishReason: "stop" }; - } - - expect(messages?.length).toBe(2); - expect(messages?.[1]?.content).toBe( - "Reminder: You MUST call select_executor exactly once. Do not output any text." - ); - - const tools = (args as { tools?: unknown }).tools as Record; - const selectExecutorTool = tools.select_executor as { - execute: (input: unknown, options: unknown) => Promise; - }; - - await selectExecutorTool.execute( - { - target: "exec", - reasoning: "Plan is focused and sequential.", - }, - {} - ); - - return { finishReason: "stop" }; - }, - }); - - expect(calls).toBe(2); - expect(decision).toEqual({ - target: "exec", - reasoning: "Plan is focused and sequential.", - }); - }); - - it("defaults to exec when the model never calls select_executor", async () => { - const decision = await routePlanToExecutor({ - model: {} as unknown as LanguageModel, - planContent: "Any plan", - timeoutMs: 5_000, - generateTextImpl: () => { - return Promise.resolve({ finishReason: "stop" }); - }, - }); - - expect(decision).toEqual({ target: "exec" }); - }); -}); diff --git a/src/node/services/planExecutorRouter.ts b/src/node/services/planExecutorRouter.ts deleted file mode 100644 index 2b1da25f1b..0000000000 --- a/src/node/services/planExecutorRouter.ts +++ /dev/null @@ -1,147 +0,0 @@ -import assert from "@/common/utils/assert"; - -import { generateText, tool, type LanguageModel, type Tool } from "ai"; -import { z } from "zod"; - -import { getErrorMessage } from "@/common/utils/errors"; 
-import { log } from "@/node/services/log"; -import { linkAbortSignal } from "@/node/utils/abort"; - -export type PlanExecutorRoutingTarget = "exec" | "orchestrator"; - -export interface PlanExecutorRoutingDecision { - target: PlanExecutorRoutingTarget; - reasoning?: string; -} - -export type GenerateTextLike = ( - args: Parameters[0] -) => Promise<{ finishReason?: string }>; - -interface RoutePlanToExecutorParams { - model: LanguageModel; - planContent: string; - timeoutMs?: number; - abortSignal?: AbortSignal; - generateTextImpl?: GenerateTextLike; -} - -const PLAN_EXECUTOR_ROUTING_TIMEOUT_MS = 15_000; - -const PLAN_EXECUTOR_ROUTING_PROMPT = `You are a routing agent. - -Given a software implementation plan, decide which executor should implement it: -- "exec": a single execution agent should implement the plan. -- "orchestrator": an orchestrator should coordinate multiple sub-agents. - -Choose "exec" when: -- The plan is focused and mostly sequential. -- The work is likely confined to one subsystem or a small set of related files. -- Parallelism would add coordination overhead without clear benefit. - -Choose "orchestrator" when: -- The plan spans multiple subsystems with separable workstreams. -- The plan can be parallelized into independent tasks. -- The implementation likely needs coordinated backend/frontend/test updates in parallel. - -You MUST call select_executor exactly once. -Do not output plain text.`; - -const SELECT_EXECUTOR_REMINDER = - "Reminder: You MUST call select_executor exactly once. 
Do not output any text."; - -const selectExecutorInputSchema = z.object({ - target: z.enum(["exec", "orchestrator"]), - reasoning: z.string().min(1), -}); - -export async function routePlanToExecutor( - params: RoutePlanToExecutorParams -): Promise { - assert(params, "routePlanToExecutor: params is required"); - assert(params.model, "routePlanToExecutor: model is required"); - assert( - typeof params.planContent === "string" && params.planContent.trim().length > 0, - "routePlanToExecutor: planContent must be a non-empty string" - ); - - const timeoutMs = params.timeoutMs ?? PLAN_EXECUTOR_ROUTING_TIMEOUT_MS; - assert( - Number.isInteger(timeoutMs) && timeoutMs > 0, - "routePlanToExecutor: timeoutMs must be a positive integer" - ); - - const routeAbortController = new AbortController(); - const unlinkAbortSignal = linkAbortSignal(params.abortSignal, routeAbortController); - - let timedOut = false; - const timeout = setTimeout(() => { - timedOut = true; - routeAbortController.abort(); - }, timeoutMs); - timeout.unref?.(); - - let selectedDecision: PlanExecutorRoutingDecision | undefined; - - const tools: Record = { - select_executor: tool({ - description: "Select which executor should implement this plan.", - inputSchema: selectExecutorInputSchema, - execute: (input) => { - const reasoning = input.reasoning.trim(); - selectedDecision = { - target: input.target, - reasoning: reasoning.length > 0 ? reasoning : undefined, - }; - - // Signal-tool semantics: the decision is consumed by the caller. - return { - ok: true, - target: input.target, - }; - }, - }), - }; - - const attemptMessages: Array[0]["messages"]>> = [ - [{ role: "user", content: params.planContent }], - [ - { role: "user", content: params.planContent }, - { role: "user", content: SELECT_EXECUTOR_REMINDER }, - ], - ]; - - const generate = params.generateTextImpl ?? 
generateText; - - try { - for (const messages of attemptMessages) { - selectedDecision = undefined; - - await generate({ - model: params.model, - system: PLAN_EXECUTOR_ROUTING_PROMPT, - messages, - tools, - toolChoice: { type: "tool", toolName: "select_executor" }, - maxRetries: 0, - abortSignal: routeAbortController.signal, - }); - - if (selectedDecision) { - return selectedDecision; - } - } - - log.warn("Plan executor routing returned no tool decision; defaulting to exec"); - return { target: "exec" }; - } catch (error: unknown) { - log.warn("Plan executor routing failed; defaulting to exec", { - timedOut, - error: getErrorMessage(error), - }); - return { target: "exec" }; - } finally { - clearTimeout(timeout); - unlinkAbortSignal(); - } -} diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts index 0fae026e34..ad5a7e05f9 100644 --- a/src/node/services/taskService.test.ts +++ b/src/node/services/taskService.test.ts @@ -23,14 +23,7 @@ import { createRuntime } from "@/node/runtime/runtimeFactory"; import * as runtimeFactory from "@/node/runtime/runtimeFactory"; import * as forkOrchestrator from "@/node/services/utils/forkOrchestrator"; import { Ok, Err, type Result } from "@/common/types/result"; -import { defaultModel } from "@/common/utils/ai/models"; -import type { PlanSubagentExecutorRouting } from "@/common/types/tasks"; -import type { ThinkingLevel } from "@/common/types/thinking"; import type { ErrorEvent, StreamEndEvent } from "@/common/types/stream"; -import { - PLAN_AUTO_ROUTING_STATUS_EMOJI, - PLAN_AUTO_ROUTING_STATUS_MESSAGE, -} from "@/common/constants/planAutoRoutingStatus"; import { createMuxMessage, type MuxMessage } from "@/common/types/message"; import { isDynamicToolPart, type DynamicToolPart } from "@/common/types/toolParts"; import type { WorkspaceMetadata } from "@/common/types/workspace"; @@ -3494,7 +3487,7 @@ describe("TaskService", () => { ); }); - test("initialize uses propose_plan reminders for 
plan-inheriting awaiting_report tasks", async () => { + test("initialize interrupts legacy plan-inheriting awaiting_report tasks", async () => { const config = await createTestConfig(rootDir); const projectPath = path.join(rootDir, "repo"); @@ -3555,14 +3548,13 @@ describe("TaskService", () => { await taskService.initialize(); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("awaiting its final propose_plan"), - expect.objectContaining({ - toolPolicy: [{ regex_match: "^propose_plan$", action: "require" }], - }), - expect.objectContaining({ synthetic: true }) - ); + expect(sendMessage).not.toHaveBeenCalled(); + + const postCfg = config.loadConfigOrDefault(); + const updatedTask = Array.from(postCfg.projects.values()) + .flatMap((project) => project.workspaces) + .find((workspace) => workspace.id === childId); + expect(updatedTask?.taskStatus).toBe("interrupted"); }); describe("backgroundForegroundWaitsForWorkspace", () => { @@ -7570,20 +7562,7 @@ describe("TaskService", () => { expect(remove).toHaveBeenCalledWith(childTwoId, true); }); - async function setupPlanModeStreamEndHarness(options?: { - planSubagentExecutorRouting?: PlanSubagentExecutorRouting; - planSubagentDefaultsToOrchestrator?: boolean; - childAgentId?: string; - disableOrchestrator?: boolean; - maxTaskNestingDepth?: number; - parentAiSettingsByAgent?: Record; - agentAiDefaults?: Record< - string, - { modelString: string; thinkingLevel: ThinkingLevel; enabled?: boolean } - >; - sendMessageOverride?: ReturnType; - aiServiceOverrides?: Parameters[1]; - }) { + async function setupPlanModeStreamEndHarness(options?: { childAgentId?: string }) { const config = await createTestConfig(rootDir); const projectPath = path.join(rootDir, "repo"); @@ -7611,19 +7590,6 @@ describe("TaskService", () => { ); } - const agentAiDefaults = { - ...(options?.agentAiDefaults ?? {}), - ...(options?.disableOrchestrator - ? 
{ - orchestrator: { - modelString: "openai:gpt-4o-mini", - thinkingLevel: "off" as ThinkingLevel, - enabled: false, - }, - } - : {}), - }; - await config.saveConfig({ projects: new Map([ [ @@ -7636,7 +7602,6 @@ describe("TaskService", () => { id: parentId, name: "parent", runtimeConfig, - aiSettingsByAgent: options?.parentAiSettingsByAgent, }, { path: childWorkspacePath, @@ -7656,255 +7621,248 @@ describe("TaskService", () => { ]), taskSettings: { maxParallelAgentTasks: 3, - maxTaskNestingDepth: options?.maxTaskNestingDepth ?? 3, - planSubagentExecutorRouting: - options?.planSubagentExecutorRouting ?? - (options?.planSubagentDefaultsToOrchestrator ? "orchestrator" : "exec"), - ...(typeof options?.planSubagentDefaultsToOrchestrator === "boolean" - ? { - planSubagentDefaultsToOrchestrator: options.planSubagentDefaultsToOrchestrator, - } - : {}), + maxTaskNestingDepth: 3, }, - agentAiDefaults: Object.keys(agentAiDefaults).length > 0 ? agentAiDefaults : undefined, }); - const getInfo = mock(() => ({ - id: childId, - name: "agent_plan_child", - projectName: "repo", - projectPath, - runtimeConfig, - namedWorkspacePath: childWorkspacePath, - })); - const replaceHistory = mock((): Promise> => Promise.resolve(Ok(undefined))); - const { workspaceService, sendMessage, updateAgentStatus } = createWorkspaceServiceMocks({ - getInfo, - replaceHistory, - sendMessage: options?.sendMessageOverride, - }); - - const { aiService, createModel } = createAIServiceMocks(config, options?.aiServiceOverrides); - const { taskService } = createTaskServiceHarness(config, { workspaceService, aiService }); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); const internal = taskService as unknown as { handleStreamEnd: (event: StreamEndEvent) => Promise; - resolvePlanAutoHandoffTargetAgentId: (args: { - workspaceId: string; - entry: { - projectPath: string; - workspace: { - id?: string; - name?: 
string; - path?: string; - runtimeConfig?: unknown; - taskModelString?: string; - }; - }; - routing: PlanSubagentExecutorRouting; - planContent: string | null; - }) => Promise<"exec" | "orchestrator">; }; return { config, - projectPath, childId, sendMessage, - replaceHistory, - createModel, - updateAgentStatus, internal, }; } - function makeSuccessfulProposePlanStreamEndEvent(workspaceId: string): StreamEndEvent { - return { + test("legacy plan task stream-end with final assistant text interrupts instead of retrying propose_plan", async () => { + const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); + + await internal.handleStreamEnd({ type: "stream-end", - workspaceId, + workspaceId: childId, messageId: "assistant-plan-output", - metadata: { model: "openai:gpt-4o-mini" }, - parts: [ - { - type: "dynamic-tool", - toolCallId: "propose-plan-call-1", - toolName: "propose_plan", - state: "output-available", - output: { success: true, planPath: "/tmp/test-plan.md" }, - input: { plan: "test plan" }, - }, - ], - }; - } - - test("stream-end with propose_plan success triggers handoff instead of awaiting_report reminder", async () => { - const { config, childId, sendMessage, replaceHistory, internal } = - await setupPlanModeStreamEndHarness(); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(replaceHistory).toHaveBeenCalledWith( - childId, - expect.anything(), - expect.objectContaining({ mode: "append-compaction-boundary" }) - ); - - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ - agentId: "exec", - model: "openai:gpt-4o-mini", - thinkingLevel: "off", - }), - expect.objectContaining({ synthetic: true }) - ); + metadata: { model: "openai:gpt-4o-mini", finishReason: "stop" }, + parts: [{ type: "text", text: "Here is the final plan in prose, but no propose_plan 
call." }], + }); - const kickoffMessage = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }) - .mock.calls[0]?.[1]; - expect(kickoffMessage).not.toContain("agent_report"); + expect(sendMessage).not.toHaveBeenCalled(); const postCfg = config.loadConfigOrDefault(); const updatedTask = Array.from(postCfg.projects.values()) .flatMap((project) => project.workspaces) .find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("exec"); - expect(updatedTask?.taskStatus).toBe("running"); + expect(updatedTask?.taskStatus).toBe("interrupted"); }); - test("stream-end with propose_plan success uses global exec defaults for handoff", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness({ - parentAiSettingsByAgent: { - exec: { - model: "anthropic:claude-sonnet-4-5", - thinkingLevel: "low", - }, - }, - agentAiDefaults: { - exec: { - modelString: "openai:gpt-5.3-codex", - thinkingLevel: "xhigh", - }, - }, - }); + test("legacy plan task stream-end without propose_plan interrupts instead of retrying propose_plan", async () => { + const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); + await internal.handleStreamEnd({ + type: "stream-end", + workspaceId: childId, + messageId: "assistant-plan-output", + metadata: { model: "openai:gpt-4o-mini" }, + parts: [], + }); - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ - agentId: "exec", - model: "openai:gpt-5.3-codex", - thinkingLevel: "xhigh", - }), - expect.objectContaining({ synthetic: true }) - ); + expect(sendMessage).not.toHaveBeenCalled(); const postCfg = config.loadConfigOrDefault(); const updatedTask = Array.from(postCfg.projects.values()) .flatMap((project) => project.workspaces) 
.find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("exec"); - expect(updatedTask?.taskModelString).toBe("openai:gpt-5.3-codex"); - expect(updatedTask?.taskThinkingLevel).toBe("xhigh"); + expect(updatedTask?.taskStatus).toBe("interrupted"); }); - test("stream-end handoff falls back to default model when inherited task model is whitespace", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); + test("create rejects custom plan-like agents even when they are marked runnable", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); - const preCfg = config.loadConfigOrDefault(); - const childEntry = Array.from(preCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - expect(childEntry).toBeTruthy(); - if (!childEntry) return; + const projectPath = await createTestProject(rootDir); - childEntry.taskModelString = " "; - await config.saveConfig(preCfg); + const runtimeConfig = { type: "worktree" as const, srcBaseDir: config.srcDir }; + const runtime = createRuntime(runtimeConfig, { projectPath }); + const initLogger = createNullInitLogger(); - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); + const parentName = "parent"; + const parentCreate = await runtime.createWorkspace({ + projectPath, + branchName: parentName, + trunkBranch: "main", + directoryName: parentName, + initLogger, + }); + expect(parentCreate.success).toBe(true); + if (!parentCreate.success) return; - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ - agentId: "exec", - model: defaultModel, - }), - expect.objectContaining({ synthetic: true }) + const parentId = "parent-111"; + const parentPath = runtime.getWorkspacePath(projectPath, 
parentName); + const customAgentDir = path.join(parentPath, ".mux", "agents"); + await fsPromises.mkdir(customAgentDir, { recursive: true }); + await fsPromises.writeFile( + path.join(customAgentDir, "custom_plan_runner.md"), + [ + "---", + "name: Custom Plan Agent", + "base: plan", + "subagent:", + " runnable: true", + "---", + "Custom plan-like subagent used by taskService tests.", + "", + ].join("\n") ); - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); + await config.saveConfig({ + projects: new Map([ + [ + projectPath, + { + trusted: true, + workspaces: [ + { + path: parentPath, + id: parentId, + name: parentName, + runtimeConfig, + }, + ], + }, + ], + ]), + taskSettings: { maxParallelAgentTasks: 3, maxTaskNestingDepth: 3 }, + }); - expect(updatedTask?.taskModelString).toBe(defaultModel); - }); + const { taskService } = createTaskServiceHarness(config); + const created = await taskService.create({ + parentWorkspaceId: parentId, + kind: "agent", + agentId: "custom_plan_runner", + agentType: "custom_plan_runner", + prompt: "Produce a plan.", + title: "Plan-like task", + }); - test("stream-end with propose_plan success triggers handoff for custom plan-like agents", async () => { - const { config, childId, sendMessage, replaceHistory, internal } = - await setupPlanModeStreamEndHarness({ - childAgentId: "custom_plan_runner", - }); + expect(created.success).toBe(false); + if (created.success) return; + expect(created.error).toContain("plan-mode agents are not supported as sub-agents"); + }); - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); + test("legacy plan task stream-end with successful propose_plan interrupts instead of awaiting another plan", async () => { + const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); - 
expect(replaceHistory).toHaveBeenCalledWith( - childId, - expect.anything(), - expect.objectContaining({ mode: "append-compaction-boundary" }) - ); + await internal.handleStreamEnd({ + type: "stream-end", + workspaceId: childId, + messageId: "assistant-plan-output", + metadata: { model: "openai:gpt-4o-mini" }, + parts: [ + { + type: "dynamic-tool", + toolCallId: "propose-plan-call-1", + toolName: "propose_plan", + state: "output-available", + output: { success: true, planPath: "/tmp/test-plan.md" }, + input: { plan: "test plan" }, + }, + ], + }); - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ agentId: "exec" }), - expect.objectContaining({ synthetic: true }) - ); + expect(sendMessage).not.toHaveBeenCalled(); const postCfg = config.loadConfigOrDefault(); const updatedTask = Array.from(postCfg.projects.values()) .flatMap((project) => project.workspaces) .find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("exec"); - expect(updatedTask?.taskStatus).toBe("running"); + expect(updatedTask?.taskStatus).toBe("interrupted"); }); - test("plan task stream-end with final assistant text still requires propose_plan", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); + test("queued legacy plan task is interrupted instead of dequeued", async () => { + const config = await createTestConfig(rootDir); - await internal.handleStreamEnd({ - type: "stream-end", - workspaceId: childId, - messageId: "assistant-plan-output", - metadata: { model: "openai:gpt-4o-mini", finishReason: "stop" }, - parts: [{ type: "text", text: "Here is the final plan in prose, but no propose_plan call." 
}], + const projectPath = path.join(rootDir, "repo"); + const parentId = "parent-111"; + const childId = "child-custom-plan-queued-222"; + const customAgentId = "custom_plan_runner"; + const runtimeConfig = { type: "worktree" as const, srcBaseDir: config.srcDir }; + const parentWorkspacePath = path.join(projectPath, "parent"); + + const customAgentDir = path.join(parentWorkspacePath, ".mux", "agents"); + await fsPromises.mkdir(customAgentDir, { recursive: true }); + await fsPromises.writeFile( + path.join(customAgentDir, `${customAgentId}.md`), + [ + "---", + "name: Custom Plan Runner", + "base: plan", + "subagent:", + " runnable: true", + "---", + "Custom plan-like agent for queued-task tests.", + "", + ].join("\n") + ); + + await config.saveConfig({ + projects: new Map([ + [ + projectPath, + { + trusted: true, + workspaces: [ + { + path: parentWorkspacePath, + id: parentId, + name: "parent", + runtimeConfig, + }, + { + path: path.join(projectPath, "child-custom-plan-queued"), + id: childId, + name: "agent_custom_plan_child", + parentWorkspaceId: parentId, + agentId: customAgentId, + agentType: customAgentId, + taskStatus: "queued", + taskPrompt: "Produce a plan.", + runtimeConfig, + }, + ], + }, + ], + ]), + taskSettings: { maxParallelAgentTasks: 1, maxTaskNestingDepth: 3 }, }); - expect(sendMessage).toHaveBeenCalledTimes(1); - const reminderMessage = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }) - .mock.calls[0]?.[1]; - expect(reminderMessage).toContain("propose_plan"); - expect(reminderMessage).not.toContain("agent_report"); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + const internal = taskService as unknown as { + maybeStartQueuedTasks: () => Promise; + }; + + await internal.maybeStartQueuedTasks(); + + expect(sendMessage).not.toHaveBeenCalled(); const postCfg = config.loadConfigOrDefault(); const updatedTask = 
Array.from(postCfg.projects.values()) .flatMap((project) => project.workspaces) .find((workspace) => workspace.id === childId); - expect(updatedTask?.taskStatus).toBe("awaiting_report"); + expect(updatedTask?.taskStatus).toBe("interrupted"); }); - test("plan task stream-end without propose_plan sends propose_plan reminder (not agent_report)", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness(); + test("legacy plan task stream-end rechecks the queue after interruption", async () => { + const { childId, internal } = await setupPlanModeStreamEndHarness(); + const internalWithQueue = internal as typeof internal & { + maybeStartQueuedTasks: () => Promise; + }; + const maybeStartQueuedTasks = spyOn(internalWithQueue, "maybeStartQueuedTasks"); await internal.handleStreamEnd({ type: "stream-end", @@ -7914,18 +7872,7 @@ describe("TaskService", () => { parts: [], }); - expect(sendMessage).toHaveBeenCalledTimes(1); - - const reminderMessage = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }) - .mock.calls[0]?.[1]; - expect(reminderMessage).toContain("propose_plan"); - expect(reminderMessage).not.toContain("agent_report"); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - expect(updatedTask?.taskStatus).toBe("awaiting_report"); + expect(maybeStartQueuedTasks).toHaveBeenCalledTimes(1); }); test("awaiting_report tasks keep retrying agent_report after recovery errors instead of fabricating fallback reports", async () => { @@ -8090,210 +8037,6 @@ describe("TaskService", () => { expect(childWorkspace?.taskStatus).toBe("interrupted"); }); - test("stream-end with propose_plan success in auto routing falls back to exec when plan content is unavailable", async () => { - const { config, childId, sendMessage, createModel, updateAgentStatus, internal } = - 
await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "auto", - }); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(createModel).not.toHaveBeenCalled(); - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ agentId: "exec" }), - expect.objectContaining({ synthetic: true }) - ); - expect(updateAgentStatus).toHaveBeenNthCalledWith( - 1, - childId, - expect.objectContaining({ - emoji: PLAN_AUTO_ROUTING_STATUS_EMOJI, - message: PLAN_AUTO_ROUTING_STATUS_MESSAGE, - url: "", - }) - ); - expect(updateAgentStatus).toHaveBeenNthCalledWith(2, childId, null); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("exec"); - expect(updatedTask?.taskStatus).toBe("running"); - }); - - test("auto plan handoff routing defaults to exec when orchestrator would have no task tools", async () => { - const { config, projectPath, childId, createModel, internal } = - await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "auto", - maxTaskNestingDepth: 1, - }); - - const cfg = config.loadConfigOrDefault(); - const childWorkspace = Array.from(cfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - expect(childWorkspace).toBeTruthy(); - if (!childWorkspace) return; - - const targetAgentId = await internal.resolvePlanAutoHandoffTargetAgentId({ - workspaceId: childId, - entry: { - projectPath, - workspace: childWorkspace, - }, - routing: "auto", - planContent: "1. 
Delegate implementation work to a child task.", - }); - - expect(targetAgentId).toBe("exec"); - expect(createModel).not.toHaveBeenCalled(); - }); - - test("auto plan handoff routing still evaluates the model when task tools are available", async () => { - const { config, projectPath, childId, createModel, internal } = - await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "auto", - }); - - const cfg = config.loadConfigOrDefault(); - const childWorkspace = Array.from(cfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - expect(childWorkspace).toBeTruthy(); - if (!childWorkspace) return; - - const targetAgentId = await internal.resolvePlanAutoHandoffTargetAgentId({ - workspaceId: childId, - entry: { - projectPath, - workspace: childWorkspace, - }, - routing: "auto", - planContent: "1. Implement the changes directly in this workspace.", - }); - - expect(targetAgentId).toBe("exec"); - expect(createModel).toHaveBeenCalledTimes(1); - }); - - test("stream-end with propose_plan success hands off to orchestrator when routing is orchestrator", async () => { - const { config, childId, sendMessage, replaceHistory, internal } = - await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "orchestrator", - }); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(replaceHistory).toHaveBeenCalledWith( - childId, - expect.anything(), - expect.objectContaining({ mode: "append-compaction-boundary" }) - ); - - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("orchestrating"), - expect.objectContaining({ agentId: "orchestrator" }), - expect.objectContaining({ synthetic: true }) - ); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === 
childId); - - expect(updatedTask?.agentId).toBe("orchestrator"); - expect(updatedTask?.taskStatus).toBe("running"); - }); - - test("orchestrator handoff inherits parent model when orchestrator defaults are unset", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "orchestrator", - agentAiDefaults: { - exec: { - modelString: "openai:gpt-5.3-codex", - thinkingLevel: "xhigh", - }, - }, - }); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("orchestrating"), - expect.objectContaining({ - agentId: "orchestrator", - model: "openai:gpt-4o-mini", - thinkingLevel: "off", - }), - expect.objectContaining({ synthetic: true }) - ); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("orchestrator"); - expect(updatedTask?.taskModelString).toBe("openai:gpt-4o-mini"); - expect(updatedTask?.taskThinkingLevel).toBe("off"); - }); - - test("stream-end with propose_plan success falls back to exec when orchestrator is disabled", async () => { - const { config, childId, sendMessage, internal } = await setupPlanModeStreamEndHarness({ - planSubagentExecutorRouting: "orchestrator", - disableOrchestrator: true, - }); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(sendMessage).toHaveBeenCalledTimes(1); - expect(sendMessage).toHaveBeenCalledWith( - childId, - expect.stringContaining("Implement the plan"), - expect.objectContaining({ agentId: "exec" }), - expect.objectContaining({ synthetic: true }) - ); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = 
Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - - expect(updatedTask?.agentId).toBe("exec"); - expect(updatedTask?.taskStatus).toBe("running"); - }); - - test("handoff kickoff sendMessage failure keeps task status as running for restart recovery", async () => { - const sendMessageFailure = mock( - (): Promise> => Promise.resolve(Err("kickoff failed")) - ); - const { config, childId, internal } = await setupPlanModeStreamEndHarness({ - sendMessageOverride: sendMessageFailure, - }); - - await internal.handleStreamEnd(makeSuccessfulProposePlanStreamEndEvent(childId)); - - expect(sendMessageFailure).toHaveBeenCalledTimes(1); - - const postCfg = config.loadConfigOrDefault(); - const updatedTask = Array.from(postCfg.projects.values()) - .flatMap((project) => project.workspaces) - .find((workspace) => workspace.id === childId); - - // Task stays "running" so initialize() can retry the kickoff on next startup, - // rather than "awaiting_report" which could finalize it prematurely. 
- expect(updatedTask?.taskStatus).toBe("running"); - }); - test("falls back to default trunk when parent branch does not exist locally", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index b2cc8bf140..6874a42318 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -23,8 +23,6 @@ import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers"; import { MultiProjectRuntime } from "@/node/runtime/multiProjectRuntime"; import { runBackgroundInit } from "@/node/runtime/runtimeFactory"; import type { InitLogger, Runtime } from "@/node/runtime/Runtime"; -import { readPlanFile } from "@/node/utils/runtime/helpers"; -import { routePlanToExecutor } from "@/node/services/planExecutorRouter"; import { coerceNonEmptyString, tryReadGitHeadCommitSha, @@ -40,17 +38,10 @@ import { } from "@/common/utils/tools/taskGroups"; import { stripTrailingSlashes } from "@/node/utils/pathUtils"; import { Ok, Err, type Result } from "@/common/types/result"; -import { - DEFAULT_TASK_SETTINGS, - type PlanSubagentExecutorRouting, - type TaskSettings, -} from "@/common/types/tasks"; +import { DEFAULT_TASK_SETTINGS, type TaskSettings } from "@/common/types/tasks"; import { createMuxMessage, type MuxMessage } from "@/common/types/message"; -import { - createCompactionSummaryMessageId, - createTaskReportMessageId, -} from "@/node/services/utils/messageIds"; +import { createTaskReportMessageId } from "@/node/services/utils/messageIds"; import { defaultModel, normalizeToCanonical } from "@/common/utils/ai/models"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; @@ -67,13 +58,9 @@ import { TaskToolResultSchema, TaskToolArgsSchema, } from "@/common/utils/tools/toolDefinitions"; -import { isPlanLikeInResolvedChain, 
isToolEnabledInResolvedChain } from "@/common/utils/agentTools"; +import { isPlanLikeInResolvedChain } from "@/common/utils/agentTools"; import { formatSendMessageError } from "@/node/services/utils/sendMessageError"; import { enforceThinkingPolicy } from "@/common/utils/thinking/policy"; -import { - PLAN_AUTO_ROUTING_STATUS_EMOJI, - PLAN_AUTO_ROUTING_STATUS_MESSAGE, -} from "@/common/constants/planAutoRoutingStatus"; import { taskQueueDebug } from "@/node/services/taskQueueDebug"; import { readSubagentGitPatchArtifact } from "@/node/services/subagentGitPatchArtifacts"; import { @@ -318,7 +305,6 @@ export class TaskService { // Bounded by max entries; disk persistence is the source of truth for restart-safety. private readonly completedReportsByTaskId = new Map(); private readonly gitPatchArtifactService: GitPatchArtifactService; - private readonly handoffInProgress = new Set(); /** * Hard-interrupted parent workspaces must not auto-resume until the next user message. * This closes races where descendants could report between parent interrupt and cascade cleanup. @@ -500,267 +486,6 @@ export class TaskService { } } - private getTaskWorkspaceAgentResolutionContext(args: { - projectPath: string; - workspace: Pick; - }): { - workspaceName: string; - runtime: Runtime; - workspacePath: string; - } | null { - assert( - args.projectPath.length > 0, - "getTaskWorkspaceAgentResolutionContext: projectPath must be non-empty" - ); - - const workspaceName = coerceNonEmptyString(args.workspace.name) ?? args.workspace.id; - if (!workspaceName) { - return null; - } - - const runtimeConfig = args.workspace.runtimeConfig ?? DEFAULT_RUNTIME_CONFIG; - const runtime = createRuntimeForWorkspace({ - runtimeConfig, - projectPath: args.projectPath, - name: workspaceName, - }); - const workspacePath = - coerceNonEmptyString(args.workspace.path) ?? 
- runtime.getWorkspacePath(args.projectPath, workspaceName); - if (!workspacePath) { - return null; - } - - return { - workspaceName, - runtime, - workspacePath, - }; - } - - private async isAgentEnabledForTaskWorkspace(args: { - workspaceId: string; - projectPath: string; - workspace: Pick; - agentId: "exec" | "orchestrator"; - }): Promise { - assert( - args.workspaceId.length > 0, - "isAgentEnabledForTaskWorkspace: workspaceId must be non-empty" - ); - assert( - args.projectPath.length > 0, - "isAgentEnabledForTaskWorkspace: projectPath must be non-empty" - ); - - const resolutionContext = this.getTaskWorkspaceAgentResolutionContext({ - projectPath: args.projectPath, - workspace: args.workspace, - }); - if (!resolutionContext) { - return false; - } - - try { - const resolvedFrontmatter = await resolveAgentFrontmatter( - resolutionContext.runtime, - resolutionContext.workspacePath, - args.agentId - ); - const cfg = this.config.loadConfigOrDefault(); - const effectivelyDisabled = isAgentEffectivelyDisabled({ - cfg, - agentId: args.agentId, - resolvedFrontmatter, - }); - return !effectivelyDisabled; - } catch (error: unknown) { - log.warn("Failed to resolve task handoff target agent availability", { - workspaceId: args.workspaceId, - agentId: args.agentId, - error: getErrorMessage(error), - }); - return false; - } - } - - private async canAgentSpawnTasksInWorkspace(args: { - workspaceId: string; - projectPath: string; - workspace: Pick; - agentId: "orchestrator"; - }): Promise { - assert( - args.workspaceId.length > 0, - "canAgentSpawnTasksInWorkspace: workspaceId must be non-empty" - ); - assert( - args.projectPath.length > 0, - "canAgentSpawnTasksInWorkspace: projectPath must be non-empty" - ); - - const resolutionContext = this.getTaskWorkspaceAgentResolutionContext({ - projectPath: args.projectPath, - workspace: args.workspace, - }); - if (!resolutionContext) { - return false; - } - - try { - const cfg = this.config.loadConfigOrDefault(); - const 
resolvedFrontmatter = await resolveAgentFrontmatter( - resolutionContext.runtime, - resolutionContext.workspacePath, - args.agentId - ); - const effectivelyDisabled = isAgentEffectivelyDisabled({ - cfg, - agentId: args.agentId, - resolvedFrontmatter, - }); - if (effectivelyDisabled) { - return false; - } - - const agentDefinition = await readAgentDefinition( - resolutionContext.runtime, - resolutionContext.workspacePath, - args.agentId - ); - const chain = await resolveAgentInheritanceChain({ - runtime: resolutionContext.runtime, - workspacePath: resolutionContext.workspacePath, - agentId: agentDefinition.id, - agentDefinition, - workspaceId: args.workspaceId, - }); - const taskSettings = cfg.taskSettings ?? DEFAULT_TASK_SETTINGS; - const taskDepth = this.getTaskDepth(cfg, args.workspaceId); - const disableTaskToolsForDepth = taskDepth >= taskSettings.maxTaskNestingDepth; - - return !disableTaskToolsForDepth && isToolEnabledInResolvedChain("task", chain); - } catch (error: unknown) { - log.warn("Failed to resolve task handoff target task-spawning capability", { - workspaceId: args.workspaceId, - agentId: args.agentId, - error: getErrorMessage(error), - }); - return false; - } - } - - private async resolvePlanAutoHandoffTargetAgentId(args: { - workspaceId: string; - entry: { - projectPath: string; - workspace: Pick< - WorkspaceConfigEntry, - "id" | "name" | "path" | "runtimeConfig" | "taskModelString" - >; - }; - routing: PlanSubagentExecutorRouting; - planContent: string | null; - }): Promise<"exec" | "orchestrator"> { - assert( - args.workspaceId.length > 0, - "resolvePlanAutoHandoffTargetAgentId: workspaceId must be non-empty" - ); - assert( - args.routing === "exec" || args.routing === "orchestrator" || args.routing === "auto", - "resolvePlanAutoHandoffTargetAgentId: routing must be exec, orchestrator, or auto" - ); - - const resolveOrchestratorAvailability = async (): Promise<"exec" | "orchestrator"> => { - const orchestratorEnabled = await 
this.isAgentEnabledForTaskWorkspace({ - workspaceId: args.workspaceId, - projectPath: args.entry.projectPath, - workspace: args.entry.workspace, - agentId: "orchestrator", - }); - if (orchestratorEnabled) { - return "orchestrator"; - } - - // If orchestrator is disabled/unavailable, fall back to exec before mutating - // workspace agent state so the handoff stream can still proceed. - log.warn("Plan-task auto-handoff falling back to exec because orchestrator is unavailable", { - workspaceId: args.workspaceId, - }); - return "exec"; - }; - - if (args.routing === "exec") { - return "exec"; - } - - if (args.routing === "orchestrator") { - return resolveOrchestratorAvailability(); - } - - if (!args.planContent || args.planContent.trim().length === 0) { - log.warn("Plan-task auto-handoff auto-routing has no plan content; defaulting to exec", { - workspaceId: args.workspaceId, - }); - return "exec"; - } - - const orchestratorCanSpawnTasks = await this.canAgentSpawnTasksInWorkspace({ - workspaceId: args.workspaceId, - projectPath: args.entry.projectPath, - workspace: args.entry.workspace, - agentId: "orchestrator", - }); - if (!orchestratorCanSpawnTasks) { - log.warn( - "Plan-task auto-handoff auto-routing defaulting to exec because orchestrator cannot orchestrate in this workspace", - { - workspaceId: args.workspaceId, - } - ); - return "exec"; - } - - const modelString = normalizeToCanonical( - coerceNonEmptyString(args.entry.workspace.taskModelString) ?? 
defaultModel - ); - assert( - modelString.trim().length > 0, - "resolvePlanAutoHandoffTargetAgentId: modelString must be non-empty" - ); - - const modelResult = await this.aiService.createModel(modelString, undefined, { - agentInitiated: true, - workspaceId: args.workspaceId, - }); - if (!modelResult.success) { - log.warn("Plan-task auto-handoff auto-routing failed to create model; defaulting to exec", { - workspaceId: args.workspaceId, - model: modelString, - error: modelResult.error, - }); - return "exec"; - } - - const decision = await routePlanToExecutor({ - model: modelResult.data, - planContent: args.planContent, - }); - - log.info("Plan-task auto-handoff routing decision", { - workspaceId: args.workspaceId, - target: decision.target, - reasoning: decision.reasoning, - model: modelString, - }); - - if (decision.target === "orchestrator") { - return resolveOrchestratorAvailability(); - } - - return "exec"; - } - private async emitWorkspaceMetadata(workspaceId: string): Promise { assert(workspaceId.length > 0, "emitWorkspaceMetadata: workspaceId must be non-empty"); @@ -848,6 +573,16 @@ export class TaskService { for (const task of awaitingReportTasks) { if (!task.id) continue; + const isPlanLike = await this.isPlanLikeTaskWorkspace({ + projectPath: task.projectPath, + workspace: task, + }); + if (isPlanLike) { + await this.interruptLegacyPlanLikeTask(task, "startup-awaiting_report"); + failedAwaitingReportCount += 1; + continue; + } + // Avoid resuming a task while it still has active descendants (it shouldn't report yet). const hasActiveDescendants = this.hasActiveDescendantAgentTasks(config, task.id); if (hasActiveDescendants) { @@ -885,6 +620,12 @@ export class TaskService { workspace: task, }); + if (isPlanLike) { + await this.interruptLegacyPlanLikeTask(task, "startup-running"); + failedRunningCount += 1; + continue; + } + const model = task.taskModelString ?? defaultModel; const agentId = task.agentId ?? 
TASK_RECOVERY_FALLBACK_AGENT_ID; log.info("[startup] Resuming running task", { @@ -1239,6 +980,22 @@ export class TaskService { const hint = await getRunnableHint(); return Err(`Task.create: agentId is disabled (${agentId}). ${hint}`); } + + const agentDefinition = await readAgentDefinition(runtime, parentWorkspacePath, agentId); + const chain = await resolveAgentInheritanceChain({ + runtime, + workspacePath: parentWorkspacePath, + agentId: agentDefinition.id, + agentDefinition, + workspaceId: parentWorkspaceId, + }); + if (isPlanLikeInResolvedChain(chain)) { + return Err( + `Task.create: plan-mode agents are not supported as sub-agents (${agentId}). ` + + "Use a top-level plan workspace instead." + ); + } + skipInitHook = frontmatter.subagent?.skip_init_hook === true; } catch { const hint = await getRunnableHint(); @@ -2564,6 +2321,18 @@ export class TaskService { continue; } + const isParentInPlace = taskEntry.projectPath === parentWorkspaceName; + const parentWorkspacePath = + coerceNonEmptyString(parentEntry.workspace.path) ?? + (isParentInPlace + ? taskEntry.projectPath + : createRuntimeForWorkspace({ + runtimeConfig: + parentEntry.workspace.runtimeConfig ?? task.runtimeConfig ?? DEFAULT_RUNTIME_CONFIG, + projectPath: taskEntry.projectPath, + name: parentWorkspaceName, + }).getWorkspacePath(taskEntry.projectPath, parentWorkspaceName)); + const taskRuntimeConfig = task.runtimeConfig ?? parentEntry.workspace.runtimeConfig; if (!taskRuntimeConfig) { log.error("Queued task missing runtimeConfig; cannot start", { taskId }); @@ -2577,6 +2346,59 @@ export class TaskService { projectPath: taskEntry.projectPath, name: workspaceName, }); + + const agentIdRaw = coerceNonEmptyString(task.agentId ?? 
task.agentType); + if (agentIdRaw && parentWorkspacePath) { + const parsedAgentId = AgentIdSchema.safeParse(agentIdRaw.trim().toLowerCase()); + const isLegacyPlanLikeQueuedTask = await (async (): Promise => { + if (!parsedAgentId.success) { + return agentIdRaw.trim().toLowerCase() === "plan"; + } + try { + const agentDefinition = await readAgentDefinition( + runtime, + parentWorkspacePath, + parsedAgentId.data + ); + const chain = await resolveAgentInheritanceChain({ + runtime, + workspacePath: parentWorkspacePath, + agentId: agentDefinition.id, + agentDefinition, + workspaceId: taskId, + }); + return isPlanLikeInResolvedChain(chain); + } catch (error: unknown) { + log.debug( + "Queued task: failed to resolve agent mode while checking legacy plan-task support", + { + taskId, + agentId: agentIdRaw, + error: error instanceof Error ? error.message : String(error), + } + ); + return parsedAgentId.data === "plan"; + } + })(); + + if (isLegacyPlanLikeQueuedTask) { + log.warn( + "Skipping queued legacy plan-like subagent task after plan-task support removal", + { + taskId, + taskName: task.name, + projectPath: taskEntry.projectPath, + agentId: agentIdRaw, + } + ); + await this.setTaskStatus(taskId, "interrupted"); + this.rejectWaiters( + taskId, + new Error("Task skipped: legacy plan-mode subagents are no longer supported") + ); + continue; + } + } let runtimeForTaskWorkspace = runtime; let forkedRuntimeConfig = taskRuntimeConfig; @@ -2851,7 +2673,6 @@ export class TaskService { this.opResolver ); let skipInitHook = false; - const agentIdRaw = coerceNonEmptyString(task.agentId ?? 
task.agentType); if (agentIdRaw) { const parsedAgentId = AgentIdSchema.safeParse(agentIdRaw.trim().toLowerCase()); if (parsedAgentId.success) { @@ -3113,6 +2934,34 @@ export class TaskService { } } + private async interruptLegacyPlanLikeTask( + task: WorkspaceConfigEntry & { projectPath: string }, + reason: "startup-awaiting_report" | "startup-running" | "stream_end" + ): Promise { + const workspaceId = task.id; + if (!workspaceId) { + return; + } + + log.warn("Interrupting legacy plan-like subagent task after plan-task support removal", { + workspaceId, + taskName: task.name, + projectPath: task.projectPath, + reason, + }); + + await this.setTaskStatus(workspaceId, "interrupted"); + await this.settleInterruptedTaskAtStreamEnd( + workspaceId, + { projectPath: task.projectPath, workspace: task }, + null + ); + + // Interrupting a legacy plan-like task frees a parallel slot. Kick queue processing now so + // unrelated queued tasks do not stay blocked until some later event happens to re-run it. + await this.maybeStartQueuedTasks(); + } + private async promptTaskForRequiredCompletionTool( workspaceId: string, options?: { @@ -3320,22 +3169,18 @@ export class TaskService { return; } - const proposePlanResult = this.findProposePlanSuccessInParts(event.parts); - if (isPlanLike && proposePlanResult) { - await this.handleSuccessfulProposePlanAutoHandoff({ - workspaceId, - entry, - proposePlanResult, - planSubagentExecutorRouting: - (cfg.taskSettings ?? DEFAULT_TASK_SETTINGS).planSubagentExecutorRouting ?? "exec", - }); + if (isPlanLike) { + await this.interruptLegacyPlanLikeTask( + { ...entry.workspace, projectPath: entry.projectPath }, + "stream_end" + ); return; } // Only infer an implicit report from a clean natural stop. Length-truncated or other // provider finish reasons still go through explicit completion-tool recovery so partial // assistant text cannot prematurely finalize the task. 
- if (!isPlanLike && status !== "awaiting_report" && event.metadata.finishReason === "stop") { + if (status !== "awaiting_report" && event.metadata.finishReason === "stop") { const implicitReportArgs = this.findImplicitAgentReportArgsInParts(event.parts); if (implicitReportArgs) { await this.finalizeAgentTaskReport(workspaceId, entry, implicitReportArgs); @@ -3429,213 +3274,6 @@ export class TaskService { } } - private async handleSuccessfulProposePlanAutoHandoff(args: { - workspaceId: string; - entry: { projectPath: string; workspace: WorkspaceConfigEntry }; - proposePlanResult: { planPath: string }; - planSubagentExecutorRouting: PlanSubagentExecutorRouting; - }): Promise { - assert( - args.workspaceId.length > 0, - "handleSuccessfulProposePlanAutoHandoff: workspaceId must be non-empty" - ); - assert( - args.proposePlanResult.planPath.length > 0, - "handleSuccessfulProposePlanAutoHandoff: planPath must be non-empty" - ); - - if (this.handoffInProgress.has(args.workspaceId)) { - log.debug("Skipping duplicate plan-task auto-handoff", { workspaceId: args.workspaceId }); - return; - } - - this.handoffInProgress.add(args.workspaceId); - - try { - let planSummary: { content: string; path: string } | null = null; - - try { - const info = await this.workspaceService.getInfo(args.workspaceId); - if (!info) { - log.error("Plan-task auto-handoff could not read workspace metadata", { - workspaceId: args.workspaceId, - }); - } else { - const runtime = createRuntimeForWorkspace(info); - const planResult = await readPlanFile( - runtime, - info.name, - info.projectName, - args.workspaceId - ); - if (planResult.exists) { - planSummary = { content: planResult.content, path: planResult.path }; - } else { - log.error("Plan-task auto-handoff did not find plan file content", { - workspaceId: args.workspaceId, - planPath: args.proposePlanResult.planPath, - }); - } - } - } catch (error: unknown) { - log.error("Plan-task auto-handoff failed to read plan file", { - workspaceId: 
args.workspaceId, - planPath: args.proposePlanResult.planPath, - error, - }); - } - - const targetAgentId = await (async () => { - const shouldShowRoutingStatus = args.planSubagentExecutorRouting === "auto"; - if (shouldShowRoutingStatus) { - // Auto routing can pause for up to the LLM timeout; surface progress in the sidebar. - await this.workspaceService.updateAgentStatus(args.workspaceId, { - emoji: PLAN_AUTO_ROUTING_STATUS_EMOJI, - message: PLAN_AUTO_ROUTING_STATUS_MESSAGE, - // ExtensionMetadataService carries forward the previous status URL when url is omitted. - // Use an explicit empty string sentinel to clear stale links for this transient status. - url: "", - }); - } - - try { - return await this.resolvePlanAutoHandoffTargetAgentId({ - workspaceId: args.workspaceId, - entry: { - projectPath: args.entry.projectPath, - workspace: { - id: args.entry.workspace.id, - name: args.entry.workspace.name, - path: args.entry.workspace.path, - runtimeConfig: args.entry.workspace.runtimeConfig, - taskModelString: args.entry.workspace.taskModelString, - }, - }, - routing: args.planSubagentExecutorRouting, - planContent: planSummary?.content ?? null, - }); - } finally { - if (shouldShowRoutingStatus) { - await this.workspaceService.updateAgentStatus(args.workspaceId, null); - } - } - })(); - - const summaryContent = planSummary - ? `# Plan\n\n${planSummary.content}\n\nNote: This chat already contains the full plan; no need to re-open the plan file.\n\n---\n\n*Plan file preserved at:* \`${planSummary.path}\`` - : `A plan was proposed at ${args.proposePlanResult.planPath}. 
Read the plan file and implement it.`; - - const summaryMessage = createMuxMessage( - createCompactionSummaryMessageId(), - "assistant", - summaryContent, - { - timestamp: Date.now(), - compacted: "user", - agentId: "plan", - } - ); - - const replaceHistoryResult = await this.workspaceService.replaceHistory( - args.workspaceId, - summaryMessage, - { - mode: "append-compaction-boundary", - deletePlanFile: false, - } - ); - if (!replaceHistoryResult.success) { - log.error("Plan-task auto-handoff failed to compact history", { - workspaceId: args.workspaceId, - error: replaceHistoryResult.error, - }); - } - - // Handoff resolution follows the same precedence as Task.create: - // global per-agent defaults, else inherit the plan task's active model. - const latestCfg = this.config.loadConfigOrDefault(); - const globalDefault = latestCfg.agentAiDefaults?.[targetAgentId]; - const parentActiveModelCandidate = - typeof args.entry.workspace.taskModelString === "string" - ? args.entry.workspace.taskModelString.trim() - : ""; - const parentActiveModel = - parentActiveModelCandidate.length > 0 ? parentActiveModelCandidate : defaultModel; - - const configuredModel = globalDefault?.modelString?.trim(); - const preferredModel = - configuredModel && configuredModel.length > 0 ? configuredModel : parentActiveModel; - const resolvedModel = normalizeToCanonical( - preferredModel.length > 0 ? preferredModel : defaultModel - ); - assert( - resolvedModel.trim().length > 0, - "handleSuccessfulProposePlanAutoHandoff: resolved model must be non-empty" - ); - const requestedThinking: ThinkingLevel = - globalDefault?.thinkingLevel ?? args.entry.workspace.taskThinkingLevel ?? 
"off"; - const resolvedThinking = enforceThinkingPolicy(resolvedModel, requestedThinking); - - await this.editWorkspaceEntry(args.workspaceId, (workspace) => { - workspace.agentId = targetAgentId; - workspace.agentType = targetAgentId; - workspace.taskModelString = resolvedModel; - workspace.taskThinkingLevel = resolvedThinking; - }); - - await this.setTaskStatus(args.workspaceId, "running"); - - const kickoffMsg = - targetAgentId === "orchestrator" - ? "Start orchestrating the implementation of this plan." - : "Implement the plan."; - try { - const sendKickoffResult = await this.workspaceService.sendMessage( - args.workspaceId, - kickoffMsg, - { - model: resolvedModel, - agentId: targetAgentId, - thinkingLevel: resolvedThinking, - experiments: args.entry.workspace.taskExperiments, - }, - { synthetic: true, agentInitiated: true } - ); - if (!sendKickoffResult.success) { - // Keep status as "running" so the restart handler in initialize() can - // re-attempt the kickoff on next startup, rather than moving to - // "awaiting_report" which could finalize the task prematurely. - log.error( - "Plan-task auto-handoff failed to send kickoff message; task stays running for retry on restart", - { - workspaceId: args.workspaceId, - targetAgentId, - error: sendKickoffResult.error, - } - ); - } - } catch (error: unknown) { - // Same as above: leave status as "running" for restart recovery. 
- log.error( - "Plan-task auto-handoff failed to send kickoff message; task stays running for retry on restart", - { - workspaceId: args.workspaceId, - targetAgentId, - error, - } - ); - } - } catch (error: unknown) { - log.error("Plan-task auto-handoff failed", { - workspaceId: args.workspaceId, - planPath: args.proposePlanResult.planPath, - error, - }); - } finally { - this.handoffInProgress.delete(args.workspaceId); - } - } - private async finalizeTerminationPhaseForReportedTask(workspaceId: string): Promise { assert( workspaceId.length > 0, @@ -4154,28 +3792,6 @@ export class TaskService { } } - private findProposePlanSuccessInParts(parts: readonly unknown[]): { planPath: string } | null { - for (let i = parts.length - 1; i >= 0; i--) { - const part = parts[i]; - if (!isDynamicToolPart(part)) continue; - if (part.toolName !== "propose_plan") continue; - if (part.state !== "output-available") continue; - if (!isSuccessfulToolResult(part.output)) continue; - - const planPath = - typeof part.output === "object" && - part.output !== null && - "planPath" in part.output && - typeof (part.output as { planPath?: unknown }).planPath === "string" - ? 
(part.output as { planPath: string }).planPath.trim() - : ""; - if (!planPath) continue; - - return { planPath }; - } - return null; - } - private findImplicitAgentReportArgsInParts( parts: readonly unknown[] ): { reportMarkdown: string } | null { diff --git a/tests/ipc/streaming/sendMessage.context.test.ts b/tests/ipc/streaming/sendMessage.context.test.ts index 299d24a639..3dc284f5c0 100644 --- a/tests/ipc/streaming/sendMessage.context.test.ts +++ b/tests/ipc/streaming/sendMessage.context.test.ts @@ -178,87 +178,83 @@ describeIntegration("sendMessage context handling tests", () => { }); describe("tool calls", () => { - test.concurrent( - "should execute bash tool when requested", - async () => { - await withSharedWorkspace("anthropic", async ({ env, workspaceId, collector }) => { - const repoPath = getSharedRepoPath(); - - // Create a test file in the workspace - const testFilePath = path.join(repoPath, "test-tool-file.txt"); - await fs.writeFile(testFilePath, "Hello from test file!"); - - try { - // Ask to read the file using bash - const result = await sendMessageWithModel( - env, - workspaceId, - `Use bash to run: cat ${testFilePath}. Set display_name="read-file" and timeout_secs=30. 
Do not spawn a sub-agent.`, - modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId), - { - toolPolicy: [{ regex_match: "bash", action: "require" }], - } - ); - - expect(result.success).toBe(true); - - // Wait for completion (tool calls take longer) - await collector.waitForEvent("stream-end", 45000); - - // Check for tool call events - const events = collector.getEvents(); - const toolCallStarts = events.filter( - (e) => "type" in e && (e as { type: string }).type === "tool-call-start" - ); - - // Should have at least one bash tool call - const bashCall = toolCallStarts.find((e) => { - if (!("toolName" in e) || e.toolName !== "bash") return false; - return true; - }); - expect(bashCall).toBeDefined(); - } finally { - // Cleanup test file - try { - await fs.unlink(testFilePath); - } catch { - // Ignore cleanup errors - } - } - }); - }, - 60000 - ); - - test.concurrent( - "should respect tool policy 'none'", - async () => { - await withSharedWorkspace("anthropic", async ({ env, workspaceId, collector }) => { - // Ask for something that would normally use tools - // Policy to disable all tools: match any tool name and disable + test("should execute bash tool when requested", async () => { + // OpenAI has been more reliable than Anthropic here under full-suite CI load, and these + // assertions are about tool-policy plumbing rather than provider-specific tool behavior. + await withSharedWorkspace("openai", async ({ env, workspaceId, collector }) => { + const repoPath = getSharedRepoPath(); + + // Create a test file in the workspace + const testFilePath = path.join(repoPath, "test-tool-file.txt"); + await fs.writeFile(testFilePath, "Hello from test file!"); + + try { + // Ask to read the file using bash const result = await sendMessageWithModel( env, workspaceId, - "Run the command 'echo test' using bash.", - modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId), + `Use bash to run: cat ${testFilePath}. Set display_name="read-file" and timeout_secs=30. 
Do not spawn a sub-agent.`, + modelString("openai", KNOWN_MODELS.GPT.providerModelId), { - toolPolicy: [{ regex_match: ".*", action: "disable" }], + toolPolicy: [{ regex_match: "bash", action: "require" }], } ); expect(result.success).toBe(true); - await collector.waitForEvent("stream-end", 15000); - // Should NOT have tool calls when policy is 'none' + // Wait for completion (tool calls take longer) + await collector.waitForEvent("stream-end", 45000); + + // Check for tool call events const events = collector.getEvents(); const toolCallStarts = events.filter( (e) => "type" in e && (e as { type: string }).type === "tool-call-start" ); - expect(toolCallStarts.length).toBe(0); - }); - }, - 25000 - ); + + // Should have at least one bash tool call + const bashCall = toolCallStarts.find((e) => { + if (!("toolName" in e) || e.toolName !== "bash") return false; + return true; + }); + expect(bashCall).toBeDefined(); + } finally { + // Cleanup test file + try { + await fs.unlink(testFilePath); + } catch { + // Ignore cleanup errors + } + } + }); + }, 60000); + + test("should respect tool policy 'none'", async () => { + // OpenAI has been more reliable than Anthropic here under full-suite CI load, and these + // assertions are about tool-policy plumbing rather than provider-specific tool behavior. 
+ await withSharedWorkspace("openai", async ({ env, workspaceId, collector }) => { + // Ask for something that would normally use tools + // Policy to disable all tools: match any tool name and disable + const result = await sendMessageWithModel( + env, + workspaceId, + "Run the command 'echo test' using bash.", + modelString("openai", KNOWN_MODELS.GPT.providerModelId), + { + toolPolicy: [{ regex_match: ".*", action: "disable" }], + } + ); + + expect(result.success).toBe(true); + await collector.waitForEvent("stream-end", 15000); + + // Should NOT have tool calls when policy is 'none' + const events = collector.getEvents(); + const toolCallStarts = events.filter( + (e) => "type" in e && (e as { type: string }).type === "tool-call-start" + ); + expect(toolCallStarts.length).toBe(0); + }); + }, 25000); }); describe("history truncation", () => {