diff --git a/src/chat/agent-loop.js b/src/chat/agent-loop.js index 1cd032d..032fa05 100644 --- a/src/chat/agent-loop.js +++ b/src/chat/agent-loop.js @@ -272,6 +272,16 @@ class AgentLoop { // that an unrelated session's long-running job doesn't trap this loop. const myBgJobs = () => getActiveBgJobsForSession(sid); run._pendingBgJobEvents = []; + // Issue #167: track bg jobs that were started by run_shell_bg WITHIN this + // run. The BG_WAIT_SKIPPED_MODEL_DONE early-exit path is safe for + // pre-existing long-running jobs (dev servers, watchers from earlier + // turns), but it must NOT fire when the model just spawned a new + // background task in this very turn — otherwise the model can + // "promise to notify you when done" and silently end the run before + // any completion event is delivered. tool-executor passes + // ctx.registerBgJob into bg-shell so each freshly started jobId is + // recorded here; we clear entries on end so memory is bounded. + run._sessionStartedBgJobs = new Set(); // Only accept job-end events for jobs that belong to THIS session. // Require an exact sessionId match so events without a sessionId (orphaned // terminals not registered via addActiveBgJob) are also dropped — this @@ -279,6 +289,7 @@ class AgentLoop { const _bgJobEndHandler = (payload) => { if (!payload || payload.sessionId !== sid) return; run._pendingBgJobEvents.push(payload); + if (payload.jobId) run._sessionStartedBgJobs.delete(payload.jobId); }; onBgJobEnded(_bgJobEndHandler); @@ -622,8 +633,20 @@ class AgentLoop { // bypass this guard if we used a text-match heuristic. if (assistantText.trim()) { const _hasPendingEvents = !!(run._pendingBgJobEvents && run._pendingBgJobEvents.length); - if (!_hasPendingEvents) { - Logger.info('BG_WAIT_SKIPPED_MODEL_DONE', { jobs: [...myBgJobs()] }); + // Issue #167: in addition to the pending-event check, + // refuse to break out of the wait loop when the model + // started a new bg job IN THIS RUN that is still alive. + // Without this guard, the model can defuse the wait by + // emitting a single "I'll notify you when it's done" + // sentence, leaving the just-spawned job orphaned. + const _activeJobsNow = myBgJobs(); + const _hasOwnRunningJob = [...run._sessionStartedBgJobs] + .some(j => _activeJobsNow.has(j)); + if (!_hasPendingEvents && !_hasOwnRunningJob) { + Logger.info('BG_WAIT_SKIPPED_MODEL_DONE', { + jobs: [..._activeJobsNow], + sessionStartedJobs: [...run._sessionStartedBgJobs], + }); break; } } diff --git a/src/chat/archive-export.js b/src/chat/archive-export.js index 5213698..ce23c03 100644 --- a/src/chat/archive-export.js +++ b/src/chat/archive-export.js @@ -4,7 +4,7 @@ // (toggle `archived` flag). Users expected real archiving — a Markdown // snapshot they can grep, commit, or share. This module renders the // session record to Markdown and writes it under -// `/.deepcopilot/archives/yyyyMMdd-HHmmss-.md`. +// `<workspace>/.deep-copilot/archives/yyyyMMdd-HHmmss-<title>.md`. // // Edge cases handled: // - No workspace open → fall back to vscode.window.showSaveDialog. @@ -20,7 +20,11 @@ const path = require('path'); const fs = require('fs/promises'); const { t } = require('../utils/i18n'); -const ARCHIVE_SUBDIR = '.deepcopilot/archives'; +// Post-merge review: align with the rest of the codebase's workspace-artifact +// convention (`.deep-copilot/plans`, `.deep-copilot/memory.md`, +// `.deep-copilot/logs`). Previously this lived under `.deepcopilot/archives`, +// which produced an inconsistent second hidden directory in user workspaces. +const ARCHIVE_SUBDIR = '.deep-copilot/archives'; /** * Strip filesystem-hostile characters and trim length. diff --git a/src/chat/session-store.js b/src/chat/session-store.js index c0851f2..13b91c4 100644 --- a/src/chat/session-store.js +++ b/src/chat/session-store.js @@ -369,7 +369,7 @@ class SessionStore { * actually *archived* anywhere they could see, find, or grep. * * New behaviour: render the session to Markdown and write it under - * <workspace>/.deepcopilot/archives/yyyyMMdd-HHmmss-<title>.md + * <workspace>/.deep-copilot/archives/yyyyMMdd-HHmmss-<title>.md * Then perform the original soft-hide so the session leaves the sidebar. * * The "un-archive" gesture (clicking again on an already-archived item) @@ -423,7 +423,7 @@ class SessionStore { /** * Show the bottom-right toast with "Open" / "Reveal in Explorer" buttons. * Path display is workspace-relative when possible so users see - * ".deepcopilot/archives/20260526-….md" + * ".deep-copilot/archives/20260526-….md" * instead of a long absolute path. Delegates the relativisation to * `findContainingFolder` so multi-root + nested-root cases stay correct. */ diff --git a/src/chat/tool-executor.js b/src/chat/tool-executor.js index fa129c6..1f8abc8 100644 --- a/src/chat/tool-executor.js +++ b/src/chat/tool-executor.js @@ -522,6 +522,16 @@ class ToolExecutor { // the webview tagged with the tool-call id so the card can render // a live tail (GH-Copilot-style terminal card). const ctx = { abortSignal, secrets: this._context.secrets, sessionId: run?.sessionId ?? null }; + // Issue #167: let run_shell_bg register the just-started jobId + // into the run's own "session-started" set, so agent-loop's + // BG_WAIT_SKIPPED_MODEL_DONE guard can refuse to end the turn while + // a freshly spawned background task is still alive. No-op when run + // is null (e.g. one-shot dispatch from tests). + ctx.registerBgJob = (jobId) => { + if (!jobId || !run) return; + if (!(run._sessionStartedBgJobs instanceof Set)) run._sessionStartedBgJobs = new Set(); + run._sessionStartedBgJobs.add(jobId); + }; if (tcId) { ctx.onStreamDelta = (delta) => { if (!delta) return; diff --git a/src/tools/bg-shell.js b/src/tools/bg-shell.js index 6c26ea6..f0e46ab 100644 --- a/src/tools/bg-shell.js +++ b/src/tools/bg-shell.js @@ -117,6 +117,35 @@ async function toolRunShellBg(args, ctx = {}) { // onDidEndTerminalShellExecution never fires and the job would never be // removed, causing agent-loop to spin indefinitely. if (usedSI) { + // Issue #167 follow-up (post-merge review): register the jobId in the + // run-scoped set BEFORE publishing to the cross-session active-job + // registry. If the order were reversed, an extremely short-lived + // command could fire its `bg-job-end` event between addActiveBgJob() + // and registerBgJob(); the end handler would then call + // `_sessionStartedBgJobs.delete(jobId)` on a set that doesn't contain + // it yet, and the subsequent registration would leave a stale + // jobId hanging in the set forever (defeating the cleanup contract). + // Doing the local registration first guarantees: either the jobId is + // in the set when the end event fires (and gets cleaned up), or the + // end event is observed after both registrations completed. + if (typeof ctx.registerBgJob === 'function') { + try { + ctx.registerBgJob(jobId); + } catch (e) { + // Do not swallow silently — if this ever fails, the agent-loop + // loses the "started in this run" signal and the original + // orphan-job bug from #167 can resurface. Log so regressions + // surface in debug-logs even when no exception bubbles up. + // Logger only exposes `.info` today (see src/logger.js); the + // tag itself carries the severity for grep-ability. + try { + Logger.info('BG_JOB_REGISTER_FAILED', { + jobId, + err: (e && e.message) || String(e), + }); + } catch {} + } + } addActiveBgJob(jobId, ctx.sessionId || null); } @@ -175,19 +204,30 @@ async function toolRunShellBg(args, ctx = {}) { terminalName: jobId, status: 'running', shellIntegrationAvailable: usedSI, + // Issue #167: the previous hint promised the agent it would be + // "automatically woken when this job ends" and instructed it to + // "simply end your turn". That contract was incomplete — the agent + // loop used to break out of its wait the moment the model produced + // ANY closing sentence, leaving the just-spawned job orphaned. The + // loop now refuses to end while one of this run's own bg jobs is + // still alive, so the model MUST stay on duty. We restate the + // contract honestly here to discourage the model from emitting + // pseudo-completion messages like "I'll tell you when it's done". hint: [ `Background job "${jobId}" started.`, usedSI ? [ - `Shell integration active — the agent will be suspended and automatically woken when this job ends.`, + `Shell integration active — agent-loop will KEEP YOU ON DUTY until this job ends.`, + `You CANNOT close this turn by saying "I'll let you know later": the loop ignores any such promise while this job is alive and will re-invoke you with the real exit code + tail output as a <system-reminder> when the process exits.`, `CRITICAL: do NOT call ping, sleep, Start-Sleep, or any wait/poll command — it wastes time and blocks failure detection.`, - `CRITICAL: do NOT call read_terminal now. Simply end your turn; the system delivers the job result automatically.`, + `CRITICAL: do NOT call read_terminal now — the system delivers the final result automatically. Either narrate genuine progress, or stay silent until you receive the end-of-job system-reminder.`, ].join('\n') : [ - `Shell integration unavailable — you must poll manually:`, + `Shell integration unavailable — agent-loop CANNOT track this job's exit automatically; you must poll manually:`, ` 1. Wait: run_shell(command: "ping -n 16 127.0.0.1 > nul") ← ~15 s pause on Windows`, ` 2. Check: read_terminal(terminal: "${jobId}")`, ` Output shows "[exit N]" or "[finished]" when done; "[running]" means still active.`, + `Do NOT tell the user "I'll notify you when it's done" in this branch — there is no automatic notification.`, ].join('\n'), `To cancel: ask the user to close the terminal named "${jobId}".`, ].join('\n'),