diff --git a/src/chat/agent-loop.js b/src/chat/agent-loop.js index 6eaafb0..1cd032d 100644 --- a/src/chat/agent-loop.js +++ b/src/chat/agent-loop.js @@ -54,7 +54,18 @@ function injectSyntheticSkillRead(messages, skillName, body, skillPath) { const filePath = skillPath || path.join(DEEPCOPILOT_SKILLS_DIR, safeName, 'SKILL.md'); const callId = `synthetic_skill_read_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`; - messages.push({ + // DeepSeek thinking-mode quirk: once any assistant message in history + // carries reasoning_content, every subsequent assistant message MUST + // also carry it, or the API returns 400 ("reasoning_content in the + // thinking mode must be passed back to the API"). Only attach a + // reasoning_content here when the history already shows thinking mode is + // live — that way non-DeepSeek providers (which don't round-trip this + // field) aren't poisoned by a synthetic value, and the providers/index.js + // backfill stays inactive until real thoughts have already appeared. + const thinkingLive = Array.isArray(messages) && messages.some( + m => m && m.role === 'assistant' && typeof m.reasoning_content === 'string' && m.reasoning_content.length > 0, + ); + const asst = { role: 'assistant', content: null, tool_calls: [{ @@ -65,7 +76,11 @@ function injectSyntheticSkillRead(messages, skillName, body, skillPath) { arguments: JSON.stringify({ path: filePath }), }, }], - }); + }; + if (thinkingLive) { + asst.reasoning_content = `Loading skill SOP "${safeName}" via read_file to obtain its instructions before proceeding.`; + } + messages.push(asst); messages.push({ role: 'tool', tool_call_id: callId, @@ -924,15 +939,23 @@ class AgentLoop { { role: 'user', content: '\nYou have reached the tool-call iteration limit without producing a user-facing answer. Stop calling tools. Write a concise plain-text reply that: (1) summarises what you tried, (2) states what you found or could not find, (3) suggests a concrete next step the user can take.\n' }, ]; let tail = ''; + let tailThoughts = ''; await streamChat( { provider, apiKey, baseUrl, messages: finalMsgs, model, noTools: true }, { onDelta: t => { tail += t; run.reply.asst += t; this._postToRun(run, { type: 'replyDelta', text: t }); }, - onThinking: t => { run.reply.thoughts += t; this._postToRun(run, { type: 'thinkingDelta', text: t }); }, + onThinking: t => { tailThoughts += t; run.reply.thoughts += t; this._postToRun(run, { type: 'thinkingDelta', text: t }); }, }, signal, ).catch(e => Logger.info('FORCE_FINAL_SUMMARY_ERROR', { message: e.message })); - if (tail) run.messages.push({ role: 'assistant', content: tail }); + if (tail) run.messages.push({ + role: 'assistant', + content: tail, + // Preserve thinking output so the persisted/reloaded history + // does not end with a reasoning-less assistant message, which + // would 400 on the next turn under DeepSeek thinking mode. + ...(tailThoughts ? { reasoning_content: tailThoughts } : {}), + }); } } catch (e) { // Restore the last known-good message state to prevent persisting a diff --git a/src/prompts/system.js b/src/prompts/system.js index 5672f81..021ee97 100644 --- a/src/prompts/system.js +++ b/src/prompts/system.js @@ -235,11 +235,25 @@ function readUserMemory() { // // Issue #61 — Step 2 (skill index), Step 8 (trust warning). -function readSkillIndex() { +// Cheap memoization across a single buildSystemPrompt() call. Both +// readSkillIndex() and getProblemSolvingParadigm() need the discovered +// skills, but discoverSkills() walks the filesystem and reads every +// SKILL.md body, so scanning twice per prompt build is wasteful. The cache +// is reset at the start of every buildSystemPrompt() invocation. +let _skillsCache = null; +function _getSkills() { + if (_skillsCache !== null) return _skillsCache; try { const { discoverSkills } = require('../skills'); - const root = wsRoot(); - const skills = discoverSkills(root); + _skillsCache = discoverSkills(wsRoot()); + } catch { _skillsCache = []; } + return _skillsCache; +} +function _resetSkillsCache() { _skillsCache = null; } + +function readSkillIndex() { + try { + const skills = _getSkills(); if (!skills.length) return null; const lines = ['# Available skills']; @@ -269,6 +283,20 @@ function readSkillIndex() { // Issue #61 — Step 6 + Step 7. function getProblemSolvingParadigm() { + // Issue #146 — only mention the skill-creator hard gate when a meta-skill + // is actually installed. Otherwise the model fabricates a skill_invoke + // call to a non-existent "skill-creator", producing a confusing error. + // Reuses the memoized skills list shared with readSkillIndex() so we + // don't walk the filesystem twice per prompt build. + const variants = new Set(['skill-creator', 'skill_creator', 'skillcreator']); + const skillCreatorInstalled = _getSkills().some( + s => variants.has(String(s && s.name || '').toLowerCase()), + ); + + const gateParagraph = skillCreatorInstalled + ? `\n\n**Issue #146 — skill_create quality gate**: A meta-skill matching \`skill-creator\` (or variant) is installed locally. You MUST call \`skill_invoke({ name: "" })\` in the SAME turn BEFORE you call \`skill_create\`. The meta-skill performs description tightening, body structure check, and dedup. Calling \`skill_create\` without it will be rejected by the tool layer.` + : ''; + return `# Problem-solving paradigm For any non-trivial task, follow this loop: @@ -284,9 +312,7 @@ For any non-trivial task, follow this loop: Skills (\`skill_invoke\` / \`skill_create\`) capture reasoned, on-demand playbooks. Hooks (\`hooks.json\`) capture deterministic reflexes. Do not conflate them. -Never call \`skill_create\` for one-off fixes, trivial tasks, or before the user has confirmed the solution works. - -**Issue #146 — skill_create quality gate**: if a skill matching the \`skill-creator\` meta-skill name (or common variants: \`skill_creator\`, \`skillcreator\`) appears in the Available skills index above, you MUST call \`skill_invoke({ name: "" })\` in the SAME turn BEFORE you call \`skill_create\`. The meta-skill performs description tightening, body structure check, and dedup. Calling \`skill_create\` without it will be rejected by the tool layer. Do NOT treat \`skill_create\` as "just a file write" — creation goes through review first.`; +Never call \`skill_create\` for one-off fixes, trivial tasks, or before the user has confirmed the solution works.${gateParagraph}`; } // ---------- workspace instructions (lazy, opt-in) ---------- @@ -367,6 +393,10 @@ function buildSystemPrompt(opts = {}) { const staticPart = getStaticCore(); + // Reset the per-build skills cache so a freshly-installed skill becomes + // visible without a full process restart. + _resetSkillsCache(); + const dynamicParts = [getEnvironmentSection(osName)]; const mem = readUserMemory(); if (mem) dynamicParts.push(mem); diff --git a/src/providers/index.js b/src/providers/index.js index b2fc189..3fc0dda 100644 --- a/src/providers/index.js +++ b/src/providers/index.js @@ -143,37 +143,92 @@ function listModels(providerId) { /** * Apply provider-declared `stripInputFields` to a messages array. - * Returns a NEW array; never mutates the input. The strip rule lives in the - * provider JSON so a vendor's protocol quirks stay encapsulated. + * Never mutates the input. Returns either a NEW array (when at least one + * message was rewritten by stripping or backfill) or the ORIGINAL `messages` + * reference unchanged (fast path: no quirks apply, e.g. an OpenAI/Anthropic + * call with an empty `stripInputFields`). Callers must treat the result as + * read-only — do not mutate elements of the returned array. The strip rule + * lives in the provider JSON so a vendor's protocol quirks stay encapsulated. * - * Example: DeepSeek 400s when the input contains `reasoning_content`. - * deepseek.json sets quirks.stripInputFields: ["reasoning_content"], and this - * function removes that field from each message before the API call. + * Two behaviours, switched on whether the call is in DeepSeek's thinking-mode + * round-trip protocol (provider declares `reasoning_content` in stripInputFields + * AND the chosen model is reasoning-capable): * - * Exception: `reasoning_content` is never stripped for reasoning-capable models - * (capabilities.reasoning === true) because DeepSeek's API requires it to be - * passed back in subsequent turns — omitting it causes HTTP 400. + * - **Non-thinking-mode call** (everything else, including non-reasoning + * DeepSeek models and all OpenAI/Anthropic models): every field declared in + * `quirks.stripInputFields` is removed. DeepSeek 400s when the input + * contains `reasoning_content` outside thinking mode, so deepseek.json sets + * `stripInputFields: ["reasoning_content"]` and we drop it here. + * + * - **Thinking-mode call** (DeepSeek reasoner family, etc.): `reasoning_content` + * is NOT stripped because the API requires it to be passed back in + * subsequent turns. On top of that we enforce the documented invariant + * "once any assistant in history carries non-empty `reasoning_content`, + * EVERY subsequent assistant message MUST also carry one" — otherwise the + * API rejects with `400 "reasoning_content in the thinking mode must be + * passed back to the API"`. We backfill a short placeholder on assistant + * messages that come after the first one with thoughts, leaving earlier + * pre-thinking messages alone. This is a defence-in-depth net: callers + * should still attach the real thought stream when they have one. */ +const REASONING_PLACEHOLDER = '(no thoughts surfaced for this step)'; + function sanitizeMessages(providerId, messages, modelId) { if (!Array.isArray(messages) || !messages.length) return messages; const strip = getProvider(providerId)?.quirks?.stripInputFields; - if (!Array.isArray(strip) || !strip.length) return messages; - // Reasoning-capable models MUST have reasoning_content passed back to the - // API; strip every other declared field but skip reasoning_content for them. const isReasoning = !!(modelId && getModel(providerId, modelId)?.capabilities?.reasoning); - const effectiveStrip = isReasoning ? strip.filter(f => f !== 'reasoning_content') : strip; - if (!effectiveStrip.length) return messages; - return messages.map(m => { - if (!m || typeof m !== 'object') return m; - let copy = m; - for (const k of effectiveStrip) { - if (Object.prototype.hasOwnProperty.call(copy, k)) { - if (copy === m) copy = Object.assign({}, m); - delete copy[k]; + // The reasoning_content round-trip rule is specific to providers that + // BOTH (a) declare reasoning_content as a stripInputField (i.e. the + // provider's API is known to care about this field) AND (b) are being + // used in a reasoning-capable mode. Without this narrower gate, OpenAI / + // Anthropic models that happen to flip `capabilities.reasoning` would + // get unknown `reasoning_content` fields pushed onto their requests. + const stripsReasoning = Array.isArray(strip) && strip.includes('reasoning_content'); + const honorsReasoningRoundTrip = isReasoning && stripsReasoning; + // For models that honor the round-trip protocol, strip every declared + // field except reasoning_content. Otherwise strip everything declared. + const effectiveStrip = Array.isArray(strip) + ? (honorsReasoningRoundTrip ? strip.filter(f => f !== 'reasoning_content') : strip) + : []; + + let out = messages; + if (effectiveStrip.length) { + out = out.map(m => { + if (!m || typeof m !== 'object') return m; + let copy = m; + for (const k of effectiveStrip) { + if (Object.prototype.hasOwnProperty.call(copy, k)) { + if (copy === m) copy = Object.assign({}, m); + delete copy[k]; + } } + return copy; + }); + } + + // Reasoning-mode invariant backfill. The DeepSeek rule is specifically + // about messages that come AFTER the first assistant message with + // non-empty reasoning_content — earlier "pre-thinking" assistant + // messages don't need it. Locating the first thinking index and only + // backfilling from there onward keeps payload size minimal and matches + // the documented protocol more precisely. Scoped to providers that + // actually honor the round-trip protocol so we never inject + // reasoning_content into OpenAI/Anthropic-bound requests. + if (honorsReasoningRoundTrip) { + const firstThinkingIdx = out.findIndex( + m => m && m.role === 'assistant' && typeof m.reasoning_content === 'string' && m.reasoning_content.length > 0, + ); + if (firstThinkingIdx !== -1) { + out = out.map((m, i) => { + if (i <= firstThinkingIdx) return m; + if (!m || m.role !== 'assistant') return m; + if (typeof m.reasoning_content === 'string' && m.reasoning_content.length > 0) return m; + return Object.assign({}, m, { reasoning_content: REASONING_PLACEHOLDER }); + }); } - return copy; - }); + } + + return out; } /** diff --git a/src/tools/schema.js b/src/tools/schema.js index 5d3bc75..ce9c6ba 100644 --- a/src/tools/schema.js +++ b/src/tools/schema.js @@ -130,7 +130,7 @@ const TOOL_DEFS = [ type: 'function', function: { name: 'skill_create', - description: 'Persist a reusable multi-step workflow as a new skill on disk. Use ONLY when ALL of these are true: (1) you have just completed a non-trivial multi-step task, (2) the user has explicitly confirmed the result is correct, (3) the workflow has ≥3 concrete steps that span multiple tools and is likely to recur. Do NOT use for one-off fixes, trivial edits, single-step tasks, before user confirmation, or to record preferences (those belong in ~/.deepcopilot/memory.md) or project facts (those belong in /DEEPCOPILOT.md). When the SOP was synthesized from web research, set source to "web" or "hybrid" — the skill will be marked untrusted automatically. **HARD GATE (Issue #146)**: if a meta-skill whose name is one of `skill-creator`, `skill_creator`, or `skillcreator` is installed locally (see the "Available skills" index in the system prompt), you MUST call `skill_invoke` earlier in the SAME turn with `name` set to the EXACT installed spelling (whichever of the three appears in the index) before calling skill_create; invoking a variant that is NOT actually installed does NOT satisfy the gate. The meta-skill performs description tightening, body structure check, and dedup. If no such skill is installed the call is allowed through with a warning prepended to the result.', + description: 'Persist a reusable multi-step workflow as a new skill on disk. Use ONLY when ALL of these are true: (1) you have just completed a non-trivial multi-step task, (2) the user has explicitly confirmed the result is correct, (3) the workflow has ≥3 concrete steps that span multiple tools and is likely to recur. Do NOT use for one-off fixes, trivial edits, single-step tasks, before user confirmation, or to record preferences (those belong in ~/.deepcopilot/memory.md) or project facts (those belong in /DEEPCOPILOT.md). When the SOP was synthesized from web research, set source to "web" or "hybrid" — the skill will be marked untrusted automatically. **Quality gate (Issue #146)**: ONLY if a meta-skill named `skill-creator`, `skill_creator`, or `skillcreator` is listed in the "Available skills" index of the current system prompt, you MUST call `skill_invoke` earlier in the SAME turn with `name` set to that exact installed spelling before calling skill_create. If no such meta-skill appears in the index, do NOT fabricate a `skill_invoke` call for it — just call `skill_create` directly; the gate is inactive in that environment.', parameters: { type: 'object', properties: { diff --git a/src/tools/skill-tools.js b/src/tools/skill-tools.js index 54fd6ce..b565760 100644 --- a/src/tools/skill-tools.js +++ b/src/tools/skill-tools.js @@ -15,6 +15,17 @@ const path = require('path'); const { discoverSkills, DEEPCOPILOT_SKILLS_DIR } = require('../skills'); const { wsRoot } = require('../utils/paths'); +// Issue #146 — Canonical name variants for the optional `skill-creator` +// meta-skill. When this skill is installed in the workspace, the `skill_create` +// gate requires it to be invoked before any new skill can be created (a +// quality-review step). When it is NOT installed the gate is a no-op and +// `skill_create` proceeds directly — see `skillCreate` for the conditional +// check and `system.js` / `schema.js` for the matching prompt/schema wording. +// Accept a couple of common spellings to be permissive about how the on-disk +// skill is named. Defined at module scope so both skill_invoke (for the "did +// you mean X?" soft-notice path) and the skill_create gate share the same set. +const SKILL_CREATOR_NAMES = new Set(['skill-creator', 'skill_creator', 'skillcreator']); + // ─── skill_invoke ──────────────────────────────────────────────────────────── /** @@ -30,6 +41,23 @@ function skillInvoke(args, run) { const all = discoverSkills(wsRoot()); const s = all.find(x => x.name === name); if (!s) { + // Issue #146 follow-up: handle a model that calls a skill-creator + // variant. Two distinct cases must be told apart: + // 1. A creator IS installed but the model used wrong casing/spelling. + // → return an actionable "did you mean X" error so the gate + // stays effective. + // 2. No creator is installed at all. + // → return a soft notice so the agent can proceed straight to + // skill_create (the executor-side gate is a no-op here). + if (SKILL_CREATOR_NAMES.has(name.toLowerCase())) { + const installedCreator = all.find( + x => SKILL_CREATOR_NAMES.has(String(x && x.name || '').toLowerCase()), + ); + if (installedCreator) { + return `Error: skill "${name}" not found. Did you mean "${installedCreator.name}"? Call \`skill_invoke({ name: "${installedCreator.name}" })\` with that exact spelling.`; + } + return 'Notice: no skill-creator meta-skill is installed in this environment. The skill_create quality gate is inactive here — you may proceed to call `skill_create` directly when appropriate.'; + } const known = all.map(x => x.name).join(', ') || '(none installed)'; return `Error: skill "${name}" not found. Available: ${known}`; } @@ -70,11 +98,6 @@ const VALID_SOURCES = new Set(['self', 'web', 'hybrid']); const VALID_TRUSTS = new Set(['trusted', 'untrusted']); const MAX_BODY_BYTES = 64 * 1024; // 64 KB hard ceiling -// Issue #146 — Meta-skill that must review every skill_create call. -// Accept a couple of common spelling variants to be permissive about how -// the on-disk skill is named. -const SKILL_CREATOR_NAMES = new Set(['skill-creator', 'skill_creator', 'skillcreator']); - /** * Issue #146 — Check whether `skill_invoke({name: 'skill-creator'})` was * called earlier in the *current* turn (i.e. after the most recent real user