Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions packages/opencode/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"format": "echo 'Formatting code...' && bun run --prettier --write src/**/*.ts",
"docs": "echo 'Generating documentation...' && find src -name '*.ts' -exec echo 'Processing: {}' \\;",
"deploy": "echo 'Deploying application...' && bun run build && echo 'Deployment completed successfully'",
"db": "bun drizzle-kit"
"db": "bun drizzle-kit",
"audit:prompts": "bun run script/audit-overspecification.ts"
},
"bin": {
"opencode": "./bin/opencode"
Expand Down Expand Up @@ -135,4 +136,4 @@
"overrides": {
"drizzle-orm": "1.0.0-beta.12-a5629fb"
}
}
}
215 changes: 215 additions & 0 deletions packages/opencode/script/audit-overspecification.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
#!/usr/bin/env bun

import PROMPT_ANTHROPIC from "../src/session/prompt/anthropic.txt"
import PROMPT_QWEN from "../src/session/prompt/qwen.txt"
import PROMPT_BEAST from "../src/session/prompt/beast.txt"
import PROMPT_GEMINI from "../src/session/prompt/gemini.txt"
import PROMPT_CODEX from "../src/session/prompt/codex_header.txt"
import PROMPT_TRINITY from "../src/session/prompt/trinity.txt"
import PROMPT_PLAN from "../src/session/prompt/plan.txt"
import PROMPT_BUILD_SWITCH from "../src/session/prompt/build-switch.txt"
import PROMPT_MAX_STEPS from "../src/session/prompt/max-steps.txt"

import PROMPT_EXPLORE from "../src/agent/prompt/explore.txt"
import PROMPT_COMPACTION from "../src/agent/prompt/compaction.txt"
import PROMPT_SUMMARY from "../src/agent/prompt/summary.txt"
import PROMPT_TITLE from "../src/agent/prompt/title.txt"
import PROMPT_GENERATE from "../src/agent/generate.txt"

/**
 * Per-prompt metrics gathered by the audit, plus any threshold violations.
 */
interface PromptAuditResult {
  // Short identifier shown in the report table (e.g. "anthropic").
  name: string
  // Repo-relative path of the prompt file (report output only).
  file: string
  // Category that selects which Threshold entry applies (see THRESHOLDS).
  type: "provider" | "utility" | "agent" | "meta"
  // Line count of the prompt text.
  lines: number
  // Character count of the prompt text.
  chars: number
  // Estimated token count (~4 characters per token heuristic).
  tokens: number
  // Occurrences of emphatic directive keywords, matched case-insensitively
  // on whole words.
  directives: {
    must: number
    never: number
    always: number
    important: number
    critical: number
    // Sum of the five keyword counts above.
    total: number
  }
  // Number of detected example sections (<example> XML blocks plus
  // markdown-style dialogue runs).
  examples: number
  // Human-readable description of each threshold breach; empty means pass.
  violations: string[]
}

/**
 * Recommended upper bounds for one prompt category. A prompt exceeding any
 * limit is reported as a violation.
 */
interface Threshold {
  // Prompt category these limits apply to.
  type: "provider" | "utility" | "agent" | "meta"
  // Maximum estimated tokens.
  tokenLimit: number
  // Maximum total directive keywords (MUST/NEVER/ALWAYS/IMPORTANT/CRITICAL).
  directiveLimit: number
  // Maximum example sections.
  exampleLimit: number
}

// Per-category limits. Provider prompts are allowed to be the largest;
// utility prompts are expected to be terse; meta prompts may be longer but
// are allowed no directives or examples (limits of 0).
const THRESHOLDS: Threshold[] = [
  { type: "provider", tokenLimit: 1500, directiveLimit: 12, exampleLimit: 5 },
  { type: "utility", tokenLimit: 200, directiveLimit: 4, exampleLimit: 0 },
  { type: "agent", tokenLimit: 400, directiveLimit: 6, exampleLimit: 3 },
  { type: "meta", tokenLimit: 800, directiveLimit: 0, exampleLimit: 0 },
]

/**
 * Approximates the token count of a prompt using the common ~4 characters
 * per token heuristic. Empty or missing content yields 0.
 */
function estimateTokens(content: string): number {
  if (!content) return 0
  return Math.round(content.length / 4)
}

/**
 * Counts emphatic directive keywords in a prompt. Each keyword is matched
 * case-insensitively as a whole word; `total` is the sum of all five counts.
 */
function countDirectives(content: string): {
  must: number
  never: number
  always: number
  important: number
  critical: number
  total: number
} {
  // Whole-word, case-insensitive occurrence count for a single keyword.
  const occurrences = (keyword: string): number => content.match(new RegExp(`\\b${keyword}\\b`, "gi"))?.length ?? 0

  const must = occurrences("MUST")
  const never = occurrences("NEVER")
  const always = occurrences("ALWAYS")
  const important = occurrences("IMPORTANT")
  const critical = occurrences("CRITICAL")

  return { must, never, always, important, critical, total: must + never + always + important + critical }
}

/**
 * Counts example sections in a prompt: explicit `<example>...</example>`
 * XML blocks plus markdown-style dialogue runs (consecutive lines starting
 * with "user:", "assistant:", or "model:"). A dialogue run ends at a blank
 * line, a tag line, or a code fence.
 *
 * Fix: dialogue lines INSIDE `<example>` blocks were previously counted
 * twice — once by the XML regex and again by the line scan. The XML bodies
 * are now stripped before scanning for dialogue runs.
 */
function countExamples(content: string): number {
  const xmlExamples = (content.match(/<example>[\s\S]*?<\/example>/g) || []).length

  // Remove XML example bodies so their dialogue lines are not re-counted.
  const remainder = content.replace(/<example>[\s\S]*?<\/example>/g, "")

  const lines = remainder.split("\n")
  let markdownExamples = 0
  let inExample = false

  for (const line of lines) {
    const trimmed = line.trim().toLowerCase()
    if (trimmed.startsWith("user:") || trimmed.startsWith("assistant:") || trimmed.startsWith("model:")) {
      // A contiguous dialogue run counts as one example.
      if (!inExample) {
        inExample = true
        markdownExamples++
      }
    } else if (line.trim() === "" || line.trim().startsWith("<") || line.trim().startsWith("```")) {
      inExample = false
    }
  }

  return xmlExamples + markdownExamples
}

/**
 * Returns the number of lines in `content`. Matches `split("\n").length`
 * semantics: an empty string is one line, and a trailing newline adds a
 * final empty line.
 */
function countLines(content: string): number {
  let total = 1
  for (const ch of content) {
    if (ch === "\n") total++
  }
  return total
}

/**
 * Computes size, directive, and example metrics for one prompt and compares
 * them against the thresholds registered for its category.
 *
 * @param name    Short identifier shown in the report table.
 * @param file    Repo-relative path (report output only).
 * @param type    Prompt category; selects the Threshold entry.
 * @param content Raw prompt text.
 * @returns Metrics plus a human-readable list of threshold violations.
 * @throws Error when no Threshold is registered for `type`.
 */
function auditPrompt(name: string, file: string, type: PromptAuditResult["type"], content: string): PromptAuditResult {
  const lines = countLines(content)
  const chars = content.length
  const tokens = estimateTokens(content)
  const directives = countDirectives(content)
  const examples = countExamples(content)

  // Explicit lookup failure instead of the original non-null assertion,
  // which would have crashed with an opaque property-access error if a
  // category was ever added without a matching THRESHOLDS entry.
  const threshold = THRESHOLDS.find((t) => t.type === type)
  if (!threshold) {
    throw new Error(`No threshold configured for prompt type "${type}"`)
  }

  const violations: string[] = []
  if (tokens > threshold.tokenLimit) {
    violations.push(`tokens: ${tokens} > ${threshold.tokenLimit}`)
  }
  if (directives.total > threshold.directiveLimit) {
    violations.push(`directives: ${directives.total} > ${threshold.directiveLimit}`)
  }
  if (examples > threshold.exampleLimit) {
    violations.push(`examples: ${examples} > ${threshold.exampleLimit}`)
  }

  return {
    name,
    file,
    type,
    lines,
    chars,
    tokens,
    directives,
    examples,
    violations,
  }
}

// Registry of every prompt file to audit: each bundled .txt import is paired
// with its repo-relative path (for reporting) and its threshold category.
// New prompt files must be imported above AND listed here to be audited.
const prompts: Array<{ name: string; file: string; type: PromptAuditResult["type"]; content: string }> = [
  { name: "anthropic", file: "src/session/prompt/anthropic.txt", type: "provider", content: PROMPT_ANTHROPIC },
  { name: "qwen", file: "src/session/prompt/qwen.txt", type: "provider", content: PROMPT_QWEN },
  { name: "beast", file: "src/session/prompt/beast.txt", type: "provider", content: PROMPT_BEAST },
  { name: "gemini", file: "src/session/prompt/gemini.txt", type: "provider", content: PROMPT_GEMINI },
  { name: "codex_header", file: "src/session/prompt/codex_header.txt", type: "provider", content: PROMPT_CODEX },
  { name: "trinity", file: "src/session/prompt/trinity.txt", type: "provider", content: PROMPT_TRINITY },
  { name: "plan", file: "src/session/prompt/plan.txt", type: "utility", content: PROMPT_PLAN },
  { name: "build-switch", file: "src/session/prompt/build-switch.txt", type: "utility", content: PROMPT_BUILD_SWITCH },
  { name: "max-steps", file: "src/session/prompt/max-steps.txt", type: "utility", content: PROMPT_MAX_STEPS },
  { name: "explore", file: "src/agent/prompt/explore.txt", type: "agent", content: PROMPT_EXPLORE },
  { name: "compaction", file: "src/agent/prompt/compaction.txt", type: "agent", content: PROMPT_COMPACTION },
  { name: "summary", file: "src/agent/prompt/summary.txt", type: "agent", content: PROMPT_SUMMARY },
  { name: "title", file: "src/agent/prompt/title.txt", type: "agent", content: PROMPT_TITLE },
  { name: "generate", file: "src/agent/generate.txt", type: "meta", content: PROMPT_GENERATE },
]

/**
 * Entry point: audits every registered prompt, prints a per-category report
 * table with pass/fail status, then exits non-zero when any prompt violates
 * its thresholds so this script can be used as a CI gate.
 *
 * Fixes: (1) the script previously called `process.exit(0)` unconditionally,
 * so violations could never fail a pipeline; (2) the directives column
 * omitted the ALWAYS and CRITICAL counts even though both contribute to the
 * printed total.
 */
function main() {
  console.log("Prompt Overspecification Audit")
  console.log("=============================\n")

  const results = prompts.map((p) => auditPrompt(p.name, p.file, p.type, p.content))

  // Bucket results by category, preserving registration order within each.
  const byType: Record<string, PromptAuditResult[]> = {
    provider: [],
    utility: [],
    agent: [],
    meta: [],
  }
  for (const r of results) {
    byType[r.type].push(r)
  }

  let totalViolations = 0

  for (const [type, typeResults] of Object.entries(byType)) {
    if (typeResults.length === 0) continue

    const threshold = THRESHOLDS.find((t) => t.type === type)
    if (!threshold) continue // no limits registered for this category

    console.log(
      `\n${type.toUpperCase()} PROMPTS (thresholds: ≤${threshold.tokenLimit} tokens, ≤${threshold.directiveLimit} directives, ≤${threshold.exampleLimit} examples)`,
    )
    console.log("-".repeat(100))
    console.log(
      `${"Name".padEnd(15)} ${"Lines".padStart(6)} ${"Tokens".padStart(7)} ${"Directives".padStart(11)} ${"Examples".padStart(9)} ${"Status".padStart(10)}`,
    )
    console.log("-".repeat(100))

    for (const r of typeResults) {
      const hasViolations = r.violations.length > 0
      const status = hasViolations ? "❌ FAIL" : "✓ PASS"
      // All five keyword counts; previously A and C were hidden.
      const directives =
        `${r.directives.total} (M:${r.directives.must},N:${r.directives.never},A:${r.directives.always},I:${r.directives.important},C:${r.directives.critical})`.padStart(
          11,
        )

      console.log(
        `${r.name.padEnd(15)} ${r.lines.toString().padStart(6)} ${r.tokens.toString().padStart(7)} ${directives} ${r.examples.toString().padStart(9)} ${status.padStart(10)}`,
      )

      if (hasViolations) {
        totalViolations += r.violations.length
        for (const v of r.violations) {
          console.error(` ⚠️ ${v}`)
        }
      }
    }
  }

  console.log("\n" + "=".repeat(100))
  console.log(`\nSummary: ${totalViolations} violation(s) across ${results.length} prompt files`)

  if (totalViolations > 0) {
    console.error("\n⚠️ Some prompts exceed recommended thresholds.")
    console.error("  Review violations above and consider optimization.")
    // Non-zero exit code so CI pipelines can fail on violations.
    process.exit(1)
  }

  console.log("\n✓ All prompts are within recommended thresholds.")
  process.exit(0)
}

main()
6 changes: 0 additions & 6 deletions packages/opencode/src/agent/prompt/title.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ You are a title generator. You output ONLY a thread title. Nothing else.
Generate a brief title that would help the user find this conversation later.

Follow all rules in <rules>
Use the <examples> so you know what a good title looks like.
Your output must be:
- A single line
- ≤50 characters
Expand Down Expand Up @@ -36,9 +35,4 @@ Your output must be:
"why is app.js failing" → app.js failure investigation
"implement rate limiting" → Rate limiting implementation
"how do I connect postgres to my API" → Postgres API connection
"best practices for React hooks" → React hooks best practices
"@src/auth.ts can you add refresh token support" → Auth refresh token support
"@utils/parser.ts this is broken" → Parser bug fix
"look at @config.json" → Config review
"@App.tsx add dark mode toggle" → Dark mode toggle in App
</examples>
19 changes: 4 additions & 15 deletions packages/opencode/src/session/prompt/anthropic.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ When the user directly asks about OpenCode (eg. "can OpenCode do...", "does Open
Prioritize technical accuracy and truthfulness over validating the user's beliefs. Focus on facts and problem-solving, providing direct, objective technical info without any unnecessary superlatives, praise, or emotional validation. It is best for the user if OpenCode honestly applies the same rigorous standards to all ideas and disagrees when necessary, even if it may not be what the user wants to hear. Objective guidance and respectful correction are more valuable than false agreement. Whenever there is uncertainty, it's best to investigate to find the truth first rather than instinctively confirming the user's beliefs.

# Task Management
You have access to the TodoWrite tools to help you manage and plan tasks. Use these tools VERY frequently to ensure that you are tracking your tasks and giving the user visibility into your progress.
You have access to the TodoWrite tools to help you manage and plan tasks. Use these tools VERY frequently to ensure you are tracking your tasks and giving the user visibility into your progress.
These tools are also EXTREMELY helpful for planning tasks, and for breaking down larger complex tasks into smaller steps. If you do not use this tool when planning, you may forget to do important tasks - and that is unacceptable.

It is critical that you mark todos as completed as soon as you are done with a task. Do not batch up multiple tasks before marking them as completed.
Expand Down Expand Up @@ -62,36 +62,25 @@ Let me start by researching the existing codebase to understand what metrics we
I'm going to search for any existing metrics or telemetry code in the project.

I've found some existing telemetry code. Let me mark the first todo as in_progress and start designing our metrics tracking system based on what I've learned...

[Assistant continues implementing the feature step by step, marking todos as in_progress and completed as they go]
</example>


# Doing tasks
The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended:
-
-
- Use the TodoWrite tool to plan the task if required

- Tool results and user messages may include <system-reminder> tags. <system-reminder> tags contain useful information and reminders. They are automatically added by the system, and bear no direct relation to the specific tool results or user messages in which they appear.
- Tool results and user messages may include <system-reminder> tags. <system-reminder> tags contain useful information and reminders. They are NOT part of the specific tool results or user messages in which they appear.


# Tool usage policy
- When doing file search, prefer to use the Task tool in order to reduce context usage.
- You should proactively use the Task tool with specialized agents when the task at hand matches the agent's description.

- When WebFetch returns a message about a redirect to a different host, you should immediately make a new WebFetch request with the redirect URL provided in the response.
- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. For instance, if one operation must complete before another starts, run these operations sequentially instead. Never use placeholders or guess missing parameters in tool calls.
- If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple Task tool calls.
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. For instance, if one operation must complete before another starts, run these operations sequentially instead. Never use placeholders or guess missing parameters in tool calls.
- Use specialized tools instead of bash commands when possible, as this provides a better user experience. For file operations, use dedicated tools: Read for reading files instead of cat/head/tail, Edit for editing instead of sed/awk, and Write for creating files instead of cat with heredoc or echo redirection. Reserve bash tools exclusively for actual system commands and terminal operations that require shell execution. NEVER use bash echo or other command-line tools to communicate thoughts, explanations, or instructions to the user. Output all communication directly in your response text instead.
- VERY IMPORTANT: When exploring the codebase to gather context or to answer a question that is not a needle query for a specific file/class/function, it is CRITICAL that you use the Task tool instead of running search commands directly.
<example>
user: Where are errors from the client handled?
assistant: [Uses the Task tool to find the files that handle client errors instead of using Glob or Grep directly]
</example>
<example>
user: What is the codebase structure?
assistant: [Uses the Task tool]
</example>

IMPORTANT: Always use the TodoWrite tool to plan and track tasks throughout the conversation.

Expand Down
Loading
Loading