50 changes: 22 additions & 28 deletions ee/enterprise/conversation-api.ts
@@ -1,6 +1,6 @@
import { createError, getHeader, getQuery, getRouterParam, readBody, type H3Event } from 'h3'
import { useRuntimeConfig } from '#imports'
import type { AIContentBlock, AIMessage } from '../../server/providers/ai'
import type { AIContentBlock } from '../../server/providers/ai'
import type { DatabaseProvider } from '../../server/providers/database'
import type { AgentPermissions } from '../../server/utils/agent-permissions'
import type { ChatUIContext } from '../../server/utils/agent-types'
@@ -14,6 +14,7 @@ import { createContentEngine } from '../../server/utils/content-engine'
import { errorMessage } from '../../server/utils/content-strings'
import { normalizeContentRoot } from '../../server/utils/content-paths'
import { runConversationLoop } from '../../server/utils/conversation-engine'
import { buildPromptMessages, selectHistoryBudget } from '../../server/utils/conversation-history'
import { validateConversationKey } from '../../server/utils/conversation-keys'
import { saveApiChatResult } from '../../server/utils/db'
import { getPlanLimit, getWorkspacePlan, hasFeature } from '../../server/utils/license'
@@ -124,7 +125,13 @@ async function resolveConversationApiContext(event: H3Event): Promise<Conversati
return { db, keyData, project: project as ConversationApiContext['project'], workspace: workspace as ConversationApiContext['workspace'], plan }
}

async function loadConversationMessages(
/**
* Format conversation messages for the `/history.get` route's JSON
* response. The runtime chat path (`runConversationMessage`) goes
* through `selectHistoryBudget` + `buildPromptMessages` instead — this
* shape is purely for external API consumers reading their thread.
*/
async function loadConversationHistoryForResponse(
db: DatabaseProvider,
conversationId: string,
limit: number,
@@ -284,32 +291,19 @@ async function runConversationMessage(
if (!conversationId)
throw createError({ statusCode: 500, message: errorMessage('chat.conversation_create_failed') })

const historyRows = await loadConversationMessages(db, conversationId, 50)
const messages: AIMessage[] = []
const HISTORY_TOKEN_BUDGET = 8000

const budgetStart = (() => {
let tokens = 0
for (let i = historyRows.length - 1; i >= 0; i--) {
const row = historyRows[i]!
const content = row.toolCalls ? (row.toolCalls as AIContentBlock[]) : row.content
const estimate = typeof content === 'string'
? Math.ceil(content.length / 4)
: Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > HISTORY_TOKEN_BUDGET) return i + 1
}
return 0
})()

for (let i = budgetStart; i < historyRows.length; i++) {
const row = historyRows[i]!
const content = row.toolCalls ? (row.toolCalls as AIContentBlock[]) : (row.content as string | AIContentBlock[])
messages.push({ role: row.role as 'user' | 'assistant', content })
}
messages.push({ role: 'user', content: body.message })

const model = keyData.aiModel
const budget = selectHistoryBudget({ plan, model, source: 'api' })
const historyRows = await db.loadConversationMessages(
conversationId,
budget.rowLimit,
'role, content, tool_calls',
)
const messages = buildPromptMessages({
history: historyRows ?? [],
newUserMessage: body.message,
budget,
})

const configWorkflow = projectConfig?.workflow ?? 'auto-merge'
const workflow = hasFeature(plan, 'workflow.review') ? configWorkflow : 'auto-merge'

@@ -406,7 +400,7 @@ async function runConversationHistory(event: H3Event) {
throw createError({ statusCode: 404, message: errorMessage('chat.conversation_not_found') })

const limit = Math.min(Number(query.limit ?? 50), 100)
const messages = await loadConversationMessages(db, conversationId, limit)
const messages = await loadConversationHistoryForResponse(db, conversationId, limit)

return { conversationId, messages }
}
server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts
@@ -1,10 +1,11 @@
import type { AIMessage, AIContentBlock } from '~~/server/providers/ai'
import type { AIContentBlock } from '~~/server/providers/ai'
import type { ChatRequest } from '~~/server/utils/agent-types'
import { createEventStream } from 'h3'
import { toAITools } from '~~/server/utils/agent-types'
import { deriveProjectPhase } from '~~/server/utils/agent-state-machine'
import { classifyIntent } from '~~/server/utils/agent-context'
import { runConversationLoop } from '~~/server/utils/conversation-engine'
import { buildPromptMessages, selectHistoryBudget } from '~~/server/utils/conversation-history'
import { resolveEnterpriseChatApiKey } from '../../../../../utils/enterprise'
import { getEffectiveLimit } from '../../../../../utils/overage'

@@ -18,8 +19,6 @@ import { getEffectiveLimit } from '../../../../../utils/overage'
* The AI loop + tool execution logic lives in the reusable engine.
*/

const HISTORY_TOKEN_BUDGET = 8000

export default defineEventHandler(async (event) => {
const session = requireAuth(event)
const workspaceId = getRouterParam(event, 'workspaceId')
@@ -143,32 +142,23 @@ export default defineEventHandler(async (event) => {
if (!conversationId)
throw createError({ statusCode: 500, message: errorMessage('chat.conversation_create_failed') })

// === HISTORY ===
const historyRows = await db.loadConversationMessages(conversationId, 50)

// Build message history: chronological order, newest messages prioritized within budget
const allHistory = historyRows ?? []
const messages: AIMessage[] = []

// Walk backwards to find budget cutoff, then take from that point forward
const budgetStart = (() => {
let tokens = 0
for (let i = allHistory.length - 1; i >= 0; i--) {
const row = allHistory[i]!
const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : row.content
const estimate = typeof content === 'string' ? Math.ceil(content.length / 4) : Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > HISTORY_TOKEN_BUDGET) return i + 1
}
return 0
})()
// Model: plan-gated selection. Picked here (before history) because
// `selectHistoryBudget` is model-aware — Haiku gets a smaller window
// than Sonnet/Opus.
const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']
const availableModels = hasFeature(plan, 'ai.studio_key') ? ALL_MODELS : STARTER_MODELS
const requestedModel = body.model as string | undefined
const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]!

for (let i = budgetStart; i < allHistory.length; i++) {
const row = allHistory[i]!
const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : (row.content as string | AIContentBlock[])
messages.push({ role: row.role as 'user' | 'assistant', content })
}
messages.push({ role: 'user', content: body.message })
// === HISTORY ===
const budget = selectHistoryBudget({ plan, model, source: usageSource })
const historyRows = await db.loadConversationMessages(conversationId, budget.rowLimit)
const messages = buildPromptMessages({
history: historyRows ?? [],
newUserMessage: body.message,
budget,
})

// === LOAD SCHEMA (from brain cache) ===
const brain = await getOrBuildBrainCache(git, contentRoot, projectId)
@@ -212,13 +202,6 @@ export default defineEventHandler(async (event) => {
const phaseFiltered = permissionFiltered.filter(t => t.requiredPhase.includes(phase))
const aiTools = toAITools(phaseFiltered)

// Model: plan-gated selection
const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']
const availableModels = hasFeature(plan, 'ai.studio_key') ? ALL_MODELS : STARTER_MODELS
const requestedModel = body.model as string | undefined
const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]!

// Workflow: plans without review feature always auto-merge regardless of config
const configWorkflow = projectConfig?.workflow ?? 'auto-merge'
const workflow = hasFeature(plan, 'workflow.review') ? configWorkflow : 'auto-merge'
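A standalone sketch (not part of the diff) of the plan-gated fallback above: an out-of-plan request degrades silently to the plan's default model rather than erroring. `hasStudioKey` stands in for the handler's `hasFeature(plan, 'ai.studio_key')` check; model IDs are copied from the diff.

const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']

function pickModel(hasStudioKey: boolean, requested?: string): string {
  const available = hasStudioKey ? ALL_MODELS : STARTER_MODELS
  // Unavailable or missing request falls back to the plan's first (default) model.
  return (requested && available.includes(requested)) ? requested : available[0]!
}

pickModel(false, 'claude-opus-4-20250514') // => 'claude-haiku-4-5-20251001'
pickModel(true, 'claude-opus-4-20250514')  // => 'claude-opus-4-20250514'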
154 changes: 154 additions & 0 deletions server/utils/conversation-history.ts
@@ -0,0 +1,154 @@
/**
* Shared chat-history builder.
*
* Both the Studio chat handler and the Conversation API handler load
* prior messages from `messages`, walk them back-to-front under a
* token ceiling, and append the current user message. The duplicate
* lived in two files with a hard-coded 8K budget, a magic 50-row cap,
* and divergent `tool_calls`/`toolCalls` casing — see `chat.post.ts`
* pre-refactor and `ee/enterprise/conversation-api.ts`.
*
* Two pure helpers consolidate the logic:
*
* - `selectHistoryBudget({ plan, model, source })` returns the
* model-aware token ceiling, scaled by plan and source. Model
* drives capability; plan drives Contentrain's per-message
* margin; source drives who pays.
* - `buildPromptMessages({ history, newUserMessage, budget })`
* converts DB rows into the `AIMessage[]` shape the provider
* contract expects, slicing oldest rows when the budget runs out.
*
* No DB, no provider — pure functions, unit-testable.
*/
import type { AIContentBlock, AIMessage } from '../providers/ai'
import type { DatabaseRow } from '../providers/database'

export interface HistoryBudget {
/** Approximate input-token ceiling for the entire history block. */
maxTokens: number
/**
* Upper bound on rows fetched from DB. The token cutoff does the
* real work; this is a safety bound against pathologically long
* conversations. Scales with `maxTokens`.
*/
rowLimit: number
}

/**
* Per-model history budgets, picked conservatively to leave headroom
* for system prompt (5-15K with the brain content index), tools
* (~2K), the new user message, and 2-4K of output.
*
* Sonnet/Opus values stay well below the 200K long-context boundary
* because that tier carries premium pricing. Once prompt caching
* (per-block `cache_control`) lands these can grow — cache reads cost
* ~10% of base input, so the same dollar of input buys a much larger
* effective window. Until then, conservative is right.
*
* Sources: claude.com/docs/en/about-claude/models/overview and
* claude.com/docs/en/about-claude/pricing.
*/
const MODEL_HISTORY_BUDGETS: Record<string, number> = {
'claude-haiku-4-5-20251001': 12_000,

'claude-sonnet-4-20250514': 32_000,
'claude-sonnet-4-5': 40_000,
'claude-sonnet-4-6': 48_000,

'claude-opus-4-20250514': 32_000,
'claude-opus-4-1-20250805': 32_000,
'claude-opus-4-7': 48_000,
}

/** Unknown model IDs (future / preview) get a conservative default between the Haiku and Sonnet tiers. */
const FALLBACK_BUDGET = 16_000

/**
* Source axis: who pays for the input tokens.
*
* - studio: Contentrain pays. Default budget.
* - api: workspace pays via its plan + overage. Default budget.
* - byoa: workspace user pays Anthropic directly; we can afford to
* send more history because the marginal cost is on them.
*/
const SOURCE_MULTIPLIER: Record<'studio' | 'byoa' | 'api', number> = {
studio: 1,
api: 1,
byoa: 1.5,
}

/**
* Plan axis: matches Studio's per-message margin posture.
* `free` evaluates to 0 by design — free tier should never reach this
* code path (gated upstream by feature/limit checks). If something
* routes free traffic here, history degrades to "only the current
* user message" rather than fabricating budget the plan doesn't fund.
*/
const PLAN_MULTIPLIER: Record<string, number> = {
free: 0,
starter: 0.75,
pro: 1,
enterprise: 1.25,
community: 1,
}
const FALLBACK_PLAN_MULTIPLIER = 1

export function selectHistoryBudget(input: {
plan: string
model: string
source: 'studio' | 'byoa' | 'api'
}): HistoryBudget {
const base = MODEL_HISTORY_BUDGETS[input.model] ?? FALLBACK_BUDGET
const planMul = PLAN_MULTIPLIER[input.plan] ?? FALLBACK_PLAN_MULTIPLIER
const sourceMul = SOURCE_MULTIPLIER[input.source]
const maxTokens = Math.floor(base * planMul * sourceMul)
// Assume ~120 tokens per row (a short user/assistant message) to
// bound DB pagination without undercutting what the token budget
// can hold; never fetch fewer than 50 rows.
const rowLimit = Math.max(50, Math.ceil(maxTokens / 120))
return { maxTokens, rowLimit }
}

export function buildPromptMessages(input: {
history: DatabaseRow[]
newUserMessage: string
budget: HistoryBudget
}): AIMessage[] {
const messages: AIMessage[] = []
const cutoff = findBudgetCutoff(input.history, input.budget.maxTokens)
for (let i = cutoff; i < input.history.length; i++) {
const row = input.history[i]!
messages.push({
role: row.role as 'user' | 'assistant',
content: extractContent(row),
})
}
messages.push({ role: 'user', content: input.newUserMessage })
return messages
}

/**
* Tolerates both `tool_calls` (Studio path — `db.loadConversationMessages`
* returns snake_case rows) and `toolCalls` (EE handler's pre-refactor
* wrapper renamed it). Once that wrapper is gone the second branch is
* dead — leave it as a safety net for any external caller.
*/
function extractContent(row: DatabaseRow): string | AIContentBlock[] {
const blocks = (row.tool_calls ?? row.toolCalls) as AIContentBlock[] | null | undefined
if (blocks && Array.isArray(blocks) && blocks.length > 0) return blocks
return row.content as string | AIContentBlock[]
}

function findBudgetCutoff(history: DatabaseRow[], maxTokens: number): number {
if (maxTokens <= 0) return history.length
let tokens = 0
for (let i = history.length - 1; i >= 0; i--) {
const row = history[i]!
const content = extractContent(row)
const estimate = typeof content === 'string'
? Math.ceil(content.length / 4)
: Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > maxTokens) return i + 1
}
return 0
}
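For reference, a minimal usage sketch of the two helpers above (not part of the diff; import path as mocked in the tests below, history rows shaped as the `role`/`content`/`tool_calls` columns the handlers select, with hypothetical contents):

import { buildPromptMessages, selectHistoryBudget } from '~~/server/utils/conversation-history'

// pro plan, Sonnet 4, Studio path:
//   maxTokens = floor(32_000 * 1 * 1) = 32_000
//   rowLimit  = max(50, ceil(32_000 / 120)) = 267
const budget = selectHistoryBudget({
  plan: 'pro',
  model: 'claude-sonnet-4-20250514',
  source: 'studio',
})

// Hypothetical rows, oldest first, as loadConversationMessages returns them.
const history = [
  { role: 'user', content: 'Add a hero section', tool_calls: null },
  { role: 'assistant', content: 'Done, added hero.vue', tool_calls: null },
]

// Chronological history within budget, then the new user message last.
const messages = buildPromptMessages({
  history,
  newUserMessage: 'Now make the heading larger',
  budget,
})
// messages.length === 3; messages[2] is { role: 'user', content: 'Now make the heading larger' }

By contrast, starter + Haiku on the Studio path yields floor(12_000 * 0.75 * 1) = 9_000 tokens with a 75-row cap, and an enterprise BYOA call with Sonnet 4 yields floor(32_000 * 1.25 * 1.5) = 60_000 tokens with a 500-row cap.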
6 changes: 5 additions & 1 deletion tests/integration/chat-route.integration.test.ts
@@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut
vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine'))
vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context'))
vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine'))
vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history'))

async function loadChatHandler() {
return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default
@@ -190,7 +191,10 @@ describe('chat route integration', () => {
expect(payload).toContain('"id":"conversation-new"')
expect(payload).toContain('"type":"done"')
expect(mockCreateConversation).toHaveBeenCalledWith('project-1', 'user-1', 'hello')
expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', 50)
// rowLimit is derived from selectHistoryBudget(plan, model, source);
// budget math is covered by conversation-history.test.ts. Here we only
// check that the handler asked for some bounded history slice.
expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', expect.any(Number))
expect(saveChatResult).toHaveBeenCalledWith(
'conversation-new',
'hello',
1 change: 1 addition & 0 deletions tests/integration/overage-soft-cap.integration.test.ts
@@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut
vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine'))
vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context'))
vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine'))
vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history'))

async function loadChatHandler() {
return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default