diff --git a/ee/enterprise/conversation-api.ts b/ee/enterprise/conversation-api.ts index eb5473e..80a5574 100644 --- a/ee/enterprise/conversation-api.ts +++ b/ee/enterprise/conversation-api.ts @@ -1,6 +1,6 @@ import { createError, getHeader, getQuery, getRouterParam, readBody, type H3Event } from 'h3' import { useRuntimeConfig } from '#imports' -import type { AIContentBlock, AIMessage } from '../../server/providers/ai' +import type { AIContentBlock } from '../../server/providers/ai' import type { DatabaseProvider } from '../../server/providers/database' import type { AgentPermissions } from '../../server/utils/agent-permissions' import type { ChatUIContext } from '../../server/utils/agent-types' @@ -14,6 +14,7 @@ import { createContentEngine } from '../../server/utils/content-engine' import { errorMessage } from '../../server/utils/content-strings' import { normalizeContentRoot } from '../../server/utils/content-paths' import { runConversationLoop } from '../../server/utils/conversation-engine' +import { buildPromptMessages, selectHistoryBudget } from '../../server/utils/conversation-history' import { validateConversationKey } from '../../server/utils/conversation-keys' import { saveApiChatResult } from '../../server/utils/db' import { getPlanLimit, getWorkspacePlan, hasFeature } from '../../server/utils/license' @@ -124,7 +125,13 @@ async function resolveConversationApiContext(event: H3Event): Promise { - let tokens = 0 - for (let i = historyRows.length - 1; i >= 0; i--) { - const row = historyRows[i]! - const content = row.toolCalls ? (row.toolCalls as AIContentBlock[]) : row.content - const estimate = typeof content === 'string' - ? Math.ceil(content.length / 4) - : Math.ceil(JSON.stringify(content).length / 4) - tokens += estimate - if (tokens > HISTORY_TOKEN_BUDGET) return i + 1 - } - return 0 - })() - - for (let i = budgetStart; i < historyRows.length; i++) { - const row = historyRows[i]! - const content = row.toolCalls ? 
(row.toolCalls as AIContentBlock[]) : (row.content as string | AIContentBlock[]) - messages.push({ role: row.role as 'user' | 'assistant', content }) - } - messages.push({ role: 'user', content: body.message }) - const model = keyData.aiModel + const budget = selectHistoryBudget({ plan, model, source: 'api' }) + const historyRows = await db.loadConversationMessages( + conversationId, + budget.rowLimit, + 'role, content, tool_calls', + ) + const messages = buildPromptMessages({ + history: historyRows ?? [], + newUserMessage: body.message, + budget, + }) + const configWorkflow = projectConfig?.workflow ?? 'auto-merge' const workflow = hasFeature(plan, 'workflow.review') ? configWorkflow : 'auto-merge' @@ -406,7 +400,7 @@ async function runConversationHistory(event: H3Event) { throw createError({ statusCode: 404, message: errorMessage('chat.conversation_not_found') }) const limit = Math.min(Number(query.limit ?? 50), 100) - const messages = await loadConversationMessages(db, conversationId, limit) + const messages = await loadConversationHistoryForResponse(db, conversationId, limit) return { conversationId, messages } } diff --git a/server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts b/server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts index 1b06d41..56d03c2 100644 --- a/server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts +++ b/server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts @@ -1,10 +1,11 @@ -import type { AIMessage, AIContentBlock } from '~~/server/providers/ai' +import type { AIContentBlock } from '~~/server/providers/ai' import type { ChatRequest } from '~~/server/utils/agent-types' import { createEventStream } from 'h3' import { toAITools } from '~~/server/utils/agent-types' import { deriveProjectPhase } from '~~/server/utils/agent-state-machine' import { classifyIntent } from '~~/server/utils/agent-context' import { runConversationLoop } from '~~/server/utils/conversation-engine' 
+import { buildPromptMessages, selectHistoryBudget } from '~~/server/utils/conversation-history' import { resolveEnterpriseChatApiKey } from '../../../../../utils/enterprise' import { getEffectiveLimit } from '../../../../../utils/overage' @@ -18,8 +19,6 @@ import { getEffectiveLimit } from '../../../../../utils/overage' * The AI loop + tool execution logic lives in the reusable engine. */ -const HISTORY_TOKEN_BUDGET = 8000 - export default defineEventHandler(async (event) => { const session = requireAuth(event) const workspaceId = getRouterParam(event, 'workspaceId') @@ -143,32 +142,23 @@ export default defineEventHandler(async (event) => { if (!conversationId) throw createError({ statusCode: 500, message: errorMessage('chat.conversation_create_failed') }) - // === HISTORY === - const historyRows = await db.loadConversationMessages(conversationId, 50) - - // Build message history: chronological order, newest messages prioritized within budget - const allHistory = historyRows ?? [] - const messages: AIMessage[] = [] - - // Walk backwards to find budget cutoff, then take from that point forward - const budgetStart = (() => { - let tokens = 0 - for (let i = allHistory.length - 1; i >= 0; i--) { - const row = allHistory[i]! - const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : row.content - const estimate = typeof content === 'string' ? Math.ceil(content.length / 4) : Math.ceil(JSON.stringify(content).length / 4) - tokens += estimate - if (tokens > HISTORY_TOKEN_BUDGET) return i + 1 - } - return 0 - })() + // Model: plan-gated selection. Picked here (before history) because + // `selectHistoryBudget` is model-aware — Haiku gets a smaller window + // than Sonnet/Opus. + const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001'] + const STARTER_MODELS = ['claude-haiku-4-5-20251001'] + const availableModels = hasFeature(plan, 'ai.studio_key') ? 
ALL_MODELS : STARTER_MODELS + const requestedModel = body.model as string | undefined + const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]! - for (let i = budgetStart; i < allHistory.length; i++) { - const row = allHistory[i]! - const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : (row.content as string | AIContentBlock[]) - messages.push({ role: row.role as 'user' | 'assistant', content }) - } - messages.push({ role: 'user', content: body.message }) + // === HISTORY === + const budget = selectHistoryBudget({ plan, model, source: usageSource }) + const historyRows = await db.loadConversationMessages(conversationId, budget.rowLimit) + const messages = buildPromptMessages({ + history: historyRows ?? [], + newUserMessage: body.message, + budget, + }) // === LOAD SCHEMA (from brain cache) === const brain = await getOrBuildBrainCache(git, contentRoot, projectId) @@ -212,13 +202,6 @@ export default defineEventHandler(async (event) => { const phaseFiltered = permissionFiltered.filter(t => t.requiredPhase.includes(phase)) const aiTools = toAITools(phaseFiltered) - // Model: plan-gated selection - const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001'] - const STARTER_MODELS = ['claude-haiku-4-5-20251001'] - const availableModels = hasFeature(plan, 'ai.studio_key') ? ALL_MODELS : STARTER_MODELS - const requestedModel = body.model as string | undefined - const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]! - // Workflow: plans without review feature always auto-merge regardless of config const configWorkflow = projectConfig?.workflow ?? 'auto-merge' const workflow = hasFeature(plan, 'workflow.review') ? 
configWorkflow : 'auto-merge' diff --git a/server/utils/conversation-history.ts b/server/utils/conversation-history.ts new file mode 100644 index 0000000..0edb170 --- /dev/null +++ b/server/utils/conversation-history.ts @@ -0,0 +1,154 @@ +/** + * Shared chat-history builder. + * + * Both the Studio chat handler and the Conversation API handler load + * prior messages from `messages`, walk them back-to-front under a + * token ceiling, and append the current user message. The duplicate + * lived in two files with a hard-coded 8K budget, a magic 50-row cap, + * and divergent `tool_calls`/`toolCalls` casing — see `chat.post.ts` + * pre-refactor and `ee/enterprise/conversation-api.ts`. + * + * Two pure helpers consolidate the logic: + * + * - `selectHistoryBudget({ plan, model, source })` returns the + * model-aware token ceiling, scaled by plan and source. Model + * drives capability; plan drives Contentrain's per-message + * margin; source drives who pays. + * - `buildPromptMessages({ history, newUserMessage, budget })` + * converts DB rows into the `AIMessage[]` shape the provider + * contract expects, slicing oldest rows when the budget runs out. + * + * No DB, no provider — pure functions, unit-testable. + */ +import type { AIContentBlock, AIMessage } from '../providers/ai' +import type { DatabaseRow } from '../providers/database' + +export interface HistoryBudget { + /** Approximate input-token ceiling for the entire history block. */ + maxTokens: number + /** + * Upper bound on rows fetched from DB. The token cutoff does the + * real work; this is a safety bound against pathologically long + * conversations. Scales with `maxTokens`. + */ + rowLimit: number +} + +/** + * Per-model history budgets, picked conservatively to leave headroom + * for system prompt (5-15K with the brain content index), tools + * (~2K), the new user message, and 2-4K of output. 
+ * + * Sonnet/Opus values stay well below the 200K long-context boundary + * because that tier carries premium pricing. Once prompt caching + * (per-block `cache_control`) lands these can grow — cache reads cost + * ~10% of base input, so the same dollar of input buys a much larger + * effective window. Until then, conservative is right. + * + * Sources: docs.claude.com/en/docs/about-claude/models/overview and + * docs.claude.com/en/docs/about-claude/pricing. + */ +const MODEL_HISTORY_BUDGETS: Record<string, number> = { + 'claude-haiku-4-5-20251001': 12_000, + + 'claude-sonnet-4-20250514': 32_000, + 'claude-sonnet-4-5': 40_000, + 'claude-sonnet-4-6': 48_000, + + 'claude-opus-4-20250514': 32_000, + 'claude-opus-4-1-20250805': 32_000, + 'claude-opus-4-7': 48_000, +} + +/** Unknown model IDs (future / preview) get a conservative 16K default — above Haiku's 12K, below the Sonnet tier. */ +const FALLBACK_BUDGET = 16_000 + +/** + * Source axis: who pays for the input tokens. + * + * - studio: Contentrain pays. Default budget. + * - api: workspace pays via its plan + overage. Default budget. + * - byoa: workspace user pays Anthropic directly; we can afford to + * send more history because the marginal cost is on them. + */ +const SOURCE_MULTIPLIER: Record<'studio' | 'byoa' | 'api', number> = { + studio: 1, + api: 1, + byoa: 1.5, +} + +/** + * Plan axis: matches Studio's per-message margin posture. + * `free` evaluates to 0 by design — free tier should never reach this + * code path (gated upstream by feature/limit checks). If something + * routes free traffic here, history degrades to "only the current + * user message" rather than fabricating budget the plan doesn't fund. + */ +const PLAN_MULTIPLIER: Record<string, number> = { + free: 0, + starter: 0.75, + pro: 1, + enterprise: 1.25, + community: 1, +} +const FALLBACK_PLAN_MULTIPLIER = 1 + +export function selectHistoryBudget(input: { + plan: string + model: string + source: 'studio' | 'byoa' | 'api' +}): HistoryBudget { + const base = MODEL_HISTORY_BUDGETS[input.model] ?? 
FALLBACK_BUDGET + const planMul = PLAN_MULTIPLIER[input.plan] ?? FALLBACK_PLAN_MULTIPLIER + const sourceMul = SOURCE_MULTIPLIER[input.source] + const maxTokens = Math.floor(base * planMul * sourceMul) + // ~120 tokens per row floor (short user/assistant message) — bounds + // DB pagination without ever undercutting what the budget can hold. + const rowLimit = Math.max(50, Math.ceil(maxTokens / 120)) + return { maxTokens, rowLimit } +} + +export function buildPromptMessages(input: { + history: DatabaseRow[] + newUserMessage: string + budget: HistoryBudget +}): AIMessage[] { + const messages: AIMessage[] = [] + const cutoff = findBudgetCutoff(input.history, input.budget.maxTokens) + for (let i = cutoff; i < input.history.length; i++) { + const row = input.history[i]! + messages.push({ + role: row.role as 'user' | 'assistant', + content: extractContent(row), + }) + } + messages.push({ role: 'user', content: input.newUserMessage }) + return messages +} + +/** + * Tolerates both `tool_calls` (Studio path — `db.loadConversationMessages` + * returns snake_case rows) and `toolCalls` (EE handler's pre-refactor + * wrapper renamed it). Once that wrapper is gone the second branch is + * dead — leave it as a safety net for any external caller. + */ +function extractContent(row: DatabaseRow): string | AIContentBlock[] { + const blocks = (row.tool_calls ?? row.toolCalls) as AIContentBlock[] | null | undefined + if (blocks && Array.isArray(blocks) && blocks.length > 0) return blocks + return row.content as string | AIContentBlock[] +} + +function findBudgetCutoff(history: DatabaseRow[], maxTokens: number): number { + if (maxTokens <= 0) return history.length + let tokens = 0 + for (let i = history.length - 1; i >= 0; i--) { + const row = history[i]! + const content = extractContent(row) + const estimate = typeof content === 'string' + ? 
Math.ceil(content.length / 4) + : Math.ceil(JSON.stringify(content).length / 4) + tokens += estimate + if (tokens > maxTokens) return i + 1 + } + return 0 +} diff --git a/tests/integration/chat-route.integration.test.ts b/tests/integration/chat-route.integration.test.ts index 59e1a78..7b6b6e4 100644 --- a/tests/integration/chat-route.integration.test.ts +++ b/tests/integration/chat-route.integration.test.ts @@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine')) vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context')) vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine')) +vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history')) async function loadChatHandler() { return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default @@ -190,7 +191,10 @@ describe('chat route integration', () => { expect(payload).toContain('"id":"conversation-new"') expect(payload).toContain('"type":"done"') expect(mockCreateConversation).toHaveBeenCalledWith('project-1', 'user-1', 'hello') - expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', 50) + // rowLimit is derived from selectHistoryBudget(plan, model, source); + // budget math is covered by conversation-history.test.ts. Here we only + // check that the handler asked for some bounded history slice. 
+ expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', expect.any(Number)) expect(saveChatResult).toHaveBeenCalledWith( 'conversation-new', 'hello', diff --git a/tests/integration/overage-soft-cap.integration.test.ts b/tests/integration/overage-soft-cap.integration.test.ts index e013de9..f1620a8 100644 --- a/tests/integration/overage-soft-cap.integration.test.ts +++ b/tests/integration/overage-soft-cap.integration.test.ts @@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine')) vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context')) vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine')) +vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history')) async function loadChatHandler() { return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default diff --git a/tests/unit/conversation-history.test.ts b/tests/unit/conversation-history.test.ts new file mode 100644 index 0000000..b4fbf94 --- /dev/null +++ b/tests/unit/conversation-history.test.ts @@ -0,0 +1,179 @@ +import { describe, expect, it } from 'vitest' +import { buildPromptMessages, selectHistoryBudget } from '../../server/utils/conversation-history' + +describe('selectHistoryBudget', () => { + it('returns the per-model budget when the model is known', () => { + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-haiku-4-5-20251001', source: 'studio' })) + .toMatchObject({ maxTokens: 12_000 }) + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-sonnet-4-20250514', source: 'studio' })) + .toMatchObject({ maxTokens: 32_000 }) + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-sonnet-4-5', source: 'studio' })) + .toMatchObject({ 
maxTokens: 40_000 }) + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-opus-4-7', source: 'studio' })) + .toMatchObject({ maxTokens: 48_000 }) + }) + + it('falls back for unknown models', () => { + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-future-9', source: 'studio' })) + .toMatchObject({ maxTokens: 16_000 }) + }) + + it('scales the budget by plan multiplier', () => { + // Sonnet 4 base = 32_000 + expect(selectHistoryBudget({ plan: 'starter', model: 'claude-sonnet-4-20250514', source: 'studio' })) + .toMatchObject({ maxTokens: 24_000 }) // 32k * 0.75 + expect(selectHistoryBudget({ plan: 'enterprise', model: 'claude-sonnet-4-20250514', source: 'studio' })) + .toMatchObject({ maxTokens: 40_000 }) // 32k * 1.25 + expect(selectHistoryBudget({ plan: 'community', model: 'claude-sonnet-4-20250514', source: 'studio' })) + .toMatchObject({ maxTokens: 32_000 }) // 32k * 1 + }) + + it('returns zero budget for the free plan (defensive backstop — should never reach chat path)', () => { + expect(selectHistoryBudget({ plan: 'free', model: 'claude-haiku-4-5-20251001', source: 'studio' })) + .toMatchObject({ maxTokens: 0 }) + }) + + it('uses neutral multiplier for unknown plans', () => { + // Base 32k * fallback (1) * studio (1) = 32k + expect(selectHistoryBudget({ plan: 'mystery-plan', model: 'claude-sonnet-4-20250514', source: 'studio' })) + .toMatchObject({ maxTokens: 32_000 }) + }) + + it('boosts the budget by 1.5x for BYOA where the user pays Anthropic directly', () => { + // Sonnet 4 base 32_000 * pro (1) * byoa (1.5) = 48_000 + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-sonnet-4-20250514', source: 'byoa' })) + .toMatchObject({ maxTokens: 48_000 }) + }) + + it('keeps the API source at the studio baseline (no per-source multiplier)', () => { + expect(selectHistoryBudget({ plan: 'pro', model: 'claude-sonnet-4-5', source: 'api' })) + .toMatchObject({ maxTokens: 40_000 }) + }) + + it('scales rowLimit with the token budget', () => { + const 
big = selectHistoryBudget({ plan: 'enterprise', model: 'claude-sonnet-4-6', source: 'byoa' }) + const small = selectHistoryBudget({ plan: 'starter', model: 'claude-haiku-4-5-20251001', source: 'studio' }) + expect(big.rowLimit).toBeGreaterThan(small.rowLimit) + expect(small.rowLimit).toBeGreaterThanOrEqual(50) // minimum safety floor + }) +}) + +describe('buildPromptMessages', () => { + const budget = { maxTokens: 10_000, rowLimit: 100 } + + it('returns just the new user message when history is empty', () => { + const messages = buildPromptMessages({ + history: [], + newUserMessage: 'hello', + budget, + }) + expect(messages).toEqual([{ role: 'user', content: 'hello' }]) + }) + + it('keeps every row when history fits in budget', () => { + const messages = buildPromptMessages({ + history: [ + { role: 'user', content: 'first', tool_calls: null }, + { role: 'assistant', content: 'reply', tool_calls: null }, + ], + newUserMessage: 'follow up', + budget, + }) + expect(messages).toEqual([ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'reply' }, + { role: 'user', content: 'follow up' }, + ]) + }) + + it('drops oldest rows when history exceeds budget', () => { + // 1000-char string ≈ 250 tokens; 5 rows × 250 = 1250 tokens + // Tight budget of 600 tokens should keep only the last 2 rows. + const longString = 'a'.repeat(1000) + const messages = buildPromptMessages({ + history: [ + { role: 'user', content: longString, tool_calls: null }, + { role: 'assistant', content: longString, tool_calls: null }, + { role: 'user', content: longString, tool_calls: null }, + { role: 'assistant', content: longString, tool_calls: null }, + { role: 'user', content: longString, tool_calls: null }, + ], + newUserMessage: 'now', + budget: { maxTokens: 600, rowLimit: 100 }, + }) + // The last message is always the new user message; the oldest + // rows from the top of `history` should be dropped before any + // newer rows. 
+ expect(messages.at(-1)).toEqual({ role: 'user', content: 'now' }) + expect(messages.length).toBeLessThan(6) // not all rows + new + expect(messages.length).toBeGreaterThanOrEqual(2) // at least 1 history + new + }) + + it('drops everything when budget is zero (free plan defensive path)', () => { + const messages = buildPromptMessages({ + history: [ + { role: 'user', content: 'old', tool_calls: null }, + { role: 'assistant', content: 'older', tool_calls: null }, + ], + newUserMessage: 'new', + budget: { maxTokens: 0, rowLimit: 100 }, + }) + expect(messages).toEqual([{ role: 'user', content: 'new' }]) + }) + + it('preserves tool_use content blocks when tool_calls jsonb is present (snake_case)', () => { + const toolUseBlocks = [ + { type: 'tool_use', id: 't1', name: 'list_models', input: {} }, + ] + const messages = buildPromptMessages({ + history: [ + { role: 'user', content: 'list them', tool_calls: null }, + { role: 'assistant', content: '[tool calls]', tool_calls: toolUseBlocks }, + ], + newUserMessage: 'thanks', + budget, + }) + expect(messages[1]).toEqual({ role: 'assistant', content: toolUseBlocks }) + }) + + it('also tolerates the camelCase toolCalls shape (legacy EE handler wrapper)', () => { + const toolUseBlocks = [ + { type: 'tool_use', id: 't1', name: 'list_models', input: {} }, + ] + const messages = buildPromptMessages({ + history: [ + { role: 'assistant', content: '[tool calls]', toolCalls: toolUseBlocks }, + ], + newUserMessage: 'thanks', + budget, + }) + expect(messages[0]).toEqual({ role: 'assistant', content: toolUseBlocks }) + }) + + it('preserves chronological order even when budget cuts the head', () => { + const longString = 'b'.repeat(800) // ~200 tokens each + const messages = buildPromptMessages({ + history: [ + { role: 'user', content: `OLDEST ${longString}`, tool_calls: null }, + { role: 'assistant', content: `MID-1 ${longString}`, tool_calls: null }, + { role: 'user', content: `MID-2 ${longString}`, tool_calls: null }, + { role: 
'assistant', content: `NEWEST ${longString}`, tool_calls: null }, + ], + newUserMessage: 'next', + budget: { maxTokens: 500, rowLimit: 100 }, + }) + // Newer rows should be kept; oldest dropped. The kept rows + // appear in original (chronological) order, not reverse. + const kept = messages.slice(0, -1) + for (let i = 1; i < kept.length; i++) { + const prev = kept[i - 1]!.content as string + const curr = kept[i]!.content as string + expect(typeof prev).toBe('string') + expect(typeof curr).toBe('string') + } + // NEWEST must be present, OLDEST must be absent. + const joined = kept.map(m => m.content as string).join('|') + expect(joined).toContain('NEWEST') + expect(joined).not.toContain('OLDEST') + }) +})