50 changes: 22 additions & 28 deletions ee/enterprise/conversation-api.ts
@@ -1,6 +1,6 @@
import { createError, getHeader, getQuery, getRouterParam, readBody, type H3Event } from 'h3'
import { useRuntimeConfig } from '#imports'
import type { AIContentBlock, AIMessage } from '../../server/providers/ai'
import type { AIContentBlock } from '../../server/providers/ai'
import type { DatabaseProvider } from '../../server/providers/database'
import type { AgentPermissions } from '../../server/utils/agent-permissions'
import type { ChatUIContext } from '../../server/utils/agent-types'
@@ -14,6 +14,7 @@ import { createContentEngine } from '../../server/utils/content-engine'
import { errorMessage } from '../../server/utils/content-strings'
import { normalizeContentRoot } from '../../server/utils/content-paths'
import { runConversationLoop } from '../../server/utils/conversation-engine'
import { buildPromptMessages, selectHistoryBudget } from '../../server/utils/conversation-history'
import { validateConversationKey } from '../../server/utils/conversation-keys'
import { saveApiChatResult } from '../../server/utils/db'
import { getPlanLimit, getWorkspacePlan, hasFeature } from '../../server/utils/license'
@@ -124,7 +125,13 @@ async function resolveConversationApiContext(event: H3Event): Promise<Conversati
return { db, keyData, project: project as ConversationApiContext['project'], workspace: workspace as ConversationApiContext['workspace'], plan }
}

async function loadConversationMessages(
/**
* Format conversation messages for the `/history.get` route's JSON
* response. The runtime chat path (`runConversationMessage`) goes
* through `selectHistoryBudget` + `buildPromptMessages` instead — this
* shape is purely for external API consumers reading their thread.
*/
async function loadConversationHistoryForResponse(
db: DatabaseProvider,
conversationId: string,
limit: number,
@@ -284,32 +291,19 @@ async function runConversationMessage(
if (!conversationId)
throw createError({ statusCode: 500, message: errorMessage('chat.conversation_create_failed') })

const historyRows = await loadConversationMessages(db, conversationId, 50)
const messages: AIMessage[] = []
const HISTORY_TOKEN_BUDGET = 8000

const budgetStart = (() => {
let tokens = 0
for (let i = historyRows.length - 1; i >= 0; i--) {
const row = historyRows[i]!
const content = row.toolCalls ? (row.toolCalls as AIContentBlock[]) : row.content
const estimate = typeof content === 'string'
? Math.ceil(content.length / 4)
: Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > HISTORY_TOKEN_BUDGET) return i + 1
}
return 0
})()

for (let i = budgetStart; i < historyRows.length; i++) {
const row = historyRows[i]!
const content = row.toolCalls ? (row.toolCalls as AIContentBlock[]) : (row.content as string | AIContentBlock[])
messages.push({ role: row.role as 'user' | 'assistant', content })
}
messages.push({ role: 'user', content: body.message })

const model = keyData.aiModel
const budget = selectHistoryBudget({ plan, model, source: 'api' })
const historyRows = await db.loadConversationMessages(
conversationId,
budget.rowLimit,
'role, content, tool_calls',
)
const messages = buildPromptMessages({
history: historyRows ?? [],
newUserMessage: body.message,
budget,
})

const configWorkflow = projectConfig?.workflow ?? 'auto-merge'
const workflow = hasFeature(plan, 'workflow.review') ? configWorkflow : 'auto-merge'

@@ -406,7 +400,7 @@ async function runConversationHistory(event: H3Event) {
throw createError({ statusCode: 404, message: errorMessage('chat.conversation_not_found') })

const limit = Math.min(Number(query.limit ?? 50), 100)
const messages = await loadConversationMessages(db, conversationId, limit)
const messages = await loadConversationHistoryForResponse(db, conversationId, limit)

return { conversationId, messages }
}
server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post.ts
@@ -1,10 +1,11 @@
import type { AIMessage, AIContentBlock } from '~~/server/providers/ai'
import type { AIContentBlock } from '~~/server/providers/ai'
import type { ChatRequest } from '~~/server/utils/agent-types'
import { createEventStream } from 'h3'
import { toAITools } from '~~/server/utils/agent-types'
import { deriveProjectPhase } from '~~/server/utils/agent-state-machine'
import { classifyIntent } from '~~/server/utils/agent-context'
import { runConversationLoop } from '~~/server/utils/conversation-engine'
import { buildPromptMessages, selectHistoryBudget } from '~~/server/utils/conversation-history'
import { resolveEnterpriseChatApiKey } from '../../../../../utils/enterprise'
import { getEffectiveLimit } from '../../../../../utils/overage'

@@ -18,8 +19,6 @@ import { getEffectiveLimit } from '../../../../../utils/overage'
* The AI loop + tool execution logic lives in the reusable engine.
*/

const HISTORY_TOKEN_BUDGET = 8000

export default defineEventHandler(async (event) => {
const session = requireAuth(event)
const workspaceId = getRouterParam(event, 'workspaceId')
@@ -143,32 +142,23 @@ export default defineEventHandler(async (event) => {
if (!conversationId)
throw createError({ statusCode: 500, message: errorMessage('chat.conversation_create_failed') })

// === HISTORY ===
const historyRows = await db.loadConversationMessages(conversationId, 50)

// Build message history: chronological order, newest messages prioritized within budget
const allHistory = historyRows ?? []
const messages: AIMessage[] = []

// Walk backwards to find budget cutoff, then take from that point forward
const budgetStart = (() => {
let tokens = 0
for (let i = allHistory.length - 1; i >= 0; i--) {
const row = allHistory[i]!
const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : row.content
const estimate = typeof content === 'string' ? Math.ceil(content.length / 4) : Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > HISTORY_TOKEN_BUDGET) return i + 1
}
return 0
})()
// Model: plan-gated selection. Picked here (before history) because
// `selectHistoryBudget` is model-aware — Haiku gets a smaller window
// than Sonnet/Opus.
const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']
const availableModels = hasFeature(plan, 'ai.studio_key') ? ALL_MODELS : STARTER_MODELS
const requestedModel = body.model as string | undefined
const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]!

for (let i = budgetStart; i < allHistory.length; i++) {
const row = allHistory[i]!
const content = row.tool_calls ? (row.tool_calls as AIContentBlock[]) : (row.content as string | AIContentBlock[])
messages.push({ role: row.role as 'user' | 'assistant', content })
}
messages.push({ role: 'user', content: body.message })
// === HISTORY ===
const budget = selectHistoryBudget({ plan, model, source: usageSource })
const historyRows = await db.loadConversationMessages(conversationId, budget.rowLimit)
const messages = buildPromptMessages({
history: historyRows ?? [],
newUserMessage: body.message,
budget,
})

// === LOAD SCHEMA (from brain cache) ===
const brain = await getOrBuildBrainCache(git, contentRoot, projectId)
@@ -212,13 +202,6 @@ export default defineEventHandler(async (event) => {
const phaseFiltered = permissionFiltered.filter(t => t.requiredPhase.includes(phase))
const aiTools = toAITools(phaseFiltered)

// Model: plan-gated selection
const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']
const availableModels = hasFeature(plan, 'ai.studio_key') ? ALL_MODELS : STARTER_MODELS
const requestedModel = body.model as string | undefined
const model = (requestedModel && availableModels.includes(requestedModel)) ? requestedModel : availableModels[0]!

// Workflow: plans without review feature always auto-merge regardless of config
const configWorkflow = projectConfig?.workflow ?? 'auto-merge'
const workflow = hasFeature(plan, 'workflow.review') ? configWorkflow : 'auto-merge'
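A standalone sketch (not part of the diff) of the plan-gated fallback above: an out-of-plan request degrades silently to the plan's default model rather than erroring. `hasStudioKey` stands in for the handler's `hasFeature(plan, 'ai.studio_key')` check; model IDs are copied from the diff.

const ALL_MODELS = ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-haiku-4-5-20251001']
const STARTER_MODELS = ['claude-haiku-4-5-20251001']

function pickModel(hasStudioKey: boolean, requested?: string): string {
  const available = hasStudioKey ? ALL_MODELS : STARTER_MODELS
  // Unavailable or missing request falls back to the plan's first (default) model.
  return (requested && available.includes(requested)) ? requested : available[0]!
}

pickModel(false, 'claude-opus-4-20250514') // => 'claude-haiku-4-5-20251001'
pickModel(true, 'claude-opus-4-20250514')  // => 'claude-opus-4-20250514'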
154 changes: 154 additions & 0 deletions server/utils/conversation-history.ts
@@ -0,0 +1,154 @@
/**
* Shared chat-history builder.
*
* Both the Studio chat handler and the Conversation API handler load
* prior messages from `messages`, walk them back-to-front under a
* token ceiling, and append the current user message. The duplicate
* lived in two files with a hard-coded 8K budget, a magic 50-row cap,
* and divergent `tool_calls`/`toolCalls` casing — see `chat.post.ts`
* pre-refactor and `ee/enterprise/conversation-api.ts`.
*
* Two pure helpers consolidate the logic:
*
* - `selectHistoryBudget({ plan, model, source })` returns the
* model-aware token ceiling, scaled by plan and source. Model
* drives capability; plan drives Contentrain's per-message
* margin; source drives who pays.
* - `buildPromptMessages({ history, newUserMessage, budget })`
* converts DB rows into the `AIMessage[]` shape the provider
* contract expects, slicing oldest rows when the budget runs out.
*
* No DB, no provider — pure functions, unit-testable.
*/
import type { AIContentBlock, AIMessage } from '../providers/ai'
import type { DatabaseRow } from '../providers/database'

export interface HistoryBudget {
/** Approximate input-token ceiling for the entire history block. */
maxTokens: number
/**
* Upper bound on rows fetched from DB. The token cutoff does the
* real work; this is a safety bound against pathologically long
* conversations. Scales with `maxTokens`.
*/
rowLimit: number
}

/**
* Per-model history budgets, picked conservatively to leave headroom
* for system prompt (5-15K with the brain content index), tools
* (~2K), the new user message, and 2-4K of output.
*
* Sonnet/Opus values stay well below the 200K long-context boundary
* because that tier carries premium pricing. Once prompt caching
* (per-block `cache_control`) lands these can grow — cache reads cost
* ~10% of base input, so the same dollar of input buys a much larger
* effective window. Until then, conservative is right.
*
* Sources: claude.com/docs/en/about-claude/models/overview and
* claude.com/docs/en/about-claude/pricing.
*/
const MODEL_HISTORY_BUDGETS: Record<string, number> = {
'claude-haiku-4-5-20251001': 12_000,

'claude-sonnet-4-20250514': 32_000,
'claude-sonnet-4-5': 40_000,
'claude-sonnet-4-6': 48_000,

'claude-opus-4-20250514': 32_000,
'claude-opus-4-1-20250805': 32_000,
'claude-opus-4-7': 48_000,
}

/** Unknown model IDs (future / preview) get a conservative default between the Haiku and Sonnet tiers. */
const FALLBACK_BUDGET = 16_000

/**
* Source axis: who pays for the input tokens.
*
* - studio: Contentrain pays. Default budget.
* - api: workspace pays via its plan + overage. Default budget.
* - byoa: workspace user pays Anthropic directly; we can afford to
* send more history because the marginal cost is on them.
*/
const SOURCE_MULTIPLIER: Record<'studio' | 'byoa' | 'api', number> = {
studio: 1,
api: 1,
byoa: 1.5,
}

/**
* Plan axis: matches Studio's per-message margin posture.
* `free` evaluates to 0 by design — free tier should never reach this
* code path (gated upstream by feature/limit checks). If something
* routes free traffic here, history degrades to "only the current
* user message" rather than fabricating budget the plan doesn't fund.
*/
const PLAN_MULTIPLIER: Record<string, number> = {
free: 0,
starter: 0.75,
pro: 1,
enterprise: 1.25,
community: 1,
}
const FALLBACK_PLAN_MULTIPLIER = 1

export function selectHistoryBudget(input: {
plan: string
model: string
source: 'studio' | 'byoa' | 'api'
}): HistoryBudget {
const base = MODEL_HISTORY_BUDGETS[input.model] ?? FALLBACK_BUDGET
const planMul = PLAN_MULTIPLIER[input.plan] ?? FALLBACK_PLAN_MULTIPLIER
const sourceMul = SOURCE_MULTIPLIER[input.source]
const maxTokens = Math.floor(base * planMul * sourceMul)
// Assume ~120 tokens per row (a short user/assistant message) to
// bound DB pagination without undercutting what the token budget
// can hold; never fetch fewer than 50 rows.
const rowLimit = Math.max(50, Math.ceil(maxTokens / 120))
return { maxTokens, rowLimit }
}

export function buildPromptMessages(input: {
history: DatabaseRow[]
newUserMessage: string
budget: HistoryBudget
}): AIMessage[] {
const messages: AIMessage[] = []
const cutoff = findBudgetCutoff(input.history, input.budget.maxTokens)
for (let i = cutoff; i < input.history.length; i++) {
const row = input.history[i]!
messages.push({
role: row.role as 'user' | 'assistant',
content: extractContent(row),
})
}
messages.push({ role: 'user', content: input.newUserMessage })
return messages
}

/**
* Tolerates both `tool_calls` (Studio path — `db.loadConversationMessages`
* returns snake_case rows) and `toolCalls` (EE handler's pre-refactor
* wrapper renamed it). Once that wrapper is gone the second branch is
* dead — leave it as a safety net for any external caller.
*/
function extractContent(row: DatabaseRow): string | AIContentBlock[] {
const blocks = (row.tool_calls ?? row.toolCalls) as AIContentBlock[] | null | undefined
if (blocks && Array.isArray(blocks) && blocks.length > 0) return blocks
return row.content as string | AIContentBlock[]
}

function findBudgetCutoff(history: DatabaseRow[], maxTokens: number): number {
if (maxTokens <= 0) return history.length
let tokens = 0
for (let i = history.length - 1; i >= 0; i--) {
const row = history[i]!
const content = extractContent(row)
const estimate = typeof content === 'string'
? Math.ceil(content.length / 4)
: Math.ceil(JSON.stringify(content).length / 4)
tokens += estimate
if (tokens > maxTokens) return i + 1
}
return 0
}
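For reference, a minimal usage sketch of the two helpers above (not part of the diff; import path as mocked in the tests below, history rows shaped as the `role`/`content`/`tool_calls` columns the handlers select, with hypothetical contents):

import { buildPromptMessages, selectHistoryBudget } from '~~/server/utils/conversation-history'

// pro plan, Sonnet 4, Studio path:
//   maxTokens = floor(32_000 * 1 * 1) = 32_000
//   rowLimit  = max(50, ceil(32_000 / 120)) = 267
const budget = selectHistoryBudget({
  plan: 'pro',
  model: 'claude-sonnet-4-20250514',
  source: 'studio',
})

// Hypothetical rows, oldest first, as loadConversationMessages returns them.
const history = [
  { role: 'user', content: 'Add a hero section', tool_calls: null },
  { role: 'assistant', content: 'Done, added hero.vue', tool_calls: null },
]

// Chronological history within budget, then the new user message last.
const messages = buildPromptMessages({
  history,
  newUserMessage: 'Now make the heading larger',
  budget,
})
// messages.length === 3; messages[2] is { role: 'user', content: 'Now make the heading larger' }

By contrast, starter + Haiku on the Studio path yields floor(12_000 * 0.75 * 1) = 9_000 tokens with a 75-row cap, and an enterprise BYOA call with Sonnet 4 yields floor(32_000 * 1.25 * 1.5) = 60_000 tokens with a 500-row cap.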
6 changes: 5 additions & 1 deletion tests/integration/chat-route.integration.test.ts
@@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut
vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine'))
vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context'))
vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine'))
vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history'))

async function loadChatHandler() {
return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default
@@ -190,7 +191,10 @@ describe('chat route integration', () => {
expect(payload).toContain('"id":"conversation-new"')
expect(payload).toContain('"type":"done"')
expect(mockCreateConversation).toHaveBeenCalledWith('project-1', 'user-1', 'hello')
expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', 50)
// rowLimit is derived from selectHistoryBudget(plan, model, source);
// budget math is covered by conversation-history.test.ts. Here we only
// check that the handler asked for some bounded history slice.
expect(mockLoadMessages).toHaveBeenCalledWith('conversation-new', expect.any(Number))
expect(saveChatResult).toHaveBeenCalledWith(
'conversation-new',
'hello',
1 change: 1 addition & 0 deletions tests/integration/overage-soft-cap.integration.test.ts
@@ -5,6 +5,7 @@ vi.mock('~~/server/utils/agent-types', async () => await import('../../server/ut
vi.mock('~~/server/utils/agent-state-machine', async () => await import('../../server/utils/agent-state-machine'))
vi.mock('~~/server/utils/agent-context', async () => await import('../../server/utils/agent-context'))
vi.mock('~~/server/utils/conversation-engine', async () => await import('../../server/utils/conversation-engine'))
vi.mock('~~/server/utils/conversation-history', async () => await import('../../server/utils/conversation-history'))

async function loadChatHandler() {
return (await import('../../server/api/workspaces/[workspaceId]/projects/[projectId]/chat.post')).default