From 23631b23f20e82a6efab1324b012777f0ed35b34 Mon Sep 17 00:00:00 2001 From: Contentrain Date: Fri, 15 May 2026 15:46:58 +0300 Subject: [PATCH] fix(billing): correct Conversation API actor model + usage table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Conversation API endpoint was wired through `agent_usage` with `source='api'` and `user_id=keyData.keyId`, but `agent_usage`'s CHECK constraint only accepts `'studio'|'byoa'` and its `user_id` FK points at `profiles(id)`. Both violations fire on the first DB call, so the endpoint has been unreachable since it shipped. The system was never production-deployed, so there is no data to migrate — this PR restructures the model before the route can be opened. Code review concluded that squeezing API usage into the per-user `agent_usage` table with nullable user_id + COALESCE-UNIQUE was the wrong shape: API key actors differ from user actors in lifecycle, scope, permissions, and cap semantics. The same key-keyed aggregate shape will be needed for MCP Cloud keys, so a dedicated table is the forward-compatible model. Migration 006: - New `api_message_usage (workspace, api_key, month)` aggregate with message + token counters and a workspace-admin SELECT RLS policy. - `increment_api_usage_if_allowed` RPC enforces BOTH the per-key monthly cap and the workspace plan cap in one advisory-locked transaction and reports which cap fired via a `reason` discriminator so the 429 message can name the right limit. - `increment_api_usage_tokens` RPC for post-AI token accounting. - `conversations.user_id` relaxed to nullable, `api_key_id` added as nullable FK → `conversation_api_keys`, and a `num_nonnulls(user_id, api_key_id) = 1` CHECK enforces exactly one actor per row. Existing rows all satisfy the check trivially. Provider surface: - `incrementAPIUsageIfAllowed` / `updateAPIUsageTokens` thin wrappers over the new RPCs. - `createApiConversation` is its own method (not a `createConversation` overload) so the call site states intent. `getConversation` now takes a discriminated union `{ userId } | { apiKeyId }` filter so the type system rules out mixed ownership lookups. - `getWorkspaceMonthlyAPIUsage` now reads from `api_message_usage` instead of the always-empty `agent_usage.source='api'` slice. EE wiring: - `conversation-api.ts` calls the new RPC, computes both caps via `getEffectiveLimit` (so overage-enabled workspaces fall through to the meter outbox), uses the new conversation helpers, and persists via the new `saveApiChatResult`. - `conversation-keys.ts` update path now clamps `monthly_message_limit` to the current `api.messages_per_month` plan cap, matching the create path. Plan downgrades can no longer leave keys with stale-high limits. The Studio chat path (`agent_usage`, `saveChatResult`) is untouched. `saveChatResult` is narrowed to `'byoa'|'studio'` since `'api'` is now invalid by construction and routes through `saveApiChatResult`. --- ee/enterprise/conversation-api.ts | 39 ++-- ee/enterprise/conversation-keys.ts | 19 +- server/providers/database.ts | 53 ++++- server/providers/supabase-db/conversations.ts | 54 ++++- server/providers/supabase-db/usage.ts | 5 +- server/utils/db.ts | 112 ++++++++--- ...i_message_usage_and_conversation_actor.sql | 187 ++++++++++++++++++ tests/unit/db.test.ts | 36 +++- 8 files changed, 455 insertions(+), 50 deletions(-) create mode 100644 supabase/migrations/006_api_message_usage_and_conversation_actor.sql diff --git a/ee/enterprise/conversation-api.ts b/ee/enterprise/conversation-api.ts index 102ed5c9..14e19d84 100644 --- a/ee/enterprise/conversation-api.ts +++ b/ee/enterprise/conversation-api.ts @@ -15,8 +15,9 @@ import { errorMessage } from '../../server/utils/content-strings' import { normalizeContentRoot } from '../../server/utils/content-paths' import { runConversationLoop } from '../../server/utils/conversation-engine' import { validateConversationKey } from '../../server/utils/conversation-keys' -import { saveChatResult } from '../../server/utils/db' -import { getWorkspacePlan, hasFeature } from '../../server/utils/license' +import { saveApiChatResult } from '../../server/utils/db' +import { getPlanLimit, getWorkspacePlan, hasFeature } from '../../server/utils/license' +import { getEffectiveLimit } from '../../server/utils/overage' import { checkRateLimit } from '../../server/utils/rate-limit' import { useDatabaseProvider, useGitProvider } from '../../server/utils/providers' @@ -163,18 +164,28 @@ async function runConversationMessage( if (!rateCheck.allowed) throw createError({ statusCode: 429, message: errorMessage('chat.rate_limited', { seconds: Math.ceil(rateCheck.retryAfterMs / 1000) }) }) - // Atomic: check monthly limit + reserve a message slot (prevents race conditions) + // Atomic: enforce per-key AND per-workspace caps in one RPC, then + // reserve a message slot. `getEffectiveLimit` raises the cap to + // SOFT_CAP_MAX when overage is enabled or the plan is Infinity, so + // the RPC stays integer-typed and overage requests fall through to + // the meter outbox below. const usageMonth = new Date().toISOString().substring(0, 7) - const { allowed } = await db.incrementAgentUsageIfAllowed({ + const overageSettings = (event.context as { billing?: { overageSettings?: Record } }).billing?.overageSettings + const workspacePlanLimit = getPlanLimit(plan, 'api.messages_per_month') + const workspaceLimit = getEffectiveLimit(workspacePlanLimit, 'api.messages_per_month', overageSettings) + const keyLimit = getEffectiveLimit(keyData.monthlyMessageLimit, 'api.messages_per_month', overageSettings) + + const usageCheck = await db.incrementAPIUsageIfAllowed({ workspaceId: keyData.workspaceId, - userId: keyData.keyId, apiKeyId: keyData.keyId, month: usageMonth, - source: 'api', - limit: keyData.monthlyMessageLimit, + keyLimit, + workspaceLimit, }) - if (!allowed) - throw createError({ statusCode: 429, message: errorMessage('conversation.monthly_limit', { limit: keyData.monthlyMessageLimit }) }) + if (!usageCheck.allowed) { + const limitForMessage = usageCheck.reason === 'workspace_limit' ? workspacePlanLimit : keyData.monthlyMessageLimit + throw createError({ statusCode: 429, message: errorMessage('conversation.monthly_limit', { limit: limitForMessage }) }) + } // Best-effort meter write for overage billing. Fire-and-forget. recordAPIUsage({ workspaceId: keyData.workspaceId, count: 1, apiKeyId: keyData.keyId, month: usageMonth }).catch(() => {}) @@ -240,13 +251,13 @@ async function runConversationMessage( let conversationId = body.conversationId if (conversationId) { - const conv = await db.getConversation(conversationId, keyData.projectId, { userId: keyData.keyId }) + const conv = await db.getConversation(conversationId, keyData.projectId, { apiKeyId: keyData.keyId }) if (!conv) conversationId = undefined } if (!conversationId) { - conversationId = await db.createConversation(keyData.projectId, keyData.keyId, body.message.substring(0, 100)) ?? undefined + conversationId = await db.createApiConversation(keyData.projectId, keyData.keyId, body.message.substring(0, 100)) ?? undefined } if (!conversationId) @@ -320,7 +331,7 @@ async function runConversationMessage( } } - await saveChatResult( + await saveApiChatResult( conversationId, body.message, responseText, @@ -330,9 +341,7 @@ async function runConversationMessage( totalOutputTokens, keyData.workspaceId, keyData.keyId, - 'api', usageMonth, - keyData.keyId, ) return { @@ -354,7 +363,7 @@ async function runConversationHistory(event: H3Event) { if (!conversationId) throw createError({ statusCode: 400, message: errorMessage('validation.conversation_id_required') }) - const conv = await db.getConversation(conversationId, keyData.projectId, { userId: keyData.keyId }) + const conv = await db.getConversation(conversationId, keyData.projectId, { apiKeyId: keyData.keyId }) if (!conv) throw createError({ statusCode: 404, message: errorMessage('chat.conversation_not_found') }) diff --git a/ee/enterprise/conversation-keys.ts b/ee/enterprise/conversation-keys.ts index 6f79469c..cdb39ad7 100644 --- a/ee/enterprise/conversation-keys.ts +++ b/ee/enterprise/conversation-keys.ts @@ -149,11 +149,19 @@ export function createConversationKeysBridge() { await db.requireWorkspaceRole(session.accessToken, session.user.id, workspaceId, ['owner', 'admin']) - if (body.customInstructions !== undefined) { + // Resolve plan once if any plan-gated field is being updated. + // monthlyMessageLimit must be clamped to the workspace plan cap + // — otherwise a downgraded workspace could carry a stale per-key + // limit that exceeds its current `api.messages_per_month`. + const needsPlan = body.customInstructions !== undefined || body.monthlyMessageLimit !== undefined + let plan: ReturnType | null = null + if (needsPlan) { const workspace = await db.getWorkspaceById(workspaceId, 'plan') + plan = getWorkspacePlan(workspace ?? {}) + } - const plan = getWorkspacePlan(workspace ?? {}) - if (!hasFeature(plan, 'api.custom_instructions') && body.customInstructions) + if (body.customInstructions !== undefined) { + if (!hasFeature(plan!, 'api.custom_instructions') && body.customInstructions) throw createError({ statusCode: 403, message: errorMessage('conversation.upgrade') }) } @@ -170,7 +178,10 @@ export function createConversationKeysBridge() { if (body.customInstructions !== undefined) updates.custom_instructions = body.customInstructions ? body.customInstructions.substring(0, 2000) : body.customInstructions if (body.aiModel !== undefined && validModels.includes(body.aiModel)) updates.ai_model = body.aiModel if (body.rateLimitPerMinute !== undefined) updates.rate_limit_per_minute = Math.max(1, Math.min(body.rateLimitPerMinute, 60)) - if (body.monthlyMessageLimit !== undefined) updates.monthly_message_limit = Math.max(1, Math.min(body.monthlyMessageLimit, 100_000)) + if (body.monthlyMessageLimit !== undefined) { + const planCap = getPlanLimit(plan!, 'api.messages_per_month') + updates.monthly_message_limit = Math.max(1, Math.min(body.monthlyMessageLimit, planCap)) + } if (Object.keys(updates).length === 0) throw createError({ statusCode: 400, message: errorMessage('validation.no_fields_to_update') }) diff --git a/server/providers/database.ts b/server/providers/database.ts index 0a5d1377..50145ba7 100644 --- a/server/providers/database.ts +++ b/server/providers/database.ts @@ -251,7 +251,24 @@ export interface DatabaseProvider { // ═══════════════════════════════════════════════════ createConversation: (projectId: string, userId: string, title: string) => Promise - getConversation: (conversationId: string, projectId: string, filters?: { userId?: string }) => Promise + /** + * Create a Conversation API-owned chat thread. The API key acts as + * the actor in place of a workspace member; `conversations.user_id` + * stays NULL and `api_key_id` carries the owner reference. Enforced + * by `conversations_actor_xor` — exactly one of user_id / api_key_id + * is non-null per row. + */ + createApiConversation: (projectId: string, apiKeyId: string, title: string) => Promise + /** + * Ownership check before continuing a conversation. Pass either a + * `userId` (Studio chat) or an `apiKeyId` (Conversation API) — never + * both. The XOR is mirrored from the DB-level CHECK on `conversations`. + */ + getConversation: ( + conversationId: string, + projectId: string, + filters?: { userId: string } | { apiKeyId: string }, + ) => Promise listConversations: (accessToken: string, projectId: string, userId: string) => Promise deleteConversation: (accessToken: string, conversationId: string, userId: string, projectId: string) => Promise updateConversationTimestamp: (conversationId: string) => Promise @@ -311,6 +328,40 @@ export interface DatabaseProvider { outputTokens: number }) => Promise + // ═══════════════════════════════════════════════════ + // API MESSAGE USAGE (Conversation API) + // ═══════════════════════════════════════════════════ + + /** + * Atomic: enforce BOTH per-key and per-workspace monthly caps in + * one RPC and reserve a message slot. Reason discriminator lets the + * caller surface the right 429 message ("your key is over" vs "the + * workspace plan is over"). + * + * Pass `SOFT_CAP_MAX` (from `getEffectiveLimit`) for either limit + * when overage is enabled or the plan grants Infinity. + */ + incrementAPIUsageIfAllowed: (input: { + workspaceId: string + apiKeyId: string + month: string + keyLimit: number + workspaceLimit: number + }) => Promise<{ + allowed: boolean + reason: 'ok' | 'key_limit' | 'workspace_limit' + current: number + }> + + /** Increment token counters on the `api_message_usage` row after the AI call settles. */ + updateAPIUsageTokens: (input: { + workspaceId: string + apiKeyId: string + month: string + inputTokens: number + outputTokens: number + }) => Promise + // ═══════════════════════════════════════════════════ // MEDIA ASSETS // ═══════════════════════════════════════════════════ diff --git a/server/providers/supabase-db/conversations.ts b/server/providers/supabase-db/conversations.ts index 69d030d9..354de3a7 100644 --- a/server/providers/supabase-db/conversations.ts +++ b/server/providers/supabase-db/conversations.ts @@ -8,6 +8,7 @@ import { getAdmin, getUser } from './helpers' type ConversationMethods = Pick< DatabaseProvider, | 'createConversation' + | 'createApiConversation' | 'getConversation' | 'listConversations' | 'deleteConversation' @@ -19,6 +20,8 @@ type ConversationMethods = Pick< | 'getMonthlyUsageSummary' | 'incrementAgentUsageIfAllowed' | 'updateAgentUsageTokens' + | 'incrementAPIUsageIfAllowed' + | 'updateAPIUsageTokens' | 'getBYOAKey' > @@ -39,15 +42,31 @@ export function conversationMethods(): ConversationMethods { return data?.id ?? null }, + async createApiConversation(projectId, apiKeyId, title) { + const admin = getAdmin() + const { data } = await admin + .from('conversations') + .insert({ + project_id: projectId, + api_key_id: apiKeyId, + title: title.substring(0, 100), + }) + .select('id') + .single() + + return data?.id ?? null + }, + async getConversation(conversationId, projectId, filters) { const admin = getAdmin() let query = admin .from('conversations') - .select('id, title, user_id, created_at, updated_at') + .select('id, title, user_id, api_key_id, created_at, updated_at') .eq('id', conversationId) .eq('project_id', projectId) - if (filters?.userId) query = query.eq('user_id', filters.userId) + if (filters && 'userId' in filters) query = query.eq('user_id', filters.userId) + else if (filters && 'apiKeyId' in filters) query = query.eq('api_key_id', filters.apiKeyId) const { data, error } = await query.single() if (error) { @@ -226,6 +245,37 @@ export function conversationMethods(): ConversationMethods { }) }, + // ─── API Message Usage (Conversation API) ─── + + async incrementAPIUsageIfAllowed(input) { + const admin = getAdmin() + const { data, error } = await admin.rpc('increment_api_usage_if_allowed', { + p_workspace_id: input.workspaceId, + p_api_key_id: input.apiKeyId, + p_month: input.month, + p_key_limit: input.keyLimit, + p_workspace_limit: input.workspaceLimit, + }) + + if (error) { + throw createError({ statusCode: 500, message: `Atomic API usage check failed: ${error.message}` }) + } + + const result = data as { allowed: boolean, reason: 'ok' | 'key_limit' | 'workspace_limit', current: number } + return result + }, + + async updateAPIUsageTokens(input) { + const admin = getAdmin() + await admin.rpc('increment_api_usage_tokens', { + p_workspace_id: input.workspaceId, + p_api_key_id: input.apiKeyId, + p_month: input.month, + p_input_tokens: input.inputTokens, + p_output_tokens: input.outputTokens, + }) + }, + // ─── BYOA Key ─── async getBYOAKey(accessToken, workspaceId, userId) { diff --git a/server/providers/supabase-db/usage.ts b/server/providers/supabase-db/usage.ts index edf0fcf7..4e20eba4 100644 --- a/server/providers/supabase-db/usage.ts +++ b/server/providers/supabase-db/usage.ts @@ -34,12 +34,13 @@ export function usageMethods(): UsageMethods { }, async getWorkspaceMonthlyAPIUsage(workspaceId, month) { + // Conversation API usage is keyed by `api_key_id`, not `user_id`, + // and lives in its own aggregate table — see migration 006. const { data } = await getAdmin() - .from('agent_usage') + .from('api_message_usage') .select('message_count') .eq('workspace_id', workspaceId) .eq('month', month) - .eq('source', 'api') return (data ?? []).reduce( (sum: number, r: Record) => sum + ((r.message_count as number) ?? 0), diff --git a/server/utils/db.ts b/server/utils/db.ts index e623e508..866be47c 100644 --- a/server/utils/db.ts +++ b/server/utils/db.ts @@ -219,11 +219,44 @@ export async function inviteOrLookupUser( // ─── Cross-domain: Chat Persistence ─── +async function persistChatMessages(input: { + conversationId: string + userMessage: string + assistantText: string + assistantContent: unknown[] + model: string + inputTokens: number + outputTokens: number +}) { + const db = useDatabaseProvider() + await db.insertMessage({ conversationId: input.conversationId, role: 'user', content: input.userMessage }) + try { + await db.insertMessage({ + conversationId: input.conversationId, + role: 'assistant', + content: input.assistantText || '[tool calls]', + toolCalls: input.assistantContent.length > 0 ? input.assistantContent : null, + tokenCountInput: input.inputTokens, + tokenCountOutput: input.outputTokens, + model: input.model, + }) + } + catch (err) { + // eslint-disable-next-line no-console + console.error('[chat-persist] Failed to insert assistant message:', err) + throw err + } +} + /** - * Save chat messages + update token counts. - * Message count is already reserved atomically before the chat via - * incrementAgentUsageIfAllowed — this function only persists messages - * and updates the token metadata on the existing usage row. + * Save chat messages + update token counts for a Studio (workspace + * member) chat. Message count is already reserved atomically before + * the AI call via `incrementAgentUsageIfAllowed`; this function only + * persists messages and bumps the token metadata on the row that the + * reservation created. + * + * Conversation API has its own actor model (key-keyed, not user-keyed) + * and a dedicated `api_message_usage` table — use `saveApiChatResult`. */ export async function saveChatResult( conversationId: string, @@ -235,32 +268,21 @@ export async function saveChatResult( outputTokens: number, workspaceId: string, userId: string, - usageSource: 'byoa' | 'studio' | 'api', + usageSource: 'byoa' | 'studio', usageMonth: string, - _apiKeyId?: string, ) { const db = useDatabaseProvider() - // Insert both messages together — if assistant insert fails, log but don't leave orphan - await db.insertMessage({ conversationId, role: 'user', content: userMessage }) - try { - await db.insertMessage({ - conversationId, - role: 'assistant', - content: assistantText || '[tool calls]', - toolCalls: assistantContent.length > 0 ? assistantContent : null, - tokenCountInput: inputTokens, - tokenCountOutput: outputTokens, - model, - }) - } - catch (err) { - console.error('[saveChatResult] Failed to insert assistant message:', err) - throw err - } + await persistChatMessages({ + conversationId, + userMessage, + assistantText, + assistantContent, + model, + inputTokens, + outputTokens, + }) - // Token counts only — message_count already reserved atomically. - // Uses SQL increment to prevent concurrent overwrites. await db.updateAgentUsageTokens({ workspaceId, userId, @@ -272,3 +294,43 @@ export async function saveChatResult( await db.updateConversationTimestamp(conversationId) } + +/** + * Save chat messages + update token counts for a Conversation API + * chat. The actor here is an API key (no workspace member identity), + * so usage flows through `api_message_usage` instead of `agent_usage`. + */ +export async function saveApiChatResult( + conversationId: string, + userMessage: string, + assistantText: string, + assistantContent: unknown[], + model: string, + inputTokens: number, + outputTokens: number, + workspaceId: string, + apiKeyId: string, + usageMonth: string, +) { + const db = useDatabaseProvider() + + await persistChatMessages({ + conversationId, + userMessage, + assistantText, + assistantContent, + model, + inputTokens, + outputTokens, + }) + + await db.updateAPIUsageTokens({ + workspaceId, + apiKeyId, + month: usageMonth, + inputTokens, + outputTokens, + }) + + await db.updateConversationTimestamp(conversationId) +} diff --git a/supabase/migrations/006_api_message_usage_and_conversation_actor.sql b/supabase/migrations/006_api_message_usage_and_conversation_actor.sql new file mode 100644 index 00000000..804ba529 --- /dev/null +++ b/supabase/migrations/006_api_message_usage_and_conversation_actor.sql @@ -0,0 +1,187 @@ +-- Conversation API actor model + dedicated API usage table. +-- +-- Two related concerns ship together because either alone leaves the +-- Conversation API endpoint broken at the next DB call: +-- +-- 1. API message billing +-- The Conversation API actor is an API key, not a user. Earlier +-- code routed through `agent_usage` with `source='api'`, which +-- violates both `agent_usage_source_check` (only allows +-- 'studio'|'byoa') and `agent_usage_user_id_fkey` (FK → profiles). +-- Squeezing API usage into the per-user `agent_usage` model with +-- nullable user_id + COALESCE-UNIQUE was rejected during review — +-- actor types are too different (lifecycle, scope, permissions). +-- +-- A dedicated `api_message_usage` table aggregates per +-- `(workspace, api_key, month)` with both message_count and token +-- counts, mirroring `agent_usage`'s aggregate shape but keyed by +-- API key. The same pattern is expected for MCP Cloud keys later. +-- +-- 2. Conversation ownership +-- `conversations.user_id` is `NOT NULL REFERENCES profiles(id)`, +-- so a Conversation-API-initiated chat would fail FK on +-- createConversation even after the usage fix. This migration +-- relaxes the column, adds nullable `api_key_id` FK, and enforces +-- an XOR check so every row has exactly one actor. Existing rows +-- all carry user_id and trivially satisfy the new check. +-- +-- RLS is unaffected for the user path — `user_id = auth.uid()` +-- evaluates to NULL (= "not visible") for API-owned rows, which is +-- the correct behavior since API-owned conversations are only +-- accessed via the service-role admin client. + +-- ───────────────────────────────────────────────────────────────── +-- 1. api_message_usage table +-- ───────────────────────────────────────────────────────────────── + +CREATE TABLE public.api_message_usage ( + id uuid PRIMARY KEY DEFAULT gen_random_uuid(), + workspace_id uuid NOT NULL REFERENCES public.workspaces(id) ON DELETE CASCADE, + api_key_id uuid NOT NULL REFERENCES public.conversation_api_keys(id) ON DELETE CASCADE, + month text NOT NULL, + message_count integer NOT NULL DEFAULT 0, + input_tokens bigint NOT NULL DEFAULT 0, + output_tokens bigint NOT NULL DEFAULT 0, + updated_at timestamptz NOT NULL DEFAULT now(), + UNIQUE (workspace_id, api_key_id, month) +); + +CREATE INDEX idx_api_message_usage_workspace_month + ON public.api_message_usage (workspace_id, month); + +ALTER TABLE public.api_message_usage ENABLE ROW LEVEL SECURITY; + +-- Workspace owners and admins can read API usage for their workspaces; +-- mirrors the conversation_api_keys read policy so the billing UI can +-- render per-key usage without exposing cross-workspace rows. +CREATE POLICY "Workspace admins view API usage" ON public.api_message_usage + FOR SELECT USING ( + workspace_id IN ( + SELECT wm.workspace_id FROM public.workspace_members wm + WHERE wm.user_id = auth.uid() + AND wm.role IN ('owner', 'admin') + ) + ); + +-- All writes go through SECURITY DEFINER RPCs called from the service +-- role; no INSERT/UPDATE policy is granted to authenticated users. + +-- ───────────────────────────────────────────────────────────────── +-- 2. Atomic reserve: two-cap (per-key + per-workspace) message limit +-- ───────────────────────────────────────────────────────────────── + +CREATE FUNCTION public.increment_api_usage_if_allowed( + p_workspace_id uuid, + p_api_key_id uuid, + p_month text, + p_key_limit integer, + p_workspace_limit integer +) RETURNS jsonb +LANGUAGE plpgsql SECURITY DEFINER +SET search_path TO '' +AS $$ +DECLARE + v_key_current INTEGER; + v_workspace_current INTEGER; +BEGIN + -- Serialize concurrent reservations for the same key/month so two + -- racing requests can't both pass the cap check. + PERFORM pg_advisory_xact_lock( + hashtext(p_workspace_id::text || ':' || p_api_key_id::text || ':' || p_month) + ); + + -- Per-key current count + SELECT COALESCE(message_count, 0) INTO v_key_current + FROM public.api_message_usage + WHERE workspace_id = p_workspace_id + AND api_key_id = p_api_key_id + AND month = p_month; + + IF v_key_current IS NULL THEN + v_key_current := 0; + END IF; + + IF v_key_current >= p_key_limit THEN + RETURN jsonb_build_object( + 'allowed', false, + 'reason', 'key_limit', + 'current', v_key_current + ); + END IF; + + -- Workspace total across every API key in this workspace+month + SELECT COALESCE(SUM(message_count), 0) INTO v_workspace_current + FROM public.api_message_usage + WHERE workspace_id = p_workspace_id + AND month = p_month; + + IF v_workspace_current >= p_workspace_limit THEN + RETURN jsonb_build_object( + 'allowed', false, + 'reason', 'workspace_limit', + 'current', v_workspace_current + ); + END IF; + + -- Reserve: atomic upsert +1 message + INSERT INTO public.api_message_usage ( + workspace_id, api_key_id, month, message_count, input_tokens, output_tokens + ) + VALUES ( + p_workspace_id, p_api_key_id, p_month, 1, 0, 0 + ) + ON CONFLICT (workspace_id, api_key_id, month) DO UPDATE SET + message_count = public.api_message_usage.message_count + 1, + updated_at = now(); + + RETURN jsonb_build_object( + 'allowed', true, + 'reason', 'ok', + 'current', v_key_current + 1 + ); +END; +$$; + +-- ───────────────────────────────────────────────────────────────── +-- 3. Token accounting (after AI call completes) +-- ───────────────────────────────────────────────────────────────── + +CREATE FUNCTION public.increment_api_usage_tokens( + p_workspace_id uuid, + p_api_key_id uuid, + p_month text, + p_input_tokens bigint, + p_output_tokens bigint +) RETURNS void +LANGUAGE plpgsql SECURITY DEFINER +SET search_path TO '' +AS $$ +BEGIN + UPDATE public.api_message_usage + SET + input_tokens = input_tokens + p_input_tokens, + output_tokens = output_tokens + p_output_tokens, + updated_at = now() + WHERE workspace_id = p_workspace_id + AND api_key_id = p_api_key_id + AND month = p_month; +END; +$$; + +-- ───────────────────────────────────────────────────────────────── +-- 4. conversations actor relaxation +-- ───────────────────────────────────────────────────────────────── + +ALTER TABLE public.conversations + ALTER COLUMN user_id DROP NOT NULL; + +ALTER TABLE public.conversations + ADD COLUMN api_key_id uuid REFERENCES public.conversation_api_keys(id) ON DELETE CASCADE; + +ALTER TABLE public.conversations + ADD CONSTRAINT conversations_actor_xor + CHECK (num_nonnulls(user_id, api_key_id) = 1); + +CREATE INDEX idx_conversations_api_key + ON public.conversations (api_key_id) + WHERE api_key_id IS NOT NULL; diff --git a/tests/unit/db.test.ts b/tests/unit/db.test.ts index fafb2ae9..6db62238 100644 --- a/tests/unit/db.test.ts +++ b/tests/unit/db.test.ts @@ -47,6 +47,7 @@ describe('db helpers', () => { insertMessage: vi.fn().mockResolvedValue(undefined), upsertAgentUsage: vi.fn().mockResolvedValue(undefined), updateAgentUsageTokens: vi.fn().mockResolvedValue(undefined), + updateAPIUsageTokens: vi.fn().mockResolvedValue(undefined), updateConversationTimestamp: vi.fn().mockResolvedValue(undefined), getBYOAKey: vi.fn().mockResolvedValue(null), } @@ -145,7 +146,6 @@ describe('db helpers', () => { 'user-1', 'byoa', '2026-04', - 'key-123', ) expect(mockDb.updateAgentUsageTokens).toHaveBeenCalledWith(expect.objectContaining({ @@ -155,4 +155,38 @@ describe('db helpers', () => { outputTokens: 2, })) }) + + it('saveApiChatResult writes to api_message_usage, not agent_usage', async () => { + const { saveApiChatResult } = await loadDbModule() + await saveApiChatResult( + 'conv-2', + 'Hello', + 'World', + [{ type: 'text', text: 'World' }], + 'claude-sonnet-4-5', + 11, + 5, + 'workspace-1', + 'key-abc', + '2026-04', + ) + + // Messages persist into the shared messages table the same way as + // the Studio path; the difference is only in which usage table the + // token counters land on. + expect(mockDb.insertMessage).toHaveBeenCalledTimes(2) + expect(mockDb.insertMessage).toHaveBeenCalledWith(expect.objectContaining({ role: 'user', content: 'Hello' })) + expect(mockDb.insertMessage).toHaveBeenCalledWith(expect.objectContaining({ role: 'assistant', content: 'World' })) + + expect(mockDb.updateAPIUsageTokens).toHaveBeenCalledWith({ + workspaceId: 'workspace-1', + apiKeyId: 'key-abc', + month: '2026-04', + inputTokens: 11, + outputTokens: 5, + }) + // Critical: must NOT touch the user-keyed agent_usage path. + expect(mockDb.updateAgentUsageTokens).not.toHaveBeenCalled() + expect(mockDb.updateConversationTimestamp).toHaveBeenCalledWith('conv-2') + }) })