From 23631b23f20e82a6efab1324b012777f0ed35b34 Mon Sep 17 00:00:00 2001
From: Contentrain <mcp@contentrain.io>
Date: Fri, 15 May 2026 15:46:58 +0300
Subject: [PATCH] fix(billing): correct Conversation API actor model + usage
 table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Conversation API endpoint was wired through `agent_usage` with
`source='api'` and `user_id=keyData.keyId`, but `agent_usage`'s CHECK
constraint only accepts `'studio'|'byoa'` and its `user_id` FK points
at `profiles(id)`. Both violations fire on the first DB call, so the
endpoint has been unreachable since it shipped. The system was never
production-deployed, so there is no data to migrate — this PR
restructures the model before the route can be opened.

Code review concluded that squeezing API usage into the per-user
`agent_usage` table with nullable user_id + COALESCE-UNIQUE was the
wrong shape: API key actors differ from user actors in lifecycle,
scope, permissions, and cap semantics. The same key-keyed aggregate
shape will be needed for MCP Cloud keys, so a dedicated table is the
forward-compatible model.

Migration 006:
- New `api_message_usage (workspace, api_key, month)` aggregate with
  message + token counters and a workspace-admin SELECT RLS policy.
- `increment_api_usage_if_allowed` RPC enforces BOTH the per-key
  monthly cap and the workspace plan cap in one advisory-locked
  transaction and reports which cap fired via a `reason`
  discriminator so the 429 message can name the right limit.
- `increment_api_usage_tokens` RPC for post-AI token accounting.
- `conversations.user_id` relaxed to nullable, `api_key_id` added as
  nullable FK → `conversation_api_keys`, and a
  `num_nonnulls(user_id, api_key_id) = 1` CHECK enforces exactly one
  actor per row. Existing rows all satisfy the check trivially.

Provider surface:
- `incrementAPIUsageIfAllowed` / `updateAPIUsageTokens` thin wrappers
  over the new RPCs.
- `createApiConversation` is its own method (not a `createConversation`
  overload) so the call site states intent. `getConversation` now
  takes a discriminated union `{ userId } | { apiKeyId }` filter so
  the type system rules out mixed ownership lookups.
- `getWorkspaceMonthlyAPIUsage` now reads from `api_message_usage`
  instead of the always-empty `agent_usage.source='api'` slice.

EE wiring:
- `conversation-api.ts` calls the new RPC, computes both caps via
  `getEffectiveLimit` (so overage-enabled workspaces fall through to
  the meter outbox), uses the new conversation helpers, and persists
  via the new `saveApiChatResult`.
- `conversation-keys.ts` update path now clamps
  `monthly_message_limit` to the current `api.messages_per_month`
  plan cap, matching the create path. Plan downgrades can no longer
  leave keys with stale-high limits.

The Studio chat path (`agent_usage`, `saveChatResult`) is untouched.
`saveChatResult` is narrowed to `'byoa'|'studio'` since `'api'` is
now invalid by construction and routes through `saveApiChatResult`.
---
 ee/enterprise/conversation-api.ts             |  39 ++--
 ee/enterprise/conversation-keys.ts            |  19 +-
 server/providers/database.ts                  |  53 ++++-
 server/providers/supabase-db/conversations.ts |  54 ++++-
 server/providers/supabase-db/usage.ts         |   5 +-
 server/utils/db.ts                            | 112 ++++++++---
 ...i_message_usage_and_conversation_actor.sql | 187 ++++++++++++++++++
 tests/unit/db.test.ts                         |  36 +++-
 8 files changed, 455 insertions(+), 50 deletions(-)
 create mode 100644 supabase/migrations/006_api_message_usage_and_conversation_actor.sql

diff --git a/ee/enterprise/conversation-api.ts b/ee/enterprise/conversation-api.ts
index 102ed5c9..14e19d84 100644
--- a/ee/enterprise/conversation-api.ts
+++ b/ee/enterprise/conversation-api.ts
@@ -15,8 +15,9 @@ import { errorMessage } from '../../server/utils/content-strings'
 import { normalizeContentRoot } from '../../server/utils/content-paths'
 import { runConversationLoop } from '../../server/utils/conversation-engine'
 import { validateConversationKey } from '../../server/utils/conversation-keys'
-import { saveChatResult } from '../../server/utils/db'
-import { getWorkspacePlan, hasFeature } from '../../server/utils/license'
+import { saveApiChatResult } from '../../server/utils/db'
+import { getPlanLimit, getWorkspacePlan, hasFeature } from '../../server/utils/license'
+import { getEffectiveLimit } from '../../server/utils/overage'
 import { checkRateLimit } from '../../server/utils/rate-limit'
 import { useDatabaseProvider, useGitProvider } from '../../server/utils/providers'
 
@@ -163,18 +164,28 @@ async function runConversationMessage(
   if (!rateCheck.allowed)
     throw createError({ statusCode: 429, message: errorMessage('chat.rate_limited', { seconds: Math.ceil(rateCheck.retryAfterMs / 1000) }) })
 
-  // Atomic: check monthly limit + reserve a message slot (prevents race conditions)
+  // Atomic: enforce per-key AND per-workspace caps in one RPC, then
+  // reserve a message slot. `getEffectiveLimit` raises the cap to
+  // SOFT_CAP_MAX when overage is enabled or the plan is Infinity, so
+  // the RPC stays integer-typed and overage requests fall through to
+  // the meter outbox below.
   const usageMonth = new Date().toISOString().substring(0, 7)
-  const { allowed } = await db.incrementAgentUsageIfAllowed({
+  const overageSettings = (event.context as { billing?: { overageSettings?: Record<string, boolean> } }).billing?.overageSettings
+  const workspacePlanLimit = getPlanLimit(plan, 'api.messages_per_month')
+  const workspaceLimit = getEffectiveLimit(workspacePlanLimit, 'api.messages_per_month', overageSettings)
+  const keyLimit = getEffectiveLimit(keyData.monthlyMessageLimit, 'api.messages_per_month', overageSettings)
+
+  const usageCheck = await db.incrementAPIUsageIfAllowed({
     workspaceId: keyData.workspaceId,
-    userId: keyData.keyId,
     apiKeyId: keyData.keyId,
     month: usageMonth,
-    source: 'api',
-    limit: keyData.monthlyMessageLimit,
+    keyLimit,
+    workspaceLimit,
   })
-  if (!allowed)
-    throw createError({ statusCode: 429, message: errorMessage('conversation.monthly_limit', { limit: keyData.monthlyMessageLimit }) })
+  if (!usageCheck.allowed) {
+    const limitForMessage = usageCheck.reason === 'workspace_limit' ? workspacePlanLimit : keyData.monthlyMessageLimit
+    throw createError({ statusCode: 429, message: errorMessage('conversation.monthly_limit', { limit: limitForMessage }) })
+  }
 
   // Best-effort meter write for overage billing. Fire-and-forget.
   recordAPIUsage({ workspaceId: keyData.workspaceId, count: 1, apiKeyId: keyData.keyId, month: usageMonth }).catch(() => {})
@@ -240,13 +251,13 @@ async function runConversationMessage(
 
   let conversationId = body.conversationId
   if (conversationId) {
-    const conv = await db.getConversation(conversationId, keyData.projectId, { userId: keyData.keyId })
+    const conv = await db.getConversation(conversationId, keyData.projectId, { apiKeyId: keyData.keyId })
 
     if (!conv) conversationId = undefined
   }
 
   if (!conversationId) {
-    conversationId = await db.createConversation(keyData.projectId, keyData.keyId, body.message.substring(0, 100)) ?? undefined
+    conversationId = await db.createApiConversation(keyData.projectId, keyData.keyId, body.message.substring(0, 100)) ?? undefined
   }
 
   if (!conversationId)
@@ -320,7 +331,7 @@ async function runConversationMessage(
     }
   }
 
-  await saveChatResult(
+  await saveApiChatResult(
     conversationId,
     body.message,
     responseText,
@@ -330,9 +341,7 @@ async function runConversationMessage(
     totalOutputTokens,
     keyData.workspaceId,
     keyData.keyId,
-    'api',
     usageMonth,
-    keyData.keyId,
   )
 
   return {
@@ -354,7 +363,7 @@ async function runConversationHistory(event: H3Event) {
   if (!conversationId)
     throw createError({ statusCode: 400, message: errorMessage('validation.conversation_id_required') })
 
-  const conv = await db.getConversation(conversationId, keyData.projectId, { userId: keyData.keyId })
+  const conv = await db.getConversation(conversationId, keyData.projectId, { apiKeyId: keyData.keyId })
 
   if (!conv)
     throw createError({ statusCode: 404, message: errorMessage('chat.conversation_not_found') })
diff --git a/ee/enterprise/conversation-keys.ts b/ee/enterprise/conversation-keys.ts
index 6f79469c..cdb39ad7 100644
--- a/ee/enterprise/conversation-keys.ts
+++ b/ee/enterprise/conversation-keys.ts
@@ -149,11 +149,19 @@ export function createConversationKeysBridge() {
 
       await db.requireWorkspaceRole(session.accessToken, session.user.id, workspaceId, ['owner', 'admin'])
 
-      if (body.customInstructions !== undefined) {
+      // Resolve plan once if any plan-gated field is being updated.
+      // monthlyMessageLimit must be clamped to the workspace plan cap
+      // — otherwise a downgraded workspace could carry a stale per-key
+      // limit that exceeds its current `api.messages_per_month`.
+      const needsPlan = body.customInstructions !== undefined || body.monthlyMessageLimit !== undefined
+      let plan: ReturnType<typeof getWorkspacePlan> | null = null
+      if (needsPlan) {
         const workspace = await db.getWorkspaceById(workspaceId, 'plan')
+        plan = getWorkspacePlan(workspace ?? {})
+      }
 
-        const plan = getWorkspacePlan(workspace ?? {})
-        if (!hasFeature(plan, 'api.custom_instructions') && body.customInstructions)
+      if (body.customInstructions !== undefined) {
+        if (!hasFeature(plan!, 'api.custom_instructions') && body.customInstructions)
           throw createError({ statusCode: 403, message: errorMessage('conversation.upgrade') })
       }
 
@@ -170,7 +178,10 @@ export function createConversationKeysBridge() {
       if (body.customInstructions !== undefined) updates.custom_instructions = body.customInstructions ? body.customInstructions.substring(0, 2000) : body.customInstructions
       if (body.aiModel !== undefined && validModels.includes(body.aiModel)) updates.ai_model = body.aiModel
       if (body.rateLimitPerMinute !== undefined) updates.rate_limit_per_minute = Math.max(1, Math.min(body.rateLimitPerMinute, 60))
-      if (body.monthlyMessageLimit !== undefined) updates.monthly_message_limit = Math.max(1, Math.min(body.monthlyMessageLimit, 100_000))
+      if (body.monthlyMessageLimit !== undefined) {
+        const planCap = getPlanLimit(plan!, 'api.messages_per_month')
+        updates.monthly_message_limit = Math.max(1, Math.min(body.monthlyMessageLimit, planCap))
+      }
 
       if (Object.keys(updates).length === 0)
         throw createError({ statusCode: 400, message: errorMessage('validation.no_fields_to_update') })
diff --git a/server/providers/database.ts b/server/providers/database.ts
index 0a5d1377..50145ba7 100644
--- a/server/providers/database.ts
+++ b/server/providers/database.ts
@@ -251,7 +251,24 @@ export interface DatabaseProvider {
   // ═══════════════════════════════════════════════════
 
   createConversation: (projectId: string, userId: string, title: string) => Promise<string | null>
-  getConversation: (conversationId: string, projectId: string, filters?: { userId?: string }) => Promise<DatabaseRow | null>
+  /**
+   * Create a Conversation API-owned chat thread. The API key acts as
+   * the actor in place of a workspace member; `conversations.user_id`
+   * stays NULL and `api_key_id` carries the owner reference. Enforced
+   * by `conversations_actor_xor` — exactly one of user_id / api_key_id
+   * is non-null per row.
+   */
+  createApiConversation: (projectId: string, apiKeyId: string, title: string) => Promise<string | null>
+  /**
+   * Ownership check before continuing a conversation. Pass either a
+   * `userId` (Studio chat) or an `apiKeyId` (Conversation API) — never
+   * both. The XOR is mirrored from the DB-level CHECK on `conversations`.
+   */
+  getConversation: (
+    conversationId: string,
+    projectId: string,
+    filters?: { userId: string } | { apiKeyId: string },
+  ) => Promise<DatabaseRow | null>
   listConversations: (accessToken: string, projectId: string, userId: string) => Promise<DatabaseRow[]>
   deleteConversation: (accessToken: string, conversationId: string, userId: string, projectId: string) => Promise<void>
   updateConversationTimestamp: (conversationId: string) => Promise<void>
@@ -311,6 +328,40 @@ export interface DatabaseProvider {
     outputTokens: number
   }) => Promise<void>
 
+  // ═══════════════════════════════════════════════════
+  // API MESSAGE USAGE (Conversation API)
+  // ═══════════════════════════════════════════════════
+
+  /**
+   * Atomic: enforce BOTH per-key and per-workspace monthly caps in
+   * one RPC and reserve a message slot. Reason discriminator lets the
+   * caller surface the right 429 message ("your key is over" vs "the
+   * workspace plan is over").
+   *
+   * Pass `SOFT_CAP_MAX` (from `getEffectiveLimit`) for either limit
+   * when overage is enabled or the plan grants Infinity.
+   */
+  incrementAPIUsageIfAllowed: (input: {
+    workspaceId: string
+    apiKeyId: string
+    month: string
+    keyLimit: number
+    workspaceLimit: number
+  }) => Promise<{
+    allowed: boolean
+    reason: 'ok' | 'key_limit' | 'workspace_limit'
+    current: number
+  }>
+
+  /** Increment token counters on the `api_message_usage` row after the AI call settles. */
+  updateAPIUsageTokens: (input: {
+    workspaceId: string
+    apiKeyId: string
+    month: string
+    inputTokens: number
+    outputTokens: number
+  }) => Promise<void>
+
   // ═══════════════════════════════════════════════════
   // MEDIA ASSETS
   // ═══════════════════════════════════════════════════
diff --git a/server/providers/supabase-db/conversations.ts b/server/providers/supabase-db/conversations.ts
index 69d030d9..354de3a7 100644
--- a/server/providers/supabase-db/conversations.ts
+++ b/server/providers/supabase-db/conversations.ts
@@ -8,6 +8,7 @@ import { getAdmin, getUser } from './helpers'
 type ConversationMethods = Pick<
   DatabaseProvider,
   | 'createConversation'
+  | 'createApiConversation'
   | 'getConversation'
   | 'listConversations'
   | 'deleteConversation'
@@ -19,6 +20,8 @@ type ConversationMethods = Pick<
   | 'getMonthlyUsageSummary'
   | 'incrementAgentUsageIfAllowed'
   | 'updateAgentUsageTokens'
+  | 'incrementAPIUsageIfAllowed'
+  | 'updateAPIUsageTokens'
   | 'getBYOAKey'
 >
 
@@ -39,15 +42,31 @@ export function conversationMethods(): ConversationMethods {
       return data?.id ?? null
     },
 
+    async createApiConversation(projectId, apiKeyId, title) {
+      const admin = getAdmin()
+      const { data } = await admin
+        .from('conversations')
+        .insert({
+          project_id: projectId,
+          api_key_id: apiKeyId,
+          title: title.substring(0, 100),
+        })
+        .select('id')
+        .single()
+
+      return data?.id ?? null
+    },
+
     async getConversation(conversationId, projectId, filters) {
       const admin = getAdmin()
       let query = admin
         .from('conversations')
-        .select('id, title, user_id, created_at, updated_at')
+        .select('id, title, user_id, api_key_id, created_at, updated_at')
         .eq('id', conversationId)
         .eq('project_id', projectId)
 
-      if (filters?.userId) query = query.eq('user_id', filters.userId)
+      if (filters && 'userId' in filters) query = query.eq('user_id', filters.userId)
+      else if (filters && 'apiKeyId' in filters) query = query.eq('api_key_id', filters.apiKeyId)
 
       const { data, error } = await query.single()
       if (error) {
@@ -226,6 +245,37 @@ export function conversationMethods(): ConversationMethods {
       })
     },
 
+    // ─── API Message Usage (Conversation API) ───
+
+    async incrementAPIUsageIfAllowed(input) {
+      const admin = getAdmin()
+      const { data, error } = await admin.rpc('increment_api_usage_if_allowed', {
+        p_workspace_id: input.workspaceId,
+        p_api_key_id: input.apiKeyId,
+        p_month: input.month,
+        p_key_limit: input.keyLimit,
+        p_workspace_limit: input.workspaceLimit,
+      })
+
+      if (error) {
+        throw createError({ statusCode: 500, message: `Atomic API usage check failed: ${error.message}` })
+      }
+
+      const result = data as { allowed: boolean, reason: 'ok' | 'key_limit' | 'workspace_limit', current: number }
+      return result
+    },
+
+    async updateAPIUsageTokens(input) {
+      const admin = getAdmin()
+      await admin.rpc('increment_api_usage_tokens', {
+        p_workspace_id: input.workspaceId,
+        p_api_key_id: input.apiKeyId,
+        p_month: input.month,
+        p_input_tokens: input.inputTokens,
+        p_output_tokens: input.outputTokens,
+      })
+    },
+
     // ─── BYOA Key ───
 
     async getBYOAKey(accessToken, workspaceId, userId) {
diff --git a/server/providers/supabase-db/usage.ts b/server/providers/supabase-db/usage.ts
index edf0fcf7..4e20eba4 100644
--- a/server/providers/supabase-db/usage.ts
+++ b/server/providers/supabase-db/usage.ts
@@ -34,12 +34,13 @@ export function usageMethods(): UsageMethods {
     },
 
     async getWorkspaceMonthlyAPIUsage(workspaceId, month) {
+      // Conversation API usage is keyed by `api_key_id`, not `user_id`,
+      // and lives in its own aggregate table — see migration 006.
       const { data } = await getAdmin()
-        .from('agent_usage')
+        .from('api_message_usage')
         .select('message_count')
         .eq('workspace_id', workspaceId)
         .eq('month', month)
-        .eq('source', 'api')
 
       return (data ?? []).reduce(
         (sum: number, r: Record<string, unknown>) => sum + ((r.message_count as number) ?? 0),
diff --git a/server/utils/db.ts b/server/utils/db.ts
index e623e508..866be47c 100644
--- a/server/utils/db.ts
+++ b/server/utils/db.ts
@@ -219,11 +219,44 @@ export async function inviteOrLookupUser(
 
 // ─── Cross-domain: Chat Persistence ───
 
+async function persistChatMessages(input: {
+  conversationId: string
+  userMessage: string
+  assistantText: string
+  assistantContent: unknown[]
+  model: string
+  inputTokens: number
+  outputTokens: number
+}) {
+  const db = useDatabaseProvider()
+  await db.insertMessage({ conversationId: input.conversationId, role: 'user', content: input.userMessage })
+  try {
+    await db.insertMessage({
+      conversationId: input.conversationId,
+      role: 'assistant',
+      content: input.assistantText || '[tool calls]',
+      toolCalls: input.assistantContent.length > 0 ? input.assistantContent : null,
+      tokenCountInput: input.inputTokens,
+      tokenCountOutput: input.outputTokens,
+      model: input.model,
+    })
+  }
+  catch (err) {
+    // eslint-disable-next-line no-console
+    console.error('[chat-persist] Failed to insert assistant message:', err)
+    throw err
+  }
+}
+
 /**
- * Save chat messages + update token counts.
- * Message count is already reserved atomically before the chat via
- * incrementAgentUsageIfAllowed — this function only persists messages
- * and updates the token metadata on the existing usage row.
+ * Save chat messages + update token counts for a Studio (workspace
+ * member) chat. Message count is already reserved atomically before
+ * the AI call via `incrementAgentUsageIfAllowed`; this function only
+ * persists messages and bumps the token metadata on the row that the
+ * reservation created.
+ *
+ * Conversation API has its own actor model (key-keyed, not user-keyed)
+ * and a dedicated `api_message_usage` table — use `saveApiChatResult`.
  */
 export async function saveChatResult(
   conversationId: string,
@@ -235,32 +268,21 @@ export async function saveChatResult(
   outputTokens: number,
   workspaceId: string,
   userId: string,
-  usageSource: 'byoa' | 'studio' | 'api',
+  usageSource: 'byoa' | 'studio',
   usageMonth: string,
-  _apiKeyId?: string,
 ) {
   const db = useDatabaseProvider()
 
-  // Insert both messages together — if assistant insert fails, log but don't leave orphan
-  await db.insertMessage({ conversationId, role: 'user', content: userMessage })
-  try {
-    await db.insertMessage({
-      conversationId,
-      role: 'assistant',
-      content: assistantText || '[tool calls]',
-      toolCalls: assistantContent.length > 0 ? assistantContent : null,
-      tokenCountInput: inputTokens,
-      tokenCountOutput: outputTokens,
-      model,
-    })
-  }
-  catch (err) {
-    console.error('[saveChatResult] Failed to insert assistant message:', err)
-    throw err
-  }
+  await persistChatMessages({
+    conversationId,
+    userMessage,
+    assistantText,
+    assistantContent,
+    model,
+    inputTokens,
+    outputTokens,
+  })
 
-  // Token counts only — message_count already reserved atomically.
-  // Uses SQL increment to prevent concurrent overwrites.
   await db.updateAgentUsageTokens({
     workspaceId,
     userId,
@@ -272,3 +294,43 @@ export async function saveChatResult(
 
   await db.updateConversationTimestamp(conversationId)
 }
+
+/**
+ * Save chat messages + update token counts for a Conversation API
+ * chat. The actor here is an API key (no workspace member identity),
+ * so usage flows through `api_message_usage` instead of `agent_usage`.
+ */
+export async function saveApiChatResult(
+  conversationId: string,
+  userMessage: string,
+  assistantText: string,
+  assistantContent: unknown[],
+  model: string,
+  inputTokens: number,
+  outputTokens: number,
+  workspaceId: string,
+  apiKeyId: string,
+  usageMonth: string,
+) {
+  const db = useDatabaseProvider()
+
+  await persistChatMessages({
+    conversationId,
+    userMessage,
+    assistantText,
+    assistantContent,
+    model,
+    inputTokens,
+    outputTokens,
+  })
+
+  await db.updateAPIUsageTokens({
+    workspaceId,
+    apiKeyId,
+    month: usageMonth,
+    inputTokens,
+    outputTokens,
+  })
+
+  await db.updateConversationTimestamp(conversationId)
+}
diff --git a/supabase/migrations/006_api_message_usage_and_conversation_actor.sql b/supabase/migrations/006_api_message_usage_and_conversation_actor.sql
new file mode 100644
index 00000000..804ba529
--- /dev/null
+++ b/supabase/migrations/006_api_message_usage_and_conversation_actor.sql
@@ -0,0 +1,187 @@
+-- Conversation API actor model + dedicated API usage table.
+--
+-- Two related concerns ship together because either alone leaves the
+-- Conversation API endpoint broken at the next DB call:
+--
+-- 1. API message billing
+--    The Conversation API actor is an API key, not a user. Earlier
+--    code routed through `agent_usage` with `source='api'`, which
+--    violates both `agent_usage_source_check` (only allows
+--    'studio'|'byoa') and `agent_usage_user_id_fkey` (FK → profiles).
+--    Squeezing API usage into the per-user `agent_usage` model with
+--    nullable user_id + COALESCE-UNIQUE was rejected during review —
+--    actor types are too different (lifecycle, scope, permissions).
+--
+--    A dedicated `api_message_usage` table aggregates per
+--    `(workspace, api_key, month)` with both message_count and token
+--    counts, mirroring `agent_usage`'s aggregate shape but keyed by
+--    API key. The same pattern is expected for MCP Cloud keys later.
+--
+-- 2. Conversation ownership
+--    `conversations.user_id` is `NOT NULL REFERENCES profiles(id)`,
+--    so a Conversation-API-initiated chat would fail FK on
+--    createConversation even after the usage fix. This migration
+--    relaxes the column, adds nullable `api_key_id` FK, and enforces
+--    an XOR check so every row has exactly one actor. Existing rows
+--    all carry user_id and trivially satisfy the new check.
+--
+-- RLS is unaffected for the user path — `user_id = auth.uid()`
+-- evaluates to NULL (= "not visible") for API-owned rows, which is
+-- the correct behavior since API-owned conversations are only
+-- accessed via the service-role admin client.
+
+-- ─────────────────────────────────────────────────────────────────
+-- 1. api_message_usage table
+-- ─────────────────────────────────────────────────────────────────
+
+CREATE TABLE public.api_message_usage (
+  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
+  workspace_id uuid NOT NULL REFERENCES public.workspaces(id) ON DELETE CASCADE,
+  api_key_id uuid NOT NULL REFERENCES public.conversation_api_keys(id) ON DELETE CASCADE,
+  month text NOT NULL,
+  message_count integer NOT NULL DEFAULT 0,
+  input_tokens bigint NOT NULL DEFAULT 0,
+  output_tokens bigint NOT NULL DEFAULT 0,
+  updated_at timestamptz NOT NULL DEFAULT now(),
+  UNIQUE (workspace_id, api_key_id, month)
+);
+
+CREATE INDEX idx_api_message_usage_workspace_month
+  ON public.api_message_usage (workspace_id, month);
+
+ALTER TABLE public.api_message_usage ENABLE ROW LEVEL SECURITY;
+
+-- Workspace owners and admins can read API usage for their workspaces;
+-- mirrors the conversation_api_keys read policy so the billing UI can
+-- render per-key usage without exposing cross-workspace rows.
+CREATE POLICY "Workspace admins view API usage" ON public.api_message_usage
+  FOR SELECT USING (
+    workspace_id IN (
+      SELECT wm.workspace_id FROM public.workspace_members wm
+      WHERE wm.user_id = auth.uid()
+        AND wm.role IN ('owner', 'admin')
+    )
+  );
+
+-- All writes go through SECURITY DEFINER RPCs called from the service
+-- role; no INSERT/UPDATE policy is granted to authenticated users.
+
+-- ─────────────────────────────────────────────────────────────────
+-- 2. Atomic reserve: two-cap (per-key + per-workspace) message limit
+-- ─────────────────────────────────────────────────────────────────
+
+CREATE FUNCTION public.increment_api_usage_if_allowed(
+  p_workspace_id uuid,
+  p_api_key_id uuid,
+  p_month text,
+  p_key_limit integer,
+  p_workspace_limit integer
+) RETURNS jsonb
+LANGUAGE plpgsql SECURITY DEFINER
+SET search_path TO ''
+AS $$
+DECLARE
+  v_key_current INTEGER;
+  v_workspace_current INTEGER;
+BEGIN
+  -- Serialize concurrent reservations for the same key/month so two
+  -- racing requests can't both pass the cap check.
+  PERFORM pg_advisory_xact_lock(
+    hashtext(p_workspace_id::text || ':' || p_api_key_id::text || ':' || p_month)
+  );
+
+  -- Per-key current count
+  SELECT COALESCE(message_count, 0) INTO v_key_current
+  FROM public.api_message_usage
+  WHERE workspace_id = p_workspace_id
+    AND api_key_id = p_api_key_id
+    AND month = p_month;
+
+  IF v_key_current IS NULL THEN
+    v_key_current := 0;
+  END IF;
+
+  IF v_key_current >= p_key_limit THEN
+    RETURN jsonb_build_object(
+      'allowed', false,
+      'reason', 'key_limit',
+      'current', v_key_current
+    );
+  END IF;
+
+  -- Workspace total across every API key in this workspace+month
+  SELECT COALESCE(SUM(message_count), 0) INTO v_workspace_current
+  FROM public.api_message_usage
+  WHERE workspace_id = p_workspace_id
+    AND month = p_month;
+
+  IF v_workspace_current >= p_workspace_limit THEN
+    RETURN jsonb_build_object(
+      'allowed', false,
+      'reason', 'workspace_limit',
+      'current', v_workspace_current
+    );
+  END IF;
+
+  -- Reserve: atomic upsert +1 message
+  INSERT INTO public.api_message_usage (
+    workspace_id, api_key_id, month, message_count, input_tokens, output_tokens
+  )
+  VALUES (
+    p_workspace_id, p_api_key_id, p_month, 1, 0, 0
+  )
+  ON CONFLICT (workspace_id, api_key_id, month) DO UPDATE SET
+    message_count = public.api_message_usage.message_count + 1,
+    updated_at = now();
+
+  RETURN jsonb_build_object(
+    'allowed', true,
+    'reason', 'ok',
+    'current', v_key_current + 1
+  );
+END;
+$$;
+
+-- ─────────────────────────────────────────────────────────────────
+-- 3. Token accounting (after AI call completes)
+-- ─────────────────────────────────────────────────────────────────
+
+CREATE FUNCTION public.increment_api_usage_tokens(
+  p_workspace_id uuid,
+  p_api_key_id uuid,
+  p_month text,
+  p_input_tokens bigint,
+  p_output_tokens bigint
+) RETURNS void
+LANGUAGE plpgsql SECURITY DEFINER
+SET search_path TO ''
+AS $$
+BEGIN
+  UPDATE public.api_message_usage
+  SET
+    input_tokens = input_tokens + p_input_tokens,
+    output_tokens = output_tokens + p_output_tokens,
+    updated_at = now()
+  WHERE workspace_id = p_workspace_id
+    AND api_key_id = p_api_key_id
+    AND month = p_month;
+END;
+$$;
+
+-- ─────────────────────────────────────────────────────────────────
+-- 4. conversations actor relaxation
+-- ─────────────────────────────────────────────────────────────────
+
+ALTER TABLE public.conversations
+  ALTER COLUMN user_id DROP NOT NULL;
+
+ALTER TABLE public.conversations
+  ADD COLUMN api_key_id uuid REFERENCES public.conversation_api_keys(id) ON DELETE CASCADE;
+
+ALTER TABLE public.conversations
+  ADD CONSTRAINT conversations_actor_xor
+    CHECK (num_nonnulls(user_id, api_key_id) = 1);
+
+CREATE INDEX idx_conversations_api_key
+  ON public.conversations (api_key_id)
+  WHERE api_key_id IS NOT NULL;
diff --git a/tests/unit/db.test.ts b/tests/unit/db.test.ts
index fafb2ae9..6db62238 100644
--- a/tests/unit/db.test.ts
+++ b/tests/unit/db.test.ts
@@ -47,6 +47,7 @@ describe('db helpers', () => {
       insertMessage: vi.fn().mockResolvedValue(undefined),
       upsertAgentUsage: vi.fn().mockResolvedValue(undefined),
       updateAgentUsageTokens: vi.fn().mockResolvedValue(undefined),
+      updateAPIUsageTokens: vi.fn().mockResolvedValue(undefined),
       updateConversationTimestamp: vi.fn().mockResolvedValue(undefined),
       getBYOAKey: vi.fn().mockResolvedValue(null),
     }
@@ -145,7 +146,6 @@ describe('db helpers', () => {
       'user-1',
       'byoa',
       '2026-04',
-      'key-123',
     )
 
     expect(mockDb.updateAgentUsageTokens).toHaveBeenCalledWith(expect.objectContaining({
@@ -155,4 +155,38 @@ describe('db helpers', () => {
       outputTokens: 2,
     }))
   })
+
+  it('saveApiChatResult writes to api_message_usage, not agent_usage', async () => {
+    const { saveApiChatResult } = await loadDbModule()
+    await saveApiChatResult(
+      'conv-2',
+      'Hello',
+      'World',
+      [{ type: 'text', text: 'World' }],
+      'claude-sonnet-4-5',
+      11,
+      5,
+      'workspace-1',
+      'key-abc',
+      '2026-04',
+    )
+
+    // Messages persist into the shared messages table the same way as
+    // the Studio path; the difference is only in which usage table the
+    // token counters land on.
+    expect(mockDb.insertMessage).toHaveBeenCalledTimes(2)
+    expect(mockDb.insertMessage).toHaveBeenCalledWith(expect.objectContaining({ role: 'user', content: 'Hello' }))
+    expect(mockDb.insertMessage).toHaveBeenCalledWith(expect.objectContaining({ role: 'assistant', content: 'World' }))
+
+    expect(mockDb.updateAPIUsageTokens).toHaveBeenCalledWith({
+      workspaceId: 'workspace-1',
+      apiKeyId: 'key-abc',
+      month: '2026-04',
+      inputTokens: 11,
+      outputTokens: 5,
+    })
+    // Critical: must NOT touch the user-keyed agent_usage path.
+    expect(mockDb.updateAgentUsageTokens).not.toHaveBeenCalled()
+    expect(mockDb.updateConversationTimestamp).toHaveBeenCalledWith('conv-2')
+  })
 })