Address second review pass on freebuff model selector

jahooma · claude · jahooma · commit a1b3b280d50d · 2026-04-20T15:05:53.000-07:00
- Drop dead x-freebuff-model header on GET — the server only reads it on
  POST, and tick() always POSTs first so GET-before-POST never happens.
- Derive FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS from the server's
  FREE_MODE_AGENT_MODELS (agents whose allowlist includes every freebuff
  model) so adding a new model doesn't require updating two lists.
- Extract shouldReleaseSlot() — DELETE-eligibility predicate was inlined
  in two places.
- Probe Fireworks once per admission tick instead of N times (N = number
  of models). Adds a TODO for when we add a non-Fireworks model.
- Tighten model-selector key handler to /^[1-9]$/ so "1abc" isn't
  treated as 1.
- Make FREEBUFF_MODELS a literal tuple so isFreebuffModelId narrows to
  the actual id union instead of plain string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
@@ -47,10 +47,10 @@ export const FreebuffModelSelector: React.FC<FreebuffModelSelectorProps> = ({
     useCallback(
       (key: KeyEvent) => {
         if (disabled || pending) return
-        const digit = parseInt(key.name ?? '', 10)
-        if (!Number.isFinite(digit) || digit < 1 || digit > FREEBUFF_MODELS.length) {
-          return
-        }
+        const name = key.name ?? ''
+        if (!/^[1-9]$/.test(name)) return
+        const digit = Number(name)
+        if (digit > FREEBUFF_MODELS.length) return
         const target = FREEBUFF_MODELS[digit - 1]
         if (target && target.id !== selectedModel) {
           key.preventDefault?.()
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
@@ -20,9 +20,7 @@ const POLL_INTERVAL_ERROR_MS = 10_000
  *  account has rotated the id and respond with `{ status: 'superseded' }`. */
 const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id'
 
-/** Header sent on POST/GET telling the server which model's queue we want.
- *  POST uses it to (re-)join that model's queue; GET uses it only for the
- *  rare GET-before-POST edge where there's no row yet. */
+/** Header sent on POST telling the server which model's queue to join. */
 const FREEBUFF_MODEL_HEADER = 'x-freebuff-model'
 
 /** Play the terminal bell so users get an audible notification on admission. */
@@ -48,7 +46,7 @@ async function callSession(
   if (method === 'GET' && opts.instanceId) {
     headers[FREEBUFF_INSTANCE_HEADER] = opts.instanceId
   }
-  if ((method === 'POST' || method === 'GET') && opts.model) {
+  if (method === 'POST' && opts.model) {
     headers[FREEBUFF_MODEL_HEADER] = opts.model
   }
   const resp = await fetch(sessionEndpoint(), {
@@ -216,6 +214,21 @@ export function markFreebuffSessionEnded(): void {
   controller?.apply({ status: 'ended' })
 }
 
+/** True when the session row represents a server-side slot the caller is
+ *  holding (queued, active, or in the post-expiry grace window with a live
+ *  instance id). DELETE only matters in those states; otherwise we'd fire a
+ *  spurious request the server has nothing to act on. */
+function shouldReleaseSlot(
+  current: FreebuffSessionResponse | null,
+): boolean {
+  if (!current) return false
+  return (
+    current.status === 'queued' ||
+    current.status === 'active' ||
+    (current.status === 'ended' && Boolean(current.instanceId))
+  )
+}
+
 /**
  * Best-effort DELETE of the caller's session row. Used by exit paths that
  * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly
@@ -224,13 +237,7 @@ export function markFreebuffSessionEnded(): void {
 export async function endFreebuffSessionBestEffort(): Promise<void> {
   if (!IS_FREEBUFF) return
   const current = useFreebuffSessionStore.getState().session
-  if (!current) return
-  // Only fire DELETE if we actually held a slot.
-  const heldSlot =
-    current.status === 'queued' ||
-    current.status === 'active' ||
-    (current.status === 'ended' && Boolean(current.instanceId))
-  if (!heldSlot) return
+  if (!shouldReleaseSlot(current)) return
   const { token } = getAuthTokenDetails()
   if (!token) return
   try {
@@ -389,12 +396,7 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
 
       // Fire-and-forget DELETE. Only release if we actually held a slot so
       // we don't generate spurious DELETEs (e.g. HMR before POST completes).
-      if (
-        current &&
-        (current.status === 'queued' ||
-          current.status === 'active' ||
-          (current.status === 'ended' && current.instanceId))
-      ) {
+      if (shouldReleaseSlot(current)) {
         callSession('DELETE', token).catch(() => {})
       }
       setSession(null)
diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts
@@ -7,22 +7,25 @@ import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codeb
 
 import type { MCPConfig } from '@codebuff/common/types/mcp'
 
+import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents'
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
 import { getSelectedFreebuffModel } from '../state/freebuff-model-store'
 import { getProjectRoot } from '../project-files'
 import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants'
 import { logger } from './logger'
 import * as bundledAgentsModule from '../agents/bundled-agents.generated'
 
 /** Agents whose hardcoded model gets swapped out for the user's currently
- *  selected freebuff model. Each entry must also be allowlisted under the
- *  matching id in `FREE_MODE_AGENT_MODELS` (server-side check) for both
- *  glm-5.1 and minimax-m2.7 — otherwise the chat-completions endpoint will
- *  reject the request with `free_mode_invalid_agent_model`. */
-const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS = new Set([
-  'base2-free',
-  'editor-lite',
-  'code-reviewer-lite',
-])
+ *  selected freebuff model. Derived from the server's
+ *  `FREE_MODE_AGENT_MODELS` — any agent whose allowlist contains every
+ *  freebuff model is safe to retarget client-side without tripping the
+ *  server's `free_mode_invalid_agent_model` rejection. */
+const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet<string> = new Set(
+  Object.entries(FREE_MODE_AGENT_MODELS)
+    .filter(([, allowed]) => FREEBUFF_MODELS.every((m) => allowed.has(m.id)))
+    .map(([agentId]) => agentId),
+)
 
 import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition'
 
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
@@ -15,7 +15,7 @@ export interface FreebuffModelOption {
   tagline: string
 }
 
-export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [
+export const FREEBUFF_MODELS = [
   {
     id: 'z-ai/glm-5.1',
     displayName: 'GLM 5.1',
@@ -26,16 +26,22 @@ export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [
     displayName: 'MiniMax M2.7',
     tagline: 'Fast, lighter wait.',
   },
-] as const
+] as const satisfies readonly FreebuffModelOption[]
 
-export const DEFAULT_FREEBUFF_MODEL_ID: string = FREEBUFF_MODELS[0].id
+export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
 
-export function isFreebuffModelId(id: string | null | undefined): id is string {
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id
+
+export function isFreebuffModelId(
+  id: string | null | undefined,
+): id is FreebuffModelId {
   if (!id) return false
   return FREEBUFF_MODELS.some((m) => m.id === id)
 }
 
-export function resolveFreebuffModel(id: string | null | undefined): string {
+export function resolveFreebuffModel(
+  id: string | null | undefined,
+): FreebuffModelId {
   return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID
 }
 
diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts
@@ -39,10 +39,7 @@ function countryBlockedResponse(req: NextRequest): NextResponse | null {
 /** Header the CLI uses to identify which instance is polling. Used by GET to
  *  detect when another CLI on the same account has rotated the id. */
 export const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id'
-/** Header the CLI uses to communicate which freebuff model it wants to be in
- *  the queue for. Used by both POST (join/switch) and GET (read-only — the
- *  server doesn't change the model on a GET, but uses the header for the
- *  rare GET-before-POST case where there's no row yet). */
+/** Header the CLI sends on POST to pick which model's queue to join. */
 export const FREEBUFF_MODEL_HEADER = 'x-freebuff-model'
 
 export interface FreebuffSessionDeps {
diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts
@@ -81,6 +81,12 @@ export async function runAdmissionTick(
 
   const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id)
 
+  // Probe upstream health once per tick. Today every model shares a Fireworks
+  // deployment so a single probe gates them all — TODO: when we add a
+  // non-Fireworks model, plumb a model/deploymentId into the probe.
+  const health = await deps.getFireworksHealth()
+  const sharedHealth = async () => health
+
   // Run per-model admission in parallel — they only contend on independent
   // advisory locks and a single update each.
   const perModel = await Promise.all(
@@ -89,7 +95,7 @@ export async function runAdmissionTick(
         model,
         sessionLengthMs: deps.sessionLengthMs,
         now,
-        getFireworksHealth: deps.getFireworksHealth,
+        getFireworksHealth: sharedHealth,
       })
       const depth = await deps.queueDepth({ model })
       return { model, admittedCount: admitted.length, depth, skipped }
@@ -101,8 +107,6 @@ export async function runAdmissionTick(
   const queueDepthByModel = Object.fromEntries(
     perModel.map((r) => [r.model, r.depth]),
   )
-  // Use the most-degraded skipped reason for the top-level result. They all
-  // come from the same shared probe so they'll usually agree anyway.
   const skipped = perModel.find((r) => r.skipped)?.skipped ?? null
 
   return {