switch to kimi k2.6, 9am ET to 5pm PT

jahooma · jahooma · commit 6043ee25a8af · 2026-04-24T12:23:31.000-07:00
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
@@ -25,18 +25,16 @@ export function createBase2(
   const isFree = mode === 'free' || mode === 'lite'
 
   const isSonnet = false
-  const model = isFree ? 'minimax/minimax-m2.7' : 'anthropic/claude-opus-4.7'
+  const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
 
   return {
     publisher,
     model,
-    providerOptions: isFree
-      ? {
-          data_collection: 'deny',
-        }
-      : {
-          only: ['amazon-bedrock'],
-        },
+    providerOptions: isFree ? {
+      data_collection: 'deny',
+    } : {
+      only: ['amazon-bedrock'],
+    },
     displayName: 'Buffy the Orchestrator',
     spawnerPrompt:
       'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
 const definition: SecretAgentDefinition = {
   id: 'code-reviewer-lite',
   publisher,
-  ...createReviewer('minimax/minimax-m2.7'),
+  ...createReviewer('moonshotai/kimi-k2.6'),
 }
 
 export default definition
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
@@ -423,8 +423,8 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
-  | 'moonshotai/kimi-k2.5'
-  | 'moonshotai/kimi-k2.5:nitro'
+  | 'moonshotai/kimi-k2.6'
+  | 'moonshotai/kimi-k2.6:nitro'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
@@ -78,7 +78,8 @@ async function callSession(
   }
   // 409 from POST means the selected model cannot be joined right now, either
   // because an active session is locked to another model or because a
-  // deployment-hours-only model is closed. Surface both as non-throw states.
+  // model with limited availability is temporarily closed. Surface both as
+  // non-throw states.
   if (resp.status === 409 && method === 'POST') {
     const body = (await resp.json().catch(() => null)) as
       | FreebuffSessionResponse
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
   'base2-free': new Set([
     'minimax/minimax-m2.7',
-    'moonshotai/kimi-k2.5',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Editor for free mode
   'editor-lite': new Set([
     'minimax/minimax-m2.7',
-    'moonshotai/kimi-k2.5',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set([
     'minimax/minimax-m2.7',
-    'moonshotai/kimi-k2.5',
+    'moonshotai/kimi-k2.6',
   ]),
 }
 
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
@@ -18,7 +18,7 @@ export interface FreebuffModelOption {
 }
 
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT'
-export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.5'
+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 
 export const FREEBUFF_MODELS = [
   {
@@ -29,7 +29,7 @@ export const FREEBUFF_MODELS = [
   },
   {
     id: FREEBUFF_KIMI_MODEL_ID,
-    displayName: 'Kimi K2.5',
+    displayName: 'Kimi K2.6',
     tagline: 'Balanced',
     availability: 'deployment_hours',
   },
@@ -83,7 +83,7 @@ export function isFreebuffDeploymentHours(now: Date = new Date()): boolean {
   const eastern = getZonedParts(now, 'America/New_York')
   const pacific = getZonedParts(now, 'America/Los_Angeles')
   if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false
-  return eastern.minutes >= 9 * 60 && pacific.minutes < 24 * 60
+  return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60
 }
 
 export function isFreebuffModelAvailable(
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -423,8 +423,8 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
-  | 'moonshotai/kimi-k2.5'
-  | 'moonshotai/kimi-k2.5:nitro'
+  | 'moonshotai/kimi-k2.6'
+  | 'moonshotai/kimi-k2.6:nitro'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
@@ -93,8 +93,7 @@ export type FreebuffSessionServerResponse =
       requestedModel: string
     }
   | {
-      /** Requested model is valid but not selectable right now. Currently
-       *  used for deployment-hours-only models such as Kimi K2.5. */
+      /** Requested model is valid but not selectable right now. */
       status: 'model_unavailable'
       requestedModel: string
       availableHours: string
diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md
@@ -5,7 +5,7 @@
 The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
 
 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
-2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.5 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
+2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.6 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
 
 Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
@@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r
 | Constant | Location | Default | Purpose |
 |---|---|---|---|
 | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. |
-| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.5` | Selectable models; each gets its own queue and admission slot. |
-| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.5` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
+| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.6` | Selectable models; each gets its own queue and admission slot. |
+| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.6` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
 | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. |
 | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime |
 | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
@@ -185,7 +185,7 @@ Response shapes:
   "queueDepth": 43,        // size of this model's queue
   "queueDepthByModel": {   // snapshot of every model's queue — powers the
     "minimax/minimax-m2.7": 43, //  "N ahead" hint in the selector. Missing
-    "moonshotai/kimi-k2.5": 4   //  entries should be treated as 0.
+    "moonshotai/kimi-k2.6": 4   //  entries should be treated as 0.
   },
   "estimatedWaitMs": 384000,
   "queuedAt": "2026-04-17T12:00:00Z"
@@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000
 - Position 1 → 0 (next tick admits you)
 - Position 2 → 24s, and so on.
 
-`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.5` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
+`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.6` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
 
 ## CLI Integration (frontend-side contract)
 
diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts
@@ -13,7 +13,7 @@
  *
  * Models:
  *   glm-5.1   (default) — z-ai/glm-5.1
- *   kimi-k2.5           — moonshotai/kimi-k2.5
+ *   kimi-k2.6           — moonshotai/kimi-k2.6
  *   minimax             — minimax/minimax-m2.5
  *
  * Flags:
@@ -26,10 +26,10 @@
  *   bun scripts/test-fireworks-cache-intervals.ts
  *
  *   # Custom Kimi deployment with a faster sweep
- *   bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=30,60,120,300,600
+ *   bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=30,60,120,300,600
  *
  *   # Long sweep up to 1 hour
- *   bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=60,300,600,1200,1800,2700,3600
+ *   bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=60,300,600,1200,1800,2700,3600
  */
 
 export {}
@@ -53,10 +53,10 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
     cachedInputCostPerToken: 0.26 / 1_000_000,
     outputCostPerToken: 4.4 / 1_000_000,
   },
-  'kimi-k2.5': {
-    id: 'moonshotai/kimi-k2.5',
-    standardModel: 'accounts/fireworks/models/kimi-k2p5',
-    deploymentModel: 'accounts/james-65d217/deployments/y5b3z17u',
+  'kimi-k2.6': {
+    id: 'moonshotai/kimi-k2.6',
+    standardModel: 'accounts/fireworks/models/kimi-k2p6',
+    deploymentModel: 'accounts/james-65d217/deployments/j8ar2x0y',
     inputCostPerToken: 0.6 / 1_000_000,
     cachedInputCostPerToken: 0.1 / 1_000_000,
     outputCostPerToken: 3.0 / 1_000_000,
diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts
@@ -11,15 +11,15 @@
  *
  * Models:
  *   glm-5.1   (default) — z-ai/glm-5.1
- *   kimi-k2.5           — moonshotai/kimi-k2.5
+ *   kimi-k2.6           — moonshotai/kimi-k2.6
  *   minimax             — minimax/minimax-m2.5
  *   minimax-m2.7        — minimax/minimax-m2.7
  *
  * Flags:
  *   --deployment   Use custom deployment instead of serverless (standard API)
  *                  Serverless is the default
  * Examples:
- *   bun scripts/test-fireworks-long.ts kimi-k2.5 --deployment
+ *   bun scripts/test-fireworks-long.ts kimi-k2.6 --deployment
  */
 
 import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config'
@@ -45,10 +45,10 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
     cachedInputCostPerToken: 0.26 / 1_000_000,
     outputCostPerToken: 4.40 / 1_000_000,
   },
-  'kimi-k2.5': {
-    id: 'moonshotai/kimi-k2.5',
-    standardModel: 'accounts/fireworks/models/kimi-k2p5',
-    deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.5'],
+  'kimi-k2.6': {
+    id: 'moonshotai/kimi-k2.6',
+    standardModel: 'accounts/fireworks/models/kimi-k2p6',
+    deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.6'],
     inputCostPerToken: 0.60 / 1_000_000,
     cachedInputCostPerToken: 0.10 / 1_000_000,
     outputCostPerToken: 3.00 / 1_000_000,
@@ -75,9 +75,9 @@ const DEFAULT_MODEL = 'glm-5.1'
 const MODEL_ALIASES: Record<string, keyof typeof MODEL_CONFIGS> = {
   glm: 'glm-5.1',
   'z-ai/glm-5.1': 'glm-5.1',
-  kimi: 'kimi-k2.5',
-  'kimi-k2': 'kimi-k2.5',
-  'moonshotai/kimi-k2.5': 'kimi-k2.5',
+  kimi: 'kimi-k2.6',
+  'kimi-k2': 'kimi-k2.6',
+  'moonshotai/kimi-k2.6': 'kimi-k2.6',
   'minimax/minimax-m2.5': 'minimax',
   'minimax/minimax-m2.7': 'minimax-m2.7',
 }
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,8 +1,6 @@
 import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
 import { NextRequest } from 'next/server'
 
-import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models'
-
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
 
 import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
@@ -557,15 +555,15 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(response.status).toBe(200)
     })
 
-    it('lets freebuff use Kimi K2.5 through Fireworks availability rules', async () => {
+    it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => {
       const fetchedBodies: Record<string, unknown>[] = []
       const fetchViaFireworks = mock(
         async (_url: string | URL | Request, init?: RequestInit) => {
           fetchedBodies.push(JSON.parse(init?.body as string))
           return new Response(
             JSON.stringify({
               id: 'test-id',
-              model: 'accounts/james-65d217/deployments/y5b3z17u',
+              model: 'accounts/james-65d217/deployments/j8ar2x0y',
               choices: [{ message: { content: 'test response' } }],
               usage: {
                 prompt_tokens: 10,
@@ -587,7 +585,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-new-free' },
           body: JSON.stringify({
-            model: 'moonshotai/kimi-k2.5',
+            model: 'moonshotai/kimi-k2.6',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -612,19 +610,13 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       })
 
       const body = await response.json()
-      if (isFreebuffDeploymentHours()) {
-        expect(response.status).toBe(200)
-        expect(fetchedBodies).toHaveLength(1)
-        expect(fetchedBodies[0].model).toBe(
-          'accounts/james-65d217/deployments/y5b3z17u',
-        )
-        expect(body.model).toBe('moonshotai/kimi-k2.5')
-        expect(body.provider).toBe('Fireworks')
-      } else {
-        expect(response.status).toBe(503)
-        expect(fetchedBodies).toHaveLength(0)
-        expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
-      }
+      expect(response.status).toBe(200)
+      expect(fetchedBodies).toHaveLength(1)
+      expect(fetchedBodies[0].model).toBe(
+        'accounts/james-65d217/deployments/j8ar2x0y',
+      )
+      expect(body.model).toBe('moonshotai/kimi-k2.6')
+      expect(body.provider).toBe('Fireworks')
     })
 
     it('skips credit check when in FREE mode even with 0 credits', async () => {
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -158,7 +158,7 @@ describe('POST /api/v1/freebuff/session', () => {
   test('returns model_unavailable for Kimi outside deployment hours', async () => {
     const sessionDeps = makeSessionDeps()
     const resp = await postFreebuffSession(
-      makeReq('ok', { model: 'moonshotai/kimi-k2.5' }),
+      makeReq('ok', { model: 'moonshotai/kimi-k2.6' }),
       makeDeps(sessionDeps, 'u1'),
     )
     expect(resp.status).toBe(409)
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'`
`5`	`5`	`const definition: SecretAgentDefinition = {`
`6`	`6`	`id: 'code-reviewer-lite',`
`7`	`7`	`publisher,`
`8`		`- ...createReviewer('minimax/minimax-m2.7'),`
	`8`	`+ ...createReviewer('moonshotai/kimi-k2.6'),`
`9`	`9`	`}`
`10`	`10`
`11`	`11`	`export default definition`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@ export interface FreebuffModelOption {`
`18`	`18`	`}`
`19`	`19`
`20`	`20`	`export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT'`
`21`		`-export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.5'`
	`21`	`+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'`
`22`	`22`
`23`	`23`	`export const FREEBUFF_MODELS = [`
`24`	`24`	`{`
`@@ -29,7 +29,7 @@ export const FREEBUFF_MODELS = [`
`29`	`29`	`},`
`30`	`30`	`{`
`31`	`31`	`id: FREEBUFF_KIMI_MODEL_ID,`
`32`		`- displayName: 'Kimi K2.5',`
	`32`	`+ displayName: 'Kimi K2.6',`
`33`	`33`	`tagline: 'Balanced',`
`34`	`34`	`availability: 'deployment_hours',`
`35`	`35`	`},`
`@@ -83,7 +83,7 @@ export function isFreebuffDeploymentHours(now: Date = new Date()): boolean {`
`83`	`83`	`const eastern = getZonedParts(now, 'America/New_York')`
`84`	`84`	`const pacific = getZonedParts(now, 'America/Los_Angeles')`
`85`	`85`	`if (eastern.weekday === 'Sat' \|\| eastern.weekday === 'Sun') return false`
`86`		`- return eastern.minutes >= 9 * 60 && pacific.minutes < 24 * 60`
	`86`	`+ return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60`
`87`	`87`	`}`
`88`	`88`
`89`	`89`	`export function isFreebuffModelAvailable(`