Skip to content

Commit 8a2ae6b

Browse files
committed
feat: replace Kimi K2.6 with GLM 5.1 as freebuff deployment-hours model
- Switch base2-free, editor-lite, code-reviewer-lite agents from kimi-k2.6 to z-ai/glm-5.1
- Update FREEBUFF_KIMI_MODEL_ID → FREEBUFF_GLM_MODEL_ID constant
- Update Fireworks deployment map (mjb4i7ea), model map, and pricing
- Remove moonshotai/kimi-k2.6 and kimi-k2.6:nitro from ModelName type
- Update freebuff model selector to show GLM first with 'Smartest' tagline
- Update all test files with new model IDs and deployment IDs
- Update docs and scripts to reference GLM instead of Kimi
1 parent: 6043ee2 · commit: 8a2ae6b

18 files changed

Lines changed: 82 additions & 96 deletions

File tree

agents/base2/base2.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ export function createBase2(
2525
const isFree = mode === 'free' || mode === 'lite'
2626

2727
const isSonnet = false
28-
const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
28+
const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
2929

3030
return {
3131
publisher,

agents/editor/editor-lite.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
33
import type { AgentDefinition } from '../types/agent-definition'
44

55
const definition: AgentDefinition = {
6-
...createCodeEditor({ model: 'minimax' }),
6+
...createCodeEditor({ model: 'glm' }),
77
id: 'editor-lite',
88
}
99
export default definition

agents/reviewer/code-reviewer-lite.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
55
const definition: SecretAgentDefinition = {
66
id: 'code-reviewer-lite',
77
publisher,
8-
...createReviewer('moonshotai/kimi-k2.6'),
8+
...createReviewer('z-ai/glm-5.1'),
99
}
1010

1111
export default definition

agents/types/agent-definition.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -423,8 +423,6 @@ export type ModelName =
423423
// Other open source models
424424
| 'moonshotai/kimi-k2'
425425
| 'moonshotai/kimi-k2:nitro'
426-
| 'moonshotai/kimi-k2.6'
427-
| 'moonshotai/kimi-k2.6:nitro'
428426
| 'z-ai/glm-5'
429427
| 'z-ai/glm-5.1'
430428
| 'z-ai/glm-4.6'

cli/src/components/freebuff-model-selector.tsx

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import { Button } from './button'
66
import {
77
DEFAULT_FREEBUFF_MODEL_ID,
88
FREEBUFF_DEPLOYMENT_HOURS_LABEL,
9+
FREEBUFF_GLM_MODEL_ID,
910
FREEBUFF_MODELS,
1011
isFreebuffModelAvailable,
1112
} from '@codebuff/common/constants/freebuff-models'
@@ -19,6 +20,11 @@ import { useTheme } from '../hooks/use-theme'
1920

2021
import type { KeyEvent } from '@opentui/core'
2122

23+
const FREEBUFF_MODEL_SELECTOR_MODELS = [
24+
...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
25+
...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
26+
]
27+
2228
/**
2329
* Dual-purpose model picker:
2430
* - Pre-chat landing (session 'none'): user hasn't joined any queue. Picking
@@ -109,7 +115,7 @@ export const FreebuffModelSelector: React.FC = () => {
109115
const stackVertically = useMemo(() => {
110116
const BUTTON_CHROME = 4 // 2 border + 2 padding
111117
const GAP = 2
112-
const total = FREEBUFF_MODELS.reduce((sum, model, idx) => {
118+
const total = FREEBUFF_MODEL_SELECTOR_MODELS.reduce((sum, model, idx) => {
113119
const inner =
114120
2 /* indicator + space */ +
115121
model.displayName.length +
@@ -167,13 +173,15 @@ export const FreebuffModelSelector: React.FC = () => {
167173
}
168174
return
169175
}
170-
const currentIdx = FREEBUFF_MODELS.findIndex((m) => m.id === focusedId)
176+
const currentIdx = FREEBUFF_MODEL_SELECTOR_MODELS.findIndex(
177+
(m) => m.id === focusedId,
178+
)
171179
if (currentIdx === -1) return
172-
const len = FREEBUFF_MODELS.length
180+
const len = FREEBUFF_MODEL_SELECTOR_MODELS.length
173181
const nextIdx = isForward
174182
? (currentIdx + 1) % len
175183
: (currentIdx - 1 + len) % len
176-
const target = FREEBUFF_MODELS[nextIdx]
184+
const target = FREEBUFF_MODEL_SELECTOR_MODELS[nextIdx]
177185
if (target) {
178186
key.preventDefault?.()
179187
setFocusedId(target.id)
@@ -198,7 +206,7 @@ export const FreebuffModelSelector: React.FC = () => {
198206
alignItems: 'flex-start',
199207
}}
200208
>
201-
{FREEBUFF_MODELS.map((model) => {
209+
{FREEBUFF_MODEL_SELECTOR_MODELS.map((model) => {
202210
// 'Selected' means the dot is filled and the label is bold. On the
203211
// landing screen ('none') this tracks the pre-focused pick; on the
204212
// queued screen it tracks the model the server has us on. Either

common/src/constants/free-agents.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
2828
// Root orchestrator
2929
'base2-free': new Set([
3030
'minimax/minimax-m2.7',
31-
'moonshotai/kimi-k2.6',
31+
'z-ai/glm-5.1',
3232
]),
3333

3434
// File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
4646
// Editor for free mode
4747
'editor-lite': new Set([
4848
'minimax/minimax-m2.7',
49-
'moonshotai/kimi-k2.6',
49+
'z-ai/glm-5.1',
5050
]),
5151

5252
// Code reviewer for free mode
5353
'code-reviewer-lite': new Set([
5454
'minimax/minimax-m2.7',
55-
'moonshotai/kimi-k2.6',
55+
'z-ai/glm-5.1',
5656
]),
5757
}
5858

common/src/constants/freebuff-models.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ export interface FreebuffModelOption {
1818
}
1919

2020
export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT'
21-
export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
21+
export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
2222

2323
export const FREEBUFF_MODELS = [
2424
{
@@ -28,9 +28,9 @@ export const FREEBUFF_MODELS = [
2828
availability: 'always',
2929
},
3030
{
31-
id: FREEBUFF_KIMI_MODEL_ID,
32-
displayName: 'Kimi K2.6',
33-
tagline: 'Balanced',
31+
id: FREEBUFF_GLM_MODEL_ID,
32+
displayName: 'GLM 5.1',
33+
tagline: 'Smartest',
3434
availability: 'deployment_hours',
3535
},
3636
] as const satisfies readonly FreebuffModelOption[]

common/src/templates/initial-agents-dir/types/agent-definition.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -423,8 +423,6 @@ export type ModelName =
423423
// Other open source models
424424
| 'moonshotai/kimi-k2'
425425
| 'moonshotai/kimi-k2:nitro'
426-
| 'moonshotai/kimi-k2.6'
427-
| 'moonshotai/kimi-k2.6:nitro'
428426
| 'z-ai/glm-5'
429427
| 'z-ai/glm-5.1'
430428
| 'z-ai/glm-4.6'

docs/freebuff-waiting-room.md

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
66

77
1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
8-
2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.6 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
8+
2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
99
3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
1010

1111
Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
@@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r
149149
| Constant | Location | Default | Purpose |
150150
|---|---|---|---|
151151
| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. |
152-
| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.6` | Selectable models; each gets its own queue and admission slot. |
153-
| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.6` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
152+
| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `glm-5.1` | Selectable models; each gets its own queue and admission slot. |
153+
| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
154154
| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. |
155155
| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime |
156156
| `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
@@ -185,7 +185,7 @@ Response shapes:
185185
"queueDepth": 43, // size of this model's queue
186186
"queueDepthByModel": { // snapshot of every model's queue — powers the
187187
"minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing
188-
"moonshotai/kimi-k2.6": 4 // entries should be treated as 0.
188+
"z-ai/glm-5.1": 4 // entries should be treated as 0.
189189
},
190190
"estimatedWaitMs": 384000,
191191
"queuedAt": "2026-04-17T12:00:00Z"
@@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000
285285
- Position 1 → 0 (next tick admits you)
286286
- Position 2 → 24s, and so on.
287287

288-
`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.6` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
288+
`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
289289

290290
## CLI Integration (frontend-side contract)
291291

@@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
324324
| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
325325
| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. |
326326
| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. |
327-
| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded Kimi deployment doesn't block MiniMax admissions. |
327+
| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. |
328328
| Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy |
329329

330330
## Testing

scripts/test-fireworks-cache-intervals.ts

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
*
1414
* Models:
1515
* glm-5.1 (default) — z-ai/glm-5.1
16-
* kimi-k2.6 — moonshotai/kimi-k2.6
1716
* minimax — minimax/minimax-m2.5
1817
*
1918
* Flags:
@@ -25,11 +24,11 @@
2524
* # Default glm-5.1 serverless with default intervals
2625
* bun scripts/test-fireworks-cache-intervals.ts
2726
*
28-
* # Custom Kimi deployment with a faster sweep
29-
* bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=30,60,120,300,600
27+
* # Custom GLM deployment with a faster sweep
28+
* bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=30,60,120,300,600
3029
*
3130
* # Long sweep up to 1 hour
32-
* bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=60,300,600,1200,1800,2700,3600
31+
* bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=60,300,600,1200,1800,2700,3600
3332
*/
3433

3534
export {}
@@ -49,18 +48,11 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
4948
'glm-5.1': {
5049
id: 'z-ai/glm-5.1',
5150
standardModel: 'accounts/fireworks/models/glm-5p1',
51+
deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
5252
inputCostPerToken: 1.4 / 1_000_000,
5353
cachedInputCostPerToken: 0.26 / 1_000_000,
5454
outputCostPerToken: 4.4 / 1_000_000,
5555
},
56-
'kimi-k2.6': {
57-
id: 'moonshotai/kimi-k2.6',
58-
standardModel: 'accounts/fireworks/models/kimi-k2p6',
59-
deploymentModel: 'accounts/james-65d217/deployments/j8ar2x0y',
60-
inputCostPerToken: 0.6 / 1_000_000,
61-
cachedInputCostPerToken: 0.1 / 1_000_000,
62-
outputCostPerToken: 3.0 / 1_000_000,
63-
},
6456
minimax: {
6557
id: 'minimax/minimax-m2.5',
6658
standardModel: 'accounts/fireworks/models/minimax-m2p5',

0 commit comments

Comments
 (0)