Skip to content

Commit 7dd903b

Browse files
jahooma and claude committed
Rate-limit freebuff GLM sessions to 5 per 20 hours
Adds a free_session_admit audit log (one row per queued→active transition) and gates POST /api/v1/freebuff/session against it so GLM 5.1 users who've already had 5 one-hour sessions in the last 20h are blocked with a new rate_limited status (HTTP 429). Queued/active responses now carry an optional rateLimit quota the CLI renders as "N / 5 used in last 20h" so users see their remaining allowance as soon as they join the waitlist. Minimax is left unlimited. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 359a039 commit 7dd903b

13 files changed

Lines changed: 3825 additions & 17 deletions

File tree

cli/src/app.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ const AuthedSurface = ({
380380
// 'queued' → waiting our turn
381381
// 'country_blocked' → terminal region-gate message
382382
// 'banned' → terminal account-banned message
383+
// 'rate_limited' → hit per-model session quota; terminal for this run
383384
//
384385
// 'ended' deliberately falls through to <Chat>: the agent may still be
385386
// finishing work under the server-side grace period, and the chat surface
@@ -390,7 +391,8 @@ const AuthedSurface = ({
390391
session.status === 'queued' ||
391392
session.status === 'none' ||
392393
session.status === 'country_blocked' ||
393-
session.status === 'banned')
394+
session.status === 'banned' ||
395+
session.status === 'rate_limited')
394396
) {
395397
return <WaitingRoomScreen session={session} error={sessionError} />
396398
}

cli/src/components/waiting-room-screen.tsx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ const formatElapsed = (ms: number): string => {
4444
return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
4545
}
4646

47+
/**
 * Formats a retry delay as a short duration phrase that reads naturally after
 * the word "in": "3h 20m", "45 min", "under a minute", or "a moment". Used on
 * the rate-limited screen ("Try again in …") so users know when they can try
 * again.
 *
 * @param ms Milliseconds until the user may retry.
 * @returns The duration phrase without a leading "in". Non-finite or
 *   non-positive input yields "a moment".
 */
const formatRetryAfter = (ms: number): string => {
  // The phrase must compose with the caller's "Try again in " prefix, so the
  // fallback is a noun phrase ("a moment") rather than "any moment now".
  if (!Number.isFinite(ms) || ms <= 0) return 'a moment'
  // Rounded to the nearest minute; the value shown is an approximation.
  const minutes = Math.round(ms / 60_000)
  if (minutes < 1) return 'under a minute'
  if (minutes < 60) return `${minutes} min`
  const hours = Math.floor(minutes / 60)
  const rem = minutes % 60
  // Drop the minutes part on exact hours ("2h" rather than "2h 0m").
  return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
}
58+
4759
export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
4860
session,
4961
error,
@@ -217,6 +229,18 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
217229
<span>Elapsed </span>
218230
{formatElapsed(elapsedMs)}
219231
</text>
232+
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
233+
rendered for rate-limited models so the Minimax queue stays
234+
clutter-free. */}
235+
{session.rateLimit && (
236+
<text style={{ fg: theme.muted, alignSelf: 'flex-start' }}>
237+
<span>Sessions </span>
238+
<span fg={theme.foreground}>
239+
{session.rateLimit.recentCount} / {session.rateLimit.limit}
240+
</span>
241+
<span> used in last {session.rateLimit.windowHours}h</span>
242+
</text>
243+
)}
220244
</box>
221245
</>
222246
)}
@@ -259,6 +283,29 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
259283
</text>
260284
</>
261285
)}
286+
287+
{/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
288+
last 20h). Terminal for this run — the user can exit and come
289+
back once the oldest session in the window rolls off. */}
290+
{session?.status === 'rate_limited' && (
291+
<>
292+
<text style={{ fg: theme.secondary, marginBottom: 1 }}>
293+
⚠ Session limit reached
294+
</text>
295+
<text style={{ fg: theme.muted, wrapMode: 'word' }}>
296+
You've used{' '}
297+
<span fg={theme.foreground}>
298+
{session.recentCount} of {session.limit}
299+
</span>{' '}
300+
hour-long sessions on {session.model} in the last{' '}
301+
{session.windowHours}h. Try again in{' '}
302+
<span fg={theme.foreground}>
303+
{formatRetryAfter(session.retryAfterMs)}
304+
</span>
305+
. Press Ctrl+C to exit.
306+
</text>
307+
</>
308+
)}
262309
</box>
263310
</box>
264311

cli/src/hooks/use-freebuff-session.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,19 @@ async function callSession(
8686
return body
8787
}
8888
}
89+
// 429 from POST is the per-model session-quota reject (e.g. too many GLM
90+
// sessions in the last 20h). Terminal for the current poll — the CLI shows
91+
// a screen explaining the limit and when the user can try again. The 429
92+
// status (rather than 200) keeps older CLIs in their error path so they
93+
// back off instead of tight-polling an unrecognized 200 body.
94+
if (resp.status === 429 && method === 'POST') {
95+
const body = (await resp.json().catch(() => null)) as
96+
| FreebuffSessionResponse
97+
| null
98+
if (body && body.status === 'rate_limited') {
99+
return body
100+
}
101+
}
89102
if (!resp.ok) {
90103
const text = await resp.text().catch(() => '')
91104
throw new Error(
@@ -119,6 +132,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
119132
case 'country_blocked':
120133
case 'banned':
121134
case 'model_locked':
135+
case 'rate_limited':
122136
return null
123137
}
124138
}

common/src/types/freebuff-session.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@
55
*
66
* The CLI uses these shapes directly; there are no client-only states.
77
*/
8+
9+
/**
10+
* Per-model usage counter surfaced to the CLI so the waiting-room UI can
11+
* render "N / M used in last Wh" alongside queue/active state. Present when
12+
* the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
13+
* per 20-hour window). `recentCount` is the number of admissions inside
14+
* `windowHours` at the time the response was produced — see also the
15+
* standalone `rate_limited` status for the reject path.
16+
*/
17+
export interface FreebuffSessionRateLimit {
18+
/** The freebuff model this quota applies to. */
model: string
19+
/** Max admissions permitted per window (e.g. 5). */
limit: number
20+
/** Rolling window size in hours (e.g. 20). */
windowHours: number
21+
/** Admission count inside the window when the response was produced. */
recentCount: number
22+
}
23+
824
export type FreebuffSessionServerResponse =
925
| {
1026
/** Waiting room is globally off; free-mode requests flow through
@@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
3854
queueDepthByModel: Record<string, number>
3955
estimatedWaitMs: number
4056
queuedAt: string
57+
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
58+
* for unlimited models or when the status was produced outside the
59+
* rate-limit check path (e.g. pure read via GET). */
60+
rateLimit?: FreebuffSessionRateLimit
4161
}
4262
| {
4363
status: 'active'
@@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
4767
admittedAt: string
4868
expiresAt: string
4969
remainingMs: number
70+
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
71+
* for unlimited models or when the status was produced outside the
72+
* rate-limit check path (e.g. pure read via GET). */
73+
rateLimit?: FreebuffSessionRateLimit
5074
}
5175
| {
5276
/** Session is over. While `instanceId` is present we're inside the
@@ -99,3 +123,24 @@ export type FreebuffSessionServerResponse =
99123
* stops polling and shows a banned message. */
100124
status: 'banned'
101125
}
126+
| {
127+
/** User has used up their per-model admission quota in the rolling
128+
* window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
129+
* /session before the user is placed in the queue. `retryAfterMs` is
130+
* the time until the oldest admission inside the window falls off
131+
* and one quota slot opens up — clients should show the user when
132+
* they can try again. Terminal for the CLI's current poll session;
133+
* the user can exit and come back later. */
134+
status: 'rate_limited'
135+
/** The freebuff model the user tried to join. */
136+
model: string
137+
/** Max admissions permitted per window (e.g. 5). */
138+
limit: number
139+
/** Rolling window size in hours (e.g. 20). */
140+
windowHours: number
141+
/** Admission count inside the window at check time — will be ≥ limit. */
142+
recentCount: number
143+
/** Milliseconds from now until the oldest admission in the window
144+
* exits and the user regains one quota slot. */
145+
retryAfterMs: number
146+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- free_session_admit: audit log with one row per queued-to-active freebuff
-- session transition; the per-model session rate limit is checked against it.
CREATE TABLE "free_session_admit" (
2+
"id" text PRIMARY KEY NOT NULL,
3+
"user_id" text NOT NULL,
4+
"model" text NOT NULL,
5+
"admitted_at" timestamp with time zone DEFAULT now() NOT NULL
6+
);
7+
--> statement-breakpoint
8+
-- Admission rows are deleted together with the owning user row (ON DELETE cascade).
ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
9+
-- Composite index on (user_id, model, admitted_at); presumably serves the
-- rolling-window admission count per user and model. TODO confirm against the query.
CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");

0 commit comments

Comments
 (0)