Skip to content

Commit 27fb633

Browse files
jahooma and claude committed
Allow GLM rate-limit reclaim for existing queued/active row
requestSession is the takeover path as well as the join path, so a user whose 5th GLM admit put them at the cap would get rate_limited on CLI restart and lose access to their still-active session (or their queue position). Skip the quota check when the caller already holds a queued or active+unexpired row for the same model — admit counts only need to gate fresh admissions, not re-anchoring to an existing row. A request against an expired row still counts as a fresh admission, so the quota check still applies and the request is rejected when the user is at the cap.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 3a26454 commit 27fb633

2 files changed

Lines changed: 153 additions & 16 deletions

File tree

web/src/server/free-session/__tests__/public-api.test.ts

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,125 @@ describe('requestSession', () => {
436436
})
437437
})
438438

439+
test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
440+
// Reclaim path: user has an active+unexpired GLM session and restarts
441+
// the CLI. POST must rotate their instance id (takeover) and NOT reject
442+
// with rate_limited — otherwise they'd be stranded with a live session
443+
// they can't reconnect to. The 5th admission is already in the log, so
444+
// this also exercises "at the cap" rather than "over the cap".
445+
deps._tick(GLM_OPEN_TIME)
446+
const now = deps._now()
447+
// Seed 5 prior admits (the cap), with the latest one matching the
448+
// active row we're about to install.
449+
const ages = [19, 4, 3, 2, 0]
450+
for (const hoursAgo of ages) {
451+
deps.admits.push({
452+
user_id: 'u1',
453+
model: GLM_MODEL,
454+
admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
455+
})
456+
}
457+
// Install the active row directly (skipping the normal request path so
458+
// we don't have to unwind the rate-limit gate to set up the fixture).
459+
const admittedAt = new Date(now.getTime() - 30 * 60 * 1000)
460+
deps.rows.set('u1', {
461+
user_id: 'u1',
462+
status: 'active',
463+
active_instance_id: 'inst-pre',
464+
model: GLM_MODEL,
465+
queued_at: admittedAt,
466+
admitted_at: admittedAt,
467+
expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
468+
created_at: admittedAt,
469+
updated_at: admittedAt,
470+
})
471+
472+
const state = await requestSession({
473+
userId: 'u1',
474+
model: GLM_MODEL,
475+
deps,
476+
})
477+
expect(state.status).toBe('active')
478+
if (state.status !== 'active') throw new Error('unreachable')
479+
// Instance id rotated; quota snapshot still reflects the full window.
480+
expect(state.instanceId).not.toBe('inst-pre')
481+
expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
482+
})
483+
484+
test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
485+
// Same reclaim exception for queued rows: if a user has already queued
486+
// (say they slipped in just before their 5th admit landed), a subsequent
487+
// POST from the same CLI must preserve their queue position instead of
488+
// flipping to rate_limited.
489+
deps._tick(GLM_OPEN_TIME)
490+
const now = deps._now()
491+
for (let i = 0; i < GLM_LIMIT; i++) {
492+
deps.admits.push({
493+
user_id: 'u1',
494+
model: GLM_MODEL,
495+
admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
496+
})
497+
}
498+
const queuedAt = new Date(now.getTime() - 5 * 60 * 1000)
499+
deps.rows.set('u1', {
500+
user_id: 'u1',
501+
status: 'queued',
502+
active_instance_id: 'inst-pre',
503+
model: GLM_MODEL,
504+
queued_at: queuedAt,
505+
admitted_at: null,
506+
expires_at: null,
507+
created_at: queuedAt,
508+
updated_at: queuedAt,
509+
})
510+
511+
const state = await requestSession({
512+
userId: 'u1',
513+
model: GLM_MODEL,
514+
deps,
515+
})
516+
expect(state.status).toBe('queued')
517+
if (state.status !== 'queued') throw new Error('unreachable')
518+
// Same position (1) since we preserved queued_at and nobody else is
519+
// ahead; the instance id rotated so any prior CLI is superseded.
520+
expect(state.instanceId).not.toBe('inst-pre')
521+
expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
522+
})
523+
524+
test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
525+
// The stored row's expires_at is in the past, so it doesn't represent
526+
// an in-flight session. This POST is effectively a fresh request and
527+
// must be blocked by the quota.
528+
deps._tick(GLM_OPEN_TIME)
529+
const now = deps._now()
530+
const ages = [19, 4, 3, 2, 1]
531+
for (const hoursAgo of ages) {
532+
deps.admits.push({
533+
user_id: 'u1',
534+
model: GLM_MODEL,
535+
admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
536+
})
537+
}
538+
const admittedAt = new Date(now.getTime() - 2 * SESSION_LEN)
539+
deps.rows.set('u1', {
540+
user_id: 'u1',
541+
status: 'active',
542+
active_instance_id: 'inst-pre',
543+
model: GLM_MODEL,
544+
queued_at: admittedAt,
545+
admitted_at: admittedAt,
546+
expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
547+
created_at: admittedAt,
548+
updated_at: admittedAt,
549+
})
550+
const state = await requestSession({
551+
userId: 'u1',
552+
model: GLM_MODEL,
553+
deps,
554+
})
555+
expect(state.status).toBe('rate_limited')
556+
})
557+
439558
test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
440559
const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
441560
admitDeps._tick(GLM_OPEN_TIME)

web/src/server/free-session/public-api.ts

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,22 +250,40 @@ export async function requestSession(params: {
250250
// Rate-limit check runs before joinOrTakeOver so heavy users never even
251251
// create a queued row. Only models listed in RATE_LIMITS are gated; others
252252
// (Minimax today) fall through unchanged.
253-
const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
254-
if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
255-
// Oldest admit's window-anniversary is when one slot opens back up.
256-
// Clamped at 0 so a clock skew can't surface a negative retry-after.
257-
const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
258-
const retryAfterMs = Math.max(
259-
0,
260-
(snapshot.oldest?.getTime() ?? 0) + windowMs - nowOf(deps).getTime(),
261-
)
262-
return {
263-
status: 'rate_limited',
264-
model,
265-
limit: snapshot.info.limit,
266-
windowHours: snapshot.info.windowHours,
267-
recentCount: snapshot.info.recentCount,
268-
retryAfterMs,
253+
//
254+
// Takeover/reclaim exception: a user who already holds a queued or
255+
// active+unexpired row on this same model is re-anchoring (CLI restart,
256+
// same-account tab switch) rather than starting a new session. Admit
257+
// counts are written at promotion time, so the quota only needs to gate
258+
// fresh admissions — blocking a reclaim here would strand a user with an
259+
// active 5th session unable to reconnect after a CLI restart.
260+
const existing = await deps.getSessionRow(params.userId)
261+
const isReclaim =
262+
!!existing &&
263+
existing.model === model &&
264+
(existing.status === 'queued' ||
265+
(existing.status === 'active' &&
266+
!!existing.expires_at &&
267+
existing.expires_at.getTime() > now.getTime()))
268+
269+
if (!isReclaim) {
270+
const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
271+
if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
272+
// Oldest admit's window-anniversary is when one slot opens back up.
273+
// Clamped at 0 so a clock skew can't surface a negative retry-after.
274+
const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
275+
const retryAfterMs = Math.max(
276+
0,
277+
(snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(),
278+
)
279+
return {
280+
status: 'rate_limited',
281+
model,
282+
limit: snapshot.info.limit,
283+
windowHours: snapshot.info.windowHours,
284+
recentCount: snapshot.info.recentCount,
285+
retryAfterMs,
286+
}
269287
}
270288
}
271289

0 commit comments

Comments
 (0)