Allow GLM rate-limit reclaim for existing queued/active row

jahooma · claude · jahooma · commit 27fb6333534a · 2026-04-24T15:37:13.000-07:00
requestSession is the takeover path as well as the join path, so a user
whose 5th GLM admit put them at the cap would get rate_limited on CLI
restart and lose access to their still-active session (or their queue
position). Skip the quota check when the caller already holds a queued
or active+unexpired row for the same model — admit counts only need to
gate fresh admissions, not re-anchoring to an existing row. Expired
rows still count as fresh and remain blocked.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -436,6 +436,125 @@ describe('requestSession', () => {
     })
   })
 
+  test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
+    // Reclaim path: user has an active+unexpired GLM session and restarts
+    // the CLI. POST must rotate their instance id (takeover) and NOT reject
+    // with rate_limited — otherwise they'd be stranded with a live session
+    // they can't reconnect to. The 5th admission is already in the log, so
+    // this also exercises "at the cap" rather than "over the cap".
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    // Seed 5 prior admits (the cap). The newest (0h ago) stands in for the
+    // active row's admit, though the row below is stamped 30 minutes ago.
+    const ages = [19, 4, 3, 2, 0]
+    for (const hoursAgo of ages) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+      })
+    }
+    // Install the active row directly (skipping the normal request path so
+    // we don't have to unwind the rate-limit gate to set up the fixture).
+    const admittedAt = new Date(now.getTime() - 30 * 60 * 1000)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: admittedAt,
+      admitted_at: admittedAt,
+      expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+      created_at: admittedAt,
+      updated_at: admittedAt,
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('active')
+    if (state.status !== 'active') throw new Error('unreachable')
+    // Instance id rotated; quota snapshot still reports GLM_LIMIT admits.
+    expect(state.instanceId).not.toBe('inst-pre')
+    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+  })
+
+  test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+    // Same reclaim exception for queued rows: if a user has already queued
+    // (say they slipped in just before their 5th admit landed), a subsequent
+    // POST from the same CLI must preserve their queue position instead of
+    // flipping to rate_limited.
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    for (let i = 0; i < GLM_LIMIT; i++) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
+      })
+    }
+    const queuedAt = new Date(now.getTime() - 5 * 60 * 1000)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'queued',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: queuedAt,
+      admitted_at: null,
+      expires_at: null,
+      created_at: queuedAt,
+      updated_at: queuedAt,
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    // Position should stay 1 (queued_at kept, nobody ahead) but that is not
+    // asserted here; only the rotated instance id and the quota count are.
+    expect(state.instanceId).not.toBe('inst-pre')
+    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+  })
+
+  test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+    // The stored row was admitted 2×SESSION_LEN ago, so its expires_at is a
+    // full SESSION_LEN in the past and it is not an in-flight session. This
+    // POST is effectively a fresh request and must be blocked by the quota.
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    const ages = [19, 4, 3, 2, 1]
+    for (const hoursAgo of ages) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+      })
+    }
+    const admittedAt = new Date(now.getTime() - 2 * SESSION_LEN)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: admittedAt,
+      admitted_at: admittedAt,
+      expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+      created_at: admittedAt,
+      updated_at: admittedAt,
+    })
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('rate_limited')
+  })
+
   test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
     const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
     admitDeps._tick(GLM_OPEN_TIME)
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
@@ -250,22 +250,40 @@ export async function requestSession(params: {
   // Rate-limit check runs before joinOrTakeOver so heavy users never even
   // create a queued row. Only models listed in RATE_LIMITS are gated; others
   // (Minimax today) fall through unchanged.
-  const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
-  if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
-    // Oldest admit's window-anniversary is when one slot opens back up.
-    // Clamped at 0 so a clock skew can't surface a negative retry-after.
-    const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
-    const retryAfterMs = Math.max(
-      0,
-      (snapshot.oldest?.getTime() ?? 0) + windowMs - nowOf(deps).getTime(),
-    )
-    return {
-      status: 'rate_limited',
-      model,
-      limit: snapshot.info.limit,
-      windowHours: snapshot.info.windowHours,
-      recentCount: snapshot.info.recentCount,
-      retryAfterMs,
+  //
+  // Takeover/reclaim exception: a user who already holds a queued or
+  // active+unexpired row on this same model is re-anchoring (CLI restart,
+  // same-account tab switch) rather than starting a new session. Admit
+  // counts are written at promotion time, so the quota only needs to gate
+  // fresh admissions — blocking a reclaim here would strand a user with an
+  // active 5th session unable to reconnect after a CLI restart.
+  const existing = await deps.getSessionRow(params.userId)
+  const isReclaim =
+    !!existing &&
+    existing.model === model &&
+    (existing.status === 'queued' ||
+      (existing.status === 'active' &&
+        !!existing.expires_at &&
+        existing.expires_at.getTime() > now.getTime()))
+
+  if (!isReclaim) {
+    const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
+    if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
+      // Oldest admit's window-anniversary is when one slot opens back up.
+      // Clamped at 0 so a clock skew can't surface a negative retry-after.
+      const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
+      const retryAfterMs = Math.max(
+        0,
+        (snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(),
+      )
+      return {
+        status: 'rate_limited',
+        model,
+        limit: snapshot.info.limit,
+        windowHours: snapshot.info.windowHours,
+        recentCount: snapshot.info.recentCount,
+        retryAfterMs,
+      }
     }
   }