CodebuffAI
diff --git a/cli/src/app.tsx b/cli/src/app.tsx
Lines changed: 3 additions & 1 deletion
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
Lines changed: 47 additions & 0 deletions
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
Lines changed: 14 additions & 0 deletions
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
Lines changed: 45 additions & 0 deletions
diff --git a/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql
Lines changed: 9 additions & 0 deletions
@@ -380,6 +380,7 @@ const AuthedSurface = ({
   //   'queued' → waiting our turn
   //   'country_blocked' → terminal region-gate message
   //   'banned' → terminal account-banned message
+  //   'rate_limited' → hit per-model session quota; terminal for this run
   //
   // 'ended' deliberately falls through to <Chat>: the agent may still be
   // finishing work under the server-side grace period, and the chat surface
@@ -390,7 +391,8 @@ const AuthedSurface = ({
       session.status === 'queued' ||
       session.status === 'none' ||
       session.status === 'country_blocked' ||
-      session.status === 'banned')
+      session.status === 'banned' ||
+      session.status === 'rate_limited')
   ) {
     return <WaitingRoomScreen session={session} error={sessionError} />
   }
 
@@ -44,6 +44,18 @@ const formatElapsed = (ms: number): string => {
   return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
 }
 
+/**
+ * Formats a retry delay for the rate-limited screen, where the result is
+ * rendered directly after "Try again in ". Possible outputs: "3h 20m",
+ * "3h", "45 min", "under a minute", or "a moment".
+ *
+ * @param ms Milliseconds until the user may try again.
+ * @returns A short phrase that reads correctly after "Try again in ".
+ */
+const formatRetryAfter = (ms: number): string => {
+  // Non-finite or already-elapsed delays: worded so the surrounding
+  // sentence still reads naturally ("Try again in a moment").
+  if (!Number.isFinite(ms) || ms <= 0) return 'a moment'
+  if (ms < 60_000) return 'under a minute'
+  // Round up, never down: the hint must not tell the user to retry before
+  // the delay has actually elapsed.
+  const minutes = Math.ceil(ms / 60_000)
+  if (minutes < 60) return `${minutes} min`
+  const hours = Math.floor(minutes / 60)
+  const rem = minutes % 60
+  return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
+}
+
 export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
   session,
   error,
@@ -217,6 +229,18 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                   <span>Elapsed  </span>
                   {formatElapsed(elapsedMs)}
                 </text>
+                {/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
+                    rendered for rate-limited models so the Minimax queue stays
+                    clutter-free. */}
+                {session.rateLimit && (
+                  <text style={{ fg: theme.muted, alignSelf: 'flex-start' }}>
+                    <span>Sessions </span>
+                    <span fg={theme.foreground}>
+                      {session.rateLimit.recentCount} / {session.rateLimit.limit}
+                    </span>
+                    <span> used in last {session.rateLimit.windowHours}h</span>
+                  </text>
+                )}
               </box>
             </>
           )}
@@ -259,6 +283,29 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
               </text>
             </>
           )}
+
+          {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+              last 20h). Terminal for this run — the user can exit and come
+              back once the oldest session in the window rolls off. */}
+          {session?.status === 'rate_limited' && (
+            <>
+              <text style={{ fg: theme.secondary, marginBottom: 1 }}>
+                ⚠ Session limit reached
+              </text>
+              <text style={{ fg: theme.muted, wrapMode: 'word' }}>
+                You've used{' '}
+                <span fg={theme.foreground}>
+                  {session.recentCount} of {session.limit}
+                </span>{' '}
+                hour-long sessions on {session.model} in the last{' '}
+                {session.windowHours}h. Try again in{' '}
+                <span fg={theme.foreground}>
+                  {formatRetryAfter(session.retryAfterMs)}
+                </span>
+                . Press Ctrl+C to exit.
+              </text>
+            </>
+          )}
         </box>
       </box>
 
 
@@ -86,6 +86,19 @@ async function callSession(
       return body
     }
   }
+  // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+  // sessions in the last 20h). Terminal for the current poll — the CLI shows
+  // a screen explaining the limit and when the user can try again. The 429
+  // status (rather than 200) keeps older CLIs in their error path so they
+  // back off instead of tight-polling an unrecognized 200 body.
+  if (resp.status === 429 && method === 'POST') {
+    const body = (await resp.json().catch(() => null)) as
+      | FreebuffSessionResponse
+      | null
+    if (body && body.status === 'rate_limited') {
+      return body
+    }
+  }
   if (!resp.ok) {
     const text = await resp.text().catch(() => '')
     throw new Error(
@@ -119,6 +132,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
     case 'country_blocked':
     case 'banned':
     case 'model_locked':
+    case 'rate_limited':
       return null
   }
 }
 
@@ -5,6 +5,22 @@
  *
  * The CLI uses these shapes directly; there are no client-only states.
  */
+
+/**
+ * Per-model usage counter surfaced to the CLI so the waiting-room UI can
+ * render "N of M sessions used" alongside queue/active state. Present when
+ * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * per 20-hour window). `recentCount` is the number of admissions inside
+ * `windowHours` at the time the response was produced — see also the
+ * standalone `rate_limited` status for the reject path.
+ */
+export interface FreebuffSessionRateLimit {
+  /** Model this quota applies to — presumably the model the user joined. */
+  model: string
+  /** Admissions permitted per window (e.g. 5 for GLM 5.1). */
+  limit: number
+  /** Length of the quota window in hours (e.g. 20 for GLM 5.1). */
+  windowHours: number
+  /** Admissions inside the window when the response was produced. */
+  recentCount: number
+}
+
 export type FreebuffSessionServerResponse =
   | {
       /** Waiting room is globally off; free-mode requests flow through
@@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
       queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
+      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+       *  for unlimited models or when the status was produced outside the
+       *  rate-limit check path (e.g. pure read via GET). */
+      rateLimit?: FreebuffSessionRateLimit
     }
   | {
       status: 'active'
@@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
       admittedAt: string
       expiresAt: string
       remainingMs: number
+      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+       *  for unlimited models or when the status was produced outside the
+       *  rate-limit check path (e.g. pure read via GET). */
+      rateLimit?: FreebuffSessionRateLimit
     }
   | {
       /** Session is over. While `instanceId` is present we're inside the
@@ -99,3 +123,24 @@ export type FreebuffSessionServerResponse =
        *  stops polling and shows a banned message. */
       status: 'banned'
     }
+  | {
+      /** User has used up their per-model admission quota in the rolling
+       *  window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
+       *  /session before the user is placed in the queue. `retryAfterMs` is
+       *  the time until the oldest admission inside the window falls off
+       *  and one quota slot opens up — clients should show the user when
+       *  they can try again. Terminal for the CLI's current poll session;
+       *  the user can exit and come back later. */
+      status: 'rate_limited'
+      /** The freebuff model the user tried to join. */
+      model: string
+      /** Max admissions permitted per window (e.g. 5). */
+      limit: number
+      /** Rolling window size in hours (e.g. 20). */
+      windowHours: number
+      /** Admission count inside the window at check time — will be ≥ limit. */
+      recentCount: number
+      /** Milliseconds from now until the oldest admission in the window
+       *  exits and the user regains one quota slot. */
+      retryAfterMs: number
+    }
@@ -0,0 +1,9 @@
+-- Admission log: one row per (user, model, admission time).
+-- NOTE(review): presumably the source for the per-model session quota
+-- count — confirm against the rate-limit query.
+CREATE TABLE "free_session_admit" (
+	"id" text PRIMARY KEY NOT NULL,
+	"user_id" text NOT NULL,
+	"model" text NOT NULL,
+	"admitted_at" timestamp with time zone DEFAULT now() NOT NULL
+);
+--> statement-breakpoint
+-- Deleting a user cascades to that user's admission rows.
+ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+-- Composite index on (user_id, model, admitted_at).
+CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");