From 1cf2626ea36936b6712a9642ca75324461a0657a Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 14:22:50 -0400 Subject: [PATCH 01/10] PagePool: materialize the reserved module/command tab; plumb priority through definition lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two narrowly-scoped fixes for the self-referential prerender deadlock that surfaces when a card render fires `_federated-search`, the search needs a card definition not yet in the modules table, `CachingDefinitionLookup` spawns a `prerenderModule` for the missing module, and that sub-prerender queues behind the very tab the file render is holding. 1. PagePool#selectEntryForAffinity: when a non-file caller enters the spawn branch and both orphan-claim + standby-commandeer miss, await `#ensureStandbyPool` synchronously and retry commandeer — regardless of priority and regardless of whether the high-priority tier is configured. The file-admission cap reserves global-pool headroom for one module/command slot per affinity; this change actually produces the tab that headroom was being held for. Mirrors the existing brand- new-affinity branch (entryList.length === 0) one branch further down. `#ensureStandbyPool` respects `#maxPages` via `#prepareSlotForStandby`, so this can't oversubscribe the global pool. 2. `CachingDefinitionLookup.getModuleDefinitionsViaPrerenderer`: thread priority through `prerenderModule(...)`. Today the call drops priority on the floor — user-initiated reindex work (priority 10) silently downgrades to priority 0 for its definition-lookup sub-prerenders. `LookupContext` was the wrong public surface (it carries `requestingRealm`, internal); added a narrower `DefinitionLookupOptions` type with `{ priority?: number }` instead and threaded it through `getModuleCacheEntry` → `loadModuleCacheEntry{,Uncached,Coordinated}` → `getModuleDefinitionsViaPrerenderer` → `prerenderModule`. IndexRunner's pre-warm call site passes `this.#jobPriority`. Tests: - `page-pool-standby-refill-test.ts`: held file render on affinity A + a module call on A produces a fresh tab synchronously, not queued on the busy tab. Bounded with a 2s timeout so a regression fails cleanly instead of hanging. - `definition-lookup-test.ts`: priority forwards end-to-end; cached entry skips re-prerender even when priorities differ; default priority forwards as `undefined` (which the prerender server treats as 0). Non-card-module path: `definitions: {}` persists to the modules table and short-circuits subsequent lookups, confirming the no-card- marker behavior the pre-warm cost analysis depends on. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/realm-server/prerender/page-pool.ts | 26 ++++ .../tests/definition-lookup-test.ts | 112 ++++++++++++++++++ .../tests/page-pool-standby-refill-test.ts | 63 ++++++++++ packages/runtime-common/definition-lookup.ts | 27 ++++- packages/runtime-common/index-runner.ts | 4 +- 5 files changed, 229 insertions(+), 3 deletions(-) diff --git a/packages/realm-server/prerender/page-pool.ts b/packages/realm-server/prerender/page-pool.ts index f2f2e953bb6..cd70b3069ce 100644 --- a/packages/realm-server/prerender/page-pool.ts +++ b/packages/realm-server/prerender/page-pool.ts @@ -1870,6 +1870,32 @@ export class PagePool { let releaseTab = await commandeered.queue.acquire(signal, priority); return { entry: commandeered, reused: false, releaseTab, tabStartupMs }; } + // module / command callers must produce the reserved tab the + // file-admission cap is supposed to keep room for. The cap + // bounds file workload at `affinityTabMax − 1`, leaving global- + // pool headroom for a non-file call — but the headroom is only + // a reservation, not a spawned tab. Without this synchronous + // refill+retry, the call falls through to the busy-tab fallback + // below and queues behind the file render that's awaiting this + // sub-render: the self-referential prerender deadlock. Mirrors + // the brand-new-affinity branch's await on `#ensureStandbyPool` + // (entryList.length === 0 path further down). `#ensureStandbyPool` + // respects `#maxPages` via `#prepareSlotForStandby`, so this + // can't oversubscribe the global pool. + if (queue !== 'file' && entryList.length < this.#affinityTabMax) { + if (this.#currentStandbyCount() < this.#desiredStandbyCount()) { + let startedAt = Date.now(); + await this.#ensureStandbyPool(); + tabStartupMs += Date.now() - startedAt; + } + let refilled = this.#commandeerDormantTab(affinityKey, { + standbyOnly: true, + }); + if (refilled) { + let releaseTab = await refilled.queue.acquire(signal, priority); + return { entry: refilled, reused: false, releaseTab, tabStartupMs }; + } + } // No orphan, no commandeer-able tab/standby. If we got here // through the dynamic-expansion escape hatch, drive an // expansion + fresh spawn so the saturated module/command diff --git a/packages/realm-server/tests/definition-lookup-test.ts b/packages/realm-server/tests/definition-lookup-test.ts index e1fb88eee57..ade33011c3e 100644 --- a/packages/realm-server/tests/definition-lookup-test.ts +++ b/packages/realm-server/tests/definition-lookup-test.ts @@ -99,16 +99,34 @@ module(basename(__filename), function () { let mockRemotePrerenderer: Prerenderer; let dbAdapter: PgAdapter; let prerenderModuleCalls: number = 0; + let prerenderModulePriorities: (number | undefined)[] = []; + let forceEmptyDefinitions: boolean = false; let virtualNetwork: VirtualNetwork; hooks.beforeEach(async () => { prerenderModuleCalls = 0; + prerenderModulePriorities = []; + forceEmptyDefinitions = false; }); hooks.before(async () => { virtualNetwork = createVirtualNetwork(); mockRemotePrerenderer = { async prerenderModule(args: ModulePrerenderArgs) { prerenderModuleCalls++; + prerenderModulePriorities.push(args.priority); + if (forceEmptyDefinitions) { + return Promise.resolve({ + id: 'example-id', + status: 'ready' as const, + nonce: '12345', + isShimmed: false, + lastModified: +new Date(), + createdAt: +new Date(), + deps: [], + definitions: {}, + error: undefined, + }) as Promise; + } let moduleURL = new URL(args.url); let modulePathWithoutExtension = moduleURL.href.replace(/\.gts$/, ''); return Promise.resolve({ @@ -317,6 +335,100 @@ module(basename(__filename), function () { ); }); + test('getModuleCacheEntry forwards priority to prerenderModule', async function (assert) { + await dbAdapter.execute('DELETE FROM modules'); + + await definitionLookup.getModuleCacheEntry(`${realmURL}person.gts`, { + priority: 10, + }); + assert.strictEqual( + prerenderModuleCalls, + 1, + 'prerenderModule was called once', + ); + assert.deepEqual( + prerenderModulePriorities, + [10], + 'priority 10 was forwarded to prerenderModule', + ); + + // A subsequent call with a different priority on a cached entry must + // not fire the prerenderer again — cache short-circuits. + await definitionLookup.getModuleCacheEntry(`${realmURL}person.gts`, { + priority: 0, + }); + assert.strictEqual( + prerenderModuleCalls, + 1, + 'cached entry returned without re-invoking the prerenderer', + ); + + // Default-priority call (no opts) must end up at priority `undefined` + // at the prerenderer, which the prerender server reads as 0. + await definitionLookup.invalidate(`${realmURL}person.gts`); + await definitionLookup.getModuleCacheEntry(`${realmURL}person.gts`); + assert.strictEqual(prerenderModuleCalls, 2); + assert.deepEqual( + prerenderModulePriorities, + [10, undefined], + 'omitted priority forwards as undefined (default = 0 on the server)', + ); + }); + + test('getModuleCacheEntry caches non-card modules as no-card markers', async function (assert) { + await dbAdapter.execute('DELETE FROM modules'); + forceEmptyDefinitions = true; + + let first = await definitionLookup.getModuleCacheEntry( + `${realmURL}person.gts`, + ); + assert.strictEqual( + prerenderModuleCalls, + 1, + 'prerenderModule was called once', + ); + assert.ok(first, 'returned a cache entry'); + assert.deepEqual( + first?.definitions, + {}, + 'no-card module returns empty definitions', + ); + + // The empty-definitions row must persist to the modules table and be + // treated as a valid cache hit on the next lookup — the prerenderer + // must not fire a second time. + let second = await definitionLookup.getModuleCacheEntry( + `${realmURL}person.gts`, + ); + assert.strictEqual( + prerenderModuleCalls, + 1, + 'second call short-circuited at the cache; prerenderModule still called once', + ); + assert.deepEqual( + second?.definitions, + {}, + 'second call returned the same empty-definitions row', + ); + + // Confirm the row really is in the database (not a transient in-memory + // entry that survived only the in-flight dedupe window). + let rows = await dbAdapter.execute( + `SELECT url, definitions FROM modules WHERE resolved_realm_url = $1`, + { bind: [realmURL] }, + ); + assert.strictEqual( + rows.length, + 1, + 'modules table has exactly one row for the realm', + ); + assert.deepEqual( + (rows[0] as { definitions: Record }).definitions, + {}, + 'persisted row has empty definitions', + ); + }); + test('invalidates cached module after module update', async function (assert) { await dbAdapter.execute('DELETE FROM modules'); diff --git a/packages/realm-server/tests/page-pool-standby-refill-test.ts b/packages/realm-server/tests/page-pool-standby-refill-test.ts index a8ec2400b71..234704145a6 100644 --- a/packages/realm-server/tests/page-pool-standby-refill-test.ts +++ b/packages/realm-server/tests/page-pool-standby-refill-test.ts @@ -228,4 +228,67 @@ module(basename(__filename), function (hooks) { resB.release(); gate.unblockAll(); }); + + // The file-admission cap reserves a slot per-affinity for module / command + // work, but only as a *cap* on file callers — it doesn't itself produce + // the reserved tab. `#selectEntryForAffinity` for a non-file caller used + // to fall through to `#selectLeastPendingTab` when both the orphan-claim + // and the standby-commandeer paths missed, queueing the caller behind + // the busy file render that was waiting on it: the self-referential + // prerender deadlock. The fix synchronously refills the standby pool + // and retries commandeer for module / command callers under the + // per-affinity cap, regardless of priority. + test('module call on an existing affinity with all owned tabs busy materializes a fresh tab', async function (assert) { + let gate = makeManualGate(); + let browserManager = makeBrowserStub({ gate: gate.gate }); + let pool = makePool({ maxPages: 2, browserManager }); + + // File render acquires the only tab on affinity A and holds it. + let held = await pool.getPage('A', 'file'); + assert.ok(held.pageId, 'first getPage on A returned a page'); + + // A module sub-render arrives for the same affinity while A1 is busy. + // Pre-fix: this would queue behind A1 on its per-tab queue (the + // deadlock state). Post-fix: a fresh tab is materialized. + let modCall = pool.getPage('A', 'module'); + + // Race the module call against the held tab — if the test ever hangs + // waiting on `modCall`, that's the deadlock regression returning. + // Bound with a 2s timeout so a regression fails the test cleanly. + let timeoutFired = false; + let timeoutHandle: NodeJS.Timeout | undefined; + let timer = new Promise<'timeout'>((resolve) => { + timeoutHandle = setTimeout(() => { + timeoutFired = true; + resolve('timeout'); + }, 2000); + }); + let winner = await Promise.race([ + modCall.then((r) => ({ kind: 'mod' as const, result: r })), + timer, + ]); + if (timeoutHandle) clearTimeout(timeoutHandle); + + assert.false( + timeoutFired, + 'module call did not block on the held file-render tab (no deadlock)', + ); + if (winner !== 'timeout') { + assert.strictEqual( + winner.kind, + 'mod', + 'module call resolved without waiting on the file render', + ); + // The module render must land on a different tab than the held one + // (which is still in use by the file render). + assert.notStrictEqual( + winner.result.pageId, + held.pageId, + 'module call got its own tab, distinct from the held file-render tab', + ); + winner.result.release(); + } + held.release(); + gate.unblockAll(); + }); }); diff --git a/packages/runtime-common/definition-lookup.ts b/packages/runtime-common/definition-lookup.ts index 394e97214c6..a79c8fac53a 100644 --- a/packages/runtime-common/definition-lookup.ts +++ b/packages/runtime-common/definition-lookup.ts @@ -258,6 +258,16 @@ export interface PopulateCoordinator { waitForKey(coalesceKey: string, timeoutMs: number): Promise; } +// Public option shape for definition lookup calls. `priority` is forwarded to +// the prerender server when a cache miss requires a sub-prerender — same +// numeric scale as worker-job priority (0 = system-initiated background, +// 10 = userInitiatedPriority). Callers in the indexer thread their job +// priority through here so user-initiated reindex work doesn't silently +// downgrade to background priority for its module sub-renders. +export interface DefinitionLookupOptions { + priority?: number; +} + export interface DefinitionLookup { lookupDefinition(codeRef: ResolvedCodeRef): Promise; // Like lookupDefinition but does not trigger a prerenderer call or @@ -272,7 +282,10 @@ export interface DefinitionLookup { clearAllModules(): Promise; registerRealm(realm: LocalRealm): void; forRealm(realm: LocalRealm): DefinitionLookup; - getModuleCacheEntry(moduleUrl: string): Promise; + getModuleCacheEntry( + moduleUrl: string, + opts?: DefinitionLookupOptions, + ): Promise; getModuleCacheEntries( query: ModuleCacheEntryQuery, ): Promise; @@ -383,6 +396,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { async getModuleCacheEntry( moduleUrl: string, + opts?: DefinitionLookupOptions, ): Promise { let canonicalModuleURL = canonicalURL(moduleUrl); let context = await this.buildLookupContext(canonicalModuleURL); @@ -403,6 +417,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope, cacheUserId, prerenderUserId, + priority: opts?.priority, }); } @@ -417,6 +432,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope: CacheScope; cacheUserId: string; prerenderUserId: string; + priority?: number; }): Promise { let key = inFlightKey(args); let existing = this.#inFlight.get(key); @@ -486,6 +502,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope: CacheScope; cacheUserId: string; prerenderUserId: string; + priority?: number; }, coordinator: PopulateCoordinator, ): Promise { @@ -534,6 +551,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope, cacheUserId, prerenderUserId, + priority, }: { moduleURL: string; realmURL: string; @@ -541,6 +559,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope: CacheScope; cacheUserId: string; prerenderUserId: string; + priority?: number; }): Promise { // Snapshot invalidation generations BEFORE the first await. // clearRealmCache (and any future synchronous bump) runs entirely before @@ -578,6 +597,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { candidateURL, realmURL, prerenderUserId, + priority, ); if ( response.status === 'error' && @@ -1082,6 +1102,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { moduleUrl: string, realmURL: string, userId: string, + priority?: number, ): Promise { let permissions = await fetchUserPermissions(this.#dbAdapter, { userId }); let auth = this.#createPrerenderAuth(userId, permissions); @@ -1091,6 +1112,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { realm: realmURL, url: moduleUrl, auth, + priority, }); } @@ -1735,8 +1757,9 @@ class RealmScopedDefinitionLookup implements DefinitionLookup { async getModuleCacheEntry( moduleUrl: string, + opts?: DefinitionLookupOptions, ): Promise { - return await this.#inner.getModuleCacheEntry(moduleUrl); + return await this.#inner.getModuleCacheEntry(moduleUrl, opts); } async getModuleCacheEntries( diff --git a/packages/runtime-common/index-runner.ts b/packages/runtime-common/index-runner.ts index be0a529b34b..a38c892378b 100644 --- a/packages/runtime-common/index-runner.ts +++ b/packages/runtime-common/index-runner.ts @@ -630,7 +630,9 @@ export class IndexRunner { let failed = 0; for (let moduleUrl of toWarm) { try { - await this.#definitionLookup.getModuleCacheEntry(moduleUrl); + await this.#definitionLookup.getModuleCacheEntry(moduleUrl, { + priority: this.#jobPriority, + }); } catch { failed += 1; } From 0aa6fa912f1dc86bfc96dc5288f737e25c9c03e1 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 14:48:38 -0400 Subject: [PATCH 02/10] Address Codex review: await ensureStandbyPool unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `current < desired` guard around the new synchronous refill in #selectEntryForAffinity skipped the await when a refill was already in flight. `#currentStandbyCount() = #standbys.size + #creatingStandbys`, so when a file render just consumed the only standby and its post- acquire kickStandbyRefill is mid-creation, creatingStandbys > 0 inflates current to match desired even though no real standby exists yet. The follow-up commandeer then fails and the caller falls through to the busy-tab branch — exactly the deadlock the change was meant to prevent. Drop the guard. #ensureStandbyPool deduplicates internally via #ensuringStandbys, so an unconditional await is a no-op when there is nothing to do and waits on the in-flight refill when there is. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/realm-server/prerender/page-pool.ts | 24 +++++++++++++++---- .../tests/page-pool-standby-refill-test.ts | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/packages/realm-server/prerender/page-pool.ts b/packages/realm-server/prerender/page-pool.ts index cd70b3069ce..b72e6943e95 100644 --- a/packages/realm-server/prerender/page-pool.ts +++ b/packages/realm-server/prerender/page-pool.ts @@ -1882,12 +1882,26 @@ export class PagePool { // (entryList.length === 0 path further down). `#ensureStandbyPool` // respects `#maxPages` via `#prepareSlotForStandby`, so this // can't oversubscribe the global pool. + // + // We await `#ensureStandbyPool` UNCONDITIONALLY here (not gated + // on `current < desired`). Reason: `#currentStandbyCount` = + // `#standbys.size + #creatingStandbys`. If the file render that + // arrived just before this caller consumed the only standby and + // its post-acquire `#kickStandbyRefill` is already creating a + // replacement, `creatingStandbys > 0` inflates `current` to + // meet `desired` while `#standbys.size` is still 0. A + // `current < desired` guard would skip the await; the + // subsequent `commandeerDormantTab(standbyOnly:true)` would + // then fail to find a real standby and the caller would fall + // through to the busy-tab branch — exactly the deadlock this + // change is meant to prevent. Awaiting unconditionally lets + // the in-flight refill complete (the `#ensuringStandbys` + // dedup makes the call a no-op when there is no in-flight + // refill and a wait when there is). if (queue !== 'file' && entryList.length < this.#affinityTabMax) { - if (this.#currentStandbyCount() < this.#desiredStandbyCount()) { - let startedAt = Date.now(); - await this.#ensureStandbyPool(); - tabStartupMs += Date.now() - startedAt; - } + let startedAt = Date.now(); + await this.#ensureStandbyPool(); + tabStartupMs += Date.now() - startedAt; let refilled = this.#commandeerDormantTab(affinityKey, { standbyOnly: true, }); diff --git a/packages/realm-server/tests/page-pool-standby-refill-test.ts b/packages/realm-server/tests/page-pool-standby-refill-test.ts index 234704145a6..d94fe685537 100644 --- a/packages/realm-server/tests/page-pool-standby-refill-test.ts +++ b/packages/realm-server/tests/page-pool-standby-refill-test.ts @@ -235,7 +235,7 @@ module(basename(__filename), function (hooks) { // to fall through to `#selectLeastPendingTab` when both the orphan-claim // and the standby-commandeer paths missed, queueing the caller behind // the busy file render that was waiting on it: the self-referential - // prerender deadlock. The fix synchronously refills the standby pool + // prerender deadlock. The fix unconditionally awaits `#ensureStandbyPool` // and retries commandeer for module / command callers under the // per-affinity cap, regardless of priority. test('module call on an existing affinity with all owned tabs busy materializes a fresh tab', async function (assert) { From 9c3ddbb35855e7deaaf60049d4c9575a6f2ab5e0 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 14:59:54 -0400 Subject: [PATCH 03/10] Drop the same current --- packages/realm-server/prerender/page-pool.ts | 45 ++++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/packages/realm-server/prerender/page-pool.ts b/packages/realm-server/prerender/page-pool.ts index b72e6943e95..e423f56efd1 100644 --- a/packages/realm-server/prerender/page-pool.ts +++ b/packages/realm-server/prerender/page-pool.ts @@ -1877,11 +1877,15 @@ export class PagePool { // a reservation, not a spawned tab. Without this synchronous // refill+retry, the call falls through to the busy-tab fallback // below and queues behind the file render that's awaiting this - // sub-render: the self-referential prerender deadlock. Mirrors - // the brand-new-affinity branch's await on `#ensureStandbyPool` - // (entryList.length === 0 path further down). `#ensureStandbyPool` - // respects `#maxPages` via `#prepareSlotForStandby`, so this - // can't oversubscribe the global pool. + // sub-render: the self-referential prerender deadlock. + // `#ensureStandbyPool` respects `#maxPages` via + // `#prepareSlotForStandby`, so this can't oversubscribe the + // global pool. Note this path only fires under + // `entryList.length < #affinityTabMax` — the at-cap case (every + // tab held by file renders, no dynamic-expansion budget) still + // falls through to busy-tab below. That residual deadlock + // requires either operator-side capacity tuning or the high- + // priority tier escape hatch beneath this branch. // // We await `#ensureStandbyPool` UNCONDITIONALLY here (not gated // on `current < desired`). Reason: `#currentStandbyCount` = @@ -1894,10 +1898,15 @@ export class PagePool { // subsequent `commandeerDormantTab(standbyOnly:true)` would // then fail to find a real standby and the caller would fall // through to the busy-tab branch — exactly the deadlock this - // change is meant to prevent. Awaiting unconditionally lets - // the in-flight refill complete (the `#ensuringStandbys` - // dedup makes the call a no-op when there is no in-flight - // refill and a wait when there is). + // change is meant to prevent. Two scenarios produce no-op + // behavior: (a) the pool is genuinely healthy with + // `#standbys.size >= desired` and no refill in flight — + // `#ensureStandbyPoolInternal`'s loop returns at line 1266 + // (`current >= desired`); (b) a refill is in flight — + // `#ensureStandbyPool` returns the existing `#ensuringStandbys` + // promise via dedup at line 1242 and we wait for it. Both + // produce the right shape: no spurious creation when not needed, + // wait when needed. if (queue !== 'file' && entryList.length < this.#affinityTabMax) { let startedAt = Date.now(); await this.#ensureStandbyPool(); @@ -1964,11 +1973,19 @@ export class PagePool { // #ensureStandbyPool()` in `getPage` made this ordering implicit; // now we make it explicit here so brand-new affinities still get // a fresh standby in preference to busy-tab queueing. - if (this.#currentStandbyCount() < this.#desiredStandbyCount()) { - let startedAt = Date.now(); - await this.#ensureStandbyPool(); - tabStartupMs += Date.now() - startedAt; - } + // + // Await `#ensureStandbyPool` unconditionally rather than gating + // on `current < desired`. `#currentStandbyCount` includes + // `#creatingStandbys`, so a refill that's in flight on another + // affinity inflates `current` past `desired` here while + // `#standbys.size` is still 0 — gating would prematurely fall + // through to cross-affinity steal instead of waiting for the + // standby a peer just kicked off. Mirrors the same fix in the + // non-file spawn-branch above. `#ensuringStandbys` dedup makes + // this a no-op when the pool is genuinely healthy. + let startedAt = Date.now(); + await this.#ensureStandbyPool(); + tabStartupMs += Date.now() - startedAt; throwIfAborted(signal); let retryShared = this.#tryClaimOrphanContext(affinityKey); if (retryShared) { From 8129768565da1df794a7c4a883a50cc4bd896eac Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 15:41:20 -0400 Subject: [PATCH 04/10] Plumb job priority through lookupDefinition (x-boxel-job-priority header); fix queues-same-realm regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes addressing review feedback on the earlier PR A commits. 1. Priority threading through the lookupDefinition path The PR's previous priority plumbing covered `getModuleCacheEntry` (the pre-warm path) but not `lookupDefinition`, which is the path that fires sub-`prerenderModule` calls from `_federated-search` during a card render — i.e. the actual deadlock site. This closes the gap. Chain: - `runtime-common/prerender-headers.ts`: new `X_BOXEL_JOB_PRIORITY_HEADER` constant + `sanitizeJobPriorityHeader` sanitizer. Same scale as worker-job priority (0/10 etc), non-negative safe integer. - `realm-server/prerender/prerender-constants.ts`: re-exports as `PRERENDER_JOB_PRIORITY_HEADER` alongside `PRERENDER_JOB_ID_HEADER`. - `remote-prerenderer.ts`: producer sends the header on every outbound prerender request (visit / module / command / screenshot) when the attributes include `priority`. - `render-runner.ts`: prerenderVisitAttempt's CDP page.evaluate now injects `globalThis.__boxelJobPriority` alongside the existing `__boxelJobId`, scoped to the render. - `host/app/services/store.ts`: `jobPriorityHeader()` reads `__boxelJobPriority` and stamps `X-Boxel-Job-Priority` on outbound `_federated-search` fetches alongside the existing `X-Boxel-Job-Id` / `X-Boxel-Consuming-Realm` plumbing. - `realm-server/handlers/handle-search.ts`: extracts and sanitizes the header, threads `priority` into the search opts passed to `searchRealms`. - `realm-index-query-engine.ts`: `Options` carries `priority`; `lookupDefinitionForOpts` and the `_addCardConditions` populate- query-fields paths thread it into `lookupDefinition(codeRef, opts)`. - `definition-lookup.ts`: `DefinitionLookup.lookupDefinition` now accepts the existing `DefinitionLookupOptions` shape; `LookupContext` carries `priority`; `lookupDefinitionWithContext` threads it into the `loadModuleCacheEntry` args alongside the existing fields. The `RealmScopedDefinitionLookup` wrapper forwards opts to `lookupDefinitionForRealm`. User / API callers don't stamp the header (they aren't inside a prerender tab), so live traffic continues to fall back to priority 0 — same observable behavior as today. Indexer-driven card renders fire their sub-prerenders at the originating job's priority, which is useful for observability (timing_diagnostics.priority on errored rows) and for the high-priority-tier expansion path when those env vars are configured. 2. Revert brand-new-affinity branch's unconditional await The earlier commit removed the `current < desired` gate on the brand-new-affinity branch's `await #ensureStandbyPool()` in `#selectEntryForAffinity`, mirroring the same fix applied to the new non-file spawn-branch. Code-review feedback suggested it was symmetric — but it isn't, and CI caught the regression: `prerendering-test.ts > queues same-realm request when tab is transitioning` now times out. Root cause: the unconditional await yields one extra microtask even when `current >= desired` already. In the brand-new-affinity case that microtask shifts the ordering against a concurrent same-affinity file caller arriving on an idle tab — the same caller bumps `#queue.pendingCount` past 1 before the cross-affinity-steal scan, the scan's `pendingCount > 1` filter rejects, and the brand-new caller throws `'No standby page available for prerender'` instead of queueing. The gate is asymmetric on purpose: - Non-file spawn-branch (entryList.length > 0, queue != 'file'): await unconditionally — the cost of skipping is unbounded (the caller queues on the very tab whose work blocks it). - Brand-new-affinity branch (entryList.length === 0): keep the gate — the cost of skipping is bounded (a cross-affinity-steal hop, itself a designed fallback). Adding the extra microtask actively breaks an existing test that relies on the synchronous ordering. Comment updated to explain the asymmetry; the test that caught this is named in the comment for future reference. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/services/store.ts | 16 +++++++++ .../realm-server/handlers/handle-search.ts | 23 +++++++++--- packages/realm-server/prerender/page-pool.ts | 28 ++++++++------- .../prerender/prerender-constants.ts | 10 ++++++ .../prerender/remote-prerenderer.ts | 16 ++++++++- .../realm-server/prerender/render-runner.ts | 29 ++++++++++----- packages/runtime-common/definition-lookup.ts | 35 ++++++++++++++++--- packages/runtime-common/prerender-headers.ts | 33 +++++++++++++++++ .../realm-index-query-engine.ts | 15 ++++++-- 9 files changed, 173 insertions(+), 32 deletions(-) diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts index 38b2ca893c1..65a4eb34774 100644 --- a/packages/host/app/services/store.ts +++ b/packages/host/app/services/store.ts @@ -35,6 +35,7 @@ import { resolveFileDefCodeRef, X_BOXEL_CONSUMING_REALM_HEADER, X_BOXEL_JOB_ID_HEADER, + X_BOXEL_JOB_PRIORITY_HEADER, Deferred, delay, mergeRelationships, @@ -159,6 +160,19 @@ function jobIdHeader(): Record { let j = (globalThis as unknown as { __boxelJobId?: string }).__boxelJobId; return j ? { [X_BOXEL_JOB_ID_HEADER]: j } : {}; } + +// Companion to `jobIdHeader()`. The prerender server's render-runner +// also injects `__boxelJobPriority` onto the page before transitioning +// into the render route. Outside a prerender tab the global is +// undefined and we send no header — user / API callers leave the +// realm-server's lookup paths to fall back to priority 0 as today. +function jobPriorityHeader(): Record { + let p = (globalThis as unknown as { __boxelJobPriority?: number }) + .__boxelJobPriority; + return typeof p === 'number' && Number.isSafeInteger(p) && p >= 0 + ? { [X_BOXEL_JOB_PRIORITY_HEADER]: String(p) } + : {}; +} const queryFieldSeedFromSearchSymbol = Symbol.for( 'cardstack-query-field-seed-from-search', ); @@ -871,6 +885,7 @@ export default class StoreService extends Service implements StoreInterface { ...duringPrerenderHeaders(), ...consumingRealmHeader(), ...jobIdHeader(), + ...jobPriorityHeader(), }, body: JSON.stringify({ ...query, realms }), }, @@ -940,6 +955,7 @@ export default class StoreService extends Service implements StoreInterface { ...duringPrerenderHeaders(), ...consumingRealmHeader(), ...jobIdHeader(), + ...jobPriorityHeader(), }, body: JSON.stringify({ ...query, realms }), }, diff --git a/packages/realm-server/handlers/handle-search.ts b/packages/realm-server/handlers/handle-search.ts index a6128279415..0d47ace9c04 100644 --- a/packages/realm-server/handlers/handle-search.ts +++ b/packages/realm-server/handlers/handle-search.ts @@ -22,6 +22,8 @@ import { import type { JobScopedSearchCache } from '../job-scoped-search-cache'; import { PRERENDER_JOB_ID_HEADER, + PRERENDER_JOB_PRIORITY_HEADER, + sanitizeJobPriorityHeader, sanitizePrerenderJobId, } from '../prerender/prerender-constants'; @@ -53,14 +55,27 @@ export default function handleSearch(opts?: { } let cacheOnlyDefinitions = ctxt.get(DURING_PRERENDER_HEADER).length > 0; - let searchOpts = cacheOnlyDefinitions - ? { cacheOnlyDefinitions: true } - : undefined; + // The host's `_federated-search` fetch wrapper stamps + // `x-boxel-job-priority` while rendering inside a prerender tab. + // Threading it into search opts here lets `CachingDefinitionLookup` + // sub-prerenders (fired when a `type:` filter misses the modules + // cache) inherit the originating job's priority instead of silently + // dropping to 0. User / API callers don't stamp the header, so the + // value is `null` for live traffic — falls back to priority 0 + // (system-initiated default), same observable behavior as today. + let jobPriority = sanitizeJobPriorityHeader( + ctxt.get(PRERENDER_JOB_PRIORITY_HEADER), + ); + let searchOpts: { cacheOnlyDefinitions?: true; priority?: number } = {}; + if (cacheOnlyDefinitions) searchOpts.cacheOnlyDefinitions = true; + if (jobPriority !== null) searchOpts.priority = jobPriority; + let normalizedSearchOpts = + Object.keys(searchOpts).length > 0 ? searchOpts : undefined; let runSearch = () => searchRealms( realmList.map((realmURL) => realmByURL.get(realmURL)), cardsQuery, - searchOpts, + normalizedSearchOpts, ); // Job-scoped cache. Gated on: diff --git a/packages/realm-server/prerender/page-pool.ts b/packages/realm-server/prerender/page-pool.ts index e423f56efd1..ca755fe9209 100644 --- a/packages/realm-server/prerender/page-pool.ts +++ b/packages/realm-server/prerender/page-pool.ts @@ -1974,18 +1974,22 @@ export class PagePool { // now we make it explicit here so brand-new affinities still get // a fresh standby in preference to busy-tab queueing. // - // Await `#ensureStandbyPool` unconditionally rather than gating - // on `current < desired`. `#currentStandbyCount` includes - // `#creatingStandbys`, so a refill that's in flight on another - // affinity inflates `current` past `desired` here while - // `#standbys.size` is still 0 — gating would prematurely fall - // through to cross-affinity steal instead of waiting for the - // standby a peer just kicked off. Mirrors the same fix in the - // non-file spawn-branch above. `#ensuringStandbys` dedup makes - // this a no-op when the pool is genuinely healthy. - let startedAt = Date.now(); - await this.#ensureStandbyPool(); - tabStartupMs += Date.now() - startedAt; + // The gate here is intentional and asymmetric with the non-file + // spawn-branch above. There the deadlock cost of skipping is + // unbounded (the caller queues on the very tab whose work it + // blocks), so we await unconditionally. Here the cost of + // skipping is only a cross-affinity-steal hop — itself a + // designed fallback — so paying an extra microtask for a no-op + // await is the wrong trade. It also shifts microtask ordering + // against a concurrent same-affinity file caller arriving on + // an idle tab, which the + // `queues same-realm request when tab is transitioning` test in + // `prerendering-test.ts` pins. + if (this.#currentStandbyCount() < this.#desiredStandbyCount()) { + let startedAt = Date.now(); + await this.#ensureStandbyPool(); + tabStartupMs += Date.now() - startedAt; + } throwIfAborted(signal); let retryShared = this.#tryClaimOrphanContext(affinityKey); if (retryShared) { diff --git a/packages/realm-server/prerender/prerender-constants.ts b/packages/realm-server/prerender/prerender-constants.ts index 671cb4dc09b..7e2ac03f545 100644 --- a/packages/realm-server/prerender/prerender-constants.ts +++ b/packages/realm-server/prerender/prerender-constants.ts @@ -42,6 +42,16 @@ export function sanitizePrerenderRequestId( // imports keep working unchanged. export { X_BOXEL_JOB_ID_HEADER as PRERENDER_JOB_ID_HEADER } from '@cardstack/runtime-common'; +// Worker-job priority of the request that triggered this prerender. +// Producer side stamps this header so any sub-`prerenderModule` the +// host fires during render inherits the originating priority instead +// of silently dropping to 0 — see prerender-headers.ts for the full +// chain rationale. +export { + X_BOXEL_JOB_PRIORITY_HEADER as PRERENDER_JOB_PRIORITY_HEADER, + sanitizeJobPriorityHeader, +} from '@cardstack/runtime-common'; + // Stamped on the host's outbound _federated-search / _search calls // when the host SPA detects it's running inside a prerender tab. The // prerender server signals "you are in a prerender" by injecting diff --git a/packages/realm-server/prerender/remote-prerenderer.ts b/packages/realm-server/prerender/remote-prerenderer.ts index 824e23364ce..2018e44e9e2 100644 --- a/packages/realm-server/prerender/remote-prerenderer.ts +++ b/packages/realm-server/prerender/remote-prerenderer.ts @@ -11,6 +11,7 @@ import { } from '@cardstack/runtime-common'; import { PRERENDER_JOB_ID_HEADER, + PRERENDER_JOB_PRIORITY_HEADER, PRERENDER_REQUEST_ID_HEADER, PRERENDER_SERVER_DRAINING_STATUS_CODE, PRERENDER_SERVER_STATUS_DRAINING, @@ -74,7 +75,11 @@ export function createRemotePrerenderer( let endpoint = new URL(path, prerenderURL); // jobId is request metadata, not part of the validated body — strip // it out before sending so the prerender-server's payload schema - // doesn't need to know about it. + // doesn't need to know about it. `priority` IS part of the validated + // body (existing PagePool plumbing), so we read it without stripping + // and additionally forward it as a header so the host's + // `_federated-search` fetch wrapper can re-stamp it on sub-prerender + // requests fired during render. let { jobId, ...attributesWithoutJobId } = attributes as Record< string, any @@ -87,6 +92,12 @@ export function createRemotePrerenderer( }; let sanitizedJobId = typeof jobId === 'string' ? sanitizePrerenderJobId(jobId) : null; + let priorityHeaderValue = + typeof attributes.priority === 'number' && + Number.isSafeInteger(attributes.priority) && + attributes.priority >= 0 + ? String(attributes.priority) + : null; // CS-10872: one correlation ID per logical client call, reused on // retries so operators can follow the full manager/prerender-server // log story for the same intent rather than hunting through N @@ -111,6 +122,9 @@ export function createRemotePrerenderer( ...(sanitizedJobId ? { [PRERENDER_JOB_ID_HEADER]: sanitizedJobId } : {}), + ...(priorityHeaderValue + ? { [PRERENDER_JOB_PRIORITY_HEADER]: priorityHeaderValue } + : {}), }, body: JSON.stringify(body), signal: ac.signal, diff --git a/packages/realm-server/prerender/render-runner.ts b/packages/realm-server/prerender/render-runner.ts index 985f9d0194a..91fa9f7098b 100644 --- a/packages/realm-server/prerender/render-runner.ts +++ b/packages/realm-server/prerender/render-runner.ts @@ -775,22 +775,35 @@ export class RenderRunner { // The check lives inside the try so `finally { release() }` frees // the tab slot if the caller aborted during the getPage handoff. throwIfAborted(signal, 'queued'); - // Single CDP round-trip that sets both the session auth and the - // indexing job id on the page. The job id surfaces to the host's - // `_federated-search` fetch wrapper via `globalThis.__boxelJobId` - // — the realm-server's handle-search gate pairs it with + // Single CDP round-trip that sets the session auth and the + // indexing job's id + priority on the page. Both surface to the + // host's `_federated-search` fetch wrapper via + // `globalThis.__boxelJobId` / `__boxelJobPriority` — the + // realm-server's handle-search gate pairs the id with // `x-boxel-consuming-realm` to decide whether to consult the - // JobScopedSearchCache. Always overwrite (including with - // undefined) so a tab reused across multiple visits never bleeds - // a prior visit's job id into the next render. + // JobScopedSearchCache, and threads the priority into + // `LookupContext.priority` so any sub-`prerenderModule` fired by + // `CachingDefinitionLookup` for a missed definition inherits the + // originating priority instead of silently dropping to 0. Always + // overwrite (including with undefined) so a tab reused across + // multiple visits never bleeds a prior visit's values into the + // next render. await page.evaluate( - (sessionAuth: string, id: string | undefined) => { + ( + sessionAuth: string, + id: string | undefined, + jobPriority: number | undefined, + ) => { localStorage.setItem('boxel-session', sessionAuth); (globalThis as unknown as { __boxelJobId?: string }).__boxelJobId = id; + ( + globalThis as unknown as { __boxelJobPriority?: number } + ).__boxelJobPriority = jobPriority; }, auth, jobId, + priority, ); // defense-in-depth: clear any stale file render data left on globalThis // from a prior visit before we start running passes. diff --git a/packages/runtime-common/definition-lookup.ts b/packages/runtime-common/definition-lookup.ts index a79c8fac53a..62e7d17cd67 100644 --- a/packages/runtime-common/definition-lookup.ts +++ b/packages/runtime-common/definition-lookup.ts @@ -269,7 +269,10 @@ export interface DefinitionLookupOptions { } export interface DefinitionLookup { - lookupDefinition(codeRef: ResolvedCodeRef): Promise; + lookupDefinition( + codeRef: ResolvedCodeRef, + opts?: DefinitionLookupOptions, + ): Promise; // Like lookupDefinition but does not trigger a prerenderer call or // populate missing definitions. It may still perform lookup-context // resolution (including remote visibility probing) before reading from the @@ -293,6 +296,13 @@ export interface DefinitionLookup { interface LookupContext { requestingRealm?: LocalRealm; + // Worker-job priority forwarded into sub-`prerenderModule` calls for + // definition cache misses. Origin is the `x-boxel-job-priority` + // header on `_federated-search` calls during a prerendered card + // render — `handle-search` sanitizes the header and passes it into + // `RealmIndexQueryEngine`'s search opts; the search engine threads + // it here when it calls `lookupDefinition`. + priority?: number; } export class CachingDefinitionLookup implements DefinitionLookup { @@ -350,8 +360,13 @@ export class CachingDefinitionLookup implements DefinitionLookup { this.#populateCoordinator = populateCoordinator; } - async lookupDefinition(codeRef: ResolvedCodeRef): Promise { - return await this.lookupDefinitionWithContext(codeRef); + async lookupDefinition( + codeRef: ResolvedCodeRef, + opts?: DefinitionLookupOptions, + ): Promise { + return await this.lookupDefinitionWithContext(codeRef, { + ...(opts?.priority !== undefined ? { priority: opts.priority } : {}), + }); } async lookupCachedDefinition( @@ -770,6 +785,7 @@ export class CachingDefinitionLookup implements DefinitionLookup { cacheScope, cacheUserId, prerenderUserId, + priority: contextOpts?.priority, }); if (!moduleEntry) { @@ -1005,9 +1021,11 @@ export class CachingDefinitionLookup implements DefinitionLookup { async lookupDefinitionForRealm( codeRef: ResolvedCodeRef, realm: LocalRealm, + opts?: DefinitionLookupOptions, ): Promise { return await this.lookupDefinitionWithContext(codeRef, { requestingRealm: realm, + ...(opts?.priority !== undefined ? { priority: opts.priority } : {}), }); } @@ -1723,8 +1741,15 @@ class RealmScopedDefinitionLookup implements DefinitionLookup { this.#realm = realm; } - async lookupDefinition(codeRef: ResolvedCodeRef): Promise { - return await this.#inner.lookupDefinitionForRealm(codeRef, this.#realm); + async lookupDefinition( + codeRef: ResolvedCodeRef, + opts?: DefinitionLookupOptions, + ): Promise { + return await this.#inner.lookupDefinitionForRealm( + codeRef, + this.#realm, + opts, + ); } async lookupCachedDefinition( diff --git a/packages/runtime-common/prerender-headers.ts b/packages/runtime-common/prerender-headers.ts index 91f1f2e220a..db5fccff14e 100644 --- a/packages/runtime-common/prerender-headers.ts +++ b/packages/runtime-common/prerender-headers.ts @@ -42,3 +42,36 @@ export function sanitizeConsumingRealmHeader( if (!trimmed) return null; return REALM_URL_PATTERN.test(trimmed) ? trimmed : null; } + +// HTTP header carrying the worker-job priority of the request that +// triggered the prerender. Threaded from `pg-queue` job priority → +// `remote-prerenderer` → prerender-server → render-runner → page +// (`globalThis.__boxelJobPriority`) → host's `_federated-search` fetch +// wrapper → realm-server's `handle-search`. The realm-server forwards +// it into `LookupContext.priority` so any sub-`prerenderModule` fired +// by `CachingDefinitionLookup` for a missed definition inherits the +// originating job's priority instead of silently dropping to 0. +// +// Same scale as worker-job priority — 0 = system-initiated, 10 = +// userInitiatedPriority — small non-negative integers. +export const X_BOXEL_JOB_PRIORITY_HEADER = 'x-boxel-job-priority'; + +// Sanitize the inbound job-priority header value. The producer side +// stringifies a small non-negative integer; the consumer side must +// reject anything that isn't a base-10 integer in a reasonable range +// before passing it on as a number. Rejecting an out-of-range value +// returns `null` rather than clamping so an upstream regression +// (e.g. someone sending a bogus value) surfaces as "no priority" — +// safer than silently substituting a plausible-looking integer. +const JOB_PRIORITY_MAX = 1_000_000; +export function sanitizeJobPriorityHeader( + raw: string | null | undefined, +): number | null { + if (typeof raw !== 'string') return null; + let trimmed = raw.trim(); + if (!trimmed) return null; + if (!/^\d+$/.test(trimmed)) return null; + let n = Number(trimmed); + if (!Number.isSafeInteger(n) || n < 0 || n > JOB_PRIORITY_MAX) return null; + return n; +} diff --git a/packages/runtime-common/realm-index-query-engine.ts b/packages/runtime-common/realm-index-query-engine.ts index f1dcfcd88ee..fcd573d3fcb 100644 --- a/packages/runtime-common/realm-index-query-engine.ts +++ b/packages/runtime-common/realm-index-query-engine.ts @@ -83,6 +83,13 @@ type Options = { // when file-meta responses are served during card prerendering (the single // prerender semaphore permit is already held). cacheOnlyDefinitions?: boolean; + // Worker-job priority threaded from the caller (typically + // `_federated-search`'s `x-boxel-job-priority` header, set by the + // host's fetch wrapper during a prerendered card render). Forwarded + // to `lookupDefinition` so any sub-`prerenderModule` fired for a + // missed `type:` filter resolution inherits the originating + // priority. Defaults to 0 when absent. + priority?: number; } & QueryOptions; type SearchResult = SearchResultDoc | SearchResultError; @@ -381,7 +388,9 @@ export class RealmIndexQueryEngine { return false; } } else { - definition = await this.#definitionLookup.lookupDefinition(codeRef); + definition = await this.#definitionLookup.lookupDefinition(codeRef, { + ...(opts?.priority !== undefined ? { priority: opts.priority } : {}), + }); } if (!definition) { return false; @@ -715,7 +724,9 @@ export class RealmIndexQueryEngine { if (opts?.cacheOnlyDefinitions) { return await this.#definitionLookup.lookupCachedDefinition(codeRef); } - return await this.#definitionLookup.lookupDefinition(codeRef); + return await this.#definitionLookup.lookupDefinition(codeRef, { + ...(opts?.priority !== undefined ? { priority: opts.priority } : {}), + }); } // Populate query-based relationship fields using pre-extracted metadata From 95e988ef5065cd6f0a79fa6a06cb1181fdf62f5a Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 14:55:44 -0400 Subject: [PATCH 05/10] Indexer: pre-warm every realm .gts/.gjs module, not just per-row deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `preWarmModulesTable` previously walked each invalidation's `boxel_index.deps` and added any executable URLs it found. That misses modules referenced by *string* in card templates — a typical pattern in dashboard cards: `cohort.gts` is referenced as a string parameter to the search filter, never imported into the dashboard's runtime module graph. So it never appears in the dashboard row's `deps`, never gets pre-warmed, and when the dashboard renders during indexing the `_federated-search` call fires a same-affinity `prerenderModule` for `cohort.gts` mid-card- render. That sub-prerender queues behind the dashboard's tab — the self-referential prerender deadlock. Add a realm-wide sweep on top of the existing per-row deps walk. Source is the filesystem-mtimes map: from-scratch already pays for the walk via `discoverInvalidations`, so we just filter and reuse it; incremental adds a fresh `reader.mtimes()` call before pre-warm (typical ~200 ms on a 500-file realm, one call per job, completely dominated by the indexing work that follows). Filter to `.gts` + `.gjs` only — cards can only live in template-bearing modules, so `.ts`/`.js` helpers are correctly excluded from the realm-wide layer. Helpers that ARE needed by a card already get pre-warmed through the existing per-row deps walk; the realm-wide layer is purely additive on top. Adds `hasCardExtension` / `cardExtensions` next to `hasExecutableExtension` / `executableExtensions` in `runtime-common/index.ts`, same shape — `.gts` and `.gjs` are the extensions a `CardDef` / `FieldDef` can live in because they're the ones with a Glimmer template surface. The modules cache treats `definitions: {}` (an empty result from a non-card module — e.g. a rare `.gts` helper) as a valid cache hit at `definition-lookup.ts:561-563`. So the worst case is one wasted prerender per non-card `.gts` per `clearRealmCache` cycle; subsequent lookups skip the prerender entirely. That contract is pinned by `getModuleCacheEntry caches non-card modules as no-card markers` in PR A. Cost analysis for ambitious-piranha (the staging realm where the deadlock fired): 9 `.gts` files + 9 `.ts` files = 18 executables, but only 9 `.gts` files in the realm-wide sweep (the `.ts` files are `bxl/*` helpers). Pre-warm is still serial (concurrency comes in a follow-up PR after baseline measurement). Roughly ~200 ms per file × 9 files = ~1.8 s additional pre-warm cost per from-scratch reindex. Stacked on top of PR #4863 (PagePool tab-materialization + priority plumbing). The two changes attack the same bug from different angles: PR #4863 makes sure the deadlock cannot happen when a sub-prerender DOES fire; this PR makes sure it almost never has to fire in the first place. PR #4863 is the load-bearing fix; this is the preventive layer. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/runtime-common/index-runner.ts | 47 +++++++++++++++++++++---- packages/runtime-common/index.ts | 16 +++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/packages/runtime-common/index-runner.ts b/packages/runtime-common/index-runner.ts index a38c892378b..34266c67e62 100644 --- a/packages/runtime-common/index-runner.ts +++ b/packages/runtime-common/index-runner.ts @@ -8,6 +8,7 @@ import { Memoize } from 'typescript-memoize'; import { logger, + hasCardExtension, hasExecutableExtension, SupportedMimeType, jobIdentity, @@ -203,7 +204,17 @@ export class IndexRunner { await current.#dependencyResolver.orderInvalidationsByDependencies( invalidations, ); - await current.preWarmModulesTable(invalidations); + // Pre-warm the modules cache. Combines per-row deps (which catch + // most modules used during a from-scratch pass) with the realm- + // wide `.gts` / `.gjs` sweep (which catches sibling card modules + // referenced by string in templates — the typical + // `` + // pattern). The filesystem-mtimes walk was already paid by + // discoverInvalidations above; we just filter and reuse it. + let allRealmCardModules = Object.keys( + discoverResult.filesystemMtimes, + ).filter(hasCardExtension); + await current.preWarmModulesTable(invalidations, allRealmCardModules); let resumedRows = current.batch.resumedRows; let resumedSkipped = 0; current.#onProgress?.({ @@ -354,7 +365,14 @@ export class IndexRunner { } current.#scheduleClearCacheForNextRender(); } - await current.preWarmModulesTable(invalidations); + // Pre-warm: combine per-row deps with a realm-wide `.gts`/`.gjs` + // sweep. Incremental skips `discoverInvalidations` so the + // filesystem-mtimes walk hasn't happened yet — call it here. + // Typical realm sizes make this < 200 ms; one call per job. + let incrementalMtimes = await current.#reader.mtimes(); + let allRealmCardModules = + Object.keys(incrementalMtimes).filter(hasCardExtension); + await current.preWarmModulesTable(invalidations, allRealmCardModules); let hrefs = urls.map((u) => u.href); let resumedRows = current.batch.resumedRows; @@ -568,11 +586,26 @@ export class IndexRunner { // Failures here are warned but do not fail the batch — a mid-render // sub-prerender will still fire on demand if pre-warm misses a // module. - private async preWarmModulesTable(invalidations: URL[]): Promise { - if (invalidations.length === 0) { + private async preWarmModulesTable( + invalidations: URL[], + allRealmCardModules: string[] = [], + ): Promise { + if (invalidations.length === 0 && allRealmCardModules.length === 0) { return; } let preWarmStart = Date.now(); + + // Base layer: every `.gts` / `.gjs` file in the realm, regardless of + // whether it appears in this batch's invalidation set. Catches sibling + // card modules that are referenced by *string* in templates (e.g. + // ``) + // and so do not appear in any instance's runtime `deps`. Those + // modules are exactly what `_federated-search` needs the modules- + // table cache to be warm for; without this layer the search fires a + // same-affinity `prerenderModule` mid-card-render and risks the + // self-referential prerender deadlock. + let toWarm = new Set(allRealmCardModules); + let hrefs = invalidations.map((u) => u.href); let existingRows = await this.batch.getDependencyRows(hrefs); let bestByUrl = new Map(); @@ -585,13 +618,15 @@ export class IndexRunner { } } - let toWarm = new Set(); let novelJsonUrls: URL[] = []; for (let url of invalidations) { // Module files in the invalidation set are deps that instances // in the same batch will consume — pre-warm them directly. This // covers from-scratch and atomic-update batches where most rows - // have no prior `deps` data yet. + // have no prior `deps` data yet. Unlike the realm-wide layer + // above, this includes `.ts` / `.js` helpers — only the ones the + // batch is actually touching, so cost is bounded by invalidation + // size rather than realm size. if (hasExecutableExtension(url.href)) { toWarm.add(url.href); } diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts index 2a0e21c7a6a..914c6c19a8c 100644 --- a/packages/runtime-common/index.ts +++ b/packages/runtime-common/index.ts @@ -645,6 +645,13 @@ export * from './pr-manifest'; export * from './file-def-code-ref'; export const executableExtensions = ['.js', '.gjs', '.ts', '.gts']; +// Card / field definitions live in template-bearing modules — `.gts` +// (Glimmer + TS) or `.gjs` (Glimmer + JS) — since `CardDef` / +// `FieldDef` components need a Glimmer template surface. Plain `.ts` +// and `.js` cannot host a card definition and so are excluded from +// the realm-wide pre-warm sweep that primes the modules cache before +// the visit loop. +export const cardExtensions = ['.gts', '.gjs']; export { createResponse } from './create-response'; export * from './db-queries/db-types'; @@ -1007,6 +1014,15 @@ export function hasExecutableExtension(path: string): boolean { return false; } +export function hasCardExtension(path: string): boolean { + for (let extension of cardExtensions) { + if (path.endsWith(extension)) { + return true; + } + } + return false; +} + export function trimExecutableExtension( input: RealmResourceIdentifier, ): RealmResourceIdentifier { From 6a45c7aa18b28511010833a95524fa43910749f6 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 15:47:56 -0400 Subject: [PATCH 06/10] Host: stamp user priority on outbound _federated-search by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion change to PR #4863. That PR plumbed `x-boxel-job-priority` through every layer of the prerender / search stack, but only the producer-side prerender tab was stamping the header — user-facing fetches (the host SPA in a real user's browser, external API callers) sent no priority, so any sub-`prerenderModule` fired by `_federated-search` for a `lookupDefinition` cache miss queued behind concurrent background indexing at priority 0. User clicks that drive search are user-initiated work and should outrank background fan-out. Policy on the outbound `jobPriorityHeader()`: 1. Inside a prerender tab (`__boxelDuringPrerender === true`): forward `__boxelJobPriority` as-is, including 0. The render-runner sets the global from the worker job's priority; a 0 is meaningful (the originating job is system-initiated indexing) and must not be upgraded. Falls back to 0 if the global is absent. 2. Outside a prerender tab: stamp `userInitiatedPriority` (10) unless the caller has explicitly set `globalThis.__boxelJobPriority` to something else. Why gate on `__boxelDuringPrerender` rather than the presence of `__boxelJobPriority`: a prerender tab whose render-runner build predates the priority global, or a test fixture that didn't set it, should still be treated as prerender-context (default 0), not as live user traffic (default 10). Stacked on top of #4863 — that PR adds `X_BOXEL_JOB_PRIORITY_HEADER` and the `userInitiatedPriority` constant import is already wired through. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/services/store.ts | 53 ++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts index 65a4eb34774..41a51eef2c4 100644 --- a/packages/host/app/services/store.ts +++ b/packages/host/app/services/store.ts @@ -36,6 +36,7 @@ import { X_BOXEL_CONSUMING_REALM_HEADER, X_BOXEL_JOB_ID_HEADER, X_BOXEL_JOB_PRIORITY_HEADER, + userInitiatedPriority, Deferred, delay, mergeRelationships, @@ -161,17 +162,53 @@ function jobIdHeader(): Record { return j ? { [X_BOXEL_JOB_ID_HEADER]: j } : {}; } -// Companion to `jobIdHeader()`. The prerender server's render-runner -// also injects `__boxelJobPriority` onto the page before transitioning -// into the render route. Outside a prerender tab the global is -// undefined and we send no header — user / API callers leave the -// realm-server's lookup paths to fall back to priority 0 as today. +// Companion to `jobIdHeader()`. Policy is two-state, gated by +// `__boxelDuringPrerender` (the same flag `duringPrerenderHeaders` +// reads), not by the presence of `__boxelJobPriority`: +// +// 1. Inside a prerender tab: forward the worker job's priority as-is. +// The render-runner injects `__boxelJobPriority` alongside +// `__boxelJobId` on each visit — a priority of 0 is meaningful +// (the originating job is system-initiated background indexing) +// and must be preserved, not upgraded. Sub-`prerenderModule` +// calls fired by `_federated-search` for a `lookupDefinition` +// cache miss inherit this priority so they don't outrun the +// parent. If `__boxelJobPriority` is missing here (older +// render-runner build, test fixture, etc.) treat as 0 — the +// safe default for prerender-context work. +// +// 2. Outside a prerender tab (the host SPA in a real user's browser, +// or any external API caller that isn't a prerender visit): +// stamp `userInitiatedPriority` (10). User clicks driving a +// search are by definition user-initiated work and should outrank +// background indexing on the realm-server's PagePool. Without +// this, a user search whose definition lookup misses the modules +// cache would fire its sub-prerender at priority 0 and queue +// behind concurrent indexing fan-out. +// +// External API callers that need a different priority (e.g. batch +// tooling that wants explicit routing) can override +// `globalThis.__boxelJobPriority` themselves — outside a prerender +// tab the policy is still "stamp what's there, default to user +// priority." function jobPriorityHeader(): Record { + let duringPrerender = ( + globalThis as unknown as { __boxelDuringPrerender?: boolean } + ).__boxelDuringPrerender; let p = (globalThis as unknown as { __boxelJobPriority?: number }) .__boxelJobPriority; - return typeof p === 'number' && Number.isSafeInteger(p) && p >= 0 - ? { [X_BOXEL_JOB_PRIORITY_HEADER]: String(p) } - : {}; + let valid = + typeof p === 'number' && Number.isSafeInteger(p) && p >= 0 ? p : undefined; + if (duringPrerender) { + // Forward as-is, including 0. Fall back to 0 if the global is + // absent or malformed. + return { [X_BOXEL_JOB_PRIORITY_HEADER]: String(valid ?? 0) }; + } + // Outside prerender: user-initiated unless the caller overrode the + // global with their own value. + return { + [X_BOXEL_JOB_PRIORITY_HEADER]: String(valid ?? userInitiatedPriority), + }; } const queryFieldSeedFromSearchSymbol = Symbol.for( 'cardstack-query-field-seed-from-search', From 9a40c88e1ad8af810f6de07c58d7cf6160c37fe7 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 15:59:41 -0400 Subject: [PATCH 07/10] Broaden pre-warm sweep to all executables: .ts/.js can host CardDef too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the `hasCardExtension` helper added in the previous commit. Copilot's review caught a real case I'd missed: `packages/catalog-realm/commands/collect-submission-files.ts` defines `class CollectSubmissionFilesInput extends CardDef` — a card-defining `.ts` file, no Glimmer template. The `.gts`/`.gjs` extension filter would have skipped it on every realm-wide pre-warm sweep, and any `_federated-search` filter referencing it as a `type:` would have fired a same-affinity `prerenderModule` mid-card-render — the exact deadlock pattern this PR aims to prevent. Fix: realm-wide sweep now uses the existing `hasExecutableExtension` (`.gts` / `.gjs` / `.ts` / `.js`, excluding `.d.ts`). Non-card modules pre-warmed by the sweep persist to the modules table as empty- definitions rows (no-card markers — `definition-lookup.ts:561-563`), so subsequent `getCachedDefinitions` / `lookupDefinition` calls short-circuit at the cache without re-firing the prerenderer. That contract is pinned by the `getCachedDefinitions caches non-card modules as no-card markers` test in PR #4863. Variable renamed `allRealmCardModules` → `allRealmExecutables` and comments updated. The `hasCardExtension` helper + `cardExtensions` constant are removed entirely. Cost analysis update: ambitious-piranha goes from 9 candidates (just the `.gts` files) to 18 (9 `.gts` + 9 `.ts` helpers in `bxl/`). At ~200 ms per prerender × 18 modules with serial pre-warm = ~3.6 s additional cost on top of the from-scratch reindex. After the first sweep, the modules table caches the helper rows; subsequent reindexes only pay for modules that actually have card defs. Bounded. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/runtime-common/index-runner.ts | 72 ++++++++++++++++--------- packages/runtime-common/index.ts | 16 ------ 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/packages/runtime-common/index-runner.ts b/packages/runtime-common/index-runner.ts index 752f8bdb19b..da5e75a8985 100644 --- a/packages/runtime-common/index-runner.ts +++ b/packages/runtime-common/index-runner.ts @@ -8,7 +8,6 @@ import { Memoize } from 'typescript-memoize'; import { logger, - hasCardExtension, hasExecutableExtension, SupportedMimeType, jobIdentity, @@ -205,16 +204,26 @@ export class IndexRunner { invalidations, ); // Pre-warm the modules cache. Combines per-row deps (which catch - // most modules used during a from-scratch pass) with the realm- - // wide `.gts` / `.gjs` sweep (which catches sibling card modules - // referenced by string in templates — the typical + // most modules used during a from-scratch pass) with a realm-wide + // sweep of every executable file in the realm — `.gts` / `.gjs` + // (cards with Glimmer templates) and `.ts` / `.js` (cards without + // templates, e.g. command-input cards in catalog-realm; and pure + // helpers). The realm-wide sweep catches sibling modules referenced + // by string in card templates, the typical // `` - // pattern). The filesystem-mtimes walk was already paid by + // pattern that no instance's runtime `deps` will capture. Non-card + // modules pre-warmed by this sweep persist to the modules table as + // empty-definitions rows (no-card markers — see + // `definition-lookup.ts:561-563`), so subsequent + // `lookupDefinition` / `getCachedDefinitions` calls short-circuit + // at the cache without re-firing the prerenderer. + // + // The filesystem-mtimes walk was already paid by // discoverInvalidations above; we just filter and reuse it. - let allRealmCardModules = Object.keys( + let allRealmExecutables = Object.keys( discoverResult.filesystemMtimes, - ).filter(hasCardExtension); - await current.preWarmModulesTable(invalidations, allRealmCardModules); + ).filter(hasExecutableExtension); + await current.preWarmModulesTable(invalidations, allRealmExecutables); let resumedRows = current.batch.resumedRows; let resumedSkipped = 0; current.#onProgress?.({ @@ -365,14 +374,17 @@ export class IndexRunner { } current.#scheduleClearCacheForNextRender(); } - // Pre-warm: combine per-row deps with a realm-wide `.gts`/`.gjs` - // sweep. Incremental skips `discoverInvalidations` so the - // filesystem-mtimes walk hasn't happened yet — call it here. - // Typical realm sizes make this < 200 ms; one call per job. + // Pre-warm: combine per-row deps with a realm-wide sweep of every + // executable file in the realm. Incremental skips + // `discoverInvalidations` so the filesystem-mtimes walk hasn't + // happened yet — call it here. Typical realm sizes make this + // < 200 ms; one call per job. Non-card modules pre-warmed by this + // sweep persist to the modules table as empty-definitions rows + // (no-card markers) and short-circuit subsequent lookups. let incrementalMtimes = await current.#reader.mtimes(); - let allRealmCardModules = - Object.keys(incrementalMtimes).filter(hasCardExtension); - await current.preWarmModulesTable(invalidations, allRealmCardModules); + let allRealmExecutables = + Object.keys(incrementalMtimes).filter(hasExecutableExtension); + await current.preWarmModulesTable(invalidations, allRealmExecutables); let hrefs = urls.map((u) => u.href); let resumedRows = current.batch.resumedRows; @@ -588,23 +600,31 @@ export class IndexRunner { // module. private async preWarmModulesTable( invalidations: URL[], - allRealmCardModules: string[] = [], + allRealmExecutables: string[] = [], ): Promise { - if (invalidations.length === 0 && allRealmCardModules.length === 0) { + if (invalidations.length === 0 && allRealmExecutables.length === 0) { return; } let preWarmStart = Date.now(); - // Base layer: every `.gts` / `.gjs` file in the realm, regardless of - // whether it appears in this batch's invalidation set. Catches sibling - // card modules that are referenced by *string* in templates (e.g. + // Base layer: every executable file in the realm, regardless of + // whether it appears in this batch's invalidation set. Catches + // sibling card modules referenced by *string* in templates (e.g. // ``) - // and so do not appear in any instance's runtime `deps`. Those - // modules are exactly what `_federated-search` needs the modules- - // table cache to be warm for; without this layer the search fires a - // same-affinity `prerenderModule` mid-card-render and risks the - // self-referential prerender deadlock. - let toWarm = new Set(allRealmCardModules); + // — those don't appear in any instance's runtime `deps`. They're + // exactly what `_federated-search` needs the modules-table cache + // to be warm for; without this layer the search fires a same- + // affinity `prerenderModule` mid-card-render and risks the self- + // referential prerender deadlock. + // + // Includes `.ts` / `.js` files because card definitions can live + // there too (e.g. `catalog-realm/commands/collect-submission-files.ts` + // — `class CollectSubmissionFilesInput extends CardDef`). Non-card + // helpers in `.ts` / `.js` pre-warmed here persist as empty- + // definitions rows in the modules table (no-card markers — see + // `definition-lookup.ts:561-563`); subsequent lookups short-circuit + // at the cache without re-firing the prerenderer. + let toWarm = new Set(allRealmExecutables); let hrefs = invalidations.map((u) => u.href); let existingRows = await this.batch.getDependencyRows(hrefs); diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts index 914c6c19a8c..2a0e21c7a6a 100644 --- a/packages/runtime-common/index.ts +++ b/packages/runtime-common/index.ts @@ -645,13 +645,6 @@ export * from './pr-manifest'; export * from './file-def-code-ref'; export const executableExtensions = ['.js', '.gjs', '.ts', '.gts']; -// Card / field definitions live in template-bearing modules — `.gts` -// (Glimmer + TS) or `.gjs` (Glimmer + JS) — since `CardDef` / -// `FieldDef` components need a Glimmer template surface. Plain `.ts` -// and `.js` cannot host a card definition and so are excluded from -// the realm-wide pre-warm sweep that primes the modules cache before -// the visit loop. -export const cardExtensions = ['.gts', '.gjs']; export { createResponse } from './create-response'; export * from './db-queries/db-types'; @@ -1014,15 +1007,6 @@ export function hasExecutableExtension(path: string): boolean { return false; } -export function hasCardExtension(path: string): boolean { - for (let extension of cardExtensions) { - if (path.endsWith(extension)) { - return true; - } - } - return false; -} - export function trimExecutableExtension( input: RealmResourceIdentifier, ): RealmResourceIdentifier { From 2b74fc22d19ed9397326d5a5bfd61bb098c80a1b Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 16:05:06 -0400 Subject: [PATCH 08/10] Address review: tighten === true check, fix doc comment, add unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues from the Copilot review + Hassan's comment on PR #4866. 1. (Copilot) Reword doc comment to not imply external HTTP callers can affect this global. The helper only runs inside the host SPA's JS runtime. External HTTP callers — anything not in the host SPA — bypass the helper entirely and set `X-Boxel-Job-Priority` directly on their request if they care. 2. (Copilot) Tighten gating from truthy `if (duringPrerender)` to `if (duringPrerender === true)`. The global is typed as a boolean; a stray truthy string from a future code path shouldn't silently flip the policy from "user priority" to "preserve 0" — a real user-facing fetch would then queue behind background indexing. Strict-equality is the safer default. 3. (Hassan) Pin the policy with unit tests. Refactored the inline policy logic into an exported pure helper `resolveOutboundJobPriority({duringPrerender, jobPriority})` and wrote `tests/unit/job-priority-header-test.ts` covering: Outside prerender (user / API caller): - No globals → returns `userInitiatedPriority` - `__boxelDuringPrerender = false` → `userInitiatedPriority` - Explicit `__boxelJobPriority` override is honored (3 → 3, 0 → 0; 0 is NOT coerced to user priority) - Truthy but non-`true` `__boxelDuringPrerender` (`'yes'`, `1`) does NOT trigger forward-as-0 — guards against the issue above Inside prerender: - Explicit 10 → 10 - Explicit 0 → 0 (must not upgrade; preserves system-initiated) - Missing `__boxelJobPriority` → 0 (safe default for prerender- context work) - Malformed values (-1, 1.5, '10') fall through to the active branch's default `jobPriorityHeader` (the wrapper that reads the globals) is now a thin shim that calls `resolveOutboundJobPriority` with the actual globals. No behavioral change at the call sites. Note: I considered tightening `duringPrerenderHeaders()` similarly per Copilot's "and similarly in related helpers" suggestion, but left it alone — changing the existing `X-Boxel-During-Prerender` gating is broader scope than this PR and would change observable behavior at the realm-server's cache-only-definitions gate. Worth a separate cleanup if we want strict-boolean across all the related helpers. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/services/store.ts | 66 +++++--- .../tests/unit/job-priority-header-test.ts | 150 ++++++++++++++++++ 2 files changed, 194 insertions(+), 22 deletions(-) create mode 100644 packages/host/tests/unit/job-priority-header-test.ts diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts index 41a51eef2c4..fc447da681a 100644 --- a/packages/host/app/services/store.ts +++ b/packages/host/app/services/store.ts @@ -177,8 +177,7 @@ function jobIdHeader(): Record { // render-runner build, test fixture, etc.) treat as 0 — the // safe default for prerender-context work. // -// 2. Outside a prerender tab (the host SPA in a real user's browser, -// or any external API caller that isn't a prerender visit): +// 2. Outside a prerender tab (the host SPA in a real user's browser): // stamp `userInitiatedPriority` (10). User clicks driving a // search are by definition user-initiated work and should outrank // background indexing on the realm-server's PagePool. Without @@ -186,28 +185,51 @@ function jobIdHeader(): Record { // cache would fire its sub-prerender at priority 0 and queue // behind concurrent indexing fan-out. // -// External API callers that need a different priority (e.g. batch -// tooling that wants explicit routing) can override -// `globalThis.__boxelJobPriority` themselves — outside a prerender -// tab the policy is still "stamp what's there, default to user -// priority." -function jobPriorityHeader(): Record { - let duringPrerender = ( - globalThis as unknown as { __boxelDuringPrerender?: boolean } - ).__boxelDuringPrerender; - let p = (globalThis as unknown as { __boxelJobPriority?: number }) - .__boxelJobPriority; +// External (non-host) HTTP callers — anything that doesn't run in +// the host SPA's JS runtime — bypass this helper entirely and set +// `X-Boxel-Job-Priority` directly on their request if they care. +// This helper covers the host SPA only. +// +// Both globals are checked with `=== true` / strict-number rather +// than truthy coercion: `__boxelDuringPrerender` is typed as a +// boolean and a stray truthy string from a future code path +// shouldn't silently flip the policy from "user-priority" to +// "preserve 0." +// Pure resolver — exported for the unit test in +// `tests/integration/job-priority-header-test.ts`. See the comment +// above for the policy rationale; the function is the literal +// translation of that policy to numbers. +export function resolveOutboundJobPriority({ + duringPrerender, + jobPriority, +}: { + duringPrerender: unknown; + jobPriority: unknown; +}): number { let valid = - typeof p === 'number' && Number.isSafeInteger(p) && p >= 0 ? p : undefined; - if (duringPrerender) { - // Forward as-is, including 0. Fall back to 0 if the global is - // absent or malformed. - return { [X_BOXEL_JOB_PRIORITY_HEADER]: String(valid ?? 0) }; - } - // Outside prerender: user-initiated unless the caller overrode the - // global with their own value. + typeof jobPriority === 'number' && + Number.isSafeInteger(jobPriority) && + jobPriority >= 0 + ? jobPriority + : undefined; + if (duringPrerender === true) { + return valid ?? 0; + } + return valid ?? userInitiatedPriority; +} + +function jobPriorityHeader(): Record { + let g = globalThis as unknown as { + __boxelDuringPrerender?: boolean; + __boxelJobPriority?: number; + }; return { - [X_BOXEL_JOB_PRIORITY_HEADER]: String(valid ?? userInitiatedPriority), + [X_BOXEL_JOB_PRIORITY_HEADER]: String( + resolveOutboundJobPriority({ + duringPrerender: g.__boxelDuringPrerender, + jobPriority: g.__boxelJobPriority, + }), + ), }; } const queryFieldSeedFromSearchSymbol = Symbol.for( diff --git a/packages/host/tests/unit/job-priority-header-test.ts b/packages/host/tests/unit/job-priority-header-test.ts new file mode 100644 index 00000000000..bc2cbd9e69d --- /dev/null +++ b/packages/host/tests/unit/job-priority-header-test.ts @@ -0,0 +1,150 @@ +import { module, test } from 'qunit'; + +import { userInitiatedPriority } from '@cardstack/runtime-common'; + +import { resolveOutboundJobPriority } from '@cardstack/host/services/store'; + +// Pure-resolver tests for the policy that decides what +// `X-Boxel-Job-Priority` value the host SPA stamps on outbound +// `_federated-search` calls. The function is module-internal logic +// extracted so its policy can be pinned without acceptance-test +// scaffolding. +// +// Two states gated by `__boxelDuringPrerender`: +// - inside prerender → forward (preserve 0) +// - outside prerender → user-initiated (10) by default +module('Unit | job-priority-header | resolveOutboundJobPriority', function () { + module('outside a prerender tab (user / API caller)', function () { + test('returns userInitiatedPriority when no global is set', function (assert) { + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: undefined, + jobPriority: undefined, + }), + userInitiatedPriority, + ); + }); + + test('returns userInitiatedPriority when __boxelDuringPrerender is false', function (assert) { + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: false, + jobPriority: undefined, + }), + userInitiatedPriority, + ); + }); + + test('honors an explicit override on __boxelJobPriority', function (assert) { + // Batch / scripting tooling running in the host SPA can set the + // global before issuing a fetch; outside a prerender tab we still + // forward what they set rather than overriding to user priority. + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: undefined, + jobPriority: 3, + }), + 3, + ); + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: false, + jobPriority: 0, + }), + 0, + 'override with 0 is preserved (not coerced to user priority)', + ); + }); + + test('rejects a truthy but non-boolean __boxelDuringPrerender — uses strict === true', function (assert) { + // If `__boxelDuringPrerender` somehow ended up as a stringy + // truthy value (e.g. set by a future code path that didn't + // coerce), the policy must NOT silently flip to "forward 0"; + // a real user-facing fetch would then queue behind background + // indexing. The check is `=== true` for exactly this reason. + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: 'yes', + jobPriority: undefined, + }), + userInitiatedPriority, + ); + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: 1, + jobPriority: undefined, + }), + userInitiatedPriority, + ); + }); + }); + + module('inside a prerender tab', function () { + test('forwards an explicit __boxelJobPriority of 10', function (assert) { + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: 10, + }), + 10, + ); + }); + + test('forwards an explicit __boxelJobPriority of 0 — must NOT upgrade', function (assert) { + // System-initiated indexing has priority 0. A + // `_federated-search` fired by the card render must preserve + // that or its sub-prerenders would outrank the parent job. + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: 0, + }), + 0, + ); + }); + + test('defaults to 0 when __boxelJobPriority is missing (older render-runner / test fixture)', function (assert) { + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: undefined, + }), + 0, + ); + }); + + test('rejects malformed __boxelJobPriority values', function (assert) { + // Non-number / negative / non-integer values fall through to + // the default for the active branch. + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: -1, + }), + 0, + ); + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: 1.5, + }), + 0, + ); + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: true, + jobPriority: '10', + }), + 0, + ); + assert.strictEqual( + resolveOutboundJobPriority({ + duringPrerender: false, + jobPriority: -1, + }), + userInitiatedPriority, + 'malformed value outside prerender → user-initiated default', + ); + }); + }); +}); From 8bff6681c97fe40c841b532980a18cee90d6fef3 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 16:07:01 -0400 Subject: [PATCH 09/10] Restore `.gts`/`.gjs`-only pre-warm sweep with corrected rationale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the previous "broaden to all executables" commit. The case that broadening fixed — `.ts` files that host CardDef (rare; the only example in the repo today is command-input cards) — is handled correctly by the on-demand `lookupDefinition` read-through during the visit. The PagePool's new tab-materialization for module/command callers makes that on-demand path safe (the sub-prerender gets its own tab instead of queueing behind the render that triggered it), so the cost trade favors the narrow sweep: - Realm-wide pre-warm of `.gts` / `.gjs` only: ~9 prerenders × ~200 ms on a typical card-heavy realm. - Broader sweep including `.ts` / `.js` helpers: ~18+ prerenders, most of them producing no-card-marker rows for files that almost never define a card. This is an optimization on every reindex, not a correctness change. Comments updated to call out the trade-off honestly: `.ts` / `.js` CAN host card defs, the narrow filter exists because the on-demand path is now safe. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/runtime-common/index-runner.ts | 82 +++++++++++-------------- packages/runtime-common/index.ts | 22 +++++++ 2 files changed, 59 insertions(+), 45 deletions(-) diff --git a/packages/runtime-common/index-runner.ts b/packages/runtime-common/index-runner.ts index da5e75a8985..1e06d505872 100644 --- a/packages/runtime-common/index-runner.ts +++ b/packages/runtime-common/index-runner.ts @@ -8,6 +8,7 @@ import { Memoize } from 'typescript-memoize'; import { logger, + hasCardExtension, hasExecutableExtension, SupportedMimeType, jobIdentity, @@ -204,26 +205,16 @@ export class IndexRunner { invalidations, ); // Pre-warm the modules cache. Combines per-row deps (which catch - // most modules used during a from-scratch pass) with a realm-wide - // sweep of every executable file in the realm — `.gts` / `.gjs` - // (cards with Glimmer templates) and `.ts` / `.js` (cards without - // templates, e.g. command-input cards in catalog-realm; and pure - // helpers). The realm-wide sweep catches sibling modules referenced - // by string in card templates, the typical + // most modules used during a from-scratch pass) with the realm- + // wide `.gts` / `.gjs` sweep (which catches sibling card modules + // referenced by string in templates — the typical // `` - // pattern that no instance's runtime `deps` will capture. Non-card - // modules pre-warmed by this sweep persist to the modules table as - // empty-definitions rows (no-card markers — see - // `definition-lookup.ts:561-563`), so subsequent - // `lookupDefinition` / `getCachedDefinitions` calls short-circuit - // at the cache without re-firing the prerenderer. - // - // The filesystem-mtimes walk was already paid by + // pattern). The filesystem-mtimes walk was already paid by // discoverInvalidations above; we just filter and reuse it. - let allRealmExecutables = Object.keys( + let allRealmCardModules = Object.keys( discoverResult.filesystemMtimes, - ).filter(hasExecutableExtension); - await current.preWarmModulesTable(invalidations, allRealmExecutables); + ).filter(hasCardExtension); + await current.preWarmModulesTable(invalidations, allRealmCardModules); let resumedRows = current.batch.resumedRows; let resumedSkipped = 0; current.#onProgress?.({ @@ -374,17 +365,14 @@ export class IndexRunner { } current.#scheduleClearCacheForNextRender(); } - // Pre-warm: combine per-row deps with a realm-wide sweep of every - // executable file in the realm. Incremental skips - // `discoverInvalidations` so the filesystem-mtimes walk hasn't - // happened yet — call it here. Typical realm sizes make this - // < 200 ms; one call per job. Non-card modules pre-warmed by this - // sweep persist to the modules table as empty-definitions rows - // (no-card markers) and short-circuit subsequent lookups. + // Pre-warm: combine per-row deps with a realm-wide `.gts`/`.gjs` + // sweep. Incremental skips `discoverInvalidations` so the + // filesystem-mtimes walk hasn't happened yet — call it here. + // Typical realm sizes make this < 200 ms; one call per job. let incrementalMtimes = await current.#reader.mtimes(); - let allRealmExecutables = - Object.keys(incrementalMtimes).filter(hasExecutableExtension); - await current.preWarmModulesTable(invalidations, allRealmExecutables); + let allRealmCardModules = + Object.keys(incrementalMtimes).filter(hasCardExtension); + await current.preWarmModulesTable(invalidations, allRealmCardModules); let hrefs = urls.map((u) => u.href); let resumedRows = current.batch.resumedRows; @@ -600,31 +588,35 @@ export class IndexRunner { // module. private async preWarmModulesTable( invalidations: URL[], - allRealmExecutables: string[] = [], + allRealmCardModules: string[] = [], ): Promise { - if (invalidations.length === 0 && allRealmExecutables.length === 0) { + if (invalidations.length === 0 && allRealmCardModules.length === 0) { return; } let preWarmStart = Date.now(); - // Base layer: every executable file in the realm, regardless of - // whether it appears in this batch's invalidation set. Catches - // sibling card modules referenced by *string* in templates (e.g. + // Base layer: every `.gts` / `.gjs` file in the realm, regardless of + // whether it appears in this batch's invalidation set. Catches sibling + // card modules referenced by *string* in templates (e.g. // ``) - // — those don't appear in any instance's runtime `deps`. They're - // exactly what `_federated-search` needs the modules-table cache - // to be warm for; without this layer the search fires a same- - // affinity `prerenderModule` mid-card-render and risks the self- - // referential prerender deadlock. + // — those don't appear in any instance's runtime `deps`. Without + // this layer the search fires a same-affinity `prerenderModule` + // mid-card-render at lookup time, which is the wait-shape the + // PagePool's tab-materialization for module/command callers is + // meant to relieve. // - // Includes `.ts` / `.js` files because card definitions can live - // there too (e.g. `catalog-realm/commands/collect-submission-files.ts` - // — `class CollectSubmissionFilesInput extends CardDef`). Non-card - // helpers in `.ts` / `.js` pre-warmed here persist as empty- - // definitions rows in the modules table (no-card markers — see - // `definition-lookup.ts:561-563`); subsequent lookups short-circuit - // at the cache without re-firing the prerenderer. - let toWarm = new Set(allRealmExecutables); + // `.gts` / `.gjs` only is an optimization, not a correctness gate: + // `.ts` / `.js` files CAN host `CardDef` (e.g. command-input + // cards). If pre-warm misses such a module, the on-demand + // `lookupDefinition` read-through during the visit fires a + // `prerenderModule` for it — safe because the PagePool now + // materializes a tab for the sub-prerender instead of queueing it + // behind the render that triggered the lookup. Restricting the + // sweep to the extensions where cards live almost exclusively + // avoids paying the prerender cost on every reindex for files that + // rarely define a card (typical realms have many helper `.ts` + // files alongside their cards). + let toWarm = new Set(allRealmCardModules); let hrefs = invalidations.map((u) => u.href); let existingRows = await this.batch.getDependencyRows(hrefs); diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts index 2a0e21c7a6a..11ff54faa2f 100644 --- a/packages/runtime-common/index.ts +++ b/packages/runtime-common/index.ts @@ -645,6 +645,19 @@ export * from './pr-manifest'; export * from './file-def-code-ref'; export const executableExtensions = ['.js', '.gjs', '.ts', '.gts']; +// Extensions covered by the realm-wide pre-warm sweep that primes the +// modules cache before the visit loop. This is an optimization, not a +// correctness gate: a `.ts` / `.js` file CAN host a `CardDef` +// (e.g. command-input cards), and if pre-warm misses one the on-demand +// `lookupDefinition` cache read-through fires a `prerenderModule` for +// it during the visit. The PagePool's tab-materialization for +// module/command callers makes that on-demand path safe (the sub- +// prerender gets its own tab instead of queueing behind the render +// that triggered it). Restricting the sweep to `.gts` / `.gjs` — where +// cards live almost exclusively in practice — avoids paying the +// prerender cost on every index for a file type that rarely contains +// card definitions. +export const cardExtensions = ['.gts', '.gjs']; export { createResponse } from './create-response'; export * from './db-queries/db-types'; @@ -1007,6 +1020,15 @@ export function hasExecutableExtension(path: string): boolean { return false; } +export function hasCardExtension(path: string): boolean { + for (let extension of cardExtensions) { + if (path.endsWith(extension)) { + return true; + } + } + return false; +} + export function trimExecutableExtension( input: RealmResourceIdentifier, ): RealmResourceIdentifier { From db7c29906d64ebce2f262c66b747a60853c1ef7a Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Mon, 18 May 2026 17:33:10 -0400 Subject: [PATCH 10/10] Add X-Boxel-Job-Priority to CORS allow-headers The new x-boxel-job-priority request header needs to be listed in Access-Control-Allow-Headers so the browser's CORS preflight passes when prerender contexts on origin :4200 fetch :4201's _federated-search endpoint. Without this, the OPTIONS preflight succeeds (no header restriction) but the follow-up QUERY is blocked by Chrome with "Request header field x-boxel-job-priority is not allowed by Access-Control-Allow-Headers in preflight response", so every search fired by host code in a prerendering context silently fails. The priority routing feature this PR introduces depends on those searches landing. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/realm-server/server.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/realm-server/server.ts b/packages/realm-server/server.ts index 6f42e0a4307..8010ced33d6 100644 --- a/packages/realm-server/server.ts +++ b/packages/realm-server/server.ts @@ -395,7 +395,7 @@ export class RealmServer { cors({ origin: '*', allowHeaders: - 'Authorization, Content-Type, If-Match, If-None-Match, X-Requested-With, X-Boxel-Client-Request-Id, X-Boxel-Assume-User, X-HTTP-Method-Override, X-Boxel-Disable-Module-Cache, X-Filename, X-Boxel-During-Prerender, X-Boxel-Consuming-Realm, X-Boxel-Job-Id, X-Grafana-Device-Id', + 'Authorization, Content-Type, If-Match, If-None-Match, X-Requested-With, X-Boxel-Client-Request-Id, X-Boxel-Assume-User, X-HTTP-Method-Override, X-Boxel-Disable-Module-Cache, X-Filename, X-Boxel-During-Prerender, X-Boxel-Consuming-Realm, X-Boxel-Job-Id, X-Boxel-Job-Priority, X-Grafana-Device-Id', allowMethods: 'GET,HEAD,PUT,POST,DELETE,PATCH,OPTIONS,QUERY', // Cache the preflight response for 24 h. Without this @koa/cors // omits Access-Control-Max-Age and Chrome falls back to its