Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 163 additions & 2 deletions packages/realm-server/tests/module-cache-race-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ import {
SupportedMimeType,
param,
query,
userInitiatedPriority,
} from '@cardstack/runtime-common';
import type { PgAdapter } from '@cardstack/postgres';
import { ModuleCacheCoordinator } from '../lib/module-cache-coordination';
import { RealmFileChangesListener } from '../lib/realm-file-changes-listener';
import {
setupPermissionedRealmCached,
setupDB,
Expand Down Expand Up @@ -1253,6 +1255,20 @@ module(basename(__filename), function () {
}
}

// `#dropAllTranspiledModuleCacheEntries` kicks off the L2 bulk tombstone as
// fire-and-forget (the `.then` chain does not await it), so an immediate
// count can race the tombstone UPDATE on slower CI machines. Re-count every
// 50ms until no live rows remain or `timeoutMs` has elapsed, and hand back
// the last count observed so the caller can assert on it.
async function waitForZeroLiveRows(timeoutMs = 5000): Promise<number> {
  const startedAt = Date.now();
  let liveRows = await countLiveRowsForRealm();
  // Keep going only while rows are still live AND the time budget holds.
  const keepPolling = () =>
    liveRows > 0 && Date.now() - startedAt <= timeoutMs;
  while (keepPolling()) {
    await new Promise((resolve) => setTimeout(resolve, 50));
    liveRows = await countLiveRowsForRealm();
  }
  return liveRows;
}

test('reindex tombstones live L2 rows for the realm', async function (assert) {
await seedL2Row('reindex-happy.gts');
assert.ok(
Expand All @@ -1263,7 +1279,7 @@ module(basename(__filename), function () {
await testRealm.reindex();

assert.strictEqual(
await countLiveRowsForRealm(),
await waitForZeroLiveRows(),
0,
'reindex tombstoned every live L2 row for the realm',
);
Expand Down Expand Up @@ -1293,11 +1309,156 @@ module(basename(__filename), function () {
}

assert.strictEqual(
await countLiveRowsForRealm(),
await waitForZeroLiveRows(),
0,
'bulk L2 tombstone ran even though clearRealmDefinitions threw',
);
});
},
);

// CS-11182 follow-up: the original fix only fired the L2 bulk
// tombstone from `Realm.startReindex`'s post-completion `.then`, which
// only covers `POST <realm>/_full-reindex` / `POST <realm>/_reindex`.
// Production reindexes triggered via the operator-action endpoints
// (`/_grafana-reindex`, `/_grafana-full-reindex`, `/_post-deployment`)
// and the publish-realm flow (`Realm.fullIndex`) all bypass
// `startReindex` and so left the L2 row live with pre-reindex bytes.
// The wider fix emits `notifyAllFileChanges(dbAdapter, realmURL)` from
// the worker side of the `from-scratch-index` task — every replica's
// `realm_file_changes` wildcard listener then drops L1 and fires the
// L2 bulk tombstone. This test exercises the bypass path
// (`realmIndexUpdater.fullIndex`, which never wires up the
// `startReindex` callback) and pins the new cross-replica behavior.
module(
'Worker-side notify covers reindexes that bypass Realm.startReindex (CS-11182)',
function (hooks) {
// Realm URL handed to the fixture setup below and used to scope the
// `module_transpile_cache` count query and the listener's realm lookup.
let realmURL = new URL('http://127.0.0.1:4444/test/');
// Assigned by `onRealmSetup` from the fixture's `testRealm`.
let testRealm: Realm;
// Assigned by `onRealmSetup` from `withRealmPath(args.request, realmURL)`.
let request: RealmRequest;
// Assigned by `onRealmSetup`; also passed to the listener in `beforeEach`.
let dbAdapter: PgAdapter;
// Created and started in `beforeEach`; shut down and cleared in `afterEach`.
let listener: RealmFileChangesListener | undefined;

// Fixture callback: copies the realm and DB adapter into the enclosing
// module's variables, and stores the request client returned by
// `withRealmPath(args.request, realmURL)`. `testRealmHttpServer` is accepted
// but not used here.
function onRealmSetup(args: {
  testRealm: Realm;
  testRealmHttpServer: Server;
  request: SuperTest<Test>;
  dbAdapter: PgAdapter;
}) {
  const { testRealm: realm, request: rawRequest, dbAdapter: adapter } = args;
  testRealm = realm;
  request = withRealmPath(rawRequest, realmURL);
  dbAdapter = adapter;
}

// Set up the permissioned realm for this module through the shared helper,
// using the 'blank' fixture at `realmURL`. `onRealmSetup` is passed as the
// callback that captures the realm, request client and DB adapter.
setupPermissionedRealmCached(hooks, {
fixture: 'blank',
realmURL,
permissions: {
'*': ['read', 'write'],
// `user` is the identity `authHeader()` signs its JWT for.
user: ['read', 'write', 'realm-owner'],
'@node-test_realm:localhost': ['read', 'realm-owner'],
},
onRealmSetup,
});

hooks.beforeEach(async function () {
  // Production wires `RealmFileChangesListener` up in `main.ts`, but the
  // permissioned-realm test fixture does not, so build the equivalent here.
  // Without a started listener the worker's NOTIFY would have no subscriber
  // in this process, and the wildcard wipe this module asserts on would
  // never be delivered to the test realm.
  const created = new RealmFileChangesListener({
    dbAdapter,
    // Only the realm under test is treated as mounted.
    lookupMountedRealm: (url) => {
      if (url === realmURL.href) {
        return testRealm;
      }
      return undefined;
    },
  });
  listener = created;
  await created.start();
});

hooks.afterEach(async function () {
  // Shut down whatever listener `beforeEach` started (if any), then drop
  // the reference so the next test starts from a clean slate.
  const activeListener = listener;
  if (activeListener !== undefined) {
    await activeListener.shutDown();
  }
  listener = undefined;
});

// Card module source that `seedL2Row` writes into the realm. It declares a
// `WorkerNotifyCard` with a single `name` string field and an isolated
// template that renders that field.
const reindexSource = `
import { contains, field, CardDef, Component } from "https://cardstack.com/base/card-api";
import StringField from "https://cardstack.com/base/string";

export class WorkerNotifyCard extends CardDef {
@field name = contains(StringField);
static isolated = class Isolated extends Component<typeof this> {
<template>
<div data-test-worker-notify><@fields.name/></div>
</template>
}
}
`;

// Builds the `Authorization` header value: a Bearer token signed for `user`
// with read and write on the test realm.
function authHeader() {
  const token = createJWT(testRealm, 'user', ['read', 'write']);
  return `Bearer ${token}`;
}

// Counts the `module_transpile_cache` rows for this realm whose `body` is
// still non-NULL. Resolves to 0 when the query yields no row or no count.
async function countLiveRowsForRealm(): Promise<number> {
  const result = (await query(dbAdapter, [
    'SELECT COUNT(*)::int AS n FROM module_transpile_cache WHERE realm_url =',
    param(realmURL.href),
    'AND body IS NOT NULL',
  ])) as { n: number }[];
  const firstRow = result[0];
  if (firstRow == null) {
    return 0;
  }
  return firstRow.n ?? 0;
}

// Writes `reindexSource` to `modulePath` in the test realm, then fetches it
// over HTTP as the authenticated user. Going by the helper's name, that GET
// is what is expected to leave a live L2 row behind. Throws when the
// response status is anything other than 200.
async function seedL2Row(modulePath: string): Promise<void> {
  await testRealm.write(modulePath, reindexSource);
  const { status } = await request
    .get(`/${modulePath}`)
    .set('Accept', SupportedMimeType.All)
    .set('Authorization', authHeader());
  if (status === 200) {
    return;
  }
  throw new Error(
    `seedL2Row: expected 200 for /${modulePath}, got ${status}`,
  );
}

// Polls `countLiveRowsForRealm()` every 50ms until it reports zero or
// `timeoutMs` has elapsed, and resolves to the last count observed.
//
// Why poll: the worker emits NOTIFY after `batch.done()`, and the listener's
// `clearLocalSourceCaches` fires the L2 bulk tombstone without awaiting it.
// Neither leg is on the `job.done` critical path, so an immediate count
// could race the tombstone landing.
async function waitForZeroLiveRows(timeoutMs = 5000): Promise<number> {
  const startedAt = Date.now();
  let liveRows = await countLiveRowsForRealm();
  // Keep going only while rows are still live AND the time budget holds.
  const keepPolling = () =>
    liveRows > 0 && Date.now() - startedAt <= timeoutMs;
  while (keepPolling()) {
    await new Promise((resolve) => setTimeout(resolve, 50));
    liveRows = await countLiveRowsForRealm();
  }
  return liveRows;
}

test('realmIndexUpdater.fullIndex (no startReindex .then wired up) still tombstones L2 rows via the worker-side NOTIFY', async function (assert) {
  // Arrange: make sure there is something live to tombstone.
  await seedL2Row('worker-notify.gts');
  const liveBefore = await countLiveRowsForRealm();
  assert.ok(
    liveBefore >= 1,
    'precondition: at least one live L2 row before reindex',
  );

  // Act: call `RealmIndexUpdater.fullIndex` directly rather than
  // `Realm.startReindex`. `startReindex` wires up the cache-drop `.then`
  // from the original CS-11182 fix; the production bypass paths
  // (`handle-reindex.ts:reindex`, the `full-reindex` queue task,
  // `Realm.fullIndex`) never touch that chain. With only the original fix
  // the assertion below would fail; the worker-side
  // `notifyAllFileChanges` is what makes it pass.
  await testRealm.realmIndexUpdater.fullIndex(userInitiatedPriority);

  // Assert: poll, because the tombstone lands asynchronously.
  const liveAfter = await waitForZeroLiveRows();
  assert.strictEqual(
    liveAfter,
    0,
    'L2 rows tombstoned by the worker-side NOTIFY even though startReindex never ran',
  );
});
},
);
});
15 changes: 15 additions & 0 deletions packages/runtime-common/tasks/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type * as JSONTypes from 'json-typescript';
import type { Task, WorkerArgs } from './index';
import {
jobIdentity,
notifyAllFileChanges,
userIdFromUsername,
fetchUserPermissions,
type RealmPermissions,
Expand Down Expand Up @@ -365,6 +366,20 @@ const fromScratchIndex: Task<FromScratchArgs, FromScratchResult> = ({
args.realmURL
}:\n${JSON.stringify(stats, null, 2)}`,
);
// CS-11182: emit the cross-replica `<realmURL>:*` wildcard so every
// mounted Realm drops its in-memory `#sourceCache` / `#transpiledModuleCache`
// and fires the L2 `module_transpile_cache` bulk tombstone for this
// realm. This is the single chokepoint that every from-scratch
// reindex flows through — startReindex's post-completion `.then`
// (the original fix) only covered POST /_full-reindex and
// POST /_reindex; the Grafana `/_grafana-reindex`,
// `/_grafana-full-reindex`, `/_post-deployment`, publish-realm
// `Realm.fullIndex`, and direct `enqueueReindexRealmJob` paths all
// bypassed it, leaving stale L1+L2 even after a successful reindex.
// Doing it here covers them all uniformly. Best-effort: failures
// fall back to a bounded staleness window because the next
// reader's transpile path re-tombstones the L2 row.
await notifyAllFileChanges(dbAdapter, args.realmURL);
reportStatus(args.jobInfo, 'finish');
return {
invalidations,
Expand Down
Loading