From ed798911207953c6f51d57a4af2098c886f7a623 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 14:08:10 +0000 Subject: [PATCH 01/12] feat: add persisted sync metadata RFC Document the transactional metadata model for persisted collections, including row and collection metadata, query retention, and Electric resume state. Made-with: Cursor --- RFC-persisted-sync-metadata.md | 898 +++++++++++++++++++++++++++++++++ 1 file changed, 898 insertions(+) create mode 100644 RFC-persisted-sync-metadata.md diff --git a/RFC-persisted-sync-metadata.md b/RFC-persisted-sync-metadata.md new file mode 100644 index 000000000..ee0a336cd --- /dev/null +++ b/RFC-persisted-sync-metadata.md @@ -0,0 +1,898 @@ +# RFC: Transactional Persisted Sync Metadata + +## Status + +Draft + +## Summary + +This RFC proposes a transactional metadata API that sync implementations can +optionally use to persist and restore metadata alongside synced collection data. + +The design supports two metadata scopes: + +- **Row metadata**: metadata attached to a specific synced row +- **Collection metadata**: metadata attached to the collection as a whole + +The API is designed so that metadata changes can be committed atomically with +persisted row changes. This is required for correctness in two cases that are +already visible in the codebase: + +- `query-db-collection` needs persisted ownership and GC state so warm-starts do + not incorrectly delete or leak rows +- `electric-db-collection` needs persisted resume state and related metadata so + it can safely warm-start from persisted data and continue streaming + +This RFC is intentionally ordered around the consumer-facing API first, then the +SQLite implementation, then how query collections use it, and finally how +Electric collections use it. 
+ +## Problem + +Today, persisted SQLite rows and sync-layer runtime metadata live on different +planes: + +- persisted collections store row values durably +- sync implementations keep important state in memory only + +That leads to restart gaps: + +- query collections lose row ownership state and cannot safely decide whether a + row should be deleted when the first query result arrives after restart +- Electric collections do not have a durable, transactional place to store + stream resume state such as offsets or handles + +The central requirement is not merely "persist metadata", but: + +1. collections must be able to **read persisted metadata on startup** +2. collections must be able to **update metadata as part of normal sync work** +3. persisted metadata that affects row existence must be **transactional with + row persistence** + +Non-transactional sidecar metadata is not sufficient for correctness. If row +data commits without matching metadata, or metadata commits without matching row +data, restart behavior can still be wrong. 

## Goals

- Provide an optional metadata API to sync implementations
- Keep the API generic enough for multiple sync implementations
- Preserve crash consistency by making metadata transactional with row changes
- Support both row-local and collection-level metadata
- Support persisted GC state for query collections
- Support persisted resume state for Electric collections

## Non-Goals

- Define every possible metadata schema for all sync implementations
- Require metadata support for non-persisted collections
- Force all persistence adapters to implement advanced GC optimizations on day
  one

## Proposed API

### Design principles

The API exposed to a collection's sync implementation should be:

- **optional**: absent for non-persisted collections
- **transaction-scoped**: metadata mutations participate in the current sync
  transaction
- **scope-aware**: row metadata and collection metadata are separate
- **readable at startup**: sync implementations can restore state before or
  during hydration

### Sync API additions

The `sync.sync()` params gain an optional `metadata` capability:

```ts
type SyncMetadataApi<TKey extends string | number> = {
  row: {
    get: (key: TKey) => unknown | undefined
    set: (key: TKey, metadata: unknown) => void
    delete: (key: TKey) => void
  }
  collection: {
    get: (key: string) => unknown | undefined
    set: (key: string, value: unknown) => void
    delete: (key: string) => void
    list: (prefix?: string) => ReadonlyArray<{
      key: string
      value: unknown
    }>
  }
}

type SyncParams<T extends object, TKey extends string | number> = {
  collection: Collection<T, TKey>
  begin: (options?: { immediate?: boolean }) => void
  write: (message: ChangeMessageOrDeleteKeyMessage<T, TKey>) => void
  commit: () => void
  markReady: () => void
  truncate: () => void
  metadata?: SyncMetadataApi<TKey>
}
```

### Semantics

`metadata` is only available when the collection is backed by a persistence
layer that supports it. 
+ +`metadata.row.*` operates on the durable metadata associated with synced rows in +the current collection. + +`metadata.collection.*` operates on durable collection-scoped metadata entries. +These entries are not attached to a single row, but they still participate in +the current sync transaction. + +### Transaction model + +Metadata operations are only valid while a sync transaction is open, that is, +between `begin()` and `commit()`. + +This RFC explicitly requires support for four kinds of committed sync +transactions: + +- row mutations only +- row mutations plus metadata mutations +- collection metadata mutations only +- row metadata mutations only + +If `metadata.row.set`, `metadata.row.delete`, `metadata.collection.set`, or +`metadata.collection.delete` is called outside an open transaction, the +implementation should throw, just as `write()` does today when called without a +pending sync transaction. + +### Read-your-own-writes + +Reads performed through `metadata.row.get`, `metadata.collection.get`, and +`metadata.collection.list` inside an open transaction must reflect any staged +writes from that same transaction. + +This is required so sync implementations can safely merge metadata within a +transaction without having to mirror staged state themselves. 
+ +The write semantics are: + +- `row.set` updates the metadata that will be committed for that row +- `row.delete` removes persisted row metadata for that row +- `collection.set` stages a collection metadata update in the current sync + transaction +- `collection.delete` stages a collection metadata delete in the current sync + transaction + +The read semantics are: + +- `row.get` returns the currently hydrated metadata for a row, if known +- `collection.get` and `collection.list` return the persisted collection + metadata that was loaded during startup or hydration + +### Relationship to `write({ metadata })` + +The existing `write({ type, value, metadata })` path and `metadata.row.*` must +target the same underlying row metadata store. + +They serve different purposes: + +- `write({ ..., metadata })` attaches metadata to a row mutation +- `metadata.row.set()` and `metadata.row.delete()` allow explicit metadata-only + row changes when the row value itself did not change + +Within a single transaction, implementations should treat these as staged +updates to the same row metadata slot. If both are used for the same row in the +same transaction, the effective metadata should follow transaction order +semantics, with later staged changes winning. + +### Why this shape + +This API is deliberately **not** an async sidecar KV API like +`load/store/delete`. A free-floating async store suggests independent writes at +arbitrary times. That is exactly what we want to avoid for correctness-sensitive +state. + +Instead, the API is modeled as an extension of the existing sync transaction +surface: + +- read previously persisted metadata +- stage metadata changes +- commit metadata together with row changes + +### Serialization + +Persisted metadata values are JSON-serialized using the same persisted JSON +encoding rules used elsewhere in the SQLite adapter. Metadata should therefore +be kept JSON-compatible and reasonably small. 
+ +## SQLite Persistence Implementation + +### Overview + +The SQLite persisted collection layer implements the metadata API using two +durable stores: + +1. **row metadata** stored with persisted rows +2. **collection metadata** stored in a separate table + +Both participate in the same SQLite transaction used to apply a committed sync +transaction. + +### Schema changes + +#### Persisted rows + +Add a `metadata` column to the collection table: + +```sql +CREATE TABLE IF NOT EXISTS ( + key TEXT PRIMARY KEY NOT NULL, + value TEXT NOT NULL, + metadata TEXT, + row_version INTEGER NOT NULL +) +``` + +The tombstone table may also optionally carry the last row metadata if useful +for debugging or future recovery, but that is not required for the core design. + +#### Collection metadata + +Add a collection-level metadata table: + +```sql +CREATE TABLE IF NOT EXISTS collection_metadata ( + collection_id TEXT NOT NULL, + key TEXT NOT NULL, + value TEXT NOT NULL, + updated_at INTEGER NOT NULL, + PRIMARY KEY (collection_id, key) +) +``` + +This table stores collection-scoped metadata such as: + +- Electric resume state +- query collection placeholder GC state +- future sync-implementation-specific metadata + +### Adapter contract + +The SQLite adapter extends its persistence internals so a single committed sync +transaction can include: + +- row mutations +- row metadata mutations +- collection metadata mutations + +This requires the persisted runtime to stage metadata on the pending sync +transaction itself, not in a side buffer detached from `begin()` / `commit()`. 
One possible shape is:

```ts
type PersistedRowMutation<T, TKey> =
  | { type: 'insert'; key: TKey; value: T; metadata?: unknown }
  | { type: 'update'; key: TKey; value: T; metadata?: unknown }
  | { type: 'delete'; key: TKey; value: T }

type PersistedCollectionMetadataMutation =
  | { type: 'set'; key: string; value: unknown }
  | { type: 'delete'; key: string }

type PersistedTx<T, TKey> = {
  txId: string
  term: number
  seq: number
  rowVersion: number
  mutations: Array<PersistedRowMutation<T, TKey>>
  collectionMetadataMutations?: Array<PersistedCollectionMetadataMutation>
}
```

This preserves a crucial invariant:

> if a sync transaction commits, both the row data and the metadata that explains
> that row data commit together

### PersistenceAdapter changes

This RFC implies an explicit adapter contract change:

- persisted row hydration must be able to return row metadata
- persisted transaction application must be able to apply collection metadata
  mutations as part of the same commit

One possible updated hydration shape is:

```ts
type PersistedLoadedRow<T, TKey> = {
  key: TKey
  value: T
  metadata?: unknown
}
```

Existing adapters that do not yet provide metadata can remain compatible by
returning rows with `metadata: undefined`.

### Startup and hydration

The persisted runtime loads:

- row values and row metadata during normal subset hydration
- collection metadata during runtime startup

This means metadata restoration does **not** require a separate full database
scan beyond what the collection was already going to hydrate.

In eager mode, the initial hydrated subset carries its row metadata with it.

In on-demand mode, metadata is restored lazily for whichever subsets are loaded.

Collection metadata should be loaded before new sync subscriptions begin
processing, so startup GC or resume-state decisions can run against a stable
baseline.

### Backward compatibility and migrations

Existing persisted databases without metadata support should migrate by:

1. 
adding the new `metadata` column to collection tables +2. creating `collection_metadata` +3. treating missing metadata as `undefined` + +No existing rows need rewriting during migration. + +## Query Collection Usage + +### Problem to solve + +`query-db-collection` keeps ownership state in memory: + +- `queryToRows` +- `rowToQueries` +- `queryRefCounts` + +After restart, persisted rows are restored into the base collection, but query +ownership is lost. The first query result can then incorrectly delete rows that +were hydrated from persistence but not yet claimed in memory. + +### What the query collection should persist + +The query collection should persist two categories of state: + +1. **per-row ownership metadata** +2. **per-query GC state** + +### Row metadata shape + +Ownership should be stored in row metadata, not in a global sidecar blob: + +```ts +type QueryRowMetadata = { + queryCollection?: { + owners: Record + } +} +``` + +Where the `owners` keys are hashed query identities. + +This makes persisted ownership: + +- local to the row it explains +- transactional with the row write +- reconstructible during ordinary row hydration + +This also means ownership updates can happen without inventing synthetic row +value updates. A query may stop owning a row while another query still owns it; +that is a metadata-only row change. + +### Reconstructing in-memory state + +When rows are hydrated from persistence, the query collection can rebuild: + +- `rowToQueries` from each row's persisted `owners` +- `queryToRows` by reversing that mapping + +This reconstruction is incremental. It happens for the rows being hydrated, not +by requiring a separate full read of all persisted rows. + +In on-demand mode, that means the in-memory ownership graph is only complete for +the hydrated subsets. This is sufficient for warm-start correctness of loaded +data, but not by itself sufficient for storage-level GC over entirely cold rows. 
+ +### Query refcounts + +`queryRefCounts` should remain in-memory only. + +They represent live subscriber/process state, not durable row ownership. After +restart, refcounts should begin at zero and grow as real subscriptions attach. + +### Query lifecycle controls + +Query collections now need three distinct lifecycle controls: + +- `staleTime`: freshness of query data when re-requested +- `gcTime`: in-memory observer and TanStack Query cache retention +- `persistedGcTime`: durable placeholder and persisted-row retention + +These controls solve different problems and must remain independent. + +`staleTime` answers: + +- should this query be considered stale when requested again? + +`gcTime` answers: + +- how long should the in-memory query observer and query cache survive after the + query becomes inactive? + +`persistedGcTime` answers: + +- how long should persisted ownership placeholders and persisted rows survive + after the query becomes inactive? + +This separation is required for offline-first users who want persisted query +results to survive long periods offline even after in-memory query GC has +occurred. + +### Persisted query retention state + +Warm-start correctness also requires persisted query retention state for query +placeholders that still own rows but currently have no active subscribers. + +That state is collection-level metadata and should support both finite TTL-based +retention and indefinite retention until the query is revalidated. 
+ +```ts +type PersistedQueryRetentionEntry = + | { + queryHash: string + mode: 'ttl' + expiresAt: number + } + | { + queryHash: string + mode: 'until-revalidated' + } +``` + +Suggested keys: + +- `queryCollection:gc:` + +The value should contain at least: + +- either `expiresAt` for finite TTL retention +- or `mode: 'until-revalidated'` for indefinite persisted retention +- optionally debug fields like `lastActiveAt` + +The `until-revalidated` mode is intended for products that want persisted query +results to remain available indefinitely while offline and only be reconciled +once the same query is requested again. + +### Query identity + +The GC entry must be tied to the same canonical identity used for row ownership. + +If the query collection needs more than the hash for debugging or future +matching, it may also persist: + +- `queryCollection:query:` -> serialized query identity + +This is collection-scoped metadata, not row metadata. + +### GC behavior + +When a query becomes idle and would normally begin its GC countdown: + +1. keep row ownership on the rows +2. persist `queryCollection:gc:` with either: + - `mode: 'ttl'` and `expiresAt`, or + - `mode: 'until-revalidated'` + +On restart: + +1. load collection metadata entries matching `queryCollection:gc:` +2. for any query placeholder with `mode: 'ttl'` and expired `expiresAt`, run + persisted cleanup +3. skip startup GC for placeholders with `mode: 'until-revalidated'` +4. remove the placeholder's ownership from rows when cleanup runs +5. delete rows that no longer have owners +6. delete the GC metadata entry when cleanup completes + +Restart GC must run before new query subscriptions are allowed to attach for the +same collection, or under the same startup mutex that serializes hydration and +replay work. This avoids races where a placeholder is cleaned up while a real +query is simultaneously reattaching. + +When a query with `mode: 'until-revalidated'` is requested again: + +1. 
match the placeholder using the same canonical query identity +2. reconstruct the query's persisted ownership baseline +3. run the query and diff the result against the persisted owned rows +4. remove rows that are no longer owned after revalidation +5. clear or refresh the retention entry based on the newly active query state + +This gives the desired offline behavior: + +- persisted rows remain available indefinitely +- they are not deleted just because in-memory `gcTime` elapsed +- they are eventually reconciled when the query is re-requested + +### Persisted GC implementation strategies + +There are two viable implementation levels: + +#### Level 1: simple row-metadata rewrite + +Use row metadata as the source of truth and perform cleanup by: + +- loading affected rows +- removing the owner from row metadata +- deleting rows whose owner set becomes empty + +This is simpler and consistent with the row-metadata design, but it is less +efficient for large collections. + +Level 1 also has an important limitation: if the adapter cannot efficiently +enumerate rows owned by a query, cleanup may degrade into a full collection scan +and row-metadata JSON rewrite. That is acceptable as an initial correctness +implementation, but it should be treated as a potentially expensive path. + +This cost matters even more when persisted retention is long-lived, because more +query placeholders and retained rows may accumulate over time. + +#### Level 2: normalized ownership index + +Add an adapter-level ownership table: + +```sql +CREATE TABLE query_row_ownership ( + collection_id TEXT NOT NULL, + row_key TEXT NOT NULL, + query_hash TEXT NOT NULL, + PRIMARY KEY (collection_id, row_key, query_hash) +) +``` + +This allows persisted GC to run efficiently in SQLite without scanning or +rewriting every row blob. The row metadata can remain the logical API surface, +while the adapter maintains the normalized index as an optimization. 
+ +This RFC does not require Level 2 for the initial API, but it leaves room for +it because query GC on persisted data is a first-class requirement. + +Another acceptable future variation is to denormalize owned row keys into the GC +entry itself. This RFC does not require that initially, but it is compatible +with the collection metadata model. + +### Query API surface + +The query collection should expose persisted retention separately from +`staleTime` and `gcTime`. + +One possible shape is: + +```ts +queryCollectionOptions({ + queryKey: ['messages', spaceId, pageId], + queryFn, + staleTime: 0, + gcTime: 5 * 60_000, + persistedGcTime: Infinity, +}) +``` + +An alternative shape that leaves more room for future extension is: + +```ts +queryCollectionOptions({ + queryKey: ['messages', spaceId, pageId], + queryFn, + staleTime: 0, + gcTime: 5 * 60_000, + persistedRetention: { + gcTime: Infinity, + }, +}) +``` + +This RFC does not require the final option name, but it does require persisted +retention to be distinct from the existing in-memory `gcTime`. + +## Electric Collection Usage + +### Problem to solve + +Electric has a different persistence problem from query ownership. + +It needs durable collection-level resume state so that after restart it can: + +- warm-start from persisted rows +- safely resume streaming from the correct point + +Today, Electric can hydrate row data from persistence, but it does not have a +dedicated transactional metadata path for persisted resume state. 
+ +### What Electric should persist + +Electric should use both metadata scopes: + +#### Collection metadata + +Use collection metadata for stream resume state, for example: + +```ts +type ElectricResumeMetadata = + | { + kind: 'resume' + offset: string + handle: string + shapeId: string + updatedAt: number + } + | { + kind: 'reset' + updatedAt: number + } +``` + +Suggested key: + +- `electric:resume` + +This metadata must be committed transactionally with the row changes that were +applied from the same Electric stream batch. + +That gives the required safety property: + +- if the row batch commits, the resume state commits +- if the row batch does not commit, the resume state does not advance either + +#### Row metadata + +Electric already attaches sync metadata to rows from stream headers. That row +metadata should flow through the same row metadata API so it can survive restart +where useful. + +This includes information like: + +- relation identity +- other per-row sync headers that are useful after hydration + +### Resume semantics + +On startup, Electric should: + +1. read `electric:resume` from collection metadata +2. prefer that persisted resume state over a default `now` fallback +3. hydrate persisted rows +4. continue streaming from the persisted resume point + +### Interaction with derived in-memory state + +Electric also maintains in-memory derived state such as: + +- tag tracking for move-out handling +- synced key tracking +- snapshot and txid matching helpers + +This RFC does not require every derived Electric structure to become durable in +the first iteration. But it does define the metadata API needed to do so where +necessary. 
+ +The practical rule is: + +- if a piece of Electric state affects whether rows should exist after restart, + it should eventually become durable, either as row metadata or collection + metadata +- if that state cannot yet be reconstructed safely, Electric should fall back to + a conservative reload path rather than assuming warm-started data is exact + +## API Usage Examples + +### Query collection example + +```ts +sync: ({ begin, write, commit, metadata }) => { + const setRowOwners = ( + rowKey: string | number, + owners: Record, + ) => { + const current = (metadata?.row.get(rowKey) ?? {}) as Record + metadata?.row.set(rowKey, { + ...current, + queryCollection: { + owners, + }, + }) + } + + begin() + // Normal sync logic... + commit() +} +``` + +### Electric example + +```ts +sync: ({ begin, write, commit, metadata }) => { + const resumeState = metadata?.collection.get('electric:resume') as + | { offset?: string; handle?: string } + | undefined + + // use resumeState to configure the stream + + // later, when committing a batch: + begin() + write({ type: 'update', value: row, metadata: rowHeaders }) + metadata?.collection.set('electric:resume', { + offset: nextOffset, + handle: nextHandle, + updatedAt: Date.now(), + }) + commit() +} +``` + +## Design Decisions + +### Why row metadata and collection metadata both exist + +They solve different problems: + +- row metadata explains why a specific row exists and what sync state belongs to + it +- collection metadata tracks collection-wide runtime state such as resume points + and query placeholder GC entries + +Trying to store everything in one global metadata blob would force unnecessary +bootstrap work and make transactional coupling harder. + +### Why metadata is part of the sync transaction model + +The metadata API is not just a convenience wrapper. It is part of the sync +transaction model. 
+ +That means implementations must stage row operations, row metadata mutations, +and collection metadata mutations on the same pending sync transaction and apply +them together during commit. + +### Why query GC state is collection metadata + +GC timers are properties of query placeholders, not of individual rows. They +must persist across restart, but they are not naturally attached to a specific +row. + +The ownership edges themselves belong with rows, but the expiration state belongs +with the query placeholder. + +This also allows persisted retention to express policies that are not ordinary +timers, such as `until-revalidated`. + +### Why refcounts are not persisted + +Live refcounts describe current subscribers and current process state. That +state is not durable and should not survive restart. Durable ownership and +placeholder GC state are enough to reconstruct the correct baseline. + +### Why persisted retention is separate from `gcTime` + +Products may want in-memory query state to be short-lived while persisted data +remains durable for much longer, including indefinitely until the query is +requested again. + +Keeping `persistedGcTime` separate allows: + +- normal in-memory memory pressure behavior +- long-lived offline warm starts +- explicit control over how durable query placeholders are retained + +### Metadata replay and recovery + +Cross-tab replay, targeted invalidation, and `pullSince` recovery currently +transport row keys and values, but not metadata deltas. + +The first implementation should preserve correctness before optimizing for +efficiency: + +- if a committed tx includes metadata changes that cannot be replayed exactly, + persisted runtimes may conservatively fall back to reload behavior +- targeted metadata replay can be added later as a follow-up optimization + +This allows metadata support to ship without requiring a fully optimized replay +protocol on day one. 
+ +### Namespacing convention + +Sync implementations that write collection metadata must namespace their keys. + +The convention is: + +- `:` + +Examples: + +- `queryCollection:gc:` +- `queryCollection:query:` +- `electric:resume` + +This RFC does not require a registry mechanism initially, but namespaced keys +are mandatory to avoid collisions. + +## Rollout Plan + +### Phase 1 + +- add optional metadata API to sync params +- stage metadata writes on pending sync transactions +- support metadata-only committed sync transactions +- add SQLite support for row metadata and collection metadata +- hydrate row metadata alongside persisted rows + +### Phase 2 + +- use row metadata in query collections for durable ownership +- persist query placeholder retention state in collection metadata +- implement restart-safe GC behavior +- use conservative reload fallback for metadata-bearing replay/recovery paths +- support separate persisted retention policy for query collections + +### Phase 3 + +- use collection metadata in Electric for persisted resume state +- evaluate which additional Electric-derived state must become durable for exact + restart behavior + +## Open Questions + +1. Should the initial SQLite implementation store query ownership only inside row + metadata blobs, or also maintain a normalized ownership index from the start? + +2. Should collection metadata be exposed to sync implementations only at startup + and during transactions, or also via a read-only utility surface outside + `sync.sync()`? + +3. Should persisted query GC cleanup run only on startup and local unload paths, + or also as part of a background maintenance task in persisted runtimes? + +4. Should Electric persist only a resume offset, or also a stronger stream + identity payload including shape/handle information to detect incompatible + resume state? 
+ +## Testing Invariants + +Any implementation of this RFC should add tests for at least these invariants: + +- metadata commits iff the corresponding sync transaction commits +- row hydration restores row metadata together with row values +- query collection warm-start does not delete persisted rows before ownership is + reconstructed +- persisted query GC deletes rows only when ownership is truly orphaned +- metadata-only sync transactions persist correctly +- truncate clears row metadata and any collection metadata that is defined as + reset-scoped +- Electric resume metadata advances only when the corresponding batch commits +- metadata-bearing replay and recovery paths remain correct, even when they fall + back to reload behavior + +## Recommendation + +Adopt a transactional metadata API with two scopes: + +- row metadata for per-row durable sync state +- collection metadata for durable collection-wide state + +Implement both in the SQLite persisted collection layer, then migrate: + +- `query-db-collection` to durable row ownership plus collection-level GC state +- `electric-db-collection` to transactional persisted resume metadata + +This keeps the API generic while preserving the key correctness property: + +> metadata that affects persisted row behavior commits together with the row +> state it explains From 7a31dfc630ab8545a784997433c7d67d7bc2ea6f Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 14:08:17 +0000 Subject: [PATCH 02/12] feat: add persisted sync metadata implementation plan Break the persisted sync metadata RFC into phased implementation docs covering the core API, SQLite integration, query collection, Electric collection, and required invariants tests. 
Made-with: Cursor --- persisted-sync-metadata-plan/01-core-api.md | 160 +++++++++++++ .../02-sqlite-implementation.md | 162 +++++++++++++ .../03-query-collection.md | 216 ++++++++++++++++++ .../04-electric-collection.md | 166 ++++++++++++++ persisted-sync-metadata-plan/05-test-plan.md | 195 ++++++++++++++++ persisted-sync-metadata-plan/README.md | 51 +++++ 6 files changed, 950 insertions(+) create mode 100644 persisted-sync-metadata-plan/01-core-api.md create mode 100644 persisted-sync-metadata-plan/02-sqlite-implementation.md create mode 100644 persisted-sync-metadata-plan/03-query-collection.md create mode 100644 persisted-sync-metadata-plan/04-electric-collection.md create mode 100644 persisted-sync-metadata-plan/05-test-plan.md create mode 100644 persisted-sync-metadata-plan/README.md diff --git a/persisted-sync-metadata-plan/01-core-api.md b/persisted-sync-metadata-plan/01-core-api.md new file mode 100644 index 000000000..0512418cc --- /dev/null +++ b/persisted-sync-metadata-plan/01-core-api.md @@ -0,0 +1,160 @@ +# Phase 1: Core API + +## Objective + +Add a transactional sync metadata API to `@tanstack/db` that supports: + +- row metadata +- collection metadata +- metadata-only committed sync transactions +- read-your-own-writes semantics inside a sync transaction + +This phase should not require query collection or Electric changes to ship. It +is the core primitive they will later consume. + +## Primary code areas + +- `packages/db/src/types.ts` +- `packages/db/src/collection/sync.ts` +- `packages/db/src/collection/state.ts` +- `packages/db/tests/collection.test.ts` +- any new core tests needed for metadata transaction behavior + +## Proposed implementation steps + +### 1. 
Extend sync types

Update the sync params type to include:

- `metadata.row.get`
- `metadata.row.set`
- `metadata.row.delete`
- `metadata.collection.get`
- `metadata.collection.set`
- `metadata.collection.delete`
- `metadata.collection.list`

Key requirements:

- metadata API is optional
- metadata calls outside an active sync transaction throw
- reads inside an active transaction must reflect staged metadata writes

### 2. Extend pending sync transaction state

Update the internal pending synced transaction shape so it can stage:

- row operations
- row metadata writes
- collection metadata writes
- truncate/reset state

Suggested internal shape:

```ts
type PendingMetadataWrite =
  | { type: 'set'; value: unknown }
  | { type: 'delete' }

type PendingSyncedTransaction<T, TKey> = {
  committed: boolean
  operations: Array<ChangeMessageOrDeleteKeyMessage<T, TKey>>
  deletedKeys: Set<TKey>
  rowMetadataWrites: Map<TKey, PendingMetadataWrite>
  collectionMetadataWrites: Map<string, PendingMetadataWrite>
  truncate?: boolean
  immediate?: boolean
}
```

Exact naming is flexible, but the staged metadata writes must be co-located with
the existing pending sync transaction.

### 3. Add in-memory collection metadata state

Add a new in-memory store in `CollectionStateManager` for collection-scoped
synced metadata.

Suggested field:

```ts
public syncedCollectionMetadata = new Map<string, unknown>()
```

This should behave like `syncedMetadata`, but keyed by metadata key rather than
row key.

### 4. 
Define merge and overwrite semantics + +Document and implement these rules: + +- `write({ metadata })` and `metadata.row.set()` target the same underlying row + metadata state +- later staged writes win within a transaction +- `insert` metadata replaces row metadata +- `update` metadata merges with the existing row metadata, following current + `syncedMetadata` behavior +- `delete` removes row metadata +- `metadata.row.set()` replaces the full row metadata blob +- `metadata.row.delete()` removes row metadata +- `metadata.collection.set()` replaces the full collection metadata value for + that key +- `metadata.collection.delete()` removes the value + +### 5. Support metadata-only transactions + +Ensure `commitPendingTransactions()` can commit a transaction with: + +- zero row operations and non-zero metadata changes +- row metadata changes only +- collection metadata changes only + +This is a hard requirement for later Electric resume persistence and query +retention persistence. + +### 6. Define truncate behavior + +Core truncate semantics must be explicit: + +- clear `syncedData` +- clear `syncedMetadata` +- clear any row-scoped staged metadata +- leave collection metadata alone unless a higher layer explicitly resets it + +The core layer should not silently delete collection metadata on truncate. +Per-sync reset behavior can be layered on later. 
+ +## Edge cases to handle + +- `metadata.row.set()` called before `begin()` +- `metadata.collection.set()` called after `commit()` +- `metadata.row.get()` after a staged `row.set()` in the same transaction +- `metadata.collection.list(prefix)` after multiple staged collection writes +- mixing `write({ metadata })` and `metadata.row.set()` for the same key in the + same transaction +- truncate followed by new staged row metadata in the same transaction +- empty transaction commit with only metadata writes + +## Acceptance criteria + +- core sync API can stage and commit row metadata +- core sync API can stage and commit collection metadata +- metadata reads inside a transaction see staged writes +- metadata-only commits work +- existing collection behavior without metadata remains unchanged + +## Suggested tests + +- commit row metadata through `write({ metadata })` +- commit row metadata through `metadata.row.set()` +- commit collection metadata through `metadata.collection.set()` +- verify read-your-own-writes inside a transaction +- verify last-write-wins for staged row metadata +- verify metadata calls outside a transaction throw +- verify truncate clears row metadata but not collection metadata +- verify metadata-only transactions commit successfully + +## Exit criteria + +Phase 1 is complete when the core collection layer can represent, stage, commit, +and read metadata correctly in memory, independent of any persistence adapter. 
diff --git a/persisted-sync-metadata-plan/02-sqlite-implementation.md b/persisted-sync-metadata-plan/02-sqlite-implementation.md
new file mode 100644
index 000000000..3594d3778
--- /dev/null
+++ b/persisted-sync-metadata-plan/02-sqlite-implementation.md
@@ -0,0 +1,162 @@
+# Phase 2: SQLite Implementation
+
+## Objective
+
+Make `db-sqlite-persisted-collection-core` the reference implementation of the
+metadata API by persisting:
+
+- row metadata with row values
+- collection metadata in a dedicated table
+- row and metadata changes in the same SQLite transaction
+
+## Primary code areas
+
+- `packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts`
+- `packages/db-sqlite-persisted-collection-core/src/persisted.ts`
+- `packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts`
+- `packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts`
+- restart/runtime persistence contract tests
+
+## Proposed implementation steps
+
+### 1. Extend SQLite schema
+
+Add:
+
+- `metadata TEXT` column to persisted collection row tables
+- `collection_metadata` table for collection-scoped metadata
+
+Suggested shape:
+
+```sql
+CREATE TABLE IF NOT EXISTS collection_metadata (
+  collection_id TEXT NOT NULL,
+  key TEXT NOT NULL,
+  value TEXT NOT NULL,
+  updated_at INTEGER NOT NULL,
+  PRIMARY KEY (collection_id, key)
+)
+```
+
+### 2. Extend persisted row hydration
+
+Update the adapter hydration path to return:
+
+```ts
+type PersistedLoadedRow<T, TKey> = {
+  key: TKey
+  value: T
+  metadata?: unknown
+}
+```
+
+The persisted runtime must pass hydrated metadata into the collection sync
+transaction, not drop it during `applyRowsToCollection()` or related paths.
+
+### 3. 
Extend persisted tx shape + +Update internal persisted tx machinery to support: + +- row value writes +- row metadata writes +- collection metadata writes + +This should be reflected in: + +- normalized sync operation shapes +- buffered sync transactions +- adapter `applyCommittedTx()` + +### 4. Make metadata transactional in SQLite + +All of these must commit in one SQLite transaction: + +- row inserts/updates/deletes +- row metadata changes +- collection metadata changes +- version/stream position updates already associated with the tx + +This is the key correctness property for the whole design. + +### 5. Load collection metadata at startup + +The persisted runtime should load collection metadata during startup, before new +sync subscriptions start processing. This is necessary for: + +- query placeholder retention decisions +- Electric resume-state restoration +- future collection-scoped metadata consumers + +### 6. Carry metadata through replay and hydration + +Metadata must not be lost in: + +- initial hydration +- buffered sync transaction application +- internal persisted transaction creation +- self/follower replay + +For the first implementation, it is acceptable to use conservative reload +fallback when a metadata-bearing committed tx cannot be replayed exactly through +targeted invalidation. + +## Important design constraints + +### Metadata-only committed txs + +The persisted layer must support transactions with: + +- no row mutations +- collection metadata changes only + +This is required for: + +- Electric resume metadata commits +- query retention metadata updates + +### Backward compatibility + +Existing adapters or persisted databases without metadata should still function +by treating row metadata as `undefined` and collection metadata as empty. + +### Serialization + +Use the same persisted JSON encoding and decoding path already used for row +values, so metadata can safely round-trip supported value types. 
+ +## Edge cases to handle + +- hydrating rows when `metadata` column is absent in old data +- metadata-only tx commit +- delete row with row metadata present +- row update with partial row value and metadata merge semantics +- crash/restart between repeated tx applications +- replay of metadata-bearing committed txs to follower tabs +- sequence-gap recovery when metadata changed in a missed tx +- full reload fallback correctness when targeted metadata replay is unavailable + +## Acceptance criteria + +- persisted rows round-trip metadata +- collection metadata round-trips independently +- row data and metadata commit atomically +- metadata-only committed txs persist correctly +- startup loads collection metadata and hydrated row metadata +- replay/recovery remains correct, even if it uses conservative reload fallback + +## Suggested tests + +- SQLite adapter stores and loads row metadata +- SQLite adapter stores and loads collection metadata +- `applyCommittedTx()` atomically commits row and collection metadata +- metadata-only tx survives restart +- hydrated rows apply metadata into collection state +- follower runtime converges on metadata-bearing txs +- seq-gap recovery remains correct when metadata changed +- migration from old schema leaves behavior unchanged for collections with no + metadata + +## Exit criteria + +Phase 2 is complete when SQLite-backed persisted collections can durably store, +hydrate, and replay metadata with the same transactional guarantees as row data. 
diff --git a/persisted-sync-metadata-plan/03-query-collection.md b/persisted-sync-metadata-plan/03-query-collection.md new file mode 100644 index 000000000..9cc790d9f --- /dev/null +++ b/persisted-sync-metadata-plan/03-query-collection.md @@ -0,0 +1,216 @@ +# Phase 3: Query Collection + +## Objective + +Migrate `query-db-collection` to the new metadata primitives so it can: + +- preserve row ownership across restart +- support persisted query retention independently from in-memory `gcTime` +- support long-lived offline warm starts +- reconcile retained persisted rows when the same query is requested again + +## Primary code areas + +- `packages/query-db-collection/src/query.ts` +- `packages/query-db-collection/src/serialization.ts` +- `packages/query-db-collection/tests/query.test.ts` +- persisted runtime integration tests combining query collection and SQLite + +## High-level design + +### Persisted on rows + +Store per-row ownership in row metadata: + +```ts +type QueryRowMetadata = { + queryCollection?: { + owners: Record + } +} +``` + +### Persisted at collection scope + +Store query retention/placeholder metadata at collection scope. + +Suggested entry shape: + +```ts +type PersistedQueryRetentionEntry = + | { + queryHash: string + mode: 'ttl' + expiresAt: number + } + | { + queryHash: string + mode: 'until-revalidated' + } +``` + +Suggested keys: + +- `queryCollection:gc:` +- optionally `queryCollection:query:` for serialized query identity +- optionally `queryCollection:metaVersion` for migration/versioning + +## Proposed implementation steps + +### 1. Add persisted retention option to query collection config + +Introduce a durable retention control that is independent from: + +- `staleTime` +- in-memory `gcTime` + +Possible public API shapes: + +```ts +persistedGcTime?: number | typeof Infinity +``` + +or + +```ts +persistedRetention?: { + gcTime: number | typeof Infinity +} +``` + +The second shape is more extensible, but either is acceptable. + +### 2. 
Rebuild ownership from hydrated rows + +When rows are hydrated from persistence: + +- inspect row metadata for query owners +- rebuild `rowToQueries` +- rebuild `queryToRows` + +This reconstruction is incremental and subset-scoped. + +### 3. Keep refcounts in memory only + +Do not persist `queryRefCounts`. + +They represent live subscriber/process state and should restart from zero. + +### 4. Persist ownership changes transactionally + +Whenever ownership changes for a row: + +- update row metadata in the same sync transaction + +This includes metadata-only ownership changes where the row value itself is +unchanged. + +### 5. Persist query retention state + +When a query becomes inactive: + +- if persisted retention is finite, persist `mode: 'ttl'` with `expiresAt` +- if persisted retention is infinite, persist `mode: 'until-revalidated'` + +This retention entry is independent from in-memory query `gcTime`. + +### 6. Startup retention handling + +At startup: + +- load collection metadata retention entries before new subscriptions attach +- clean up expired `ttl` placeholders +- skip startup GC for `until-revalidated` placeholders + +Startup retention cleanup must run under the same mutex or startup critical +section as hydration and replay to avoid races with new query subscriptions. + +### 7. Revalidation flow for indefinite persisted retention + +When a query retained with `mode: 'until-revalidated'` is requested again: + +1. match the placeholder by canonical query identity +2. use persisted ownership as the baseline +3. run the query +4. diff server results against previously owned rows +5. remove rows that are no longer owned +6. clear or refresh the retention entry based on the new lifecycle state + +This is the key behavior required for long offline periods. + +### 8. 
Consider narrowing cleanup diff logic + +As an implementation improvement, consider moving away from diffing against all +rows in `collection._state.syncedData` and instead diff against: + +- the rows previously owned by the specific query + +That is a more semantically accurate baseline and reduces dependence on unrelated +persisted rows already being present in the collection. + +## Important design constraints + +### Persisted retention is not freshness + +Long-lived persisted data may be very stale. + +That is acceptable as long as: + +- re-requesting the query still follows normal query refetch behavior +- persisted retention does not imply anything about `staleTime` + +### Infinite persisted retention needs explicit eviction eventually + +If `persistedGcTime: Infinity` or `mode: 'until-revalidated'` is supported, +storage can grow without bound. This phase does not need to ship explicit +eviction APIs, but the design should leave room for: + +- evict one query placeholder +- evict all query placeholders for a collection +- evict by age or storage-pressure policy + +### Versioning matters + +If query identity hashing or serialization changes across app versions, retained +placeholders may become unreachable. 
+ +The implementation should leave room for: + +- metadata versioning +- collection-level invalidation of incompatible retained placeholders + +## Edge cases to handle + +- multiple overlapping queries owning the same row +- query unsubscribes and resubscribes before persisted retention cleanup runs +- query retained indefinitely while another query updates shared rows +- startup with only a subset of rows hydrated in on-demand mode +- placeholder exists but the same query is never requested again +- query identity serialization changes across versions +- metadata-only ownership updates with unchanged row values +- rows retained indefinitely while offline for a long period + +## Acceptance criteria + +- restart does not incorrectly delete persisted rows before ownership is restored +- row ownership survives restart +- query retention is persisted independently from `gcTime` +- `until-revalidated` retention keeps persisted rows available indefinitely +- re-requesting a retained query reconciles the retained rows correctly + +## Suggested tests + +- warm-start with multiple disjoint queries does not drop unrelated rows +- overlapping queries preserve shared row ownership across restart +- finite persisted retention expires and cleans up orphaned rows +- indefinite persisted retention survives restart and long offline gaps +- re-requesting an indefinite retained query reconciles deleted rows correctly +- in-memory `gcTime` expiry does not remove indefinitely retained persisted rows +- on-demand hydration reconstructs ownership for loaded subsets +- metadata-only ownership updates persist correctly + +## Exit criteria + +Phase 3 is complete when query collections can warm-start safely from persisted +data, preserve ownership across restart, and independently control durable query +retention for offline-first users. 
diff --git a/persisted-sync-metadata-plan/04-electric-collection.md b/persisted-sync-metadata-plan/04-electric-collection.md new file mode 100644 index 000000000..13346f58a --- /dev/null +++ b/persisted-sync-metadata-plan/04-electric-collection.md @@ -0,0 +1,166 @@ +# Phase 4: Electric Collection + +## Objective + +Migrate `electric-db-collection` to use transactional collection metadata and +row metadata so it can: + +- persist durable resume state +- warm-start from persisted rows safely +- resume streaming from a persisted stream identity when valid +- leave room for future persistence of additional Electric-derived state + +## Primary code areas + +- `packages/electric-db-collection/src/electric.ts` +- `packages/electric-db-collection/tests/electric.test.ts` +- `packages/electric-db-collection/tests/electric-live-query.test.ts` +- persisted integration tests combining Electric and SQLite persistence + +## High-level design + +### Collection metadata + +Persist Electric resume state at collection scope. + +Suggested shape: + +```ts +type ElectricResumeMetadata = + | { + kind: 'resume' + offset: string + handle: string + shapeId: string + updatedAt: number + } + | { + kind: 'reset' + updatedAt: number + } +``` + +Suggested key: + +- `electric:resume` + +### Row metadata + +Persist useful per-row sync metadata through the same row metadata channel used +by `write({ metadata })`. + +Examples: + +- relation identity +- row sync headers that are useful after hydration + +## Proposed implementation steps + +### 1. Read resume metadata at startup + +On sync initialization: + +- read `electric:resume` from collection metadata +- if `kind: 'resume'`, prefer that persisted stream identity over the current + fallback behavior +- if resume metadata is absent or invalid, fall back to the existing startup + behavior + +### 2. 
Persist resume state transactionally + +When an Electric batch advances the durable resume point: + +- stage the new `electric:resume` metadata in the same sync transaction as the + row changes from that batch + +This prevents the invalid state where a resume token advances beyond the rows +that were actually committed. + +### 3. Support metadata-only resume updates when needed + +If Electric needs to persist a new durable resume state on a control-message +boundary without a row mutation in the same batch, use a metadata-only sync +transaction. + +This depends on Phase 1 and Phase 2 support for metadata-only commits. + +### 4. Define reset behavior + +When Electric determines the persisted resume state is invalid or a must-refetch +equivalent restart path is required: + +- clear or replace `electric:resume` with a `kind: 'reset'` marker +- perform the corresponding conservative reload path + +This makes restart behavior explicit rather than relying on stale resume state. + +### 5. Carry row metadata through hydration + +Hydrated rows from SQLite should restore the Electric row metadata that was +originally written through `write({ metadata })`. + +This provides a better baseline for future Electric restart reconstruction work. + +## Important design constraints + +### Resume metadata is not the full Electric state + +Electric also maintains derived in-memory state such as: + +- tag indexes +- synced key tracking +- snapshot and txid matching state + +This phase does not require exact restart reconstruction of every one of these. +It only requires a sound transactional place to persist the pieces that should +survive restart. + +### Be conservative when reconstruction is incomplete + +If persisted resume metadata is present but the required derived state is not +reconstructible safely, Electric should fall back to a conservative reload path +rather than assume exact restart correctness. 
+ +### Strong stream identity matters + +Resume metadata should persist enough identity to detect incompatible resume +state, not just an offset. + +At minimum: + +- `offset` +- `handle` +- `shapeId` + +## Edge cases to handle + +- persisted resume metadata missing one required field +- resume metadata exists but shape identity no longer matches server state +- metadata-only resume update +- restart after partially applied or replayed batches +- must-refetch/reset flows clearing or replacing persisted resume state +- hydrated rows restoring row metadata while resume metadata is absent + +## Acceptance criteria + +- Electric resume state survives restart +- resume metadata only advances when the corresponding batch commits +- invalid resume metadata triggers conservative fallback +- metadata-only resume commits work +- persisted row metadata survives hydration where relevant + +## Suggested tests + +- batch commit persists rows and resume metadata atomically +- failed batch does not advance resume metadata +- restart uses persisted resume metadata when valid +- restart falls back safely when persisted resume metadata is invalid +- metadata-only resume tx survives restart +- must-refetch/reset clears or invalidates persisted resume state correctly +- row metadata written by Electric survives SQLite hydration + +## Exit criteria + +Phase 4 is complete when Electric has a durable, transactional resume-state +story that is compatible with persisted warm starts and conservative fallback +behavior. 
diff --git a/persisted-sync-metadata-plan/05-test-plan.md b/persisted-sync-metadata-plan/05-test-plan.md new file mode 100644 index 000000000..e4e66e93c --- /dev/null +++ b/persisted-sync-metadata-plan/05-test-plan.md @@ -0,0 +1,195 @@ +# Phase 5: Test Plan + +## Objective + +Validate the persisted sync metadata design with invariants-focused tests across: + +- core collection state +- SQLite persistence +- query collection restart and retention behavior +- Electric resume behavior + +This plan is intentionally thorough. The feature crosses multiple layers and is +easy to get "mostly working" while still breaking on restart, replay, or long +offline gaps. + +## Testing principles + +- prefer behavior/invariant tests over implementation-detail tests +- add restart tests wherever durable state is introduced +- add crash-consistency style tests wherever atomicity is claimed +- test both eager and on-demand flows where behavior differs +- test replay/recovery paths, not just happy-path startup + +## Invariants + +### Core invariants + +- metadata that is staged in a sync transaction is visible to reads in that same + transaction +- metadata is committed iff the surrounding sync transaction commits +- metadata-only transactions are valid committed sync transactions +- row metadata and collection metadata are isolated but share the same commit + boundary +- truncate clears row metadata but does not silently clear collection metadata + +### SQLite invariants + +- row values and row metadata are committed atomically +- collection metadata commits atomically with the same persisted tx +- hydrated rows restore both value and metadata +- old persisted databases without metadata remain readable + +### Query collection invariants + +- warm-start does not delete unrelated persisted rows before ownership is + reconstructed +- row ownership survives restart +- query placeholder retention survives restart +- finite persisted retention expires correctly +- indefinite persisted 
retention does not expire due to in-memory `gcTime` +- re-requesting an indefinitely retained query reconciles retained rows +- retained rows may be stale, but they remain available until revalidation or + explicit cleanup + +### Electric invariants + +- resume metadata advances iff the corresponding batch commits +- invalid resume metadata does not cause unsafe resume behavior +- metadata-only resume updates are persisted +- restart can use persisted resume metadata when valid + +### Replay and recovery invariants + +- follower tabs converge on metadata-bearing tx behavior +- sequence-gap recovery remains correct when metadata changed +- conservative reload fallback remains correct when targeted metadata replay is + unavailable + +## Test matrix + +### Core API tests + +Target files: + +- `packages/db/tests/collection.test.ts` +- additional focused tests if needed + +Cases: + +- `metadata.row.set()` inside a transaction +- `metadata.collection.set()` inside a transaction +- read-your-own-writes for row metadata +- read-your-own-writes for collection metadata +- metadata-only commit +- metadata calls outside a transaction throw +- `write({ metadata })` and `metadata.row.set()` on the same row in one tx +- truncate behavior with row metadata present + +### SQLite adapter and runtime tests + +Target files: + +- `packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts` +- `packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts` +- runtime persistence contract tests + +Cases: + +- row metadata persists and hydrates +- collection metadata persists and loads +- metadata-only tx survives restart +- row delete removes row metadata +- migration from pre-metadata schema +- metadata-bearing tx replay correctness +- sequence-gap recovery with metadata changes + +### Query collection integration tests + +Target files: + +- `packages/query-db-collection/tests/query.test.ts` +- new persisted integration tests as needed + +Cases: + +- multiple 
disjoint queries warm-start without deleting each other's rows +- overlapping queries preserve shared ownership across restart +- persisted ownership reconstruction in eager mode +- persisted ownership reconstruction in on-demand mode for loaded subsets +- finite persisted retention expiry +- `persistedGcTime: Infinity` or equivalent indefinite retention +- in-memory `gcTime` expiry does not remove indefinitely retained persisted rows +- re-requesting an indefinitely retained query reconciles stale/deleted rows +- query identity version mismatch / incompatible retained metadata fallback + +### Electric integration tests + +Target files: + +- `packages/electric-db-collection/tests/electric.test.ts` +- `packages/electric-db-collection/tests/electric-live-query.test.ts` +- new persisted integration tests as needed + +Cases: + +- commit rows + resume metadata atomically +- failed commit does not advance resume metadata +- metadata-only resume transaction +- valid resume metadata used on restart +- invalid resume metadata triggers conservative fallback +- reset/must-refetch clears or invalidates resume metadata +- row metadata survives SQLite hydration + +## Suggested delivery cadence + +### While implementing Phase 1 + +Add: + +- core transaction semantics tests +- metadata-only transaction tests + +### While implementing Phase 2 + +Add: + +- SQLite schema and hydration tests +- adapter atomicity tests +- runtime restart tests + +### While implementing Phase 3 + +Add: + +- query ownership restart tests +- finite retention tests +- indefinite retention tests +- long-offline warm-start tests + +### While implementing Phase 4 + +Add: + +- resume metadata tests +- metadata-only resume tests +- invalid resume fallback tests + +## Failure modes the tests must catch + +- persisted rows exist but metadata is missing after restart +- metadata exists but corresponding rows were not committed +- query warm-start deletes rows it does not own +- rows retained indefinitely disappear 
because in-memory GC elapsed +- startup GC races with new subscriptions +- follower runtimes diverge because metadata-bearing txs were not replayed +- Electric resumes from a token that was never durably committed + +## Definition of done + +This plan is complete when: + +- each phase ships with the tests listed for that phase +- restart, replay, and retention invariants are covered +- the long-offline persisted query use case is explicitly validated +- metadata atomicity is tested, not just assumed diff --git a/persisted-sync-metadata-plan/README.md b/persisted-sync-metadata-plan/README.md new file mode 100644 index 000000000..10c99ec74 --- /dev/null +++ b/persisted-sync-metadata-plan/README.md @@ -0,0 +1,51 @@ +# Persisted Sync Metadata Plan + +This directory breaks the `RFC-persisted-sync-metadata.md` design into an +implementation plan with explicit phases. + +The recommended execution order is: + +1. `01-core-api.md` +2. `02-sqlite-implementation.md` +3. `03-query-collection.md` +4. `04-electric-collection.md` +5. 
`05-test-plan.md` + +## Goals + +- land the core metadata transaction model first +- make SQLite the reference persistence implementation +- migrate `query-db-collection` onto the new primitives +- migrate `electric-db-collection` onto the new primitives +- validate correctness with thorough invariants-focused tests + +## Non-Goals + +- optimizing every replay and GC path in the first pass +- implementing every possible metadata-backed feature before the core API is + stable + +## Guiding principles + +- metadata that affects persisted row behavior must commit with the row state it + explains +- row metadata and collection metadata are distinct scopes +- metadata-only sync transactions are first-class +- restart correctness comes before targeted replay optimization +- persisted query retention is separate from in-memory `gcTime` + +## Phase dependencies + +- Phase 1 is required before any other phase +- Phase 2 depends on Phase 1 +- Phase 3 depends on Phases 1 and 2 +- Phase 4 depends on Phases 1 and 2 +- Phase 5 spans all phases and should be updated continuously + +## Recommended delivery strategy + +- implement Phase 1 and Phase 2 behind a narrow internal API +- land Phase 3 next because it is the primary motivator +- land Phase 4 once the core metadata model has proven stable under restart and + replay tests +- keep `05-test-plan.md` as the definition of done for each phase From a9e43d48ff53b2f0a11bac38f0ffc792dcf3877c Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 14:15:25 +0000 Subject: [PATCH 03/12] docs: refine persisted sync metadata design docs Tighten the RFC and phased plan around startup metadata reads, query-owned reconciliation, cold-row retention cleanup, replay fallback behavior, and Electric reset semantics. 
Made-with: Cursor --- RFC-persisted-sync-metadata.md | 24 ++++--- persisted-sync-metadata-plan/01-core-api.md | 46 +++++++++++-- .../02-sqlite-implementation.md | 44 +++++++++--- .../03-query-collection.md | 68 +++++++++++++++++-- .../04-electric-collection.md | 8 +++ persisted-sync-metadata-plan/05-test-plan.md | 40 ++++++++++- 6 files changed, 192 insertions(+), 38 deletions(-) diff --git a/RFC-persisted-sync-metadata.md b/RFC-persisted-sync-metadata.md index ee0a336cd..d853aab0e 100644 --- a/RFC-persisted-sync-metadata.md +++ b/RFC-persisted-sync-metadata.md @@ -334,16 +334,6 @@ Collection metadata should be loaded before new sync subscriptions begin processing, so startup GC or resume-state decisions can run against a stable baseline. -### Backward compatibility and migrations - -Existing persisted databases without metadata support should migrate by: - -1. adding the new `metadata` column to collection tables -2. creating `collection_metadata` -3. treating missing metadata as `undefined` - -No existing rows need rewriting during migration. 
- ## Query Collection Usage ### Problem to solve @@ -722,7 +712,17 @@ sync: ({ begin, write, commit, metadata }) => { ```ts sync: ({ begin, write, commit, metadata }) => { const resumeState = metadata?.collection.get('electric:resume') as - | { offset?: string; handle?: string } + | { + kind: 'resume' + offset: string + handle: string + shapeId: string + updatedAt: number + } + | { + kind: 'reset' + updatedAt: number + } | undefined // use resumeState to configure the stream @@ -731,8 +731,10 @@ sync: ({ begin, write, commit, metadata }) => { begin() write({ type: 'update', value: row, metadata: rowHeaders }) metadata?.collection.set('electric:resume', { + kind: 'resume', offset: nextOffset, handle: nextHandle, + shapeId: nextShapeId, updatedAt: Date.now(), }) commit() diff --git a/persisted-sync-metadata-plan/01-core-api.md b/persisted-sync-metadata-plan/01-core-api.md index 0512418cc..83f551232 100644 --- a/persisted-sync-metadata-plan/01-core-api.md +++ b/persisted-sync-metadata-plan/01-core-api.md @@ -37,7 +37,9 @@ Update the sync params type to include: Key requirements: - metadata API is optional -- metadata calls outside an active sync transaction throw +- metadata writes outside an active sync transaction throw +- startup reads through `metadata.row.get`, `metadata.collection.get`, and + `metadata.collection.list` are allowed outside a transaction - reads inside an active transaction must reflect staged metadata writes ### 2. Extend pending sync transaction state @@ -84,16 +86,19 @@ public syncedCollectionMetadata = new Map() This should behave like `syncedMetadata`, but keyed by metadata key rather than row key. -### 4. Define merge and overwrite semantics +Note: this naming sits next to the existing row-scoped `syncedMetadata`. If the +implementation keeps both names, it should add clear comments distinguishing row +metadata from collection metadata. 
Renaming the existing row-scoped field to +something more explicit can be considered as a follow-up cleanup. + +### 4. Define overwrite semantics Document and implement these rules: - `write({ metadata })` and `metadata.row.set()` target the same underlying row metadata state - later staged writes win within a transaction -- `insert` metadata replaces row metadata -- `update` metadata merges with the existing row metadata, following current - `syncedMetadata` behavior +- every staged row metadata write is a replace at the transaction layer - `delete` removes row metadata - `metadata.row.set()` replaces the full row metadata blob - `metadata.row.delete()` removes row metadata @@ -101,6 +106,15 @@ Document and implement these rules: that key - `metadata.collection.delete()` removes the value +If callers need merge behavior, they should: + +1. read the current metadata value +2. compute the merged result +3. stage the merged result explicitly + +This avoids contradictory rules when `write({ metadata })` and +`metadata.row.set()` are both used for the same row in one transaction. + ### 5. Support metadata-only transactions Ensure `commitPendingTransactions()` can commit a transaction with: @@ -124,14 +138,32 @@ Core truncate semantics must be explicit: The core layer should not silently delete collection metadata on truncate. Per-sync reset behavior can be layered on later. +### 7. Define row-delete semantics + +Deleting a row through sync also deletes its row metadata. + +This should hold regardless of whether row metadata had previously been staged +through `write({ metadata })` or `metadata.row.set()`. + +### 8. Scope metadata to sync paths + +This metadata API is sync-only. + +It is not intended to flow through user mutation transport types such as +`PersistedMutationEnvelope`. User mutations may still observe `syncMetadata` +coming from already-synced rows, but they do not independently persist metadata +through this API. 
+ ## Edge cases to handle - `metadata.row.set()` called before `begin()` - `metadata.collection.set()` called after `commit()` +- `metadata.collection.get()` called before `begin()` during startup - `metadata.row.get()` after a staged `row.set()` in the same transaction - `metadata.collection.list(prefix)` after multiple staged collection writes - mixing `write({ metadata })` and `metadata.row.set()` for the same key in the same transaction +- row delete after earlier staged row metadata updates in the same transaction - truncate followed by new staged row metadata in the same transaction - empty transaction commit with only metadata writes @@ -149,8 +181,10 @@ Per-sync reset behavior can be layered on later. - commit row metadata through `metadata.row.set()` - commit collection metadata through `metadata.collection.set()` - verify read-your-own-writes inside a transaction +- verify startup reads outside a transaction succeed - verify last-write-wins for staged row metadata -- verify metadata calls outside a transaction throw +- verify metadata writes outside a transaction throw +- verify row delete removes row metadata - verify truncate clears row metadata but not collection metadata - verify metadata-only transactions commit successfully diff --git a/persisted-sync-metadata-plan/02-sqlite-implementation.md b/persisted-sync-metadata-plan/02-sqlite-implementation.md index 3594d3778..f922210b3 100644 --- a/persisted-sync-metadata-plan/02-sqlite-implementation.md +++ b/persisted-sync-metadata-plan/02-sqlite-implementation.md @@ -66,6 +66,8 @@ This should be reflected in: - normalized sync operation shapes - buffered sync transactions - adapter `applyCommittedTx()` +- replay payload classification so the runtime knows when exact targeted replay + is possible and when it must fall back to reload ### 4. Make metadata transactional in SQLite @@ -87,6 +89,17 @@ sync subscriptions start processing. 
This is necessary for:

- Electric resume-state restoration
- future collection-scoped metadata consumers

+This should be reflected in the adapter contract explicitly, for example via:
+
+```ts
+loadCollectionMetadata?: (
+  collectionId: string,
+) => Promise<Array<{ key: string; value: unknown }>>
+```
+
+The exact method name is flexible, but startup collection metadata loading must
+be a first-class adapter capability.
+
 ### 6. Carry metadata through replay and hydration
 
 Metadata must not be lost in:
@@ -95,10 +108,18 @@
 - buffered sync transaction application
 - internal persisted transaction creation
 - self/follower replay
+- `pullSince`-style gap recovery
+
+For the first pass, replay behavior should be explicit:
 
-For the first implementation, it is acceptable to use conservative reload
-fallback when a metadata-bearing committed tx cannot be replayed exactly through
-targeted invalidation.
+- hydration must carry row metadata exactly
+- local commit must carry row and collection metadata exactly
+- if a committed tx contains metadata changes and the targeted replay protocol
+  cannot represent them exactly, followers should fall back to reload behavior
+- if gap recovery encounters metadata-bearing changes it cannot replay exactly,
+  recovery should also fall back to reload behavior
+
+This must be documented in the implementation, not left implicit.
 
 ## Important design constraints
 
@@ -114,19 +135,20 @@ This is required for:
 
 - Electric resume metadata commits
 - query retention metadata updates
 
-### Backward compatibility
-
-Existing adapters or persisted databases without metadata should still function
-by treating row metadata as `undefined` and collection metadata as empty.
-
 ### Serialization
 
 Use the same persisted JSON encoding and decoding path already used for row
 values, so metadata can safely round-trip supported value types. 
+### Crash-consistency boundary + +The implementation must keep row writes, row metadata writes, and collection +metadata writes inside the same SQLite transaction boundary. + +If any part of the tx fails, all three categories must roll back together. + ## Edge cases to handle -- hydrating rows when `metadata` column is absent in old data - metadata-only tx commit - delete row with row metadata present - row update with partial row value and metadata merge semantics @@ -134,6 +156,7 @@ values, so metadata can safely round-trip supported value types. - replay of metadata-bearing committed txs to follower tabs - sequence-gap recovery when metadata changed in a missed tx - full reload fallback correctness when targeted metadata replay is unavailable +- startup collection metadata load before subscription processing ## Acceptance criteria @@ -153,8 +176,7 @@ values, so metadata can safely round-trip supported value types. - hydrated rows apply metadata into collection state - follower runtime converges on metadata-bearing txs - seq-gap recovery remains correct when metadata changed -- migration from old schema leaves behavior unchanged for collections with no - metadata +- startup collection metadata loads before any sync subscription attaches ## Exit criteria diff --git a/persisted-sync-metadata-plan/03-query-collection.md b/persisted-sync-metadata-plan/03-query-collection.md index 9cc790d9f..a467cd6ae 100644 --- a/persisted-sync-metadata-plan/03-query-collection.md +++ b/persisted-sync-metadata-plan/03-query-collection.md @@ -53,7 +53,7 @@ Suggested keys: - `queryCollection:gc:` - optionally `queryCollection:query:` for serialized query identity -- optionally `queryCollection:metaVersion` for migration/versioning +- optionally `queryCollection:metaVersion` for query metadata versioning ## Proposed implementation steps @@ -80,6 +80,9 @@ persistedRetention?: { The second shape is more extensible, but either is acceptable. 
+This should be added to the public query collection option types defined in +`packages/query-db-collection/src/query.ts`. + ### 2. Rebuild ownership from hydrated rows When rows are hydrated from persistence: @@ -125,7 +128,28 @@ At startup: Startup retention cleanup must run under the same mutex or startup critical section as hydration and replay to avoid races with new query subscriptions. -### 7. Revalidation flow for indefinite persisted retention +### 7. Explicit cold-row cleanup strategy for expired TTL placeholders + +Phase 3 must define a concrete cold-row cleanup path for on-demand mode. + +For the initial Level 1 implementation, that path should be one of: + +- adapter-driven full scan of persisted rows with non-null row metadata, or +- denormalized owned row keys stored on the retention entry itself + +The implementation must choose one and document it. Startup cleanup cannot be +left as an abstract promise if expired placeholders may own rows that are not +currently hydrated. + +If the first implementation uses the scan-based path, it should do all of the +following under the same startup mutex: + +1. find rows owned by the expired placeholder +2. remove the placeholder from each row's owner set +3. delete rows whose owner set becomes empty +4. delete the placeholder retention entry + +### 8. Revalidation flow for indefinite persisted retention When a query retained with `mode: 'until-revalidated'` is requested again: @@ -138,15 +162,27 @@ When a query retained with `mode: 'until-revalidated'` is requested again: This is the key behavior required for long offline periods. -### 8. Consider narrowing cleanup diff logic +This revalidation baseline is required for correctness. The implementation must +not continue to diff only against all rows in `collection._state.syncedData`, +because that would preserve the warm-start deletion bug this phase is intended +to fix. 
-As an implementation improvement, consider moving away from diffing against all -rows in `collection._state.syncedData` and instead diff against: +In on-demand mode, if the previously owned rows are not all hydrated in memory, +the implementation must obtain the baseline from persisted ownership data +directly, either via: + +- row metadata scan / lookup, or +- denormalized owned row keys on the retention entry, or +- a future normalized ownership index + +### 9. Use query-owned baseline for reconciliation + +When reconciling a query after restart or revalidation, diff against: - the rows previously owned by the specific query -That is a more semantically accurate baseline and reduces dependence on unrelated -persisted rows already being present in the collection. +This is not an optional improvement. It is the required reconciliation model for +Phase 3. ## Important design constraints @@ -169,6 +205,21 @@ eviction APIs, but the design should leave room for: - evict all query placeholders for a collection - evict by age or storage-pressure policy +### Runtime TTL expiry needs explicit policy + +Finite persisted retention should not only be handled on restart. + +When a `ttl` placeholder expires while the app remains running, the runtime +should schedule the same cleanup flow that startup cleanup would perform: + +1. locate the rows owned by the placeholder +2. remove the placeholder from those rows +3. delete orphaned rows +4. remove the retention entry + +This runtime TTL cleanup should run under the same mutex used for startup +cleanup and query revalidation. 
+ ### Versioning matters If query identity hashing or serialization changes across app versions, retained @@ -185,6 +236,7 @@ The implementation should leave room for: - query unsubscribes and resubscribes before persisted retention cleanup runs - query retained indefinitely while another query updates shared rows - startup with only a subset of rows hydrated in on-demand mode +- expired `ttl` placeholder owning only cold rows in on-demand mode - placeholder exists but the same query is never requested again - query identity serialization changes across versions - metadata-only ownership updates with unchanged row values @@ -203,10 +255,12 @@ The implementation should leave room for: - warm-start with multiple disjoint queries does not drop unrelated rows - overlapping queries preserve shared row ownership across restart - finite persisted retention expires and cleans up orphaned rows +- finite persisted retention expires while the app remains running - indefinite persisted retention survives restart and long offline gaps - re-requesting an indefinite retained query reconciles deleted rows correctly - in-memory `gcTime` expiry does not remove indefinitely retained persisted rows - on-demand hydration reconstructs ownership for loaded subsets +- on-demand expired-placeholder cleanup handles cold rows correctly - metadata-only ownership updates persist correctly ## Exit criteria diff --git a/persisted-sync-metadata-plan/04-electric-collection.md b/persisted-sync-metadata-plan/04-electric-collection.md index 13346f58a..7586e4a8e 100644 --- a/persisted-sync-metadata-plan/04-electric-collection.md +++ b/persisted-sync-metadata-plan/04-electric-collection.md @@ -94,6 +94,14 @@ equivalent restart path is required: This makes restart behavior explicit rather than relying on stale resume state. 
+Ordering requirement: + +- write the `kind: 'reset'` marker before starting the refetch/reload path, + using a metadata-only transaction if needed + +That way, if the app crashes during refetch, restart will not attempt to resume +from stale persisted stream state. + ### 5. Carry row metadata through hydration Hydrated rows from SQLite should restore the Electric row metadata that was diff --git a/persisted-sync-metadata-plan/05-test-plan.md b/persisted-sync-metadata-plan/05-test-plan.md index e4e66e93c..9dc49fc0e 100644 --- a/persisted-sync-metadata-plan/05-test-plan.md +++ b/persisted-sync-metadata-plan/05-test-plan.md @@ -32,13 +32,13 @@ offline gaps. - row metadata and collection metadata are isolated but share the same commit boundary - truncate clears row metadata but does not silently clear collection metadata +- startup reads of persisted metadata are allowed outside a transaction ### SQLite invariants - row values and row metadata are committed atomically - collection metadata commits atomically with the same persisted tx - hydrated rows restore both value and metadata -- old persisted databases without metadata remain readable ### Query collection invariants @@ -79,10 +79,11 @@ Cases: - `metadata.row.set()` inside a transaction - `metadata.collection.set()` inside a transaction +- `metadata.collection.get()` outside a transaction during startup - read-your-own-writes for row metadata - read-your-own-writes for collection metadata - metadata-only commit -- metadata calls outside a transaction throw +- metadata writes outside a transaction throw - `write({ metadata })` and `metadata.row.set()` on the same row in one tx - truncate behavior with row metadata present @@ -100,7 +101,6 @@ Cases: - collection metadata persists and loads - metadata-only tx survives restart - row delete removes row metadata -- migration from pre-metadata schema - metadata-bearing tx replay correctness - sequence-gap recovery with metadata changes @@ -118,9 +118,13 @@ Cases: - 
persisted ownership reconstruction in eager mode - persisted ownership reconstruction in on-demand mode for loaded subsets - finite persisted retention expiry +- finite persisted retention expiry while the app remains running - `persistedGcTime: Infinity` or equivalent indefinite retention - in-memory `gcTime` expiry does not remove indefinitely retained persisted rows - re-requesting an indefinitely retained query reconciles stale/deleted rows +- query reconciliation diffs against the query-owned baseline, not the whole + collection +- expired placeholder cleanup handles cold rows in on-demand mode - query identity version mismatch / incompatible retained metadata fallback ### Electric integration tests @@ -157,6 +161,8 @@ Add: - SQLite schema and hydration tests - adapter atomicity tests - runtime restart tests +- transaction-boundary tests that prove row data, row metadata, and collection + metadata share the same SQLite commit/rollback boundary ### While implementing Phase 3 @@ -166,6 +172,8 @@ Add: - finite retention tests - indefinite retention tests - long-offline warm-start tests +- on-demand cold-row cleanup tests +- runtime TTL expiry tests ### While implementing Phase 4 @@ -185,6 +193,32 @@ Add: - follower runtimes diverge because metadata-bearing txs were not replayed - Electric resumes from a token that was never durably committed +## Crash-consistency testing approach + +Where atomicity is claimed, tests should verify transaction boundaries rather +than merely assume SQLite atomicity. + +Suggested approach: + +- use a driver or adapter double that records transaction boundaries +- force failures after some writes have been staged but before commit completes +- verify row values, row metadata, and collection metadata all roll back + together + +This is especially important for `applyCommittedTx()` and any metadata-only tx +paths. 
+ +## Performance regression checks + +Add lightweight regression coverage for: + +- row hydration with metadata present +- row writes with metadata absent +- row writes with metadata present + +These do not need to be strict benchmarks, but they should catch obvious +accidental regressions caused by metadata serialization or replay changes. + ## Definition of done This plan is complete when: From df97fb62148ea41f03e688420a9d9a6bcd5ec032 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 14:52:40 +0000 Subject: [PATCH 04/12] feat: implement persisted sync metadata support Add transactional row and collection metadata plumbing across core sync state, SQLite persistence, query collections, and Electric resume state so persisted ownership and resume metadata survive restarts. Made-with: Cursor --- .../src/persisted.ts | 205 +++++++++++++++++- .../src/sqlite-core-adapter.ts | 112 +++++++++- .../tests/persisted.test.ts | 56 +++++ .../tests/sqlite-core-adapter.test.ts | 121 +++++++++++ packages/db/src/collection/state.ts | 48 ++-- packages/db/src/collection/sync.ts | 116 ++++++++++ packages/db/src/types.ts | 18 ++ packages/db/tests/collection.test.ts | 121 +++++++++++ .../electric-db-collection/src/electric.ts | 59 ++++- packages/query-db-collection/src/query.ts | 124 ++++++++++- .../query-db-collection/tests/query.test.ts | 41 +++- 11 files changed, 976 insertions(+), 45 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index 1691b19cd..9aef35143 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -19,6 +19,7 @@ import type { PendingMutation, SyncConfig, SyncConfigRes, + SyncMetadataApi, UpdateMutationFnParams, UtilsRecord, } from '@tanstack/db' @@ -179,10 +180,30 @@ export type PersistedTx< seq: number rowVersion: number mutations: Array< - | { type: `insert`; 
key: TKey; value: T } - | { type: `update`; key: TKey; value: T } + | { + type: `insert` + key: TKey + value: T + metadata?: unknown + metadataChanged?: boolean + } + | { + type: `update` + key: TKey + value: T + metadata?: unknown + metadataChanged?: boolean + } | { type: `delete`; key: TKey; value: T } > + rowMetadataMutations?: Array< + | { type: `set`; key: TKey; value: unknown } + | { type: `delete`; key: TKey } + > + collectionMetadataMutations?: Array< + | { type: `set`; key: string; value: unknown } + | { type: `delete`; key: string } + > } export interface PersistenceAdapter< @@ -193,11 +214,14 @@ export interface PersistenceAdapter< collectionId: string, options: LoadSubsetOptions, ctx?: { requiredIndexSignatures?: ReadonlyArray }, - ) => Promise> + ) => Promise> applyCommittedTx: ( collectionId: string, tx: PersistedTx, ) => Promise + loadCollectionMetadata?: ( + collectionId: string, + ) => Promise> ensureIndex: ( collectionId: string, signature: string, @@ -366,13 +390,14 @@ type SyncControlFns = { write: | (( message: - | { type: `insert`; value: T } - | { type: `update`; value: T } + | { type: `insert`; value: T; metadata?: Record } + | { type: `update`; value: T; metadata?: Record } | { type: `delete`; key: TKey }, ) => void) | null commit: (() => void) | null truncate: (() => void) | null + metadata: SyncMetadataApi | null } /** @@ -511,6 +536,7 @@ type NormalizedSyncOperation = type: `update` key: TKey value: T + metadata?: Record } | { type: `delete` @@ -520,6 +546,11 @@ type NormalizedSyncOperation = type BufferedSyncTransaction = { operations: Array> + rowMetadataWrites: Map + collectionMetadataWrites: Map< + string, + { type: `set`; value: unknown } | { type: `delete` } + > truncate: boolean internal: boolean } @@ -536,6 +567,7 @@ type SyncWriteNormalization = { | { type: `update` value: T + metadata?: Record } | { type: `delete` @@ -735,6 +767,7 @@ class PersistedCollectionRuntime< write: null, commit: null, truncate: null, + metadata: null, 
} private started = false private startPromise: Promise | null = null @@ -771,6 +804,7 @@ class PersistedCollectionRuntime< write: null, commit: null, truncate: null, + metadata: null, } } @@ -829,6 +863,8 @@ class PersistedCollectionRuntime< ) } + await this.loadCollectionMetadataIntoCollection() + const indexBootstrapSnapshot = this.collection?.getIndexMetadata() ?? [] this.attachIndexLifecycleListeners() await this.bootstrapPersistedIndexes(indexBootstrapSnapshot) @@ -841,6 +877,32 @@ class PersistedCollectionRuntime< } } + private async loadCollectionMetadataIntoCollection(): Promise { + if ( + !this.persistence.adapter.loadCollectionMetadata || + !this.syncControls.begin || + !this.syncControls.commit || + !this.syncControls.metadata + ) { + return + } + + const collectionMetadata = + await this.persistence.adapter.loadCollectionMetadata(this.collectionId) + + if (collectionMetadata.length === 0) { + return + } + + this.withInternalApply(() => { + this.syncControls.begin?.({ immediate: true }) + collectionMetadata.forEach(({ key, value }) => { + this.syncControls.metadata?.collection.set(key, value) + }) + this.syncControls.commit?.() + }) + } + async loadSubset( options: LoadSubsetOptions, upstreamLoadSubset?: (options: LoadSubsetOptions) => true | Promise, @@ -951,11 +1013,13 @@ class PersistedCollectionRuntime< forwardMessage: { type: `update`, value: message.value, + metadata: message.metadata, }, operation: { type: `update`, key, value: message.value, + metadata: message.metadata, }, } } @@ -1042,7 +1106,7 @@ class PersistedCollectionRuntime< private loadSubsetRowsUnsafe( options: LoadSubsetOptions, - ): Promise> { + ): Promise> { return this.persistence.adapter.loadSubset(this.collectionId, options, { requiredIndexSignatures: this.getRequiredIndexSignatures(), }) @@ -1071,7 +1135,9 @@ class PersistedCollectionRuntime< } } - private applyRowsToCollection(rows: Array<{ key: TKey; value: T }>): void { + private applyRowsToCollection( + rows: Array<{ key: 
TKey; value: T; metadata?: unknown }>, + ): void { if ( !this.syncControls.begin || !this.syncControls.write || @@ -1087,6 +1153,7 @@ class PersistedCollectionRuntime< this.syncControls.write?.({ type: `update`, value: row.value, + metadata: row.metadata as Record | undefined, }) } @@ -1094,7 +1161,9 @@ class PersistedCollectionRuntime< }) } - private replaceCollectionRows(rows: Array<{ key: TKey; value: T }>): void { + private replaceCollectionRows( + rows: Array<{ key: TKey; value: T; metadata?: unknown }>, + ): void { if ( !this.syncControls.begin || !this.syncControls.write || @@ -1111,6 +1180,7 @@ class PersistedCollectionRuntime< this.syncControls.write?.({ type: `update`, value: row.value, + metadata: row.metadata as Record | undefined, }) } @@ -1156,10 +1226,27 @@ class PersistedCollectionRuntime< this.syncControls.write?.({ type: `update`, value: operation.value, + metadata: operation.metadata, }) } } + for (const [key, metadataWrite] of transaction.rowMetadataWrites) { + if (metadataWrite.type === `delete`) { + this.syncControls.metadata?.row.delete(key) + } else { + this.syncControls.metadata?.row.set(key, metadataWrite.value) + } + } + + for (const [key, metadataWrite] of transaction.collectionMetadataWrites) { + if (metadataWrite.type === `delete`) { + this.syncControls.metadata?.collection.delete(key) + } else { + this.syncControls.metadata?.collection.set(key, metadataWrite.value) + } + } + this.syncControls.commit?.() } @@ -1181,7 +1268,12 @@ class PersistedCollectionRuntime< const streamPosition = this.nextLocalStreamPosition() - if (transaction.truncate || transaction.operations.length === 0) { + if ( + transaction.truncate || + (transaction.operations.length === 0 && + transaction.rowMetadataWrites.size === 0 && + transaction.collectionMetadataWrites.size === 0) + ) { this.publishTxCommittedEvent( this.createTxCommittedPayload({ term: streamPosition.term, @@ -1197,7 +1289,7 @@ class PersistedCollectionRuntime< } const tx = 
this.createPersistedTxFromOperations( - transaction.operations, + transaction, streamPosition, ) @@ -1208,6 +1300,9 @@ class PersistedCollectionRuntime< seq: tx.seq, txId: tx.txId, latestRowVersion: tx.rowVersion, + hasMetadataChanges: + transaction.rowMetadataWrites.size > 0 || + transaction.collectionMetadataWrites.size > 0, changedRows: transaction.operations .filter((operation) => operation.type === `update`) .map((operation) => ({ key: operation.key, value: operation.value })), @@ -1219,7 +1314,7 @@ class PersistedCollectionRuntime< } private createPersistedTxFromOperations( - operations: Array>, + transaction: BufferedSyncTransaction, streamPosition: { term: number; seq: number; rowVersion: number }, ): PersistedTx { return { @@ -1227,12 +1322,14 @@ class PersistedCollectionRuntime< term: streamPosition.term, seq: streamPosition.seq, rowVersion: streamPosition.rowVersion, - mutations: operations.map((operation) => + mutations: transaction.operations.map((operation) => operation.type === `update` ? { type: `update`, key: operation.key, value: operation.value, + metadata: operation.metadata, + metadataChanged: operation.metadata !== undefined, } : { type: `delete`, @@ -1240,6 +1337,19 @@ class PersistedCollectionRuntime< value: operation.value, }, ), + rowMetadataMutations: Array.from(transaction.rowMetadataWrites.entries()).map( + ([key, metadataWrite]) => + metadataWrite.type === `delete` + ? { type: `delete`, key } + : { type: `set`, key, value: metadataWrite.value }, + ), + collectionMetadataMutations: Array.from( + transaction.collectionMetadataWrites.entries(), + ).map(([key, metadataWrite]) => + metadataWrite.type === `delete` + ? 
{ type: `delete`, key } + : { type: `set`, key, value: metadataWrite.value }, + ), } } @@ -1387,6 +1497,11 @@ class PersistedCollectionRuntime< seq: tx.seq, txId: tx.txId, latestRowVersion: tx.rowVersion, + hasMetadataChanges: + (tx.rowMetadataMutations !== undefined && + tx.rowMetadataMutations.length > 0) || + tx.collectionMetadataMutations !== undefined && + tx.collectionMetadataMutations.length > 0, changedRows: mutations .filter((mutation) => mutation.type !== `delete`) .map((mutation) => ({ @@ -1409,10 +1524,12 @@ class PersistedCollectionRuntime< latestRowVersion: number changedRows: Array<{ key: TKey; value: T }> deletedKeys: Array + hasMetadataChanges?: boolean requiresFullReload?: boolean }): TxCommitted { const requiresFullReload = args.requiresFullReload === true || + args.hasMetadataChanges === true || args.changedRows.length + args.deletedKeys.length > TARGETED_INVALIDATION_KEY_LIMIT @@ -1925,6 +2042,7 @@ function createWrappedSyncConfig< write: params.write as SyncControlFns[`write`], commit: params.commit, truncate: params.truncate, + metadata: params.metadata ?? 
null, }) runtime.setCollection( params.collection as Collection, @@ -1949,6 +2067,8 @@ function createWrappedSyncConfig< begin: (options?: { immediate?: boolean }) => { const transaction: OpenSyncTransaction = { operations: [], + rowMetadataWrites: new Map(), + collectionMetadataWrites: new Map(), truncate: false, internal: runtime.isApplyingInternally(), queuedBecauseHydrating: @@ -1970,10 +2090,66 @@ function createWrappedSyncConfig< } openTransaction.operations.push(normalization.operation) + if (normalization.operation.type === `delete`) { + openTransaction.rowMetadataWrites.set(normalization.operation.key, { + type: `delete`, + }) + } else if (normalization.operation.metadata !== undefined) { + openTransaction.rowMetadataWrites.set(normalization.operation.key, { + type: `set`, + value: normalization.operation.metadata, + }) + } if (!openTransaction.queuedBecauseHydrating) { params.write(normalization.forwardMessage) } }, + metadata: params.metadata + ? { + row: { + get: (key: TKey) => params.metadata!.row.get(key), + set: (key: TKey, value: unknown) => { + const openTransaction = + transactionStack[transactionStack.length - 1] + openTransaction?.rowMetadataWrites.set(key, { + type: `set`, + value, + }) + params.metadata!.row.set(key, value) + }, + delete: (key: TKey) => { + const openTransaction = + transactionStack[transactionStack.length - 1] + openTransaction?.rowMetadataWrites.set(key, { + type: `delete`, + }) + params.metadata!.row.delete(key) + }, + }, + collection: { + get: (key: string) => params.metadata!.collection.get(key), + set: (key: string, value: unknown) => { + const openTransaction = + transactionStack[transactionStack.length - 1] + openTransaction?.collectionMetadataWrites.set(key, { + type: `set`, + value, + }) + params.metadata!.collection.set(key, value) + }, + delete: (key: string) => { + const openTransaction = + transactionStack[transactionStack.length - 1] + openTransaction?.collectionMetadataWrites.set(key, { + type: `delete`, + }) + 
params.metadata!.collection.delete(key) + }, + list: (prefix?: string) => + params.metadata!.collection.list(prefix), + }, + } + : undefined, truncate: () => { const openTransaction = transactionStack[transactionStack.length - 1] if (!openTransaction) { @@ -1996,6 +2172,8 @@ function createWrappedSyncConfig< if (openTransaction.queuedBecauseHydrating) { runtime.queueHydrationBufferedTransaction({ operations: openTransaction.operations, + rowMetadataWrites: openTransaction.rowMetadataWrites, + collectionMetadataWrites: openTransaction.collectionMetadataWrites, truncate: openTransaction.truncate, internal: openTransaction.internal, }) @@ -2007,6 +2185,8 @@ function createWrappedSyncConfig< void runtime .persistAndBroadcastExternalSyncTransaction({ operations: openTransaction.operations, + rowMetadataWrites: openTransaction.rowMetadataWrites, + collectionMetadataWrites: openTransaction.collectionMetadataWrites, truncate: openTransaction.truncate, internal: false, }) @@ -2051,6 +2231,7 @@ function createLoopbackSyncConfig< write: params.write as SyncControlFns[`write`], commit: params.commit, truncate: params.truncate, + metadata: params.metadata ?? 
null, }) runtime.setCollection( params.collection as Collection, diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index 9f1662de1..5d357d78a 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -37,6 +37,7 @@ type CompiledSqlFragment = { type StoredSqliteRow = { key: string value: string + metadata: string | null row_version: number } @@ -588,6 +589,7 @@ function sanitizeExpressionSqlFragment(fragment: string): string { type InMemoryRow = { key: TKey value: T + metadata?: unknown rowVersion: number } @@ -1035,7 +1037,7 @@ export class SQLiteCorePersistenceAdapter< collectionId: string, options: LoadSubsetOptions, ctx?: { requiredIndexSignatures?: ReadonlyArray }, - ): Promise> { + ): Promise> { const tableMapping = await this.ensureCollectionReady(collectionId) await this.touchRequiredIndexes(collectionId, ctx?.requiredIndexSignatures) @@ -1072,6 +1074,7 @@ export class SQLiteCorePersistenceAdapter< return orderedRows.map((row) => ({ key: row.key, value: row.value, + metadata: row.metadata, })) } @@ -1079,6 +1082,7 @@ export class SQLiteCorePersistenceAdapter< return rows.map((row) => ({ key: row.key, value: row.value, + metadata: row.metadata, })) } @@ -1140,8 +1144,11 @@ export class SQLiteCorePersistenceAdapter< continue } - const existingRows = await transactionDriver.query<{ value: string }>( - `SELECT value + const existingRows = await transactionDriver.query<{ + value: string + metadata: string | null + }>( + `SELECT value, metadata FROM ${collectionTableSql} WHERE key = ? LIMIT 1`, @@ -1150,18 +1157,32 @@ export class SQLiteCorePersistenceAdapter< const existingValue = existingRows[0]?.value ? deserializePersistedRowValue(existingRows[0].value) : undefined + const existingMetadata = + existingRows[0]?.metadata != null + ? 
deserializePersistedRowValue(existingRows[0].metadata) + : undefined const mergedValue = mutation.type === `update` ? mergeObjectRows(existingValue, mutation.value) : mutation.value + const nextMetadata = + mutation.metadataChanged === true ? mutation.metadata : existingMetadata await transactionDriver.run( - `INSERT INTO ${collectionTableSql} (key, value, row_version) - VALUES (?, ?, ?) + `INSERT INTO ${collectionTableSql} (key, value, metadata, row_version) + VALUES (?, ?, ?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value, + metadata = excluded.metadata, row_version = excluded.row_version`, - [encodedKey, serializePersistedRowValue(mergedValue), nextRowVersion], + [ + encodedKey, + serializePersistedRowValue(mergedValue), + nextMetadata === undefined + ? null + : serializePersistedRowValue(nextMetadata), + nextRowVersion, + ], ) await transactionDriver.run( `DELETE FROM ${tombstoneTableSql} @@ -1170,6 +1191,53 @@ export class SQLiteCorePersistenceAdapter< ) } + for (const rowMetadataMutation of tx.rowMetadataMutations ?? []) { + const encodedKey = encodePersistedStorageKey(rowMetadataMutation.key) + if (rowMetadataMutation.type === `delete`) { + await transactionDriver.run( + `UPDATE ${collectionTableSql} + SET metadata = NULL + WHERE key = ?`, + [encodedKey], + ) + } else { + await transactionDriver.run( + `UPDATE ${collectionTableSql} + SET metadata = ? + WHERE key = ?`, + [ + rowMetadataMutation.value === undefined + ? null + : serializePersistedRowValue(rowMetadataMutation.value), + encodedKey, + ], + ) + } + } + + for (const metadataMutation of tx.collectionMetadataMutations ?? []) { + if (metadataMutation.type === `delete`) { + await transactionDriver.run( + `DELETE FROM collection_metadata + WHERE collection_id = ? 
AND key = ?`, + [collectionId, metadataMutation.key], + ) + } else { + await transactionDriver.run( + `INSERT INTO collection_metadata (collection_id, key, value, updated_at) + VALUES (?, ?, ?, CAST(strftime('%s', 'now') AS INTEGER)) + ON CONFLICT(collection_id, key) DO UPDATE SET + value = excluded.value, + updated_at = excluded.updated_at`, + [ + collectionId, + metadataMutation.key, + serializePersistedRowValue(metadataMutation.value), + ], + ) + } + } + await transactionDriver.run( `INSERT INTO collection_version (collection_id, latest_row_version) VALUES (?, ?) @@ -1207,6 +1275,22 @@ export class SQLiteCorePersistenceAdapter< }) } + async loadCollectionMetadata( + collectionId: string, + ): Promise> { + const rows = await this.driver.query<{ key: string; value: string }>( + `SELECT key, value + FROM collection_metadata + WHERE collection_id = ?`, + [collectionId], + ) + + return rows.map((row) => ({ + key: row.key, + value: deserializePersistedRowValue(row.value), + })) + } + async ensureIndex( collectionId: string, signature: string, @@ -1405,7 +1489,7 @@ export class SQLiteCorePersistenceAdapter< const orderByCompiled = compileOrderByClauses(options.orderBy) const queryParams: Array = [] - let sql = `SELECT key, value, row_version FROM ${collectionTableSql}` + let sql = `SELECT key, value, metadata, row_version FROM ${collectionTableSql}` if (options.where && whereCompiled.supported) { sql = `${sql} WHERE ${whereCompiled.sql}` @@ -1427,6 +1511,10 @@ export class SQLiteCorePersistenceAdapter< return { key, value, + metadata: + row.metadata != null + ? 
deserializePersistedRowValue(row.metadata) + : undefined, rowVersion: row.row_version, } }) @@ -1646,6 +1734,7 @@ export class SQLiteCorePersistenceAdapter< `CREATE TABLE IF NOT EXISTS ${collectionTableSql} ( key TEXT PRIMARY KEY, value TEXT NOT NULL, + metadata TEXT, row_version INTEGER NOT NULL )`, ) @@ -1800,6 +1889,15 @@ export class SQLiteCorePersistenceAdapter< latest_row_version INTEGER NOT NULL )`, ) + await this.driver.exec( + `CREATE TABLE IF NOT EXISTS collection_metadata ( + collection_id TEXT NOT NULL, + key TEXT NOT NULL, + value TEXT NOT NULL, + updated_at INTEGER NOT NULL, + PRIMARY KEY (collection_id, key) + )`, + ) await this.driver.exec( `CREATE TABLE IF NOT EXISTS leader_term ( collection_id TEXT PRIMARY KEY, diff --git a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts index 6ac202f13..b463d8cde 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts @@ -44,7 +44,9 @@ type RecordingAdapter = PersistenceAdapter & { options: LoadSubsetOptions requiredIndexSignatures: ReadonlyArray }> + loadCollectionMetadataCalls: Array rows: Map + collectionMetadata: Map } function createRecordingAdapter( @@ -54,10 +56,12 @@ function createRecordingAdapter( const adapter: RecordingAdapter = { rows, + collectionMetadata: new Map(), applyCommittedTxCalls: [], ensureIndexCalls: [], markIndexRemovedCalls: [], loadSubsetCalls: [], + loadCollectionMetadataCalls: [], loadSubset: (collectionId, options, ctx) => { adapter.loadSubsetCalls.push({ collectionId, @@ -71,6 +75,15 @@ function createRecordingAdapter( })), ) }, + loadCollectionMetadata: (collectionId) => { + adapter.loadCollectionMetadataCalls.push(collectionId) + return Promise.resolve( + Array.from(adapter.collectionMetadata.entries()).map(([key, value]) => ({ + key, + value, + })), + ) + }, applyCommittedTx: 
(collectionId, tx) => { adapter.applyCommittedTxCalls.push({ collectionId, @@ -92,6 +105,13 @@ function createRecordingAdapter( rows.set(mutation.key, mutation.value) } } + for (const metadataMutation of tx.collectionMetadataMutations ?? []) { + if (metadataMutation.type === `delete`) { + adapter.collectionMetadata.delete(metadataMutation.key) + } else { + adapter.collectionMetadata.set(metadataMutation.key, metadataMutation.value) + } + } return Promise.resolve() }, ensureIndex: (collectionId, signature) => { @@ -257,6 +277,42 @@ describe(`persistedCollectionOptions`, () => { expect(adapter.applyCommittedTxCalls).toHaveLength(1) }) + it(`loads collection metadata into collection state during startup`, async () => { + const adapter = createRecordingAdapter() + adapter.collectionMetadata.set(`electric:resume`, { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }) + + const collection = createCollection( + persistedCollectionOptions({ + id: `persisted-startup-metadata`, + getKey: (item) => item.id, + persistence: { + adapter, + }, + }), + ) + + await collection.stateWhenReady() + + expect(adapter.loadCollectionMetadataCalls).toEqual([ + `persisted-startup-metadata`, + ]) + expect(collection._state.syncedCollectionMetadata.get(`electric:resume`)).toEqual( + { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }, + ) + }) + it(`throws InvalidSyncConfigError when sync key is present but null`, () => { const invalidOptions = { id: `invalid-sync-null`, diff --git a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts index 7c45e964b..6af6dff2c 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts @@ -423,6 +423,127 @@ export function 
runSQLiteCoreAdapterContractSuite( expect(txRows[0]?.count).toBe(0) }) + it(`persists row metadata and collection metadata atomically`, async () => { + const { adapter, driver } = registerContractHarness() + const collectionId = `metadata-roundtrip` + + await adapter.applyCommittedTx(collectionId, { + txId: `metadata-1`, + term: 1, + seq: 1, + rowVersion: 1, + mutations: [ + { + type: `insert`, + key: `1`, + value: { + id: `1`, + title: `Tracked`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + metadata: { + queryCollection: { + owners: { + q1: true, + }, + }, + }, + metadataChanged: true, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }, + }, + ], + }) + + const rows = await adapter.loadSubset(collectionId, {}) + expect(rows).toEqual([ + { + key: `1`, + value: { + id: `1`, + title: `Tracked`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + metadata: { + queryCollection: { + owners: { + q1: true, + }, + }, + }, + }, + ]) + + const collectionMetadata = + await adapter.loadCollectionMetadata?.(collectionId) + expect(collectionMetadata).toEqual([ + { + key: `electric:resume`, + value: { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }, + }, + ]) + + await expect( + adapter.applyCommittedTx(collectionId, { + txId: `metadata-2`, + term: 1, + seq: 2, + rowVersion: 2, + mutations: [ + { + type: `insert`, + key: `2`, + value: { + id: `2`, + title: `Bad`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 2, + }, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `broken`, + value: { + invalid: new Date(Number.NaN), + }, + }, + ], + }), + ).rejects.toThrow() + + const rowsAfterFailure = await adapter.loadSubset(collectionId, {}) + expect(rowsAfterFailure).toEqual(rows) + + const metadataRows = await driver.query<{ key: string }>( + 
`SELECT key + FROM collection_metadata + WHERE collection_id = ?`, + [collectionId], + ) + expect(metadataRows).toEqual([{ key: `electric:resume` }]) + }) + it(`supports pushdown operators with correctness-preserving fallback`, async () => { const { adapter } = registerContractHarness() const collectionId = `todos` diff --git a/packages/db/src/collection/state.ts b/packages/db/src/collection/state.ts index cc435f0d9..ce54ea403 100644 --- a/packages/db/src/collection/state.ts +++ b/packages/db/src/collection/state.ts @@ -28,6 +28,8 @@ interface PendingSyncedTransaction< operations: Array> truncate?: boolean deletedKeys: Set + rowMetadataWrites: Map + collectionMetadataWrites: Map optimisticSnapshot?: { upserts: Map deletes: Set @@ -40,6 +42,10 @@ interface PendingSyncedTransaction< immediate?: boolean } +type PendingMetadataWrite = + | { type: `set`; value: unknown } + | { type: `delete` } + type InternalChangeMessage< T extends object = Record, TKey extends string | number = string | number, @@ -70,6 +76,7 @@ export class CollectionStateManager< > = [] public syncedData: SortedMap public syncedMetadata = new Map() + public syncedCollectionMetadata = new Map() // Optimistic state tracking - make public for testing public optimisticUpserts = new Map() @@ -870,6 +877,9 @@ export class CollectionStateManager< for (const operation of transaction.operations) { changedKeys.add(operation.key as TKey) } + for (const [key] of transaction.rowMetadataWrites) { + changedKeys.add(key) + } } // Use pre-captured state if available (from optimistic scenarios), @@ -959,26 +969,6 @@ export class CollectionStateManager< const key = operation.key as TKey this.syncedKeys.add(key) - // Update metadata - switch (operation.type) { - case `insert`: - this.syncedMetadata.set(key, operation.metadata) - break - case `update`: - this.syncedMetadata.set( - key, - Object.assign( - {}, - this.syncedMetadata.get(key), - operation.metadata, - ), - ) - break - case `delete`: - 
this.syncedMetadata.delete(key) - break - } - // Determine origin: 'local' for local-only collections or pending local changes const origin: VirtualOrigin = this.isLocalOnly || @@ -1025,6 +1015,7 @@ export class CollectionStateManager< } case `delete`: this.syncedData.delete(key) + this.syncedMetadata.delete(key) // Clean up origin and pending tracking for deleted rows this.rowOrigins.delete(key) this.pendingLocalChanges.delete(key) @@ -1036,6 +1027,22 @@ export class CollectionStateManager< break } } + + for (const [key, metadataWrite] of transaction.rowMetadataWrites) { + if (metadataWrite.type === `delete`) { + this.syncedMetadata.delete(key) + continue + } + this.syncedMetadata.set(key, metadataWrite.value) + } + + for (const [key, metadataWrite] of transaction.collectionMetadataWrites) { + if (metadataWrite.type === `delete`) { + this.syncedCollectionMetadata.delete(key) + continue + } + this.syncedCollectionMetadata.set(key, metadataWrite.value) + } } // After applying synced operations, if this commit included a truncate, @@ -1365,6 +1372,7 @@ export class CollectionStateManager< public cleanup(): void { this.syncedData.clear() this.syncedMetadata.clear() + this.syncedCollectionMetadata.clear() this.optimisticUpserts.clear() this.optimisticDeletes.clear() this.pendingOptimisticUpserts.clear() diff --git a/packages/db/src/collection/sync.ts b/packages/db/src/collection/sync.ts index 1f50cc889..f3eae8d02 100644 --- a/packages/db/src/collection/sync.ts +++ b/packages/db/src/collection/sync.ts @@ -18,6 +18,7 @@ import type { LoadSubsetOptions, OptimisticChangeMessage, SyncConfigRes, + SyncMetadataApi, } from '../types' import type { CollectionImpl } from './index.js' import type { CollectionStateManager } from './state' @@ -93,6 +94,8 @@ export class CollectionSyncManager< committed: false, operations: [], deletedKeys: new Set(), + rowMetadataWrites: new Map(), + collectionMetadataWrites: new Map(), immediate: options?.immediate, }) }, @@ -169,6 +172,15 @@ 
export class CollectionSyncManager< if (messageType === `delete`) { pendingTransaction.deletedKeys.add(key) + pendingTransaction.rowMetadataWrites.set(key, { type: `delete` }) + } else if ( + messageType === `insert` || + message.metadata !== undefined + ) { + pendingTransaction.rowMetadataWrites.set(key, { + type: `set`, + value: message.metadata, + }) } }, commit: () => { @@ -205,6 +217,7 @@ export class CollectionSyncManager< // Clear all operations from the current transaction pendingTransaction.operations = [] pendingTransaction.deletedKeys.clear() + pendingTransaction.rowMetadataWrites.clear() // Mark the transaction as a truncate operation. During commit, this triggers: // - Delete events for all previously synced keys (excluding optimistic-deleted keys) @@ -220,6 +233,7 @@ export class CollectionSyncManager< deletes: new Set(this.state.optimisticDeletes), } }, + metadata: this.createSyncMetadataApi(), }), ) @@ -245,6 +259,108 @@ export class CollectionSyncManager< } } + private getActivePendingSyncTransaction() { + const pendingTransaction = + this.state.pendingSyncedTransactions[ + this.state.pendingSyncedTransactions.length - 1 + ] + + if (!pendingTransaction) { + throw new NoPendingSyncTransactionWriteError() + } + if (pendingTransaction.committed) { + throw new SyncTransactionAlreadyCommittedWriteError() + } + + return pendingTransaction + } + + private createSyncMetadataApi(): SyncMetadataApi { + return { + row: { + get: (key) => { + const pendingTransaction = + this.state.pendingSyncedTransactions[ + this.state.pendingSyncedTransactions.length - 1 + ] + const pendingWrite = pendingTransaction?.rowMetadataWrites.get(key) + if (pendingWrite) { + return pendingWrite.type === `delete` + ? 
undefined + : pendingWrite.value + } + return this.state.syncedMetadata.get(key) + }, + set: (key, metadata) => { + const pendingTransaction = this.getActivePendingSyncTransaction() + pendingTransaction.rowMetadataWrites.set(key, { + type: `set`, + value: metadata, + }) + }, + delete: (key) => { + const pendingTransaction = this.getActivePendingSyncTransaction() + pendingTransaction.rowMetadataWrites.set(key, { + type: `delete`, + }) + }, + }, + collection: { + get: (key) => { + const pendingTransaction = + this.state.pendingSyncedTransactions[ + this.state.pendingSyncedTransactions.length - 1 + ] + const pendingWrite = pendingTransaction?.collectionMetadataWrites.get( + key, + ) + if (pendingWrite) { + return pendingWrite.type === `delete` + ? undefined + : pendingWrite.value + } + return this.state.syncedCollectionMetadata.get(key) + }, + set: (key, value) => { + const pendingTransaction = this.getActivePendingSyncTransaction() + pendingTransaction.collectionMetadataWrites.set(key, { + type: `set`, + value, + }) + }, + delete: (key) => { + const pendingTransaction = this.getActivePendingSyncTransaction() + pendingTransaction.collectionMetadataWrites.set(key, { + type: `delete`, + }) + }, + list: (prefix) => { + const merged = new Map(this.state.syncedCollectionMetadata) + const pendingTransaction = + this.state.pendingSyncedTransactions[ + this.state.pendingSyncedTransactions.length - 1 + ] + if (pendingTransaction) { + for (const [key, pendingWrite] of pendingTransaction.collectionMetadataWrites) { + if (pendingWrite.type === `delete`) { + merged.delete(key) + } else { + merged.set(key, pendingWrite.value) + } + } + } + + return Array.from(merged.entries()) + .filter(([key]) => (prefix ? 
key.startsWith(prefix) : true)) + .map(([key, value]) => ({ + key, + value, + })) + }, + }, + } + } + /** * Preload the collection data by starting sync if not already started * Multiple concurrent calls will share the same promise diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index 9d84a1099..d42edbf6f 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -339,6 +339,7 @@ export interface SyncConfig< commit: () => void markReady: () => void truncate: () => void + metadata?: SyncMetadataApi }) => void | CleanupFn | SyncConfigRes /** @@ -357,6 +358,23 @@ export interface SyncConfig< rowUpdateMode?: `partial` | `full` } +export interface SyncMetadataApi { + row: { + get: (key: TKey) => unknown | undefined + set: (key: TKey, metadata: unknown) => void + delete: (key: TKey) => void + } + collection: { + get: (key: string) => unknown | undefined + set: (key: string, value: unknown) => void + delete: (key: string) => void + list: (prefix?: string) => ReadonlyArray<{ + key: string + value: unknown + }> + } +} + export interface ChangeMessage< T extends object = Record, TKey extends string | number = string | number, diff --git a/packages/db/tests/collection.test.ts b/packages/db/tests/collection.test.ts index 17f9a2ed9..f3e7d79ee 100644 --- a/packages/db/tests/collection.test.ts +++ b/packages/db/tests/collection.test.ts @@ -1440,6 +1440,127 @@ describe(`Collection`, () => { expect(collection._state.syncedMetadata.size).toBe(0) }) + it(`should allow startup metadata reads and commit metadata-only sync transactions`, async () => { + let observedCollectionMetadata: unknown + let testSyncFunctions: any = null + + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-metadata-startup-read-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady, metadata }) => { + observedCollectionMetadata = metadata?.collection.get(`startup:key`) + + begin() + 
metadata?.collection.set(`startup:key`, { ready: true }) + commit() + markReady() + + testSyncFunctions = { begin, commit, metadata } + }, + }, + }) + + await collection.stateWhenReady() + + expect(observedCollectionMetadata).toBeUndefined() + expect(collection._state.syncedCollectionMetadata.get(`startup:key`)).toEqual({ + ready: true, + }) + + const { begin, commit, metadata } = testSyncFunctions + begin() + metadata.collection.set(`runtime:key`, { persisted: true }) + commit() + + expect(collection._state.syncedCollectionMetadata.get(`runtime:key`)).toEqual( + { persisted: true }, + ) + }) + + it(`should use last-write-wins for row metadata in sync transactions`, async () => { + let testSyncFunctions: any = null + + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-row-metadata-last-write-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, write, commit, markReady, metadata }) => { + begin() + write({ + type: `insert`, + value: { id: 1, value: `initial` }, + metadata: { source: `write` }, + }) + metadata?.row.set(1, { source: `explicit-set` }) + commit() + markReady() + + testSyncFunctions = { begin, write, commit, metadata } + }, + }, + }) + + await collection.stateWhenReady() + + expect(collection._state.syncedMetadata.get(1)).toEqual({ + source: `explicit-set`, + }) + + const { begin, write, commit, metadata } = testSyncFunctions + begin() + metadata.row.set(1, { source: `set-first` }) + write({ + type: `update`, + value: { id: 1, value: `updated` }, + metadata: { source: `write-last` }, + }) + commit() + + expect(collection._state.syncedMetadata.get(1)).toEqual({ + source: `write-last`, + }) + }) + + it(`should delete row metadata when sync deletes the row`, async () => { + let testSyncFunctions: any = null + + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-row-metadata-delete-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, 
write, commit, markReady }) => { + begin() + write({ + type: `insert`, + value: { id: 1, value: `initial` }, + metadata: { source: `sync` }, + }) + commit() + markReady() + + testSyncFunctions = { begin, write, commit } + }, + }, + }) + + await collection.stateWhenReady() + expect(collection._state.syncedMetadata.get(1)).toEqual({ source: `sync` }) + + const { begin, write, commit } = testSyncFunctions + begin() + write({ + type: `delete`, + key: 1, + }) + commit() + + expect(collection._state.syncedMetadata.has(1)).toBe(false) + }) + it(`open sync transaction isn't applied when optimistic mutation is resolved/rejected`, async () => { type Row = { id: number; name: string } diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index 1197e7734..f3fc7ebe0 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -47,6 +47,7 @@ import type { ControlMessage, GetExtensions, Message, + Offset, PostgresSnapshot, Row, ShapeStreamOptions, @@ -1181,7 +1182,21 @@ function createElectricSync>( return { sync: (params: Parameters[`sync`]>[0]) => { - const { begin, write, commit, markReady, truncate, collection } = params + const { begin, write, commit, markReady, truncate, collection, metadata } = + params + const persistedResumeState = metadata?.collection.get(`electric:resume`) as + | { + kind: `resume` + offset: string + handle: string + shapeId: string + updatedAt: number + } + | { + kind: `reset` + updatedAt: number + } + | undefined // Wrap markReady to wait for test hook in progressive mode let progressiveReadyGate: Promise | null = null @@ -1239,7 +1254,17 @@ function createElectricSync>( // In on-demand mode, we only need the changes from the point of time the collection was created // so we default to `now` when there is no saved offset. offset: - shapeOptions.offset ?? (syncMode === `on-demand` ? `now` : undefined), + shapeOptions.offset ?? 
+ (persistedResumeState?.kind === `resume` + ? (persistedResumeState.offset as Offset) + : syncMode === `on-demand` + ? `now` + : undefined), + handle: + shapeOptions.handle ?? + (persistedResumeState?.kind === `resume` + ? persistedResumeState.handle + : undefined), signal: abortController.signal, onError: (errorParams) => { // Just immediately mark ready if there's an error to avoid blocking @@ -1280,6 +1305,25 @@ function createElectricSync>( // duplicate key errors when the row's data has changed between requests. const syncedKeys = new Set() + const stageResumeMetadata = () => { + if (!metadata) { + return + } + const shapeHandle = stream.shapeHandle + const lastOffset = stream.lastOffset + if (!shapeHandle || lastOffset === `-1`) { + return + } + + metadata.collection.set(`electric:resume`, { + kind: `resume`, + offset: lastOffset, + handle: shapeHandle, + shapeId: shapeHandle, + updatedAt: Date.now(), + }) + } + /** * Process a change message: handle tags and write the mutation */ @@ -1464,6 +1508,11 @@ function createElectricSync>( transactionStarted = true } + metadata?.collection.set(`electric:resume`, { + kind: `reset`, + updatedAt: Date.now(), + }) + truncate() // Clear tag tracking state @@ -1534,6 +1583,7 @@ function createElectricSync>( } // Commit the atomic swap + stageResumeMetadata() commit() // Exit buffering phase by marking that we've received up-to-date @@ -1547,8 +1597,13 @@ function createElectricSync>( // Normal mode or on-demand: commit transaction if one was started // Both up-to-date and subset-end trigger a commit if (transactionStarted) { + stageResumeMetadata() commit() transactionStarted = false + } else if (commitPoint === `up-to-date` && metadata) { + begin() + stageResumeMetadata() + commit() } } wrappedMarkReady(isBufferingInitialSync()) diff --git a/packages/query-db-collection/src/query.ts b/packages/query-db-collection/src/query.ts index 7bc8f532b..cae8e145f 100644 --- a/packages/query-db-collection/src/query.ts +++ 
b/packages/query-db-collection/src/query.ts @@ -118,6 +118,7 @@ export interface QueryCollectionConfig< TQueryData, TQueryKey >[`staleTime`] + persistedGcTime?: number /** * Metadata to pass to the query. @@ -547,6 +548,7 @@ export function queryCollectionOptions( retry, retryDelay, staleTime, + persistedGcTime, getKey, onInsert, onUpdate, @@ -645,11 +647,74 @@ export function queryCollectionOptions( } const internalSync: SyncConfig[`sync`] = (params) => { - const { begin, write, commit, markReady, collection } = params + const { begin, write, commit, markReady, collection, metadata } = params // Track whether sync has been started let syncStarted = false + const getRowMetadata = (rowKey: string | number) => { + return (metadata?.row.get(rowKey) ?? + collection._state.syncedMetadata.get(rowKey)) as + | Record + | undefined + } + + const getPersistedOwners = (rowKey: string | number) => { + const rowMetadata = getRowMetadata(rowKey) + const queryMetadata = rowMetadata?.queryCollection + if (!queryMetadata || typeof queryMetadata !== `object`) { + return new Set() + } + + const owners = (queryMetadata as Record).owners + if (!owners || typeof owners !== `object`) { + return new Set() + } + + return new Set(Object.keys(owners as Record)) + } + + const setPersistedOwners = ( + rowKey: string | number, + owners: Set, + ) => { + if (!metadata) { + return + } + + const currentMetadata = { ...(getRowMetadata(rowKey) ?? 
{}) } + if (owners.size === 0) { + delete currentMetadata.queryCollection + if (Object.keys(currentMetadata).length === 0) { + metadata.row.delete(rowKey) + } else { + metadata.row.set(rowKey, currentMetadata) + } + return + } + + metadata.row.set(rowKey, { + ...currentMetadata, + queryCollection: { + owners: Object.fromEntries( + Array.from(owners.values()).map((owner) => [owner, true]), + ), + }, + }) + } + + const getOwnedRowsForQuery = (hashedQueryKey: string) => { + const ownedRows = new Set() + for (const [rowKey] of collection._state.syncedData.entries()) { + const owners = getPersistedOwners(rowKey) + if (owners.has(hashedQueryKey)) { + ownedRows.add(rowKey) + addRow(rowKey, hashedQueryKey) + } + } + return ownedRows + } + /** * Generate a consistent query key from LoadSubsetOptions. * CRITICAL: Must use identical logic in both createQueryFromOpts and unloadSubset @@ -680,6 +745,12 @@ export function queryCollectionOptions( const hashedQueryKey = hashKey(key) const extendedMeta = { ...meta, loadSubsetOptions: opts } + if (metadata) { + begin() + metadata.collection.delete(`queryCollection:gc:${hashedQueryKey}`) + commit() + } + if (state.observers.has(hashedQueryKey)) { // We already have a query for this queryKey // Increment reference count since another consumer is using this observer @@ -830,6 +901,7 @@ export function queryCollectionOptions( const currentSyncedItems: Map = new Map( collection._state.syncedData.entries(), ) + const previouslyOwnedRows = getOwnedRowsForQuery(hashedQueryKey) const newItemsMap = new Map() newItemsArray.forEach((item) => { const key = getKey(item) @@ -838,9 +910,16 @@ export function queryCollectionOptions( begin() - currentSyncedItems.forEach((oldItem, key) => { + previouslyOwnedRows.forEach((key) => { + const oldItem = currentSyncedItems.get(key) + if (!oldItem) { + return + } const newItem = newItemsMap.get(key) if (!newItem) { + const owners = getPersistedOwners(key) + owners.delete(hashedQueryKey) + 
setPersistedOwners(key, owners) const needToRemove = removeRow(key, hashedQueryKey) // returns true if the row is no longer referenced by any queries if (needToRemove) { write({ type: `delete`, value: oldItem }) @@ -852,6 +931,11 @@ export function queryCollectionOptions( }) newItemsMap.forEach((newItem, key) => { + const owners = getPersistedOwners(key) + if (!owners.has(hashedQueryKey)) { + owners.add(hashedQueryKey) + setPersistedOwners(key, owners) + } addRow(key, hashedQueryKey) if (!currentSyncedItems.has(key)) { write({ type: `insert`, value: newItem }) @@ -968,6 +1052,11 @@ export function queryCollectionOptions( const rowKeys = queryToRows.get(hashedQueryKey) ?? new Set() const rowsToDelete: Array = [] + const shouldWriteMetadata = metadata !== undefined && rowKeys.size > 0 + + if (shouldWriteMetadata) { + begin() + } rowKeys.forEach((rowKey) => { const queries = rowToQueries.get(rowKey) @@ -977,6 +1066,7 @@ export function queryCollectionOptions( } queries.delete(hashedQueryKey) + setPersistedOwners(rowKey, queries) if (queries.size === 0) { rowToQueries.delete(rowKey) @@ -987,11 +1077,17 @@ export function queryCollectionOptions( } }) - if (rowsToDelete.length > 0) { + if (!shouldWriteMetadata && rowsToDelete.length > 0) { begin() + } + + if (rowsToDelete.length > 0) { rowsToDelete.forEach((row) => { write({ type: `delete`, value: row }) }) + } + + if (shouldWriteMetadata || rowsToDelete.length > 0) { commit() } @@ -1034,6 +1130,28 @@ export function queryCollectionOptions( ) } + if (persistedGcTime !== undefined) { + if (metadata) { + begin() + metadata.collection.set(`queryCollection:gc:${hashedQueryKey}`, { + queryHash: hashedQueryKey, + mode: + persistedGcTime === Number.POSITIVE_INFINITY + ? `until-revalidated` + : `ttl`, + ...(persistedGcTime === Number.POSITIVE_INFINITY + ? 
{} + : { expiresAt: Date.now() + persistedGcTime }), + }) + commit() + } + unsubscribes.get(hashedQueryKey)?.() + unsubscribes.delete(hashedQueryKey) + state.observers.delete(hashedQueryKey) + queryRefCounts.set(hashedQueryKey, 0) + return + } + cleanupQueryInternal(hashedQueryKey) } diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 9ad0f251f..2be591d7b 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -1,5 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { QueryClient } from '@tanstack/query-core' +import { QueryClient, hashKey } from '@tanstack/query-core' import { createCollection, createLiveQueryCollection, @@ -4275,6 +4275,45 @@ describe(`QueryCollection`, () => { }) }) + it(`should diff against persisted query-owned rows on warm start`, async () => { + const baseQueryKey = [`persisted-baseline-test`] + const queryFn = vi.fn().mockResolvedValue([]) + + const config: QueryCollectionConfig = { + id: `persisted-baseline-test`, + queryClient, + queryKey: baseQueryKey, + queryFn, + getKey: (item) => item.id, + syncMode: `eager`, + startSync: false, + } + + const collection = createCollection(queryCollectionOptions(config)) + const ownedRow = { id: `1`, name: `Owned row`, category: `A` } + const unrelatedRow = { id: `2`, name: `Unrelated row`, category: `B` } + const ownedQueryHash = hashKey(baseQueryKey) + + collection._state.syncedData.set(ownedRow.id, ownedRow) + collection._state.syncedData.set(unrelatedRow.id, unrelatedRow) + collection._state.syncedMetadata.set(ownedRow.id, { + queryCollection: { + owners: { + [ownedQueryHash]: true, + }, + }, + }) + collection._state.syncedMetadata.set(unrelatedRow.id, undefined) + collection._state.size = 2 + + await collection.preload() + await flushPromises() + + expect(queryFn).toHaveBeenCalledTimes(1) + 
expect(collection.has(ownedRow.id)).toBe(false) + expect(collection.has(unrelatedRow.id)).toBe(true) + }) + it(`should reset refcount after query GC and reload (stale refcount bug)`, async () => { // This test catches Bug 2: stale refcounts after GC/remove // When TanStack Query GCs a query, the refcount should be cleaned up From 95aaff0b544e42235a5d6f4670ef494ec6ff1502 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 15:06:52 +0000 Subject: [PATCH 05/12] ci: apply automated fixes --- .../src/persisted.ts | 38 ++++++----- .../src/sqlite-core-adapter.ts | 4 +- .../tests/persisted.test.ts | 33 ++++++---- packages/db/src/collection/state.ts | 9 +-- packages/db/src/collection/sync.ts | 10 +-- packages/db/src/types.ts | 4 +- packages/db/tests/collection.test.ts | 10 +-- .../electric-db-collection/src/electric.ts | 15 ++++- .../query-db-collection/tests/query.test.ts | 66 +++++++++---------- persisted-sync-metadata-plan/01-core-api.md | 4 +- 10 files changed, 108 insertions(+), 85 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index 9aef35143..2cdcbec45 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -197,8 +197,7 @@ export type PersistedTx< | { type: `delete`; key: TKey; value: T } > rowMetadataMutations?: Array< - | { type: `set`; key: TKey; value: unknown } - | { type: `delete`; key: TKey } + { type: `set`; key: TKey; value: unknown } | { type: `delete`; key: TKey } > collectionMetadataMutations?: Array< | { type: `set`; key: string; value: unknown } @@ -546,7 +545,10 @@ type NormalizedSyncOperation = type BufferedSyncTransaction = { operations: Array> - rowMetadataWrites: Map + rowMetadataWrites: Map< + TKey, + { type: `set`; value: unknown } | { type: `delete` } + > collectionMetadataWrites: 
Map< string, { type: `set`; value: unknown } | { type: `delete` } @@ -1013,13 +1015,13 @@ class PersistedCollectionRuntime< forwardMessage: { type: `update`, value: message.value, - metadata: message.metadata, + metadata: message.metadata, }, operation: { type: `update`, key, value: message.value, - metadata: message.metadata, + metadata: message.metadata, }, } } @@ -1288,10 +1290,7 @@ class PersistedCollectionRuntime< return } - const tx = this.createPersistedTxFromOperations( - transaction, - streamPosition, - ) + const tx = this.createPersistedTxFromOperations(transaction, streamPosition) await this.persistence.adapter.applyCommittedTx(this.collectionId, tx) this.publishTxCommittedEvent( @@ -1337,11 +1336,12 @@ class PersistedCollectionRuntime< value: operation.value, }, ), - rowMetadataMutations: Array.from(transaction.rowMetadataWrites.entries()).map( - ([key, metadataWrite]) => - metadataWrite.type === `delete` - ? { type: `delete`, key } - : { type: `set`, key, value: metadataWrite.value }, + rowMetadataMutations: Array.from( + transaction.rowMetadataWrites.entries(), + ).map(([key, metadataWrite]) => + metadataWrite.type === `delete` + ? 
{ type: `delete`, key } + : { type: `set`, key, value: metadataWrite.value }, ), collectionMetadataMutations: Array.from( transaction.collectionMetadataWrites.entries(), @@ -1500,8 +1500,8 @@ class PersistedCollectionRuntime< hasMetadataChanges: (tx.rowMetadataMutations !== undefined && tx.rowMetadataMutations.length > 0) || - tx.collectionMetadataMutations !== undefined && - tx.collectionMetadataMutations.length > 0, + (tx.collectionMetadataMutations !== undefined && + tx.collectionMetadataMutations.length > 0), changedRows: mutations .filter((mutation) => mutation.type !== `delete`) .map((mutation) => ({ @@ -2173,7 +2173,8 @@ function createWrappedSyncConfig< runtime.queueHydrationBufferedTransaction({ operations: openTransaction.operations, rowMetadataWrites: openTransaction.rowMetadataWrites, - collectionMetadataWrites: openTransaction.collectionMetadataWrites, + collectionMetadataWrites: + openTransaction.collectionMetadataWrites, truncate: openTransaction.truncate, internal: openTransaction.internal, }) @@ -2186,7 +2187,8 @@ function createWrappedSyncConfig< .persistAndBroadcastExternalSyncTransaction({ operations: openTransaction.operations, rowMetadataWrites: openTransaction.rowMetadataWrites, - collectionMetadataWrites: openTransaction.collectionMetadataWrites, + collectionMetadataWrites: + openTransaction.collectionMetadataWrites, truncate: openTransaction.truncate, internal: false, }) diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index 5d357d78a..b482d96f6 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -1166,7 +1166,9 @@ export class SQLiteCorePersistenceAdapter< ? mergeObjectRows(existingValue, mutation.value) : mutation.value const nextMetadata = - mutation.metadataChanged === true ? 
mutation.metadata : existingMetadata + mutation.metadataChanged === true + ? mutation.metadata + : existingMetadata await transactionDriver.run( `INSERT INTO ${collectionTableSql} (key, value, metadata, row_version) diff --git a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts index b463d8cde..9ce212fc7 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts @@ -78,10 +78,12 @@ function createRecordingAdapter( loadCollectionMetadata: (collectionId) => { adapter.loadCollectionMetadataCalls.push(collectionId) return Promise.resolve( - Array.from(adapter.collectionMetadata.entries()).map(([key, value]) => ({ - key, - value, - })), + Array.from(adapter.collectionMetadata.entries()).map( + ([key, value]) => ({ + key, + value, + }), + ), ) }, applyCommittedTx: (collectionId, tx) => { @@ -109,7 +111,10 @@ function createRecordingAdapter( if (metadataMutation.type === `delete`) { adapter.collectionMetadata.delete(metadataMutation.key) } else { - adapter.collectionMetadata.set(metadataMutation.key, metadataMutation.value) + adapter.collectionMetadata.set( + metadataMutation.key, + metadataMutation.value, + ) } } return Promise.resolve() @@ -302,15 +307,15 @@ describe(`persistedCollectionOptions`, () => { expect(adapter.loadCollectionMetadataCalls).toEqual([ `persisted-startup-metadata`, ]) - expect(collection._state.syncedCollectionMetadata.get(`electric:resume`)).toEqual( - { - kind: `resume`, - offset: `10_0`, - handle: `handle-1`, - shapeId: `shape-1`, - updatedAt: 1, - }, - ) + expect( + collection._state.syncedCollectionMetadata.get(`electric:resume`), + ).toEqual({ + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }) }) it(`throws InvalidSyncConfigError when sync key is present but null`, () => { diff --git 
a/packages/db/src/collection/state.ts b/packages/db/src/collection/state.ts index ce54ea403..af65cb801 100644 --- a/packages/db/src/collection/state.ts +++ b/packages/db/src/collection/state.ts @@ -42,9 +42,7 @@ interface PendingSyncedTransaction< immediate?: boolean } -type PendingMetadataWrite = - | { type: `set`; value: unknown } - | { type: `delete` } +type PendingMetadataWrite = { type: `set`; value: unknown } | { type: `delete` } type InternalChangeMessage< T extends object = Record, @@ -1036,7 +1034,10 @@ export class CollectionStateManager< this.syncedMetadata.set(key, metadataWrite.value) } - for (const [key, metadataWrite] of transaction.collectionMetadataWrites) { + for (const [ + key, + metadataWrite, + ] of transaction.collectionMetadataWrites) { if (metadataWrite.type === `delete`) { this.syncedCollectionMetadata.delete(key) continue diff --git a/packages/db/src/collection/sync.ts b/packages/db/src/collection/sync.ts index f3eae8d02..7b1fdc61e 100644 --- a/packages/db/src/collection/sync.ts +++ b/packages/db/src/collection/sync.ts @@ -311,9 +311,8 @@ export class CollectionSyncManager< this.state.pendingSyncedTransactions[ this.state.pendingSyncedTransactions.length - 1 ] - const pendingWrite = pendingTransaction?.collectionMetadataWrites.get( - key, - ) + const pendingWrite = + pendingTransaction?.collectionMetadataWrites.get(key) if (pendingWrite) { return pendingWrite.type === `delete` ? 
undefined @@ -341,7 +340,10 @@ export class CollectionSyncManager< this.state.pendingSyncedTransactions.length - 1 ] if (pendingTransaction) { - for (const [key, pendingWrite] of pendingTransaction.collectionMetadataWrites) { + for (const [ + key, + pendingWrite, + ] of pendingTransaction.collectionMetadataWrites) { if (pendingWrite.type === `delete`) { merged.delete(key) } else { diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index d42edbf6f..0dbd01780 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -358,7 +358,9 @@ export interface SyncConfig< rowUpdateMode?: `partial` | `full` } -export interface SyncMetadataApi { +export interface SyncMetadataApi< + TKey extends string | number = string | number, +> { row: { get: (key: TKey) => unknown | undefined set: (key: TKey, metadata: unknown) => void diff --git a/packages/db/tests/collection.test.ts b/packages/db/tests/collection.test.ts index f3e7d79ee..9bb5cc63a 100644 --- a/packages/db/tests/collection.test.ts +++ b/packages/db/tests/collection.test.ts @@ -1465,7 +1465,9 @@ describe(`Collection`, () => { await collection.stateWhenReady() expect(observedCollectionMetadata).toBeUndefined() - expect(collection._state.syncedCollectionMetadata.get(`startup:key`)).toEqual({ + expect( + collection._state.syncedCollectionMetadata.get(`startup:key`), + ).toEqual({ ready: true, }) @@ -1474,9 +1476,9 @@ describe(`Collection`, () => { metadata.collection.set(`runtime:key`, { persisted: true }) commit() - expect(collection._state.syncedCollectionMetadata.get(`runtime:key`)).toEqual( - { persisted: true }, - ) + expect( + collection._state.syncedCollectionMetadata.get(`runtime:key`), + ).toEqual({ persisted: true }) }) it(`should use last-write-wins for row metadata in sync transactions`, async () => { diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index f3fc7ebe0..9f6ebff9d 100644 --- 
a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -1182,9 +1182,18 @@ function createElectricSync>( return { sync: (params: Parameters[`sync`]>[0]) => { - const { begin, write, commit, markReady, truncate, collection, metadata } = - params - const persistedResumeState = metadata?.collection.get(`electric:resume`) as + const { + begin, + write, + commit, + markReady, + truncate, + collection, + metadata, + } = params + const persistedResumeState = metadata?.collection.get( + `electric:resume`, + ) as | { kind: `resume` offset: string diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 2be591d7b..4d0116fe2 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -4275,44 +4275,44 @@ describe(`QueryCollection`, () => { }) }) - it(`should diff against persisted query-owned rows on warm start`, async () => { - const baseQueryKey = [`persisted-baseline-test`] - const queryFn = vi.fn().mockResolvedValue([]) + it(`should diff against persisted query-owned rows on warm start`, async () => { + const baseQueryKey = [`persisted-baseline-test`] + const queryFn = vi.fn().mockResolvedValue([]) - const config: QueryCollectionConfig = { - id: `persisted-baseline-test`, - queryClient, - queryKey: baseQueryKey, - queryFn, - getKey: (item) => item.id, - syncMode: `eager`, - startSync: false, - } + const config: QueryCollectionConfig = { + id: `persisted-baseline-test`, + queryClient, + queryKey: baseQueryKey, + queryFn, + getKey: (item) => item.id, + syncMode: `eager`, + startSync: false, + } - const collection = createCollection(queryCollectionOptions(config)) - const ownedRow = { id: `1`, name: `Owned row`, category: `A` } - const unrelatedRow = { id: `2`, name: `Unrelated row`, category: `B` } - const ownedQueryHash = hashKey(baseQueryKey) - - collection._state.syncedData.set(ownedRow.id, ownedRow) 
- collection._state.syncedData.set(unrelatedRow.id, unrelatedRow) - collection._state.syncedMetadata.set(ownedRow.id, { - queryCollection: { - owners: { - [ownedQueryHash]: true, + const collection = createCollection(queryCollectionOptions(config)) + const ownedRow = { id: `1`, name: `Owned row`, category: `A` } + const unrelatedRow = { id: `2`, name: `Unrelated row`, category: `B` } + const ownedQueryHash = hashKey(baseQueryKey) + + collection._state.syncedData.set(ownedRow.id, ownedRow) + collection._state.syncedData.set(unrelatedRow.id, unrelatedRow) + collection._state.syncedMetadata.set(ownedRow.id, { + queryCollection: { + owners: { + [ownedQueryHash]: true, + }, }, - }, - }) - collection._state.syncedMetadata.set(unrelatedRow.id, undefined) - collection._state.size = 2 + }) + collection._state.syncedMetadata.set(unrelatedRow.id, undefined) + collection._state.size = 2 - await collection.preload() - await flushPromises() + await collection.preload() + await flushPromises() - expect(queryFn).toHaveBeenCalledTimes(1) - expect(collection.has(ownedRow.id)).toBe(false) - expect(collection.has(unrelatedRow.id)).toBe(true) - }) + expect(queryFn).toHaveBeenCalledTimes(1) + expect(collection.has(ownedRow.id)).toBe(false) + expect(collection.has(unrelatedRow.id)).toBe(true) + }) it(`should reset refcount after query GC and reload (stale refcount bug)`, async () => { // This test catches Bug 2: stale refcounts after GC/remove diff --git a/persisted-sync-metadata-plan/01-core-api.md b/persisted-sync-metadata-plan/01-core-api.md index 83f551232..d25a9b279 100644 --- a/persisted-sync-metadata-plan/01-core-api.md +++ b/persisted-sync-metadata-plan/01-core-api.md @@ -54,9 +54,7 @@ Update the internal pending synced transaction shape so it can stage: Suggested internal shape: ```ts -type PendingMetadataWrite = - | { type: 'set'; value: unknown } - | { type: 'delete' } +type PendingMetadataWrite = { type: 'set'; value: unknown } | { type: 'delete' } type 
PendingSyncedTransaction = { committed: boolean From 7591ee1d4e90ce795ea504ed1ecbb8f392a12a29 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 15:39:47 +0000 Subject: [PATCH 06/12] fix: align persisted metadata writes with sync startup Buffer persisted metadata writes within wrapper transactions and dedupe concurrent collection setup so warm starts no longer trip missing sync transaction errors or collection registry races. Made-with: Cursor --- .../src/persisted.ts | 44 ++++-- .../src/sqlite-core-adapter.ts | 22 +++ .../tests/sqlite-core-adapter.test.ts | 36 +++++ .../query-db-collection/tests/query.test.ts | 127 ++++++++++++++++++ 4 files changed, 221 insertions(+), 8 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index 2cdcbec45..c9b326152 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -2111,19 +2111,33 @@ function createWrappedSyncConfig< set: (key: TKey, value: unknown) => { const openTransaction = transactionStack[transactionStack.length - 1] - openTransaction?.rowMetadataWrites.set(key, { + if (!openTransaction) { + throw new InvalidPersistedCollectionConfigError( + `metadata.row.set must be called within an open sync transaction`, + ) + } + openTransaction.rowMetadataWrites.set(key, { type: `set`, value, }) - params.metadata!.row.set(key, value) + if (!openTransaction.queuedBecauseHydrating) { + params.metadata!.row.set(key, value) + } }, delete: (key: TKey) => { const openTransaction = transactionStack[transactionStack.length - 1] - openTransaction?.rowMetadataWrites.set(key, { + if (!openTransaction) { + throw new InvalidPersistedCollectionConfigError( + `metadata.row.delete must be called within an open sync transaction`, + ) + } + openTransaction.rowMetadataWrites.set(key, { type: `delete`, }) - params.metadata!.row.delete(key) + if 
(!openTransaction.queuedBecauseHydrating) { + params.metadata!.row.delete(key) + } }, }, collection: { @@ -2131,19 +2145,33 @@ function createWrappedSyncConfig< set: (key: string, value: unknown) => { const openTransaction = transactionStack[transactionStack.length - 1] - openTransaction?.collectionMetadataWrites.set(key, { + if (!openTransaction) { + throw new InvalidPersistedCollectionConfigError( + `metadata.collection.set must be called within an open sync transaction`, + ) + } + openTransaction.collectionMetadataWrites.set(key, { type: `set`, value, }) - params.metadata!.collection.set(key, value) + if (!openTransaction.queuedBecauseHydrating) { + params.metadata!.collection.set(key, value) + } }, delete: (key: string) => { const openTransaction = transactionStack[transactionStack.length - 1] - openTransaction?.collectionMetadataWrites.set(key, { + if (!openTransaction) { + throw new InvalidPersistedCollectionConfigError( + `metadata.collection.delete must be called within an open sync transaction`, + ) + } + openTransaction.collectionMetadataWrites.set(key, { type: `delete`, }) - params.metadata!.collection.delete(key) + if (!openTransaction.queuedBecauseHydrating) { + params.metadata!.collection.delete(key) + } }, list: (prefix?: string) => params.metadata!.collection.list(prefix), diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index b482d96f6..e3faa084e 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -974,6 +974,10 @@ export class SQLiteCorePersistenceAdapter< string, CollectionTableMapping >() + private readonly collectionTableLoads = new Map< + string, + Promise + >() constructor(options: SQLiteCoreAdapterOptions) { const schemaVersion = options.schemaVersion ?? 
DEFAULT_SCHEMA_VERSION @@ -1685,6 +1689,24 @@ export class SQLiteCorePersistenceAdapter< return cached } + const inFlight = this.collectionTableLoads.get(collectionId) + if (inFlight) { + return inFlight + } + + const loadPromise = this.ensureCollectionReadyInternal(collectionId) + this.collectionTableLoads.set(collectionId, loadPromise) + + try { + return await loadPromise + } finally { + this.collectionTableLoads.delete(collectionId) + } + } + + private async ensureCollectionReadyInternal( + collectionId: string, + ): Promise { const existingRows = await this.driver.query<{ table_name: string tombstone_table_name: string diff --git a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts index 6af6dff2c..b76f6055b 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts @@ -1072,6 +1072,42 @@ export function runSQLiteCoreAdapterContractSuite( expect(loadedRows[0]?.key).toBe(`safe`) }) + it(`deduplicates concurrent ensureCollectionReady calls for the same collection`, async () => { + const { adapter } = registerContractHarness() + const collectionId = `concurrent-startup` + + const [rowsA, rowsB] = await Promise.all([ + adapter.loadSubset(collectionId, {}), + adapter.loadSubset(collectionId, {}), + ]) + + expect(rowsA).toEqual([]) + expect(rowsB).toEqual([]) + + await adapter.applyCommittedTx(collectionId, { + txId: `concurrent-startup-seed`, + term: 1, + seq: 1, + rowVersion: 1, + mutations: [ + { + type: `insert`, + key: `1`, + value: { + id: `1`, + title: `Seeded`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + }, + ], + }) + + const loadedRows = await adapter.loadSubset(collectionId, {}) + expect(loadedRows).toHaveLength(1) + expect(loadedRows[0]?.key).toBe(`1`) + }) + it(`prunes applied_tx rows by sequence threshold`, async 
() => { const { adapter, driver } = registerContractHarness({ appliedTxPruneMaxRows: 2, diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 4d0116fe2..8f05131ed 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -5246,4 +5246,131 @@ describe(`QueryCollection`, () => { customQueryClient.clear() }) }) + + describe(`rows from external sync sources`, () => { + it(`should retain pre-hydrated rows when a disjoint query correctly returns empty results`, async () => { + // Simulates a warm-start scenario where a persistence layer has already + // hydrated "history" rows into the collection. Two disjoint queries + // share the collection: + // - "history": returns the same rows (but with a server delay) + // - "live": correctly returns [] (there are no live items yet) + // + // When the live query resolves first with its correct empty result, + // the history rows should remain in the collection. The live query's + // empty result only means there are no *live* items — it should not + // affect rows from a different query's domain. 
+ + const preHydratedItems: Array = [ + { id: `1`, name: `History 1`, category: `history` }, + { id: `2`, name: `History 2`, category: `history` }, + { id: `3`, name: `History 3`, category: `history` }, + ] + + let resolveHistoryQueryFn!: (value: Array) => void + + const isQueryCategory = (category: string, where: any): boolean => { + return ( + where && + where.type === `func` && + where.name === `eq` && + where.args[0]?.path?.[0] === `category` && + where.args[1]?.value === category + ) + } + + const queryFn = vi.fn().mockImplementation((ctx: any) => { + const where = ctx.meta?.loadSubsetOptions?.where + + if (isQueryCategory(`history`, where)) { + // History query: returns data, but the server is slow + return new Promise>((resolve) => { + resolveHistoryQueryFn = resolve + }) + } + + if (isQueryCategory(`live`, where)) { + // Live query: correctly returns empty — no live items exist yet + return Promise.resolve([]) + } + + return Promise.resolve([]) + }) + + const baseQueryKey = [`warm-start-disjoint-test`] + + const baseOptions = queryCollectionOptions({ + id: `warm-start-disjoint-test`, + queryClient, + queryKey: (opts: any) => { + if (opts.where) { + return [...baseQueryKey, opts.where] + } + return baseQueryKey + }, + queryFn, + getKey: (item: CategorisedItem) => item.id, + syncMode: `on-demand`, + startSync: true, + }) + + const originalSync = baseOptions.sync + const collection = createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => { + // Simulate a persistence layer hydrating rows from a previous + // session on warm start, before the query layer initializes. 
+ params.begin({ immediate: true }) + for (const item of preHydratedItems) { + params.write({ type: `insert`, value: item }) + } + params.commit() + + return originalSync.sync(params) + }, + }, + }) + + // Verify the persistence layer's hydrated rows are present + expect(collection.size).toBe(3) + + // Subscribe two disjoint queries — history (delayed) and live (immediate) + const historyQuery = createLiveQueryCollection({ + query: (q) => + q + .from({ item: collection }) + .where(({ item }) => eq(item.category, `history`)), + }) + + const liveQuery = createLiveQueryCollection({ + query: (q) => + q + .from({ item: collection }) + .where(({ item }) => eq(item.category, `live`)), + }) + + // Trigger both queries. The history queryFn is pending; the live + // queryFn resolves immediately with []. + const historyPreload = historyQuery.preload() + await liveQuery.preload() + await flushPromises() + + // The live query correctly returned [] (no live items exist). + // The pre-hydrated history rows should still be in the collection. + expect(collection.size).toBe(3) + expect(collection.has(`1`)).toBe(true) + expect(collection.has(`2`)).toBe(true) + expect(collection.has(`3`)).toBe(true) + + // Now the history query's server response arrives + resolveHistoryQueryFn(preHydratedItems) + await historyPreload + + // Collection should still have all history items + expect(collection.size).toBe(3) + + await historyQuery.cleanup() + await liveQuery.cleanup() + }) + }) }) From 8e32e884cc662f07316c92c864381f9164638d9d Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 17:40:38 +0000 Subject: [PATCH 07/12] chore: remove persisted sync metadata docs from branch Drop the RFC and phased implementation plan from the branch while leaving the local working copies in place. 
Made-with: Cursor --- RFC-persisted-sync-metadata.md | 900 ------------------ persisted-sync-metadata-plan/01-core-api.md | 192 ---- .../02-sqlite-implementation.md | 184 ---- .../03-query-collection.md | 270 ------ .../04-electric-collection.md | 174 ---- persisted-sync-metadata-plan/05-test-plan.md | 229 ----- persisted-sync-metadata-plan/README.md | 51 - 7 files changed, 2000 deletions(-) delete mode 100644 RFC-persisted-sync-metadata.md delete mode 100644 persisted-sync-metadata-plan/01-core-api.md delete mode 100644 persisted-sync-metadata-plan/02-sqlite-implementation.md delete mode 100644 persisted-sync-metadata-plan/03-query-collection.md delete mode 100644 persisted-sync-metadata-plan/04-electric-collection.md delete mode 100644 persisted-sync-metadata-plan/05-test-plan.md delete mode 100644 persisted-sync-metadata-plan/README.md diff --git a/RFC-persisted-sync-metadata.md b/RFC-persisted-sync-metadata.md deleted file mode 100644 index d853aab0e..000000000 --- a/RFC-persisted-sync-metadata.md +++ /dev/null @@ -1,900 +0,0 @@ -# RFC: Transactional Persisted Sync Metadata - -## Status - -Draft - -## Summary - -This RFC proposes a transactional metadata API that sync implementations can -optionally use to persist and restore metadata alongside synced collection data. - -The design supports two metadata scopes: - -- **Row metadata**: metadata attached to a specific synced row -- **Collection metadata**: metadata attached to the collection as a whole - -The API is designed so that metadata changes can be committed atomically with -persisted row changes. 
This is required for correctness in two cases that are -already visible in the codebase: - -- `query-db-collection` needs persisted ownership and GC state so warm-starts do - not incorrectly delete or leak rows -- `electric-db-collection` needs persisted resume state and related metadata so - it can safely warm-start from persisted data and continue streaming - -This RFC is intentionally ordered around the consumer-facing API first, then the -SQLite implementation, then how query collections use it, and finally how -Electric collections use it. - -## Problem - -Today, persisted SQLite rows and sync-layer runtime metadata live on different -planes: - -- persisted collections store row values durably -- sync implementations keep important state in memory only - -That leads to restart gaps: - -- query collections lose row ownership state and cannot safely decide whether a - row should be deleted when the first query result arrives after restart -- Electric collections do not have a durable, transactional place to store - stream resume state such as offsets or handles - -The central requirement is not merely "persist metadata", but: - -1. collections must be able to **read persisted metadata on startup** -2. collections must be able to **update metadata as part of normal sync work** -3. persisted metadata that affects row existence must be **transactional with - row persistence** - -Non-transactional sidecar metadata is not sufficient for correctness. If row -data commits without matching metadata, or metadata commits without matching row -data, restart behavior can still be wrong. 
- -## Goals - -- Provide an optional metadata API to sync implementations -- Keep the API generic enough for multiple sync implementations -- Preserve crash consistency by making metadata transactional with row changes -- Support both row-local and collection-level metadata -- Support persisted GC state for query collections -- Support persisted resume state for Electric collections - -## Non-Goals - -- Define every possible metadata schema for all sync implementations -- Require metadata support for non-persisted collections -- Force all persistence adapters to implement advanced GC optimizations on day - one - -## Proposed API - -### Design principles - -The API exposed to a collection's sync implementation should be: - -- **optional**: absent for non-persisted collections -- **transaction-scoped**: metadata mutations participate in the current sync - transaction -- **scope-aware**: row metadata and collection metadata are separate -- **readable at startup**: sync implementations can restore state before or - during hydration - -### Sync API additions - -The `sync.sync()` params gain an optional `metadata` capability: - -```ts -type SyncMetadataApi = { - row: { - get: (key: TKey) => unknown | undefined - set: (key: TKey, metadata: unknown) => void - delete: (key: TKey) => void - } - collection: { - get: (key: string) => unknown | undefined - set: (key: string, value: unknown) => void - delete: (key: string) => void - list: (prefix?: string) => ReadonlyArray<{ - key: string - value: unknown - }> - } -} - -type SyncParams = { - collection: Collection - begin: (options?: { immediate?: boolean }) => void - write: (message: ChangeMessageOrDeleteKeyMessage) => void - commit: () => void - markReady: () => void - truncate: () => void - metadata?: SyncMetadataApi -} -``` - -### Semantics - -`metadata` is only available when the collection is backed by a persistence -layer that supports it. 
- -`metadata.row.*` operates on the durable metadata associated with synced rows in -the current collection. - -`metadata.collection.*` operates on durable collection-scoped metadata entries. -These entries are not attached to a single row, but they still participate in -the current sync transaction. - -### Transaction model - -Metadata operations are only valid while a sync transaction is open, that is, -between `begin()` and `commit()`. - -This RFC explicitly requires support for four kinds of committed sync -transactions: - -- row mutations only -- row mutations plus metadata mutations -- collection metadata mutations only -- row metadata mutations only - -If `metadata.row.set`, `metadata.row.delete`, `metadata.collection.set`, or -`metadata.collection.delete` is called outside an open transaction, the -implementation should throw, just as `write()` does today when called without a -pending sync transaction. - -### Read-your-own-writes - -Reads performed through `metadata.row.get`, `metadata.collection.get`, and -`metadata.collection.list` inside an open transaction must reflect any staged -writes from that same transaction. - -This is required so sync implementations can safely merge metadata within a -transaction without having to mirror staged state themselves. 
- -The write semantics are: - -- `row.set` updates the metadata that will be committed for that row -- `row.delete` removes persisted row metadata for that row -- `collection.set` stages a collection metadata update in the current sync - transaction -- `collection.delete` stages a collection metadata delete in the current sync - transaction - -The read semantics are: - -- `row.get` returns the currently hydrated metadata for a row, if known -- `collection.get` and `collection.list` return the persisted collection - metadata that was loaded during startup or hydration - -### Relationship to `write({ metadata })` - -The existing `write({ type, value, metadata })` path and `metadata.row.*` must -target the same underlying row metadata store. - -They serve different purposes: - -- `write({ ..., metadata })` attaches metadata to a row mutation -- `metadata.row.set()` and `metadata.row.delete()` allow explicit metadata-only - row changes when the row value itself did not change - -Within a single transaction, implementations should treat these as staged -updates to the same row metadata slot. If both are used for the same row in the -same transaction, the effective metadata should follow transaction order -semantics, with later staged changes winning. - -### Why this shape - -This API is deliberately **not** an async sidecar KV API like -`load/store/delete`. A free-floating async store suggests independent writes at -arbitrary times. That is exactly what we want to avoid for correctness-sensitive -state. - -Instead, the API is modeled as an extension of the existing sync transaction -surface: - -- read previously persisted metadata -- stage metadata changes -- commit metadata together with row changes - -### Serialization - -Persisted metadata values are JSON-serialized using the same persisted JSON -encoding rules used elsewhere in the SQLite adapter. Metadata should therefore -be kept JSON-compatible and reasonably small. 
- -## SQLite Persistence Implementation - -### Overview - -The SQLite persisted collection layer implements the metadata API using two -durable stores: - -1. **row metadata** stored with persisted rows -2. **collection metadata** stored in a separate table - -Both participate in the same SQLite transaction used to apply a committed sync -transaction. - -### Schema changes - -#### Persisted rows - -Add a `metadata` column to the collection table: - -```sql -CREATE TABLE IF NOT EXISTS ( - key TEXT PRIMARY KEY NOT NULL, - value TEXT NOT NULL, - metadata TEXT, - row_version INTEGER NOT NULL -) -``` - -The tombstone table may also optionally carry the last row metadata if useful -for debugging or future recovery, but that is not required for the core design. - -#### Collection metadata - -Add a collection-level metadata table: - -```sql -CREATE TABLE IF NOT EXISTS collection_metadata ( - collection_id TEXT NOT NULL, - key TEXT NOT NULL, - value TEXT NOT NULL, - updated_at INTEGER NOT NULL, - PRIMARY KEY (collection_id, key) -) -``` - -This table stores collection-scoped metadata such as: - -- Electric resume state -- query collection placeholder GC state -- future sync-implementation-specific metadata - -### Adapter contract - -The SQLite adapter extends its persistence internals so a single committed sync -transaction can include: - -- row mutations -- row metadata mutations -- collection metadata mutations - -This requires the persisted runtime to stage metadata on the pending sync -transaction itself, not in a side buffer detached from `begin()` / `commit()`. 
-
-One possible shape is:
-
-```ts
-type PersistedRowMutation<T, TKey> =
-  | { type: 'insert'; key: TKey; value: T; metadata?: unknown }
-  | { type: 'update'; key: TKey; value: T; metadata?: unknown }
-  | { type: 'delete'; key: TKey; value: T }
-
-type PersistedCollectionMetadataMutation =
-  | { type: 'set'; key: string; value: unknown }
-  | { type: 'delete'; key: string }
-
-type PersistedTx<T, TKey> = {
-  txId: string
-  term: number
-  seq: number
-  rowVersion: number
-  mutations: Array<PersistedRowMutation<T, TKey>>
-  collectionMetadataMutations?: Array<PersistedCollectionMetadataMutation>
-}
-```
-
-This preserves a crucial invariant:
-
-> if a sync transaction commits, both the row data and the metadata that explains
-> that row data commit together
-
-### PersistenceAdapter changes
-
-This RFC implies an explicit adapter contract change:
-
-- persisted row hydration must be able to return row metadata
-- persisted transaction application must be able to apply collection metadata
-  mutations as part of the same commit
-
-One possible updated hydration shape is:
-
-```ts
-type PersistedLoadedRow<T, TKey> = {
-  key: TKey
-  value: T
-  metadata?: unknown
-}
-```
-
-Existing adapters that do not yet provide metadata can remain compatible by
-returning rows with `metadata: undefined`.
-
-### Startup and hydration
-
-The persisted runtime loads:
-
-- row values and row metadata during normal subset hydration
-- collection metadata during runtime startup
-
-This means metadata restoration does **not** require a separate full database
-scan beyond what the collection was already going to hydrate.
-
-In eager mode, the initial hydrated subset carries its row metadata with it.
-
-In on-demand mode, metadata is restored lazily for whichever subsets are loaded.
-
-Collection metadata should be loaded before new sync subscriptions begin
-processing, so startup GC or resume-state decisions can run against a stable
-baseline. 
-
-## Query Collection Usage
-
-### Problem to solve
-
-`query-db-collection` keeps ownership state in memory:
-
-- `queryToRows`
-- `rowToQueries`
-- `queryRefCounts`
-
-After restart, persisted rows are restored into the base collection, but query
-ownership is lost. The first query result can then incorrectly delete rows that
-were hydrated from persistence but not yet claimed in memory.
-
-### What the query collection should persist
-
-The query collection should persist two categories of state:
-
-1. **per-row ownership metadata**
-2. **per-query GC state**
-
-### Row metadata shape
-
-Ownership should be stored in row metadata, not in a global sidecar blob:
-
-```ts
-type QueryRowMetadata = {
-  queryCollection?: {
-    owners: Record<string, true>
-  }
-}
-```
-
-Where the `owners` keys are hashed query identities.
-
-This makes persisted ownership:
-
-- local to the row it explains
-- transactional with the row write
-- reconstructible during ordinary row hydration
-
-This also means ownership updates can happen without inventing synthetic row
-value updates. A query may stop owning a row while another query still owns it;
-that is a metadata-only row change.
-
-### Reconstructing in-memory state
-
-When rows are hydrated from persistence, the query collection can rebuild:
-
-- `rowToQueries` from each row's persisted `owners`
-- `queryToRows` by reversing that mapping
-
-This reconstruction is incremental. It happens for the rows being hydrated, not
-by requiring a separate full read of all persisted rows.
-
-In on-demand mode, that means the in-memory ownership graph is only complete for
-the hydrated subsets. This is sufficient for warm-start correctness of loaded
-data, but not by itself sufficient for storage-level GC over entirely cold rows.
-
-### Query refcounts
-
-`queryRefCounts` should remain in-memory only.
-
-They represent live subscriber/process state, not durable row ownership. 
After -restart, refcounts should begin at zero and grow as real subscriptions attach. - -### Query lifecycle controls - -Query collections now need three distinct lifecycle controls: - -- `staleTime`: freshness of query data when re-requested -- `gcTime`: in-memory observer and TanStack Query cache retention -- `persistedGcTime`: durable placeholder and persisted-row retention - -These controls solve different problems and must remain independent. - -`staleTime` answers: - -- should this query be considered stale when requested again? - -`gcTime` answers: - -- how long should the in-memory query observer and query cache survive after the - query becomes inactive? - -`persistedGcTime` answers: - -- how long should persisted ownership placeholders and persisted rows survive - after the query becomes inactive? - -This separation is required for offline-first users who want persisted query -results to survive long periods offline even after in-memory query GC has -occurred. - -### Persisted query retention state - -Warm-start correctness also requires persisted query retention state for query -placeholders that still own rows but currently have no active subscribers. - -That state is collection-level metadata and should support both finite TTL-based -retention and indefinite retention until the query is revalidated. - -```ts -type PersistedQueryRetentionEntry = - | { - queryHash: string - mode: 'ttl' - expiresAt: number - } - | { - queryHash: string - mode: 'until-revalidated' - } -``` - -Suggested keys: - -- `queryCollection:gc:` - -The value should contain at least: - -- either `expiresAt` for finite TTL retention -- or `mode: 'until-revalidated'` for indefinite persisted retention -- optionally debug fields like `lastActiveAt` - -The `until-revalidated` mode is intended for products that want persisted query -results to remain available indefinitely while offline and only be reconciled -once the same query is requested again. 
- -### Query identity - -The GC entry must be tied to the same canonical identity used for row ownership. - -If the query collection needs more than the hash for debugging or future -matching, it may also persist: - -- `queryCollection:query:` -> serialized query identity - -This is collection-scoped metadata, not row metadata. - -### GC behavior - -When a query becomes idle and would normally begin its GC countdown: - -1. keep row ownership on the rows -2. persist `queryCollection:gc:` with either: - - `mode: 'ttl'` and `expiresAt`, or - - `mode: 'until-revalidated'` - -On restart: - -1. load collection metadata entries matching `queryCollection:gc:` -2. for any query placeholder with `mode: 'ttl'` and expired `expiresAt`, run - persisted cleanup -3. skip startup GC for placeholders with `mode: 'until-revalidated'` -4. remove the placeholder's ownership from rows when cleanup runs -5. delete rows that no longer have owners -6. delete the GC metadata entry when cleanup completes - -Restart GC must run before new query subscriptions are allowed to attach for the -same collection, or under the same startup mutex that serializes hydration and -replay work. This avoids races where a placeholder is cleaned up while a real -query is simultaneously reattaching. - -When a query with `mode: 'until-revalidated'` is requested again: - -1. match the placeholder using the same canonical query identity -2. reconstruct the query's persisted ownership baseline -3. run the query and diff the result against the persisted owned rows -4. remove rows that are no longer owned after revalidation -5. 
clear or refresh the retention entry based on the newly active query state - -This gives the desired offline behavior: - -- persisted rows remain available indefinitely -- they are not deleted just because in-memory `gcTime` elapsed -- they are eventually reconciled when the query is re-requested - -### Persisted GC implementation strategies - -There are two viable implementation levels: - -#### Level 1: simple row-metadata rewrite - -Use row metadata as the source of truth and perform cleanup by: - -- loading affected rows -- removing the owner from row metadata -- deleting rows whose owner set becomes empty - -This is simpler and consistent with the row-metadata design, but it is less -efficient for large collections. - -Level 1 also has an important limitation: if the adapter cannot efficiently -enumerate rows owned by a query, cleanup may degrade into a full collection scan -and row-metadata JSON rewrite. That is acceptable as an initial correctness -implementation, but it should be treated as a potentially expensive path. - -This cost matters even more when persisted retention is long-lived, because more -query placeholders and retained rows may accumulate over time. - -#### Level 2: normalized ownership index - -Add an adapter-level ownership table: - -```sql -CREATE TABLE query_row_ownership ( - collection_id TEXT NOT NULL, - row_key TEXT NOT NULL, - query_hash TEXT NOT NULL, - PRIMARY KEY (collection_id, row_key, query_hash) -) -``` - -This allows persisted GC to run efficiently in SQLite without scanning or -rewriting every row blob. The row metadata can remain the logical API surface, -while the adapter maintains the normalized index as an optimization. - -This RFC does not require Level 2 for the initial API, but it leaves room for -it because query GC on persisted data is a first-class requirement. - -Another acceptable future variation is to denormalize owned row keys into the GC -entry itself. 
This RFC does not require that initially, but it is compatible -with the collection metadata model. - -### Query API surface - -The query collection should expose persisted retention separately from -`staleTime` and `gcTime`. - -One possible shape is: - -```ts -queryCollectionOptions({ - queryKey: ['messages', spaceId, pageId], - queryFn, - staleTime: 0, - gcTime: 5 * 60_000, - persistedGcTime: Infinity, -}) -``` - -An alternative shape that leaves more room for future extension is: - -```ts -queryCollectionOptions({ - queryKey: ['messages', spaceId, pageId], - queryFn, - staleTime: 0, - gcTime: 5 * 60_000, - persistedRetention: { - gcTime: Infinity, - }, -}) -``` - -This RFC does not require the final option name, but it does require persisted -retention to be distinct from the existing in-memory `gcTime`. - -## Electric Collection Usage - -### Problem to solve - -Electric has a different persistence problem from query ownership. - -It needs durable collection-level resume state so that after restart it can: - -- warm-start from persisted rows -- safely resume streaming from the correct point - -Today, Electric can hydrate row data from persistence, but it does not have a -dedicated transactional metadata path for persisted resume state. - -### What Electric should persist - -Electric should use both metadata scopes: - -#### Collection metadata - -Use collection metadata for stream resume state, for example: - -```ts -type ElectricResumeMetadata = - | { - kind: 'resume' - offset: string - handle: string - shapeId: string - updatedAt: number - } - | { - kind: 'reset' - updatedAt: number - } -``` - -Suggested key: - -- `electric:resume` - -This metadata must be committed transactionally with the row changes that were -applied from the same Electric stream batch. 
-
-That gives the required safety property:
-
-- if the row batch commits, the resume state commits
-- if the row batch does not commit, the resume state does not advance either
-
-#### Row metadata
-
-Electric already attaches sync metadata to rows from stream headers. That row
-metadata should flow through the same row metadata API so it can survive restart
-where useful.
-
-This includes information like:
-
-- relation identity
-- other per-row sync headers that are useful after hydration
-
-### Resume semantics
-
-On startup, Electric should:
-
-1. read `electric:resume` from collection metadata
-2. prefer that persisted resume state over a default `now` fallback
-3. hydrate persisted rows
-4. continue streaming from the persisted resume point
-
-### Interaction with derived in-memory state
-
-Electric also maintains in-memory derived state such as:
-
-- tag tracking for move-out handling
-- synced key tracking
-- snapshot and txid matching helpers
-
-This RFC does not require every derived Electric structure to become durable in
-the first iteration. But it does define the metadata API needed to do so where
-necessary.
-
-The practical rule is:
-
-- if a piece of Electric state affects whether rows should exist after restart,
-  it should eventually become durable, either as row metadata or collection
-  metadata
-- if that state cannot yet be reconstructed safely, Electric should fall back to
-  a conservative reload path rather than assuming warm-started data is exact
-
-## API Usage Examples
-
-### Query collection example
-
-```ts
-sync: ({ begin, write, commit, metadata }) => {
-  const setRowOwners = (
-    rowKey: string | number,
-    owners: Record<string, true>,
-  ) => {
-    const current = (metadata?.row.get(rowKey) ?? {}) as Record<string, unknown>
-    metadata?.row.set(rowKey, {
-      ...current,
-      queryCollection: {
-        owners,
-      },
-    })
-  }
-
-  begin()
-  // Normal sync logic... 
- commit() -} -``` - -### Electric example - -```ts -sync: ({ begin, write, commit, metadata }) => { - const resumeState = metadata?.collection.get('electric:resume') as - | { - kind: 'resume' - offset: string - handle: string - shapeId: string - updatedAt: number - } - | { - kind: 'reset' - updatedAt: number - } - | undefined - - // use resumeState to configure the stream - - // later, when committing a batch: - begin() - write({ type: 'update', value: row, metadata: rowHeaders }) - metadata?.collection.set('electric:resume', { - kind: 'resume', - offset: nextOffset, - handle: nextHandle, - shapeId: nextShapeId, - updatedAt: Date.now(), - }) - commit() -} -``` - -## Design Decisions - -### Why row metadata and collection metadata both exist - -They solve different problems: - -- row metadata explains why a specific row exists and what sync state belongs to - it -- collection metadata tracks collection-wide runtime state such as resume points - and query placeholder GC entries - -Trying to store everything in one global metadata blob would force unnecessary -bootstrap work and make transactional coupling harder. - -### Why metadata is part of the sync transaction model - -The metadata API is not just a convenience wrapper. It is part of the sync -transaction model. - -That means implementations must stage row operations, row metadata mutations, -and collection metadata mutations on the same pending sync transaction and apply -them together during commit. - -### Why query GC state is collection metadata - -GC timers are properties of query placeholders, not of individual rows. They -must persist across restart, but they are not naturally attached to a specific -row. - -The ownership edges themselves belong with rows, but the expiration state belongs -with the query placeholder. - -This also allows persisted retention to express policies that are not ordinary -timers, such as `until-revalidated`. 
-
-### Why refcounts are not persisted
-
-Live refcounts describe current subscribers and current process state. That
-state is not durable and should not survive restart. Durable ownership and
-placeholder GC state are enough to reconstruct the correct baseline.
-
-### Why persisted retention is separate from `gcTime`
-
-Products may want in-memory query state to be short-lived while persisted data
-remains durable for much longer, including indefinitely until the query is
-requested again.
-
-Keeping `persistedGcTime` separate allows:
-
-- normal in-memory memory pressure behavior
-- long-lived offline warm starts
-- explicit control over how durable query placeholders are retained
-
-### Metadata replay and recovery
-
-Cross-tab replay, targeted invalidation, and `pullSince` recovery currently
-transport row keys and values, but not metadata deltas.
-
-The first implementation should preserve correctness before optimizing for
-efficiency:
-
-- if a committed tx includes metadata changes that cannot be replayed exactly,
-  persisted runtimes may conservatively fall back to reload behavior
-- targeted metadata replay can be added later as a follow-up optimization
-
-This allows metadata support to ship without requiring a fully optimized replay
-protocol on day one.
-
-### Namespacing convention
-
-Sync implementations that write collection metadata must namespace their keys.
-
-The convention is:
-
-- `<namespace>:<key>`
-
-Examples:
-
-- `queryCollection:gc:<queryHash>`
-- `queryCollection:query:<queryHash>`
-- `electric:resume`
-
-This RFC does not require a registry mechanism initially, but namespaced keys
-are mandatory to avoid collisions. 
- -## Rollout Plan - -### Phase 1 - -- add optional metadata API to sync params -- stage metadata writes on pending sync transactions -- support metadata-only committed sync transactions -- add SQLite support for row metadata and collection metadata -- hydrate row metadata alongside persisted rows - -### Phase 2 - -- use row metadata in query collections for durable ownership -- persist query placeholder retention state in collection metadata -- implement restart-safe GC behavior -- use conservative reload fallback for metadata-bearing replay/recovery paths -- support separate persisted retention policy for query collections - -### Phase 3 - -- use collection metadata in Electric for persisted resume state -- evaluate which additional Electric-derived state must become durable for exact - restart behavior - -## Open Questions - -1. Should the initial SQLite implementation store query ownership only inside row - metadata blobs, or also maintain a normalized ownership index from the start? - -2. Should collection metadata be exposed to sync implementations only at startup - and during transactions, or also via a read-only utility surface outside - `sync.sync()`? - -3. Should persisted query GC cleanup run only on startup and local unload paths, - or also as part of a background maintenance task in persisted runtimes? - -4. Should Electric persist only a resume offset, or also a stronger stream - identity payload including shape/handle information to detect incompatible - resume state? 
- -## Testing Invariants - -Any implementation of this RFC should add tests for at least these invariants: - -- metadata commits iff the corresponding sync transaction commits -- row hydration restores row metadata together with row values -- query collection warm-start does not delete persisted rows before ownership is - reconstructed -- persisted query GC deletes rows only when ownership is truly orphaned -- metadata-only sync transactions persist correctly -- truncate clears row metadata and any collection metadata that is defined as - reset-scoped -- Electric resume metadata advances only when the corresponding batch commits -- metadata-bearing replay and recovery paths remain correct, even when they fall - back to reload behavior - -## Recommendation - -Adopt a transactional metadata API with two scopes: - -- row metadata for per-row durable sync state -- collection metadata for durable collection-wide state - -Implement both in the SQLite persisted collection layer, then migrate: - -- `query-db-collection` to durable row ownership plus collection-level GC state -- `electric-db-collection` to transactional persisted resume metadata - -This keeps the API generic while preserving the key correctness property: - -> metadata that affects persisted row behavior commits together with the row -> state it explains diff --git a/persisted-sync-metadata-plan/01-core-api.md b/persisted-sync-metadata-plan/01-core-api.md deleted file mode 100644 index d25a9b279..000000000 --- a/persisted-sync-metadata-plan/01-core-api.md +++ /dev/null @@ -1,192 +0,0 @@ -# Phase 1: Core API - -## Objective - -Add a transactional sync metadata API to `@tanstack/db` that supports: - -- row metadata -- collection metadata -- metadata-only committed sync transactions -- read-your-own-writes semantics inside a sync transaction - -This phase should not require query collection or Electric changes to ship. It -is the core primitive they will later consume. 
-
-## Primary code areas
-
-- `packages/db/src/types.ts`
-- `packages/db/src/collection/sync.ts`
-- `packages/db/src/collection/state.ts`
-- `packages/db/tests/collection.test.ts`
-- any new core tests needed for metadata transaction behavior
-
-## Proposed implementation steps
-
-### 1. Extend sync types
-
-Update the sync params type to include:
-
-- `metadata.row.get`
-- `metadata.row.set`
-- `metadata.row.delete`
-- `metadata.collection.get`
-- `metadata.collection.set`
-- `metadata.collection.delete`
-- `metadata.collection.list`
-
-Key requirements:
-
-- metadata API is optional
-- metadata writes outside an active sync transaction throw
-- startup reads through `metadata.row.get`, `metadata.collection.get`, and
-  `metadata.collection.list` are allowed outside a transaction
-- reads inside an active transaction must reflect staged metadata writes
-
-### 2. Extend pending sync transaction state
-
-Update the internal pending synced transaction shape so it can stage:
-
-- row operations
-- row metadata writes
-- collection metadata writes
-- truncate/reset state
-
-Suggested internal shape:
-
-```ts
-type PendingMetadataWrite = { type: 'set'; value: unknown } | { type: 'delete' }
-
-type PendingSyncedTransaction<T, TKey> = {
-  committed: boolean
-  operations: Array<PendingSyncOperation<T, TKey>>
-  deletedKeys: Set<TKey>
-  rowMetadataWrites: Map<TKey, PendingMetadataWrite>
-  collectionMetadataWrites: Map<string, PendingMetadataWrite>
-  truncate?: boolean
-  immediate?: boolean
-}
-```
-
-Exact naming is flexible, but the staged metadata writes must be co-located with
-the existing pending sync transaction.
-
-### 3. Add in-memory collection metadata state
-
-Add a new in-memory store in `CollectionStateManager` for collection-scoped
-synced metadata.
-
-Suggested field:
-
-```ts
-public syncedCollectionMetadata = new Map<string, unknown>()
-```
-
-This should behave like `syncedMetadata`, but keyed by metadata key rather than
-row key.
-
-Note: this naming sits next to the existing row-scoped `syncedMetadata`. 
If the -implementation keeps both names, it should add clear comments distinguishing row -metadata from collection metadata. Renaming the existing row-scoped field to -something more explicit can be considered as a follow-up cleanup. - -### 4. Define overwrite semantics - -Document and implement these rules: - -- `write({ metadata })` and `metadata.row.set()` target the same underlying row - metadata state -- later staged writes win within a transaction -- every staged row metadata write is a replace at the transaction layer -- `delete` removes row metadata -- `metadata.row.set()` replaces the full row metadata blob -- `metadata.row.delete()` removes row metadata -- `metadata.collection.set()` replaces the full collection metadata value for - that key -- `metadata.collection.delete()` removes the value - -If callers need merge behavior, they should: - -1. read the current metadata value -2. compute the merged result -3. stage the merged result explicitly - -This avoids contradictory rules when `write({ metadata })` and -`metadata.row.set()` are both used for the same row in one transaction. - -### 5. Support metadata-only transactions - -Ensure `commitPendingTransactions()` can commit a transaction with: - -- zero row operations and non-zero metadata changes -- row metadata changes only -- collection metadata changes only - -This is a hard requirement for later Electric resume persistence and query -retention persistence. - -### 6. Define truncate behavior - -Core truncate semantics must be explicit: - -- clear `syncedData` -- clear `syncedMetadata` -- clear any row-scoped staged metadata -- leave collection metadata alone unless a higher layer explicitly resets it - -The core layer should not silently delete collection metadata on truncate. -Per-sync reset behavior can be layered on later. - -### 7. Define row-delete semantics - -Deleting a row through sync also deletes its row metadata. 
- -This should hold regardless of whether row metadata had previously been staged -through `write({ metadata })` or `metadata.row.set()`. - -### 8. Scope metadata to sync paths - -This metadata API is sync-only. - -It is not intended to flow through user mutation transport types such as -`PersistedMutationEnvelope`. User mutations may still observe `syncMetadata` -coming from already-synced rows, but they do not independently persist metadata -through this API. - -## Edge cases to handle - -- `metadata.row.set()` called before `begin()` -- `metadata.collection.set()` called after `commit()` -- `metadata.collection.get()` called before `begin()` during startup -- `metadata.row.get()` after a staged `row.set()` in the same transaction -- `metadata.collection.list(prefix)` after multiple staged collection writes -- mixing `write({ metadata })` and `metadata.row.set()` for the same key in the - same transaction -- row delete after earlier staged row metadata updates in the same transaction -- truncate followed by new staged row metadata in the same transaction -- empty transaction commit with only metadata writes - -## Acceptance criteria - -- core sync API can stage and commit row metadata -- core sync API can stage and commit collection metadata -- metadata reads inside a transaction see staged writes -- metadata-only commits work -- existing collection behavior without metadata remains unchanged - -## Suggested tests - -- commit row metadata through `write({ metadata })` -- commit row metadata through `metadata.row.set()` -- commit collection metadata through `metadata.collection.set()` -- verify read-your-own-writes inside a transaction -- verify startup reads outside a transaction succeed -- verify last-write-wins for staged row metadata -- verify metadata writes outside a transaction throw -- verify row delete removes row metadata -- verify truncate clears row metadata but not collection metadata -- verify metadata-only transactions commit successfully - -## Exit 
criteria - -Phase 1 is complete when the core collection layer can represent, stage, commit, -and read metadata correctly in memory, independent of any persistence adapter. diff --git a/persisted-sync-metadata-plan/02-sqlite-implementation.md b/persisted-sync-metadata-plan/02-sqlite-implementation.md deleted file mode 100644 index f922210b3..000000000 --- a/persisted-sync-metadata-plan/02-sqlite-implementation.md +++ /dev/null @@ -1,184 +0,0 @@ -# Phase 2: SQLite Implementation - -## Objective - -Make `db-sqlite-persisted-collection-core` the reference implementation of the -metadata API by persisting: - -- row metadata with row values -- collection metadata in a dedicated table -- row and metadata changes in the same SQLite transaction - -## Primary code areas - -- `packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts` -- `packages/db-sqlite-persisted-collection-core/src/persisted.ts` -- `packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts` -- `packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts` -- restart/runtime persistence contract tests - -## Proposed implementation steps - -### 1. Extend SQLite schema - -Add: - -- `metadata TEXT` column to persisted collection row tables -- `collection_metadata` table for collection-scoped metadata - -Suggested shape: - -```sql -CREATE TABLE IF NOT EXISTS collection_metadata ( - collection_id TEXT NOT NULL, - key TEXT NOT NULL, - value TEXT NOT NULL, - updated_at INTEGER NOT NULL, - PRIMARY KEY (collection_id, key) -) -``` - -### 2. Extend persisted row hydration - -Update the adapter hydration path to return: - -```ts -type PersistedLoadedRow = { - key: TKey - value: T - metadata?: unknown -} -``` - -The persisted runtime must pass hydrated metadata into the collection sync -transaction, not drop it during `applyRowsToCollection()` or related paths. - -### 3. 
Extend persisted tx shape

-Update internal persisted tx machinery to support:
-
-- row value writes
-- row metadata writes
-- collection metadata writes
-
-This should be reflected in:
-
-- normalized sync operation shapes
-- buffered sync transactions
-- adapter `applyCommittedTx()`
-- replay payload classification so the runtime knows when exact targeted replay
-  is possible and when it must fall back to reload
-
-### 4. Make metadata transactional in SQLite
-
-All of these must commit in one SQLite transaction:
-
-- row inserts/updates/deletes
-- row metadata changes
-- collection metadata changes
-- version/stream position updates already associated with the tx
-
-This is the key correctness property for the whole design.
-
-### 5. Load collection metadata at startup
-
-The persisted runtime should load collection metadata during startup, before new
-sync subscriptions start processing. This is necessary for:
-
-- query placeholder retention decisions
-- Electric resume-state restoration
-- future collection-scoped metadata consumers
-
-This should be reflected in the adapter contract explicitly, for example via:
-
-```ts
-loadCollectionMetadata?: (
-  collectionId: string,
-) => Promise<Map<string, unknown>>
-```
-
-The exact method name is flexible, but startup collection metadata loading must
-be a first-class adapter capability.
-
-### 6. 
Carry metadata through replay and hydration - -Metadata must not be lost in: - -- initial hydration -- buffered sync transaction application -- internal persisted transaction creation -- self/follower replay -- `pullSince`-style gap recovery - -For the first pass, replay behavior should be explicit: - -- hydration must carry row metadata exactly -- local commit must carry row and collection metadata exactly -- if a committed tx contains metadata changes and the targeted replay protocol - cannot represent them exactly, followers should fall back to reload behavior -- if gap recovery encounters metadata-bearing changes it cannot replay exactly, - recovery should also fall back to reload behavior - -This must be documented in the implementation, not left implicit. - -## Important design constraints - -### Metadata-only committed txs - -The persisted layer must support transactions with: - -- no row mutations -- collection metadata changes only - -This is required for: - -- Electric resume metadata commits -- query retention metadata updates - -### Serialization - -Use the same persisted JSON encoding and decoding path already used for row -values, so metadata can safely round-trip supported value types. - -### Crash-consistency boundary - -The implementation must keep row writes, row metadata writes, and collection -metadata writes inside the same SQLite transaction boundary. - -If any part of the tx fails, all three categories must roll back together. 
- -## Edge cases to handle - -- metadata-only tx commit -- delete row with row metadata present -- row update with partial row value and metadata merge semantics -- crash/restart between repeated tx applications -- replay of metadata-bearing committed txs to follower tabs -- sequence-gap recovery when metadata changed in a missed tx -- full reload fallback correctness when targeted metadata replay is unavailable -- startup collection metadata load before subscription processing - -## Acceptance criteria - -- persisted rows round-trip metadata -- collection metadata round-trips independently -- row data and metadata commit atomically -- metadata-only committed txs persist correctly -- startup loads collection metadata and hydrated row metadata -- replay/recovery remains correct, even if it uses conservative reload fallback - -## Suggested tests - -- SQLite adapter stores and loads row metadata -- SQLite adapter stores and loads collection metadata -- `applyCommittedTx()` atomically commits row and collection metadata -- metadata-only tx survives restart -- hydrated rows apply metadata into collection state -- follower runtime converges on metadata-bearing txs -- seq-gap recovery remains correct when metadata changed -- startup collection metadata loads before any sync subscription attaches - -## Exit criteria - -Phase 2 is complete when SQLite-backed persisted collections can durably store, -hydrate, and replay metadata with the same transactional guarantees as row data. 
diff --git a/persisted-sync-metadata-plan/03-query-collection.md b/persisted-sync-metadata-plan/03-query-collection.md deleted file mode 100644 index a467cd6ae..000000000 --- a/persisted-sync-metadata-plan/03-query-collection.md +++ /dev/null @@ -1,270 +0,0 @@ -# Phase 3: Query Collection - -## Objective - -Migrate `query-db-collection` to the new metadata primitives so it can: - -- preserve row ownership across restart -- support persisted query retention independently from in-memory `gcTime` -- support long-lived offline warm starts -- reconcile retained persisted rows when the same query is requested again - -## Primary code areas - -- `packages/query-db-collection/src/query.ts` -- `packages/query-db-collection/src/serialization.ts` -- `packages/query-db-collection/tests/query.test.ts` -- persisted runtime integration tests combining query collection and SQLite - -## High-level design - -### Persisted on rows - -Store per-row ownership in row metadata: - -```ts -type QueryRowMetadata = { - queryCollection?: { - owners: Record - } -} -``` - -### Persisted at collection scope - -Store query retention/placeholder metadata at collection scope. - -Suggested entry shape: - -```ts -type PersistedQueryRetentionEntry = - | { - queryHash: string - mode: 'ttl' - expiresAt: number - } - | { - queryHash: string - mode: 'until-revalidated' - } -``` - -Suggested keys: - -- `queryCollection:gc:` -- optionally `queryCollection:query:` for serialized query identity -- optionally `queryCollection:metaVersion` for query metadata versioning - -## Proposed implementation steps - -### 1. Add persisted retention option to query collection config - -Introduce a durable retention control that is independent from: - -- `staleTime` -- in-memory `gcTime` - -Possible public API shapes: - -```ts -persistedGcTime?: number | typeof Infinity -``` - -or - -```ts -persistedRetention?: { - gcTime: number | typeof Infinity -} -``` - -The second shape is more extensible, but either is acceptable. 
- -This should be added to the public query collection option types defined in -`packages/query-db-collection/src/query.ts`. - -### 2. Rebuild ownership from hydrated rows - -When rows are hydrated from persistence: - -- inspect row metadata for query owners -- rebuild `rowToQueries` -- rebuild `queryToRows` - -This reconstruction is incremental and subset-scoped. - -### 3. Keep refcounts in memory only - -Do not persist `queryRefCounts`. - -They represent live subscriber/process state and should restart from zero. - -### 4. Persist ownership changes transactionally - -Whenever ownership changes for a row: - -- update row metadata in the same sync transaction - -This includes metadata-only ownership changes where the row value itself is -unchanged. - -### 5. Persist query retention state - -When a query becomes inactive: - -- if persisted retention is finite, persist `mode: 'ttl'` with `expiresAt` -- if persisted retention is infinite, persist `mode: 'until-revalidated'` - -This retention entry is independent from in-memory query `gcTime`. - -### 6. Startup retention handling - -At startup: - -- load collection metadata retention entries before new subscriptions attach -- clean up expired `ttl` placeholders -- skip startup GC for `until-revalidated` placeholders - -Startup retention cleanup must run under the same mutex or startup critical -section as hydration and replay to avoid races with new query subscriptions. - -### 7. Explicit cold-row cleanup strategy for expired TTL placeholders - -Phase 3 must define a concrete cold-row cleanup path for on-demand mode. - -For the initial Level 1 implementation, that path should be one of: - -- adapter-driven full scan of persisted rows with non-null row metadata, or -- denormalized owned row keys stored on the retention entry itself - -The implementation must choose one and document it. Startup cleanup cannot be -left as an abstract promise if expired placeholders may own rows that are not -currently hydrated. 
- -If the first implementation uses the scan-based path, it should do all of the -following under the same startup mutex: - -1. find rows owned by the expired placeholder -2. remove the placeholder from each row's owner set -3. delete rows whose owner set becomes empty -4. delete the placeholder retention entry - -### 8. Revalidation flow for indefinite persisted retention - -When a query retained with `mode: 'until-revalidated'` is requested again: - -1. match the placeholder by canonical query identity -2. use persisted ownership as the baseline -3. run the query -4. diff server results against previously owned rows -5. remove rows that are no longer owned -6. clear or refresh the retention entry based on the new lifecycle state - -This is the key behavior required for long offline periods. - -This revalidation baseline is required for correctness. The implementation must -not continue to diff only against all rows in `collection._state.syncedData`, -because that would preserve the warm-start deletion bug this phase is intended -to fix. - -In on-demand mode, if the previously owned rows are not all hydrated in memory, -the implementation must obtain the baseline from persisted ownership data -directly, either via: - -- row metadata scan / lookup, or -- denormalized owned row keys on the retention entry, or -- a future normalized ownership index - -### 9. Use query-owned baseline for reconciliation - -When reconciling a query after restart or revalidation, diff against: - -- the rows previously owned by the specific query - -This is not an optional improvement. It is the required reconciliation model for -Phase 3. - -## Important design constraints - -### Persisted retention is not freshness - -Long-lived persisted data may be very stale. 
- -That is acceptable as long as: - -- re-requesting the query still follows normal query refetch behavior -- persisted retention does not imply anything about `staleTime` - -### Infinite persisted retention needs explicit eviction eventually - -If `persistedGcTime: Infinity` or `mode: 'until-revalidated'` is supported, -storage can grow without bound. This phase does not need to ship explicit -eviction APIs, but the design should leave room for: - -- evict one query placeholder -- evict all query placeholders for a collection -- evict by age or storage-pressure policy - -### Runtime TTL expiry needs explicit policy - -Finite persisted retention should not only be handled on restart. - -When a `ttl` placeholder expires while the app remains running, the runtime -should schedule the same cleanup flow that startup cleanup would perform: - -1. locate the rows owned by the placeholder -2. remove the placeholder from those rows -3. delete orphaned rows -4. remove the retention entry - -This runtime TTL cleanup should run under the same mutex used for startup -cleanup and query revalidation. - -### Versioning matters - -If query identity hashing or serialization changes across app versions, retained -placeholders may become unreachable. 
- -The implementation should leave room for: - -- metadata versioning -- collection-level invalidation of incompatible retained placeholders - -## Edge cases to handle - -- multiple overlapping queries owning the same row -- query unsubscribes and resubscribes before persisted retention cleanup runs -- query retained indefinitely while another query updates shared rows -- startup with only a subset of rows hydrated in on-demand mode -- expired `ttl` placeholder owning only cold rows in on-demand mode -- placeholder exists but the same query is never requested again -- query identity serialization changes across versions -- metadata-only ownership updates with unchanged row values -- rows retained indefinitely while offline for a long period - -## Acceptance criteria - -- restart does not incorrectly delete persisted rows before ownership is restored -- row ownership survives restart -- query retention is persisted independently from `gcTime` -- `until-revalidated` retention keeps persisted rows available indefinitely -- re-requesting a retained query reconciles the retained rows correctly - -## Suggested tests - -- warm-start with multiple disjoint queries does not drop unrelated rows -- overlapping queries preserve shared row ownership across restart -- finite persisted retention expires and cleans up orphaned rows -- finite persisted retention expires while the app remains running -- indefinite persisted retention survives restart and long offline gaps -- re-requesting an indefinite retained query reconciles deleted rows correctly -- in-memory `gcTime` expiry does not remove indefinitely retained persisted rows -- on-demand hydration reconstructs ownership for loaded subsets -- on-demand expired-placeholder cleanup handles cold rows correctly -- metadata-only ownership updates persist correctly - -## Exit criteria - -Phase 3 is complete when query collections can warm-start safely from persisted -data, preserve ownership across restart, and independently control 
durable query -retention for offline-first users. diff --git a/persisted-sync-metadata-plan/04-electric-collection.md b/persisted-sync-metadata-plan/04-electric-collection.md deleted file mode 100644 index 7586e4a8e..000000000 --- a/persisted-sync-metadata-plan/04-electric-collection.md +++ /dev/null @@ -1,174 +0,0 @@ -# Phase 4: Electric Collection - -## Objective - -Migrate `electric-db-collection` to use transactional collection metadata and -row metadata so it can: - -- persist durable resume state -- warm-start from persisted rows safely -- resume streaming from a persisted stream identity when valid -- leave room for future persistence of additional Electric-derived state - -## Primary code areas - -- `packages/electric-db-collection/src/electric.ts` -- `packages/electric-db-collection/tests/electric.test.ts` -- `packages/electric-db-collection/tests/electric-live-query.test.ts` -- persisted integration tests combining Electric and SQLite persistence - -## High-level design - -### Collection metadata - -Persist Electric resume state at collection scope. - -Suggested shape: - -```ts -type ElectricResumeMetadata = - | { - kind: 'resume' - offset: string - handle: string - shapeId: string - updatedAt: number - } - | { - kind: 'reset' - updatedAt: number - } -``` - -Suggested key: - -- `electric:resume` - -### Row metadata - -Persist useful per-row sync metadata through the same row metadata channel used -by `write({ metadata })`. - -Examples: - -- relation identity -- row sync headers that are useful after hydration - -## Proposed implementation steps - -### 1. Read resume metadata at startup - -On sync initialization: - -- read `electric:resume` from collection metadata -- if `kind: 'resume'`, prefer that persisted stream identity over the current - fallback behavior -- if resume metadata is absent or invalid, fall back to the existing startup - behavior - -### 2. 
Persist resume state transactionally - -When an Electric batch advances the durable resume point: - -- stage the new `electric:resume` metadata in the same sync transaction as the - row changes from that batch - -This prevents the invalid state where a resume token advances beyond the rows -that were actually committed. - -### 3. Support metadata-only resume updates when needed - -If Electric needs to persist a new durable resume state on a control-message -boundary without a row mutation in the same batch, use a metadata-only sync -transaction. - -This depends on Phase 1 and Phase 2 support for metadata-only commits. - -### 4. Define reset behavior - -When Electric determines the persisted resume state is invalid or a must-refetch -equivalent restart path is required: - -- clear or replace `electric:resume` with a `kind: 'reset'` marker -- perform the corresponding conservative reload path - -This makes restart behavior explicit rather than relying on stale resume state. - -Ordering requirement: - -- write the `kind: 'reset'` marker before starting the refetch/reload path, - using a metadata-only transaction if needed - -That way, if the app crashes during refetch, restart will not attempt to resume -from stale persisted stream state. - -### 5. Carry row metadata through hydration - -Hydrated rows from SQLite should restore the Electric row metadata that was -originally written through `write({ metadata })`. - -This provides a better baseline for future Electric restart reconstruction work. - -## Important design constraints - -### Resume metadata is not the full Electric state - -Electric also maintains derived in-memory state such as: - -- tag indexes -- synced key tracking -- snapshot and txid matching state - -This phase does not require exact restart reconstruction of every one of these. -It only requires a sound transactional place to persist the pieces that should -survive restart. 
- -### Be conservative when reconstruction is incomplete - -If persisted resume metadata is present but the required derived state is not -reconstructible safely, Electric should fall back to a conservative reload path -rather than assume exact restart correctness. - -### Strong stream identity matters - -Resume metadata should persist enough identity to detect incompatible resume -state, not just an offset. - -At minimum: - -- `offset` -- `handle` -- `shapeId` - -## Edge cases to handle - -- persisted resume metadata missing one required field -- resume metadata exists but shape identity no longer matches server state -- metadata-only resume update -- restart after partially applied or replayed batches -- must-refetch/reset flows clearing or replacing persisted resume state -- hydrated rows restoring row metadata while resume metadata is absent - -## Acceptance criteria - -- Electric resume state survives restart -- resume metadata only advances when the corresponding batch commits -- invalid resume metadata triggers conservative fallback -- metadata-only resume commits work -- persisted row metadata survives hydration where relevant - -## Suggested tests - -- batch commit persists rows and resume metadata atomically -- failed batch does not advance resume metadata -- restart uses persisted resume metadata when valid -- restart falls back safely when persisted resume metadata is invalid -- metadata-only resume tx survives restart -- must-refetch/reset clears or invalidates persisted resume state correctly -- row metadata written by Electric survives SQLite hydration - -## Exit criteria - -Phase 4 is complete when Electric has a durable, transactional resume-state -story that is compatible with persisted warm starts and conservative fallback -behavior. 
diff --git a/persisted-sync-metadata-plan/05-test-plan.md b/persisted-sync-metadata-plan/05-test-plan.md deleted file mode 100644 index 9dc49fc0e..000000000 --- a/persisted-sync-metadata-plan/05-test-plan.md +++ /dev/null @@ -1,229 +0,0 @@ -# Phase 5: Test Plan - -## Objective - -Validate the persisted sync metadata design with invariants-focused tests across: - -- core collection state -- SQLite persistence -- query collection restart and retention behavior -- Electric resume behavior - -This plan is intentionally thorough. The feature crosses multiple layers and is -easy to get "mostly working" while still breaking on restart, replay, or long -offline gaps. - -## Testing principles - -- prefer behavior/invariant tests over implementation-detail tests -- add restart tests wherever durable state is introduced -- add crash-consistency style tests wherever atomicity is claimed -- test both eager and on-demand flows where behavior differs -- test replay/recovery paths, not just happy-path startup - -## Invariants - -### Core invariants - -- metadata that is staged in a sync transaction is visible to reads in that same - transaction -- metadata is committed iff the surrounding sync transaction commits -- metadata-only transactions are valid committed sync transactions -- row metadata and collection metadata are isolated but share the same commit - boundary -- truncate clears row metadata but does not silently clear collection metadata -- startup reads of persisted metadata are allowed outside a transaction - -### SQLite invariants - -- row values and row metadata are committed atomically -- collection metadata commits atomically with the same persisted tx -- hydrated rows restore both value and metadata - -### Query collection invariants - -- warm-start does not delete unrelated persisted rows before ownership is - reconstructed -- row ownership survives restart -- query placeholder retention survives restart -- finite persisted retention expires correctly -- 
indefinite persisted retention does not expire due to in-memory `gcTime` -- re-requesting an indefinitely retained query reconciles retained rows -- retained rows may be stale, but they remain available until revalidation or - explicit cleanup - -### Electric invariants - -- resume metadata advances iff the corresponding batch commits -- invalid resume metadata does not cause unsafe resume behavior -- metadata-only resume updates are persisted -- restart can use persisted resume metadata when valid - -### Replay and recovery invariants - -- follower tabs converge on metadata-bearing tx behavior -- sequence-gap recovery remains correct when metadata changed -- conservative reload fallback remains correct when targeted metadata replay is - unavailable - -## Test matrix - -### Core API tests - -Target files: - -- `packages/db/tests/collection.test.ts` -- additional focused tests if needed - -Cases: - -- `metadata.row.set()` inside a transaction -- `metadata.collection.set()` inside a transaction -- `metadata.collection.get()` outside a transaction during startup -- read-your-own-writes for row metadata -- read-your-own-writes for collection metadata -- metadata-only commit -- metadata writes outside a transaction throw -- `write({ metadata })` and `metadata.row.set()` on the same row in one tx -- truncate behavior with row metadata present - -### SQLite adapter and runtime tests - -Target files: - -- `packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts` -- `packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts` -- runtime persistence contract tests - -Cases: - -- row metadata persists and hydrates -- collection metadata persists and loads -- metadata-only tx survives restart -- row delete removes row metadata -- metadata-bearing tx replay correctness -- sequence-gap recovery with metadata changes - -### Query collection integration tests - -Target files: - -- `packages/query-db-collection/tests/query.test.ts` -- new persisted 
integration tests as needed - -Cases: - -- multiple disjoint queries warm-start without deleting each other's rows -- overlapping queries preserve shared ownership across restart -- persisted ownership reconstruction in eager mode -- persisted ownership reconstruction in on-demand mode for loaded subsets -- finite persisted retention expiry -- finite persisted retention expiry while the app remains running -- `persistedGcTime: Infinity` or equivalent indefinite retention -- in-memory `gcTime` expiry does not remove indefinitely retained persisted rows -- re-requesting an indefinitely retained query reconciles stale/deleted rows -- query reconciliation diffs against the query-owned baseline, not the whole - collection -- expired placeholder cleanup handles cold rows in on-demand mode -- query identity version mismatch / incompatible retained metadata fallback - -### Electric integration tests - -Target files: - -- `packages/electric-db-collection/tests/electric.test.ts` -- `packages/electric-db-collection/tests/electric-live-query.test.ts` -- new persisted integration tests as needed - -Cases: - -- commit rows + resume metadata atomically -- failed commit does not advance resume metadata -- metadata-only resume transaction -- valid resume metadata used on restart -- invalid resume metadata triggers conservative fallback -- reset/must-refetch clears or invalidates resume metadata -- row metadata survives SQLite hydration - -## Suggested delivery cadence - -### While implementing Phase 1 - -Add: - -- core transaction semantics tests -- metadata-only transaction tests - -### While implementing Phase 2 - -Add: - -- SQLite schema and hydration tests -- adapter atomicity tests -- runtime restart tests -- transaction-boundary tests that prove row data, row metadata, and collection - metadata share the same SQLite commit/rollback boundary - -### While implementing Phase 3 - -Add: - -- query ownership restart tests -- finite retention tests -- indefinite retention tests -- 
long-offline warm-start tests -- on-demand cold-row cleanup tests -- runtime TTL expiry tests - -### While implementing Phase 4 - -Add: - -- resume metadata tests -- metadata-only resume tests -- invalid resume fallback tests - -## Failure modes the tests must catch - -- persisted rows exist but metadata is missing after restart -- metadata exists but corresponding rows were not committed -- query warm-start deletes rows it does not own -- rows retained indefinitely disappear because in-memory GC elapsed -- startup GC races with new subscriptions -- follower runtimes diverge because metadata-bearing txs were not replayed -- Electric resumes from a token that was never durably committed - -## Crash-consistency testing approach - -Where atomicity is claimed, tests should verify transaction boundaries rather -than merely assume SQLite atomicity. - -Suggested approach: - -- use a driver or adapter double that records transaction boundaries -- force failures after some writes have been staged but before commit completes -- verify row values, row metadata, and collection metadata all roll back - together - -This is especially important for `applyCommittedTx()` and any metadata-only tx -paths. - -## Performance regression checks - -Add lightweight regression coverage for: - -- row hydration with metadata present -- row writes with metadata absent -- row writes with metadata present - -These do not need to be strict benchmarks, but they should catch obvious -accidental regressions caused by metadata serialization or replay changes. 
- -## Definition of done - -This plan is complete when: - -- each phase ships with the tests listed for that phase -- restart, replay, and retention invariants are covered -- the long-offline persisted query use case is explicitly validated -- metadata atomicity is tested, not just assumed diff --git a/persisted-sync-metadata-plan/README.md b/persisted-sync-metadata-plan/README.md deleted file mode 100644 index 10c99ec74..000000000 --- a/persisted-sync-metadata-plan/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Persisted Sync Metadata Plan - -This directory breaks the `RFC-persisted-sync-metadata.md` design into an -implementation plan with explicit phases. - -The recommended execution order is: - -1. `01-core-api.md` -2. `02-sqlite-implementation.md` -3. `03-query-collection.md` -4. `04-electric-collection.md` -5. `05-test-plan.md` - -## Goals - -- land the core metadata transaction model first -- make SQLite the reference persistence implementation -- migrate `query-db-collection` onto the new primitives -- migrate `electric-db-collection` onto the new primitives -- validate correctness with thorough invariants-focused tests - -## Non-Goals - -- optimizing every replay and GC path in the first pass -- implementing every possible metadata-backed feature before the core API is - stable - -## Guiding principles - -- metadata that affects persisted row behavior must commit with the row state it - explains -- row metadata and collection metadata are distinct scopes -- metadata-only sync transactions are first-class -- restart correctness comes before targeted replay optimization -- persisted query retention is separate from in-memory `gcTime` - -## Phase dependencies - -- Phase 1 is required before any other phase -- Phase 2 depends on Phase 1 -- Phase 3 depends on Phases 1 and 2 -- Phase 4 depends on Phases 1 and 2 -- Phase 5 spans all phases and should be updated continuously - -## Recommended delivery strategy - -- implement Phase 1 and Phase 2 behind a narrow 
internal API -- land Phase 3 next because it is the primary motivator -- land Phase 4 once the core metadata model has proven stable under restart and - replay tests -- keep `05-test-plan.md` as the definition of done for each phase From 1959807ce781018e3a75eb910c076897d218e285 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 19:53:21 +0000 Subject: [PATCH 08/12] fix: tighten persisted sync metadata restart behavior Finish the core persisted metadata follow-through so reloads, retained query ownership, and Electric resume/reset state behave correctly across startup and recovery while clarifying metadata semantics around inserts and cleanup. Made-with: Cursor --- .../src/persisted.ts | 257 +++++++++--- .../src/sqlite-core-adapter.ts | 5 + .../tests/persisted.test.ts | 328 +++++++++++++++ .../tests/sqlite-core-adapter.test.ts | 100 +++++ packages/db/src/collection/sync.ts | 19 +- packages/db/tests/collection.test.ts | 97 ++++- .../electric-db-collection/src/electric.ts | 76 +++- .../tests/electric.test.ts | 393 ++++++++++++++++++ packages/query-db-collection/src/query.ts | 191 +++++++-- .../query-db-collection/tests/query.test.ts | 268 +++++++++++- 10 files changed, 1609 insertions(+), 125 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index c9b326152..2952e56ea 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -179,6 +179,7 @@ export type PersistedTx< term: number seq: number rowVersion: number + truncate?: boolean mutations: Array< | { type: `insert` @@ -772,6 +773,7 @@ class PersistedCollectionRuntime< metadata: null, } private started = false + private startupMetadataPromise: Promise | null = null private startPromise: Promise | null = null private internalApplyDepth = 0 private isHydrating = false @@ -845,6 +847,15 @@ class 
PersistedCollectionRuntime< return this.startPromise } + async ensureStartupMetadataLoaded(): Promise { + if (this.startupMetadataPromise) { + return this.startupMetadataPromise + } + + this.startupMetadataPromise = this.loadStartupMetadataInternal() + return this.startupMetadataPromise + } + private async startInternal(): Promise { if (this.started) { return @@ -852,6 +863,21 @@ class PersistedCollectionRuntime< this.started = true + await this.ensureStartupMetadataLoaded() + + const indexBootstrapSnapshot = this.collection?.getIndexMetadata() ?? [] + this.attachIndexLifecycleListeners() + await this.bootstrapPersistedIndexes(indexBootstrapSnapshot) + + if (this.syncMode !== `on-demand`) { + this.activeSubsets.set(this.getSubsetKey({}), {}) + await this.applyMutex.run(() => + this.hydrateSubsetUnsafe({}, { requestRemoteEnsure: false }), + ) + } + } + + private async loadStartupMetadataInternal(): Promise { // Restore stream position from the database so that new mutations // don't collide with previously applied transactions. if (this.persistence.adapter.getStreamPosition) { @@ -866,22 +892,27 @@ class PersistedCollectionRuntime< } await this.loadCollectionMetadataIntoCollection() + } - const indexBootstrapSnapshot = this.collection?.getIndexMetadata() ?? 
[] - this.attachIndexLifecycleListeners() - await this.bootstrapPersistedIndexes(indexBootstrapSnapshot) + private async loadCollectionMetadataIntoCollection(): Promise { + const collectionMetadata = await this.loadCollectionMetadataSnapshot() + this.replaceCollectionMetadataSnapshot(collectionMetadata) + } - if (this.syncMode !== `on-demand`) { - this.activeSubsets.set(this.getSubsetKey({}), {}) - await this.applyMutex.run(() => - this.hydrateSubsetUnsafe({}, { requestRemoteEnsure: false }), - ) + private async loadCollectionMetadataSnapshot(): Promise< + Array<{ key: string; value: unknown }> + > { + if (!this.persistence.adapter.loadCollectionMetadata) { + return [] } + + return this.persistence.adapter.loadCollectionMetadata(this.collectionId) } - private async loadCollectionMetadataIntoCollection(): Promise { + private replaceCollectionMetadataSnapshot( + collectionMetadata: Array<{ key: string; value: unknown }>, + ): void { if ( - !this.persistence.adapter.loadCollectionMetadata || !this.syncControls.begin || !this.syncControls.commit || !this.syncControls.metadata @@ -889,18 +920,26 @@ class PersistedCollectionRuntime< return } - const collectionMetadata = - await this.persistence.adapter.loadCollectionMetadata(this.collectionId) - - if (collectionMetadata.length === 0) { - return - } + const nextMetadata = new Map( + collectionMetadata.map(({ key, value }) => [key, value]), + ) + const currentKeys = this.syncControls.metadata.collection + .list() + .map(({ key }) => key) this.withInternalApply(() => { this.syncControls.begin?.({ immediate: true }) - collectionMetadata.forEach(({ key, value }) => { + + currentKeys.forEach((key) => { + if (!nextMetadata.has(key)) { + this.syncControls.metadata?.collection.delete(key) + } + }) + + nextMetadata.forEach((value, key) => { this.syncControls.metadata?.collection.set(key, value) }) + this.syncControls.commit?.() }) } @@ -1163,17 +1202,26 @@ class PersistedCollectionRuntime< }) } - private replaceCollectionRows( + 
private replaceCollectionSnapshot( rows: Array<{ key: TKey; value: T; metadata?: unknown }>, + collectionMetadata: Array<{ key: string; value: unknown }>, ): void { if ( !this.syncControls.begin || !this.syncControls.write || - !this.syncControls.commit + !this.syncControls.commit || + !this.syncControls.metadata ) { return } + const nextMetadata = new Map( + collectionMetadata.map(({ key, value }) => [key, value]), + ) + const currentKeys = this.syncControls.metadata.collection + .list() + .map(({ key }) => key) + this.withInternalApply(() => { this.syncControls.begin?.({ immediate: true }) this.syncControls.truncate?.() @@ -1186,6 +1234,16 @@ class PersistedCollectionRuntime< }) } + currentKeys.forEach((key) => { + if (!nextMetadata.has(key)) { + this.syncControls.metadata?.collection.delete(key) + } + }) + + nextMetadata.forEach((value, key) => { + this.syncControls.metadata?.collection.set(key, value) + }) + this.syncControls.commit?.() }) } @@ -1271,10 +1329,10 @@ class PersistedCollectionRuntime< const streamPosition = this.nextLocalStreamPosition() if ( - transaction.truncate || - (transaction.operations.length === 0 && - transaction.rowMetadataWrites.size === 0 && - transaction.collectionMetadataWrites.size === 0) + !transaction.truncate && + transaction.operations.length === 0 && + transaction.rowMetadataWrites.size === 0 && + transaction.collectionMetadataWrites.size === 0 ) { this.publishTxCommittedEvent( this.createTxCommittedPayload({ @@ -1302,6 +1360,7 @@ class PersistedCollectionRuntime< hasMetadataChanges: transaction.rowMetadataWrites.size > 0 || transaction.collectionMetadataWrites.size > 0, + requiresFullReload: transaction.truncate, changedRows: transaction.operations .filter((operation) => operation.type === `update`) .map((operation) => ({ key: operation.key, value: operation.value })), @@ -1321,14 +1380,13 @@ class PersistedCollectionRuntime< term: streamPosition.term, seq: streamPosition.seq, rowVersion: streamPosition.rowVersion, + 
truncate: transaction.truncate, mutations: transaction.operations.map((operation) => operation.type === `update` ? { type: `update`, key: operation.key, value: operation.value, - metadata: operation.metadata, - metadataChanged: operation.metadata !== undefined, } : { type: `delete`, @@ -1909,19 +1967,25 @@ class PersistedCollectionRuntime< this.isHydrating = true try { - const mergedRows = new Map() + const mergedRows = new Map() + const collectionMetadata = await this.loadCollectionMetadataSnapshot() for (const options of activeSubsetOptions) { const subsetRows = await this.loadSubsetRowsUnsafe(options) for (const row of subsetRows) { - mergedRows.set(row.key, row.value) + mergedRows.set(row.key, { + value: row.value, + metadata: row.metadata, + }) } } - this.replaceCollectionRows( - Array.from(mergedRows.entries()).map(([key, value]) => ({ + this.replaceCollectionSnapshot( + Array.from(mergedRows.entries()).map(([key, row]) => ({ key, - value, + value: row.value, + metadata: row.metadata, })), + collectionMetadata, ) } finally { this.isHydrating = false @@ -2037,6 +2101,9 @@ function createWrappedSyncConfig< ...sourceSyncConfig, sync: (params) => { const transactionStack: Array> = [] + const getOpenTransaction = () => transactionStack[transactionStack.length - 1] + let fullStartPromise: Promise | null = null + const cancelledLoads = new WeakSet() runtime.setSyncControls({ begin: params.begin, write: params.write as SyncControlFns[`write`], @@ -2051,18 +2118,9 @@ function createWrappedSyncConfig< const wrappedParams = { ...params, markReady: () => { - void runtime - .ensureStarted() - .then(() => { - params.markReady() - }) - .catch((error) => { - console.warn( - `Failed persisted sync startup before markReady:`, - error, - ) - params.markReady() - }) + void (fullStartPromise ?? 
runtime.ensureStarted()).then(() => { + params.markReady() + }) }, begin: (options?: { immediate?: boolean }) => { const transaction: OpenSyncTransaction = { @@ -2082,7 +2140,7 @@ function createWrappedSyncConfig< }, write: (message: ChangeMessageOrDeleteKeyMessage) => { const normalization = runtime.normalizeSyncWriteMessage(message) - const openTransaction = transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { params.write(normalization.forwardMessage) @@ -2094,6 +2152,13 @@ function createWrappedSyncConfig< openTransaction.rowMetadataWrites.set(normalization.operation.key, { type: `delete`, }) + } else if ( + message.type === `insert` && + normalization.operation.metadata === undefined + ) { + openTransaction.rowMetadataWrites.set(normalization.operation.key, { + type: `delete`, + }) } else if (normalization.operation.metadata !== undefined) { openTransaction.rowMetadataWrites.set(normalization.operation.key, { type: `set`, @@ -2107,10 +2172,21 @@ function createWrappedSyncConfig< metadata: params.metadata ? { row: { - get: (key: TKey) => params.metadata!.row.get(key), + get: (key: TKey) => { + const openTransaction = getOpenTransaction() + const pendingWrite = openTransaction?.rowMetadataWrites.get(key) + if (pendingWrite) { + return pendingWrite.type === `delete` + ? 
undefined + : pendingWrite.value + } + if (openTransaction?.truncate) { + return undefined + } + return params.metadata!.row.get(key) + }, set: (key: TKey, value: unknown) => { - const openTransaction = - transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { throw new InvalidPersistedCollectionConfigError( `metadata.row.set must be called within an open sync transaction`, @@ -2125,8 +2201,7 @@ function createWrappedSyncConfig< } }, delete: (key: TKey) => { - const openTransaction = - transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { throw new InvalidPersistedCollectionConfigError( `metadata.row.delete must be called within an open sync transaction`, @@ -2141,10 +2216,19 @@ function createWrappedSyncConfig< }, }, collection: { - get: (key: string) => params.metadata!.collection.get(key), + get: (key: string) => { + const openTransaction = getOpenTransaction() + const pendingWrite = + openTransaction?.collectionMetadataWrites.get(key) + if (pendingWrite) { + return pendingWrite.type === `delete` + ? 
undefined + : pendingWrite.value + } + return params.metadata!.collection.get(key) + }, set: (key: string, value: unknown) => { - const openTransaction = - transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { throw new InvalidPersistedCollectionConfigError( `metadata.collection.set must be called within an open sync transaction`, @@ -2159,8 +2243,7 @@ function createWrappedSyncConfig< } }, delete: (key: string) => { - const openTransaction = - transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { throw new InvalidPersistedCollectionConfigError( `metadata.collection.delete must be called within an open sync transaction`, @@ -2173,18 +2256,45 @@ function createWrappedSyncConfig< params.metadata!.collection.delete(key) } }, - list: (prefix?: string) => - params.metadata!.collection.list(prefix), + list: (prefix?: string) => { + const merged = new Map( + params.metadata!.collection + .list() + .map(({ key, value }) => [key, value]), + ) + const openTransaction = getOpenTransaction() + if (openTransaction) { + for (const [ + key, + metadataWrite, + ] of openTransaction.collectionMetadataWrites) { + if (metadataWrite.type === `delete`) { + merged.delete(key) + } else { + merged.set(key, metadataWrite.value) + } + } + } + + return Array.from(merged.entries()) + .filter(([key]) => (prefix ? 
key.startsWith(prefix) : true)) + .map(([key, value]) => ({ + key, + value, + })) + }, }, } : undefined, truncate: () => { - const openTransaction = transactionStack[transactionStack.length - 1] + const openTransaction = getOpenTransaction() if (!openTransaction) { params.truncate() return } + openTransaction.operations = [] + openTransaction.rowMetadataWrites.clear() openTransaction.truncate = true if (!openTransaction.queuedBecauseHydrating) { params.truncate() @@ -2230,21 +2340,40 @@ function createWrappedSyncConfig< }, } - const sourceResult = normalizeSyncFnResult( - sourceSyncConfig.sync(wrappedParams), - ) - void runtime.ensureStarted() + let sourceResult: SyncConfigRes = {} + const startupState = { cleanedUp: false } + fullStartPromise = runtime.ensureStarted() + const sourceResultPromise = (async () => { + await runtime.ensureStartupMetadataLoaded() + + if (startupState.cleanedUp) { + return sourceResult + } + + sourceResult = normalizeSyncFnResult(sourceSyncConfig.sync(wrappedParams)) + return sourceResult + })() return { cleanup: () => { + startupState.cleanedUp = true sourceResult.cleanup?.() runtime.cleanup() runtime.clearSyncControls() }, - loadSubset: (options: LoadSubsetOptions) => - runtime.loadSubset(options, sourceResult.loadSubset), - unloadSubset: (options: LoadSubsetOptions) => - runtime.unloadSubset(options, sourceResult.unloadSubset), + loadSubset: async (options: LoadSubsetOptions) => { + cancelledLoads.delete(options as object) + await fullStartPromise + const resolvedSourceResult = await sourceResultPromise + if (startupState.cleanedUp || cancelledLoads.has(options as object)) { + return + } + await runtime.loadSubset(options, resolvedSourceResult.loadSubset) + }, + unloadSubset: (options: LoadSubsetOptions) => { + cancelledLoads.add(options as object) + runtime.unloadSubset(options, sourceResult.unloadSubset) + }, } }, } diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts 
b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index e3faa084e..92582e0eb 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -1123,6 +1123,11 @@ export class SQLiteCorePersistenceAdapter< const currentRowVersion = versionRows[0]?.latest_row_version ?? 0 const nextRowVersion = Math.max(currentRowVersion + 1, tx.rowVersion) + if (tx.truncate) { + await transactionDriver.run(`DELETE FROM ${collectionTableSql}`) + await transactionDriver.run(`DELETE FROM ${tombstoneTableSql}`) + } + for (const mutation of tx.mutations) { const encodedKey = encodePersistedStorageKey(mutation.key) if (mutation.type === `delete`) { diff --git a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts index 9ce212fc7..6b96fe90b 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts @@ -34,6 +34,7 @@ type RecordingAdapter = PersistenceAdapter & { term: number seq: number rowVersion: number + truncate?: boolean mutations: Array<{ type: `insert` | `update` | `delete`; key: string }> } }> @@ -46,6 +47,7 @@ type RecordingAdapter = PersistenceAdapter & { }> loadCollectionMetadataCalls: Array rows: Map + rowMetadata: Map collectionMetadata: Map } @@ -53,9 +55,11 @@ function createRecordingAdapter( initialRows: Array = [], ): RecordingAdapter { const rows = new Map(initialRows.map((row) => [row.id, row])) + const rowMetadata = new Map() const adapter: RecordingAdapter = { rows, + rowMetadata, collectionMetadata: new Map(), applyCommittedTxCalls: [], ensureIndexCalls: [], @@ -72,6 +76,7 @@ function createRecordingAdapter( Array.from(rows.values()).map((value) => ({ key: value.id, value, + metadata: rowMetadata.get(value.id), })), ) }, @@ -93,6 +98,7 @@ function 
createRecordingAdapter( term: tx.term, seq: tx.seq, rowVersion: tx.rowVersion, + truncate: tx.truncate, mutations: tx.mutations.map((mutation) => ({ type: mutation.type, key: mutation.key, @@ -100,11 +106,27 @@ function createRecordingAdapter( }, }) + if (tx.truncate) { + rows.clear() + rowMetadata.clear() + } + for (const mutation of tx.mutations) { if (mutation.type === `delete`) { rows.delete(mutation.key) + rowMetadata.delete(mutation.key) } else { rows.set(mutation.key, mutation.value) + if (mutation.metadataChanged) { + rowMetadata.set(mutation.key, mutation.metadata) + } + } + } + for (const rowMetadataMutation of tx.rowMetadataMutations ?? []) { + if (rowMetadataMutation.type === `delete`) { + rowMetadata.delete(rowMetadataMutation.key) + } else { + rowMetadata.set(rowMetadataMutation.key, rowMetadataMutation.value) } } for (const metadataMutation of tx.collectionMetadataMutations ?? []) { @@ -318,6 +340,128 @@ describe(`persistedCollectionOptions`, () => { }) }) + it(`restores row and collection metadata after metadata-bearing full reload`, async () => { + const adapter = createRecordingAdapter([ + { + id: `1`, + title: `Tracked`, + }, + ]) + adapter.rowMetadata.set(`1`, { + source: `initial`, + }) + adapter.collectionMetadata.set(`electric:resume`, { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }) + const coordinator = createCoordinatorHarness() + + const collection = createCollection( + persistedCollectionOptions({ + id: `sync-present`, + getKey: (item) => item.id, + sync: { + sync: ({ markReady }) => { + markReady() + }, + }, + persistence: { + adapter, + coordinator, + }, + }), + ) + + await collection.preload() + await flushAsyncWork() + + expect(collection._state.syncedMetadata.get(`1`)).toEqual({ + source: `initial`, + }) + expect( + collection._state.syncedCollectionMetadata.get(`electric:resume`), + ).toEqual({ + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + 
updatedAt: 1, + }) + + adapter.rowMetadata.set(`1`, { + source: `reloaded`, + }) + adapter.collectionMetadata.delete(`electric:resume`) + adapter.collectionMetadata.set(`queryCollection:gc:q1`, { + queryHash: `q1`, + mode: `until-revalidated`, + }) + + coordinator.emit({ + type: `tx:committed`, + term: 1, + seq: 1, + txId: `tx-reload`, + latestRowVersion: 1, + requiresFullReload: true, + }) + + await flushAsyncWork() + await flushAsyncWork() + + expect(collection._state.syncedMetadata.get(`1`)).toEqual({ + source: `reloaded`, + }) + expect( + collection._state.syncedCollectionMetadata.has(`electric:resume`), + ).toBe(false) + expect( + collection._state.syncedCollectionMetadata.get(`queryCollection:gc:q1`), + ).toEqual({ + queryHash: `q1`, + mode: `until-revalidated`, + }) + }) + + it(`persists metadata-only wrapped sync transactions`, async () => { + const adapter = createRecordingAdapter() + + const collection = createCollection( + persistedCollectionOptions({ + id: `persisted-metadata-only`, + getKey: (item) => item.id, + sync: { + sync: ({ begin, commit, markReady, metadata }) => { + begin() + metadata?.collection.set(`runtime:key`, { persisted: true }) + commit() + markReady() + }, + }, + persistence: { + adapter, + }, + }), + ) + + await collection.stateWhenReady() + await flushAsyncWork() + + expect(adapter.applyCommittedTxCalls).toHaveLength(1) + expect(adapter.applyCommittedTxCalls[0]?.tx.mutations).toEqual([]) + expect(adapter.collectionMetadata.get(`runtime:key`)).toEqual({ + persisted: true, + }) + expect( + collection._state.syncedCollectionMetadata.get(`runtime:key`), + ).toEqual({ + persisted: true, + }) + }) + it(`throws InvalidSyncConfigError when sync key is present but null`, () => { const invalidOptions = { id: `invalid-sync-null`, @@ -642,6 +786,190 @@ describe(`persistedCollectionOptions`, () => { }) }) + it(`reads staged metadata writes during hydration-queued transactions`, async () => { + const adapter = createRecordingAdapter([ + { + id: 
`cached-1`, + title: `Cached row`, + }, + ]) + adapter.rowMetadata.set(`cached-1`, { source: `persisted` }) + adapter.collectionMetadata.set(`startup:key`, { ready: true }) + + let resolveLoadSubset: (() => void) | undefined + adapter.loadSubset = async () => { + await new Promise((resolve) => { + resolveLoadSubset = resolve + }) + return [ + { + key: `cached-1`, + value: { + id: `cached-1`, + title: `Cached row`, + }, + metadata: adapter.rowMetadata.get(`cached-1`), + }, + ] + } + + let remoteBegin: (() => void) | undefined + let remoteCommit: (() => void) | undefined + let remoteTruncate: (() => void) | undefined + let remoteMetadata: + | Parameters[`sync`]>[0][`metadata`] + | undefined + + const collection = createCollection( + persistedCollectionOptions({ + id: `sync-present-metadata-read`, + getKey: (item) => item.id, + sync: { + sync: ({ begin, commit, truncate, markReady, metadata }) => { + remoteBegin = begin + remoteCommit = commit + remoteTruncate = truncate + remoteMetadata = metadata + markReady() + return {} + }, + }, + persistence: { + adapter, + }, + }), + ) + + const readyPromise = collection.stateWhenReady() + for (let attempt = 0; attempt < 20 && !resolveLoadSubset; attempt++) { + await flushAsyncWork() + } + + expect(resolveLoadSubset).toBeDefined() + expect(remoteBegin).toBeDefined() + expect(remoteMetadata).toBeDefined() + + remoteBegin?.() + remoteMetadata?.row.set(`cached-1`, { source: `staged` }) + remoteMetadata?.collection.set(`runtime:key`, { persisted: true }) + + expect(remoteMetadata?.row.get(`cached-1`)).toEqual({ source: `staged` }) + expect(remoteMetadata?.collection.get(`runtime:key`)).toEqual({ + persisted: true, + }) + expect(remoteMetadata?.collection.list()).toContainEqual({ + key: `runtime:key`, + value: { persisted: true }, + }) + + remoteTruncate?.() + + expect(remoteMetadata?.row.get(`cached-1`)).toBeUndefined() + expect(remoteMetadata?.collection.get(`startup:key`)).toEqual({ + ready: true, + }) + + remoteCommit?.() + 
resolveLoadSubset?.() + await readyPromise + }) + + it(`persists truncate transactions and preserves intended collection metadata`, async () => { + const adapter = createRecordingAdapter() + + let remoteBegin: (() => void) | undefined + let remoteWrite: + | ((message: { type: `insert`; value: Todo }) => void) + | undefined + let remoteCommit: (() => void) | undefined + let remoteTruncate: (() => void) | undefined + let remoteMetadata: + | Parameters[`sync`]>[0][`metadata`] + | undefined + + const collection = createCollection( + persistedCollectionOptions({ + id: `sync-present-truncate`, + getKey: (item) => item.id, + sync: { + sync: ({ begin, write, commit, truncate, markReady, metadata }) => { + remoteBegin = begin + remoteWrite = write as (message: { + type: `insert` + value: Todo + }) => void + remoteCommit = commit + remoteTruncate = truncate + remoteMetadata = metadata + markReady() + return {} + }, + }, + persistence: { + adapter, + }, + }), + ) + + await collection.stateWhenReady() + await flushAsyncWork() + + remoteBegin?.() + remoteWrite?.({ + type: `insert`, + value: { + id: `pre-truncate`, + title: `Pre truncate`, + }, + }) + remoteMetadata?.collection.set(`electric:resume`, { + kind: `reset`, + updatedAt: 1, + }) + remoteTruncate?.() + remoteWrite?.({ + type: `insert`, + value: { + id: `post-truncate`, + title: `Post truncate`, + }, + }) + remoteCommit?.() + await flushAsyncWork() + + expect(adapter.applyCommittedTxCalls.at(-1)?.tx.truncate).toBe(true) + + const reloadedCollection = createCollection( + persistedCollectionOptions({ + id: `sync-present-truncate`, + getKey: (item) => item.id, + sync: { + sync: ({ markReady }) => { + markReady() + }, + }, + persistence: { + adapter, + }, + }), + ) + + await reloadedCollection.preload() + await flushAsyncWork() + + expect(reloadedCollection.get(`pre-truncate`)).toBeUndefined() + expect(stripVirtualProps(reloadedCollection.get(`post-truncate`))).toEqual({ + id: `post-truncate`, + title: `Post truncate`, + }) 
+ expect( + reloadedCollection._state.syncedCollectionMetadata.get(`electric:resume`), + ).toEqual({ + kind: `reset`, + updatedAt: 1, + }) + }) + it(`uses pullSince recovery when tx sequence gaps are detected`, async () => { const adapter = createRecordingAdapter([ { diff --git a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts index b76f6055b..e70e6b0db 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts @@ -544,6 +544,106 @@ export function runSQLiteCoreAdapterContractSuite( expect(metadataRows).toEqual([{ key: `electric:resume` }]) }) + it(`persists truncate transactions while preserving explicit collection metadata`, async () => { + const { adapter } = registerContractHarness() + const collectionId = `truncate-metadata-roundtrip` + + await adapter.applyCommittedTx(collectionId, { + txId: `seed-1`, + term: 1, + seq: 1, + rowVersion: 1, + mutations: [ + { + type: `insert`, + key: `1`, + value: { + id: `1`, + title: `Before truncate`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + metadata: { + owner: `before`, + }, + metadataChanged: true, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }, + }, + ], + }) + + await adapter.applyCommittedTx(collectionId, { + txId: `truncate-2`, + term: 1, + seq: 2, + rowVersion: 2, + truncate: true, + mutations: [ + { + type: `insert`, + key: `2`, + value: { + id: `2`, + title: `After truncate`, + createdAt: `2026-01-02T00:00:00.000Z`, + score: 2, + }, + metadata: { + owner: `after`, + }, + metadataChanged: true, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `reset`, + updatedAt: 
2, + }, + }, + ], + }) + + expect(await adapter.loadSubset(collectionId, {})).toEqual([ + { + key: `2`, + value: { + id: `2`, + title: `After truncate`, + createdAt: `2026-01-02T00:00:00.000Z`, + score: 2, + }, + metadata: { + owner: `after`, + }, + }, + ]) + + expect(await adapter.loadCollectionMetadata?.(collectionId)).toEqual([ + { + key: `electric:resume`, + value: { + kind: `reset`, + updatedAt: 2, + }, + }, + ]) + }) + it(`supports pushdown operators with correctness-preserving fallback`, async () => { const { adapter } = registerContractHarness() const collectionId = `todos` diff --git a/packages/db/src/collection/sync.ts b/packages/db/src/collection/sync.ts index 7b1fdc61e..25dc63c15 100644 --- a/packages/db/src/collection/sync.ts +++ b/packages/db/src/collection/sync.ts @@ -173,10 +173,18 @@ export class CollectionSyncManager< if (messageType === `delete`) { pendingTransaction.deletedKeys.add(key) pendingTransaction.rowMetadataWrites.set(key, { type: `delete` }) - } else if ( - messageType === `insert` || - message.metadata !== undefined - ) { + } else if (messageType === `insert`) { + if (message.metadata !== undefined) { + pendingTransaction.rowMetadataWrites.set(key, { + type: `set`, + value: message.metadata, + }) + } else { + pendingTransaction.rowMetadataWrites.set(key, { + type: `delete`, + }) + } + } else if (message.metadata !== undefined) { pendingTransaction.rowMetadataWrites.set(key, { type: `set`, value: message.metadata, @@ -289,6 +297,9 @@ export class CollectionSyncManager< ? 
undefined : pendingWrite.value } + if (pendingTransaction?.truncate) { + return undefined + } return this.state.syncedMetadata.get(key) }, set: (key, metadata) => { diff --git a/packages/db/tests/collection.test.ts b/packages/db/tests/collection.test.ts index 9bb5cc63a..c5d09039e 100644 --- a/packages/db/tests/collection.test.ts +++ b/packages/db/tests/collection.test.ts @@ -1400,7 +1400,7 @@ describe(`Collection`, () => { value: `should not be cleared`, }) expect(collection._state.syncedData.size).toBe(1) - expect(collection._state.syncedMetadata.size).toBe(1) + expect(collection._state.syncedMetadata.size).toBe(0) }) it(`should handle truncate with empty collection`, async () => { @@ -1563,6 +1563,101 @@ describe(`Collection`, () => { expect(collection._state.syncedMetadata.has(1)).toBe(false) }) + it(`should not retain a synced metadata entry for inserts without metadata`, async () => { + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-insert-no-metadata-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, write, commit, markReady }) => { + begin() + write({ + type: `insert`, + value: { id: 1, value: `initial` }, + }) + commit() + markReady() + }, + }, + }) + + await collection.stateWhenReady() + + expect(collection._state.syncedMetadata.has(1)).toBe(false) + }) + + it(`should treat row metadata as cleared after truncate within the same sync transaction`, async () => { + let testSyncFunctions: any = null + + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-row-metadata-truncate-read-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, write, commit, markReady, metadata, truncate }) => { + begin() + write({ + type: `insert`, + value: { id: 1, value: `initial` }, + metadata: { source: `sync` }, + }) + commit() + markReady() + + testSyncFunctions = { begin, commit, metadata, truncate } + }, + }, + }) + + await collection.stateWhenReady() + 
expect(collection._state.syncedMetadata.get(1)).toEqual({ source: `sync` }) + + const { begin, commit, metadata, truncate } = testSyncFunctions + begin() + expect(metadata.row.get(1)).toEqual({ source: `sync` }) + metadata.collection.set(`survivor:key`, { persisted: true }) + truncate() + expect(metadata.row.get(1)).toBeUndefined() + expect(metadata.collection.get(`survivor:key`)).toEqual({ persisted: true }) + expect(metadata.collection.list()).toContainEqual({ + key: `survivor:key`, + value: { persisted: true }, + }) + commit() + }) + + it(`should preserve collection metadata across truncate unless explicitly changed`, async () => { + let testSyncFunctions: any = null + + const collection = createCollection<{ id: number; value: string }>({ + id: `sync-collection-metadata-truncate-test`, + getKey: (item) => item.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady, metadata, truncate }) => { + begin() + metadata?.collection.set(`startup:key`, { ready: true }) + commit() + markReady() + + testSyncFunctions = { begin, commit, metadata, truncate } + }, + }, + }) + + await collection.stateWhenReady() + + const { begin, commit, metadata, truncate } = testSyncFunctions + begin() + truncate() + expect(metadata.collection.get(`startup:key`)).toEqual({ ready: true }) + expect(metadata.collection.list()).toContainEqual({ + key: `startup:key`, + value: { ready: true }, + }) + commit() + }) + it(`open sync transaction isn't applied when optimistic mutation is resolved/rejected`, async () => { type Row = { id: number; name: string } diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index 9f6ebff9d..ca5fe64b3 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -1191,21 +1191,47 @@ function createElectricSync>( collection, metadata, } = params - const persistedResumeState = metadata?.collection.get( - `electric:resume`, - ) as - | { - 
kind: `resume` - offset: string - handle: string - shapeId: string - updatedAt: number + const readPersistedResumeState = () => { + const persistedResumeState = metadata?.collection.get(`electric:resume`) + if (!persistedResumeState || typeof persistedResumeState !== `object`) { + return undefined + } + + const record = persistedResumeState as Record + if ( + record.kind === `resume` && + typeof record.offset === `string` && + typeof record.handle === `string` && + typeof record.shapeId === `string` && + typeof record.updatedAt === `number` + ) { + return { + kind: `resume` as const, + offset: record.offset, + handle: record.handle, + shapeId: record.shapeId, + updatedAt: record.updatedAt, } - | { - kind: `reset` - updatedAt: number + } + + if ( + record.kind === `reset` && + typeof record.updatedAt === `number` + ) { + return { + kind: `reset` as const, + updatedAt: record.updatedAt, } - | undefined + } + + return undefined + } + + const persistedResumeState = readPersistedResumeState() + const canUsePersistedResume = + shapeOptions.offset === undefined && + shapeOptions.handle === undefined && + persistedResumeState?.kind === `resume` // Wrap markReady to wait for test hook in progressive mode let progressiveReadyGate: Promise | null = null @@ -1264,14 +1290,14 @@ function createElectricSync>( // so we default to `now` when there is no saved offset. offset: shapeOptions.offset ?? - (persistedResumeState?.kind === `resume` + (canUsePersistedResume ? (persistedResumeState.offset as Offset) : syncMode === `on-demand` ? `now` : undefined), handle: shapeOptions.handle ?? - (persistedResumeState?.kind === `resume` + (canUsePersistedResume ? 
persistedResumeState.handle : undefined), signal: abortController.signal, @@ -1333,6 +1359,19 @@ function createElectricSync>( }) } + const commitResetResumeMetadataImmediately = () => { + if (!metadata) { + return + } + + begin({ immediate: true }) + metadata.collection.set(`electric:resume`, { + kind: `reset`, + updatedAt: Date.now(), + }) + commit() + } + /** * Process a change message: handle tags and write the mutation */ @@ -1511,17 +1550,14 @@ function createElectricSync>( `${collectionId ? `[${collectionId}] ` : ``}Received must-refetch message, starting transaction with truncate`, ) + commitResetResumeMetadataImmediately() + // Start a transaction and truncate the collection if (!transactionStarted) { begin() transactionStarted = true } - metadata?.collection.set(`electric:resume`, { - kind: `reset`, - updatedAt: Date.now(), - }) - truncate() // Clear tag tracking state diff --git a/packages/electric-db-collection/tests/electric.test.ts b/packages/electric-db-collection/tests/electric.test.ts index 87b0ee44d..24e0108e3 100644 --- a/packages/electric-db-collection/tests/electric.test.ts +++ b/packages/electric-db-collection/tests/electric.test.ts @@ -12,6 +12,7 @@ import type { InsertMutationFnParams, MutationFnParams, PendingMutation, + SyncMetadataApi, Transaction, TransactionWithMutations, } from '@tanstack/db' @@ -29,6 +30,8 @@ const mockStream = { fetchSnapshot: mockFetchSnapshot, forceDisconnectAndRefresh: mockForceDisconnectAndRefresh, isUpToDate: false, + shapeHandle: undefined as string | undefined, + lastOffset: `-1` as string, } vi.mock(`@electric-sql/client`, async () => { @@ -57,6 +60,38 @@ describe(`Electric Integration`, () => { ]), ) + const createInMemorySyncMetadataApi = ( + seed?: ReadonlyMap, + ): { + api: SyncMetadataApi + collectionMetadata: Map + } => { + const collectionMetadata = new Map(seed) + return { + collectionMetadata, + api: { + row: { + get: () => undefined, + set: () => {}, + delete: () => {}, + }, + collection: { + get: 
(key) => collectionMetadata.get(key), + set: (key, value) => { + collectionMetadata.set(key, value) + }, + delete: (key) => { + collectionMetadata.delete(key) + }, + list: (prefix) => + Array.from(collectionMetadata.entries()) + .filter(([key]) => (prefix ? key.startsWith(prefix) : true)) + .map(([key, value]) => ({ key, value })), + }, + }, + } + } + beforeEach(() => { vi.clearAllMocks() @@ -70,6 +105,8 @@ describe(`Electric Integration`, () => { mockRequestSnapshot.mockResolvedValue(undefined) mockForceDisconnectAndRefresh.mockResolvedValue(undefined) mockStream.isUpToDate = false + mockStream.shapeHandle = undefined + mockStream.lastOffset = `-1` // Create collection with Electric configuration const config = { @@ -2928,6 +2965,362 @@ describe(`Electric Integration`, () => { }), ) }) + + it(`should use persisted resume metadata when no explicit offset or handle is provided`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const metadataHarness = createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `shape-1`, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-resume-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `10_0`, + handle: `handle-1`, + }), + ) + }) + + it(`should ignore reset resume metadata and fall back to default startup`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + 
const metadataHarness = createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `reset`, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-reset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + handle: undefined, + }), + ) + }) + + it(`should not mix explicit handle with persisted offset`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const metadataHarness = createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `resume`, + offset: `10_0`, + handle: `persisted-handle`, + shapeId: `shape-1`, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-partial-override-handle-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + handle: `explicit-handle`, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + handle: `explicit-handle`, + }), + ) + }) + + it(`should not mix explicit offset with persisted handle`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const metadataHarness = 
createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `resume`, + offset: `10_0`, + handle: `persisted-handle`, + shapeId: `shape-1`, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-partial-override-offset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + offset: -1 as any, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: -1, + handle: undefined, + }), + ) + }) + + it(`should ignore malformed persisted resume metadata`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const metadataHarness = createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `resume`, + offset: 10, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-malformed-resume-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + handle: undefined, + }), + ) + }) + + it(`should persist reset resume metadata immediately on must-refetch`, () => { + const metadataHarness = createInMemorySyncMetadataApi() + const baseOptions = electricCollectionOptions({ + id: 
`must-refetch-reset-metadata-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + subscriber([ + { + headers: { control: `must-refetch` }, + }, + ]) + + expect( + metadataHarness.collectionMetadata.get(`electric:resume`), + ).toEqual( + expect.objectContaining({ + kind: `reset`, + }), + ) + }) + + it(`should only advance resume metadata when a batch commits`, () => { + const metadataHarness = createInMemorySyncMetadataApi() + mockStream.shapeHandle = `shape-1` + mockStream.lastOffset = `10_0` + + const baseOptions = electricCollectionOptions({ + id: `resume-commit-boundary-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + subscriber([ + { + key: `1`, + value: { id: 1, name: `Before commit` }, + headers: { operation: `insert` }, + }, + ]) + + expect(metadataHarness.collectionMetadata.has(`electric:resume`)).toBe( + false, + ) + + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + expect( + metadataHarness.collectionMetadata.get(`electric:resume`), + ).toEqual( + expect.objectContaining({ + kind: `resume`, + offset: `10_0`, + handle: `shape-1`, + }), + ) + }) }) // Tests for overlapping subset queries with duplicate keys diff --git a/packages/query-db-collection/src/query.ts b/packages/query-db-collection/src/query.ts index cae8e145f..344a1658b 100644 --- a/packages/query-db-collection/src/query.ts +++ 
b/packages/query-db-collection/src/query.ts @@ -220,6 +220,19 @@ interface QueryCollectionState { > } +type PersistedQueryRetentionEntry = + | { + queryHash: string + mode: `ttl` + expiresAt: number + } + | { + queryHash: string + mode: `until-revalidated` + } + +const QUERY_COLLECTION_GC_PREFIX = `queryCollection:gc:` + /** * Implementation class for QueryCollectionUtils with explicit dependency injection * for better testability and architectural clarity @@ -703,18 +716,130 @@ export function queryCollectionOptions( }) } - const getOwnedRowsForQuery = (hashedQueryKey: string) => { + const parsePersistedQueryRetentionEntry = ( + value: unknown, + expectedHash: string, + ): PersistedQueryRetentionEntry | undefined => { + if (!value || typeof value !== `object`) { + return undefined + } + + const record = value as Record + if (record.queryHash !== expectedHash) { + return undefined + } + + if (record.mode === `until-revalidated`) { + return { + queryHash: expectedHash, + mode: `until-revalidated`, + } + } + + if ( + record.mode === `ttl` && + typeof record.expiresAt === `number` && + Number.isFinite(record.expiresAt) + ) { + return { + queryHash: expectedHash, + mode: `ttl`, + expiresAt: record.expiresAt, + } + } + + return undefined + } + + const getPersistedOwnedRowsForQueryBaseline = (hashedQueryKey: string) => { + const knownRows = queryToRows.get(hashedQueryKey) + if (knownRows) { + return new Set(knownRows) + } + const ownedRows = new Set() for (const [rowKey] of collection._state.syncedData.entries()) { const owners = getPersistedOwners(rowKey) + if (owners.size === 0) { + continue + } + + rowToQueries.set(rowKey, new Set(owners)) + owners.forEach((owner) => { + const queryToRowsSet = queryToRows.get(owner) || new Set() + queryToRowsSet.add(rowKey) + queryToRows.set(owner, queryToRowsSet) + }) + if (owners.has(hashedQueryKey)) { ownedRows.add(rowKey) - addRow(rowKey, hashedQueryKey) } } return ownedRows } + const cleanupPersistedPlaceholder = ( + 
hashedQueryKey: string, + options?: { deleteRetentionEntry?: boolean }, + ) => { + const rowKeys = getPersistedOwnedRowsForQueryBaseline(hashedQueryKey) + const rowsToDelete: Array = [] + const needsTransaction = metadata !== undefined + + if (!needsTransaction) { + return + } + + begin() + + rowKeys.forEach((rowKey) => { + const oldItem = collection.get(rowKey) + if (!oldItem) { + return + } + + const owners = getPersistedOwners(rowKey) + owners.delete(hashedQueryKey) + setPersistedOwners(rowKey, owners) + const needToRemove = removeRow(rowKey, hashedQueryKey) + if (needToRemove) { + rowsToDelete.push(oldItem) + } + }) + + rowsToDelete.forEach((row) => { + write({ type: `delete`, value: row }) + }) + + if (options?.deleteRetentionEntry !== false) { + metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) + } + commit() + } + + const consumePersistedQueryRetentionAtStartup = () => { + if (!metadata) { + return + } + + const retentionEntries = metadata.collection.list(QUERY_COLLECTION_GC_PREFIX) + const now = Date.now() + + retentionEntries.forEach(({ key, value }) => { + const hashedQueryKey = key.slice(QUERY_COLLECTION_GC_PREFIX.length) + const parsed = parsePersistedQueryRetentionEntry(value, hashedQueryKey) + if (!parsed) { + return + } + + if (parsed.mode === `ttl` && parsed.expiresAt <= now) { + cleanupPersistedPlaceholder(parsed.queryHash, { + deleteRetentionEntry: syncMode !== `on-demand`, + }) + } + }) + } + /** * Generate a consistent query key from LoadSubsetOptions. 
* CRITICAL: Must use identical logic in both createQueryFromOpts and unloadSubset @@ -745,12 +870,6 @@ export function queryCollectionOptions( const hashedQueryKey = hashKey(key) const extendedMeta = { ...meta, loadSubsetOptions: opts } - if (metadata) { - begin() - metadata.collection.delete(`queryCollection:gc:${hashedQueryKey}`) - commit() - } - if (state.observers.has(hashedQueryKey)) { // We already have a query for this queryKey // Increment reference count since another consumer is using this observer @@ -901,7 +1020,8 @@ export function queryCollectionOptions( const currentSyncedItems: Map = new Map( collection._state.syncedData.entries(), ) - const previouslyOwnedRows = getOwnedRowsForQuery(hashedQueryKey) + const previouslyOwnedRows = + getPersistedOwnedRowsForQueryBaseline(hashedQueryKey) const newItemsMap = new Map() newItemsArray.forEach((item) => { const key = getKey(item) @@ -909,6 +1029,11 @@ export function queryCollectionOptions( }) begin() + if (metadata) { + metadata.collection.delete( + `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, + ) + } previouslyOwnedRows.forEach((key) => { const oldItem = currentSyncedItems.get(key) @@ -1002,6 +1127,8 @@ export function queryCollectionOptions( unsubscribes.clear() } + consumePersistedQueryRetentionAtStartup() + // Mark that sync has started syncStarted = true @@ -1051,12 +1178,8 @@ export function queryCollectionOptions( unsubscribes.delete(hashedQueryKey) const rowKeys = queryToRows.get(hashedQueryKey) ?? 
new Set() + const nextOwnersByRow = new Map>() const rowsToDelete: Array = [] - const shouldWriteMetadata = metadata !== undefined && rowKeys.size > 0 - - if (shouldWriteMetadata) { - begin() - } rowKeys.forEach((rowKey) => { const queries = rowToQueries.get(rowKey) @@ -1065,29 +1188,41 @@ export function queryCollectionOptions( return } - queries.delete(hashedQueryKey) - setPersistedOwners(rowKey, queries) + const nextOwners = new Set(queries) + nextOwners.delete(hashedQueryKey) + nextOwnersByRow.set(rowKey, nextOwners) - if (queries.size === 0) { - rowToQueries.delete(rowKey) - - if (collection.has(rowKey)) { - rowsToDelete.push(collection.get(rowKey)) - } + if (nextOwners.size === 0 && collection.has(rowKey)) { + rowsToDelete.push(collection.get(rowKey)) } }) - if (!shouldWriteMetadata && rowsToDelete.length > 0) { + const shouldWriteMetadata = + metadata !== undefined && nextOwnersByRow.size > 0 + const needsTransaction = shouldWriteMetadata || rowsToDelete.length > 0 + if (needsTransaction) { begin() } + nextOwnersByRow.forEach((owners, rowKey) => { + if (owners.size === 0) { + rowToQueries.delete(rowKey) + } else { + rowToQueries.set(rowKey, owners) + } + + if (shouldWriteMetadata) { + setPersistedOwners(rowKey, owners) + } + }) + if (rowsToDelete.length > 0) { rowsToDelete.forEach((row) => { write({ type: `delete`, value: row }) }) } - if (shouldWriteMetadata || rowsToDelete.length > 0) { + if (needsTransaction) { commit() } @@ -1133,7 +1268,9 @@ export function queryCollectionOptions( if (persistedGcTime !== undefined) { if (metadata) { begin() - metadata.collection.set(`queryCollection:gc:${hashedQueryKey}`, { + metadata.collection.set( + `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, + { queryHash: hashedQueryKey, mode: persistedGcTime === Number.POSITIVE_INFINITY @@ -1142,12 +1279,14 @@ export function queryCollectionOptions( ...(persistedGcTime === Number.POSITIVE_INFINITY ? 
{} : { expiresAt: Date.now() + persistedGcTime }), - }) + }, + ) commit() } unsubscribes.get(hashedQueryKey)?.() unsubscribes.delete(hashedQueryKey) state.observers.delete(hashedQueryKey) + hashToQueryKey.delete(hashedQueryKey) queryRefCounts.set(hashedQueryKey, 0) return } diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 8f05131ed..0e8e98ccd 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -14,6 +14,7 @@ import type { Collection, DeleteMutationFnParams, InsertMutationFnParams, + SyncMetadataApi, TransactionWithMutations, UpdateMutationFnParams, } from '@tanstack/db' @@ -36,6 +37,49 @@ const getKey = (item: TestItem) => item.id // Helper to advance timers and allow microtasks to flush const flushPromises = () => new Promise((resolve) => setTimeout(resolve, 0)) +function createInMemorySyncMetadataApi< + TKey extends string | number = string | number, +>(seed?: { + rowMetadata?: ReadonlyMap + collectionMetadata?: ReadonlyMap +}): { + api: SyncMetadataApi + rowMetadata: Map + collectionMetadata: Map +} { + const rowMetadata = new Map(seed?.rowMetadata) + const collectionMetadata = new Map(seed?.collectionMetadata) + + return { + rowMetadata, + collectionMetadata, + api: { + row: { + get: (key) => rowMetadata.get(key), + set: (key, value) => { + rowMetadata.set(key, value) + }, + delete: (key) => { + rowMetadata.delete(key) + }, + }, + collection: { + get: (key) => collectionMetadata.get(key), + set: (key, value) => { + collectionMetadata.set(key, value) + }, + delete: (key) => { + collectionMetadata.delete(key) + }, + list: (prefix) => + Array.from(collectionMetadata.entries()) + .filter(([key]) => (prefix ? 
key.startsWith(prefix) : true)) + .map(([key, value]) => ({ key, value })), + }, + }, + } +} + describe(`QueryCollection`, () => { let queryClient: QueryClient @@ -4275,7 +4319,7 @@ describe(`QueryCollection`, () => { }) }) - it(`should diff against persisted query-owned rows on warm start`, async () => { + it(`should diff against retained query-owned rows on warm start`, async () => { const baseQueryKey = [`persisted-baseline-test`] const queryFn = vi.fn().mockResolvedValue([]) @@ -4289,22 +4333,51 @@ describe(`QueryCollection`, () => { startSync: false, } - const collection = createCollection(queryCollectionOptions(config)) + const baseOptions = queryCollectionOptions(config) + const originalSync = baseOptions.sync const ownedRow = { id: `1`, name: `Owned row`, category: `A` } const unrelatedRow = { id: `2`, name: `Unrelated row`, category: `B` } const ownedQueryHash = hashKey(baseQueryKey) + const metadataHarness = createInMemorySyncMetadataApi({ + rowMetadata: new Map([ + [ + ownedRow.id, + { + queryCollection: { + owners: { + [ownedQueryHash]: true, + }, + }, + }, + ], + ]), + collectionMetadata: new Map([ + [ + `queryCollection:gc:${ownedQueryHash}`, + { + queryHash: ownedQueryHash, + mode: `until-revalidated`, + }, + ], + ]), + }) - collection._state.syncedData.set(ownedRow.id, ownedRow) - collection._state.syncedData.set(unrelatedRow.id, unrelatedRow) - collection._state.syncedMetadata.set(ownedRow.id, { - queryCollection: { - owners: { - [ownedQueryHash]: true, + const collection = createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => { + params.begin({ immediate: true }) + params.write({ type: `insert`, value: ownedRow }) + params.write({ type: `insert`, value: unrelatedRow }) + params.commit() + + return originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }) }, }, }) - collection._state.syncedMetadata.set(unrelatedRow.id, undefined) - collection._state.size = 2 await collection.preload() await flushPromises() 
@@ -4312,6 +4385,181 @@ describe(`QueryCollection`, () => { expect(queryFn).toHaveBeenCalledTimes(1) expect(collection.has(ownedRow.id)).toBe(false) expect(collection.has(unrelatedRow.id)).toBe(true) + expect( + metadataHarness.collectionMetadata.has( + `queryCollection:gc:${ownedQueryHash}`, + ), + ).toBe(false) + }) + + it(`should clean up expired persisted ttl placeholders on startup`, async () => { + const baseQueryKey = [`persisted-ttl-cleanup-test`] + const queryFn = vi.fn().mockResolvedValue([]) + const expiredQueryHash = hashKey(baseQueryKey) + const otherOwnerHash = `other-owner` + + const config: QueryCollectionConfig = { + id: `persisted-ttl-cleanup-test`, + queryClient, + queryKey: baseQueryKey, + queryFn, + getKey: (item) => item.id, + syncMode: `on-demand`, + startSync: true, + } + + const baseOptions = queryCollectionOptions(config) + const originalSync = baseOptions.sync + const orphanRow = { id: `1`, name: `Orphan`, category: `A` } + const sharedRow = { id: `2`, name: `Shared`, category: `B` } + const metadataHarness = createInMemorySyncMetadataApi({ + rowMetadata: new Map([ + [ + orphanRow.id, + { + queryCollection: { + owners: { + [expiredQueryHash]: true, + }, + }, + }, + ], + [ + sharedRow.id, + { + queryCollection: { + owners: { + [expiredQueryHash]: true, + [otherOwnerHash]: true, + }, + }, + }, + ], + ]), + collectionMetadata: new Map([ + [ + `queryCollection:gc:${expiredQueryHash}`, + { + queryHash: expiredQueryHash, + mode: `ttl`, + expiresAt: Date.now() - 1_000, + }, + ], + ]), + }) + + const collection = createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => { + params.begin({ immediate: true }) + params.write({ type: `insert`, value: orphanRow }) + params.write({ type: `insert`, value: sharedRow }) + params.commit() + + return originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }) + }, + }, + }) + + await collection.stateWhenReady() + await flushPromises() + + 
expect(queryFn).not.toHaveBeenCalled() + expect(collection.has(orphanRow.id)).toBe(false) + expect(collection.has(sharedRow.id)).toBe(true) + expect( + metadataHarness.collectionMetadata.get( + `queryCollection:gc:${expiredQueryHash}`, + ), + ).toEqual({ + queryHash: expiredQueryHash, + mode: `ttl`, + expiresAt: expect.any(Number), + }) + expect(metadataHarness.rowMetadata.get(sharedRow.id)).toEqual({ + queryCollection: { + owners: { + [otherOwnerHash]: true, + }, + }, + }) + }) + + it(`should preserve until-revalidated retained rows on startup`, async () => { + const baseQueryKey = [`persisted-until-revalidated-test`] + const queryFn = vi.fn().mockResolvedValue([]) + const retainedQueryHash = hashKey(baseQueryKey) + + const config: QueryCollectionConfig = { + id: `persisted-until-revalidated-test`, + queryClient, + queryKey: baseQueryKey, + queryFn, + getKey: (item) => item.id, + syncMode: `on-demand`, + startSync: true, + } + + const baseOptions = queryCollectionOptions(config) + const originalSync = baseOptions.sync + const retainedRow = { id: `1`, name: `Retained`, category: `A` } + const metadataHarness = createInMemorySyncMetadataApi({ + rowMetadata: new Map([ + [ + retainedRow.id, + { + queryCollection: { + owners: { + [retainedQueryHash]: true, + }, + }, + }, + ], + ]), + collectionMetadata: new Map([ + [ + `queryCollection:gc:${retainedQueryHash}`, + { + queryHash: retainedQueryHash, + mode: `until-revalidated`, + }, + ], + ]), + }) + + const collection = createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => { + params.begin({ immediate: true }) + params.write({ type: `insert`, value: retainedRow }) + params.commit() + + return originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }) + }, + }, + }) + + await collection.stateWhenReady() + await flushPromises() + + expect(queryFn).not.toHaveBeenCalled() + expect(collection.has(retainedRow.id)).toBe(true) + expect( + metadataHarness.collectionMetadata.get( + 
`queryCollection:gc:${retainedQueryHash}`, + ), + ).toEqual({ + queryHash: retainedQueryHash, + mode: `until-revalidated`, + }) }) it(`should reset refcount after query GC and reload (stale refcount bug)`, async () => { From 985ade1f286d1639e48e61b2d6d5c2aad18aee0d Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 21:39:54 +0000 Subject: [PATCH 09/12] feat: complete persisted sync metadata coverage Finish the remaining persisted metadata work by adding cold-row retained query cleanup, runtime TTL expiry, stronger Electric resume identity checks, and metadata delta replay for follower recovery while keeping reload fallback for reset-like cases. Made-with: Cursor --- .../src/persisted.ts | 153 +++++- .../src/sqlite-core-adapter.ts | 164 ++++++- .../tests/persisted.test.ts | 188 +++++++- .../tests/sqlite-core-adapter.test.ts | 132 ++++++ .../electric-db-collection/src/electric.ts | 26 +- .../tests/electric.test.ts | 141 +++++- packages/query-db-collection/src/query.ts | 445 ++++++++++++++---- .../query-db-collection/tests/query.test.ts | 354 ++++++++++++-- 8 files changed, 1438 insertions(+), 165 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index 2952e56ea..5a8844faa 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -78,6 +78,8 @@ export type TxCommitted = { value: Record }> deletedKeys: Array + rowMetadataMutations?: Array> + collectionMetadataMutations?: Array } ) @@ -151,6 +153,7 @@ export type PullSinceResponse = requiresFullReload: false changedKeys: Array deletedKeys: Array + deltas?: Array, string | number>> } | { type: `rpc:pullSince:res` @@ -171,6 +174,41 @@ export interface PersistedIndexSpec { readonly metadata?: Readonly> } +export type PersistedRowMetadataMutation< + TKey extends string | number = string | number, +> = + | { type: `set`; 
key: TKey; value: unknown } + | { type: `delete`; key: TKey } + +export type PersistedCollectionMetadataMutation = + | { type: `set`; key: string; value: unknown } + | { type: `delete`; key: string } + +export type ReplayableTxDelta< + T extends Record = Record, + TKey extends string | number = string | number, +> = { + txId: string + latestRowVersion: number + changedRows: Array<{ key: TKey; value: T }> + deletedKeys: Array + rowMetadataMutations: Array> + collectionMetadataMutations: Array +} + +export type PersistedScannedRow< + T extends object, + TKey extends string | number = string | number, +> = { + key: TKey + value: T + metadata?: unknown +} + +export type PersistedRowScanOptions = { + metadataOnly?: boolean +} + export type PersistedTx< T extends object, TKey extends string | number = string | number, @@ -197,13 +235,8 @@ export type PersistedTx< } | { type: `delete`; key: TKey; value: T } > - rowMetadataMutations?: Array< - { type: `set`; key: TKey; value: unknown } | { type: `delete`; key: TKey } - > - collectionMetadataMutations?: Array< - | { type: `set`; key: string; value: unknown } - | { type: `delete`; key: string } - > + rowMetadataMutations?: Array> + collectionMetadataMutations?: Array } export interface PersistenceAdapter< @@ -222,6 +255,10 @@ export interface PersistenceAdapter< loadCollectionMetadata?: ( collectionId: string, ) => Promise> + scanRows?: ( + collectionId: string, + options?: PersistedRowScanOptions, + ) => Promise>> ensureIndex: ( collectionId: string, signature: string, @@ -442,6 +479,7 @@ export class SingleProcessCoordinator implements PersistedCollectionCoordinator requiresFullReload: false, changedKeys: [], deletedKeys: [], + deltas: [], }) } } @@ -715,7 +753,12 @@ function isTxCommittedPayload(payload: unknown): payload is TxCommitted { } return ( - Array.isArray(payload.changedRows) && Array.isArray(payload.deletedKeys) + Array.isArray(payload.changedRows) && + Array.isArray(payload.deletedKeys) && + 
(payload.rowMetadataMutations === undefined || + Array.isArray(payload.rowMetadataMutations)) && + (payload.collectionMetadataMutations === undefined || + Array.isArray(payload.collectionMetadataMutations)) ) } @@ -1153,6 +1196,22 @@ class PersistedCollectionRuntime< }) } + private async scanPersistedRowsUnsafe( + options?: PersistedRowScanOptions, + ): Promise>> { + if (!this.persistence.adapter.scanRows) { + return [] + } + + return this.persistence.adapter.scanRows(this.collectionId, options) + } + + async scanPersistedRows( + options?: PersistedRowScanOptions, + ): Promise>> { + return this.applyMutex.run(() => this.scanPersistedRowsUnsafe(options)) + } + private async hydrateSubsetUnsafe( options: LoadSubsetOptions, config: { @@ -1357,9 +1416,6 @@ class PersistedCollectionRuntime< seq: tx.seq, txId: tx.txId, latestRowVersion: tx.rowVersion, - hasMetadataChanges: - transaction.rowMetadataWrites.size > 0 || - transaction.collectionMetadataWrites.size > 0, requiresFullReload: transaction.truncate, changedRows: transaction.operations .filter((operation) => operation.type === `update`) @@ -1367,6 +1423,8 @@ class PersistedCollectionRuntime< deletedKeys: transaction.operations .filter((operation) => operation.type === `delete`) .map((operation) => operation.key), + rowMetadataMutations: tx.rowMetadataMutations, + collectionMetadataMutations: tx.collectionMetadataMutations, }), ) } @@ -1555,11 +1613,6 @@ class PersistedCollectionRuntime< seq: tx.seq, txId: tx.txId, latestRowVersion: tx.rowVersion, - hasMetadataChanges: - (tx.rowMetadataMutations !== undefined && - tx.rowMetadataMutations.length > 0) || - (tx.collectionMetadataMutations !== undefined && - tx.collectionMetadataMutations.length > 0), changedRows: mutations .filter((mutation) => mutation.type !== `delete`) .map((mutation) => ({ @@ -1569,6 +1622,8 @@ class PersistedCollectionRuntime< deletedKeys: mutations .filter((mutation) => mutation.type === `delete`) .map((mutation) => mutation.key as TKey), + 
rowMetadataMutations: tx.rowMetadataMutations, + collectionMetadataMutations: tx.collectionMetadataMutations, }), ) @@ -1582,13 +1637,19 @@ class PersistedCollectionRuntime< latestRowVersion: number changedRows: Array<{ key: TKey; value: T }> deletedKeys: Array + rowMetadataMutations?: Array> + collectionMetadataMutations?: Array hasMetadataChanges?: boolean requiresFullReload?: boolean }): TxCommitted { + const rowMetadataMutations = args.rowMetadataMutations ?? [] + const collectionMetadataMutations = args.collectionMetadataMutations ?? [] const requiresFullReload = args.requiresFullReload === true || - args.hasMetadataChanges === true || - args.changedRows.length + args.deletedKeys.length > + args.changedRows.length + + args.deletedKeys.length + + rowMetadataMutations.length + + collectionMetadataMutations.length > TARGETED_INVALIDATION_KEY_LIMIT if (requiresFullReload) { @@ -1614,6 +1675,9 @@ class PersistedCollectionRuntime< value: Record }>, deletedKeys: args.deletedKeys, + rowMetadataMutations: + rowMetadataMutations as Array>, + collectionMetadataMutations, } } @@ -1842,6 +1906,13 @@ class PersistedCollectionRuntime< if (hasGap) { await this.recoverFromSeqGapUnsafe() + if ( + txCommitted.term < this.latestTerm || + (txCommitted.term === this.latestTerm && + txCommitted.seq <= this.latestSeq) + ) { + return + } } this.observeStreamPosition( @@ -1867,7 +1938,25 @@ class PersistedCollectionRuntime< pullResponse.latestSeq, pullResponse.latestRowVersion, ) - await this.reloadActiveSubsetsUnsafe() + if (pullResponse.requiresFullReload || !pullResponse.deltas) { + await this.reloadActiveSubsetsUnsafe() + return + } + + for (const delta of pullResponse.deltas) { + await this.invalidateFromCommittedTxUnsafe({ + type: `tx:committed`, + term: pullResponse.latestTerm, + seq: pullResponse.latestSeq, + txId: delta.txId, + latestRowVersion: delta.latestRowVersion, + requiresFullReload: false, + changedRows: delta.changedRows, + deletedKeys: delta.deletedKeys, + 
rowMetadataMutations: delta.rowMetadataMutations, + collectionMetadataMutations: delta.collectionMetadataMutations, + }) + } return } } catch (error) { @@ -1915,7 +2004,7 @@ class PersistedCollectionRuntime< (opt) => opt.limit != null || opt.offset != null || opt.cursor != null, ) - if (!hasPaginatedSubset) { + if (!hasPaginatedSubset || changedKeyCount === 0) { await this.applyTargetedInvalidationUnsafe(txCommitted) return } @@ -1955,6 +2044,28 @@ class PersistedCollectionRuntime< this.syncControls.write?.({ type: `delete`, key: deletedKey as TKey }) } + txCommitted.rowMetadataMutations?.forEach((mutation) => { + if (mutation.type === `delete`) { + this.syncControls.metadata?.row.delete(mutation.key as TKey) + } else { + this.syncControls.metadata?.row.set( + mutation.key as TKey, + mutation.value, + ) + } + }) + + txCommitted.collectionMetadataMutations?.forEach((mutation) => { + if (mutation.type === `delete`) { + this.syncControls.metadata?.collection.delete(mutation.key) + } else { + this.syncControls.metadata?.collection.set( + mutation.key, + mutation.value, + ) + } + }) + this.syncControls.commit?.() }) } @@ -2185,6 +2296,8 @@ function createWrappedSyncConfig< } return params.metadata!.row.get(key) }, + scanPersisted: (options?: PersistedRowScanOptions) => + runtime.scanPersistedRows(options), set: (key: TKey, value: unknown) => { const openTransaction = getOpenTransaction() if (!openTransaction) { diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index 92582e0eb..92beffe6f 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -15,8 +15,11 @@ import { import type { LoadSubsetOptions } from '@tanstack/db' import type { PersistedIndexSpec, + PersistedRowScanOptions, + PersistedScannedRow, PersistedTx, PersistenceAdapter, + ReplayableTxDelta, 
SQLiteDriver, } from './persisted' @@ -65,6 +68,7 @@ export type SQLitePullSinceResult = requiresFullReload: false changedKeys: Array deletedKeys: Array + deltas: Array, TKey>> } const DEFAULT_SCHEMA_VERSION = 1 @@ -593,6 +597,28 @@ type InMemoryRow = { rowVersion: number } +function decodeStoredSqliteRows( + storedRows: ReadonlyArray, +): Array> { + return storedRows.map((row) => { + const key = decodePersistedStorageKey(row.key) as TKey + const value = deserializePersistedRowValue(row.value) + return { + key, + value, + metadata: + row.metadata != null + ? deserializePersistedRowValue(row.metadata) + : undefined, + rowVersion: row.row_version, + } + }) +} + +function stableStringify(value: unknown): string { + return JSON.stringify(value) +} + function compileSqlExpression( expression: IR.BasicExpression, ): CompiledSqlFragment { @@ -1122,6 +1148,24 @@ export class SQLiteCorePersistenceAdapter< ) const currentRowVersion = versionRows[0]?.latest_row_version ?? 0 const nextRowVersion = Math.max(currentRowVersion + 1, tx.rowVersion) + const replayDelta: ReplayableTxDelta, TKey> | null = + tx.truncate + ? null + : { + txId: tx.txId, + latestRowVersion: nextRowVersion, + changedRows: tx.mutations + .filter((mutation) => mutation.type !== `delete`) + .map((mutation) => ({ + key: mutation.key, + value: mutation.value as Record, + })), + deletedKeys: tx.mutations + .filter((mutation) => mutation.type === `delete`) + .map((mutation) => mutation.key), + rowMetadataMutations: tx.rowMetadataMutations ?? [], + collectionMetadataMutations: tx.collectionMetadataMutations ?? 
[], + } if (tx.truncate) { await transactionDriver.run(`DELETE FROM ${collectionTableSql}`) @@ -1276,10 +1320,20 @@ export class SQLiteCorePersistenceAdapter< seq, tx_id, row_version, + replay_json, + replay_requires_full_reload, applied_at ) - VALUES (?, ?, ?, ?, ?, CAST(strftime('%s', 'now') AS INTEGER))`, - [collectionId, tx.term, tx.seq, tx.txId, nextRowVersion], + VALUES (?, ?, ?, ?, ?, ?, ?, CAST(strftime('%s', 'now') AS INTEGER))`, + [ + collectionId, + tx.term, + tx.seq, + tx.txId, + nextRowVersion, + replayDelta ? stableStringify(replayDelta) : null, + tx.truncate ? 1 : 0, + ], ) await this.pruneAppliedTxRows(collectionId, transactionDriver) @@ -1302,6 +1356,29 @@ export class SQLiteCorePersistenceAdapter< })) } + async scanRows( + collectionId: string, + options?: PersistedRowScanOptions, + ): Promise>> { + const tableMapping = await this.ensureCollectionReady(collectionId) + const collectionTableSql = quoteIdentifier(tableMapping.tableName) + + const storedRows = await this.driver.query( + options?.metadataOnly + ? 
`SELECT key, value, metadata, row_version + FROM ${collectionTableSql} + WHERE metadata IS NOT NULL` + : `SELECT key, value, metadata, row_version + FROM ${collectionTableSql}`, + ) + + return decodeStoredSqliteRows(storedRows).map((row) => ({ + key: row.key, + value: row.value, + metadata: row.metadata, + })) + } + async ensureIndex( collectionId: string, signature: string, @@ -1439,7 +1516,8 @@ export class SQLiteCorePersistenceAdapter< const collectionTableSql = quoteIdentifier(tableMapping.tableName) const tombstoneTableSql = quoteIdentifier(tableMapping.tombstoneTableName) - const [changedRows, deletedRows, latestVersionRows] = await Promise.all([ + const [changedRows, deletedRows, latestVersionRows, replayRows] = + await Promise.all([ this.driver.query<{ key: string }>( `SELECT key FROM ${collectionTableSql} @@ -1459,6 +1537,18 @@ export class SQLiteCorePersistenceAdapter< LIMIT 1`, [collectionId], ), + this.driver.query<{ + tx_id: string + row_version: number + replay_json: string | null + replay_requires_full_reload: number + }>( + `SELECT tx_id, row_version, replay_json, replay_requires_full_reload + FROM applied_tx + WHERE collection_id = ? AND row_version > ? + ORDER BY term ASC, seq ASC`, + [collectionId, fromRowVersion], + ), ]) const latestRowVersion = latestVersionRows[0]?.latest_row_version ?? 0 @@ -1471,6 +1561,18 @@ export class SQLiteCorePersistenceAdapter< } } + if ( + replayRows.some( + (row) => + row.replay_requires_full_reload !== 0 || row.replay_json == null, + ) + ) { + return { + latestRowVersion, + requiresFullReload: true, + } + } + const decodeKey = (encodedKey: string): TKey => { try { return decodePersistedStorageKey(encodedKey) as TKey @@ -1481,11 +1583,41 @@ export class SQLiteCorePersistenceAdapter< } } + const deltas = replayRows.map((row) => { + const parsed = JSON.parse( + row.replay_json ?? 
`null`, + ) as ReplayableTxDelta, TKey> | null + if (!parsed) { + throw new InvalidPersistedCollectionConfigError( + `missing replay payload for applied_tx row`, + ) + } + return parsed + }) + + const replayChangeCount = deltas.reduce( + (count, delta) => + count + + delta.changedRows.length + + delta.deletedKeys.length + + delta.rowMetadataMutations.length + + delta.collectionMetadataMutations.length, + 0, + ) + + if (replayChangeCount > this.pullSinceReloadThreshold) { + return { + latestRowVersion, + requiresFullReload: true, + } + } + return { latestRowVersion, requiresFullReload: false, changedKeys: changedRows.map((row) => decodeKey(row.key)), deletedKeys: deletedRows.map((row) => decodeKey(row.key)), + deltas, } } @@ -1516,19 +1648,7 @@ export class SQLiteCorePersistenceAdapter< sql, queryParams, ) - const parsedRows = storedRows.map((row) => { - const key = decodePersistedStorageKey(row.key) as TKey - const value = deserializePersistedRowValue(row.value) - return { - key, - value, - metadata: - row.metadata != null - ? 
deserializePersistedRowValue(row.metadata) - : undefined, - rowVersion: row.row_version, - } - }) + const parsedRows = decodeStoredSqliteRows(storedRows) const filteredRows = this.applyInMemoryWhere(parsedRows, options.where) const orderedRows = this.applyInMemoryOrderBy(filteredRows, options.orderBy) @@ -1908,10 +2028,22 @@ export class SQLiteCorePersistenceAdapter< seq INTEGER NOT NULL, tx_id TEXT NOT NULL, row_version INTEGER NOT NULL, + replay_json TEXT, + replay_requires_full_reload INTEGER NOT NULL DEFAULT 0, applied_at INTEGER NOT NULL, PRIMARY KEY (collection_id, term, seq) )`, ) + try { + await this.driver.exec( + `ALTER TABLE applied_tx ADD COLUMN replay_json TEXT`, + ) + } catch {} + try { + await this.driver.exec( + `ALTER TABLE applied_tx ADD COLUMN replay_requires_full_reload INTEGER NOT NULL DEFAULT 0`, + ) + } catch {} await this.driver.exec( `CREATE TABLE IF NOT EXISTS collection_version ( collection_id TEXT PRIMARY KEY, diff --git a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts index 6b96fe90b..114b3d4bd 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from 'vitest' -import { IR, createCollection, createTransaction } from '@tanstack/db' +import { + IR, + createCollection, + createTransaction, +} from '@tanstack/db' import { InvalidPersistedCollectionCoordinatorError, InvalidPersistedStorageKeyEncodingError, @@ -91,6 +95,14 @@ function createRecordingAdapter( ), ) }, + scanRows: () => + Promise.resolve( + Array.from(rows.values()).map((value) => ({ + key: value.id, + value, + metadata: rowMetadata.get(value.id), + })), + ), applyCommittedTx: (collectionId, tx) => { adapter.applyCommittedTxCalls.push({ collectionId, @@ -462,6 +474,180 @@ describe(`persistedCollectionOptions`, () => { }) }) + it(`replays 
metadata-only tx:committed deltas without full reload`, async () => { + const adapter = createRecordingAdapter([ + { + id: `1`, + title: `Tracked`, + }, + ]) + adapter.rowMetadata.set(`1`, { source: `initial` }) + const coordinator = createCoordinatorHarness() + + const collection = createCollection( + persistedCollectionOptions({ + id: `sync-present`, + getKey: (item) => item.id, + sync: { + sync: ({ markReady }) => { + markReady() + }, + }, + persistence: { + adapter, + coordinator, + }, + }), + ) + + await collection.preload() + await flushAsyncWork() + const loadSubsetCallsAfterPreload = adapter.loadSubsetCalls.length + + coordinator.emit({ + type: `tx:committed`, + term: 1, + seq: 1, + txId: `tx-metadata-only`, + latestRowVersion: 2, + requiresFullReload: false, + changedRows: [], + deletedKeys: [], + rowMetadataMutations: [ + { + type: `set`, + key: `1`, + value: { source: `replayed` }, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `reset`, + updatedAt: 2, + }, + }, + ], + }) + + await flushAsyncWork() + await flushAsyncWork() + + expect(adapter.loadSubsetCalls.length).toBe(loadSubsetCallsAfterPreload) + expect(collection._state.syncedMetadata.get(`1`)).toEqual({ + source: `replayed`, + }) + expect( + collection._state.syncedCollectionMetadata.get(`electric:resume`), + ).toEqual({ + kind: `reset`, + updatedAt: 2, + }) + }) + + it(`uses pullSince replay deltas for metadata-bearing seq-gap recovery`, async () => { + const adapter = createRecordingAdapter([ + { + id: `1`, + title: `Tracked`, + }, + ]) + adapter.rowMetadata.set(`1`, { source: `initial` }) + const coordinator = createCoordinatorHarness() + coordinator.setPullSinceResponse({ + type: `rpc:pullSince:res`, + rpcId: `pull-metadata`, + ok: true, + latestTerm: 1, + latestSeq: 3, + latestRowVersion: 3, + requiresFullReload: false, + changedKeys: [], + deletedKeys: [], + deltas: [ + { + txId: `tx-gap-1`, + latestRowVersion: 2, + changedRows: [], 
+ deletedKeys: [], + rowMetadataMutations: [ + { + type: `set`, + key: `1`, + value: { source: `gap-replayed` }, + }, + ], + collectionMetadataMutations: [], + }, + { + txId: `tx-gap-2`, + latestRowVersion: 3, + changedRows: [], + deletedKeys: [], + rowMetadataMutations: [], + collectionMetadataMutations: [ + { + type: `set`, + key: `queryCollection:gc:q1`, + value: { + queryHash: `q1`, + mode: `until-revalidated`, + }, + }, + ], + }, + ], + }) + + const collection = createCollection( + persistedCollectionOptions({ + id: `sync-present`, + getKey: (item) => item.id, + sync: { + sync: ({ markReady }) => { + markReady() + }, + }, + persistence: { + adapter, + coordinator, + }, + }), + ) + + await collection.preload() + await flushAsyncWork() + const loadSubsetCallsAfterPreload = adapter.loadSubsetCalls.length + + coordinator.emit({ + type: `tx:committed`, + term: 1, + seq: 3, + txId: `tx-gap-trigger`, + latestRowVersion: 3, + requiresFullReload: false, + changedRows: [], + deletedKeys: [], + }) + + await flushAsyncWork() + await flushAsyncWork() + + expect(coordinator.pullSinceCalls).toBe(1) + expect(adapter.loadSubsetCalls.length).toBe(loadSubsetCallsAfterPreload) + expect(collection._state.syncedMetadata.get(`1`)).toEqual({ + source: `gap-replayed`, + }) + expect( + collection._state.syncedCollectionMetadata.get(`queryCollection:gc:q1`), + ).toEqual({ + queryHash: `q1`, + mode: `until-revalidated`, + }) + }) + it(`throws InvalidSyncConfigError when sync key is present but null`, () => { const invalidOptions = { id: `invalid-sync-null`, diff --git a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts index e70e6b0db..35e28eeba 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/sqlite-core-adapter.test.ts @@ -1085,11 +1085,143 @@ export function 
runSQLiteCoreAdapterContractSuite( } expect(delta.changedKeys).toEqual([]) expect(delta.deletedKeys).toEqual([`1`]) + expect(delta.deltas).toEqual([ + { + txId: `seed-pull-2`, + latestRowVersion: 2, + changedRows: [], + deletedKeys: [`1`], + rowMetadataMutations: [], + collectionMetadataMutations: [], + }, + ]) const fullReload = await adapter.pullSince(collectionId, 0) expect(fullReload.requiresFullReload).toBe(true) }) + it(`scans persisted rows with metadata and replays metadata-only deltas`, async () => { + const { adapter } = registerContractHarness() + const collectionId = `scan-and-replay` + + await adapter.applyCommittedTx(collectionId, { + txId: `scan-seed-1`, + term: 1, + seq: 1, + rowVersion: 1, + mutations: [ + { + type: `insert`, + key: `1`, + value: { + id: `1`, + title: `Tracked`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + metadata: { + queryCollection: { + owners: { + q1: true, + }, + }, + }, + metadataChanged: true, + }, + ], + }) + + const scannedRows = await adapter.scanRows?.(collectionId, { + metadataOnly: true, + }) + expect(scannedRows).toEqual([ + { + key: `1`, + value: { + id: `1`, + title: `Tracked`, + createdAt: `2026-01-01T00:00:00.000Z`, + score: 1, + }, + metadata: { + queryCollection: { + owners: { + q1: true, + }, + }, + }, + }, + ]) + + await adapter.applyCommittedTx(collectionId, { + txId: `scan-seed-2`, + term: 1, + seq: 2, + rowVersion: 2, + mutations: [], + rowMetadataMutations: [ + { + type: `set`, + key: `1`, + value: { + queryCollection: { + owners: { + q2: true, + }, + }, + }, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `reset`, + updatedAt: 2, + }, + }, + ], + }) + + const replayDelta = await adapter.pullSince(collectionId, 1) + if (replayDelta.requiresFullReload) { + throw new Error(`Expected replay delta, received full reload`) + } + + expect(replayDelta.deltas).toEqual([ + { + txId: `scan-seed-2`, + latestRowVersion: 2, + changedRows: [], + 
deletedKeys: [], + rowMetadataMutations: [ + { + type: `set`, + key: `1`, + value: { + queryCollection: { + owners: { + q2: true, + }, + }, + }, + }, + ], + collectionMetadataMutations: [ + { + type: `set`, + key: `electric:resume`, + value: { + kind: `reset`, + updatedAt: 2, + }, + }, + ], + }, + ]) + }) + it(`keeps numeric and string keys distinct in storage`, async () => { const { driver } = registerContractHarness() const adapter = new SQLiteCorePersistenceAdapter< diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index ca5fe64b3..f446e11da 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -323,6 +323,16 @@ function parseSnapshotMessage(message: SnapshotEndMessage): PostgresSnapshot { } } +function getStableShapeIdentity(shapeOptions: { + url: string + params?: Record +}): string { + return JSON.stringify({ + url: shapeOptions.url, + params: shapeOptions.params ?? 
null, + }) +} + // Check if a message contains txids in its headers function hasTxids>( message: Message, @@ -1228,10 +1238,18 @@ function createElectricSync>( } const persistedResumeState = readPersistedResumeState() + const shapeIdentity = getStableShapeIdentity({ + url: shapeOptions.url, + params: shapeOptions.params as Record | undefined, + }) + const hasIncompatiblePersistedResume = + persistedResumeState?.kind === `resume` && + persistedResumeState.shapeId !== shapeIdentity const canUsePersistedResume = shapeOptions.offset === undefined && shapeOptions.handle === undefined && - persistedResumeState?.kind === `resume` + persistedResumeState?.kind === `resume` && + !hasIncompatiblePersistedResume // Wrap markReady to wait for test hook in progressive mode let progressiveReadyGate: Promise | null = null @@ -1354,7 +1372,7 @@ function createElectricSync>( kind: `resume`, offset: lastOffset, handle: shapeHandle, - shapeId: shapeHandle, + shapeId: shapeIdentity, updatedAt: Date.now(), }) } @@ -1372,6 +1390,10 @@ function createElectricSync>( commit() } + if (hasIncompatiblePersistedResume) { + commitResetResumeMetadataImmediately() + } + /** * Process a change message: handle tags and write the mutation */ diff --git a/packages/electric-db-collection/tests/electric.test.ts b/packages/electric-db-collection/tests/electric.test.ts index 24e0108e3..d9259e863 100644 --- a/packages/electric-db-collection/tests/electric.test.ts +++ b/packages/electric-db-collection/tests/electric.test.ts @@ -4,6 +4,7 @@ import { createCollection, createTransaction, } from '@tanstack/db' +import { persistedCollectionOptions } from '../../db-sqlite-persisted-collection-core/src' import { electricCollectionOptions, isChangeMessage } from '../src/electric' import { stripVirtualProps } from '../../db/tests/utils' import type { ElectricCollectionUtils } from '../src/electric' @@ -92,6 +93,27 @@ describe(`Electric Integration`, () => { } } + const createPersistedAdapter = (collectionMetadata?: 
Map) => ({ + loadSubset: async () => [], + loadCollectionMetadata: async () => + Array.from((collectionMetadata ?? new Map()).entries()).map( + ([key, value]) => ({ + key, + value, + }), + ), + applyCommittedTx: async (_collectionId: string, tx: any) => { + for (const mutation of tx.collectionMetadataMutations ?? []) { + if (mutation.type === `delete`) { + collectionMetadata?.delete(mutation.key) + } else { + collectionMetadata?.set(mutation.key, mutation.value) + } + } + }, + ensureIndex: async () => {}, + }) + beforeEach(() => { vi.clearAllMocks() @@ -2978,7 +3000,10 @@ describe(`Electric Integration`, () => { kind: `resume`, offset: `10_0`, handle: `handle-1`, - shapeId: `shape-1`, + shapeId: JSON.stringify({ + url: `http://test-url`, + params: { table: `test_table` }, + }), updatedAt: 1, }, ], @@ -3067,6 +3092,51 @@ describe(`Electric Integration`, () => { ) }) + it(`should honor persisted reset resume metadata through the persisted wrapper`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const collectionMetadata = new Map([ + [ + `electric:resume`, + { + kind: `reset`, + updatedAt: 1, + }, + ], + ]) + + const persistedCollection = createCollection( + persistedCollectionOptions({ + ...(electricCollectionOptions({ + id: `persisted-wrapper-reset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) as any), + persistence: { + adapter: createPersistedAdapter(collectionMetadata), + }, + }) as any, + ) + + persistedCollection.startSyncImmediate() + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + handle: undefined, + }), + ) + }) + it(`should not mix explicit handle with persisted offset`, async () => { vi.clearAllMocks() @@ -3079,7 +3149,10 @@ describe(`Electric 
Integration`, () => { kind: `resume`, offset: `10_0`, handle: `persisted-handle`, - shapeId: `shape-1`, + shapeId: JSON.stringify({ + url: `http://test-url`, + params: { table: `test_table` }, + }), updatedAt: 1, }, ], @@ -3132,7 +3205,10 @@ describe(`Electric Integration`, () => { kind: `resume`, offset: `10_0`, handle: `persisted-handle`, - shapeId: `shape-1`, + shapeId: JSON.stringify({ + url: `http://test-url`, + params: { table: `test_table` }, + }), updatedAt: 1, }, ], @@ -3223,6 +3299,65 @@ describe(`Electric Integration`, () => { ) }) + it(`should reset and fall back when persisted resume identity is incompatible`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + const metadataHarness = createInMemorySyncMetadataApi( + new Map([ + [ + `electric:resume`, + { + kind: `resume`, + offset: `10_0`, + handle: `handle-1`, + shapeId: `{"url":"http://other-url","params":{"table":"test_table"}}`, + updatedAt: 1, + }, + ], + ]), + ) + + const baseOptions = electricCollectionOptions({ + id: `persisted-incompatible-resume-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + }) + + const originalSync = baseOptions.sync + createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + handle: undefined, + }), + ) + expect( + metadataHarness.collectionMetadata.get(`electric:resume`), + ).toEqual( + expect.objectContaining({ + kind: `reset`, + }), + ) + }) + it(`should persist reset resume metadata immediately on must-refetch`, () => { const metadataHarness = createInMemorySyncMetadataApi() const baseOptions = electricCollectionOptions({ diff --git a/packages/query-db-collection/src/query.ts 
b/packages/query-db-collection/src/query.ts index 344a1658b..105e1a8a9 100644 --- a/packages/query-db-collection/src/query.ts +++ b/packages/query-db-collection/src/query.ts @@ -16,6 +16,7 @@ import type { InsertMutationFnParams, LoadSubsetOptions, SyncConfig, + SyncMetadataApi, UpdateMutationFnParams, UtilsRecord, } from '@tanstack/db' @@ -233,6 +234,21 @@ type PersistedQueryRetentionEntry = const QUERY_COLLECTION_GC_PREFIX = `queryCollection:gc:` +type PersistedScannedRowForQuery = { + key: string | number + value: TItem + metadata?: unknown +} + +type QuerySyncMetadataWithPersistedScan = + SyncMetadataApi & { + row: SyncMetadataApi[`row`] & { + scanPersisted?: (options?: { + metadataOnly?: boolean + }) => Promise>> + } + } + /** * Implementation class for QueryCollectionUtils with explicit dependency injection * for better testability and architectural clarity @@ -661,9 +677,19 @@ export function queryCollectionOptions( const internalSync: SyncConfig[`sync`] = (params) => { const { begin, write, commit, markReady, collection, metadata } = params + const persistedMetadata = metadata as + | QuerySyncMetadataWithPersistedScan + | undefined // Track whether sync has been started let syncStarted = false + let startupRetentionSettled = false + const retainedQueriesPendingRevalidation = new Set() + const persistedRetentionTimers = new Map< + string, + ReturnType + >() + let persistedRetentionMaintenance = Promise.resolve() const getRowMetadata = (rowKey: string | number) => { return (metadata?.row.get(rowKey) ?? 
@@ -751,7 +777,23 @@ export function queryCollectionOptions( return undefined } - const getPersistedOwnedRowsForQueryBaseline = (hashedQueryKey: string) => { + const runPersistedRetentionMaintenance = (task: () => Promise) => { + persistedRetentionMaintenance = persistedRetentionMaintenance.then( + task, + task, + ) + return persistedRetentionMaintenance + } + + const cancelPersistedRetentionExpiry = (hashedQueryKey: string) => { + const timer = persistedRetentionTimers.get(hashedQueryKey) + if (timer) { + clearTimeout(timer) + persistedRetentionTimers.delete(hashedQueryKey) + } + } + + const getHydratedOwnedRowsForQueryBaseline = (hashedQueryKey: string) => { const knownRows = queryToRows.get(hashedQueryKey) if (knownRows) { return new Set(knownRows) @@ -778,27 +820,113 @@ export function queryCollectionOptions( return ownedRows } - const cleanupPersistedPlaceholder = ( + const loadPersistedBaselineForQuery = async ( hashedQueryKey: string, - options?: { deleteRetentionEntry?: boolean }, - ) => { - const rowKeys = getPersistedOwnedRowsForQueryBaseline(hashedQueryKey) - const rowsToDelete: Array = [] - const needsTransaction = metadata !== undefined + ): Promise< + Map< + string | number, + { + value: any + owners: Set + } + > + > => { + const knownRows = queryToRows.get(hashedQueryKey) + if ( + knownRows && + Array.from(knownRows).every((rowKey) => collection.has(rowKey)) + ) { + const baseline = new Map< + string | number, + { value: any; owners: Set } + >() + knownRows.forEach((rowKey) => { + const value = collection.get(rowKey) + const owners = rowToQueries.get(rowKey) + if (value && owners) { + baseline.set(rowKey, { + value, + owners: new Set(owners), + }) + } + }) + return baseline + } - if (!needsTransaction) { - return + const scanPersisted = persistedMetadata?.row.scanPersisted + if (!scanPersisted) { + const baseline = new Map< + string | number, + { value: any; owners: Set } + >() + getHydratedOwnedRowsForQueryBaseline(hashedQueryKey).forEach((rowKey) 
=> { + const value = collection.get(rowKey) + const owners = rowToQueries.get(rowKey) + if (value && owners) { + baseline.set(rowKey, { + value, + owners: new Set(owners), + }) + } + }) + return baseline } - begin() + const baseline = new Map< + string | number, + { value: any; owners: Set } + >() + const scannedRows = await scanPersisted() + + scannedRows.forEach((row) => { + const rowMetadata = + row.metadata as Record | undefined + const queryMetadata = rowMetadata?.queryCollection + if (!queryMetadata || typeof queryMetadata !== `object`) { + return + } - rowKeys.forEach((rowKey) => { - const oldItem = collection.get(rowKey) - if (!oldItem) { + const owners = (queryMetadata as Record).owners + if (!owners || typeof owners !== `object`) { return } - const owners = getPersistedOwners(rowKey) + const ownerSet = new Set(Object.keys(owners as Record)) + if (ownerSet.size === 0) { + return + } + + rowToQueries.set(row.key, new Set(ownerSet)) + ownerSet.forEach((owner) => { + const queryToRowsSet = queryToRows.get(owner) || new Set() + queryToRowsSet.add(row.key) + queryToRows.set(owner, queryToRowsSet) + }) + + if (ownerSet.has(hashedQueryKey)) { + baseline.set(row.key, { + value: row.value, + owners: ownerSet, + }) + } + }) + + return baseline + } + + const cleanupPersistedPlaceholder = async ( + hashedQueryKey: string, + ) => { + if (!metadata) { + return + } + + const baseline = await loadPersistedBaselineForQuery(hashedQueryKey) + const rowsToDelete: Array = [] + + begin() + + baseline.forEach(({ value: oldItem, owners }, rowKey) => { owners.delete(hashedQueryKey) setPersistedOwners(rowKey, owners) const needToRemove = removeRow(rowKey, hashedQueryKey) @@ -811,13 +939,45 @@ export function queryCollectionOptions( write({ type: `delete`, value: row }) }) - if (options?.deleteRetentionEntry !== false) { - metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) - } + metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) 
commit() } - const consumePersistedQueryRetentionAtStartup = () => { + const schedulePersistedRetentionExpiry = ( + entry: PersistedQueryRetentionEntry, + ) => { + if (entry.mode !== `ttl`) { + return + } + + cancelPersistedRetentionExpiry(entry.queryHash) + + const delay = Math.max(0, entry.expiresAt - Date.now()) + const timer = setTimeout(() => { + persistedRetentionTimers.delete(entry.queryHash) + void runPersistedRetentionMaintenance(async () => { + const currentEntry = metadata?.collection.get( + `${QUERY_COLLECTION_GC_PREFIX}${entry.queryHash}`, + ) + const parsedCurrentEntry = parsePersistedQueryRetentionEntry( + currentEntry, + entry.queryHash, + ) + if ( + !parsedCurrentEntry || + parsedCurrentEntry.mode !== `ttl` || + parsedCurrentEntry.expiresAt > Date.now() + ) { + return + } + await cleanupPersistedPlaceholder(entry.queryHash) + }) + }, delay) + + persistedRetentionTimers.set(entry.queryHash, timer) + } + + const consumePersistedQueryRetentionAtStartup = async () => { if (!metadata) { return } @@ -825,19 +985,19 @@ export function queryCollectionOptions( const retentionEntries = metadata.collection.list(QUERY_COLLECTION_GC_PREFIX) const now = Date.now() - retentionEntries.forEach(({ key, value }) => { + for (const { key, value } of retentionEntries) { const hashedQueryKey = key.slice(QUERY_COLLECTION_GC_PREFIX.length) const parsed = parsePersistedQueryRetentionEntry(value, hashedQueryKey) if (!parsed) { - return + continue } if (parsed.mode === `ttl` && parsed.expiresAt <= now) { - cleanupPersistedPlaceholder(parsed.queryHash, { - deleteRetentionEntry: syncMode !== `on-demand`, - }) + await cleanupPersistedPlaceholder(parsed.queryHash) + } else if (parsed.mode === `ttl`) { + schedulePersistedRetentionExpiry(parsed) } - }) + } } /** @@ -861,14 +1021,48 @@ export function queryCollectionOptions( } } + const startupRetentionEntries = metadata?.collection.list( + QUERY_COLLECTION_GC_PREFIX, + ) + const startupRetentionMaintenancePromise = + 
!startupRetentionEntries || startupRetentionEntries.length === 0 + ? (() => { + startupRetentionSettled = true + return Promise.resolve() + })() + : runPersistedRetentionMaintenance(async () => { + try { + await consumePersistedQueryRetentionAtStartup() + } finally { + startupRetentionSettled = true + } + }) + const createQueryFromOpts = ( opts: LoadSubsetOptions = {}, queryFunction: typeof queryFn = queryFn, ): true | Promise => { + if (!startupRetentionSettled) { + return startupRetentionMaintenancePromise.then(() => { + const resumed = createQueryFromOpts(opts, queryFunction) + return resumed === true ? undefined : resumed + }) + } + // Generate key using common function const key = generateQueryKeyFromOptions(opts) const hashedQueryKey = hashKey(key) const extendedMeta = { ...meta, loadSubsetOptions: opts } + const retainedEntry = metadata?.collection.get( + `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, + ) + if ( + parsePersistedQueryRetentionEntry(retainedEntry, hashedQueryKey) !== + undefined + ) { + retainedQueriesPendingRevalidation.add(hashedQueryKey) + } + cancelPersistedRetentionExpiry(hashedQueryKey) if (state.observers.has(hashedQueryKey)) { // We already have a query for this queryKey @@ -993,84 +1187,127 @@ export function queryCollectionOptions( type UpdateHandler = Parameters[0] - // eslint-disable-next-line no-shadow - const makeQueryResultHandler = (queryKey: QueryKey) => { + const applySuccessfulResult = ( + queryKey: QueryKey, + result: QueryObserverResult, + persistedBaseline?: Map< + string | number, + { + value: any + owners: Set + } + >, + ) => { const hashedQueryKey = hashKey(queryKey) - const handleQueryResult: UpdateHandler = (result) => { - if (result.isSuccess) { - // Clear error state - state.lastError = undefined - state.errorCount = 0 - const rawData = result.data - const newItemsArray = select ? 
select(rawData) : rawData + if (collection.status === `cleaned-up`) { + return + } - if ( - !Array.isArray(newItemsArray) || - newItemsArray.some((item) => typeof item !== `object`) - ) { - const errorMessage = select - ? `@tanstack/query-db-collection: select() must return an array of objects. Got: ${typeof newItemsArray} for queryKey ${JSON.stringify(queryKey)}` - : `@tanstack/query-db-collection: queryFn must return an array of objects. Got: ${typeof newItemsArray} for queryKey ${JSON.stringify(queryKey)}` + // Clear error state + state.lastError = undefined + state.errorCount = 0 - console.error(errorMessage) - return - } + const rawData = result.data + const newItemsArray = select ? select(rawData) : rawData - const currentSyncedItems: Map = new Map( - collection._state.syncedData.entries(), - ) - const previouslyOwnedRows = - getPersistedOwnedRowsForQueryBaseline(hashedQueryKey) - const newItemsMap = new Map() - newItemsArray.forEach((item) => { - const key = getKey(item) - newItemsMap.set(key, item) - }) + if ( + !Array.isArray(newItemsArray) || + newItemsArray.some((item) => typeof item !== `object`) + ) { + const errorMessage = select + ? `@tanstack/query-db-collection: select() must return an array of objects. Got: ${typeof newItemsArray} for queryKey ${JSON.stringify(queryKey)}` + : `@tanstack/query-db-collection: queryFn must return an array of objects. Got: ${typeof newItemsArray} for queryKey ${JSON.stringify(queryKey)}` - begin() - if (metadata) { - metadata.collection.delete( - `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, - ) + console.error(errorMessage) + return + } + + const currentSyncedItems: Map = new Map( + collection._state.syncedData.entries(), + ) + const shouldUsePersistedBaseline = persistedBaseline !== undefined + const previouslyOwnedRows = shouldUsePersistedBaseline + ? 
new Set(persistedBaseline.keys()) + : getHydratedOwnedRowsForQueryBaseline(hashedQueryKey) + const newItemsMap = new Map() + newItemsArray.forEach((item) => { + const key = getKey(item) + newItemsMap.set(key, item) + }) + + begin() + if (metadata) { + metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) + } + + previouslyOwnedRows.forEach((key) => { + const oldItem = shouldUsePersistedBaseline + ? persistedBaseline.get(key)?.value + : currentSyncedItems.get(key) + if (!oldItem) { + return + } + const newItem = newItemsMap.get(key) + if (!newItem) { + const owners = getPersistedOwners(key) + owners.delete(hashedQueryKey) + setPersistedOwners(key, owners) + const needToRemove = removeRow(key, hashedQueryKey) + if (needToRemove) { + write({ type: `delete`, value: oldItem }) } + } else if (!deepEquals(oldItem, newItem)) { + write({ type: `update`, value: newItem }) + } + }) - previouslyOwnedRows.forEach((key) => { - const oldItem = currentSyncedItems.get(key) - if (!oldItem) { - return - } - const newItem = newItemsMap.get(key) - if (!newItem) { - const owners = getPersistedOwners(key) - owners.delete(hashedQueryKey) - setPersistedOwners(key, owners) - const needToRemove = removeRow(key, hashedQueryKey) // returns true if the row is no longer referenced by any queries - if (needToRemove) { - write({ type: `delete`, value: oldItem }) - } - } else if (!deepEquals(oldItem, newItem)) { - // Only update if there are actual differences in the properties - write({ type: `update`, value: newItem }) - } - }) + newItemsMap.forEach((newItem, key) => { + const owners = getPersistedOwners(key) + if (!owners.has(hashedQueryKey)) { + owners.add(hashedQueryKey) + setPersistedOwners(key, owners) + } + addRow(key, hashedQueryKey) + if (!currentSyncedItems.has(key)) { + write({ type: `insert`, value: newItem }) + } + }) - newItemsMap.forEach((newItem, key) => { - const owners = getPersistedOwners(key) - if (!owners.has(hashedQueryKey)) { - 
owners.add(hashedQueryKey) - setPersistedOwners(key, owners) - } - addRow(key, hashedQueryKey) - if (!currentSyncedItems.has(key)) { - write({ type: `insert`, value: newItem }) - } - }) + commit() + retainedQueriesPendingRevalidation.delete(hashedQueryKey) + cancelPersistedRetentionExpiry(hashedQueryKey) - commit() + // Mark collection as ready after first successful query result + markReady() + } - // Mark collection as ready after first successful query result - markReady() + const reconcileSuccessfulResult = async ( + queryKey: QueryKey, + result: QueryObserverResult, + ) => { + const hashedQueryKey = hashKey(queryKey) + const persistedBaseline = await loadPersistedBaselineForQuery(hashedQueryKey) + if (collection.status === `cleaned-up`) { + return + } + applySuccessfulResult(queryKey, result, persistedBaseline) + } + + // eslint-disable-next-line no-shadow + const makeQueryResultHandler = (queryKey: QueryKey) => { + const handleQueryResult: UpdateHandler = (result) => { + if (result.isSuccess) { + if (retainedQueriesPendingRevalidation.has(hashKey(queryKey))) { + void reconcileSuccessfulResult(queryKey, result).catch((error) => { + console.error( + `[QueryCollection] Error reconciling query ${String(queryKey)}:`, + error, + ) + }) + } else { + applySuccessfulResult(queryKey, result) + } } else if (result.isError) { const isNewError = result.errorUpdatedAt !== state.lastErrorUpdatedAt || @@ -1127,8 +1364,6 @@ export function queryCollectionOptions( unsubscribes.clear() } - consumePersistedQueryRetentionAtStartup() - // Mark that sync has started syncStarted = true @@ -1154,8 +1389,15 @@ export function queryCollectionOptions( }) } } else { - // In on-demand mode, mark ready immediately since there's no initial query - markReady() + if (startupRetentionSettled) { + markReady() + } else { + // In on-demand mode, there is no initial query, but retained-placeholder + // maintenance still needs to finish before the collection is treated as ready. 
+ void startupRetentionMaintenancePromise.then(() => { + markReady() + }) + } } // Always subscribe when sync starts (this could be from preload(), startSync config, or first subscriber) @@ -1176,6 +1418,8 @@ export function queryCollectionOptions( const cleanupQueryInternal = (hashedQueryKey: string) => { unsubscribes.get(hashedQueryKey)?.() unsubscribes.delete(hashedQueryKey) + cancelPersistedRetentionExpiry(hashedQueryKey) + retainedQueriesPendingRevalidation.delete(hashedQueryKey) const rowKeys = queryToRows.get(hashedQueryKey) ?? new Set() const nextOwnersByRow = new Map>() @@ -1282,6 +1526,13 @@ export function queryCollectionOptions( }, ) commit() + if (persistedGcTime !== Number.POSITIVE_INFINITY) { + schedulePersistedRetentionExpiry({ + queryHash: hashedQueryKey, + mode: `ttl`, + expiresAt: Date.now() + persistedGcTime, + }) + } } unsubscribes.get(hashedQueryKey)?.() unsubscribes.delete(hashedQueryKey) @@ -1320,6 +1571,10 @@ export function queryCollectionOptions( const cleanup = async () => { unsubscribeFromCollectionEvents() unsubscribeFromQueries() + persistedRetentionTimers.forEach((timer) => { + clearTimeout(timer) + }) + persistedRetentionTimers.clear() const allQueryKeys = [...hashToQueryKey.values()] const allHashedKeys = [...state.observers.keys()] diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 0e8e98ccd..44c7938ba 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -8,6 +8,7 @@ import { or, } from '@tanstack/db' import { stripVirtualProps } from '../../db/tests/utils' +import { persistedCollectionOptions } from '../../db-sqlite-persisted-collection-core/src' import { queryCollectionOptions } from '../src/query' import type { QueryFunctionContext } from '@tanstack/query-core' import type { @@ -39,44 +40,120 @@ const flushPromises = () => new Promise((resolve) => setTimeout(resolve, 0)) function 
createInMemorySyncMetadataApi< TKey extends string | number = string | number, + TItem extends object = Record, >(seed?: { rowMetadata?: ReadonlyMap collectionMetadata?: ReadonlyMap + persistedRows?: ReadonlyMap }): { api: SyncMetadataApi rowMetadata: Map collectionMetadata: Map + persistedRows: Map } { const rowMetadata = new Map(seed?.rowMetadata) const collectionMetadata = new Map(seed?.collectionMetadata) + const persistedRows = new Map(seed?.persistedRows) + const api = { + row: { + get: (key: TKey) => rowMetadata.get(key), + set: (key: TKey, value: unknown) => { + rowMetadata.set(key, value) + }, + delete: (key: TKey) => { + rowMetadata.delete(key) + }, + scanPersisted: async () => + Array.from(persistedRows.entries()).map(([key, value]) => ({ + key, + value, + metadata: rowMetadata.get(key), + })), + }, + collection: { + get: (key: string) => collectionMetadata.get(key), + set: (key: string, value: unknown) => { + collectionMetadata.set(key, value) + }, + delete: (key: string) => { + collectionMetadata.delete(key) + }, + list: (prefix?: string) => + Array.from(collectionMetadata.entries()) + .filter(([key]) => (prefix ? key.startsWith(prefix) : true)) + .map(([key, value]) => ({ key, value })), + }, + } return { rowMetadata, collectionMetadata, - api: { - row: { - get: (key) => rowMetadata.get(key), - set: (key, value) => { - rowMetadata.set(key, value) - }, - delete: (key) => { - rowMetadata.delete(key) - }, - }, - collection: { - get: (key) => collectionMetadata.get(key), - set: (key, value) => { - collectionMetadata.set(key, value) - }, - delete: (key) => { - collectionMetadata.delete(key) - }, - list: (prefix) => - Array.from(collectionMetadata.entries()) - .filter(([key]) => (prefix ? 
key.startsWith(prefix) : true)) - .map(([key, value]) => ({ key, value })), - }, + persistedRows, + api: api as SyncMetadataApi, + } +} + +function createPersistedQueryAdapter( + seed: { + rows?: ReadonlyMap + rowMetadata?: ReadonlyMap + collectionMetadata?: ReadonlyMap + } = {}, +) { + const rows = new Map(seed.rows) + const rowMetadata = new Map(seed.rowMetadata) + const collectionMetadata = new Map(seed.collectionMetadata) + + return { + rows, + rowMetadata, + collectionMetadata, + loadSubset: async () => + Array.from(rows.values()).map((value) => ({ + key: value.id, + value, + metadata: rowMetadata.get(value.id), + })), + loadCollectionMetadata: async () => + Array.from(collectionMetadata.entries()).map(([key, value]) => ({ + key, + value, + })), + scanRows: async () => + Array.from(rows.values()).map((value) => ({ + key: value.id, + value, + metadata: rowMetadata.get(value.id), + })), + applyCommittedTx: async (_collectionId: string, tx: any) => { + if (tx.truncate) { + rows.clear() + rowMetadata.clear() + } + for (const mutation of tx.mutations) { + if (mutation.type === `delete`) { + rows.delete(mutation.key) + rowMetadata.delete(mutation.key) + } else { + rows.set(mutation.key, mutation.value) + } + } + for (const mutation of tx.rowMetadataMutations ?? []) { + if (mutation.type === `delete`) { + rowMetadata.delete(mutation.key) + } else { + rowMetadata.set(mutation.key, mutation.value) + } + } + for (const mutation of tx.collectionMetadataMutations ?? 
[]) { + if (mutation.type === `delete`) { + collectionMetadata.delete(mutation.key) + } else { + collectionMetadata.set(mutation.key, mutation.value) + } + } }, + ensureIndex: async () => {}, } } @@ -4360,6 +4437,10 @@ describe(`QueryCollection`, () => { }, ], ]), + persistedRows: new Map([ + [ownedRow.id, ownedRow], + [unrelatedRow.id, unrelatedRow], + ]), }) const collection = createCollection({ @@ -4446,6 +4527,10 @@ describe(`QueryCollection`, () => { }, ], ]), + persistedRows: new Map([ + [orphanRow.id, orphanRow], + [sharedRow.id, sharedRow], + ]), }) const collection = createCollection({ @@ -4475,11 +4560,7 @@ describe(`QueryCollection`, () => { metadataHarness.collectionMetadata.get( `queryCollection:gc:${expiredQueryHash}`, ), - ).toEqual({ - queryHash: expiredQueryHash, - mode: `ttl`, - expiresAt: expect.any(Number), - }) + ).toBeUndefined() expect(metadataHarness.rowMetadata.get(sharedRow.id)).toEqual({ queryCollection: { owners: { @@ -4562,6 +4643,223 @@ describe(`QueryCollection`, () => { }) }) + it(`should clean up expired retained placeholders for cold persisted rows through the persisted wrapper`, async () => { + const queryHash = hashKey([`persisted-cold-ttl-cleanup`]) + const otherOwnerHash = `other-owner` + const orphanRow = { id: `1`, name: `Cold orphan`, category: `A` } + const sharedRow = { id: `2`, name: `Cold shared`, category: `B` } + const adapter = createPersistedQueryAdapter({ + rows: new Map([ + [orphanRow.id, orphanRow], + [sharedRow.id, sharedRow], + ]), + rowMetadata: new Map([ + [ + orphanRow.id, + { + queryCollection: { + owners: { + [queryHash]: true, + }, + }, + }, + ], + [ + sharedRow.id, + { + queryCollection: { + owners: { + [queryHash]: true, + [otherOwnerHash]: true, + }, + }, + }, + ], + ]), + collectionMetadata: new Map([ + [ + `queryCollection:gc:${queryHash}`, + { + queryHash, + mode: `ttl`, + expiresAt: Date.now() - 1_000, + }, + ], + ]), + }) + + const collection = createCollection( + persistedCollectionOptions({ + 
...(queryCollectionOptions({ + id: `persisted-cold-ttl-cleanup`, + queryClient, + queryKey: [`persisted-cold-ttl-cleanup`], + queryFn: async () => [], + getKey: (item: CategorisedItem): string => item.id, + syncMode: `on-demand`, + startSync: true, + }) as any), + persistence: { + adapter, + }, + }) as any, + ) + + await collection.stateWhenReady() + await flushPromises() + + expect(adapter.rows.has(orphanRow.id)).toBe(false) + expect(adapter.rows.has(sharedRow.id)).toBe(true) + expect( + adapter.collectionMetadata.has(`queryCollection:gc:${queryHash}`), + ).toBe(false) + expect(adapter.rowMetadata.get(sharedRow.id)).toEqual({ + queryCollection: { + owners: { + [otherOwnerHash]: true, + }, + }, + }) + }) + + it(`should revalidate retained queries against cold persisted baselines through the persisted wrapper`, async () => { + const queryHash = hashKey([`persisted-cold-retained`]) + const retainedRow = { id: `1`, name: `Stale retained`, category: `A` } + const adapter = createPersistedQueryAdapter({ + rows: new Map([[retainedRow.id, retainedRow]]), + rowMetadata: new Map([ + [ + retainedRow.id, + { + queryCollection: { + owners: { + [queryHash]: true, + }, + }, + }, + ], + ]), + collectionMetadata: new Map([ + [ + `queryCollection:gc:${queryHash}`, + { + queryHash, + mode: `until-revalidated`, + }, + ], + ]), + }) + + const collection = createCollection( + persistedCollectionOptions({ + ...(queryCollectionOptions({ + id: `persisted-cold-retained`, + queryClient, + queryKey: [`persisted-cold-retained`], + queryFn: async () => [], + getKey: (item: CategorisedItem): string => item.id, + syncMode: `on-demand`, + startSync: true, + }) as any), + persistence: { + adapter, + }, + }) as any, + ) + + await collection.stateWhenReady() + expect(adapter.rows.has(retainedRow.id)).toBe(true) + + const liveQuery = createLiveQueryCollection({ + query: (q) => q.from({ item: collection }), + }) + + await liveQuery.preload() + await flushPromises() + + 
expect(adapter.rows.has(retainedRow.id)).toBe(false) + expect( + adapter.collectionMetadata.has(`queryCollection:gc:${queryHash}`), + ).toBe(false) + }) + + it(`should expire retained ttl placeholders while the app stays open`, async () => { + vi.useFakeTimers() + try { + const baseQueryKey = [`runtime-ttl-retention-test`] + const retainedQueryHash = hashKey(baseQueryKey) + const items: Array = [ + { id: `1`, name: `Retained`, category: `A` }, + ] + const queryFn = vi.fn().mockResolvedValue(items) + + const config: QueryCollectionConfig = { + id: `runtime-ttl-retention-test`, + queryClient, + queryKey: () => baseQueryKey, + queryFn, + getKey: (item) => item.id, + syncMode: `on-demand`, + startSync: true, + persistedGcTime: 100, + } + + const baseOptions = queryCollectionOptions(config) + const originalSync = baseOptions.sync + const metadataHarness = createInMemorySyncMetadataApi< + string | number, + CategorisedItem + >({ + persistedRows: new Map(items.map((item) => [item.id, item])), + }) + + const collection = createCollection({ + ...baseOptions, + sync: { + sync: (params: Parameters[0]) => + originalSync.sync({ + ...params, + metadata: metadataHarness.api, + }), + }, + }) + + const liveQuery = createLiveQueryCollection({ + query: (q) => + q.from({ item: collection }).where(({ item }) => eq(item.category, `A`)), + }) + + await liveQuery.preload() + await vi.waitFor(() => { + expect(collection.size).toBe(1) + }) + + await liveQuery.cleanup() + + expect( + metadataHarness.collectionMetadata.get( + `queryCollection:gc:${retainedQueryHash}`, + ), + ).toEqual({ + queryHash: retainedQueryHash, + mode: `ttl`, + expiresAt: expect.any(Number), + }) + + await vi.advanceTimersByTimeAsync(150) + await vi.runOnlyPendingTimersAsync() + + expect( + metadataHarness.collectionMetadata.get( + `queryCollection:gc:${retainedQueryHash}`, + ), + ).toBeUndefined() + expect(collection.has(`1`)).toBe(false) + } finally { + vi.useRealTimers() + } + }) + it(`should reset refcount after 
query GC and reload (stale refcount bug)`, async () => { // This test catches Bug 2: stale refcounts after GC/remove // When TanStack Query GCs a query, the refcount should be cleaned up From fbb87e3aca2a1ccd333ed2ee4884da0239eef4e2 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 21:41:03 +0000 Subject: [PATCH 10/12] ci: apply automated fixes --- .../src/persisted.ts | 31 ++++--- .../src/sqlite-core-adapter.ts | 89 ++++++++++--------- .../tests/persisted.test.ts | 6 +- .../electric-db-collection/src/electric.ts | 9 +- .../tests/electric.test.ts | 16 ++-- packages/query-db-collection/src/query.ts | 75 +++++++++------- .../query-db-collection/tests/query.test.ts | 4 +- 7 files changed, 118 insertions(+), 112 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/persisted.ts b/packages/db-sqlite-persisted-collection-core/src/persisted.ts index 5a8844faa..ba4c0cfde 100644 --- a/packages/db-sqlite-persisted-collection-core/src/persisted.ts +++ b/packages/db-sqlite-persisted-collection-core/src/persisted.ts @@ -78,7 +78,9 @@ export type TxCommitted = { value: Record }> deletedKeys: Array - rowMetadataMutations?: Array> + rowMetadataMutations?: Array< + PersistedRowMetadataMutation + > collectionMetadataMutations?: Array } ) @@ -153,7 +155,9 @@ export type PullSinceResponse = requiresFullReload: false changedKeys: Array deletedKeys: Array - deltas?: Array, string | number>> + deltas?: Array< + ReplayableTxDelta, string | number> + > } | { type: `rpc:pullSince:res` @@ -176,9 +180,7 @@ export interface PersistedIndexSpec { export type PersistedRowMetadataMutation< TKey extends string | number = string | number, -> = - | { type: `set`; key: TKey; value: unknown } - | { type: `delete`; key: TKey } +> = { type: `set`; key: TKey; value: unknown } | { type: `delete`; key: TKey } export type PersistedCollectionMetadataMutation = | { type: `set`; key: string; value: unknown } @@ 
-1675,8 +1677,9 @@ class PersistedCollectionRuntime< value: Record }>, deletedKeys: args.deletedKeys, - rowMetadataMutations: - rowMetadataMutations as Array>, + rowMetadataMutations: rowMetadataMutations as Array< + PersistedRowMetadataMutation + >, collectionMetadataMutations, } } @@ -2212,7 +2215,8 @@ function createWrappedSyncConfig< ...sourceSyncConfig, sync: (params) => { const transactionStack: Array> = [] - const getOpenTransaction = () => transactionStack[transactionStack.length - 1] + const getOpenTransaction = () => + transactionStack[transactionStack.length - 1] let fullStartPromise: Promise | null = null const cancelledLoads = new WeakSet() runtime.setSyncControls({ @@ -2285,7 +2289,8 @@ function createWrappedSyncConfig< row: { get: (key: TKey) => { const openTransaction = getOpenTransaction() - const pendingWrite = openTransaction?.rowMetadataWrites.get(key) + const pendingWrite = + openTransaction?.rowMetadataWrites.get(key) if (pendingWrite) { return pendingWrite.type === `delete` ? 
undefined @@ -2371,8 +2376,8 @@ function createWrappedSyncConfig< }, list: (prefix?: string) => { const merged = new Map( - params.metadata!.collection - .list() + params + .metadata!.collection.list() .map(({ key, value }) => [key, value]), ) const openTransaction = getOpenTransaction() @@ -2463,7 +2468,9 @@ function createWrappedSyncConfig< return sourceResult } - sourceResult = normalizeSyncFnResult(sourceSyncConfig.sync(wrappedParams)) + sourceResult = normalizeSyncFnResult( + sourceSyncConfig.sync(wrappedParams), + ) return sourceResult })() diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index 92beffe6f..48b3eb820 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -1148,24 +1148,26 @@ export class SQLiteCorePersistenceAdapter< ) const currentRowVersion = versionRows[0]?.latest_row_version ?? 0 const nextRowVersion = Math.max(currentRowVersion + 1, tx.rowVersion) - const replayDelta: ReplayableTxDelta, TKey> | null = - tx.truncate - ? null - : { - txId: tx.txId, - latestRowVersion: nextRowVersion, - changedRows: tx.mutations - .filter((mutation) => mutation.type !== `delete`) - .map((mutation) => ({ - key: mutation.key, - value: mutation.value as Record, - })), - deletedKeys: tx.mutations - .filter((mutation) => mutation.type === `delete`) - .map((mutation) => mutation.key), - rowMetadataMutations: tx.rowMetadataMutations ?? [], - collectionMetadataMutations: tx.collectionMetadataMutations ?? [], - } + const replayDelta: ReplayableTxDelta< + Record, + TKey + > | null = tx.truncate + ? 
null + : { + txId: tx.txId, + latestRowVersion: nextRowVersion, + changedRows: tx.mutations + .filter((mutation) => mutation.type !== `delete`) + .map((mutation) => ({ + key: mutation.key, + value: mutation.value as Record, + })), + deletedKeys: tx.mutations + .filter((mutation) => mutation.type === `delete`) + .map((mutation) => mutation.key), + rowMetadataMutations: tx.rowMetadataMutations ?? [], + collectionMetadataMutations: tx.collectionMetadataMutations ?? [], + } if (tx.truncate) { await transactionDriver.run(`DELETE FROM ${collectionTableSql}`) @@ -1518,38 +1520,38 @@ export class SQLiteCorePersistenceAdapter< const [changedRows, deletedRows, latestVersionRows, replayRows] = await Promise.all([ - this.driver.query<{ key: string }>( - `SELECT key + this.driver.query<{ key: string }>( + `SELECT key FROM ${collectionTableSql} WHERE row_version > ?`, - [fromRowVersion], - ), - this.driver.query<{ key: string }>( - `SELECT key + [fromRowVersion], + ), + this.driver.query<{ key: string }>( + `SELECT key FROM ${tombstoneTableSql} WHERE row_version > ?`, - [fromRowVersion], - ), - this.driver.query<{ latest_row_version: number }>( - `SELECT latest_row_version + [fromRowVersion], + ), + this.driver.query<{ latest_row_version: number }>( + `SELECT latest_row_version FROM collection_version WHERE collection_id = ? LIMIT 1`, - [collectionId], - ), - this.driver.query<{ - tx_id: string - row_version: number - replay_json: string | null - replay_requires_full_reload: number - }>( - `SELECT tx_id, row_version, replay_json, replay_requires_full_reload + [collectionId], + ), + this.driver.query<{ + tx_id: string + row_version: number + replay_json: string | null + replay_requires_full_reload: number + }>( + `SELECT tx_id, row_version, replay_json, replay_requires_full_reload FROM applied_tx WHERE collection_id = ? AND row_version > ? 
ORDER BY term ASC, seq ASC`, - [collectionId, fromRowVersion], - ), - ]) + [collectionId, fromRowVersion], + ), + ]) const latestRowVersion = latestVersionRows[0]?.latest_row_version ?? 0 const changedKeyCount = changedRows.length + deletedRows.length @@ -1584,9 +1586,10 @@ export class SQLiteCorePersistenceAdapter< } const deltas = replayRows.map((row) => { - const parsed = JSON.parse( - row.replay_json ?? `null`, - ) as ReplayableTxDelta, TKey> | null + const parsed = JSON.parse(row.replay_json ?? `null`) as ReplayableTxDelta< + Record, + TKey + > | null if (!parsed) { throw new InvalidPersistedCollectionConfigError( `missing replay payload for applied_tx row`, diff --git a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts index 114b3d4bd..2ccd3a721 100644 --- a/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts +++ b/packages/db-sqlite-persisted-collection-core/tests/persisted.test.ts @@ -1,9 +1,5 @@ import { describe, expect, it } from 'vitest' -import { - IR, - createCollection, - createTransaction, -} from '@tanstack/db' +import { IR, createCollection, createTransaction } from '@tanstack/db' import { InvalidPersistedCollectionCoordinatorError, InvalidPersistedStorageKeyEncodingError, diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index f446e11da..9761feca4 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -1224,10 +1224,7 @@ function createElectricSync>( } } - if ( - record.kind === `reset` && - typeof record.updatedAt === `number` - ) { + if (record.kind === `reset` && typeof record.updatedAt === `number`) { return { kind: `reset` as const, updatedAt: record.updatedAt, @@ -1315,9 +1312,7 @@ function createElectricSync>( : undefined), handle: shapeOptions.handle ?? - (canUsePersistedResume - ? 
persistedResumeState.handle - : undefined), + (canUsePersistedResume ? persistedResumeState.handle : undefined), signal: abortController.signal, onError: (errorParams) => { // Just immediately mark ready if there's an error to avoid blocking diff --git a/packages/electric-db-collection/tests/electric.test.ts b/packages/electric-db-collection/tests/electric.test.ts index d9259e863..7824ae4ca 100644 --- a/packages/electric-db-collection/tests/electric.test.ts +++ b/packages/electric-db-collection/tests/electric.test.ts @@ -93,7 +93,9 @@ describe(`Electric Integration`, () => { } } - const createPersistedAdapter = (collectionMetadata?: Map) => ({ + const createPersistedAdapter = ( + collectionMetadata?: Map, + ) => ({ loadSubset: async () => [], loadCollectionMetadata: async () => Array.from((collectionMetadata ?? new Map()).entries()).map( @@ -3349,9 +3351,7 @@ describe(`Electric Integration`, () => { handle: undefined, }), ) - expect( - metadataHarness.collectionMetadata.get(`electric:resume`), - ).toEqual( + expect(metadataHarness.collectionMetadata.get(`electric:resume`)).toEqual( expect.objectContaining({ kind: `reset`, }), @@ -3390,9 +3390,7 @@ describe(`Electric Integration`, () => { }, ]) - expect( - metadataHarness.collectionMetadata.get(`electric:resume`), - ).toEqual( + expect(metadataHarness.collectionMetadata.get(`electric:resume`)).toEqual( expect.objectContaining({ kind: `reset`, }), @@ -3446,9 +3444,7 @@ describe(`Electric Integration`, () => { }, ]) - expect( - metadataHarness.collectionMetadata.get(`electric:resume`), - ).toEqual( + expect(metadataHarness.collectionMetadata.get(`electric:resume`)).toEqual( expect.objectContaining({ kind: `resume`, offset: `10_0`, diff --git a/packages/query-db-collection/src/query.ts b/packages/query-db-collection/src/query.ts index 105e1a8a9..62efb144e 100644 --- a/packages/query-db-collection/src/query.ts +++ b/packages/query-db-collection/src/query.ts @@ -240,14 +240,15 @@ type PersistedScannedRowForQuery = { 
metadata?: unknown } -type QuerySyncMetadataWithPersistedScan = - SyncMetadataApi & { - row: SyncMetadataApi[`row`] & { - scanPersisted?: (options?: { - metadataOnly?: boolean - }) => Promise>> - } +type QuerySyncMetadataWithPersistedScan = SyncMetadataApi< + string | number +> & { + row: SyncMetadataApi[`row`] & { + scanPersisted?: (options?: { + metadataOnly?: boolean + }) => Promise>> } +} /** * Implementation class for QueryCollectionUtils with explicit dependency injection @@ -859,16 +860,18 @@ export function queryCollectionOptions( string | number, { value: any; owners: Set } >() - getHydratedOwnedRowsForQueryBaseline(hashedQueryKey).forEach((rowKey) => { - const value = collection.get(rowKey) - const owners = rowToQueries.get(rowKey) - if (value && owners) { - baseline.set(rowKey, { - value, - owners: new Set(owners), - }) - } - }) + getHydratedOwnedRowsForQueryBaseline(hashedQueryKey).forEach( + (rowKey) => { + const value = collection.get(rowKey) + const owners = rowToQueries.get(rowKey) + if (value && owners) { + baseline.set(rowKey, { + value, + owners: new Set(owners), + }) + } + }, + ) return baseline } @@ -879,8 +882,7 @@ export function queryCollectionOptions( const scannedRows = await scanPersisted() scannedRows.forEach((row) => { - const rowMetadata = - row.metadata as Record | undefined + const rowMetadata = row.metadata as Record | undefined const queryMetadata = rowMetadata?.queryCollection if (!queryMetadata || typeof queryMetadata !== `object`) { return @@ -914,9 +916,7 @@ export function queryCollectionOptions( return baseline } - const cleanupPersistedPlaceholder = async ( - hashedQueryKey: string, - ) => { + const cleanupPersistedPlaceholder = async (hashedQueryKey: string) => { if (!metadata) { return } @@ -939,7 +939,9 @@ export function queryCollectionOptions( write({ type: `delete`, value: row }) }) - metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) + metadata.collection.delete( + 
`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, + ) commit() } @@ -982,7 +984,9 @@ export function queryCollectionOptions( return } - const retentionEntries = metadata.collection.list(QUERY_COLLECTION_GC_PREFIX) + const retentionEntries = metadata.collection.list( + QUERY_COLLECTION_GC_PREFIX, + ) const now = Date.now() for (const { key, value } of retentionEntries) { @@ -1238,7 +1242,9 @@ export function queryCollectionOptions( begin() if (metadata) { - metadata.collection.delete(`${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`) + metadata.collection.delete( + `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, + ) } previouslyOwnedRows.forEach((key) => { @@ -1287,7 +1293,8 @@ export function queryCollectionOptions( result: QueryObserverResult, ) => { const hashedQueryKey = hashKey(queryKey) - const persistedBaseline = await loadPersistedBaselineForQuery(hashedQueryKey) + const persistedBaseline = + await loadPersistedBaselineForQuery(hashedQueryKey) if (collection.status === `cleaned-up`) { return } @@ -1515,14 +1522,14 @@ export function queryCollectionOptions( metadata.collection.set( `${QUERY_COLLECTION_GC_PREFIX}${hashedQueryKey}`, { - queryHash: hashedQueryKey, - mode: - persistedGcTime === Number.POSITIVE_INFINITY - ? `until-revalidated` - : `ttl`, - ...(persistedGcTime === Number.POSITIVE_INFINITY - ? {} - : { expiresAt: Date.now() + persistedGcTime }), + queryHash: hashedQueryKey, + mode: + persistedGcTime === Number.POSITIVE_INFINITY + ? `until-revalidated` + : `ttl`, + ...(persistedGcTime === Number.POSITIVE_INFINITY + ? 
{} + : { expiresAt: Date.now() + persistedGcTime }), }, ) commit() diff --git a/packages/query-db-collection/tests/query.test.ts b/packages/query-db-collection/tests/query.test.ts index 44c7938ba..42171a58e 100644 --- a/packages/query-db-collection/tests/query.test.ts +++ b/packages/query-db-collection/tests/query.test.ts @@ -4826,7 +4826,9 @@ describe(`QueryCollection`, () => { const liveQuery = createLiveQueryCollection({ query: (q) => - q.from({ item: collection }).where(({ item }) => eq(item.category, `A`)), + q + .from({ item: collection }) + .where(({ item }) => eq(item.category, `A`)), }) await liveQuery.preload() From 9c5c4e6ae443d0f32bc472bf433f70bdec36da9b Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 17 Mar 2026 21:54:25 +0000 Subject: [PATCH 11/12] fix: encode replay deltas with persisted serializer Use the persisted JSON encoder for replay payloads so bigint and date values survive applied_tx serialization and package-level SQLite adapter tests pass under the CLI runtime. Made-with: Cursor --- .../src/sqlite-core-adapter.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index 48b3eb820..fd3849c79 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -616,7 +616,7 @@ function decodeStoredSqliteRows( } function stableStringify(value: unknown): string { - return JSON.stringify(value) + return serializePersistedRowValue(value) } function compileSqlExpression( @@ -1586,10 +1586,9 @@ export class SQLiteCorePersistenceAdapter< } const deltas = replayRows.map((row) => { - const parsed = JSON.parse(row.replay_json ?? 
`null`) as ReplayableTxDelta< - Record, - TKey - > | null + const parsed = deserializePersistedRowValue< + ReplayableTxDelta, TKey> | null + >(row.replay_json ?? `null`) if (!parsed) { throw new InvalidPersistedCollectionConfigError( `missing replay payload for applied_tx row`, From a1a2f2ef71446f53d04348e19956d5ff660204e9 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 21:56:25 +0000 Subject: [PATCH 12/12] ci: apply automated fixes --- .../src/sqlite-core-adapter.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts index fd3849c79..78748e849 100644 --- a/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts +++ b/packages/db-sqlite-persisted-collection-core/src/sqlite-core-adapter.ts @@ -1586,9 +1586,10 @@ export class SQLiteCorePersistenceAdapter< } const deltas = replayRows.map((row) => { - const parsed = deserializePersistedRowValue< - ReplayableTxDelta, TKey> | null - >(row.replay_json ?? `null`) + const parsed = deserializePersistedRowValue, + TKey + > | null>(row.replay_json ?? `null`) if (!parsed) { throw new InvalidPersistedCollectionConfigError( `missing replay payload for applied_tx row`,