diff --git a/.changeset/calm-batches-track.md b/.changeset/calm-batches-track.md new file mode 100644 index 000000000000..9410799efcc2 --- /dev/null +++ b/.changeset/calm-batches-track.md @@ -0,0 +1,10 @@ +--- +"@fluidframework/container-runtime": minor +"__section": other +--- + +Remove the `Fluid.ContainerRuntime.DisableBatchIdTracking` config kill-switch and gate batchId tracking on the Offline Load opt-in + +The internal `Fluid.ContainerRuntime.DisableBatchIdTracking` config flag has been removed. It was previously used as a kill-switch to suppress batchId stamping and `DuplicateBatchDetector` activation when both `FlushMode.TurnBased` and grouped batching were enabled. The flag is no longer needed: batchId tracking is now enabled iff the Offline Load feature is opted into via `Fluid.Container.enableOfflineFull`, which is also the natural off-ramp if a regression is observed. + +Containers that do not opt into Offline Load no longer run `DuplicateBatchDetector`. Forked-container duplicate detection now requires the Offline Load opt-in. diff --git a/packages/runtime/container-runtime/src/containerRuntime.ts b/packages/runtime/container-runtime/src/containerRuntime.ts index 9dec4133dc03..724242905724 100644 --- a/packages/runtime/container-runtime/src/containerRuntime.ts +++ b/packages/runtime/container-runtime/src/containerRuntime.ts @@ -1455,13 +1455,13 @@ export class ContainerRuntime private readonly _flushMode: FlushMode; private readonly stagingModeAutoFlushThreshold: number; /** - * BatchId tracking is needed whenever there's a possibility of a "forked Container", - * where the same local state is pending in two different running Containers, each of - * which is trying to ensure it's persisted. - * "Offline Load" from serialized pending state is one such scenario since two Containers - * could load from the same serialized pending state. + * Whether the "Offline Load" feature is opted into via Fluid.Container.enableOfflineFull. + * This gates batchId stamping on resubmit and DuplicateBatchDetector, both of which are + * only meaningful when "forked Container" scenarios are possible (i.e. when the same + * local state is pending in two different running Containers loaded from the same + * serialized pending state). */ - private readonly batchIdTrackingEnabled: boolean; + private readonly offlineEnabled: boolean; private flushScheduled = false; private canSendOps: boolean; @@ -1890,39 +1890,28 @@ export class ContainerRuntime this.mc.config.getNumber("Fluid.ContainerRuntime.StagingModeAutoFlushThreshold") ?? runtimeOptions.stagingModeAutoFlushThreshold ?? defaultStagingModeAutoFlushThreshold; - // BatchId tracking powers DuplicateBatchDetector (catching forked-container duplicates) - // and is also a prerequisite for the Offline Load feature. It is enabled by default - // when both TurnBased flush mode and grouped batching are active; the kill-switch - // below allows disabling it without a code change if a regression is observed. - // Grouped batching is required because resubmits can produce empty batches that must - // still be sent on the wire as a placeholder grouped batch to preserve their batchId - // (see OpGroupingManager.createEmptyGroupedBatch / outbox.flushEmptyBatch). - // Offline Load requires both prerequisites, so a consumer that opts into it without - // them gets an explicit UsageError rather than silent degradation. - const offlineLoadRequested = + // Offline Load requires both TurnBased flush mode and grouped batching; grouped + // batching matters because resubmits can produce empty batches that must still be + // sent as a placeholder grouped batch to preserve their batchId (see + // OpGroupingManager.createEmptyGroupedBatch / outbox.flushEmptyBatch). + this.offlineEnabled = this.mc.config.getBoolean("Fluid.Container.enableOfflineFull") === true; - const disableBatchIdTracking = - this.mc.config.getBoolean("Fluid.ContainerRuntime.DisableBatchIdTracking") === true; - if (offlineLoadRequested && this._flushMode !== FlushMode.TurnBased) { + if (this.offlineEnabled && this._flushMode !== FlushMode.TurnBased) { const error = new UsageError("Offline mode is only supported in turn-based mode"); this.closeFn(error); throw error; } - if (offlineLoadRequested && !this.groupedBatchingEnabled) { + if (this.offlineEnabled && !this.groupedBatchingEnabled) { const error = new UsageError("Offline mode requires grouped batching to be enabled"); this.closeFn(error); throw error; } - this.batchIdTrackingEnabled = - !disableBatchIdTracking && - this._flushMode === FlushMode.TurnBased && - this.groupedBatchingEnabled; - // DuplicateBatchDetector maintains a cache of all batchIds/sequenceNumbers within the - // collab window. Skip allocating it when batchId tracking is off. - if (this.batchIdTrackingEnabled) { + // collab window. It is only needed in the Offline Load scenario (catching forked- + // container duplicates), so skip allocating it otherwise. + if (this.offlineEnabled) { this.duplicateBatchDetector = new DuplicateBatchDetector(recentBatchInfo); } @@ -4995,7 +4984,7 @@ export class ContainerRuntime const resubmitInfo = { // Only include Batch ID if "Offline Load" feature is enabled // It's only needed to identify batches across container forks arising from misuse of offline load. - batchId: this.batchIdTrackingEnabled ? batchId : undefined, + batchId: this.offlineEnabled ? batchId : undefined, staged, }; diff --git a/packages/runtime/container-runtime/src/test/containerRuntime.spec.ts b/packages/runtime/container-runtime/src/test/containerRuntime.spec.ts index 93e78c4317a2..a1740594d16e 100644 --- a/packages/runtime/container-runtime/src/test/containerRuntime.spec.ts +++ b/packages/runtime/container-runtime/src/test/containerRuntime.spec.ts @@ -436,8 +436,13 @@ describe("Runtime", () => { let batchBegin = 0; let batchEnd = 0; let callsToEnsure = 0; + // Empty-batch placeholder resubmit is part of the Offline Load feature + // (it preserves a forked container's view of the batchId), so this test + // needs to opt into it. const { runtime: containerRuntime } = await ContainerRuntime.loadRuntime2({ - context: getMockContext() as IContainerContext, + context: getMockContext({ + settings: { "Fluid.Container.enableOfflineFull": true }, + }) as IContainerContext, registry: new FluidDataStoreRegistry([]), existing: false, runtimeOptions: {}, @@ -477,13 +482,14 @@ describe("Runtime", () => { assert.strictEqual(containerRuntime.isDirty, false); }); - // BatchId tracking is on by default (TurnBased mode); the kill-switch suppresses stamping. + // BatchId tracking (and thus batchId stamping on resubmit) is enabled iff + // the Offline Load feature is opted into via Fluid.Container.enableOfflineFull. for (const variant of [ - { name: "default (no settings)", settings: {}, expectStamped: true }, + { name: "default (no settings)", settings: {}, expectStamped: false }, { - name: "kill-switch active", - settings: { "Fluid.ContainerRuntime.DisableBatchIdTracking": true }, - expectStamped: false, + name: "Offline Load enabled", + settings: { "Fluid.Container.enableOfflineFull": true }, + expectStamped: true, }, ]) it(`Replaying ops should resend in correct order, with batch ID if applicable (${variant.name})`, async () => { @@ -2357,13 +2363,14 @@ describe("Runtime", () => { }); describe("Duplicate Batch Detection", () => { - // BatchId tracking is on by default (TurnBased); the kill-switch suppresses detection. + // BatchId tracking (and thus duplicate batch detection) is enabled iff the + // Offline Load feature is opted into via Fluid.Container.enableOfflineFull. for (const variant of [ - { name: "default (no settings)", settings: {}, expectDetection: true }, + { name: "default (no settings)", settings: {}, expectDetection: false }, { - name: "kill-switch active", - settings: { "Fluid.ContainerRuntime.DisableBatchIdTracking": true }, - expectDetection: false, + name: "Offline Load enabled", + settings: { "Fluid.Container.enableOfflineFull": true }, + expectDetection: true, }, ]) { it(`DuplicateBatchDetector reflects batch-id tracking enablement (${variant.name})`, async () => { @@ -2413,7 +2420,7 @@ describe("Runtime", () => { }); } - it("Default-on tracking is silently skipped in FlushMode.Immediate (no UsageError)", async () => { + it("Without Offline Load opt-in, no tracking happens in FlushMode.Immediate", async () => { const { runtime: containerRuntime } = await ContainerRuntime.loadRuntime2({ context: getMockContext() as IContainerContext, registry: new FluidDataStoreRegistry([]), @@ -2424,7 +2431,8 @@ describe("Runtime", () => { }, provideEntryPoint: mockProvideEntryPoint, }); - // Sending a duplicate batchId should not throw because tracking is inactive in Immediate mode. + // Sending a duplicate batchId should not throw because tracking is inactive without + // the Offline Load opt-in. containerRuntime.process( { sequenceNumber: 123, @@ -2474,10 +2482,7 @@ describe("Runtime", () => { assert.strictEqual(containerErrors.length, 1); }); - // Regression: enabling default-on batchId tracking when grouped batching is disabled - // asserts 0xa00 in OpGroupingManager.createEmptyGroupedBatch the first time a resubmit - // produces an empty batch. Tracking must be silently skipped in that configuration. - it("Default-on tracking is silently skipped when grouped batching is disabled", async () => { + it("Without Offline Load opt-in, no tracking happens when grouped batching is disabled", async () => { const { runtime: containerRuntime } = await ContainerRuntime.loadRuntime2({ context: getMockContext() as IContainerContext, registry: new FluidDataStoreRegistry([]), @@ -2488,8 +2493,8 @@ describe("Runtime", () => { }, provideEntryPoint: mockProvideEntryPoint, }); - // Sending a duplicate batchId should not throw because tracking is inactive - // when grouped batching is off. + // Sending a duplicate batchId should not throw because tracking is inactive without + // the Offline Load opt-in (the grouped-batching-off setting is incidental). containerRuntime.process( { sequenceNumber: 123, @@ -2540,9 +2545,11 @@ describe("Runtime", () => { }); it("Can roundtrip DuplicateBatchDetector state through summary/snapshot", async () => { - // Duplicate Batch Detection is on by default in TurnBased mode. + // Duplicate Batch Detection is enabled by opting into Offline Load. const { runtime: containerRuntime } = await ContainerRuntime.loadRuntime2({ - context: getMockContext() as IContainerContext, + context: getMockContext({ + settings: { "Fluid.Container.enableOfflineFull": true }, + }) as IContainerContext, registry: new FluidDataStoreRegistry([]), existing: false, runtimeOptions: { @@ -2574,6 +2581,7 @@ describe("Runtime", () => { }; const { runtime: containerRuntime2 } = await ContainerRuntime.loadRuntime2({ context: getMockContext({ + settings: { "Fluid.Container.enableOfflineFull": true }, baseSnapshot: { trees: {}, blobs: { [recentBatchInfoBlobName]: "nonempty_id_ignored_by_mockStorage" }, diff --git a/packages/test/test-end-to-end-tests/src/test/fewerBatches.spec.ts b/packages/test/test-end-to-end-tests/src/test/fewerBatches.spec.ts index 059c75ccd24a..7d923e1da455 100644 --- a/packages/test/test-end-to-end-tests/src/test/fewerBatches.spec.ts +++ b/packages/test/test-end-to-end-tests/src/test/fewerBatches.spec.ts @@ -165,12 +165,10 @@ describeCompat("Fewer batches", "NoCompat", (getTestObjectProvider, apis) => { expectedErrors, async () => { // The fake sequenced op injected below reuses a sequence number that the - // DuplicateBatchDetector has already seen, which would trip its invariants and - // short-circuit the flow before the deltaManager's non-Sequential check fires. - // Disable batchId tracking here so the test can exercise its actual scenario. - await processOutOfOrderOp({ - "Fluid.ContainerRuntime.DisableBatchIdTracking": true, - }); + // DuplicateBatchDetector would catch — but tracking is gated on the Offline + // Load opt-in (Fluid.Container.enableOfflineFull), which this test does not + // set, so the detector is inactive and the test exercises its actual scenario. + await processOutOfOrderOp(); assert.strictEqual(capturedBatches.length, 2); }, ); diff --git a/packages/tools/replay-tool/src/helpers.ts b/packages/tools/replay-tool/src/helpers.ts index d4063d2b8b12..c769e28092c6 100644 --- a/packages/tools/replay-tool/src/helpers.ts +++ b/packages/tools/replay-tool/src/helpers.ts @@ -200,6 +200,8 @@ export async function loadContainer( "Fluid.Container.summarizeProtocolTree2": true, // This is to align with the snapshot tests which may upgrade GC Version before the default is changed. "Fluid.GarbageCollection.GCVersionUpgradeToV4": false, + // manually enable offline full mode while this flag is still not true by default. + "Fluid.Container.enableOfflineFull": true, }; const configProvider: IConfigProviderBase = { getRawConfig: (name: string): ConfigTypes => settings[name],