Skip to content
22 changes: 22 additions & 0 deletions API-INTERNAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ If the requested key is a collection, it will return an object with all the coll
<dt><a href="#remove">remove()</a></dt>
<dd><p>Remove a key from Onyx and update the subscribers</p>
</dd>
<dt><a href="#wait">wait()</a></dt>
<dd><p>Returns a promise that resolves after the given number of milliseconds.</p>
</dd>
<dt><a href="#getRetryDelay">getRetryDelay()</a></dt>
<dd><p>Calculates exponential backoff delay with jitter for a given retry attempt.
Formula: baseDelay * 2^attempt ± jitter
Attempt 0: ~100ms, Attempt 1: ~200ms, ..., Attempt 4: ~1600ms</p>
</dd>
<dt><a href="#retryOperation">retryOperation()</a></dt>
<dd><p>Handles storage operation failures based on the error type:</p>
<ul>
Expand Down Expand Up @@ -337,6 +345,20 @@ Gets the data for a given an array of matching keys, combines them into an objec
## remove()
Remove a key from Onyx and update the subscribers

**Kind**: global function
<a name="wait"></a>

## wait()
Returns a promise that resolves after the given number of milliseconds.

**Kind**: global function
<a name="getRetryDelay"></a>

## getRetryDelay()
Calculates exponential backoff delay with jitter for a given retry attempt.
Formula: baseDelay * 2^attempt ± jitter
Attempt 0: ~100ms, Attempt 1: ~200ms, ..., Attempt 4: ~1600ms

**Kind**: global function
<a name="retryOperation"></a>

Expand Down
64 changes: 61 additions & 3 deletions lib/OnyxUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,24 @@ const STORAGE_ERRORS = [...IDB_STORAGE_ERRORS, ...SQLITE_STORAGE_ERRORS];
// Max number of retries for failed storage operations
const MAX_STORAGE_OPERATION_RETRY_ATTEMPTS = 5;

// Connection/state errors where the DB needs time to recover — backoff helps, eviction does not.
// Entries are written in lowercase because retryOperation lowercases the error name and message
// and then does a substring match against these lists.
const IDB_CONNECTION_ERRORS = [
'internal error opening backing store', // Chrome/Edge: corrupted IDB state
'connection to indexed database server lost', // Safari: IDB connection dropped
'the database connection is closing', // Cross-browser: DB closing during write
] as const;

// SQLite counterparts of the connection/state errors above (same lowercase substring matching).
const SQLITE_CONNECTION_ERRORS = [
'disk i/o error', // Native: filesystem/device stress
'database is locked', // Native: concurrent access contention
] as const;

// Combined list that retryOperation checks to decide whether a failure is a connection error.
const CONNECTION_ERRORS = [...IDB_CONNECTION_ERRORS, ...SQLITE_CONNECTION_ERRORS];

// Retry backoff configuration (consumed by getRetryDelay)
// Delay used for attempt 0; getRetryDelay doubles it for every subsequent attempt.
const RETRY_BASE_DELAY_MS = 100;
// Maximum random deviation applied to each delay, as a fraction of that delay (0.25 = ±25%).
const RETRY_JITTER_FACTOR = 0.25;

type OnyxMethod = ValueOf<typeof METHOD>;

// Key/value store of Onyx key and arrays of values to merge
Expand Down Expand Up @@ -763,6 +781,26 @@ function remove<TKey extends OnyxKey>(key: TKey, isProcessingCollectionUpdate?:
return Storage.removeItem(key).then(() => undefined);
}

/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that settles (with no value) once the timer fires.
 */
function wait(ms: number): Promise<void> {
    return new Promise<void>((done) => {
        setTimeout(() => done(), ms);
    });
}

/**
 * Works out how long to pause before the next retry of a failed storage operation.
 *
 * The delay starts at RETRY_BASE_DELAY_MS and doubles with every attempt. A random offset of up to
 * ±RETRY_JITTER_FACTOR of that value is then added so that concurrent retries do not line up.
 * With a 100ms base this gives roughly 100ms for attempt 0, 200ms for attempt 1, ... and 1600ms for attempt 4.
 *
 * @param attempt - Zero-based retry attempt number.
 * @returns The delay in whole milliseconds, never below zero.
 */
function getRetryDelay(attempt: number): number {
    const exponentialDelay = RETRY_BASE_DELAY_MS * 2 ** attempt;

    // Random factor in [-1, 1): negative shortens the delay, positive lengthens it.
    const randomDirection = 2 * Math.random() - 1;
    const jitterOffset = exponentialDelay * RETRY_JITTER_FACTOR * randomDirection;

    const roundedDelay = Math.round(exponentialDelay + jitterOffset);
    return Math.max(0, roundedDelay);
}

function reportStorageQuota(error?: Error): Promise<void> {
return Storage.getDatabaseSize()
.then(({bytesUsed, bytesRemaining}) => {
Expand Down Expand Up @@ -793,15 +831,35 @@ function retryOperation<TMethod extends RetriableOnyxOperation>(error: Error, on
const errorMessage = error?.message?.toLowerCase?.();
const errorName = error?.name?.toLowerCase?.();
const isStorageCapacityError = STORAGE_ERRORS.some((storageError) => errorName?.includes(storageError) || errorMessage?.includes(storageError));
const isConnectionError = CONNECTION_ERRORS.some((connError) => errorName?.includes(connError) || errorMessage?.includes(connError));

if (nextRetryAttempt > MAX_STORAGE_OPERATION_RETRY_ATTEMPTS) {
Logger.logAlert(`Storage operation failed after 5 retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`);
if (isConnectionError) {
Logger.logAlert(`Connection error exhausted all retries with backoff. Error: ${error}. onyxMethod: ${onyxMethod.name}.`);
} else {
Logger.logAlert(`Storage operation failed after 5 retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`);
}
return Promise.resolve();
}

if (!isStorageCapacityError) {
// @ts-expect-error No overload matches this call.
return onyxMethod(defaultParams, nextRetryAttempt);
const delay = getRetryDelay(currentRetryAttempt);

if (isConnectionError) {
Logger.logInfo(
`Connection error detected, retrying with backoff (${delay}ms). Error: ${error}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${nextRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`,
);
}

return wait(delay).then(() =>
// @ts-expect-error No overload matches this call.
Promise.resolve(onyxMethod(defaultParams, nextRetryAttempt)).then(() => {
if (!isConnectionError) {
return;
}
Logger.logInfo(`Connection error recovered after backoff on attempt ${nextRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}. onyxMethod: ${onyxMethod.name}.`);
Comment on lines +856 to +860
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid logging recovery when connection retries still exhaust

The new recovery log is emitted whenever onyxMethod(defaultParams, nextRetryAttempt) resolves, but retryOperation() itself resolves even after exhausting all retries, so a permanently failing connection error will still produce Connection error recovered... before/alongside the exhaustion alert. In practice this creates false-positive recovery telemetry for the experiment and can mislead rollout decisions, because exhausted retry chains are counted as successful recoveries.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sounds valid in the case where we fail multiple times rather than just once?

}),
);
}

// Find the least recently accessed evictable key that we can remove
Expand Down
126 changes: 118 additions & 8 deletions tests/unit/onyxUtilsTest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -432,21 +432,35 @@ describe('OnyxUtils', () => {
const diskFullError = new Error('database or disk is full');

it('should retry only one time if the operation is firstly failed and then passed', async () => {
StorageMock.setItem = jest.fn(StorageMock.setItem).mockRejectedValueOnce(genericError).mockImplementation(StorageMock.setItem);
jest.useFakeTimers();
try {
StorageMock.setItem = jest.fn(StorageMock.setItem).mockRejectedValueOnce(genericError).mockImplementation(StorageMock.setItem);

await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
const setPromise = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
await jest.runAllTimersAsync();
await setPromise;

// Should be called once, since Storage.setItem if failed only once
expect(retryOperationSpy).toHaveBeenCalledTimes(1);
// Should be called once, since Storage.setItem failed only once
expect(retryOperationSpy).toHaveBeenCalledTimes(1);
} finally {
jest.useRealTimers();
}
});

it('should stop retrying after MAX_STORAGE_OPERATION_RETRY_ATTEMPTS retries for failing operation', async () => {
StorageMock.setItem = jest.fn().mockRejectedValue(genericError);
jest.useFakeTimers();
try {
StorageMock.setItem = jest.fn().mockRejectedValue(genericError);

await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
const setPromise = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
await jest.runAllTimersAsync();
await setPromise;

// Should be called 6 times: initial attempt + 5 retries (MAX_STORAGE_OPERATION_RETRY_ATTEMPTS)
expect(retryOperationSpy).toHaveBeenCalledTimes(6);
// Should be called 6 times: initial attempt + 5 retries (MAX_STORAGE_OPERATION_RETRY_ATTEMPTS)
expect(retryOperationSpy).toHaveBeenCalledTimes(6);
} finally {
jest.useRealTimers();
}
});

it("should throw error for if operation failed with \"Failed to execute 'put' on 'IDBObjectStore': invalid data\" error", async () => {
Expand Down Expand Up @@ -512,6 +526,102 @@ describe('OnyxUtils', () => {
await OnyxUtils.remove(evictableKey);
expect(OnyxCache.getKeyForEviction()).toBeUndefined();
});

it('should apply exponential backoff delay for non-capacity errors', async () => {
    jest.useFakeTimers();
    const setTimeoutSpy = jest.spyOn(global, 'setTimeout');
    try {
        StorageMock.setItem = jest.fn().mockRejectedValue(genericError);

        const setPromise = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
        await jest.runAllTimersAsync();
        await setPromise;

        // Filter setTimeout calls to only those from our wait() helper (delay > 0)
        const backoffDelays = setTimeoutSpy.mock.calls.map((call) => call[1]).filter((delay): delay is number => typeof delay === 'number' && delay > 0);

        // Should have 5 backoff delays (one before each of the 5 retries, attempts 0-4)
        // The 6th call to retryOperation (attempt 5) hits the MAX check and resolves without waiting
        expect(backoffDelays).toHaveLength(5);

        // Verify exponential growth pattern: each delay should be roughly double the previous.
        // With ±25% jitter the true ratio delay[n+1] / delay[n] lies between ~1.2 and ~3.3;
        // the asserted bounds (1.0 and 4.0) are deliberately a little looser than that.
        for (let i = 1; i < backoffDelays.length; i++) {
            const ratio = backoffDelays[i] / backoffDelays[i - 1];
            expect(ratio).toBeGreaterThan(1.0);
            expect(ratio).toBeLessThan(4.0);
        }
    } finally {
        // Restore in `finally` so a failing assertion cannot leak the setTimeout spy or the fake timers into later tests.
        setTimeoutSpy.mockRestore();
        jest.useRealTimers();
    }
});

it('should log connection error with backoff delay info', async () => {
    jest.useFakeTimers();
    try {
        const infoLogSpy = jest.spyOn(Logger, 'logInfo');
        const lostConnectionError = new Error('Connection to Indexed Database server lost. Refresh the page to try again');

        // Reject the first write only; every later write goes through the previous setItem implementation.
        const previousSetItem = StorageMock.setItem;
        StorageMock.setItem = jest.fn(previousSetItem).mockRejectedValueOnce(lostConnectionError).mockImplementation(previousSetItem);

        const pendingSet = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
        await jest.runAllTimersAsync();
        await pendingSet;

        // The info log must mention both the backoff retry and the original error text.
        const expectedFragments = ['Connection error detected, retrying with backoff', 'Connection to Indexed Database server lost'];
        expectedFragments.forEach((fragment) => {
            expect(infoLogSpy).toHaveBeenCalledWith(expect.stringContaining(fragment));
        });
    } finally {
        jest.useRealTimers();
    }
});

it('should log recovery when connection error succeeds after backoff', async () => {
    jest.useFakeTimers();
    try {
        const infoLogSpy = jest.spyOn(Logger, 'logInfo');
        const lostConnectionError = new Error('Connection to Indexed Database server lost. Refresh the page to try again');

        // One failed write followed by the previous setItem implementation, so the first retry succeeds.
        const previousSetItem = StorageMock.setItem;
        StorageMock.setItem = jest.fn(previousSetItem).mockRejectedValueOnce(lostConnectionError).mockImplementation(previousSetItem);

        const pendingSet = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
        await jest.runAllTimersAsync();
        await pendingSet;

        const recoveryMessage = expect.stringContaining('Connection error recovered after backoff on attempt');
        expect(infoLogSpy).toHaveBeenCalledWith(recoveryMessage);
    } finally {
        jest.useRealTimers();
    }
});

it('should log connection-specific exhaustion message when all retries fail', async () => {
    jest.useFakeTimers();
    try {
        const alertLogSpy = jest.spyOn(Logger, 'logAlert');
        const lostConnectionError = new Error('Connection to Indexed Database server lost. Refresh the page to try again');

        // Every write rejects, so the retry chain runs until it gives up.
        const alwaysFailingSetItem = jest.fn().mockRejectedValue(lostConnectionError);
        StorageMock.setItem = alwaysFailingSetItem;

        const pendingSet = Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});
        await jest.runAllTimersAsync();
        await pendingSet;

        const exhaustionMessage = expect.stringContaining('Connection error exhausted all retries with backoff');
        expect(alertLogSpy).toHaveBeenCalledWith(exhaustionMessage);
    } finally {
        jest.useRealTimers();
    }
});

it('should NOT apply backoff delay for capacity errors (immediate retry with eviction)', async () => {
    const setTimeoutSpy = jest.spyOn(global, 'setTimeout');
    try {
        StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError);

        await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'});

        // Capacity errors should not trigger any backoff delays (delay > 0)
        const backoffDelays = setTimeoutSpy.mock.calls.map((call) => call[1]).filter((delay): delay is number => typeof delay === 'number' && delay > 0);

        expect(backoffDelays).toHaveLength(0);
    } finally {
        // Restore in `finally` so a failing assertion cannot leave the setTimeout spy installed for later tests.
        setTimeoutSpy.mockRestore();
    }
});
});

describe('storage eviction', () => {
Expand Down
Loading