From bde0a0a660d5bdd0cb8fdbffe3fd8e55b2220cfa Mon Sep 17 00:00:00 2001
From: Rushil Mehra
Date: Mon, 2 Feb 2026 11:43:38 -0500
Subject: [PATCH] Fix race condition with sending a request while container is stopping

Due to some quirks in the runtime, it's possible for the DO to send a
request to a container when it thinks the container is in a running
state, but while the request is in flight, the container stops and the
monitor promise resolves. This results in an error, and instead of
retrying we throw a 500 error.

Instead, recognize this case and restart the container.

This is a bandage solution, but we will follow up with some improvements
to the runtime that will clean up the state management required in this
DO class.
---
 src/lib/container.ts | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/lib/container.ts b/src/lib/container.ts
index d049dd5..79509a4 100644
--- a/src/lib/container.ts
+++ b/src/lib/container.ts
@@ -77,6 +77,14 @@ const isRuntimeSignalledError = (error: unknown): boolean =>
 const isNotListeningError = (error: unknown): boolean => isErrorOfType(error, NOT_LISTENING_ERROR);
 const isContainerExitNonZeroError = (error: unknown): boolean =>
   isErrorOfType(error, UNEXPECTED_EXIT_ERROR);
+const isContainerNotRunningError = (error: unknown): boolean => {
+  const patterns = [
+    'the container is not running',
+    'not expected to be running',
+    'consider calling start()',
+  ];
+  return patterns.some(pattern => isErrorOfType(error, pattern));
+};
 
 function getExitCodeFromError(error: unknown): number | null {
   if (!(error instanceof Error)) {
@@ -721,6 +729,25 @@ export class Container extends DurableObject {
         throw e;
       }
 
+      // If container stopped during the request (e.g., sleepAfter expired), restart and retry
+      if (!this.container.running || isContainerNotRunningError(e)) {
+        try {
+          await this.startAndWaitForPorts(port);
+          return await tcpPort.fetch(containerUrl, request);
+        } catch (retryError) {
+          if (isNoInstanceError(retryError)) {
+            return new Response(
+              'There is no Container instance available at this time.\nThis is likely because you have reached your max concurrent instance count (set in wrangler config) or are you currently provisioning the Container.\nIf you are deploying your Container for the first time, check your dashboard to see provisioning status, this may take a few minutes.',
+              { status: 503 }
+            );
+          }
+          return new Response(
+            `Failed to restart container: ${retryError instanceof Error ? retryError.message : String(retryError)}`,
+            { status: 500 }
+          );
+        }
+      }
+
       // This error means that the container might've just restarted
       if (e.message.includes('Network connection lost.')) {
         return new Response('Container suddenly disconnected, try again', { status: 500 });