Skip to content

Commit b1f58e0

Browse files
fix: hold lock during dead-connection replacement to prevent size race
The dead-connection replacement path decremented _size and called _create_connection() without holding self._lock. During the await, another coroutine could enter the lock-protected creation path, see _size < max_size, and create an extra connection -- exceeding max_size. Now both the decrement and re-creation are under the lock. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ccf1061 commit b1f58e0

File tree

2 files changed

+46
-2
lines changed

2 files changed

+46
-2
lines changed

src/dqliteclient/pool.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,9 @@ async def acquire(self) -> AsyncIterator[DqliteConnection]:
103103

104104
# If connection is dead, discard and create a fresh one with leader discovery
105105
if not conn.is_connected:
106-
self._size -= 1
107-
conn = await self._create_connection()
106+
async with self._lock:
107+
self._size -= 1
108+
conn = await self._create_connection()
108109

109110
self._in_use.add(conn)
110111
try:

tests/test_pool.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,49 @@ async def test_acquire_timeout_when_pool_exhausted(self) -> None:
156156
pass
157157

158158

159+
async def test_dead_conn_replacement_respects_max_size(self) -> None:
160+
"""Dead-connection replacement must not allow _size to exceed max_size."""
161+
pool = ConnectionPool(["localhost:9001"], min_size=1, max_size=2)
162+
163+
dead_conn = MagicMock()
164+
dead_conn.is_connected = False
165+
dead_conn.connect = AsyncMock()
166+
dead_conn.close = AsyncMock()
167+
168+
with patch.object(pool._cluster, "connect", return_value=dead_conn):
169+
await pool.initialize()
170+
171+
assert pool._size == 1
172+
173+
# Track whether the lock is held during dead-conn replacement.
174+
# If _create_connection is called while the lock is NOT held,
175+
# another coroutine could race and exceed max_size.
176+
lock_was_held_during_create = False
177+
original_create = pool._create_connection
178+
179+
async def tracking_create():
180+
nonlocal lock_was_held_during_create
181+
if pool._lock.locked():
182+
lock_was_held_during_create = True
183+
return await original_create()
184+
185+
new_conn = MagicMock()
186+
new_conn.is_connected = True
187+
new_conn.connect = AsyncMock()
188+
new_conn.close = AsyncMock()
189+
190+
pool._create_connection = tracking_create
191+
with patch.object(pool._cluster, "connect", return_value=new_conn):
192+
async with pool.acquire() as conn:
193+
assert conn is new_conn
194+
195+
assert lock_was_held_during_create, (
196+
"Dead-connection replacement must hold the lock to prevent "
197+
"_size race with concurrent acquire() calls"
198+
)
199+
200+
await pool.close()
201+
159202
async def test_dead_connection_triggers_leader_rediscovery(self) -> None:
160203
"""A dead connection should be replaced via leader discovery, not reconnected."""
161204
pool = ConnectionPool(["localhost:9001"], max_size=1)

0 commit comments

Comments
 (0)