From e293bc8ada37e7740877462658940cae5b28efa6 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Sat, 13 Dec 2025 21:53:55 -0800 Subject: [PATCH 1/5] fix: harden the initial startup logic We currently see "Initial connection attempt took longer than expected" and this is an attempt to see if the reason is due to uncaught exceptions. This adds some exception handling for uncaught exceptions to make sure the connection attempt gets fired. --- roborock/devices/device.py | 29 ++++++++++++++++++---------- tests/devices/test_device_manager.py | 28 +++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/roborock/devices/device.py b/roborock/devices/device.py index 87814391..bf974f47 100644 --- a/roborock/devices/device.py +++ b/roborock/devices/device.py @@ -147,34 +147,42 @@ async def start_connect(self) -> None: called. The device will automatically attempt to reconnect if the connection is lost. """ - start_attempt: asyncio.Event = asyncio.Event() + # We don't care about the value of the future, just that we wait long enough + # to let the first connection attempt happen. + start_attempt: asyncio.Future[bool] = asyncio.Future() async def connect_loop() -> None: - backoff = MIN_BACKOFF_INTERVAL try: + backoff = MIN_BACKOFF_INTERVAL while True: try: await self.connect() - start_attempt.set() self._has_connected = True self._ready_callbacks(self) - return + if not start_attempt.done(): + start_attempt.set_result(True) + break except RoborockException as e: - start_attempt.set() + if not start_attempt.done(): + start_attempt.set_result(False) self._logger.info("Failed to connect (retry %s): %s", backoff.total_seconds(), e) await asyncio.sleep(backoff.total_seconds()) backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_INTERVAL) + except Exception as e: # pylint: disable=broad-except + if not start_attempt.done(): + start_attempt.set_exception(e) + self._logger.exception("Unexpected error during connect: %s", e) + break except asyncio.CancelledError: + if not start_attempt.done(): + start_attempt.set_result(False) self._logger.debug("connect_loop was cancelled for device %s", self.duid) - # Clean exit on cancellation - return - finally: - start_attempt.set() self._connect_task = asyncio.create_task(connect_loop()) try: - await asyncio.wait_for(start_attempt.wait(), timeout=START_ATTEMPT_TIMEOUT.total_seconds()) + async with asyncio.timeout(START_ATTEMPT_TIMEOUT.total_seconds()): + await start_attempt except TimeoutError: self._logger.debug("Initial connection attempt took longer than expected, will keep trying in background") @@ -190,6 +198,7 @@ async def connect(self) -> None: except RoborockException: unsub() raise + self._logger.info("Connected to device") self._unsub = unsub async def close(self) -> None: diff --git a/tests/devices/test_device_manager.py b/tests/devices/test_device_manager.py index 239762bb..ef183afa 100644 --- a/tests/devices/test_device_manager.py +++ b/tests/devices/test_device_manager.py @@ -66,11 +66,17 @@ def mock_sleep() -> Generator[None, None, None]: yield +@pytest.fixture(name="channel_exception") +def channel_failure_exception_fixture(mock_rpc_channel: AsyncMock) -> Exception: + """Fixture that provides the exception to be raised by the failing channel.""" + return RoborockException("Connection failed") + + @pytest.fixture(name="channel_failure") -def channel_failure_fixture(mock_rpc_channel: AsyncMock) -> Generator[Mock, None, None]: +def channel_failure_fixture(mock_rpc_channel: AsyncMock, channel_exception: Exception) -> Generator[Mock, None, None]: """Fixture that makes channel subscribe fail.""" with patch("roborock.devices.device_manager.create_v1_channel") as mock_channel: - mock_channel.return_value.subscribe = AsyncMock(side_effect=RoborockException("Connection failed")) + mock_channel.return_value.subscribe = AsyncMock(side_effect=channel_exception) mock_channel.return_value.is_connected = False mock_channel.return_value.rpc_channel = mock_rpc_channel yield mock_channel @@ -192,6 +198,12 @@ async def test_ready_callback(home_data: HomeData) -> None: await device_manager.close() +@pytest.mark.parametrize( + ("channel_exception"), + [ + RoborockException("Connection failed"), + ], +) async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock, mock_sleep: Mock) -> None: """Test that start_connect retries when connection fails.""" ready_devices: list[RoborockDevice] = [] @@ -231,3 +243,15 @@ async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock, await device_manager.close() assert mock_unsub.call_count == 1 + + +@pytest.mark.parametrize( + ("channel_exception"), + [ + Exception("Unexpected error"), + ], +) +async def test_start_connect_unexpected_error(home_data: HomeData, channel_failure: Mock, mock_sleep: Mock) -> None: + """Test that some exceptions from start_connect are propagated.""" + with pytest.raises(Exception, match="Unexpected error"): + await create_device_manager(USER_PARAMS) From ae8eed6393ea8be3a8cd5cb4ec866ff9f41594aa Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Sat, 13 Dec 2025 21:54:07 -0800 Subject: [PATCH 2/5] chore: fix logging --- roborock/devices/device.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roborock/devices/device.py b/roborock/devices/device.py index bf974f47..393b41c1 100644 --- a/roborock/devices/device.py +++ b/roborock/devices/device.py @@ -171,7 +171,7 @@ async def connect_loop() -> None: except Exception as e: # pylint: disable=broad-except if not start_attempt.done(): start_attempt.set_exception(e) - self._logger.exception("Unexpected error during connect: %s", e) + self._logger.exception("Uncaught error during connect: %s", e) break except asyncio.CancelledError: if not start_attempt.done(): From 91071abfa8f2c1ab61fc7974ce32bb38a8e36947 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Sat, 13 Dec 2025 21:56:45 -0800 Subject: [PATCH 3/5] chore: revert whitespace change --- roborock/devices/device.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/roborock/devices/device.py b/roborock/devices/device.py index 393b41c1..929eee36 100644 --- a/roborock/devices/device.py +++ b/roborock/devices/device.py @@ -157,10 +157,10 @@ async def connect_loop() -> None: while True: try: await self.connect() - self._has_connected = True - self._ready_callbacks(self) if not start_attempt.done(): start_attempt.set_result(True) + self._has_connected = True + self._ready_callbacks(self) break except RoborockException as e: if not start_attempt.done(): @@ -174,9 +174,10 @@ async def connect_loop() -> None: self._logger.exception("Uncaught error during connect: %s", e) break except asyncio.CancelledError: + self._logger.debug("connect_loop was cancelled for device %s", self.duid) + finally: if not start_attempt.done(): start_attempt.set_result(False) - self._logger.debug("connect_loop was cancelled for device %s", self.duid) self._connect_task = asyncio.create_task(connect_loop()) From 84af81cab9f7e612d0e05c74affdef7a8ced1f0d Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Sat, 13 Dec 2025 21:57:21 -0800 Subject: [PATCH 4/5] chore: reduce whitespace changes --- roborock/devices/device.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roborock/devices/device.py b/roborock/devices/device.py index 929eee36..3a6906e1 100644 --- a/roborock/devices/device.py +++ b/roborock/devices/device.py @@ -161,7 +161,7 @@ async def connect_loop() -> None: start_attempt.set_result(True) self._has_connected = True self._ready_callbacks(self) - break + return except RoborockException as e: if not start_attempt.done(): start_attempt.set_result(False) @@ -172,7 +172,7 @@ async def connect_loop() -> None: if not start_attempt.done(): start_attempt.set_exception(e) self._logger.exception("Uncaught error during connect: %s", e) - break + return except asyncio.CancelledError: self._logger.debug("connect_loop was cancelled for device %s", self.duid) finally: From 11976c5ef9e71c17244528597bc08aa550341922 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Sat, 13 Dec 2025 22:05:32 -0800 Subject: [PATCH 5/5] chore: clarify comments and docstrings --- roborock/devices/device.py | 6 ++++-- tests/devices/test_device_manager.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/roborock/devices/device.py b/roborock/devices/device.py index 3a6906e1..dfb924e3 100644 --- a/roborock/devices/device.py +++ b/roborock/devices/device.py @@ -147,8 +147,10 @@ async def start_connect(self) -> None: called. The device will automatically attempt to reconnect if the connection is lost. """ - # We don't care about the value of the future, just that we wait long enough - # to let the first connection attempt happen. + # The future will be set to True if the first attempt succeeds, False if + # it fails, or an exception if an unexpected error occurs. + # We use this to wait a short time for the first attempt to complete. We + # don't actually care about the result, just that we waited long enough. start_attempt: asyncio.Future[bool] = asyncio.Future() async def connect_loop() -> None: diff --git a/tests/devices/test_device_manager.py b/tests/devices/test_device_manager.py index ef183afa..85696df2 100644 --- a/tests/devices/test_device_manager.py +++ b/tests/devices/test_device_manager.py @@ -252,6 +252,6 @@ async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock, ], ) async def test_start_connect_unexpected_error(home_data: HomeData, channel_failure: Mock, mock_sleep: Mock) -> None: - """Test that some exceptions from start_connect are propagated.""" + """Test that some unexpected errors from start_connect are propagated.""" with pytest.raises(Exception, match="Unexpected error"): await create_device_manager(USER_PARAMS)