Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions roborock/devices/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
MIN_BACKOFF_INTERVAL = datetime.timedelta(seconds=10)
MAX_BACKOFF_INTERVAL = datetime.timedelta(minutes=30)
BACKOFF_MULTIPLIER = 1.5
START_ATTEMPT_TIMEOUT = datetime.timedelta(seconds=5)


class RoborockDevice(ABC, TraitsMixin):
Expand Down Expand Up @@ -107,25 +108,32 @@ def is_local_connected(self) -> bool:
"""
return self._channel.is_local_connected

def start_connect(self) -> None:
async def start_connect(self) -> None:
"""Start a background task to connect to the device.

This will attempt to connect to the device using the appropriate protocol
channel. If the connection fails, it will retry with exponential backoff.
This will give a moment for the first connection attempt to start so
that the device will have connections established -- however, this will
never directly fail.

If the connection fails, it will retry in the background with
exponential backoff.

Once connected, the device will remain connected until `close()` is
called. The device will automatically attempt to reconnect if the connection
is lost.
"""
start_attempt: asyncio.Event = asyncio.Event()

async def connect_loop() -> None:
backoff = MIN_BACKOFF_INTERVAL
try:
while True:
try:
await self.connect()
start_attempt.set()
return
except RoborockException as e:
start_attempt.set()
_LOGGER.info("Failed to connect to device %s: %s", self.name, e)
_LOGGER.info(
"Retrying connection to device %s in %s seconds", self.name, backoff.total_seconds()
Expand All @@ -136,9 +144,16 @@ async def connect_loop() -> None:
_LOGGER.info("connect_loop for device %s was cancelled", self.name)
# Clean exit on cancellation
return
finally:
start_attempt.set()

self._connect_task = asyncio.create_task(connect_loop())

try:
await asyncio.wait_for(start_attempt.wait(), timeout=START_ATTEMPT_TIMEOUT.total_seconds())
except TimeoutError:
_LOGGER.debug("Initial connection attempt to device %s is taking longer than expected", self.name)

async def connect(self) -> None:
"""Connect to the device using the appropriate protocol channel."""
if self._unsub:
Expand Down
4 changes: 3 additions & 1 deletion roborock/devices/device_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,16 @@ async def discover_devices(self) -> list[RoborockDevice]:

# These are connected serially to avoid overwhelming the MQTT broker
Copy link

Copilot AI Dec 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is now misleading. The code previously connected devices serially, but with the changes in this PR, devices now start connecting in parallel (via asyncio.gather() on line 94). The comment should be updated to reflect the new behavior, or removed if parallel connection is the intended behavior.

If overwhelming the MQTT broker is still a concern, consider updating the comment to explain why parallel connection is now acceptable, or implement a mechanism to limit concurrency (e.g., using a semaphore).

Suggested change
# These are connected serially to avoid overwhelming the MQTT broker

Copilot uses AI. Check for mistakes.
new_devices = {}
start_tasks = []
for duid, (device, product) in device_products.items():
if duid in self._devices:
continue
new_device = self._device_creator(home_data, device, product)
new_device.start_connect()
start_tasks.append(new_device.start_connect())
new_devices[duid] = new_device

self._devices.update(new_devices)
await asyncio.gather(*start_tasks)
Copy link

Copilot AI Dec 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The asyncio.gather(*start_tasks) call should include return_exceptions=True to prevent one device's connection failure from blocking the discovery of other devices. Without this parameter, if any start_connect() raises an exception that escapes the timeout, the entire discover_devices() operation will fail.

Consider changing this to:

await asyncio.gather(*start_tasks, return_exceptions=True)
Suggested change
await asyncio.gather(*start_tasks)
await asyncio.gather(*start_tasks, return_exceptions=True)

Copilot uses AI. Check for mistakes.
return list(self._devices.values())

async def get_device(self, duid: str) -> RoborockDevice | None:
Expand Down
9 changes: 3 additions & 6 deletions tests/devices/test_device_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,10 @@ async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock,
device_manager = await create_device_manager(USER_PARAMS)
devices = await device_manager.get_devices()

# Wait for the device to attempt to connect
attempts = 0
# The device should attempt to connect in the background at least once
# by the time this function returns.
subscribe_mock = channel_failure.return_value.subscribe
while subscribe_mock.call_count < 1:
await asyncio.sleep(0.01)
attempts += 1
assert attempts < 10, "Device did not connect after multiple attempts"
assert subscribe_mock.call_count > 0

# Device should exist but not be connected
assert len(devices) == 1
Expand Down