From 11765adb75e80f1bea0ad09380749eb0d4324dfa Mon Sep 17 00:00:00 2001 From: linkliti Date: Sat, 9 May 2026 21:00:08 +0300 Subject: [PATCH 1/2] perf(watchdog): batch registrations, exclude noise directories, and skip 9p mounts Add batch_watchdogs() context manager to defer observer refreshes until all watchdog registrations complete, eliminating redundant reschedule cycles during initialization. Switch from recursive=True to per-directory recursive=False scheduling with os.walk pruning so noise folders like __pycache__, .venv, and node_modules are never observed in the first place. Refresh the observer on directory create and move events to pick up new subdirectories under non-recursive watches. Extract a reusable get_noise_folders() into helpers/exclusion.py covering version control, build output, caches, and language-specific directories, replacing the hardcoded __pycache__ ignore patterns in watchdog defaults. Detect 9p remote mounts via /proc/mounts and skip them in add_watchdog since inotify produces zero events on 9p filesystems. 
def get_noise_folders() -> set[str]:
    """Return the generic "noise" folder names.

    The names are grouped by ecosystem below purely for readability; the
    groups are flattened into one newly built, mutable set on every call, so
    callers may modify the result without affecting later calls.
    """
    groups = (
        # version control
        (".git", ".hg", ".svn"),
        # build output
        (
            "artifacts",
            "bin",
            "build",
            "debug",
            "dist",
            "generated",
            "log",
            "logs",
            "obj",
            "out",
            "release",
        ),
        # general cache
        (".cache", "temp", "tmp"),
        # python
        (
            "__generated__",
            "__pycache__",
            "__pypackages__",
            ".eggs",
            ".ipynb_checkpoints",
            ".mypy_cache",
            ".nox",
            ".pdm-build",
            ".pixi",
            ".pyre",
            ".pytest_cache",
            ".pytype",
            ".ruff_cache",
            ".tox",
            ".venv",
            "develop-eggs",
            "eggs",
            "htmlcov",
            "lib-cov",
            "pip-wheel-metadata",
            "sdist",
            "venv",
            "wheels",
        ),
        # javascript / node
        (
            ".angular",
            ".cache-loader",
            ".eslintcache",
            ".next",
            ".npm",
            ".nuxt",
            ".nx",
            ".nyc_output",
            ".output",
            ".parcel-cache",
            ".playwright",
            ".pnpm-store",
            ".sass-cache",
            ".storybook-cache",
            ".svelte-kit",
            ".swc",
            ".turbo",
            ".vite",
            ".webpack",
            "bower_components",
            "jspm_packages",
            "node_modules",
            "playwright-report",
            "storybook-static",
            "web_modules",
        ),
        # c / c++
        ("_deps", "CMakeFiles", "cmake-build-debug", "cmake-build-release"),
        # java / kotlin
        (".grunt", "target"),
        # php / go
        ("vendor",),
        # swift / ios
        ("DerivedData", "Pods"),
        # dart / flutter
        (".dart_tool", ".pub-cache"),
        # scala
        (".bloop",),
        # haskell
        (".stack-work", "dist-newstyle"),
        # infrastructure / cloud
        (".serverless", ".terraform", "cdk.out"),
        # static site generators
        (".docusaurus", ".jekyll-cache", "_build", "_site"),
        # data science / ml
        (".mlruns", "wandb"),
        # coverage
        ("coverage",),
    )
    noise: set[str] = set()
    for names in groups:
        noise.update(names)
    return noise
result.extend(os.path.join(dirpath, d) for d in dirnames) + return result + + @dataclass(frozen=True) class _Watch: id: str @@ -70,6 +82,7 @@ def __init__(self): self._watch_ids_by_group: dict[str, set[str]] = {} self._scheduled_roots: set[str] = set() self._pending_batches: dict[str, _PendingBatch] = {} + self._batching: bool = False def add( self, @@ -82,7 +95,7 @@ def add( handler: WatchHandler, ) -> None: self._ensure_watchdog_available() - normalized_roots = _normalize_roots(roots) + normalized_roots = [r for r in _normalize_roots(roots) if not _is_9p_mount(r)] normalized_patterns = _normalize_patterns(patterns) normalized_ignore_patterns = _normalize_patterns( ignore_patterns, default=_DEFAULT_IGNORE_PATTERNS @@ -117,7 +130,8 @@ def add( pending.timer.cancel() self._watches.update(watches) self._watch_ids_by_group[id] = set(watches) - self._refresh_observer() + if not self._batching: + self._refresh_observer() def remove(self, id: str) -> bool: with self._lock: @@ -128,7 +142,7 @@ def remove(self, id: str) -> bool: pending = self._pending_batches.pop(watch_id, None) if pending and pending.timer: pending.timer.cancel() - if removed: + if removed and not self._batching: self._refresh_observer() return removed @@ -138,7 +152,8 @@ def clear(self) -> None: self._watch_ids_by_group.clear() pending_batches = list(self._pending_batches.values()) self._pending_batches.clear() - self._refresh_observer() + if not self._batching: + self._refresh_observer() for pending in pending_batches: if pending.timer: pending.timer.cancel() @@ -182,6 +197,10 @@ def dispatch(self, scheduled_root: str, event: Any) -> None: if not watch.matcher(path): continue self._queue_event(watch, path, event_type) + if event_type in ("create", "move") and bool(getattr(event, "is_directory", False)): + src_path = getattr(event, "src_path", None) + if isinstance(src_path, str) and os.path.basename(src_path) not in get_noise_folders(): + threading.Thread(target=self._refresh_observer, 
def _unescape_mount_field(field: str) -> str:
    """Decode the octal escapes the kernel uses in mount-table fields."""
    # The backslash escape is handled last so that a decoded backslash can
    # never combine with following digits into a second, bogus escape.
    for escaped, char in (
        ("\\040", " "),
        ("\\011", "\t"),
        ("\\012", "\n"),
        ("\\134", "\\"),
    ):
        field = field.replace(escaped, char)
    return field


def _is_under_mountpoint(path: str, mountpoint: str) -> bool:
    """Return True if *path* is *mountpoint* itself or lies beneath it."""
    # Stripping the trailing separator makes the root mount ("/") work:
    # "" + "/" is a prefix of every absolute path, whereas "/" + "/" is not.
    return path == mountpoint or path.startswith(mountpoint.rstrip(os.sep) + os.sep)


def _is_9p_mount(path: str, *, mounts_file: str = "/proc/mounts") -> bool:
    """
    Check if path resides on a 9p remote filesystem
    Related: https://github.com/microsoft/WSL/issues/4739

    The mount table is scanned for the most specific (longest) mountpoint
    that contains *path*; the answer is whether that mount's filesystem type
    is ``9p``. Considering every filesystem type, not only 9p entries, keeps
    a local filesystem mounted inside a 9p tree from being misreported.

    Args:
        path: Filesystem path to classify; symlinks are resolved first.
        mounts_file: Mount table to read, in ``/proc/mounts`` format.

    Returns:
        True if the closest enclosing mount is of type ``9p``. False
        otherwise, including when the mount table cannot be read or no
        entry contains the path.
    """
    resolved = os.path.realpath(path)
    best_len = -1
    best_fstype = ""
    try:
        # surrogateescape: mountpoints are raw bytes as far as the kernel is
        # concerned, so undecodable names must not abort the scan.
        with open(mounts_file, "r", encoding="utf-8", errors="surrogateescape") as f:
            for line in f:
                parts = line.split()
                if len(parts) < 3:
                    continue
                mountpoint = os.path.realpath(_unescape_mount_field(parts[1]))
                if not _is_under_mountpoint(resolved, mountpoint):
                    continue
                # ">=" lets a later entry win a tie: when two filesystems
                # are stacked on one mountpoint, the later one is on top.
                if len(mountpoint) >= best_len:
                    best_len = len(mountpoint)
                    best_fstype = parts[2]
    except OSError:
        return False
    return best_fstype == "9p"
batch_watchdogs(): + return _registry.batch() + + def start_watchdog_daemon() -> None: _registry.start() @@ -416,6 +474,7 @@ def stop_watchdog_daemon() -> None: "add_watchdog", "remove_watchdog", "clear_watchdogs", + "batch_watchdogs", "start_watchdog_daemon", "stop_watchdog_daemon", ] From f5379ba2825b88aeb2bbd520684bebe2eadb471c Mon Sep 17 00:00:00 2001 From: linkliti Date: Mon, 11 May 2026 21:54:46 +0300 Subject: [PATCH 2/2] fix(watchdog): move observer refresh outside registry lock in add/remove/clear Dedent _refresh_observer() calls out of with self._lock: blocks in _WatchRegistry.add(), remove(), and clear() to break an AB-BA deadlock between _WatchRegistry._lock (L1) and BaseObserver._lock (L2) in `/opt/venv-a0/lib/python3.12/site-packages/watchdog/observers/api.py`. The main thread held L1 while calling unschedule_all() which needs L2; the observer dispatch thread held L2 while calling dispatch() which needs L1. --- helpers/watchdog.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/helpers/watchdog.py b/helpers/watchdog.py index 8025de419d..7f6322cc6d 100644 --- a/helpers/watchdog.py +++ b/helpers/watchdog.py @@ -130,8 +130,8 @@ def add( pending.timer.cancel() self._watches.update(watches) self._watch_ids_by_group[id] = set(watches) - if not self._batching: - self._refresh_observer() + if not self._batching: + self._refresh_observer() def remove(self, id: str) -> bool: with self._lock: @@ -142,9 +142,9 @@ def remove(self, id: str) -> bool: pending = self._pending_batches.pop(watch_id, None) if pending and pending.timer: pending.timer.cancel() - if removed and not self._batching: - self._refresh_observer() - return removed + if removed and not self._batching: + self._refresh_observer() + return removed def clear(self) -> None: with self._lock: @@ -152,8 +152,8 @@ def clear(self) -> None: self._watch_ids_by_group.clear() pending_batches = list(self._pending_batches.values()) self._pending_batches.clear() - if not 
self._batching: - self._refresh_observer() + if not self._batching: + self._refresh_observer() for pending in pending_batches: if pending.timer: pending.timer.cancel()