From 77bc198e0b17aeddc566a7ca798e105b10fb2302 Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza Date: Wed, 29 Apr 2026 23:14:32 +0500 Subject: [PATCH 1/6] fix(pyi): regenerate stubs from pyi_hashes.json baseline, not sentinel files Replace the .pyi_generator_last_run/.pyi_generator_diff sentinel files with the last commit that touched pyi_hashes.json as the regen baseline, and expand the change set along the import graph so editing a parent class also regenerates subclass stubs. Set pass_filenames: false on the pre-commit hook and rewrite scan_all's pyi_hashes.json update as a per-file evidence-based merge so partial-target runs no longer truncate the file. --- .gitignore | 2 - .pre-commit-config.yaml | 1 + AGENTS.md | 2 +- .../src/reflex_base/utils/pyi_generator.py | 111 +++---- scripts/make_pyi.py | 298 ++++++++++++++---- .../utils/pyi_generator/test_hash_merge.py | 203 ++++++++++++ 6 files changed, 487 insertions(+), 130 deletions(-) create mode 100644 tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py diff --git a/.gitignore b/.gitignore index 508d57ca9d6..a4a37f36b16 100644 --- a/.gitignore +++ b/.gitignore @@ -12,8 +12,6 @@ examples/ .venv venv requirements.txt -.pyi_generator_last_run -.pyi_generator_diff reflex.db .codspeed .env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a06454f5d16..cd53f99c595 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,7 @@ repos: entry: python3 scripts/make_pyi.py language: system always_run: true + pass_filenames: false require_serial: true - repo: local hooks: diff --git a/AGENTS.md b/AGENTS.md index 3c9593ff624..10910290a47 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -85,7 +85,7 @@ Playwright tests use the `page` fixture and navigate to `harness.frontend_url`. ## .pyi stubs -When adding/modifying components: `uv run python scripts/make_pyi.py`. Commit `pyi_hashes.json` (not `.pyi` files). If the diff removes many modules, run `uv sync`, delete `.pyi_generator_last_run`, and regenerate. +When components change, run `uv run python scripts/make_pyi.py` and commit only `pyi_hashes.json`. It incrementally regenerates changed stubs and dependent subclasses. 
## Breaking changes and deprecation diff --git a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py index 0425f03d7b1..2d7e08a73e5 100644 --- a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py +++ b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py @@ -1607,7 +1607,10 @@ class PyiGenerator: modules: list = [] root: str = "" current_module: Any = {} - written_files: list[tuple[str, str]] = [] + + def __init__(self) -> None: + """Initialize per-instance scan state.""" + self.written_files: list[tuple[str, str]] = [] def _scan_files(self, files: list[Path]): max_workers = min(multiprocessing.cpu_count() or 1, len(files), 8) @@ -1673,7 +1676,7 @@ def scan_all( and target_path.suffix == ".py" and target_path.name not in EXCLUDED_FILES ): - file_targets.append(target_path) + file_targets.append(target_path.resolve()) continue if not target_path.is_dir(): continue @@ -1712,69 +1715,47 @@ def scan_all( subprocess.run(["ruff", "format", *file_paths]) subprocess.run(["ruff", "check", "--fix", *file_paths]) - if use_json: - if file_paths and changed_files is None: - file_paths = list(map(Path, file_paths)) - top_dir = file_paths[0].parent - for file_path in file_paths: - file_parent = file_path.parent - while len(file_parent.parts) > len(top_dir.parts): - file_parent = file_parent.parent - while len(top_dir.parts) > len(file_parent.parts): - top_dir = top_dir.parent - while not file_parent.samefile(top_dir): - file_parent = file_parent.parent - top_dir = top_dir.parent - - while ( - not top_dir.samefile(top_dir.parent) - and not (top_dir / PYI_HASHES).exists() - ): - top_dir = top_dir.parent - - pyi_hashes_file = top_dir / PYI_HASHES - - if pyi_hashes_file.exists(): - pyi_hashes_file.write_text( - json.dumps( - dict( - zip( - [ - f.relative_to(pyi_hashes_file.parent).as_posix() - for f in file_paths - ], - hashes, - strict=True, - ) - ), - indent=2, - sort_keys=True, - ) - + "\n", - ) - elif file_paths: - file_paths = list(map(Path, file_paths)) - pyi_hashes_parent = file_paths[0].parent - while ( - not pyi_hashes_parent.samefile(pyi_hashes_parent.parent) - and not (pyi_hashes_parent / PYI_HASHES).exists() - ): - pyi_hashes_parent = pyi_hashes_parent.parent - - pyi_hashes_file = pyi_hashes_parent / PYI_HASHES - if pyi_hashes_file.exists(): - pyi_hashes = json.loads(pyi_hashes_file.read_text()) - for file_path, hashed_content in zip( - file_paths, hashes, strict=False - ): - formatted_path = file_path.relative_to( - pyi_hashes_parent - ).as_posix() - pyi_hashes[formatted_path] = hashed_content - - pyi_hashes_file.write_text( - json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n" - ) + if use_json and (file_paths or file_targets): + file_paths = list(map(Path, file_paths)) + anchor = ( + file_paths[0].parent if file_paths else file_targets[0].resolve().parent + ) + pyi_hashes_parent = anchor + while ( + pyi_hashes_parent != pyi_hashes_parent.parent + and not (pyi_hashes_parent / PYI_HASHES).exists() + ): + pyi_hashes_parent = pyi_hashes_parent.parent + + pyi_hashes_file = pyi_hashes_parent / PYI_HASHES + if pyi_hashes_file.exists(): + existing = json.loads(pyi_hashes_file.read_text()) + else: + pyi_hashes_file = (Path.cwd() / PYI_HASHES).resolve() + pyi_hashes_parent = pyi_hashes_file.parent + existing = {} + + produced = { + f.relative_to(pyi_hashes_parent).as_posix(): h + for f, h in zip(file_paths, hashes, strict=True) + } + scanned: set[str] = set() + for source in file_targets: + pyi_path = 
source.with_suffix(".pyi") + if pyi_path.is_relative_to(pyi_hashes_parent): + scanned.add(pyi_path.relative_to(pyi_hashes_parent).as_posix()) + pyi_hashes = { + entry: produced.get(entry, current) + for entry, current in existing.items() + if (entry in produced or entry not in scanned) + and (pyi_hashes_parent / entry).with_suffix(".py").exists() + } + for entry, hashed in produced.items(): + pyi_hashes.setdefault(entry, hashed) + + pyi_hashes_file.write_text( + json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n" + ) if __name__ == "__main__": diff --git a/scripts/make_pyi.py b/scripts/make_pyi.py index fc905a15036..6379ffc5d5c 100644 --- a/scripts/make_pyi.py +++ b/scripts/make_pyi.py @@ -1,17 +1,34 @@ -"""The pyi generator module.""" +"""The pyi generator module. +The last commit that touched ``pyi_hashes.json`` is used as the baseline of +"last successful regeneration". Sources changed since that commit (committed, +staged, unstaged, untracked) drive an incremental run; the change set is +expanded along the import graph so modifying a parent class also regenerates +the stubs of every subclass that inherits from it. + +A full regeneration is forced when ``pyi_hashes.json`` is absent, or when the +generator's own files (``scripts/make_pyi.py`` or the ``PyiGenerator`` +library) appear in the change set. +""" + +import ast import logging import subprocess import sys +from collections import defaultdict +from collections.abc import Iterable from pathlib import Path -from reflex_base.utils.pyi_generator import PyiGenerator, _relative_to_pwd +from reflex_base.utils.pyi_generator import PyiGenerator logger = logging.getLogger("pyi_generator") -LAST_RUN_COMMIT_SHA_FILE = Path(".pyi_generator_last_run").resolve() -GENERATOR_FILE = Path(__file__).resolve() -GENERATOR_DIFF_FILE = Path(".pyi_generator_diff").resolve() +PYI_HASHES = Path("pyi_hashes.json") +GENERATOR_PATHS = frozenset({ + "scripts/make_pyi.py", + "packages/reflex-base/src/reflex_base/utils/pyi_generator.py", +}) + DEFAULT_TARGETS = [ "reflex/components", "reflex/experimental", @@ -31,75 +48,219 @@ ] -def _git_diff(args: list[str]) -> str: - """Run a git diff command. +def _git(*args: str) -> list[str]: + """Run ``git`` with `args` and return non-empty stdout lines. Args: - args: The args to pass to git diff. + *args: Arguments forwarded to ``git``. + + Returns: + Non-empty lines of standard output, with trailing newlines stripped. + """ + result = subprocess.run(["git", *args], capture_output=True, text=True, check=False) + return [line for line in result.stdout.splitlines() if line] + + +def _last_regen_sha() -> str | None: + """Return the SHA of the last commit that touched ``pyi_hashes.json``. Returns: - The output of the git diff command. + The commit SHA, or ``None`` if the file is missing or has no history. """ - cmd = ["git", "diff", "--no-color", *args] - return subprocess.run(cmd, capture_output=True, encoding="utf-8").stdout + if not PYI_HASHES.exists(): + return None + out = _git("log", "-1", "--format=%H", "--", str(PYI_HASHES)) + return out[0] if out else None + +def _changed_python_paths(sha: str) -> set[str]: + """All ``.py`` paths changed since `sha`. -def _git_changed_files(args: list[str] | None = None) -> list[Path]: - """Get the list of changed files for a git diff command. + A single ``git diff `` covers committed, staged, and unstaged changes + (it diffs the working tree against the commit). Brand-new untracked files + aren't included; ``git add`` them first to bring them into scope. 
Args: - args: The args to pass to git diff. + sha: The baseline commit SHA. Returns: - The list of changed files. + Repo-relative paths of every ``.py`` file changed since `sha`. """ - if not args: - args = [] + return {p for p in _git("diff", "--name-only", sha) if p.endswith(".py")} + + +def _key(path: Path) -> str: + """POSIX-style repo-relative string key for `path`. - if "--name-only" not in args: - args.insert(0, "--name-only") + Args: + path: The absolute path to convert. - diff = _git_diff(args).splitlines() - return [Path(file.strip()) for file in diff] + Returns: + Repo-relative POSIX path string. + """ + return path.relative_to(Path.cwd()).as_posix() -def _get_changed_files() -> list[Path] | None: - """Get the list of changed files since the last run of the generator. +def _gather_sources(targets: list[str]) -> list[Path]: + """Resolve every ``.py`` file reachable from `targets`. + + Args: + targets: User-provided target list (files or directories). Returns: - The list of changed files, or None if all files should be regenerated. + Sorted list of absolute paths to ``.py`` files under `targets`. """ - try: - last_run_commit_sha = LAST_RUN_COMMIT_SHA_FILE.read_text().strip() - except FileNotFoundError: - logger.info( - "make_pyi.py last run could not be determined, regenerating all .pyi files" - ) - return None - changed_files = _git_changed_files([f"{last_run_commit_sha}..HEAD"]) - # get all unstaged changes - changed_files.extend(_git_changed_files()) - if _relative_to_pwd(GENERATOR_FILE) not in changed_files: - return changed_files - logger.info("make_pyi.py has changed, checking diff now") - diff = "".join(_git_diff([GENERATOR_FILE.as_posix()]).splitlines()[2:]) + seen: set[Path] = set() + for target in targets: + p = Path(target).resolve() + if p.is_file() and p.suffix == ".py": + seen.add(p) + elif p.is_dir(): + seen.update(p.rglob("*.py")) + return sorted(seen) + + +def _package_parts(path: Path) -> list[str]: + """Dotted parts of the package containing `path`. + + For ``pkg/foo/bar.py`` and for ``pkg/foo/__init__.py`` this returns + ``["pkg", "foo"]`` — i.e. the package the module participates in, not the + module itself. + + Args: + path: Absolute path to a ``.py`` file. + Returns: + Package parts in import order (top-level first), or ``[]`` if `path` + is not inside a package. + """ + parts: list[str] = [] + parent = path.parent + while (parent / "__init__.py").exists() and parent != parent.parent: + parts.append(parent.name) + parent = parent.parent + return list(reversed(parts)) + + +def _module_aliases(path: Path) -> set[str]: + """Dotted module names that an ``import`` could resolve to `path`. + + Walks upward while parent directories contain ``__init__.py`` to recover + the top-level package. For ``__init__.py`` files, also emits the package + name on its own (``import pkg`` reaches ``pkg/__init__.py``). + + Args: + path: Absolute path to a ``.py`` file. + + Returns: + Set of dotted module names that could refer to `path`. + """ + pkg = _package_parts(path) + if path.stem == "__init__": + full = ".".join([*pkg, "__init__"]) + aliases = {full} + if pkg: + aliases.add(".".join(pkg)) + return aliases + return {".".join([*pkg, path.stem])} if pkg else {path.stem} + + +def _iter_import_nodes( + nodes: Iterable[ast.AST], +) -> Iterable[ast.Import | ast.ImportFrom]: + """Yield import nodes reachable without entering function or class bodies. + + Imports live at module top level or inside ``if TYPE_CHECKING:`` / + ``try/except ImportError`` / ``with`` blocks. 
Walking function and class + bodies wastes time and never finds anything that shapes the import graph. + + Args: + nodes: AST nodes to scan (typically ``tree.body``). + + Yields: + Each ``ast.Import`` / ``ast.ImportFrom`` node encountered. + """ + for node in nodes: + if isinstance(node, (ast.Import, ast.ImportFrom)): + yield node + elif isinstance(node, ast.If): + yield from _iter_import_nodes(node.body) + yield from _iter_import_nodes(node.orelse) + elif isinstance(node, ast.Try): + yield from _iter_import_nodes(node.body) + yield from _iter_import_nodes(node.orelse) + yield from _iter_import_nodes(node.finalbody) + for handler in node.handlers: + yield from _iter_import_nodes(handler.body) + elif isinstance(node, (ast.With, ast.AsyncWith)): + yield from _iter_import_nodes(node.body) + + +def _imports_in(path: Path) -> set[str]: + """Absolute module names imported by `path`. + + For ``from pkg import name`` we emit both ``pkg`` and ``pkg.name`` so the + graph captures dependencies on either the package or one of its submodules. + Relative imports (``from .base import X``, ``from ..util import Y``) are + resolved against `path`'s own package so they participate in the graph. + + Args: + path: Absolute path to a ``.py`` file. + + Returns: + Dotted module names referenced by imports in `path`. + """ try: - last_diff = GENERATOR_DIFF_FILE.read_text() - if diff != last_diff: - logger.info("make_pyi.py has changed, regenerating all .pyi files") - changed_files = None - else: - logger.info("make_pyi.py has not changed, only regenerating changed files") - except FileNotFoundError: - logger.info( - "make_pyi.py diff could not be determined, regenerating all .pyi files" - ) - changed_files = None + tree = ast.parse(path.read_bytes(), filename=str(path)) + except (OSError, SyntaxError): + return set() + imports: set[str] = set() + pkg = _package_parts(path) + for node in _iter_import_nodes(tree.body): + if isinstance(node, ast.Import): + imports.update(alias.name for alias in node.names) + continue + if node.level == 0: + if node.module: + imports.add(node.module) + imports.update(f"{node.module}.{alias.name}" for alias in node.names) + continue + if node.level > len(pkg): + continue + base = pkg[: len(pkg) - (node.level - 1)] + if not base: + continue + target = ".".join([*base, node.module]) if node.module else ".".join(base) + imports.add(target) + imports.update(f"{target}.{alias.name}" for alias in node.names) + return imports - GENERATOR_DIFF_FILE.write_text(diff) - return changed_files +def _expand_with_dependents(changed: set[Path], sources: list[Path]) -> set[Path]: + """Add every source that transitively imports a changed source. + + Args: + changed: Sources detected as directly modified. + sources: All sources reachable from the targets. + + Returns: + `changed` union all sources whose import graph reaches a changed source. 
+ """ + importers: dict[str, set[Path]] = defaultdict(set) + for src in sources: + for mod in _imports_in(src): + importers[mod].add(src) + + seen = set(changed) + queue = list(changed) + while queue: + current = queue.pop() + for alias in _module_aliases(current): + for dependent in importers.get(alias, ()): + if dependent not in seen: + seen.add(dependent) + queue.append(dependent) + return seen if __name__ == "__main__": @@ -111,8 +272,6 @@ def _get_changed_files() -> list[Path] | None: if len(sys.argv) > 1 else DEFAULT_TARGETS ) - - # Only include targets that have a prefix in the default target list targets = [ target for target in targets @@ -121,16 +280,31 @@ def _get_changed_files() -> list[Path] | None: logger.info(f"Running .pyi generator for {targets}") - changed_files = _get_changed_files() - if changed_files is None: - logger.info("Changed files could not be detected, regenerating all .pyi files") + sha = _last_regen_sha() + if sha is None: + logger.info("No pyi_hashes.json baseline in git, regenerating all .pyi files") + changed_files: list[Path] | None = None else: - logger.info(f"Detected changed files: {changed_files}") + changed = _changed_python_paths(sha) + if changed & GENERATOR_PATHS: + logger.info("Generator changed, regenerating all .pyi files") + changed_files = None + else: + sources = _gather_sources(targets) + sources_by_key = {_key(p): p for p in sources} + directly_changed = { + sources_by_key[p] for p in changed if p in sources_by_key + } + if not directly_changed: + logger.info("No source files changed since last regeneration") + changed_files = [] + else: + expanded = _expand_with_dependents(directly_changed, sources) + logger.info( + f"Detected {len(directly_changed)} direct change(s), " + f"{len(expanded)} after transitive expansion" + ) + changed_files = [Path(_key(p)) for p in expanded] gen = PyiGenerator() gen.scan_all(targets, changed_files, use_json=True) - - current_commit_sha = subprocess.run( - ["git", "rev-parse", "HEAD"], capture_output=True, encoding="utf-8" - ).stdout.strip() - LAST_RUN_COMMIT_SHA_FILE.write_text(current_commit_sha) diff --git a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py new file mode 100644 index 00000000000..788007174ef --- /dev/null +++ b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py @@ -0,0 +1,203 @@ +"""Tests for ``pyi_hashes.json`` merge behavior in ``PyiGenerator.scan_all``.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from reflex_base.utils.pyi_generator import PyiGenerator + + +def _write_hashes(path: Path, mapping: dict[str, str]) -> None: + path.write_text(json.dumps(mapping, indent=2, sort_keys=True) + "\n") + + +def _make_workspace(root: Path) -> Path: + """Lay out a fake workspace with a couple of source files and a hash file. + + Args: + root: tmp directory to populate. + + Returns: + The workspace root. 
+ """ + pkg = root / "pkg" + pkg.mkdir() + (pkg / "foo.py").write_text("# placeholder\n") + (pkg / "bar.py").write_text("# placeholder\n") + other = root / "other" + other.mkdir() + (other / "baz.py").write_text("# placeholder\n") + return root + + +def test_partial_run_preserves_unrelated_entries(tmp_path, monkeypatch): + """Entries for files outside the run's scope are preserved.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + "other/baz.pyi": "BAZ", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "NEW_FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == { + "pkg/foo.pyi": "NEW_FOO", + "pkg/bar.pyi": "BAR", + "other/baz.pyi": "BAZ", + } + + +def test_scanned_file_with_no_output_drops_entry(tmp_path, monkeypatch): + """A file scanned this run that produces no stub has its hash entry removed.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + bar_pyi = (workspace / "pkg" / "bar.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(bar_pyi), "BAR_NEW")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py", "pkg/bar.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/bar.pyi": "BAR_NEW"} + + +def test_single_scanned_file_with_no_output_drops_entry(tmp_path, monkeypatch): + """Scanning one file that produces no stub still drops its old hash entry.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + def fake_scan(self, files): + return + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/bar.pyi": "BAR"} + + +def test_creates_hashes_file_when_missing(tmp_path, monkeypatch): + """If ``pyi_hashes.json`` doesn't exist, the merge creates it.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + assert not hashes_path.exists() + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + assert hashes_path.exists() + assert json.loads(hashes_path.read_text()) == {"pkg/foo.pyi": "FOO"} + + +def test_missing_source_file_drops_entry(tmp_path, monkeypatch): + """An entry whose source ``.py`` no longer exists is cleaned up.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + 
hashes_path, + { + "pkg/foo.pyi": "FOO", + "pkg/deleted.pyi": "STALE", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO_NEW")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/foo.pyi": "FOO_NEW"} + + +def test_incremental_run_merges_into_existing(tmp_path, monkeypatch): + """An incremental run (``changed_files`` set) merges new hashes into the existing file.""" + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "NEW_FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all( + ["pkg/foo.py"], + changed_files=[Path("pkg/foo.py")], + use_json=True, + ) + + result = json.loads(hashes_path.read_text()) + assert result == { + "pkg/foo.pyi": "NEW_FOO", + "pkg/bar.pyi": "BAR", + } From 84a007c57735d1566a0d443f3b26eabf258a8d08 Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza Date: Wed, 29 Apr 2026 23:55:54 +0500 Subject: [PATCH 2/6] fix(pyi): skip pyi_hashes.json in build hooks to avoid parallel-build races Add --no-update-hashes flag to pyi_generator and use it from both the top-level and per-package hatch build hooks. The shared hash file is a dev-tooling artifact; reading/writing it during parallel workspace package builds caused races and isn't needed at wheel-build time. --- .../src/hatch_reflex_pyi/plugin.py | 11 +++++++- .../src/reflex_base/utils/pyi_generator.py | 10 ++++++- scripts/hatch_build.py | 7 ++++- .../utils/pyi_generator/test_hash_merge.py | 26 +++++++++++++++++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py b/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py index 8ca81b2d0e7..2c3277a72be 100644 --- a/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py +++ b/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py @@ -67,8 +67,17 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: # Run from src/ so _path_to_module_name produces valid import names # (e.g. "reflex_components_core.core.banner" instead of # "packages.reflex-components-core.src.reflex_components_core.core.banner"). + # --no-update-hashes: parallel workspace builds would race on the shared + # pyi_hashes.json; that file is a dev-tooling artifact and isn't needed + # at install/wheel-build time. 
subprocess.run( - [sys.executable, "-m", "reflex_base.utils.pyi_generator", src_dir.name], + [ + sys.executable, + "-m", + "reflex_base.utils.pyi_generator", + "--no-update-hashes", + src_dir.name, + ], cwd=src_dir.parent, check=True, ) diff --git a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py index 2d7e08a73e5..ed24741be0b 100644 --- a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py +++ b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py @@ -1768,10 +1768,18 @@ def scan_all( default=["reflex/components", "reflex/experimental", "reflex/__init__.py"], help="Target directories/files to process", ) + parser.add_argument( + "--no-update-hashes", + dest="update_hashes", + action="store_false", + help="Do not read or write the workspace pyi_hashes.json. " + "Use this from build hooks where parallel package builds would race " + "on a single shared hash file.", + ) args = parser.parse_args() logging.basicConfig(level=logging.INFO) logging.getLogger("blib2to3.pgen2.driver").setLevel(logging.INFO) gen = PyiGenerator() - gen.scan_all(args.targets, None, use_json=True) + gen.scan_all(args.targets, None, use_json=args.update_hashes) diff --git a/scripts/hatch_build.py b/scripts/hatch_build.py index 37d3a913707..1083ae6d1fe 100644 --- a/scripts/hatch_build.py +++ b/scripts/hatch_build.py @@ -41,7 +41,12 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: file.unlink(missing_ok=True) subprocess.run( - [sys.executable, "-m", "reflex_base.utils.pyi_generator"], + [ + sys.executable, + "-m", + "reflex_base.utils.pyi_generator", + "--no-update-hashes", + ], check=True, ) self.marker().touch() diff --git a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py index 788007174ef..a7944d8fe83 100644 --- a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py +++ b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py @@ -168,6 +168,32 @@ def fake_scan(self, files): assert result == {"pkg/foo.pyi": "FOO_NEW"} +def test_use_json_false_does_not_touch_hashes_file(tmp_path, monkeypatch): + """With ``use_json=False``, ``pyi_hashes.json`` is neither read nor written. + + Build hooks rely on this so parallel workspace builds don't race on a + single shared hash file at the workspace root. 
+ """ + workspace = _make_workspace(tmp_path) + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + original = b"\x00not even json\x00" + hashes_path.write_bytes(original) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=False) + + assert hashes_path.read_bytes() == original + + def test_incremental_run_merges_into_existing(tmp_path, monkeypatch): """An incremental run (``changed_files`` set) merges new hashes into the existing file.""" workspace = _make_workspace(tmp_path) From 2844d734106d3b12a5c4d550602f39c424188ebd Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza <62690310+FarhanAliRaza@users.noreply.github.com> Date: Thu, 30 Apr 2026 00:21:11 +0500 Subject: [PATCH 3/6] Update scripts/make_pyi.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- scripts/make_pyi.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/make_pyi.py b/scripts/make_pyi.py index 6379ffc5d5c..e78f2541b28 100644 --- a/scripts/make_pyi.py +++ b/scripts/make_pyi.py @@ -192,6 +192,10 @@ def _iter_import_nodes( yield from _iter_import_nodes(node.finalbody) for handler in node.handlers: yield from _iter_import_nodes(handler.body) + elif hasattr(ast, "TryStar") and isinstance(node, ast.TryStar): + yield from _iter_import_nodes(node.body) + for handler in node.handlers: + yield from _iter_import_nodes(handler.body) elif isinstance(node, (ast.With, ast.AsyncWith)): yield from _iter_import_nodes(node.body) From b37901b62ff323bf4d66758c3941a7bd3ebcd298 Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza Date: Thu, 30 Apr 2026 00:27:10 +0500 Subject: [PATCH 4/6] fix(pyi): anchor pyi_hashes.json lookup at cwd, not first written file The hash file is workspace-scoped, so walking up from the first entry in written_files made the resolved path depend on iteration order and could latch onto a package-local file before reaching the repo root. Also warn when pyi_hashes.json exists locally but isn't tracked in git, since that forces a full regen on every run until it's committed. --- .../src/reflex_base/utils/pyi_generator.py | 12 ++++++++---- scripts/make_pyi.py | 10 +++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py index ed24741be0b..858faaaeea4 100644 --- a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py +++ b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py @@ -1717,10 +1717,14 @@ def scan_all( if use_json and (file_paths or file_targets): file_paths = list(map(Path, file_paths)) - anchor = ( - file_paths[0].parent if file_paths else file_targets[0].resolve().parent - ) - pyi_hashes_parent = anchor + # Anchor the pyi_hashes.json search at cwd, not at the first written + # file. The hash file is workspace-scoped — there is one for the whole + # repo — so walking up from cwd is what the caller (make_pyi.py, run + # from the repo root) actually means. Anchoring at file_paths[0] would + # make the result depend on which file happened to land first in + # `written_files`, and could find a package-local hash file in a + # leaf-package walk-up before reaching the workspace root. 
+ pyi_hashes_parent = Path.cwd().resolve() while ( pyi_hashes_parent != pyi_hashes_parent.parent and not (pyi_hashes_parent / PYI_HASHES).exists() diff --git a/scripts/make_pyi.py b/scripts/make_pyi.py index 6379ffc5d5c..46a05951b7d 100644 --- a/scripts/make_pyi.py +++ b/scripts/make_pyi.py @@ -282,7 +282,15 @@ def _expand_with_dependents(changed: set[Path], sources: list[Path]) -> set[Path sha = _last_regen_sha() if sha is None: - logger.info("No pyi_hashes.json baseline in git, regenerating all .pyi files") + if PYI_HASHES.exists(): + logger.warning( + f"{PYI_HASHES} exists locally but has no git history; " + "every run will full-regenerate until the file is committed." + ) + else: + logger.info( + "No pyi_hashes.json baseline in git, regenerating all .pyi files" + ) changed_files: list[Path] | None = None else: changed = _changed_python_paths(sha) From 6c7f0a6ac94092916a448c5acb8040617d4632fb Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza Date: Thu, 30 Apr 2026 13:02:37 +0500 Subject: [PATCH 5/6] refactor(pyi): extract hash-merge into helper and drop dead state Move the pyi_hashes.json merge logic out of PyiGenerator.scan_all into a module-level _update_pyi_hashes_file() so scan_all reads as scan + ruff fixup + (optional) hash update. Remove unused class attributes (modules, root, current_module) that were never read or written, and collapse a duplicate _relative_to_pwd() call in the file walk. Also fix test_tailwind flake: the stylesheet is now written to Path.cwd() / assets so it survives AppHarness's importlib.reload() across pytest reruns (the cached __file__ pinned writes to the prior tmp_path while the compiler read from the current cwd). --- .../src/reflex_base/utils/pyi_generator.py | 126 +++++++++--------- tests/integration/test_tailwind.py | 7 +- 2 files changed, 71 insertions(+), 62 deletions(-) diff --git a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py index 858faaaeea4..4bc1342392e 100644 --- a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py +++ b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py @@ -1599,15 +1599,69 @@ def _scan_file(module_path: Path) -> tuple[str, str] | None: return str(module_path.with_suffix(".pyi").resolve()), content_hash +def _update_pyi_hashes_file( + written_files: list[tuple[str, str]], + scanned_sources: list[Path], +) -> None: + """Merge a scan's results into the workspace ``pyi_hashes.json``. + + Walks up from the current working directory to find an existing + ``pyi_hashes.json`` (the file is workspace-scoped — one per repo). If none + exists, a fresh one is created in cwd. Existing entries survive unless + their source file disappeared, or their stub was scanned this run but no + longer produces a stub. Newly produced hashes are written in. + + Args: + written_files: ``(absolute pyi path, content md5)`` tuples for stubs + written this run. + scanned_sources: Absolute ``.py`` paths that were scanned this run, + including those that produced no stub. An entry whose source was + scanned but produced nothing this run is dropped from the file. 
+ """ + written_paths = [Path(p) for p, _ in written_files] + hashes = [h for _, h in written_files] + + pyi_hashes_parent = Path.cwd().resolve() + while ( + pyi_hashes_parent != pyi_hashes_parent.parent + and not (pyi_hashes_parent / PYI_HASHES).exists() + ): + pyi_hashes_parent = pyi_hashes_parent.parent + + pyi_hashes_file = pyi_hashes_parent / PYI_HASHES + if pyi_hashes_file.exists(): + existing = json.loads(pyi_hashes_file.read_text()) + else: + pyi_hashes_file = (Path.cwd() / PYI_HASHES).resolve() + pyi_hashes_parent = pyi_hashes_file.parent + existing = {} + + produced = { + p.relative_to(pyi_hashes_parent).as_posix(): h + for p, h in zip(written_paths, hashes, strict=True) + } + scanned = { + source.with_suffix(".pyi").relative_to(pyi_hashes_parent).as_posix() + for source in scanned_sources + if source.with_suffix(".pyi").is_relative_to(pyi_hashes_parent) + } + pyi_hashes = { + entry: produced.get(entry, current) + for entry, current in existing.items() + if (entry in produced or entry not in scanned) + and (pyi_hashes_parent / entry).with_suffix(".py").exists() + } + for entry, hashed in produced.items(): + pyi_hashes.setdefault(entry, hashed) + + pyi_hashes_file.write_text(json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n") + + class PyiGenerator: """A .pyi file generator that will scan all defined Component in Reflex and generate the appropriate stub. """ - modules: list = [] - root: str = "" - current_module: Any = {} - def __init__(self) -> None: """Initialize per-instance scan state.""" self.written_files: list[tuple[str, str]] = [] @@ -1684,10 +1738,7 @@ def scan_all( relative = _relative_to_pwd(file_path) if relative.name in EXCLUDED_FILES or file_path.suffix != ".py": continue - if ( - changed_files is not None - and _relative_to_pwd(file_path) not in changed_files - ): + if changed_files is not None and relative not in changed_files: continue file_targets.append(file_path) @@ -1705,61 +1756,14 @@ def scan_all( self._scan_files(file_targets) - file_paths, hashes = ( - [f[0] for f in self.written_files], - [f[1] for f in self.written_files], - ) - # Fix generated pyi files with ruff. - if file_paths: - subprocess.run(["ruff", "format", *file_paths]) - subprocess.run(["ruff", "check", "--fix", *file_paths]) - - if use_json and (file_paths or file_targets): - file_paths = list(map(Path, file_paths)) - # Anchor the pyi_hashes.json search at cwd, not at the first written - # file. The hash file is workspace-scoped — there is one for the whole - # repo — so walking up from cwd is what the caller (make_pyi.py, run - # from the repo root) actually means. Anchoring at file_paths[0] would - # make the result depend on which file happened to land first in - # `written_files`, and could find a package-local hash file in a - # leaf-package walk-up before reaching the workspace root. 
- pyi_hashes_parent = Path.cwd().resolve() - while ( - pyi_hashes_parent != pyi_hashes_parent.parent - and not (pyi_hashes_parent / PYI_HASHES).exists() - ): - pyi_hashes_parent = pyi_hashes_parent.parent + if self.written_files: + written_paths = [p for p, _ in self.written_files] + subprocess.run(["ruff", "format", *written_paths]) + subprocess.run(["ruff", "check", "--fix", *written_paths]) - pyi_hashes_file = pyi_hashes_parent / PYI_HASHES - if pyi_hashes_file.exists(): - existing = json.loads(pyi_hashes_file.read_text()) - else: - pyi_hashes_file = (Path.cwd() / PYI_HASHES).resolve() - pyi_hashes_parent = pyi_hashes_file.parent - existing = {} - - produced = { - f.relative_to(pyi_hashes_parent).as_posix(): h - for f, h in zip(file_paths, hashes, strict=True) - } - scanned: set[str] = set() - for source in file_targets: - pyi_path = source.with_suffix(".pyi") - if pyi_path.is_relative_to(pyi_hashes_parent): - scanned.add(pyi_path.relative_to(pyi_hashes_parent).as_posix()) - pyi_hashes = { - entry: produced.get(entry, current) - for entry, current in existing.items() - if (entry in produced or entry not in scanned) - and (pyi_hashes_parent / entry).with_suffix(".py").exists() - } - for entry, hashed in produced.items(): - pyi_hashes.setdefault(entry, hashed) - - pyi_hashes_file.write_text( - json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n" - ) + if use_json and (self.written_files or file_targets): + _update_pyi_hashes_file(self.written_files, file_targets) if __name__ == "__main__": diff --git a/tests/integration/test_tailwind.py b/tests/integration/test_tailwind.py index 071570ce44a..ac3e2bf18a5 100644 --- a/tests/integration/test_tailwind.py +++ b/tests/integration/test_tailwind.py @@ -42,7 +42,12 @@ def index(): id="p-content", ) - assets = Path(__file__).resolve().parent.parent / "assets" + # Anchor on cwd so the stylesheet lands where the compiler will look. + # AppHarness re-runs the app via importlib.reload, which preserves the + # module's original file location; on a pytest rerun with a fresh + # tmp_path, an anchor based on the module file would still point at the + # previous tmp_path while cwd has been updated by the harness. + assets = Path.cwd() / "assets" assets.mkdir(exist_ok=True) stylesheet = assets / "test_styles.css" stylesheet.write_text(".external { color: rgba(0, 0, 255, 0.5) }") From 4062d22d38eeda056bb84fccaae83b74e8695fcc Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza Date: Fri, 1 May 2026 15:12:31 +0500 Subject: [PATCH 6/6] test(pyi): use shared workspace fixture in hash-merge tests --- .../utils/pyi_generator/test_hash_merge.py | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py index a7944d8fe83..4cbb33b515c 100644 --- a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py +++ b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py @@ -5,6 +5,7 @@ import json from pathlib import Path +import pytest from reflex_base.utils.pyi_generator import PyiGenerator @@ -12,28 +13,28 @@ def _write_hashes(path: Path, mapping: dict[str, str]) -> None: path.write_text(json.dumps(mapping, indent=2, sort_keys=True) + "\n") -def _make_workspace(root: Path) -> Path: - """Lay out a fake workspace with a couple of source files and a hash file. +@pytest.fixture +def workspace(tmp_path: Path) -> Path: + """Lay out a fake workspace with a couple of source files. 
Args: - root: tmp directory to populate. + tmp_path: pytest-provided tmp directory to populate. Returns: The workspace root. """ - pkg = root / "pkg" + pkg = tmp_path / "pkg" pkg.mkdir() (pkg / "foo.py").write_text("# placeholder\n") (pkg / "bar.py").write_text("# placeholder\n") - other = root / "other" + other = tmp_path / "other" other.mkdir() (other / "baz.py").write_text("# placeholder\n") - return root + return tmp_path -def test_partial_run_preserves_unrelated_entries(tmp_path, monkeypatch): +def test_partial_run_preserves_unrelated_entries(workspace, monkeypatch): """Entries for files outside the run's scope are preserved.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -64,9 +65,8 @@ def fake_scan(self, files): } -def test_scanned_file_with_no_output_drops_entry(tmp_path, monkeypatch): +def test_scanned_file_with_no_output_drops_entry(workspace, monkeypatch): """A file scanned this run that produces no stub has its hash entry removed.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -92,9 +92,8 @@ def fake_scan(self, files): assert result == {"pkg/bar.pyi": "BAR_NEW"} -def test_single_scanned_file_with_no_output_drops_entry(tmp_path, monkeypatch): +def test_single_scanned_file_with_no_output_drops_entry(workspace, monkeypatch): """Scanning one file that produces no stub still drops its old hash entry.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -118,9 +117,8 @@ def fake_scan(self, files): assert result == {"pkg/bar.pyi": "BAR"} -def test_creates_hashes_file_when_missing(tmp_path, monkeypatch): +def test_creates_hashes_file_when_missing(workspace, monkeypatch): """If ``pyi_hashes.json`` doesn't exist, the merge creates it.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -140,9 +138,8 @@ def fake_scan(self, files): assert json.loads(hashes_path.read_text()) == {"pkg/foo.pyi": "FOO"} -def test_missing_source_file_drops_entry(tmp_path, monkeypatch): +def test_missing_source_file_drops_entry(workspace, monkeypatch): """An entry whose source ``.py`` no longer exists is cleaned up.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -168,13 +165,12 @@ def fake_scan(self, files): assert result == {"pkg/foo.pyi": "FOO_NEW"} -def test_use_json_false_does_not_touch_hashes_file(tmp_path, monkeypatch): +def test_use_json_false_does_not_touch_hashes_file(workspace, monkeypatch): """With ``use_json=False``, ``pyi_hashes.json`` is neither read nor written. Build hooks rely on this so parallel workspace builds don't race on a single shared hash file at the workspace root. """ - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json" @@ -194,9 +190,8 @@ def fake_scan(self, files): assert hashes_path.read_bytes() == original -def test_incremental_run_merges_into_existing(tmp_path, monkeypatch): +def test_incremental_run_merges_into_existing(workspace, monkeypatch): """An incremental run (``changed_files`` set) merges new hashes into the existing file.""" - workspace = _make_workspace(tmp_path) monkeypatch.chdir(workspace) hashes_path = workspace / "pyi_hashes.json"
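
A minimal sketch of the incremental flow introduced by this series, appended after the last patch for reference only (it is not part of any patch above). It assumes the repo root is the current working directory, that scripts/ is on sys.path so make_pyi is importable, and that pyi_hashes.json already has at least one commit of history; the target "reflex/components" is only an illustrative choice.

    # Sketch only: compose the helpers added in scripts/make_pyi.py by hand.
    from make_pyi import (
        _changed_python_paths,
        _expand_with_dependents,
        _gather_sources,
        _key,
        _last_regen_sha,
    )

    sha = _last_regen_sha()  # last commit touching pyi_hashes.json, or None
    if sha is not None:
        sources = _gather_sources(["reflex/components"])  # every reachable .py file
        by_key = {_key(p): p for p in sources}
        directly = {by_key[p] for p in _changed_python_paths(sha) if p in by_key}
        # Editing a parent class expands to every module that imports it,
        # so subclass stubs are regenerated along with the parent's.
        to_regen = _expand_with_dependents(directly, sources)
        print(sorted(_key(p) for p in to_regen))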