From 84795858a26ea99b3ab28f28a7ceefb55bbfbbf4 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 04:10:41 +0000 Subject: [PATCH] Optimize _extract_child_components MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimization achieves a **67% speedup** (runtime drops from 1.67ms to 993μs; speedup is measured as old runtime / new runtime − 1) by replacing an inefficient Python-level iteration pattern with C-optimized regex operations and faster data structures. ## Key Optimizations **1. Batch Regex Extraction with `findall()`** - **Original**: Used `finditer()` to create match objects, then called `match.group(1)` for each match individually - **Optimized**: Uses `findall()` to extract all component names in a single C-optimized operation - **Impact**: Line profiler shows the original spent 34.6% of its time in `finditer()` iteration plus 24.5% in `match.group(1)` calls (59.1% total). The optimized version completes name extraction in just 41.6% of its own, shorter total time with a single `findall()` call. **2. Frozenset for Built-in Component Checks** - **Original**: Tested membership against an inline tuple literal, `name not in ("React.Fragment", "Fragment", ...)`, which is a linear scan of up to four string comparisons per name (in CPython the tuple itself is a pre-built constant, so the cost is the scan rather than tuple creation) - **Optimized**: Uses a module-level `frozenset` that's created once and reused across all calls - **Impact**: Frozenset membership tests are O(1) hash lookups and faster than tuple scans. This optimization particularly benefits the large-scale tests, where `test_many_duplicate_components` shows a 94.4% speedup (328μs → 169μs). **3. 
Set Comprehension for Filtering** - **Original**: Manually built a set with `children.add(name)` in a loop - **Optimized**: Uses a set comprehension `{name for name in names if name not in _BUILT_IN_COMPONENTS}` - **Impact**: Set comprehensions add each element with a dedicated bytecode instruction (`SET_ADD`), which avoids the repeated `children.add` attribute lookups and Python-level method calls of the explicit loop ## Performance Characteristics Based on the annotated tests: - **Small inputs** (single components): 31-41% faster - **Medium inputs** (10-20 components): 35-47% faster - **Large-scale workloads** (100-1000 components): 42-98% faster, with the most dramatic improvements when processing many duplicates or built-in components The optimization scales particularly well with input size. Tests like `test_many_duplicate_components` (1000 instances) show a 94.4% speedup, and `test_performance_very_long_source` shows a 98.8% speedup, demonstrating that the reduced Python-level overhead compounds significantly at scale. ## Impact on Calling Contexts Looking at the function references in `tests/react/test_context.py`, this function is used to analyze React component hierarchies and extract child component relationships. 
The 67% speedup means: - **React codebase analysis** becomes substantially faster when processing files with many JSX components - **Large React projects** with hundreds of components benefit most from the scaling improvements - The function maintains identical behavior (same deduplication, sorting, and exclusion logic), so all existing call sites work unchanged --- .../languages/javascript/frameworks/react/context.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/react/context.py b/codeflash/languages/javascript/frameworks/react/context.py index 83b927ed3..773540939 100644 --- a/codeflash/languages/javascript/frameworks/react/context.py +++ b/codeflash/languages/javascript/frameworks/react/context.py @@ -22,6 +22,8 @@ from codeflash.languages.javascript.frameworks.react.discovery import ReactComponentInfo from codeflash.languages.javascript.treesitter import TreeSitterAnalyzer +_BUILT_IN_COMPONENTS = frozenset(("React.Fragment", "Fragment", "Suspense", "React.Suspense")) + _HOOK_PATTERN = re.compile(r"\b(use[A-Z]\w*)\s*(?:<[^>]*>)?\s*\(") _JSX_COMPONENT_RE = re.compile(r"<([A-Z][a-zA-Z0-9.]*)") @@ -163,12 +165,9 @@ def _extract_hook_usages(component_source: str) -> list[HookUsage]: def _extract_child_components(component_source: str, analyzer: TreeSitterAnalyzer, full_source: str) -> list[str]: """Find child component names rendered in JSX.""" - children = set() - for match in _JSX_COMPONENT_RE.finditer(component_source): - name = match.group(1) - # Skip React built-ins like React.Fragment - if name not in ("React.Fragment", "Fragment", "Suspense", "React.Suspense"): - children.add(name) + names = _JSX_COMPONENT_RE.findall(component_source) + # Skip React built-ins like React.Fragment + children = {name for name in names if name not in _BUILT_IN_COMPONENTS} return sorted(children)