From a8d2bbf1a89e7f2db5ecc8ecbc6d05e1cc87b248 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 03:16:03 +0000 Subject: [PATCH 1/5] Optimize extract_react_context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimization achieves a **15% runtime improvement** (11.9ms → 10.3ms) by eliminating repeated per-call `re.compile()` overhead and reducing unnecessary string operations during React component analysis. ## Key Performance Improvements ### 1. Module-Level Regex Compilation **What changed:** Three frequently-used regex patterns (`HOOK_PATTERN`, `JSX_COMPONENT_RE`, `CONTEXT_RE`) are now compiled once at module import time instead of going through a function-local `import re` and a `re.compile()` call on every function call. **Why it's faster:** Python's `re.compile()` has measurable overhead. In the original code, these patterns were compiled inside functions like `_extract_hook_usages()` and `_extract_child_components()`, meaning every component analyzed paid for a `re.compile()` call; because the `re` module caches compiled patterns, that cost is mostly the cache lookup rather than a full recompilation. The line profiler shows this overhead (~300-320μs) in the original version. By hoisting to module level, this cost is paid once at import rather than repeatedly per component. **Impact on workloads:** The test results show this benefits all scenarios: - Simple cases: 4.5-21% faster (basic hooks/children detection) - Complex cases: 21.8% faster (1000 hooks + 1000 child components) The improvement scales with the number of components analyzed in a session, making it particularly valuable for analyzing large React codebases where thousands of components might be processed. ### 2. Optimized Parenthesis Matching in `_extract_hook_usages()` **What changed:** Replaced character-by-character iteration (`for i, char in enumerate(rest_of_line)`) with direct index-based jumps using `str.find()` to locate only opening and closing parentheses.
**Why it's faster:** The original approach examined every character in the hook call body (up to 48,193 iterations in the profiler). The optimized version uses `find()` to jump directly between parentheses, touching only ~7,165 positions (85% fewer iterations). Python's built-in `str.find()` is implemented in C and is significantly faster than Python-level loops. **Line profiler evidence:** - Original: 32.4ms total time in `_extract_hook_usages()`, with 25.4% (8.2ms) spent in the `enumerate()` loop - Optimized: 18.8ms total time (42% less time), with the `while` loop now completing in ~6.8ms ### 3. Reduced String Slicing **What changed:** Instead of creating a `rest_of_line` substring via `component_source[match.end():]`, the optimized version tracks position with a `pos` variable and slices only when needed for the final dependency array extraction. **Why it's faster:** String slicing in Python creates new string objects. By deferring and minimizing slicing, we reduce memory allocation and copying overhead, which is particularly noticeable with large component source strings. ## Test Case Performance The annotated tests demonstrate the optimization excels across different scenarios: - **Empty/minimal components:** 14-21% faster (reduces overhead when little work is needed) - **Typical components:** 7-10% faster (3-4 hooks, several children) - **Large-scale processing:** 21.8% faster (1000 hooks, demonstrating how the optimization scales) The consistent improvements across test cases indicate this optimization benefits both hot-path repeated analysis and bulk processing scenarios typical in static analysis tools for React codebases.
--- .../javascript/frameworks/react/context.py | 84 +++++++++++-------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/react/context.py b/codeflash/languages/javascript/frameworks/react/context.py index 0d53e5c8b..98eda0257 100644 --- a/codeflash/languages/javascript/frameworks/react/context.py +++ b/codeflash/languages/javascript/frameworks/react/context.py @@ -10,6 +10,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING +import re if TYPE_CHECKING: from tree_sitter import Node @@ -18,6 +19,12 @@ from codeflash.languages.javascript.frameworks.react.discovery import ReactComponentInfo from codeflash.languages.javascript.treesitter import TreeSitterAnalyzer +HOOK_PATTERN = re.compile(r"\b(use[A-Z]\w*)\s*\(") + +JSX_COMPONENT_RE = re.compile(r"<([A-Z][a-zA-Z0-9.]*)") + +CONTEXT_RE = re.compile(r"\buseContext\s*\(\s*(\w+)") + logger = logging.getLogger(__name__) @@ -82,7 +89,8 @@ def extract_react_context( Analyzes the component source to find props types, hooks, child components, and optimization opportunities. 
""" - from codeflash.languages.javascript.frameworks.react.analyzer import detect_optimization_opportunities + from codeflash.languages.javascript.frameworks.react.analyzer import \ + detect_optimization_opportunities context = ReactContext(props_interface=component_info.props_type, is_already_memoized=component_info.is_memoized) @@ -108,42 +116,53 @@ def extract_react_context( def _extract_hook_usages(component_source: str) -> list[HookUsage]: """Parse hook calls and their dependency arrays from component source.""" - import re hooks: list[HookUsage] = [] - # Match useXxx( patterns - hook_pattern = re.compile(r"\b(use[A-Z]\w*)\s*\(") - for match in hook_pattern.finditer(component_source): + # Use precompiled HOOK_PATTERN + for match in HOOK_PATTERN.finditer(component_source): hook_name = match.group(1) - # Try to determine if there's a dependency array - # Look for ], [ pattern after the hook call (simplified heuristic) - rest_of_line = component_source[match.end() :] has_deps = False dep_count = 0 # Simple heuristic: count brackets to find dependency array bracket_depth = 1 - for i, char in enumerate(rest_of_line): - if char == "(": + pos = match.end() + source_len = len(component_source) + + while pos < source_len: + # Find next '(' or ')' from current position + next_open = component_source.find("(", pos) + next_close = component_source.find(")", pos) + + # Determine which paren comes next + if next_close == -1 and next_open == -1: + # No more parentheses; abort + break + if next_open != -1 and (next_open < next_close or next_close == -1): + # Found an opening paren before the next closing paren bracket_depth += 1 - elif char == ")": - bracket_depth -= 1 - if bracket_depth == 0: - # Check if the last argument before closing paren is an array - preceding = rest_of_line[:i].rstrip() - if preceding.endswith("]"): - has_deps = True - # Count items in the array (rough: count commas + 1 for non-empty) - array_start = preceding.rfind("[") - if array_start >= 0: - 
array_content = preceding[array_start + 1 : -1].strip() - if array_content: - dep_count = array_content.count(",") + 1 - else: - dep_count = 0 # empty deps [] - has_deps = True - break + pos = next_open + 1 + continue + # Otherwise, we found a closing paren next + pos = next_close + 1 + bracket_depth -= 1 + if bracket_depth == 0: + # Check if the last argument before closing paren is an array + preceding = component_source[match.end() : next_close].rstrip() + if preceding.endswith("]"): + has_deps = True + array_start = preceding.rfind("[") + if array_start >= 0: + # Extract content inside the brackets (exclude the closing bracket) + array_content = preceding[array_start + 1 : -1].strip() + if array_content: + dep_count = array_content.count(",") + 1 + else: + dep_count = 0 # empty deps [] + has_deps = True + break + hooks.append(HookUsage(name=hook_name, has_dependency_array=has_deps, dependency_count=dep_count)) @@ -152,12 +171,8 @@ def _extract_hook_usages(component_source: str) -> list[HookUsage]: def _extract_child_components(component_source: str, analyzer: TreeSitterAnalyzer, full_source: str) -> list[str]: """Find child component names rendered in JSX.""" - import re - - # Match JSX tags that start with uppercase (React components) - jsx_component_re = re.compile(r"<([A-Z][a-zA-Z0-9.]*)") children = set() - for match in jsx_component_re.finditer(component_source): + for match in JSX_COMPONENT_RE.finditer(component_source): name = match.group(1) # Skip React built-ins like React.Fragment if name not in ("React.Fragment", "Fragment", "Suspense", "React.Suspense"): @@ -167,10 +182,7 @@ def _extract_child_components(component_source: str, analyzer: TreeSitterAnalyze def _extract_context_subscriptions(component_source: str) -> list[str]: """Find React context subscriptions via useContext calls.""" - import re - - context_re = re.compile(r"\buseContext\s*\(\s*(\w+)") - return [match.group(1) for match in context_re.finditer(component_source)] + return 
[match.group(1) for match in CONTEXT_RE.finditer(component_source)] def _find_type_definition(type_name: str, source: str, analyzer: TreeSitterAnalyzer) -> str | None: From f4e758331fce56aad08d52c8061d68df5a9c334f Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 03:18:57 +0000 Subject: [PATCH 2/5] style: auto-fix linting issues and move Path to TYPE_CHECKING block --- .../languages/javascript/frameworks/react/context.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/react/context.py b/codeflash/languages/javascript/frameworks/react/context.py index 98eda0257..d4ebaf3d1 100644 --- a/codeflash/languages/javascript/frameworks/react/context.py +++ b/codeflash/languages/javascript/frameworks/react/context.py @@ -7,12 +7,13 @@ from __future__ import annotations import logging +import re from dataclasses import dataclass, field -from pathlib import Path from typing import TYPE_CHECKING -import re if TYPE_CHECKING: + from pathlib import Path + from tree_sitter import Node from codeflash.languages.javascript.frameworks.react.analyzer import OptimizationOpportunity @@ -89,8 +90,7 @@ def extract_react_context( Analyzes the component source to find props types, hooks, child components, and optimization opportunities. 
""" - from codeflash.languages.javascript.frameworks.react.analyzer import \ - detect_optimization_opportunities + from codeflash.languages.javascript.frameworks.react.analyzer import detect_optimization_opportunities context = ReactContext(props_interface=component_info.props_type, is_already_memoized=component_info.is_memoized) @@ -116,7 +116,6 @@ def extract_react_context( def _extract_hook_usages(component_source: str) -> list[HookUsage]: """Parse hook calls and their dependency arrays from component source.""" - hooks: list[HookUsage] = [] # Use precompiled HOOK_PATTERN @@ -163,7 +162,6 @@ def _extract_hook_usages(component_source: str) -> list[HookUsage]: has_deps = True break - hooks.append(HookUsage(name=hook_name, has_dependency_array=has_deps, dependency_count=dep_count)) return hooks From f7c4c350cafca9e2fe2ce76e1d334cb53e625296 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 03:27:24 +0000 Subject: [PATCH 3/5] Optimize _extract_child_components I replaced the per-match Match object creation (finditer + group(1)) with re.findall to get the group strings directly and removed matches in bulk via set operations. This reduces object allocations and Python-level loop overhead, giving measurable speed and memory improvements on large inputs while preserving behavior and output ordering. 
--- .../languages/javascript/frameworks/react/context.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/react/context.py b/codeflash/languages/javascript/frameworks/react/context.py index d4ebaf3d1..25e2fd097 100644 --- a/codeflash/languages/javascript/frameworks/react/context.py +++ b/codeflash/languages/javascript/frameworks/react/context.py @@ -20,6 +20,8 @@ from codeflash.languages.javascript.frameworks.react.discovery import ReactComponentInfo from codeflash.languages.javascript.treesitter import TreeSitterAnalyzer +_BUILTIN_COMPONENTS = frozenset(("React.Fragment", "Fragment", "Suspense", "React.Suspense")) + HOOK_PATTERN = re.compile(r"\b(use[A-Z]\w*)\s*\(") JSX_COMPONENT_RE = re.compile(r"<([A-Z][a-zA-Z0-9.]*)") @@ -169,12 +171,10 @@ def _extract_hook_usages(component_source: str) -> list[HookUsage]: def _extract_child_components(component_source: str, analyzer: TreeSitterAnalyzer, full_source: str) -> list[str]: """Find child component names rendered in JSX.""" - children = set() - for match in JSX_COMPONENT_RE.finditer(component_source): - name = match.group(1) - # Skip React built-ins like React.Fragment - if name not in ("React.Fragment", "Fragment", "Suspense", "React.Suspense"): - children.add(name) + children = set(JSX_COMPONENT_RE.findall(component_source)) + # Skip React built-ins like React.Fragment + if children: + children.difference_update(_BUILTIN_COMPONENTS) return sorted(children) From 12d8facad26dc5cf4e2f9e912f7b33e85c9cb730 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 03:29:51 +0000 Subject: [PATCH 4/5] style: auto-fix linting issues --- .../languages/javascript/frameworks/detector.py | 5 ++++- .../javascript/frameworks/react/discovery.py | 9 +++------ .../javascript/frameworks/react/profiler.py | 12 ++++-------- codeflash/languages/javascript/treesitter_utils.py | 4 ++-- 4 files changed, 13 
insertions(+), 17 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/detector.py b/codeflash/languages/javascript/frameworks/detector.py index 013de47f5..f4905c593 100644 --- a/codeflash/languages/javascript/frameworks/detector.py +++ b/codeflash/languages/javascript/frameworks/detector.py @@ -10,7 +10,10 @@ import logging from dataclasses import dataclass, field from functools import lru_cache -from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path logger = logging.getLogger(__name__) diff --git a/codeflash/languages/javascript/frameworks/react/discovery.py b/codeflash/languages/javascript/frameworks/react/discovery.py index 9e39de817..d4f30a9e6 100644 --- a/codeflash/languages/javascript/frameworks/react/discovery.py +++ b/codeflash/languages/javascript/frameworks/react/discovery.py @@ -10,10 +10,11 @@ import re from dataclasses import dataclass from enum import Enum -from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: + from pathlib import Path + from tree_sitter import Node from codeflash.languages.javascript.treesitter import FunctionNode, TreeSitterAnalyzer @@ -191,11 +192,7 @@ def _node_contains_jsx(node: Node) -> bool: if _node_contains_jsx(child): return True - for child in node.children: - if _node_contains_jsx(child): - return True - - return False + return any(_node_contains_jsx(child) for child in node.children) def _extract_hooks_used(function_source: str) -> list[str]: diff --git a/codeflash/languages/javascript/frameworks/react/profiler.py b/codeflash/languages/javascript/frameworks/react/profiler.py index 880793c11..f817c375d 100644 --- a/codeflash/languages/javascript/frameworks/react/profiler.py +++ b/codeflash/languages/javascript/frameworks/react/profiler.py @@ -12,10 +12,11 @@ import logging import re -from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: + from pathlib import Path + from tree_sitter import Node from 
codeflash.languages.javascript.treesitter import TreeSitterAnalyzer @@ -76,9 +77,7 @@ def instrument_component_with_profiler(source: str, component_name: str, analyze result = _insert_after_imports(result, counter_code, analyzer) # Ensure React is imported - result = _ensure_react_import(result) - - return result + return _ensure_react_import(result) def instrument_all_components_for_tracing(source: str, file_path: Path, analyzer: TreeSitterAnalyzer) -> str: @@ -163,10 +162,7 @@ def _contains_jsx(node: Node) -> bool: """Check if a tree-sitter node contains JSX elements.""" if node.type in ("jsx_element", "jsx_self_closing_element", "jsx_fragment"): return True - for child in node.children: - if _contains_jsx(child): - return True - return False + return any(_contains_jsx(child) for child in node.children) def _wrap_return_with_profiler(source: str, return_node: Node, profiler_id: str, safe_name: str) -> str: diff --git a/codeflash/languages/javascript/treesitter_utils.py b/codeflash/languages/javascript/treesitter_utils.py index b6126ec9a..75792be6f 100644 --- a/codeflash/languages/javascript/treesitter_utils.py +++ b/codeflash/languages/javascript/treesitter_utils.py @@ -1580,9 +1580,9 @@ def get_analyzer_for_file(file_path: Path) -> TreeSitterAnalyzer: """ suffix = file_path.suffix.lower() - if suffix in (".ts",): + if suffix == ".ts": return TreeSitterAnalyzer(TreeSitterLanguage.TYPESCRIPT) - if suffix in (".tsx",): + if suffix == ".tsx": return TreeSitterAnalyzer(TreeSitterLanguage.TSX) # Default to JavaScript for .js, .jsx, .mjs, .cjs return TreeSitterAnalyzer(TreeSitterLanguage.JAVASCRIPT) From 28d6943846a4ed49a09176cba4299c7810147251 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 03:38:46 +0000 Subject: [PATCH 5/5] Optimize _contains_jsx Refined the optimization to focus on the core performance improvement while maximizing code simplicity: 1. 
**Removed module-level `_JSX_NODE_TYPES` constant**: This micro-optimization added complexity (module-level state) without meaningful performance benefit. The original tuple is small and Python handles small tuple membership checks efficiently. 2. **Removed `reversed(children)` and associated comment**: For a boolean "contains" check, the traversal order is irrelevant. Removing this simplifies the code and eliminates the overhead of reversing children lists. 3. **Kept original variable name `node`**: Reusing `node` in the loop maintains consistency with the original code and reduces diff size. 4. **Removed unnecessary comment**: The simplified iterative approach is self-explanatory and doesn't require additional documentation. The refined code preserves the key optimization (iterative DFS avoiding recursion and generator overhead) while being more readable and closer to the original structure. The performance benefit remains intact as the core algorithmic improvement is preserved. --- .../languages/javascript/frameworks/react/profiler.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/codeflash/languages/javascript/frameworks/react/profiler.py b/codeflash/languages/javascript/frameworks/react/profiler.py index f817c375d..e95d6f341 100644 --- a/codeflash/languages/javascript/frameworks/react/profiler.py +++ b/codeflash/languages/javascript/frameworks/react/profiler.py @@ -160,9 +160,13 @@ def walk(node: Node) -> None: def _contains_jsx(node: Node) -> bool: """Check if a tree-sitter node contains JSX elements.""" - if node.type in ("jsx_element", "jsx_self_closing_element", "jsx_fragment"): - return True - return any(_contains_jsx(child) for child in node.children) + stack = [node] + while stack: + node = stack.pop() + if node.type in ("jsx_element", "jsx_self_closing_element", "jsx_fragment"): + return True + stack.extend(node.children) + return False def _wrap_return_with_profiler(source: str, return_node: Node, profiler_id: str, 
safe_name: str) -> str: