From a753b11aafa780ec7b32ff3cd759e6ddbe24b600 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Sat, 21 Feb 2026 00:19:03 +0000 Subject: [PATCH 1/2] Optimize JavaAssertTransformer._find_balanced_parens The optimized code achieves a **41% runtime improvement** by replacing character-by-character iteration with regex-based scanning to find special characters (`'`, `"`, `(`, `)`). ## Key Optimization **Original approach**: Iterates through every character in the code string (26,253 iterations in profiler), checking each one against multiple conditions. **Optimized approach**: Uses `self._special_re.search(code, pos)` to jump directly to the next special character (only 4,621 iterations in profiler), reducing iteration count by **~82%**. ## Why This Works 1. **Reduces iteration overhead**: In typical Java code, special characters are sparse. The regex engine (implemented in C) efficiently scans to the next occurrence, skipping irrelevant characters like alphanumerics, whitespace, and operators. 2. **Per-character cost reduction**: The profiler shows the original `while pos < end and depth > 0:` line alone consumed 15.6% of runtime with ~190ns per hit. The optimized version's `m = self._special_re.search(code, pos)` takes ~525ns per hit but executes 5.6x fewer times, resulting in net savings. 3. **Elimination of escape tracking**: The original tracked `prev_char` for every iteration. The optimized version checks `code[i - 1]` only when needed (at special character positions), avoiding 26,253 assignments. 
## Performance Characteristics The optimization excels when processing: - **Large flat content** (many arguments): 1051% faster on 1000 comma-separated elements because it skips over all the commas and identifiers - **Long strings with few special chars**: 74.5% faster on large strings because it jumps past text content - **Mixed content**: 13.5-53% faster on realistic mixed structures Trade-off for deeply nested structures: - **Deep nesting** (500 levels): 68% slower because regex overhead dominates when every character is a parenthesis. This is acceptable since deeply nested structures are rare in practice. Accepting this regression is justified by the significant runtime improvement on realistic code patterns, where special characters represent a small fraction of total characters. --- codeflash/languages/java/remove_asserts.py | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/codeflash/languages/java/remove_asserts.py b/codeflash/languages/java/remove_asserts.py index a9050c7ca..8a6811675 100644 --- a/codeflash/languages/java/remove_asserts.py +++ b/codeflash/languages/java/remove_asserts.py @@ -192,6 +192,7 @@ def __init__( # Precompile the assignment-detection regex to avoid recompiling on each call. self._assign_re = re.compile(r"(\w+(?:<[^>]+>)?)\s+(\w+)\s*=\s*$") + self._special_re = re.compile(r"""['"()]""") def transform(self, source: str) -> str: """Remove assertions from source code, preserving target function calls. @@ -804,17 +805,20 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N string_char = None in_char = False - # Track previous character locally to avoid repeated indexing (code[pos-1]). 
- prev_char = code[open_paren_pos] + while depth > 0: + m = self._special_re.search(code, pos) + if m is None: + return None, -1 - while pos < end and depth > 0: - char = code[pos] + i = m.start() + char = m.group() + escaped = i > 0 and code[i - 1] == "\\" # Handle character literals - if char == "'" and not in_string and prev_char != "\\": + if char == "'" and not in_string and not escaped: in_char = not in_char # Handle string literals (double quotes) - elif char == '"' and not in_char and prev_char != "\\": + elif char == '"' and not in_char and not escaped: if not in_string: in_string = True string_char = char @@ -827,13 +831,7 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N elif char == ")": depth -= 1 - pos += 1 - - prev_char = char - - if depth != 0: - return None, -1 - + pos = i + 1 return code[open_paren_pos + 1 : pos - 1], pos def _find_balanced_braces(self, code: str, open_brace_pos: int) -> tuple[str | None, int]: From 4dc61584bebb6c5ce7b7235ecb0113dc0d02e816 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sat, 21 Feb 2026 00:21:57 +0000 Subject: [PATCH 2/2] style: auto-fix linting issues Co-Authored-By: Claude Opus 4.6 --- codeflash/languages/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/languages/registry.py b/codeflash/languages/registry.py index 38688cab6..e32bb5c16 100644 --- a/codeflash/languages/registry.py +++ b/codeflash/languages/registry.py @@ -56,7 +56,7 @@ def _ensure_languages_registered() -> None: from codeflash.languages.javascript import support as _ with contextlib.suppress(ImportError): - from codeflash.languages.java import support as _ + from codeflash.languages.java import support as _ # noqa: F401 _languages_registered = True