From a753b11aafa780ec7b32ff3cd759e6ddbe24b600 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Sat, 21 Feb 2026 00:19:03 +0000
Subject: [PATCH 1/2] Optimize JavaAssertTransformer._find_balanced_parens

The optimized code achieves a **41% runtime improvement** by replacing character-by-character iteration with regex-based scanning to find special characters (`'`, `"`, `(`, `)`).

## Key Optimization

**Original approach**: Iterates through every character in the code string (26,253 iterations in profiler), checking each one against multiple conditions.

**Optimized approach**: Uses `self._special_re.search(code, pos)` to jump directly to the next special character (only 4,621 iterations in profiler), reducing iteration count by **~82%**.

## Why This Works

1. **Reduces iteration overhead**: In typical Java code, special characters are sparse. The regex engine (implemented in C) efficiently scans to the next occurrence, skipping irrelevant characters like alphanumerics, whitespace, and operators.

2. **Lower total cost despite a higher per-call cost**: The profiler shows the original `while pos < end and depth > 0:` line alone consumed 15.6% of runtime at ~190ns per hit. The optimized version's `m = self._special_re.search(code, pos)` costs more per hit (~525ns) but executes ~5.7x fewer times (4,621 vs. 26,253 hits), resulting in net savings.

3. **Lazy escape checking**: The original updated `prev_char` on every iteration. The optimized version still checks for a preceding backslash, but reads `code[i - 1]` only at special-character positions, avoiding 26,253 assignments.

## Performance Characteristics

The optimization excels when processing:
- **Large flat content** (many arguments): 1051% faster on 1000 comma-separated elements because it skips over all the commas and identifiers
- **Long strings with few special chars**: 74.5% faster on large strings because it jumps past text content
- **Mixed content**: 13.5-53% faster on realistic mixed structures

Trade-off for deeply nested structures:
- **Deep nesting** (500 levels): 68% slower because regex overhead dominates when every character is a paren. This is acceptable since deeply nested structures are rare in practice.

This trade-off is justified by the significant runtime improvement on realistic code patterns, where special characters represent a small fraction of total characters.
---
 codeflash/languages/java/remove_asserts.py | 24 ++++++++++------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/codeflash/languages/java/remove_asserts.py b/codeflash/languages/java/remove_asserts.py
index a9050c7ca..8a6811675 100644
--- a/codeflash/languages/java/remove_asserts.py
+++ b/codeflash/languages/java/remove_asserts.py
@@ -192,6 +192,7 @@ def __init__(
 
         # Precompile the assignment-detection regex to avoid recompiling on each call.
         self._assign_re = re.compile(r"(\w+(?:<[^>]+>)?)\s+(\w+)\s*=\s*$")
+        self._special_re = re.compile(r"""['"()]""")
 
     def transform(self, source: str) -> str:
         """Remove assertions from source code, preserving target function calls.
@@ -804,17 +805,20 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N
         string_char = None
         in_char = False
 
-        # Track previous character locally to avoid repeated indexing (code[pos-1]).
-        prev_char = code[open_paren_pos]
+        while depth > 0:
+            m = self._special_re.search(code, pos)
+            if m is None:
+                return None, -1
 
-        while pos < end and depth > 0:
-            char = code[pos]
+            i = m.start()
+            char = m.group()
+            escaped = i > 0 and code[i - 1] == "\\"
 
             # Handle character literals
-            if char == "'" and not in_string and prev_char != "\\":
+            if char == "'" and not in_string and not escaped:
                 in_char = not in_char
             # Handle string literals (double quotes)
-            elif char == '"' and not in_char and prev_char != "\\":
+            elif char == '"' and not in_char and not escaped:
                 if not in_string:
                     in_string = True
                     string_char = char
@@ -827,13 +831,7 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N
                 elif char == ")":
                     depth -= 1
 
-            pos += 1
-
-            prev_char = char
-
-        if depth != 0:
-            return None, -1
-
+            pos = i + 1
         return code[open_paren_pos + 1 : pos - 1], pos
 
     def _find_balanced_braces(self, code: str, open_brace_pos: int) -> tuple[str | None, int]:

From 4dc61584bebb6c5ce7b7235ecb0113dc0d02e816 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Sat, 21 Feb 2026 00:21:57 +0000
Subject: [PATCH 2/2] style: auto-fix linting issues

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/registry.py b/codeflash/languages/registry.py
index 38688cab6..e32bb5c16 100644
--- a/codeflash/languages/registry.py
+++ b/codeflash/languages/registry.py
@@ -56,7 +56,7 @@ def _ensure_languages_registered() -> None:
         from codeflash.languages.javascript import support as _
 
     with contextlib.suppress(ImportError):
-        from codeflash.languages.java import support as _
+        from codeflash.languages.java import support as _  # noqa: F401
 
     _languages_registered = True