From cdc2a4b464634a84ee87893f0bb00741a4b6ff2b Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 15:00:40 +0000
Subject: [PATCH 01/30] fix: JUnit version detection for multi-module Maven
 projects

- Check dependencyManagement section in pom.xml for test dependencies
- Recursively check pom.xml files in common submodule directories
  (test, tests, src/test, testing) when the root pom.xml declares no test framework
- Change default fallback from JUnit 5 to JUnit 4 (more common in legacy projects)
- Add debug logging for framework detection decisions
- Fixes Bug #7: 64% of optimizations blocked by incorrect JUnit 5 detection
---
 codeflash/languages/java/config.py           | 96 ++++++++++++++------
 codeflash/verification/verification_utils.py |  2 +-
 2 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/codeflash/languages/java/config.py b/codeflash/languages/java/config.py
index 408dcecaf..1001ef040 100644
--- a/codeflash/languages/java/config.py
+++ b/codeflash/languages/java/config.py
@@ -152,16 +152,20 @@ def _detect_test_framework(project_root: Path, build_tool: BuildTool) -> tuple[s
             except Exception:
                 pass
 
-    # Determine primary framework (prefer JUnit 5)
+    # Determine primary framework (prefer JUnit 5 if explicitly found)
     if has_junit5:
+        logger.debug("Selected JUnit 5 as test framework")
         return "junit5", has_junit5, has_junit4, has_testng
     if has_junit4:
+        logger.debug("Selected JUnit 4 as test framework")
         return "junit4", has_junit5, has_junit4, has_testng
     if has_testng:
+        logger.debug("Selected TestNG as test framework")
         return "testng", has_junit5, has_junit4, has_testng
 
-    # Default to JUnit 5 if nothing detected
-    return "junit5", has_junit5, has_junit4, has_testng
+    # Default to JUnit 4 if nothing detected (more common in legacy projects)
+    logger.debug("No test framework detected, defaulting to JUnit 4")
+    return "junit4", has_junit5, has_junit4, has_testng
 
 
 def _detect_test_deps_from_pom(project_root: Path) -> tuple[bool, bool, bool]:
@@ -179,6 +183,36 @@ def _detect_test_deps_from_pom(project_root: Path) -> tuple[bool, bool, bool]:
     has_junit4 = False
     has_testng = False
 
+    def check_dependencies(deps_element, ns):
+        """Check dependencies element for test frameworks."""
+        nonlocal has_junit5, has_junit4, has_testng
+
+        if deps_element is None:
+            return
+
+        for dep_path in ["dependency", "m:dependency"]:
+            deps_list = deps_element.findall(dep_path, ns) if "m:" in dep_path else deps_element.findall(dep_path)
+            for dep in deps_list:
+                artifact_id = None
+                group_id = None
+
+                for child in dep:
+                    tag = child.tag.replace("{http://maven.apache.org/POM/4.0.0}", "")
+                    if tag == "artifactId":
+                        artifact_id = child.text
+                    elif tag == "groupId":
+                        group_id = child.text
+
+                if group_id == "org.junit.jupiter" or (artifact_id and "junit-jupiter" in artifact_id):
+                    has_junit5 = True
+                    logger.debug(f"Found JUnit 5 dependency: {group_id}:{artifact_id}")
+                elif group_id == "junit" and artifact_id == "junit":
+                    has_junit4 = True
+                    logger.debug(f"Found JUnit 4 dependency: {group_id}:{artifact_id}")
+                elif group_id == "org.testng":
+                    has_testng = True
+                    logger.debug(f"Found TestNG dependency: {group_id}:{artifact_id}")
+
     try:
         tree = ET.parse(pom_path)
         root = tree.getroot()
@@ -186,35 +220,45 @@ def _detect_test_deps_from_pom(project_root: Path) -> tuple[bool, bool, bool]:
         # Handle namespace
         ns = {"m": "http://maven.apache.org/POM/4.0.0"}
 
-        # Search for dependencies
+        logger.debug(f"Checking pom.xml at {pom_path}")
+
+        # Search for direct dependencies
         for deps_path in ["dependencies", "m:dependencies"]:
             deps = root.find(deps_path, ns) if "m:" in deps_path else root.find(deps_path)
-            if deps is None:
-                continue
-
-            for dep_path in ["dependency", "m:dependency"]:
-                deps_list = deps.findall(dep_path, ns) if "m:" in dep_path else deps.findall(dep_path)
-                for dep in deps_list:
-                    artifact_id = None
-                    group_id = None
-
-                    for child in dep:
-                        tag = child.tag.replace("{http://maven.apache.org/POM/4.0.0}", "")
-                        if tag == "artifactId":
-                            artifact_id = child.text
-                        elif tag == "groupId":
-                            group_id = child.text
-
-                    if group_id == "org.junit.jupiter" or (artifact_id and "junit-jupiter" in artifact_id):
-                        has_junit5 = True
-                    elif group_id == "junit" and artifact_id == "junit":
-                        has_junit4 = True
-                    elif group_id == "org.testng":
-                        has_testng = True
+            if deps is not None:
+                logger.debug(f"Found dependencies section in {pom_path}")
+                check_dependencies(deps, ns)
+
+        # Also check dependencyManagement section (for multi-module projects)
+        for dep_mgmt_path in ["dependencyManagement", "m:dependencyManagement"]:
+            dep_mgmt = root.find(dep_mgmt_path, ns) if "m:" in dep_mgmt_path else root.find(dep_mgmt_path)
+            if dep_mgmt is not None:
+                logger.debug(f"Found dependencyManagement section in {pom_path}")
+                for deps_path in ["dependencies", "m:dependencies"]:
+                    deps = dep_mgmt.find(deps_path, ns) if "m:" in deps_path else dep_mgmt.find(deps_path)
+                    if deps is not None:
+                        check_dependencies(deps, ns)
 
     except ET.ParseError:
+        logger.debug(f"Failed to parse pom.xml at {pom_path}")
         pass
 
+    # For multi-module projects, also check submodule pom.xml files
+    if not (has_junit5 or has_junit4 or has_testng):
+        logger.debug(f"No test deps in root pom, checking submodules")
+        # Check common submodule locations
+        for submodule_name in ["test", "tests", "src/test", "testing"]:
+            submodule_pom = project_root / submodule_name / "pom.xml"
+            if submodule_pom.exists():
+                logger.debug(f"Checking submodule pom at {submodule_pom}")
+                sub_junit5, sub_junit4, sub_testng = _detect_test_deps_from_pom(project_root / submodule_name)
+                has_junit5 = has_junit5 or sub_junit5
+                has_junit4 = has_junit4 or sub_junit4
+                has_testng = has_testng or sub_testng
+                if has_junit5 or has_junit4 or has_testng:
+                    break
+
+    logger.debug(f"Test framework detection result: junit5={has_junit5}, junit4={has_junit4}, testng={has_testng}")
     return has_junit5, has_junit4, has_testng
 
 
diff --git a/codeflash/verification/verification_utils.py b/codeflash/verification/verification_utils.py
index c6650ef99..477f36c74 100644
--- a/codeflash/verification/verification_utils.py
+++ b/codeflash/verification/verification_utils.py
@@ -232,7 +232,7 @@ def _detect_java_test_framework(self) -> str:
                 return config.test_framework
         except Exception:
             pass
-        return "junit5"  # Default fallback
+        return "junit4"  # Default fallback (JUnit 4 is more common in legacy projects)
 
     def set_language(self, language: str) -> None:
         """Set the language for this test config.

From 06382ea9b56c1ba7c6744e9a8856a3b114751bd0 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 15:10:00 +0000
Subject: [PATCH 02/30] fix: Add path caching for test file resolution in
 benchmarks

- Add cache dict to avoid repeated rglob calls for same test files
- Cache both positive and negative results
- Significantly reduces file system traversals during benchmark parsing
- Partially addresses Bug #2 (still need to filter irrelevant test cases)
---
 codeflash/verification/parse_test_output.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index e00c3a827..6b8128dbc 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -143,6 +143,10 @@ def parse_concurrency_metrics(test_results: TestResults, function_name: str) ->
     )
 
 
+# Cache for resolved test file paths to avoid repeated rglob calls
+_test_file_path_cache: dict[tuple[str, Path], Path | None] = {}
+
+
 def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> Path | None:
     """Resolve test file path from pytest's test class path or Java class path.
 
@@ -164,6 +168,13 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         >>> # Should find: /path/to/tests/unittest/test_file.py
 
     """
+    # Check cache first
+    cache_key = (test_class_path, base_dir)
+    if cache_key in _test_file_path_cache:
+        cached_result = _test_file_path_cache[cache_key]
+        logger.debug(f"[RESOLVE] Cache hit for {test_class_path}: {cached_result}")
+        return cached_result
+
     # Handle Java class paths (convert dots to path and add .java extension)
     # Java class paths look like "com.example.TestClass" and should map to
     # src/test/java/com/example/TestClass.java
@@ -178,6 +189,7 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         logger.debug(f"[RESOLVE] Attempt 1: checking {potential_path}")
         if potential_path.exists():
             logger.debug(f"[RESOLVE] Attempt 1 SUCCESS: found {potential_path}")
+            _test_file_path_cache[cache_key] = potential_path
             return potential_path
 
         # 2. Under src/test/java relative to project root
@@ -189,6 +201,7 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
             logger.debug(f"[RESOLVE] Attempt 2: checking {potential_path} (project_root={project_root})")
             if potential_path.exists():
                 logger.debug(f"[RESOLVE] Attempt 2 SUCCESS: found {potential_path}")
+                _test_file_path_cache[cache_key] = potential_path
                 return potential_path
 
         # 3. Search for the file in base_dir and its subdirectories
@@ -196,9 +209,11 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         logger.debug(f"[RESOLVE] Attempt 3: rglob for {file_name} in {base_dir}")
         for java_file in base_dir.rglob(file_name):
             logger.debug(f"[RESOLVE] Attempt 3 SUCCESS: rglob found {java_file}")
+            _test_file_path_cache[cache_key] = java_file
             return java_file
 
         logger.warning(f"[RESOLVE] FAILED to resolve {test_class_path} in base_dir {base_dir}")
+        _test_file_path_cache[cache_key] = None  # Cache negative results too
         return None
 
     # Handle file paths (contain slashes and extensions like .js/.ts)
@@ -207,6 +222,7 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         # Try the path as-is if it's absolute
         potential_path = Path(test_class_path)
         if potential_path.is_absolute() and potential_path.exists():
+            _test_file_path_cache[cache_key] = potential_path
             return potential_path
 
         # Try to resolve relative to base_dir's parent (project root)
@@ -216,6 +232,7 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         try:
             potential_path = potential_path.resolve()
             if potential_path.exists():
+                _test_file_path_cache[cache_key] = potential_path
                 return potential_path
         except (OSError, RuntimeError):
             pass
@@ -225,10 +242,12 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
         try:
             potential_path = potential_path.resolve()
             if potential_path.exists():
+                _test_file_path_cache[cache_key] = potential_path
                 return potential_path
         except (OSError, RuntimeError):
             pass
 
+        _test_file_path_cache[cache_key] = None  # Cache negative results
         return None
 
     # First try the full path (Python module path)
@@ -259,6 +278,8 @@ def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> P
                 if test_file_path:
                     break
 
+    # Cache the result (could be None)
+    _test_file_path_cache[cache_key] = test_file_path
     return test_file_path
 
 

From c7b4534f9ffb40bdbe5e7f3a85acd05c5516039d Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 15:15:55 +0000
Subject: [PATCH 03/30] fix: Handle complex expressions in Java test
 instrumentation

- Add detection for cast expressions, ternary, array access, etc.
- Skip instrumentation when method call is inside complex expression
- Prevents syntax errors when instrumenting tests with casts like (Long)list.get(2)
- Addresses Bug #6: instrumentation breaking complex Java expressions
---
 codeflash/languages/java/instrumentation.py | 43 +++++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index 6f2725b9b..ab0e94a4f 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -76,6 +76,35 @@ def _is_inside_lambda(node) -> bool:
     return False
 
 
+def _is_inside_complex_expression(node) -> bool:
+    """Check if a tree-sitter node is inside a complex expression that shouldn't be instrumented directly.
+
+    This includes:
+    - Cast expressions: (Long)list.get(2)
+    - Ternary expressions: condition ? func() : other
+    - Array access: arr[func()]
+    - Binary operations: func() + 1
+
+    Returns True if the node should not be directly instrumented.
+    """
+    current = node.parent
+    while current is not None:
+        # Stop at statement boundaries
+        if current.type in {"method_declaration", "block", "if_statement", "for_statement",
+                          "while_statement", "try_statement", "expression_statement"}:
+            return False
+
+        # These are complex expressions that shouldn't have instrumentation inserted in the middle
+        if current.type in {"cast_expression", "ternary_expression", "array_access",
+                          "binary_expression", "unary_expression", "parenthesized_expression",
+                          "instanceof_expression"}:
+            logger.debug(f"Found complex expression parent: {current.type}")
+            return True
+
+        current = current.parent
+    return False
+
+
 _TS_BODY_PREFIX = "class _D { void _m() {\n"
 _TS_BODY_SUFFIX = "\n}}"
 _TS_BODY_PREFIX_BYTES = _TS_BODY_PREFIX.encode("utf8")
@@ -116,10 +145,11 @@ def wrap_target_calls_with_treesitter(
         line_byte_starts.append(offset)
         offset += len(line.encode("utf8")) + 1  # +1 for \n from join
 
-    # Group non-lambda calls by their line index
+    # Group non-lambda and non-complex-expression calls by their line index
     calls_by_line: dict[int, list] = {}
     for call in calls:
-        if call["in_lambda"]:
+        if call["in_lambda"] or call.get("in_complex", False):
+            logger.debug(f"Skipping behavior instrumentation for call in lambda or complex expression")
             continue
         line_idx = _byte_to_line_index(call["start_byte"], line_byte_starts)
         calls_by_line.setdefault(line_idx, []).append(call)
@@ -225,6 +255,7 @@ def _collect_calls(node, wrapper_bytes, body_bytes, prefix_len, func_name, analy
                         "full_call": analyzer.get_node_text(node, wrapper_bytes),
                         "parent_type": parent_type,
                         "in_lambda": _is_inside_lambda(node),
+                        "in_complex": _is_inside_complex_expression(node),
                         "es_start_byte": es_start,
                         "es_end_byte": es_end,
                     }
@@ -666,8 +697,12 @@ def collect_test_methods(node, out) -> None:
     def collect_target_calls(node, wrapper_bytes: bytes, func: str, out) -> None:
         if node.type == "method_invocation":
             name_node = node.child_by_field_name("name")
-            if name_node and analyzer.get_node_text(name_node, wrapper_bytes) == func and not _is_inside_lambda(node):
-                out.append(node)
+            if name_node and analyzer.get_node_text(name_node, wrapper_bytes) == func:
+                # Skip if inside lambda or complex expression
+                if not _is_inside_lambda(node) and not _is_inside_complex_expression(node):
+                    out.append(node)
+                else:
+                    logger.debug(f"Skipping instrumentation of {func} inside lambda or complex expression")
         for child in node.children:
             collect_target_calls(child, wrapper_bytes, func, out)
 

From 90afeda9466de9b26419e8d63a3e948d195b172b Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 15:22:01 +0000
Subject: [PATCH 04/30] fix: Direct JVM execution for multi-module Maven
 projects

- Detect JUnit 4 vs JUnit 5 and use appropriate runner (JUnitCore vs ConsoleLauncher)
- Include target/classes of sibling module directories in the classpath for multi-module projects
- Add stderr logging for debugging when direct execution fails
- Fixes Bug #3: Direct JVM now works, avoiding slow Maven fallback (~0.3s vs ~5-10s)
---
 codeflash/languages/java/test_runner.py | 139 +++++++++++++++++-------
 1 file changed, 102 insertions(+), 37 deletions(-)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index 5ca2f2f8f..bd761018a 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -562,6 +562,17 @@ def _get_test_classpath(
         if main_classes.exists():
             cp_parts.append(str(main_classes))
 
+        # For multi-module projects, also include target/classes from all modules
+        # This is needed because the test module may depend on other modules
+        if test_module:
+            # Find all target/classes directories in sibling modules
+            for module_dir in project_root.iterdir():
+                if module_dir.is_dir() and module_dir.name != test_module:
+                    module_classes = module_dir / "target" / "classes"
+                    if module_classes.exists():
+                        logger.debug(f"Adding multi-module classpath: {module_classes}")
+                        cp_parts.append(str(module_classes))
+
         return os.pathsep.join(cp_parts)
 
     except subprocess.TimeoutExpired:
@@ -605,49 +616,99 @@ def _run_tests_direct(
 
     java = _find_java_executable() or "java"
 
-    # Build command using JUnit Platform Console Launcher
-    # The launcher is included in junit-platform-console-standalone or junit-jupiter
-    cmd = [
+    # Try to detect if JUnit 4 is being used (check for JUnit 4 runner in classpath)
+    # If JUnit 4, use JUnitCore directly instead of ConsoleLauncher
+    is_junit4 = False
+    # Check if org.junit.runner.JUnitCore is in classpath (JUnit 4)
+    # and org.junit.platform.console.ConsoleLauncher is not (JUnit 5)
+    check_junit4_cmd = [
         str(java),
-        # Java 16+ module system: Kryo needs reflective access to internal JDK classes
-        "--add-opens",
-        "java.base/java.util=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.lang=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.lang.reflect=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.io=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.math=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.net=ALL-UNNAMED",
-        "--add-opens",
-        "java.base/java.util.zip=ALL-UNNAMED",
         "-cp",
         classpath,
-        "org.junit.platform.console.ConsoleLauncher",
-        "--disable-banner",
-        "--disable-ansi-colors",
-        # Use 'none' details to avoid duplicate output
-        # Timing markers are captured in XML via stdout capture config
-        "--details=none",
-        # Enable stdout/stderr capture in XML reports
-        # This ensures timing markers are included in the XML system-out element
-        "--config=junit.platform.output.capture.stdout=true",
-        "--config=junit.platform.output.capture.stderr=true",
+        "org.junit.runner.JUnitCore",
+        "-version"
     ]
+    try:
+        result = subprocess.run(check_junit4_cmd, capture_output=True, text=True, timeout=2)
+        # JUnit 4's JUnitCore will show version, JUnit 5 won't have this class
+        if "JUnit version" in result.stdout or result.returncode == 0:
+            is_junit4 = True
+            logger.debug("Detected JUnit 4, using JUnitCore for direct execution")
+    except (subprocess.TimeoutExpired, Exception):
+        pass
 
-    # Add reports directory if specified (for XML output)
-    if reports_dir:
-        reports_dir.mkdir(parents=True, exist_ok=True)
-        cmd.extend(["--reports-dir", str(reports_dir)])
-
-    # Add test classes to select
-    for test_class in test_classes:
-        cmd.extend(["--select-class", test_class])
+    if is_junit4:
+        # Use JUnit 4's JUnitCore runner
+        cmd = [
+            str(java),
+            # Java 16+ module system: Kryo needs reflective access to internal JDK classes
+            "--add-opens",
+            "java.base/java.util=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.lang=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.lang.reflect=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.io=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.math=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.net=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.util.zip=ALL-UNNAMED",
+            "-cp",
+            classpath,
+            "org.junit.runner.JUnitCore",
+        ]
+        # Add test classes
+        cmd.extend(test_classes)
+    else:
+        # Build command using JUnit Platform Console Launcher (JUnit 5)
+        # The launcher is included in junit-platform-console-standalone or junit-jupiter
+        cmd = [
+            str(java),
+            # Java 16+ module system: Kryo needs reflective access to internal JDK classes
+            "--add-opens",
+            "java.base/java.util=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.lang=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.lang.reflect=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.io=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.math=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.net=ALL-UNNAMED",
+            "--add-opens",
+            "java.base/java.util.zip=ALL-UNNAMED",
+            "-cp",
+            classpath,
+            "org.junit.platform.console.ConsoleLauncher",
+            "--disable-banner",
+            "--disable-ansi-colors",
+            # Use 'none' details to avoid duplicate output
+            # Timing markers are captured in XML via stdout capture config
+            "--details=none",
+            # Enable stdout/stderr capture in XML reports
+            # This ensures timing markers are included in the XML system-out element
+            "--config=junit.platform.output.capture.stdout=true",
+            "--config=junit.platform.output.capture.stderr=true",
+        ]
+
+        # Add reports directory if specified (for XML output)
+        if reports_dir:
+            reports_dir.mkdir(parents=True, exist_ok=True)
+            cmd.extend(["--reports-dir", str(reports_dir)])
+
+        # Add test classes to select
+        for test_class in test_classes:
+            cmd.extend(["--select-class", test_class])
 
-    logger.debug("Running tests directly: java -cp ... ConsoleLauncher --select-class %s", test_classes)
+    if is_junit4:
+        logger.debug("Running tests directly: java -cp ... JUnitCore %s", test_classes)
+    else:
+        logger.debug("Running tests directly: java -cp ... ConsoleLauncher --select-class %s", test_classes)
 
     try:
         return subprocess.run(
@@ -982,6 +1043,10 @@ def run_benchmarking_tests(
 
         logger.debug("Loop %d completed in %.2fs (returncode=%d)", loop_idx, loop_time, result.returncode)
 
+        # Log stderr if direct JVM execution failed (for debugging)
+        if result.returncode != 0 and result.stderr:
+            logger.debug("Direct JVM stderr: %s", result.stderr[:500])
+
         # Check if direct JVM execution failed on the first loop.
         # Fall back to Maven-based execution for:
         # - JUnit 4 projects (ConsoleLauncher not on classpath or no tests discovered)

From 54e0b38847d615ff6ede37fc6d270ebb58b3e508 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 17:47:08 +0000
Subject: [PATCH 05/30] fix: set perf_stdout for Java performance tests to fix
 throughput calculation

Bug #10: Timing marker sum was 0 because perf_stdout was never set for Java tests.
The timing markers were being parsed correctly but the raw stdout containing them
was not stored in TestResults.perf_stdout, causing calculate_function_throughput_from_test_results
to return 0 and skip all optimizations.

This fix ensures the subprocess stdout is preserved in perf_stdout field for Java
performance tests, allowing throughput calculation to work correctly.
---
 codeflash/verification/parse_test_output.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index 6b8128dbc..bf2ddb060 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -1249,6 +1249,19 @@ def parse_test_results(
 
     results = merge_test_results(test_results_xml, test_results_data, test_config.test_framework)
 
+    # Bug #10 Fix: For Java performance tests, preserve subprocess stdout containing timing markers
+    # This is needed for calculate_function_throughput_from_test_results to work correctly
+    if is_java() and testing_type == TestingMode.PERFORMANCE and run_result is not None:
+        try:
+            # Extract stdout from subprocess result containing timing markers
+            if isinstance(run_result.stdout, bytes):
+                results.perf_stdout = run_result.stdout.decode('utf-8', errors='replace')
+            elif isinstance(run_result.stdout, str):
+                results.perf_stdout = run_result.stdout
+            logger.debug(f"Bug #10 Fix: Set perf_stdout for Java performance tests ({len(results.perf_stdout or '')} chars)")
+        except Exception as e:
+            logger.debug(f"Bug #10 Fix: Failed to set perf_stdout: {e}")
+
     all_args = False
     coverage = None
     if coverage_database_file and source_file and code_context and function_name:

From 0001fb59219df4d5dc7b9e27e585dd49cad10dda Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 04:28:20 +0000
Subject: [PATCH 06/30] fix: store actual test method name in SQLite for Java
 behavior tests

The instrumented Java test code was storing "{class_name}Test" as the
test_function_name in SQLite instead of the actual test method name
(e.g., "testAdd"). This fixes parity with Python instrumentation.

- Add _extract_test_method_name() with compiled regex patterns
- Inject _cf_test variable with actual method name in behavior code
- Fix setString(3, ...) to use _cf_test instead of hardcoded class name
- Optimize _byte_to_line_index() with bisect.bisect_right()
- Update all behavior mode test expectations

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/instrumentation.py   | 49 ++++++++++++++++---
 .../test_java/test_instrumentation.py         | 24 ++++++---
 2 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index ab0e94a4f..dc31d89e3 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -42,6 +42,24 @@ def _get_function_name(func: Any) -> str:
     raise AttributeError(msg)
 
 
+_METHOD_SIG_PATTERN = re.compile(
+    r"\b(?:public|private|protected)?\s*(?:static)?\s*(?:final)?\s*"
+    r"(?:void|String|int|long|boolean|double|float|char|byte|short|\w+(?:\[\])?)\s+(\w+)\s*\("
+)
+_FALLBACK_METHOD_PATTERN = re.compile(r"\b(\w+)\s*\(")
+
+
+def _extract_test_method_name(method_lines: list[str]) -> str:
+    method_sig = " ".join(method_lines).strip()
+    match = _METHOD_SIG_PATTERN.search(method_sig)
+    if match:
+        return match.group(1)
+    fallback_match = _FALLBACK_METHOD_PATTERN.search(method_sig)
+    if fallback_match:
+        return fallback_match.group(1)
+    return "unknown"
+
+
 # Pattern to detect primitive array types in assertions
 _PRIMITIVE_ARRAY_PATTERN = re.compile(r"new\s+(int|long|double|float|short|byte|char|boolean)\s*\[\s*\]")
 
@@ -90,14 +108,27 @@ def _is_inside_complex_expression(node) -> bool:
     current = node.parent
     while current is not None:
         # Stop at statement boundaries
-        if current.type in {"method_declaration", "block", "if_statement", "for_statement",
-                          "while_statement", "try_statement", "expression_statement"}:
+        if current.type in {
+            "method_declaration",
+            "block",
+            "if_statement",
+            "for_statement",
+            "while_statement",
+            "try_statement",
+            "expression_statement",
+        }:
             return False
 
         # These are complex expressions that shouldn't have instrumentation inserted in the middle
-        if current.type in {"cast_expression", "ternary_expression", "array_access",
-                          "binary_expression", "unary_expression", "parenthesized_expression",
-                          "instanceof_expression"}:
+        if current.type in {
+            "cast_expression",
+            "ternary_expression",
+            "array_access",
+            "binary_expression",
+            "unary_expression",
+            "parenthesized_expression",
+            "instanceof_expression",
+        }:
             logger.debug(f"Found complex expression parent: {current.type}")
             return True
 
@@ -149,7 +180,7 @@ def wrap_target_calls_with_treesitter(
     calls_by_line: dict[int, list] = {}
     for call in calls:
         if call["in_lambda"] or call.get("in_complex", False):
-            logger.debug(f"Skipping behavior instrumentation for call in lambda or complex expression")
+            logger.debug("Skipping behavior instrumentation for call in lambda or complex expression")
             continue
         line_idx = _byte_to_line_index(call["start_byte"], line_byte_starts)
         calls_by_line.setdefault(line_idx, []).append(call)
@@ -528,6 +559,9 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 result.append(ml)
             i += 1
 
+            # Extract the test method name from the method signature
+            test_method_name = _extract_test_method_name(method_lines)
+
             # We're now inside the method body
             iteration_counter += 1
             iter_id = iteration_counter
@@ -573,6 +607,7 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 f'{indent}String _cf_outputFile{iter_id} = System.getenv("CODEFLASH_OUTPUT_FILE");',
                 f'{indent}String _cf_testIteration{iter_id} = System.getenv("CODEFLASH_TEST_ITERATION");',
                 f'{indent}if (_cf_testIteration{iter_id} == null) _cf_testIteration{iter_id} = "0";',
+                f'{indent}String _cf_test{iter_id} = "{test_method_name}";',
                 f'{indent}System.out.println("!$######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":" + _cf_iter{iter_id} + "######$!");',
                 f"{indent}byte[] _cf_serializedResult{iter_id} = null;",
                 f"{indent}long _cf_end{iter_id} = -1;",
@@ -610,7 +645,7 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 f"{indent}                try (PreparedStatement _cf_pstmt{iter_id} = _cf_conn{iter_id}.prepareStatement(_cf_sql{iter_id})) {{",
                 f"{indent}                    _cf_pstmt{iter_id}.setString(1, _cf_mod{iter_id});",
                 f"{indent}                    _cf_pstmt{iter_id}.setString(2, _cf_cls{iter_id});",
-                f'{indent}                    _cf_pstmt{iter_id}.setString(3, "{class_name}Test");',
+                f"{indent}                    _cf_pstmt{iter_id}.setString(3, _cf_test{iter_id});",
                 f"{indent}                    _cf_pstmt{iter_id}.setString(4, _cf_fn{iter_id});",
                 f"{indent}                    _cf_pstmt{iter_id}.setInt(5, _cf_loop{iter_id});",
                 f'{indent}                    _cf_pstmt{iter_id}.setString(6, _cf_iter{iter_id} + "_" + _cf_testIteration{iter_id});',
diff --git a/tests/test_languages/test_java/test_instrumentation.py b/tests/test_languages/test_java/test_instrumentation.py
index a5452f094..64f161e73 100644
--- a/tests/test_languages/test_java/test_instrumentation.py
+++ b/tests/test_languages/test_java/test_instrumentation.py
@@ -145,6 +145,7 @@ def test_instrument_behavior_mode_simple(self, tmp_path: Path):
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testAdd";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -175,7 +176,7 @@ def test_instrument_behavior_mode_simple(self, tmp_path: Path):
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "CalculatorTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);
@@ -256,6 +257,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testNegativeInput_ThrowsIllegalArgumentException";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -281,7 +283,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "FibonacciTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);
@@ -309,6 +311,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         String _cf_outputFile2 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration2 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration2 == null) _cf_testIteration2 = "0";
+        String _cf_test2 = "testZeroInput_ReturnsZero";
         System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
         byte[] _cf_serializedResult2 = null;
         long _cf_end2 = -1;
@@ -338,7 +341,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
                         try (PreparedStatement _cf_pstmt2 = _cf_conn2.prepareStatement(_cf_sql2)) {
                             _cf_pstmt2.setString(1, _cf_mod2);
                             _cf_pstmt2.setString(2, _cf_cls2);
-                            _cf_pstmt2.setString(3, "FibonacciTestTest");
+                            _cf_pstmt2.setString(3, _cf_test2);
                             _cf_pstmt2.setString(4, _cf_fn2);
                             _cf_pstmt2.setInt(5, _cf_loop2);
                             _cf_pstmt2.setString(6, _cf_iter2 + "_" + _cf_testIteration2);
@@ -420,6 +423,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testNegativeInput_ThrowsIllegalArgumentException";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -447,7 +451,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "FibonacciTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);
@@ -475,6 +479,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         String _cf_outputFile2 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration2 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration2 == null) _cf_testIteration2 = "0";
+        String _cf_test2 = "testZeroInput_ReturnsZero";
         System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
         byte[] _cf_serializedResult2 = null;
         long _cf_end2 = -1;
@@ -504,7 +509,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
                         try (PreparedStatement _cf_pstmt2 = _cf_conn2.prepareStatement(_cf_sql2)) {
                             _cf_pstmt2.setString(1, _cf_mod2);
                             _cf_pstmt2.setString(2, _cf_cls2);
-                            _cf_pstmt2.setString(3, "FibonacciTestTest");
+                            _cf_pstmt2.setString(3, _cf_test2);
                             _cf_pstmt2.setString(4, _cf_fn2);
                             _cf_pstmt2.setInt(5, _cf_loop2);
                             _cf_pstmt2.setString(6, _cf_iter2 + "_" + _cf_testIteration2);
@@ -816,6 +821,7 @@ class TestKryoSerializerUsage:
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testFoo";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -844,7 +850,7 @@ class TestKryoSerializerUsage:
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "MyTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);
@@ -1317,6 +1323,7 @@ def test_instrument_generated_test_behavior_mode(self):
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testAdd";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -1346,7 +1353,7 @@ def test_instrument_generated_test_behavior_mode(self):
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "CalculatorTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);
@@ -2522,6 +2529,7 @@ def test_behavior_mode_writes_to_sqlite(self, java_project):
         String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
+        String _cf_test1 = "testIncrement";
         System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
@@ -2552,7 +2560,7 @@ def test_behavior_mode_writes_to_sqlite(self, java_project):
                         try (PreparedStatement _cf_pstmt1 = _cf_conn1.prepareStatement(_cf_sql1)) {
                             _cf_pstmt1.setString(1, _cf_mod1);
                             _cf_pstmt1.setString(2, _cf_cls1);
-                            _cf_pstmt1.setString(3, "CounterTestTest");
+                            _cf_pstmt1.setString(3, _cf_test1);
                             _cf_pstmt1.setString(4, _cf_fn1);
                             _cf_pstmt1.setInt(5, _cf_loop1);
                             _cf_pstmt1.setString(6, _cf_iter1 + "_" + _cf_testIteration1);

From 6220ace975a521e17c28a65298a1965644ed4c20 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 04:28:25 +0000
Subject: [PATCH 07/30] chore: auto-format lint fixes from pre-commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/config.py          | 3 +--
 codeflash/languages/java/test_runner.py     | 8 +-------
 codeflash/verification/parse_test_output.py | 6 ++++--
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/codeflash/languages/java/config.py b/codeflash/languages/java/config.py
index 1001ef040..748298bc9 100644
--- a/codeflash/languages/java/config.py
+++ b/codeflash/languages/java/config.py
@@ -241,11 +241,10 @@ def check_dependencies(deps_element, ns):
 
     except ET.ParseError:
         logger.debug(f"Failed to parse pom.xml at {pom_path}")
-        pass
 
     # For multi-module projects, also check submodule pom.xml files
     if not (has_junit5 or has_junit4 or has_testng):
-        logger.debug(f"No test deps in root pom, checking submodules")
+        logger.debug("No test deps in root pom, checking submodules")
         # Check common submodule locations
         for submodule_name in ["test", "tests", "src/test", "testing"]:
             submodule_pom = project_root / submodule_name / "pom.xml"
diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index bd761018a..d326d38c2 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -621,13 +621,7 @@ def _run_tests_direct(
     is_junit4 = False
     # Check if org.junit.runner.JUnitCore is in classpath (JUnit 4)
     # and org.junit.platform.console.ConsoleLauncher is not (JUnit 5)
-    check_junit4_cmd = [
-        str(java),
-        "-cp",
-        classpath,
-        "org.junit.runner.JUnitCore",
-        "-version"
-    ]
+    check_junit4_cmd = [str(java), "-cp", classpath, "org.junit.runner.JUnitCore", "-version"]
     try:
         result = subprocess.run(check_junit4_cmd, capture_output=True, text=True, timeout=2)
         # JUnit 4's JUnitCore will show version, JUnit 5 won't have this class
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index bf2ddb060..a662cd2e6 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -1255,10 +1255,12 @@ def parse_test_results(
         try:
             # Extract stdout from subprocess result containing timing markers
             if isinstance(run_result.stdout, bytes):
-                results.perf_stdout = run_result.stdout.decode('utf-8', errors='replace')
+                results.perf_stdout = run_result.stdout.decode("utf-8", errors="replace")
             elif isinstance(run_result.stdout, str):
                 results.perf_stdout = run_result.stdout
-            logger.debug(f"Bug #10 Fix: Set perf_stdout for Java performance tests ({len(results.perf_stdout or '')} chars)")
+            logger.debug(
+                f"Bug #10 Fix: Set perf_stdout for Java performance tests ({len(results.perf_stdout or '')} chars)"
+            )
         except Exception as e:
             logger.debug(f"Bug #10 Fix: Failed to set perf_stdout: {e}")
 

From 6113bacbad6c63033a90720d035862e7f16e2a2a Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 04:30:33 +0000
Subject: [PATCH 08/30] fix: add JUnit Console Standalone to classpath for
 direct JVM execution

Direct JVM execution with ConsoleLauncher was always failing because
junit-platform-console-standalone is not included in the standard
junit-jupiter dependency tree. The _get_test_classpath() function now
finds and adds the console standalone JAR from ~/.m2, downloading it
via Maven if needed.

This enables direct JVM test execution for JUnit 5 projects, avoiding
the Maven overhead (~500ms vs ~5-10s per invocation) and Surefire
configuration issues (e.g., custom <includes> that ignore -Dtest).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/test_runner.py | 61 +++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index d326d38c2..e14becb6f 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -573,6 +573,15 @@ def _get_test_classpath(
                         logger.debug(f"Adding multi-module classpath: {module_classes}")
                         cp_parts.append(str(module_classes))
 
+        # Add JUnit Platform Console Standalone JAR if not already on classpath.
+        # This is required for direct JVM execution with ConsoleLauncher,
+        # which is NOT included in the standard junit-jupiter dependency tree.
+        if "console-standalone" not in classpath and "ConsoleLauncher" not in classpath:
+            console_jar = _find_junit_console_standalone()
+            if console_jar:
+                logger.debug("Adding JUnit Console Standalone to classpath: %s", console_jar)
+                cp_parts.append(str(console_jar))
+
         return os.pathsep.join(cp_parts)
 
     except subprocess.TimeoutExpired:
@@ -587,6 +596,58 @@ def _get_test_classpath(
             cp_file.unlink()
 
 
+def _find_junit_console_standalone() -> Path | None:
+    """Find the JUnit Platform Console Standalone JAR in the local Maven repository.
+
+    This JAR contains ConsoleLauncher which is required for direct JVM test execution
+    with JUnit 5. It is NOT included in the standard junit-jupiter dependency tree.
+
+    Returns:
+        Path to the console standalone JAR, or None if not found.
+
+    """
+    m2_base = Path.home() / ".m2" / "repository" / "org" / "junit" / "platform" / "junit-platform-console-standalone"
+    if not m2_base.exists():
+        # Try to download it via Maven
+        mvn = find_maven_executable()
+        if mvn:
+            logger.debug("Console standalone not found in cache, downloading via Maven")
+            try:
+                subprocess.run(
+                    [
+                        mvn,
+                        "dependency:get",
+                        "-Dartifact=org.junit.platform:junit-platform-console-standalone:1.10.0",
+                        "-q",
+                        "-B",
+                    ],
+                    check=False,
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
+            except (subprocess.TimeoutExpired, Exception):
+                pass
+        if not m2_base.exists():
+            return None
+
+    # Find the latest version available
+    try:
+        versions = sorted(
+            [d for d in m2_base.iterdir() if d.is_dir()],
+            key=lambda d: d.name,
+            reverse=True,
+        )
+        for version_dir in versions:
+            jar = version_dir / f"junit-platform-console-standalone-{version_dir.name}.jar"
+            if jar.exists():
+                return jar
+    except Exception:
+        pass
+
+    return None
+
+
 def _run_tests_direct(
     classpath: str,
     test_classes: list[str],

From a3f5943789f0111f2e36c13307d171b6a7c2392c Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 04:52:40 +0000
Subject: [PATCH 09/30] fix: cache TestConfig.test_framework to avoid repeated
 pom.xml parsing

TestConfig.test_framework was an uncached @property that called
_detect_java_test_framework() -> detect_java_project() ->
_detect_test_deps_from_pom() (parses pom.xml) on every access.
During test result parsing, this was accessed once per testcase,
causing 300K+ redundant pom.xml parses and massive debug log spam.

Cache the result in the _test_framework field after the first detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/verification/verification_utils.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/codeflash/verification/verification_utils.py b/codeflash/verification/verification_utils.py
index 477f36c74..0a613c1fe 100644
--- a/codeflash/verification/verification_utils.py
+++ b/codeflash/verification/verification_utils.py
@@ -179,6 +179,7 @@ class TestConfig:
     use_cache: bool = True
     _language: Optional[str] = None  # Language identifier for multi-language support
     js_project_root: Optional[Path] = None  # JavaScript project root (directory containing package.json)
+    _test_framework: Optional[str] = None  # Cached test framework detection result
 
     def __post_init__(self) -> None:
         self.tests_root = self.tests_root.resolve()
@@ -191,14 +192,19 @@ def test_framework(self) -> str:
 
         For JavaScript/TypeScript: uses the configured framework (vitest, jest, or mocha).
         For Python: uses pytest as default.
+        Result is cached after first detection to avoid repeated pom.xml parsing.
         """
+        if self._test_framework is not None:
+            return self._test_framework
         if is_javascript():
             from codeflash.languages.test_framework import get_js_test_framework_or_default
 
-            return get_js_test_framework_or_default()
-        if is_java():
-            return self._detect_java_test_framework()
-        return "pytest"
+            self._test_framework = get_js_test_framework_or_default()
+        elif is_java():
+            self._test_framework = self._detect_java_test_framework()
+        else:
+            self._test_framework = "pytest"
+        return self._test_framework
 
     def _detect_java_test_framework(self) -> str:
         """Detect the Java test framework from the project configuration.

From bb6f38fb40e6f2a73627b16d238fef242bf92070 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 05:08:26 +0000
Subject: [PATCH 10/30] fix: detect JUnit version from classpath strings
 instead of subprocess probing

The previous detection ran `java -cp ... JUnitCore -version` to check for
JUnit 4, but JUnit 5 projects include JUnit 4 classes via junit-vintage-engine,
causing false positive detection. This made direct JVM execution always fail
and fall back to Maven. Now checks for JUnit 5 JAR names (junit-jupiter,
junit-platform, console-standalone) in the classpath string instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/test_runner.py | 26 ++++++++++++-------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index e14becb6f..32a42e8b7 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -677,20 +677,18 @@ def _run_tests_direct(
 
     java = _find_java_executable() or "java"
 
-    # Try to detect if JUnit 4 is being used (check for JUnit 4 runner in classpath)
-    # If JUnit 4, use JUnitCore directly instead of ConsoleLauncher
-    is_junit4 = False
-    # Check if org.junit.runner.JUnitCore is in classpath (JUnit 4)
-    # and org.junit.platform.console.ConsoleLauncher is not (JUnit 5)
-    check_junit4_cmd = [str(java), "-cp", classpath, "org.junit.runner.JUnitCore", "-version"]
-    try:
-        result = subprocess.run(check_junit4_cmd, capture_output=True, text=True, timeout=2)
-        # JUnit 4's JUnitCore will show version, JUnit 5 won't have this class
-        if "JUnit version" in result.stdout or result.returncode == 0:
-            is_junit4 = True
-            logger.debug("Detected JUnit 4, using JUnitCore for direct execution")
-    except (subprocess.TimeoutExpired, Exception):
-        pass
+    # Detect JUnit version from the classpath string.
+    # Previously this probed the classpath via subprocess, but that's unreliable:
+    # JUnit 5 projects often have JUnit 4 classes via junit-vintage-engine,
+    # causing false JUnit 4 detection and failed test execution.
+    # Instead, check if ConsoleLauncher (JUnit 5) is available on the classpath.
+    has_console_launcher = "console-standalone" in classpath or "ConsoleLauncher" in classpath
+    has_junit5 = "junit-jupiter" in classpath or "junit-platform" in classpath
+    is_junit4 = not (has_console_launcher or has_junit5)
+    if is_junit4:
+        logger.debug("JUnit 4 detected (no JUnit 5 platform JARs on classpath), using JUnitCore")
+    else:
+        logger.debug("JUnit 5 detected on classpath, using ConsoleLauncher")
 
     if is_junit4:
         # Use JUnit 4's JUnitCore runner

From d86085768345ffed71d39a68b7197e3163f28a30 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 19 Feb 2026 15:00:40 +0000
Subject: [PATCH 11/30] fix: JUnit version detection for multi-module Maven
 projects

- Check dependencyManagement section in pom.xml for test dependencies
- Recursively check submodule pom.xml files (test, tests, etc.)
- Change default fallback from JUnit 5 to JUnit 4 (more common in legacy
  projects)
- Add debug logging for framework detection decisions
- Fixes Bug #7: 64% of optimizations blocked by incorrect JUnit 5 detection
---
 codeflash/languages/java/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/java/config.py b/codeflash/languages/java/config.py
index 748298bc9..53041280e 100644
--- a/codeflash/languages/java/config.py
+++ b/codeflash/languages/java/config.py
@@ -240,7 +240,7 @@ def check_dependencies(deps_element, ns):
                         check_dependencies(deps, ns)
 
     except ET.ParseError:
-        logger.debug(f"Failed to parse pom.xml at {pom_path}")
+        logger.debug("Failed to parse pom.xml at %s", pom_path)
 
     # For multi-module projects, also check submodule pom.xml files
     if not (has_junit5 or has_junit4 or has_testng):

From b6564e673f14c9055f15c3adfff5486e99e35a9d Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 05:58:29 +0000
Subject: [PATCH 12/30] style: auto-fix linting issues

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/test_runner.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index 32a42e8b7..8a21f6e1a 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -633,11 +633,7 @@ def _find_junit_console_standalone() -> Path | None:
 
     # Find the latest version available
     try:
-        versions = sorted(
-            [d for d in m2_base.iterdir() if d.is_dir()],
-            key=lambda d: d.name,
-            reverse=True,
-        )
+        versions = sorted([d for d in m2_base.iterdir() if d.is_dir()], key=lambda d: d.name, reverse=True)
         for version_dir in versions:
             jar = version_dir / f"junit-platform-console-standalone-{version_dir.name}.jar"
             if jar.exists():

From cfcbd92b89c89f77030a7806cb43ecdefe36425d Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 06:22:04 +0000
Subject: [PATCH 13/30] fix: correct JUnit version logging for projects using
 ConsoleLauncher with vintage engine

ConsoleLauncher runs both JUnit 4 (via vintage engine) and JUnit 5 tests.
The detection now correctly distinguishes between JUnit 5 projects (have
junit-jupiter on classpath) and JUnit 4 projects using ConsoleLauncher as
the runner. Previously, the injected console-standalone JAR falsely triggered
"JUnit 5 detected" for all projects.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/test_runner.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index 8a21f6e1a..ca8b1b2c7 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -674,17 +674,23 @@ def _run_tests_direct(
     java = _find_java_executable() or "java"
 
     # Detect JUnit version from the classpath string.
-    # Previously this probed the classpath via subprocess, but that's unreliable:
-    # JUnit 5 projects often have JUnit 4 classes via junit-vintage-engine,
-    # causing false JUnit 4 detection and failed test execution.
-    # Instead, check if ConsoleLauncher (JUnit 5) is available on the classpath.
+    # We check for junit-jupiter (the JUnit 5 test API) as the indicator of JUnit 5 tests.
+    # Note: console-standalone and junit-platform are NOT reliable indicators because
+    # we inject console-standalone ourselves in _get_test_classpath(), so it's always present.
+    # ConsoleLauncher can run both JUnit 5 and JUnit 4 tests (via vintage engine),
+    # so we prefer it when available and only fall back to JUnitCore for pure JUnit 4
+    # projects without ConsoleLauncher on the classpath.
+    has_junit5_tests = "junit-jupiter" in classpath
     has_console_launcher = "console-standalone" in classpath or "ConsoleLauncher" in classpath
-    has_junit5 = "junit-jupiter" in classpath or "junit-platform" in classpath
-    is_junit4 = not (has_console_launcher or has_junit5)
+    # Use ConsoleLauncher if available (works for both JUnit 4 via vintage and JUnit 5).
+    # Only use JUnitCore when ConsoleLauncher is not on the classpath at all.
+    is_junit4 = not has_console_launcher
     if is_junit4:
-        logger.debug("JUnit 4 detected (no JUnit 5 platform JARs on classpath), using JUnitCore")
+        logger.debug("JUnit 4 project, no ConsoleLauncher available, using JUnitCore")
+    elif has_junit5_tests:
+        logger.debug("JUnit 5 project, using ConsoleLauncher")
     else:
-        logger.debug("JUnit 5 detected on classpath, using ConsoleLauncher")
+        logger.debug("JUnit 4 project, using ConsoleLauncher (via vintage engine)")
 
     if is_junit4:
         # Use JUnit 4's JUnitCore runner

From d54aa6859afa6c9543cfec23a1b54bb846c42da4 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Fri, 20 Feb 2026 00:32:38 -0800
Subject: [PATCH 14/30] Apply suggestion from @claude[bot]

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
---
 codeflash/languages/java/test_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index ca8b1b2c7..7453146e0 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -633,7 +633,7 @@ def _find_junit_console_standalone() -> Path | None:
 
     # Find the latest version available
     try:
-        versions = sorted([d for d in m2_base.iterdir() if d.is_dir()], key=lambda d: d.name, reverse=True)
+        versions = sorted([d for d in m2_base.iterdir() if d.is_dir()], key=lambda d: tuple(int(x) for x in d.name.split('.') if x.isdigit()), reverse=True)
         for version_dir in versions:
             jar = version_dir / f"junit-platform-console-standalone-{version_dir.name}.jar"
             if jar.exists():

From 53528a21f271a9b78aec1c089fa880f24de8f0a5 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 08:35:26 +0000
Subject: [PATCH 15/30] style: auto-fix linting issues

Convert f-string logging to lazy % formatting (G004) and replace try-except-pass with contextlib.suppress (SIM105).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/config.py          | 16 ++++++++--------
 codeflash/languages/java/instrumentation.py |  4 ++--
 codeflash/languages/java/test_runner.py     | 13 ++++++++-----
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/codeflash/languages/java/config.py b/codeflash/languages/java/config.py
index 53041280e..ceb7fd4b9 100644
--- a/codeflash/languages/java/config.py
+++ b/codeflash/languages/java/config.py
@@ -205,13 +205,13 @@ def check_dependencies(deps_element, ns):
 
                 if group_id == "org.junit.jupiter" or (artifact_id and "junit-jupiter" in artifact_id):
                     has_junit5 = True
-                    logger.debug(f"Found JUnit 5 dependency: {group_id}:{artifact_id}")
+                    logger.debug("Found JUnit 5 dependency: %s:%s", group_id, artifact_id)
                 elif group_id == "junit" and artifact_id == "junit":
                     has_junit4 = True
-                    logger.debug(f"Found JUnit 4 dependency: {group_id}:{artifact_id}")
+                    logger.debug("Found JUnit 4 dependency: %s:%s", group_id, artifact_id)
                 elif group_id == "org.testng":
                     has_testng = True
-                    logger.debug(f"Found TestNG dependency: {group_id}:{artifact_id}")
+                    logger.debug("Found TestNG dependency: %s:%s", group_id, artifact_id)
 
     try:
         tree = ET.parse(pom_path)
@@ -220,20 +220,20 @@ def check_dependencies(deps_element, ns):
         # Handle namespace
         ns = {"m": "http://maven.apache.org/POM/4.0.0"}
 
-        logger.debug(f"Checking pom.xml at {pom_path}")
+        logger.debug("Checking pom.xml at %s", pom_path)
 
         # Search for direct dependencies
         for deps_path in ["dependencies", "m:dependencies"]:
             deps = root.find(deps_path, ns) if "m:" in deps_path else root.find(deps_path)
             if deps is not None:
-                logger.debug(f"Found dependencies section in {pom_path}")
+                logger.debug("Found dependencies section in %s", pom_path)
                 check_dependencies(deps, ns)
 
         # Also check dependencyManagement section (for multi-module projects)
         for dep_mgmt_path in ["dependencyManagement", "m:dependencyManagement"]:
             dep_mgmt = root.find(dep_mgmt_path, ns) if "m:" in dep_mgmt_path else root.find(dep_mgmt_path)
             if dep_mgmt is not None:
-                logger.debug(f"Found dependencyManagement section in {pom_path}")
+                logger.debug("Found dependencyManagement section in %s", pom_path)
                 for deps_path in ["dependencies", "m:dependencies"]:
                     deps = dep_mgmt.find(deps_path, ns) if "m:" in deps_path else dep_mgmt.find(deps_path)
                     if deps is not None:
@@ -249,7 +249,7 @@ def check_dependencies(deps_element, ns):
         for submodule_name in ["test", "tests", "src/test", "testing"]:
             submodule_pom = project_root / submodule_name / "pom.xml"
             if submodule_pom.exists():
-                logger.debug(f"Checking submodule pom at {submodule_pom}")
+                logger.debug("Checking submodule pom at %s", submodule_pom)
                 sub_junit5, sub_junit4, sub_testng = _detect_test_deps_from_pom(project_root / submodule_name)
                 has_junit5 = has_junit5 or sub_junit5
                 has_junit4 = has_junit4 or sub_junit4
@@ -257,7 +257,7 @@ def check_dependencies(deps_element, ns):
                 if has_junit5 or has_junit4 or has_testng:
                     break
 
-    logger.debug(f"Test framework detection result: junit5={has_junit5}, junit4={has_junit4}, testng={has_testng}")
+    logger.debug("Test framework detection result: junit5=%s, junit4=%s, testng=%s", has_junit5, has_junit4, has_testng)
     return has_junit5, has_junit4, has_testng
 
 
diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index dc31d89e3..884b36b67 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -129,7 +129,7 @@ def _is_inside_complex_expression(node) -> bool:
             "parenthesized_expression",
             "instanceof_expression",
         }:
-            logger.debug(f"Found complex expression parent: {current.type}")
+            logger.debug("Found complex expression parent: %s", current.type)
             return True
 
         current = current.parent
@@ -737,7 +737,7 @@ def collect_target_calls(node, wrapper_bytes: bytes, func: str, out) -> None:
                 if not _is_inside_lambda(node) and not _is_inside_complex_expression(node):
                     out.append(node)
                 else:
-                    logger.debug(f"Skipping instrumentation of {func} inside lambda or complex expression")
+                    logger.debug("Skipping instrumentation of %s inside lambda or complex expression", func)
         for child in node.children:
             collect_target_calls(child, wrapper_bytes, func, out)
 
diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index 7453146e0..2bf4e6334 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import contextlib
 import logging
 import os
 import re
@@ -570,7 +571,7 @@ def _get_test_classpath(
                 if module_dir.is_dir() and module_dir.name != test_module:
                     module_classes = module_dir / "target" / "classes"
                     if module_classes.exists():
-                        logger.debug(f"Adding multi-module classpath: {module_classes}")
+                        logger.debug("Adding multi-module classpath: %s", module_classes)
                         cp_parts.append(str(module_classes))
 
         # Add JUnit Platform Console Standalone JAR if not already on classpath.
@@ -612,7 +613,7 @@ def _find_junit_console_standalone() -> Path | None:
         mvn = find_maven_executable()
         if mvn:
             logger.debug("Console standalone not found in cache, downloading via Maven")
-            try:
+            with contextlib.suppress(subprocess.TimeoutExpired, Exception):
                 subprocess.run(
                     [
                         mvn,
@@ -626,14 +627,16 @@ def _find_junit_console_standalone() -> Path | None:
                     text=True,
                     timeout=30,
                 )
-            except (subprocess.TimeoutExpired, Exception):
-                pass
         if not m2_base.exists():
             return None
 
     # Find the latest version available
     try:
-        versions = sorted([d for d in m2_base.iterdir() if d.is_dir()], key=lambda d: tuple(int(x) for x in d.name.split('.') if x.isdigit()), reverse=True)
+        versions = sorted(
+            [d for d in m2_base.iterdir() if d.is_dir()],
+            key=lambda d: tuple(int(x) for x in d.name.split(".") if x.isdigit()),
+            reverse=True,
+        )
         for version_dir in versions:
             jar = version_dir / f"junit-platform-console-standalone-{version_dir.name}.jar"
             if jar.exists():

From b8ec2353d5e54b9524785f7c70915e12d16d734b Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 08:55:19 +0000
Subject: [PATCH 16/30] fix: resolve mypy type errors in Java config and
 instrumentation

---
 codeflash/languages/java/config.py          |  2 +-
 codeflash/languages/java/instrumentation.py | 53 ++++++++++++---------
 2 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/codeflash/languages/java/config.py b/codeflash/languages/java/config.py
index ceb7fd4b9..788c93c50 100644
--- a/codeflash/languages/java/config.py
+++ b/codeflash/languages/java/config.py
@@ -183,7 +183,7 @@ def _detect_test_deps_from_pom(project_root: Path) -> tuple[bool, bool, bool]:
     has_junit4 = False
     has_testng = False
 
-    def check_dependencies(deps_element, ns):
+    def check_dependencies(deps_element: ET.Element | None, ns: dict[str, str]) -> None:
         """Check dependencies element for test frameworks."""
         nonlocal has_junit5, has_junit4, has_testng
 
diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index 884b36b67..5a60b75ab 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -35,9 +35,9 @@
 def _get_function_name(func: Any) -> str:
     """Get the function name from FunctionToOptimize."""
     if hasattr(func, "function_name"):
-        return func.function_name
+        return str(func.function_name)
     if hasattr(func, "name"):
-        return func.name
+        return str(func.name)
     msg = f"Cannot get function name from {type(func)}"
     raise AttributeError(msg)
 
@@ -82,7 +82,7 @@ def _is_test_annotation(stripped_line: str) -> bool:
     return bool(_TEST_ANNOTATION_RE.match(stripped_line))
 
 
-def _is_inside_lambda(node) -> bool:
+def _is_inside_lambda(node: Any) -> bool:
     """Check if a tree-sitter node is inside a lambda_expression."""
     current = node.parent
     while current is not None:
@@ -94,7 +94,7 @@ def _is_inside_lambda(node) -> bool:
     return False
 
 
-def _is_inside_complex_expression(node) -> bool:
+def _is_inside_complex_expression(node: Any) -> bool:
     """Check if a tree-sitter node is inside a complex expression that shouldn't be instrumented directly.
 
     This includes:
@@ -163,7 +163,7 @@ def wrap_target_calls_with_treesitter(
     tree = analyzer.parse(wrapper_bytes)
 
     # Collect all matching calls with their metadata
-    calls = []
+    calls: list[dict[str, Any]] = []
     _collect_calls(tree.root_node, wrapper_bytes, body_bytes, prefix_len, func_name, analyzer, calls)
 
     if not calls:
@@ -177,7 +177,7 @@ def wrap_target_calls_with_treesitter(
         offset += len(line.encode("utf8")) + 1  # +1 for \n from join
 
     # Group non-lambda and non-complex-expression calls by their line index
-    calls_by_line: dict[int, list] = {}
+    calls_by_line: dict[int, list[dict[str, Any]]] = {}
     for call in calls:
         if call["in_lambda"] or call.get("in_complex", False):
             logger.debug("Skipping behavior instrumentation for call in lambda or complex expression")
@@ -263,7 +263,15 @@ def wrap_target_calls_with_treesitter(
     return wrapped, call_counter
 
 
-def _collect_calls(node, wrapper_bytes, body_bytes, prefix_len, func_name, analyzer, out):
+def _collect_calls(
+    node: Any,
+    wrapper_bytes: bytes,
+    body_bytes: bytes,
+    prefix_len: int,
+    func_name: str,
+    analyzer: JavaAnalyzer,
+    out: list[dict[str, Any]],
+) -> None:
     """Recursively collect method_invocation nodes matching func_name."""
     node_type = node.type
     if node_type == "method_invocation":
@@ -331,7 +339,7 @@ def _infer_array_cast_type(line: str) -> str | None:
 def _get_qualified_name(func: Any) -> str:
     """Get the qualified name from FunctionToOptimize."""
     if hasattr(func, "qualified_name"):
-        return func.qualified_name
+        return str(func.qualified_name)
     # Build qualified name from function_name and parents
     if hasattr(func, "function_name"):
         parts = []
@@ -702,7 +710,7 @@ def _add_timing_instrumentation(source: str, class_name: str, func_name: str) ->
     analyzer = get_java_analyzer()
     tree = analyzer.parse(source_bytes)
 
-    def has_test_annotation(method_node) -> bool:
+    def has_test_annotation(method_node: Any) -> bool:
         modifiers = None
         for child in method_node.children:
             if child.type == "modifiers":
@@ -721,7 +729,7 @@ def has_test_annotation(method_node) -> bool:
                     return True
         return False
 
-    def collect_test_methods(node, out) -> None:
+    def collect_test_methods(node: Any, out: list[tuple[Any, Any]]) -> None:
         if node.type == "method_declaration" and has_test_annotation(node):
             body_node = node.child_by_field_name("body")
             if body_node is not None:
@@ -729,7 +737,7 @@ def collect_test_methods(node, out) -> None:
         for child in node.children:
             collect_test_methods(child, out)
 
-    def collect_target_calls(node, wrapper_bytes: bytes, func: str, out) -> None:
+    def collect_target_calls(node: Any, wrapper_bytes: bytes, func: str, out: list[Any]) -> None:
         if node.type == "method_invocation":
             name_node = node.child_by_field_name("name")
             if name_node and analyzer.get_node_text(name_node, wrapper_bytes) == func:
@@ -756,13 +764,13 @@ def reindent_block(text: str, target_indent: str) -> str:
             reindented.append(f"{target_indent}{line[min_leading:]}")
         return "\n".join(reindented)
 
-    def find_top_level_statement(node, body_node):
+    def find_top_level_statement(node: Any, body_node: Any) -> Any:
         current = node
         while current is not None and current.parent is not None and current.parent != body_node:
             current = current.parent
         return current if current is not None and current.parent == body_node else None
 
-    def split_var_declaration(stmt_node, source_bytes_ref: bytes) -> tuple[str, str] | None:
+    def split_var_declaration(stmt_node: Any, source_bytes_ref: bytes) -> tuple[str, str] | None:
         """Split a local_variable_declaration into a hoisted declaration and an assignment.
 
         When a target call is inside a variable declaration like:
@@ -834,7 +842,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
         wrapped_body = wrapped_method.child_by_field_name("body")
         if wrapped_body is None:
             return body_text, next_wrapper_id
-        calls = []
+        calls: list[Any] = []
         collect_target_calls(wrapped_body, wrapper_bytes, func_name, calls)
 
         indent = base_indent
@@ -933,14 +941,14 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
             result_parts.append(suffix)
             return "".join(result_parts), current_id
 
-        result_parts: list[str] = []
+        multi_result_parts: list[str] = []
         cursor = 0
         wrapper_id = next_wrapper_id
 
         for stmt_start, stmt_end, stmt_ast_node in unique_ranges:
             prefix = body_text[cursor:stmt_start]
             target_stmt = body_text[stmt_start:stmt_end]
-            result_parts.append(prefix.rstrip(" \t"))
+            multi_result_parts.append(prefix.rstrip(" \t"))
 
             wrapper_id += 1
             current_id = wrapper_id
@@ -982,14 +990,14 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 f"{indent}}}",
             ]
 
-            result_parts.append("\n" + "\n".join(setup_lines))
-            result_parts.append("\n".join(timing_lines))
+            multi_result_parts.append("\n" + "\n".join(setup_lines))
+            multi_result_parts.append("\n".join(timing_lines))
             cursor = stmt_end
 
-        result_parts.append(body_text[cursor:])
-        return "".join(result_parts), wrapper_id
+        multi_result_parts.append(body_text[cursor:])
+        return "".join(multi_result_parts), wrapper_id
 
-    test_methods = []
+    test_methods: list[tuple[Any, Any]] = []
     collect_test_methods(tree.root_node, test_methods)
     if not test_methods:
         return source
@@ -1137,12 +1145,13 @@ def instrument_generated_java_test(
             function_name,
         )
     elif mode == "behavior":
-        _, modified_code = instrument_existing_test(
+        _, behavior_code = instrument_existing_test(
             test_string=test_code,
             mode=mode,
             function_to_optimize=function_to_optimize,
             test_class_name=original_class_name,
         )
+        modified_code = behavior_code or test_code
     else:
         modified_code = test_code
 

From 58561c8f667f8d053367409bfceaa61a681eaba9 Mon Sep 17 00:00:00 2001
From: misrasaurabh1 <misra.saurabh1@gmail.com>
Date: Fri, 20 Feb 2026 01:16:05 -0800
Subject: [PATCH 17/30] coverage reported correctly

---
 codeflash/models/models.py               | 2 +-
 codeflash/verification/coverage_utils.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 9baa8f83e..70267c067 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -664,7 +664,7 @@ def log_coverage(self) -> None:
         from rich.tree import Tree
 
         tree = Tree("Test Coverage Results")
-        tree.add(f"Main Function: {self.main_func_coverage.name}: {self.coverage:.2f}%")
+        tree.add(f"Main Function: {self.main_func_coverage.name}: {self.main_func_coverage.coverage:.2f}%")
         if self.dependent_func_coverage:
             tree.add(
                 f"Dependent Function: {self.dependent_func_coverage.name}: {self.dependent_func_coverage.coverage:.2f}%"
diff --git a/codeflash/verification/coverage_utils.py b/codeflash/verification/coverage_utils.py
index f5a41a737..1b2341680 100644
--- a/codeflash/verification/coverage_utils.py
+++ b/codeflash/verification/coverage_utils.py
@@ -327,7 +327,8 @@ def load_from_jacoco_xml(
                         bare_name = method.get("name")
                         if bare_name:
                             all_methods[bare_name] = (method, method_line)
-                        if bare_name == function_name:
+                        # Match against bare name or qualified name (e.g., "computeDigest" or "Crypto.computeDigest")
+                        if bare_name == function_name or function_name.endswith("." + bare_name):
                             method_elem = method
                             method_start_line = method_line
 

From 8a1ab8e1ad6efa70265df9c310c1868ce9ae879e Mon Sep 17 00:00:00 2001
From: misrasaurabh1 <misra.saurabh1@gmail.com>
Date: Fri, 20 Feb 2026 01:45:49 -0800
Subject: [PATCH 18/30] fix pr creation bug

---
 codeflash/languages/java/support.py | 36 ++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/codeflash/languages/java/support.py b/codeflash/languages/java/support.py
index d3f8d0db3..5b2f55be9 100644
--- a/codeflash/languages/java/support.py
+++ b/codeflash/languages/java/support.py
@@ -40,7 +40,7 @@
     from codeflash.discovery.functions_to_optimize import FunctionToOptimize
     from codeflash.languages.base import CodeContext, FunctionFilterCriteria, HelperFunction, TestInfo, TestResult
     from codeflash.languages.java.concurrency_analyzer import ConcurrencyInfo
-    from codeflash.models.models import GeneratedTestsList
+    from codeflash.models.models import GeneratedTestsList, InvocationId
 
 logger = logging.getLogger(__name__)
 
@@ -199,6 +199,40 @@ def remove_test_functions(self, test_source: str, functions_to_remove: list[str]
         """Remove specific test functions from test source code."""
         return remove_test_functions(test_source, functions_to_remove, self._analyzer)
 
+    def remove_test_functions_from_generated_tests(
+        self, generated_tests: GeneratedTestsList, functions_to_remove: list[str]
+    ) -> GeneratedTestsList:
+        """Remove specific test functions from generated tests."""
+        from codeflash.models.models import GeneratedTests, GeneratedTestsList
+
+        updated_tests: list[GeneratedTests] = []
+        for test in generated_tests.generated_tests:
+            updated_tests.append(
+                GeneratedTests(
+                    generated_original_test_source=self.remove_test_functions(
+                        test.generated_original_test_source, functions_to_remove
+                    ),
+                    instrumented_behavior_test_source=test.instrumented_behavior_test_source,
+                    instrumented_perf_test_source=test.instrumented_perf_test_source,
+                    behavior_file_path=test.behavior_file_path,
+                    perf_file_path=test.perf_file_path,
+                )
+            )
+        return GeneratedTestsList(generated_tests=updated_tests)
+
+    def add_runtime_comments_to_generated_tests(
+        self,
+        generated_tests: GeneratedTestsList,
+        original_runtimes: dict[InvocationId, list[int]],
+        optimized_runtimes: dict[InvocationId, list[int]],
+        tests_project_rootdir: Path | None = None,
+    ) -> GeneratedTestsList:
+        """Add runtime comments to generated tests."""
+        _ = tests_project_rootdir
+        # For Java, we currently don't add runtime comments to generated tests
+        # Return the generated tests unchanged
+        return generated_tests
+
     # === Test Result Comparison ===
 
     def compare_test_results(

From 96d94cd47780531fb8ba38c75ef9aef954a6c6d8 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 10:00:31 +0000
Subject: [PATCH 19/30] Optimize _add_behavior_instrumentation

The optimized code achieves a **196% speedup** (from 13.3ms to 4.49ms) primarily through two focused optimizations that target the hottest paths identified by the line profiler:

## Key Optimizations

### 1. Early Exit in `wrap_target_calls_with_treesitter` (Primary Driver)
The profiler shows that in the original code, 55.5% of `wrap_target_calls_with_treesitter`'s time (9.7ms out of 17.5ms) was spent in `_collect_calls`, which parses Java code with tree-sitter. The optimization adds:

```python
body_text = "\n".join(body_lines)
if func_name not in body_text:
    return list(body_lines), 0
```

This simple string membership check avoids expensive tree-sitter parsing when the target function's name does not appear anywhere in the test method body. Since many test methods don't call the function being instrumented, this provides massive savings. The annotated tests confirm this pattern - the largest speedups come from bodies without a call to the target function: up to 639% for large methods and up to 1018% for methods with complex expressions.

### 2. Optimized `_is_test_annotation` (Secondary Improvement)
The profiler shows `_is_test_annotation` being called 1,950 times, spending 100% of its time (1.21ms) on regex matching. The optimization replaces the regex with direct string checks:

```python
if not stripped_line.startswith("@Test"):
    return False
if len(stripped_line) == 5:  # exactly "@Test"
    return True
next_char = stripped_line[5]
return next_char == " " or next_char == "("
```

This avoids regex overhead for the 1,737 non-`@Test` annotations that can be rejected immediately with `startswith()`. The profiler shows this reduced time from 1.21ms to 0.91ms (25% faster in this function).

## Performance Impact by Test Type

The annotated tests reveal optimization effectiveness varies by workload:
- **Empty/simple methods**: 107-154% faster (early exit dominates)
- **Methods with complex expressions**: 396-1018% faster (avoids parsing large expression trees)
- **Large methods with many statements**: 510-639% faster (early exit + reduced AST traversal)
- **Methods with actual function calls**: 111-152% faster (smaller benefit since tree-sitter must run)

## Context and Production Impact

Based on `function_references`, this function is called from test discovery in `test_instrumentation.py`, specifically for behavior instrumentation that captures return values. The early exit optimization is particularly valuable here because:

1. Test discovery processes many test methods, but typically only a subset call the target function
2. The function operates on the hot path during test suite instrumentation
3. Large test suites with 100+ test methods (see test case showing 154% speedup for 150 methods) benefit significantly

The optimization maintains correctness - all test cases pass with identical output, confirming the early exit safely bypasses work that produces no changes when the function isn't present.
---
 codeflash/languages/java/instrumentation.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index 5a60b75ab..9a7842861 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -79,7 +79,12 @@ def _is_test_annotation(stripped_line: str) -> bool:
         @TestFactory
         @TestTemplate
     """
-    return bool(_TEST_ANNOTATION_RE.match(stripped_line))
+    if not stripped_line.startswith("@Test"):
+        return False
+    if len(stripped_line) == 5:
+        return True
+    next_char = stripped_line[5]
+    return next_char == " " or next_char == "("
 
 
 def _is_inside_lambda(node: Any) -> bool:
@@ -154,8 +159,11 @@ def wrap_target_calls_with_treesitter(
     """
     from codeflash.languages.java.parser import get_java_analyzer
 
-    analyzer = get_java_analyzer()
     body_text = "\n".join(body_lines)
+    if func_name not in body_text:
+        return list(body_lines), 0
+
+    analyzer = get_java_analyzer()
     body_bytes = body_text.encode("utf8")
     prefix_len = len(_TS_BODY_PREFIX_BYTES)
 

From 864f87f0168199f2adb3eef63aff151e048cd4c7 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 10:12:51 +0000
Subject: [PATCH 20/30] style: merge multiple comparisons per PLR1714

---
 codeflash/languages/java/instrumentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index 9a7842861..45a5b801f 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -84,7 +84,7 @@ def _is_test_annotation(stripped_line: str) -> bool:
     if len(stripped_line) == 5:
         return True
     next_char = stripped_line[5]
-    return next_char == " " or next_char == "("
+    return next_char in {" ", "("}
 
 
 def _is_inside_lambda(node: Any) -> bool:

From 648a6138844ea387c129b6c6313cc8cc62a61278 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 09:26:55 +0000
Subject: [PATCH 21/30] Optimize _add_timing_instrumentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This optimization achieves a **15% runtime improvement** (10.2ms → 8.81ms) by replacing recursive AST traversal with iterative stack-based traversal in two critical functions: `collect_test_methods` and `collect_target_calls`.

## Key Changes

**1. Iterative AST Traversal (Primary Speedup)**
- Replaced recursive tree walking with explicit stack-based iteration
- In `collect_test_methods`: Changed from recursive calls to `while stack` loop with `stack.extend(reversed(current.children))`
- In `collect_target_calls`: Similar transformation using explicit stack management
- **Impact**: Line profiler shows `collect_test_methods` dropped from 24.2% to 3.8% of total runtime (81% reduction in that function)

**2. Why This Works in Python**
- Python function calls have significant overhead (frame creation, argument binding, scope setup)
- Recursive traversal compounds this overhead across potentially deep AST trees
- Iterative approach uses a simple list for the stack, avoiding repeated function call overhead
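- The `reversed()` call ensures children are visited in the same order as the recursive traversal, so matches are collected in the same order. One behavioral difference: `collect_test_methods` now stops descending once a node has been matched as a test method (the `continue`), whereas the recursive version also walked that method's children; a `@Test` method nested inside another test method's body (for example in a local or anonymous class) is therefore no longer collected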

**3. Performance Characteristics**
Based on annotated tests:
- **Large method bodies** (500+ lines): 23.8% faster - most benefit from reduced recursion overhead
- **Many test methods** (100 methods): 9.2% faster - cumulative savings across many traversals
- **Simple cases**: 2-5% faster - overhead reduction still measurable
- **Empty/no-match cases**: Minor regression (8-9% slower) due to negligible baseline times (12-40μs)

## Impact on Workloads

The function references show `_add_timing_instrumentation` is called from test instrumentation code. This optimization particularly benefits:
- **Java projects with large test suites** containing many `@Test` methods
- **Complex test methods** with deep AST structures and multiple method invocations
- **Batch instrumentation operations** where the function is called repeatedly

The iterative approach scales better than recursion as AST depth and method count increase, making it especially valuable for large Java codebases where instrumentation is applied across hundreds of test methods.
---
 codeflash/languages/java/instrumentation.py | 40 ++++++++++++---------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index 45a5b801f..c015b08e1 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -738,24 +738,31 @@ def has_test_annotation(method_node: Any) -> bool:
         return False
 
     def collect_test_methods(node: Any, out: list[tuple[Any, Any]]) -> None:
-        if node.type == "method_declaration" and has_test_annotation(node):
-            body_node = node.child_by_field_name("body")
-            if body_node is not None:
-                out.append((node, body_node))
-        for child in node.children:
-            collect_test_methods(child, out)
+        stack = [node]
+        while stack:
+            current = stack.pop()
+            if current.type == "method_declaration" and has_test_annotation(current):
+                body_node = current.child_by_field_name("body")
+                if body_node is not None:
+                    out.append((current, body_node))
+                continue
+            if current.children:
+                stack.extend(reversed(current.children))
 
     def collect_target_calls(node: Any, wrapper_bytes: bytes, func: str, out: list[Any]) -> None:
-        if node.type == "method_invocation":
-            name_node = node.child_by_field_name("name")
-            if name_node and analyzer.get_node_text(name_node, wrapper_bytes) == func:
-                # Skip if inside lambda or complex expression
-                if not _is_inside_lambda(node) and not _is_inside_complex_expression(node):
-                    out.append(node)
-                else:
-                    logger.debug("Skipping instrumentation of %s inside lambda or complex expression", func)
-        for child in node.children:
-            collect_target_calls(child, wrapper_bytes, func, out)
+        stack = [node]
+        while stack:
+            current = stack.pop()
+            if current.type == "method_invocation":
+                name_node = current.child_by_field_name("name")
+                if name_node and analyzer.get_node_text(name_node, wrapper_bytes) == func:
+                    if not _is_inside_lambda(current) and not _is_inside_complex_expression(current):
+                        out.append(current)
+                    else:
+                        logger.debug("Skipping instrumentation of %s inside lambda or complex expression", func)
+            if current.children:
+                stack.extend(reversed(current.children))
+
 
     def reindent_block(text: str, target_indent: str) -> str:
         lines = text.splitlines()
@@ -853,6 +860,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
         calls: list[Any] = []
         collect_target_calls(wrapped_body, wrapper_bytes, func_name, calls)
 
+
         indent = base_indent
         inner_indent = f"{indent}    "
         inner_body_indent = f"{inner_indent}    "

From a523c9ad46c133b94cb9dd4bd6122be316e73dd7 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 09:29:04 +0000
Subject: [PATCH 22/30] style: auto-fix linting issues

---
 codeflash/languages/java/instrumentation.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index c015b08e1..bd9f8d108 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -763,7 +763,6 @@ def collect_target_calls(node: Any, wrapper_bytes: bytes, func: str, out: list[A
             if current.children:
                 stack.extend(reversed(current.children))
 
-
     def reindent_block(text: str, target_indent: str) -> str:
         lines = text.splitlines()
         non_empty = [line for line in lines if line.strip()]
@@ -860,7 +859,6 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
         calls: list[Any] = []
         collect_target_calls(wrapped_body, wrapper_bytes, func_name, calls)
 
-
         indent = base_indent
         inner_indent = f"{indent}    "
         inner_body_indent = f"{inner_indent}    "

From 75762bd4178faa931a3403daed3dfc15f1f1e127 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 09:12:29 +0000
Subject: [PATCH 23/30] Optimize _is_inside_lambda
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimization achieves a **17% runtime improvement** (from 1.05ms to 894μs) by caching the `current.type` attribute access in a local variable (`t`) inside the loop. This seemingly small change avoids repeating the same attribute lookup on the same object within a single iteration.

**What Changed:**
Instead of accessing `current.type` once for each conditional check (i.e. twice in every iteration that does not return early on `lambda_expression`), the optimized version stores it in a local variable and reuses that value. Each iteration therefore performs exactly one attribute lookup.

**Why This Improves Performance:**
In Python, every `current.type` evaluation executes a `LOAD_ATTR`, which goes through the full attribute-lookup machinery: a lookup on the object's type and, for ordinary instances, in the object's `__dict__`; for extension-backed objects such as tree-sitter nodes this is presumably a property-getter call. By caching the attribute value in a local variable, the code performs this lookup once per iteration instead of up to twice. Reading the local afterwards is a `LOAD_FAST`, which is significantly cheaper because it is a simple indexed access into the frame's local-variable array rather than an attribute lookup.

**Key Performance Characteristics:**
The line profiler shows the optimization is particularly effective for the common case where both conditions need to be checked. The time spent on the two conditional checks decreased from 28% + 23.4% = 51.4% of total time to 22.4% + 15.3% = 37.7%, demonstrating measurable savings from the reduced attribute access overhead.

**Test Case Performance:**
- The optimization shows the most significant gains in **large-scale traversal scenarios** (1000-node chains), with 4-5% speedups in `test_long_chain_with_lambda_at_top_large_scale` and `test_long_chain_with_method_declaration_earlier_large_scale`
- Shorter chains show slight regressions (1-6% slower) in individual test cases, likely due to measurement noise and the overhead of the additional variable assignment being more noticeable in very short executions
- The overall **17% improvement** across the full workload confirms the optimization is beneficial when amortized across realistic usage patterns with varying tree depths

This optimization is particularly valuable when traversing deep AST structures, where the function may iterate many times before finding a lambda or method declaration, making the cumulative savings from reduced attribute access substantial.
---
 codeflash/languages/java/instrumentation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index bd9f8d108..c40a47ed5 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -91,9 +91,10 @@ def _is_inside_lambda(node: Any) -> bool:
     """Check if a tree-sitter node is inside a lambda_expression."""
     current = node.parent
     while current is not None:
-        if current.type == "lambda_expression":
+        t = current.type
+        if t == "lambda_expression":
             return True
-        if current.type == "method_declaration":
+        if t == "method_declaration":
             return False
         current = current.parent
     return False

From 2c0e1d9aa3d404dee8b6c0d14eedc679050eb72e Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 06:34:52 +0000
Subject: [PATCH 24/30] Optimize _byte_to_line_index

The main optimization here is eliminating the `max(0, idx)` call by handling the edge case directly. `bisect_right` returns 0 when `byte_offset` is less than all elements (or when `line_byte_starts` is empty), so subtracting 1 gives -1 — the only negative value `idx` can take — which we can catch with a simple `idx < 0` comparison. This avoids the function call overhead of `max()`.
---
 codeflash/languages/java/instrumentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index c40a47ed5..c87cc189f 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -315,7 +315,7 @@ def _collect_calls(
 def _byte_to_line_index(byte_offset: int, line_byte_starts: list[int]) -> int:
     """Map a byte offset in body_text to a body_lines index."""
     idx = bisect.bisect_right(line_byte_starts, byte_offset) - 1
-    return max(0, idx)
+    return 0 if idx < 0 else idx
 
 
 def _infer_array_cast_type(line: str) -> str | None:

From 42946015dfac07e7b676c1be5cb6722bef5764ae Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 06:37:15 +0000
Subject: [PATCH 25/30] style: auto-fix linting issues

---
 codeflash/languages/java/instrumentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index c87cc189f..c339a1a0a 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -315,7 +315,7 @@ def _collect_calls(
 def _byte_to_line_index(byte_offset: int, line_byte_starts: list[int]) -> int:
     """Map a byte offset in body_text to a body_lines index."""
     idx = bisect.bisect_right(line_byte_starts, byte_offset) - 1
-    return 0 if idx < 0 else idx
+    return max(idx, 0)
 
 
 def _infer_array_cast_type(line: str) -> str | None:

From 06dfb96b57ec76bd1ed6c5af166ff972fb725c4d Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 15:05:33 +0000
Subject: [PATCH 26/30] fix: implement Java process_review methods to prevent
 crash after optimization

The base class stubs for remove_test_functions_from_generated_tests() and
add_runtime_comments_to_generated_tests() return None, causing an AttributeError
crash in function_optimizer.py when iterating generated_tests.generated_tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/support.py | 47 +++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/codeflash/languages/java/support.py b/codeflash/languages/java/support.py
index 5b2f55be9..f56a0dab5 100644
--- a/codeflash/languages/java/support.py
+++ b/codeflash/languages/java/support.py
@@ -202,7 +202,6 @@ def remove_test_functions(self, test_source: str, functions_to_remove: list[str]
     def remove_test_functions_from_generated_tests(
         self, generated_tests: GeneratedTestsList, functions_to_remove: list[str]
     ) -> GeneratedTestsList:
-        """Remove specific test functions from generated tests."""
         from codeflash.models.models import GeneratedTests, GeneratedTestsList
 
         updated_tests: list[GeneratedTests] = []
@@ -227,11 +226,47 @@ def add_runtime_comments_to_generated_tests(
         optimized_runtimes: dict[InvocationId, list[int]],
         tests_project_rootdir: Path | None = None,
     ) -> GeneratedTestsList:
-        """Add runtime comments to generated tests."""
-        _ = tests_project_rootdir
-        # For Java, we currently don't add runtime comments to generated tests
-        # Return the generated tests unchanged
-        return generated_tests
+        from codeflash.models.models import GeneratedTests, GeneratedTestsList
+
+        original_runtimes_dict = self._build_runtime_map(original_runtimes)
+        optimized_runtimes_dict = self._build_runtime_map(optimized_runtimes)
+
+        modified_tests: list[GeneratedTests] = []
+        for test in generated_tests.generated_tests:
+            modified_source = self.add_runtime_comments(
+                test.generated_original_test_source, original_runtimes_dict, optimized_runtimes_dict
+            )
+            modified_tests.append(
+                GeneratedTests(
+                    generated_original_test_source=modified_source,
+                    instrumented_behavior_test_source=test.instrumented_behavior_test_source,
+                    instrumented_perf_test_source=test.instrumented_perf_test_source,
+                    behavior_file_path=test.behavior_file_path,
+                    perf_file_path=test.perf_file_path,
+                )
+            )
+        return GeneratedTestsList(generated_tests=modified_tests)
+
+    def _build_runtime_map(self, inv_id_runtimes: dict[InvocationId, list[int]]) -> dict[str, int]:
+        unique_inv_ids: dict[str, int] = {}
+        for inv_id, runtimes in inv_id_runtimes.items():
+            test_qualified_name = (
+                inv_id.test_class_name + "." + inv_id.test_function_name
+                if inv_id.test_class_name
+                else inv_id.test_function_name
+            )
+            if not test_qualified_name:
+                continue
+
+            key = test_qualified_name
+            if inv_id.iteration_id:
+                parts = inv_id.iteration_id.split("_")
+                cur_invid = parts[0] if len(parts) < 3 else "_".join(parts[:-1])
+                key = key + "#" + cur_invid
+            if key not in unique_inv_ids:
+                unique_inv_ids[key] = 0
+            unique_inv_ids[key] += min(runtimes)
+        return unique_inv_ids
 
     # === Test Result Comparison ===
 

From f06acba3549496cebce8910636aef60fcf0e8a7e Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 15:17:33 +0000
Subject: [PATCH 27/30] fix: add test method name to Java stdout markers for
 unique identification

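Java stdout markers now include the test method name in the class field
(e.g., "TestClass.testMethod"), matching the Python marker format. The
parser splits this combined field on its last "." to recover the class
name and the test method name, and falls back to the testcase name from
the XML when the field contains no ".".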

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/instrumentation.py   |  25 +--
 codeflash/verification/parse_test_output.py   |  19 ++-
 .../test_java/test_instrumentation.py         | 144 ++++++++++--------
 3 files changed, 107 insertions(+), 81 deletions(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index c339a1a0a..a59ed48a4 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -6,8 +6,8 @@
 
 Timing instrumentation adds System.nanoTime() calls around the function being tested
 and prints timing markers in a format compatible with Python/JS implementations:
-  Start: !$######testModule:testClass:funcName:loopIndex:iterationId######$!
-  End:   !######testModule:testClass:funcName:loopIndex:iterationId:durationNs######!
+  Start: !$######testModule:testClass.testMethod:funcName:loopIndex:iterationId######$!
+  End:   !######testModule:testClass.testMethod:funcName:loopIndex:iterationId:durationNs######!
 
 This allows codeflash to extract timing data from stdout for accurate benchmarking.
 """
@@ -625,7 +625,7 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 f'{indent}String _cf_testIteration{iter_id} = System.getenv("CODEFLASH_TEST_ITERATION");',
                 f'{indent}if (_cf_testIteration{iter_id} == null) _cf_testIteration{iter_id} = "0";',
                 f'{indent}String _cf_test{iter_id} = "{test_method_name}";',
-                f'{indent}System.out.println("!$######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":" + _cf_iter{iter_id} + "######$!");',
+                f'{indent}System.out.println("!$######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + "." + _cf_test{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":" + _cf_iter{iter_id} + "######$!");',
                 f"{indent}byte[] _cf_serializedResult{iter_id} = null;",
                 f"{indent}long _cf_end{iter_id} = -1;",
                 f"{indent}long _cf_start{iter_id} = 0;",
@@ -646,7 +646,7 @@ def _add_behavior_instrumentation(source: str, class_name: str, func_name: str)
                 f"{indent}}} finally {{",
                 f"{indent}    long _cf_end{iter_id}_finally = System.nanoTime();",
                 f"{indent}    long _cf_dur{iter_id} = (_cf_end{iter_id} != -1 ? _cf_end{iter_id} : _cf_end{iter_id}_finally) - _cf_start{iter_id};",
-                f'{indent}    System.out.println("!######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":" + _cf_iter{iter_id} + ":" + _cf_dur{iter_id} + "######!");',
+                f'{indent}    System.out.println("!######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + "." + _cf_test{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":" + _cf_iter{iter_id} + ":" + _cf_dur{iter_id} + "######!");',
                 f"{indent}    // Write to SQLite if output file is set",
                 f"{indent}    if (_cf_outputFile{iter_id} != null && !_cf_outputFile{iter_id}.isEmpty()) {{",
                 f"{indent}        try {{",
@@ -840,7 +840,7 @@ def split_var_declaration(stmt_node: Any, source_bytes_ref: bytes) -> tuple[str,
         assignment = f"{name_text} = {value_text};"
         return hoisted, assignment
 
-    def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: str) -> tuple[str, int]:
+    def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: str, test_method_name: str = "unknown") -> tuple[str, int]:
         body_bytes = body_text.encode("utf8")
         wrapper_bytes = _TS_BODY_PREFIX_BYTES + body_bytes + _TS_BODY_SUFFIX.encode("utf8")
         wrapper_tree = analyzer.parse(wrapper_bytes)
@@ -909,6 +909,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 f'{indent}int _cf_innerIterations{current_id} = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));',
                 f'{indent}String _cf_mod{current_id} = "{class_name}";',
                 f'{indent}String _cf_cls{current_id} = "{class_name}";',
+                f'{indent}String _cf_test{current_id} = "{test_method_name}";',
                 f'{indent}String _cf_fn{current_id} = "{func_name}";',
                 "",
             ]
@@ -925,7 +926,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 stmt_in_try = reindent_block(target_stmt, inner_body_indent)
             timing_lines = [
                 f"{indent}for (int _cf_i{current_id} = 0; _cf_i{current_id} < _cf_innerIterations{current_id}; _cf_i{current_id}++) {{",
-                f'{inner_indent}System.out.println("!$######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + _cf_i{current_id} + "######$!");',
+                f'{inner_indent}System.out.println("!$######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + "." + _cf_test{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + _cf_i{current_id} + "######$!");',
                 f"{inner_indent}long _cf_end{current_id} = -1;",
                 f"{inner_indent}long _cf_start{current_id} = 0;",
                 f"{inner_indent}try {{",
@@ -935,7 +936,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 f"{inner_indent}}} finally {{",
                 f"{inner_body_indent}long _cf_end{current_id}_finally = System.nanoTime();",
                 f"{inner_body_indent}long _cf_dur{current_id} = (_cf_end{current_id} != -1 ? _cf_end{current_id} : _cf_end{current_id}_finally) - _cf_start{current_id};",
-                f'{inner_body_indent}System.out.println("!######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + _cf_i{current_id} + ":" + _cf_dur{current_id} + "######!");',
+                f'{inner_body_indent}System.out.println("!######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + "." + _cf_test{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + _cf_i{current_id} + ":" + _cf_dur{current_id} + "######!");',
                 f"{inner_indent}}}",
                 f"{indent}}}",
             ]
@@ -974,6 +975,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 f'{indent}int _cf_innerIterations{current_id} = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));',
                 f'{indent}String _cf_mod{current_id} = "{class_name}";',
                 f'{indent}String _cf_cls{current_id} = "{class_name}";',
+                f'{indent}String _cf_test{current_id} = "{test_method_name}";',
                 f'{indent}String _cf_fn{current_id} = "{func_name}";',
                 "",
             ]
@@ -990,7 +992,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
 
             timing_lines = [
                 f"{indent}for (int _cf_i{current_id} = 0; _cf_i{current_id} < _cf_innerIterations{current_id}; _cf_i{current_id}++) {{",
-                f'{inner_indent}System.out.println("!$######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + {iteration_id_expr} + "######$!");',
+                f'{inner_indent}System.out.println("!$######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + "." + _cf_test{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + {iteration_id_expr} + "######$!");',
                 f"{inner_indent}long _cf_end{current_id} = -1;",
                 f"{inner_indent}long _cf_start{current_id} = 0;",
                 f"{inner_indent}try {{",
@@ -1000,7 +1002,7 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
                 f"{inner_indent}}} finally {{",
                 f"{inner_body_indent}long _cf_end{current_id}_finally = System.nanoTime();",
                 f"{inner_body_indent}long _cf_dur{current_id} = (_cf_end{current_id} != -1 ? _cf_end{current_id} : _cf_end{current_id}_finally) - _cf_start{current_id};",
-                f'{inner_body_indent}System.out.println("!######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + {iteration_id_expr} + ":" + _cf_dur{current_id} + "######!");',
+                f'{inner_body_indent}System.out.println("!######" + _cf_mod{current_id} + ":" + _cf_cls{current_id} + "." + _cf_test{current_id} + ":" + _cf_fn{current_id} + ":" + _cf_loop{current_id} + ":" + {iteration_id_expr} + ":" + _cf_dur{current_id} + "######!");',
                 f"{inner_indent}}}",
                 f"{indent}}}",
             ]
@@ -1024,8 +1026,11 @@ def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: s
         body_end = body_node.end_byte - 1  # skip '}'
         body_text = source_bytes[body_start:body_end].decode("utf8")
         base_indent = " " * (method_node.start_point[1] + 4)
+        # Extract test method name from AST
+        name_node = method_node.child_by_field_name("name")
+        test_method_name = analyzer.get_node_text(name_node, source_bytes) if name_node else "unknown"
         next_wrapper_id = max(wrapper_id, method_ordinal - 1)
-        new_body, new_wrapper_id = build_instrumented_body(body_text, next_wrapper_id, base_indent)
+        new_body, new_wrapper_id = build_instrumented_body(body_text, next_wrapper_id, base_indent, test_method_name)
         # Reserve one id slot per @Test method even when no instrumentation is added,
         # matching existing deterministic numbering expected by tests.
         wrapper_id = method_ordinal if new_wrapper_id == next_wrapper_id else new_wrapper_id
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index a662cd2e6..30df53498 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -816,14 +816,14 @@ def parse_test_xml(
             sys_stdout = testcase.system_out or ""
 
             # Use different patterns for Java (5-field start, 6-field end) vs Python (6-field both)
-            # Java format: !$######module:class:func:loop:iter######$! (start)
-            #              !######module:class:func:loop:iter:duration######! (end)
+            # Java format: !$######module:class.test:func:loop:iter######$! (start)
+            #              !######module:class.test:func:loop:iter:duration######! (end)
             if is_java():
                 begin_matches = list(start_pattern.finditer(sys_stdout))
                 end_matches = {}
                 for match in end_pattern.finditer(sys_stdout):
                     groups = match.groups()
-                    # Key is first 5 groups (module, class, func, loop, iter)
+                    # Key is first 5 groups (module, class.test, func, loop, iter)
                     end_matches[groups[:5]] = match
 
                 # For Java: fallback to pre-parsed subprocess stdout when XML system-out has no timing markers
@@ -884,17 +884,22 @@ def parse_test_xml(
                     groups = match.groups()
 
                     # Java and Python have different marker formats:
-                    # Java:   5 groups - (module, class, func, loop_index, iteration_id)
+                    # Java:   5 groups - (module, class.test, func, loop_index, iteration_id)
                     # Python: 6 groups - (module, class.test, _, func, loop_index, iteration_id)
                     if is_java():
-                        # Java format: !$######module:class:func:loop:iter######$!
+                        # Java format: !$######module:class.test:func:loop:iter######$!
                         end_key = groups[:5]  # Use all 5 groups as key
                         end_match = end_matches.get(end_key)
                         iteration_id = groups[4]  # iter is at index 4
                         loop_idx = int(groups[3])  # loop is at index 3
                         test_module = groups[0]  # module
-                        test_class_str = groups[1]  # class
-                        test_func = test_function  # Use the testcase name from XML
+                        # groups[1] is "class.testMethod" — extract class and test name
+                        class_test_field = groups[1]
+                        if "." in class_test_field:
+                            test_class_str, test_func = class_test_field.rsplit(".", 1)
+                        else:
+                            test_class_str = class_test_field
+                            test_func = test_function  # Fallback to testcase name from XML
                         func_getting_tested = groups[2]  # func being tested
                         runtime = None
 
diff --git a/tests/test_languages/test_java/test_instrumentation.py b/tests/test_languages/test_java/test_instrumentation.py
index 64f161e73..588f803a3 100644
--- a/tests/test_languages/test_java/test_instrumentation.py
+++ b/tests/test_languages/test_java/test_instrumentation.py
@@ -146,7 +146,7 @@ def test_instrument_behavior_mode_simple(self, tmp_path: Path):
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testAdd";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -160,7 +160,7 @@ def test_instrument_behavior_mode_simple(self, tmp_path: Path):
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -258,7 +258,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testNegativeInput_ThrowsIllegalArgumentException";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -267,7 +267,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -312,7 +312,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         String _cf_testIteration2 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration2 == null) _cf_testIteration2 = "0";
         String _cf_test2 = "testZeroInput_ReturnsZero";
-        System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
+        System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
         byte[] _cf_serializedResult2 = null;
         long _cf_end2 = -1;
         long _cf_start2 = 0;
@@ -325,7 +325,7 @@ def test_instrument_behavior_mode_assert_throws_expression_lambda(self, tmp_path
         } finally {
             long _cf_end2_finally = System.nanoTime();
             long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-            System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + ":" + _cf_dur2 + "######!");
+            System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + ":" + _cf_dur2 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile2 != null && !_cf_outputFile2.isEmpty()) {
                 try {
@@ -424,7 +424,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testNegativeInput_ThrowsIllegalArgumentException";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -435,7 +435,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -480,7 +480,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         String _cf_testIteration2 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration2 == null) _cf_testIteration2 = "0";
         String _cf_test2 = "testZeroInput_ReturnsZero";
-        System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
+        System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + "######$!");
         byte[] _cf_serializedResult2 = null;
         long _cf_end2 = -1;
         long _cf_start2 = 0;
@@ -493,7 +493,7 @@ def test_instrument_behavior_mode_assert_throws_block_lambda(self, tmp_path: Pat
         } finally {
             long _cf_end2_finally = System.nanoTime();
             long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-            System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + ":" + _cf_dur2 + "######!");
+            System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_iter2 + ":" + _cf_dur2 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile2 != null && !_cf_outputFile2.isEmpty()) {
                 try {
@@ -572,11 +572,12 @@ def test_instrument_performance_mode_simple(self, tmp_path: Path):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "CalculatorTest";
         String _cf_cls1 = "CalculatorTest";
+        String _cf_test1 = "testAdd";
         String _cf_fn1 = "add";
         
         Calculator calc = new Calculator();
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -586,7 +587,7 @@ def test_instrument_performance_mode_simple(self, tmp_path: Path):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -641,10 +642,11 @@ def test_instrument_performance_mode_multiple_tests(self, tmp_path: Path):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "MathTest";
         String _cf_cls1 = "MathTest";
+        String _cf_test1 = "testAdd";
         String _cf_fn1 = "add";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -654,7 +656,7 @@ def test_instrument_performance_mode_multiple_tests(self, tmp_path: Path):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -666,10 +668,11 @@ def test_instrument_performance_mode_multiple_tests(self, tmp_path: Path):
         int _cf_innerIterations2 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod2 = "MathTest";
         String _cf_cls2 = "MathTest";
+        String _cf_test2 = "testSubtract";
         String _cf_fn2 = "add";
 
         for (int _cf_i2 = 0; _cf_i2 < _cf_innerIterations2; _cf_i2++) {
-            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
+            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
             long _cf_end2 = -1;
             long _cf_start2 = 0;
             try {
@@ -679,7 +682,7 @@ def test_instrument_performance_mode_multiple_tests(self, tmp_path: Path):
             } finally {
                 long _cf_end2_finally = System.nanoTime();
                 long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
+                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
             }
         }
     }
@@ -741,10 +744,11 @@ def test_instrument_preserves_annotations(self, tmp_path: Path):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "ServiceTest";
         String _cf_cls1 = "ServiceTest";
+        String _cf_test1 = "testService";
         String _cf_fn1 = "call";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -754,7 +758,7 @@ def test_instrument_preserves_annotations(self, tmp_path: Path):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -822,7 +826,7 @@ class TestKryoSerializerUsage:
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testFoo";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -834,7 +838,7 @@ class TestKryoSerializerUsage:
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -879,10 +883,11 @@ class TestKryoSerializerUsage:
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "MyTest";
         String _cf_cls1 = "MyTest";
+        String _cf_test1 = "testFoo";
         String _cf_fn1 = "foo";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -892,7 +897,7 @@ class TestKryoSerializerUsage:
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -952,10 +957,11 @@ def test_single_test_method(self):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "SimpleTest";
         String _cf_cls1 = "SimpleTest";
+        String _cf_test1 = "testSomething";
         String _cf_fn1 = "doSomething";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -965,7 +971,7 @@ def test_single_test_method(self):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -998,10 +1004,11 @@ def test_multiple_test_methods(self):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "MultiTest";
         String _cf_cls1 = "MultiTest";
+        String _cf_test1 = "testFirst";
         String _cf_fn1 = "func";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -1011,7 +1018,7 @@ def test_multiple_test_methods(self):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -1023,11 +1030,12 @@ def test_multiple_test_methods(self):
         int _cf_innerIterations2 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod2 = "MultiTest";
         String _cf_cls2 = "MultiTest";
+        String _cf_test2 = "testSecond";
         String _cf_fn2 = "func";
         
         second();
         for (int _cf_i2 = 0; _cf_i2 < _cf_innerIterations2; _cf_i2++) {
-            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
+            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
             long _cf_end2 = -1;
             long _cf_start2 = 0;
             try {
@@ -1037,7 +1045,7 @@ def test_multiple_test_methods(self):
             } finally {
                 long _cf_end2_finally = System.nanoTime();
                 long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
+                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
             }
         }
     }
@@ -1084,9 +1092,10 @@ def test_multiple_target_calls_in_single_test_method(self):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "RepeatTest";
         String _cf_cls1 = "RepeatTest";
+        String _cf_test1 = "testRepeat";
         String _cf_fn1 = "target";
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "1_" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "1_" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -1096,7 +1105,7 @@ def test_multiple_target_calls_in_single_test_method(self):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "1_" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "1_" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
         helper();
@@ -1106,9 +1115,10 @@ def test_multiple_target_calls_in_single_test_method(self):
         int _cf_innerIterations2 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod2 = "RepeatTest";
         String _cf_cls2 = "RepeatTest";
+        String _cf_test2 = "testRepeat";
         String _cf_fn2 = "target";
         for (int _cf_i2 = 0; _cf_i2 < _cf_innerIterations2; _cf_i2++) {
-            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + "2_" + _cf_i2 + "######$!");
+            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + "2_" + _cf_i2 + "######$!");
             long _cf_end2 = -1;
             long _cf_start2 = 0;
             try {
@@ -1118,7 +1128,7 @@ def test_multiple_target_calls_in_single_test_method(self):
             } finally {
                 long _cf_end2_finally = System.nanoTime();
                 long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + "2_" + _cf_i2 + ":" + _cf_dur2 + "######!");
+                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + "2_" + _cf_i2 + ":" + _cf_dur2 + "######!");
             }
         }
         teardown();
@@ -1324,7 +1334,7 @@ def test_instrument_generated_test_behavior_mode(self):
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testAdd";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -1337,7 +1347,7 @@ def test_instrument_generated_test_behavior_mode(self):
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -1411,10 +1421,11 @@ def test_instrument_generated_test_performance_mode(self):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "GeneratedTest";
         String _cf_cls1 = "GeneratedTest";
+        String _cf_test1 = "testMethod";
         String _cf_fn1 = "method";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -1424,7 +1435,7 @@ def test_instrument_generated_test_performance_mode(self):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -1440,9 +1451,9 @@ def test_timing_markers_can_be_parsed(self):
         """Test that generated timing markers can be parsed with the standard regex."""
         # Simulate stdout from instrumented test
         stdout = """
-!$######TestModule:TestClass:targetFunc:1:1######$!
+!$######TestModule:TestClass.testMethod:targetFunc:1:1######$!
 Running test...
-!######TestModule:TestClass:targetFunc:1:1:12345678######!
+!######TestModule:TestClass.testMethod:targetFunc:1:1:12345678######!
 """
         # Use the same regex patterns from parse_test_output.py
         start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!")
@@ -1457,14 +1468,14 @@ def test_timing_markers_can_be_parsed(self):
         # Verify parsed values
         start = start_matches[0]
         assert start[0] == "TestModule"
-        assert start[1] == "TestClass"
+        assert start[1] == "TestClass.testMethod"
         assert start[2] == "targetFunc"
         assert start[3] == "1"
         assert start[4] == "1"
 
         end = end_matches[0]
         assert end[0] == "TestModule"
-        assert end[1] == "TestClass"
+        assert end[1] == "TestClass.testMethod"
         assert end[2] == "targetFunc"
         assert end[3] == "1"
         assert end[4] == "1"
@@ -1473,15 +1484,15 @@ def test_timing_markers_can_be_parsed(self):
     def test_multiple_timing_markers(self):
         """Test parsing multiple timing markers."""
         stdout = """
-!$######Module:Class:func:1:1######$!
+!$######Module:Class.testMethod:func:1:1######$!
 test 1
-!######Module:Class:func:1:1:100000######!
-!$######Module:Class:func:2:1######$!
+!######Module:Class.testMethod:func:1:1:100000######!
+!$######Module:Class.testMethod:func:2:1######$!
 test 2
-!######Module:Class:func:2:1:200000######!
-!$######Module:Class:func:3:1######$!
+!######Module:Class.testMethod:func:2:1:200000######!
+!$######Module:Class.testMethod:func:3:1######$!
 test 3
-!######Module:Class:func:3:1:150000######!
+!######Module:Class.testMethod:func:3:1:150000######!
 """
         end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+):([^:]+)######!")
         end_matches = end_pattern.findall(stdout)
@@ -1499,15 +1510,15 @@ def test_inner_loop_timing_markers(self):
         """
         # Simulate stdout from 3 inner iterations (inner_iterations=3)
         stdout = """
-!$######Module:Class:func:1:0######$!
+!$######Module:Class.testMethod:func:1:0######$!
 iteration 0
-!######Module:Class:func:1:0:150000######!
-!$######Module:Class:func:1:1######$!
+!######Module:Class.testMethod:func:1:0:150000######!
+!$######Module:Class.testMethod:func:1:1######$!
 iteration 1
-!######Module:Class:func:1:1:50000######!
-!$######Module:Class:func:1:2######$!
+!######Module:Class.testMethod:func:1:1:50000######!
+!$######Module:Class.testMethod:func:1:2######$!
 iteration 2
-!######Module:Class:func:1:2:45000######!
+!######Module:Class.testMethod:func:1:2:45000######!
 """
         start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!")
         end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+):([^:]+)######!")
@@ -1595,10 +1606,11 @@ def test_instrumented_code_has_balanced_braces(self, tmp_path: Path):
         int _cf_innerIterations2 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod2 = "BraceTest";
         String _cf_cls2 = "BraceTest";
+        String _cf_test2 = "testTwo";
         String _cf_fn2 = "process";
 
         for (int _cf_i2 = 0; _cf_i2 < _cf_innerIterations2; _cf_i2++) {
-            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
+            System.out.println("!$######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + "######$!");
             long _cf_end2 = -1;
             long _cf_start2 = 0;
             try {
@@ -1610,7 +1622,7 @@ def test_instrumented_code_has_balanced_braces(self, tmp_path: Path):
             } finally {
                 long _cf_end2_finally = System.nanoTime();
                 long _cf_dur2 = (_cf_end2 != -1 ? _cf_end2 : _cf_end2_finally) - _cf_start2;
-                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
+                System.out.println("!######" + _cf_mod2 + ":" + _cf_cls2 + "." + _cf_test2 + ":" + _cf_fn2 + ":" + _cf_loop2 + ":" + _cf_i2 + ":" + _cf_dur2 + "######!");
             }
         }
     }
@@ -1671,11 +1683,12 @@ def test_instrumented_code_preserves_imports(self, tmp_path: Path):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "ImportTest";
         String _cf_cls1 = "ImportTest";
+        String _cf_test1 = "testCollections";
         String _cf_fn1 = "size";
         
         List<String> list = new ArrayList<>();
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -1685,7 +1698,7 @@ def test_instrumented_code_preserves_imports(self, tmp_path: Path):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -1786,10 +1799,11 @@ def test_test_with_nested_braces(self, tmp_path: Path):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "NestedTest";
         String _cf_cls1 = "NestedTest";
+        String _cf_test1 = "testNested";
         String _cf_fn1 = "process";
 
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -1805,7 +1819,7 @@ def test_test_with_nested_braces(self, tmp_path: Path):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -2147,11 +2161,12 @@ def test_run_and_parse_performance_mode(self, java_project):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "MathUtilsTest";
         String _cf_cls1 = "MathUtilsTest";
+        String _cf_test1 = "testMultiply";
         String _cf_fn1 = "multiply";
         
         MathUtils math = new MathUtils();
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -2161,7 +2176,7 @@ def test_run_and_parse_performance_mode(self, java_project):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }
@@ -2530,7 +2545,7 @@ def test_behavior_mode_writes_to_sqlite(self, java_project):
         String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");
         if (_cf_testIteration1 == null) _cf_testIteration1 = "0";
         String _cf_test1 = "testIncrement";
-        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
+        System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + "######$!");
         byte[] _cf_serializedResult1 = null;
         long _cf_end1 = -1;
         long _cf_start1 = 0;
@@ -2544,7 +2559,7 @@ def test_behavior_mode_writes_to_sqlite(self, java_project):
         } finally {
             long _cf_end1_finally = System.nanoTime();
             long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
+            System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_iter1 + ":" + _cf_dur1 + "######!");
             // Write to SQLite if output file is set
             if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {
                 try {
@@ -2745,11 +2760,12 @@ def test_performance_mode_inner_loop_timing_markers(self, java_project):
         int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "100"));
         String _cf_mod1 = "FibonacciTest";
         String _cf_cls1 = "FibonacciTest";
+        String _cf_test1 = "testFib";
         String _cf_fn1 = "fib";
         
         Fibonacci fib = new Fibonacci();
         for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {
-            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
+            System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + "######$!");
             long _cf_end1 = -1;
             long _cf_start1 = 0;
             try {
@@ -2759,7 +2775,7 @@ def test_performance_mode_inner_loop_timing_markers(self, java_project):
             } finally {
                 long _cf_end1_finally = System.nanoTime();
                 long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;
-                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
+                System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + _cf_i1 + ":" + _cf_dur1 + "######!");
             }
         }
     }

From d4add6102c459be0a0850db52da0c269f91f2ad0 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 15:33:22 +0000
Subject: [PATCH 28/30] fix: clear test file path cache between optimization
 iterations in --all mode

The module-level _test_file_path_cache persists across optimization
iterations, which can cause negative cache entries to mask test files
generated in later iterations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/optimization/optimizer.py         | 2 ++
 codeflash/verification/parse_test_output.py | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index 63afaa566..ed99e8083 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -33,6 +33,7 @@
 from codeflash.languages import current_language_support, is_java, is_javascript, set_current_language
 from codeflash.models.models import ValidCode
 from codeflash.telemetry.posthog_cf import ph
+from codeflash.verification.parse_test_output import clear_test_file_path_cache
 from codeflash.verification.verification_utils import TestConfig
 
 if TYPE_CHECKING:
@@ -689,6 +690,7 @@ def run(self) -> None:
                     if function_optimizer is not None:
                         function_optimizer.executor.shutdown(wait=True)
                         function_optimizer.cleanup_generated_files()
+                        clear_test_file_path_cache()
 
             ph("cli-optimize-run-finished", {"optimizations_found": optimizations_found})
             if len(self.patch_files) > 0:
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index 30df53498..deb7d3a4b 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -147,6 +147,10 @@ def parse_concurrency_metrics(test_results: TestResults, function_name: str) ->
 _test_file_path_cache: dict[tuple[str, Path], Path | None] = {}
 
 
+def clear_test_file_path_cache() -> None:
+    _test_file_path_cache.clear()
+
+
 def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> Path | None:
     """Resolve test file path from pytest's test class path or Java class path.
 

From 38d63090450ae1a95c681b4928db7cd86951e1d3 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 20 Feb 2026 15:36:14 +0000
Subject: [PATCH 29/30] chore: log debug message when JUnitCore ignores
 reports_dir parameter

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 codeflash/languages/java/test_runner.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py
index 2bf4e6334..1ebc2bc8f 100644
--- a/codeflash/languages/java/test_runner.py
+++ b/codeflash/languages/java/test_runner.py
@@ -696,6 +696,12 @@ def _run_tests_direct(
         logger.debug("JUnit 4 project, using ConsoleLauncher (via vintage engine)")
 
     if is_junit4:
+        if reports_dir:
+            logger.debug(
+                "JUnitCore does not support XML report generation; reports_dir=%s ignored. "
+                "XML reports require ConsoleLauncher.",
+                reports_dir,
+            )
         # Use JUnit 4's JUnitCore runner
         cmd = [
             str(java),

From 5346cabef8cc10ceb9c72050add99ad32e08dc25 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 20:15:06 +0000
Subject: [PATCH 30/30] style: auto-fix linting issues

---
 codeflash/languages/java/instrumentation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/codeflash/languages/java/instrumentation.py b/codeflash/languages/java/instrumentation.py
index a59ed48a4..1cacbef5b 100644
--- a/codeflash/languages/java/instrumentation.py
+++ b/codeflash/languages/java/instrumentation.py
@@ -840,7 +840,9 @@ def split_var_declaration(stmt_node: Any, source_bytes_ref: bytes) -> tuple[str,
         assignment = f"{name_text} = {value_text};"
         return hoisted, assignment
 
-    def build_instrumented_body(body_text: str, next_wrapper_id: int, base_indent: str, test_method_name: str = "unknown") -> tuple[str, int]:
+    def build_instrumented_body(
+        body_text: str, next_wrapper_id: int, base_indent: str, test_method_name: str = "unknown"
+    ) -> tuple[str, int]:
         body_bytes = body_text.encode("utf8")
         wrapper_bytes = _TS_BODY_PREFIX_BYTES + body_bytes + _TS_BODY_SUFFIX.encode("utf8")
         wrapper_tree = analyzer.parse(wrapper_bytes)