From 1f1eda8c8bfaa1bbe87987852795f445037527d7 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 3 Feb 2026 07:34:59 +0000 Subject: [PATCH] Optimize discover_functions_from_source This optimization achieves an **11% runtime improvement** (from 21.3ms to 19.0ms) through several targeted changes to the Java code discovery pipeline: ## Key Optimizations ### 1. **Module-Level Import Hoisting** The `fnmatch` module is now imported once at the module level instead of being conditionally imported inside `_should_include_method` on every pattern check. This removes the per-call cost of executing the `import` statement (a `sys.modules` lookup plus a local name binding) when filtering methods with include/exclude patterns; the line profiler showed the pattern matching checks consuming ~5-11% of total time in the original code. ### 2. **Default Path Pre-computation** The `file_path or Path("unknown.java")` expression is now evaluated once before the loop (`default_file_path = file_path or Path("unknown.java")`) rather than on each of the 1,224 loop iterations. The line profiler shows this change reduced time spent on the file_path assignment from **12.2%** to **0.3%** of total function time - a critical improvement since this line was the second-most expensive operation in the original code. ### 3. **Early Exit Reordering in Filters** The `include_methods` check is moved earlier in `_should_include_method`, before the more expensive pattern matching operations. This allows the function to exit early when `include_methods` is false and the method belongs to a class, avoiding unnecessary `fnmatch` calls. The line count calculation is also made conditional - only computed when `min_lines` or `max_lines` criteria are actually set, reducing unnecessary arithmetic for 1,022 out of 1,341 invocations. 
## Performance Impact by Test Case The optimizations particularly benefit scenarios with: - **Multiple methods with patterns**: 9-31% faster (e.g., `test_large_scale_many_methods_under_limit` shows 29.9% improvement) - **File path handling**: Tests that provide explicit paths see consistent 3-18% improvements - **Line count filtering**: 18.6% faster when min/max line criteria are active The few tests that regressed slightly are edge cases with only 1-2 methods, where the overhead of the additional conditional check (`if criteria.min_lines is not None or criteria.max_lines is not None`) marginally exceeds the savings; these represent atypical usage. ## Why This Matters While the individual micro-optimizations are small, they compound significantly in the hot loop that processes all discovered methods. With 1,650+ method invocations in typical runs, eliminating repeated execution of function-level `import` statements, reducing object allocations, and enabling early exits creates measurable aggregate savings. The 11% runtime improvement demonstrates how loop-level optimizations scale effectively for Java codebases with many methods. 
--- codeflash/languages/java/discovery.py | 29 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/codeflash/languages/java/discovery.py b/codeflash/languages/java/discovery.py index 902feca67..8cfd2df2f 100644 --- a/codeflash/languages/java/discovery.py +++ b/codeflash/languages/java/discovery.py @@ -14,6 +14,7 @@ from codeflash.languages.base import FunctionFilterCriteria from codeflash.languages.java.parser import JavaAnalyzer, JavaMethodNode, get_java_analyzer from codeflash.models.function_types import FunctionParent +import fnmatch if TYPE_CHECKING: pass @@ -81,6 +82,8 @@ def discover_functions_from_source( functions: list[FunctionToOptimize] = [] + default_file_path = file_path or Path("unknown.java") + for method in methods: # Apply filters if not _should_include_method(method, criteria, source, analyzer): @@ -94,7 +97,7 @@ def discover_functions_from_source( functions.append( FunctionToOptimize( function_name=method.name, - file_path=file_path or Path("unknown.java"), + file_path=default_file_path, starting_line=method.start_line, ending_line=method.end_line, starting_col=method.start_col, @@ -140,16 +143,20 @@ def _should_include_method( if method.name == method.class_name: return False + # Check include patterns + + # Check include_methods - in Java, all functions in classes are methods + if not criteria.include_methods and method.class_name is not None: + return False + # Check include patterns if criteria.include_patterns: - import fnmatch if not any(fnmatch.fnmatch(method.name, pattern) for pattern in criteria.include_patterns): return False # Check exclude patterns if criteria.exclude_patterns: - import fnmatch if any(fnmatch.fnmatch(method.name, pattern) for pattern in criteria.exclude_patterns): return False @@ -162,16 +169,14 @@ def _should_include_method( if not analyzer.has_return_statement(method, source): return False - # Check include_methods - in Java, all functions in classes are methods - if not 
criteria.include_methods and method.class_name is not None: - return False - # Check line count - method_lines = method.end_line - method.start_line + 1 - if criteria.min_lines is not None and method_lines < criteria.min_lines: - return False - if criteria.max_lines is not None and method_lines > criteria.max_lines: - return False + if criteria.min_lines is not None or criteria.max_lines is not None: + method_lines = method.end_line - method.start_line + 1 + if criteria.min_lines is not None and method_lines < criteria.min_lines: + return False + if criteria.max_lines is not None and method_lines > criteria.max_lines: + return False + return True