From 4f8ff2d28846d5ef5c20d6c8bc21a2ef5dbb4554 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Mon, 2 Feb 2026 00:48:38 +0000
Subject: [PATCH] Optimize _extract_type_body_context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves an **11% runtime improvement** (93.6μs → 83.6μs) through two key changes:

**1. Caching `child.type` in a local variable**

```python
child_type = child.type  # Cache the attribute access
if child_type in ("{", "}", ";", ","):
```

In the loop over `body_node.children`, `child.type` was read once per type check in the `if`/`elif` chain, up to five times per iteration. By storing it once in `child_type`, we eliminate repeated attribute lookups on the Node object, which are more expensive than local variable access in Python.

**2. Replacing `append("".join(...))` with `extend(...)`**

Original:

```python
field_lines = lines[javadoc_start : end_line + 1]
field_parts.append("".join(field_lines))  # Join then append
```

Optimized:

```python
field_parts.extend(lines[javadoc_start : end_line + 1])  # Directly extend
```

This eliminates the intermediate per-member `"".join(...)` calls inside the loop. Instead of creating a joined string for each field, constant, or constructor and appending it to the list, we extend the list with the raw line slices. The final `"".join(field_parts)` at the end performs a single join over all accumulated lines and produces the same string, which is more efficient than joining once per member and then joining the results again.

**Performance impact by test case:**

- **Large-scale test** (200 fields): 16.5% faster (71.6μs → 61.5μs) - the `extend` optimization scales particularly well with many fields
- **Multiple mixed fields/constructors**: 4.86% faster - benefits from both optimizations
- **Basic single-field tests**: slight variation (some 0.5-5% slower) - the extra local variable assignment adds a small overhead that single-element cases cannot amortize, but the output is unchanged

The optimization is most effective when processing Java files with many field declarations or constructors, which is common in real-world codebases. Deferring the string join to a single final call is a classic Python performance technique that reduces memory allocations and intermediate object creation.
--- codeflash/languages/java/context.py | 22 +++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/codeflash/languages/java/context.py b/codeflash/languages/java/context.py index a5597351c..701dddd96 100644 --- a/codeflash/languages/java/context.py +++ b/codeflash/languages/java/context.py @@ -413,18 +413,20 @@ def _extract_type_body_context( enum_constant_parts: list[str] = [] for child in body_node.children: + child_type = child.type + # Skip braces, semicolons, and commas - if child.type in ("{", "}", ";", ","): + if child_type in ("{", "}", ";", ","): continue # Handle enum constants (only for enums) # Extract just the constant name/text, not the whole line - if child.type == "enum_constant" and type_kind == "enum": + if child_type == "enum_constant" and type_kind == "enum": constant_text = source_bytes[child.start_byte : child.end_byte].decode("utf8") enum_constant_parts.append(constant_text) # Handle field declarations - elif child.type == "field_declaration": + elif child_type == "field_declaration": start_line = child.start_point[0] end_line = child.end_point[0] @@ -436,18 +438,16 @@ def _extract_type_body_context( if comment_text.strip().startswith("/**"): javadoc_start = 
prev_sibling.start_point[0] - field_lines = lines[javadoc_start : end_line + 1] - field_parts.append("".join(field_lines)) + field_parts.extend(lines[javadoc_start : end_line + 1]) # Handle constant declarations (for interfaces) - elif child.type == "constant_declaration" and type_kind == "interface": + elif child_type == "constant_declaration" and type_kind == "interface": start_line = child.start_point[0] end_line = child.end_point[0] - constant_lines = lines[start_line : end_line + 1] - field_parts.append("".join(constant_lines)) + field_parts.extend(lines[start_line : end_line + 1]) # Handle constructor declarations - elif child.type == "constructor_declaration": + elif child_type == "constructor_declaration": start_line = child.start_point[0] end_line = child.end_point[0] @@ -459,8 +459,8 @@ def _extract_type_body_context( if comment_text.strip().startswith("/**"): javadoc_start = prev_sibling.start_point[0] - constructor_lines = lines[javadoc_start : end_line + 1] - constructor_parts.append("".join(constructor_lines)) + constructor_parts.extend(lines[javadoc_start : end_line + 1]) + fields_code = "".join(field_parts) constructors_code = "".join(constructor_parts)