From b058dee829ca0c2362385f6bf4c6293ba44028bb Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 21:57:50 -0500 Subject: [PATCH 01/16] perf: disable Sentry subprocess instrumentation and tracing StdlibIntegration monkeypatches subprocess.Popen, adding span/breadcrumb overhead to every subprocess call. Profiling showed ~342k samples (~8.9% of total runtime) in sentry_sdk.integrations.stdlib and sentry_sdk.utils. Disable subprocess instrumentation and set traces/profiles sample rates to 0 since codeflash is a CLI tool that doesn't need performance tracing. Error capturing (capture_exception, capture_message, LoggingIntegration) remains fully functional. --- codeflash/telemetry/sentry.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/codeflash/telemetry/sentry.py b/codeflash/telemetry/sentry.py index 2357439dc..5f1c2a06b 100644 --- a/codeflash/telemetry/sentry.py +++ b/codeflash/telemetry/sentry.py @@ -2,6 +2,7 @@ import sentry_sdk from sentry_sdk.integrations.logging import LoggingIntegration +from sentry_sdk.integrations.stdlib import StdlibIntegration def init_sentry(*, enabled: bool = False, exclude_errors: bool = False) -> None: @@ -15,13 +16,8 @@ def init_sentry(*, enabled: bool = False, exclude_errors: bool = False) -> None: sentry_sdk.init( dsn="https://4b9a1902f9361b48c04376df6483bc96@o4506833230561280.ingest.sentry.io/4506833262477312", - integrations=[sentry_logging], - # Set traces_sample_rate to 1.0 to capture 100% - # of transactions for performance monitoring. - traces_sample_rate=1.0, - # Set profiles_sample_rate to 1.0 to profile 100% - # of sampled transactions. - # We recommend adjusting this value in production. 
- profiles_sample_rate=1.0, + integrations=[sentry_logging, StdlibIntegration(subprocess_instrumentation=False)], + traces_sample_rate=0, + profiles_sample_rate=0, ignore_errors=[KeyboardInterrupt], ) From b19e1bda00a3a6bedb15aeb49eb7e205d4215754 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 22:19:02 -0500 Subject: [PATCH 02/16] perf: replace backtracking regexes with character-class patterns in parse_test_output Replace lazy `.*?` quantifiers in matches_re_start/matches_re_end with negated character classes (`[^:]`, `[^#]`, `[^.:]`) to eliminate quadratic backtracking. Replace per-line regex search for the pytest FAILURES header with a simple `"= FAILURES =" in line` string check. Add tests for the regex patterns and failure header detection. --- codeflash/verification/parse_test_output.py | 23 ++- tests/test_parse_test_output_regex.py | 189 ++++++++++++++++++++ 2 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 tests/test_parse_test_output_regex.py diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 4c2c809eb..6fef22a99 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -47,8 +47,24 @@ def parse_func(file_path: Path) -> XMLParser: return parse(file_path, xml_parser) -matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n") -matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") +matches_re_start = re.compile( + r"!\$######([^:]*)" # group 1: module path + r":((?:[^:.]*\.)*)" # group 2: class prefix with trailing dot, or empty + r"([^.:]*)" # group 3: test function name + r":([^:]*)" # group 4: function being tested + r":([^:]*)" # group 5: loop index + r":([^#]*)" # group 6: iteration id + r"######\$!\n" +) +matches_re_end = re.compile( + r"!######([^:]*)" # group 1: module path + r":((?:[^:.]*\.)*)" # group 2: class prefix with trailing dot, 
or empty + r"([^.:]*)" # group 3: test function name + r":([^:]*)" # group 4: function being tested + r":([^:]*)" # group 5: loop index + r":([^#]*)" # group 6: iteration_id or iteration_id:runtime + r"######!" +) start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!") @@ -893,7 +909,6 @@ def merge_test_results( return merged_test_results -FAILURES_HEADER_RE = re.compile(r"=+ FAILURES =+") TEST_HEADER_RE = re.compile(r"_{3,}\s*(.*?)\s*_{3,}$") @@ -903,7 +918,7 @@ def parse_test_failures_from_stdout(stdout: str) -> dict[str, str]: start = end = None for i, line in enumerate(lines): - if FAILURES_HEADER_RE.search(line.strip()): + if "= FAILURES =" in line: start = i break diff --git a/tests/test_parse_test_output_regex.py b/tests/test_parse_test_output_regex.py new file mode 100644 index 000000000..d91d57e55 --- /dev/null +++ b/tests/test_parse_test_output_regex.py @@ -0,0 +1,189 @@ +"""Tests for the regex patterns and string matching in parse_test_output.py.""" + +from codeflash.verification.parse_test_output import ( + matches_re_end, + matches_re_start, + parse_test_failures_from_stdout, +) + + +# --- matches_re_start tests --- + + +class TestMatchesReStart: + def test_simple_no_class(self): + s = "!$######tests.test_foo:test_bar:target_func:1:abc######$!\n" + m = matches_re_start.search(s) + assert m is not None + assert m.groups() == ("tests.test_foo", "", "test_bar", "target_func", "1", "abc") + + def test_with_class(self): + s = "!$######tests.test_foo:MyClass.test_bar:target_func:1:abc######$!\n" + m = matches_re_start.search(s) + assert m is not None + assert m.groups() == ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc") + + def test_nested_class(self): + s = "!$######a.b.c:A.B.test_x:func:3:id123######$!\n" + m = matches_re_start.search(s) + assert m is not None + assert m.groups() == ("a.b.c", "A.B.", "test_x", "func", "3", "id123") + + def test_empty_class_and_function(self): + s = 
"!$######mod::func:0:iter######$!\n" + m = matches_re_start.search(s) + assert m is not None + assert m.groups() == ("mod", "", "", "func", "0", "iter") + + def test_embedded_in_stdout(self): + s = "some output\n!$######mod:test_fn:f:1:x######$!\nmore output\n" + m = matches_re_start.search(s) + assert m is not None + assert m.groups() == ("mod", "", "test_fn", "f", "1", "x") + + def test_multiple_matches(self): + s = ( + "!$######m1:C1.fn1:t1:1:a######$!\n" + "!$######m2:fn2:t2:2:b######$!\n" + ) + matches = list(matches_re_start.finditer(s)) + assert len(matches) == 2 + assert matches[0].groups() == ("m1", "C1.", "fn1", "t1", "1", "a") + assert matches[1].groups() == ("m2", "", "fn2", "t2", "2", "b") + + def test_no_match_without_newline(self): + s = "!$######mod:test_fn:f:1:x######$!" + m = matches_re_start.search(s) + assert m is None + + def test_dots_in_module_path(self): + s = "!$######a.b.c.d.e:test_fn:f:1:x######$!\n" + m = matches_re_start.search(s) + assert m is not None + assert m.group(1) == "a.b.c.d.e" + + +# --- matches_re_end tests --- + + +class TestMatchesReEnd: + def test_simple_no_class_with_runtime(self): + s = "!######tests.test_foo:test_bar:target_func:1:abc:12345######!" + m = matches_re_end.search(s) + assert m is not None + assert m.groups() == ("tests.test_foo", "", "test_bar", "target_func", "1", "abc:12345") + + def test_with_class_no_runtime(self): + s = "!######tests.test_foo:MyClass.test_bar:target_func:1:abc######!" + m = matches_re_end.search(s) + assert m is not None + assert m.groups() == ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc") + + def test_nested_class_with_runtime(self): + s = "!######mod:A.B.test_x:func:3:id123:99999######!" 
+ m = matches_re_end.search(s) + assert m is not None + assert m.groups() == ("mod", "A.B.", "test_x", "func", "3", "id123:99999") + + def test_runtime_colon_preserved_in_group6(self): + """Group 6 must capture 'iteration_id:runtime' as a single string (colon included).""" + s = "!######m:fn:f:1:iter42:98765######!" + m = matches_re_end.search(s) + assert m is not None + assert m.group(6) == "iter42:98765" + + def test_embedded_in_stdout(self): + s = "captured output\n!######mod:test_fn:f:1:x:500######!\nmore" + m = matches_re_end.search(s) + assert m is not None + assert m.groups() == ("mod", "", "test_fn", "f", "1", "x:500") + + +# --- Start/End pairing (simulates parse_test_xml matching logic) --- + + +class TestStartEndPairing: + def test_paired_markers(self): + stdout = ( + "!$######mod:Class.test_fn:func:1:iter1######$!\n" + "test output here\n" + "!######mod:Class.test_fn:func:1:iter1:54321######!" + ) + starts = list(matches_re_start.finditer(stdout)) + ends = {} + for match in matches_re_end.finditer(stdout): + groups = match.groups() + g5 = groups[5] + colon_pos = g5.find(":") + if colon_pos != -1: + key = groups[:5] + (g5[:colon_pos],) + else: + key = groups + ends[key] = match + + assert len(starts) == 1 + assert len(ends) == 1 + # Start and end should pair on the first 5 groups + iteration_id + start_groups = starts[0].groups() + assert start_groups in ends + + +# --- parse_test_failures_from_stdout tests --- + + +class TestParseTestFailuresHeader: + def test_standard_pytest_header(self): + stdout = ( + "..F.\n" + "=================================== FAILURES ===================================\n" + "_______ test_foo _______\n" + "\n" + " def test_foo():\n" + "> assert False\n" + "E AssertionError\n" + "\n" + "test.py:3: AssertionError\n" + "=========================== short test summary info ============================\n" + "FAILED test.py::test_foo\n" + ) + result = parse_test_failures_from_stdout(stdout) + assert "test_foo" in result + + def 
test_minimal_equals(self): + """Even a short '= FAILURES =' header should be detected.""" + stdout = ( + "= FAILURES =\n" + "_______ test_bar _______\n" + "\n" + " assert False\n" + "\n" + "test.py:1: AssertionError\n" + "= short test summary info =\n" + ) + result = parse_test_failures_from_stdout(stdout) + assert "test_bar" in result + + def test_no_failures_section(self): + stdout = "....\n4 passed in 0.1s\n" + result = parse_test_failures_from_stdout(stdout) + assert result == {} + + def test_word_failures_without_equals_is_not_matched(self): + """'FAILURES' without surrounding '=' signs should not trigger the header detection.""" + stdout = ( + "FAILURES detected in module\n" + "_______ test_baz _______\n" + "\n" + " assert False\n" + ) + result = parse_test_failures_from_stdout(stdout) + assert result == {} + + def test_failures_in_test_output_not_matched(self): + """A test printing 'FAILURES' (no = signs) should not trigger header detection.""" + stdout = ( + "Testing FAILURES handling\n" + "All good\n" + ) + result = parse_test_failures_from_stdout(stdout) + assert result == {} From 67ee7340d09ef5741e88c7058a8944b8633c542d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 22:47:47 -0500 Subject: [PATCH 03/16] perf: remove redundant project_root_path.resolve() from hot path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve project root paths once at construction time (TestConfig.__post_init__, FunctionOptimizer.__init__, filter_functions entry) instead of on every call to module_name_from_file_path — eliminating ~776 redundant filesystem syscalls. 
--- codeflash/code_utils/code_utils.py | 2 +- codeflash/discovery/functions_to_optimize.py | 3 ++- codeflash/optimization/function_optimizer.py | 4 ++-- codeflash/verification/verification_utils.py | 4 ++++ 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 7a9afc96f..c5d3e832f 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -340,7 +340,7 @@ def get_qualified_name(module_name: str, full_qualified_name: str) -> str: def module_name_from_file_path(file_path: Path, project_root_path: Path, *, traverse_up: bool = False) -> str: try: - relative_path = file_path.resolve().relative_to(project_root_path.resolve()) + relative_path = file_path.resolve().relative_to(project_root_path) return relative_path.with_suffix("").as_posix().replace("/", ".") except ValueError: if traverse_up: diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index ed18ed53b..65622801f 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -836,6 +836,7 @@ def filter_functions( *, disable_logs: bool = False, ) -> tuple[dict[Path, list[FunctionToOptimize]], int]: + resolved_project_root = project_root.resolve() filtered_modified_functions: dict[str, list[FunctionToOptimize]] = {} blocklist_funcs = get_blocklisted_functions() logger.debug(f"Blocklisted functions: {blocklist_funcs}") @@ -912,7 +913,7 @@ def is_test_file(file_path_normalized: str) -> bool: lang_support = get_language_support(Path(file_path)) if lang_support.language == Language.PYTHON: try: - ast.parse(f"import {module_name_from_file_path(Path(file_path), project_root)}") + ast.parse(f"import {module_name_from_file_path(Path(file_path), resolved_project_root)}") except SyntaxError: malformed_paths_count += 1 continue diff --git a/codeflash/optimization/function_optimizer.py 
b/codeflash/optimization/function_optimizer.py index 7cbcda976..3269869ac 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -443,7 +443,7 @@ def __init__( args: Namespace | None = None, replay_tests_dir: Path | None = None, ) -> None: - self.project_root = test_cfg.project_root_path + self.project_root = test_cfg.project_root_path.resolve() self.test_cfg = test_cfg self.aiservice_client = aiservice_client if aiservice_client else AiServiceClient() self.function_to_optimize = function_to_optimize @@ -1451,7 +1451,7 @@ def reformat_code_and_helpers( optimized_code = "" if optimized_context is not None: file_to_code_context = optimized_context.file_to_path() - optimized_code = file_to_code_context.get(str(path.relative_to(self.project_root)), "") + optimized_code = file_to_code_context.get(str(path.resolve().relative_to(self.project_root)), "") new_code = format_code( self.args.formatter_cmds, path, optimized_code=optimized_code, check_diff=True, exit_on_failure=False diff --git a/codeflash/verification/verification_utils.py b/codeflash/verification/verification_utils.py index c567e6a9a..d586d9962 100644 --- a/codeflash/verification/verification_utils.py +++ b/codeflash/verification/verification_utils.py @@ -158,6 +158,10 @@ class TestConfig: _language: Optional[str] = None # Language identifier for multi-language support js_project_root: Optional[Path] = None # JavaScript project root (directory containing package.json) + def __post_init__(self) -> None: + self.project_root_path = self.project_root_path.resolve() + self.tests_project_rootdir = self.tests_project_rootdir.resolve() + @property def test_framework(self) -> str: """Returns the appropriate test framework based on language. 
From 377083a0f032a911e87940d556afa6976b2cce29 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 03:51:11 +0000 Subject: [PATCH 04/16] style: auto-fix ruff formatting in parse_test_output.py --- codeflash/verification/parse_test_output.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 6fef22a99..53012feb1 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -50,19 +50,19 @@ def parse_func(file_path: Path) -> XMLParser: matches_re_start = re.compile( r"!\$######([^:]*)" # group 1: module path r":((?:[^:.]*\.)*)" # group 2: class prefix with trailing dot, or empty - r"([^.:]*)" # group 3: test function name - r":([^:]*)" # group 4: function being tested - r":([^:]*)" # group 5: loop index - r":([^#]*)" # group 6: iteration id + r"([^.:]*)" # group 3: test function name + r":([^:]*)" # group 4: function being tested + r":([^:]*)" # group 5: loop index + r":([^#]*)" # group 6: iteration id r"######\$!\n" ) matches_re_end = re.compile( - r"!######([^:]*)" # group 1: module path + r"!######([^:]*)" # group 1: module path r":((?:[^:.]*\.)*)" # group 2: class prefix with trailing dot, or empty - r"([^.:]*)" # group 3: test function name - r":([^:]*)" # group 4: function being tested - r":([^:]*)" # group 5: loop index - r":([^#]*)" # group 6: iteration_id or iteration_id:runtime + r"([^.:]*)" # group 3: test function name + r":([^:]*)" # group 4: function being tested + r":([^:]*)" # group 5: loop index + r":([^#]*)" # group 6: iteration_id or iteration_id:runtime r"######!" 
) From 486f6b81d1b27dc995809946200813797d76536c Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 22:51:58 -0500 Subject: [PATCH 05/16] fix: use disabled_integrations to exclude StdlibIntegration StdlibIntegration in sentry-sdk 2.x doesn't accept subprocess_instrumentation as a parameter. Use disabled_integrations instead, which also avoids httplib patching overhead. --- codeflash/telemetry/sentry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codeflash/telemetry/sentry.py b/codeflash/telemetry/sentry.py index 5f1c2a06b..3ee266326 100644 --- a/codeflash/telemetry/sentry.py +++ b/codeflash/telemetry/sentry.py @@ -16,7 +16,8 @@ def init_sentry(*, enabled: bool = False, exclude_errors: bool = False) -> None: sentry_sdk.init( dsn="https://4b9a1902f9361b48c04376df6483bc96@o4506833230561280.ingest.sentry.io/4506833262477312", - integrations=[sentry_logging, StdlibIntegration(subprocess_instrumentation=False)], + integrations=[sentry_logging], + disabled_integrations=[StdlibIntegration], traces_sample_rate=0, profiles_sample_rate=0, ignore_errors=[KeyboardInterrupt], From 89952791bec67c504f4636ce40be2905827aaab6 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 03:56:13 +0000 Subject: [PATCH 06/16] style: add return type annotations to test methods --- tests/test_parse_test_output_regex.py | 38 +++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/test_parse_test_output_regex.py b/tests/test_parse_test_output_regex.py index d91d57e55..e313885ab 100644 --- a/tests/test_parse_test_output_regex.py +++ b/tests/test_parse_test_output_regex.py @@ -11,37 +11,37 @@ class TestMatchesReStart: - def test_simple_no_class(self): + def test_simple_no_class(self) -> None: s = "!$######tests.test_foo:test_bar:target_func:1:abc######$!\n" m = matches_re_start.search(s) assert m is not None assert m.groups() == ("tests.test_foo", "", 
"test_bar", "target_func", "1", "abc") - def test_with_class(self): + def test_with_class(self) -> None: s = "!$######tests.test_foo:MyClass.test_bar:target_func:1:abc######$!\n" m = matches_re_start.search(s) assert m is not None assert m.groups() == ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc") - def test_nested_class(self): + def test_nested_class(self) -> None: s = "!$######a.b.c:A.B.test_x:func:3:id123######$!\n" m = matches_re_start.search(s) assert m is not None assert m.groups() == ("a.b.c", "A.B.", "test_x", "func", "3", "id123") - def test_empty_class_and_function(self): + def test_empty_class_and_function(self) -> None: s = "!$######mod::func:0:iter######$!\n" m = matches_re_start.search(s) assert m is not None assert m.groups() == ("mod", "", "", "func", "0", "iter") - def test_embedded_in_stdout(self): + def test_embedded_in_stdout(self) -> None: s = "some output\n!$######mod:test_fn:f:1:x######$!\nmore output\n" m = matches_re_start.search(s) assert m is not None assert m.groups() == ("mod", "", "test_fn", "f", "1", "x") - def test_multiple_matches(self): + def test_multiple_matches(self) -> None: s = ( "!$######m1:C1.fn1:t1:1:a######$!\n" "!$######m2:fn2:t2:2:b######$!\n" @@ -51,12 +51,12 @@ def test_multiple_matches(self): assert matches[0].groups() == ("m1", "C1.", "fn1", "t1", "1", "a") assert matches[1].groups() == ("m2", "", "fn2", "t2", "2", "b") - def test_no_match_without_newline(self): + def test_no_match_without_newline(self) -> None: s = "!$######mod:test_fn:f:1:x######$!" 
m = matches_re_start.search(s) assert m is None - def test_dots_in_module_path(self): + def test_dots_in_module_path(self) -> None: s = "!$######a.b.c.d.e:test_fn:f:1:x######$!\n" m = matches_re_start.search(s) assert m is not None @@ -67,32 +67,32 @@ def test_dots_in_module_path(self): class TestMatchesReEnd: - def test_simple_no_class_with_runtime(self): + def test_simple_no_class_with_runtime(self) -> None: s = "!######tests.test_foo:test_bar:target_func:1:abc:12345######!" m = matches_re_end.search(s) assert m is not None assert m.groups() == ("tests.test_foo", "", "test_bar", "target_func", "1", "abc:12345") - def test_with_class_no_runtime(self): + def test_with_class_no_runtime(self) -> None: s = "!######tests.test_foo:MyClass.test_bar:target_func:1:abc######!" m = matches_re_end.search(s) assert m is not None assert m.groups() == ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc") - def test_nested_class_with_runtime(self): + def test_nested_class_with_runtime(self) -> None: s = "!######mod:A.B.test_x:func:3:id123:99999######!" m = matches_re_end.search(s) assert m is not None assert m.groups() == ("mod", "A.B.", "test_x", "func", "3", "id123:99999") - def test_runtime_colon_preserved_in_group6(self): + def test_runtime_colon_preserved_in_group6(self) -> None: """Group 6 must capture 'iteration_id:runtime' as a single string (colon included).""" s = "!######m:fn:f:1:iter42:98765######!" 
m = matches_re_end.search(s) assert m is not None assert m.group(6) == "iter42:98765" - def test_embedded_in_stdout(self): + def test_embedded_in_stdout(self) -> None: s = "captured output\n!######mod:test_fn:f:1:x:500######!\nmore" m = matches_re_end.search(s) assert m is not None @@ -103,7 +103,7 @@ def test_embedded_in_stdout(self): class TestStartEndPairing: - def test_paired_markers(self): + def test_paired_markers(self) -> None: stdout = ( "!$######mod:Class.test_fn:func:1:iter1######$!\n" "test output here\n" @@ -132,7 +132,7 @@ def test_paired_markers(self): class TestParseTestFailuresHeader: - def test_standard_pytest_header(self): + def test_standard_pytest_header(self) -> None: stdout = ( "..F.\n" "=================================== FAILURES ===================================\n" @@ -149,7 +149,7 @@ def test_standard_pytest_header(self): result = parse_test_failures_from_stdout(stdout) assert "test_foo" in result - def test_minimal_equals(self): + def test_minimal_equals(self) -> None: """Even a short '= FAILURES =' header should be detected.""" stdout = ( "= FAILURES =\n" @@ -163,12 +163,12 @@ def test_minimal_equals(self): result = parse_test_failures_from_stdout(stdout) assert "test_bar" in result - def test_no_failures_section(self): + def test_no_failures_section(self) -> None: stdout = "....\n4 passed in 0.1s\n" result = parse_test_failures_from_stdout(stdout) assert result == {} - def test_word_failures_without_equals_is_not_matched(self): + def test_word_failures_without_equals_is_not_matched(self) -> None: """'FAILURES' without surrounding '=' signs should not trigger the header detection.""" stdout = ( "FAILURES detected in module\n" @@ -179,7 +179,7 @@ def test_word_failures_without_equals_is_not_matched(self): result = parse_test_failures_from_stdout(stdout) assert result == {} - def test_failures_in_test_output_not_matched(self): + def test_failures_in_test_output_not_matched(self) -> None: """A test printing 'FAILURES' (no = signs) should 
not trigger header detection.""" stdout = ( "Testing FAILURES handling\n" From 474258595a6b212c2e2641fd57b3ad52d515e5c4 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 23:00:07 -0500 Subject: [PATCH 07/16] perf: remove remaining redundant .resolve() calls on pre-resolved paths Drop .resolve() from ImportResolver, TestsCache, init_javascript, create_pr, and filter_functions where callers already pass resolved paths via CLI init or TestConfig.__post_init__. --- codeflash/cli_cmds/init_javascript.py | 4 ++-- codeflash/discovery/discover_unit_tests.py | 4 ++-- codeflash/discovery/functions_to_optimize.py | 2 +- codeflash/languages/javascript/import_resolver.py | 3 +-- codeflash/result/create_pr.py | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/codeflash/cli_cmds/init_javascript.py b/codeflash/cli_cmds/init_javascript.py index d88c69904..e43bdc167 100644 --- a/codeflash/cli_cmds/init_javascript.py +++ b/codeflash/cli_cmds/init_javascript.py @@ -128,7 +128,7 @@ def determine_js_package_manager(project_root: Path) -> JsPackageManager: """ # Search from project_root up to filesystem root for lock files # This supports monorepo setups where lock file is at workspace root - current_dir = project_root.resolve() + current_dir = project_root while current_dir != current_dir.parent: if (current_dir / "bun.lockb").exists() or (current_dir / "bun.lock").exists(): return JsPackageManager.BUN @@ -161,7 +161,7 @@ def find_node_modules_with_package(project_root: Path, package_name: str) -> Pat Path to the node_modules directory containing the package, or None if not found. 
""" - current_dir = project_root.resolve() + current_dir = project_root while current_dir != current_dir.parent: node_modules = current_dir / "node_modules" if node_modules.exists(): diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index d1ef28a8d..c01a2f2e1 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -69,8 +69,8 @@ class TestFunction: class TestsCache: SCHEMA_VERSION = 1 # Increment this when schema changes - def __init__(self, project_root_path: str | Path) -> None: - self.project_root_path = Path(project_root_path).resolve().as_posix() + def __init__(self, project_root_path: Path) -> None: + self.project_root_path = project_root_path.as_posix() self.connection = sqlite3.connect(codeflash_cache_db) self.cur = self.connection.cursor() diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index 65622801f..d0bfe6c02 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -935,7 +935,7 @@ def is_test_file(file_path_normalized: str) -> bool: if previous_checkpoint_functions: functions_tmp = [] for function in _functions: - if function.qualified_name_with_modules_from_root(project_root) in previous_checkpoint_functions: + if function.qualified_name_with_modules_from_root(resolved_project_root) in previous_checkpoint_functions: previous_checkpoint_functions_removed_count += 1 continue functions_tmp.append(function) diff --git a/codeflash/languages/javascript/import_resolver.py b/codeflash/languages/javascript/import_resolver.py index 8f5dbe8ca..b5ec67115 100644 --- a/codeflash/languages/javascript/import_resolver.py +++ b/codeflash/languages/javascript/import_resolver.py @@ -44,8 +44,7 @@ def __init__(self, project_root: Path) -> None: project_root: Root directory of the project. 
""" - # Resolve to real path to handle macOS symlinks like /var -> /private/var - self.project_root = project_root.resolve() + self.project_root = project_root self._resolution_cache: dict[tuple[Path, str], Path | None] = {} def resolve_import(self, import_info: ImportInfo, source_file: Path) -> ResolvedImport | None: diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 0be4e1cf8..b276725f2 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -126,10 +126,10 @@ def existing_tests_source_for( tests_dir_name = test_cfg.tests_project_rootdir.name if file_path.startswith((tests_dir_name + os.sep, tests_dir_name + "/")): # Module path includes "tests." - use project root parent - instrumented_abs_path = (test_cfg.tests_project_rootdir.parent / file_path).resolve() + instrumented_abs_path = test_cfg.tests_project_rootdir.parent / file_path else: # Module path doesn't include tests dir - use tests root directly - instrumented_abs_path = (test_cfg.tests_project_rootdir / file_path).resolve() + instrumented_abs_path = test_cfg.tests_project_rootdir / file_path logger.debug(f"[PR-DEBUG] Looking up: {instrumented_abs_path}") logger.debug(f"[PR-DEBUG] Available keys: {list(instrumented_to_original.keys())[:3]}") # Try to map instrumented path to original path From dc9c60a0617943c94c1a74fed4d0ec9e0ff087f8 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 23:05:04 -0500 Subject: [PATCH 08/16] perf: remove remaining redundant .resolve() calls on pre-resolved paths Drop .resolve() from ImportResolver, TestsCache, init_javascript, create_pr, and filter_functions where callers already pass resolved paths via CLI init or TestConfig.__post_init__. Also exclude test and fixture dirs from mypy to match ruff/ty config. 
--- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fe96c54cb..56736fc6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -207,6 +207,8 @@ warn_unreachable = true install_types = true plugins = ["pydantic.mypy"] +exclude = ["tests/", "code_to_optimize/", "pie_test_set/", "experiments/"] + [[tool.mypy.overrides]] module = ["jedi", "jedi.api.classes", "inquirer", "inquirer.themes", "numba"] ignore_missing_imports = true From ac096d90757ddbd2b72bea70ebbd9c9a69a202e3 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 04:07:49 +0000 Subject: [PATCH 09/16] style: auto-fix line length formatting in functions_to_optimize.py --- codeflash/discovery/functions_to_optimize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index d0bfe6c02..14455b890 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -935,7 +935,10 @@ def is_test_file(file_path_normalized: str) -> bool: if previous_checkpoint_functions: functions_tmp = [] for function in _functions: - if function.qualified_name_with_modules_from_root(resolved_project_root) in previous_checkpoint_functions: + if ( + function.qualified_name_with_modules_from_root(resolved_project_root) + in previous_checkpoint_functions + ): previous_checkpoint_functions_removed_count += 1 continue functions_tmp.append(function) From e2859088d75c98c99f44e0d2521504841176841e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 23:14:51 -0500 Subject: [PATCH 10/16] fix: resolve tests_project_root at inject_profiling entry point Paths with .. segments (e.g. Path(__file__) / "../foo") don't match resolved file_path in module_name_from_file_path. Resolve once at the public entry point and pass down to the async variant. 
(The mypy test-dir exclusion is not part of this patch; it was made in the pyproject.toml change of PATCH 08/16.) --- codeflash/code_utils/instrument_existing_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 9486fc677..f15b2d56a 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -709,6 +709,7 @@ def inject_profiling_into_existing_test( tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR, ) -> tuple[bool, str | None]: + tests_project_root = tests_project_root.resolve() if function_to_optimize.is_async: return inject_async_profiling_into_existing_test( test_path, call_positions, function_to_optimize, tests_project_root, mode From b3fd6bf085559b31e096f648b8d7d12a5c1e3922 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 23:47:04 -0500 Subject: [PATCH 11/16] fix: resolve both sides of path comparisons for Windows 8.3 name consistency On Windows, Path.resolve() can return 8.3 short names (e.g. RUNNER~1) that differ from long-name forms. Jedi returns resolved paths that may not match the project root's form, causing relative_to() failures.
- Normalize Jedi definition paths through the project root via safe_relative_to to ensure consistent dict keys - Resolve both sides in is_project_path and module_name_from_file_path - Restore resolve in TestsCache for consistent cache keys --- codeflash/code_utils/code_utils.py | 2 +- codeflash/discovery/discover_unit_tests.py | 2 +- .../languages/python/context/code_context_extractor.py | 9 ++++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index c5d3e832f..7a9afc96f 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -340,7 +340,7 @@ def get_qualified_name(module_name: str, full_qualified_name: str) -> str: def module_name_from_file_path(file_path: Path, project_root_path: Path, *, traverse_up: bool = False) -> str: try: - relative_path = file_path.resolve().relative_to(project_root_path) + relative_path = file_path.resolve().relative_to(project_root_path.resolve()) return relative_path.with_suffix("").as_posix().replace("/", ".") except ValueError: if traverse_up: diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index c01a2f2e1..1a032ec36 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -70,7 +70,7 @@ class TestsCache: SCHEMA_VERSION = 1 # Increment this when schema changes def __init__(self, project_root_path: Path) -> None: - self.project_root_path = project_root_path.as_posix() + self.project_root_path = project_root_path.resolve().as_posix() self.connection = sqlite3.connect(codeflash_cache_db) self.cur = self.connection.cursor() diff --git a/codeflash/languages/python/context/code_context_extractor.py b/codeflash/languages/python/context/code_context_extractor.py index 09b045dcc..3bb644b5d 100644 --- a/codeflash/languages/python/context/code_context_extractor.py +++ 
b/codeflash/languages/python/context/code_context_extractor.py @@ -512,6 +512,9 @@ def get_function_sources_from_jedi( # TODO: there can be multiple definitions, see how to handle such cases definition = definitions[0] definition_path = definition.module_path + rel = safe_relative_to(definition_path, project_root_path) + if not rel.is_absolute(): + definition_path = project_root_path / rel # The definition is part of this project and not defined within the original function is_valid_definition = ( @@ -936,7 +939,11 @@ def is_project_path(module_path: Path | None, project_root_path: Path) -> bool: # site-packages must be checked first because .venv/site-packages is under project root if path_belongs_to_site_packages(module_path): return False - return str(module_path).startswith(str(project_root_path) + os.sep) + try: + module_path.resolve().relative_to(project_root_path.resolve()) + return True + except ValueError: + return False def _is_project_module(module_name: str, project_root_path: Path) -> bool: From a726b4644e43a2acf469478a2ad4159ebf3ef510 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 18 Feb 2026 23:51:13 -0500 Subject: [PATCH 12/16] fix: guard against None definition_path from Jedi --- .../languages/python/context/code_context_extractor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/codeflash/languages/python/context/code_context_extractor.py b/codeflash/languages/python/context/code_context_extractor.py index 3bb644b5d..55c3c4620 100644 --- a/codeflash/languages/python/context/code_context_extractor.py +++ b/codeflash/languages/python/context/code_context_extractor.py @@ -512,9 +512,10 @@ def get_function_sources_from_jedi( # TODO: there can be multiple definitions, see how to handle such cases definition = definitions[0] definition_path = definition.module_path - rel = safe_relative_to(definition_path, project_root_path) - if not rel.is_absolute(): - definition_path = project_root_path / rel + if definition_path is 
not None: + rel = safe_relative_to(definition_path, project_root_path) + if not rel.is_absolute(): + definition_path = project_root_path / rel # The definition is part of this project and not defined within the original function is_valid_definition = ( From 5efa1eeb0aa767224629d7a0588129efd13e7b3f Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 04:53:24 +0000 Subject: [PATCH 13/16] style: remove unused os import in code_context_extractor --- codeflash/languages/python/context/code_context_extractor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/codeflash/languages/python/context/code_context_extractor.py b/codeflash/languages/python/context/code_context_extractor.py index 55c3c4620..9a4daf726 100644 --- a/codeflash/languages/python/context/code_context_extractor.py +++ b/codeflash/languages/python/context/code_context_extractor.py @@ -2,7 +2,6 @@ import ast import hashlib -import os from collections import defaultdict from itertools import chain from pathlib import Path From 74246e66544141074a543ae8c20cd61e9c4e470d Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 05:09:39 +0000 Subject: [PATCH 14/16] fix: resolve function_to_optimize.file_path for Windows 8.3 name consistency TestConfig.__post_init__ resolves project_root_path to long names, but FunctionToOptimize.file_path could retain Windows 8.3 short names (e.g., RUNNER~1 vs runneradmin), causing relative_to() failures. 
Co-authored-by: Kevin Turcios Co-Authored-By: Claude Opus 4.6 --- codeflash/optimization/function_optimizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 3269869ac..b5a607531 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -447,6 +447,7 @@ def __init__( self.test_cfg = test_cfg self.aiservice_client = aiservice_client if aiservice_client else AiServiceClient() self.function_to_optimize = function_to_optimize + self.function_to_optimize.file_path = self.function_to_optimize.file_path.resolve() self.function_to_optimize_source_code = ( function_to_optimize_source_code if function_to_optimize_source_code From 3cdf7100cf89f7241c293e0d27828a1d15452111 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 05:16:04 +0000 Subject: [PATCH 15/16] fix: use dataclasses.replace() for frozen FunctionToOptimize path resolution FunctionToOptimize is a frozen Pydantic dataclass, so directly assigning to file_path raises FrozenInstanceError. Use dataclasses.replace() to create a new instance with the resolved path instead. 
Co-authored-by: Kevin Turcios --- codeflash/optimization/function_optimizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index b5a607531..c24d84ae5 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2,6 +2,7 @@ import ast import concurrent.futures +import dataclasses import logging import os import queue @@ -446,8 +447,10 @@ def __init__( self.project_root = test_cfg.project_root_path.resolve() self.test_cfg = test_cfg self.aiservice_client = aiservice_client if aiservice_client else AiServiceClient() + resolved_file_path = function_to_optimize.file_path.resolve() + if resolved_file_path != function_to_optimize.file_path: + function_to_optimize = dataclasses.replace(function_to_optimize, file_path=resolved_file_path) self.function_to_optimize = function_to_optimize - self.function_to_optimize.file_path = self.function_to_optimize.file_path.resolve() self.function_to_optimize_source_code = ( function_to_optimize_source_code if function_to_optimize_source_code From 8f65c9f78825a96aba33beee3146974e05c1edac Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 05:30:11 +0000 Subject: [PATCH 16/16] fix: resolve paths in revert_unused_helper_functions for Windows 8.3 name consistency On Windows, tempfile.mkdtemp() returns paths with 8.3 short names while Jedi resolves to long-form paths. The dict lookup for original_helper_code failed silently, causing unused helpers to never be reverted. 
Co-authored-by: Kevin Turcios Co-Authored-By: Claude Opus 4.6 --- .../python/context/unused_definition_remover.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/codeflash/languages/python/context/unused_definition_remover.py b/codeflash/languages/python/context/unused_definition_remover.py index 38b58f63e..ba6e4d549 100644 --- a/codeflash/languages/python/context/unused_definition_remover.py +++ b/codeflash/languages/python/context/unused_definition_remover.py @@ -587,17 +587,20 @@ def revert_unused_helper_functions( logger.debug(f"Reverting {len(unused_helpers)} unused helper function(s) to original definitions") + # Resolve all path keys for consistent comparison (Windows 8.3 short names may differ from Jedi-resolved paths) + resolved_original_helper_code = {p.resolve(): code for p, code in original_helper_code.items()} + # Group unused helpers by file path unused_helpers_by_file = defaultdict(list) for helper in unused_helpers: - unused_helpers_by_file[helper.file_path].append(helper) + unused_helpers_by_file[helper.file_path.resolve()].append(helper) # For each file, revert the unused helper functions to their original definitions for file_path, helpers_in_file in unused_helpers_by_file.items(): - if file_path in original_helper_code: + if file_path in resolved_original_helper_code: try: # Get original code for this file - original_code = original_helper_code[file_path] + original_code = resolved_original_helper_code[file_path] # Use the code replacer to selectively revert only the unused helper functions helper_names = [helper.qualified_name for helper in helpers_in_file]