From bdc96e67ba5f02a214e0e56982fe3bcc8823fe44 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 01:42:23 -0500 Subject: [PATCH 1/8] feat: add --agent flag and skip checkpoint in agent mode Add --agent CLI flag for AI agent integrations that skips all interactive prompts. In agent mode, checkpoint resume is skipped entirely so each run starts fresh. Also gates the existing checkpoint prompt behind --yes. --- codeflash/cli_cmds/cli.py | 5 +++++ codeflash/code_utils/checkpoint.py | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index daee371d7..6fb9bc5c4 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -130,6 +130,11 @@ def parse_args() -> Namespace: "--reset-config", action="store_true", help="Remove codeflash configuration from project config file." ) parser.add_argument("-y", "--yes", action="store_true", help="Skip confirmation prompts (useful for CI/scripts).") + parser.add_argument( + "--agent", + action="store_true", + help="Agent mode: skip all interactive prompts with sensible defaults. 
Designed for AI agent integrations.", + ) args, unknown_args = parser.parse_known_args() sys.argv[:] = [sys.argv[0], *unknown_args] diff --git a/codeflash/code_utils/checkpoint.py b/codeflash/code_utils/checkpoint.py index 1160bf2e0..56b7283b5 100644 --- a/codeflash/code_utils/checkpoint.py +++ b/codeflash/code_utils/checkpoint.py @@ -141,12 +141,18 @@ def get_all_historical_functions(module_root: Path, checkpoint_dir: Path) -> dic def ask_should_use_checkpoint_get_functions(args: argparse.Namespace) -> Optional[dict[str, dict[str, str]]]: previous_checkpoint_functions = None + if getattr(args, "agent", False): + console.rule() + return None if args.all and codeflash_temp_dir.is_dir(): previous_checkpoint_functions = get_all_historical_functions(args.module_root, codeflash_temp_dir) - if previous_checkpoint_functions and Confirm.ask( - "Previous Checkpoint detected from an incomplete optimization run, shall I continue the optimization from that point?", - default=True, - console=console, + if previous_checkpoint_functions and ( + getattr(args, "yes", False) + or Confirm.ask( + "Previous Checkpoint detected from an incomplete optimization run, shall I continue the optimization from that point?", + default=True, + console=console, + ) ): console.rule() else: From 44a2afe5f287ec0e3278d55b0c9e24d379590a0c Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 01:57:00 -0500 Subject: [PATCH 2/8] Update uv.lock --- uv.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/uv.lock b/uv.lock index 05b79c606..b5222447b 100644 --- a/uv.lock +++ b/uv.lock @@ -605,7 +605,6 @@ tests = [ [[package]] name = "codeflash-benchmark" -version = "0.3.0" source = { editable = "codeflash-benchmark" } dependencies = [ { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, From 6672ce4037b06c6990f9a38adb1c0b6e4242c8ae Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 03:18:57 -0500 
Subject: [PATCH 3/8] feat: suppress Rich UI in --agent mode, emit plain-text results In agent mode, disable all Rich output (panels, spinners, progress bars, syntax highlighting) and use a plain StreamHandler for logging. Optimization results with explanation and unified diff are written to stdout. A log filter strips LSP prefixes and drops noisy test/file-path messages. Also skip checkpoint creation and suppress Python warnings in agent mode. --- codeflash/cli_cmds/cli.py | 2 + codeflash/cli_cmds/console.py | 118 ++++++++++++++++--- codeflash/cli_cmds/logging_config.py | 13 ++ codeflash/lsp/helpers.py | 5 + codeflash/main.py | 6 + codeflash/optimization/function_optimizer.py | 20 +++- codeflash/optimization/optimizer.py | 12 +- 7 files changed, 155 insertions(+), 21 deletions(-) diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index 6fb9bc5c4..b69b55809 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -138,6 +138,8 @@ def parse_args() -> Namespace: args, unknown_args = parser.parse_known_args() sys.argv[:] = [sys.argv[0], *unknown_args] + if args.agent: + args.yes = True return process_and_validate_cmd_args(args) diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index fdc5a420a..ac74986ad 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -21,7 +21,7 @@ from codeflash.cli_cmds.console_constants import SPINNER_TYPES from codeflash.cli_cmds.logging_config import BARE_LOGGING_FORMAT -from codeflash.lsp.helpers import is_LSP_enabled +from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled from codeflash.lsp.lsp_logger import enhanced_log from codeflash.lsp.lsp_message import LspCodeMessage, LspTextMessage @@ -39,28 +39,54 @@ console = Console() -if is_LSP_enabled(): +if is_LSP_enabled() or is_agent_mode(): console.quiet = True -logging.basicConfig( - level=logging.INFO, - handlers=[RichHandler(rich_tracebacks=True, markup=False, console=console, show_path=False, 
show_time=False)], - format=BARE_LOGGING_FORMAT, -) +if is_agent_mode(): + import re + import sys + + _lsp_prefix_re = re.compile(r"^(?:!?lsp,?|h[2-4]|loading)\|") + _agent_drop_patterns = ( + "Test log -", + "Test failed to load", + "Examining file ", + "Generated ", + "Add custom marker", + "Disabling all autouse", + "Reverting code and helpers", + ) + + class _AgentLogFilter(logging.Filter): + def filter(self, record: logging.LogRecord) -> bool: + record.msg = _lsp_prefix_re.sub("", str(record.msg)) + msg = record.getMessage() + return not any(msg.startswith(p) for p in _agent_drop_patterns) + + _agent_handler = logging.StreamHandler(sys.stderr) + _agent_handler.addFilter(_AgentLogFilter()) + logging.basicConfig(level=logging.INFO, handlers=[_agent_handler], format="%(levelname)s: %(message)s") +else: + logging.basicConfig( + level=logging.INFO, + handlers=[RichHandler(rich_tracebacks=True, markup=False, console=console, show_path=False, show_time=False)], + format=BARE_LOGGING_FORMAT, + ) logger = logging.getLogger("rich") logging.getLogger("parso").setLevel(logging.WARNING) # override the logger to reformat the messages for the lsp -for level in ("info", "debug", "warning", "error"): - real_fn = getattr(logger, level) - setattr( - logger, - level, - lambda msg, *args, _real_fn=real_fn, _level=level, **kwargs: enhanced_log( - msg, _real_fn, _level, *args, **kwargs - ), - ) +if not is_agent_mode(): + for level in ("info", "debug", "warning", "error"): + real_fn = getattr(logger, level) + setattr( + logger, + level, + lambda msg, *args, _real_fn=real_fn, _level=level, **kwargs: enhanced_log( + msg, _real_fn, _level, *args, **kwargs + ), + ) class DummyTask: @@ -87,6 +113,8 @@ def paneled_text( text: str, panel_args: dict[str, str | bool] | None = None, text_args: dict[str, str] | None = None ) -> None: """Print text in a panel.""" + if is_agent_mode(): + return from rich.panel import Panel from rich.text import Text @@ -115,6 +143,8 @@ def code_print( language: 
Programming language for syntax highlighting ('python', 'javascript', 'typescript') """ + if is_agent_mode(): + return if is_LSP_enabled(): lsp_log( LspCodeMessage(code=code_str, file_name=file_name, function_name=function_name, message_id=lsp_message_id) @@ -152,6 +182,10 @@ def progress_bar( """ global _progress_bar_active + if is_agent_mode(): + yield DummyTask().id + return + if is_LSP_enabled(): lsp_log(LspTextMessage(text=message, takes_time=True)) yield @@ -183,6 +217,10 @@ def progress_bar( @contextmanager def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]: """Progress bar for test files.""" + if is_agent_mode(): + yield DummyProgress(), DummyTask().id + return + if is_LSP_enabled(): lsp_log(LspTextMessage(text=description, takes_time=True)) dummy_progress = DummyProgress() @@ -216,6 +254,10 @@ def call_graph_live_display( from rich.text import Text from rich.tree import Tree + if is_agent_mode(): + yield lambda _: None + return + if is_LSP_enabled(): lsp_log(LspTextMessage(text="Building call graph", takes_time=True)) yield lambda _: None @@ -344,8 +386,52 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, f"Standalone: {leaf_functions}" ) + if is_agent_mode(): + return + if is_LSP_enabled(): lsp_log(LspTextMessage(text=summary)) return console.print(Panel(summary, title="Call Graph Summary", border_style="cyan")) + + +def agent_log_optimization_result( + function_name: str, + file_path: Path, + perf_improvement_line: str, + original_runtime_ns: int, + best_runtime_ns: int, + raw_explanation: str, + original_code: dict[Path, str], + new_code: dict[Path, str], + review: str, +) -> None: + import sys + + from codeflash.code_utils.code_utils import unified_diff_strings + from codeflash.code_utils.time_utils import humanize_runtime + + lines = [ + "=== Optimization Result ===", + f"Function: {function_name}", + f"File: {file_path}", + f"Performance: 
{perf_improvement_line}", + f"Original runtime: {humanize_runtime(original_runtime_ns)} | Optimized runtime: {humanize_runtime(best_runtime_ns)}", + ] + if review: + lines.append(f"Reviewer confidence: {review}") + lines.append("") + lines.append("Explanation:") + lines.append(raw_explanation.strip()) + lines.append("") + + for path in original_code: + old = original_code.get(path, "") + new = new_code.get(path, "") + if old != new: + diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path)) + if diff: + lines.append(diff) + + sys.stdout.write("\n".join(lines) + "\n") diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py index 09dc0f1f2..eedfe54c4 100644 --- a/codeflash/cli_cmds/logging_config.py +++ b/codeflash/cli_cmds/logging_config.py @@ -5,8 +5,21 @@ def set_level(level: int, *, echo_setting: bool = True) -> None: import logging + import sys import time + from codeflash.lsp.helpers import is_agent_mode + + if is_agent_mode(): + logging.basicConfig( + level=level, + handlers=[logging.StreamHandler(sys.stderr)], + format="%(levelname)s: %(message)s", + force=True, + ) + logging.getLogger().setLevel(level) + return + from rich.logging import RichHandler from codeflash.cli_cmds.console import console diff --git a/codeflash/lsp/helpers.py b/codeflash/lsp/helpers.py index b8840e046..fb4c9e6e0 100644 --- a/codeflash/lsp/helpers.py +++ b/codeflash/lsp/helpers.py @@ -18,6 +18,11 @@ def is_LSP_enabled() -> bool: return os.getenv("CODEFLASH_LSP", default="false").lower() == "true" +@lru_cache(maxsize=1) +def is_agent_mode() -> bool: + return os.getenv("CODEFLASH_AGENT_MODE", default="false").lower() == "true" + + def tree_to_markdown(tree: Tree, level: int = 0) -> str: """Convert a rich Tree into a Markdown bullet list.""" indent = " " * level diff --git a/codeflash/main.py b/codeflash/main.py index 690c1ae98..b140efd08 100644 --- a/codeflash/main.py +++ b/codeflash/main.py @@ -11,6 +11,12 @@ from pathlib import Path 
from typing import TYPE_CHECKING +if "--agent" in sys.argv: + os.environ["CODEFLASH_AGENT_MODE"] = "true" + import warnings + + warnings.filterwarnings("ignore") + from codeflash.cli_cmds.cli import parse_args, process_pyproject_config from codeflash.cli_cmds.cmd_init import CODEFLASH_LOGO, ask_run_end_to_end_test from codeflash.cli_cmds.console import paneled_text diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index dd8e41dd8..7bf1e417b 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -24,7 +24,7 @@ from codeflash.api.aiservice import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient from codeflash.api.cfapi import add_code_context_hash, create_staging, get_cfapi_base_urls, mark_optimization_success from codeflash.benchmarking.utils import process_benchmark_data -from codeflash.cli_cmds.console import code_print, console, logger, lsp_log, progress_bar +from codeflash.cli_cmds.console import agent_log_optimization_result, code_print, console, logger, lsp_log, progress_bar from codeflash.code_utils import env_utils from codeflash.code_utils.code_utils import ( choose_weights, @@ -78,7 +78,7 @@ ) from codeflash.languages.python.static_analysis.line_profile_utils import add_decorator_imports, contains_jit_decorator from codeflash.languages.python.static_analysis.static_analysis import get_first_top_level_function_or_method_ast -from codeflash.lsp.helpers import is_LSP_enabled, report_to_markdown_table, tree_to_markdown +from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled, report_to_markdown_table, tree_to_markdown from codeflash.lsp.lsp_message import LspCodeMessage, LspMarkdownMessage, LSPMessageId from codeflash.models.ExperimentMetadata import ExperimentMetadata from codeflash.models.models import ( @@ -1349,6 +1349,8 @@ def repair_optimization( def log_successful_optimization( self, explanation: Explanation, 
generated_tests: GeneratedTestsList, exp_type: str ) -> None: + if is_agent_mode(): + return if is_LSP_enabled(): md_lines = [ "### ⚡️ Optimization Summary", @@ -2198,7 +2200,19 @@ def process_review( self.optimization_review = opt_review_result.review # Display the reviewer result to the user - if opt_review_result.review: + if is_agent_mode(): + agent_log_optimization_result( + function_name=new_explanation.function_name, + file_path=new_explanation.file_path, + perf_improvement_line=new_explanation.perf_improvement_line, + original_runtime_ns=new_explanation.original_runtime_ns, + best_runtime_ns=new_explanation.best_runtime_ns, + raw_explanation=new_explanation.raw_explanation_message, + original_code=original_code_combined, + new_code=new_code_combined, + review=opt_review_result.review, + ) + elif opt_review_result.review: review_display = { "high": ("[bold green]High[/bold green]", "green", "Recommended to merge"), "medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"), diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index 5527a0567..818f97584 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -31,6 +31,7 @@ from codeflash.code_utils.time_utils import humanize_runtime from codeflash.either import is_successful from codeflash.languages import current_language_support, is_javascript, set_current_language +from codeflash.lsp.helpers import is_agent_mode from codeflash.models.models import ValidCode from codeflash.telemetry.posthog_cf import ph from codeflash.verification.verification_utils import TestConfig @@ -603,7 +604,7 @@ def run(self) -> None: return function_to_tests, _ = self.discover_tests(file_to_funcs_to_optimize) - if self.args.all: + if self.args.all and not getattr(self.args, "agent", False): self.functions_checkpoint = CodeflashRunCheckpoint(self.args.module_root) # GLOBAL RANKING: Rank all functions together before optimizing 
@@ -690,7 +691,14 @@ def run(self) -> None: self.functions_checkpoint.cleanup() if hasattr(self.args, "command") and self.args.command == "optimize": self.cleanup_replay_tests() - if optimizations_found == 0: + if is_agent_mode(): + import sys + + if optimizations_found == 0: + sys.stdout.write("NO OPTIMIZATIONS FOUND\n") + else: + sys.stdout.write(f"COMPLETE: {optimizations_found} optimization(s) applied\n") + elif optimizations_found == 0: logger.info("❌ No optimizations found.") elif self.args.all: logger.info("✨ All functions have been optimized! ✨") From 624925a3cdb1bcdf4e6dae162b99970269eda8a0 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 03:38:40 -0500 Subject: [PATCH 4/8] feat: agent mode implies --no-pr/--worktree, uses XML output, skips concolic tests - --agent now implies --no-pr and --worktree so source files stay clean - Output uses structured XML (codeflash-optimization) with optimized-code for the consuming agent to apply via Edit/Write - Skip concolic test generation in agent mode - Skip patch file creation in worktree + agent mode --- codeflash/cli_cmds/cli.py | 2 + codeflash/cli_cmds/console.py | 52 ++++++++++++++------ codeflash/optimization/function_optimizer.py | 25 +++++++--- codeflash/optimization/optimizer.py | 10 ++-- 4 files changed, 61 insertions(+), 28 deletions(-) diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index b69b55809..3df83476c 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -140,6 +140,8 @@ def parse_args() -> Namespace: sys.argv[:] = [sys.argv[0], *unknown_args] if args.agent: args.yes = True + args.no_pr = True + args.worktree = True return process_and_validate_cmd_args(args) diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index ac74986ad..a3c201bca 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -408,30 +408,50 @@ def agent_log_optimization_result( review: str, ) -> None: import sys + from 
xml.sax.saxutils import escape from codeflash.code_utils.code_utils import unified_diff_strings from codeflash.code_utils.time_utils import humanize_runtime - lines = [ - "=== Optimization Result ===", - f"Function: {function_name}", - f"File: {file_path}", - f"Performance: {perf_improvement_line}", - f"Original runtime: {humanize_runtime(original_runtime_ns)} | Optimized runtime: {humanize_runtime(best_runtime_ns)}", - ] - if review: - lines.append(f"Reviewer confidence: {review}") - lines.append("") - lines.append("Explanation:") - lines.append(raw_explanation.strip()) - lines.append("") - + diff_parts = [] for path in original_code: old = original_code.get(path, "") new = new_code.get(path, "") if old != new: diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path)) if diff: - lines.append(diff) + diff_parts.append(diff) + + diff_str = "\n".join(diff_parts) + + original_runtime = humanize_runtime(original_runtime_ns) + optimized_runtime = humanize_runtime(best_runtime_ns) + + xml = [ + "", + f" {escape(function_name)}", + f" {escape(str(file_path))}", + f" {escape(perf_improvement_line)}", + f" {escape(original_runtime)}", + f" {escape(optimized_runtime)}", + ] + if review: + xml.append(f" {escape(review)}") + xml.append(f" {escape(raw_explanation.strip())}") + if diff_str: + xml.append(f" {escape(diff_str)}") + for path in new_code: + if new_code[path] != original_code.get(path, ""): + xml.append(f' {escape(new_code[path])}') + xml.append(" ") + xml.append(" Present this optimization to the user using AskUserQuestion.") + xml.append(" Show the function name, performance improvement, and explanation as the question text.") + xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).") + xml.append( + " Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes." 
+ ) + xml.append(" If the user chooses 'Apply', write the content from optimized-code to the corresponding file.") + xml.append(" ") + xml.append("") - sys.stdout.write("\n".join(lines) + "\n") + sys.stdout.write("\n".join(xml) + "\n") diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 7bf1e417b..594068187 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1741,14 +1741,24 @@ def generate_tests( self.executor, testgen_context.markdown, helper_fqns, generated_test_paths, generated_perf_test_paths ) - future_concolic_tests = self.executor.submit( - generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast - ) + if is_agent_mode(): + future_concolic_tests = None + else: + future_concolic_tests = self.executor.submit( + generate_concolic_tests, + self.test_cfg, + self.args, + self.function_to_optimize, + self.function_to_optimize_ast, + ) if not self.args.no_gen_tests: # Wait for test futures to complete - concurrent.futures.wait([*future_tests, future_concolic_tests]) - else: + futures_to_wait = [*future_tests] + if future_concolic_tests is not None: + futures_to_wait.append(future_concolic_tests) + concurrent.futures.wait(futures_to_wait) + elif future_concolic_tests is not None: concurrent.futures.wait([future_concolic_tests]) # Process test generation results tests: list[GeneratedTests] = [] @@ -1777,7 +1787,10 @@ def generate_tests( logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}") return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}") - function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() + if future_concolic_tests is not None: + function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() + else: + function_to_concolic_tests, concolic_test_str = {}, None 
count_tests = len(tests) if concolic_test_str: count_tests += 1 diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index 818f97584..9214927f4 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -658,7 +658,7 @@ def run(self) -> None: if is_successful(best_optimization): optimizations_found += 1 # create a diff patch for successful optimization - if self.current_worktree: + if self.current_worktree and not is_agent_mode(): best_opt = best_optimization.unwrap() read_writable_code = best_opt.code_context.read_writable_code relative_file_paths = [ @@ -692,12 +692,10 @@ def run(self) -> None: if hasattr(self.args, "command") and self.args.command == "optimize": self.cleanup_replay_tests() if is_agent_mode(): - import sys - if optimizations_found == 0: - sys.stdout.write("NO OPTIMIZATIONS FOUND\n") - else: - sys.stdout.write(f"COMPLETE: {optimizations_found} optimization(s) applied\n") + import sys + + sys.stdout.write("No optimizations found.\n") elif optimizations_found == 0: logger.info("❌ No optimizations found.") elif self.args.all: From fa50977266a92f494a03183ed6c4a9d0fe59fc17 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 20:21:08 +0000 Subject: [PATCH 5/8] style: auto-fix ruff formatting in logging_config.py --- codeflash/cli_cmds/logging_config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py index eedfe54c4..fdd922245 100644 --- a/codeflash/cli_cmds/logging_config.py +++ b/codeflash/cli_cmds/logging_config.py @@ -12,10 +12,7 @@ def set_level(level: int, *, echo_setting: bool = True) -> None: if is_agent_mode(): logging.basicConfig( - level=level, - handlers=[logging.StreamHandler(sys.stderr)], - format="%(levelname)s: %(message)s", - force=True, + level=level, handlers=[logging.StreamHandler(sys.stderr)], 
format="%(levelname)s: %(message)s", force=True ) logging.getLogger().setLevel(level) return From ea83a7042555fb275928720af1dc614fb042ce4e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 16:17:03 -0500 Subject: [PATCH 6/8] refactor: rename --agent to --subagent Rename the CLI flag, env var (CODEFLASH_SUBAGENT_MODE), helper (is_subagent_mode), and related symbols to avoid confusion with CodeFlash's own agent terminology. --- codeflash/cli_cmds/cli.py | 6 ++--- codeflash/cli_cmds/console.py | 26 ++++++++++---------- codeflash/cli_cmds/logging_config.py | 4 +-- codeflash/code_utils/checkpoint.py | 2 +- codeflash/lsp/helpers.py | 4 +-- codeflash/main.py | 4 +-- codeflash/optimization/function_optimizer.py | 19 +++++++++----- codeflash/optimization/optimizer.py | 6 ++--- 8 files changed, 39 insertions(+), 32 deletions(-) diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index 3df83476c..9e492fa46 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -131,14 +131,14 @@ def parse_args() -> Namespace: ) parser.add_argument("-y", "--yes", action="store_true", help="Skip confirmation prompts (useful for CI/scripts).") parser.add_argument( - "--agent", + "--subagent", action="store_true", - help="Agent mode: skip all interactive prompts with sensible defaults. Designed for AI agent integrations.", + help="Subagent mode: skip all interactive prompts with sensible defaults. 
Designed for AI agent integrations.", ) args, unknown_args = parser.parse_known_args() sys.argv[:] = [sys.argv[0], *unknown_args] - if args.agent: + if args.subagent: args.yes = True args.no_pr = True args.worktree = True diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index a3c201bca..232168880 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -21,7 +21,7 @@ from codeflash.cli_cmds.console_constants import SPINNER_TYPES from codeflash.cli_cmds.logging_config import BARE_LOGGING_FORMAT -from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled +from codeflash.lsp.helpers import is_LSP_enabled, is_subagent_mode from codeflash.lsp.lsp_logger import enhanced_log from codeflash.lsp.lsp_message import LspCodeMessage, LspTextMessage @@ -39,15 +39,15 @@ console = Console() -if is_LSP_enabled() or is_agent_mode(): +if is_LSP_enabled() or is_subagent_mode(): console.quiet = True -if is_agent_mode(): +if is_subagent_mode(): import re import sys _lsp_prefix_re = re.compile(r"^(?:!?lsp,?|h[2-4]|loading)\|") - _agent_drop_patterns = ( + _subagent_drop_patterns = ( "Test log -", "Test failed to load", "Examining file ", @@ -61,7 +61,7 @@ class _AgentLogFilter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: record.msg = _lsp_prefix_re.sub("", str(record.msg)) msg = record.getMessage() - return not any(msg.startswith(p) for p in _agent_drop_patterns) + return not any(msg.startswith(p) for p in _subagent_drop_patterns) _agent_handler = logging.StreamHandler(sys.stderr) _agent_handler.addFilter(_AgentLogFilter()) @@ -77,7 +77,7 @@ def filter(self, record: logging.LogRecord) -> bool: logging.getLogger("parso").setLevel(logging.WARNING) # override the logger to reformat the messages for the lsp -if not is_agent_mode(): +if not is_subagent_mode(): for level in ("info", "debug", "warning", "error"): real_fn = getattr(logger, level) setattr( @@ -113,7 +113,7 @@ def paneled_text( text: str, panel_args: 
dict[str, str | bool] | None = None, text_args: dict[str, str] | None = None ) -> None: """Print text in a panel.""" - if is_agent_mode(): + if is_subagent_mode(): return from rich.panel import Panel from rich.text import Text @@ -143,7 +143,7 @@ def code_print( language: Programming language for syntax highlighting ('python', 'javascript', 'typescript') """ - if is_agent_mode(): + if is_subagent_mode(): return if is_LSP_enabled(): lsp_log( @@ -182,7 +182,7 @@ def progress_bar( """ global _progress_bar_active - if is_agent_mode(): + if is_subagent_mode(): yield DummyTask().id return @@ -217,7 +217,7 @@ def progress_bar( @contextmanager def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]: """Progress bar for test files.""" - if is_agent_mode(): + if is_subagent_mode(): yield DummyProgress(), DummyTask().id return @@ -254,7 +254,7 @@ def call_graph_live_display( from rich.text import Text from rich.tree import Tree - if is_agent_mode(): + if is_subagent_mode(): yield lambda _: None return @@ -386,7 +386,7 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, f"Standalone: {leaf_functions}" ) - if is_agent_mode(): + if is_subagent_mode(): return if is_LSP_enabled(): @@ -396,7 +396,7 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, console.print(Panel(summary, title="Call Graph Summary", border_style="cyan")) -def agent_log_optimization_result( +def subagent_log_optimization_result( function_name: str, file_path: Path, perf_improvement_line: str, diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py index fdd922245..53a0b49fb 100644 --- a/codeflash/cli_cmds/logging_config.py +++ b/codeflash/cli_cmds/logging_config.py @@ -8,9 +8,9 @@ def set_level(level: int, *, echo_setting: bool = True) -> None: import sys import time - from codeflash.lsp.helpers import is_agent_mode + from codeflash.lsp.helpers import 
is_subagent_mode - if is_agent_mode(): + if is_subagent_mode(): logging.basicConfig( level=level, handlers=[logging.StreamHandler(sys.stderr)], format="%(levelname)s: %(message)s", force=True ) diff --git a/codeflash/code_utils/checkpoint.py b/codeflash/code_utils/checkpoint.py index 56b7283b5..367e150b7 100644 --- a/codeflash/code_utils/checkpoint.py +++ b/codeflash/code_utils/checkpoint.py @@ -141,7 +141,7 @@ def get_all_historical_functions(module_root: Path, checkpoint_dir: Path) -> dic def ask_should_use_checkpoint_get_functions(args: argparse.Namespace) -> Optional[dict[str, dict[str, str]]]: previous_checkpoint_functions = None - if getattr(args, "agent", False): + if getattr(args, "subagent", False): console.rule() return None if args.all and codeflash_temp_dir.is_dir(): diff --git a/codeflash/lsp/helpers.py b/codeflash/lsp/helpers.py index fb4c9e6e0..14121ec68 100644 --- a/codeflash/lsp/helpers.py +++ b/codeflash/lsp/helpers.py @@ -19,8 +19,8 @@ def is_LSP_enabled() -> bool: @lru_cache(maxsize=1) -def is_agent_mode() -> bool: - return os.getenv("CODEFLASH_AGENT_MODE", default="false").lower() == "true" +def is_subagent_mode() -> bool: + return os.getenv("CODEFLASH_SUBAGENT_MODE", default="false").lower() == "true" def tree_to_markdown(tree: Tree, level: int = 0) -> str: diff --git a/codeflash/main.py b/codeflash/main.py index b140efd08..32ae9c66c 100644 --- a/codeflash/main.py +++ b/codeflash/main.py @@ -11,8 +11,8 @@ from pathlib import Path from typing import TYPE_CHECKING -if "--agent" in sys.argv: - os.environ["CODEFLASH_AGENT_MODE"] = "true" +if "--subagent" in sys.argv: + os.environ["CODEFLASH_SUBAGENT_MODE"] = "true" import warnings warnings.filterwarnings("ignore") diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 594068187..83952d7d2 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -24,7 +24,14 @@ from codeflash.api.aiservice 
import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient from codeflash.api.cfapi import add_code_context_hash, create_staging, get_cfapi_base_urls, mark_optimization_success from codeflash.benchmarking.utils import process_benchmark_data -from codeflash.cli_cmds.console import agent_log_optimization_result, code_print, console, logger, lsp_log, progress_bar +from codeflash.cli_cmds.console import ( + code_print, + console, + logger, + lsp_log, + progress_bar, + subagent_log_optimization_result, +) from codeflash.code_utils import env_utils from codeflash.code_utils.code_utils import ( choose_weights, @@ -78,7 +85,7 @@ ) from codeflash.languages.python.static_analysis.line_profile_utils import add_decorator_imports, contains_jit_decorator from codeflash.languages.python.static_analysis.static_analysis import get_first_top_level_function_or_method_ast -from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled, report_to_markdown_table, tree_to_markdown +from codeflash.lsp.helpers import is_LSP_enabled, is_subagent_mode, report_to_markdown_table, tree_to_markdown from codeflash.lsp.lsp_message import LspCodeMessage, LspMarkdownMessage, LSPMessageId from codeflash.models.ExperimentMetadata import ExperimentMetadata from codeflash.models.models import ( @@ -1349,7 +1356,7 @@ def repair_optimization( def log_successful_optimization( self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str ) -> None: - if is_agent_mode(): + if is_subagent_mode(): return if is_LSP_enabled(): md_lines = [ @@ -1741,7 +1748,7 @@ def generate_tests( self.executor, testgen_context.markdown, helper_fqns, generated_test_paths, generated_perf_test_paths ) - if is_agent_mode(): + if is_subagent_mode(): future_concolic_tests = None else: future_concolic_tests = self.executor.submit( @@ -2213,8 +2220,8 @@ def process_review( self.optimization_review = opt_review_result.review # Display the reviewer result to the user - if is_agent_mode(): - 
agent_log_optimization_result( + if is_subagent_mode(): + subagent_log_optimization_result( function_name=new_explanation.function_name, file_path=new_explanation.file_path, perf_improvement_line=new_explanation.perf_improvement_line, diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index 9214927f4..1db66d1b7 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -31,7 +31,7 @@ from codeflash.code_utils.time_utils import humanize_runtime from codeflash.either import is_successful from codeflash.languages import current_language_support, is_javascript, set_current_language -from codeflash.lsp.helpers import is_agent_mode +from codeflash.lsp.helpers import is_subagent_mode from codeflash.models.models import ValidCode from codeflash.telemetry.posthog_cf import ph from codeflash.verification.verification_utils import TestConfig @@ -658,7 +658,7 @@ def run(self) -> None: if is_successful(best_optimization): optimizations_found += 1 # create a diff patch for successful optimization - if self.current_worktree and not is_agent_mode(): + if self.current_worktree and not is_subagent_mode(): best_opt = best_optimization.unwrap() read_writable_code = best_opt.code_context.read_writable_code relative_file_paths = [ @@ -691,7 +691,7 @@ def run(self) -> None: self.functions_checkpoint.cleanup() if hasattr(self.args, "command") and self.args.command == "optimize": self.cleanup_replay_tests() - if is_agent_mode(): + if is_subagent_mode(): if optimizations_found == 0: import sys From a0983a60aa128b933a1a89128cad74392d6205a6 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 16:17:28 -0500 Subject: [PATCH 7/8] fix: move is_subagent_mode() check to top of call_graph_summary Avoids computing the full summary (callee counts, string formatting) only to discard it when running in subagent mode. 
--- codeflash/cli_cmds/console.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index 232168880..1839c0ac0 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -365,6 +365,9 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, if not total_functions: return + if is_subagent_mode(): + return + # Build the mapping expected by the dependency resolver file_items = file_to_funcs.items() mapping = {file_path: {func.qualified_name for func in funcs} for file_path, funcs in file_items} @@ -386,9 +389,6 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, f"Standalone: {leaf_functions}" ) - if is_subagent_mode(): - return - if is_LSP_enabled(): lsp_log(LspTextMessage(text=summary)) return From a4797b6d32c25a33b669e749280454e5f81ad5ae Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 20 Feb 2026 16:17:51 -0500 Subject: [PATCH 8/8] feat: add verification report to subagent XML output Include a verification block with per-test-type pass/fail counts in the XML emitted by subagent_log_optimization_result, matching the correctness table shown in GitHub PR comments. 
--- codeflash/cli_cmds/console.py | 24 ++++++++++++++++++++ codeflash/optimization/function_optimizer.py | 1 + 2 files changed, 25 insertions(+) diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index 1839c0ac0..8c6a9af9d 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -34,6 +34,7 @@ from codeflash.discovery.functions_to_optimize import FunctionToOptimize from codeflash.languages.base import DependencyResolver, IndexResult from codeflash.lsp.lsp_message import LspMessage + from codeflash.models.models import TestResults DEBUG_MODE = logging.getLogger().getEffectiveLevel() == logging.DEBUG @@ -406,12 +407,14 @@ def subagent_log_optimization_result( original_code: dict[Path, str], new_code: dict[Path, str], review: str, + test_results: TestResults, ) -> None: import sys from xml.sax.saxutils import escape from codeflash.code_utils.code_utils import unified_diff_strings from codeflash.code_utils.time_utils import humanize_runtime + from codeflash.models.test_type import TestType diff_parts = [] for path in original_code: @@ -427,6 +430,24 @@ def subagent_log_optimization_result( original_runtime = humanize_runtime(original_runtime_ns) optimized_runtime = humanize_runtime(best_runtime_ns) + report = test_results.get_test_pass_fail_report_by_type() + verification_rows = [] + for test_type in TestType: + if test_type is TestType.INIT_STATE_TEST: + continue + name = test_type.to_name() + if not name: + continue + passed = report[test_type]["passed"] + failed = report[test_type]["failed"] + if passed == 0 and failed == 0: + status = "None Found" + elif failed > 0: + status = f"{failed} Failed, {passed} Passed" + else: + status = f"{passed} Passed" + verification_rows.append(f' ') + xml = [ "", f" {escape(function_name)}", @@ -438,6 +459,9 @@ def subagent_log_optimization_result( if review: xml.append(f" {escape(review)}") xml.append(f" {escape(raw_explanation.strip())}") + xml.append(" ") + 
xml.extend(verification_rows) + xml.append(" ") if diff_str: xml.append(f" {escape(diff_str)}") for path in new_code: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 83952d7d2..efccd9b57 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2231,6 +2231,7 @@ def process_review( original_code=original_code_combined, new_code=new_code_combined, review=opt_review_result.review, + test_results=new_explanation.winning_behavior_test_results, ) elif opt_review_result.review: review_display = {