From bdc96e67ba5f02a214e0e56982fe3bcc8823fe44 Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 01:42:23 -0500
Subject: [PATCH 1/8] feat: add --agent flag and skip checkpoint in agent mode

Add --agent CLI flag for AI agent integrations that skips all
interactive prompts. In agent mode, checkpoint resume is skipped
entirely so each run starts fresh. Also bypasses the existing checkpoint
prompt when --yes is passed, resuming from the checkpoint automatically.
---
 codeflash/cli_cmds/cli.py          |  5 +++++
 codeflash/code_utils/checkpoint.py | 14 ++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index daee371d7..6fb9bc5c4 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -130,6 +130,11 @@ def parse_args() -> Namespace:
         "--reset-config", action="store_true", help="Remove codeflash configuration from project config file."
     )
     parser.add_argument("-y", "--yes", action="store_true", help="Skip confirmation prompts (useful for CI/scripts).")
+    parser.add_argument(
+        "--agent",
+        action="store_true",
+        help="Agent mode: skip all interactive prompts with sensible defaults. Designed for AI agent integrations.",
+    )
 
     args, unknown_args = parser.parse_known_args()
     sys.argv[:] = [sys.argv[0], *unknown_args]
diff --git a/codeflash/code_utils/checkpoint.py b/codeflash/code_utils/checkpoint.py
index 1160bf2e0..56b7283b5 100644
--- a/codeflash/code_utils/checkpoint.py
+++ b/codeflash/code_utils/checkpoint.py
@@ -141,12 +141,18 @@ def get_all_historical_functions(module_root: Path, checkpoint_dir: Path) -> dic
 
 def ask_should_use_checkpoint_get_functions(args: argparse.Namespace) -> Optional[dict[str, dict[str, str]]]:
     previous_checkpoint_functions = None
+    if getattr(args, "agent", False):
+        console.rule()
+        return None
     if args.all and codeflash_temp_dir.is_dir():
         previous_checkpoint_functions = get_all_historical_functions(args.module_root, codeflash_temp_dir)
-        if previous_checkpoint_functions and Confirm.ask(
-            "Previous Checkpoint detected from an incomplete optimization run, shall I continue the optimization from that point?",
-            default=True,
-            console=console,
+        if previous_checkpoint_functions and (
+            getattr(args, "yes", False)
+            or Confirm.ask(
+                "Previous Checkpoint detected from an incomplete optimization run, shall I continue the optimization from that point?",
+                default=True,
+                console=console,
+            )
         ):
             console.rule()
         else:

From 44a2afe5f287ec0e3278d55b0c9e24d379590a0c Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 01:57:00 -0500
Subject: [PATCH 2/8] Update uv.lock

---
 uv.lock | 1 -
 1 file changed, 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 05b79c606..b5222447b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -605,7 +605,6 @@ tests = [
 
 [[package]]
 name = "codeflash-benchmark"
-version = "0.3.0"
 source = { editable = "codeflash-benchmark" }
 dependencies = [
     { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },

From 6672ce4037b06c6990f9a38adb1c0b6e4242c8ae Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 03:18:57 -0500
Subject: [PATCH 3/8] feat: suppress Rich UI in --agent mode, emit plain-text
 results

In agent mode, disable all Rich output (panels, spinners, progress bars,
syntax highlighting) and use a plain StreamHandler for logging. Optimization
results with explanation and unified diff are written to stdout. A log
filter strips LSP prefixes and drops noisy test/file-path messages.
Also skip checkpoint creation and suppress Python warnings in agent mode.
---
 codeflash/cli_cmds/cli.py                    |   2 +
 codeflash/cli_cmds/console.py                | 118 ++++++++++++++++---
 codeflash/cli_cmds/logging_config.py         |  13 ++
 codeflash/lsp/helpers.py                     |   5 +
 codeflash/main.py                            |   6 +
 codeflash/optimization/function_optimizer.py |  20 +++-
 codeflash/optimization/optimizer.py          |  12 +-
 7 files changed, 155 insertions(+), 21 deletions(-)

diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index 6fb9bc5c4..b69b55809 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -138,6 +138,8 @@ def parse_args() -> Namespace:
 
     args, unknown_args = parser.parse_known_args()
     sys.argv[:] = [sys.argv[0], *unknown_args]
+    if args.agent:
+        args.yes = True
     return process_and_validate_cmd_args(args)
 
 
diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index fdc5a420a..ac74986ad 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -21,7 +21,7 @@
 
 from codeflash.cli_cmds.console_constants import SPINNER_TYPES
 from codeflash.cli_cmds.logging_config import BARE_LOGGING_FORMAT
-from codeflash.lsp.helpers import is_LSP_enabled
+from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled
 from codeflash.lsp.lsp_logger import enhanced_log
 from codeflash.lsp.lsp_message import LspCodeMessage, LspTextMessage
 
@@ -39,28 +39,54 @@
 
 console = Console()
 
-if is_LSP_enabled():
+if is_LSP_enabled() or is_agent_mode():
     console.quiet = True
 
-logging.basicConfig(
-    level=logging.INFO,
-    handlers=[RichHandler(rich_tracebacks=True, markup=False, console=console, show_path=False, show_time=False)],
-    format=BARE_LOGGING_FORMAT,
-)
+if is_agent_mode():
+    import re
+    import sys
+
+    _lsp_prefix_re = re.compile(r"^(?:!?lsp,?|h[2-4]|loading)\|")
+    _agent_drop_patterns = (
+        "Test log -",
+        "Test failed to load",
+        "Examining file ",
+        "Generated ",
+        "Add custom marker",
+        "Disabling all autouse",
+        "Reverting code and helpers",
+    )
+
+    class _AgentLogFilter(logging.Filter):
+        def filter(self, record: logging.LogRecord) -> bool:
+            record.msg = _lsp_prefix_re.sub("", str(record.msg))
+            msg = record.getMessage()
+            return not any(msg.startswith(p) for p in _agent_drop_patterns)
+
+    _agent_handler = logging.StreamHandler(sys.stderr)
+    _agent_handler.addFilter(_AgentLogFilter())
+    logging.basicConfig(level=logging.INFO, handlers=[_agent_handler], format="%(levelname)s: %(message)s")
+else:
+    logging.basicConfig(
+        level=logging.INFO,
+        handlers=[RichHandler(rich_tracebacks=True, markup=False, console=console, show_path=False, show_time=False)],
+        format=BARE_LOGGING_FORMAT,
+    )
 
 logger = logging.getLogger("rich")
 logging.getLogger("parso").setLevel(logging.WARNING)
 
 # override the logger to reformat the messages for the lsp
-for level in ("info", "debug", "warning", "error"):
-    real_fn = getattr(logger, level)
-    setattr(
-        logger,
-        level,
-        lambda msg, *args, _real_fn=real_fn, _level=level, **kwargs: enhanced_log(
-            msg, _real_fn, _level, *args, **kwargs
-        ),
-    )
+if not is_agent_mode():
+    for level in ("info", "debug", "warning", "error"):
+        real_fn = getattr(logger, level)
+        setattr(
+            logger,
+            level,
+            lambda msg, *args, _real_fn=real_fn, _level=level, **kwargs: enhanced_log(
+                msg, _real_fn, _level, *args, **kwargs
+            ),
+        )
 
 
 class DummyTask:
@@ -87,6 +113,8 @@ def paneled_text(
     text: str, panel_args: dict[str, str | bool] | None = None, text_args: dict[str, str] | None = None
 ) -> None:
     """Print text in a panel."""
+    if is_agent_mode():
+        return
     from rich.panel import Panel
     from rich.text import Text
 
@@ -115,6 +143,8 @@ def code_print(
         language: Programming language for syntax highlighting ('python', 'javascript', 'typescript')
 
     """
+    if is_agent_mode():
+        return
     if is_LSP_enabled():
         lsp_log(
             LspCodeMessage(code=code_str, file_name=file_name, function_name=function_name, message_id=lsp_message_id)
@@ -152,6 +182,10 @@ def progress_bar(
     """
     global _progress_bar_active
 
+    if is_agent_mode():
+        yield DummyTask().id
+        return
+
     if is_LSP_enabled():
         lsp_log(LspTextMessage(text=message, takes_time=True))
         yield
@@ -183,6 +217,10 @@ def progress_bar(
 @contextmanager
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
+    if is_agent_mode():
+        yield DummyProgress(), DummyTask().id
+        return
+
     if is_LSP_enabled():
         lsp_log(LspTextMessage(text=description, takes_time=True))
         dummy_progress = DummyProgress()
@@ -216,6 +254,10 @@ def call_graph_live_display(
     from rich.text import Text
     from rich.tree import Tree
 
+    if is_agent_mode():
+        yield lambda _: None
+        return
+
     if is_LSP_enabled():
         lsp_log(LspTextMessage(text="Building call graph", takes_time=True))
         yield lambda _: None
@@ -344,8 +386,52 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path,
         f"Standalone: {leaf_functions}"
     )
 
+    if is_agent_mode():
+        return
+
     if is_LSP_enabled():
         lsp_log(LspTextMessage(text=summary))
         return
 
     console.print(Panel(summary, title="Call Graph Summary", border_style="cyan"))
+
+
+def agent_log_optimization_result(
+    function_name: str,
+    file_path: Path,
+    perf_improvement_line: str,
+    original_runtime_ns: int,
+    best_runtime_ns: int,
+    raw_explanation: str,
+    original_code: dict[Path, str],
+    new_code: dict[Path, str],
+    review: str,
+) -> None:
+    import sys
+
+    from codeflash.code_utils.code_utils import unified_diff_strings
+    from codeflash.code_utils.time_utils import humanize_runtime
+
+    lines = [
+        "=== Optimization Result ===",
+        f"Function: {function_name}",
+        f"File: {file_path}",
+        f"Performance: {perf_improvement_line}",
+        f"Original runtime: {humanize_runtime(original_runtime_ns)} | Optimized runtime: {humanize_runtime(best_runtime_ns)}",
+    ]
+    if review:
+        lines.append(f"Reviewer confidence: {review}")
+    lines.append("")
+    lines.append("Explanation:")
+    lines.append(raw_explanation.strip())
+    lines.append("")
+
+    for path in original_code:
+        old = original_code.get(path, "")
+        new = new_code.get(path, "")
+        if old != new:
+            diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
+            if diff:
+                lines.append(diff)
+
+    sys.stdout.write("\n".join(lines) + "\n")
diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py
index 09dc0f1f2..eedfe54c4 100644
--- a/codeflash/cli_cmds/logging_config.py
+++ b/codeflash/cli_cmds/logging_config.py
@@ -5,8 +5,21 @@
 
 def set_level(level: int, *, echo_setting: bool = True) -> None:
     import logging
+    import sys
     import time
 
+    from codeflash.lsp.helpers import is_agent_mode
+
+    if is_agent_mode():
+        logging.basicConfig(
+            level=level,
+            handlers=[logging.StreamHandler(sys.stderr)],
+            format="%(levelname)s: %(message)s",
+            force=True,
+        )
+        logging.getLogger().setLevel(level)
+        return
+
     from rich.logging import RichHandler
 
     from codeflash.cli_cmds.console import console
diff --git a/codeflash/lsp/helpers.py b/codeflash/lsp/helpers.py
index b8840e046..fb4c9e6e0 100644
--- a/codeflash/lsp/helpers.py
+++ b/codeflash/lsp/helpers.py
@@ -18,6 +18,11 @@ def is_LSP_enabled() -> bool:
     return os.getenv("CODEFLASH_LSP", default="false").lower() == "true"
 
 
+@lru_cache(maxsize=1)
+def is_agent_mode() -> bool:
+    return os.getenv("CODEFLASH_AGENT_MODE", default="false").lower() == "true"
+
+
 def tree_to_markdown(tree: Tree, level: int = 0) -> str:
     """Convert a rich Tree into a Markdown bullet list."""
     indent = "  " * level
diff --git a/codeflash/main.py b/codeflash/main.py
index 690c1ae98..b140efd08 100644
--- a/codeflash/main.py
+++ b/codeflash/main.py
@@ -11,6 +11,12 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+if "--agent" in sys.argv:
+    os.environ["CODEFLASH_AGENT_MODE"] = "true"
+    import warnings
+
+    warnings.filterwarnings("ignore")
+
 from codeflash.cli_cmds.cli import parse_args, process_pyproject_config
 from codeflash.cli_cmds.cmd_init import CODEFLASH_LOGO, ask_run_end_to_end_test
 from codeflash.cli_cmds.console import paneled_text
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index dd8e41dd8..7bf1e417b 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -24,7 +24,7 @@
 from codeflash.api.aiservice import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient
 from codeflash.api.cfapi import add_code_context_hash, create_staging, get_cfapi_base_urls, mark_optimization_success
 from codeflash.benchmarking.utils import process_benchmark_data
-from codeflash.cli_cmds.console import code_print, console, logger, lsp_log, progress_bar
+from codeflash.cli_cmds.console import agent_log_optimization_result, code_print, console, logger, lsp_log, progress_bar
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.code_utils import (
     choose_weights,
@@ -78,7 +78,7 @@
 )
 from codeflash.languages.python.static_analysis.line_profile_utils import add_decorator_imports, contains_jit_decorator
 from codeflash.languages.python.static_analysis.static_analysis import get_first_top_level_function_or_method_ast
-from codeflash.lsp.helpers import is_LSP_enabled, report_to_markdown_table, tree_to_markdown
+from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled, report_to_markdown_table, tree_to_markdown
 from codeflash.lsp.lsp_message import LspCodeMessage, LspMarkdownMessage, LSPMessageId
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
@@ -1349,6 +1349,8 @@ def repair_optimization(
     def log_successful_optimization(
         self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str
     ) -> None:
+        if is_agent_mode():
+            return
         if is_LSP_enabled():
             md_lines = [
                 "### ⚡️ Optimization Summary",
@@ -2198,7 +2200,19 @@ def process_review(
         self.optimization_review = opt_review_result.review
 
         # Display the reviewer result to the user
-        if opt_review_result.review:
+        if is_agent_mode():
+            agent_log_optimization_result(
+                function_name=new_explanation.function_name,
+                file_path=new_explanation.file_path,
+                perf_improvement_line=new_explanation.perf_improvement_line,
+                original_runtime_ns=new_explanation.original_runtime_ns,
+                best_runtime_ns=new_explanation.best_runtime_ns,
+                raw_explanation=new_explanation.raw_explanation_message,
+                original_code=original_code_combined,
+                new_code=new_code_combined,
+                review=opt_review_result.review,
+            )
+        elif opt_review_result.review:
             review_display = {
                 "high": ("[bold green]High[/bold green]", "green", "Recommended to merge"),
                 "medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"),
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index 5527a0567..818f97584 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -31,6 +31,7 @@
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.either import is_successful
 from codeflash.languages import current_language_support, is_javascript, set_current_language
+from codeflash.lsp.helpers import is_agent_mode
 from codeflash.models.models import ValidCode
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.verification.verification_utils import TestConfig
@@ -603,7 +604,7 @@ def run(self) -> None:
                 return
 
             function_to_tests, _ = self.discover_tests(file_to_funcs_to_optimize)
-            if self.args.all:
+            if self.args.all and not getattr(self.args, "agent", False):
                 self.functions_checkpoint = CodeflashRunCheckpoint(self.args.module_root)
 
             # GLOBAL RANKING: Rank all functions together before optimizing
@@ -690,7 +691,14 @@ def run(self) -> None:
                 self.functions_checkpoint.cleanup()
             if hasattr(self.args, "command") and self.args.command == "optimize":
                 self.cleanup_replay_tests()
-            if optimizations_found == 0:
+            if is_agent_mode():
+                import sys
+
+                if optimizations_found == 0:
+                    sys.stdout.write("NO OPTIMIZATIONS FOUND\n")
+                else:
+                    sys.stdout.write(f"COMPLETE: {optimizations_found} optimization(s) applied\n")
+            elif optimizations_found == 0:
                 logger.info("❌ No optimizations found.")
             elif self.args.all:
                 logger.info("✨ All functions have been optimized! ✨")

From 624925a3cdb1bcdf4e6dae162b99970269eda8a0 Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 03:38:40 -0500
Subject: [PATCH 4/8] feat: agent mode implies --no-pr/--worktree, uses XML
 output, skips concolic tests

- --agent now implies --no-pr and --worktree so source files stay clean
- Output uses structured XML (codeflash-optimization) with optimized-code
  for the consuming agent to apply via Edit/Write
- Skip concolic test generation in agent mode
- Skip patch file creation in worktree + agent mode
---
 codeflash/cli_cmds/cli.py                    |  2 +
 codeflash/cli_cmds/console.py                | 52 ++++++++++++++------
 codeflash/optimization/function_optimizer.py | 25 +++++++---
 codeflash/optimization/optimizer.py          | 10 ++--
 4 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index b69b55809..3df83476c 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -140,6 +140,8 @@ def parse_args() -> Namespace:
     sys.argv[:] = [sys.argv[0], *unknown_args]
     if args.agent:
         args.yes = True
+        args.no_pr = True
+        args.worktree = True
     return process_and_validate_cmd_args(args)
 
 
diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index ac74986ad..a3c201bca 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -408,30 +408,50 @@ def agent_log_optimization_result(
     review: str,
 ) -> None:
     import sys
+    from xml.sax.saxutils import escape
 
     from codeflash.code_utils.code_utils import unified_diff_strings
     from codeflash.code_utils.time_utils import humanize_runtime
 
-    lines = [
-        "=== Optimization Result ===",
-        f"Function: {function_name}",
-        f"File: {file_path}",
-        f"Performance: {perf_improvement_line}",
-        f"Original runtime: {humanize_runtime(original_runtime_ns)} | Optimized runtime: {humanize_runtime(best_runtime_ns)}",
-    ]
-    if review:
-        lines.append(f"Reviewer confidence: {review}")
-    lines.append("")
-    lines.append("Explanation:")
-    lines.append(raw_explanation.strip())
-    lines.append("")
-
+    diff_parts = []
     for path in original_code:
         old = original_code.get(path, "")
         new = new_code.get(path, "")
         if old != new:
             diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
             if diff:
-                lines.append(diff)
+                diff_parts.append(diff)
+
+    diff_str = "\n".join(diff_parts)
+
+    original_runtime = humanize_runtime(original_runtime_ns)
+    optimized_runtime = humanize_runtime(best_runtime_ns)
+
+    xml = [
+        "<codeflash-optimization>",
+        f"  <function>{escape(function_name)}</function>",
+        f"  <file>{escape(str(file_path))}</file>",
+        f"  <performance>{escape(perf_improvement_line)}</performance>",
+        f"  <original-runtime>{escape(original_runtime)}</original-runtime>",
+        f"  <optimized-runtime>{escape(optimized_runtime)}</optimized-runtime>",
+    ]
+    if review:
+        xml.append(f"  <reviewer-confidence>{escape(review)}</reviewer-confidence>")
+    xml.append(f"  <explanation>{escape(raw_explanation.strip())}</explanation>")
+    if diff_str:
+        xml.append(f"  <diff>{escape(diff_str)}</diff>")
+    for path in new_code:
+        if new_code[path] != original_code.get(path, ""):
+            xml.append(f'  <optimized-code file="{escape(str(path))}">{escape(new_code[path])}</optimized-code>')
+    xml.append("  <action>")
+    xml.append("    Present this optimization to the user using AskUserQuestion.")
+    xml.append("    Show the function name, performance improvement, and explanation as the question text.")
+    xml.append("    Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
+    xml.append(
+        "    Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
+    )
+    xml.append("    If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
+    xml.append("  </action>")
+    xml.append("</codeflash-optimization>")
 
-    sys.stdout.write("\n".join(lines) + "\n")
+    sys.stdout.write("\n".join(xml) + "\n")
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 7bf1e417b..594068187 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1741,14 +1741,24 @@ def generate_tests(
                 self.executor, testgen_context.markdown, helper_fqns, generated_test_paths, generated_perf_test_paths
             )
 
-        future_concolic_tests = self.executor.submit(
-            generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
-        )
+        if is_agent_mode():
+            future_concolic_tests = None
+        else:
+            future_concolic_tests = self.executor.submit(
+                generate_concolic_tests,
+                self.test_cfg,
+                self.args,
+                self.function_to_optimize,
+                self.function_to_optimize_ast,
+            )
 
         if not self.args.no_gen_tests:
             # Wait for test futures to complete
-            concurrent.futures.wait([*future_tests, future_concolic_tests])
-        else:
+            futures_to_wait = [*future_tests]
+            if future_concolic_tests is not None:
+                futures_to_wait.append(future_concolic_tests)
+            concurrent.futures.wait(futures_to_wait)
+        elif future_concolic_tests is not None:
             concurrent.futures.wait([future_concolic_tests])
         # Process test generation results
         tests: list[GeneratedTests] = []
@@ -1777,7 +1787,10 @@ def generate_tests(
                 logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
                 return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
 
-        function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
+        if future_concolic_tests is not None:
+            function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
+        else:
+            function_to_concolic_tests, concolic_test_str = {}, None
         count_tests = len(tests)
         if concolic_test_str:
             count_tests += 1
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index 818f97584..9214927f4 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -658,7 +658,7 @@ def run(self) -> None:
                     if is_successful(best_optimization):
                         optimizations_found += 1
                         # create a diff patch for successful optimization
-                        if self.current_worktree:
+                        if self.current_worktree and not is_agent_mode():
                             best_opt = best_optimization.unwrap()
                             read_writable_code = best_opt.code_context.read_writable_code
                             relative_file_paths = [
@@ -692,12 +692,10 @@ def run(self) -> None:
             if hasattr(self.args, "command") and self.args.command == "optimize":
                 self.cleanup_replay_tests()
             if is_agent_mode():
-                import sys
-
                 if optimizations_found == 0:
-                    sys.stdout.write("NO OPTIMIZATIONS FOUND\n")
-                else:
-                    sys.stdout.write(f"COMPLETE: {optimizations_found} optimization(s) applied\n")
+                    import sys
+
+                    sys.stdout.write("<codeflash-summary>No optimizations found.</codeflash-summary>\n")
             elif optimizations_found == 0:
                 logger.info("❌ No optimizations found.")
             elif self.args.all:

From fa50977266a92f494a03183ed6c4a9d0fe59fc17 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Fri, 20 Feb 2026 20:21:08 +0000
Subject: [PATCH 5/8] style: auto-fix ruff formatting in logging_config.py

---
 codeflash/cli_cmds/logging_config.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py
index eedfe54c4..fdd922245 100644
--- a/codeflash/cli_cmds/logging_config.py
+++ b/codeflash/cli_cmds/logging_config.py
@@ -12,10 +12,7 @@ def set_level(level: int, *, echo_setting: bool = True) -> None:
 
     if is_agent_mode():
         logging.basicConfig(
-            level=level,
-            handlers=[logging.StreamHandler(sys.stderr)],
-            format="%(levelname)s: %(message)s",
-            force=True,
+            level=level, handlers=[logging.StreamHandler(sys.stderr)], format="%(levelname)s: %(message)s", force=True
         )
         logging.getLogger().setLevel(level)
         return

From ea83a7042555fb275928720af1dc614fb042ce4e Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 16:17:03 -0500
Subject: [PATCH 6/8] refactor: rename --agent to --subagent

Rename the CLI flag, env var (CODEFLASH_SUBAGENT_MODE), helper
(is_subagent_mode), and related symbols to avoid confusion with
CodeFlash's own agent terminology.
---
 codeflash/cli_cmds/cli.py                    |  6 ++---
 codeflash/cli_cmds/console.py                | 26 ++++++++++----------
 codeflash/cli_cmds/logging_config.py         |  4 +--
 codeflash/code_utils/checkpoint.py           |  2 +-
 codeflash/lsp/helpers.py                     |  4 +--
 codeflash/main.py                            |  4 +--
 codeflash/optimization/function_optimizer.py | 19 +++++++++-----
 codeflash/optimization/optimizer.py          |  6 ++---
 8 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index 3df83476c..9e492fa46 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -131,14 +131,14 @@ def parse_args() -> Namespace:
     )
     parser.add_argument("-y", "--yes", action="store_true", help="Skip confirmation prompts (useful for CI/scripts).")
     parser.add_argument(
-        "--agent",
+        "--subagent",
         action="store_true",
-        help="Agent mode: skip all interactive prompts with sensible defaults. Designed for AI agent integrations.",
+        help="Subagent mode: skip all interactive prompts with sensible defaults. Designed for AI agent integrations.",
     )
 
     args, unknown_args = parser.parse_known_args()
     sys.argv[:] = [sys.argv[0], *unknown_args]
-    if args.agent:
+    if args.subagent:
         args.yes = True
         args.no_pr = True
         args.worktree = True
diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index a3c201bca..232168880 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -21,7 +21,7 @@
 
 from codeflash.cli_cmds.console_constants import SPINNER_TYPES
 from codeflash.cli_cmds.logging_config import BARE_LOGGING_FORMAT
-from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled
+from codeflash.lsp.helpers import is_LSP_enabled, is_subagent_mode
 from codeflash.lsp.lsp_logger import enhanced_log
 from codeflash.lsp.lsp_message import LspCodeMessage, LspTextMessage
 
@@ -39,15 +39,15 @@
 
 console = Console()
 
-if is_LSP_enabled() or is_agent_mode():
+if is_LSP_enabled() or is_subagent_mode():
     console.quiet = True
 
-if is_agent_mode():
+if is_subagent_mode():
     import re
     import sys
 
     _lsp_prefix_re = re.compile(r"^(?:!?lsp,?|h[2-4]|loading)\|")
-    _agent_drop_patterns = (
+    _subagent_drop_patterns = (
         "Test log -",
         "Test failed to load",
         "Examining file ",
@@ -61,7 +61,7 @@ class _AgentLogFilter(logging.Filter):
         def filter(self, record: logging.LogRecord) -> bool:
             record.msg = _lsp_prefix_re.sub("", str(record.msg))
             msg = record.getMessage()
-            return not any(msg.startswith(p) for p in _agent_drop_patterns)
+            return not any(msg.startswith(p) for p in _subagent_drop_patterns)
 
     _agent_handler = logging.StreamHandler(sys.stderr)
     _agent_handler.addFilter(_AgentLogFilter())
@@ -77,7 +77,7 @@ def filter(self, record: logging.LogRecord) -> bool:
 logging.getLogger("parso").setLevel(logging.WARNING)
 
 # override the logger to reformat the messages for the lsp
-if not is_agent_mode():
+if not is_subagent_mode():
     for level in ("info", "debug", "warning", "error"):
         real_fn = getattr(logger, level)
         setattr(
@@ -113,7 +113,7 @@ def paneled_text(
     text: str, panel_args: dict[str, str | bool] | None = None, text_args: dict[str, str] | None = None
 ) -> None:
     """Print text in a panel."""
-    if is_agent_mode():
+    if is_subagent_mode():
         return
     from rich.panel import Panel
     from rich.text import Text
@@ -143,7 +143,7 @@ def code_print(
         language: Programming language for syntax highlighting ('python', 'javascript', 'typescript')
 
     """
-    if is_agent_mode():
+    if is_subagent_mode():
         return
     if is_LSP_enabled():
         lsp_log(
@@ -182,7 +182,7 @@ def progress_bar(
     """
     global _progress_bar_active
 
-    if is_agent_mode():
+    if is_subagent_mode():
         yield DummyTask().id
         return
 
@@ -217,7 +217,7 @@ def progress_bar(
 @contextmanager
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
-    if is_agent_mode():
+    if is_subagent_mode():
         yield DummyProgress(), DummyTask().id
         return
 
@@ -254,7 +254,7 @@ def call_graph_live_display(
     from rich.text import Text
     from rich.tree import Tree
 
-    if is_agent_mode():
+    if is_subagent_mode():
         yield lambda _: None
         return
 
@@ -386,7 +386,7 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path,
         f"Standalone: {leaf_functions}"
     )
 
-    if is_agent_mode():
+    if is_subagent_mode():
         return
 
     if is_LSP_enabled():
@@ -396,7 +396,7 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path,
     console.print(Panel(summary, title="Call Graph Summary", border_style="cyan"))
 
 
-def agent_log_optimization_result(
+def subagent_log_optimization_result(
     function_name: str,
     file_path: Path,
     perf_improvement_line: str,
diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py
index fdd922245..53a0b49fb 100644
--- a/codeflash/cli_cmds/logging_config.py
+++ b/codeflash/cli_cmds/logging_config.py
@@ -8,9 +8,9 @@ def set_level(level: int, *, echo_setting: bool = True) -> None:
     import sys
     import time
 
-    from codeflash.lsp.helpers import is_agent_mode
+    from codeflash.lsp.helpers import is_subagent_mode
 
-    if is_agent_mode():
+    if is_subagent_mode():
         logging.basicConfig(
             level=level, handlers=[logging.StreamHandler(sys.stderr)], format="%(levelname)s: %(message)s", force=True
         )
diff --git a/codeflash/code_utils/checkpoint.py b/codeflash/code_utils/checkpoint.py
index 56b7283b5..367e150b7 100644
--- a/codeflash/code_utils/checkpoint.py
+++ b/codeflash/code_utils/checkpoint.py
@@ -141,7 +141,7 @@ def get_all_historical_functions(module_root: Path, checkpoint_dir: Path) -> dic
 
 def ask_should_use_checkpoint_get_functions(args: argparse.Namespace) -> Optional[dict[str, dict[str, str]]]:
     previous_checkpoint_functions = None
-    if getattr(args, "agent", False):
+    if getattr(args, "subagent", False):
         console.rule()
         return None
     if args.all and codeflash_temp_dir.is_dir():
diff --git a/codeflash/lsp/helpers.py b/codeflash/lsp/helpers.py
index fb4c9e6e0..14121ec68 100644
--- a/codeflash/lsp/helpers.py
+++ b/codeflash/lsp/helpers.py
@@ -19,8 +19,8 @@ def is_LSP_enabled() -> bool:
 
 
 @lru_cache(maxsize=1)
-def is_agent_mode() -> bool:
-    return os.getenv("CODEFLASH_AGENT_MODE", default="false").lower() == "true"
+def is_subagent_mode() -> bool:
+    return os.getenv("CODEFLASH_SUBAGENT_MODE", default="false").lower() == "true"
 
 
 def tree_to_markdown(tree: Tree, level: int = 0) -> str:
diff --git a/codeflash/main.py b/codeflash/main.py
index b140efd08..32ae9c66c 100644
--- a/codeflash/main.py
+++ b/codeflash/main.py
@@ -11,8 +11,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-if "--agent" in sys.argv:
-    os.environ["CODEFLASH_AGENT_MODE"] = "true"
+if "--subagent" in sys.argv:
+    os.environ["CODEFLASH_SUBAGENT_MODE"] = "true"
     import warnings
 
     warnings.filterwarnings("ignore")
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 594068187..83952d7d2 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -24,7 +24,14 @@
 from codeflash.api.aiservice import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient
 from codeflash.api.cfapi import add_code_context_hash, create_staging, get_cfapi_base_urls, mark_optimization_success
 from codeflash.benchmarking.utils import process_benchmark_data
-from codeflash.cli_cmds.console import agent_log_optimization_result, code_print, console, logger, lsp_log, progress_bar
+from codeflash.cli_cmds.console import (
+    code_print,
+    console,
+    logger,
+    lsp_log,
+    progress_bar,
+    subagent_log_optimization_result,
+)
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.code_utils import (
     choose_weights,
@@ -78,7 +85,7 @@
 )
 from codeflash.languages.python.static_analysis.line_profile_utils import add_decorator_imports, contains_jit_decorator
 from codeflash.languages.python.static_analysis.static_analysis import get_first_top_level_function_or_method_ast
-from codeflash.lsp.helpers import is_agent_mode, is_LSP_enabled, report_to_markdown_table, tree_to_markdown
+from codeflash.lsp.helpers import is_LSP_enabled, is_subagent_mode, report_to_markdown_table, tree_to_markdown
 from codeflash.lsp.lsp_message import LspCodeMessage, LspMarkdownMessage, LSPMessageId
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
@@ -1349,7 +1356,7 @@ def repair_optimization(
     def log_successful_optimization(
         self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str
     ) -> None:
-        if is_agent_mode():
+        if is_subagent_mode():
             return
         if is_LSP_enabled():
             md_lines = [
@@ -1741,7 +1748,7 @@ def generate_tests(
                 self.executor, testgen_context.markdown, helper_fqns, generated_test_paths, generated_perf_test_paths
             )
 
-        if is_agent_mode():
+        if is_subagent_mode():
             future_concolic_tests = None
         else:
             future_concolic_tests = self.executor.submit(
@@ -2213,8 +2220,8 @@ def process_review(
         self.optimization_review = opt_review_result.review
 
         # Display the reviewer result to the user
-        if is_agent_mode():
-            agent_log_optimization_result(
+        if is_subagent_mode():
+            subagent_log_optimization_result(
                 function_name=new_explanation.function_name,
                 file_path=new_explanation.file_path,
                 perf_improvement_line=new_explanation.perf_improvement_line,
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index 9214927f4..1db66d1b7 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -31,7 +31,7 @@
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.either import is_successful
 from codeflash.languages import current_language_support, is_javascript, set_current_language
-from codeflash.lsp.helpers import is_agent_mode
+from codeflash.lsp.helpers import is_subagent_mode
 from codeflash.models.models import ValidCode
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.verification.verification_utils import TestConfig
@@ -658,7 +658,7 @@ def run(self) -> None:
                     if is_successful(best_optimization):
                         optimizations_found += 1
                         # create a diff patch for successful optimization
-                        if self.current_worktree and not is_agent_mode():
+                        if self.current_worktree and not is_subagent_mode():
                             best_opt = best_optimization.unwrap()
                             read_writable_code = best_opt.code_context.read_writable_code
                             relative_file_paths = [
@@ -691,7 +691,7 @@ def run(self) -> None:
                 self.functions_checkpoint.cleanup()
             if hasattr(self.args, "command") and self.args.command == "optimize":
                 self.cleanup_replay_tests()
-            if is_agent_mode():
+            if is_subagent_mode():
                 if optimizations_found == 0:
                     import sys
 

From a0983a60aa128b933a1a89128cad74392d6205a6 Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 16:17:28 -0500
Subject: [PATCH 7/8] fix: move is_subagent_mode() check to top of
 call_graph_summary

Return early right after the empty-input guard, before the summary is
built, so subagent mode no longer computes the full summary (callee
counts, string formatting) only to discard it.
---
 codeflash/cli_cmds/console.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index 232168880..1839c0ac0 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -365,6 +365,9 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path,
     if not total_functions:
         return
 
+    if is_subagent_mode():
+        return
+
     # Build the mapping expected by the dependency resolver
     file_items = file_to_funcs.items()
     mapping = {file_path: {func.qualified_name for func in funcs} for file_path, funcs in file_items}
@@ -386,9 +389,6 @@ def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path,
         f"Standalone: {leaf_functions}"
     )
 
-    if is_subagent_mode():
-        return
-
     if is_LSP_enabled():
         lsp_log(LspTextMessage(text=summary))
         return

From a4797b6d32c25a33b669e749280454e5f81ad5ae Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 20 Feb 2026 16:17:51 -0500
Subject: [PATCH 8/8] feat: add verification report to subagent XML output

Include a <verification> block with per-test-type pass/fail counts
in the XML emitted by subagent_log_optimization_result, matching
the correctness table shown in GitHub PR comments.
---
 codeflash/cli_cmds/console.py                | 24 ++++++++++++++++++++
 codeflash/optimization/function_optimizer.py |  1 +
 2 files changed, 25 insertions(+)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index 1839c0ac0..8c6a9af9d 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -34,6 +34,7 @@
     from codeflash.discovery.functions_to_optimize import FunctionToOptimize
     from codeflash.languages.base import DependencyResolver, IndexResult
     from codeflash.lsp.lsp_message import LspMessage
+    from codeflash.models.models import TestResults
 
 DEBUG_MODE = logging.getLogger().getEffectiveLevel() == logging.DEBUG
 
@@ -406,12 +407,14 @@ def subagent_log_optimization_result(
     original_code: dict[Path, str],
     new_code: dict[Path, str],
     review: str,
+    test_results: TestResults,
 ) -> None:
     import sys
     from xml.sax.saxutils import escape
 
     from codeflash.code_utils.code_utils import unified_diff_strings
     from codeflash.code_utils.time_utils import humanize_runtime
+    from codeflash.models.test_type import TestType
 
     diff_parts = []
     for path in original_code:
@@ -427,6 +430,24 @@ def subagent_log_optimization_result(
     original_runtime = humanize_runtime(original_runtime_ns)
     optimized_runtime = humanize_runtime(best_runtime_ns)
 
+    report = test_results.get_test_pass_fail_report_by_type()
+    verification_rows = []
+    for test_type in TestType:
+        if test_type is TestType.INIT_STATE_TEST:
+            continue
+        name = test_type.to_name()
+        if not name:
+            continue
+        passed = report[test_type]["passed"]
+        failed = report[test_type]["failed"]
+        if passed == 0 and failed == 0:
+            status = "None Found"
+        elif failed > 0:
+            status = f"{failed} Failed, {passed} Passed"
+        else:
+            status = f"{passed} Passed"
+        verification_rows.append(f'    <test type="{escape(name)}" status="{escape(status)}"/>')
+
     xml = [
         "<codeflash-optimization>",
         f"  <function>{escape(function_name)}</function>",
@@ -438,6 +459,9 @@ def subagent_log_optimization_result(
     if review:
         xml.append(f"  <reviewer-confidence>{escape(review)}</reviewer-confidence>")
     xml.append(f"  <explanation>{escape(raw_explanation.strip())}</explanation>")
+    xml.append("  <verification>")
+    xml.extend(verification_rows)
+    xml.append("  </verification>")
     if diff_str:
         xml.append(f"  <diff>{escape(diff_str)}</diff>")
     for path in new_code:
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 83952d7d2..efccd9b57 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -2231,6 +2231,7 @@ def process_review(
                 original_code=original_code_combined,
                 new_code=new_code_combined,
                 review=opt_review_result.review,
+                test_results=new_explanation.winning_behavior_test_results,
             )
         elif opt_review_result.review:
             review_display = {