From 72517154c9fd8c2b96536e7c9a57050c3b823338 Mon Sep 17 00:00:00 2001
From: Kevin Turcios
Date: Thu, 7 May 2026 20:13:58 -0500
Subject: [PATCH] feat: send concurrency metrics to LLM for async function
 optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For async functions, run the concurrency benchmark before submitting the
optimization request so the LLM receives runtime proof that the function
blocks (concurrency_ratio ≈ 1.0). This steers the model toward correct
async optimizations (e.g. time.sleep → asyncio.sleep). Sync functions
keep the existing parallel test-gen + optimization flow.
---
 codeflash/api/aiservice.py                |  4 ++
 codeflash/languages/function_optimizer.py | 67 +++++++++++++++++++----
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 3127649f2..2c6fedba2 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -157,6 +157,7 @@ def optimize_code(
         n_candidates: int = 5,
         is_numerical_code: bool | None = None,
         rerun_trace_id: str | None = None,
+        concurrency_metrics: dict[str, float] | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given code for performance by making a request to the Django endpoint.
 
@@ -200,6 +201,9 @@
             "rerun_trace_id": rerun_trace_id,
         }
 
+        if concurrency_metrics is not None:
+            payload["concurrency_metrics"] = concurrency_metrics
+
         self.add_language_metadata(payload, language_version, module_system)
 
         # DEBUG: Print payload language field
diff --git a/codeflash/languages/function_optimizer.py b/codeflash/languages/function_optimizer.py
index 859e6ba16..46f3b3c1f 100644
--- a/codeflash/languages/function_optimizer.py
+++ b/codeflash/languages/function_optimizer.py
@@ -81,6 +81,7 @@
     AIServiceRefinerRequest,
     BestOptimization,
     CandidateEvaluationContext,
+    ConcurrencyMetrics,
     GeneratedTests,
     GeneratedTestsList,
     OptimizationReviewResult,
@@ -502,6 +503,7 @@ def __init__(
         self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
         self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
         self.test_files = TestFiles(test_files=[])
+        self.cached_concurrency_metrics: ConcurrencyMetrics | None = None
 
         default_effort = getattr(args, "effort", EffortLevel.MEDIUM.value) if args else EffortLevel.MEDIUM.value
         self.effort = effort_override or default_effort
@@ -788,20 +790,53 @@ def optimize_function(self) -> Result[BestOptimization, str]:
         ):
             console.rule()
         new_code_context = code_context
-        # Generate tests and optimizations in parallel
-        future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
-        future_optimizations = self.executor.submit(
-            self.generate_optimizations,
-            read_writable_code=code_context.read_writable_code,
-            read_only_context_code=code_context.read_only_context_code,
-            run_experiment=should_run_experiment,
-            is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
-        )
-        concurrent.futures.wait([future_tests, future_optimizations])
+        if self.function_to_optimize.is_async:
+            future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
+            concurrent.futures.wait([future_tests])
+            test_setup_result = future_tests.result()
+
+            pre_optimization_concurrency_metrics: dict[str, float] | None = None
+            if is_successful(test_setup_result) and self.test_files.test_files:
+                test_env = self.get_test_env(
+                    codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1
+                )
+                metrics = self.run_concurrency_benchmark(
+                    code_context=code_context, original_helper_code=original_helper_code, test_env=test_env
+                )
+                if metrics is not None:
+                    self.cached_concurrency_metrics = metrics
+                    pre_optimization_concurrency_metrics = {
+                        "concurrency_ratio": metrics.concurrency_ratio,
+                        "sequential_time_ns": float(metrics.sequential_time_ns),
+                        "concurrent_time_ns": float(metrics.concurrent_time_ns),
+                    }
+
+            future_optimizations = self.executor.submit(
+                self.generate_optimizations,
+                read_writable_code=code_context.read_writable_code,
+                read_only_context_code=code_context.read_only_context_code,
+                run_experiment=should_run_experiment,
+                is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
+                concurrency_metrics=pre_optimization_concurrency_metrics,
+            )
+            concurrent.futures.wait([future_optimizations])
+            optimization_result = future_optimizations.result()
+        else:
+            future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
+            future_optimizations = self.executor.submit(
+                self.generate_optimizations,
+                read_writable_code=code_context.read_writable_code,
+                read_only_context_code=code_context.read_only_context_code,
+                run_experiment=should_run_experiment,
+                is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
+            )
+
+            concurrent.futures.wait([future_tests, future_optimizations])
+
+            test_setup_result = future_tests.result()
+            optimization_result = future_optimizations.result()
 
-        test_setup_result = future_tests.result()
-        optimization_result = future_optimizations.result()
 
         console.rule()
 
         if not is_successful(test_setup_result):
@@ -1861,6 +1896,7 @@ def generate_optimizations(
         read_only_context_code: str,
         run_experiment: bool = False,
         is_numerical_code: bool | None = None,
+        concurrency_metrics: dict[str, float] | None = None,
     ) -> Result[tuple[OptimizationSet, str], str]:
         """Generate optimization candidates for the function. Backend handles multi-model diversity."""
         n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.effort)
@@ -1876,6 +1912,7 @@
             n_candidates=n_candidates,
             is_numerical_code=is_numerical_code,
             rerun_trace_id=self.rerun_trace_id,
+            concurrency_metrics=concurrency_metrics,
         )
 
         future_references = self.executor.submit(
@@ -1902,6 +1939,7 @@
                 language_version=self.language_support.language_version,
                 is_async=self.function_to_optimize.is_async,
                 n_candidates=n_candidates,
+                concurrency_metrics=concurrency_metrics,
             )
             futures.append(future_candidates_exp)
 
@@ -3291,6 +3329,11 @@ def run_concurrency_benchmark(
         if not self.function_to_optimize.is_async:
            return None
 
+        if self.cached_concurrency_metrics is not None:
+            cached = self.cached_concurrency_metrics
+            self.cached_concurrency_metrics = None
+            return cached
+
         from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function
 
         try:
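-- 

Illustration, appended after the signature delimiter and not part of the
patch: the commit message asserts that a blocking async function yields
concurrency_ratio ≈ 1.0. Below is a minimal, self-contained sketch of one
plausible way such a ratio could be computed from the sequential_time_ns
and concurrent_time_ns values the patch forwards: run the same coroutine
sequentially and then concurrently, and compare wall-clock times. Every
name in the sketch (measure_concurrency, blocking_fn, nonblocking_fn,
n_tasks) is hypothetical, and the ratio's definition (sequential time over
concurrent time) is an assumption consistent with the commit message; this
is not codeflash's actual run_concurrency_benchmark.

    # Hypothetical sketch -- NOT codeflash's run_concurrency_benchmark.
    # It only illustrates the kind of metric the patch forwards to the LLM.
    import asyncio
    import time
    from collections.abc import Awaitable, Callable


    async def blocking_fn() -> None:
        time.sleep(0.05)  # blocks the event loop despite the async signature


    async def nonblocking_fn() -> None:
        await asyncio.sleep(0.05)  # yields control back to the event loop


    async def measure_concurrency(
        fn: Callable[[], Awaitable[None]], n_tasks: int = 5
    ) -> dict[str, float]:
        # Sequential baseline: await the coroutine n_tasks times, one at a time.
        start = time.perf_counter_ns()
        for _ in range(n_tasks):
            await fn()
        sequential_time_ns = float(time.perf_counter_ns() - start)

        # Concurrent run: schedule all n_tasks at once and await them together.
        start = time.perf_counter_ns()
        await asyncio.gather(*(fn() for _ in range(n_tasks)))
        concurrent_time_ns = float(time.perf_counter_ns() - start)

        return {
            # ~1.0 when the function blocks; approaches n_tasks when it yields.
            "concurrency_ratio": sequential_time_ns / concurrent_time_ns,
            "sequential_time_ns": sequential_time_ns,
            "concurrent_time_ns": concurrent_time_ns,
        }


    if __name__ == "__main__":
        print(asyncio.run(measure_concurrency(blocking_fn)))     # ratio near 1.0
        print(asyncio.run(measure_concurrency(nonblocking_fn)))  # ratio near 5.0

With time.sleep, the gather run gains nothing over the sequential loop, so
the ratio stays near 1.0 and demonstrates that the event loop was blocked;
switching to asyncio.sleep pushes the ratio toward n_tasks, which is the
signal the patch wants the LLM to see before proposing async rewrites.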