Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
a969187
refactor: replace jedi_definition with definition_type on FunctionSource
KRRT7 Feb 10, 2026
b69a713
feat: add persistent CallGraph class with SQLite caching
KRRT7 Feb 10, 2026
0078539
feat: wire CallGraph into the optimization pipeline
KRRT7 Feb 10, 2026
b604bf0
test: add unit and caching tests for CallGraph
KRRT7 Feb 10, 2026
5c4a65c
feat: add Rich Live visualization for call graph indexing
KRRT7 Feb 10, 2026
0c1397e
feat: enrich call graph display with cross-file tracking and dependen…
KRRT7 Feb 10, 2026
92be009
feat: rank functions by dependency count when no trace file is available
KRRT7 Feb 10, 2026
7d5638d
feat: parallelize call graph indexing with ProcessPoolExecutor
KRRT7 Feb 10, 2026
fada839
Merge branch 'main' into call-graphee
KRRT7 Feb 11, 2026
9cc1004
refactor: improve call graph live display and filter non-Python files
KRRT7 Feb 12, 2026
f3f0b0e
refactor: move CallGraph into Python language support layer
KRRT7 Feb 12, 2026
ed31cd1
refactor: filter files by language extensions and show project-relati…
KRRT7 Feb 12, 2026
5341ac8
fix: improve CLI output formatting for runtime estimate and call grap…
KRRT7 Feb 12, 2026
ee0da84
refactor: simplify call graph DB schema to two flat human-readable ta…
KRRT7 Feb 12, 2026
0a5e814
Update config_consts.py
KRRT7 Feb 12, 2026
13f8490
Merge branch 'main' into call-graphee
KRRT7 Feb 12, 2026
54aa7e1
fix: skip call graph building in CI and fix ruff formatting
KRRT7 Feb 12, 2026
c096c82
refactor: simplify compat.py by removing unnecessary class wrapper
KRRT7 Feb 12, 2026
bc29463
Merge branch 'main' into call-graphee
KRRT7 Feb 12, 2026
8555da0
fix: resolve mypy type errors in call_graph.py
github-actions[bot] Feb 12, 2026
513e590
fix: revert token limits back to 16K from unintended 100K increase
KRRT7 Feb 12, 2026
be4a2ca
feat: increase optimization and testgen token limits to 64K
KRRT7 Feb 12, 2026
9e90448
fix: use explicit token limits in tests to decouple from global constant
KRRT7 Feb 12, 2026
fc42548
test: update token limit tests for 64K default
KRRT7 Feb 12, 2026
80759c9
fix: add None guard for lang_support before accessing file_extensions
KRRT7 Feb 12, 2026
d7edef8
refactor: batch callee counting in call_graph_summary with new count_…
KRRT7 Feb 12, 2026
c3fdf31
refactor: use batch count_callees_per_function for dependency ranking…
KRRT7 Feb 12, 2026
4572783
fix: remove slots=True from dataclass for Python 3.9 compatibility
KRRT7 Feb 12, 2026
267dff9
Optimize call_graph_summary
codeflash-ai[bot] Feb 12, 2026
182c1b0
style: auto-fix linting issues
github-actions[bot] Feb 12, 2026
4523ac2
Optimize _analyze_imports_in_optimized_code
codeflash-ai[bot] Feb 12, 2026
0e284ad
style: auto-fix linting issues
github-actions[bot] Feb 12, 2026
e909182
Merge pull request #1462 from codeflash-ai/codeflash/optimize-pr1460-…
KRRT7 Feb 12, 2026
5ac33e1
Merge pull request #1463 from codeflash-ai/codeflash/optimize-pr1460-…
KRRT7 Feb 12, 2026
e962f73
Merge branch 'main' into call-graphee
KRRT7 Feb 13, 2026
bba3e0a
Merge branch 'main' into call-graphee
KRRT7 Feb 17, 2026
0bcc483
Merge branch 'main' into call-graphee
KRRT7 Feb 17, 2026
11543f0
fix: use (file_path, qualified_name) key in count_callees_per_function
KRRT7 Feb 17, 2026
aa10823
Merge branch 'main' into call-graphee
KRRT7 Feb 18, 2026
3012ec9
Merge branch 'main' into call-graphee
KRRT7 Feb 18, 2026
223af06
Merge branch 'main' into call-graphee
KRRT7 Feb 18, 2026
43d74a8
fix: use iterative DFS for function discovery to avoid RecursionError
KRRT7 Feb 18, 2026
5663985
style: auto-fix linting issues
github-actions[bot] Feb 18, 2026
890c466
Optimize find_functions_with_return_statement
codeflash-ai[bot] Feb 18, 2026
88b0ee5
fix: batch SQL queries and deduplicate Path.resolve() in call graph
KRRT7 Feb 18, 2026
9b0606a
Merge pull request #1534 from codeflash-ai/codeflash/optimize-pr1460-…
KRRT7 Feb 18, 2026
f7b0401
Optimize function_has_return_statement
codeflash-ai[bot] Feb 18, 2026
35d4d92
Merge branch 'main' into call-graphee
KRRT7 Feb 18, 2026
392453a
Merge pull request #1535 from codeflash-ai/codeflash/optimize-pr1460-…
KRRT7 Feb 18, 2026
2652e71
Merge remote-tracking branch 'origin/main' into call-graphee
KRRT7 Feb 19, 2026
522969b
fix: restore call_graph parameter to get_code_optimization_context
KRRT7 Feb 19, 2026
f1c707a
Simplify dependency summary output
KRRT7 Feb 19, 2026
12f36fb
Clarify call graph UI text
KRRT7 Feb 19, 2026
09ad2f5
Rename Python call graph to reference graph
KRRT7 Feb 19, 2026
cb91158
refactor: rename test file and imports to match reference graph rename
KRRT7 Feb 19, 2026
82783f8
disable it for now
KRRT7 Feb 19, 2026
d2dea5c
fix: remove stale jedi_definition argument from FunctionSource calls
KRRT7 Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 154 additions & 1 deletion codeflash/cli_cmds/console.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from __future__ import annotations

import logging
from collections import deque
from contextlib import contextmanager
from itertools import cycle
from typing import TYPE_CHECKING, Optional

from rich.console import Console
from rich.logging import RichHandler
from rich.panel import Panel
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Expand All @@ -24,10 +26,13 @@
from codeflash.lsp.lsp_message import LspCodeMessage, LspTextMessage

if TYPE_CHECKING:
from collections.abc import Generator
from collections.abc import Callable, Generator
from pathlib import Path

from rich.progress import TaskID

from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.base import DependencyResolver, IndexResult
from codeflash.lsp.lsp_message import LspMessage

DEBUG_MODE = logging.getLogger().getEffectiveLevel() == logging.DEBUG
Expand Down Expand Up @@ -196,3 +201,151 @@ def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Pro
) as progress:
task_id = progress.add_task(description, total=total)
yield progress, task_id


MAX_TREE_ENTRIES = 8  # number of recently indexed files kept visible in the live tree


@contextmanager
def call_graph_live_display(
    total: int, project_root: Path | None = None
) -> Generator[Callable[[IndexResult], None], None, None]:
    """Show a Rich live panel while the call graph is being indexed.

    Yields a callback that the indexer invokes once per file with an
    ``IndexResult``. Results are buffered and flushed in batches of 8,
    each flush updating a progress bar, a tree of recently indexed files,
    and a running statistics line.

    Args:
        total: Total number of files to index (sizes the progress bar).
        project_root: When given, tree entries show paths relative to this
            root; otherwise only ``parent_dir/name`` is shown.

    Yields:
        A ``Callable[[IndexResult], None]`` used to report each result.
    """
    # Imported lazily so the extra rich widgets are only loaded when the
    # live display is actually used. Note the local Panel shadows the
    # module-level import of the same class — harmless.
    from rich.console import Group
    from rich.live import Live
    from rich.panel import Panel
    from rich.text import Text
    from rich.tree import Tree

    # Under LSP there is no interactive terminal: log one message and hand
    # back a no-op callback so callers can use the same code path.
    if is_LSP_enabled():
        lsp_log(LspTextMessage(text="Building call graph", takes_time=True))
        yield lambda _: None
        return

    progress = Progress(
        SpinnerColumn(next(spinners)),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(complete_style="cyan", finished_style="green", pulse_style="yellow"),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
        auto_refresh=False,  # refreshed manually via live.refresh() in process_batch
    )
    task_id = progress.add_task("Analyzing files", total=total)

    # Rolling window of the most recent results; deque(maxlen=...) drops the
    # oldest entry automatically once MAX_TREE_ENTRIES is reached.
    results: deque[IndexResult] = deque(maxlen=MAX_TREE_ENTRIES)
    stats = {"indexed": 0, "cached": 0, "edges": 0, "external": 0, "errors": 0}

    tree = Tree("[bold]Recent Files[/bold]")
    stats_text = Text("0 calls found", style="dim")
    panel = Panel(
        Group(progress, Text(""), tree, Text(""), stats_text), title="Building Call Graph", border_style="cyan"
    )

    def create_tree_node(result: IndexResult) -> Tree:
        # Prefer a project-relative path; relative_to raises ValueError when
        # the file lies outside project_root, so fall back to parent/name.
        if project_root:
            try:
                name = str(result.file_path.resolve().relative_to(project_root.resolve()))
            except ValueError:
                name = f"{result.file_path.parent.name}/{result.file_path.name}"
        else:
            name = f"{result.file_path.parent.name}/{result.file_path.name}"

        if result.error:
            return Tree(f"[red]{name} (error)[/red]")

        if result.cached:
            return Tree(f"[dim]{name} (cached)[/dim]")

        # num_edges counts all calls found in the file; cross_file_edges is
        # the subset crossing module boundaries, so the difference is the
        # same-file call count.
        local_edges = result.num_edges - result.cross_file_edges
        edge_info = []

        if local_edges:
            edge_info.append(f"{local_edges} calls in same file")
        if result.cross_file_edges:
            edge_info.append(f"{result.cross_file_edges} calls from other modules")

        label = ", ".join(edge_info) if edge_info else "no calls"
        return Tree(f"[cyan]{name}[/cyan] [dim]{label}[/dim]")

    def refresh_display() -> None:
        # Rebuild the tree from the rolling window, padding with blank rows
        # so the panel keeps a constant height.
        tree.children = [create_tree_node(r) for r in results]
        tree.children.extend([Tree(" ")] * (MAX_TREE_ENTRIES - len(results)))

        # Update stats
        stat_parts = []
        if stats["indexed"]:
            stat_parts.append(f"{stats['indexed']} files analyzed")
        if stats["cached"]:
            stat_parts.append(f"{stats['cached']} cached")
        if stats["errors"]:
            stat_parts.append(f"{stats['errors']} errors")
        stat_parts.append(f"{stats['edges']} calls found")
        if stats["external"]:
            stat_parts.append(f"{stats['external']} cross-file calls")

        stats_text.truncate(0)
        stats_text.append(" · ".join(stat_parts), style="dim")

    # Buffer of results awaiting a display flush.
    batch: list[IndexResult] = []

    def process_batch() -> None:
        # NOTE: closes over `live`, bound by the `with Live(...)` block
        # below; the yielded callback is only usable inside that block, so
        # `live` is always defined by the time this runs.
        for result in batch:
            results.append(result)

            if result.error:
                stats["errors"] += 1
            elif result.cached:
                stats["cached"] += 1
            else:
                # Edge totals accumulate only for freshly indexed files.
                # NOTE(review): assumes cached IndexResults should not
                # re-contribute edge counts — confirm against the indexer.
                stats["indexed"] += 1
                stats["edges"] += result.num_edges
                stats["external"] += result.cross_file_edges

            progress.advance(task_id)

        batch.clear()
        refresh_display()
        live.refresh()

    def update(result: IndexResult) -> None:
        # Callback handed to the caller: buffer the result and redraw the
        # display at most once every 8 results to limit refresh cost.
        batch.append(result)
        if len(batch) >= 8:
            process_batch()

    with Live(panel, console=console, transient=False, auto_refresh=False) as live:
        yield update
        # Flush whatever is left from an incomplete final batch.
        if batch:
            process_batch()


def call_graph_summary(call_graph: DependencyResolver, file_to_funcs: dict[Path, list[FunctionToOptimize]]) -> None:
    """Print (or LSP-log) a short summary of the optimization candidates.

    Reports how many candidate functions reference other functions
    ("Uses other functions") versus how many are standalone leaves.

    Args:
        call_graph: Dependency resolver providing batched callee counts.
        file_to_funcs: Mapping of source file path to the functions
            discovered in that file.
    """
    total_functions = sum(map(len, file_to_funcs.values()))
    if not total_functions:
        return

    # Build the {file_path: {qualified_name, ...}} mapping expected by the
    # dependency resolver's batch API.
    mapping = {file_path: {func.qualified_name for func in funcs} for file_path, funcs in file_to_funcs.items()}

    callee_counts = call_graph.count_callees_per_function(mapping)

    # A function "uses other functions" when it has at least one callee.
    # (Previously an unused avg_callees/total_callees was computed here —
    # dead code removed since the summary never displayed it.)
    with_context = sum(1 for count in callee_counts.values() if count > 0)
    leaf_functions = total_functions - with_context

    function_label = "function" if total_functions == 1 else "functions"

    summary = (
        f"{total_functions} {function_label} ready for optimization\n"
        f"Uses other functions: {with_context} · "
        f"Standalone: {leaf_functions}"
    )

    if is_LSP_enabled():
        lsp_log(LspTextMessage(text=summary))
        return

    console.print(Panel(summary, title="Call Graph Summary", border_style="cyan"))
3 changes: 1 addition & 2 deletions codeflash/code_utils/code_replacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,8 +871,7 @@ def replace_optimized_code(
[
callee.qualified_name
for callee in code_context.helper_functions
if callee.file_path == module_path
and (callee.jedi_definition is None or callee.jedi_definition.type != "class")
if callee.file_path == module_path and callee.definition_type != "class"
]
),
candidate.source_code,
Expand Down
44 changes: 7 additions & 37 deletions codeflash/code_utils/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,16 @@
import sys
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING

from platformdirs import user_config_dir

if TYPE_CHECKING:
codeflash_temp_dir: Path
codeflash_cache_dir: Path
codeflash_cache_db: Path
# os-independent newline
LF: str = os.linesep
# True everywhere except Windows ("nt").
IS_POSIX: bool = os.name != "nt"
# Interpreter path normalized to forward slashes for safe embedding in commands.
SAFE_SYS_EXECUTABLE: str = Path(sys.executable).as_posix()

# Per-user config directory; ensure_exists=True asks platformdirs to create it.
codeflash_cache_dir: Path = Path(user_config_dir(appname="codeflash", appauthor="codeflash-ai", ensure_exists=True))

# Scratch directory under the system temp root, created eagerly at import time.
codeflash_temp_dir: Path = Path(tempfile.gettempdir()) / "codeflash"
codeflash_temp_dir.mkdir(parents=True, exist_ok=True)

# SQLite cache database lives inside the config directory.
codeflash_cache_db: Path = codeflash_cache_dir / "codeflash_cache.db"
4 changes: 2 additions & 2 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from typing import Any, Union

MAX_TEST_RUN_ITERATIONS = 5
OPTIMIZATION_CONTEXT_TOKEN_LIMIT = 48000
TESTGEN_CONTEXT_TOKEN_LIMIT = 48000
OPTIMIZATION_CONTEXT_TOKEN_LIMIT = 64000
TESTGEN_CONTEXT_TOKEN_LIMIT = 64000
INDIVIDUAL_TESTCASE_TIMEOUT = 15
MAX_FUNCTION_TEST_SECONDS = 60
MIN_IMPROVEMENT_THRESHOLD = 0.05
Expand Down
65 changes: 35 additions & 30 deletions codeflash/discovery/functions_to_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,32 +114,30 @@ def visit_FunctionDef(self, node: cst.FunctionDef) -> None:
)


class FunctionWithReturnStatement(ast.NodeVisitor):
def __init__(self, file_path: Path) -> None:
self.functions: list[FunctionToOptimize] = []
self.ast_path: list[FunctionParent] = []
self.file_path: Path = file_path

def visit_FunctionDef(self, node: FunctionDef) -> None:
if function_has_return_statement(node) and not function_is_a_property(node):
self.functions.append(
FunctionToOptimize(function_name=node.name, file_path=self.file_path, parents=self.ast_path[:])
)

def visit_AsyncFunctionDef(self, node: AsyncFunctionDef) -> None:
if function_has_return_statement(node) and not function_is_a_property(node):
self.functions.append(
FunctionToOptimize(
function_name=node.name, file_path=self.file_path, parents=self.ast_path[:], is_async=True
def find_functions_with_return_statement(ast_module: ast.Module, file_path: Path) -> list[FunctionToOptimize]:
    """Collect every function in *ast_module* that contains a ``return`` statement.

    Walks the module with an explicit work stack (iterative DFS, so deeply
    nested ASTs cannot raise RecursionError). Sync and async functions that
    return a value and are not properties are recorded together with their
    chain of enclosing classes. Function bodies are never descended into,
    so nested ``def``s are not reported — matching the original visitor.

    Args:
        ast_module: Parsed module AST to scan.
        file_path: Path of the source file, stored on each result.

    Returns:
        List of ``FunctionToOptimize`` entries in source order.
    """
    collected: list[FunctionToOptimize] = []
    # Work stack of (node, enclosing-parents) pairs replaces recursion.
    pending: list[tuple[ast.AST, list[FunctionParent]]] = [(ast_module, [])]
    while pending:
        current, parents = pending.pop()
        if isinstance(current, (FunctionDef, AsyncFunctionDef)):
            if function_has_return_statement(current) and not function_is_a_property(current):
                collected.append(
                    FunctionToOptimize(
                        function_name=current.name,
                        file_path=file_path,
                        parents=parents[:],
                        is_async=isinstance(current, AsyncFunctionDef),
                    )
                )
            # Deliberately skip the function body: nested defs are out of scope.
            continue
        if isinstance(current, ClassDef):
            # Children of a class carry that class on their parent chain.
            parents = [*parents, FunctionParent(current.name, current.__class__.__name__)]
        # Push children reversed so pop() yields them in source order.
        for child in reversed(list(ast.iter_child_nodes(current))):
            pending.append((child, parents))
    return collected


# =============================================================================
Expand Down Expand Up @@ -265,9 +263,7 @@ def _find_all_functions_in_python_file(file_path: Path) -> dict[Path, list[Funct
if DEBUG_MODE:
logger.exception(e)
return functions
function_name_visitor = FunctionWithReturnStatement(file_path)
function_name_visitor.visit(ast_module)
functions[file_path] = function_name_visitor.functions
functions[file_path] = find_functions_with_return_statement(ast_module, file_path)
return functions


Expand Down Expand Up @@ -992,12 +988,21 @@ def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list

def function_has_return_statement(function_node: FunctionDef | AsyncFunctionDef) -> bool:
    """Report whether *function_node*'s body contains a ``return`` statement.

    Iterative depth-first scan over statement nodes only: ``return`` is a
    statement, so expression subtrees can never contain one and are skipped
    entirely, keeping the traversal cheap.

    Args:
        function_node: A sync or async function definition node.

    Returns:
        True as soon as any ``ast.Return`` node is found, else False.
    """
    pending: list[ast.stmt] = list(function_node.body)
    while pending:
        stmt = pending.pop()
        if isinstance(stmt, ast.Return):
            return True
        # Queue statement children only: statements appear either directly
        # in a field or inside list fields such as body/orelse/finalbody.
        for field_name in getattr(stmt, "_fields", ()):
            value = getattr(stmt, field_name, None)
            if isinstance(value, ast.stmt):
                pending.append(value)
            elif isinstance(value, list):
                pending.extend(item for item in value if isinstance(item, ast.stmt))
    return False


Expand Down
4 changes: 4 additions & 0 deletions codeflash/languages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

from codeflash.languages.base import (
CodeContext,
DependencyResolver,
HelperFunction,
IndexResult,
Language,
LanguageSupport,
ParentInfo,
Expand Down Expand Up @@ -82,8 +84,10 @@ def __getattr__(name: str):

__all__ = [
"CodeContext",
"DependencyResolver",
"FunctionInfo",
"HelperFunction",
"IndexResult",
"Language",
"LanguageSupport",
"ParentInfo",
Expand Down
Loading
Loading