codeflash-ai · KRRT7 · Feb 20, 2026 · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026
diff --git a/.claude/rules/architecture.md b/.claude/rules/architecture.md
@@ -26,3 +26,17 @@ codeflash/
 ├── result/                 # Result types and handling
 └── version.py              # Version information
 ```
+
+## Key Entry Points
+
+| Task | Start here |
+|------|------------|
+| CLI arguments & commands | `cli_cmds/cli.py` |
+| Optimization orchestration | `optimization/optimizer.py` → `run()` |
+| Per-function optimization | `optimization/function_optimizer.py` |
+| Function discovery | `discovery/functions_to_optimize.py` |
+| Context extraction | `context/code_context_extractor.py` |
+| Test execution | `verification/test_runner.py`, `verification/pytest_plugin.py` |
+| Performance ranking | `benchmarking/function_ranker.py` |
+| Domain types | `models/models.py`, `models/function_types.py` |
+| Result handling | `either.py` (`Result`, `Success`, `Failure`, `is_successful`) |
diff --git a/.claude/rules/code-style.md b/.claude/rules/code-style.md
@@ -2,6 +2,7 @@
 
 - **Line length**: 120 characters
 - **Python**: 3.9+ syntax
+- **Package management**: Always use `uv`, never `pip`
 - **Tooling**: Ruff for linting/formatting, mypy strict mode, prek for pre-commit checks
 - **Comments**: Minimal - only explain "why", not "what"
 - **Docstrings**: Do not add unless explicitly requested

diff --git a/.claude/rules/git.md b/.claude/rules/git.md
@@ -1,5 +1,6 @@
 # Git Commits & Pull Requests
 
+- **Always create a new branch from `main` before starting any new work** — never commit directly to `main` or reuse an existing feature branch for unrelated changes
 - Use conventional commit format: `fix:`, `feat:`, `refactor:`, `docs:`, `test:`, `chore:`
 - Keep commits atomic - one logical change per commit
 - Commit message body should be concise (1-2 sentences max)

diff --git a/.claude/rules/language-patterns.md b/.claude/rules/language-patterns.md
@@ -0,0 +1,12 @@
+---
+paths:
+  - "codeflash/languages/**/*.py"
+---
+
+# Language Support Patterns
+
+- Current language is a module-level singleton in `languages/current.py` — use `set_current_language()` / `current_language()`, never pass language as a parameter through call chains
+- Use `get_language_support(identifier)` from `languages/registry.py` to get a `LanguageSupport` instance — never import language classes directly
+- New language support classes must use the `@register_language` decorator to register with the extension and language registries
+- `languages/__init__.py` uses `__getattr__` for lazy imports to avoid circular dependencies — follow this pattern when adding new exports
+- `is_javascript()` returns `True` for both JavaScript and TypeScript
diff --git a/.claude/rules/optimization-patterns.md b/.claude/rules/optimization-patterns.md
@@ -0,0 +1,17 @@
+---
+paths:
+  - "codeflash/optimization/**/*.py"
+  - "codeflash/verification/**/*.py"
+  - "codeflash/benchmarking/**/*.py"
+  - "codeflash/context/**/*.py"
+---
+
+# Optimization Pipeline Patterns
+
+- All major operations return `Result[SuccessType, ErrorType]` — construct with `Success(value)` / `Failure(error)`, check with `is_successful()` before calling `unwrap()`
+- Code context has token limits (`OPTIMIZATION_CONTEXT_TOKEN_LIMIT`, `TESTGEN_CONTEXT_TOKEN_LIMIT` in `config_consts.py`) — exceeding them rejects the function
+- `read_writable_code` can span multiple files; `read_only_context_code` is reference-only
+- Code is serialized as markdown code blocks: ` ```language:filepath\ncode\n``` ` (see `CodeStringsMarkdown`)
+- Candidates form a forest (DAG): refinements/repairs reference `parent_id` on previous candidates
+- Test generation and optimization run concurrently — coordinate through `CandidateEvaluationContext`
+- Generated tests are instrumented with `codeflash_capture.py` to record return values and traces
diff --git a/.claude/rules/source-code.md b/.claude/rules/source-code.md
@@ -6,6 +6,3 @@ paths:
 # Source Code Rules
 
 - Use `libcst` for code modification/transformation to preserve formatting. `ast` is acceptable for read-only analysis and parsing.
-- NEVER use leading underscores for function names (e.g., `_helper`). Python has no true private functions. Always use public names.
-- Any new feature or bug fix that can be tested automatically must have test cases.
-- If changes affect existing test expectations, update the tests accordingly. Tests must always pass after changes.
diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md
@@ -13,3 +13,5 @@ paths:
 - Use `.as_posix()` when converting resolved paths to strings (normalizes to forward slashes).
 - Any new feature or bug fix that can be tested automatically must have test cases.
 - If changes affect existing test expectations, update the tests accordingly. Tests must always pass after changes.
+- The pytest plugin patches `time`, `random`, `uuid`, and `datetime` for deterministic test execution — never assume real randomness or real time in verification tests.
+- `conftest.py` uses an autouse fixture that calls `reset_current_language()` — tests always start with Python as the default language.
diff --git a/.claude/skills/fix-mypy.md b/.claude/skills/fix-mypy.md
@@ -0,0 +1,12 @@
+# Fix mypy errors
+
+When modifying code, fix any mypy type errors in the files you changed:
+
+```bash
+uv run mypy --non-interactive --config-file pyproject.toml <changed_files>
+```
+
+- Fix type annotation issues: missing return types, incorrect types, Optional/None unions, import errors for type hints
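+- Do NOT add `# type: ignore` comments — fix the root cause whenever the fix is limited to type annotations
+- Do NOT fix type errors that require logic changes, complex generic type rework, or anything that could change runtime behavior — leave those errors in place and report them instead of suppressing them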
+- Files in `mypy_allowlist.txt` are checked in CI — ensure they remain error-free
diff --git a/.claude/skills/fix-prek.md b/.claude/skills/fix-prek.md
@@ -0,0 +1,9 @@
+# Fix prek failures
+
+When prek (pre-commit) checks fail:
+
+1. Run `uv run prek run` to see failures (local, checks staged files)
+2. In CI, the equivalent is `uv run prek run --from-ref origin/main`
+3. prek runs ruff format, ruff check, and mypy on changed files
+4. Fix issues in order: formatting → lint → type errors
+5. Re-run `uv run prek run` to verify all checks pass
diff --git a/.codex/skills/.gitignore b/.codex/skills/.gitignore
@@ -0,0 +1,2 @@
+# Managed by Tessl
+tessl:*
diff --git a/.gemini/skills/.gitignore b/.gemini/skills/.gitignore
@@ -0,0 +1,2 @@
+# Managed by Tessl
+tessl:*
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
@@ -48,7 +48,7 @@ jobs:
         with:
           use_foundry: "true"
           use_sticky_comment: true
-          allowed_bots: "claude[bot]"
+          allowed_bots: "claude[bot],codeflash-ai[bot]"
           prompt: |
             REPO: ${{ github.repository }}
             PR NUMBER: ${{ github.event.pull_request.number }}

diff --git a/.github/workflows/duplicate-code-detector.yml b/.github/workflows/duplicate-code-detector.yml
@@ -0,0 +1,114 @@
+name: Duplicate Code Detector
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [opened, synchronize]
+
+jobs:
+  detect-duplicates:
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+      id-token: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref || github.ref }}
+
+      - name: Start Serena MCP server
+        run: |
+          docker pull ghcr.io/github/serena-mcp-server:latest
+          docker run -d --name serena \
+            --network host \
+            -v "${{ github.workspace }}:${{ github.workspace }}:rw" \
+            ghcr.io/github/serena-mcp-server:latest \
+            serena start-mcp-server --context codex --project "${{ github.workspace }}"
+
+          mkdir -p /tmp/mcp-config
+          cat > /tmp/mcp-config/mcp-servers.json << 'EOF'
+          {
+            "mcpServers": {
+              "serena": {
+                "command": "docker",
+                "args": ["exec", "-i", "serena", "serena", "start-mcp-server", "--context", "codex", "--project", "${{ github.workspace }}"]
+              }
+            }
+          }
+          EOF
+
+      - name: Run Claude Code
+        uses: anthropics/claude-code-action@v1
+        with:
+          use_foundry: "true"
+          use_sticky_comment: true
+          allowed_bots: "claude[bot],codeflash-ai[bot]"
+          claude_args: '--mcp-config /tmp/mcp-config/mcp-servers.json --allowedTools "Read,Glob,Grep,Bash(git diff:*),Bash(git log:*),Bash(git show:*),Bash(wc *),Bash(find *),mcp__serena__*"'
+          prompt: |
+            You are a duplicate code detector with access to Serena semantic code analysis.
+
+            ## Setup
+
+            First activate the project in Serena:
+            - Use `mcp__serena__activate_project` with the workspace path `${{ github.workspace }}`
+
+            ## Steps
+
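+            1. Get the list of changed .py files by running exactly:
+               `git diff --name-only origin/main...HEAD -- '*.py'` — then ignore test files in the output (paths containing `test_`, `_test.py`, `/tests/`, or `/test/`); do not pipe to `grep`, since only `git diff` itself is an allowed shell command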
+
+            2. Use Serena's semantic analysis on changed files:
+               - `mcp__serena__get_symbols_overview` to understand file structure
+               - `mcp__serena__find_symbol` to search for similarly named symbols across the codebase
+               - `mcp__serena__find_referencing_symbols` to understand usage patterns
+               - `mcp__serena__search_for_pattern` to find similar code patterns
+
+            3. For each changed file, look for:
+               - **Exact Duplication**: Identical code blocks (>10 lines) in multiple locations
+               - **Structural Duplication**: Same logic with minor variations (different variable names)
+               - **Functional Duplication**: Different implementations of the same functionality
+               - **Copy-Paste Programming**: Similar blocks that could be extracted into shared utilities
+
+            4. Cross-reference against the rest of the codebase using Serena:
+               - Search for similar function signatures and logic patterns
+               - Check if new code duplicates existing utilities or helpers
+               - Look for repeated patterns across modules
+
+            ## What to Report
+
+            - Identical or nearly identical functions in different files
+            - Repeated code blocks that could be extracted to utilities
+            - Similar classes or modules with overlapping functionality
+            - Copy-pasted code with minor modifications
+            - Duplicated business logic across components
+
+            ## What to Skip
+
+            - Standard boilerplate (imports, __init__, etc.)
+            - Test setup/teardown code
+            - Configuration with similar structure
+            - Language-specific patterns (constructors, getters/setters)
+            - Small snippets (<5 lines) unless highly repetitive
+            - Workflow files under .github/
+
+            ## Output
+
+            Post a single PR comment with your findings. For each pattern found:
+            - Severity (High/Medium/Low)
+            - File locations with line numbers
+            - Code samples showing the duplication
+            - Concrete refactoring suggestion
+
+            If no significant duplication is found, say so briefly. Do not create issues — just comment on the PR.
+        env:
+          ANTHROPIC_FOUNDRY_API_KEY: ${{ secrets.AZURE_ANTHROPIC_API_KEY }}
+          ANTHROPIC_FOUNDRY_BASE_URL: ${{ secrets.AZURE_ANTHROPIC_ENDPOINT }}
+
+      - name: Stop Serena
+        if: always()
+        run: docker stop serena && docker rm serena || true
diff --git a/.mcp.json b/.mcp.json
@@ -0,0 +1,12 @@
+{
+  "mcpServers": {
+    "tessl": {
+      "type": "stdio",
+      "command": "tessl",
+      "args": [
+        "mcp",
+        "start"
+      ]
+    }
+  }
+}
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -1,37 +1,32 @@
 # CLAUDE.md
 
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-
 ## Project Overview
 
 CodeFlash is an AI-powered Python code optimizer that automatically improves code performance while maintaining correctness. It uses LLMs to generate optimization candidates, verifies correctness through test execution, and benchmarks performance improvements.
 
-## Common Commands
-
-```bash
-# Package management (NEVER use pip)
-uv sync                          # Install dependencies
-uv sync --group dev              # Install dev dependencies
-uv add <package>                 # Add a package
-
-# Running tests
-uv run pytest tests/             # Run all tests
-uv run pytest tests/test_foo.py  # Run specific test file
-uv run pytest tests/test_foo.py::test_bar -v  # Run single test
-
-# Type checking and linting
-uv run mypy codeflash/           # Type check
-uv run ruff check codeflash/     # Lint
-uv run ruff format codeflash/    # Format
+## Optimization Pipeline
 
-# Linting (run before committing)
-uv run prek run --from-ref origin/main
-
-# Running the CLI
-uv run codeflash --help
-uv run codeflash init            # Initialize in a project
-uv run codeflash --all           # Optimize entire codebase
 ```
+Discovery → Ranking → Context Extraction → Test Gen + Optimization → Baseline → Candidate Evaluation → PR
+```
+
+1. **Discovery** (`discovery/`): Find optimizable functions across the codebase
+2. **Ranking** (`benchmarking/function_ranker.py`): Rank functions by addressable time using trace data
+3. **Context** (`context/`): Extract code dependencies (read-writable code + read-only imports)
+4. **Optimization** (`optimization/`, `api/`): Generate candidates via AI service, run in parallel with test generation
+5. **Verification** (`verification/`): Run candidates against tests, compare outputs via custom pytest plugin
+6. **Benchmarking** (`benchmarking/`): Measure performance, select best candidate by speedup
+7. **Result** (`result/`, `github/`): Create PR with winning optimization
+
+## Domain Glossary
+
+- **Optimization candidate**: A generated code variant that might be faster (`OptimizedCandidate`)
+- **Function context**: All code needed for optimization — split into read-writable (modifiable) and read-only (reference)
+- **Addressable time**: Time a function spends that could be optimized, computed as own time + (callee time / call count)
+- **Candidate forest**: DAG of candidates where refinements/repairs build on previous candidates
+- **Replay test**: Test generated from recorded benchmark data to reproduce real workloads
+- **Tracer**: Profiling system that records function call trees and timings (`tracing/`, `tracer.py`)
+- **Worktree mode**: Git worktree-based parallel optimization (`--worktree` flag)
 
 <!-- Section below is auto-generated by `tessl install` - do not edit manually -->
 

diff --git a/code_to_optimize/js/code_to_optimize_js/bubble_sort.js b/code_to_optimize/js/code_to_optimize_js/bubble_sort.js
@@ -11,14 +11,21 @@ function bubbleSort(arr) {
     const result = arr.slice();
     const n = result.length;
 
-    for (let i = 0; i < n; i++) {
-        for (let j = 0; j < n - 1; j++) {
-            if (result[j] > result[j + 1]) {
-                const temp = result[j];
-                result[j] = result[j + 1];
-                result[j + 1] = temp;
+    if (n <= 1) return result;
+
+    for (let i = 0; i < n - 1; i++) {
+        let swapped = false;
+        const limit = n - i - 1;
+        for (let j = 0; j < limit; j++) {
+            const a = result[j];
+            const b = result[j + 1];
+            if (a > b) {
+                result[j] = b;
+                result[j + 1] = a;
+                swapped = true;
             }
         }
+        if (!swapped) break;
     }
 
     return result;

diff --git a/code_to_optimize/js/code_to_optimize_vitest/package-lock.json b/code_to_optimize/js/code_to_optimize_vitest/package-lock.json
diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py
@@ -1,23 +1,18 @@
 from __future__ import annotations
 
-import os
 import re
 import subprocess
 from pathlib import Path
 
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
-from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
+from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args, make_env_with_project_root
 
 
 def trace_benchmarks_pytest(
     benchmarks_root: Path, tests_root: Path, project_root: Path, trace_file: Path, timeout: int = 300
 ) -> None:
-    benchmark_env = os.environ.copy()
-    if "PYTHONPATH" not in benchmark_env:
-        benchmark_env["PYTHONPATH"] = str(project_root)
-    else:
-        benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root)
+    benchmark_env = make_env_with_project_root(project_root)
     run_args = get_cross_platform_subprocess_run_args(
         cwd=project_root, env=benchmark_env, timeout=timeout, check=False, text=True, capture_output=True
     )