"""Local directory context building for code review."""

import os
import subprocess
from pathlib import Path
from typing import Optional


def validate_local_path(path: str) -> str:
    """Resolve and validate a local directory path.

    Args:
        path: Directory path (relative or absolute; ``~`` is expanded)

    Returns:
        Absolute path as string

    Raises:
        ValueError: If path doesn't exist or is not a directory
    """
    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.isdir(abs_path):
        raise ValueError(f"Path is not a valid directory: {path}")
    return abs_path


def _run_git(path: str, *args: str):
    """Run a git subcommand in *path* and return the CompletedProcess.

    Centralizes the subprocess invocation shared by every git helper in
    this module. Returns None when git is not installed or the command
    exceeds the 10s timeout, so callers can treat all failure modes
    uniformly instead of each wrapping its own try/except.
    """
    try:
        return subprocess.run(
            ["git", *args],
            cwd=path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None


def is_git_repo(path: str) -> bool:
    """Check if a directory is a git repository.

    Args:
        path: Directory path

    Returns:
        True if .git exists or ``git rev-parse`` succeeds
    """
    # Fast path: a .git entry (dir or worktree file) means we are at a
    # repository root without spawning a subprocess.
    if os.path.exists(os.path.join(path, ".git")):
        return True

    result = _run_git(path, "rev-parse", "--git-dir")
    return result is not None and result.returncode == 0


def get_git_diff(path: str) -> Optional[str]:
    """Get git diff (staged + unstaged changes).

    Args:
        path: Directory path

    Returns:
        Combined diff string or None if not a git repo or no changes
    """
    if not is_git_repo(path):
        return None

    unstaged = _run_git(path, "diff")
    staged = _run_git(path, "diff", "--staged")
    if unstaged is None or staged is None:
        return None

    # Staged changes first, matching the order a reviewer would apply them.
    diff_output = (staged.stdout + unstaged.stdout).strip()
    if not diff_output:
        return None

    # Truncate if the combined diff exceeds 100KB to bound context size.
    max_bytes = 100 * 1024
    if len(diff_output) > max_bytes:
        diff_output = diff_output[:max_bytes] + "\n... (truncated)"

    return diff_output


def get_git_info(path: str) -> Optional[dict]:
    """Get git repository information.

    Args:
        path: Directory path

    Returns:
        Dict with branch, commits, repo_name or None if not a git repo.
        Individual lookups degrade gracefully (branch "unknown", empty
        commit list, directory-name fallback) rather than failing the
        whole call when one git command errors out.
    """
    if not is_git_repo(path):
        return None

    branch_result = _run_git(path, "rev-parse", "--abbrev-ref", "HEAD")
    branch = (
        branch_result.stdout.strip()
        if branch_result is not None and branch_result.returncode == 0
        else "unknown"
    )

    commits_result = _run_git(path, "log", "--oneline", "-10")
    commits = (
        commits_result.stdout.strip().split("\n")
        if commits_result is not None and commits_result.returncode == 0
        else []
    )

    # Prefer the remote origin name; fall back to the directory name.
    repo_name = os.path.basename(path)
    remote_result = _run_git(path, "config", "--get", "remote.origin.url")
    if remote_result is not None and remote_result.returncode == 0:
        remote_url = remote_result.stdout.strip()
        repo_name = remote_url.split("/")[-1].replace(".git", "")

    return {
        "branch": branch,
        "commits": [c for c in commits if c],
        "repo_name": repo_name,
    }
def get_project_structure(path: str, max_depth: int = 3, max_files: int = 200) -> str:
    """Generate a tree-like representation of the project structure.

    Args:
        path: Directory path
        max_depth: Maximum directory depth to traverse
        max_files: Maximum number of files to include

    Returns:
        Tree-like string representation
    """
    ignore_patterns = {
        ".git", "node_modules", "__pycache__", ".venv", "venv",
        ".env", "dist", "build", ".next", ".tox", ".pytest_cache",
        ".mypy_cache", ".coverage", "*.pyc", ".DS_Store", ".idea",
        ".vscode", "*.egg-info", ".gradle", "target", "out",
    }

    def should_ignore(name: str) -> bool:
        """Check if a file/dir should be skipped (hidden or junk)."""
        if name.startswith("."):
            return True
        for pattern in ignore_patterns:
            if pattern.startswith("*"):
                if name.endswith(pattern[1:]):
                    return True
            elif name == pattern:
                return True
        return False

    lines = []
    file_count = 0

    def walk_tree(dir_path: str, prefix: str = "", depth: int = 0) -> None:
        """Recursively walk the directory tree, appending to ``lines``."""
        nonlocal file_count
        if depth > max_depth or file_count >= max_files:
            return

        try:
            entries = sorted(os.listdir(dir_path))
        except (PermissionError, OSError):
            # Unreadable directory: silently omit its contents.
            return

        dirs = []
        files = []
        for entry in entries:
            if should_ignore(entry):
                continue
            full_path = os.path.join(dir_path, entry)
            if os.path.isdir(full_path):
                dirs.append(entry)
            else:
                files.append(entry)

        # Directories first, so the tree shows structure before leaves.
        for i, dir_name in enumerate(dirs):
            is_last_dir = (i == len(dirs) - 1) and not files
            connector = "└── " if is_last_dir else "├── "
            lines.append(f"{prefix}{connector}{dir_name}/")

            if file_count < max_files:
                next_prefix = prefix + ("    " if is_last_dir else "│   ")
                walk_tree(os.path.join(dir_path, dir_name), next_prefix, depth + 1)

        for i, file_name in enumerate(files):
            if file_count >= max_files:
                break
            connector = "└── " if i == len(files) - 1 else "├── "
            lines.append(f"{prefix}{connector}{file_name}")
            file_count += 1

    walk_tree(path)
    return "\n".join(lines) if lines else "(empty directory)"


def build_local_context(path: str) -> str:
    """Build a structured text context for local directory review.

    Args:
        path: Directory path

    Returns:
        Structured text context for RLM input
    """
    abs_path = validate_local_path(path)
    dir_name = os.path.basename(abs_path)

    lines = [
        f"# Local Repository Review: {dir_name}",
        "",
        f"**Path:** {abs_path}",
    ]

    git_info = get_git_info(abs_path)
    if git_info:
        lines.extend([
            f"**Branch:** {git_info['branch']}",
            f"**Repository:** {git_info['repo_name']}",
            "",
        ])

        # Recent commits (guarded: only reached when git_info is a dict,
        # so a non-repo path can never raise TypeError here).
        if git_info["commits"]:
            lines.extend([
                "## Recent Commits",
                "",
            ])
            for commit in git_info["commits"]:
                lines.append(f"- {commit}")
            lines.append("")

        # Git diff. BUGFIX: a git repo with a clean working tree used to
        # be reported as "Not a git repository"; distinguish the two cases.
        diff = get_git_diff(abs_path)
        if diff:
            lines.extend([
                "## Git Diff",
                "",
                "```diff",
                diff,
                "```",
                "",
            ])
        else:
            lines.append("**Status:** No uncommitted changes")
            lines.append("")
    else:
        lines.append("**Status:** Not a git repository")
        lines.append("")

    # Project structure
    lines.extend([
        "## Project Structure",
        "",
        get_project_structure(abs_path),
        "",
    ])

    return "\n".join(lines)
+""" + +import asyncio +import os +import subprocess +from typing import Any + +from .repo_tools import MAX_FILE_BYTES, sanitize_path, find_line_range + +# Common ignore patterns for directory listing +IGNORE_PATTERNS = { + "node_modules", + "__pycache__", + ".git", + ".venv", + "venv", + "dist", + "build", + ".pytest_cache", + ".mypy_cache", + "*.egg-info", +} + +# File extensions to search +SEARCH_EXTENSIONS = ( + ".py", ".js", ".ts", ".tsx", ".jsx", + ".go", ".rs", ".java", ".md", ".json", + ".yaml", ".yml", ".sh", ".bash" +) + + +class LocalRepoTools: + """Tools for exploring a local repository.""" + + def __init__(self, root_path: str): + """Initialize with local directory root path. + + Args: + root_path: Absolute or relative path to repository root + """ + self.root_path = os.path.realpath(root_path) + if not os.path.isdir(self.root_path): + raise ValueError(f"root_path is not a directory: {self.root_path}") + + def _resolve_path(self, path: str) -> str | None: + """Resolve and validate a path relative to root_path. + + Returns absolute path if valid, None if invalid or outside root. + """ + clean = sanitize_path(path) if path else "" + if clean is None: + return None + + # Build absolute path + abs_path = os.path.realpath(os.path.join(self.root_path, clean)) + + # Security: ensure resolved path is within root_path + if not abs_path.startswith(self.root_path + os.sep) and abs_path != self.root_path: + return None + + return abs_path + + async def fetch_file(self, path: str) -> str: + """Fetch a file from the local filesystem. + + Returns file content or error/skip stub. 
+ """ + abs_path = self._resolve_path(path) + if abs_path is None: + return "[ERROR: invalid path]" + + if not os.path.exists(abs_path): + return "[ERROR: 404 - not found]" + + if not os.path.isfile(abs_path): + return "[SKIPPED: path is a directory, use list_directory]" + + # Check size + try: + size = os.path.getsize(abs_path) + except OSError: + return "[ERROR: cannot read file]" + + if size > MAX_FILE_BYTES: + return f"[SKIPPED: file exceeds {MAX_FILE_BYTES // 1000}KB limit ({size // 1000}KB)]" + + # Try to read as text + try: + with open(abs_path, "r", encoding="utf-8") as f: + content = f.read() + return content + except (UnicodeDecodeError, OSError): + return "[SKIPPED: binary/unsupported file]" + + async def list_directory(self, path: str = "") -> list[dict[str, Any]]: + """List files and directories at a path. + + Returns structured entries: [{path, type, size}] + """ + # Treat ".", "./", "/" same as "" (root directory) + abs_path = self._resolve_path(path) if path and path.strip() not in (".", "./", "/") else self.root_path + if abs_path is None: + return [{"error": "invalid path"}] + + if not os.path.exists(abs_path): + return [{"error": "not found"}] + + # Single file case + if os.path.isfile(abs_path): + rel_path = os.path.relpath(abs_path, self.root_path) + return [{ + "path": rel_path.replace(os.sep, "/"), + "type": "file", + "size": os.path.getsize(abs_path), + }] + + # Directory listing + entries = [] + try: + for entry in os.listdir(abs_path): + # Skip hidden files/dirs + if entry.startswith("."): + continue + # Skip ignore patterns + if entry in IGNORE_PATTERNS: + continue + + entry_path = os.path.join(abs_path, entry) + rel_path = os.path.relpath(entry_path, self.root_path) + + if os.path.isdir(entry_path): + entries.append({ + "path": rel_path.replace(os.sep, "/"), + "type": "dir", + "size": 0, + }) + else: + entries.append({ + "path": rel_path.replace(os.sep, "/"), + "type": "file", + "size": os.path.getsize(entry_path), + }) + except OSError: 
+ return [{"error": "cannot read directory"}] + + return entries + + async def search_code(self, query: str) -> list[dict[str, Any]]: + """Search for code patterns in the local repo using grep. + + Returns paths + fragments. Soft-fails on error (returns []). + """ + if not query or not query.strip(): + return [] + + query = query.strip() + + # Build grep command as list (no shell=True to prevent shell injection) + args = ["grep", "-rn"] + for ext in SEARCH_EXTENSIONS: + args.append(f"--include=*{ext}") + args.append("--") # End of options, prevents query from being interpreted as flag + args.append(query) + args.append(self.root_path) + + try: + result = subprocess.run( + args, + capture_output=True, + text=True, + timeout=10, + ) + except subprocess.TimeoutExpired: + return [] # Soft fail + except Exception: + return [] # Soft fail + + if result.returncode != 0: + return [] # No matches or error + + results = [] + for line in result.stdout.splitlines()[:10]: # Limit to 10 results + # Parse grep output: path:line:content + parts = line.split(":", 2) + if len(parts) >= 3: + file_path = parts[0] + rel_path = os.path.relpath(file_path, self.root_path) + fragment = parts[2][:500] # Limit fragment size + results.append({ + "path": rel_path.replace(os.sep, "/"), + "fragment": fragment, + }) + + return results + + async def close(self): + """No-op for local tools (no HTTP client to close).""" + pass + + def format_source(self, path: str, content: str | None = None, needle: str | None = None) -> str: + """Format a source citation as local:path#Lx-Ly.""" + line_range = "" + if content: + line_range = find_line_range(content, needle) + return f"local:{path}{line_range}" + diff --git a/npx/python/cli/main.py b/npx/python/cli/main.py index ff3761c..9f8207c 100644 --- a/npx/python/cli/main.py +++ b/npx/python/cli/main.py @@ -24,6 +24,7 @@ from . 
async def run_local_review(
    path: str,
    question: str | None = None,
    output_format: str = "text",
    quiet: bool = False,
    model: str | None = None,
    expert: bool = False,
):
    """Run a review on a local directory.

    Args:
        path: Local directory path to review (validated before use).
        question: Question to ask; optional when ``expert`` is True.
        output_format: One of "text", "markdown", "json".
        quiet: Suppress progress output; print raw result only.
        model: Model identifier override, or None for the default.
        expert: Run the bundled expert review prompt instead of a
            free-form question.

    Exits the process with status 1 on invalid path, missing question,
    or review failure.
    """
    # Validate path first so the user gets a fast, clear error.
    try:
        abs_path = validate_local_path(path)
    except ValueError as e:
        print_error(str(e))
        sys.exit(1)

    # Determine the question to use (dead `review_mode` local removed).
    if expert:
        actual_question = get_expert_prompt(question)
    elif question:
        actual_question = question
    else:
        print_error("Either --question or --expert is required")
        sys.exit(1)

    if not quiet:
        print_info(f"Reviewing local directory: {abs_path}")
        if expert:
            print_info("Mode: Expert Code Review (SOLID, Security, Code Quality)")
        else:
            print_info(f"Question: {actual_question}")
        console.print()

    # Create runner
    runner = VirtualReviewRunner(
        model=model,
        quiet=quiet,
        on_step=None if quiet else print_step,
    )

    try:
        answer, sources, metadata = await runner.review_local(abs_path, actual_question)
    except Exception as e:
        print_error(f"Review failed: {e}")
        sys.exit(1)

    # Format and print output
    model_name = metadata.get("model", model or "unknown")
    output = format_output(
        answer=answer,
        sources=sources,
        model=model_name,
        output_format=output_format,
        # Metadata is only embedded in machine-readable output.
        metadata=metadata if output_format == "json" else None,
    )

    if quiet or output_format == "json":
        # Raw output for scripting
        print(output)
    else:
        # Rich formatted output
        console.print()
        if output_format == "markdown":
            console.print(Panel(Markdown(output), title="Review", border_style="green"))
        else:
            console.print(Panel(output, title="Review", border_style="green"))
output_format=args.output, + quiet=args.quiet, + model=args.model, + expert=args.expert, + )) else: parser.print_help() sys.exit(1) diff --git a/npx/python/cli/repo_tools.py b/npx/python/cli/repo_tools.py index 8a487d5..fcb31e1 100644 --- a/npx/python/cli/repo_tools.py +++ b/npx/python/cli/repo_tools.py @@ -219,7 +219,8 @@ async def list_directory(self, path: str = "") -> list[dict[str, Any]]: Returns structured entries: [{path, type, size}] """ - clean_path = sanitize_path(path) if path else "" + # Treat ".", "./", "/" same as "" (root directory) + clean_path = sanitize_path(path) if path and path.strip() not in (".", "./", "/") else "" if clean_path is None: return [{"error": "invalid path"}] diff --git a/npx/python/cli/virtual_runner.py b/npx/python/cli/virtual_runner.py index ed2ec0f..89d51e4 100644 --- a/npx/python/cli/virtual_runner.py +++ b/npx/python/cli/virtual_runner.py @@ -1,14 +1,12 @@ """Virtual review runner - runs RLM reviews on GitHub content without local repo.""" import asyncio +import concurrent.futures import logging -import re from typing import Callable import dspy from dspy.primitives.python_interpreter import PythonInterpreter -from dspy.primitives.prediction import Prediction -from dspy.primitives.repl_types import REPLHistory from cr.config import MAIN_MODEL, SUB_MODEL, MAX_ITERATIONS, MAX_LLM_CALLS from cr.rlm_runner import build_deno_command @@ -19,82 +17,19 @@ fetch_issue, build_review_context, ) +from .local_fetcher import build_local_context, validate_local_path +from .local_repo_tools import LocalRepoTools from .repo_tools import RepoTools -# Tool usage instructions for the model - simplified and clear -AGENTIC_TOOLS_PROMPT = """ -## AVAILABLE COMMANDS (USE ONLY THESE!) - -⚠️ **ONLY these 3 commands exist. 
Any other command will NOT work:** - -| Command | Purpose | Example | -|---------|---------|---------| -| `SEARCH_CODE:term` | Find files by name/content | `print("SEARCH_CODE:rlm.py")` | -| `FETCH_FILE:path` | Read file contents | `print("FETCH_FILE:dspy/predict/rlm.py")` | -| `LIST_DIR:path` | List directory contents | `print("LIST_DIR:dspy/predict")` | - -### ❌ FORBIDDEN - These commands DO NOT EXIST: -- `READ_FILE` - WRONG! Use `FETCH_FILE` instead -- `READ_CODE` - WRONG! Use `FETCH_FILE` instead -- `GET_FILE` - WRONG! Use `FETCH_FILE` instead -- `LIST_FILES` - WRONG! Use `LIST_DIR` instead -- `open()` / `os.path` - WRONG! Won't work in sandbox -- Any other command not listed above - ---- - -### SEARCH_CODE - Find files by name or content -```python -print("SEARCH_CODE:rlm.py") -print("SEARCH_CODE:enable_tool_optimization") -``` -Results appear in `search_results` on your NEXT step. - -### FETCH_FILE - Read file contents (NOT read_file, NOT read_code!) -```python -print("FETCH_FILE:dspy/predict/rlm.py") -print("FETCH_FILE:tests/predict/test_rlm.py") -``` -Content appears in `repo_files['dspy/predict/rlm.py']` on your NEXT step. - -### LIST_DIR - List directory contents -```python -print("LIST_DIR:dspy/predict") -print("LIST_DIR:tests") -``` -Entries appear in `repo_dirs['dspy/predict']` on your NEXT step. - -### WORKFLOW: -1. `print("SEARCH_CODE:filename")` → find paths -2. `print("FETCH_FILE:path/to/file.py")` → read content -3. 
Check `repo_files['path/to/file.py']` in next step - ---- - -## EXPERT REVIEW CHECKLISTS (for --expert mode) - -When performing expert code reviews, you can fetch these local checklists for detailed guidance: - -| Category | Command | Use For | -|----------|---------|---------| -| SOLID | `print("FETCH_FILE:checklists/solid-checklist.md")` | Design principle violations, code smells | -| Security | `print("FETCH_FILE:checklists/security-checklist.md")` | XSS, injection, auth gaps, race conditions | -| Code Quality | `print("FETCH_FILE:checklists/code-quality-checklist.md")` | Error handling, performance, boundaries | -| Removal Plan | `print("FETCH_FILE:checklists/removal-plan.md")` | Dead code identification template | - -Fetch the relevant checklists based on what the PR changes require. You decide which categories apply. -""" - - class VirtualReviewRunner: - """Run RLM code reviews on GitHub PRs without a local repository. - - Creates a 'virtual' codebase context from GitHub API data. - Supports agentic file fetching via FETCH_FILE/LIST_DIR/SEARCH_CODE commands. + """Run RLM code reviews on GitHub PRs and local directories. + + Creates a 'virtual' codebase context from GitHub API data or local filesystem. + Uses native DSPy RLM tools for agentic file fetching, directory listing, and code search. """ def __init__( @@ -118,30 +53,134 @@ def __init__( self._lm = None # Repo tools state (set per-review) self._repo_tools: RepoTools | None = None - self._repo_files: dict[str, str] = {} # Fetched file contents - self._repo_dirs: dict[str, list] = {} # Directory listings - self._search_results: list[dict] = [] # Search results def _load_local_checklist(self, path: str) -> str: """Load a bundled checklist file from the CLI package. 
- + Args: path: Path like 'checklists/solid-checklist.md' - + Returns: Content of the checklist file, or error message if not found """ from pathlib import Path - + # Get the directory where this module is located cli_dir = Path(__file__).parent checklist_path = cli_dir / path - + if checklist_path.exists(): return checklist_path.read_text() else: return f"[Error] Checklist not found: {path}" - + + def _sync_call(self, coro): + """Bridge async coroutines to sync using ThreadPoolExecutor. + + DSPy RLM tools must be sync, but RepoTools/LocalRepoTools methods are async. + This helper runs the coroutine in a thread pool and returns the result. + + Args: + coro: An async coroutine to execute + + Returns: + The result of the coroutine + """ + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(asyncio.run, coro) + return future.result() + + def _create_tool_functions(self): + """Create sync tool wrapper functions for DSPy RLM. + + Returns a dict of three sync tool functions as closures that capture + self by reference (so self._repo_tools can change between review calls). + + Returns: + Dict mapping tool name to function: {fetch_file, list_dir, search_code} + """ + runner = self + + def fetch_file(path: str) -> str: + """Fetch a file from the repository by path. + + Handles both regular repository files and bundled checklist files. + Returns file content as a string, or an error message if the file + cannot be read. + + Args: + path: File path (e.g., 'src/main.py' or 'checklists/solid-checklist.md') + + Returns: + File content as string, or error message + """ + if path.startswith("checklists/"): + return runner._load_local_checklist(path) + return runner._sync_call(runner._repo_tools.fetch_file(path)) + + def list_dir(path: str) -> str: + """List directory contents at the given path. + + Returns a formatted text listing of files and directories, + showing path, type (file/dir), and size in bytes. 
+ + Args: + path: Directory path (e.g., 'src' or '') + + Returns: + Formatted text listing of directory contents + """ + entries = runner._sync_call(runner._repo_tools.list_directory(path)) + if not entries: + return "[No entries]" + + # Format as readable text + lines = [] + for entry in entries: + if "error" in entry: + lines.append(f"[Error] {entry['error']}") + else: + entry_path = entry.get("path", "?") + entry_type = entry.get("type", "?") + entry_size = entry.get("size", 0) + if entry_type == "dir": + lines.append(f"[DIR] {entry_path}") + else: + lines.append(f"[FILE] {entry_path} ({entry_size} bytes)") + return "\n".join(lines) + + def search_code(query: str) -> str: + """Search for code patterns in the repository. + + Searches for code content, filenames, or paths. Returns a formatted + text listing of matching files with code fragments. + + Args: + query: Search query (e.g., 'enable_tool_optimization' or 'rlm.py') + + Returns: + Formatted text listing of search results with file paths and fragments + """ + results = runner._sync_call(runner._repo_tools.search_code(query)) + if not results: + return "[No matches found]" + + # Format as readable text + lines = [] + for result in results: + path = result.get("path", "?") + fragment = result.get("fragment", "") + if fragment: + # Truncate fragment if too long + if len(fragment) > 100: + fragment = fragment[:100] + "..." 
+ lines.append(f"{path}: {fragment}") + else: + lines.append(f"{path}") + return "\n".join(lines) + + return {"fetch_file": fetch_file, "list_dir": list_dir, "search_code": search_code} + def _ensure_configured(self): """Configure DSPy and RLM on first use.""" if self._configured: @@ -179,51 +218,60 @@ def _ensure_configured(self): sub_lm=dspy.LM(sub_model, cache=False), verbose=not self.quiet, interpreter=interpreter, + tools=self._create_tool_functions(), ) self._configured = True async def review(self, url: str, question: str) -> tuple[str, list[str], dict]: """Review a GitHub URL (PR or Issue). - + Args: url: GitHub PR or Issue URL question: Question to ask about the content - + Returns: Tuple of (answer, sources, metadata) """ # Parse URL to determine type owner, repo, number, url_type = parse_github_url(url) - + # Fetch content if url_type == "pr": data = await fetch_pr(owner, repo, number) else: data = await fetch_issue(owner, repo, number) - + # Get head SHA for PR (for consistent file reads) head_sha = data.get("head_sha", "HEAD") - + # Create repo tools for this review self._repo_tools = RepoTools(owner, repo, head_sha) - self._repo_files = {} - self._repo_dirs = {} - self._search_results = [] - - # Build context from PR data (tools go in question, not context) + + # Build context from PR data context = build_review_context(data) - + # Run RLM self._ensure_configured() - + try: - answer, sources = await self._run_rlm_with_tools(context, question) + with dspy.context(lm=self._lm): + result = await self._rlm.aforward(context=context, question=question) + + # Extract answer and sources from result + answer = result.answer + sources = result.sources + if isinstance(sources, str): + sources = [s.strip() for s in sources.split(",") if s.strip()] + + # Note: DSPy's verbose=True already shows step-by-step progress in real-time. + # Post-hoc trajectory replay via on_step is intentionally removed to avoid + # duplicate output (steps were being shown twice). 
finally: # Cleanup if self._repo_tools: await self._repo_tools.close() self._repo_tools = None - + metadata = { "type": url_type, "owner": owner, @@ -231,151 +279,61 @@ async def review(self, url: str, question: str) -> tuple[str, list[str], dict]: "number": number, "title": data.get("title", ""), "model": self.model, - "files_fetched": list(self._repo_files.keys()), + "files_fetched": [], } - + return answer, sources, metadata - - async def _process_tool_requests(self, output: str) -> bool: - """Parse output for tool requests and execute them. - - Returns True if any tools were executed. + + async def review_local(self, path: str, question: str) -> tuple[str, list[str], dict]: + """Review a local directory. + + Args: + path: Local directory path (relative or absolute) + question: Question to ask about the code + + Returns: + Tuple of (answer, sources, metadata) """ - if not self._repo_tools: - return False - - executed = False - - # Check for FETCH_FILE requests - fetch_matches = re.findall(r'FETCH_FILE:([^\s\n]+)', output) - if fetch_matches and not self.quiet: - print(f"\n[DEBUG] Found FETCH_FILE requests: {fetch_matches}") - for path in fetch_matches[:3]: # Limit to 3 per iteration - if path not in self._repo_files: - if not self.quiet: - print(f"[DEBUG] Fetching file: {path}") - - # Handle local checklists (bundled with CLI) - if path.startswith("checklists/"): - content = self._load_local_checklist(path) - else: - content = await self._repo_tools.fetch_file(path) - - self._repo_files[path] = content - if not self.quiet: - print(f"[DEBUG] Fetched {path}: {len(content)} chars, starts with: {content[:100]}...") - executed = True - - # Check for LIST_DIR requests - dir_matches = re.findall(r'LIST_DIR:([^\s\n]+)', output) - if dir_matches and not self.quiet: - print(f"\n[DEBUG] Found LIST_DIR requests: {dir_matches}") - for path in dir_matches[:2]: # Limit to 2 per iteration - if path not in self._repo_dirs: - entries = await self._repo_tools.list_directory(path) 
- self._repo_dirs[path] = entries - if not self.quiet: - print(f"[DEBUG] Listed {path}: {len(entries)} entries") - executed = True - - # Check for SEARCH_CODE requests - search_matches = re.findall(r'SEARCH_CODE:(.+?)(?:\n|$)', output) - if search_matches and not self.quiet: - print(f"\n[DEBUG] Found SEARCH_CODE requests: {search_matches}") - for query in search_matches[:1]: # Limit to 1 per iteration - results = await self._repo_tools.search_code(query.strip()) - self._search_results = results - if not self.quiet: - print(f"[DEBUG] Search for '{query.strip()}': {len(results)} results") - for r in results[:3]: - print(f"[DEBUG] - {r.get('path')}") - executed = True - - return executed - - async def _run_rlm_with_tools(self, context: str, question: str) -> tuple[str, list[str]]: - """Run the RLM with agentic tool support.""" - from dspy.predict.rlm import _strip_code_fences - - rlm = self._rlm - output_field_names = list(rlm.signature.output_fields.keys()) - execution_tools = rlm._prepare_execution_tools() - - # Prepend tool instructions to question (treated as instructions, not data) - augmented_question = AGENTIC_TOOLS_PROMPT + "\n\n---\n\n**USER QUESTION:** " + question - - with dspy.context(lm=self._lm): - with rlm._interpreter_context(execution_tools) as repl: - history = REPLHistory() - - for iteration in range(rlm.max_iterations): - # Rebuild variables with current tool state so LLM sees available data - input_args = { - "context": context, - "question": augmented_question, - "repo_files": self._repo_files, - "repo_dirs": self._repo_dirs, - "search_results": self._search_results, - } - variables = rlm._build_variables(**input_args) - - variables_info = [variable.format() for variable in variables] - pred = await rlm.generate_action.acall( - variables_info=variables_info, - repl_history=history, - iteration=f"{iteration + 1}/{rlm.max_iterations}", - ) - - # Execute the code with current repo state - try: - code = _strip_code_fences(pred.code) - - # Inject 
serializable data only - exec_vars = { - "context": context, - "question": question, - "repo_files": self._repo_files, - "repo_dirs": self._repo_dirs, - "search_results": self._search_results, - } - - result = repl.execute(code, variables=exec_vars) - except Exception as e: - result = f"[Error] {e}" - - # Format output - if isinstance(result, list): - output = "\n".join(map(str, result)) - else: - output = str(result) if result else "" - - # Process any tool requests in the output - await self._process_tool_requests(output) - - # Call step callback if provided (pass reasoning, code, and output) - if self.on_step: - self.on_step(iteration + 1, pred.reasoning, code, output) - - # Process result to check if done - processed = rlm._process_execution_result(pred, result, history, output_field_names) - - if isinstance(processed, Prediction): - # Done! - answer = getattr(processed, "answer", str(processed)) - sources = getattr(processed, "sources", []) - if isinstance(sources, str): - sources = [s.strip() for s in sources.split(",") if s.strip()] - return answer, sources - - history = processed - - # Max iterations reached - final_result = await rlm._aextract_fallback(variables, history, output_field_names) - answer = getattr(final_result, "answer", str(final_result)) - sources = getattr(final_result, "sources", []) - if isinstance(sources, str): - sources = [s.strip() for s in sources.split(",") if s.strip()] - return answer, sources + # Validate and resolve path + abs_path = validate_local_path(path) + + # Create local repo tools + local_tools = LocalRepoTools(abs_path) + self._repo_tools = local_tools + + # Build context from local directory + context = build_local_context(abs_path) + + # Run RLM + self._ensure_configured() + + try: + with dspy.context(lm=self._lm): + result = await self._rlm.aforward(context=context, question=question) + + # Extract answer and sources from result + answer = result.answer + sources = result.sources + if isinstance(sources, str): + 
sources = [s.strip() for s in sources.split(",") if s.strip()] + + # Note: DSPy's verbose=True already shows step-by-step progress in real-time. + # Post-hoc trajectory replay via on_step is intentionally removed to avoid + # duplicate output (steps were being shown twice). + finally: + # Cleanup + if self._repo_tools: + await self._repo_tools.close() + self._repo_tools = None + + metadata = { + "type": "local", + "path": abs_path, + "model": self.model, + "files_fetched": [], + } + + return answer, sources, metadata async def review_pr(self, url: str, question: str) -> tuple[str, list[str], dict]: """Review a GitHub PR with full diff context.""" diff --git a/npx/src/cli.ts b/npx/src/cli.ts index b41f776..bc5571b 100644 --- a/npx/src/cli.ts +++ b/npx/src/cli.ts @@ -15,7 +15,8 @@ import { } from './python-runner.js'; export interface ReviewOptions { - url: string; + url?: string; + path?: string; question?: string; output: string; quiet?: boolean; @@ -26,19 +27,23 @@ export interface ReviewOptions { } export async function runReview(options: ReviewOptions): Promise { - const { url, question, output, quiet = false, model, api, githubToken, expert = false } = options; + const { url, path, question, output, quiet = false, model, api, githubToken, expert = false } = options; try { // 4. Get API key const apiKey = await getApiKey(api); - // 5. Get GitHub token (required for code search API) - const ghToken = await getGitHubToken(githubToken, true); + // 5. Get GitHub token only if using URL mode (not required for local path mode) + let ghToken: string | undefined; + if (url) { + ghToken = await getGitHubToken(githubToken, true); + } // 6. 
Run the review if (!quiet) { - console.log(chalk.cyan(`\n 🔍 Reviewing: ${url}`)); + const target = url || path; + console.log(chalk.cyan(`\n 🔍 Reviewing: ${target}`)); if (expert) { console.log(chalk.cyan(` Mode: Expert Code Review (SOLID, Security, Code Quality)\n`)); } else if (question) { @@ -48,6 +53,7 @@ export async function runReview(options: ReviewOptions): Promise { const result = await runPythonReview({ url, + path, question, output, quiet, diff --git a/npx/src/index.ts b/npx/src/index.ts index 579b2b9..c2a47cb 100644 --- a/npx/src/index.ts +++ b/npx/src/index.ts @@ -16,8 +16,9 @@ program program .command('review') - .description('Review a GitHub PR or Issue') - .requiredOption('-u, --url ', 'GitHub PR or Issue URL') + .description('Review a GitHub PR/Issue or local directory') + .option('-u, --url ', 'GitHub PR or Issue URL') + .option('-p, --path ', 'Local directory path to review') .option('-q, --question ', 'Question to ask about the PR/Issue (optional with --expert)') .option('--expert', 'Run expert code review (SOLID, Security, Performance, Code Quality)') .option('-o, --output ', 'Output format: text, markdown, json', 'text') @@ -26,6 +27,15 @@ program .option('--api ', 'Gemini API key (defaults to GEMINI_API_KEY env var)') .option('--github-token ', 'GitHub token for private repos (defaults to GITHUB_TOKEN env var)') .action(async (options) => { + // Validate mutual exclusion: exactly one of --url or --path must be provided + if (!options.url && !options.path) { + console.error('Error: Either --url or --path must be provided'); + process.exit(1); + } + if (options.url && options.path) { + console.error('Error: --url and --path are mutually exclusive'); + process.exit(1); + } await runReview(options); }); diff --git a/npx/src/python-runner.ts b/npx/src/python-runner.ts index 6b7e6c5..5bd7295 100644 --- a/npx/src/python-runner.ts +++ b/npx/src/python-runner.ts @@ -251,7 +251,8 @@ export async function installAsyncReview(systemPython: string, quiet: 
boolean = } export interface RunOptions { - url: string; + url?: string; + path?: string; question?: string; output: string; quiet: boolean; @@ -282,10 +283,17 @@ export async function runPythonReview(options: RunOptions): Promise { const args = [ '-m', 'cli.main', 'review', - '--url', options.url, - '--output', options.output, ]; + // Add either --url or --path (mutually exclusive) + if (options.url) { + args.push('--url', options.url); + } else if (options.path) { + args.push('--path', options.path); + } + + args.push('--output', options.output); + // Add question if provided if (options.question) { args.push('-q', options.question);