Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 37 additions & 20 deletions cli/github_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
UrlType = Literal["pr", "issue"]


def _get_login(user: dict) -> str:
"""Get user login, falling back to username (Gitea compat)."""
return user.get("login") or user.get("username") or "unknown"


def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
"""Parse a GitHub URL into (owner, repo, number, type).
"""Parse a GitHub/Gitea/GitLab/Bitbucket URL into (owner, repo, number, type).

Args:
url: GitHub Issue or PR URL
url: Issue/PR/MR URL from any supported forge

Returns:
Tuple of (owner, repo, number, type)
Expand All @@ -25,26 +30,38 @@ def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
ValueError: If URL format is invalid

Examples:
>>> parse_github_url("https://github.com/vercel-labs/json-render/pull/35")
('vercel-labs', 'json-render', 35, 'pr')
>>> parse_github_url("https://github.com/AsyncFuncAI/AsyncReview/issues/1")
('AsyncFuncAI', 'AsyncReview', 1, 'issue')
>>> parse_github_url("https://github.com/owner/repo/pull/35")
('owner', 'repo', 35, 'pr')
>>> parse_github_url("https://gitea.example.com/org/repo/pulls/42")
('org', 'repo', 42, 'pr')
>>> parse_github_url("https://gitlab.com/org/repo/-/merge_requests/10")
('org', 'repo', 10, 'pr')
>>> parse_github_url("https://bitbucket.org/org/repo/pull-requests/5")
('org', 'repo', 5, 'pr')
>>> parse_github_url("https://github.com/owner/repo/issues/1")
('owner', 'repo', 1, 'issue')
"""
# Try PR URL first (primary use case)
pr_pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
# PR patterns — accept any domain, support multiple forge URL formats:
# GitHub: /owner/repo/pull/123
# Gitea: /owner/repo/pulls/123
# GitLab: /owner/repo/-/merge_requests/123
# Bitbucket: /owner/repo/pull-requests/123
pr_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
pr_match = re.search(pr_pattern, url)
if pr_match:
return pr_match.group(1), pr_match.group(2), int(pr_match.group(3)), "pr"

# Try Issue URL
issue_pattern = r"github\.com/([^/]+)/([^/]+)/issues/(\d+)"

# Issue patterns — GitHub/Gitea/GitLab
# GitLab: /owner/repo/-/issues/123
issue_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?issues/(\d+)(?:[/?#].*)?$"
issue_match = re.search(issue_pattern, url)
if issue_match:
return issue_match.group(1), issue_match.group(2), int(issue_match.group(3)), "issue"

raise ValueError(
f"Invalid GitHub URL: {url}\n"
"Expected format: https://github.com/owner/repo/pull/123 or .../issues/123"
f"Invalid URL: {url}\n"
"Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123\n"
" or: https://host/owner/repo/issues/123"
)


Expand Down Expand Up @@ -119,7 +136,7 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
comments_data = comments_resp.json()
comments_list = [
{
"author": c["user"]["login"],
"author": _get_login(c["user"]),
"body": c["body"],
}
for c in comments_data
Expand All @@ -144,10 +161,10 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
"number": number,
"title": pr_data.get("title", ""),
"body": pr_data.get("body") or "",
"author": pr_data["user"]["login"],
"author": _get_login(pr_data["user"]),
"state": pr_data.get("state", "open"),
"base_branch": pr_data["base"]["ref"],
"head_branch": pr_data["head"]["ref"],
"base_branch": pr_data.get("base", {}).get("ref", "main"),
"head_branch": pr_data.get("head", {}).get("ref", "unknown"),
"files": files,
"commits": commits_list,
"comments": comments_list,
Expand Down Expand Up @@ -181,7 +198,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
comments_data = comments_resp.json()
comments_list = [
{
"author": c["user"]["login"],
"author": _get_login(c["user"]),
"body": c["body"],
}
for c in comments_data
Expand All @@ -194,7 +211,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
"number": number,
"title": issue_data.get("title", ""),
"body": issue_data.get("body") or "",
"author": issue_data["user"]["login"],
"author": _get_login(issue_data["user"]),
"state": issue_data.get("state", "open"),
"labels": [l["name"] for l in issue_data.get("labels", [])],
"comments": comments_list,
Expand Down
7 changes: 4 additions & 3 deletions cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import sys

from rich.console import Console
from rich.markup import escape as rich_escape
from rich.panel import Panel
from rich.markdown import Markdown

Expand All @@ -42,12 +43,12 @@ def print_step(step_num: int, reasoning: str, code: str):

def print_info(message: str):
"""Print an info message."""
console.print(f"[dim]{message}[/dim]")
console.print(f"[dim]{rich_escape(message)}[/dim]")


def print_error(message: str):
"""Print an error message."""
console.print(f"[red]Error: {message}[/red]")
console.print(f"[red]Error: {rich_escape(message)}[/red]")


async def run_review(
Expand Down Expand Up @@ -103,7 +104,7 @@ async def run_review(
if output_format == "markdown":
console.print(Panel(Markdown(output), title="Review", border_style="green"))
else:
console.print(Panel(output, title="Review", border_style="green"))
console.print(Panel(rich_escape(output), title="Review", border_style="green"))


def main():
Expand Down
5 changes: 3 additions & 2 deletions cli/virtual_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def _ensure_configured(self):

# Configure DSPy with specified model
model_name = self.model
if not model_name.startswith("gemini/"):
# Only add gemini/ prefix if no provider prefix exists (no / in name)
if "/" not in model_name:
model_name = f"gemini/{model_name}"

dspy.configure(lm=dspy.LM(model_name))
Expand All @@ -76,7 +77,7 @@ def _ensure_configured(self):
signature="context, question -> answer, sources",
max_iterations=MAX_ITERATIONS,
max_llm_calls=MAX_LLM_CALLS,
sub_lm=dspy.LM(f"gemini/{SUB_MODEL}" if not SUB_MODEL.startswith("gemini/") else SUB_MODEL),
sub_lm=dspy.LM(SUB_MODEL if "/" in SUB_MODEL else f"gemini/{SUB_MODEL}"),
verbose=not self.quiet,
interpreter=interpreter,
)
Expand Down
41 changes: 26 additions & 15 deletions cr/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,37 @@
from .diff_types import PRInfo, FileContents, DiffFileContext


def _get_login(user: dict) -> str:
"""Get user login, falling back to username (Gitea compat)."""
return user.get("login") or user.get("username") or "unknown"
Comment on lines +16 to +18

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This _get_login function is also defined in cli/github_fetcher.py. Similarly, the parse_pr_url function on line 25 duplicates logic and regular expressions from cli/github_fetcher.py. To improve maintainability and avoid inconsistencies, these shared helper functions should be extracted into a common utility module and imported where needed.



# In-memory store for loaded PRs (MVP - no persistence)
_pr_cache: dict[str, PRInfo] = {}


def parse_pr_url(url: str) -> tuple[str, str, int]:
"""Parse a GitHub PR URL into (owner, repo, number).
"""Parse a PR/MR URL into (owner, repo, number).

Supports GitHub, Gitea, GitLab, and Bitbucket URL formats.

Args:
url: GitHub PR URL like https://github.com/owner/repo/pull/123
url: PR/MR URL (e.g. /pull/1, /pulls/1, /-/merge_requests/1, /pull-requests/1)

Returns:
Tuple of (owner, repo, pr_number)

Raises:
ValueError: If URL format is invalid
"""
pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
# GitHub /pull/, Gitea /pulls/, GitLab /-/merge_requests/, Bitbucket /pull-requests/
pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
match = re.search(pattern, url)
if not match:
raise ValueError(f"Invalid GitHub PR URL: {url}")
raise ValueError(
f"Invalid PR URL: {url}\n"
"Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123"
)
return match.group(1), match.group(2), int(match.group(3))


Expand Down Expand Up @@ -96,10 +107,10 @@ async def load_pr(pr_url: str) -> PRInfo:
"author": {
"name": c["commit"]["author"]["name"],
"date": c["commit"]["author"]["date"],
"login": c["author"]["login"] if c.get("author") else None,
"avatar_url": c["author"]["avatar_url"] if c.get("author") else None,
"login": _get_login(c["author"]) if c.get("author") else None,
"avatar_url": c["author"].get("avatar_url", "") if c.get("author") else None,
},
"html_url": c["html_url"],
"html_url": c.get("html_url", ""),
}
for c in commits_data
]
Expand All @@ -118,12 +129,12 @@ async def load_pr(pr_url: str) -> PRInfo:
{
"id": c["id"],
"user": {
"login": c["user"]["login"],
"avatar_url": c["user"]["avatar_url"],
"login": _get_login(c["user"]),
"avatar_url": c["user"].get("avatar_url", ""),
},
"body": c["body"],
"created_at": c["created_at"],
"html_url": c["html_url"],
"html_url": c.get("html_url", ""),
}
for c in comments_data
]
Expand All @@ -146,15 +157,15 @@ async def load_pr(pr_url: str) -> PRInfo:
number=number,
title=pr_data.get("title", ""),
body=pr_data.get("body") or "",
base_sha=pr_data["base"]["sha"],
head_sha=pr_data["head"]["sha"],
base_sha=pr_data.get("base", {}).get("sha", "HEAD"),
head_sha=pr_data.get("head", {}).get("sha", "HEAD"),
files=files,
created_at=datetime.now(),
user={"login": pr_data["user"]["login"], "avatar_url": pr_data["user"]["avatar_url"]},
user={"login": _get_login(pr_data["user"]), "avatar_url": pr_data["user"].get("avatar_url", "")},
state=pr_data.get("state", "open"),
draft=pr_data.get("draft", False),
head_ref=pr_data["head"]["ref"],
base_ref=pr_data["base"]["ref"],
head_ref=pr_data.get("head", {}).get("ref", "unknown"),
base_ref=pr_data.get("base", {}).get("ref", "main"),
Comment on lines +160 to +168

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Hardcoding default values like "HEAD" for SHAs and "main" for the base branch can be brittle. For example, many repositories still use master as their default branch. If the API response is missing these fields for an unexpected reason, it could lead to incorrect behavior. It would be more robust to either fail explicitly if this critical information is missing, or use None as a fallback and ensure downstream code handles it gracefully.

commits=pr_data.get("commits", 0),
additions=pr_data.get("additions", 0),
deletions=pr_data.get("deletions", 0),
Expand Down
9 changes: 5 additions & 4 deletions cr/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from rich.console import Console
from rich.markdown import Markdown
from rich.markup import escape as rich_escape
from rich.panel import Panel
from rich.syntax import Syntax
from rich.table import Table
Expand Down Expand Up @@ -112,12 +113,12 @@ def print_help():

def print_error(message: str):
"""Print an error message."""
console.print(f"\n[red]Error: {message}[/red]")
console.print(f"\n[red]Error: {rich_escape(message)}[/red]")


def print_info(message: str):
"""Print an info message."""
console.print(f"[dim]{message}[/dim]")
console.print(f"[dim]{rich_escape(message)}[/dim]")


def print_history(history: list[tuple[str, str]]):
Expand All @@ -126,10 +127,10 @@ def print_history(history: list[tuple[str, str]]):
console.print("[dim]No history yet.[/dim]")
else:
for i, (q, a) in enumerate(history, 1):
console.print(f"\n[cyan]Q{i}:[/cyan] {q}")
console.print(f"\n[cyan]Q{i}:[/cyan] {rich_escape(q)}")
# Truncate long answers
display_a = a[:200] + "..." if len(a) > 200 else a
console.print(f"[green]A{i}:[/green] {display_a}")
console.print(f"[green]A{i}:[/green] {rich_escape(display_a)}")
console.print()


Expand Down
Loading