From af21811bb331f103d257cab01bad96b8fffab325 Mon Sep 17 00:00:00 2001 From: Gloria Date: Mon, 16 Feb 2026 11:59:10 +0000 Subject: [PATCH] feat: multi-forge support (GitHub, Gitea, GitLab, Bitbucket) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for multiple Git forge URL formats in PR/issue parsing: - GitHub: /owner/repo/pull/123 - Gitea: /owner/repo/pulls/123 - GitLab: /owner/repo/-/merge_requests/123 - Bitbucket: /owner/repo/pull-requests/123 Also includes: - Defensive .get() chains for API response fields that differ across forges (user.login vs user.username, optional avatar_url, html_url) - _get_login() helper with fallback for Gitea's 'username' field - Rich markup escaping for all user-generated console output - Gitea search API compatibility (data vs items response keys) - Provider-prefixed model passthrough (e.g. openai/gpt-4) instead of forcing gemini/ prefix — enables use with LiteLLM and other proxies - 24 regression tests covering all forge URL formats Closes AsyncFuncAI/AsyncReview#8 --- cli/github_fetcher.py | 57 ++++++++++++++++++---------- cli/main.py | 7 ++-- cli/virtual_runner.py | 5 ++- cr/github.py | 41 ++++++++++++-------- cr/render.py | 9 +++-- npx/python/cli/github_fetcher.py | 63 +++++++++++++++++++------------ npx/python/cli/main.py | 11 +++--- npx/python/cli/repo_tools.py | 8 +++- npx/python/cli/virtual_runner.py | 13 ++++--- npx/python/cr/github.py | 41 ++++++++++++-------- tests/test_url_parsing.py | 64 ++++++++++++++++++++++++++++++++ 11 files changed, 225 insertions(+), 94 deletions(-) create mode 100644 tests/test_url_parsing.py diff --git a/cli/github_fetcher.py b/cli/github_fetcher.py index a9a9555..385f2dc 100644 --- a/cli/github_fetcher.py +++ b/cli/github_fetcher.py @@ -12,11 +12,16 @@ UrlType = Literal["pr", "issue"] +def _get_login(user: dict) -> str: + """Get user login, falling back to username (Gitea compat).""" + return user.get("login") or user.get("username") or "unknown" + + def parse_github_url(url: str) -> tuple[str, str, int, UrlType]: - """Parse a GitHub URL into (owner, repo, number, type). + """Parse a GitHub/Gitea/GitLab/Bitbucket URL into (owner, repo, number, type). Args: - url: GitHub Issue or PR URL + url: Issue/PR/MR URL from any supported forge Returns: Tuple of (owner, repo, number, type) @@ -25,26 +30,38 @@ def parse_github_url(url: str) -> tuple[str, str, int, UrlType]: ValueError: If URL format is invalid Examples: - >>> parse_github_url("https://github.com/vercel-labs/json-render/pull/35") - ('vercel-labs', 'json-render', 35, 'pr') - >>> parse_github_url("https://github.com/AsyncFuncAI/AsyncReview/issues/1") - ('AsyncFuncAI', 'AsyncReview', 1, 'issue') + >>> parse_github_url("https://github.com/owner/repo/pull/35") + ('owner', 'repo', 35, 'pr') + >>> parse_github_url("https://gitea.example.com/org/repo/pulls/42") + ('org', 'repo', 42, 'pr') + >>> parse_github_url("https://gitlab.com/org/repo/-/merge_requests/10") + ('org', 'repo', 10, 'pr') + >>> parse_github_url("https://bitbucket.org/org/repo/pull-requests/5") + ('org', 'repo', 5, 'pr') + >>> parse_github_url("https://github.com/owner/repo/issues/1") + ('owner', 'repo', 1, 'issue') """ - # Try PR URL first (primary use case) - pr_pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)" + # PR patterns — accept any domain, support multiple forge URL formats: + # GitHub: /owner/repo/pull/123 + # Gitea: /owner/repo/pulls/123 + # GitLab: /owner/repo/-/merge_requests/123 + # Bitbucket: /owner/repo/pull-requests/123 + pr_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$" pr_match = re.search(pr_pattern, url) if pr_match: return pr_match.group(1), pr_match.group(2), int(pr_match.group(3)), "pr" - - # Try Issue URL - issue_pattern = r"github\.com/([^/]+)/([^/]+)/issues/(\d+)" + + # Issue patterns — GitHub/Gitea/GitLab + # GitLab: /owner/repo/-/issues/123 + issue_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?issues/(\d+)(?:[/?#].*)?$" issue_match = re.search(issue_pattern, url) if issue_match: return issue_match.group(1), issue_match.group(2), int(issue_match.group(3)), "issue" - + raise ValueError( - f"Invalid GitHub URL: {url}\n" - "Expected format: https://github.com/owner/repo/pull/123 or .../issues/123" + f"Invalid URL: {url}\n" + "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123\n" + " or: https://host/owner/repo/issues/123" ) @@ -119,7 +136,7 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict: comments_data = comments_resp.json() comments_list = [ { - "author": c["user"]["login"], + "author": _get_login(c["user"]), "body": c["body"], } for c in comments_data @@ -144,10 +161,10 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict: "number": number, "title": pr_data.get("title", ""), "body": pr_data.get("body") or "", - "author": pr_data["user"]["login"], + "author": _get_login(pr_data["user"]), "state": pr_data.get("state", "open"), - "base_branch": pr_data["base"]["ref"], - "head_branch": pr_data["head"]["ref"], + "base_branch": pr_data.get("base", {}).get("ref", "main"), + "head_branch": pr_data.get("head", {}).get("ref", "unknown"), "files": files, "commits": commits_list, "comments": comments_list, @@ -181,7 +198,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict: comments_data = comments_resp.json() comments_list = [ { - "author": c["user"]["login"], + "author": _get_login(c["user"]), "body": c["body"], } for c in comments_data @@ -194,7 +211,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict: "number": number, "title": issue_data.get("title", ""), "body": issue_data.get("body") or "", - "author": issue_data["user"]["login"], + "author": _get_login(issue_data["user"]), "state": issue_data.get("state", "open"), "labels": [l["name"] for l in issue_data.get("labels", [])], "comments": comments_list, diff --git a/cli/main.py b/cli/main.py index 930cd3b..b51b5cd 100644 --- a/cli/main.py +++ b/cli/main.py @@ -19,6 +19,7 @@ import sys from rich.console import Console +from rich.markup import escape as rich_escape from rich.panel import Panel from rich.markdown import Markdown @@ -42,12 +43,12 @@ def print_step(step_num: int, reasoning: str, code: str): def print_info(message: str): """Print an info message.""" - console.print(f"[dim]{message}[/dim]") + console.print(f"[dim]{rich_escape(message)}[/dim]") def print_error(message: str): """Print an error message.""" - console.print(f"[red]Error: {message}[/red]") + console.print(f"[red]Error: {rich_escape(message)}[/red]") async def run_review( @@ -103,7 +104,7 @@ async def run_review( if output_format == "markdown": console.print(Panel(Markdown(output), title="Review", border_style="green")) else: - console.print(Panel(output, title="Review", border_style="green")) + console.print(Panel(rich_escape(output), title="Review", border_style="green")) def main(): diff --git a/cli/virtual_runner.py b/cli/virtual_runner.py index b9ad29e..caca7ef 100644 --- a/cli/virtual_runner.py +++ b/cli/virtual_runner.py @@ -60,7 +60,8 @@ def _ensure_configured(self): # Configure DSPy with specified model model_name = self.model - if not model_name.startswith("gemini/"): + # Only add gemini/ prefix if no provider prefix exists (no / in name) + if "/" not in model_name: model_name = f"gemini/{model_name}" dspy.configure(lm=dspy.LM(model_name)) @@ -76,7 +77,7 @@ def _ensure_configured(self): signature="context, question -> answer, sources", max_iterations=MAX_ITERATIONS, max_llm_calls=MAX_LLM_CALLS, - sub_lm=dspy.LM(f"gemini/{SUB_MODEL}" if not SUB_MODEL.startswith("gemini/") else SUB_MODEL), + sub_lm=dspy.LM(SUB_MODEL if "/" in SUB_MODEL else f"gemini/{SUB_MODEL}"), verbose=not self.quiet, interpreter=interpreter, ) diff --git a/cr/github.py b/cr/github.py index d7822dc..9ea45ba 100644 --- a/cr/github.py +++ b/cr/github.py @@ -13,15 +13,22 @@ from .diff_types import PRInfo, FileContents, DiffFileContext +def _get_login(user: dict) -> str: + """Get user login, falling back to username (Gitea compat).""" + return user.get("login") or user.get("username") or "unknown" + + # In-memory store for loaded PRs (MVP - no persistence) _pr_cache: dict[str, PRInfo] = {} def parse_pr_url(url: str) -> tuple[str, str, int]: - """Parse a GitHub PR URL into (owner, repo, number). + """Parse a PR/MR URL into (owner, repo, number). + + Supports GitHub, Gitea, GitLab, and Bitbucket URL formats. Args: - url: GitHub PR URL like https://github.com/owner/repo/pull/123 + url: PR/MR URL (e.g. /pull/1, /pulls/1, /-/merge_requests/1, /pull-requests/1) Returns: Tuple of (owner, repo, pr_number) @@ -29,10 +36,14 @@ def parse_pr_url(url: str) -> tuple[str, str, int]: Raises: ValueError: If URL format is invalid """ - pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)" + # GitHub /pull/, Gitea /pulls/, GitLab /-/merge_requests/, Bitbucket /pull-requests/ + pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$" match = re.search(pattern, url) if not match: - raise ValueError(f"Invalid GitHub PR URL: {url}") + raise ValueError( + f"Invalid PR URL: {url}\n" + "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123" + ) return match.group(1), match.group(2), int(match.group(3)) @@ -96,10 +107,10 @@ async def load_pr(pr_url: str) -> PRInfo: "author": { "name": c["commit"]["author"]["name"], "date": c["commit"]["author"]["date"], - "login": c["author"]["login"] if c.get("author") else None, - "avatar_url": c["author"]["avatar_url"] if c.get("author") else None, + "login": _get_login(c["author"]) if c.get("author") else None, + "avatar_url": c["author"].get("avatar_url", "") if c.get("author") else None, }, - "html_url": c["html_url"], + "html_url": c.get("html_url", ""), } for c in commits_data ] @@ -118,12 +129,12 @@ async def load_pr(pr_url: str) -> PRInfo: { "id": c["id"], "user": { - "login": c["user"]["login"], - "avatar_url": c["user"]["avatar_url"], + "login": _get_login(c["user"]), + "avatar_url": c["user"].get("avatar_url", ""), }, "body": c["body"], "created_at": c["created_at"], - "html_url": c["html_url"], + "html_url": c.get("html_url", ""), } for c in comments_data ] @@ -146,15 +157,15 @@ async def load_pr(pr_url: str) -> PRInfo: number=number, title=pr_data.get("title", ""), body=pr_data.get("body") or "", - base_sha=pr_data["base"]["sha"], - head_sha=pr_data["head"]["sha"], + base_sha=pr_data.get("base", {}).get("sha", "HEAD"), + head_sha=pr_data.get("head", {}).get("sha", "HEAD"), files=files, created_at=datetime.now(), - user={"login": pr_data["user"]["login"], "avatar_url": pr_data["user"]["avatar_url"]}, + user={"login": _get_login(pr_data["user"]), "avatar_url": pr_data["user"].get("avatar_url", "")}, state=pr_data.get("state", "open"), draft=pr_data.get("draft", False), - head_ref=pr_data["head"]["ref"], - base_ref=pr_data["base"]["ref"], + head_ref=pr_data.get("head", {}).get("ref", "unknown"), + base_ref=pr_data.get("base", {}).get("ref", "main"), commits=pr_data.get("commits", 0), additions=pr_data.get("additions", 0), deletions=pr_data.get("deletions", 0), diff --git a/cr/render.py b/cr/render.py index 10f62c1..f1b2f15 100644 --- a/cr/render.py +++ b/cr/render.py @@ -2,6 +2,7 @@ from rich.console import Console from rich.markdown import Markdown +from rich.markup import escape as rich_escape from rich.panel import Panel from rich.syntax import Syntax from rich.table import Table @@ -112,12 +113,12 @@ def print_help(): def print_error(message: str): """Print an error message.""" - console.print(f"\n[red]Error: {message}[/red]") + console.print(f"\n[red]Error: {rich_escape(message)}[/red]") def print_info(message: str): """Print an info message.""" - console.print(f"[dim]{message}[/dim]") + console.print(f"[dim]{rich_escape(message)}[/dim]") def print_history(history: list[tuple[str, str]]): @@ -126,10 +127,10 @@ def print_history(history: list[tuple[str, str]]): console.print("[dim]No history yet.[/dim]") else: for i, (q, a) in enumerate(history, 1): - console.print(f"\n[cyan]Q{i}:[/cyan] {q}") + console.print(f"\n[cyan]Q{i}:[/cyan] {rich_escape(q)}") # Truncate long answers display_a = a[:200] + "..." if len(a) > 200 else a - console.print(f"[green]A{i}:[/green] {display_a}") + console.print(f"[green]A{i}:[/green] {rich_escape(display_a)}") console.print() diff --git a/npx/python/cli/github_fetcher.py b/npx/python/cli/github_fetcher.py index 66e386f..9dc7305 100644 --- a/npx/python/cli/github_fetcher.py +++ b/npx/python/cli/github_fetcher.py @@ -12,11 +12,16 @@ UrlType = Literal["pr", "issue"] +def _get_login(user: dict) -> str: + """Get user login, falling back to username (Gitea compat).""" + return user.get("login") or user.get("username") or "unknown" + + def parse_github_url(url: str) -> tuple[str, str, int, UrlType]: - """Parse a GitHub URL into (owner, repo, number, type). + """Parse a GitHub/Gitea/GitLab/Bitbucket URL into (owner, repo, number, type). Args: - url: GitHub Issue or PR URL + url: Issue/PR/MR URL from any supported forge Returns: Tuple of (owner, repo, number, type) @@ -25,26 +30,38 @@ def parse_github_url(url: str) -> tuple[str, str, int, UrlType]: ValueError: If URL format is invalid Examples: - >>> parse_github_url("https://github.com/vercel-labs/json-render/pull/35") - ('vercel-labs', 'json-render', 35, 'pr') - >>> parse_github_url("https://github.com/AsyncFuncAI/AsyncReview/issues/1") - ('AsyncFuncAI', 'AsyncReview', 1, 'issue') + >>> parse_github_url("https://github.com/owner/repo/pull/35") + ('owner', 'repo', 35, 'pr') + >>> parse_github_url("https://gitea.example.com/org/repo/pulls/42") + ('org', 'repo', 42, 'pr') + >>> parse_github_url("https://gitlab.com/org/repo/-/merge_requests/10") + ('org', 'repo', 10, 'pr') + >>> parse_github_url("https://bitbucket.org/org/repo/pull-requests/5") + ('org', 'repo', 5, 'pr') + >>> parse_github_url("https://github.com/owner/repo/issues/1") + ('owner', 'repo', 1, 'issue') """ - # Try PR URL first (primary use case) - pr_pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)" + # PR patterns — accept any domain, support multiple forge URL formats: + # GitHub: /owner/repo/pull/123 + # Gitea: /owner/repo/pulls/123 + # GitLab: /owner/repo/-/merge_requests/123 + # Bitbucket: /owner/repo/pull-requests/123 + pr_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$" pr_match = re.search(pr_pattern, url) if pr_match: return pr_match.group(1), pr_match.group(2), int(pr_match.group(3)), "pr" - - # Try Issue URL - issue_pattern = r"github\.com/([^/]+)/([^/]+)/issues/(\d+)" + + # Issue patterns — GitHub/Gitea/GitLab + # GitLab: /owner/repo/-/issues/123 + issue_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?issues/(\d+)(?:[/?#].*)?$" issue_match = re.search(issue_pattern, url) if issue_match: return issue_match.group(1), issue_match.group(2), int(issue_match.group(3)), "issue" - + raise ValueError( - f"Invalid GitHub URL: {url}\n" - "Expected format: https://github.com/owner/repo/pull/123 or .../issues/123" + f"Invalid URL: {url}\n" + "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123\n" + " or: https://host/owner/repo/issues/123" ) @@ -119,12 +136,12 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict: comments_data = comments_resp.json() comments_list = [ { - "author": c["user"]["login"], + "author": _get_login(c["user"]), "body": c["body"], } for c in comments_data ] - + # Build structured result files = [ { @@ -144,11 +161,11 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict: "number": number, "title": pr_data.get("title", ""), "body": pr_data.get("body") or "", - "author": pr_data["user"]["login"], + "author": _get_login(pr_data["user"]), "state": pr_data.get("state", "open"), - "base_branch": pr_data["base"]["ref"], - "head_branch": pr_data["head"]["ref"], - "head_sha": pr_data["head"]["sha"], # For consistent file reads + "base_branch": pr_data.get("base", {}).get("ref", "main"), + "head_branch": pr_data.get("head", {}).get("ref", "unknown"), + "head_sha": pr_data.get("head", {}).get("sha", "HEAD"), # For consistent file reads "files": files, "commits": commits_list, "comments": comments_list, @@ -182,12 +199,12 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict: comments_data = comments_resp.json() comments_list = [ { - "author": c["user"]["login"], + "author": _get_login(c["user"]), "body": c["body"], } for c in comments_data ] - + return { "type": "issue", "owner": owner, @@ -195,7 +212,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict: "number": number, "title": issue_data.get("title", ""), "body": issue_data.get("body") or "", - "author": issue_data["user"]["login"], + "author": _get_login(issue_data["user"]), "state": issue_data.get("state", "open"), "labels": [l["name"] for l in issue_data.get("labels", [])], "comments": comments_list, diff --git a/npx/python/cli/main.py b/npx/python/cli/main.py index ff3761c..d00e1bf 100644 --- a/npx/python/cli/main.py +++ b/npx/python/cli/main.py @@ -19,6 +19,7 @@ import sys from rich.console import Console +from rich.markup import escape as rich_escape from rich.panel import Panel from rich.markdown import Markdown @@ -41,7 +42,7 @@ def print_step(step_num: int, reasoning: str, code: str, output: str = ""): # Show reasoning if reasoning: reasoning_display = reasoning[:800] + "..." if len(reasoning) > 800 else reasoning - console.print(f"[dim]💭 {reasoning_display}[/dim]") + console.print(f"[dim]💭 {rich_escape(reasoning_display)}[/dim]") # Show executed code with syntax highlighting if code and code.strip(): @@ -53,17 +54,17 @@ def print_step(step_num: int, reasoning: str, code: str, output: str = ""): # Show output (truncated) if output and output.strip(): output_display = output[:500] + "..." if len(output) > 500 else output - console.print(f"\n[green]📤 Output:[/green] [dim]{output_display}[/dim]") + console.print(f"\n[green]📤 Output:[/green] [dim]{rich_escape(output_display)}[/dim]") def print_info(message: str): """Print an info message.""" - console.print(f"[dim]{message}[/dim]") + console.print(f"[dim]{rich_escape(message)}[/dim]") def print_error(message: str): """Print an error message.""" - console.print(f"[red]Error: {message}[/red]") + console.print(f"[red]Error: {rich_escape(message)}[/red]") async def run_review( @@ -135,7 +136,7 @@ async def run_review( if output_format == "markdown": console.print(Panel(Markdown(output), title="Review", border_style="green")) else: - console.print(Panel(output, title="Review", border_style="green")) + console.print(Panel(rich_escape(output), title="Review", border_style="green")) # Submit as GitHub comment if requested if submit: diff --git a/npx/python/cli/repo_tools.py b/npx/python/cli/repo_tools.py index 8a487d5..35e44ab 100644 --- a/npx/python/cli/repo_tools.py +++ b/npx/python/cli/repo_tools.py @@ -320,7 +320,13 @@ async def search_code(self, query: str) -> list[dict[str, Any]]: data = resp.json() results = [] - for item in data.get("items", []): + # GitHub uses "items", Gitea may use "data" — handle both + items = data.get("items") or data.get("data") or [] + if not items: + # Could mean: no matches, search disabled, or API incompatibility + total = data.get("total_count", data.get("total", "?")) + print(f"[DEBUG-SEARCH] No results (total_count={total}). Search may be disabled on this instance.") + for item in items: entry = {"path": item.get("path", "")} # Extract fragment from text_matches if available text_matches = item.get("text_matches", []) diff --git a/npx/python/cli/virtual_runner.py b/npx/python/cli/virtual_runner.py index ed2ec0f..4cdfdc5 100644 --- a/npx/python/cli/virtual_runner.py +++ b/npx/python/cli/virtual_runner.py @@ -161,17 +161,18 @@ def _ensure_configured(self): # Configure DSPy with specified model (cache=False to prevent disk caching) model_name = self.model - if not model_name.startswith("gemini/"): + # Only add gemini/ prefix if no provider prefix exists (no / in name) + if "/" not in model_name: model_name = f"gemini/{model_name}" - + self._lm = dspy.LM(model_name, cache=False) - + # Create RLM with custom interpreter that has Deno 2.x fix deno_command = build_deno_command() interpreter = PythonInterpreter(deno_command=deno_command) - - # Standard signature - sub_model = f"gemini/{SUB_MODEL}" if not SUB_MODEL.startswith("gemini/") else SUB_MODEL + + # Standard signature - same prefix logic for sub model + sub_model = SUB_MODEL if "/" in SUB_MODEL else f"gemini/{SUB_MODEL}" self._rlm = dspy.RLM( signature="context, question -> answer, sources", max_iterations=MAX_ITERATIONS, diff --git a/npx/python/cr/github.py b/npx/python/cr/github.py index d7822dc..9ea45ba 100644 --- a/npx/python/cr/github.py +++ b/npx/python/cr/github.py @@ -13,15 +13,22 @@ from .diff_types import PRInfo, FileContents, DiffFileContext +def _get_login(user: dict) -> str: + """Get user login, falling back to username (Gitea compat).""" + return user.get("login") or user.get("username") or "unknown" + + # In-memory store for loaded PRs (MVP - no persistence) _pr_cache: dict[str, PRInfo] = {} def parse_pr_url(url: str) -> tuple[str, str, int]: - """Parse a GitHub PR URL into (owner, repo, number). + """Parse a PR/MR URL into (owner, repo, number). + + Supports GitHub, Gitea, GitLab, and Bitbucket URL formats. Args: - url: GitHub PR URL like https://github.com/owner/repo/pull/123 + url: PR/MR URL (e.g. /pull/1, /pulls/1, /-/merge_requests/1, /pull-requests/1) Returns: Tuple of (owner, repo, pr_number) @@ -29,10 +36,14 @@ def parse_pr_url(url: str) -> tuple[str, str, int]: Raises: ValueError: If URL format is invalid """ - pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)" + # GitHub /pull/, Gitea /pulls/, GitLab /-/merge_requests/, Bitbucket /pull-requests/ + pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$" match = re.search(pattern, url) if not match: - raise ValueError(f"Invalid GitHub PR URL: {url}") + raise ValueError( + f"Invalid PR URL: {url}\n" + "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123" + ) return match.group(1), match.group(2), int(match.group(3)) @@ -96,10 +107,10 @@ async def load_pr(pr_url: str) -> PRInfo: "author": { "name": c["commit"]["author"]["name"], "date": c["commit"]["author"]["date"], - "login": c["author"]["login"] if c.get("author") else None, - "avatar_url": c["author"]["avatar_url"] if c.get("author") else None, + "login": _get_login(c["author"]) if c.get("author") else None, + "avatar_url": c["author"].get("avatar_url", "") if c.get("author") else None, }, - "html_url": c["html_url"], + "html_url": c.get("html_url", ""), } for c in commits_data ] @@ -118,12 +129,12 @@ async def load_pr(pr_url: str) -> PRInfo: { "id": c["id"], "user": { - "login": c["user"]["login"], - "avatar_url": c["user"]["avatar_url"], + "login": _get_login(c["user"]), + "avatar_url": c["user"].get("avatar_url", ""), }, "body": c["body"], "created_at": c["created_at"], - "html_url": c["html_url"], + "html_url": c.get("html_url", ""), } for c in comments_data ] @@ -146,15 +157,15 @@ async def load_pr(pr_url: str) -> PRInfo: number=number, title=pr_data.get("title", ""), body=pr_data.get("body") or "", - base_sha=pr_data["base"]["sha"], - head_sha=pr_data["head"]["sha"], + base_sha=pr_data.get("base", {}).get("sha", "HEAD"), + head_sha=pr_data.get("head", {}).get("sha", "HEAD"), files=files, created_at=datetime.now(), - user={"login": pr_data["user"]["login"], "avatar_url": pr_data["user"]["avatar_url"]}, + user={"login": _get_login(pr_data["user"]), "avatar_url": pr_data["user"].get("avatar_url", "")}, state=pr_data.get("state", "open"), draft=pr_data.get("draft", False), - head_ref=pr_data["head"]["ref"], - base_ref=pr_data["base"]["ref"], + head_ref=pr_data.get("head", {}).get("ref", "unknown"), + base_ref=pr_data.get("base", {}).get("ref", "main"), commits=pr_data.get("commits", 0), additions=pr_data.get("additions", 0), deletions=pr_data.get("deletions", 0), diff --git a/tests/test_url_parsing.py b/tests/test_url_parsing.py new file mode 100644 index 0000000..c6067b6 --- /dev/null +++ b/tests/test_url_parsing.py @@ -0,0 +1,64 @@ +"""Regression tests for PR/issue URL parsing across forges.""" + +import pytest +from cli.github_fetcher import parse_github_url +from cr.github import parse_pr_url + + +# --- parse_github_url (cli) --- + +@pytest.mark.parametrize("url,expected", [ + # GitHub + ("https://github.com/owner/repo/pull/35", ("owner", "repo", 35, "pr")), + ("https://github.com/owner/repo/pull/1?tab=files", ("owner", "repo", 1, "pr")), + ("https://github.com/owner/repo/pull/1/", ("owner", "repo", 1, "pr")), + # Gitea + ("https://gitea.example.com/org/repo/pulls/42", ("org", "repo", 42, "pr")), + ("https://gitea.example.com/org/my-project/pulls/1#issuecomment-20", ("org", "my-project", 1, "pr")), + # GitLab + ("https://gitlab.com/org/repo/-/merge_requests/10", ("org", "repo", 10, "pr")), + ("https://gitlab.com/org/repo/-/merge_requests/10#note_123", ("org", "repo", 10, "pr")), + # Bitbucket + ("https://bitbucket.org/org/repo/pull-requests/5", ("org", "repo", 5, "pr")), + ("https://bitbucket.org/org/repo/pull-requests/5?t=1", ("org", "repo", 5, "pr")), + # Issues + ("https://github.com/owner/repo/issues/1", ("owner", "repo", 1, "issue")), + ("https://gitea.example.com/org/repo/issues/99", ("org", "repo", 99, "issue")), + ("https://gitlab.com/org/repo/-/issues/7", ("org", "repo", 7, "issue")), +]) +def test_parse_github_url(url, expected): + assert parse_github_url(url) == expected + + +@pytest.mark.parametrize("url", [ + "https://github.com/owner/repo", + "https://github.com/owner/repo/tree/main", + "not-a-url", + "", +]) +def test_parse_github_url_invalid(url): + with pytest.raises(ValueError): + parse_github_url(url) + + +# --- parse_pr_url (cr) --- + +@pytest.mark.parametrize("url,expected", [ + ("https://github.com/o/r/pull/1", ("o", "r", 1)), + ("https://gitea.example.com/org/repo/pulls/42", ("org", "repo", 42)), + ("https://gitlab.com/org/repo/-/merge_requests/10", ("org", "repo", 10)), + ("https://bitbucket.org/org/repo/pull-requests/5", ("org", "repo", 5)), + ("https://github.com/o/r/pull/1?tab=files#diff", ("o", "r", 1)), +]) +def test_parse_pr_url(url, expected): + assert parse_pr_url(url) == expected + + +@pytest.mark.parametrize("url", [ + "https://github.com/owner/repo/issues/1", + "https://github.com/owner/repo", + "garbage", +]) +def test_parse_pr_url_invalid(url): + with pytest.raises(ValueError): + parse_pr_url(url)