From af21811bb331f103d257cab01bad96b8fffab325 Mon Sep 17 00:00:00 2001
From: Gloria <gloria@sylo.top>
Date: Mon, 16 Feb 2026 11:59:10 +0000
Subject: [PATCH] feat: multi-forge support (GitHub, Gitea, GitLab, Bitbucket)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for multiple Git forge URL formats in PR/issue parsing:
- GitHub: /owner/repo/pull/123
- Gitea: /owner/repo/pulls/123
- GitLab: /owner/repo/-/merge_requests/123
- Bitbucket: /owner/repo/pull-requests/123

Also includes:
- Defensive .get() chains for API response fields that differ across
  forges (user.login vs user.username, optional avatar_url, html_url)
- _get_login() helper with fallback for Gitea's 'username' field
- Rich markup escaping for all user-generated console output
- Gitea search API compatibility (data vs items response keys)
- Provider-prefixed model passthrough (e.g. openai/gpt-4) instead of
  forcing gemini/ prefix — enables use with LiteLLM and other proxies
- 24 regression tests covering all forge URL formats

Closes AsyncFuncAI/AsyncReview#8
---
 cli/github_fetcher.py            | 57 ++++++++++++++++++----------
 cli/main.py                      |  7 ++--
 cli/virtual_runner.py            |  5 ++-
 cr/github.py                     | 41 ++++++++++++--------
 cr/render.py                     |  9 +++--
 npx/python/cli/github_fetcher.py | 63 +++++++++++++++++++------------
 npx/python/cli/main.py           | 11 +++---
 npx/python/cli/repo_tools.py     |  8 +++-
 npx/python/cli/virtual_runner.py | 13 ++++---
 npx/python/cr/github.py          | 41 ++++++++++++--------
 tests/test_url_parsing.py        | 64 ++++++++++++++++++++++++++++++++
 11 files changed, 225 insertions(+), 94 deletions(-)
 create mode 100644 tests/test_url_parsing.py

diff --git a/cli/github_fetcher.py b/cli/github_fetcher.py
index a9a9555..385f2dc 100644
--- a/cli/github_fetcher.py
+++ b/cli/github_fetcher.py
@@ -12,11 +12,16 @@
 UrlType = Literal["pr", "issue"]
 
 
+def _get_login(user: dict) -> str:
+    """Get user login, falling back to username (Gitea compat)."""
+    return user.get("login") or user.get("username") or "unknown"
+
+
 def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
-    """Parse a GitHub URL into (owner, repo, number, type).
+    """Parse a GitHub/Gitea/GitLab/Bitbucket URL into (owner, repo, number, type).
     
     Args:
-        url: GitHub Issue or PR URL
+        url: Issue/PR/MR URL from any supported forge
         
     Returns:
         Tuple of (owner, repo, number, type)
@@ -25,26 +30,38 @@ def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
         ValueError: If URL format is invalid
         
     Examples:
-        >>> parse_github_url("https://github.com/vercel-labs/json-render/pull/35")
-        ('vercel-labs', 'json-render', 35, 'pr')
-        >>> parse_github_url("https://github.com/AsyncFuncAI/AsyncReview/issues/1")
-        ('AsyncFuncAI', 'AsyncReview', 1, 'issue')
+        >>> parse_github_url("https://github.com/owner/repo/pull/35")
+        ('owner', 'repo', 35, 'pr')
+        >>> parse_github_url("https://gitea.example.com/org/repo/pulls/42")
+        ('org', 'repo', 42, 'pr')
+        >>> parse_github_url("https://gitlab.com/org/repo/-/merge_requests/10")
+        ('org', 'repo', 10, 'pr')
+        >>> parse_github_url("https://bitbucket.org/org/repo/pull-requests/5")
+        ('org', 'repo', 5, 'pr')
+        >>> parse_github_url("https://github.com/owner/repo/issues/1")
+        ('owner', 'repo', 1, 'issue')
     """
-    # Try PR URL first (primary use case)
-    pr_pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
+    # PR patterns — accept any domain, support multiple forge URL formats:
+    #   GitHub:    /owner/repo/pull/123
+    #   Gitea:     /owner/repo/pulls/123
+    #   GitLab:    /owner/repo/-/merge_requests/123
+    #   Bitbucket: /owner/repo/pull-requests/123
+    pr_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
     pr_match = re.search(pr_pattern, url)
     if pr_match:
         return pr_match.group(1), pr_match.group(2), int(pr_match.group(3)), "pr"
-    
-    # Try Issue URL
-    issue_pattern = r"github\.com/([^/]+)/([^/]+)/issues/(\d+)"
+
+    # Issue patterns — GitHub/Gitea/GitLab
+    #   GitLab:    /owner/repo/-/issues/123
+    issue_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?issues/(\d+)(?:[/?#].*)?$"
     issue_match = re.search(issue_pattern, url)
     if issue_match:
         return issue_match.group(1), issue_match.group(2), int(issue_match.group(3)), "issue"
-    
+
     raise ValueError(
-        f"Invalid GitHub URL: {url}\n"
-        "Expected format: https://github.com/owner/repo/pull/123 or .../issues/123"
+        f"Invalid URL: {url}\n"
+        "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123\n"
+        "     or:  https://host/owner/repo/issues/123"
     )
 
 
@@ -119,7 +136,7 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
             comments_data = comments_resp.json()
             comments_list = [
                 {
-                    "author": c["user"]["login"],
+                    "author": _get_login(c["user"]),
                     "body": c["body"],
                 }
                 for c in comments_data
@@ -144,10 +161,10 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
         "number": number,
         "title": pr_data.get("title", ""),
         "body": pr_data.get("body") or "",
-        "author": pr_data["user"]["login"],
+        "author": _get_login(pr_data["user"]),
         "state": pr_data.get("state", "open"),
-        "base_branch": pr_data["base"]["ref"],
-        "head_branch": pr_data["head"]["ref"],
+        "base_branch": pr_data.get("base", {}).get("ref", "main"),
+        "head_branch": pr_data.get("head", {}).get("ref", "unknown"),
         "files": files,
         "commits": commits_list,
         "comments": comments_list,
@@ -181,7 +198,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
             comments_data = comments_resp.json()
             comments_list = [
                 {
-                    "author": c["user"]["login"],
+                    "author": _get_login(c["user"]),
                     "body": c["body"],
                 }
                 for c in comments_data
@@ -194,7 +211,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
         "number": number,
         "title": issue_data.get("title", ""),
         "body": issue_data.get("body") or "",
-        "author": issue_data["user"]["login"],
+        "author": _get_login(issue_data["user"]),
         "state": issue_data.get("state", "open"),
         "labels": [l["name"] for l in issue_data.get("labels", [])],
         "comments": comments_list,
diff --git a/cli/main.py b/cli/main.py
index 930cd3b..b51b5cd 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -19,6 +19,7 @@
 import sys
 
 from rich.console import Console
+from rich.markup import escape as rich_escape
 from rich.panel import Panel
 from rich.markdown import Markdown
 
@@ -42,12 +43,12 @@ def print_step(step_num: int, reasoning: str, code: str):
 
 def print_info(message: str):
     """Print an info message."""
-    console.print(f"[dim]{message}[/dim]")
+    console.print(f"[dim]{rich_escape(message)}[/dim]")
 
 
 def print_error(message: str):
     """Print an error message."""
-    console.print(f"[red]Error: {message}[/red]")
+    console.print(f"[red]Error: {rich_escape(message)}[/red]")
 
 
 async def run_review(
@@ -103,7 +104,7 @@ async def run_review(
         if output_format == "markdown":
             console.print(Panel(Markdown(output), title="Review", border_style="green"))
         else:
-            console.print(Panel(output, title="Review", border_style="green"))
+            console.print(Panel(rich_escape(output), title="Review", border_style="green"))
 
 
 def main():
diff --git a/cli/virtual_runner.py b/cli/virtual_runner.py
index b9ad29e..caca7ef 100644
--- a/cli/virtual_runner.py
+++ b/cli/virtual_runner.py
@@ -60,7 +60,8 @@ def _ensure_configured(self):
         
         # Configure DSPy with specified model
         model_name = self.model
-        if not model_name.startswith("gemini/"):
+        # Only add gemini/ prefix if no provider prefix exists (no / in name)
+        if "/" not in model_name:
             model_name = f"gemini/{model_name}"
         
         dspy.configure(lm=dspy.LM(model_name))
@@ -76,7 +77,7 @@ def _ensure_configured(self):
             signature="context, question -> answer, sources",
             max_iterations=MAX_ITERATIONS,
             max_llm_calls=MAX_LLM_CALLS,
-            sub_lm=dspy.LM(f"gemini/{SUB_MODEL}" if not SUB_MODEL.startswith("gemini/") else SUB_MODEL),
+            sub_lm=dspy.LM(SUB_MODEL if "/" in SUB_MODEL else f"gemini/{SUB_MODEL}"),
             verbose=not self.quiet,
             interpreter=interpreter,
         )
diff --git a/cr/github.py b/cr/github.py
index d7822dc..9ea45ba 100644
--- a/cr/github.py
+++ b/cr/github.py
@@ -13,15 +13,22 @@
 from .diff_types import PRInfo, FileContents, DiffFileContext
 
 
+def _get_login(user: dict) -> str:
+    """Get user login, falling back to username (Gitea compat)."""
+    return user.get("login") or user.get("username") or "unknown"
+
+
 # In-memory store for loaded PRs (MVP - no persistence)
 _pr_cache: dict[str, PRInfo] = {}
 
 
 def parse_pr_url(url: str) -> tuple[str, str, int]:
-    """Parse a GitHub PR URL into (owner, repo, number).
+    """Parse a PR/MR URL into (owner, repo, number).
+    
+    Supports GitHub, Gitea, GitLab, and Bitbucket URL formats.
     
     Args:
-        url: GitHub PR URL like https://github.com/owner/repo/pull/123
+        url: PR/MR URL (e.g. /pull/1, /pulls/1, /-/merge_requests/1, /pull-requests/1)
         
     Returns:
         Tuple of (owner, repo, pr_number)
@@ -29,10 +36,14 @@ def parse_pr_url(url: str) -> tuple[str, str, int]:
     Raises:
         ValueError: If URL format is invalid
     """
-    pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
+    # GitHub /pull/, Gitea /pulls/, GitLab /-/merge_requests/, Bitbucket /pull-requests/
+    pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
     match = re.search(pattern, url)
     if not match:
-        raise ValueError(f"Invalid GitHub PR URL: {url}")
+        raise ValueError(
+            f"Invalid PR URL: {url}\n"
+            "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123"
+        )
     return match.group(1), match.group(2), int(match.group(3))
 
 
@@ -96,10 +107,10 @@ async def load_pr(pr_url: str) -> PRInfo:
                     "author": {
                         "name": c["commit"]["author"]["name"],
                         "date": c["commit"]["author"]["date"],
-                        "login": c["author"]["login"] if c.get("author") else None,
-                        "avatar_url": c["author"]["avatar_url"] if c.get("author") else None,
+                        "login": _get_login(c["author"]) if c.get("author") else None,
+                        "avatar_url": c["author"].get("avatar_url", "") if c.get("author") else None,
                     },
-                    "html_url": c["html_url"],
+                    "html_url": c.get("html_url", ""),
                 }
                 for c in commits_data
             ]
@@ -118,12 +129,12 @@ async def load_pr(pr_url: str) -> PRInfo:
                  {
                      "id": c["id"],
                      "user": {
-                         "login": c["user"]["login"],
-                         "avatar_url": c["user"]["avatar_url"],
+                         "login": _get_login(c["user"]),
+                         "avatar_url": c["user"].get("avatar_url", ""),
                      },
                      "body": c["body"],
                      "created_at": c["created_at"],
-                     "html_url": c["html_url"],
+                     "html_url": c.get("html_url", ""),
                  }
                  for c in comments_data
              ]
@@ -146,15 +157,15 @@ async def load_pr(pr_url: str) -> PRInfo:
         number=number,
         title=pr_data.get("title", ""),
         body=pr_data.get("body") or "",
-        base_sha=pr_data["base"]["sha"],
-        head_sha=pr_data["head"]["sha"],
+        base_sha=pr_data.get("base", {}).get("sha", "HEAD"),
+        head_sha=pr_data.get("head", {}).get("sha", "HEAD"),
         files=files,
         created_at=datetime.now(),
-        user={"login": pr_data["user"]["login"], "avatar_url": pr_data["user"]["avatar_url"]},
+        user={"login": _get_login(pr_data["user"]), "avatar_url": pr_data["user"].get("avatar_url", "")},
         state=pr_data.get("state", "open"),
         draft=pr_data.get("draft", False),
-        head_ref=pr_data["head"]["ref"],
-        base_ref=pr_data["base"]["ref"],
+        head_ref=pr_data.get("head", {}).get("ref", "unknown"),
+        base_ref=pr_data.get("base", {}).get("ref", "main"),
         commits=pr_data.get("commits", 0),
         additions=pr_data.get("additions", 0),
         deletions=pr_data.get("deletions", 0),
diff --git a/cr/render.py b/cr/render.py
index 10f62c1..f1b2f15 100644
--- a/cr/render.py
+++ b/cr/render.py
@@ -2,6 +2,7 @@
 
 from rich.console import Console
 from rich.markdown import Markdown
+from rich.markup import escape as rich_escape
 from rich.panel import Panel
 from rich.syntax import Syntax
 from rich.table import Table
@@ -112,12 +113,12 @@ def print_help():
 
 def print_error(message: str):
     """Print an error message."""
-    console.print(f"\n[red]Error: {message}[/red]")
+    console.print(f"\n[red]Error: {rich_escape(message)}[/red]")
 
 
 def print_info(message: str):
     """Print an info message."""
-    console.print(f"[dim]{message}[/dim]")
+    console.print(f"[dim]{rich_escape(message)}[/dim]")
 
 
 def print_history(history: list[tuple[str, str]]):
@@ -126,10 +127,10 @@ def print_history(history: list[tuple[str, str]]):
         console.print("[dim]No history yet.[/dim]")
     else:
         for i, (q, a) in enumerate(history, 1):
-            console.print(f"\n[cyan]Q{i}:[/cyan] {q}")
+            console.print(f"\n[cyan]Q{i}:[/cyan] {rich_escape(q)}")
             # Truncate long answers
             display_a = a[:200] + "..." if len(a) > 200 else a
-            console.print(f"[green]A{i}:[/green] {display_a}")
+            console.print(f"[green]A{i}:[/green] {rich_escape(display_a)}")
     console.print()
 
 
diff --git a/npx/python/cli/github_fetcher.py b/npx/python/cli/github_fetcher.py
index 66e386f..9dc7305 100644
--- a/npx/python/cli/github_fetcher.py
+++ b/npx/python/cli/github_fetcher.py
@@ -12,11 +12,16 @@
 UrlType = Literal["pr", "issue"]
 
 
+def _get_login(user: dict) -> str:
+    """Get user login, falling back to username (Gitea compat)."""
+    return user.get("login") or user.get("username") or "unknown"
+
+
 def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
-    """Parse a GitHub URL into (owner, repo, number, type).
+    """Parse a GitHub/Gitea/GitLab/Bitbucket URL into (owner, repo, number, type).
     
     Args:
-        url: GitHub Issue or PR URL
+        url: Issue/PR/MR URL from any supported forge
         
     Returns:
         Tuple of (owner, repo, number, type)
@@ -25,26 +30,38 @@ def parse_github_url(url: str) -> tuple[str, str, int, UrlType]:
         ValueError: If URL format is invalid
         
     Examples:
-        >>> parse_github_url("https://github.com/vercel-labs/json-render/pull/35")
-        ('vercel-labs', 'json-render', 35, 'pr')
-        >>> parse_github_url("https://github.com/AsyncFuncAI/AsyncReview/issues/1")
-        ('AsyncFuncAI', 'AsyncReview', 1, 'issue')
+        >>> parse_github_url("https://github.com/owner/repo/pull/35")
+        ('owner', 'repo', 35, 'pr')
+        >>> parse_github_url("https://gitea.example.com/org/repo/pulls/42")
+        ('org', 'repo', 42, 'pr')
+        >>> parse_github_url("https://gitlab.com/org/repo/-/merge_requests/10")
+        ('org', 'repo', 10, 'pr')
+        >>> parse_github_url("https://bitbucket.org/org/repo/pull-requests/5")
+        ('org', 'repo', 5, 'pr')
+        >>> parse_github_url("https://github.com/owner/repo/issues/1")
+        ('owner', 'repo', 1, 'issue')
     """
-    # Try PR URL first (primary use case)
-    pr_pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
+    # PR patterns — accept any domain, support multiple forge URL formats:
+    #   GitHub:    /owner/repo/pull/123
+    #   Gitea:     /owner/repo/pulls/123
+    #   GitLab:    /owner/repo/-/merge_requests/123
+    #   Bitbucket: /owner/repo/pull-requests/123
+    pr_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
     pr_match = re.search(pr_pattern, url)
     if pr_match:
         return pr_match.group(1), pr_match.group(2), int(pr_match.group(3)), "pr"
-    
-    # Try Issue URL
-    issue_pattern = r"github\.com/([^/]+)/([^/]+)/issues/(\d+)"
+
+    # Issue patterns — GitHub/Gitea/GitLab
+    #   GitLab:    /owner/repo/-/issues/123
+    issue_pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?issues/(\d+)(?:[/?#].*)?$"
     issue_match = re.search(issue_pattern, url)
     if issue_match:
         return issue_match.group(1), issue_match.group(2), int(issue_match.group(3)), "issue"
-    
+
     raise ValueError(
-        f"Invalid GitHub URL: {url}\n"
-        "Expected format: https://github.com/owner/repo/pull/123 or .../issues/123"
+        f"Invalid URL: {url}\n"
+        "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123\n"
+        "     or:  https://host/owner/repo/issues/123"
     )
 
 
@@ -119,12 +136,12 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
             comments_data = comments_resp.json()
             comments_list = [
                 {
-                    "author": c["user"]["login"],
+                    "author": _get_login(c["user"]),
                     "body": c["body"],
                 }
                 for c in comments_data
             ]
-    
+
     # Build structured result
     files = [
         {
@@ -144,11 +161,11 @@ async def fetch_pr(owner: str, repo: str, number: int) -> dict:
         "number": number,
         "title": pr_data.get("title", ""),
         "body": pr_data.get("body") or "",
-        "author": pr_data["user"]["login"],
+        "author": _get_login(pr_data["user"]),
         "state": pr_data.get("state", "open"),
-        "base_branch": pr_data["base"]["ref"],
-        "head_branch": pr_data["head"]["ref"],
-        "head_sha": pr_data["head"]["sha"],  # For consistent file reads
+        "base_branch": pr_data.get("base", {}).get("ref", "main"),
+        "head_branch": pr_data.get("head", {}).get("ref", "unknown"),
+        "head_sha": pr_data.get("head", {}).get("sha", "HEAD"),  # For consistent file reads
         "files": files,
         "commits": commits_list,
         "comments": comments_list,
@@ -182,12 +199,12 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
             comments_data = comments_resp.json()
             comments_list = [
                 {
-                    "author": c["user"]["login"],
+                    "author": _get_login(c["user"]),
                     "body": c["body"],
                 }
                 for c in comments_data
             ]
-    
+
     return {
         "type": "issue",
         "owner": owner,
@@ -195,7 +212,7 @@ async def fetch_issue(owner: str, repo: str, number: int) -> dict:
         "number": number,
         "title": issue_data.get("title", ""),
         "body": issue_data.get("body") or "",
-        "author": issue_data["user"]["login"],
+        "author": _get_login(issue_data["user"]),
         "state": issue_data.get("state", "open"),
         "labels": [l["name"] for l in issue_data.get("labels", [])],
         "comments": comments_list,
diff --git a/npx/python/cli/main.py b/npx/python/cli/main.py
index ff3761c..d00e1bf 100644
--- a/npx/python/cli/main.py
+++ b/npx/python/cli/main.py
@@ -19,6 +19,7 @@
 import sys
 
 from rich.console import Console
+from rich.markup import escape as rich_escape
 from rich.panel import Panel
 from rich.markdown import Markdown
 
@@ -41,7 +42,7 @@ def print_step(step_num: int, reasoning: str, code: str, output: str = ""):
     # Show reasoning
     if reasoning:
         reasoning_display = reasoning[:800] + "..." if len(reasoning) > 800 else reasoning
-        console.print(f"[dim]💭 {reasoning_display}[/dim]")
+        console.print(f"[dim]💭 {rich_escape(reasoning_display)}[/dim]")
     
     # Show executed code with syntax highlighting
     if code and code.strip():
@@ -53,17 +54,17 @@ def print_step(step_num: int, reasoning: str, code: str, output: str = ""):
     # Show output (truncated)
     if output and output.strip():
         output_display = output[:500] + "..." if len(output) > 500 else output
-        console.print(f"\n[green]📤 Output:[/green] [dim]{output_display}[/dim]")
+        console.print(f"\n[green]📤 Output:[/green] [dim]{rich_escape(output_display)}[/dim]")
 
 
 def print_info(message: str):
     """Print an info message."""
-    console.print(f"[dim]{message}[/dim]")
+    console.print(f"[dim]{rich_escape(message)}[/dim]")
 
 
 def print_error(message: str):
     """Print an error message."""
-    console.print(f"[red]Error: {message}[/red]")
+    console.print(f"[red]Error: {rich_escape(message)}[/red]")
 
 
 async def run_review(
@@ -135,7 +136,7 @@ async def run_review(
         if output_format == "markdown":
             console.print(Panel(Markdown(output), title="Review", border_style="green"))
         else:
-            console.print(Panel(output, title="Review", border_style="green"))
+            console.print(Panel(rich_escape(output), title="Review", border_style="green"))
     
     # Submit as GitHub comment if requested
     if submit:
diff --git a/npx/python/cli/repo_tools.py b/npx/python/cli/repo_tools.py
index 8a487d5..35e44ab 100644
--- a/npx/python/cli/repo_tools.py
+++ b/npx/python/cli/repo_tools.py
@@ -320,7 +320,13 @@ async def search_code(self, query: str) -> list[dict[str, Any]]:
         data = resp.json()
         results = []
         
-        for item in data.get("items", []):
+        # GitHub uses "items", Gitea may use "data" — handle both
+        items = data.get("items") or data.get("data") or []
+        if not items:
+            # Could mean: no matches, search disabled, or API incompatibility
+            total = data.get("total_count", data.get("total", "?"))
+            print(f"[DEBUG-SEARCH] No results (total_count={total}). Search may be disabled on this instance.")
+        for item in items:
             entry = {"path": item.get("path", "")}
             # Extract fragment from text_matches if available
             text_matches = item.get("text_matches", [])
diff --git a/npx/python/cli/virtual_runner.py b/npx/python/cli/virtual_runner.py
index ed2ec0f..4cdfdc5 100644
--- a/npx/python/cli/virtual_runner.py
+++ b/npx/python/cli/virtual_runner.py
@@ -161,17 +161,18 @@ def _ensure_configured(self):
         
         # Configure DSPy with specified model (cache=False to prevent disk caching)
         model_name = self.model
-        if not model_name.startswith("gemini/"):
+        # Only add gemini/ prefix if no provider prefix exists (no / in name)
+        if "/" not in model_name:
             model_name = f"gemini/{model_name}"
-        
+
         self._lm = dspy.LM(model_name, cache=False)
-        
+
         # Create RLM with custom interpreter that has Deno 2.x fix
         deno_command = build_deno_command()
         interpreter = PythonInterpreter(deno_command=deno_command)
-        
-        # Standard signature
-        sub_model = f"gemini/{SUB_MODEL}" if not SUB_MODEL.startswith("gemini/") else SUB_MODEL
+
+        # Standard signature - same prefix logic for sub model
+        sub_model = SUB_MODEL if "/" in SUB_MODEL else f"gemini/{SUB_MODEL}"
         self._rlm = dspy.RLM(
             signature="context, question -> answer, sources",
             max_iterations=MAX_ITERATIONS,
diff --git a/npx/python/cr/github.py b/npx/python/cr/github.py
index d7822dc..9ea45ba 100644
--- a/npx/python/cr/github.py
+++ b/npx/python/cr/github.py
@@ -13,15 +13,22 @@
 from .diff_types import PRInfo, FileContents, DiffFileContext
 
 
+def _get_login(user: dict) -> str:
+    """Get user login, falling back to username (Gitea compat)."""
+    return user.get("login") or user.get("username") or "unknown"
+
+
 # In-memory store for loaded PRs (MVP - no persistence)
 _pr_cache: dict[str, PRInfo] = {}
 
 
 def parse_pr_url(url: str) -> tuple[str, str, int]:
-    """Parse a GitHub PR URL into (owner, repo, number).
+    """Parse a PR/MR URL into (owner, repo, number).
+    
+    Supports GitHub, Gitea, GitLab, and Bitbucket URL formats.
     
     Args:
-        url: GitHub PR URL like https://github.com/owner/repo/pull/123
+        url: PR/MR URL (e.g. /pull/1, /pulls/1, /-/merge_requests/1, /pull-requests/1)
         
     Returns:
         Tuple of (owner, repo, pr_number)
@@ -29,10 +36,14 @@ def parse_pr_url(url: str) -> tuple[str, str, int]:
     Raises:
         ValueError: If URL format is invalid
     """
-    pattern = r"github\.com/([^/]+)/([^/]+)/pull/(\d+)"
+    # GitHub /pull/, Gitea /pulls/, GitLab /-/merge_requests/, Bitbucket /pull-requests/
+    pattern = r"^https?://[^/]+/([^/]+)/([^/]+)/(?:-/)?(?:pulls?|merge_requests|pull-requests)/(\d+)(?:[/?#].*)?$"
     match = re.search(pattern, url)
     if not match:
-        raise ValueError(f"Invalid GitHub PR URL: {url}")
+        raise ValueError(
+            f"Invalid PR URL: {url}\n"
+            "Expected: https://host/owner/repo/{pull,pulls,merge_requests,pull-requests}/123"
+        )
     return match.group(1), match.group(2), int(match.group(3))
 
 
@@ -96,10 +107,10 @@ async def load_pr(pr_url: str) -> PRInfo:
                     "author": {
                         "name": c["commit"]["author"]["name"],
                         "date": c["commit"]["author"]["date"],
-                        "login": c["author"]["login"] if c.get("author") else None,
-                        "avatar_url": c["author"]["avatar_url"] if c.get("author") else None,
+                        "login": _get_login(c["author"]) if c.get("author") else None,
+                        "avatar_url": c["author"].get("avatar_url", "") if c.get("author") else None,
                     },
-                    "html_url": c["html_url"],
+                    "html_url": c.get("html_url", ""),
                 }
                 for c in commits_data
             ]
@@ -118,12 +129,12 @@ async def load_pr(pr_url: str) -> PRInfo:
                  {
                      "id": c["id"],
                      "user": {
-                         "login": c["user"]["login"],
-                         "avatar_url": c["user"]["avatar_url"],
+                         "login": _get_login(c["user"]),
+                         "avatar_url": c["user"].get("avatar_url", ""),
                      },
                      "body": c["body"],
                      "created_at": c["created_at"],
-                     "html_url": c["html_url"],
+                     "html_url": c.get("html_url", ""),
                  }
                  for c in comments_data
              ]
@@ -146,15 +157,15 @@ async def load_pr(pr_url: str) -> PRInfo:
         number=number,
         title=pr_data.get("title", ""),
         body=pr_data.get("body") or "",
-        base_sha=pr_data["base"]["sha"],
-        head_sha=pr_data["head"]["sha"],
+        base_sha=pr_data.get("base", {}).get("sha", "HEAD"),
+        head_sha=pr_data.get("head", {}).get("sha", "HEAD"),
         files=files,
         created_at=datetime.now(),
-        user={"login": pr_data["user"]["login"], "avatar_url": pr_data["user"]["avatar_url"]},
+        user={"login": _get_login(pr_data["user"]), "avatar_url": pr_data["user"].get("avatar_url", "")},
         state=pr_data.get("state", "open"),
         draft=pr_data.get("draft", False),
-        head_ref=pr_data["head"]["ref"],
-        base_ref=pr_data["base"]["ref"],
+        head_ref=pr_data.get("head", {}).get("ref", "unknown"),
+        base_ref=pr_data.get("base", {}).get("ref", "main"),
         commits=pr_data.get("commits", 0),
         additions=pr_data.get("additions", 0),
         deletions=pr_data.get("deletions", 0),
diff --git a/tests/test_url_parsing.py b/tests/test_url_parsing.py
new file mode 100644
index 0000000..c6067b6
--- /dev/null
+++ b/tests/test_url_parsing.py
@@ -0,0 +1,64 @@
+"""Regression tests for PR/issue URL parsing across forges."""
+
+import pytest
+from cli.github_fetcher import parse_github_url
+from cr.github import parse_pr_url
+
+
+# --- parse_github_url (cli) ---
+
+@pytest.mark.parametrize("url,expected", [
+    # GitHub
+    ("https://github.com/owner/repo/pull/35", ("owner", "repo", 35, "pr")),
+    ("https://github.com/owner/repo/pull/1?tab=files", ("owner", "repo", 1, "pr")),
+    ("https://github.com/owner/repo/pull/1/", ("owner", "repo", 1, "pr")),
+    # Gitea
+    ("https://gitea.example.com/org/repo/pulls/42", ("org", "repo", 42, "pr")),
+    ("https://gitea.example.com/org/my-project/pulls/1#issuecomment-20", ("org", "my-project", 1, "pr")),
+    # GitLab
+    ("https://gitlab.com/org/repo/-/merge_requests/10", ("org", "repo", 10, "pr")),
+    ("https://gitlab.com/org/repo/-/merge_requests/10#note_123", ("org", "repo", 10, "pr")),
+    # Bitbucket
+    ("https://bitbucket.org/org/repo/pull-requests/5", ("org", "repo", 5, "pr")),
+    ("https://bitbucket.org/org/repo/pull-requests/5?t=1", ("org", "repo", 5, "pr")),
+    # Issues
+    ("https://github.com/owner/repo/issues/1", ("owner", "repo", 1, "issue")),
+    ("https://gitea.example.com/org/repo/issues/99", ("org", "repo", 99, "issue")),
+    ("https://gitlab.com/org/repo/-/issues/7", ("org", "repo", 7, "issue")),
+])
+def test_parse_github_url(url, expected):
+    assert parse_github_url(url) == expected
+
+
+@pytest.mark.parametrize("url", [
+    "https://github.com/owner/repo",
+    "https://github.com/owner/repo/tree/main",
+    "not-a-url",
+    "",
+])
+def test_parse_github_url_invalid(url):
+    with pytest.raises(ValueError):
+        parse_github_url(url)
+
+
+# --- parse_pr_url (cr) ---
+
+@pytest.mark.parametrize("url,expected", [
+    ("https://github.com/o/r/pull/1", ("o", "r", 1)),
+    ("https://gitea.example.com/org/repo/pulls/42", ("org", "repo", 42)),
+    ("https://gitlab.com/org/repo/-/merge_requests/10", ("org", "repo", 10)),
+    ("https://bitbucket.org/org/repo/pull-requests/5", ("org", "repo", 5)),
+    ("https://github.com/o/r/pull/1?tab=files#diff", ("o", "r", 1)),
+])
+def test_parse_pr_url(url, expected):
+    assert parse_pr_url(url) == expected
+
+
+@pytest.mark.parametrize("url", [
+    "https://github.com/owner/repo/issues/1",
+    "https://github.com/owner/repo",
+    "garbage",
+])
+def test_parse_pr_url_invalid(url):
+    with pytest.raises(ValueError):
+        parse_pr_url(url)