From 45ebb626f4e5ed587f23863af161af323ef20d03 Mon Sep 17 00:00:00 2001 From: ElliotJLT Date: Fri, 13 Mar 2026 11:23:09 +0000 Subject: [PATCH 1/2] fix(git): add missing argument injection guards to git_show, git_create_branch, git_log, and git_branch git_diff and git_checkout already reject user-supplied values starting with '-' to prevent flag injection (even when a malicious ref exists via filesystem manipulation). The same defense-in-depth pattern was missing from four other functions: - git_show: revision parameter passed directly to repo.commit() - git_create_branch: branch_name and base_branch unchecked - git_log: start_timestamp and end_timestamp passed to --since/--until - git_branch: contains and not_contains passed as raw args to repo.git.branch() Adds the same startswith("-") guards with matching tests for each function. Co-Authored-By: Claude Opus 4.6 --- src/git/src/mcp_server_git/server.py | 20 ++++++++++ src/git/tests/test_server.py | 59 ++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/src/git/src/mcp_server_git/server.py b/src/git/src/mcp_server_git/server.py index 1d0298b465..d2aa3782dc 100644 --- a/src/git/src/mcp_server_git/server.py +++ b/src/git/src/mcp_server_git/server.py @@ -142,6 +142,11 @@ def git_reset(repo: git.Repo) -> str: def git_log(repo: git.Repo, max_count: int = 10, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None) -> list[str]: if start_timestamp or end_timestamp: + # Defense in depth: reject timestamps starting with '-' to prevent flag injection + if start_timestamp and start_timestamp.startswith("-"): + raise ValueError(f"Invalid start_timestamp: '{start_timestamp}' - cannot start with '-'") + if end_timestamp and end_timestamp.startswith("-"): + raise ValueError(f"Invalid end_timestamp: '{end_timestamp}' - cannot start with '-'") # Use git log command with date filtering args = [] if start_timestamp: @@ -177,6 +182,11 @@ def git_log(repo: git.Repo, max_count: int = 10, start_timestamp: Optional[str] return log def git_create_branch(repo: git.Repo, branch_name: str, base_branch: str | None = None) -> str: + # Defense in depth: reject names starting with '-' to prevent flag injection + if branch_name.startswith("-"): + raise BadName(f"Invalid branch name: '{branch_name}' - cannot start with '-'") + if base_branch and base_branch.startswith("-"): + raise BadName(f"Invalid base branch: '{base_branch}' - cannot start with '-'") if base_branch: base = repo.references[base_branch] else: @@ -197,6 +207,10 @@ def git_checkout(repo: git.Repo, branch_name: str) -> str: def git_show(repo: git.Repo, revision: str) -> str: + # Defense in depth: reject revisions starting with '-' to prevent flag injection, + # even if a malicious ref with that name exists (e.g. via filesystem manipulation) + if revision.startswith("-"): + raise BadName(f"Invalid revision: '{revision}' - cannot start with '-'") commit = repo.commit(revision) output = [ f"Commit: {commit.hexsha!r}\n" @@ -241,6 +255,12 @@ def validate_repo_path(repo_path: Path, allowed_repository: Path | None) -> None def git_branch(repo: git.Repo, branch_type: str, contains: str | None = None, not_contains: str | None = None) -> str: + # Defense in depth: reject values starting with '-' to prevent flag injection + if contains and contains.startswith("-"): + raise BadName(f"Invalid contains value: '{contains}' - cannot start with '-'") + if not_contains and not_contains.startswith("-"): + raise BadName(f"Invalid not_contains value: '{not_contains}' - cannot start with '-'") + match contains: case None: contains_sha = (None,) diff --git a/src/git/tests/test_server.py b/src/git/tests/test_server.py index 054bf8c756..a5492adc85 100644 --- a/src/git/tests/test_server.py +++ b/src/git/tests/test_server.py @@ -423,3 +423,62 @@ def test_git_checkout_rejects_malicious_refs(test_repository): # Cleanup malicious_ref_path.unlink() + + +# Tests for argument injection protection in git_show, git_create_branch, +# git_log, and git_branch — matching the existing guards on git_diff and +# git_checkout. + +def test_git_show_rejects_flag_injection(test_repository): + """git_show should reject revisions starting with '-'.""" + with pytest.raises(BadName): + git_show(test_repository, "--output=/tmp/evil") + + with pytest.raises(BadName): + git_show(test_repository, "-p") + + +def test_git_show_rejects_malicious_refs(test_repository): + """git_show should reject refs starting with '-' even if they exist.""" + sha = test_repository.head.commit.hexsha + refs_dir = Path(test_repository.git_dir) / "refs" / "heads" + malicious_ref_path = refs_dir / "--format=evil" + malicious_ref_path.write_text(sha) + + with pytest.raises(BadName): + git_show(test_repository, "--format=evil") + + malicious_ref_path.unlink() + + +def test_git_create_branch_rejects_flag_injection(test_repository): + """git_create_branch should reject branch names starting with '-'.""" + with pytest.raises(BadName): + git_create_branch(test_repository, "--track=evil") + + with pytest.raises(BadName): + git_create_branch(test_repository, "-f") + + +def test_git_create_branch_rejects_base_branch_flag_injection(test_repository): + """git_create_branch should reject base branch names starting with '-'.""" + with pytest.raises(BadName): + git_create_branch(test_repository, "new-branch", "--track=evil") + + +def test_git_log_rejects_timestamp_flag_injection(test_repository): + """git_log should reject timestamps starting with '-'.""" + with pytest.raises(ValueError): + git_log(test_repository, start_timestamp="--exec=evil") + + with pytest.raises(ValueError): + git_log(test_repository, end_timestamp="--exec=evil") + + +def test_git_branch_rejects_contains_flag_injection(test_repository): + """git_branch should reject contains/not_contains values starting with '-'.""" + with pytest.raises(BadName): + git_branch(test_repository, "local", contains="--exec=evil") + + with pytest.raises(BadName): + git_branch(test_repository, "local", not_contains="--exec=evil") From c1fa924f667819cb24c6fc5690a55d841134c024 Mon Sep 17 00:00:00 2001 From: ElliotJLT Date: Fri, 13 Mar 2026 13:49:19 +0000 Subject: [PATCH 2/2] fix(fetch): block autonomous fetching when robots.txt returns 5xx Per RFC 9309 Section 2.3.1.3, when a robots.txt fetch results in a server error (5xx), crawlers should assume the site is fully restricted. Previously, 5xx responses fell through to the robots.txt parser, which would parse the error page HTML body, find no Disallow rules, and silently allow crawling. This meant a temporarily-down robots.txt became a free pass to autonomously fetch any URL on that site. Now 5xx responses raise McpError with a clear message pointing users to the manual fetch prompt, consistent with the 401/403 handling. Co-Authored-By: Claude Opus 4.6 --- src/fetch/src/mcp_server_fetch/server.py | 11 +++++++ src/fetch/tests/test_server.py | 41 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index d128987351..5008ed2f7d 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -89,6 +89,17 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: code=INTERNAL_ERROR, message=f"When fetching robots.txt ({robot_txt_url}), received status {response.status_code} so assuming that autonomous fetching is not allowed, the user can try manually fetching by using the fetch prompt", )) + elif response.status_code >= 500: + # Per RFC 9309 Section 2.3.1.3, server errors mean the robots.txt + # status is undefined and crawlers should assume full restriction. + # Treating 5xx as "allowed" would let a temporarily-down robots.txt + # become a free pass to crawl any URL on the site. + raise McpError(ErrorData( + code=INTERNAL_ERROR, + message=f"When fetching robots.txt ({robot_txt_url}), received server error status {response.status_code}. " + f"Per RFC 9309, server errors mean autonomous fetching should not proceed. " + f"The user can try manually fetching by using the fetch prompt.", + )) elif 400 <= response.status_code < 500: return robot_txt = response.text diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py index 96c1cb38c7..6a2bae8244 100644 --- a/src/fetch/tests/test_server.py +++ b/src/fetch/tests/test_server.py @@ -184,6 +184,47 @@ async def test_blocks_when_robots_txt_disallows_all(self): ) + @pytest.mark.asyncio + async def test_blocks_when_robots_txt_500(self): + """Test that fetching is blocked when robots.txt returns 500. + + Per RFC 9309 Section 2.3.1.3, server errors mean the robots.txt + status is undefined and crawlers should assume full restriction. + """ + mock_response = MagicMock() + mock_response.status_code = 500 + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None) + + with pytest.raises(McpError): + await check_may_autonomously_fetch_url( + "https://example.com/page", + DEFAULT_USER_AGENT_AUTONOMOUS + ) + + @pytest.mark.asyncio + async def test_blocks_when_robots_txt_503(self): + """Test that fetching is blocked when robots.txt returns 503.""" + mock_response = MagicMock() + mock_response.status_code = 503 + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None) + + with pytest.raises(McpError): + await check_may_autonomously_fetch_url( + "https://example.com/page", + DEFAULT_USER_AGENT_AUTONOMOUS + ) + + class TestFetchUrl: """Tests for fetch_url function."""