diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index d128987351..5008ed2f7d 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -89,6 +89,17 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: code=INTERNAL_ERROR, message=f"When fetching robots.txt ({robot_txt_url}), received status {response.status_code} so assuming that autonomous fetching is not allowed, the user can try manually fetching by using the fetch prompt", )) + elif response.status_code >= 500: + # Per RFC 9309 Section 2.3.1.3, server errors mean the robots.txt + # status is undefined and crawlers should assume full restriction. + # Treating 5xx as "allowed" would let a temporarily-down robots.txt + # become a free pass to crawl any URL on the site. + raise McpError(ErrorData( + code=INTERNAL_ERROR, + message=f"When fetching robots.txt ({robot_txt_url}), received server error status {response.status_code}. " + f"Per RFC 9309, server errors mean autonomous fetching should not proceed. " + f"The user can try manually fetching by using the fetch prompt.", + )) elif 400 <= response.status_code < 500: return robot_txt = response.text diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py index 96c1cb38c7..6a2bae8244 100644 --- a/src/fetch/tests/test_server.py +++ b/src/fetch/tests/test_server.py @@ -184,6 +184,47 @@ async def test_blocks_when_robots_txt_disallows_all(self): ) + @pytest.mark.asyncio + async def test_blocks_when_robots_txt_500(self): + """Test that fetching is blocked when robots.txt returns 500. + + Per RFC 9309 Section 2.3.1.3, server errors mean the robots.txt + status is undefined and crawlers should assume full restriction. + """ + mock_response = MagicMock() + mock_response.status_code = 500 + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None) + + with pytest.raises(McpError): + await check_may_autonomously_fetch_url( + "https://example.com/page", + DEFAULT_USER_AGENT_AUTONOMOUS + ) + + @pytest.mark.asyncio + async def test_blocks_when_robots_txt_503(self): + """Test that fetching is blocked when robots.txt returns 503.""" + mock_response = MagicMock() + mock_response.status_code = 503 + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None) + + with pytest.raises(McpError): + await check_may_autonomously_fetch_url( + "https://example.com/page", + DEFAULT_USER_AGENT_AUTONOMOUS + ) + + class TestFetchUrl: """Tests for fetch_url function.""" diff --git a/src/git/src/mcp_server_git/server.py b/src/git/src/mcp_server_git/server.py index 1d0298b465..d2aa3782dc 100644 --- a/src/git/src/mcp_server_git/server.py +++ b/src/git/src/mcp_server_git/server.py @@ -142,6 +142,11 @@ def git_reset(repo: git.Repo) -> str: def git_log(repo: git.Repo, max_count: int = 10, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None) -> list[str]: if start_timestamp or end_timestamp: + # Defense in depth: reject timestamps starting with '-' to prevent flag injection + if start_timestamp and start_timestamp.startswith("-"): + raise ValueError(f"Invalid start_timestamp: '{start_timestamp}' - cannot start with '-'") + if end_timestamp and end_timestamp.startswith("-"): + raise ValueError(f"Invalid end_timestamp: '{end_timestamp}' - cannot start with '-'") # Use git log command with date filtering args = [] if start_timestamp: @@ -177,6 +182,11 @@ def git_log(repo: git.Repo, max_count: int = 10, start_timestamp: Optional[str] return log def git_create_branch(repo: git.Repo, branch_name: str, base_branch: str | None = None) -> str: + # Defense in depth: reject names starting with '-' to prevent flag injection + if branch_name.startswith("-"): + raise BadName(f"Invalid branch name: '{branch_name}' - cannot start with '-'") + if base_branch and base_branch.startswith("-"): + raise BadName(f"Invalid base branch: '{base_branch}' - cannot start with '-'") if base_branch: base = repo.references[base_branch] else: @@ -197,6 +207,10 @@ def git_checkout(repo: git.Repo, branch_name: str) -> str: def git_show(repo: git.Repo, revision: str) -> str: + # Defense in depth: reject revisions starting with '-' to prevent flag injection, + # even if a malicious ref with that name exists (e.g. via filesystem manipulation) + if revision.startswith("-"): + raise BadName(f"Invalid revision: '{revision}' - cannot start with '-'") commit = repo.commit(revision) output = [ f"Commit: {commit.hexsha!r}\n" @@ -241,6 +255,12 @@ def validate_repo_path(repo_path: Path, allowed_repository: Path | None) -> None def git_branch(repo: git.Repo, branch_type: str, contains: str | None = None, not_contains: str | None = None) -> str: + # Defense in depth: reject values starting with '-' to prevent flag injection + if contains and contains.startswith("-"): + raise BadName(f"Invalid contains value: '{contains}' - cannot start with '-'") + if not_contains and not_contains.startswith("-"): + raise BadName(f"Invalid not_contains value: '{not_contains}' - cannot start with '-'") + match contains: case None: contains_sha = (None,) diff --git a/src/git/tests/test_server.py b/src/git/tests/test_server.py index 054bf8c756..a5492adc85 100644 --- a/src/git/tests/test_server.py +++ b/src/git/tests/test_server.py @@ -423,3 +423,62 @@ def test_git_checkout_rejects_malicious_refs(test_repository): # Cleanup malicious_ref_path.unlink() + + +# Tests for argument injection protection in git_show, git_create_branch, +# git_log, and git_branch — matching the existing guards on git_diff and +# git_checkout. + +def test_git_show_rejects_flag_injection(test_repository): + """git_show should reject revisions starting with '-'.""" + with pytest.raises(BadName): + git_show(test_repository, "--output=/tmp/evil") + + with pytest.raises(BadName): + git_show(test_repository, "-p") + + +def test_git_show_rejects_malicious_refs(test_repository): + """git_show should reject refs starting with '-' even if they exist.""" + sha = test_repository.head.commit.hexsha + refs_dir = Path(test_repository.git_dir) / "refs" / "heads" + malicious_ref_path = refs_dir / "--format=evil" + malicious_ref_path.write_text(sha) + + with pytest.raises(BadName): + git_show(test_repository, "--format=evil") + + malicious_ref_path.unlink() + + +def test_git_create_branch_rejects_flag_injection(test_repository): + """git_create_branch should reject branch names starting with '-'.""" + with pytest.raises(BadName): + git_create_branch(test_repository, "--track=evil") + + with pytest.raises(BadName): + git_create_branch(test_repository, "-f") + + +def test_git_create_branch_rejects_base_branch_flag_injection(test_repository): + """git_create_branch should reject base branch names starting with '-'.""" + with pytest.raises(BadName): + git_create_branch(test_repository, "new-branch", "--track=evil") + + +def test_git_log_rejects_timestamp_flag_injection(test_repository): + """git_log should reject timestamps starting with '-'.""" + with pytest.raises(ValueError): + git_log(test_repository, start_timestamp="--exec=evil") + + with pytest.raises(ValueError): + git_log(test_repository, end_timestamp="--exec=evil") + + +def test_git_branch_rejects_contains_flag_injection(test_repository): + """git_branch should reject contains/not_contains values starting with '-'.""" + with pytest.raises(BadName): + git_branch(test_repository, "local", contains="--exec=evil") + + with pytest.raises(BadName): + git_branch(test_repository, "local", not_contains="--exec=evil")