diff --git a/amplifier_app_cli/utils/source_status.py b/amplifier_app_cli/utils/source_status.py
index 82d747c..e6efa9b 100644
--- a/amplifier_app_cli/utils/source_status.py
+++ b/amplifier_app_cli/utils/source_status.py
@@ -6,7 +6,6 @@
import json
import logging
-import re
import subprocess
from dataclasses import dataclass
from dataclasses import field
@@ -543,18 +542,24 @@ def _cache_age_days_from_string(cached_at: str) -> int:
async def _get_github_commit_sha(
client: httpx.AsyncClient, repo_url: str, ref: str
) -> str:
- """Get SHA for ref using GitHub Atom feed.
+ """Get SHA for ref using the GitHub REST API.
- Uses public Atom feed: https://github.com/{owner}/{repo}/commits/{ref}.atom
+ Uses: https://api.github.com/repos/{owner}/{repo}/commits/{ref}
- Uses auth headers when available (GITHUB_TOKEN or gh auth login) to
- avoid GitHub's unauthenticated rate limit of 60 requests/hour.
- Authenticated requests get 5,000 requests/hour.
+ The web atom feed at github.com/.../commits/{ref}.atom does not accept
+ Bearer auth. For SAML-protected private repos, GitHub silently returns
+ HTTP 200 with an empty body, causing SHA extraction to fail with an
+ opaque ValueError that bypasses the HTTPStatusError handler in
+ _check_all_cached_modules. The REST API honors PAT auth and returns
+ proper 401/403/404 status codes, letting the existing handler surface
+ the right "private repo or rate limited" hint.
+
+ Authenticated requests get 5,000 requests/hour
+ (vs. 60/hr unauthenticated).
Args:
client: Shared httpx client for all HTTP requests
"""
- # Remove .git suffix properly (not with rstrip - it removes any char in '.git'!)
url_clean = repo_url[:-4] if repo_url.endswith(".git") else repo_url
parts = url_clean.split("github.com/")[-1].split("/")
if len(parts) < 2:
@@ -562,24 +567,17 @@ async def _get_github_commit_sha(
owner, repo = parts[0], parts[1]
- atom_url = f"https://github.com/{owner}/{repo}/commits/{ref}.atom"
-
- # Use auth headers proactively to avoid GitHub's unauthenticated rate limit
- # (60 req/hr). Authenticated requests get 5,000 req/hr. Falls back gracefully
- # to no-auth when GITHUB_TOKEN is unset and gh auth login hasn't been run.
- auth_headers = _get_github_auth_headers()
- response = await client.get(atom_url, headers=auth_headers)
- response.raise_for_status()
-
- # Parse XML for first commit SHA
- # Format: tag:github.com,2008:Grit::Commit/{SHA}
- match = re.search(
- r"tag:github\.com,2008:Grit::Commit/([a-f0-9]{40})", response.text
+ api_url = f"https://api.github.com/repos/{owner}/{repo}/commits/{ref}"
+ response = await client.get(
+ api_url,
+ headers={
+ "Accept": "application/vnd.github.v3+json",
+ **_get_github_auth_headers(),
+ },
)
- if not match:
- raise ValueError(f"Could not extract commit SHA from Atom feed: {atom_url}")
+ response.raise_for_status()
- return match.group(1)
+ return response.json()["sha"]
async def _get_commit_details(
diff --git a/tests/test_source_status_auth.py b/tests/test_source_status_auth.py
index e3573fe..2fd842d 100644
--- a/tests/test_source_status_auth.py
+++ b/tests/test_source_status_auth.py
@@ -18,20 +18,15 @@ async def test_uses_auth_headers_on_first_request_when_available(self):
"""Auth headers should be sent on the first request, not only after a 404."""
from amplifier_app_cli.utils.source_status import _get_github_commit_sha
- atom_content = """
-
-
- tag:github.com,2008:Grit::Commit/abcdef1234567890abcdef1234567890abcdef12
-
-"""
-
captured_requests = []
async def mock_get(url, **kwargs):
captured_requests.append({"url": url, "headers": kwargs.get("headers", {})})
response = MagicMock(spec=httpx.Response)
response.status_code = 200
- response.text = atom_content
+ response.json = MagicMock(
+ return_value={"sha": "abcdef1234567890abcdef1234567890abcdef12"}
+ )
response.raise_for_status = MagicMock()
return response
@@ -63,17 +58,12 @@ async def test_proceeds_without_auth_when_unavailable(self):
"""Requests without auth should still work when no token is configured."""
from amplifier_app_cli.utils.source_status import _get_github_commit_sha
- atom_content = """
-
-
- tag:github.com,2008:Grit::Commit/deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
-
-"""
-
async def mock_get(url, **kwargs):
response = MagicMock(spec=httpx.Response)
response.status_code = 200
- response.text = atom_content
+ response.json = MagicMock(
+ return_value={"sha": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"}
+ )
response.raise_for_status = MagicMock()
return response
@@ -172,3 +162,86 @@ def test_docstring_does_not_claim_no_rate_limits(self):
"Docstring falsely claims 'no rate limits' — GitHub Atom feeds "
"DO have rate limits for unauthenticated requests"
)
+
+
+class TestUsesRestApiNotAtomFeed:
+ """Regression: must use the REST API, not the web atom feed.
+
+ The web atom feed (github.com/.../commits/{ref}.atom) does not accept
+ Bearer auth — for SAML-protected private repos GitHub silently returns
+ HTTP 200 with an empty body, causing SHA extraction to fail with an
+ opaque ValueError. The REST API honors PAT auth and returns proper
+ 401/403/404 status codes for unauthorized requests.
+ """
+
+ @pytest.mark.asyncio
+ async def test_calls_rest_api_endpoint(self):
+ """The function must call api.github.com, not github.com/.../*.atom."""
+ from amplifier_app_cli.utils.source_status import _get_github_commit_sha
+
+ captured_urls = []
+
+ async def mock_get(url, **kwargs):
+ captured_urls.append(url)
+ response = MagicMock(spec=httpx.Response)
+ response.status_code = 200
+ response.json = MagicMock(return_value={"sha": "a" * 40})
+ response.raise_for_status = MagicMock()
+ return response
+
+ mock_client = MagicMock(spec=httpx.AsyncClient)
+ mock_client.get = AsyncMock(side_effect=mock_get)
+
+ with patch(
+ "amplifier_app_cli.utils.source_status._get_github_auth_headers",
+ return_value={"Authorization": "Bearer test-token"},
+ ):
+ await _get_github_commit_sha(
+ mock_client,
+ "https://github.com/microsoft/amplifier",
+ "main",
+ )
+
+ assert len(captured_urls) == 1
+ assert captured_urls[0].startswith("https://api.github.com/"), (
+ f"Must use REST API, got: {captured_urls[0]}. "
+ "The web atom feed does not accept Bearer auth and silently "
+ "returns HTTP 200 with empty body for SAML-protected private repos."
+ )
+ assert ".atom" not in captured_urls[0]
+
+ @pytest.mark.asyncio
+ async def test_403_surfaces_as_http_status_error(self):
+ """A 403 from the API must raise HTTPStatusError, not ValueError.
+
+ This is the contract the _check_all_cached_modules error handler
+ relies on to print "private repo or rate limited" instead of
+ "Unexpected error".
+ """
+ from amplifier_app_cli.utils.source_status import _get_github_commit_sha
+
+ async def mock_get(url, **kwargs):
+ response = MagicMock(spec=httpx.Response)
+ response.status_code = 403
+ request = MagicMock()
+ response.raise_for_status = MagicMock(
+ side_effect=httpx.HTTPStatusError(
+ "403 Forbidden", request=request, response=response
+ )
+ )
+ return response
+
+ mock_client = MagicMock(spec=httpx.AsyncClient)
+ mock_client.get = AsyncMock(side_effect=mock_get)
+
+ with patch(
+ "amplifier_app_cli.utils.source_status._get_github_auth_headers",
+ return_value={"Authorization": "Bearer test-token"},
+ ):
+ with pytest.raises(httpx.HTTPStatusError):
+ await _get_github_commit_sha(
+ mock_client,
+ "https://github.com/Aleph-Alpha/private-repo",
+ "main",
+ )
+