From bf7c4bd2a5fd5f2890e70cdf28c4fc9fcf4fddee Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 27 Jan 2026 16:57:27 -0800 Subject: [PATCH 1/6] add optional --- pyproject.toml | 1 + uv.lock | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 71b5ed2821..8afcd7d362 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,7 @@ hf = ["huggingface-hub>=0.24.0"] pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"] datafusion = ["datafusion>=51,<52"] gcp-auth = ["google-auth>=2.4.0"] +entra-auth = ["azure-identity>=1.25.1"] [dependency-groups] dev = [ diff --git a/uv.lock b/uv.lock index 89b02d4fec..112adebdbc 100644 --- a/uv.lock +++ b/uv.lock @@ -4361,6 +4361,9 @@ duckdb = [ dynamodb = [ { name = "boto3" }, ] +entra-auth = [ + { name = "azure-identity" }, +] gcp-auth = [ { name = "google-auth" }, ] @@ -4459,6 +4462,7 @@ notebook = [ [package.metadata] requires-dist = [ { name = "adlfs", marker = "extra == 'adlfs'", specifier = ">=2024.7.0" }, + { name = "azure-identity", marker = "extra == 'entra-auth'", specifier = ">=1.25.1" }, { name = "bodo", marker = "extra == 'bodo'", specifier = ">=2025.7.4" }, { name = "boto3", marker = "extra == 'dynamodb'", specifier = ">=1.24.59" }, { name = "boto3", marker = "extra == 'glue'", specifier = ">=1.24.59" }, @@ -4502,7 +4506,7 @@ requires-dist = [ { name = "thrift-sasl", marker = "extra == 'hive-kerberos'", specifier = ">=0.4.3" }, { name = "zstandard", specifier = ">=0.13.0,<1.0.0" }, ] -provides-extras = ["pyarrow", "pandas", "duckdb", "ray", "bodo", "daft", "polars", "snappy", "hive", "hive-kerberos", "s3fs", "glue", "adlfs", "dynamodb", "bigquery", "sql-postgres", "sql-sqlite", "gcsfs", "rest-sigv4", "hf", "pyiceberg-core", "datafusion", "gcp-auth"] +provides-extras = ["pyarrow", "pandas", "duckdb", "ray", "bodo", "daft", "polars", "snappy", "hive", "hive-kerberos", "s3fs", "glue", "adlfs", "dynamodb", "bigquery", "sql-postgres", "sql-sqlite", "gcsfs", 
"rest-sigv4", "hf", "pyiceberg-core", "datafusion", "gcp-auth", "entra-auth"] [package.metadata.requires-dev] dev = [ From c23db8d50f0e438aaedaad7d2bacc48670261618 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 27 Jan 2026 17:00:36 -0800 Subject: [PATCH 2/6] docs --- mkdocs/docs/configuration.md | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index efe6ddee7a..1fbf3e798f 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -395,6 +395,7 @@ The RESTCatalog supports pluggable authentication via the `auth` configuration b - `oauth2`: OAuth2 client credentials flow. - `custom`: Custom authentication manager (requires `auth.impl`). - `google`: Google Authentication support +- `entra`: Microsoft Entra ID (Azure AD) authentication support ###### Configuration Properties @@ -422,6 +423,7 @@ catalog: | `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | | `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | | `auth.google` | If type is `google` | Block containing `credentials_path` to a service account file (if using). Will default to using Application Default Credentials. | +| `auth.entra` | If type is `entra` | Block containing Entra ID configuration. Will default to using DefaultAzureCredential. | ###### Examples @@ -578,22 +580,38 @@ catalog: See [OneLake table APIs for Iceberg](https://aka.ms/onelakeircdocs) for detailed documentation. 
+Using Entra ID authentication (recommended): + +```yaml +catalog: + onelake_catalog: + type: rest + uri: https://onelake.table.fabric.microsoft.com/iceberg + warehouse: <fabric_workspace_id>/<fabric_data_item_id> + auth: + type: entra + adls.account-name: onelake + adls.account-host: onelake.blob.fabric.microsoft.com +``` + +Using static token: + ```yaml catalog: onelake_catalog: type: rest uri: https://onelake.table.fabric.microsoft.com/iceberg warehouse: <fabric_workspace_id>/<fabric_data_item_id> # Example : DB0CE1EE-B014-47D3-8F0C-9D64C39C0FC2/F470A1D2-6D6D-4C9D-8796-46286C80B7C0 - token: <token>, - adls.account-name: onelake, - adls.account-host: onelake.blob.fabric.microsoft.com, + token: <token> + adls.account-name: onelake + adls.account-host: onelake.blob.fabric.microsoft.com adls.credential: <credential> ``` !!! Note "OneLake Authentication Models" - For Authentication: You can use DefautlAzureCredential from `azure.identity` package or refer to other [authentication flows](https://learn.microsoft.com/en-us/entra/identity-platform/authentication-flows-app-scenarios) for detailed documentation. + For Authentication: You can use the `entra` auth type which leverages `DefaultAzureCredential` from the `azure.identity` package. This supports multiple authentication methods including environment variables, managed identity, Azure CLI, and interactive browser login. Install with `pip install pyiceberg[entra-auth]`. Refer to [DefaultAzureCredential overview](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains?tabs=dac#defaultazurecredential-overview) for detailed documentation. 
### SQL Catalog From 84ce1ce57e17e9e8a3d23389cd90a2ec574e1043 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 27 Jan 2026 17:29:21 -0800 Subject: [PATCH 3/6] add entra auth manager impl --- pyiceberg/catalog/rest/auth.py | 63 ++++++++++++++++++++++++++ tests/catalog/test_rest_auth.py | 79 ++++++++++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 72235d8760..602074282c 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -249,6 +249,68 @@ def auth_header(self) -> str: return f"Bearer {self.credentials.token}" +class EntraAuthManager(AuthManager): + """Auth Manager implementation that supports Microsoft Entra ID (Azure AD) authentication. + + This manager uses the Azure Identity library's DefaultAzureCredential which automatically + tries multiple authentication methods including environment variables, managed identity, + and Azure CLI. + + See https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains + for more details on DefaultAzureCredential. + """ + + DEFAULT_SCOPE = "https://storage.azure.com/.default" + + def __init__( + self, + scopes: list[str] | None = None, + **credential_kwargs: Any, + ): + """ + Initialize EntraAuthManager. + + Args: + scopes: List of OAuth2 scopes. Defaults to ["https://storage.azure.com/.default"]. + **credential_kwargs: Arguments passed to DefaultAzureCredential. 
+ Supported authentication methods: + - Environment Variables: Set AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET + - Managed Identity: Works automatically on Azure; for user-assigned, pass managed_identity_client_id + - Azure CLI: Works automatically if logged in via `az login` + - Workload Identity: Works automatically in AKS with workload identity configured # codespell:ignore aks + """ + try: + from azure.identity import DefaultAzureCredential + except ImportError as e: + raise ImportError("Azure Identity library not found. Please install with: pip install pyiceberg[entra-auth]") from e + + self._scopes = scopes or [self.DEFAULT_SCOPE] + self._lock = threading.Lock() + self._token: str | None = None + self._expires_at: float = 0 + self._credential = DefaultAzureCredential(**credential_kwargs) + + def _refresh_token(self) -> None: + """Refresh the access token from Azure.""" + token = self._credential.get_token(*self._scopes) + self._token = token.token + # expires_on is a Unix timestamp; add a 60-second margin for safety + self._expires_at = token.expires_on - 60 + + def _get_token(self) -> str: + """Get a valid access token, refreshing if necessary.""" + with self._lock: + if not self._token or time.time() >= self._expires_at: + self._refresh_token() + if self._token is None: + raise ValueError("Failed to obtain Entra access token") + return self._token + + def auth_header(self) -> str: + """Return the Authorization header value with a valid Bearer token.""" + return f"Bearer {self._get_token()}" + + class AuthManagerAdapter(AuthBase): """A `requests.auth.AuthBase` adapter for integrating an `AuthManager` into a `requests.Session`. 
@@ -330,3 +392,4 @@ def create(cls, class_or_name: str, config: dict[str, Any]) -> AuthManager: AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager) AuthManagerFactory.register("oauth2", OAuth2AuthManager) AuthManagerFactory.register("google", GoogleAuthManager) +AuthManagerFactory.register("entra", EntraAuthManager) diff --git a/tests/catalog/test_rest_auth.py b/tests/catalog/test_rest_auth.py index 2ef02ed005..80b74f7e4c 100644 --- a/tests/catalog/test_rest_auth.py +++ b/tests/catalog/test_rest_auth.py @@ -22,7 +22,7 @@ import requests from requests_mock import Mocker -from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, GoogleAuthManager, NoopAuthManager +from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, EntraAuthManager, GoogleAuthManager, NoopAuthManager TEST_URI = "https://iceberg-test-catalog/" GOOGLE_CREDS_URI = "https://oauth2.googleapis.com/token" @@ -153,3 +153,80 @@ def test_google_auth_manager_import_error() -> None: with patch.dict("sys.modules", {"google.auth": None, "google.auth.transport.requests": None}): with pytest.raises(ImportError, match="Google Auth libraries not found. 
Please install 'google-auth'."): GoogleAuthManager() + + +@patch("azure.identity.DefaultAzureCredential") +def test_entra_auth_manager_default_credential(mock_default_cred: MagicMock, rest_mock: Mocker) -> None: + """Test EntraAuthManager with DefaultAzureCredential.""" + mock_credential_instance = MagicMock() + mock_token = MagicMock() + mock_token.token = "entra_default_token" + mock_token.expires_on = 9999999999 # Far future timestamp + mock_credential_instance.get_token.return_value = mock_token + mock_default_cred.return_value = mock_credential_instance + + auth_manager = EntraAuthManager() + session = requests.Session() + session.auth = AuthManagerAdapter(auth_manager) + session.get(TEST_URI) + + mock_default_cred.assert_called_once_with() + mock_credential_instance.get_token.assert_called_once_with("https://storage.azure.com/.default") + history = rest_mock.request_history + assert len(history) == 1 + actual_headers = history[0].headers + assert actual_headers["Authorization"] == "Bearer entra_default_token" + + +@patch("azure.identity.DefaultAzureCredential") +def test_entra_auth_manager_with_managed_identity_client_id(mock_default_cred: MagicMock, rest_mock: Mocker) -> None: + """Test EntraAuthManager with managed_identity_client_id passed to DefaultAzureCredential.""" + mock_credential_instance = MagicMock() + mock_token = MagicMock() + mock_token.token = "entra_mi_token" + mock_token.expires_on = 9999999999 + mock_credential_instance.get_token.return_value = mock_token + mock_default_cred.return_value = mock_credential_instance + + auth_manager = EntraAuthManager(managed_identity_client_id="user-assigned-client-id") + session = requests.Session() + session.auth = AuthManagerAdapter(auth_manager) + session.get(TEST_URI) + + mock_default_cred.assert_called_once_with(managed_identity_client_id="user-assigned-client-id") + mock_credential_instance.get_token.assert_called_once_with("https://storage.azure.com/.default") + history = rest_mock.request_history + 
assert len(history) == 1 + actual_headers = history[0].headers + assert actual_headers["Authorization"] == "Bearer entra_mi_token" + + +@patch("azure.identity.DefaultAzureCredential") +def test_entra_auth_manager_custom_scopes(mock_default_cred: MagicMock, rest_mock: Mocker) -> None: + """Test EntraAuthManager with custom scopes.""" + mock_credential_instance = MagicMock() + mock_token = MagicMock() + mock_token.token = "entra_custom_scope_token" + mock_token.expires_on = 9999999999 + mock_credential_instance.get_token.return_value = mock_token + mock_default_cred.return_value = mock_credential_instance + + custom_scopes = ["https://datalake.azure.net/.default", "https://storage.azure.com/.default"] + auth_manager = EntraAuthManager(scopes=custom_scopes) + session = requests.Session() + session.auth = AuthManagerAdapter(auth_manager) + session.get(TEST_URI) + + mock_default_cred.assert_called_once_with() + mock_credential_instance.get_token.assert_called_once_with(*custom_scopes) + history = rest_mock.request_history + assert len(history) == 1 + actual_headers = history[0].headers + assert actual_headers["Authorization"] == "Bearer entra_custom_scope_token" + + +def test_entra_auth_manager_import_error() -> None: + """Test EntraAuthManager raises ImportError if azure-identity is not installed.""" + with patch.dict("sys.modules", {"azure.identity": None}): + with pytest.raises(ImportError, match="Azure Identity library not found"): + EntraAuthManager() From 5fca4ac7982c825750a034eb032e948e80a83d0b Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 27 Jan 2026 17:41:45 -0800 Subject: [PATCH 4/6] concise docs --- mkdocs/docs/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 1fbf3e798f..e590578f69 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -610,8 +610,8 @@ catalog: -!!! 
Note "OneLake Authentication Models" - For Authentication: You can use the `entra` auth type which leverages `DefaultAzureCredential` from the `azure.identity` package. This supports multiple authentication methods including environment variables, managed identity, Azure CLI, and interactive browser login. Install with `pip install pyiceberg[entra-auth]`. Refer to [DefaultAzureCredential overview](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains?tabs=dac#defaultazurecredential-overview) for detailed documentation. +!!! Note "OneLake Authentication" + Use the `entra` auth type for Entra ID (Azure AD) authentication via [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains?tabs=dac#defaultazurecredential-overview), which supports environment variables, managed identity, Azure CLI, and more. Install with `pip install pyiceberg[entra-auth]`. ### SQL Catalog From 2e064ef60a77aeb2396b7580be5d7ed9a3835774 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 28 Jan 2026 18:19:37 -0800 Subject: [PATCH 5/6] add missing extras to docs --- mkdocs/docs/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 86736ec045..a37f3be8b0 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -63,6 +63,9 @@ You can mix and match optional dependencies depending on your needs: | rest-sigv4 | Support for generating AWS SIGv4 authentication headers for REST Catalogs | | pyiceberg-core | Installs iceberg-rust powered core | | datafusion | Installs both PyArrow and Apache DataFusion | +| hf | Support for Hugging Face Hub | +| gcp-auth | Support for Google Cloud authentication | +| entra-auth | Support for Azure Entra authentication | You either need to install `s3fs`, `adlfs`, `gcsfs`, or `pyarrow` to be able to fetch files from an object store. 
From e79cb8bcb7de86badddbf3e3bf1af6a88e1bb743 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 28 Jan 2026 20:11:31 -0800 Subject: [PATCH 6/6] add test --- tests/catalog/test_rest_auth.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/catalog/test_rest_auth.py b/tests/catalog/test_rest_auth.py index 80b74f7e4c..ae5d40f5aa 100644 --- a/tests/catalog/test_rest_auth.py +++ b/tests/catalog/test_rest_auth.py @@ -230,3 +230,22 @@ def test_entra_auth_manager_import_error() -> None: with patch.dict("sys.modules", {"azure.identity": None}): with pytest.raises(ImportError, match="Azure Identity library not found"): EntraAuthManager() + + +@patch("azure.identity.DefaultAzureCredential") +def test_entra_auth_manager_token_failure(mock_default_cred: MagicMock, rest_mock: Mocker) -> None: + """Test EntraAuthManager raises exception when token acquisition fails.""" + mock_credential_instance = MagicMock() + mock_credential_instance.get_token.side_effect = Exception("Failed to acquire token") + mock_default_cred.return_value = mock_credential_instance + + auth_manager = EntraAuthManager() + session = requests.Session() + session.auth = AuthManagerAdapter(auth_manager) + + with pytest.raises(Exception, match="Failed to acquire token"): + session.get(TEST_URI) + + # Verify no requests were made with a blank/missing auth header + history = rest_mock.request_history + assert len(history) == 0