Skip to content

Commit 0618b66

Browse files
Add anon property to fsspec adls file io config to ease usage of DefaultCredential pipeline (#2661)
<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->

# Rationale for this change

We are using the default credential pipeline to get access to Azure (more concretely, [managed identities](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/overview)). We found out that the fsspec library [only allows this if we set anon=False](https://github.com/fsspec/adlfs/blob/main/adlfs/spec.py#L357-L367) and specify the account name. Thus, the `anon` property is added to the pyiceberg file IO config.

## Are these changes tested?

We've tested that this works with the following snippet:

```python
import os
from fsspec import AbstractFileSystem
from pyiceberg.io.fsspec import FsspecFileIO
from pyiceberg.catalog.rest import RestCatalog
from typing import Any

ADLS_ANON = "adls.anon"
ADLS_CONNECTION_STRING = "adls.connection-string"
ADLS_ACCOUNT_NAME = "adls.account-name"
ADLS_ACCOUNT_KEY = "adls.account-key"
ADLS_SAS_TOKEN = "adls.sas-token"
ADLS_TENANT_ID = "adls.tenant-id"
ADLS_CLIENT_ID = "adls.client-id"
ADLS_CLIENT_SECRET = "adls.client-secret"
ADLS_ACCOUNT_HOST = "adls.account-host"

Properties = dict[str, Any]


def my_adls(properties: Properties) -> AbstractFileSystem:
    from adlfs import AzureBlobFileSystem

    for key, sas_token in {
        key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in properties.items() if key.startswith(ADLS_SAS_TOKEN)
    }.items():
        if ADLS_ACCOUNT_NAME not in properties:
            properties[ADLS_ACCOUNT_NAME] = key.split(".")[0]
        if ADLS_SAS_TOKEN not in properties:
            properties[ADLS_SAS_TOKEN] = sas_token

    return AzureBlobFileSystem(
        connection_string=properties.get(ADLS_CONNECTION_STRING),
        anon=properties.get(ADLS_ANON),
        account_name=properties.get(ADLS_ACCOUNT_NAME),
        account_key=properties.get(ADLS_ACCOUNT_KEY),
        sas_token=properties.get(ADLS_SAS_TOKEN),
        tenant_id=properties.get(ADLS_TENANT_ID),
        client_id=properties.get(ADLS_CLIENT_ID),
        client_secret=properties.get(ADLS_CLIENT_SECRET),
        account_host=properties.get(ADLS_ACCOUNT_HOST),
    )


injected_file_io = FsspecFileIO(properties={ADLS_ANON: False, ADLS_ACCOUNT_NAME: "usagestorageprod"})
injected_file_io.get_fs = lambda scheme: my_adls(injected_file_io.properties)

CATALOG_URI = "https://lakehouse..."
catalog_config = {
    "uri": CATALOG_URI,
    "properties": {
        "io-impl": "pyiceberg.io.fsspec.FsspecFileIO",
    },
    ...
}
catalog = RestCatalog("lakehouse", **catalog_config)
catalog.file_io = injected_file_io
table = catalog.load_table("some_ns.some_table")
table.io = injected_file_io
table.scan(snapshot_id=xxx).count()
```

## Are there any user-facing changes?

Zero breaking changes

<!-- In the case of user-facing changes, please add the changelog label. -->
1 parent bc5a528 commit 0618b66

File tree

3 files changed

+4
-0
lines changed

3 files changed

+4
-0
lines changed

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
ADLS_BLOB_STORAGE_SCHEME = "adls.blob-storage-scheme"
8686
ADLS_DFS_STORAGE_SCHEME = "adls.dfs-storage-scheme"
8787
ADLS_TOKEN = "adls.token"
88+
ADLS_ANON = "adls.anon"
8889
GCS_TOKEN = "gcs.oauth2.token"
8990
GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at"
9091
GCS_PROJECT_ID = "gcs.project-id"

pyiceberg/io/fsspec.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
ADLS_ACCOUNT_HOST,
4444
ADLS_ACCOUNT_KEY,
4545
ADLS_ACCOUNT_NAME,
46+
ADLS_ANON,
4647
ADLS_CLIENT_ID,
4748
ADLS_CLIENT_SECRET,
4849
ADLS_CONNECTION_STRING,
@@ -286,6 +287,7 @@ async def get_token(self, *scopes: str, **kwargs: Any) -> AccessToken:
286287
client_id=properties.get(ADLS_CLIENT_ID),
287288
client_secret=properties.get(ADLS_CLIENT_SECRET),
288289
account_host=properties.get(ADLS_ACCOUNT_HOST),
290+
anon=properties.get(ADLS_ANON),
289291
)
290292

291293

tests/io/test_fsspec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,7 @@ def test_adls_account_name_sas_token_extraction() -> None:
602602
client_id=None,
603603
client_secret=None,
604604
account_host="testaccount.dfs.core.windows.net",
605+
anon=None,
605606
)
606607

607608

0 commit comments

Comments
 (0)