From 9e54553abc64372e6e849a7be1c59ca2c0942655 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 27 Oct 2025 17:05:34 +0100 Subject: [PATCH 1/7] allow variable chunk sizes for zarr v3 --- xarray/backends/zarr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index fe004c212b6..88c22661404 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -279,7 +279,7 @@ async def async_getitem(self, key): ) -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -301,6 +301,9 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): # while dask chunks can be variable sized # https://dask.pydata.org/en/latest/array-design.html#chunks if var_chunks and not enc_chunks: + if zarr_format == 3: + return tuple(var_chunks) + if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. " @@ -475,6 +478,7 @@ def extract_zarr_variable_encoding( var_chunks=variable.chunks, ndim=variable.ndim, name=name, + zarr_format=zarr_format, ) if _zarr_v3() and chunks is None: chunks = "auto" From ebfe0bee19c6f2aad0fbe818786fe1d108378234 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 27 Oct 2025 17:07:39 +0100 Subject: [PATCH 2/7] change the error message to mention zarr v2 --- xarray/backends/zarr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 88c22661404..8201e68e2f1 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -306,13 +306,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format): if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk. " + "Zarr v2 requires uniform chunk sizes except for final chunk. " f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( - "Final chunk of Zarr array must be the same size or smaller " + "Final chunk of a Zarr v2 array must be the same size or smaller " f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." "Consider either rechunking using `chunk()` or instead deleting " "or modifying `encoding['chunks']`." From e2fd4c3b647b730dc81b0aea539e77ecbc008b9c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 27 Oct 2025 17:08:51 +0100 Subject: [PATCH 3/7] infer the chunk shape(s) from the zarr v3 metadata --- xarray/backends/zarr.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8201e68e2f1..5d6425b37a0 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -857,9 +857,18 @@ def open_store_variable(self, name): ) attributes = dict(attributes) + if hasattr(zarr_array, "metadata"): + chunk_grid = zarr_array.metadata.chunk_grid + chunks = getattr(chunk_grid, "chunk_shapes", None) + # regular chunk grid + if chunks is None: + chunks = chunk_grid.chunk_shape + else: + chunks = zarr_array.chunks + encoding = { - "chunks": zarr_array.chunks, - "preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)), + "chunks": chunks, + "preferred_chunks": dict(zip(dimensions, chunks, strict=True)), } if _zarr_v3(): From c99881ca62a191eb9840596c304700611b4f19dc Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 8 Jan 2026 21:06:06 +0100 Subject: [PATCH 4/7] temporarily install from the zarr branch --- pixi.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 6990777b42d..4e1195f31c3 100644 --- a/pixi.toml +++ b/pixi.toml @@ -159,7 +159,7 @@ pyarrow = { version = "*", index = "https://pypi.anaconda.org/scientific-python- dask = { git = "https://github.com/dask/dask" } distributed = { git = "https://github.com/dask/distributed" } -zarr = { git = "https://github.com/zarr-developers/zarr-python" } +zarr = { git = "https://github.com/jhamman/zarr-python", branch = "feature/rectilinear-chunk-grid" } numcodecs = { git = "https://github.com/zarr-developers/numcodecs" } cftime = { git = "https://github.com/Unidata/cftime" } # packaging = { git = "https://github.com/pypa/packaging"} #? Pixi warns if this is enabled From 52ea952d4330771862e29249ea31df27e927947d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 8 Jan 2026 22:41:06 +0100 Subject: [PATCH 5/7] try to detect if variable chunk sizes are supported --- xarray/backends/zarr.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 3b1a19bda83..0c5abf6b007 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -45,6 +45,13 @@ from xarray.core.datatree import DataTree from xarray.core.types import ZarrArray, ZarrGroup +try: + from zarr import RectilinearChunks, RegularChunks # noqa: F401 + + has_variable_chunk_support = True +except ImportError: + has_variable_chunk_support = False + def _get_mappers(*, storage_options, store, chunk_store): # expand str and path-like arguments @@ -302,7 +309,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format): # while dask chunks can be variable sized # https://dask.pydata.org/en/latest/array-design.html#chunks if var_chunks and not enc_chunks: - if zarr_format == 3: + if zarr_format == 3 and has_variable_chunk_support: return tuple(var_chunks) if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): From 960a18ae7ce632b0194d10879dd191ce147f8ed2 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 8 Jan 2026 22:41:40 +0100 Subject: [PATCH 6/7] add a bit more detail to the error messages --- xarray/backends/zarr.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0c5abf6b007..935e0207730 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -316,14 +316,17 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format): raise ValueError( "Zarr v2 requires uniform chunk sizes except for final chunk. " f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " - "Consider rechunking using `chunk()`." + "Consider rechunking using `chunk()`, or switching to the " + "zarr v3 format with zarr-python>=3.2." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( - "Final chunk of a Zarr v2 array must be the same size or smaller " - f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." - "Consider either rechunking using `chunk()` or instead deleting " - "or modifying `encoding['chunks']`." + "Final chunk of a Zarr v2 array or a Zarr v3 array without the " + "rectilinear chunks extension must be the same size or smaller " + f"than the first. Variable named {name!r} has incompatible Dask " + f"chunks {var_chunks!r}. " + "Consider switching to Zarr v3 with the rectilinear chunks extension, " + "rechunking using `chunk()` or deleting or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) From f4a01b486f320560e20b35a55b02c3d8393e2306 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 8 Jan 2026 22:42:03 +0100 Subject: [PATCH 7/7] simplify the construction of the chunks / preferred_chunks --- xarray/backends/zarr.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 935e0207730..a5c43a689ee 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -868,18 +868,12 @@ def open_store_variable(self, name): ) attributes = dict(attributes) - if hasattr(zarr_array, "metadata"): - chunk_grid = zarr_array.metadata.chunk_grid - chunks = getattr(chunk_grid, "chunk_shapes", None) - # regular chunk grid - if chunks is None: - chunks = chunk_grid.chunk_shape - else: - chunks = zarr_array.chunks + chunks = tuple(zarr_array.chunks) + preferred_chunks = dict(zip(dimensions, chunks, strict=True)) encoding = { "chunks": chunks, - "preferred_chunks": dict(zip(dimensions, chunks, strict=True)), + "preferred_chunks": preferred_chunks, } if _zarr_v3():