From 9f8bce722e243a1a492a17d06bbdf55c64cafb8e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Apr 2026 09:02:13 +0200 Subject: [PATCH 1/3] perf: cache default ArraySpec for regular chunk grids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For regular grids, all chunks have the same codec_shape, so we can build the ArraySpec once and reuse it for every chunk — avoiding the per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. Adds _get_default_chunk_spec() and uses it in _get_selection and _set_selection. Saves ~5ms per 1000 chunks. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/zarr/core/array.py | 43 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4736805b9d..0f6531fdcc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5778,6 +5778,37 @@ def _get_chunk_spec( ) +def _get_default_chunk_spec( + metadata: ArrayMetadata, + chunk_grid: ChunkGrid, + array_config: ArrayConfig, + prototype: BufferPrototype, +) -> ArraySpec | None: + """Build an ArraySpec for the regular (non-edge) chunk shape, or None if not regular. + + For regular grids, all chunks have the same codec_shape, so we can + build the ArraySpec once and reuse it for every chunk — avoiding the + per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. + + .. note:: + Ideally the per-chunk ArraySpec would not exist at all: dtype, + fill_value, config, and prototype are constant across chunks — + only the shape varies (and only for edge chunks). A cleaner + design would pass a single ArraySpec plus a per-chunk shape + override, which ChunkTransform.decode_chunk already supports + via its ``chunk_shape`` parameter. + """ + if chunk_grid.is_regular: + return ArraySpec( + shape=chunk_grid.chunk_shape, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=array_config, + prototype=prototype, + ) + return None + + async def _get_selection( store_path: StorePath, metadata: ArrayMetadata, @@ -5857,11 +5888,16 @@ async def _get_selection( # reading chunks and decoding them indexed_chunks = list(indexer) + # Pre-compute the default chunk spec for regular grids to avoid + # per-chunk ChunkGrid lookups and ArraySpec construction. + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) results = await codec_pipeline.read( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, @@ -6200,11 +6236,14 @@ async def _set_selection( _config = replace(_config, order=order) # merging with existing data and encoding chunks + default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) await codec_pipeline.write( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + default_spec + if default_spec is not None + else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, From 0cb71977c3957b05e7a4d14bfc3522f5c2db047e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Apr 2026 11:26:35 +0200 Subject: [PATCH 2/3] docs: changelog --- changes/3908.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3908.misc.md diff --git a/changes/3908.misc.md b/changes/3908.misc.md new file mode 100644 index 0000000000..66717e8444 --- /dev/null +++ b/changes/3908.misc.md @@ -0,0 +1 @@ +Reuse a constant `ArraySpec` during indexing when possible. \ No newline at end of file From ff088912c80cf66ddecd7aa90008f8e584595b8a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 6 May 2026 15:01:55 -0400 Subject: [PATCH 3/3] refactor: inline creation of default arrayspec --- src/zarr/core/array.py | 65 +++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e04e112796..04de4185ea 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5370,37 +5370,6 @@ def _get_chunk_spec( ) -def _get_default_chunk_spec( - metadata: ArrayMetadata, - chunk_grid: ChunkGrid, - array_config: ArrayConfig, - prototype: BufferPrototype, -) -> ArraySpec | None: - """Build an ArraySpec for the regular (non-edge) chunk shape, or None if not regular. - - For regular grids, all chunks have the same codec_shape, so we can - build the ArraySpec once and reuse it for every chunk — avoiding the - per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. - - .. note:: - Ideally the per-chunk ArraySpec would not exist at all: dtype, - fill_value, config, and prototype are constant across chunks — - only the shape varies (and only for edge chunks). A cleaner - design would pass a single ArraySpec plus a per-chunk shape - override, which ChunkTransform.decode_chunk already supports - via its ``chunk_shape`` parameter. - """ - if chunk_grid.is_regular: - return ArraySpec( - shape=chunk_grid.chunk_shape, - dtype=metadata.dtype, - fill_value=metadata.fill_value, - config=array_config, - prototype=prototype, - ) - return None - - async def _get_selection( store_path: StorePath, metadata: ArrayMetadata, @@ -5480,15 +5449,23 @@ async def _get_selection( # reading chunks and decoding them indexed_chunks = list(indexer) - # Pre-compute the default chunk spec for regular grids to avoid - # per-chunk ChunkGrid lookups and ArraySpec construction. - default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) + # For regular grids, all chunks share the same ArraySpec, so build it once + # and reuse it to avoid per-chunk ChunkGrid lookups and ArraySpec construction. + regular_grid = chunk_grid.is_regular + if regular_grid: + regular_chunk_spec = ArraySpec( + shape=chunk_grid.chunk_shape, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=_config, + prototype=prototype, + ) results = await codec_pipeline.read( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - default_spec - if default_spec is not None + regular_chunk_spec + if regular_grid else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection, @@ -5828,13 +5805,23 @@ async def _set_selection( _config = replace(_config, order=order) # merging with existing data and encoding chunks - default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype) + # For regular grids, all chunks share the same ArraySpec, so build it once + # and reuse it to avoid per-chunk ChunkGrid lookups and ArraySpec construction. + regular_grid = chunk_grid.is_regular + if regular_grid: + regular_chunk_spec = ArraySpec( + shape=chunk_grid.chunk_shape, + dtype=metadata.dtype, + fill_value=metadata.fill_value, + config=_config, + prototype=prototype, + ) await codec_pipeline.write( [ ( store_path / metadata.encode_chunk_key(chunk_coords), - default_spec - if default_spec is not None + regular_chunk_spec + if regular_grid else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), chunk_selection, out_selection,