From 90ab6e19a937c6ef895d61aaa43f96e4db193d93 Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Thu, 18 Dec 2025 15:01:45 -0500
Subject: [PATCH 1/4] Cache the result of the zero-check

Closes https://github.com/zarr-developers/zarr-python/issues/3627
---
 changes/3628.misc.md            | 1 +
 src/zarr/core/codec_pipeline.py | 6 ++++++
 2 files changed, 7 insertions(+)
 create mode 100644 changes/3628.misc.md

diff --git a/changes/3628.misc.md b/changes/3628.misc.md
new file mode 100644
index 0000000000..0aa706e5cd
--- /dev/null
+++ b/changes/3628.misc.md
@@ -0,0 +1 @@
+Avoid reading lazy arrays or on-device arrays twice when comparing them to the fill value during writes.

diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index fd557ac43e..e77526e5b8 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -413,6 +413,12 @@ async def _read_key(
                 if chunk_array is None:
                     chunk_array_batch.append(None)  # type: ignore[unreachable]
                 else:
+                    # The all_equal comparison below will effectively force the array
+                    # into memory.
+                    # If the result is useful, we want to avoid reading it twice
+                    # from a potentially lazy operation, so we cache it here.
+                    # If the result is not useful, we leave it for the garbage collector.
+                    chunk_array._data = chunk_array.as_numpy_array()
                     if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal(
                         fill_value_or_default(chunk_spec)
                     ):

From afb8fffc4ddfb376001099013e937f2ffd4fc47a Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Thu, 1 Jan 2026 18:37:22 -0500
Subject: [PATCH 2/4] Update for improved GPU support

---
 src/zarr/core/codec_pipeline.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index e77526e5b8..53d0c5aa09 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -5,6 +5,8 @@
 from typing import TYPE_CHECKING, Any, TypeVar
 from warnings import warn
 
+import numpy as np
+
 from zarr.abc.codec import (
     ArrayArrayCodec,
     ArrayBytesCodec,
@@ -19,6 +21,7 @@
 from zarr.core.indexing import SelectorTuple, is_scalar
 from zarr.errors import ZarrUserWarning
 from zarr.registry import register_pipeline
+from zarr.core.buffer import NDBuffer
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator
@@ -26,7 +29,7 @@
 
     from zarr.abc.store import ByteGetter, ByteSetter
     from zarr.core.array_spec import ArraySpec
-    from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
+    from zarr.core.buffer import Buffer, BufferPrototype
     from zarr.core.chunk_grids import ChunkGrid
     from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
 
@@ -413,18 +416,21 @@ async def _read_key(
                 if chunk_array is None:
                     chunk_array_batch.append(None)  # type: ignore[unreachable]
                 else:
-                    # The all_equal comparison below will effectively force the array
-                    # into memory.
-                    # If the result is useful, we want to avoid reading it twice
-                    # from a potentially lazy operation, so we cache it here.
-                    # If the result is not useful, we leave it for the garbage collector.
-                    chunk_array._data = chunk_array.as_numpy_array()
-                    if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal(
-                        fill_value_or_default(chunk_spec)
-                    ):
-                        chunk_array_batch.append(None)
-                    else:
-                        chunk_array_batch.append(chunk_array)
+                    if not chunk_spec.config.write_empty_chunks:
+                        # The all_equal comparison below will effectively force the array
+                        # into memory.
+                        # If the result is useful, we want to avoid reading it twice
+                        # from a potentially lazy operation, so we cache it here.
+                        # If the result is not useful, we leave it for the garbage collector.
+                        # We skip the conversion when the data already lives on the GPU.
+                        if not hasattr(chunk_array._data, '__cuda_array_interface__'):
+                            chunk_array = NDBuffer(np.asarray(chunk_array._data))
+
+                        if chunk_array.all_equal(
+                            fill_value_or_default(chunk_spec)
+                        ):
+                            chunk_array = None
+                    chunk_array_batch.append(chunk_array)
 
         chunk_bytes_batch = await self.encode_batch(
             [
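A minimal, self-contained sketch of the materialization strategy introduced in patch 2, using only NumPy. The names materialize_if_on_host and FakeDeviceArray are illustrative stand-ins, not zarr API: anything exposing __cuda_array_interface__ (a CuPy-style device array) is passed through untouched so no device-to-host copy is forced, while everything else is read into host memory exactly once with np.asarray.

    import numpy as np


    def materialize_if_on_host(data):
        """Read data into NumPy once, unless it already lives on a GPU."""
        if hasattr(data, "__cuda_array_interface__"):
            # CuPy-style device arrays advertise this attribute; leave them alone
            # so the emptiness check can run on the device without a copy.
            return data
        # Lazy, memory-mapped, or plain arrays are materialized exactly once here.
        return np.asarray(data)


    class FakeDeviceArray:
        """Stand-in for a device array; only the attribute's presence is checked."""

        __cuda_array_interface__ = {}


    host = materialize_if_on_host(memoryview(b"\x00\x00\x00\x00"))  # becomes an ndarray
    device = materialize_if_on_host(FakeDeviceArray())              # returned untouched
    print(type(host).__name__, type(device).__name__)               # ndarray FakeDeviceArray

Patch 2 applies the same hasattr test to chunk_array._data before deciding whether to wrap the result of np.asarray in a fresh NDBuffer.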
From db37fed6ef8630e5f8002f8acdc8323eea9e433e Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Fri, 2 Jan 2026 08:16:16 -0500
Subject: [PATCH 3/4] try again

---
 src/zarr/core/codec_pipeline.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index 53d0c5aa09..f612105dd3 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -416,21 +416,24 @@ async def _read_key(
                 if chunk_array is None:
                     chunk_array_batch.append(None)  # type: ignore[unreachable]
                 else:
-                    if not chunk_spec.config.write_empty_chunks:
+                    if chunk_spec.config.write_empty_chunks:
+                        chunk_array_batch.append(chunk_array)
+                    else:
                         # The all_equal comparison below will effectively force the array
                         # into memory.
                         # If the result is useful, we want to avoid reading it twice
                         # from a potentially lazy operation, so we cache it here.
                         # If the result is not useful, we leave it for the garbage collector.
                         # We skip the conversion when the data already lives on the GPU.
                         if not hasattr(chunk_array._data, '__cuda_array_interface__'):
                             chunk_array = NDBuffer(np.asarray(chunk_array._data))
 
                         if chunk_array.all_equal(
                             fill_value_or_default(chunk_spec)
                         ):
-                            chunk_array = None
-                    chunk_array_batch.append(chunk_array)
+                            chunk_array_batch.append(None)
+                        else:
+                            chunk_array_batch.append(chunk_array)
 
         chunk_bytes_batch = await self.encode_batch(
             [

From eeedcccea263c28cc824b98f07d25c541a9efb90 Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Fri, 2 Jan 2026 09:01:31 -0500
Subject: [PATCH 4/4] hack

---
 src/zarr/core/codec_pipeline.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index f612105dd3..a9f1cb8938 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -426,7 +426,11 @@ async def _read_key(
                         # If the result is not useful, we leave it for the garbage collector.
                         # We skip the conversion when the data already lives on the GPU.
                         if not hasattr(chunk_array._data, '__cuda_array_interface__'):
-                            chunk_array = NDBuffer(np.asarray(chunk_array._data))
+                            # It is unclear why constructing a new NDBuffer does not work here;
+                            # something seems to be getting missed:
+                            # chunk_array = NDBuffer(np.asarray(chunk_array._data))
+                            # As a workaround, mutate the private _data attribute in place.
+                            chunk_array._data = np.asarray(chunk_array._data)
 
                         if chunk_array.all_equal(
                             fill_value_or_default(chunk_spec)
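For background on the double read that issue 3627 describes and patch 1 avoids, a small illustrative sketch follows; CountingLazyArray is a hypothetical stand-in for a lazy or on-disk array and is not part of zarr. Checking a chunk for emptiness and then encoding it each materialize the data, so without caching the backing store is read twice; caching the NumPy result reads it once.

    import numpy as np


    class CountingLazyArray:
        """Hypothetical lazy array that counts how often it is materialized."""

        def __init__(self, shape):
            self.shape = shape
            self.reads = 0

        def __array__(self, dtype=None, copy=None):
            # NumPy calls this whenever concrete values are needed (e.g. np.asarray).
            self.reads += 1
            return np.zeros(self.shape, dtype=dtype)


    # Without caching: the emptiness check and the encode step each trigger a read.
    lazy = CountingLazyArray((4, 4))
    is_empty = bool(np.all(np.asarray(lazy) == 0))  # first read
    payload = np.asarray(lazy).tobytes()            # second read
    print(lazy.reads)  # 2

    # With caching, as in patch 1: materialize once and reuse the result.
    lazy = CountingLazyArray((4, 4))
    cached = np.asarray(lazy)
    is_empty = bool(np.all(cached == 0))
    payload = cached.tobytes()
    print(lazy.reads)  # 1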