Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions python/zarrs/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ import typing
import numpy.typing
import zarr.abc.store

@typing.final
class Basic:
def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any) -> Basic: ...

@typing.final
class CodecPipelineImpl:
def __new__(
Expand Down Expand Up @@ -40,8 +36,9 @@ class CodecPipelineImpl:
class WithSubset:
def __new__(
cls,
item: Basic,
key: builtins.str,
chunk_subset: typing.Sequence[slice],
chunk_shape: typing.Sequence[builtins.int],
subset: typing.Sequence[slice],
shape: typing.Sequence[builtins.int],
) -> WithSubset: ...
2 changes: 1 addition & 1 deletion python/zarrs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get_codec_pipeline_impl(
num_threads=config.get("threading.max_workers", None),
direct_io=config.get("codec_pipeline.direct_io", False),
)
except TypeError as e:
except (TypeError, ValueError) as e:
if strict:
raise UnsupportedMetadataError() from e

Expand Down
6 changes: 3 additions & 3 deletions python/zarrs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from zarr.core.array_spec import ArraySpec
from zarr.core.indexing import SelectorTuple, is_integer

from zarrs._internal import Basic, WithSubset
from zarrs._internal import WithSubset

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -178,7 +178,6 @@ def make_chunk_info_for_rust_with_indices(
chunk_spec.config,
chunk_spec.prototype,
)
chunk_info = Basic(byte_getter, chunk_spec)
out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
shape_chunk_selection_slices = get_shape_for_selector(
Expand All @@ -196,8 +195,9 @@ def make_chunk_info_for_rust_with_indices(
)
chunk_info_with_indices.append(
WithSubset(
chunk_info,
key=byte_getter.path,
chunk_subset=chunk_selection_as_slices,
chunk_shape=chunk_spec.shape,
subset=out_selection_as_slices,
shape=shape,
)
Expand Down
146 changes: 28 additions & 118 deletions src/chunk_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,101 +2,24 @@ use std::num::NonZeroU64;

use pyo3::{
Bound, PyAny, PyErr, PyResult,
exceptions::{PyIndexError, PyRuntimeError, PyValueError},
exceptions::{PyIndexError, PyValueError},
pyclass, pymethods,
types::{PyAnyMethods, PyBytes, PyBytesMethods, PyInt, PySlice, PySliceMethods as _},
};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
use zarrs::{
array::{ChunkShape, DataType, FillValue, NamedDataType},
array_subset::ArraySubset,
metadata::v3::MetadataV3,
storage::StoreKey,
};
use zarrs::{array_subset::ArraySubset, storage::StoreKey};

use crate::utils::PyErrExt;

/// Read-only accessors for the metadata of a chunk item.
///
/// Implemented in this file for `Basic` and `WithSubset`.
pub(crate) trait ChunksItem {
/// Returns the store key of the item.
fn key(&self) -> &StoreKey;
/// Returns the shape of the item as a slice of non-zero dimension sizes.
fn shape(&self) -> &[NonZeroU64];
/// Returns the data type of the item.
fn data_type(&self) -> &DataType;
/// Returns the fill value of the item.
fn fill_value(&self) -> &FillValue;
}

// Chunk description exposed to Python as a `#[pyclass]`.
// All fields are filled in by `Basic::new` from a Python byte interface
// object and a Python chunk spec object.
#[derive(Clone)]
#[gen_stub_pyclass]
#[pyclass]
pub(crate) struct Basic {
// Store key built from the `path` attribute of the byte interface.
key: StoreKey,
// Extracted from the `shape` attribute of the chunk spec.
shape: ChunkShape,
// Resolved from the string form of the chunk spec's native dtype.
data_type: DataType,
// Built from the bytes of the chunk spec's `fill_value` attribute.
fill_value: FillValue,
}

/// Converts a Python fill value into its raw byte representation.
///
/// * For the `"string"` dtype, a Python integer fill value of `0` maps to an
///   empty byte vector; any other integer is rejected.
/// * A Python `bytes` object is copied as-is.
/// * Any other object must expose a `tobytes()` method whose result is
///   extracted as the byte vector.
///
/// # Errors
///
/// Returns a `ValueError` for a non-zero integer fill value with the
/// `"string"` dtype, or when the object is neither `bytes` nor provides
/// `tobytes`. Errors raised by the integer extraction or by the Python calls
/// are propagated unchanged.
fn fill_value_to_bytes(dtype: &str, fill_value: &Bound<'_, PyAny>) -> PyResult<Vec<u8>> {
    if dtype == "string" {
        // Match zarr-python 2.x.x string fill value behaviour with a 0 fill value
        // See https://github.com/zarr-developers/zarr-python/issues/2792#issuecomment-2644362122
        if let Ok(int_fill) = fill_value.cast::<PyInt>() {
            let fill_value_usize: usize = int_fill.extract()?;
            if fill_value_usize != 0 {
                return Err(PyErr::new::<PyValueError, _>(format!(
                    "Cannot understand non-zero integer {fill_value_usize} fill value for dtype {dtype}"
                )));
            }
            return Ok(vec![]);
        }
    }

    // Plain `bytes` objects are copied directly.
    if let Ok(raw_bytes) = fill_value.cast::<PyBytes>() {
        return Ok(raw_bytes.as_bytes().to_vec());
    }

    // Anything else has to provide a `tobytes()` method.
    if !fill_value.hasattr("tobytes")? {
        return Err(PyErr::new::<PyValueError, _>(format!(
            "Unsupported fill value {fill_value:?}"
        )));
    }
    Ok(fill_value.call_method0("tobytes")?.extract()?)
}

#[gen_stub_pymethods]
#[pymethods]
impl Basic {
    // Builds a `Basic` from two Python objects:
    // - `byte_interface` supplies the `path` attribute used as the store key;
    // - `chunk_spec` supplies the `shape`, `dtype` and `fill_value` attributes.
    //
    // Any failing attribute lookup, extraction or conversion is returned as
    // a Python exception; an invalid store key is reported as `ValueError`.
    #[new]
    fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult<Self> {
        let path: String = byte_interface.getattr("path")?.extract()?;

        let shape: Vec<NonZeroU64> = chunk_spec.getattr("shape")?.extract()?;

        // String form of the native dtype, e.g. the result of `str(np.dtype(...))`.
        let native_dtype = chunk_spec
            .getattr("dtype")?
            .call_method0("to_native_dtype")?;
        let dtype_name: String = native_dtype.call_method0("__str__")?.extract()?;
        let dtype = if dtype_name == "object" {
            // zarrs doesn't understand `object` which is the output of `np.dtype("|O").__str__()`
            // but maps it to "string" internally https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288
            String::from("string")
        } else {
            dtype_name
        };

        let data_type = get_data_type_from_dtype(&dtype)?;

        let py_fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
        let fill_bytes = fill_value_to_bytes(&dtype, &py_fill_value)?;
        let fill_value = FillValue::new(fill_bytes);

        // The key is validated last, after all other conversions have succeeded.
        let key = StoreKey::new(path).map_py_err::<PyValueError>()?;
        Ok(Self {
            key,
            shape,
            data_type,
            fill_value,
        })
    }
}

#[derive(Clone)]
#[gen_stub_pyclass]
#[pyclass]
pub(crate) struct WithSubset {
pub item: Basic,
key: StoreKey,
pub chunk_subset: ArraySubset,
pub subset: ArraySubset,
shape: Vec<NonZeroU64>,
pub num_elements: u64,
}

#[gen_stub_pymethods]
Expand All @@ -105,13 +28,14 @@ impl WithSubset {
#[new]
#[allow(clippy::needless_pass_by_value)]
fn new(
item: Basic,
key: String,
chunk_subset: Vec<Bound<'_, PySlice>>,
chunk_shape: Vec<u64>,
subset: Vec<Bound<'_, PySlice>>,
shape: Vec<u64>,
) -> PyResult<Self> {
let chunk_subset = selection_to_array_subset(&chunk_subset, &item.shape)?;
let shape: Vec<NonZeroU64> = shape
let num_elements = chunk_shape.iter().product();
let shape_nonzero_u64: Vec<NonZeroU64> = shape
.into_iter()
.map(|dim| {
NonZeroU64::new(dim).ok_or_else(|| {
Expand All @@ -121,56 +45,42 @@ impl WithSubset {
})
})
.collect::<PyResult<Vec<NonZeroU64>>>()?;
let subset = selection_to_array_subset(&subset, &shape)?;
let chunk_shape_nonzero_u64: Vec<NonZeroU64> = chunk_shape
.into_iter()
.map(|dim| {
NonZeroU64::new(dim).ok_or_else(|| {
PyErr::new::<PyValueError, _>(
"subset dimensions must be greater than zero".to_string(),
)
})
})
.collect::<PyResult<Vec<NonZeroU64>>>()?;
let chunk_subset = selection_to_array_subset(&chunk_subset, &chunk_shape_nonzero_u64)?;
let subset = selection_to_array_subset(&subset, &shape_nonzero_u64)?;
// Check that subset and chunk_subset have the same number of elements.
// This permits broadcasting of a constant input.
if subset.num_elements() != chunk_subset.num_elements() && subset.num_elements() > 1 {
return Err(PyErr::new::<PyIndexError, _>(format!(
"the size of the chunk subset {chunk_subset} and input/output subset {subset} are incompatible",
)));
}

Ok(Self {
item,
key: StoreKey::new(key).map_py_err::<PyValueError>()?,
chunk_subset,
subset,
shape: chunk_shape_nonzero_u64,
num_elements,
})
}
}

impl ChunksItem for Basic {
fn key(&self) -> &StoreKey {
impl WithSubset {
pub fn key(&self) -> &StoreKey {
&self.key
}
fn shape(&self) -> &[NonZeroU64] {
pub fn shape(&self) -> &[NonZeroU64] {
&self.shape
}
fn data_type(&self) -> &DataType {
&self.data_type
}
fn fill_value(&self) -> &FillValue {
&self.fill_value
}
}

// Every accessor below forwards to the corresponding field of the wrapped
// `item`, so a `WithSubset` reports the metadata of the item it wraps.
impl ChunksItem for WithSubset {
// Store key of the wrapped item.
fn key(&self) -> &StoreKey {
&self.item.key
}
// Shape of the wrapped item.
fn shape(&self) -> &[NonZeroU64] {
&self.item.shape
}
// Data type of the wrapped item.
fn data_type(&self) -> &DataType {
&self.item.data_type
}
// Fill value of the wrapped item.
fn fill_value(&self) -> &FillValue {
&self.item.fill_value
}
}

/// Resolves a dtype name into a zarrs `DataType`.
///
/// The name is wrapped in a `MetadataV3` value, converted to a
/// `NamedDataType`, and then converted into `DataType`.
///
/// # Errors
///
/// A failed `NamedDataType` conversion is reported as a Python `RuntimeError`.
fn get_data_type_from_dtype(dtype: &str) -> PyResult<DataType> {
    let metadata = MetadataV3::new(dtype);
    let named_data_type =
        NamedDataType::try_from(&metadata).map_py_err::<PyRuntimeError>()?;
    Ok(named_data_type.into())
}

fn slice_to_range(slice: &Bound<'_, PySlice>, length: isize) -> PyResult<std::ops::Range<u64>> {
Expand Down
11 changes: 4 additions & 7 deletions src/concurrency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use zarrs::array::{
concurrency::calc_concurrency_outer_inner,
};

use crate::{CodecPipelineImpl, chunk_item::ChunksItem, utils::PyCodecErrExt as _};
use crate::{CodecPipelineImpl, chunk_item::WithSubset, utils::PyCodecErrExt as _};

pub trait ChunkConcurrentLimitAndCodecOptions {
fn get_chunk_concurrent_limit_and_codec_options(
Expand All @@ -13,22 +13,19 @@ pub trait ChunkConcurrentLimitAndCodecOptions {
) -> PyResult<Option<(usize, CodecOptions)>>;
}

impl<T> ChunkConcurrentLimitAndCodecOptions for Vec<T>
where
T: ChunksItem,
{
impl ChunkConcurrentLimitAndCodecOptions for Vec<WithSubset> {
fn get_chunk_concurrent_limit_and_codec_options(
&self,
codec_pipeline_impl: &CodecPipelineImpl,
) -> PyResult<Option<(usize, CodecOptions)>> {
let num_chunks = self.len();
let Some(chunk_descriptions0) = self.first() else {
let Some(item) = self.first() else {
return Ok(None);
};

let codec_concurrency = codec_pipeline_impl
.codec_chain
.recommended_concurrency(chunk_descriptions0.shape(), chunk_descriptions0.data_type())
.recommended_concurrency(item.shape(), &codec_pipeline_impl.data_type)
.map_codec_err()?;

let min_concurrent_chunks =
Expand Down
Loading