Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ repos:
- id: yamllint
exclude: pre-commit-config.yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.14.8"
rev: "v0.14.10"
hooks:
- id: ruff-format
- id: ruff-check
- repo: https://github.com/rhysd/actionlint
rev: v1.7.9
rev: v1.7.10
hooks:
- id: actionlint
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
Expand Down
26 changes: 26 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,29 @@ references:
JUMP (cpg0000-jump-pilot) was used to help demonstrate CytoDataFrame performance
with large data. See here for more information:
https://github.com/broadinstitute/cellpainting-gallery
- type: article
authors:
- family-names: Blin
given-names: Guillaume
- family-names: Sadurska
given-names: Dominika
- family-names: Portero Migueles
given-names: Rafael
- family-names: Chen
given-names: Ni
- family-names: Watson
given-names: James A.
- family-names: Lowell
given-names: Sally
title: "Nessys: A new set of tools for the automated detection of nuclei within intact tissues and dense 3D cultures"
journal: PLoS Biology
volume: "17"
issue: "8"
pages: e3000388
year: 2019
doi: "10.1371/journal.pbio.3000388"
url: "https://doi.org/10.1371/journal.pbio.3000388"
notes: >
This work used the file "6001240_labels.zarr" from the DISCEPTS imaging
dataset, available through the Image Data Resource (IDR) under accession
number idr0062.
86 changes: 72 additions & 14 deletions docs/src/examples/learning_to_fly_with_ome-arrow.ipynb

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion docs/src/examples/learning_to_fly_with_ome-arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@
stack

# we can visualize the stack using pyvista for 3D rendering
stack.view(how="pyvista")
# note: we use manually specified scaling values here
# and can also default to what the image metadata provides
# with `scaling_values=None` (the default).
stack.view(how="pyvista", scaling_values=(1, 0.1, 0.1))

# here we demonstrate that the data can be exported again
# into numpy format and re-imported
Expand Down Expand Up @@ -95,3 +98,8 @@
)
# by default, the image and metadata are shown
oa_image

# read a 3d zarr image from IDR
oa_image = OMEArrow(data="../../../tests/data/idr0062A/6001240_labels.zarr")
# show the image using pyvista
oa_image.view(how="pyvista")
11 changes: 11 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ lint.select = [
"C4",
# mccabe
"C90",
# pydocstyle (docstring presence)
"D100",
"D101",
"D102",
"D103",
"D104",
"D105",
"D106",
"D107",
# pycodestyle
"E",
# pyflakes
Expand All @@ -102,6 +111,8 @@ lint.select = [
lint.per-file-ignores."*" = [ "ANN401", "C901", "PLC0415", "PLR0912", "PLR0913", "PLR0915", "PLR2004" ]
# Ignore `F401` (unused imports) for `__init__.py` file
lint.per-file-ignores."__init__.py" = [ "F401" ]
# ignore docstring presence checks for docs
lint.per-file-ignores."docs/*" = [ "D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107" ]
# ignore typing rules for tests
lint.per-file-ignores."tests/*" = [ "ANN201", "E501", "PLR0913", "PLR2004" ]

Expand Down
7 changes: 5 additions & 2 deletions src/ome_arrow/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def view(
opacity: str | float = "sigmoid",
clim: tuple[float, float] | None = None,
show_axes: bool = True,
scaling_values: tuple[float, float, float] | None = (1.0, 0.1, 0.1),
scaling_values: tuple[float, float, float] | None = None,
) -> matplotlib.figure.Figure | "pyvista.Plotter":
"""
Render an OME-Arrow record using Matplotlib or PyVista.
Expand Down Expand Up @@ -337,7 +337,10 @@ def view(
clim: Contrast limits (``(low, high)``) for PyVista rendering.
show_axes: If ``True``, display axes in the PyVista scene.
scaling_values: Physical scale multipliers for the (x, y, z) axes used by
PyVista, typically to express anisotropy. Defaults to ``(1.0, 0.1, 0.1)``.
PyVista, typically to express anisotropy. If ``None``, uses metadata
scaling from the OME-Arrow record (pixels_meta.physical_size_x/y/z).
These scaling values default to 1 µm when the source image metadata
does not provide physical pixel sizes.

Returns:
matplotlib.figure.Figure | pyvista.Plotter:
Expand Down
167 changes: 151 additions & 16 deletions src/ome_arrow/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import itertools
import json
import re
import warnings
from datetime import datetime, timezone
Expand All @@ -20,6 +21,129 @@
from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION


def _normalize_unit(unit: str | None) -> str | None:
if not unit:
return None
u = unit.strip().lower()
if u in {"micrometer", "micrometre", "micron", "microns", "um", "µm"}:
return "µm"
if u in {"nanometer", "nanometre", "nm"}:
return "nm"
return unit


def _read_physical_pixel_sizes(
    img: BioImage,
) -> tuple[float, float, float, str | None, bool]:
    """Read physical pixel sizes and their unit from an image object.

    Args:
        img: Image object; its ``physical_pixel_sizes`` attribute (if any)
            is inspected for ``X``/``Y``/``Z`` (or lower-case) values and a
            ``unit``/``units`` attribute.

    Returns:
        A tuple ``(size_x, size_y, size_z, unit, valid)``. ``valid`` is
        ``False`` (with sizes of ``1.0`` and ``unit`` of ``None``) when the
        image exposes no pixel sizes, all three are ``None``, or a value
        cannot be converted to ``float``. Otherwise ``valid`` is ``True``,
        missing or falsy individual sizes are replaced by ``1.0``, and
        ``unit`` is the normalized unit or ``None``.
    """
    fallback = (1.0, 1.0, 1.0, None, False)

    sizes = getattr(img, "physical_pixel_sizes", None)
    if sizes is None:
        return fallback

    # Prefer the upper-case attribute; use the lower-case one when the
    # upper-case attribute is absent or falsy.
    raw_values = [
        getattr(sizes, axis, None) or getattr(sizes, axis.lower(), None)
        for axis in ("X", "Y", "Z")
    ]
    if all(value is None for value in raw_values):
        return fallback

    try:
        size_x, size_y, size_z = (float(value or 1.0) for value in raw_values)
    except Exception:
        return fallback

    raw_unit = getattr(sizes, "unit", None) or getattr(sizes, "units", None)
    if raw_unit is None:
        return size_x, size_y, size_z, None, True
    return size_x, size_y, size_z, _normalize_unit(str(raw_unit)), True


def _load_zarr_attrs(zarr_path: Path) -> dict:
zarr_json = zarr_path / "zarr.json"
if zarr_json.exists():
try:
data = json.loads(zarr_json.read_text())
return data.get("attributes") or data.get("attrs") or {}
except Exception:
return {}
zattrs = zarr_path / ".zattrs"
if zattrs.exists():
try:
return json.loads(zattrs.read_text())
except Exception:
return {}
return {}


def _extract_multiscales(attrs: dict) -> list[dict]:
if not isinstance(attrs, dict):
return []
ome = attrs.get("ome")
if isinstance(ome, dict) and isinstance(ome.get("multiscales"), list):
return ome["multiscales"]
if isinstance(attrs.get("multiscales"), list):
return attrs["multiscales"]
return []


def _read_ngff_scale(zarr_path: Path) -> tuple[float, float, float, str | None] | None:
    """Read x/y/z scale factors from ``multiscales`` metadata on disk.

    Attributes are looked up at ``zarr_path`` first and then at the nearest
    enclosing directory whose name ends in ``.zarr``. The ``scale``
    coordinate transformation of the dataset with path ``"0"`` (or of the
    first dataset when none has that path) is matched against the axes.

    This is a best-effort reader: malformed metadata yields ``None`` rather
    than an exception.

    Args:
        zarr_path: Path to a Zarr group, or to a location inside one.

    Returns:
        ``(size_x, size_y, size_z, unit)`` or ``None`` when no usable scale
        is found. An axis without a scale value defaults to ``1.0``.
        ``unit`` is the normalized unit shared by all axes that declare
        one, or ``None`` when no unit is declared or the units disagree.
    """
    # ``Path.suffix`` is only the final component, so ``image.ome.zarr`` also
    # matches ``.zarr`` here.
    zarr_root = next(
        (p for p in (zarr_path, *zarr_path.parents) if p.suffix.lower() == ".zarr"),
        zarr_path,
    )
    # Avoid reading the same directory twice when the path is the root.
    if zarr_root == zarr_path:
        candidates = (zarr_path,)
    else:
        candidates = (zarr_path, zarr_root)

    multiscales: list = []
    for candidate in candidates:
        multiscales = _extract_multiscales(_load_zarr_attrs(candidate))
        if multiscales:
            break
    if not multiscales:
        return None

    ms = multiscales[0]
    if not isinstance(ms, dict):
        return None
    axes = ms.get("axes") or []
    datasets = ms.get("datasets") or []
    if not isinstance(axes, list) or not isinstance(datasets, list):
        return None
    datasets = [d for d in datasets if isinstance(d, dict)]
    if not axes or not datasets:
        return None

    # Prefer the dataset stored under path "0"; otherwise take the first one.
    ds = next((d for d in datasets if str(d.get("path")) == "0"), datasets[0])
    cts = ds.get("coordinateTransformations") or []
    if not isinstance(cts, list):
        return None
    scale_ct = next(
        (ct for ct in cts if isinstance(ct, dict) and ct.get("type") == "scale"),
        None,
    )
    if not scale_ct:
        return None

    scale = scale_ct.get("scale") or []
    # Scale values are matched to axes by position, so lengths must agree.
    if not isinstance(scale, (list, tuple)) or len(scale) != len(axes):
        return None

    axis_scale: dict[str, float] = {}
    axis_unit: dict[str, str] = {}
    for ax, value in zip(axes, scale):
        if isinstance(ax, dict):
            raw_name, raw_unit = ax.get("name", ""), ax.get("unit")
        else:
            # Tolerate axes given as bare names (no unit information).
            raw_name, raw_unit = ax, None
        name = str(raw_name).lower()
        if name not in {"x", "y", "z"}:
            continue
        try:
            axis_scale[name] = float(value)
        except (TypeError, ValueError, OverflowError):
            continue
        unit = _normalize_unit(raw_unit) if isinstance(raw_unit, str) else None
        if unit:
            axis_unit[name] = unit

    if not axis_scale:
        return None

    psize_x = axis_scale.get("x", 1.0)
    psize_y = axis_scale.get("y", 1.0)
    psize_z = axis_scale.get("z", 1.0)

    # Report a unit only when every axis that declares one agrees.
    distinct_units = set(axis_unit.values())
    unit = distinct_units.pop() if len(distinct_units) == 1 else None

    return psize_x, psize_y, psize_z, unit


def to_ome_arrow(
type_: str = OME_ARROW_TAG_TYPE,
version: str = OME_ARROW_TAG_VERSION,
Expand Down Expand Up @@ -338,13 +462,8 @@ def from_tiff(
if size_x <= 0 or size_y <= 0:
raise ValueError("Image must have positive Y and X dims.")

pps = getattr(img, "physical_pixel_sizes", None)
try:
psize_x = float(getattr(pps, "X", None) or 1.0)
psize_y = float(getattr(pps, "Y", None) or 1.0)
psize_z = float(getattr(pps, "Z", None) or 1.0)
except Exception:
psize_x = psize_y = psize_z = 1.0
psize_x, psize_y, psize_z, unit, _pps_valid = _read_physical_pixel_sizes(img)
psize_unit = unit or "µm"

# --- NEW: coerce top-level strings --------------------------------
img_id = str(image_id or p.stem)
Expand Down Expand Up @@ -394,7 +513,7 @@ def from_tiff(
physical_size_x=psize_x,
physical_size_y=psize_y,
physical_size_z=psize_z,
physical_size_unit="µm",
physical_size_unit=psize_unit,
channels=channels,
planes=planes,
masks=None,
Expand All @@ -410,6 +529,20 @@ def from_stack_pattern_path(
image_id: Optional[str] = None,
name: Optional[str] = None,
) -> pa.StructScalar:
"""Build an OME-Arrow record from a filename pattern describing a stack.

Args:
pattern_path: Path or pattern string describing the stack layout.
default_dim_for_unspecified: Dimension to use when tokens lack a dim.
map_series_to: Dimension to map series tokens to (e.g., "T"), or None.
clamp_to_uint16: Whether to clamp pixel values to uint16.
channel_names: Optional list of channel names to apply.
image_id: Optional image identifier override.
name: Optional display name override.

Returns:
A validated OME-Arrow StructScalar describing the stack.
"""
path = Path(pattern_path)
folder = path.parent
line = path.name.strip()
Expand Down Expand Up @@ -741,13 +874,15 @@ def from_ome_zarr(
if size_x <= 0 or size_y <= 0:
raise ValueError("Image must have positive Y and X dimensions.")

pps = getattr(img, "physical_pixel_sizes", None)
try:
psize_x = float(getattr(pps, "X", None) or 1.0)
psize_y = float(getattr(pps, "Y", None) or 1.0)
psize_z = float(getattr(pps, "Z", None) or 1.0)
except Exception:
psize_x = psize_y = psize_z = 1.0
psize_x, psize_y, psize_z, unit, pps_valid = _read_physical_pixel_sizes(img)
psize_unit = unit or "µm"

if not pps_valid:
ngff_scale = _read_ngff_scale(p)
if ngff_scale is not None:
psize_x, psize_y, psize_z, unit = ngff_scale
if unit:
psize_unit = unit

img_id = str(image_id or p.stem)
display_name = str(name or p.name)
Expand Down Expand Up @@ -805,7 +940,7 @@ def from_ome_zarr(
physical_size_x=psize_x,
physical_size_y=psize_y,
physical_size_z=psize_z,
physical_size_unit="µm",
physical_size_unit=psize_unit,
channels=channels,
planes=planes,
masks=None,
Expand Down
17 changes: 17 additions & 0 deletions src/ome_arrow/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ def view_matplotlib(
cmap: str = "gray",
show: bool = True,
) -> tuple[Figure, Axes, AxesImage]:
"""Render a single (t, c, z) plane with Matplotlib.

Args:
data: OME-Arrow row or dict containing pixels_meta and planes.
tcz: (t, c, z) indices of the plane to render.
autoscale: If True, infer vmin/vmax from the image data.
vmin: Explicit lower display limit for intensity scaling.
vmax: Explicit upper display limit for intensity scaling.
cmap: Matplotlib colormap name.
show: Whether to display the plot immediately.

Returns:
A tuple of (figure, axes, image) from Matplotlib.

Raises:
ValueError: If the requested plane is missing or pixel sizes mismatch.
"""
if isinstance(data, pa.StructScalar):
data = data.as_py()

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/idr0062A/6001240_labels.zarr/0/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunk_grid":{"configuration":{"chunk_shape":[1,10,512,512]},"name":"regular"},"chunk_key_encoding":{"name":"default"},"codecs":[{"configuration":{"chunk_shape":[1,1,256,256],"codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"configuration":{"blocksize":0,"clevel":5,"cname":"zstd","shuffle":"shuffle","typesize":2},"name":"blosc"}],"index_codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"name":"crc32c"}]},"name":"sharding_indexed"}],"data_type":"uint16","dimension_names":["c","z","y","x"],"fill_value":0,"node_type":"array","shape":[2,236,275,271],"zarr_format":3}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/idr0062A/6001240_labels.zarr/1/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunk_grid":{"configuration":{"chunk_shape":[1,10,512,512]},"name":"regular"},"chunk_key_encoding":{"name":"default"},"codecs":[{"configuration":{"chunk_shape":[1,1,256,256],"codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"configuration":{"blocksize":0,"clevel":5,"cname":"zstd","shuffle":"shuffle","typesize":2},"name":"blosc"}],"index_codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"name":"crc32c"}]},"name":"sharding_indexed"}],"data_type":"uint16","dimension_names":["c","z","y","x"],"fill_value":0,"node_type":"array","shape":[2,236,137,135],"zarr_format":3}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/idr0062A/6001240_labels.zarr/2/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunk_grid":{"configuration":{"chunk_shape":[1,10,512,512]},"name":"regular"},"chunk_key_encoding":{"name":"default"},"codecs":[{"configuration":{"chunk_shape":[1,1,256,256],"codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"configuration":{"blocksize":0,"clevel":5,"cname":"zstd","shuffle":"shuffle","typesize":2},"name":"blosc"}],"index_codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"name":"crc32c"}]},"name":"sharding_indexed"}],"data_type":"uint16","dimension_names":["c","z","y","x"],"fill_value":0,"node_type":"array","shape":[2,236,68,67],"zarr_format":3}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"@context": [
"https://w3id.org/ro/crate/1.1/context",
{
"organism_classification": "https://schema.org/taxonomicRange",
"BioChemEntity": "https://schema.org/BioChemEntity",
"channel": "https://www.openmicroscopy.org/Schemas/Documentation/Generated/OME-2016-06/ome_xsd.html#Channel",
"obo": "http://purl.obolibrary.org/obo/",
"FBcv": "http://ontobee.org/ontology/FBcv/",
"acquisiton_method": {
"@reverse": "https://schema.org/result",
"@type": "@id"
},
"biological_entity": "https://schema.org/about",
"biosample": "http://purl.obolibrary.org/obo/OBI_0002648",
"preparation_method": "https://www.wikidata.org/wiki/Property:P1537",
"specimen": "http://purl.obolibrary.org/obo/HSO_0000308"
}
],
"@graph": [
{
"@id": "./",
"@type": "Dataset",
"name": "dataset name",
"description": "dataset description",
"licence": "https://creativecommons.org/licenses/by/4.0/"
},
{
"@id": "ro-crate-metadata.json",
"@type": "CreativeWork",
"conformsTo": {
"@id": "https://w3id.org/ro/crate/1.1"
},
"about": {
"@id": "./"
}
}
]
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunk_grid":{"configuration":{"chunk_shape":[1,10,512,512]},"name":"regular"},"chunk_key_encoding":{"name":"default"},"codecs":[{"configuration":{"chunk_shape":[1,1,256,256],"codecs":[{"name":"bytes"},{"configuration":{"blocksize":0,"clevel":5,"cname":"zstd","shuffle":"bitshuffle","typesize":1},"name":"blosc"}],"index_codecs":[{"configuration":{"endian":"little"},"name":"bytes"},{"name":"crc32c"}]},"name":"sharding_indexed"}],"data_type":"int8","dimension_names":["c","z","y","x"],"fill_value":0,"node_type":"array","shape":[1,236,275,271],"zarr_format":3}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading