diff --git a/docs/history.rst b/docs/history.rst
index 53313e49..10315139 100644
--- a/docs/history.rst
+++ b/docs/history.rst
@@ -3,6 +3,7 @@ History
 
 Latest
 ------
+- ENH: Add read support for Zarr spatial and proj conventions (pull #900)
 - ENH: Add `convention` option to `set_options()` for future multi-convention support (pull #899)
 - REF: Extract CF convention logic to `_convention/cf.py` module (pull #899)
 
diff --git a/rioxarray/_convention/_core.py b/rioxarray/_convention/_core.py
index 3169eceb..462f860f 100644
--- a/rioxarray/_convention/_core.py
+++ b/rioxarray/_convention/_core.py
@@ -10,13 +10,15 @@
 
 from rioxarray._convention._base import ConventionProtocol
 from rioxarray._convention.cf import CFConvention
+from rioxarray._convention.zarr import ZarrConvention
 from rioxarray._options import CONVENTION, get_option
 from rioxarray.crs import crs_from_user_input
 from rioxarray.enum import Convention
 
 # Convention classes mapped by Convention enum
 _CONVENTION_MODULES: dict[Convention, ConventionProtocol] = {
-    Convention.CF: CFConvention  # type: ignore[dict-item]
+    Convention.CF: CFConvention,  # type: ignore[dict-item]
+    Convention.ZARR: ZarrConvention,  # type: ignore[dict-item]
 }
 
 
diff --git a/rioxarray/_convention/zarr.py b/rioxarray/_convention/zarr.py
new file mode 100644
index 00000000..0784ad54
--- /dev/null
+++ b/rioxarray/_convention/zarr.py
@@ -0,0 +1,360 @@
+"""
+Zarr spatial and proj convention support for rioxarray.
+
+This module provides functions for reading geospatial metadata according to:
+- Zarr spatial convention: https://github.com/zarr-conventions/spatial
+- Zarr geo-proj convention: https://github.com/zarr-conventions/geo-proj
+"""
+from typing import Optional, Union
+
+import rasterio.crs
+import xarray
+from affine import Affine
+
+from rioxarray.crs import crs_from_user_input
+
+# Convention identifiers
+PROJ_CONVENTION = {
+    "schema_url": "https://raw.githubusercontent.com/zarr-conventions/geo-proj/refs/tags/v1/schema.json",
+    "spec_url": "https://github.com/zarr-conventions/geo-proj/blob/v1/README.md",
+    "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
+    "name": "proj:",
+    "description": "Coordinate reference system information for geospatial data",
+}
+
+SPATIAL_CONVENTION = {
+    "schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
+    "spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
+    "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
+    "name": "spatial:",
+    "description": "Spatial coordinate information",
+}
+
+
+def _declared_convention_entries(attrs: dict) -> list:
+    """
+    Collect the well-formed entries of the zarr_conventions attribute.
+
+    Parameters
+    ----------
+    attrs : dict
+        Attributes dictionary to check
+
+    Returns
+    -------
+    list
+        The dict entries of zarr_conventions; empty when the attribute
+        is missing or is not a list or tuple
+    """
+    zarr_conventions = attrs.get("zarr_conventions", [])
+    if not isinstance(zarr_conventions, (list, tuple)):
+        return []
+    return [entry for entry in zarr_conventions if isinstance(entry, dict)]
+
+
+def has_convention_declared(attrs: dict, convention_name: str) -> bool:
+    """
+    Check if a specific convention is declared in zarr_conventions.
+
+    Parameters
+    ----------
+    attrs : dict
+        Attributes dictionary to check
+    convention_name : str
+        Name of convention to check for (e.g., "proj:" or "spatial:")
+
+    Returns
+    -------
+    bool
+        True if convention is declared
+    """
+    return any(
+        entry.get("name") == convention_name
+        for entry in _declared_convention_entries(attrs)
+    )
+
+
+def get_declared_conventions(attrs: dict) -> set:
+    """
+    Get set of declared convention names from attrs.
+
+    Parameters
+    ----------
+    attrs : dict
+        Attributes dictionary to check
+
+    Returns
+    -------
+    set
+        Set of declared convention names (e.g., {"proj:", "spatial:"})
+    """
+    # Only string names are kept: a missing, null or unhashable "name"
+    # cannot identify a convention and must not break the lookup.
+    return {
+        entry["name"]
+        for entry in _declared_convention_entries(attrs)
+        if isinstance(entry.get("name"), str)
+    }
+
+
+# ============================================================================
+# Parsing utilities
+# ============================================================================
+
+
+def parse_spatial_transform(
+    spatial_transform: Union[list, tuple],
+) -> Optional[Affine]:
+    """
+    Convert spatial:transform array to Affine object.
+
+    Parameters
+    ----------
+    spatial_transform : list or tuple
+        Transform as [a, b, c, d, e, f] array
+
+    Returns
+    -------
+    affine.Affine or None
+        Affine transform object, or None if invalid
+    """
+    if not isinstance(spatial_transform, (list, tuple)):
+        return None
+    if len(spatial_transform) != 6:
+        return None
+    try:
+        return Affine(*spatial_transform)
+    except (TypeError, ValueError):
+        return None
+
+
+# ============================================================================
+# Internal parsing helpers
+# ============================================================================
+
+
+def _parse_crs_from_attrs(
+    attrs: dict, convention_check: bool = True
+) -> Optional[rasterio.crs.CRS]:
+    """
+    Parse CRS from proj: attributes with fallback priority.
+
+    The attributes are tried in the order proj:wkt2, proj:code,
+    proj:projjson; a value that cannot be parsed is skipped so the
+    next attribute can still supply the CRS.
+
+    Parameters
+    ----------
+    attrs : dict
+        Attributes dictionary to parse from
+    convention_check : bool, default True
+        Whether to check for convention declaration
+
+    Returns
+    -------
+    rasterio.crs.CRS or None
+        Parsed CRS object, or None if not found
+    """
+    if convention_check and not has_convention_declared(attrs, "proj:"):
+        return None
+
+    # Try proj attributes in priority order: wkt2, code, projjson
+    for proj_attr in ("proj:wkt2", "proj:code", "proj:projjson"):
+        proj_value = attrs.get(proj_attr)
+        if proj_value is None:
+            continue
+        try:
+            parsed_crs = crs_from_user_input(proj_value)
+        except (KeyError, TypeError, ValueError, RuntimeError):
+            # pyproj.exceptions.CRSError subclasses RuntimeError (not
+            # ValueError); catching it keeps the fallback chain alive when
+            # the parser ends up in pyproj. NOTE(review): confirm in crs.py.
+            continue
+        if parsed_crs is not None:
+            return parsed_crs
+    return None
+
+
+def _parse_transform_from_attrs(
+    attrs: dict, convention_check: bool = True
+) -> Optional[Affine]:
+    """
+    Parse transform from spatial: attributes.
+
+    Parameters
+    ----------
+    attrs : dict
+        Attributes dictionary to parse from
+    convention_check : bool, default True
+        Whether to check for convention declaration
+
+    Returns
+    -------
+    affine.Affine or None
+        Parsed transform object, or None if not found
+    """
+    if convention_check and not has_convention_declared(attrs, "spatial:"):
+        return None
+
+    try:
+        spatial_transform = attrs.get("spatial:transform")
+        if spatial_transform is not None:
+            return parse_spatial_transform(spatial_transform)
+    except (KeyError, TypeError, ValueError):
+        pass
+    return None
+
+
+# ============================================================================
+# ZarrConvention class implementing ConventionProtocol
+# ============================================================================
+
+
+class ZarrConvention:
+    """Zarr convention class implementing ConventionProtocol."""
+
+    @classmethod
+    def read_crs(
+        cls, obj: Union[xarray.Dataset, xarray.DataArray]
+    ) -> Optional[rasterio.crs.CRS]:
+        """
+        Read CRS from Zarr proj: convention.
+
+        Parameters
+        ----------
+        obj : xarray.Dataset or xarray.DataArray
+            Object to read CRS from
+
+        Returns
+        -------
+        rasterio.crs.CRS or None
+            CRS object, or None if not found
+        """
+        return _parse_crs_from_attrs(obj.attrs)
+
+    @classmethod
+    def read_transform(
+        cls, obj: Union[xarray.Dataset, xarray.DataArray]
+    ) -> Optional[Affine]:
+        """
+        Read transform from Zarr spatial: convention.
+
+        Parameters
+        ----------
+        obj : xarray.Dataset or xarray.DataArray
+            Object to read transform from
+
+        Returns
+        -------
+        affine.Affine or None
+            Transform object, or None if not found
+        """
+        return _parse_transform_from_attrs(obj.attrs)
+
+    @classmethod
+    def read_spatial_dimensions(
+        cls, obj: Union[xarray.Dataset, xarray.DataArray]
+    ) -> Optional[tuple[str, str]]:
+        """
+        Read spatial dimensions from Zarr spatial: convention.
+
+        Parameters
+        ----------
+        obj : xarray.Dataset or xarray.DataArray
+            Object to read spatial dimensions from
+
+        Returns
+        -------
+        tuple of (y_dim, x_dim) or None
+            Tuple of dimension names, or None if not found
+        """
+        # Only interpret spatial:* attributes if convention is declared
+        if not has_convention_declared(obj.attrs, "spatial:"):
+            return None
+
+        try:
+            spatial_dims = obj.attrs.get("spatial:dimensions")
+            # Require a real sequence: a bare string such as "yx" would
+            # otherwise be unpacked character by character.
+            if isinstance(spatial_dims, (list, tuple)) and len(spatial_dims) >= 2:
+                # spatial:dimensions format is ["y", "x"] or similar
+                y_dim_name, x_dim_name = spatial_dims[-2:]  # Take last two
+                if y_dim_name in obj.dims and x_dim_name in obj.dims:
+                    return y_dim_name, x_dim_name
+        except (KeyError, TypeError, ValueError):
+            pass
+
+        return None
+
+    @classmethod
+    def write_crs(
+        cls,
+        obj: Union[xarray.Dataset, xarray.DataArray],
+        crs: rasterio.crs.CRS,
+        **kwargs,  # pylint: disable=unused-argument
+    ) -> Union[xarray.Dataset, xarray.DataArray]:
+        """
+        Write CRS using Zarr conventions.
+
+        Note: Writing support will be implemented in a future PR.
+
+        Parameters
+        ----------
+        obj : xarray.Dataset or xarray.DataArray
+            Object to write CRS to
+        crs : rasterio.crs.CRS
+            CRS to write
+        **kwargs
+            Additional convention-specific parameters
+
+        Returns
+        -------
+        xarray.Dataset or xarray.DataArray
+            Object with CRS written
+
+        Raises
+        ------
+        NotImplementedError
+            Zarr write support is not yet implemented
+        """
+        raise NotImplementedError(
+            "Zarr CRS writing is not yet implemented. "
+            "Use Convention.CF for writing or wait for a future release."
+        )
+
+    @classmethod
+    def write_transform(
+        cls,
+        obj: Union[xarray.Dataset, xarray.DataArray],
+        *,
+        transform: Affine,
+        **kwargs,  # pylint: disable=unused-argument
+    ) -> Union[xarray.Dataset, xarray.DataArray]:
+        """
+        Write transform using Zarr conventions.
+
+        Note: Writing support will be implemented in a future PR.
+
+        Parameters
+        ----------
+        obj : xarray.Dataset or xarray.DataArray
+            Object to write transform to
+        transform : affine.Affine
+            Transform to write
+        **kwargs
+            Additional convention-specific parameters
+
+        Returns
+        -------
+        xarray.Dataset or xarray.DataArray
+            Object with transform written
+
+        Raises
+        ------
+        NotImplementedError
+            Zarr write support is not yet implemented
+        """
+        raise NotImplementedError(
+            "Zarr transform writing is not yet implemented. "
+            "Use Convention.CF for writing or wait for a future release."
+        )
diff --git a/rioxarray/enum.py b/rioxarray/enum.py
index 33cf7fd0..93cb83e1 100644
--- a/rioxarray/enum.py
+++ b/rioxarray/enum.py
@@ -10,6 +10,7 @@ class Convention(Enum):
 
     Currently supported:
     - CF: Climate and Forecasts convention using grid_mapping coordinates
+    - Zarr: Zarr spatial and proj conventions for cloud-native formats
 
     The convention can be set globally using set_options() or per-method
     using the convention parameter.
@@ -34,8 +35,15 @@ class Convention(Enum):
     References
     ----------
     .. [1] CF Conventions: https://github.com/cf-convention/cf-conventions
+    .. [2] Zarr spatial convention: https://github.com/zarr-conventions/spatial
+    .. [3] Zarr geo-proj convention: https://github.com/zarr-conventions/geo-proj
     """
 
     #: Climate and Forecasts convention (default)
     #: https://github.com/cf-convention/cf-conventions
     CF = "CF"
+
+    #: Zarr spatial and proj conventions
+    #: https://github.com/zarr-conventions/spatial
+    #: https://github.com/zarr-conventions/geo-proj
+    ZARR = "ZARR"
diff --git a/test/integration/test_integration_zarr_conventions.py b/test/integration/test_integration_zarr_conventions.py
new file mode 100644
index 00000000..926fa602
--- /dev/null
+++ b/test/integration/test_integration_zarr_conventions.py
@@ -0,0 +1,211 @@
+"""Integration tests for reading Zarr conventions."""
+import numpy as np
+import pyproj
+import xarray as xr
+from affine import Affine
+from rasterio.crs import CRS
+
+import rioxarray  # noqa: F401
+from rioxarray import set_options
+from rioxarray._convention import zarr
+from rioxarray.enum import Convention
+
+
+def _create_zarr_array_with_proj():
+    """Create a DataArray with Zarr proj: convention attributes."""
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["y", "x"],
+        coords={
+            "y": np.arange(10),
+            "x": np.arange(20),
+        },
+    )
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:wkt2"] = CRS.from_epsg(4326).to_wkt()
+    return data
+
+
+def _create_zarr_array_with_spatial():
+    """Create a DataArray with Zarr spatial: convention attributes."""
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["lat", "lon"],
+        coords={
+            "lat": np.arange(10),
+            "lon": np.arange(20),
+        },
+    )
+    data.attrs["zarr_conventions"] = [zarr.SPATIAL_CONVENTION]
+    data.attrs["spatial:transform"] = [1.0, 0.0, 100.0, 0.0, -1.0, 200.0]
+    data.attrs["spatial:dimensions"] = ["lat", "lon"]
+    return data
+
+
+def _create_zarr_array_with_both():
+    """Create a DataArray with both Zarr conventions."""
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["lat", "lon"],
+        coords={
+            "lat": np.arange(10),
+            "lon": np.arange(20),
+        },
+    )
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION, zarr.SPATIAL_CONVENTION]
+    data.attrs["proj:wkt2"] = CRS.from_epsg(32618).to_wkt()
+    data.attrs["spatial:transform"] = [10.0, 0.0, 500000.0, 0.0, -10.0, 4500000.0]
+    data.attrs["spatial:dimensions"] = ["lat", "lon"]
+    return data
+
+
+def test_read_crs_from_zarr_convention():
+    """Test reading CRS from DataArray with Zarr proj: convention."""
+    data = _create_zarr_array_with_proj()
+
+    crs = data.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_read_crs_from_zarr_convention__with_setting():
+    """Test reading CRS with Convention.ZARR setting."""
+    data = _create_zarr_array_with_proj()
+
+    with set_options(convention=Convention.ZARR):
+        crs = data.rio.crs
+        assert crs is not None
+        assert crs == CRS.from_epsg(4326)
+
+
+def test_read_transform_from_zarr_convention():
+    """Test reading transform from DataArray with Zarr spatial: convention."""
+    data = _create_zarr_array_with_spatial()
+
+    # Access transform via rio accessor
+    # Check the cached version reads from Zarr spatial:transform
+    cached = data.rio._cached_transform()
+    assert cached is not None
+    assert cached == Affine(1.0, 0.0, 100.0, 0.0, -1.0, 200.0)
+
+
+def test_read_spatial_dimensions_from_zarr_convention():
+    """Test reading spatial dimensions from Zarr spatial: convention."""
+    data = _create_zarr_array_with_spatial()
+
+    assert data.rio.x_dim == "lon"
+    assert data.rio.y_dim == "lat"
+
+
+def test_read_both_conventions():
+    """Test reading from DataArray with both Zarr conventions."""
+    data = _create_zarr_array_with_both()
+
+    # CRS from proj:
+    crs = data.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(32618)
+
+    # Transform from spatial:
+    cached = data.rio._cached_transform()
+    assert cached is not None
+    assert cached == Affine(10.0, 0.0, 500000.0, 0.0, -10.0, 4500000.0)
+
+    # Dimensions from spatial:
+    assert data.rio.x_dim == "lon"
+    assert data.rio.y_dim == "lat"
+
+
+def test_fallback_zarr_to_cf():
+    """Test that CF convention is tried as fallback when Zarr not found."""
+    # Create data with CF convention
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["y", "x"],
+        coords={
+            "y": np.arange(10),
+            "x": np.arange(20),
+        },
+    )
+    data.coords["spatial_ref"] = xr.Variable((), 0)
+    data.coords["spatial_ref"].attrs["spatial_ref"] = "EPSG:4326"
+
+    # Even with Zarr preference, should fall back to CF
+    with set_options(convention=Convention.ZARR):
+        crs = data.rio.crs
+        assert crs is not None
+        assert crs == CRS.from_epsg(4326)
+
+
+def test_fallback_cf_to_zarr():
+    """Test that Zarr convention is tried as fallback when CF not found."""
+    # Create data with Zarr convention only
+    data = _create_zarr_array_with_proj()
+
+    # With CF preference (default), should fall back to Zarr
+    crs = data.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_priority_zarr_over_cf():
+    """Test that Zarr convention takes priority when setting is Zarr."""
+    # Create data with both conventions (different CRS values)
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["y", "x"],
+        coords={
+            "y": np.arange(10),
+            "x": np.arange(20),
+        },
+    )
+    # CF convention
+    data.coords["spatial_ref"] = xr.Variable((), 0)
+    data.coords["spatial_ref"].attrs["spatial_ref"] = "EPSG:4326"
+
+    # Zarr convention (different CRS)
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:wkt2"] = CRS.from_epsg(32618).to_wkt()
+
+    # With Zarr setting, should prefer Zarr CRS
+    with set_options(convention=Convention.ZARR):
+        crs = data.rio.crs
+        assert crs is not None
+        assert crs == CRS.from_epsg(32618)
+
+    # Reset to check default
+    data2 = data.copy(deep=True)
+    data2.rio._crs = None  # Reset cached CRS
+
+    # With default setting (CF priority), should prefer CF CRS
+    crs = data2.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_read_proj_code():
+    """Test reading CRS from proj:code attribute."""
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["y", "x"],
+    )
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:code"] = "EPSG:32618"
+
+    crs = data.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(32618)
+
+
+def test_read_proj_projjson():
+    """Test reading CRS from proj:projjson attribute."""
+    data = xr.DataArray(
+        np.random.rand(10, 20),
+        dims=["y", "x"],
+    )
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:projjson"] = pyproj.CRS.from_epsg(4326).to_json_dict()
+
+    crs = data.rio.crs
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
diff --git a/test/unit/test_convention_zarr.py b/test/unit/test_convention_zarr.py
new file mode 100644
index 00000000..83f4f5b7
--- /dev/null
+++ b/test/unit/test_convention_zarr.py
@@ -0,0 +1,201 @@
+"""Unit tests for the Zarr convention module."""
+import numpy as np
+import xarray as xr
+from affine import Affine
+from rasterio.crs import CRS
+
+from rioxarray._convention import zarr
+from rioxarray._convention.zarr import ZarrConvention
+
+
+def test_has_convention_declared__proj():
+    """Test checking for proj: convention declaration."""
+    attrs = {
+        "zarr_conventions": [
+            {
+                "name": "proj:",
+                "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
+            }
+        ]
+    }
+    assert zarr.has_convention_declared(attrs, "proj:") is True
+    assert zarr.has_convention_declared(attrs, "spatial:") is False
+
+
+def test_has_convention_declared__spatial():
+    """Test checking for spatial: convention declaration."""
+    attrs = {
+        "zarr_conventions": [
+            {
+                "name": "spatial:",
+                "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
+            }
+        ]
+    }
+    assert zarr.has_convention_declared(attrs, "spatial:") is True
+    assert zarr.has_convention_declared(attrs, "proj:") is False
+
+
+def test_has_convention_declared__not_declared():
+    """Test when no convention is declared."""
+    attrs = {}
+    assert zarr.has_convention_declared(attrs, "proj:") is False
+    assert zarr.has_convention_declared(attrs, "spatial:") is False
+
+
+def test_get_declared_conventions():
+    """Test getting all declared conventions."""
+    attrs = {
+        "zarr_conventions": [
+            {"name": "proj:", "uuid": "test-uuid-1"},
+            {"name": "spatial:", "uuid": "test-uuid-2"},
+        ]
+    }
+    declared = zarr.get_declared_conventions(attrs)
+    assert declared == {"proj:", "spatial:"}
+
+
+def test_get_declared_conventions__ignores_invalid_entries():
+    """Test that malformed entries are skipped instead of raising."""
+    attrs = {
+        "zarr_conventions": (
+            {"name": "proj:"},
+            {"name": ["not", "hashable"]},
+            {"uuid": "test-uuid-2"},
+            "spatial:",
+        )
+    }
+    assert zarr.get_declared_conventions(attrs) == {"proj:"}
+    assert zarr.has_convention_declared(attrs, "proj:") is True
+    assert zarr.has_convention_declared(attrs, "spatial:") is False
+
+
+def test_parse_spatial_transform():
+    """Test parsing spatial:transform array."""
+    transform_array = [1.0, 0.0, 100.0, 0.0, -1.0, 200.0]
+    result = zarr.parse_spatial_transform(transform_array)
+    assert result == Affine(1.0, 0.0, 100.0, 0.0, -1.0, 200.0)
+
+
+def test_parse_spatial_transform__invalid():
+    """Test parsing invalid spatial:transform."""
+    assert zarr.parse_spatial_transform([1, 2, 3]) is None
+    assert zarr.parse_spatial_transform("invalid") is None
+
+
+def test_read_crs__from_wkt2():
+    """Test reading CRS from proj:wkt2 attribute."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:wkt2"] = CRS.from_epsg(4326).to_wkt()
+
+    crs = ZarrConvention.read_crs(data)
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_read_crs__from_code():
+    """Test reading CRS from proj:code attribute."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:code"] = "EPSG:4326"
+
+    crs = ZarrConvention.read_crs(data)
+    assert crs is not None
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_read_crs__falls_back_when_value_invalid():
+    """Test that an unparsable proj:wkt2 does not block proj:code."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    data.attrs["zarr_conventions"] = [zarr.PROJ_CONVENTION]
+    data.attrs["proj:wkt2"] = "not a valid CRS"
+    data.attrs["proj:code"] = "EPSG:4326"
+
+    crs = ZarrConvention.read_crs(data)
+    assert crs == CRS.from_epsg(4326)
+
+
+def test_read_crs__not_found():
+    """Test that None is returned when no CRS is found."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+
+    crs = ZarrConvention.read_crs(data)
+    assert crs is None
+
+
+def test_read_crs__no_convention_declared():
+    """Test that CRS is not read when convention is not declared."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    # Add proj attributes but no convention declaration
+    data.attrs["proj:wkt2"] = CRS.from_epsg(4326).to_wkt()
+
+    crs = ZarrConvention.read_crs(data)
+    assert crs is None
+
+
+def test_read_transform__from_spatial_transform():
+    """Test reading transform from spatial:transform attribute."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    data.attrs["zarr_conventions"] = [zarr.SPATIAL_CONVENTION]
+    data.attrs["spatial:transform"] = [1.0, 0.0, 100.0, 0.0, -1.0, 200.0]
+
+    transform = ZarrConvention.read_transform(data)
+    assert transform is not None
+    assert transform == Affine(1.0, 0.0, 100.0, 0.0, -1.0, 200.0)
+
+
+def test_read_transform__not_found():
+    """Test that None is returned when no transform is found."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+
+    transform = ZarrConvention.read_transform(data)
+    assert transform is None
+
+
+def test_read_transform__no_convention_declared():
+    """Test that transform is not read when convention is not declared."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    # Add spatial attributes but no convention declaration
+    data.attrs["spatial:transform"] = [1.0, 0.0, 100.0, 0.0, -1.0, 200.0]
+
+    transform = ZarrConvention.read_transform(data)
+    assert transform is None
+
+
+def test_read_spatial_dimensions():
+    """Test reading spatial dimensions from spatial:dimensions attribute."""
+    data = xr.DataArray(np.random.rand(10, 20), dims=["lat", "lon"])
+    data.attrs["zarr_conventions"] = [zarr.SPATIAL_CONVENTION]
+    data.attrs["spatial:dimensions"] = ["lat", "lon"]
+
+    dims = ZarrConvention.read_spatial_dimensions(data)
+    assert dims == ("lat", "lon")
+
+
+def test_read_spatial_dimensions__string_value():
+    """Test that a bare string is not unpacked into dimension names."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    data.attrs["zarr_conventions"] = [zarr.SPATIAL_CONVENTION]
+    data.attrs["spatial:dimensions"] = "yx"
+
+    dims = ZarrConvention.read_spatial_dimensions(data)
+    assert dims is None
+
+
+def test_read_spatial_dimensions__not_found():
+    """Test that None is returned when no spatial dimensions are found."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+
+    dims = ZarrConvention.read_spatial_dimensions(data)
+    assert dims is None
+
+
+def test_read_spatial_dimensions__no_convention_declared():
+    """Test that spatial dims are not read when convention is not declared."""
+    data = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"])
+    # Add spatial attributes but no convention declaration
+    data.attrs["spatial:dimensions"] = ["y", "x"]
+
+    dims = ZarrConvention.read_spatial_dimensions(data)
+    assert dims is None
diff --git a/test/unit/test_options.py b/test/unit/test_options.py
index 704b4ae7..33e4931a 100644
--- a/test/unit/test_options.py
+++ b/test/unit/test_options.py
@@ -53,6 +53,14 @@ def test_set_options__convention_cf():
     assert get_option(CONVENTION) is None
 
 
+def test_set_options__convention_zarr():
+    """Test setting convention to Zarr."""
+    assert get_option(CONVENTION) is None
+    with set_options(convention=Convention.ZARR):
+        assert get_option(CONVENTION) is Convention.ZARR
+    assert get_option(CONVENTION) is None
+
+
 def test_set_options__convention_none():
     """Test setting convention back to None."""
     with set_options(convention=Convention.CF):