From 6e46f714e74e93316991066c0c0a5043b5d91150 Mon Sep 17 00:00:00 2001 From: Janick Martinez Esturo Date: Wed, 6 May 2026 15:14:08 +0200 Subject: [PATCH 1/3] feat: add CameraLabelsComponent with tagged-union type system, compat layer, and tests --- docs/data/formats.rst | 84 +++ ncore/data/__init__.py | 16 + ncore/data/v4/__init__.py | 2 + ncore/impl/data/compat.py | 91 ++- ncore/impl/data/types.py | 228 +++++++- ncore/impl/data/v4/compat.py | 69 +++ ncore/impl/data/v4/components.py | 239 ++++++++ ncore/impl/data/v4/components_test.py | 689 +++++++++++++++++++++++ ncore/impl/data/v4/types.py | 11 + tools/data_converter/colmap/converter.py | 1 + tools/data_converter/pai/converter.py | 1 + 11 files changed, 1429 insertions(+), 2 deletions(-) diff --git a/docs/data/formats.rst b/docs/data/formats.rst index 95c5b225..380f0b60 100644 --- a/docs/data/formats.rst +++ b/docs/data/formats.rst @@ -57,6 +57,8 @@ types: annotations * :class:`~ncore.data.v4.PointCloudsComponent` - Pre-computed point clouds with optional typed per-point attributes +* :class:`~ncore.data.v4.CameraLabelsComponent` - Per-camera image-aligned + labels (depth, flow, segmentation, masks, normals, features) The component architecture is extensible, allowing custom component types to be defined for application-specific data. @@ -317,6 +319,88 @@ Lidar and radar point clouds can also be accessed through the unified :class:`~ncore.data.PointCloudsSourceProtocol` via the :class:`~ncore.data.RayBundleSensorPointCloudsSourceAdapter`. +Camera Labels Component +~~~~~~~~~~~~~~~~~~~~~~~ + +Per-camera image-aligned labels (depth maps, optical flow, segmentation, masks, +surface normals, material properties, feature embeddings) are stored as +independently-timestamped label instances. Each instance stores labels of +**one type** for **one camera**, enabling sparse coverage and multiple label +sources per camera. + +.. 
code-block:: text + + camera_labels/ + └── {instance_name}/ (e.g., "depth.z@front_50fov") + │ + ├── timestamps_us [N] uint64 (sorted label timestamps) + │ + └── labels/ + ├── {descriptor} + │ ├── camera_id: str (associated camera identifier) + │ ├── label_type: { (tagged-union type descriptor) + │ │ "category": str, ("DEPTH", "FLOW", ...) + │ │ "qualifier": str, ("z", "optical_forward", ...) + │ │ "unit": str | null ("METERS", "PIXELS", ...) + │ │ } + │ ├── label_source: str ("GT_ANNOTATION", "EXTERNAL", ...) + │ ├── label_schema: { (storage format descriptor) + │ │ "dtype": str, (e.g., "float32", "uint8") + │ │ "shape_suffix": [int, ...], (trailing dims after [H, W]) + │ │ "encoding": str, ("RAW" | "IMAGE_ENCODED") + │ │ "encoded_format": str | null, ("png", "jpeg", null) + │ │ "quantization": {...} | null (optional dequant params) + │ │ } + │ └── generic_meta_data: {...} (metadata common for all labels) + │ + └── {timestamp_us}/ (keyed by camera end-of-frame timestamp) + ├── data [H, W, ...] or [B] uint8 (label array or encoded image bytes) + └── {attrs} + ├── generic_meta_data: {...} (per-label metadata) + └── format: str (IMAGE_ENCODED only; attribute of the data array) + +**Label Type System:** + +Labels use a *tagged-union* type consisting of a high-level +:class:`~ncore.data.LabelCategory` enum, a free-form qualifier string, and an +optional :class:`~ncore.data.LabelUnit`. +Well-known types are provided as constants (e.g., ``LabelType.DEPTH_Z_M``, +``LabelType.SEGMENTATION_SEMANTIC``), while project-specific labels use custom +qualifiers without any code changes. + +Supported categories: + +* ``DEPTH`` -- Per-pixel distance measures (``"z"``, ``"ray"``, ``"relative"``, ...) +* ``FLOW`` -- Motion displacement fields (``"optical_forward"``, ``"scene_backward"``, ...) +* ``SEGMENTATION`` -- Per-pixel classification (``"semantic"``, ``"instance"``, ``"logits"``) +* ``MASK`` -- Binary or multi-level masks (``"background"``, ``"dynamic"``, ``"ego"``, ...) +* ``GEOMETRY`` -- Per-pixel geometric vectors (``"normal_camera"``, ``"ray_direction"``, ...) 
+* ``MATERIAL`` -- Surface material properties (``"albedo"``, ``"roughness"``, ...) +* ``FEATURE`` -- Per-pixel feature embeddings (``"dinov2"``, ``"clip"``, ...) +* ``OTHER`` -- Catch-all for uncategorised labels + +**Encoding:** + +* ``RAW`` -- Numpy array stored as a zarr dataset with regular compression. Shape + is ``[H, W] + shape_suffix`` (e.g., ``[H, W, 2]`` for optical flow). + Transparent quantization of raw labels is optionally supported + (e.g., float32 depth quantized to uint16 with scale/offset). +* ``IMAGE_ENCODED`` -- Pre-encoded image bytes (PNG, JPEG) stored as a 1-D + zarr uint8 dataset with no compression. Consumers can call ``get_encoded_data()`` for raw + bytes (e.g., for GPU-based decoding) or ``get_data()`` for Pillow-decoded numpy arrays. + +**Instance naming convention:** + +Instance names are opaque identifiers. The recommended convention is +``category.qualifier@camera_id`` (e.g., ``depth.z@front_50fov``). The +component does *not* parse or validate instance names. + +**Compat layer access:** + +Labels are accessed through :class:`~ncore.data.CameraLabelsProtocol` via +:meth:`~ncore.data.SequenceLoaderProtocol.get_camera_labels` (by ID) or +:meth:`~ncore.data.SequenceLoaderProtocol.query_camera_labels` (by camera +and optional type/category filter). 
+ Component Groups ~~~~~~~~~~~~~~~~ diff --git a/ncore/data/__init__.py b/ncore/data/__init__.py index 9a74aab2..25650bba 100644 --- a/ncore/data/__init__.py +++ b/ncore/data/__init__.py @@ -16,6 +16,7 @@ """Package exposing methods related to NCore's basic data types and abstract APIs""" from ncore.impl.data.compat import ( + CameraLabelsProtocol, CameraSensorProtocol, LidarSensorProtocol, PointCloudsSourceProtocol, @@ -27,6 +28,7 @@ from ncore.impl.data.types import ( BBox3, BivariateWindshieldModelParameters, + CameraLabelDescriptor, ConcreteCameraModelParametersUnion, ConcreteExternalDistortionParametersUnion, ConcreteLidarModelParametersUnion, @@ -35,10 +37,16 @@ EncodedImageHandle, FrameTimepoint, FThetaCameraModelParameters, + LabelCategory, + LabelEncoding, + LabelSchema, LabelSource, + LabelType, + LabelUnit, OpenCVFisheyeCameraModelParameters, OpenCVPinholeCameraModelParameters, PointCloud, + QuantizationParams, ReferencePolynomial, RowOffsetStructuredSpinningLidarModelParameters, ShutterType, @@ -64,6 +72,13 @@ "ConcreteExternalDistortionParametersUnion", "ConcreteLidarModelParametersUnion", "PointCloud", + "LabelCategory", + "LabelUnit", + "LabelEncoding", + "LabelType", + "QuantizationParams", + "LabelSchema", + "CameraLabelDescriptor", # compat protocols "SequenceLoaderProtocol", "SensorProtocol", @@ -72,4 +87,5 @@ "RadarSensorProtocol", "PointCloudsSourceProtocol", "RayBundleSensorPointCloudsSourceAdapter", + "CameraLabelsProtocol", ] diff --git a/ncore/data/v4/__init__.py b/ncore/data/v4/__init__.py index a27b1bcd..e22d53c0 100644 --- a/ncore/data/v4/__init__.py +++ b/ncore/data/v4/__init__.py @@ -17,6 +17,7 @@ from ncore.impl.data.v4.compat import SequenceLoaderV4 from ncore.impl.data.v4.components import ( + CameraLabelsComponent, CameraSensorComponent, ComponentReader, ComponentWriter, @@ -46,6 +47,7 @@ "RadarSensorComponent", "CuboidsComponent", "PointCloudsComponent", + "CameraLabelsComponent", # compat APIs "SequenceLoaderV4", ] diff --git 
a/ncore/impl/data/compat.py b/ncore/impl/data/compat.py index ee9a4d83..2166685f 100644 --- a/ncore/impl/data/compat.py +++ b/ncore/impl/data/compat.py @@ -62,12 +62,15 @@ from ncore.impl.common.transformations import HalfClosedInterval, PoseGraphInterpolator from ncore.impl.data.types import ( + CameraLabelDescriptor, ConcreteCameraModelParametersUnion, ConcreteLidarModelParametersUnion, CuboidTrackObservation, EncodedImageData, FrameTimepoint, JsonLike, + LabelCategory, + LabelType, PointCloud, ) from ncore.impl.data.util import closest_index_sorted @@ -166,6 +169,34 @@ def get_point_clouds_source(self, source_id: str, *, return_index: int = 0) -> P """ ... + @property + def camera_labels_ids(self) -> List[str]: + """List of all camera label instance IDs.""" + ... + + def get_camera_labels(self, camera_label_id: str) -> CameraLabelsProtocol: + """Get a camera label instance by instance ID.""" + ... + + def query_camera_labels( + self, + camera_id: str, + label_type: Optional[LabelType] = None, + label_category: Optional[LabelCategory] = None, + ) -> List[CameraLabelsProtocol]: + """Query camera label instances matching filters. + + Parameters + ---------- + camera_id + Camera ID to match. + label_type + If provided, only return sources with this exact label type. + label_category + If provided, only return sources whose label type category matches. + """ + ... + @runtime_checkable class SensorProtocol(Protocol): @@ -611,7 +642,7 @@ def get_pc_generic_data(self, pc_index: int, name: str) -> npt.NDArray: ... def get_pc_generic_meta_data(self, pc_index: int) -> Dict[str, JsonLike]: - """Return generic JSON metadata associated with the given point cloud.""" + """Returns generic point cloud meta-data for a specific point-cloud.""" ... 
def get_pc_index_range( @@ -816,3 +847,61 @@ def get_pc_index_range( step: Optional[int] = None, ) -> range: return range(*slice(start, stop, step).indices(self.pcs_count)) + + +@runtime_checkable +class CameraLabelsProtocol(Protocol): + """Protocol for accessing camera-associated image labels. + + Each instance provides labels of one type for one camera, with independently-managed timestamps. + """ + + @property + def label_descriptor(self) -> CameraLabelDescriptor: + """Descriptor of this label instance.""" + ... + + @property + def labels_count(self) -> int: + """Number of stored labels.""" + ... + + @property + def label_timestamps_us(self) -> npt.NDArray[np.uint64]: + """Timestamps of all stored labels, sorted ascending.""" + ... + + @property + def labels_generic_meta_data(self) -> Dict[str, JsonLike]: + """Generic metadata associated with all labels.""" + ... + + @runtime_checkable + class CameraLabelHandleProtocol(Protocol): + """Protocol for a single camera label at a specific timestamp. + + Returned by :meth:`CameraLabelsProtocol.get_label` and provides deferred + access to the label data and metadata. + """ + + @property + def timestamp_us(self) -> int: + """Timestamp of this label in microseconds (usually associated with the camera end-of-frame timestamp).""" + ... + + @property + def generic_meta_data(self) -> Dict[str, JsonLike]: + """Per-label generic metadata.""" + ... + + def get_data(self) -> npt.NDArray[Any]: + """Load and return the label data as a numpy array.""" + ... + + def get_encoded_data(self) -> Optional[bytes]: + """Return raw encoded bytes for IMAGE_ENCODED labels, or None for RAW labels.""" + ... + + def get_label(self, timestamp_us: int) -> CameraLabelHandleProtocol: + """Return a lazy handle to the label data at the given timestamp.""" + ... 
diff --git a/ncore/impl/data/types.py b/ncore/impl/data/types.py index d8b4d14f..fdfc36d2 100644 --- a/ncore/impl/data/types.py +++ b/ncore/impl/data/types.py @@ -23,7 +23,20 @@ from dataclasses import dataclass, replace from enum import IntEnum, auto, unique from functools import lru_cache -from typing import TYPE_CHECKING, Callable, Dict, List, Literal, Mapping, Optional, Protocol, Tuple, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + Dict, + List, + Literal, + Mapping, + Optional, + Protocol, + Tuple, + TypeVar, + Union, +) import dataclasses_json import numpy as np @@ -771,6 +784,11 @@ def __post_init__(self): assert all(isinstance(i, float) for i in self.rot) +# --------------------------------------------------------------------------- +# Label type system +# --------------------------------------------------------------------------- + + @unique class LabelSource(IntEnum): """Enumerates different sources for labels (auto, manual, GT, synthetic etc.)""" @@ -779,6 +797,187 @@ class LabelSource(IntEnum): EXTERNAL = auto() #: Label originates from an unspecified external source, e.g., from third-party processes GT_SYNTHETIC = auto() #: Label originates from a synthetic data simulation and is considered ground-truth GT_ANNOTATION = auto() #: Label originates from manual annotation and is considered ground-truth + UNKNOWN = -1 #: Unrecognised / fallback source (reader-only) + + @classmethod + def resolve(cls, name: str) -> LabelSource: + """Return the member matching *name*, or :py:attr:`UNKNOWN` if unrecognised.""" + try: + return cls.__members__[name] + except KeyError: + return cls.UNKNOWN + + +@unique +class LabelCategory(IntEnum): + """High-level category of a label types.""" + + DEPTH = 0 #: Distance measures (z-axis, ray, relative) + FLOW = 1 #: Motion displacement fields (optical, scene) + SEGMENTATION = 2 #: Classification (semantic, instance, panoptic) + MASK = 3 #: Binary / multi-level masks + GEOMETRY = 4 #: Geometric 
vectors (normals, ray directions) + MATERIAL = 5 #: Material / surface properties (albedo, roughness) + FEATURE = 6 #: Feature embeddings (DINOv2, CLIP) + OTHER = 7 #: Catch-all for uncategorised labels + UNKNOWN = -1 #: Unrecognised / fallback category (reader-only) + + @classmethod + def resolve(cls, name: str) -> LabelCategory: + """Return the member matching *name*, or :py:attr:`UNKNOWN` if unrecognised.""" + try: + return cls.__members__[name] + except KeyError: + return cls.UNKNOWN + + +@unique +class LabelUnit(IntEnum): + """Physical unit associated with a label's numeric values.""" + + METERS = 0 #: Meters (metric) + PIXELS = 1 #: Pixel displacement + UNITLESS = 2 #: Dimensionless quantity (e.g. class IDs, masks) + UNKNOWN = -1 #: Unrecognised / fallback unit (reader-only) + + @classmethod + def resolve(cls, name: str) -> LabelUnit: + """Return the member matching *name*, or :py:attr:`UNKNOWN` if unrecognised.""" + try: + return cls.__members__[name] + except KeyError: + return cls.UNKNOWN + + +@unique +class LabelEncoding(IntEnum): + """Describes how the raw label data is stored on disk.""" + + RAW = 0 #: Stored as a raw numeric array + IMAGE_ENCODED = 1 #: Stored as an encoded image (e.g. PNG, JPEG) + UNKNOWN = -1 #: Unrecognised / fallback encoding (reader-only) + + @classmethod + def resolve(cls, name: str) -> LabelEncoding: + """Return the member matching *name*, or :py:attr:`UNKNOWN` if unrecognised.""" + try: + return cls.__members__[name] + except KeyError: + return cls.UNKNOWN + + +@dataclass(**({"slots": True, "frozen": True} if sys.version_info >= (3, 10) else {"frozen": True})) +class LabelType(dataclasses_json.DataClassJsonMixin): + """Describes the semantic kind of a label: category + qualifier + unit. + + Well-known combinations are exposed as class-level constants (e.g. ``LabelType.DEPTH_Z_M``). + Project-specific labels use custom qualifiers with no code changes required. 
+ """ + + category: LabelCategory = util.enum_field(LabelCategory) #: High-level label family + qualifier: str = "" #: Free-form variant identifier (e.g. "z", "optical_forward", "semantic") + unit: Optional[LabelUnit] = dataclasses.field( + default=None, + metadata=dataclasses_json.config( + encoder=lambda u: u.name if u is not None else None, + decoder=lambda s: LabelUnit.resolve(s) if s is not None else None, + ), + ) #: Physical unit of the label values, if applicable + + def __post_init__(self): + # Sanity checks + assert isinstance(self.category, LabelCategory) + assert isinstance(self.qualifier, str) + assert len(self.qualifier) > 0, ( + "Qualifier should be a non-empty string to avoid confusion with default LabelType" + ) + assert self.unit is None or isinstance(self.unit, LabelUnit) + + # Well-known constants (assigned after class definition) + DEPTH_Z_M: ClassVar[LabelType] + DEPTH_RAY_M: ClassVar[LabelType] + DEPTH_RELATIVE: ClassVar[LabelType] + FLOW_OPTICAL_FORWARD_PX: ClassVar[LabelType] + FLOW_OPTICAL_BACKWARD_PX: ClassVar[LabelType] + FLOW_SCENE_FORWARD_M: ClassVar[LabelType] + FLOW_SCENE_BACKWARD_M: ClassVar[LabelType] + SEGMENTATION_SEMANTIC: ClassVar[LabelType] + SEGMENTATION_INSTANCE: ClassVar[LabelType] + MASK_BACKGROUND: ClassVar[LabelType] + MASK_DYNAMIC: ClassVar[LabelType] + GEOMETRY_NORMAL_CAMERA: ClassVar[LabelType] + GEOMETRY_NORMAL_WORLD: ClassVar[LabelType] + GEOMETRY_RAY_DIRECTION: ClassVar[LabelType] + + +# Well-known LabelType constants (set after class definition) +LabelType.DEPTH_Z_M = LabelType(LabelCategory.DEPTH, "z", LabelUnit.METERS) +LabelType.DEPTH_RAY_M = LabelType(LabelCategory.DEPTH, "ray", LabelUnit.METERS) +LabelType.DEPTH_RELATIVE = LabelType(LabelCategory.DEPTH, "relative", LabelUnit.UNITLESS) +LabelType.FLOW_OPTICAL_FORWARD_PX = LabelType(LabelCategory.FLOW, "optical_forward", LabelUnit.PIXELS) +LabelType.FLOW_OPTICAL_BACKWARD_PX = LabelType(LabelCategory.FLOW, "optical_backward", LabelUnit.PIXELS) 
+LabelType.FLOW_SCENE_FORWARD_M = LabelType(LabelCategory.FLOW, "scene_forward", LabelUnit.METERS) +LabelType.FLOW_SCENE_BACKWARD_M = LabelType(LabelCategory.FLOW, "scene_backward", LabelUnit.METERS) +LabelType.SEGMENTATION_SEMANTIC = LabelType(LabelCategory.SEGMENTATION, "semantic", LabelUnit.UNITLESS) +LabelType.SEGMENTATION_INSTANCE = LabelType(LabelCategory.SEGMENTATION, "instance", LabelUnit.UNITLESS) +LabelType.MASK_BACKGROUND = LabelType(LabelCategory.MASK, "background", LabelUnit.UNITLESS) +LabelType.MASK_DYNAMIC = LabelType(LabelCategory.MASK, "dynamic", LabelUnit.UNITLESS) +LabelType.GEOMETRY_NORMAL_CAMERA = LabelType(LabelCategory.GEOMETRY, "normal_camera", LabelUnit.UNITLESS) +LabelType.GEOMETRY_NORMAL_WORLD = LabelType(LabelCategory.GEOMETRY, "normal_world", LabelUnit.UNITLESS) +LabelType.GEOMETRY_RAY_DIRECTION = LabelType(LabelCategory.GEOMETRY, "ray_direction", LabelUnit.UNITLESS) + + +@dataclass(**({"slots": True, "frozen": True} if sys.version_info >= (3, 10) else {"frozen": True})) +class QuantizationParams(dataclasses_json.DataClassJsonMixin): + """Parameters for de-quantizing stored integer data back to physical values. + + The physical value is recovered as ``value = stored * scale + offset``. 
+ """ + + quantized_dtype: np.dtype = util.dtype_field() #: Numpy dtype of the quantized on-disk representation + scale: float = 1.0 #: Multiplicative scale factor + offset: float = 0.0 #: Additive offset + intermediate_dtype: np.dtype = dataclasses.field( + default=np.dtype("float64"), + metadata=dataclasses_json.config(exclude=lambda _: True), + ) #: Numpy dtype for intermediate arithmetic during (de-)quantization + + def __post_init__(self): + assert np.issubdtype(self.quantized_dtype, np.integer), ( + f"quantized_dtype must be an integer type, got {self.quantized_dtype}" + ) + assert np.issubdtype(self.intermediate_dtype, np.floating), ( + f"intermediate_dtype must be a floating type, got {self.intermediate_dtype}" + ) + + +@dataclass(**({"slots": True, "frozen": True} if sys.version_info >= (3, 10) else {"frozen": True})) +class LabelSchema(dataclasses_json.DataClassJsonMixin): + """Schema describing the dtype, shape, encoding and quantization of a single label layer.""" + + dtype: np.dtype = util.dtype_field() #: Numpy dtype of the label data (after decoding / de-quantization) + shape_suffix: Tuple[int, ...] = dataclasses.field( + default=(), + metadata=dataclasses_json.config(encoder=list, decoder=tuple), + ) #: Extra dimensions appended to (H, W) + encoding: LabelEncoding = util.enum_field(LabelEncoding) #: How the label data is stored on disk + encoded_format: Optional[str] = ( + None #: Image format string (e.g. 
``"png"``, ``"jpeg"``) when ``encoding == IMAGE_ENCODED`` + ) + quantization: Optional[QuantizationParams] = None #: Optional quantization parameters + + def __post_init__(self): + # Sanity checks + assert isinstance(self.dtype, np.dtype) + assert isinstance(self.shape_suffix, tuple) and all(isinstance(i, int) for i in self.shape_suffix) + assert isinstance(self.encoding, LabelEncoding) + if self.encoding == LabelEncoding.IMAGE_ENCODED: + assert self.encoded_format is not None, "encoded_format must be provided when encoding is IMAGE_ENCODED" + else: + assert self.encoded_format is None, "encoded_format should only be provided when encoding is IMAGE_ENCODED" + if self.quantization is not None: + assert isinstance(self.quantization, QuantizationParams) + assert self.encoding == LabelEncoding.RAW, "Quantization is only supported for RAW encoding" @dataclass @@ -1056,3 +1255,30 @@ class EncodedImageHandle(Protocol): """Protocol type to reference encoded image data (e.g., file-based, container-based, memory-based)""" def get_data(self) -> EncodedImageData: ... + + +@dataclass(**({"slots": True, "frozen": True} if sys.version_info >= (3, 10) else {"frozen": True})) +class CameraLabelDescriptor(dataclasses_json.DataClassJsonMixin): + """Compound descriptor bundling the identity and schema of one camera label instance. + + Passed directly to :class:`CameraLabelsComponent.Writer` to define what it stores. + The :attr:`default_instance_name` property provides a recommended naming convention. 
+ """ + + camera_id: str + label_type: LabelType + label_schema: LabelSchema + label_source: LabelSource = util.enum_field(LabelSource) + + @property + def default_instance_name(self) -> str: + """Recommended instance name: ``category.qualifier@camera_id``.""" + cat = self.label_type.category.name.lower() + return f"{cat}.{self.label_type.qualifier}@{self.camera_id}" + + def __post_init__(self): + # Sanity checks + assert isinstance(self.camera_id, str) and len(self.camera_id) > 0, "camera_id should be a non-empty string" + assert isinstance(self.label_type, LabelType) + assert isinstance(self.label_schema, LabelSchema) + assert isinstance(self.label_source, LabelSource) diff --git a/ncore/impl/data/v4/compat.py b/ncore/impl/data/v4/compat.py index f18dcfe6..46fd3ed8 100644 --- a/ncore/impl/data/v4/compat.py +++ b/ncore/impl/data/v4/compat.py @@ -26,6 +26,7 @@ from ncore.impl.common.transformations import HalfClosedInterval, MotionCompensator, PoseGraphInterpolator from ncore.impl.common.util import unpack_optional from ncore.impl.data.compat import ( + CameraLabelsProtocol, CameraSensorProtocol, LidarSensorProtocol, PointCloudsSourceProtocol, @@ -36,15 +37,19 @@ SequenceLoaderProtocol, ) from ncore.impl.data.types import ( + CameraLabelDescriptor, ConcreteCameraModelParametersUnion, ConcreteLidarModelParametersUnion, CuboidTrackObservation, FrameTimepoint, JsonLike, + LabelCategory, + LabelType, PointCloud, ) from ncore.impl.data.v4.components import ( BaseRayBundleSensorComponentReader, + CameraLabelsComponent, CameraSensorComponent, CuboidsComponent, IntrinsicsComponent, @@ -127,6 +132,10 @@ def __init__( PointCloudsComponent.Reader ) + self._camera_labels_readers: Dict[str, CameraLabelsComponent.Reader] = self._reader.open_component_readers( + CameraLabelsComponent.Reader + ) + # init pose graph self._pose_graph: PoseGraphInterpolator = PoseGraphInterpolator( # static edges @@ -534,6 +543,36 @@ def get_radar_sensor(self, sensor_id: str) -> RadarSensorProtocol: 
pose_graph=self._pose_graph, ) + class CameraLabels(CameraLabelsProtocol): + """Wraps a :class:`CameraLabelsComponent.Reader` to implement :class:`CameraLabelsProtocol`.""" + + def __init__(self, reader: CameraLabelsComponent.Reader) -> None: + self._reader = reader + + @property + @override + def label_descriptor(self) -> CameraLabelDescriptor: + return self._reader.label_descriptor + + @property + @override + def labels_count(self) -> int: + return self._reader.labels_count + + @property + @override + def label_timestamps_us(self) -> npt.NDArray[np.uint64]: + return self._reader.timestamps_us + + @property + @override + def labels_generic_meta_data(self) -> Dict[str, JsonLike]: + return self._reader.generic_meta_data + + @override + def get_label(self, timestamp_us: int) -> CameraLabelsComponent.Reader.CameraLabelHandle: + return self._reader.get_label(timestamp_us) + class PointCloudsSource(PointCloudsSourceProtocol): """Native point-clouds source wrapping a :class:`PointCloudsComponent.Reader`. 
@@ -631,6 +670,36 @@ def get_point_clouds_source(self, source_id: str, *, return_index: int = 0) -> P raise KeyError(f"Point-clouds source '{source_id}' not found in native sources, lidars, or radars") + @property + @override + def camera_labels_ids(self) -> List[str]: + return list(self._camera_labels_readers.keys()) + + @override + def get_camera_labels(self, camera_label_id: str) -> CameraLabelsProtocol: + if camera_label_id not in self._camera_labels_readers: + raise KeyError(f"Camera labels '{camera_label_id}' not found") + return self.CameraLabels(self._camera_labels_readers[camera_label_id]) + + @override + def query_camera_labels( + self, + camera_id: str, + label_type: Optional[LabelType] = None, + label_category: Optional[LabelCategory] = None, + ) -> List[CameraLabelsProtocol]: + results: List[CameraLabelsProtocol] = [] + for reader in self._camera_labels_readers.values(): + if reader.label_descriptor.camera_id != camera_id: + continue + reader_lt = reader.label_descriptor.label_type + if label_type is not None and reader_lt != label_type: + continue + if label_category is not None and reader_lt.category != label_category: + continue + results.append(self.CameraLabels(reader)) + return results + @override def get_cuboid_track_observations( self, timestamp_interval_us: Optional[HalfClosedInterval] = None diff --git a/ncore/impl/data/v4/components.py b/ncore/impl/data/v4/components.py index b1a6a1c9..fbcb2626 100644 --- a/ncore/impl/data/v4/components.py +++ b/ncore/impl/data/v4/components.py @@ -1846,3 +1846,242 @@ def get_pc_generic_data(self, pc_index: int, name: str) -> npt.NDArray[Any]: def get_pc_generic_meta_data(self, pc_index: int) -> Dict[str, types.JsonLike]: return dict(self._pc_group(pc_index).attrs.get("generic_meta_data", {})) + + +class CameraLabelsComponent: + """Data component for storing per-camera image-aligned labels (depth, segmentation, flow, etc.).""" + + COMPONENT_NAME: str = "camera_labels" + + # 
-------------------------------------------------------------------------- + # Writer + # -------------------------------------------------------------------------- + + class Writer(ComponentWriter): + """Camera-labels component writer.""" + + @staticmethod + def get_component_name() -> str: + return CameraLabelsComponent.COMPONENT_NAME + + @staticmethod + def get_component_version() -> str: + return "v1" + + def __init__( + self, + component_group: zarr.Group, + sequence_timestamp_interval_us: HalfClosedInterval, + descriptor: types.CameraLabelDescriptor, + ) -> None: + super().__init__(component_group, sequence_timestamp_interval_us) + + self._descriptor = descriptor + + # Initialize labels group and timestamps list + self._labels_group = self._group.require_group("labels") + self._labels_group.attrs.put( + { + "descriptor": descriptor.to_dict(), + } + ) + self._timestamps: List[int] = [] + + def store_label( + self, + data: "Union[npt.NDArray[Any], bytes]", + timestamp_us: int, + generic_meta_data: Dict[str, types.JsonLike] = {}, + ) -> None: + """Store a single camera label. + + Parameters + ---------- + data + For RAW encoding: a numpy array of shape ``(H, W)`` or ``(H, W, *schema.shape_suffix)`` + in the logical dtype (``schema.dtype``). When quantization is configured, the + data is automatically quantized to the on-disk integer representation. + For IMAGE_ENCODED encoding: raw image bytes in the schema.encoded_format. + timestamp_us + Timestamp in microseconds - must fall within the sequence interval. + generic_meta_data + Optional per-label metadata. 
+ """ + compressor = Blosc(cname="lz4", clevel=5, shuffle=Blosc.BITSHUFFLE) + + # Sanity checks + assert timestamp_us in self._sequence_timestamp_interval_us, ( + f"timestamp_us {timestamp_us} not in sequence time range" + ) + assert timestamp_us not in self._timestamps, f"Duplicate timestamp_us: {timestamp_us}" + + # Store label-associated data in a dedicated subgroup named by the timestamp + label_group = self._labels_group.require_group(str(timestamp_us)) + + if self._descriptor.label_schema.encoding == types.LabelEncoding.RAW: + assert isinstance(data, np.ndarray), "RAW encoding requires a numpy array" + + # Validate shape: (H, W) for scalar, (H, W, *shape_suffix) for multi-channel + if self._descriptor.label_schema.shape_suffix: + assert data.ndim == 2 + len(self._descriptor.label_schema.shape_suffix), ( + f"Expected ndim={2 + len(self._descriptor.label_schema.shape_suffix)}, got {data.ndim}" + ) + assert data.shape[2:] == self._descriptor.label_schema.shape_suffix, ( + f"shape_suffix mismatch: expected {self._descriptor.label_schema.shape_suffix}, got {data.shape[2:]}" + ) + else: + assert data.ndim == 2, f"Scalar label must be 2-D (H, W), got ndim={data.ndim}" + + # Validate dtype — caller must pass data in the expected dtype + assert np.dtype(data.dtype) == self._descriptor.label_schema.dtype, ( + f"dtype mismatch: expected {self._descriptor.label_schema.dtype}, got {data.dtype}" + ) + + # Quantize if configured + stored = data + if (q := self._descriptor.label_schema.quantization) is not None: + stored = np.round((data.astype(q.intermediate_dtype) - q.offset) / q.scale).astype( + q.quantized_dtype + ) + + label_group.create_dataset("data", data=stored, chunks=stored.shape, compressor=compressor) + + elif self._descriptor.label_schema.encoding == types.LabelEncoding.IMAGE_ENCODED: + assert isinstance(data, bytes), "IMAGE_ENCODED encoding requires bytes" + + label_group.create_dataset( + "data", + data=np.asarray(bytearray(data), dtype=np.uint8), + 
def finalize(self) -> None:
    """Write the collected label timestamps as the sorted ``timestamps_us`` dataset."""
    ordered_timestamps = np.array(sorted(self._timestamps), dtype=np.uint64)
    # Keep the chunk length at least one, also when no label was stored.
    chunk_length = max(1, len(ordered_timestamps))
    self._group.create_dataset(
        "timestamps_us",
        data=ordered_timestamps,
        chunks=(chunk_length,),
        compressor=Blosc(cname="lz4", clevel=5, shuffle=Blosc.BITSHUFFLE),
    )


# --------------------------------------------------------------------------
# Reader
# --------------------------------------------------------------------------


class Reader(ComponentReader):
    """Reader for one camera-labels component instance.

    On construction the timestamp array and the label descriptor are loaded;
    the label payloads themselves are only read through the handles returned
    by :meth:`get_label`.
    """

    @staticmethod
    def get_component_name() -> str:
        """Return the component name this reader is registered for."""
        return CameraLabelsComponent.COMPONENT_NAME

    @staticmethod
    def supports_component_version(version: str) -> bool:
        """Return True only for the ``"v1"`` component version."""
        return version == "v1"

    def __init__(self, component_instance_name: str, component_group: zarr.Group) -> None:
        super().__init__(component_instance_name, component_group)

        self._timestamps_us: npt.NDArray[np.uint64] = np.array(self._group["timestamps_us"][:])

        # Map each stored timestamp to its position in the timestamp array.
        self._timestamp_to_index: Dict[int, int] = {}
        for position, stored_timestamp in enumerate(self._timestamps_us):
            self._timestamp_to_index[int(stored_timestamp)] = position

        descriptor_dict = self._group["labels"].attrs["descriptor"]
        self._descriptor = types.CameraLabelDescriptor.from_dict(descriptor_dict)

    # -- properties --------------------------------------------------------

    @property
    def label_descriptor(self) -> types.CameraLabelDescriptor:
        """Descriptor shared by all labels of this component instance."""
        return self._descriptor

    @property
    def labels_count(self) -> int:
        """Number of stored label timestamps."""
        return len(self._timestamps_us)

    @property
    def timestamps_us(self) -> npt.NDArray[np.uint64]:
        """Label timestamps as loaded from the ``timestamps_us`` dataset."""
        return self._timestamps_us

    # -- per-label access --------------------------------------------------

    def _label_group(self, timestamp_us: int) -> zarr.Group:
        """Return the zarr group of the label stored under ``timestamp_us``.

        Asserts that the timestamp is one of the timestamps loaded at construction.
        """
        assert timestamp_us in self._timestamp_to_index, (
            f"Unknown timestamp: {timestamp_us}. Available: {list(self._timestamp_to_index.keys())[:5]}..."
        )
        labels_root = self._group["labels"]
        return cast(zarr.Group, labels_root[str(timestamp_us)])

    class CameraLabelHandle:
        """Lazy reference to one stored label.

        Implements the :class:`CameraLabelHandleProtocol` protocol: descriptor,
        timestamp, and per-label metadata are available immediately, while the
        payload is read only when :meth:`get_data` or :meth:`get_encoded_data`
        is called.
        """

        def __init__(
            self,
            label_group: zarr.Group,
            descriptor: types.CameraLabelDescriptor,
            timestamp_us: int,
            generic_meta_data: Dict[str, types.JsonLike],
        ) -> None:
            self._label_group = label_group
            self._descriptor = descriptor
            self._timestamp_us = timestamp_us
            self._generic_meta_data = generic_meta_data

        @property
        def descriptor(self) -> types.CameraLabelDescriptor:
            """Descriptor the label was stored with."""
            return self._descriptor

        @property
        def timestamp_us(self) -> int:
            """Timestamp the label was requested for."""
            return self._timestamp_us

        @property
        def generic_meta_data(self) -> Dict[str, types.JsonLike]:
            """Per-label metadata handed over at construction."""
            return self._generic_meta_data

        def get_data(self) -> npt.NDArray[Any]:
            """Load the label and return it as a numpy array.

            IMAGE_ENCODED labels are decoded through PIL and converted to the
            schema dtype. RAW labels are returned as stored, or de-quantized
            (``stored * scale + offset`` in the intermediate dtype, then cast
            to the schema dtype) when the schema carries quantization
            parameters.

            Raises:
                ValueError: if the schema encoding is neither RAW nor IMAGE_ENCODED.
            """
            schema = self._descriptor.label_schema

            if schema.encoding == types.LabelEncoding.IMAGE_ENCODED:
                encoded = bytes(self._label_group["data"][:])
                decoded_image = PILImage.open(io.BytesIO(encoded))
                return np.asarray(decoded_image, dtype=schema.dtype)

            if schema.encoding != types.LabelEncoding.RAW:
                raise ValueError(f"Unsupported label encoding: {self._descriptor.label_schema.encoding}")

            stored = np.array(self._label_group["data"][:])

            quantization = schema.quantization
            if quantization is None:
                return stored

            # De-quantize in the intermediate dtype, then cast to the schema dtype
            restored = stored.astype(quantization.intermediate_dtype) * quantization.scale + quantization.offset
            return restored.astype(schema.dtype)

        def get_encoded_data(self) -> Optional[bytes]:
            """Return the stored bytes of an IMAGE_ENCODED label; None for any other encoding."""
            if self._descriptor.label_schema.encoding != types.LabelEncoding.IMAGE_ENCODED:
                return None
            return bytes(self._label_group["data"][:])

    def get_label(self, timestamp_us: int) -> CameraLabelHandle:
        """Return a lazy handle to the label stored at ``timestamp_us``."""
        group = self._label_group(timestamp_us)
        per_label_meta = group.attrs["generic_meta_data"]
        return self.CameraLabelHandle(group, self._descriptor, timestamp_us, per_label_meta)
@parameterized_class(
    ("store_type"),
    [
        ("itar",),
        ("directory",),
    ],
)
class TestCameraLabelsComponent(unittest.TestCase):
    """Round-trip tests for the CameraLabelsComponent Writer/Reader.

    The class is parameterized over ``store_type`` so that every test runs
    once per store backend.
    """

    store_type: Literal["itar", "directory"]

    def setUp(self):
        """Configure numpy printing so array mismatches are reported at full precision."""
        np.set_printoptions(floatmode="unique", linewidth=200, suppress=True)

    def _make_writer(
        self,
        descriptor: CameraLabelDescriptor,
        instance_name=None,
        generic_meta_data: Dict[str, JsonLike] = {},
    ) -> Tuple[CameraLabelsComponent.Writer, SequenceComponentGroupsWriter, tempfile.TemporaryDirectory]:
        """Create a store writer in a fresh temporary directory and register a camera-labels writer.

        Args:
            descriptor: descriptor handed to the CameraLabelsComponent.Writer.
            instance_name: component instance name; defaults to
                ``descriptor.default_instance_name`` when None.
            generic_meta_data: component-level metadata; a copy is passed on.

        Returns:
            ``(writer, store_writer, tmpdir)``. The temporary directory is
            removed automatically at the end of the test; callers may still
            call ``tmpdir.cleanup()`` themselves.
        """

        tmpdir = tempfile.TemporaryDirectory()
        # Remove the directory even when the calling test fails before reaching its
        # own ``tmpdir.cleanup()``; a second cleanup() call on the same object is a no-op.
        self.addCleanup(tmpdir.cleanup)

        timestamp_interval = HalfClosedInterval(0, 10_000_001)

        if instance_name is None:
            instance_name = descriptor.default_instance_name

        store_writer = SequenceComponentGroupsWriter(
            output_dir_path=UPath(tmpdir.name),
            store_base_name=(seq_id := "label-test-seq"),
            sequence_id=seq_id,
            sequence_timestamp_interval_us=timestamp_interval,
            store_type=self.store_type,
            generic_meta_data={},
        )

        writer = store_writer.register_component_writer(
            CameraLabelsComponent.Writer,
            instance_name,
            # Pass a copy so the dict object shared as the parameter default
            # is never handed to (and possibly kept or modified by) a writer.
            generic_meta_data=dict(generic_meta_data),
            descriptor=descriptor,
        )

        return writer, store_writer, tmpdir

    def _finalize_and_open_readers(
        self, store_writer: SequenceComponentGroupsWriter
    ) -> Dict[str, CameraLabelsComponent.Reader]:
        """Finalize ``store_writer`` and open all CameraLabelsComponent readers.

        Returns:
            The readers returned by ``open_component_readers`` for the
            finalized store paths, keyed by component instance name.
        """

        store_paths = store_writer.finalize()

        reader = SequenceComponentGroupsReader(component_group_paths=store_paths)

        return reader.open_component_readers(CameraLabelsComponent.Reader)
# ------------------------------------------------------------------
# 1. test_raw_depth_roundtrip
# ------------------------------------------------------------------
def test_raw_depth_roundtrip(self) -> None:
    """Store two RAW float32 depth maps, then verify descriptor, timestamps, and data after reading back."""

    ref_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            shape_suffix=(),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(ref_descriptor)

    # One random depth map per timestamp, stored in ascending timestamp order
    depth_by_timestamp = {
        1_000_000: np.random.default_rng().random((64, 80), dtype=np.float32) * 100.0,
        2_000_000: np.random.default_rng().random((64, 80), dtype=np.float32) * 50.0,
    }
    for timestamp_us, depth in depth_by_timestamp.items():
        writer.store_label(data=depth, timestamp_us=timestamp_us)

    readers = self._finalize_and_open_readers(store_writer)
    instance_name = "depth.z@front"
    self.assertIn(instance_name, readers)
    reader = readers[instance_name]

    # The descriptor must survive the round-trip field by field and as a whole
    loaded_descriptor = reader.label_descriptor
    loaded_type = loaded_descriptor.label_type
    loaded_schema = loaded_descriptor.label_schema
    self.assertEqual(loaded_descriptor.camera_id, "front")
    self.assertEqual(loaded_type, LabelType.DEPTH_Z_M)
    self.assertEqual(loaded_type.category, LabelCategory.DEPTH)
    self.assertEqual(loaded_type.qualifier, "z")
    self.assertEqual(loaded_type.unit, LabelUnit.METERS)
    self.assertEqual(loaded_schema.encoding, LabelEncoding.RAW)
    self.assertEqual(loaded_schema.dtype, np.dtype("float32"))
    self.assertEqual(loaded_schema.shape_suffix, ())
    self.assertEqual(loaded_descriptor.label_source, LabelSource.AUTOLABEL)
    self.assertEqual(loaded_descriptor.to_dict(), ref_descriptor.to_dict())

    # Counts and timestamps
    self.assertEqual(reader.labels_count, 2)
    expected_timestamps = np.array([1_000_000, 2_000_000], dtype=np.uint64)
    np.testing.assert_array_equal(reader.timestamps_us, expected_timestamps)

    # Data read through get_label() handles
    for timestamp_us, depth in depth_by_timestamp.items():
        np.testing.assert_array_almost_equal(reader.get_label(timestamp_us).get_data(), depth)

    # RAW labels have no encoded representation
    self.assertIsNone(reader.get_label(1_000_000).get_encoded_data())

    tmpdir.cleanup()


# ------------------------------------------------------------------
# 2. test_raw_optical_flow_roundtrip
# ------------------------------------------------------------------
def test_raw_optical_flow_roundtrip(self) -> None:
    """Store a RAW float32 flow field with shape_suffix=(2,), then verify shape and data."""

    flow_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.FLOW_OPTICAL_FORWARD_PX,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            shape_suffix=(2,),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(flow_descriptor)

    flow = np.random.default_rng().random((48, 64, 2), dtype=np.float32) * 10.0
    writer.store_label(data=flow, timestamp_us=500_000)

    reader = self._finalize_and_open_readers(store_writer)["flow.optical_forward@front"]

    loaded = reader.get_label(500_000).get_data()
    self.assertEqual(loaded.shape, (48, 64, 2))
    np.testing.assert_array_almost_equal(loaded, flow)

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 3. test_image_encoded_segmentation_roundtrip
# ------------------------------------------------------------------
def test_image_encoded_segmentation_roundtrip(self) -> None:
    """Store a PNG-encoded uint8 mask as IMAGE_ENCODED and verify decoded and encoded round-trips."""

    descriptor = CameraLabelDescriptor(
        camera_id="left",
        label_type=LabelType.SEGMENTATION_SEMANTIC,
        label_schema=LabelSchema(
            dtype=np.dtype("uint8"),
            encoding=LabelEncoding.IMAGE_ENCODED,
            encoded_format="png",
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(descriptor)

    # Encode a random single-channel mask as PNG in memory
    mask = np.random.default_rng().integers(0, 10, size=(32, 48), dtype=np.uint8)
    buf = io.BytesIO()
    PILImage.fromarray(mask, mode="L").save(buf, format="PNG")
    png_bytes = buf.getvalue()

    writer.store_label(data=png_bytes, timestamp_us=1_000_000)

    reader = self._finalize_and_open_readers(store_writer)["segmentation.semantic@left"]
    label = reader.get_label(1_000_000)

    # Decoded pixels must equal the original mask, in the schema dtype
    decoded = label.get_data()
    self.assertEqual(decoded.dtype, descriptor.label_schema.dtype)
    np.testing.assert_array_equal(decoded, mask)

    # The stored bytes must be returned unchanged
    encoded = label.get_encoded_data()
    self.assertIsNotNone(encoded)
    self.assertEqual(encoded, png_bytes)

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 4. test_quantized_depth_roundtrip
# ------------------------------------------------------------------
def test_quantized_depth_roundtrip(self) -> None:
    """Store float32 depth with quantization to uint16, verify dequantized read is close to original."""

    quant = QuantizationParams(
        quantized_dtype=np.dtype("uint16"),
        scale=0.001,
        offset=0.0,
    )

    writer, store_writer, tmpdir = self._make_writer(
        descriptor := CameraLabelDescriptor(
            camera_id="front",
            label_type=LabelType.DEPTH_Z_M,
            label_schema=LabelSchema(
                dtype=np.dtype("float32"),
                shape_suffix=(),
                encoding=LabelEncoding.RAW,
                quantization=quant,
            ),
            label_source=LabelSource.AUTOLABEL,
        )
    )

    # Original data in range [0, 60), which fits the uint16 range [0, 65.535] after quantization
    max_depth = 60.0
    original = np.random.default_rng().random((32, 48), dtype=np.float32) * max_depth

    writer.store_label(data=original, timestamp_us=1_000_000)

    readers = self._finalize_and_open_readers(store_writer)
    reader = readers[descriptor.default_instance_name]

    dequantized = reader.get_label(1_000_000).get_data()

    # The quantization error itself is expected to be at most 0.5 * scale
    # (presumably the writer rounds to the nearest code -- the write path is
    # not exercised directly here). get_data() additionally casts the
    # de-quantized values to float32, and that cast can move a value by a
    # float32 rounding step, so a bare 0.5 * scale bound is not guaranteed
    # for unseeded random data. Allow one float32 spacing at the largest
    # magnitude on top of it.
    float32_rounding = float(np.spacing(np.float32(max_depth)))
    np.testing.assert_allclose(dequantized, original, atol=0.5 * quant.scale + float32_rounding, rtol=0)

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 4b. test_quantized_depth_with_offset
# ------------------------------------------------------------------
def test_quantized_depth_with_offset(self) -> None:
    """Round-trip float32 depth through int16 quantization with a non-zero offset."""

    quant = QuantizationParams(
        quantized_dtype=np.dtype("int16"),
        scale=0.01,
        offset=-100.0,
    )
    descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            shape_suffix=(),
            encoding=LabelEncoding.RAW,
            quantization=quant,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(descriptor)

    # With scale 0.01 and offset -100 the int16 codes [0, 32767] cover the
    # values [-100, 227.67]; the samples drawn here lie in [-100, 200).
    samples = np.random.default_rng().random((16, 24), dtype=np.float32)
    original = (samples * 300.0) - 100.0

    writer.store_label(data=original, timestamp_us=1_000_000)

    reader = self._finalize_and_open_readers(store_writer)[descriptor.default_instance_name]

    dequantized = reader.get_label(1_000_000).get_data()
    np.testing.assert_allclose(dequantized, original, atol=quant.scale, rtol=0)

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 4c. test_quantized_float32_intermediate
# ------------------------------------------------------------------
def test_quantized_float32_intermediate(self) -> None:
    """Round-trip uint16-quantized depth when the intermediate dtype is float32."""

    quant = QuantizationParams(
        quantized_dtype=np.dtype("uint16"),
        scale=0.001,
        offset=0.0,
        intermediate_dtype=np.dtype("float32"),
    )
    descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            shape_suffix=(),
            encoding=LabelEncoding.RAW,
            quantization=quant,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(descriptor)

    original = np.random.default_rng().random((16, 24), dtype=np.float32) * 60.0
    writer.store_label(data=original, timestamp_us=1_000_000)

    reader = self._finalize_and_open_readers(store_writer)[descriptor.default_instance_name]
    dequantized = reader.get_label(1_000_000).get_data()

    # A float32 intermediate is less precise than float64, so the tolerance
    # is one full quantization step (1 LSB) instead of half a step.
    one_lsb = 1.0 * quant.scale
    np.testing.assert_allclose(dequantized, original, atol=one_lsb, rtol=0)

    tmpdir.cleanup()


# ------------------------------------------------------------------
# 4d. test_quantization_params_rejects_float_dtype
# ------------------------------------------------------------------
def test_quantization_params_rejects_float_dtype(self) -> None:
    """QuantizationParams must reject non-integer quantized_dtype."""

    for float_name in ("float32", "float64"):
        with self.assertRaises(AssertionError):
            QuantizationParams(quantized_dtype=np.dtype(float_name), scale=1.0, offset=0.0)
# ------------------------------------------------------------------
# 4e. test_quantization_params_rejects_non_float_intermediate
# ------------------------------------------------------------------
def test_quantization_params_rejects_non_float_intermediate(self) -> None:
    """QuantizationParams must reject non-floating intermediate_dtype."""

    with self.assertRaises(AssertionError):
        QuantizationParams(
            quantized_dtype=np.dtype("uint16"), scale=1.0, offset=0.0, intermediate_dtype=np.dtype("int32")
        )
    with self.assertRaises(AssertionError):
        QuantizationParams(
            quantized_dtype=np.dtype("uint16"), scale=1.0, offset=0.0, intermediate_dtype=np.dtype("uint8")
        )


# ------------------------------------------------------------------
# 5. test_multiple_label_types_per_camera
# ------------------------------------------------------------------
def test_multiple_label_types_per_camera(self) -> None:
    """Register both depth and segmentation writers for the same camera, verify both readers exist."""

    tmpdir = tempfile.TemporaryDirectory()
    # Registered up front so the directory is removed even when an assertion below fails.
    self.addCleanup(tmpdir.cleanup)
    timestamp_interval = HalfClosedInterval(0, 10_000_001)

    store_writer = SequenceComponentGroupsWriter(
        output_dir_path=UPath(tmpdir.name),
        store_base_name=(seq_id := "multi-label-seq"),
        sequence_id=seq_id,
        sequence_timestamp_interval_us=timestamp_interval,
        # Use the parameterized store type like every other test of this class;
        # a hard-coded "directory" would run the same variant twice and never
        # cover "itar".
        store_type=self.store_type,
        generic_meta_data={},
    )

    depth_schema = LabelSchema(
        dtype=np.dtype("float32"),
        encoding=LabelEncoding.RAW,
    )
    seg_schema = LabelSchema(
        dtype=np.dtype("uint8"),
        encoding=LabelEncoding.IMAGE_ENCODED,
        encoded_format="png",
    )

    depth_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=depth_schema,
        label_source=LabelSource.AUTOLABEL,
    )
    seg_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.SEGMENTATION_SEMANTIC,
        label_schema=seg_schema,
        label_source=LabelSource.AUTOLABEL,
    )

    self.assertEqual("depth.z@front", depth_descriptor.default_instance_name)
    self.assertEqual("segmentation.semantic@front", seg_descriptor.default_instance_name)

    depth_writer = store_writer.register_component_writer(
        CameraLabelsComponent.Writer,
        depth_descriptor.default_instance_name,
        descriptor=depth_descriptor,
    )
    seg_writer = store_writer.register_component_writer(
        CameraLabelsComponent.Writer,
        seg_descriptor.default_instance_name,
        descriptor=seg_descriptor,
    )

    depth_writer.store_label(data=np.ones((16, 16), dtype=np.float32), timestamp_us=1_000_000)

    # The segmentation writer is IMAGE_ENCODED, so it receives PNG bytes
    mask = np.zeros((16, 16), dtype=np.uint8)
    buf = io.BytesIO()
    PILImage.fromarray(mask, mode="L").save(buf, format="PNG")
    seg_writer.store_label(data=buf.getvalue(), timestamp_us=1_000_000)

    readers = self._finalize_and_open_readers(store_writer)
    self.assertIn("depth.z@front", readers)
    self.assertIn("segmentation.semantic@front", readers)
    self.assertEqual(readers["depth.z@front"].label_descriptor.camera_id, "front")
    self.assertEqual(readers["segmentation.semantic@front"].label_descriptor.camera_id, "front")
# ------------------------------------------------------------------
# 6. test_sparse_label_coverage
# ------------------------------------------------------------------
def test_sparse_label_coverage(self) -> None:
    """Store two labels out of timestamp order and verify the reader reports sorted timestamps."""

    depth_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(depth_descriptor)

    # Deliberately store the later timestamp first
    for timestamp_us in (5_000_000, 1_000_000):
        writer.store_label(data=np.ones((8, 8), dtype=np.float32), timestamp_us=timestamp_us)

    reader = self._finalize_and_open_readers(store_writer)["depth.z@front"]

    self.assertEqual(reader.labels_count, 2)

    stored_timestamps = reader.timestamps_us
    # Each timestamp must be <= its successor
    is_non_decreasing = np.all(stored_timestamps[:-1] <= stored_timestamps[1:])
    self.assertTrue(is_non_decreasing)
    np.testing.assert_array_equal(stored_timestamps, np.array([1_000_000, 5_000_000], dtype=np.uint64))

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 7. test_forward_compat_unknown_label_type
# ------------------------------------------------------------------
def test_forward_compat_unknown_label_type(self) -> None:
    """Use a custom label type with OTHER category; reader should round-trip correctly."""

    custom_type = LabelType(LabelCategory.OTHER, "some_future")

    writer, store_writer, tmpdir = self._make_writer(
        descriptor := CameraLabelDescriptor(
            camera_id="front",
            label_type=custom_type,
            label_schema=LabelSchema(
                dtype=np.dtype("float32"),
                encoding=LabelEncoding.RAW,
            ),
            label_source=LabelSource.AUTOLABEL,
        ),
        instance_name=(instance_name := "some-other-instance-name"),
    )

    # The default name is derived from the descriptor, but the explicit name is what gets registered
    self.assertEqual(descriptor.default_instance_name, "other.some_future@front")

    writer.store_label(data=np.ones((8, 8), dtype=np.float32), timestamp_us=1_000_000)

    readers = self._finalize_and_open_readers(store_writer)
    reader = readers[instance_name]

    self.assertEqual(reader.label_descriptor.label_type.category, LabelCategory.OTHER)
    self.assertEqual(reader.label_descriptor.label_type.qualifier, "some_future")
    self.assertEqual(reader.label_descriptor.label_type, custom_type)

    # Data should still be readable
    data = reader.get_label(1_000_000).get_data()
    np.testing.assert_array_equal(data, np.ones((8, 8), dtype=np.float32))

    tmpdir.cleanup()


def test_forward_compat_unknown_category(self) -> None:
    """An unknown category string in LabelType resolution should give LabelCategory.UNKNOWN."""

    # Test the LabelCategory.resolve() mechanism directly
    self.assertEqual(LabelCategory.resolve("TOTALLY_NEW_CATEGORY"), LabelCategory.UNKNOWN)
    self.assertEqual(LabelCategory.resolve("DEPTH"), LabelCategory.DEPTH)

    # Construct a LabelType with UNKNOWN category (simulating what the reader would produce)
    lt = LabelType(LabelCategory.resolve("TOTALLY_NEW_CATEGORY"), "v2")
    self.assertEqual(lt.category, LabelCategory.UNKNOWN)
    self.assertEqual(lt.qualifier, "v2")
    self.assertIsNone(lt.unit)

    # Ensure the round-trip through to_dict/from_dict preserves UNKNOWN
    d = lt.to_dict()
    self.assertEqual(d["category"], "UNKNOWN")
    self.assertEqual(d["qualifier"], "v2")
    rt = LabelType.from_dict(d)
    self.assertEqual(rt.category, LabelCategory.UNKNOWN)
    self.assertEqual(rt.qualifier, "v2")


# ------------------------------------------------------------------
# 8. test_reject_empty_camera_id
# ------------------------------------------------------------------
def test_reject_empty_camera_id(self) -> None:
    """Passing an empty camera_id should raise AssertionError."""

    tmpdir = tempfile.TemporaryDirectory()
    # Registered up front: if the expected AssertionError is NOT raised, the
    # assertRaises block fails and a trailing cleanup call would be skipped.
    self.addCleanup(tmpdir.cleanup)
    timestamp_interval = HalfClosedInterval(0, 10_000_001)

    store_writer = SequenceComponentGroupsWriter(
        output_dir_path=UPath(tmpdir.name),
        store_base_name=(seq_id := "reject-at-seq"),
        sequence_id=seq_id,
        sequence_timestamp_interval_us=timestamp_interval,
        store_type=self.store_type,
        generic_meta_data={},
    )

    # Both the descriptor construction and the writer registration are inside
    # the block, so the rejection may come from either of them.
    with self.assertRaises(AssertionError):
        store_writer.register_component_writer(
            CameraLabelsComponent.Writer,
            "depth.z@front",
            descriptor=CameraLabelDescriptor(
                camera_id="",
                label_type=LabelType.DEPTH_Z_M,
                label_schema=LabelSchema(
                    dtype=np.dtype("float32"),
                    encoding=LabelEncoding.RAW,
                ),
                label_source=LabelSource.EXTERNAL,
            ),
        )
# ------------------------------------------------------------------
# 9. test_per_label_generic_meta_data
# ------------------------------------------------------------------
def test_per_label_generic_meta_data(self) -> None:
    """Verify that component-level and per-label generic metadata both survive a round-trip."""

    component_meta: Dict[str, JsonLike] = {"source": "ground_truth", "version": 2}
    per_label_meta: Dict[str, JsonLike] = {"quality": 0.95, "annotator": "auto"}

    depth_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(depth_descriptor, generic_meta_data=component_meta)

    writer.store_label(
        data=np.ones((8, 8), dtype=np.float32),
        timestamp_us=1_000_000,
        generic_meta_data=per_label_meta,
    )

    reader = self._finalize_and_open_readers(store_writer)["depth.z@front"]

    # Metadata attached to the component as a whole
    self.assertEqual(reader.generic_meta_data, component_meta)

    # Metadata attached to the single stored label, read through its handle
    self.assertEqual(reader.get_label(1_000_000).generic_meta_data, per_label_meta)

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 10. test_label_handle_deferred_decoding
# ------------------------------------------------------------------
def test_label_handle_deferred_decoding(self) -> None:
    """Check a handle's descriptor, timestamp, and metadata first, then its get_data()/get_encoded_data()."""

    descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    writer, store_writer, tmpdir = self._make_writer(descriptor)

    depth = np.random.default_rng().random((16, 16), dtype=np.float32)
    generic_meta_data: Dict[str, JsonLike] = {"info": "test label"}
    writer.store_label(data=depth, timestamp_us=1_000_000, generic_meta_data=generic_meta_data)

    reader = self._finalize_and_open_readers(store_writer)["depth.z@front"]
    handle = reader.get_label(1_000_000)

    # Attributes available on the handle without reading the payload
    self.assertEqual(handle.descriptor, descriptor)
    self.assertEqual(handle.timestamp_us, 1_000_000)
    self.assertEqual(handle.generic_meta_data, generic_meta_data)

    # Payload access: RAW data decodes to the array and has no encoded form
    np.testing.assert_array_almost_equal(handle.get_data(), depth)
    self.assertIsNone(handle.get_encoded_data())

    tmpdir.cleanup()
# ------------------------------------------------------------------
# 11. test_empty_writer_finalize
# ------------------------------------------------------------------
def test_empty_writer_finalize(self) -> None:
    """Finalize a writer that stored nothing; the reader must report zero labels and no timestamps."""

    depth_descriptor = CameraLabelDescriptor(
        camera_id="front",
        label_type=LabelType.DEPTH_Z_M,
        label_schema=LabelSchema(
            dtype=np.dtype("float32"),
            encoding=LabelEncoding.RAW,
        ),
        label_source=LabelSource.AUTOLABEL,
    )
    # The component writer itself is not needed: nothing is stored
    _, store_writer, tmpdir = self._make_writer(depth_descriptor)

    reader = self._finalize_and_open_readers(store_writer)["depth.z@front"]

    self.assertEqual(reader.labels_count, 0)
    no_timestamps = np.array([], dtype=np.uint64)
    np.testing.assert_array_equal(reader.timestamps_us, no_timestamps)

    tmpdir.cleanup()


# ------------------------------------------------------------------
# 12. test_schema_json_roundtrip
# ------------------------------------------------------------------
def test_schema_json_roundtrip(self) -> None:
    """Round-trip LabelSchema through to_dict()/from_dict(), with and without quantization."""

    quant = QuantizationParams(
        quantized_dtype=np.dtype("uint16"),
        scale=0.001,
        offset=-5.0,
    )
    quantized_schema = LabelSchema(
        dtype=np.dtype("float32"),
        shape_suffix=(2,),
        encoding=LabelEncoding.RAW,
        quantization=quant,
    )

    restored_schema = LabelSchema.from_dict(quantized_schema.to_dict())

    self.assertEqual(restored_schema.dtype, quantized_schema.dtype)
    self.assertEqual(restored_schema.shape_suffix, quantized_schema.shape_suffix)
    self.assertEqual(restored_schema.encoding, quantized_schema.encoding)
    self.assertEqual(restored_schema.encoded_format, quantized_schema.encoded_format)

    # Quantization parameters
    self.assertIsNotNone(restored_schema.quantization)
    restored_quant = unpack_optional(restored_schema.quantization)
    self.assertEqual(restored_quant.quantized_dtype, quant.quantized_dtype)
    self.assertEqual(restored_quant.intermediate_dtype, quant.intermediate_dtype)
    self.assertAlmostEqual(restored_quant.scale, quant.scale)
    self.assertAlmostEqual(restored_quant.offset, quant.offset)

    # A schema without quantization must come back with quantization=None
    image_schema = LabelSchema(
        dtype=np.dtype("uint8"),
        encoding=LabelEncoding.IMAGE_ENCODED,
        encoded_format="png",
    )
    restored_image_schema = LabelSchema.from_dict(image_schema.to_dict())
    self.assertEqual(restored_image_schema.dtype, np.dtype("uint8"))
    self.assertEqual(restored_image_schema.encoding, LabelEncoding.IMAGE_ENCODED)
    self.assertIsNone(restored_image_schema.quantization)
@@ -56,6 +59,7 @@ def create( lidar_ids: IDs of lidar sensors radar_ids: IDs of radar sensors point_clouds_ids: IDs of native point cloud sources + camera_labels_ids: IDs of camera label instances profile: One of: - "default": Use provided overrides or fall back to default groups - "separate-sensors": Each sensor gets its own group named "" unless overwritten, remaining components use default store @@ -67,6 +71,7 @@ def create( lidar_component_groups: Override for per-lidar groups radar_component_groups: Override for per-radar groups point_clouds_component_groups: Override for per-point-cloud groups + camera_labels_component_groups: Override for per-camera-label groups cuboid_track_observations_component_group: Override for cuboids group Returns: @@ -77,6 +82,7 @@ def create( lidar_groups = {lidar_id: lidar_id for lidar_id in lidar_ids} radar_groups = {radar_id: radar_id for radar_id in radar_ids} pc_groups = {pc_id: pc_id for pc_id in point_clouds_ids} + cl_groups = {cl_id: cl_id for cl_id in camera_labels_ids} # Apply optional overwrites if camera_component_groups is not None: @@ -87,6 +93,8 @@ def create( radar_groups.update(radar_component_groups) if point_clouds_component_groups is not None: pc_groups.update(point_clouds_component_groups) + if camera_labels_component_groups is not None: + cl_groups.update(camera_labels_component_groups) if profile == "default": return ComponentGroupAssignments( @@ -97,6 +105,7 @@ def create( lidar_component_groups=lidar_component_groups if lidar_component_groups else {}, radar_component_groups=radar_component_groups if radar_component_groups else {}, point_clouds_component_groups=point_clouds_component_groups if point_clouds_component_groups else {}, + camera_labels_component_groups=camera_labels_component_groups if camera_labels_component_groups else {}, cuboid_track_observations_component_group=cuboid_track_observations_component_group, ) @@ -109,6 +118,7 @@ def create( lidar_component_groups=lidar_groups, 
radar_component_groups=radar_groups, point_clouds_component_groups=pc_groups, + camera_labels_component_groups=cl_groups, cuboid_track_observations_component_group=cuboid_track_observations_component_group, ) @@ -123,6 +133,7 @@ def create( lidar_component_groups=lidar_groups, radar_component_groups=radar_groups, point_clouds_component_groups=pc_groups, + camera_labels_component_groups=cl_groups, cuboid_track_observations_component_group="cuboids" if cuboid_track_observations_component_group is None else cuboid_track_observations_component_group, diff --git a/tools/data_converter/colmap/converter.py b/tools/data_converter/colmap/converter.py index f466a52c..18560983 100644 --- a/tools/data_converter/colmap/converter.py +++ b/tools/data_converter/colmap/converter.py @@ -245,6 +245,7 @@ def convert_sequence(self, sequence_id: str) -> None: lidar_ids=[], radar_ids=[], point_clouds_ids=[point_clouds_id] if point_clouds_id else [], + camera_labels_ids=[], # No camera labels profile=self.component_group_profile, ) diff --git a/tools/data_converter/pai/converter.py b/tools/data_converter/pai/converter.py index 35b8a567..d5cfc0f6 100644 --- a/tools/data_converter/pai/converter.py +++ b/tools/data_converter/pai/converter.py @@ -246,6 +246,7 @@ def _convert_clip(self, clip_id: str) -> None: lidar_ids=active_lidar_ids, radar_ids=active_radar_ids, point_clouds_ids=[], # No native point cloud sources + camera_labels_ids=[], # No camera labels profile=self.component_group_profile, ) From fa891b1bf129b11397fa521be5d910cda17e7967 Mon Sep 17 00:00:00 2001 From: Janick Martinez Esturo Date: Wed, 6 May 2026 15:14:23 +0200 Subject: [PATCH 2/3] feat: migrate Waymo panoptic segmentation from generic_data to CameraLabelsComponent --- docs/conversions/waymo/waymo.rst | 9 +++-- tools/data_converter/waymo/converter.py | 45 ++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/docs/conversions/waymo/waymo.rst b/docs/conversions/waymo/waymo.rst index 
1a9ed9fa..aad1759f 100644 --- a/docs/conversions/waymo/waymo.rst +++ b/docs/conversions/waymo/waymo.rst @@ -34,7 +34,11 @@ The Waymo camera frame convention is: **Note:** The converter transforms this to NCore's camera convention (principal axis +z, x-axis right, y-axis down). -Each camera provides panoptic segmentation data with 29 semantic classes. +Each camera provides panoptic segmentation data with 29 semantic classes, +stored via :class:`~ncore.data.v4.CameraLabelsComponent` as ``IMAGE_ENCODED`` +PNG labels (label type: ``SEGMENTATION / "panoptic"``). The per-label metadata +includes ``panoptic_label_divisor`` for decoding semantic class and instance IDs +from the combined panoptic map. LiDAR Sensors ^^^^^^^^^^^^^ @@ -56,7 +60,7 @@ Conversion The converter uses NCore V4's component-based architecture. Each sequence is parsed from ``.tfrecord`` files and written to NCore format via :class:`~ncore.data.v4.SequenceComponentGroupsWriter` with specialized component -writers for poses, intrinsics, lidar, cameras, masks, and 3D labels. +writers for poses, intrinsics, lidar, cameras, camera labels, masks, and 3D labels. 
Usage ^^^^^ @@ -167,6 +171,7 @@ API Reference - :class:`~ncore.data.v4.IntrinsicsComponent` - Camera and lidar intrinsics - :class:`~ncore.data.v4.LidarSensorComponent` - Lidar frame data - :class:`~ncore.data.v4.CameraSensorComponent` - Camera frame data +- :class:`~ncore.data.v4.CameraLabelsComponent` - Per-camera image labels - :class:`~ncore.data.v4.CuboidsComponent` - 3D cuboid track observations - :class:`~ncore.data.v4.MasksComponent` - Camera masks diff --git a/tools/data_converter/waymo/converter.py b/tools/data_converter/waymo/converter.py index 9c754b17..6033ee14 100644 --- a/tools/data_converter/waymo/converter.py +++ b/tools/data_converter/waymo/converter.py @@ -41,15 +41,21 @@ from ncore.impl.common.util import unpack_optional from ncore.impl.data.types import ( BBox3, + CameraLabelDescriptor, CuboidTrackObservation, JsonLike, + LabelCategory, + LabelEncoding, + LabelSchema, LabelSource, + LabelType, OpenCVPinholeCameraModelParameters, RowOffsetStructuredSpinningLidarModelParameters, ShutterType, ) from ncore.impl.data.util import FOV, relative_angle from ncore.impl.data.v4.components import ( + CameraLabelsComponent, CameraSensorComponent, CuboidsComponent, IntrinsicsComponent, @@ -205,11 +211,28 @@ def convert_sequence(self, sequence_id: str) -> None: self.lidar_ids = self.get_active_lidar_ids([lidar for lidar in self.LIDAR_MAP.values()]) self.radar_ids = (self.get_active_radar_ids([]),) + # Generate camera labels descriptors for panoptic segmentation + self.panoptic_descriptors = { + camera_id: CameraLabelDescriptor( + camera_id=camera_id, + label_type=LabelType(LabelCategory.SEGMENTATION, "panoptic"), + label_schema=LabelSchema( + dtype=np.dtype("uint8"), + shape_suffix=(), + encoding=LabelEncoding.IMAGE_ENCODED, + encoded_format="png", + ), + label_source=LabelSource.EXTERNAL, + ) + for camera_id in self.camera_ids + } + self.component_groups = ComponentGroupAssignments.create( camera_ids=self.camera_ids, lidar_ids=self.lidar_ids, radar_ids=[], # 
No radars for now point_clouds_ids=[], # No native point cloud sources + camera_labels_ids=[desc.default_instance_name for desc in self.panoptic_descriptors.values()], profile=self.component_group_profile, ) @@ -890,12 +913,24 @@ def decode_cameras(self, frames: list[dataset_pb2.Frame]) -> None: CameraSensorComponent.Writer, component_instance_name=camera_ncore_id, group_name=self.component_groups.camera_component_groups.get(camera_ncore_id), + generic_meta_data={}, + ) + + # Register a panoptic segmentation label writer for this camera + panoptic_descriptor = self.panoptic_descriptors[camera_ncore_id] + panoptic_writer = self.store_writer.register_component_writer( + CameraLabelsComponent.Writer, + component_instance_name=panoptic_descriptor.default_instance_name, + group_name=self.component_groups.camera_labels_component_groups.get( + panoptic_descriptor.default_instance_name + ), generic_meta_data={ "label-class-string-id-map": { label_string: label_id for label_id, label_string in self.CAMERA_LABEL_CLASS_ID_STRING_MAP.items() - } + }, }, + descriptor=panoptic_descriptor, ) for frame in tqdm.tqdm(frames, desc=f"Process {camera_ncore_id}"): @@ -930,11 +965,11 @@ def decode_cameras(self, frames: list[dataset_pb2.Frame]) -> None: and (panoptic_label_divisor := camera_segmentation_label.panoptic_label_divisor) > 0 and hasattr(camera_segmentation_label, "panoptic_label") ): - # Store the original waymo png segmentation data - generic_data["panoptic_label_png"] = np.frombuffer( - camera_segmentation_label.panoptic_label, dtype=np.uint8 + panoptic_writer.store_label( + data=bytes(camera_segmentation_label.panoptic_label), + timestamp_us=frame_end_timestamp_us, + generic_meta_data={"panoptic_label_divisor": panoptic_label_divisor}, ) - generic_meta_data["panoptic_label_divisor"] = panoptic_label_divisor # Store the image and its metadata camera_writer.store_frame( From e2234b247030ff0f1aaa834449a76c8a94a500ac Mon Sep 17 00:00:00 2001 From: Janick Martinez Esturo Date: 
Wed, 6 May 2026 15:27:07 +0200 Subject: [PATCH 3/3] feat: add camera label overlay support to ncore_vis --- docs/tools/ncore_vis.rst | 36 ++++ tools/ncore_vis/BUILD.bazel | 1 + tools/ncore_vis/components/camera.py | 260 ++++++++++++++++++++++++++- tools/ncore_vis/data_loader.py | 26 ++- 4 files changed, 321 insertions(+), 2 deletions(-) diff --git a/docs/tools/ncore_vis.rst b/docs/tools/ncore_vis.rst index f5cfd7bb..8924ba43 100644 --- a/docs/tools/ncore_vis.rst +++ b/docs/tools/ncore_vis.rst @@ -180,6 +180,42 @@ from all cameras in the sequence and shown in a shared dropdown. The opacity of the tint is adjustable. Masks are boolean images stored per-sensor and are not per-frame. +Camera Labels Overlay +^^^^^^^^^^^^^^^^^^^^^ + +When a sequence contains :class:`~ncore.data.v4.CameraLabelsComponent` data, +a **Camera Labels** overlay section appears in the Cameras tab. This allows +visualizing per-frame labels (depth maps, segmentation masks, surface normals, +etc.) on camera images. + +Global controls: + +- **Show Labels**: master toggle to enable camera label visualization + (disabled by default) +- **Matching**: timestamp matching strategy for sparse labels + + - *Closest*: always picks the nearest available label timestamp (default) + - *Exact*: only shows a label if one exists at the camera frame's exact + timestamp; otherwise shows the RGB image + +- **Opacity**: blend factor between RGB image and label visualization + (0.0 = full RGB, 1.0 = full label) + +Per-camera controls: + +- **Label**: dropdown showing only labels associated with this camera, + populated from :meth:`~ncore.data.SequenceLoaderProtocol.query_camera_labels`. + Defaults to the first available label so that toggling "Show Labels" on + immediately renders something. 
+ +Visualization adapts to the label category: + +- **DEPTH** -- TURBO colormap with percentile-based normalization +- **SEGMENTATION** -- 20-color class palette +- **MASK** -- green tint overlay (3-channel masks rendered as RGB) +- **GEOMETRY** -- normal vectors mapped to RGB ([-1,1] → [0,255]) +- **Other categories** -- grayscale normalization + Lidar Point Clouds ^^^^^^^^^^^^^^^^^^ diff --git a/tools/ncore_vis/BUILD.bazel b/tools/ncore_vis/BUILD.bazel index f1b3b7cd..58d67dc2 100644 --- a/tools/ncore_vis/BUILD.bazel +++ b/tools/ncore_vis/BUILD.bazel @@ -87,6 +87,7 @@ py_library( "//ncore/impl/common:pylib_transformations", "//ncore/impl/data:pylib_compat", "//ncore/impl/data:pylib_types", + "//ncore/impl/data:pylib_util", "//ncore/impl/sensors:pylib_camera", requirement("matplotlib"), requirement("numpy"), diff --git a/tools/ncore_vis/components/camera.py b/tools/ncore_vis/components/camera.py index 4555abab..aaa10372 100644 --- a/tools/ncore_vis/components/camera.py +++ b/tools/ncore_vis/components/camera.py @@ -31,7 +31,8 @@ from scipy.spatial.transform import Rotation as RotLib from ncore.impl.common.transformations import HalfClosedInterval, transform_point_cloud -from ncore.impl.data.types import FrameTimepoint, LabelSource +from ncore.impl.data.types import FrameTimepoint, LabelCategory, LabelSource +from ncore.impl.data.util import closest_index_sorted from ncore.impl.sensors.camera import CameraModel from tools.ncore_vis.components.base import VisualizationComponent, register_component from tools.ncore_vis.utils import se3_to_position_wxyz @@ -81,6 +82,113 @@ _JET_CMAP: matplotlib.colors.Colormap = matplotlib.colormaps["jet"] _TURBO_CMAP: matplotlib.colors.Colormap = matplotlib.colormaps["turbo"] +# 20-color palette for segmentation visualization. 
# Lookup table used by ``_colorize_segmentation``: row ``i`` holds the RGB color
# drawn for class index ``i``; indices past the end wrap around via modulo.
_SEGMENTATION_PALETTE: np.ndarray = np.array(
    [
        [0, 0, 0],
        [128, 0, 0],
        [0, 128, 0],
        [128, 128, 0],
        [0, 0, 128],
        [128, 0, 128],
        [0, 128, 128],
        [128, 128, 128],
        [64, 0, 0],
        [192, 0, 0],
        [64, 128, 0],
        [192, 128, 0],
        [64, 0, 128],
        [192, 0, 128],
        [64, 128, 128],
        [192, 128, 128],
        [0, 64, 0],
        [128, 64, 0],
        [0, 192, 0],
        [128, 192, 0],
    ],
    dtype=np.uint8,
)


def _first_channel(data: np.ndarray) -> np.ndarray:
    """Return ``data`` itself if it has at most two dims, else channel 0 of its third axis."""
    return data[:, :, 0] if data.ndim > 2 else data


def _as_float(data: np.ndarray) -> np.ndarray:
    """Return ``data`` unchanged if it is floating point, else a float64 copy.

    Percentile interpolation and range scaling need real arithmetic; integer and
    bool inputs are promoted once here so callers can treat every dtype alike.
    """
    return data if np.issubdtype(data.dtype, np.floating) else data.astype(np.float64)


def _percentile_range(values: np.ndarray, low: float = 2.0, high: float = 98.0) -> tuple:
    """Return a non-degenerate ``(vmin, vmax)`` display range for ``values``.

    Args:
        values: Samples to derive the range from (callers pass finite values only).
        low: Lower percentile used as ``vmin``.
        high: Upper percentile used as ``vmax``.

    Returns:
        ``(vmin, vmax)`` as Python floats with ``vmax > vmin``. Falls back to
        ``(0.0, 1.0)`` when ``values`` is empty, and widens a flat range by 1.0 so
        that the subsequent division never divides by zero.
    """
    if values.size == 0:
        return 0.0, 1.0
    vmin = float(np.percentile(values, low))
    vmax = float(np.percentile(values, high))
    if vmax <= vmin:
        vmax = vmin + 1.0
    return vmin, vmax


def _fit_to_frame(colored: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Resize ``colored`` to ``(target_h, target_w)`` if its spatial size differs.

    Nearest-neighbour interpolation is used so that no new colors are blended in
    along class / mask boundaries.
    """
    if colored.shape[:2] != (target_h, target_w):
        colored = cv2.resize(colored, (target_w, target_h), interpolation=cv2.INTER_NEAREST)
    return colored


def _colorize_depth(data: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Colorize a depth map using a turbo colormap with percentile normalization.

    Args:
        data: Depth array; if it has more than two dims only channel 0 is used.
        target_h: Height of the image the result is blended with.
        target_w: Width of the image the result is blended with.

    Returns:
        ``uint8`` RGB array of shape ``(target_h, target_w, 3)``.
    """
    depth = _as_float(_first_channel(data))

    # Build the normalization window from finite, strictly positive samples only:
    # a single +inf / NaN pixel would otherwise turn both percentiles into
    # inf / NaN and wash out the whole frame. If nothing is positive, fall back
    # to all finite samples (and to (0, 1) if there are none at all).
    finite = np.isfinite(depth)
    positive = finite & (depth > 0)
    vmin, vmax = _percentile_range(depth[positive] if positive.any() else depth[finite])

    normalized = np.clip((depth - vmin) / (vmax - vmin), 0.0, 1.0)
    rgba = _TURBO_CMAP(normalized)
    colored: np.ndarray = (rgba[:, :, :3] * 255.0).astype(np.uint8)
    return _fit_to_frame(colored, target_h, target_w)


def _colorize_segmentation(data: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Colorize a segmentation map using a fixed color palette.

    Args:
        data: Class-index array; if it has more than two dims only channel 0 is used.
        target_h: Height of the image the result is blended with.
        target_w: Width of the image the result is blended with.

    Returns:
        ``uint8`` RGB array of shape ``(target_h, target_w, 3)`` where each pixel
        is ``_SEGMENTATION_PALETTE[index % len(_SEGMENTATION_PALETTE)]``.
    """
    # int64 (rather than int32) keeps wide id types from overflowing before the
    # modulo; the modulo result is always a valid, non-negative palette row.
    indices = _first_channel(data).astype(np.int64) % len(_SEGMENTATION_PALETTE)
    return _fit_to_frame(_SEGMENTATION_PALETTE[indices], target_h, target_w)


def _colorize_mask(data: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Colorize a binary or multi-level mask as green on black.

    Args:
        data: Mask array. A 3-channel array is treated as an RGB image and shown
            as is (cast to ``uint8`` if needed); for any other layout, channel 0
            (or the 2-D array itself) is thresholded at ``> 0``.
        target_h: Height of the image the result is blended with.
        target_w: Width of the image the result is blended with.

    Returns:
        ``uint8`` RGB array of shape ``(target_h, target_w, 3)``.
    """
    if data.ndim == 3 and data.shape[2] == 3:
        # Already RGB (e.g. depth validity mask) -- display directly
        colored = data if data.dtype == np.uint8 else data.astype(np.uint8)
    else:
        plane = _first_channel(data)
        colored = np.zeros((plane.shape[0], plane.shape[1], 3), dtype=np.uint8)
        colored[plane > 0] = [0, 255, 0]
    return _fit_to_frame(colored, target_h, target_w)


def _colorize_geometry(data: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Colorize geometry data (normals, ray directions) as RGB from [-1, 1] range.

    Args:
        data: Per-pixel vector array. With at least three channels the first
            three are rendered as RGB; anything else is delegated to
            ``_colorize_generic``.
        target_h: Height of the image the result is blended with.
        target_w: Width of the image the result is blended with.

    Returns:
        ``uint8`` RGB array of shape ``(target_h, target_w, 3)``.
    """
    if data.ndim != 3 or data.shape[2] < 3:
        return _colorize_generic(data, target_h, target_w)

    xyz = data[:, :, :3]
    colored: np.ndarray
    if np.issubdtype(xyz.dtype, np.floating):
        # Map [-1, 1] -> [0, 255]. Non-finite components are pinned to the ends
        # of the range first, because casting NaN / inf to uint8 is undefined.
        scaled = np.nan_to_num((xyz + 1.0) * 127.5, nan=0.0, posinf=255.0, neginf=0.0)
        colored = np.clip(scaled, 0, 255).astype(np.uint8)
    else:
        colored = xyz.astype(np.uint8)
    return _fit_to_frame(colored, target_h, target_w)


def _colorize_generic(data: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Generic grayscale visualization for unknown label types.

    ``uint8`` input is shown as is. Every other dtype (floats, wider integers,
    bool) is stretched to ``[0, 255]`` using its 2nd..98th percentile window, so
    values above 255 no longer wrap around and bool masks become black / white.

    Args:
        data: Label array; if it has more than two dims only channel 0 is used.
        target_h: Height of the image the result is blended with.
        target_w: Width of the image the result is blended with.

    Returns:
        ``uint8`` RGB array of shape ``(target_h, target_w, 3)`` with three
        identical channels.
    """
    plane = _first_channel(data)
    if plane.dtype == np.uint8:
        gray = plane
    else:
        values = _as_float(plane)
        vmin, vmax = _percentile_range(values[np.isfinite(values)])
        scaled = (values - vmin) / (vmax - vmin) * 255
        # Pin NaN / inf before the cast; NaN -> uint8 is undefined behaviour.
        gray = np.clip(np.nan_to_num(scaled, nan=0.0, posinf=255.0, neginf=0.0), 0, 255).astype(np.uint8)

    # Replicate the gray plane into three channels (same result as a GRAY2RGB conversion).
    colored = np.repeat(gray[:, :, np.newaxis], 3, axis=2)
    return _fit_to_frame(colored, target_h, target_w)
CameraComponent(VisualizationComponent): @@ -325,6 +433,37 @@ def _(_: viser.GuiEvent) -> None: mask_opacity_slider=mask_opacity_slider, ) + # -- Camera labels overlay -- + camera_labels_ids = self.data_loader.camera_labels_ids + self._show_camera_labels: bool = False + self._camera_label_matching: str = "Closest" + self._camera_label_opacity_value: float = 0.5 + self._camera_label_selects: Dict[str, viser.GuiInputHandle[str]] = {} + if camera_labels_ids: + with self.client.gui.add_folder("Camera Labels"): + show_camera_labels_checkbox = self.client.gui.add_checkbox( + "Show Labels", initial_value=False, hint="Enable camera label visualization" + ) + camera_label_matching = self.client.gui.add_dropdown( + "Matching", + options=["Closest", "Exact"], + initial_value="Closest", + hint="Closest: nearest label timestamp. Exact: only show if label exists at frame timestamp.", + ) + camera_label_opacity = self.client.gui.add_slider( + "Opacity", + min=0.0, + max=1.0, + step=0.05, + initial_value=0.5, + hint="Blend opacity (1.0 = full label, 0.0 = full RGB)", + ) + self._bind_camera_label_settings( + show_camera_labels_checkbox=show_camera_labels_checkbox, + camera_label_matching=camera_label_matching, + camera_label_opacity=camera_label_opacity, + ) + # -- Per-camera folders -- for camera_id in self.data_loader.camera_ids: cam = self.data_loader.get_camera_sensor(camera_id) @@ -337,6 +476,31 @@ def _(_: viser.GuiEvent) -> None: self._frame_sliders[camera_id] = slider show_checkbox = self.client.gui.add_checkbox("Show Camera", initial_value=True) + + # Per-camera label selector (only labels associated with this camera) + cam_label_sources = self.data_loader.query_camera_labels(camera_id) + if cam_label_sources: + cam_label_ids = [ + lid + for lid in camera_labels_ids + if any( + s.label_descriptor.camera_id == camera_id + for s in [self.data_loader.get_camera_labels(lid)] + ) + ] + if cam_label_ids: + label_dropdown = self.client.gui.add_dropdown( + "Label", + 
options=["(none)"] + cam_label_ids, + initial_value=cam_label_ids[0], + hint="Select label to visualize for this camera", + ) + self._camera_label_selects[camera_id] = label_dropdown + + @label_dropdown.on_update + def _(event: viser.GuiEvent, _cid: str = camera_id) -> None: + self._update_camera(_cid) + go_to_frame = self.client.gui.add_button("Go to Frame") # Wire up per-camera callbacks @@ -532,6 +696,29 @@ def _(_: viser.GuiEvent) -> None: # type: ignore[no-redef] self._mask_opacity = mask_opacity_slider.value self._refresh_all_cameras() + def _bind_camera_label_settings( + self, + show_camera_labels_checkbox: viser.GuiInputHandle[bool], + camera_label_matching: viser.GuiInputHandle[str], + camera_label_opacity: viser.GuiInputHandle[float], + ) -> None: + """Wire up camera labels overlay global-setting callbacks.""" + + @show_camera_labels_checkbox.on_update + def _(_: viser.GuiEvent) -> None: + self._show_camera_labels = show_camera_labels_checkbox.value + self._refresh_all_cameras() + + @camera_label_matching.on_update + def _(_: viser.GuiEvent) -> None: # type: ignore[no-redef] + self._camera_label_matching = camera_label_matching.value + self._refresh_all_cameras() + + @camera_label_opacity.on_update + def _(_: viser.GuiEvent) -> None: # type: ignore[no-redef] + self._camera_label_opacity_value = camera_label_opacity.value + self._refresh_all_cameras() + def _build_camera_models(self) -> None: """Build (or rebuild) the per-camera :class:`CameraModel` cache using ``self._device``.""" self._camera_models = { @@ -635,6 +822,12 @@ def _update_camera(self, camera_id: str) -> None: except Exception: logger.debug("Mask overlay failed for %s frame %d", camera_id, frame_idx, exc_info=True) + if self._show_camera_labels: + try: + image = self._overlay_camera_labels(camera_id, frame_idx, image) + except Exception: + logger.debug("Camera label overlay failed for %s frame %d", camera_id, frame_idx, exc_info=True) + frustum_handle = self.client.scene.add_camera_frustum( 
f"/cameras/{camera_id}/pose/frustum", fov=_DEFAULT_CAMERA_FOV, @@ -706,6 +899,71 @@ def _overlay_mask(self, camera_id: str, image: np.ndarray) -> np.ndarray: return output + # ------------------------------------------------------------------ + # Camera labels overlay + # ------------------------------------------------------------------ + + def _overlay_camera_labels(self, camera_id: str, frame_idx: int, image: np.ndarray) -> np.ndarray: + """Overlay or replace the camera image with a camera label visualization. + + Args: + camera_id: Camera sensor ID. + frame_idx: Camera frame index. + image: RGB image array (H, W, 3), uint8. + + Returns: + Image with label visualization applied. + """ + if camera_id not in self._camera_label_selects: + return image + + label_id = self._camera_label_selects[camera_id].value + if label_id == "(none)": + return image + + source = self.data_loader.get_camera_labels(label_id) + + # Get camera frame timestamp + cam = self.data_loader.get_camera_sensor(camera_id) + frame_ts = cam.get_frame_timestamp_us(frame_idx, FrameTimepoint.END) + + # Find label based on matching mode + if source.labels_count == 0: + return image + + if self._camera_label_matching == "Exact": + # Only show if there's a label at exactly this timestamp + if frame_ts not in source.label_timestamps_us: + return image + ts = frame_ts + else: # "Closest" + ts = source.label_timestamps_us[closest_index_sorted(source.label_timestamps_us, frame_ts)] + + label = source.get_label(ts) + label_data = label.get_data() + + # Render based on category + category = source.label_descriptor.label_type.category + h, w = image.shape[:2] + + if category == LabelCategory.DEPTH: + vis = _colorize_depth(label_data, h, w) + elif category == LabelCategory.SEGMENTATION: + vis = _colorize_segmentation(label_data, h, w) + elif category == LabelCategory.MASK: + vis = _colorize_mask(label_data, h, w) + elif category == LabelCategory.GEOMETRY: + vis = _colorize_geometry(label_data, h, w) + else: + 
vis = _colorize_generic(label_data, h, w) + + # Blend based on global opacity + alpha = self._camera_label_opacity_value + if alpha >= 1.0: + return vis + else: + return (image.astype(np.float32) * (1 - alpha) + vis.astype(np.float32) * alpha).astype(np.uint8) + # ------------------------------------------------------------------ # Lidar projection overlay # ------------------------------------------------------------------ diff --git a/tools/ncore_vis/data_loader.py b/tools/ncore_vis/data_loader.py index ea7687f6..fd5e578d 100644 --- a/tools/ncore_vis/data_loader.py +++ b/tools/ncore_vis/data_loader.py @@ -28,6 +28,7 @@ from ncore.impl.common.transformations import HalfClosedInterval, PoseGraphInterpolator from ncore.impl.data.compat import ( + CameraLabelsProtocol, CameraSensorProtocol, LidarSensorProtocol, PointCloudsSourceProtocol, @@ -35,7 +36,7 @@ SensorProtocol, SequenceLoaderProtocol, ) -from ncore.impl.data.types import CuboidTrackObservation, FrameTimepoint, LabelSource +from ncore.impl.data.types import CuboidTrackObservation, FrameTimepoint, LabelCategory, LabelSource, LabelType from tools.ncore_vis.tracks import CuboidTrack @@ -195,6 +196,29 @@ def get_point_clouds_source(self, source_id: str) -> PointCloudsSourceProtocol: """Return a point-clouds source by ID (cached).""" return self._loader.get_point_clouds_source(source_id) + # ------------------------------------------------------------------ + # Camera labels + # ------------------------------------------------------------------ + + @property + def camera_labels_ids(self) -> List[str]: + """All camera label instance IDs in the sequence.""" + return self._loader.camera_labels_ids + + @functools.lru_cache(maxsize=None) + def get_camera_labels(self, camera_labels_id: str) -> CameraLabelsProtocol: + """Return a camera labels source by instance ID (cached).""" + return self._loader.get_camera_labels(camera_labels_id) + + def query_camera_labels( + self, + camera_id: str, + label_type: 
Optional[LabelType] = None, + label_category: Optional[LabelCategory] = None, + ) -> List[CameraLabelsProtocol]: + """Query camera label sources matching filters.""" + return self._loader.query_camera_labels(camera_id, label_type, label_category) + # ------------------------------------------------------------------ # Cross-sensor frame synchronization # ------------------------------------------------------------------