Source code for isaaclab.sensors.camera.camera_data

# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

from typing import Any

import warp as wp

# Re-exported as part of the public isaaclab.sensors.camera API
from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec
from isaaclab.utils.warp import ProxyArray
from isaaclab.utils.warp.warp_math import convert_camera_frame_orientation_convention_wp

__all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"]



[docs]
class CameraData:
    """Data container for the camera sensor.

    Public properties return :class:`~isaaclab.utils.warp.ProxyArray` wrappers.
    Use ``.torch`` for a cached zero-copy :class:`torch.Tensor` view or
    ``.warp`` for the underlying :class:`warp.array`.

    Output image buffers are built by :meth:`allocate`; the pose and intrinsics
    buffers are built by :meth:`create_buffers`. Until :meth:`create_buffers` has
    run, :attr:`pos_w`, :attr:`quat_w_world` and :attr:`intrinsic_matrices` are
    ``None`` and :attr:`quat_w_ros` / :attr:`quat_w_opengl` raise
    :class:`AttributeError`.
    """

    def __init__(self):
        # ProxyArray wrappers — created in create_buffers()
        self._pos_w: ProxyArray | None = None
        self._quat_w_world: ProxyArray | None = None
        self._intrinsic_matrices: ProxyArray | None = None
        self._quat_w_ros: ProxyArray | None = None
        self._quat_w_opengl: ProxyArray | None = None

        # Output image buffers — allocated in allocate()
        self._output: dict[str, ProxyArray] | None = None

        self.image_shape: tuple[int, int] | None = None
        """A tuple containing (height, width) of the camera sensor."""

        self.info: dict[str, Any] | None = None
        """The retrieved sensor info with sensor types as key.

        This contains extra information provided by the sensor such as semantic segmentation label mapping, prim paths.
        For semantic-based data, this corresponds to the ``"info"`` key in the output of the sensor. For other sensor
        types, the info is empty.
        """

    ##
    # Frame state.
    ##

    @property
    def pos_w(self) -> ProxyArray:
        """Position of the sensor origin in world frame [m], following ROS convention.

        Shape is (N,), dtype ``wp.vec3f``. In torch this resolves to (N, 3),
        where N is the number of sensors. Use ``.warp`` for the underlying
        ``wp.array`` or ``.torch`` for a cached zero-copy ``torch.Tensor`` view.

        ``None`` until :meth:`create_buffers` has been called.
        """
        return self._pos_w

    @property
    def quat_w_world(self) -> ProxyArray:
        """Quaternion orientation ``(x, y, z, w)`` of the sensor origin in world frame,
        following the world coordinate frame convention.

        .. note::
            World frame convention follows the camera aligned with forward axis +X and up axis +Z.

        Shape is (N,), dtype ``wp.quatf``. In torch this resolves to (N, 4),
        where N is the number of sensors. Use ``.warp`` for the underlying
        ``wp.array`` or ``.torch`` for a cached zero-copy ``torch.Tensor`` view.

        ``None`` until :meth:`create_buffers` has been called.
        """
        return self._quat_w_world

    ##
    # Camera data
    ##

    @property
    def intrinsic_matrices(self) -> ProxyArray:
        """The intrinsic matrices for the camera.

        Shape is (N,), dtype ``wp.mat33f``. In torch this resolves to (N, 3, 3),
        where N is the number of sensors. Use ``.warp`` for the underlying
        ``wp.array`` or ``.torch`` for a cached zero-copy ``torch.Tensor`` view.

        ``None`` until :meth:`create_buffers` has been called.
        """
        return self._intrinsic_matrices

    @property
    def output(self) -> dict[str, ProxyArray] | None:
        """The retrieved sensor data with sensor types as key.

        Each value is a :class:`~isaaclab.utils.warp.ProxyArray` of shape
        ``(N, H, W, C)`` where N is the number of views, H/W are image dimensions,
        and C is the number of channels. Use ``.torch`` for a ``torch.Tensor`` view
        or ``.warp`` for the underlying ``wp.array``.

        The format of the data is available in the `Replicator Documentation`_. For semantic-based data,
        this corresponds to the ``"data"`` key in the output of the sensor.

        ``None`` on an instance that was not built through :meth:`allocate`.

        .. _Replicator Documentation: https://docs.omniverse.nvidia.com/prod_extensions/prod_extensions/ext_replicator/annotators_details.html#annotator-output
        """
        return self._output

    def create_buffers(self, num_views: int, device: str) -> None:
        """Allocate warp arrays for pose and intrinsics and create their :class:`ProxyArray` wrappers.

        Called by :class:`~isaaclab.sensors.camera.Camera` after :meth:`allocate` to
        populate the pose and intrinsics buffers. All five buffers are zero-initialized
        and have one entry per view.

        Args:
            num_views: Number of camera views (batch dimension).
            device: Device for tensor storage (e.g. ``"cuda:0"``).
        """
        self._pos_w = ProxyArray(wp.zeros(num_views, dtype=wp.vec3f, device=device))
        self._quat_w_world = ProxyArray(wp.zeros(num_views, dtype=wp.quatf, device=device))
        self._intrinsic_matrices = ProxyArray(wp.zeros(num_views, dtype=wp.mat33f, device=device))
        # Scratch destinations for the on-demand convention conversions below.
        self._quat_w_ros = ProxyArray(wp.zeros(num_views, dtype=wp.quatf, device=device))
        self._quat_w_opengl = ProxyArray(wp.zeros(num_views, dtype=wp.quatf, device=device))

    @classmethod
    def allocate(
        cls,
        data_types: list[str],
        height: int,
        width: int,
        num_views: int,
        device: str,
        supported_specs: dict[RenderBufferKind, RenderBufferSpec],
    ) -> CameraData:
        """Build a :class:`CameraData` with output buffers pre-allocated as warp arrays.

        Allocates one ``(num_views, height, width, channels)`` warp array per kind
        in the intersection of ``data_types`` and ``supported_specs``, using
        the channels and dtype from each :class:`RenderBufferSpec`. Each buffer is
        wrapped in a :class:`~isaaclab.utils.warp.ProxyArray`; call ``.torch`` on
        the result to obtain a zero-copy :class:`torch.Tensor` view.

        When ``supported_specs`` contains both RGB and RGBA and at least one of them
        is requested, only the RGBA buffer is allocated. The RGB entry is then a
        strided view over the first three channels of that buffer, so both keys are
        present in :attr:`output` and share memory.

        Args:
            data_types: Requested output names (typically :attr:`CameraCfg.data_types`).
                Every name must be a member of :class:`RenderBufferKind`.
            height: Image height in pixels.
            width: Image width in pixels.
            num_views: Number of camera views (batch dimension).
            device: Device on which to allocate the buffers.
            supported_specs: Per-buffer layout the active renderer can produce,
                keyed by :class:`RenderBufferKind`. Names absent from this mapping
                are not allocated.

        Returns:
            A new :class:`CameraData` with :attr:`image_shape`, :attr:`output`,
            and :attr:`info` populated; pose/intrinsic buffers must be created
            separately via :meth:`create_buffers`.

        Raises:
            ValueError: If ``data_types`` contains names that are not members of
                :class:`RenderBufferKind`.
        """
        valid_names = {kind.value for kind in RenderBufferKind}
        unknown = [name for name in data_types if name not in valid_names]
        if unknown:
            raise ValueError(f"Unknown RenderBufferKind name(s): {unknown}. Expected members of RenderBufferKind.")
        requested = {RenderBufferKind(name) for name in data_types}

        # RGB is served as a view into RGBA whenever the renderer offers both and
        # the caller asked for either one.
        rgb_kinds = {RenderBufferKind.RGB, RenderBufferKind.RGBA}
        rgb_alias = rgb_kinds <= supported_specs.keys() and not requested.isdisjoint(rgb_kinds)
        if rgb_alias:
            requested.update(rgb_kinds)

        allocated = requested.intersection(supported_specs)
        if rgb_alias:
            # RGB gets no storage of its own; it is added as a view further down.
            allocated.remove(RenderBufferKind.RGB)

        # Iterate the specs (not the set) so the buffer order follows ``supported_specs``.
        buffers: dict[str, ProxyArray] = {}
        for kind, spec in supported_specs.items():
            if kind not in allocated:
                continue
            shape = (num_views, height, width, spec.channels)
            buffers[str(kind)] = ProxyArray(wp.zeros(shape, dtype=spec.dtype, device=device))

        if rgb_alias:
            # Zero-copy strided view into rgba: shape (N, H, W, 3), skipping the alpha channel.
            # The view reuses the RGBA buffer's pointer, byte strides and dtype and only
            # narrows the last dimension to 3, so each pixel exposes its first three
            # channels. For a contiguous (N, H, W, 4) uint8 buffer the strides are
            # (H*W*4, W*4, 4, 1); taking them from the array keeps the view correct for
            # whatever dtype the RGBA spec declares.
            rgba_wp = buffers[str(RenderBufferKind.RGBA)].warp
            rgb_wp = wp.array(
                ptr=rgba_wp.ptr,
                shape=(num_views, height, width, 3),
                strides=rgba_wp.strides,
                dtype=rgba_wp.dtype,
                device=rgba_wp.device,
                copy=False,
            )
            buffers[str(RenderBufferKind.RGB)] = ProxyArray(rgb_wp)

        obj = cls()
        obj.image_shape = (height, width)
        obj._output = buffers
        obj.info = {name: None for name in buffers}
        return obj

    ##
    # Additional Frame orientation conventions
    ##

    def _world_quat_as(self, destination: ProxyArray | None, convention: str) -> ProxyArray:
        """Convert the world-convention orientation into ``destination`` and return it.

        Args:
            destination: Buffer that receives the converted quaternions.
            convention: Target convention name passed to the conversion routine.

        Raises:
            AttributeError: If the pose buffers have not been created yet.
        """
        if self._quat_w_world is None or destination is None:
            raise AttributeError(
                "CameraData pose buffers are not allocated; call create_buffers() before reading orientations."
            )
        convert_camera_frame_orientation_convention_wp(self._quat_w_world.warp, destination.warp, "world", convention)
        return destination

    @property
    def quat_w_ros(self) -> ProxyArray:
        """Quaternion orientation ``(x, y, z, w)`` of the sensor origin in the world frame, following ROS convention.

        .. note::
            ROS convention follows the camera aligned with forward axis +Z and up axis -Y.

        Shape is (N,), dtype ``wp.quatf``. In torch this resolves to (N, 4),
        where N is the number of sensors. Use ``.warp`` for the underlying
        ``wp.array`` or ``.torch`` for a cached zero-copy ``torch.Tensor`` view.

        The conversion from :attr:`quat_w_world` is re-run on every access.

        Raises:
            AttributeError: If :meth:`create_buffers` has not been called.
        """
        return self._world_quat_as(self._quat_w_ros, "ros")

    @property
    def quat_w_opengl(self) -> ProxyArray:
        """Quaternion orientation ``(x, y, z, w)`` of the sensor origin in the world frame, following
        Opengl / USD Camera convention.

        .. note::
            OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.

        Shape is (N,), dtype ``wp.quatf``. In torch this resolves to (N, 4),
        where N is the number of sensors. Use ``.warp`` for the underlying
        ``wp.array`` or ``.torch`` for a cached zero-copy ``torch.Tensor`` view.

        The conversion from :attr:`quat_w_world` is re-run on every access.

        Raises:
            AttributeError: If :meth:`create_buffers` has not been called.
        """
        return self._world_quat_as(self._quat_w_opengl, "opengl")