Source code for omni.isaac.lab.sensors.ray_caster.ray_caster_camera

# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import torch
from collections.abc import Sequence
from typing import TYPE_CHECKING, ClassVar, Literal

import omni.isaac.core.utils.stage as stage_utils
import omni.physics.tensors.impl.api as physx
from omni.isaac.core.prims import XFormPrimView

import omni.isaac.lab.utils.math as math_utils
from omni.isaac.lab.sensors.camera import CameraData
from omni.isaac.lab.utils.warp import raycast_mesh

from .ray_caster import RayCaster

if TYPE_CHECKING:
    from .ray_caster_camera_cfg import RayCasterCameraCfg


[docs]class RayCasterCamera(RayCaster): """A ray-casting camera sensor. The ray-caster camera uses a set of rays to get the distances to meshes in the scene. The rays are defined in the sensor's local coordinate frame. The sensor has the same interface as the :class:`omni.isaac.lab.sensors.Camera` that implements the camera class through USD camera prims. However, this class provides a faster image generation. The sensor converts meshes from the list of primitive paths provided in the configuration to Warp meshes. The camera then ray-casts against these Warp meshes only. Currently, only the following annotators are supported: - ``"distance_to_camera"``: An image containing the distance to camera optical center. - ``"distance_to_image_plane"``: An image containing distances of 3D points from camera plane along camera's z-axis. - ``"normals"``: An image containing the local surface normal vectors at each pixel. .. note:: Currently, only static meshes are supported. Extending the warp mesh to support dynamic meshes is a work in progress. """ cfg: RayCasterCameraCfg """The configuration parameters.""" UNSUPPORTED_TYPES: ClassVar[set[str]] = { "rgb", "instance_id_segmentation", "instance_id_segmentation_fast", "instance_segmentation", "instance_segmentation_fast", "semantic_segmentation", "skeleton_data", "motion_vectors", "bounding_box_2d_tight", "bounding_box_2d_tight_fast", "bounding_box_2d_loose", "bounding_box_2d_loose_fast", "bounding_box_3d", "bounding_box_3d_fast", } """A set of sensor types that are not supported by the ray-caster camera."""
[docs] def __init__(self, cfg: RayCasterCameraCfg): """Initializes the camera object. Args: cfg: The configuration parameters. Raises: ValueError: If the provided data types are not supported by the ray-caster camera. """ # perform check on supported data types self._check_supported_data_types(cfg) # initialize base class super().__init__(cfg) # create empty variables for storing output data self._data = CameraData()
def __str__(self) -> str: """Returns: A string containing information about the instance.""" return ( f"Ray-Caster-Camera @ '{self.cfg.prim_path}': \n" f"\tview type : {self._view.__class__}\n" f"\tupdate period (s) : {self.cfg.update_period}\n" f"\tnumber of meshes : {len(self.meshes)}\n" f"\tnumber of sensors : {self._view.count}\n" f"\tnumber of rays/sensor: {self.num_rays}\n" f"\ttotal number of rays : {self.num_rays * self._view.count}\n" f"\timage shape : {self.image_shape}" ) """ Properties """ @property def data(self) -> CameraData: # update sensors if needed self._update_outdated_buffers() # return the data return self._data @property def image_shape(self) -> tuple[int, int]: """A tuple containing (height, width) of the camera sensor.""" return (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width) @property def frame(self) -> torch.tensor: """Frame number when the measurement took place.""" return self._frame """ Operations. """
[docs] def set_intrinsic_matrices( self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None ): """Set the intrinsic matrix of the camera. Args: matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). focal_length: Focal length to use when computing aperture values (in cm). Defaults to 1.0. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. """ # resolve env_ids if env_ids is None: env_ids = slice(None) # save new intrinsic matrices and focal length self._data.intrinsic_matrices[env_ids] = matrices.to(self._device) self._focal_length = focal_length # recompute ray directions self.ray_starts[env_ids], self.ray_directions[env_ids] = self.cfg.pattern_cfg.func( self.cfg.pattern_cfg, self._data.intrinsic_matrices[env_ids], self._device )
[docs] def reset(self, env_ids: Sequence[int] | None = None): # reset the timestamps super().reset(env_ids) # resolve None if env_ids is None: env_ids = slice(None) # reset the data # note: this recomputation is useful if one performs events such as randomizations on the camera poses. pos_w, quat_w = self._compute_camera_world_poses(env_ids) self._data.pos_w[env_ids] = pos_w self._data.quat_w_world[env_ids] = quat_w # Reset the frame count self._frame[env_ids] = 0
[docs] def set_world_poses( self, positions: torch.Tensor | None = None, orientations: torch.Tensor | None = None, env_ids: Sequence[int] | None = None, convention: Literal["opengl", "ros", "world"] = "ros", ): """Set the pose of the camera w.r.t. the world frame using specified convention. Since different fields use different conventions for camera orientations, the method allows users to set the camera poses in the specified convention. Possible conventions are: - :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention - :obj:`"ros"` - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention - :obj:`"world"` - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention See :meth:`omni.isaac.lab.utils.maths.convert_camera_frame_orientation_convention` for more details on the conventions. Args: positions: The cartesian coordinates (in meters). Shape is (N, 3). Defaults to None, in which case the camera position in not changed. orientations: The quaternion orientation in (w, x, y, z). Shape is (N, 4). Defaults to None, in which case the camera orientation in not changed. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. convention: The convention in which the poses are fed. Defaults to "ros". Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. """ # resolve env_ids if env_ids is None: env_ids = self._ALL_INDICES # get current positions pos_w, quat_w = self._compute_view_world_poses(env_ids) if positions is not None: # transform to camera frame pos_offset_world_frame = positions - pos_w self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w), pos_offset_world_frame) if orientations is not None: # convert rotation matrix from input convention to world quat_w_set = math_utils.convert_camera_frame_orientation_convention( orientations, origin=convention, target="world" ) self._offset_quat[env_ids] = math_utils.quat_mul(math_utils.quat_inv(quat_w), quat_w_set) # update the data pos_w, quat_w = self._compute_camera_world_poses(env_ids) self._data.pos_w[env_ids] = pos_w self._data.quat_w_world[env_ids] = quat_w
[docs] def set_world_poses_from_view( self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None ): """Set the poses of the camera from the eye position and look-at target position. Args: eyes: The positions of the camera's eye. Shape is N, 3). targets: The target locations to look at. Shape is (N, 3). env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. NotImplementedError: If the stage up-axis is not "Y" or "Z". """ # get up axis of current stage up_axis = stage_utils.get_stage_up_axis() # camera position and rotation in opengl convention orientations = math_utils.quat_from_matrix( math_utils.create_rotation_matrix_from_view(eyes, targets, up_axis=up_axis, device=self._device) ) self.set_world_poses(eyes, orientations, env_ids, convention="opengl")
""" Implementation. """ def _initialize_rays_impl(self): # Create all indices buffer self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long) # Create frame count buffer self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long) # create buffers self._create_buffers() # compute intrinsic matrices self._compute_intrinsic_matrices() # compute ray stars and directions self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func( self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device ) self.num_rays = self.ray_directions.shape[1] # create buffer to store ray hits self.ray_hits_w = torch.zeros(self._view.count, self.num_rays, 3, device=self._device) # set offsets quat_w = math_utils.convert_camera_frame_orientation_convention( torch.tensor([self.cfg.offset.rot], device=self._device), origin=self.cfg.offset.convention, target="world" ) self._offset_quat = quat_w.repeat(self._view.count, 1) self._offset_pos = torch.tensor(list(self.cfg.offset.pos), device=self._device).repeat(self._view.count, 1) def _update_buffers_impl(self, env_ids: Sequence[int]): """Fills the buffers of the sensor data.""" # increment frame count self._frame[env_ids] += 1 # compute poses from current view pos_w, quat_w = self._compute_camera_world_poses(env_ids) # update the data self._data.pos_w[env_ids] = pos_w self._data.quat_w_world[env_ids] = quat_w # note: full orientation is considered ray_starts_w = math_utils.quat_apply(quat_w.repeat(1, self.num_rays), self.ray_starts[env_ids]) ray_starts_w += pos_w.unsqueeze(1) ray_directions_w = math_utils.quat_apply(quat_w.repeat(1, self.num_rays), self.ray_directions[env_ids]) # ray cast and store the hits # note: we set max distance to 1e6 during the ray-casting. THis is because we clip the distance # to the image plane and distance to the camera to the maximum distance afterwards in-order to # match the USD camera behavior. # TODO: Make ray-casting work for multiple meshes? # necessary for regular dictionaries. self.ray_hits_w, ray_depth, ray_normal, _ = raycast_mesh( ray_starts_w, ray_directions_w, mesh=self.meshes[self.cfg.mesh_prim_paths[0]], max_dist=1e6, return_distance=any( [name in self.cfg.data_types for name in ["distance_to_image_plane", "distance_to_camera"]] ), return_normal="normals" in self.cfg.data_types, ) # update output buffers if "distance_to_image_plane" in self.cfg.data_types: # note: data is in camera frame so we only take the first component (z-axis of camera frame) distance_to_image_plane = ( math_utils.quat_apply( math_utils.quat_inv(quat_w).repeat(1, self.num_rays), (ray_depth[:, :, None] * ray_directions_w), ) )[:, :, 0] # apply the maximum distance after the transformation if self.cfg.depth_clipping_behavior == "max": distance_to_image_plane = torch.clip(distance_to_image_plane, max=self.cfg.max_distance) distance_to_image_plane[torch.isnan(distance_to_image_plane)] = self.cfg.max_distance elif self.cfg.depth_clipping_behavior == "zero": distance_to_image_plane[distance_to_image_plane > self.cfg.max_distance] = 0.0 distance_to_image_plane[torch.isnan(distance_to_image_plane)] = 0.0 self._data.output["distance_to_image_plane"][env_ids] = distance_to_image_plane.view( -1, *self.image_shape, 1 ) if "distance_to_camera" in self.cfg.data_types: if self.cfg.depth_clipping_behavior == "max": ray_depth = torch.clip(ray_depth, max=self.cfg.max_distance) elif self.cfg.depth_clipping_behavior == "zero": ray_depth[ray_depth > self.cfg.max_distance] = 0.0 self._data.output["distance_to_camera"][env_ids] = ray_depth.view(-1, *self.image_shape, 1) if "normals" in self.cfg.data_types: self._data.output["normals"][env_ids] = ray_normal.view(-1, *self.image_shape, 3) def _debug_vis_callback(self, event): # in case it crashes be safe if not hasattr(self, "ray_hits_w"): return # show ray hit positions self.ray_visualizer.visualize(self.ray_hits_w.view(-1, 3)) """ Private Helpers """ def _check_supported_data_types(self, cfg: RayCasterCameraCfg): """Checks if the data types are supported by the ray-caster camera.""" # check if there is any intersection in unsupported types # reason: we cannot obtain this data from simplified warp-based ray caster common_elements = set(cfg.data_types) & RayCasterCamera.UNSUPPORTED_TYPES if common_elements: raise ValueError( f"RayCasterCamera class does not support the following sensor types: {common_elements}." "\n\tThis is because these sensor types cannot be obtained in a fast way using ''warp''." "\n\tHint: If you need to work with these sensor types, we recommend using the USD camera" " interface from the omni.isaac.lab.sensors.camera module." ) def _create_buffers(self): """Create buffers for storing data.""" # prepare drift self.drift = torch.zeros(self._view.count, 3, device=self.device) # create the data object # -- pose of the cameras self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device) self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device) # -- intrinsic matrix self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device) self._data.intrinsic_matrices[:, 2, 2] = 1.0 self._data.image_shape = self.image_shape # -- output data # create the buffers to store the annotator data. self._data.output = {} self._data.info = [{name: None for name in self.cfg.data_types}] * self._view.count for name in self.cfg.data_types: if name in ["distance_to_image_plane", "distance_to_camera"]: shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 1) elif name in ["normals"]: shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 3) else: raise ValueError(f"Received unknown data type: {name}. Please check the configuration.") # allocate tensor to store the data self._data.output[name] = torch.zeros((self._view.count, *shape), device=self._device) def _compute_intrinsic_matrices(self): """Computes the intrinsic matrices for the camera based on the config provided.""" # get the sensor properties pattern_cfg = self.cfg.pattern_cfg # check if vertical aperture is provided # if not then it is auto-computed based on the aspect ratio to preserve squared pixels if pattern_cfg.vertical_aperture is None: pattern_cfg.vertical_aperture = pattern_cfg.horizontal_aperture * pattern_cfg.height / pattern_cfg.width # compute the intrinsic matrix f_x = pattern_cfg.width * pattern_cfg.focal_length / pattern_cfg.horizontal_aperture f_y = pattern_cfg.height * pattern_cfg.focal_length / pattern_cfg.vertical_aperture c_x = pattern_cfg.horizontal_aperture_offset * f_x + pattern_cfg.width / 2 c_y = pattern_cfg.vertical_aperture_offset * f_y + pattern_cfg.height / 2 # allocate the intrinsic matrices self._data.intrinsic_matrices[:, 0, 0] = f_x self._data.intrinsic_matrices[:, 0, 2] = c_x self._data.intrinsic_matrices[:, 1, 1] = f_y self._data.intrinsic_matrices[:, 1, 2] = c_y # save focal length self._focal_length = pattern_cfg.focal_length def _compute_view_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]: """Obtains the pose of the view the camera is attached to in the world frame. Returns: A tuple of the position (in meters) and quaternion (w, x, y, z). """ # obtain the poses of the sensors # note: clone arg doesn't exist for xform prim view so we need to do this manually if isinstance(self._view, XFormPrimView): pos_w, quat_w = self._view.get_world_poses(env_ids) elif isinstance(self._view, physx.ArticulationView): pos_w, quat_w = self._view.get_root_transforms()[env_ids].split([3, 4], dim=-1) quat_w = math_utils.convert_quat(quat_w, to="wxyz") elif isinstance(self._view, physx.RigidBodyView): pos_w, quat_w = self._view.get_transforms()[env_ids].split([3, 4], dim=-1) quat_w = math_utils.convert_quat(quat_w, to="wxyz") else: raise RuntimeError(f"Unsupported view type: {type(self._view)}") # return the pose return pos_w.clone(), quat_w.clone() def _compute_camera_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]: """Computes the pose of the camera in the world frame. This function applies the offset pose to the pose of the view the camera is attached to. Returns: A tuple of the position (in meters) and quaternion (w, x, y, z) in "world" convention. """ # get the pose of the view the camera is attached to pos_w, quat_w = self._compute_view_world_poses(env_ids) # apply offsets # need to apply quat because offset relative to parent frame pos_w += math_utils.quat_apply(quat_w, self._offset_pos[env_ids]) quat_w = math_utils.quat_mul(quat_w, self._offset_quat[env_ids]) return pos_w, quat_w