Source code for omni.isaac.lab.sensors.camera.tiled_camera
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import math
import numpy as np
import torch
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any

import carb
import omni.usd
import warp as wp
from omni.isaac.core.prims import XFormPrimView
from omni.isaac.version import get_version
from pxr import UsdGeom

from omni.isaac.lab.utils.warp.kernels import reshape_tiled_image

from ..sensor_base import SensorBase
from .camera import Camera

if TYPE_CHECKING:
    from .tiled_camera_cfg import TiledCameraCfg
class TiledCamera(Camera):
    r"""The tiled rendering based camera sensor for acquiring the same data as the Camera class.

    This class inherits from the :class:`Camera` class but uses the tiled-rendering API to acquire
    the visual data. Tiled-rendering concatenates the rendered images from multiple cameras into a single image.
    This allows for rendering multiple cameras in parallel and is useful for rendering large scenes with
    multiple cameras efficiently.

    The following sensor types are supported:

    - ``"rgb"``: A 3-channel rendered color image.
    - ``"rgba"``: A 4-channel rendered color image with alpha channel.
    - ``"distance_to_camera"``: An image containing the distance to camera optical center.
    - ``"distance_to_image_plane"``: An image containing distances of 3D points from camera plane along camera's z-axis.
    - ``"depth"``: Alias for ``"distance_to_image_plane"``.
    - ``"normals"``: An image containing the local surface normal vectors at each pixel.
    - ``"motion_vectors"``: An image containing the motion vector data at each pixel.
    - ``"semantic_segmentation"``: The semantic segmentation data.
    - ``"instance_segmentation_fast"``: The instance segmentation data.
    - ``"instance_id_segmentation_fast"``: The instance id segmentation data.

    .. note::
        Currently the following sensor types are not supported in a "view" format:

        - ``"instance_segmentation"``: The instance segmentation data. Please use the fast counterparts instead.
        - ``"instance_id_segmentation"``: The instance id segmentation data. Please use the fast counterparts instead.
        - ``"bounding_box_2d_tight"``: The tight 2D bounding box data (only contains non-occluded regions).
        - ``"bounding_box_2d_tight_fast"``: The tight 2D bounding box data (only contains non-occluded regions).
        - ``"bounding_box_2d_loose"``: The loose 2D bounding box data (contains occluded regions).
        - ``"bounding_box_2d_loose_fast"``: The loose 2D bounding box data (contains occluded regions).
        - ``"bounding_box_3d"``: The 3D view space bounding box data.
        - ``"bounding_box_3d_fast"``: The 3D view space bounding box data.

    .. _replicator extension: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/annotators_details.html#annotator-output
    .. _USDGeom Camera: https://graphics.pixar.com/usd/docs/api/class_usd_geom_camera.html

    .. versionadded:: v1.0.0

        This feature is available starting from Isaac Sim 4.2. Before this version, the tiled rendering APIs
        were not available.
    """

    cfg: TiledCameraCfg
    """The configuration parameters."""
    def __init__(self, cfg: TiledCameraCfg):
        """Initializes the tiled camera sensor.

        Args:
            cfg: The configuration parameters.

        Raises:
            RuntimeError: If no camera prim is found at the given path.
            RuntimeError: If the Isaac Sim version is lower than 4.2.
            ValueError: If the provided data types are not supported by the camera.
        """
        isaac_sim_version = float(".".join(get_version()[2:4]))
        if isaac_sim_version < 4.2:
            raise RuntimeError(
                f"TiledCamera is only available from Isaac Sim 4.2.0. Current version is {isaac_sim_version}. Please"
                " update to Isaac Sim 4.2.0"
            )
        super().__init__(cfg)
    def __del__(self):
        """Unsubscribes from callbacks and detaches from the replicator registry."""
        # unsubscribe from callbacks
        SensorBase.__del__(self)
        # detach from the replicator registry
        for annotator in self._annotators.values():
            annotator.detach(self.render_product_paths)

    def __str__(self) -> str:
        """Returns: A string containing information about the instance."""
        # message for class
        return (
            f"Tiled Camera @ '{self.cfg.prim_path}': \n"
            f"\tdata types   : {list(self.data.output.keys())}\n"
            f"\tsemantic filter : {self.cfg.semantic_filter}\n"
            f"\tcolorize semantic segm.   : {self.cfg.colorize_semantic_segmentation}\n"
            f"\tcolorize instance segm.   : {self.cfg.colorize_instance_segmentation}\n"
            f"\tcolorize instance id segm.: {self.cfg.colorize_instance_id_segmentation}\n"
            f"\tupdate period (s): {self.cfg.update_period}\n"
            f"\tshape        : {self.image_shape}\n"
            f"\tnumber of sensors : {self._view.count}"
        )

    """
    Operations
    """
    def reset(self, env_ids: Sequence[int] | None = None):
        if not self._is_initialized:
            raise RuntimeError(
                "TiledCamera could not be initialized. Please ensure --enable_cameras is used to enable rendering."
            )
        # reset the timestamps
        SensorBase.reset(self, env_ids)
        # resolve None
        if env_ids is None:
            env_ids = slice(None)
        # reset the frame count
        self._frame[env_ids] = 0
""" Implementation. """def_initialize_impl(self):"""Initializes the sensor handles and internal buffers. This function creates handles and registers the provided data types with the replicator registry to be able to access the data from the sensor. It also initializes the internal buffers to store the data. Raises: RuntimeError: If the number of camera prims in the view does not match the number of environments. RuntimeError: If replicator was not found. """carb_settings_iface=carb.settings.get_settings()ifnotcarb_settings_iface.get("/isaaclab/cameras_enabled"):raiseRuntimeError("A camera was spawned without the --enable_cameras flag. Please use --enable_cameras to enable"" rendering.")importomni.replicator.coreasrep# Initialize parent classSensorBase._initialize_impl(self)# Create a view for the sensorself._view=XFormPrimView(self.cfg.prim_path,reset_xform_properties=False)self._view.initialize()# Check that sizes are correctifself._view.count!=self._num_envs:raiseRuntimeError(f"Number of camera prims in the view ({self._view.count}) does not match"f" the number of environments ({self._num_envs}).")# Create all env_ids bufferself._ALL_INDICES=torch.arange(self._view.count,device=self._device,dtype=torch.long)# Create frame count bufferself._frame=torch.zeros(self._view.count,device=self._device,dtype=torch.long)# Obtain current stagestage=omni.usd.get_context().get_stage()# Convert all encapsulated prims to Cameraforcam_prim_pathinself._view.prim_paths:# Get camera primcam_prim=stage.GetPrimAtPath(cam_prim_path)# Check if prim is a cameraifnotcam_prim.IsA(UsdGeom.Camera):raiseRuntimeError(f"Prim at path '{cam_prim_path}' is not a Camera.")# Add to listsensor_prim=UsdGeom.Camera(cam_prim)self._sensor_prims.append(sensor_prim)# Create replicator tiled render productrp=rep.create.render_product_tiled(cameras=self._view.prim_paths,tile_resolution=(self.cfg.width,self.cfg.height))self._render_product_paths=[rp.path]# WAR: use DLAA antialiasing to avoid frame offset issue at small resolutionsifself._tiling_grid_shape()[0]*self.cfg.width<265orself._tiling_grid_shape()[1]*self.cfg.height<265:rep.settings.set_render_rtx_realtime(antialiasing="DLAA")# Define the annotators based on requested data typesself._annotators=dict()forannotator_typeinself.cfg.data_types:ifannotator_type=="rgba"orannotator_type=="rgb":annotator=rep.AnnotatorRegistry.get_annotator("rgb",device=self.device,do_array_copy=False)self._annotators["rgba"]=annotatorelifannotator_type=="depth"orannotator_type=="distance_to_image_plane":# keep depth for backwards compatibilityannotator=rep.AnnotatorRegistry.get_annotator("distance_to_image_plane",device=self.device,do_array_copy=False)self._annotators[annotator_type]=annotator# note: we are verbose here to make it easier to understand the code.# if colorize is true, the data is mapped to colors and a uint8 4 channel image is returned.# if colorize is false, the data is returned as a uint32 image with ids as values.else:init_params=Noneifannotator_type=="semantic_segmentation":init_params={"colorize":self.cfg.colorize_semantic_segmentation}elifannotator_type=="instance_segmentation_fast":init_params={"colorize":self.cfg.colorize_instance_segmentation}elifannotator_type=="instance_id_segmentation_fast":init_params={"colorize":self.cfg.colorize_instance_id_segmentation}annotator=rep.AnnotatorRegistry.get_annotator(annotator_type,init_params,device=self.device,do_array_copy=False)self._annotators[annotator_type]=annotator# Attach the annotator to the render 
        for annotator in self._annotators.values():
            annotator.attach(self._render_product_paths)

        # Create internal buffers
        self._create_buffers()

    def _update_buffers_impl(self, env_ids: Sequence[int]):
        # Increment frame count
        self._frame[env_ids] += 1

        # Extract the flattened image buffer
        for data_type, annotator in self._annotators.items():
            # check whether returned data is a dict (used for segmentation)
            output = annotator.get_data()
            if isinstance(output, dict):
                tiled_data_buffer = output["data"]
                self._data.info[data_type] = output["info"]
            else:
                tiled_data_buffer = output

            # convert data buffer to warp array
            if isinstance(tiled_data_buffer, np.ndarray):
                tiled_data_buffer = wp.array(tiled_data_buffer, device=self.device, dtype=wp.uint8)
            else:
                tiled_data_buffer = tiled_data_buffer.to(device=self.device)

            # process data for different segmentation types
            # Note: Replicator returns raw buffers of dtype uint32 for segmentation types,
            #   so we need to convert them to uint8 4 channel images for colorized types
            if (
                (data_type == "semantic_segmentation" and self.cfg.colorize_semantic_segmentation)
                or (data_type == "instance_segmentation_fast" and self.cfg.colorize_instance_segmentation)
                or (data_type == "instance_id_segmentation_fast" and self.cfg.colorize_instance_id_segmentation)
            ):
                tiled_data_buffer = wp.array(
                    ptr=tiled_data_buffer.ptr, shape=(*tiled_data_buffer.shape, 4), dtype=wp.uint8, device=self.device
                )

            wp.launch(
                kernel=reshape_tiled_image,
                dim=(self._view.count, self.cfg.height, self.cfg.width),
                inputs=[
                    tiled_data_buffer.flatten(),
                    wp.from_torch(self._data.output[data_type]),  # zero-copy alias
                    *list(self._data.output[data_type].shape[1:]),  # height, width, num_channels
                    self._tiling_grid_shape()[0],  # num_tiles_x
                ],
                device=self.device,
            )

            # alias rgb as first 3 channels of rgba
            if data_type == "rgba" and "rgb" in self.cfg.data_types:
                self._data.output["rgb"] = self._data.output["rgba"][..., :3]

            # NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center. However,
            #   the replicator depth clipping is applied w.r.t. the image plane, which may result in values
            #   larger than the clipping range in the output. We apply an additional clipping to ensure values
            #   are within the clipping range for all the annotators.
            if data_type == "distance_to_camera":
                self._data.output[data_type][
                    self._data.output[data_type] > self.cfg.spawn.clipping_range[1]
                ] = torch.inf

            # apply defined clipping behavior
            if (
                data_type == "distance_to_camera" or data_type == "distance_to_image_plane" or data_type == "depth"
            ) and self.cfg.depth_clipping_behavior != "none":
                self._data.output[data_type][torch.isinf(self._data.output[data_type])] = (
                    0.0 if self.cfg.depth_clipping_behavior == "zero" else self.cfg.spawn.clipping_range[1]
                )

    """
    Private Helpers
    """

    def _check_supported_data_types(self, cfg: TiledCameraCfg):
        """Checks if the data types are supported by the tiled camera."""
        # check if there is any intersection in unsupported types
        # reason: these use np structured data types which we can't yet convert to torch tensor
        common_elements = set(cfg.data_types) & Camera.UNSUPPORTED_TYPES
        if common_elements:
            # provide alternative fast counterparts
            fast_common_elements = []
            for item in common_elements:
                if "instance_segmentation" in item or "instance_id_segmentation" in item:
                    fast_common_elements.append(item + "_fast")
            # raise error
            raise ValueError(
                f"TiledCamera class does not support the following sensor types: {common_elements}."
                "\n\tThis is because these sensor types output numpy structured data types which"
                " can't be converted to torch tensors easily."
                "\n\tHint: If you need to work with these sensor types, we recommend using their fast counterparts."
                f"\n\t\tFast counterparts: {fast_common_elements}"
            )

    def _create_buffers(self):
        """Create buffers for storing data."""
        # create the data object
        # -- pose of the cameras
        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
        self._update_poses(self._ALL_INDICES)
        # -- intrinsic matrix
        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
        self._update_intrinsic_matrices(self._ALL_INDICES)
        self._data.image_shape = self.image_shape
        # -- output data
        data_dict = dict()
        if "rgba" in self.cfg.data_types or "rgb" in self.cfg.data_types:
            data_dict["rgba"] = torch.zeros(
                (self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device, dtype=torch.uint8
            ).contiguous()
        if "rgb" in self.cfg.data_types:
            # RGB is the first 3 channels of RGBA
RGBAdata_dict["rgb"]=data_dict["rgba"][...,:3]if"distance_to_image_plane"inself.cfg.data_types:data_dict["distance_to_image_plane"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.float32).contiguous()if"depth"inself.cfg.data_types:data_dict["depth"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.float32).contiguous()if"distance_to_camera"inself.cfg.data_types:data_dict["distance_to_camera"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.float32).contiguous()if"normals"inself.cfg.data_types:data_dict["normals"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,3),device=self.device,dtype=torch.float32).contiguous()if"motion_vectors"inself.cfg.data_types:data_dict["motion_vectors"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,2),device=self.device,dtype=torch.float32).contiguous()if"semantic_segmentation"inself.cfg.data_types:ifself.cfg.colorize_semantic_segmentation:data_dict["semantic_segmentation"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,4),device=self.device,dtype=torch.uint8).contiguous()else:data_dict["semantic_segmentation"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.int32).contiguous()if"instance_segmentation_fast"inself.cfg.data_types:ifself.cfg.colorize_instance_segmentation:data_dict["instance_segmentation_fast"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,4),device=self.device,dtype=torch.uint8).contiguous()else:data_dict["instance_segmentation_fast"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.int32).contiguous()if"instance_id_segmentation_fast"inself.cfg.data_types:ifself.cfg.colorize_instance_id_segmentation:data_dict["instance_id_segmentation_fast"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,4),device=self.device,dtype=torch.uint8).contiguous()else:data_dict["instance_id_segmentation_fast"]=torch.zeros((self._view.count,self.cfg.height,self.cfg.width,1),device=self.device,dtype=torch.int32).contiguous()self._data.output=data_dictself._data.info=dict()def_tiled_image_shape(self)->tuple[int,int]:"""Returns a tuple containing the dimension of the tiled image."""cols,rows=self._tiling_grid_shape()return(self.cfg.width*cols,self.cfg.height*rows)def_tiling_grid_shape(self)->tuple[int,int]:"""Returns a tuple containing the tiling grid dimension."""cols=math.ceil(math.sqrt(self._view.count))rows=math.ceil(self._view.count/cols)return(cols,rows)def_create_annotator_data(self):# we do not need to create annotator data for the tiled camera sensorraiseRuntimeError("This function should not be called for the tiled camera sensor.")def_process_annotator_output(self,name:str,output:Any)->tuple[torch.tensor,dict|None]:# we do not need to process annotator output for the tiled camera sensorraiseRuntimeError("This function should not be called for the tiled camera sensor.")""" Internal simulation callbacks. """def_invalidate_initialize_callback(self,event):"""Invalidates the scene elements."""# call parentsuper()._invalidate_initialize_callback(event)# set all existing views to None to invalidate themself._view=None