Saving rendered images and 3D re-projection#

This guide is accompanied by the run_usd_camera.py script in the IsaacLab/source/standalone/tutorials/04_sensors directory.

Code for run_usd_camera.py
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""
This script shows how to use the camera sensor from the Isaac Lab framework.

The camera sensor is created and interfaced through the Omniverse Replicator API. However, instead of using
the simulator or OpenGL convention for the camera, we use the robotics or ROS convention.

.. code-block:: bash

    # Usage with GUI
    ./isaaclab.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py

    # Usage with headless
    ./isaaclab.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --headless --enable_cameras

"""

"""Launch Isaac Sim Simulator first."""

import argparse

from omni.isaac.lab.app import AppLauncher

# add argparse arguments
parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU device for camera output.")
parser.add_argument(
    "--draw",
    action="store_true",
    default=False,
    help="Draw the pointcloud from camera at index specified by ``--camera_id``.",
)
parser.add_argument(
    "--save",
    action="store_true",
    default=False,
    help="Save the data from camera at index specified by ``--camera_id``.",
)
parser.add_argument(
    "--camera_id",
    type=int,
    choices={0, 1},
    default=0,
    help=(
        "The camera ID to use for displaying points or saving the camera data. Default is 0."
        " The viewport will always initialize with the perspective of camera 0."
    ),
)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args()
args_cli.enable_cameras = True
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

"""Rest everything follows."""

import numpy as np
import os
import random
import torch

import omni.isaac.core.utils.prims as prim_utils
import omni.replicator.core as rep

import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.assets import RigidObject, RigidObjectCfg
from omni.isaac.lab.markers import VisualizationMarkers
from omni.isaac.lab.markers.config import RAY_CASTER_MARKER_CFG
from omni.isaac.lab.sensors.camera import Camera, CameraCfg
from omni.isaac.lab.sensors.camera.utils import create_pointcloud_from_depth
from omni.isaac.lab.utils import convert_dict_to_backend


def define_sensor() -> Camera:
    """Defines the camera sensor to add to the scene."""
    # Setup camera sensor
    # In contrast to the ray-cast camera, we spawn the prim at these locations.
    # This means the camera sensor will be attached to these prims.
    prim_utils.create_prim("/World/Origin_00", "Xform")
    prim_utils.create_prim("/World/Origin_01", "Xform")
    camera_cfg = CameraCfg(
        prim_path="/World/Origin_.*/CameraSensor",
        update_period=0,
        height=480,
        width=640,
        data_types=[
            "rgb",
            "distance_to_image_plane",
            "normals",
            "semantic_segmentation",
            "instance_segmentation_fast",
            "instance_id_segmentation_fast",
        ],
        colorize_semantic_segmentation=True,
        colorize_instance_id_segmentation=True,
        colorize_instance_segmentation=True,
        spawn=sim_utils.PinholeCameraCfg(
            focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
        ),
    )
    # Create camera
    camera = Camera(cfg=camera_cfg)

    return camera


def design_scene() -> dict:
    """Design the scene."""
    # Populate scene
    # -- Ground-plane
    cfg = sim_utils.GroundPlaneCfg()
    cfg.func("/World/defaultGroundPlane", cfg)
    # -- Lights
    cfg = sim_utils.DistantLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75))
    cfg.func("/World/Light", cfg)

    # Create a dictionary for the scene entities
    scene_entities = {}

    # Xform to hold objects
    prim_utils.create_prim("/World/Objects", "Xform")
    # Random objects
    for i in range(8):
        # sample random position
        position = np.random.rand(3) - np.asarray([0.05, 0.05, -1.0])
        position *= np.asarray([1.5, 1.5, 0.5])
        # sample random color
        color = (random.random(), random.random(), random.random())
        # choose random prim type
        prim_type = random.choice(["Cube", "Cone", "Cylinder"])
        common_properties = {
            "rigid_props": sim_utils.RigidBodyPropertiesCfg(),
            "mass_props": sim_utils.MassPropertiesCfg(mass=5.0),
            "collision_props": sim_utils.CollisionPropertiesCfg(),
            "visual_material": sim_utils.PreviewSurfaceCfg(diffuse_color=color, metallic=0.5),
            "semantic_tags": [("class", prim_type)],
        }
        if prim_type == "Cube":
            shape_cfg = sim_utils.CuboidCfg(size=(0.25, 0.25, 0.25), **common_properties)
        elif prim_type == "Cone":
            shape_cfg = sim_utils.ConeCfg(radius=0.1, height=0.25, **common_properties)
        elif prim_type == "Cylinder":
            shape_cfg = sim_utils.CylinderCfg(radius=0.25, height=0.25, **common_properties)
        # Rigid Object
        obj_cfg = RigidObjectCfg(
            prim_path=f"/World/Objects/Obj_{i:02d}",
            spawn=shape_cfg,
            init_state=RigidObjectCfg.InitialStateCfg(pos=position),
        )
        scene_entities[f"rigid_object{i}"] = RigidObject(cfg=obj_cfg)

    # Sensors
    camera = define_sensor()

    # return the scene information
    scene_entities["camera"] = camera
    return scene_entities


def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
    """Run the simulator."""
    # extract entities for simplified notation
    camera: Camera = scene_entities["camera"]

    # Create replicator writer
    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera")
    rep_writer = rep.BasicWriter(
        output_dir=output_dir,
        frame_padding=0,
        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
    )

    # Camera positions, targets, orientations
    camera_positions = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device)
    camera_targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device)
    # These orientations are in ROS-convention, and will position the cameras to view the origin
    camera_orientations = torch.tensor(  # noqa: F841
        [[-0.1759, 0.3399, 0.8205, -0.4247], [-0.4247, 0.8205, -0.3399, 0.1759]], device=sim.device
    )

    # Set pose: There are two ways to set the pose of the camera.
    # -- Option-1: Set pose using view
    camera.set_world_poses_from_view(camera_positions, camera_targets)
    # -- Option-2: Set pose using ROS
    # camera.set_world_poses(camera_positions, camera_orientations, convention="ros")

    # Index of the camera to use for visualization and saving
    camera_index = args_cli.camera_id

    # Create the markers for the --draw option outside of is_running() loop
    if sim.has_gui() and args_cli.draw:
        cfg = RAY_CASTER_MARKER_CFG.replace(prim_path="/Visuals/CameraPointCloud")
        cfg.markers["hit"].radius = 0.002
        pc_markers = VisualizationMarkers(cfg)

    # Simulate physics
    while simulation_app.is_running():
        # Step simulation
        sim.step()
        # Update camera data
        camera.update(dt=sim.get_physics_dt())

        # Print camera info
        print(camera)
        if "rgb" in camera.data.output.keys():
            print("Received shape of rgb image        : ", camera.data.output["rgb"].shape)
        if "distance_to_image_plane" in camera.data.output.keys():
            print("Received shape of depth image      : ", camera.data.output["distance_to_image_plane"].shape)
        if "normals" in camera.data.output.keys():
            print("Received shape of normals          : ", camera.data.output["normals"].shape)
        if "semantic_segmentation" in camera.data.output.keys():
            print("Received shape of semantic segm.   : ", camera.data.output["semantic_segmentation"].shape)
        if "instance_segmentation_fast" in camera.data.output.keys():
            print("Received shape of instance segm.   : ", camera.data.output["instance_segmentation_fast"].shape)
        if "instance_id_segmentation_fast" in camera.data.output.keys():
            print("Received shape of instance id segm.: ", camera.data.output["instance_id_segmentation_fast"].shape)
        print("-------------------------------")

        # Extract camera data
        if args_cli.save:
            # Save images from camera at camera_index
            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
            # tensordict allows easy indexing of tensors in the dictionary
            single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")

            # Extract the other information
            single_cam_info = camera.data.info[camera_index]

            # Pack data back into replicator format to save them using its writer
            if sim.get_version()[0] == 4:
                rep_output = {"annotators": {}}
                for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                    if info is not None:
                        rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
                    else:
                        rep_output["annotators"][key] = {"render_product": {"data": data}}
            else:
                rep_output = dict()
                for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                    if info is not None:
                        rep_output[key] = {"data": data, "info": info}
                    else:
                        rep_output[key] = data
            # Save images
            # Note: We need to provide On-time data for Replicator to save the images.
            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
            rep_writer.write(rep_output)

        # Draw pointcloud if there is a GUI and --draw has been passed
        if sim.has_gui() and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys():
            # Derive pointcloud from camera at camera_index
            pointcloud = create_pointcloud_from_depth(
                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
                depth=camera.data.output[camera_index]["distance_to_image_plane"],
                position=camera.data.pos_w[camera_index],
                orientation=camera.data.quat_w_ros[camera_index],
                device=sim.device,
            )

            # In the first few steps, things are still being instanced and Camera.data
            # can be empty. If we attempt to visualize an empty pointcloud it will crash
            # the sim, so we check that the pointcloud is not empty.
            if pointcloud.size()[0] > 0:
                pc_markers.visualize(translations=pointcloud)


def main():
    """Main function."""
    # Load simulation context
    sim_cfg = sim_utils.SimulationCfg(device="cpu" if args_cli.cpu else "cuda")
    sim = sim_utils.SimulationContext(sim_cfg)
    # Set main camera
    sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
    # design the scene
    scene_entities = design_scene()
    # Play simulator
    sim.reset()
    # Now we are ready!
    print("[INFO]: Setup complete...")
    # Run simulator
    run_simulator(sim, scene_entities)


if __name__ == "__main__":
    # run the main function
    main()
    # close sim app
    simulation_app.close()

Saving using Replicator Basic Writer#

To save camera outputs, we use the basic writer class from Omniverse Replicator. This class allows us to save the images in NumPy format. For more information on the basic writer, please check the Omniverse Replicator documentation.

    rep_writer = rep.BasicWriter(
        output_dir=output_dir,
        frame_padding=0,
        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
    )
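
The writer above stores the outputs of the selected camera in a single folder. If you want to keep the outputs of several cameras apart, one simple option is to instantiate one writer per camera, each with its own sub-directory. The snippet below is a hypothetical variation (not part of the original script) that reuses only the arguments shown above:

# Hypothetical variation: one writer per camera, each writing into its own
# sub-directory of the shared output folder.
writers = [
    rep.BasicWriter(output_dir=os.path.join(output_dir, f"cam_{i}"), frame_padding=0)
    for i in range(2)  # the tutorial spawns two cameras
]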

While stepping the simulator, the images can be saved to the defined folder. Since the BasicWriter only supports saving data in NumPy format, we first need to convert the PyTorch tensors to NumPy arrays before packing them into a dictionary.

            # Save images from camera at camera_index
            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
            # tensordict allows easy indexing of tensors in the dictionary
            single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")

            # Extract the other information
            single_cam_info = camera.data.info[camera_index]
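
Conceptually, convert_dict_to_backend() converts each tensor in the dictionary to the requested backend; for "numpy" this amounts to detaching the tensor, moving it to the CPU, and converting it to an array. The following is an illustrative sketch of an equivalent manual conversion (not the library's implementation):

import torch

def to_numpy_dict(tensor_dict: dict) -> dict:
    """Convert every torch.Tensor value in a dictionary to a NumPy array."""
    return {
        key: value.detach().cpu().numpy() if isinstance(value, torch.Tensor) else value
        for key, value in tensor_dict.items()
    }

# e.g. single_cam_data = to_numpy_dict({k: v for k, v in camera.data.output[camera_index].items()})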

After this step, we can save the images using the BasicWriter.

            # Pack data back into replicator format to save them using its writer
            if sim.get_version()[0] == 4:
                rep_output = {"annotators": {}}
                for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                    if info is not None:
                        rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
                    else:
                        rep_output["annotators"][key] = {"render_product": {"data": data}}
            else:
                rep_output = dict()
                for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                    if info is not None:
                        rep_output[key] = {"data": data, "info": info}
                    else:
                        rep_output[key] = data
            # Save images
            # Note: We need to provide On-time data for Replicator to save the images.
            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
            rep_writer.write(rep_output)
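
After the simulation has been stepped a few times with --save, the written files can be inspected offline. The snippet below is a minimal sketch, assuming the default BasicWriter layout in which array-like annotator outputs are typically stored as .npy files and color images as .png files; it loads every .npy file under the output directory and prints its shape:

import glob
import os

import numpy as np

# Same output directory as the one used by the script.
output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera")

for file_path in sorted(glob.glob(os.path.join(output_dir, "**", "*.npy"), recursive=True)):
    data = np.load(file_path)
    print(f"{os.path.basename(file_path)}: shape={data.shape}, dtype={data.dtype}")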

Projection into 3D Space#

We include utilities to re-project the depth image into 3D space. The re-projection is implemented with PyTorch operations, which allows for fast computation.

from omni.isaac.lab.utils.math import transform_points, unproject_depth

# Pointcloud in camera frame
points_3d_cam = unproject_depth(
    camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
)

# Pointcloud in world frame
points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)
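
For reference, unproject_depth() performs the standard pinhole back-projection. The sketch below spells out the same math in plain PyTorch for a single (H, W) depth image and a 3x3 intrinsic matrix; it is an illustrative re-implementation under those assumptions, not the library code:

import torch

def unproject_depth_sketch(depth: torch.Tensor, intrinsic_matrix: torch.Tensor) -> torch.Tensor:
    """Back-project a (H, W) depth image into a (H*W, 3) point cloud in the camera frame."""
    height, width = depth.shape
    fx, fy = intrinsic_matrix[0, 0], intrinsic_matrix[1, 1]
    cx, cy = intrinsic_matrix[0, 2], intrinsic_matrix[1, 2]
    # pixel coordinate grid (v: row index, u: column index)
    v, u = torch.meshgrid(
        torch.arange(height, device=depth.device),
        torch.arange(width, device=depth.device),
        indexing="ij",
    )
    # back-project: x = (u - cx) * z / fx, y = (v - cy) * z / fy, z = depth
    z = depth
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    return torch.stack((x, y, z), dim=-1).reshape(-1, 3)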

Alternatively, we can use the omni.isaac.lab.sensors.camera.utils.create_pointcloud_from_depth() function to create a point cloud from the depth image and transform it to the world frame.

            # Derive pointcloud from camera at camera_index
            pointcloud = create_pointcloud_from_depth(
                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
                depth=camera.data.output[camera_index]["distance_to_image_plane"],
                position=camera.data.pos_w[camera_index],
                orientation=camera.data.quat_w_ros[camera_index],
                device=sim.device,
            )

The resulting point cloud can be visualized using the omni.isaac.debug_draw extension from Isaac Sim, which makes it easy to draw the points directly in the viewport.

            # In the first few steps, things are still being instanced and Camera.data
            # can be empty. If we attempt to visualize an empty pointcloud it will crash
            # the sim, so we check that the pointcloud is not empty.
            if pointcloud.size()[0] > 0:
                pc_markers.visualize(translations=pointcloud)
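
If you want to inspect the re-projected points outside of the simulator, the cloud can also be dumped to a simple ASCII PLY file that viewers such as MeshLab can open. The helper below is a minimal sketch, assuming pointcloud is an (N, 3) tensor or array as returned by the utilities above:

import numpy as np

def save_pointcloud_as_ply(pointcloud, file_path: str) -> None:
    """Write an (N, 3) point cloud to an ASCII PLY file."""
    points = pointcloud.detach().cpu().numpy() if hasattr(pointcloud, "detach") else np.asarray(pointcloud)
    header = (
        "ply\n"
        "format ascii 1.0\n"
        f"element vertex {points.shape[0]}\n"
        "property float x\n"
        "property float y\n"
        "property float z\n"
        "end_header\n"
    )
    with open(file_path, "w") as f:
        f.write(header)
        np.savetxt(f, points, fmt="%.6f")

# e.g. save_pointcloud_as_ply(pointcloud, "/tmp/camera_pointcloud.ply")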

Executing the script#

To run the accompanying script, execute the following command:

# Usage with saving and drawing
./isaaclab.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --save --draw

# Usage with saving only in headless mode
./isaaclab.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --save --headless --enable_cameras

The simulation should start, and you can observe the different objects falling down. An output folder will be created in the IsaacLab/source/standalone/tutorials/04_sensors directory, where the images will be saved. Additionally, when --draw is passed, you should see the point cloud drawn in the viewport.

To stop the simulation, close the window, press the STOP button in the UI, or use Ctrl+C in the terminal.