# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Common functions that can be used to enable reward functions.

The functions can be passed to the :class:`isaaclab.managers.RewardTermCfg` object to include
the reward introduced by the function.
"""

from __future__ import annotations

import torch
from typing import TYPE_CHECKING

from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers.manager_base import ManagerTermBase
from isaaclab.managers.manager_term_cfg import RewardTermCfg
from isaaclab.sensors import ContactSensor, RayCaster

if TYPE_CHECKING:
    from isaaclab.envs import ManagerBasedRLEnv


"""General."""

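# Usage sketch (illustrative, not part of the original module): each reward function below is
# registered with the reward manager through a :class:`RewardTermCfg` in the user's environment
# configuration. The config layout and the ``mdp`` import alias shown here are assumptions, not
# something this module defines.
#
#   from isaaclab.managers import RewardTermCfg as RewTerm
#   import isaaclab.envs.mdp as mdp
#
#   alive = RewTerm(func=mdp.is_alive, weight=1.0)
#   joint_torques = RewTerm(func=mdp.joint_torques_l2, weight=-1.0e-4)
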
def is_alive(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Reward for being alive."""
    return (~env.termination_manager.terminated).float()

def is_terminated(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Penalize terminated episodes that don't correspond to episodic timeouts."""
    return env.termination_manager.terminated.float()

class is_terminated_term(ManagerTermBase):
    """Penalize termination for specific terms that don't correspond to episodic timeouts.

    The parameters are as follows:

    * :attr:`term_keys`: The termination terms to penalize. This can be a string, a list of strings
      or regular expressions. Default is ".*" which penalizes all terminations.

    The reward is computed as the sum of the termination terms that are not episodic timeouts.
    This means that the reward is 0 if the episode is terminated due to an episodic timeout.
    Otherwise, if two termination terms are active, the reward is 2.
    """

    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
        # initialize the base class
        super().__init__(cfg, env)
        # find and store the termination terms
        term_keys = cfg.params.get("term_keys", ".*")
        self._term_names = env.termination_manager.find_terms(term_keys)

    def __call__(self, env: ManagerBasedRLEnv, term_keys: str | list[str] = ".*") -> torch.Tensor:
        # return the unweighted reward for the termination terms
        reset_buf = torch.zeros(env.num_envs, device=env.device)
        for term in self._term_names:
            # sum over termination term values to account for multiple terminations in the same step
            reset_buf += env.termination_manager.get_term(term)

        return (reset_buf * (~env.termination_manager.time_outs)).float()

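# Usage sketch (illustrative): because this is a class-based term, the class itself is passed as
# ``func`` and the constructor reads ``term_keys`` from ``cfg.params``. The termination term names
# below are hypothetical.
#
#   termination_penalty = RewTerm(
#       func=mdp.is_terminated_term,
#       weight=-1.0,
#       params={"term_keys": ["base_contact", ".*_fall"]},
#   )
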
"""Root penalties."""
def lin_vel_z_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize z-axis base linear velocity using L2 squared kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    return torch.square(asset.data.root_lin_vel_b[:, 2])

def ang_vel_xy_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize xy-axis base angular velocity using L2 squared kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.root_ang_vel_b[:, :2]), dim=1)

def flat_orientation_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize non-flat base orientation using L2 squared kernel.

    This is computed by penalizing the xy-components of the projected gravity vector.
    """
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.projected_gravity_b[:, :2]), dim=1)

def base_height_l2(
    env: ManagerBasedRLEnv,
    target_height: float,
    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
    sensor_cfg: SceneEntityCfg | None = None,
) -> torch.Tensor:
    """Penalize asset height from its target using L2 squared kernel.

    Note:
        For flat terrain, the target height is in the world frame. For rough terrain,
        sensor readings can adjust the target height to account for the terrain.
    """
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    if sensor_cfg is not None:
        sensor: RayCaster = env.scene[sensor_cfg.name]
        # adjust the target height using the sensor data
        adjusted_target_height = target_height + torch.mean(sensor.data.ray_hits_w[..., 2], dim=1)
    else:
        # use the provided target height directly for flat terrain
        adjusted_target_height = target_height
    # compute the L2 squared penalty
    return torch.square(asset.data.root_pos_w[:, 2] - adjusted_target_height)

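# Usage sketch (illustrative): on rough terrain, a ray-caster height scanner can be passed so the
# target height is measured relative to the terrain under the robot. The scene entity name
# "height_scanner" and the numeric values below are hypothetical.
#
#   base_height = RewTerm(
#       func=mdp.base_height_l2,
#       weight=-1.0,
#       params={"target_height": 0.35, "sensor_cfg": SceneEntityCfg("height_scanner")},
#   )
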
def body_lin_acc_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize the linear acceleration of bodies using L2-kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.norm(asset.data.body_lin_acc_w[:, asset_cfg.body_ids, :], dim=-1), dim=1)

"""Joint penalties."""
def joint_torques_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint torques applied on the articulation using L2 squared kernel.

    NOTE: Only the joints configured in :attr:`asset_cfg.joint_ids` will have their joint torques
    contribute to the term.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.applied_torque[:, asset_cfg.joint_ids]), dim=1)

def joint_vel_l1(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Penalize joint velocities on the articulation using an L1-kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.abs(asset.data.joint_vel[:, asset_cfg.joint_ids]), dim=1)

def joint_vel_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint velocities on the articulation using L2 squared kernel.

    NOTE: Only the joints configured in :attr:`asset_cfg.joint_ids` will have their joint velocities
    contribute to the term.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.joint_vel[:, asset_cfg.joint_ids]), dim=1)

def joint_acc_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint accelerations on the articulation using L2 squared kernel.

    NOTE: Only the joints configured in :attr:`asset_cfg.joint_ids` will have their joint accelerations
    contribute to the term.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.joint_acc[:, asset_cfg.joint_ids]), dim=1)

def joint_deviation_l1(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint positions that deviate from the default one."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute the deviation from the default joint positions
    angle = asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.default_joint_pos[:, asset_cfg.joint_ids]
    return torch.sum(torch.abs(angle), dim=1)

def joint_pos_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint positions if they cross the soft limits.

    This is computed as a sum of the absolute value of the difference between the joint position
    and the soft limits.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute out of limits constraints
    out_of_limits = -(
        asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.soft_joint_pos_limits[:, asset_cfg.joint_ids, 0]
    ).clip(max=0.0)
    out_of_limits += (
        asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.soft_joint_pos_limits[:, asset_cfg.joint_ids, 1]
    ).clip(min=0.0)
    return torch.sum(out_of_limits, dim=1)

def joint_vel_limits(
    env: ManagerBasedRLEnv, soft_ratio: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
    """Penalize joint velocities if they cross the soft limits.

    This is computed as a sum of the absolute value of the difference between the joint velocity
    and the soft limits.

    Args:
        soft_ratio: The ratio of the soft limits to be used.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute out of limits constraints
    out_of_limits = (
        torch.abs(asset.data.joint_vel[:, asset_cfg.joint_ids])
        - asset.data.soft_joint_vel_limits[:, asset_cfg.joint_ids] * soft_ratio
    )
    # clip to max error = 1 rad/s per joint to avoid huge penalties
    out_of_limits = out_of_limits.clip_(min=0.0, max=1.0)
    return torch.sum(out_of_limits, dim=1)

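# Usage sketch (illustrative): ``asset_cfg`` controls which joints contribute to the joint-space
# penalties above. The joint-name pattern and weight below are hypothetical.
#
#   dof_vel_limits = RewTerm(
#       func=mdp.joint_vel_limits,
#       weight=-1.0,
#       params={"soft_ratio": 0.9, "asset_cfg": SceneEntityCfg("robot", joint_names=["arm_.*"])},
#   )
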
"""Action penalties."""
def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize applied torques if they cross the limits.

    This is computed as a sum of the absolute value of the difference between the applied torques
    and the limits.

    .. caution::
        Currently, this only works for explicit actuators since we manually compute the applied torques.
        For implicit actuators, we currently cannot retrieve the applied torques from the physics engine.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute out of limits constraints
    # TODO: We need to fix this to support implicit joints.
    out_of_limits = torch.abs(
        asset.data.applied_torque[:, asset_cfg.joint_ids] - asset.data.computed_torque[:, asset_cfg.joint_ids]
    )
    return torch.sum(out_of_limits, dim=1)

def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Penalize the rate of change of the actions using L2 squared kernel."""
    return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)

def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Penalize the actions using L2 squared kernel."""
    return torch.sum(torch.square(env.action_manager.action), dim=1)

"""Contact sensor."""
def undesired_contacts(env: ManagerBasedRLEnv, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
    """Penalize undesired contacts as the number of violations that are above a threshold."""
    # extract the used quantities (to enable type-hinting)
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    # check if contact force is above threshold
    net_contact_forces = contact_sensor.data.net_forces_w_history
    is_contact = torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] > threshold
    # sum over contacts for each environment
    return torch.sum(is_contact, dim=1)

def contact_forces(env: ManagerBasedRLEnv, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
    """Penalize contact forces as the amount of violations of the net contact force."""
    # extract the used quantities (to enable type-hinting)
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    net_contact_forces = contact_sensor.data.net_forces_w_history
    # compute the violation
    violation = torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] - threshold
    # compute the penalty
    return torch.sum(violation.clip(min=0.0), dim=1)

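# Usage sketch (illustrative): both contact terms expect a contact sensor entity; the body
# selection is resolved from ``body_names`` patterns in the SceneEntityCfg. The sensor and body
# names below are hypothetical.
#
#   undesired_thigh_contacts = RewTerm(
#       func=mdp.undesired_contacts,
#       weight=-1.0,
#       params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_thigh"), "threshold": 1.0},
#   )
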
"""Velocity-tracking rewards."""
def track_lin_vel_xy_exp(
    env: ManagerBasedRLEnv, std: float, command_name: str, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
    """Reward tracking of linear velocity commands (xy axes) using exponential kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    # compute the error
    lin_vel_error = torch.sum(
        torch.square(env.command_manager.get_command(command_name)[:, :2] - asset.data.root_lin_vel_b[:, :2]),
        dim=1,
    )
    return torch.exp(-lin_vel_error / std**2)

def track_ang_vel_z_exp(
    env: ManagerBasedRLEnv, std: float, command_name: str, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
    """Reward tracking of angular velocity commands (yaw) using exponential kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    # compute the error
    ang_vel_error = torch.square(env.command_manager.get_command(command_name)[:, 2] - asset.data.root_ang_vel_b[:, 2])
    return torch.exp(-ang_vel_error / std**2)

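# Usage sketch (illustrative): ``std`` sets the width of the exponential kernel,
# reward = exp(-error / std**2), so a larger ``std`` tolerates larger tracking errors.
# The command name and values below are hypothetical.
#
#   track_lin_vel_xy = RewTerm(
#       func=mdp.track_lin_vel_xy_exp,
#       weight=1.0,
#       params={"command_name": "base_velocity", "std": 0.5},
#   )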