Source code for omni.isaac.lab_tasks.utils.wrappers.rsl_rl.rl_cfg
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from dataclasses import MISSING
from typing import Literal
from omni.isaac.lab.utils import configclass
[docs]@configclass
class RslRlPpoActorCriticCfg:
"""Configuration for the PPO actor-critic networks."""
class_name: str = "ActorCritic"
"""The policy class name. Default is ActorCritic."""
init_noise_std: float = MISSING
"""The initial noise standard deviation for the policy."""
actor_hidden_dims: list[int] = MISSING
"""The hidden dimensions of the actor network."""
critic_hidden_dims: list[int] = MISSING
"""The hidden dimensions of the critic network."""
activation: str = MISSING
"""The activation function for the actor and critic networks."""
[docs]@configclass
class RslRlPpoAlgorithmCfg:
"""Configuration for the PPO algorithm."""
class_name: str = "PPO"
"""The algorithm class name. Default is PPO."""
value_loss_coef: float = MISSING
"""The coefficient for the value loss."""
use_clipped_value_loss: bool = MISSING
"""Whether to use clipped value loss."""
clip_param: float = MISSING
"""The clipping parameter for the policy."""
entropy_coef: float = MISSING
"""The coefficient for the entropy loss."""
num_learning_epochs: int = MISSING
"""The number of learning epochs per update."""
num_mini_batches: int = MISSING
"""The number of mini-batches per update."""
learning_rate: float = MISSING
"""The learning rate for the policy."""
schedule: str = MISSING
"""The learning rate schedule."""
gamma: float = MISSING
"""The discount factor."""
lam: float = MISSING
"""The lambda parameter for Generalized Advantage Estimation (GAE)."""
desired_kl: float = MISSING
"""The desired KL divergence."""
max_grad_norm: float = MISSING
"""The maximum gradient norm."""
[docs]@configclass
class RslRlOnPolicyRunnerCfg:
"""Configuration of the runner for on-policy algorithms."""
seed: int = 42
"""The seed for the experiment. Default is 42."""
device: str = "cuda:0"
"""The device for the rl-agent. Default is cuda:0."""
num_steps_per_env: int = MISSING
"""The number of steps per environment per update."""
max_iterations: int = MISSING
"""The maximum number of iterations."""
empirical_normalization: bool = MISSING
"""Whether to use empirical normalization."""
policy: RslRlPpoActorCriticCfg = MISSING
"""The policy configuration."""
algorithm: RslRlPpoAlgorithmCfg = MISSING
"""The algorithm configuration."""
##
# Checkpointing parameters
##
save_interval: int = MISSING
"""The number of iterations between saves."""
experiment_name: str = MISSING
"""The experiment name."""
run_name: str = ""
"""The run name. Default is empty string.
The name of the run directory is typically the time-stamp at execution. If the run name is not empty,
then it is appended to the run directory's name, i.e. the logging directory's name will become
``{time-stamp}_{run_name}``.
"""
##
# Logging parameters
##
logger: Literal["tensorboard", "neptune", "wandb"] = "tensorboard"
"""The logger to use. Default is tensorboard."""
neptune_project: str = "isaaclab"
"""The neptune project name. Default is "isaaclab"."""
wandb_project: str = "isaaclab"
"""The wandb project name. Default is "isaaclab"."""
##
# Loading parameters
##
resume: bool = False
"""Whether to resume. Default is False."""
load_run: str = ".*"
"""The run directory to load. Default is ".*" (all).
If regex expression, the latest (alphabetical order) matching run will be loaded.
"""
load_checkpoint: str = "model_.*.pt"
"""The checkpoint file to load. Default is ``"model_.*.pt"`` (all).
If regex expression, the latest (alphabetical order) matching file will be loaded.
"""