# Copyright (c) 2022-2025, The Isaac Lab Project Developers.# All rights reserved.## SPDX-License-Identifier: BSD-3-ClausefromdataclassesimportMISSINGfromtypingimportLiteralfromisaaclab.utilsimportconfigclassfrom.rnd_cfgimportRslRlRndCfgfrom.symmetry_cfgimportRslRlSymmetryCfg
@configclass
class RslRlPpoActorCriticCfg:
    """Settings for the actor-critic networks used by PPO."""

    class_name: str = "ActorCritic"
    """Name of the policy class. Defaults to ``"ActorCritic"``."""

    init_noise_std: float = MISSING
    """Initial standard deviation of the policy noise."""

    noise_std_type: Literal["scalar", "log"] = "scalar"
    """Type of the policy's noise standard deviation. Defaults to ``"scalar"``."""

    actor_hidden_dims: list[int] = MISSING
    """Hidden dimensions of the actor network."""

    critic_hidden_dims: list[int] = MISSING
    """Hidden dimensions of the critic network."""

    activation: str = MISSING
    """Activation function used for the actor and critic networks."""
@configclass
class RslRlPpoAlgorithmCfg:
    """Configuration for the PPO algorithm."""

    class_name: str = "PPO"
    """The algorithm class name. Default is PPO."""

    value_loss_coef: float = MISSING
    """The coefficient for the value loss."""

    use_clipped_value_loss: bool = MISSING
    """Whether to use clipped value loss."""

    clip_param: float = MISSING
    """The clipping parameter for the policy."""

    entropy_coef: float = MISSING
    """The coefficient for the entropy loss."""

    num_learning_epochs: int = MISSING
    """The number of learning epochs per update."""

    num_mini_batches: int = MISSING
    """The number of mini-batches per update."""

    learning_rate: float = MISSING
    """The learning rate for the policy."""

    schedule: str = MISSING
    """The learning rate schedule."""

    gamma: float = MISSING
    """The discount factor."""

    lam: float = MISSING
    """The lambda parameter for Generalized Advantage Estimation (GAE)."""

    desired_kl: float = MISSING
    """The desired KL divergence."""

    max_grad_norm: float = MISSING
    """The maximum gradient norm."""

    normalize_advantage_per_mini_batch: bool = False
    """Whether to normalize the advantage per mini-batch. Default is False.

    If True, the advantage is normalized over the mini-batches only.
    Otherwise, the advantage is normalized over the entire collected trajectories.
    """

    symmetry_cfg: RslRlSymmetryCfg | None = None
    """The symmetry configuration. Default is None, in which case symmetry is not used."""

    rnd_cfg: RslRlRndCfg | None = None
    """The configuration for the Random Network Distillation (RND) module. Default is None,
    in which case RND is not used.
    """
@configclass
class RslRlOnPolicyRunnerCfg:
    """Runner settings for on-policy algorithms."""

    seed: int = 42
    """Seed of the experiment. Defaults to 42."""

    device: str = "cuda:0"
    """Device for the rl-agent. Defaults to ``"cuda:0"``."""

    num_steps_per_env: int = MISSING
    """Number of steps per environment per update."""

    max_iterations: int = MISSING
    """Maximum number of iterations."""

    empirical_normalization: bool = MISSING
    """Whether empirical normalization is used."""

    policy: RslRlPpoActorCriticCfg = MISSING
    """Configuration of the policy."""

    algorithm: RslRlPpoAlgorithmCfg = MISSING
    """Configuration of the algorithm."""

    clip_actions: float | None = None
    """Value at which actions are clipped. With ``None``, no clipping is done.

    .. note::
        The clipping itself is performed inside the :class:`RslRlVecEnvWrapper` wrapper.
    """

    ##
    # Checkpointing parameters
    ##

    save_interval: int = MISSING
    """Number of iterations between saves."""

    experiment_name: str = MISSING
    """Name of the experiment."""

    run_name: str = ""
    """Name of the run. Defaults to an empty string.

    The run directory is typically named after the time-stamp at execution. A non-empty
    run name is appended to that directory name, i.e. the logging directory's name
    becomes ``{time-stamp}_{run_name}``.
    """

    ##
    # Logging parameters
    ##

    logger: Literal["tensorboard", "neptune", "wandb"] = "tensorboard"
    """Logger to use. Defaults to ``"tensorboard"``."""

    neptune_project: str = "isaaclab"
    """Name of the neptune project. Defaults to ``"isaaclab"``."""

    wandb_project: str = "isaaclab"
    """Name of the wandb project. Defaults to ``"isaaclab"``."""

    ##
    # Loading parameters
    ##

    resume: bool = False
    """Whether to resume. Defaults to False."""

    load_run: str = ".*"
    """Run directory to load. Defaults to ``".*"`` (all).

    If given as a regex expression, the latest (alphabetical order) matching run is loaded.
    """

    load_checkpoint: str = "model_.*.pt"
    """Checkpoint file to load. Defaults to ``"model_.*.pt"`` (all).

    If given as a regex expression, the latest (alphabetical order) matching file is loaded.
    """