Source code for isaaclab_rl.rsl_rl.utils

# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

from dataclasses import MISSING
from typing import TYPE_CHECKING

from packaging import version

if TYPE_CHECKING:
    from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg


# Version thresholds that the installed rsl-rl version is compared against when adapting configurations.
_V4_0_0 = version.parse("4.0.0")
_V5_0_0 = version.parse("5.0.0")
# Names of the per-model configuration attributes that are looked up on the runner configuration.
_MODEL_CFG_NAMES = ("actor", "critic", "student", "teacher")



[docs]
def handle_deprecated_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, installed_version: str) -> RslRlBaseRunnerCfg:
    """Handle deprecated RSL-RL configurations across version boundaries.

    This function mutates ``agent_cfg`` to keep configurations compatible with the installed ``rsl-rl`` version:

    - For ``rsl-rl < 4.0.0``, ``policy`` is required; new model configs (``actor``, ``critic``, ``student``,
        ``teacher``) are ignored and cleared.
    - For ``rsl-rl >= 4.0.0``, deprecated ``policy`` can be used to infer missing model configs, then ``policy`` is
        cleared.
    - For ``rsl-rl >= 5.0.0``, legacy stochastic parameters are migrated to ``distribution_cfg`` when needed; for
        ``4.0.0 <= rsl-rl < 5.0.0``, those legacy parameters are validated instead.

    Args:
        agent_cfg: The runner configuration to adapt. It is modified in place.
        installed_version: The installed ``rsl-rl`` version as a string that ``packaging.version.parse`` accepts.

    Returns:
        The same ``agent_cfg`` object that was passed in, after modification.

    Raises:
        ValueError: If required legacy parameters are missing for the selected ``rsl-rl`` version.
    """
    rsl_rl_version = version.parse(installed_version)

    # Handle configurations for rsl-rl < 4.0.0
    if rsl_rl_version < _V4_0_0:
        # exit if no policy configuration is present
        if not _has_non_missing_attr(agent_cfg, "policy"):
            raise ValueError(
                "The `policy` configuration is required for rsl-rl < 4.0.0. Please specify the `policy` configuration"
                " or update rsl-rl."
            )

        # handle deprecated obs_normalization argument
        if _has_non_missing_attr(agent_cfg, "empirical_normalization"):
            _handle_empirical_normalization(agent_cfg.policy, agent_cfg)

        # remove optimizer argument for PPO only available in rsl-rl >= 4.0.0
        from isaaclab_rl.rsl_rl import RslRlPpoAlgorithmCfg

        algorithm_cfg = agent_cfg.algorithm
        if hasattr(algorithm_cfg, "optimizer") and isinstance(algorithm_cfg, RslRlPpoAlgorithmCfg):
            if algorithm_cfg.optimizer != "adam":
                print(
                    "[WARNING]: The `optimizer` parameter for PPO is only available for rsl-rl >= 4.0.0. Consider"
                    " updating rsl-rl to use this feature. Defaulting to `adam` optimizer."
                )
            try:
                del algorithm_cfg.optimizer
            except AttributeError:
                # `hasattr` also sees class-level defaults; in that case nothing is stored on the instance
                # (e.g. the attribute was already removed by an earlier call) and there is nothing to delete.
                pass

        # warn about model configurations only used in rsl-rl >= 4.0.0
        for model_name in _MODEL_CFG_NAMES:
            if _has_non_missing_attr(agent_cfg, model_name):
                _clear_new_model_cfg(agent_cfg, model_name)

    # Handle configurations for rsl-rl >= 4.0.0
    else:
        # Handle deprecated policy configuration
        if _has_non_missing_attr(agent_cfg, "policy"):
            _infer_model_cfgs_from_policy(agent_cfg)

        # Handle new distribution configuration
        if rsl_rl_version < _V5_0_0:
            for model_name in _MODEL_CFG_NAMES:
                if _has_non_missing_attr(agent_cfg, model_name):
                    _validate_old_stochastic_cfg(getattr(agent_cfg, model_name))
        else:  # rsl-rl >= 5.0.0
            # import new distribution config classes
            from isaaclab_rl.rsl_rl import RslRlMLPModelCfg

            for model_name in _MODEL_CFG_NAMES:
                if _has_non_missing_attr(agent_cfg, model_name):
                    _update_distribution_cfg(getattr(agent_cfg, model_name), RslRlMLPModelCfg)

    return agent_cfg


def _infer_model_cfgs_from_policy(agent_cfg) -> None:
    """Fill missing model configurations from the deprecated ``policy`` configuration, then clear ``policy``."""
    print(
        "[WARNING]: The `policy` configuration is deprecated for rsl-rl >= 4.0.0. Please use, e.g., `actor` and"
        " `critic` model configurations instead."
    )

    # handle deprecated obs_normalization argument
    if _has_non_missing_attr(agent_cfg, "empirical_normalization"):
        _handle_empirical_normalization(agent_cfg.policy, agent_cfg)

    # import old and new config classes
    from isaaclab_rl.rsl_rl import (
        RslRlDistillationStudentTeacherCfg,
        RslRlDistillationStudentTeacherRecurrentCfg,
        RslRlMLPModelCfg,
        RslRlPpoActorCriticCfg,
        RslRlPpoActorCriticRecurrentCfg,
        RslRlRNNModelCfg,
    )

    policy_cfg = agent_cfg.policy
    policy_type = type(policy_cfg)
    ppo_policy_classes = (RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg)
    distillation_policy_classes = (RslRlDistillationStudentTeacherCfg, RslRlDistillationStudentTeacherRecurrentCfg)
    # model name -> (feed-forward policy class, recurrent policy class) the model can be inferred from
    policy_classes_per_model = (
        ("actor", ppo_policy_classes),
        ("critic", ppo_policy_classes),
        ("student", distillation_policy_classes),
        ("teacher", distillation_policy_classes),
    )

    for model_name, (mlp_policy_cls, rnn_policy_cls) in policy_classes_per_model:
        # only fill in model configurations that exist on the runner configuration but are not set
        if not (hasattr(agent_cfg, model_name) and _is_missing(getattr(agent_cfg, model_name))):
            continue
        print(f"[WARNING]: The `policy` configuration is used to infer the `{model_name}` model configuration.")
        # exact type match (not isinstance): only these policy classes themselves are converted
        if policy_type is mlp_policy_cls:
            model_cls, recurrent = RslRlMLPModelCfg, False
        elif policy_type is rnn_policy_cls:
            model_cls, recurrent = RslRlRNNModelCfg, True
        else:
            # no matching policy class: the model configuration is left missing
            continue
        setattr(agent_cfg, model_name, model_cls(**_model_kwargs_from_policy(policy_cfg, model_name, recurrent)))

    # remove deprecated policy configuration
    agent_cfg.policy = MISSING


def _model_kwargs_from_policy(policy_cfg, model_name: str, recurrent: bool) -> dict:
    """Collect the constructor arguments of the ``model_name`` model configuration from a ``policy`` configuration."""
    kwargs = {
        "hidden_dims": getattr(policy_cfg, f"{model_name}_hidden_dims"),
        "activation": policy_cfg.activation,
        "obs_normalization": getattr(policy_cfg, f"{model_name}_obs_normalization"),
        # every model except the critic is created with a stochastic output
        "stochastic": model_name != "critic",
    }
    if model_name == "teacher":
        # the teacher is created with a fixed initial noise std of zero
        kwargs["init_noise_std"] = 0.0
    elif model_name in ("actor", "student"):
        kwargs["init_noise_std"] = policy_cfg.init_noise_std
        kwargs["noise_std_type"] = policy_cfg.noise_std_type
        if model_name == "actor":
            # only the actor forwards the state-dependent std flag
            kwargs["state_dependent_std"] = policy_cfg.state_dependent_std
    if recurrent:
        kwargs["rnn_type"] = policy_cfg.rnn_type
        kwargs["rnn_hidden_dim"] = policy_cfg.rnn_hidden_dim
        kwargs["rnn_num_layers"] = policy_cfg.rnn_num_layers
    return kwargs



def _is_missing(value) -> bool:
    """Return whether ``value`` is an instance of the type of the dataclasses ``MISSING`` sentinel."""
    sentinel_type = type(MISSING)
    return isinstance(value, sentinel_type)


def _has_non_missing_attr(obj, attr_name: str) -> bool:
    """Return whether ``obj`` has the attribute ``attr_name`` and its value is not the ``MISSING`` sentinel."""
    if not hasattr(obj, attr_name):
        return False
    attr_value = getattr(obj, attr_name)
    return not _is_missing(attr_value)


def _handle_empirical_normalization(policy_cfg, agent_cfg):
    """Move the deprecated ``empirical_normalization`` flag of the runner config into the policy config.

    Prints a deprecation warning, copies ``agent_cfg.empirical_normalization`` into each of
    ``policy_cfg.actor_obs_normalization`` and ``policy_cfg.critic_obs_normalization`` that is still missing, and
    finally resets ``agent_cfg.empirical_normalization`` to ``MISSING``. Both configs are modified in place.
    """
    deprecation_msg = (
        "[WARNING]: The `empirical_normalization` parameter is deprecated. Please set `actor_obs_normalization` and"
        " `critic_obs_normalization` as part of the `policy` configuration instead."
    )
    print(deprecation_msg)
    # explicitly set per-model normalization flags take precedence over the deprecated one
    for field_name in ("actor_obs_normalization", "critic_obs_normalization"):
        if _is_missing(getattr(policy_cfg, field_name)):
            setattr(policy_cfg, field_name, agent_cfg.empirical_normalization)
    agent_cfg.empirical_normalization = MISSING


def _clear_new_model_cfg(agent_cfg, model_name: str):
    """Warn that the ``model_name`` model configuration is unused and reset it to ``MISSING`` on ``agent_cfg``."""
    warning_msg = (
        f"[WARNING]: The `{model_name}` model configuration is only used for rsl-rl >= 4.0.0. Consider updating rsl-rl"
        " or use the `policy` configuration for rsl-rl < 4.0.0."
    )
    print(warning_msg)
    setattr(agent_cfg, model_name, MISSING)


def _validate_old_stochastic_cfg(model_cfg):
    """Check that a model configuration uses the legacy stochastic parameters and drop ``distribution_cfg``.

    Called for ``4.0.0 <= rsl-rl < 5.0.0``, where the output distribution is described by the ``stochastic``,
    ``init_noise_std``, ``noise_std_type`` and ``state_dependent_std`` parameters. ``model_cfg`` is modified in place.

    Raises:
        ValueError: If ``model_cfg`` has no ``stochastic`` attribute or it is still ``MISSING``.
    """
    if not hasattr(model_cfg, "stochastic") or _is_missing(model_cfg.stochastic):
        raise ValueError(
            "Please parameterize the output distribution using the old parameters `stochastic`, `init_noise_std`,"
            " `noise_std_type`, and `state_dependent_std` or update rsl-rl."
        )
    # remove new distribution configuration
    try:
        del model_cfg.distribution_cfg
    except AttributeError:
        # Nothing is stored on the instance: the attribute is absent or only a class-level default remains
        # (e.g. after an earlier call). A `hasattr` guard cannot tell these cases apart and would let `del` raise.
        pass


def _update_distribution_cfg(model_cfg, rsl_rl_mlp_model_cfg_cls):
    """Migrate the legacy stochastic parameters of a model configuration to ``distribution_cfg``.

    If ``distribution_cfg`` is ``None`` and ``stochastic`` is ``True``, a distribution configuration is created from
    ``init_noise_std`` and ``noise_std_type``: a Gaussian one when ``state_dependent_std`` is ``False`` and a
    heteroscedastic Gaussian one when it is ``True``. Afterwards the legacy parameters are removed from the instance.
    The function can be called repeatedly on the same configuration.

    Args:
        model_cfg: The model configuration to update. It is modified in place.
        rsl_rl_mlp_model_cfg_cls: Class providing the ``GaussianDistributionCfg`` and
            ``HeteroscedasticGaussianDistributionCfg`` distribution configuration classes.
    """
    # An existing distribution configuration always wins. `stochastic` may already have been removed by an earlier
    # call, hence the guarded lookup.
    if model_cfg.distribution_cfg is None and getattr(model_cfg, "stochastic", None) is True:
        print(
            "[WARNING]: The `distribution_cfg` configuration is now used to specify the output distribution for"
            " stochastic policies. Consider updating the configuration to use `distribution_cfg` instead of"
            " `stochastic`, `init_noise_std`, `noise_std_type`, and `state_dependent_std` parameters."
        )
        if model_cfg.state_dependent_std is False:  # gaussian distribution
            model_cfg.distribution_cfg = rsl_rl_mlp_model_cfg_cls.GaussianDistributionCfg(
                init_std=model_cfg.init_noise_std, std_type=model_cfg.noise_std_type
            )
        elif model_cfg.state_dependent_std is True:  # heteroscedastic gaussian distribution
            model_cfg.distribution_cfg = rsl_rl_mlp_model_cfg_cls.HeteroscedasticGaussianDistributionCfg(
                init_std=model_cfg.init_noise_std, std_type=model_cfg.noise_std_type
            )
    # remove deprecated stochastic parameters
    for legacy_name in ("stochastic", "init_noise_std", "noise_std_type", "state_dependent_std"):
        try:
            delattr(model_cfg, legacy_name)
        except AttributeError:
            # Nothing is stored on the instance: the attribute is absent or only a class-level default remains.
            # A `hasattr` guard cannot tell these cases apart and would let the deletion raise.
            pass