Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Policy Training
---------------

This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation.
The training is fully parallelized across hundreds of environments for sample-efficient learning.
The training is fully parallelized across hundreds of environments for efficient learning.

**Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details)

Expand Down
9 changes: 8 additions & 1 deletion isaaclab_arena/environments/arena_env_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,17 @@ def build_registered(
# THIS WILL BE REMOVED IN THE FUTURE.
cfg_entry = self.modify_env_cfg(cfg_entry)
entry_point = self.get_entry_point()
# Register the environment with the Gym registry.
kwargs = {
"env_cfg_entry_point": cfg_entry,
}
if self.arena_env.rl_framework is not None:
assert self.arena_env.rl_policy_cfg is not None
kwargs[self.arena_env.rl_framework.get_entry_point_string()] = self.arena_env.rl_policy_cfg
gym.register(
id=name,
entry_point=entry_point,
kwargs={"env_cfg_entry_point": cfg_entry},
kwargs=kwargs,
disable_env_checker=True,
)
cfg = parse_env_cfg(
Expand Down
5 changes: 5 additions & 0 deletions isaaclab_arena/environments/isaaclab_arena_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase
from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg
from isaaclab_arena.orchestrator.orchestrator_base import OrchestratorBase
from isaaclab_arena.reinforcement_learning.frameworks import RLFramework
from isaaclab_arena.scene.scene import Scene
from isaaclab_arena.tasks.task_base import TaskBase

Expand All @@ -29,6 +30,8 @@ def __init__(
teleop_device: TeleopDeviceBase | None = None,
orchestrator: OrchestratorBase | None = None,
env_cfg_callback: Callable[IsaacLabArenaManagerBasedRLEnvCfg] | None = None,
rl_framework: RLFramework | None = None,
rl_policy_cfg: str | None = None,
):
"""
Args:
Expand All @@ -47,3 +50,5 @@ def __init__(
self.teleop_device = teleop_device
self.orchestrator = orchestrator
self.env_cfg_callback = env_cfg_callback
self.rl_framework = rl_framework
self.rl_policy_cfg = rl_policy_cfg
51 changes: 51 additions & 0 deletions isaaclab_arena/environments/isaaclab_interop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import argparse

from isaaclab_arena_environments.cli import ExampleEnvironments


def environment_registration_callback() -> list[str]:
    """Register an IsaacLab Arena environment for use with Isaac Lab scripts.

    This function is passed to an Isaac Lab script as an external callback, e.g.:

    python IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py
    --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback
    --task lift_object
    --num_envs 512

    Here the "lift_object" environment is registered with Isaac Lab before the
    RSL RL training script runs; the training script then trains that environment.

    Returns:
        The CLI arguments that were not consumed by this callback.
    """
    from isaaclab.app import AppLauncher

    from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args
    from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder

    # First parsing pass: only --task, which selects the Arena environment.
    # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to
    # determine the environment to register. The environment is also registered under this name.
    # The result is that a single argument tells Arena what to register, and Lab what to run.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register."
    )
    task_name = arg_parser.parse_known_args()[0].task
    selected_environment = ExampleEnvironments[task_name]()
    # Second parsing pass: the full set of app-launcher / Lab / Arena / environment-specific args.
    AppLauncher.add_app_launcher_args(arg_parser)
    add_isaac_lab_cli_args(arg_parser)
    add_isaaclab_arena_cli_args(arg_parser)
    selected_environment.add_cli_args(arg_parser)
    parsed_args, unconsumed_args = arg_parser.parse_known_args()
    # Build the environment config and register it with the Gym registry.
    arena_environment = selected_environment.get_env(parsed_args)
    env_builder = ArenaEnvBuilder(arena_environment, parsed_args)
    env_builder.build_registered()
    # Hand back everything this callback did not consume.
    return unconsumed_args
45 changes: 45 additions & 0 deletions isaaclab_arena/examples/rigid_object_variant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# Example 1: a rigid-object variant config that swaps between two USD assets.
# NOTE(review): `RigidObjectVariantCfg`, `RigidObjectCfg`, `UsdFileCfg`, `Pose`,
# `PoseVariant`, `RigidObjectSet`, and `asset_registry` are assumed to be supplied
# by the surrounding example context/imports — confirm before running standalone.
rigid_object_variant_cfg = RigidObjectVariantCfg(
    assets={
        "box": RigidObjectCfg(
            name="box",
            spawn=UsdFileCfg(
                usd_path="path/to/box.usd",
            ),
            scale=(1.0, 1.0, 1.0),
            initial_pose=Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        ),
        "sphere": RigidObjectCfg(
            name="sphere",
            spawn=UsdFileCfg(
                usd_path="path/to/sphere.usd",
            ),
            scale=(2.0, 2.0, 2.0),
            initial_pose=Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        ),
    }
)  # fixed: the call was closed with `}` instead of `)`, a syntax error


# Example 2: an object set whose members all share a single initial pose.
cracker_box = asset_registry.get_asset_by_name("cracker_box")()
tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")()
object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can])
object_set.set_initial_pose(Pose(position=(0.0, 0.0, 0.0), orientation=(1.0, 0.0, 0.0, 0.0)))


# Example 3: an object set with a per-object initial pose via PoseVariant.
cracker_box = asset_registry.get_asset_by_name("cracker_box")()
tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")()
object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can])
object_set.set_initial_pose(
    PoseVariant(
        poses={
            cracker_box: Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)),
            tomato_soup_can: Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        }
    )
)
88 changes: 26 additions & 62 deletions isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,19 @@
#
# SPDX-License-Identifier: Apache-2.0

# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from dataclasses import field
from typing import Any

from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
"""Default RSL-RL runner configuration for Arena environments.

Used as the ``rsl_rl_cfg_entry_point`` when registering environments with gym,
allowing IsaacLab's ``train.py`` to load it via ``@hydra_task_config``.
"""

num_steps_per_env: int = 24
max_iterations: int = 4000
Expand All @@ -39,49 +27,25 @@ class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
"critic": ["policy"],
}
)
policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg)
algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg)

@classmethod
def update_cfg(
cls,
policy_cfg: dict[str, Any],
algorithm_cfg: dict[str, Any],
obs_groups: dict[str, list[str]],
num_steps_per_env: int,
max_iterations: int,
save_interval: int,
experiment_name: str,
):
cfg = cls()
cfg.policy = RslRlPpoActorCriticCfg(**policy_cfg)
cfg.algorithm = RslRlPpoAlgorithmCfg(**algorithm_cfg)
cfg.obs_groups = obs_groups
cfg.num_steps_per_env = num_steps_per_env
cfg.max_iterations = max_iterations
cfg.save_interval = save_interval
cfg.experiment_name = experiment_name
return cfg


def get_agent_cfg(args_cli: argparse.Namespace) -> Any:
"""Get the environment and agent configuration from the command line arguments."""

# Read a json file containing the agent configuration
with open(args_cli.agent_cfg_path) as f:
agent_cfg_dict = json.load(f)

policy_cfg = agent_cfg_dict["policy_cfg"]
algorithm_cfg = agent_cfg_dict["algorithm_cfg"]
obs_groups = agent_cfg_dict["obs_groups"]
# Load all other arguments if they are in args_cli as policy arguments
num_steps_per_env = args_cli.num_steps_per_env
max_iterations = args_cli.max_iterations
save_interval = args_cli.save_interval
experiment_name = args_cli.experiment_name

agent_cfg = RLPolicyCfg.update_cfg(
policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name
policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
)
algorithm: RslRlPpoAlgorithmCfg = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.006,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=0.0001,
schedule="adaptive",
gamma=0.98,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

return agent_cfg
28 changes: 0 additions & 28 deletions isaaclab_arena/policy/rl_policy/generic_policy.json

This file was deleted.

Loading
Loading