Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Policy Training
---------------

This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation.
The training is fully parallelized across hundreds of environments for sample-efficient learning.
The training is fully parallelized across hundreds of environments for efficient learning.

**Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details)

Expand Down
9 changes: 8 additions & 1 deletion isaaclab_arena/environments/arena_env_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,17 @@ def build_registered(
# THIS WILL BE REMOVED IN THE FUTURE.
cfg_entry = self.modify_env_cfg(cfg_entry)
entry_point = self.get_entry_point()
# Register the environment with the Gym registry.
kwargs = {
"env_cfg_entry_point": cfg_entry,
}
if self.arena_env.rl_framework is not None:
assert self.arena_env.rl_policy_cfg is not None
kwargs[self.arena_env.rl_framework.get_entry_point_string()] = self.arena_env.rl_policy_cfg
gym.register(
id=name,
entry_point=entry_point,
kwargs={"env_cfg_entry_point": cfg_entry},
kwargs=kwargs,
disable_env_checker=True,
)
cfg = parse_env_cfg(
Expand Down
5 changes: 5 additions & 0 deletions isaaclab_arena/environments/isaaclab_arena_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase
from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg
from isaaclab_arena.orchestrator.orchestrator_base import OrchestratorBase
from isaaclab_arena.reinforcement_learning.frameworks import RLFramework
from isaaclab_arena.scene.scene import Scene
from isaaclab_arena.tasks.task_base import TaskBase

Expand All @@ -29,6 +30,8 @@ def __init__(
teleop_device: TeleopDeviceBase | None = None,
orchestrator: OrchestratorBase | None = None,
env_cfg_callback: Callable[IsaacLabArenaManagerBasedRLEnvCfg] | None = None,
rl_framework: RLFramework | None = None,
rl_policy_cfg: str | None = None,
):
"""
Args:
Expand All @@ -47,3 +50,5 @@ def __init__(
self.teleop_device = teleop_device
self.orchestrator = orchestrator
self.env_cfg_callback = env_cfg_callback
self.rl_framework = rl_framework
self.rl_policy_cfg = rl_policy_cfg
51 changes: 51 additions & 0 deletions isaaclab_arena/environments/isaaclab_interop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import argparse

from isaaclab_arena_environments.cli import ExampleEnvironments


def environment_registration_callback() -> list[str]:
    """Register an IsaacLab Arena environment for use with Isaac Lab scripts.

    This function is passed to an Isaac Lab script as an external callback, e.g.:

    python IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py
    --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback
    --task lift_object
    --num_envs 512

    Here the "lift_object" environment is registered with Isaac Lab before the
    RSL RL training script runs; the training script then trains that environment.

    Returns:
        The CLI arguments that were not consumed by this callback.
    """
    from isaaclab.app import AppLauncher

    from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args
    from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder

    # First parsing pass: only --task, which selects the Arena environment.
    # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to
    # determine the environment to register. The environment is also registered under this name.
    # The result is that a single argument tells Arena what to register, and Lab what to run.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register."
    )
    task_name = arg_parser.parse_known_args()[0].task
    selected_environment = ExampleEnvironments[task_name]()
    # Second parsing pass: the full set of app-launcher / Lab / Arena / environment-specific args.
    AppLauncher.add_app_launcher_args(arg_parser)
    add_isaac_lab_cli_args(arg_parser)
    add_isaaclab_arena_cli_args(arg_parser)
    selected_environment.add_cli_args(arg_parser)
    parsed_args, unconsumed_args = arg_parser.parse_known_args()
    # Build the environment config and register it with the Gym registry.
    arena_environment = selected_environment.get_env(parsed_args)
    env_builder = ArenaEnvBuilder(arena_environment, parsed_args)
    env_builder.build_registered()
    # Hand back everything this callback did not consume.
    return unconsumed_args
45 changes: 45 additions & 0 deletions isaaclab_arena/examples/rigid_object_variant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# Example 1: a rigid-object variant config that swaps between two USD assets.
# NOTE(review): `RigidObjectVariantCfg`, `RigidObjectCfg`, `UsdFileCfg`, `Pose`,
# `PoseVariant`, `RigidObjectSet`, and `asset_registry` are assumed to be supplied
# by the surrounding example context/imports — confirm before running standalone.
rigid_object_variant_cfg = RigidObjectVariantCfg(
    assets={
        "box": RigidObjectCfg(
            name="box",
            spawn=UsdFileCfg(
                usd_path="path/to/box.usd",
            ),
            scale=(1.0, 1.0, 1.0),
            initial_pose=Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        ),
        "sphere": RigidObjectCfg(
            name="sphere",
            spawn=UsdFileCfg(
                usd_path="path/to/sphere.usd",
            ),
            scale=(2.0, 2.0, 2.0),
            initial_pose=Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        ),
    }
)  # fixed: the call was closed with `}` instead of `)`, a syntax error


# Example 2: an object set whose members all share a single initial pose.
cracker_box = asset_registry.get_asset_by_name("cracker_box")()
tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")()
object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can])
object_set.set_initial_pose(Pose(position=(0.0, 0.0, 0.0), orientation=(1.0, 0.0, 0.0, 0.0)))


# Example 3: an object set with a per-object initial pose via PoseVariant.
cracker_box = asset_registry.get_asset_by_name("cracker_box")()
tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")()
object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can])
object_set.set_initial_pose(
    PoseVariant(
        poses={
            cracker_box: Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)),
            tomato_soup_can: Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)),
        }
    )
)
88 changes: 26 additions & 62 deletions isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,19 @@
#
# SPDX-License-Identifier: Apache-2.0

# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from dataclasses import field
from typing import Any

from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
"""Default RSL-RL runner configuration for Arena environments.

Used as the ``rsl_rl_cfg_entry_point`` when registering environments with gym,
allowing IsaacLab's ``train.py`` to load it via ``@hydra_task_config``.
"""

num_steps_per_env: int = 24
max_iterations: int = 4000
Expand All @@ -39,49 +27,25 @@ class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
"critic": ["policy"],
}
)
policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg)
algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg)

@classmethod
def update_cfg(
cls,
policy_cfg: dict[str, Any],
algorithm_cfg: dict[str, Any],
obs_groups: dict[str, list[str]],
num_steps_per_env: int,
max_iterations: int,
save_interval: int,
experiment_name: str,
):
cfg = cls()
cfg.policy = RslRlPpoActorCriticCfg(**policy_cfg)
cfg.algorithm = RslRlPpoAlgorithmCfg(**algorithm_cfg)
cfg.obs_groups = obs_groups
cfg.num_steps_per_env = num_steps_per_env
cfg.max_iterations = max_iterations
cfg.save_interval = save_interval
cfg.experiment_name = experiment_name
return cfg


def get_agent_cfg(args_cli: argparse.Namespace) -> Any:
"""Get the environment and agent configuration from the command line arguments."""

# Read a json file containing the agent configuration
with open(args_cli.agent_cfg_path) as f:
agent_cfg_dict = json.load(f)

policy_cfg = agent_cfg_dict["policy_cfg"]
algorithm_cfg = agent_cfg_dict["algorithm_cfg"]
obs_groups = agent_cfg_dict["obs_groups"]
# Load all other arguments if they are in args_cli as policy arguments
num_steps_per_env = args_cli.num_steps_per_env
max_iterations = args_cli.max_iterations
save_interval = args_cli.save_interval
experiment_name = args_cli.experiment_name

agent_cfg = RLPolicyCfg.update_cfg(
policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name
policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
)
algorithm: RslRlPpoAlgorithmCfg = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.006,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=0.0001,
schedule="adaptive",
gamma=0.98,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)

return agent_cfg
28 changes: 0 additions & 28 deletions isaaclab_arena/policy/rl_policy/generic_policy.json

This file was deleted.

Loading
Loading