From 99d6eca3b670bcf08a72fdc8b840cdd6c9676006 Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Wed, 29 Oct 2025 11:22:58 -0700
Subject: [PATCH 1/8] initial commit and first 'working' env

---
 .../lift/config/ur10_SG/__init__.py           | 26 +++++
 .../lift/config/ur10_SG/agents/__init__.py    |  4 +
 .../config/ur10_SG/agents/skrl_ppo_cfg.yaml   | 85 ++++++++++++++++
 .../lift/config/ur10_SG/joint_pos_env_cfg.py  | 99 +++++++++++++++++++
 .../manipulation/lift/lift_env_cfg.py         |  2 +-
 5 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
new file mode 100644
index 00000000000..e9601144bdf
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gymnasium as gym
+import os
+
+from . import agents
+
+##
+# Register Gym environments.
+##
+
+##
+# Joint Position Control
+##
+
+gym.register(
+    id="Isaac-Lift-Cube-UR10SG-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGCubeLiftEnvCfg",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+    },
+    disable_env_checker=True,
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py
new file mode 100644
index 00000000000..2e924fbf1b1
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml
new file mode 100644
index 00000000000..1a7216cb6b4
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml
@@ -0,0 +1,85 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+seed: 42
+
+
+# Models are instantiated using skrl's model instantiator utility
+# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
+models:
+  separate: False
+  policy:  # see gaussian_model parameters
+    class: GaussianMixin
+    clip_actions: False
+    clip_log_std: True
+    min_log_std: -20.0
+    max_log_std: 2.0
+    initial_log_std: 0.0
+    network:
+      - name: net
+        input: OBSERVATIONS
+        layers: [256, 128, 64]
+        activations: elu
+    output: ACTIONS
+  value:  # see deterministic_model parameters
+    class: DeterministicMixin
+    clip_actions: False
+    network:
+      - name: net
+        input: OBSERVATIONS
+        layers: [256, 128, 64]
+        activations: elu
+    output: ONE
+
+
+# Rollout memory
+# https://skrl.readthedocs.io/en/latest/api/memories/random.html
+memory:
+  class: RandomMemory
+  memory_size: -1  # automatically determined (same as agent:rollouts)
+
+
+# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
+# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
+agent:
+  class: PPO
+  rollouts: 24
+  learning_epochs: 8
+  mini_batches: 4
+  discount_factor: 0.99
+  lambda: 0.95
+  learning_rate: 1.0e-04
+  learning_rate_scheduler: KLAdaptiveLR
+  learning_rate_scheduler_kwargs:
+    kl_threshold: 0.01
+  state_preprocessor: RunningStandardScaler
+  state_preprocessor_kwargs: null
+  value_preprocessor: RunningStandardScaler
+  value_preprocessor_kwargs: null
+  random_timesteps: 0
+  learning_starts: 0
+  grad_norm_clip: 1.0
+  ratio_clip: 0.2
+  value_clip: 0.2
+  clip_predicted_values: True
+  entropy_loss_scale: 0.001
+  value_loss_scale: 2.0
+  kl_threshold: 0.0
+  rewards_shaper_scale: 0.01
+  time_limit_bootstrap: False
+  # logging and checkpoint
+  experiment:
+    directory: "ur10sg_lift"
+    experiment_name: ""
+    write_interval: auto
+    checkpoint_interval: auto
+
+
+# Sequential trainer
+# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
+trainer:
+  class: SequentialTrainer
+  timesteps: 36000
+  environment_info: log
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
new file mode 100644
index 00000000000..6af19399ffd
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
@@ -0,0 +1,99 @@
+from isaaclab.assets import RigidObjectCfg, SurfaceGripperCfg
+from isaaclab.sensors import FrameTransformerCfg
+from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
+from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg
+from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+from isaaclab_tasks.manager_based.manipulation.lift import mdp
+from isaaclab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg
+
+##
+# Pre-defined configs
+##
+from isaaclab.markers.config import FRAME_MARKER_CFG  # isort: skip
+from isaaclab_assets.robots.universal_robots import UR10_SHORT_SUCTION_CFG
+
+@configclass
+class UR10SGCubeLiftEnvCfg(LiftEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # Set UR10 as robot
+        self.scene.robot = UR10_SHORT_SUCTION_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+
+        # define the surface gripper
+        self.scene.surface_gripper = SurfaceGripperCfg(
+            prim_path="{ENV_REGEX_NS}/Robot/ee_link/SurfaceGripper",
+            max_grip_distance=0.05,
+            shear_force_limit=5000.0,
+            coaxial_force_limit=5000.0,
+            retry_interval=0.05,
+        )
+
+        # Set actions for the specific robot type (UR10)
+        self.actions.arm_action = mdp.JointPositionActionCfg(
+            asset_name="robot", 
+            joint_names=[".*_joint"], 
+            scale=0.5, 
+            use_default_offset=True
+        )
+        self.actions.gripper_action = mdp.SurfaceGripperBinaryActionCfg(
+            asset_name="surface_gripper",
+            open_command=-1.0,
+            close_command=1.0,
+        )
+        # Set the body name for the end effector
+        self.commands.object_pose.body_name = "ee_link"
+
+        # Set Cube as object
+        self.scene.object = RigidObjectCfg(
+            prim_path="{ENV_REGEX_NS}/Object",
+            init_state=RigidObjectCfg.InitialStateCfg(pos=[0.5, 0, 0.055], rot=[1, 0, 0, 0]),
+            spawn=UsdFileCfg(
+                usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+                scale=(0.8, 0.8, 0.8),
+                rigid_props=RigidBodyPropertiesCfg(
+                    solver_position_iteration_count=16,
+                    solver_velocity_iteration_count=1,
+                    max_angular_velocity=1000.0,
+                    max_linear_velocity=1000.0,
+                    max_depenetration_velocity=5.0,
+                    disable_gravity=False,
+                ),
+            ),
+        )
+
+        # Listens to the required transforms
+        marker_cfg = FRAME_MARKER_CFG.copy()
+        marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1)
+        marker_cfg.prim_path = "/Visuals/FrameTransformer"
+
+        self.scene.ee_frame = FrameTransformerCfg(
+            prim_path="{ENV_REGEX_NS}/Robot/base_link",
+            debug_vis=True,
+            visualizer_cfg=marker_cfg,
+            target_frames=[
+                FrameTransformerCfg.FrameCfg(
+                    prim_path="{ENV_REGEX_NS}/Robot/ee_link",
+                    name="end_effector",
+                    offset=OffsetCfg(
+                        pos=[0.1585, 0.0, 0.0],
+                    ),
+                ),
+            ],
+        )
+
+
+@configclass
+class UR10SGCubeLiftEnvCfg_PLAY(UR10SGCubeLiftEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py
index 3a4f458854d..0d0b6a735ec 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py
@@ -89,7 +89,7 @@ class ActionsCfg:
 
     # will be set by agent env cfg
     arm_action: mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg = MISSING
-    gripper_action: mdp.BinaryJointPositionActionCfg = MISSING
+    gripper_action: mdp.BinaryJointPositionActionCfg | mdp.SurfaceGripperBinaryActionCfg = MISSING
 
 
 @configclass

From 81ea7b1a0e21809774d9c4791190eb3e8c81507e Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Wed, 29 Oct 2025 13:14:23 -0700
Subject: [PATCH 2/8] modified initial pose for the suction gripper

---
 .../isaaclab_assets/isaaclab_assets/robots/universal_robots.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
index 4433b824235..275ab1ca5ac 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
@@ -113,7 +113,7 @@
     "shoulder_lift_joint": -1.5707,
     "elbow_joint": 1.5707,
     "wrist_1_joint": -1.5707,
-    "wrist_2_joint": 1.5707,
+    "wrist_2_joint": -1.5707,
     "wrist_3_joint": 0.0,
 }
 

From e4c76c43631cda6c434fdad2f43f798831fed719 Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Fri, 31 Oct 2025 10:52:06 -0700
Subject: [PATCH 3/8] added additional backends

---
 .../lift/config/ur10_SG/__init__.py           | 18 +++-
 .../ur10_SG/agents/rl_games_ppo_cfg.yaml      | 84 +++++++++++++++++++
 .../config/ur10_SG/agents/rsl_rl_ppo_cfg.py   | 38 +++++++++
 .../config/ur10_SG/agents/sb3_ppo_cfg.yaml    | 34 ++++++++
 .../lift/config/ur10_SG/joint_pos_env_cfg.py  | 23 ++++-
 5 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
index e9601144bdf..6c07c69c16e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
@@ -16,11 +16,27 @@
 ##
 
 gym.register(
-    id="Isaac-Lift-Cube-UR10SG-v0",
+    id="Isaac-Lift-Cube-UR10SG-Short-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
     kwargs={
         "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGCubeLiftEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+    },
+    disable_env_checker=True,
+)
+
+gym.register(
+    id="Isaac-Lift-Cube-UR10SG-Long-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGLongCubeLiftEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
     },
     disable_env_checker=True,
 )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
new file mode 100644
index 00000000000..339905b645d
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
@@ -0,0 +1,84 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+params:
+  seed: 42
+
+  # environment wrapper clipping
+  env:
+    clip_observations: 100.0
+    clip_actions: 100.0
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256, 128, 64]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+  load_checkpoint: False # flag which sets whether to load the checkpoint
+  load_path: '' # path to the checkpoint to load
+
+  config:
+    name: UR10SG_lift
+    env_name: rlgpu
+    device: 'cuda:0'
+    device_name: 'cuda:0'
+    multi_gpu: False
+    ppo: True
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: False
+    num_actors: -1
+    reward_shaper:
+      scale_value: 0.01
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1e-4
+    lr_schedule: adaptive
+    schedule_type: legacy
+    kl_threshold: 0.01
+    score_to_win: 100000000
+    max_epochs: 1500
+    save_best_after: 100
+    save_frequency: 50
+    print_stats: True
+    grad_norm: 1.0
+    entropy_coef: 0.001
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 24
+    minibatch_size: 24576
+    mini_epochs: 8
+    critic_coef: 4
+    clip_value: True
+    clip_actions: False
+    seq_len: 4
+    bounds_loss_coef: 0.0001
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 00000000000..d4b30f603e1
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class UR10LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "UR10SG_lift"
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_obs_normalization=False,
+        critic_obs_normalization=False,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.006,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-4,
+        schedule="adaptive",
+        gamma=0.98,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml
new file mode 100644
index 00000000000..91ae4f0d9f0
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml
@@ -0,0 +1,34 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
+seed: 42
+
+# epoch * n_steps * nenvs: 500×512*8*8
+n_timesteps: 16384000
+policy: 'MlpPolicy'
+n_steps: 64
+# mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048
+batch_size: 192
+gae_lambda: 0.95
+gamma: 0.99
+n_epochs: 8
+ent_coef: 0.00
+vf_coef: 0.0001
+learning_rate: !!float 3e-4
+clip_range: 0.2
+policy_kwargs:
+  activation_fn: 'nn.ELU'
+  net_arch:
+    pi: [256, 128, 64]
+    vf: [256, 128, 64]
+target_kl: 0.01
+max_grad_norm: 1.0
+
+# # Uses VecNormalize class to normalize obs
+# normalize_input: True
+# # Uses VecNormalize class to normalize rew
+# normalize_value: True
+# clip_obs: 5
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
index 6af19399ffd..e51b6e01ab2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
@@ -13,7 +13,7 @@
 # Pre-defined configs
 ##
 from isaaclab.markers.config import FRAME_MARKER_CFG  # isort: skip
-from isaaclab_assets.robots.universal_robots import UR10_SHORT_SUCTION_CFG
+from isaaclab_assets.robots.universal_robots import UR10_SHORT_SUCTION_CFG, UR10_LONG_SUCTION_CFG
 
 @configclass
 class UR10SGCubeLiftEnvCfg(LiftEnvCfg):
@@ -97,3 +97,24 @@ def __post_init__(self):
         self.scene.env_spacing = 2.5
         # disable randomization for play
         self.observations.policy.enable_corruption = False
+
+@configclass
+class UR10SGLongCubeLiftEnvCfg(UR10SGCubeLiftEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # Set UR10 as robot
+        self.scene.robot = UR10_LONG_SUCTION_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+
+@configclass
+class UR10SGLongCubeLiftEnvCfg_PLAY(UR10SGLongCubeLiftEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+

From fab2d122dd7fe0513c2d49a20fa4642eb2be37dc Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Fri, 31 Oct 2025 11:12:02 -0700
Subject: [PATCH 4/8] removed rl_games support. The default config from the franka env
 hung. On examination, the file is called PPO but the algorithm is A2C.
 Not sure what's going on, so I removed it until clarified.

---
 .../ur10_SG/agents/rl_games_ppo_cfg.yaml      | 84 -------------------
 1 file changed, 84 deletions(-)
 delete mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
deleted file mode 100644
index 339905b645d..00000000000
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-params:
-  seed: 42
-
-  # environment wrapper clipping
-  env:
-    clip_observations: 100.0
-    clip_actions: 100.0
-
-  algo:
-    name: a2c_continuous
-
-  model:
-    name: continuous_a2c_logstd
-
-  network:
-    name: actor_critic
-    separate: False
-    space:
-      continuous:
-        mu_activation: None
-        sigma_activation: None
-
-        mu_init:
-          name: default
-        sigma_init:
-          name: const_initializer
-          val: 0
-        fixed_sigma: True
-    mlp:
-      units: [256, 128, 64]
-      activation: elu
-      d2rl: False
-
-      initializer:
-        name: default
-      regularizer:
-        name: None
-
-  load_checkpoint: False # flag which sets whether to load the checkpoint
-  load_path: '' # path to the checkpoint to load
-
-  config:
-    name: UR10SG_lift
-    env_name: rlgpu
-    device: 'cuda:0'
-    device_name: 'cuda:0'
-    multi_gpu: False
-    ppo: True
-    mixed_precision: False
-    normalize_input: True
-    normalize_value: True
-    value_bootstrap: False
-    num_actors: -1
-    reward_shaper:
-      scale_value: 0.01
-    normalize_advantage: True
-    gamma: 0.99
-    tau: 0.95
-    learning_rate: 1e-4
-    lr_schedule: adaptive
-    schedule_type: legacy
-    kl_threshold: 0.01
-    score_to_win: 100000000
-    max_epochs: 1500
-    save_best_after: 100
-    save_frequency: 50
-    print_stats: True
-    grad_norm: 1.0
-    entropy_coef: 0.001
-    truncate_grads: True
-    e_clip: 0.2
-    horizon_length: 24
-    minibatch_size: 24576
-    mini_epochs: 8
-    critic_coef: 4
-    clip_value: True
-    clip_actions: False
-    seq_len: 4
-    bounds_loss_coef: 0.0001

From 9e14cc24511a0f6694aa65eb8f033b19681a32da Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Fri, 31 Oct 2025 11:19:29 -0700
Subject: [PATCH 5/8] format!

---
 .../lift/config/ur10_SG/joint_pos_env_cfg.py    | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
index e51b6e01ab2..2d71ac74b15 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py
@@ -1,3 +1,10 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab_assets.robots.universal_robots import UR10_LONG_SUCTION_CFG, UR10_SHORT_SUCTION_CFG
+
 from isaaclab.assets import RigidObjectCfg, SurfaceGripperCfg
 from isaaclab.sensors import FrameTransformerCfg
 from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
@@ -13,7 +20,7 @@
 # Pre-defined configs
 ##
 from isaaclab.markers.config import FRAME_MARKER_CFG  # isort: skip
-from isaaclab_assets.robots.universal_robots import UR10_SHORT_SUCTION_CFG, UR10_LONG_SUCTION_CFG
+
 
 @configclass
 class UR10SGCubeLiftEnvCfg(LiftEnvCfg):
@@ -35,10 +42,7 @@ def __post_init__(self):
 
         # Set actions for the specific robot type (UR10)
         self.actions.arm_action = mdp.JointPositionActionCfg(
-            asset_name="robot", 
-            joint_names=[".*_joint"], 
-            scale=0.5, 
-            use_default_offset=True
+            asset_name="robot", joint_names=[".*_joint"], scale=0.5, use_default_offset=True
         )
         self.actions.gripper_action = mdp.SurfaceGripperBinaryActionCfg(
             asset_name="surface_gripper",
@@ -98,6 +102,7 @@ def __post_init__(self):
         # disable randomization for play
         self.observations.policy.enable_corruption = False
 
+
 @configclass
 class UR10SGLongCubeLiftEnvCfg(UR10SGCubeLiftEnvCfg):
     def __post_init__(self):
@@ -107,6 +112,7 @@ def __post_init__(self):
         # Set UR10 as robot
         self.scene.robot = UR10_LONG_SUCTION_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
 
+
 @configclass
 class UR10SGLongCubeLiftEnvCfg_PLAY(UR10SGLongCubeLiftEnvCfg):
     def __post_init__(self):
@@ -117,4 +123,3 @@ def __post_init__(self):
         self.scene.env_spacing = 2.5
         # disable randomization for play
         self.observations.policy.enable_corruption = False
-

From 7e7d1f1eedec6cc2c3c48c1dac0c3fb9db6c92e5 Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Fri, 31 Oct 2025 11:35:11 -0700
Subject: [PATCH 6/8] removed rl games entry in init

---
 .../manager_based/manipulation/lift/config/ur10_SG/__init__.py  | 2 --
 1 file changed, 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
index 6c07c69c16e..d6afcaecdea 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
@@ -22,7 +22,6 @@
         "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGCubeLiftEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
     },
     disable_env_checker=True,
@@ -35,7 +34,6 @@
         "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGLongCubeLiftEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
     },
     disable_env_checker=True,

From 234a74fe845c7b90dfb445043409c20cf6b855a4 Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Fri, 31 Oct 2025 13:50:59 -0700
Subject: [PATCH 7/8] restored rl games

---
 .../lift/config/ur10_SG/__init__.py           |  2 +
 .../ur10_SG/agents/rl_games_ppo_cfg.yaml      | 84 +++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
index d6afcaecdea..6c07c69c16e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py
@@ -22,6 +22,7 @@
         "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGCubeLiftEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
     },
     disable_env_checker=True,
@@ -34,6 +35,7 @@
         "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGLongCubeLiftEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
     },
     disable_env_checker=True,
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
new file mode 100644
index 00000000000..339905b645d
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml
@@ -0,0 +1,84 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+params:
+  seed: 42
+
+  # environment wrapper clipping
+  env:
+    clip_observations: 100.0
+    clip_actions: 100.0
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256, 128, 64]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+  load_checkpoint: False # flag which sets whether to load the checkpoint
+  load_path: '' # path to the checkpoint to load
+
+  config:
+    name: UR10SG_lift
+    env_name: rlgpu
+    device: 'cuda:0'
+    device_name: 'cuda:0'
+    multi_gpu: False
+    ppo: True
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: False
+    num_actors: -1
+    reward_shaper:
+      scale_value: 0.01
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 1e-4
+    lr_schedule: adaptive
+    schedule_type: legacy
+    kl_threshold: 0.01
+    score_to_win: 100000000
+    max_epochs: 1500
+    save_best_after: 100
+    save_frequency: 50
+    print_stats: True
+    grad_norm: 1.0
+    entropy_coef: 0.001
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 24
+    minibatch_size: 24576
+    mini_epochs: 8
+    critic_coef: 4
+    clip_value: True
+    clip_actions: False
+    seq_len: 4
+    bounds_loss_coef: 0.0001

From b139fc1913f013ccbd2c86452e16e01f532bffad Mon Sep 17 00:00:00 2001
From: mgussert <mgussert@nvidia.com>
Date: Mon, 3 Nov 2025 15:30:50 -0800
Subject: [PATCH 8/8] swapping to other work

---
 .../manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
index d4b30f603e1..f66727c1364 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py
@@ -13,7 +13,7 @@ class UR10LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
     num_steps_per_env = 24
     max_iterations = 1500
     save_interval = 50
-    experiment_name = "UR10SG_lift"
+    experiment_name = "franka_lift"
     policy = RslRlPpoActorCriticCfg(
         init_noise_std=1.0,
         actor_obs_normalization=False,