From 2a93b04d58de5edd8c52f37845a8b37e7a320473 Mon Sep 17 00:00:00 2001 From: Jeremy Gillick Date: Thu, 5 Feb 2026 22:21:38 -0800 Subject: [PATCH 1/7] Update action managers to separate processing actions to sending to the simulator --- genesis_forge/managed_env.py | 3 +- genesis_forge/managers/action/base.py | 59 +++++++++++++++---- .../action/position_action_manager.py | 30 +++------- .../managers/action/position_within_limits.py | 18 +++--- 4 files changed, 67 insertions(+), 43 deletions(-) diff --git a/genesis_forge/managed_env.py b/genesis_forge/managed_env.py index 54b2f67..5e025f1 100644 --- a/genesis_forge/managed_env.py +++ b/genesis_forge/managed_env.py @@ -295,7 +295,8 @@ def step( self.managers["action"] ): (start, end) = self._action_ranges[i] - action_manager.step(actions[:, start:end]) + processed_actions = action_manager.step(actions[:, start:end]) + action_manager.send_actions_to_simulation(processed_actions) self.scene.step() # Update entity managers diff --git a/genesis_forge/managers/action/base.py b/genesis_forge/managers/action/base.py index 266846c..8a31a84 100644 --- a/genesis_forge/managers/action/base.py +++ b/genesis_forge/managers/action/base.py @@ -1,4 +1,5 @@ import re +from torch._tensor import Tensor import torch import numpy as np from gymnasium import spaces @@ -150,7 +151,7 @@ def actions(self) -> torch.Tensor: @property def raw_actions(self) -> torch.Tensor: """ - The actions received from the policy, before being converted. + The actions received from the policy, before being processed. """ if self._raw_actions is None: return torch.zeros((self.env.num_envs, self.num_actions)) @@ -213,6 +214,47 @@ def get_dofs_force(self, clip_to_max_force: bool = False): clip_to_max_force=clip_to_max_force, dofs_idx=self.dofs_idx ) + def get_actions(self) -> torch.Tensor: + """ + Get the current actions for the environments. + """ + if self._actions is None: + return torch.zeros((self.env.num_envs, self.num_actions)) + return self._actions + + def get_actions_dict(self, env_idx: int = 0) -> dict[str, float]: + """ + Get the latest actions for an environment as a dictionary of DOF names and values. + """ + return { + name: value.item() + for name, value in zip[tuple[str, Tensor]]( + self.dofs.keys(), self._actions[env_idx, :] + ) + } + + def process_actions(self, actions: torch.Tensor) -> torch.Tensor: + """ + Process the actions and convert them to actuator commands. + Override this function if you want to change the action processing logic. + + Args: + actions: The incoming step actions to handle. + + Returns: + The processed and converted actions. + """ + return actions + + def send_actions_to_simulation(self) -> torch.Tensor: + """ + Send the latest processed actions to the actuators in the simulation. + Override this function to define how the actions are sent to the simulation. + """ + raise NotImplementedError( + "handle_actions is not implemented for this action manager." + ) + """ Lifecycle Operations """ @@ -237,7 +279,7 @@ def build(self): def step(self, actions: torch.Tensor) -> None: """ - Handle the received actions. + Handle actions received in this step. """ # Action delay buffer if self._delay_step > 0: @@ -248,7 +290,10 @@ def step(self, actions: torch.Tensor) -> None: self._raw_actions = actions if self._actions is None: self._actions = torch.empty_like(actions, device=gs.device) - self._actions[:] = self._raw_actions[:] + + # Process the actions + self._actions[:] = self.process_actions(self._raw_actions[:]) + return self._actions def reset(self, envs_idx: list[int] | None): @@ -262,11 +307,3 @@ def reset(self, envs_idx: list[int] | None): self._action_delay_buffer.append( torch.zeros((self.env.num_envs, self.num_actions), device=gs.device) ) - - def get_actions(self) -> torch.Tensor: - """ - Get the current actions for the environments. - """ - if self._actions is None: - return torch.zeros((self.env.num_envs, self.num_actions)) - return self._actions diff --git a/genesis_forge/managers/action/position_action_manager.py b/genesis_forge/managers/action/position_action_manager.py index d3e58d6..450d381 100644 --- a/genesis_forge/managers/action/position_action_manager.py +++ b/genesis_forge/managers/action/position_action_manager.py @@ -175,31 +175,16 @@ def build(self): offset = self._offset_cfg if self._offset_cfg is not None else 0.0 self._offset_values = self._get_dof_value_tensor(offset) - def step(self, actions: torch.Tensor) -> torch.Tensor: + def process_actions(self, actions: torch.Tensor) -> torch.Tensor: """ - Take the incoming actions for this step and handle them. - - Args: - actions: The incoming step actions to handle. - """ - if not self.enabled: - return - actions = super().step(actions) - self._actions = self.handle_actions(actions) - return self._actions - - def handle_actions(self, actions: torch.Tensor) -> torch.Tensor: - """ - Converts the actions to position commands, and send them to the DOF actuators. - Override this function if you want to change the action handling logic. + Convert the actions to position commands, and clamp them to the limits. Args: actions: The incoming step actions to handle. Returns: - The processed and handled actions. + The actions as position commands. """ - # Validate actions if not self._quiet_action_errors: if torch.isnan(actions).any(): @@ -214,12 +199,15 @@ def handle_actions(self, actions: torch.Tensor) -> torch.Tensor: min=self._clip_values[:, 0], max=self._clip_values[:, 1], ) + return actions - # Set target positions + def send_actions_to_simulation(self, actions: torch.Tensor) -> torch.Tensor: + """ + Sends the actions as position commands to the actuators in the simulation. + """ + actions = self.get_actions() self.actuator_manager.control_dofs_position(actions, self.dofs_idx) - return actions - """ Internal methods """ diff --git a/genesis_forge/managers/action/position_within_limits.py b/genesis_forge/managers/action/position_within_limits.py index 766cc86..1cb3aea 100644 --- a/genesis_forge/managers/action/position_within_limits.py +++ b/genesis_forge/managers/action/position_within_limits.py @@ -18,8 +18,9 @@ class PositionWithinLimitsActionManager(PositionActionManager): actuator_manager: The actuator manager which is used to setup and control the DOF joints. actuator_joints: Which joints of the actuator manager that this action manager will control. These can be full names or regular expressions. - limit: (optional) A dictionary of DOF name patterns and their position limits. + limit: A dictionary of DOF name patterns and their position limits. If omitted, the limits will be set to the limits of the actuators defined in the model. + soft_limit_scale_factor: Scales the range of all limits by this factor to establish a safety region within the limits. Defaults to 1.0. quiet_action_errors: Whether to quiet action errors. delay_step: The number of steps to delay the actions for. This is an easy way to emulate the latency in the system. @@ -76,6 +77,7 @@ def __init__( actuator_joints: list[str] | str = ".*", quiet_action_errors: bool = False, limit: tuple[float, float] | dict[str, tuple[float, float]] = {}, + soft_limit_scale_factor: float = 1.0, delay_step: int = 0, **kwargs, ): @@ -88,6 +90,7 @@ def __init__( **kwargs, ) self._limit_cfg = ensure_dof_pattern(limit) + self._soft_limit_scale_factor = soft_limit_scale_factor """ Lifecycle Operations @@ -103,26 +106,21 @@ def build(self): lower = lower.unsqueeze(0).expand(self.env.num_envs, -1) upper = upper.unsqueeze(0).expand(self.env.num_envs, -1) self._offset = (upper + lower) * 0.5 - self._scale = (upper - lower) * 0.5 + self._scale = (upper - lower) * 0.5 * self._soft_limit_scale_factor - def handle_actions(self, actions: torch.Tensor) -> torch.Tensor: + def process_actions(self, actions: torch.Tensor) -> torch.Tensor: """ - Converts the actions to position commands, and send them to the DOF actuators. - Override this function if you want to change the action handling logic. + Convert the actions to position commands within the limits. Args: actions: The incoming step actions to handle. Returns: - The processed and handled actions. + The actions as position commands. """ # Convert the action from -1 to 1, to absolute position within the actuator limits actions.clamp_(-1.0, 1.0) actions = actions * self._scale + self._offset - - # Set target positions - self.actuator_manager.control_dofs_position(actions, self.dofs_idx) - return actions """ From d0318ce96b825afbb97f595d891dfd6357e8688e Mon Sep 17 00:00:00 2001 From: Jeremy Gillick Date: Thu, 5 Feb 2026 22:22:09 -0800 Subject: [PATCH 2/7] Add override values to the observation manager to make it easier to send deploy values. --- genesis_forge/managers/observation_manager.py | 49 +++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/genesis_forge/managers/observation_manager.py b/genesis_forge/managers/observation_manager.py index c3d47b6..671d0cf 100644 --- a/genesis_forge/managers/observation_manager.py +++ b/genesis_forge/managers/observation_manager.py @@ -211,13 +211,29 @@ def build(self): device=gs.device, ) - def get_observations(self) -> torch.Tensor: - """Generate current observations for all environments.""" + def get_observations( + self, values: dict[str, float | torch.Tensor] | None = None + ) -> torch.Tensor: + """ + Generate current observations for all environments. + + When you deploy to a real robot, you can provide the observations directly as a dictionary of values, + and this method will return the formatted & scaled tensor that you can pass to the policy. + + Args: + values: (optional) If provided, use these values instead of fetching observations from the config functions. + It's expected that this dict contains a key for every observation configuration. + These values will be scaled, based on the configuration, but not receive any noise. + This is useful for providing observations for deployment. + + Returns: + The observations for all environments. + """ if not self.enabled: return torch.zeros((self.env.num_envs, self._observation_size)) buffer = self._history.pop() - self._perform_observation(buffer) + self._perform_observation(buffer, values) self._history.insert(0, buffer) # Concatenate the history buffers into the pre-allocated output buffer @@ -249,7 +265,11 @@ def _setup_observation_functions(self) -> int: raise e return size - def _perform_observation(self, output: torch.Tensor) -> torch.Tensor: + def _perform_observation( + self, + output: torch.Tensor, + override_values: dict[str, float | torch.Tensor] | None = None, + ) -> torch.Tensor: """ Perform a round of observations. @@ -257,22 +277,31 @@ def _perform_observation(self, output: torch.Tensor) -> torch.Tensor: output: The output tensor to fill with the observations. """ offset = 0 + has_overrides = override_values is not None for name, cfg in self.cfg.items(): try: # Get values params = cfg.params - value = cfg.fn(env=self.env, **params) + if override_values is not None: + if name not in override_values: + raise ValueError(f"Value '{name}' not found in override values") + value = override_values[name] + if not isinstance(value, torch.Tensor): + value = torch.tensor(value, device=gs.device) + else: + value = cfg.fn(env=self.env, **params) # Apply scale scale = cfg.scale if scale is not None and scale != 1.0: value *= scale - # Add noise - noise = cfg.noise or self.noise - if noise is not None and noise != 0.0: - noise_value = torch.empty_like(value).uniform_(-1, 1) * noise - value += noise_value + # Add noise, if the value is not an override + if not has_overrides: + noise = cfg.noise or self.noise + if noise is not None and noise != 0.0: + noise_value = torch.empty_like(value).uniform_(-1, 1) * noise + value += noise_value # Copy directly into output buffer value_size = value.shape[-1] From 8435b28a06198e05f25b763cd4956e4bb592f246 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 9 Mar 2026 22:10:12 -0700 Subject: [PATCH 3/7] Genesis 4.0 support --- genesis_forge/managers/contact/kernel.py | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/genesis_forge/managers/contact/kernel.py b/genesis_forge/managers/contact/kernel.py index 9424cbf..bdd6639 100644 --- a/genesis_forge/managers/contact/kernel.py +++ b/genesis_forge/managers/contact/kernel.py @@ -1,5 +1,5 @@ -import gstaichi as ti -from genesis.utils.geom import ti_inv_transform_by_quat +import quadrants as ti +from genesis.utils.geom import qd_inv_transform_by_quat @ti.kernel @@ -73,9 +73,9 @@ def kernel_get_contact_forces( # Transform force to local frame of target link if is_target_b: - force_vec = ti_inv_transform_by_quat(force_vec, quat_b) + force_vec = qd_inv_transform_by_quat(force_vec, quat_b) else: - force_vec = ti_inv_transform_by_quat(-force_vec, quat_a) + force_vec = qd_inv_transform_by_quat(-force_vec, quat_a) # Accumulate force and position for j in ti.static(range(3)): diff --git a/pyproject.toml b/pyproject.toml index dabec8f..56f8410 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ description = "A robotics RL training framework for Genesis inspired by Isaac La readme = "README.md" requires-python = ">=3.10" dependencies = [ - "genesis-world>=0.3.4", + "genesis-world>=0.4.0", "gymnasium", "deprecated", "hidapi", From 824c308082b3ad4cc01998bdc8b2635fbe29a9f2 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 9 Mar 2026 22:11:16 -0700 Subject: [PATCH 4/7] New rewards --- .../managers/actuator/actuator_manager.py | 28 ++++ genesis_forge/managers/config/config_item.py | 9 ++ genesis_forge/managers/config/mdp_fn_class.py | 4 + genesis_forge/managers/reward_manager.py | 5 + genesis_forge/mdp/rewards.py | 138 ++++++++++++++++++ 5 files changed, 184 insertions(+) diff --git a/genesis_forge/managers/actuator/actuator_manager.py b/genesis_forge/managers/actuator/actuator_manager.py index 29e016c..7ac14cd 100644 --- a/genesis_forge/managers/actuator/actuator_manager.py +++ b/genesis_forge/managers/actuator/actuator_manager.py @@ -261,6 +261,34 @@ def get_dofs_force( [lower, upper] = self._robot.get_dofs_force_range(dofs_idx or self.dofs_idx) force = force.clamp(lower, upper) return force + + def get_dofs_control_force( + self, + noise: float = 0.0, + clip_to_max_force: bool = False, + dofs_idx: list[int] | None = None, + ) -> torch.Tensor: + """ + Return the force output by the configured DOFs. + This is a wrapper for `RigidEntity.get_dofs_control_force`. + + Args: + noise: The maximum amount of random noise to add to the force values returned. + clip_to_max_force: Clip the force returned to the maximum force of the actuators. + dofs_idx: The indices of the DOFs to get the force for. If None, all the DOFs of this actuator manager are used. + + Returns: + force: torch.Tensor, shape (n_envs, n_dofs) + The force experienced by the enabled DOFs. + """ + dofs_idx = dofs_idx if dofs_idx is not None else self.dofs_idx + force = self._robot.get_dofs_control_force(dofs_idx) + if noise > 0.0: + force = self._add_random_noise(force, noise) + if clip_to_max_force: + [lower, upper] = self._robot.get_dofs_force_range(dofs_idx or self.dofs_idx) + force = force.clamp(lower, upper) + return force def get_dofs_limits( self, dofs_idx: list[int] | None = None diff --git a/genesis_forge/managers/config/config_item.py b/genesis_forge/managers/config/config_item.py index e612a92..4dd80a0 100644 --- a/genesis_forge/managers/config/config_item.py +++ b/genesis_forge/managers/config/config_item.py @@ -55,6 +55,15 @@ def build(self, **kwargs): return self._init_fn_class() + def reset(self, envs_idx: list[int]): + """ + Reset the function class for the given environments. + No-op if the function is not a class instance. + """ + if not self._is_class: + return + self._fn.reset(envs_idx) + def execute(self, envs_idx: list[int]): """ Call the function for the given environment ids. diff --git a/genesis_forge/managers/config/mdp_fn_class.py b/genesis_forge/managers/config/mdp_fn_class.py index 66e3dba..e39a2c2 100644 --- a/genesis_forge/managers/config/mdp_fn_class.py +++ b/genesis_forge/managers/config/mdp_fn_class.py @@ -22,6 +22,10 @@ def build(self): """Called during the environment build phase and when MDP params are changed.""" pass + def reset(self, envs_idx): + """Called when environments are reset. Override to clear per-env state.""" + pass + def __call__( self, env: GenesisEnv, diff --git a/genesis_forge/managers/reward_manager.py b/genesis_forge/managers/reward_manager.py index 246cd9f..01d4d7a 100644 --- a/genesis_forge/managers/reward_manager.py +++ b/genesis_forge/managers/reward_manager.py @@ -199,6 +199,11 @@ def reset(self, envs_idx: list[int] | None = None): if envs_idx is None: envs_idx = torch.arange(self.env.num_envs, device=gs.device) + # Reset function classes + for cfg in self.cfg.values(): + cfg.reset(envs_idx) + + # Log the reward data if self.enabled and self.logging_enabled: logging_dict = self.env.extras[self.env.extras_logging_key] diff --git a/genesis_forge/mdp/rewards.py b/genesis_forge/mdp/rewards.py index 78cf337..0972405 100644 --- a/genesis_forge/mdp/rewards.py +++ b/genesis_forge/mdp/rewards.py @@ -282,6 +282,113 @@ def action_rate_l2(env: GenesisEnv) -> torch.Tensor: return torch.sum(torch.square(last_actions - actions), dim=1) +class action_acceleration_l2(MdpFnClass): + """ + Penalize the second-order finite difference of actions (discrete acceleration) using the L2 + squared kernel.This targets jittery oscillation rather than smooth consistent movement, + where a smooth ramp has zero acceleration even at high velocity. + + A smooth action ramp looks like this: 0.5 → 0.6 → 0.7 → 0.8 + * Velocities: 0.1, 0.1, 0.1 + * Accelerations: 0.0, 0.0 (zero -- perfectly smooth) + * Penalty: zero + + A jittery action ramp looks like this: 0.5 → 0.8 → 0.5 → 0.8 + * Velocities: 0.3, -0.3, 0.3 + * Accelerations: -0.6, 0.6 (large -- direction keeps reversing) + * Penalty: very large + + The acceleration is computed as: + + .. math:: + + \\text{acc}_t = a_t - 2 \\cdot a_{t-1} + a_{t-2} + + and the penalty is :math:`\\sum \\text{acc}_t^2` across all action dimensions. + + Args: + env: The Genesis environment containing the robot + action_manager: Optional action manager to source actions from. + If not provided, actions are read from ``env.actions``. + """ + + def __init__( + self, + env: GenesisEnv, + action_manager: PositionActionManager = None, + ): + super().__init__(env) + self.env = env + self._prev_action: torch.Tensor | None = None + self._prev_prev_action: torch.Tensor | None = None + self._action_log_count: torch.Tensor | None = None + + def _init_buffers(self, actions: torch.Tensor): + self._prev_action = torch.zeros_like(actions) + self._prev_prev_action = torch.zeros_like(actions) + self._action_log_count = torch.zeros((self.env.num_envs, ), dtype=torch.long, device=gs.device) + + def reset(self, envs_idx): + """ + Clear the action history for the specified environments. + """ + if self._prev_action is None: + return + self._prev_action[envs_idx] = 0.0 + self._prev_prev_action[envs_idx] = 0.0 + self._action_log_count[envs_idx] = 0 + + def __call__( + self, + env: GenesisEnv, + action_manager: PositionActionManager = None, + ) -> torch.Tensor: + # Get the current actions for this step + actions = env.actions + if action_manager is not None: + actions = action_manager.get_actions() + + # Initialize the buffers, if necessary + if self._prev_action is None: + self._init_buffers(actions) + + # Calculate the acceleration + acceleration = actions - 2.0 * self._prev_action + self._prev_prev_action + penalty = torch.sum(torch.square(acceleration), dim=1) + + # Mask out envs that don't yet have two steps of valid history + penalty = penalty * (self._action_log_count >= 2) + + # Shift the actions to the next step + self._prev_prev_action = self._prev_action + self._prev_action = actions.clone() + self._action_log_count.add_(1).clamp_(max=2) + + return penalty + + +def dof_torque_l2( + env: GenesisEnv, + actuator_manager: ActuatorManager, +) -> torch.Tensor: + """ + Penalize joint torque effort using the L2 squared kernel. + + Discourages the policy from applying unnecessary force, particularly when the + robot is near equilibrium. This helps reduce actuator oscillation when the robot + is stationary or moving slowly. + + Args: + env: The Genesis environment containing the robot + actuator_manager: The actuator manager to retrieve DOF forces from. + + Returns: + torch.Tensor: Penalty for joint torque effort, shape (num_envs,) + """ + torque = actuator_manager.get_dofs_control_force() + return torch.sum(torch.square(torque), dim=1) + + """ Velocity Command Rewards """ @@ -490,6 +597,37 @@ def feet_air_time( return reward +def feet_ground_time( + env: GenesisEnv, + contact_manager: ContactManager, + time_threshold: float, +) -> torch.Tensor: + """Penalize brief ground contacts (foot tapping) using a linear kernel. + + Fires at the moment a foot lifts off. The penalty is proportional to how + much the stance duration fell below time_threshold. A proper stance phase + (contact_time >= time_threshold) produces zero penalty. + + Intended to be paired with feet_air_time (positive reward) to fully shape + gait timing: feet_air_time rewards long swings while this penalizes taps. + Use a negative weight in the RewardManager. + + Args: + env: The Genesis Forge environment + contact_manager: The contact manager to check for contact + time_threshold: Contacts shorter than this (in seconds) are penalized. + Set independently from the feet_air_time threshold based + on the expected stance duration of your target gait. + + Returns: + The penalty for brief ground contacts, shape (num_envs,) + """ + just_lifted = contact_manager.has_broken_contact(env.dt) + last_contact_time = contact_manager.last_contact_time + short_contact = (time_threshold - last_contact_time).clamp(min=0.0) * just_lifted + return torch.sum(short_contact, dim=1) + + def feet_slide( env, contact_manager: ContactManager, From 607c63c8b5a75ec53f7fcda57799678c971c3fcd Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 9 Mar 2026 22:11:33 -0700 Subject: [PATCH 5/7] Observation: read_imu --- genesis_forge/mdp/observations.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/genesis_forge/mdp/observations.py b/genesis_forge/mdp/observations.py index ef386b9..0b96c04 100644 --- a/genesis_forge/mdp/observations.py +++ b/genesis_forge/mdp/observations.py @@ -1,4 +1,5 @@ from __future__ import annotations +from genesis import gs import torch from genesis_forge.genesis_env import GenesisEnv from genesis_forge.managers import ( @@ -81,6 +82,16 @@ def entity_projected_gravity( return entity_projected_gravity(entity) +def read_imu(env: GenesisEnv, imu: gs.sensors.IMU) -> torch.Tensor: + """ + Makes an IMU reading and returns the concatenated linear acceleration and angular velocity readings. + + Returns: + torch.Tensor: Shape (n_envs, 6): [lin_acc_xyz, ang_vel_xyz] per env. + """ + value = imu.read() + return torch.cat([value.lin_acc, value.ang_vel], dim=-1) + """ DOF/Join observations """ @@ -196,7 +207,7 @@ def current_actions( def contact_force(env: GenesisEnv, contact_manager: ContactManager) -> torch.Tensor: """ - Returns the normalized contact force at each contact point. + Returns the vector norm contact force at each contact point. Args: env: The Genesis Forge environment @@ -205,3 +216,20 @@ def contact_force(env: GenesisEnv, contact_manager: ContactManager) -> torch.Ten Returns: tensor of shape (num_envs, num_contacts) """ return torch.norm(contact_manager.contacts[:, :, :], dim=-1) + +def has_contact( + env: GenesisEnv, contact_manager: ContactManager, threshold=1.0 +) -> torch.Tensor: + """ + Return boolean (1/0) for each link in the contact manager that meets the contact threshold. + + Args: + env: The Genesis Forge environment + contact_manager: The contact manager to check for contact + threshold: The minimum force necessary for contact detection (default: 1.0) + + Returns: + 1 for each link meeting the contact threshold + """ + has_contact = contact_manager.contacts.norm(dim=-1) > threshold + return has_contact.float() \ No newline at end of file From d8837781822aef97c1008ae63185564a8db302e6 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 9 Mar 2026 22:12:02 -0700 Subject: [PATCH 6/7] Don't mutate actions --- genesis_forge/managers/action/position_within_limits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis_forge/managers/action/position_within_limits.py b/genesis_forge/managers/action/position_within_limits.py index 1cb3aea..1ccd584 100644 --- a/genesis_forge/managers/action/position_within_limits.py +++ b/genesis_forge/managers/action/position_within_limits.py @@ -119,7 +119,7 @@ def process_actions(self, actions: torch.Tensor) -> torch.Tensor: The actions as position commands. """ # Convert the action from -1 to 1, to absolute position within the actuator limits - actions.clamp_(-1.0, 1.0) + actions = actions.clamp(-1.0, 1.0) actions = actions * self._scale + self._offset return actions From 62d2134270b3f36dd2aec34487c1a50bc106f10a Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 9 Mar 2026 22:12:20 -0700 Subject: [PATCH 7/7] Add soft_limit_scale_factor to position action manager --- genesis_forge/managers/action/base.py | 2 +- .../managers/action/position_action_manager.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/genesis_forge/managers/action/base.py b/genesis_forge/managers/action/base.py index 8a31a84..95a608c 100644 --- a/genesis_forge/managers/action/base.py +++ b/genesis_forge/managers/action/base.py @@ -228,7 +228,7 @@ def get_actions_dict(self, env_idx: int = 0) -> dict[str, float]: """ return { name: value.item() - for name, value in zip[tuple[str, Tensor]]( + for name, value in zip( self.dofs.keys(), self._actions[env_idx, :] ) } diff --git a/genesis_forge/managers/action/position_action_manager.py b/genesis_forge/managers/action/position_action_manager.py index 450d381..56a4367 100644 --- a/genesis_forge/managers/action/position_action_manager.py +++ b/genesis_forge/managers/action/position_action_manager.py @@ -31,6 +31,9 @@ class PositionActionManager(BaseActionManager): offset: Offset factor for the action. use_default_offset: Whether to use default joint positions configured in the articulation asset as offset. Defaults to True. clip: Clip the action values to the range. If omitted, the action values will automatically be clipped to the joint limits. + soft_limit_scale_factor: Scales the clip range of all limits by this factor around the midpoint + of each joint's limits to establish a safety region within the limits. + Defaults to 1.0. quiet_action_errors: Whether to quiet action errors. delay_step: The number of steps to delay the actions for. This is an easy way to emulate the latency in the system. @@ -110,6 +113,7 @@ def __init__( scale: float | dict[str, float] = 1.0, offset: float | dict[str, float] = 0.0, clip: tuple[float, float] | dict[str, tuple[float, float]] = None, + soft_limit_scale_factor: float = 1.0, use_default_offset: bool = True, action_handler: Callable[[torch.Tensor], None] = None, quiet_action_errors: bool = False, @@ -126,6 +130,7 @@ def __init__( self._offset_cfg = ensure_dof_pattern(offset) self._scale_cfg = ensure_dof_pattern(scale) self._clip_cfg = ensure_dof_pattern(clip) + self._soft_limit_scale_factor = soft_limit_scale_factor self._quiet_action_errors = quiet_action_errors self._enabled_dof = None self._use_default_offset = use_default_offset @@ -161,6 +166,11 @@ def build(self): self._clip_values = torch.stack([lower_limit, upper_limit], dim=1) if self._clip_cfg is not None: self._get_dof_value_tensor(self._clip_cfg, output=self._clip_values) + if self._soft_limit_scale_factor != 1.0: + midpoint = (self._clip_values[:, 0] + self._clip_values[:, 1]) * 0.5 + half_range = (self._clip_values[:, 1] - self._clip_values[:, 0]) * 0.5 * self._soft_limit_scale_factor + self._clip_values[:, 0] = midpoint - half_range + self._clip_values[:, 1] = midpoint + half_range # Scale self._scale_values = None