diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ebfc174f0a7..592a10c9497 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -56,6 +56,7 @@ Guidelines for modifications: * Cathy Y. Li * Cheng-Rong Lai * Chenyu Yang +* Christopher Allred * Connor Smith * CY (Chien-Ying) Chen * David Yang @@ -73,8 +74,10 @@ Guidelines for modifications: * Hongwei Xiong * Hongyu Li * Huihua Zhao +* Isaac Peterson * Iretiayo Akinola * Jack Zeng +* Jacob Hate * Jan Kerner * Jean Tampon * Jeonghwan Kim @@ -162,4 +165,4 @@ Guidelines for modifications: * Hammad Mazhar * Marco Hutter * Yan Chang -* Yashraj Narang +* Yashraj Narang \ No newline at end of file diff --git a/scripts/reinforcement_learning/harl/play.py b/scripts/reinforcement_learning/harl/play.py new file mode 100644 index 00000000000..65cbab9d52d --- /dev/null +++ b/scripts/reinforcement_learning/harl/play.py @@ -0,0 +1,145 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Train an algorithm.""" + +import argparse + +# import numpy as np +import sys +import torch + +from isaaclab.app import AppLauncher + +parser = argparse.ArgumentParser(description="Train an RL agent with HARL.") +parser.add_argument( + "--algorithm", + type=str, + default="happo", + choices=[ + "happo", + "hatrpo", + "haa2c", + "haddpg", + "hatd3", + "hasac", + "had3qn", + "maddpg", + "matd3", + "mappo", + "happo_adv", + ], + help="Algorithm name. Choose from: happo, hatrpo, haa2c, haddpg, hatd3, hasac, had3qn, maddpg, matd3, mappo.", +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument("--num_env_steps", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument("--dir", type=str, default=None, help="folder with trained models") +parser.add_argument("--debug", action="store_true", help="whether to run in debug mode for visualization") + +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +from harl.runners import RUNNER_REGISTRY + +from isaaclab.envs import DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config + +algorithm = args_cli.algorithm.lower() +agent_cfg_entry_point = f"harl_{algorithm}_cfg_entry_point" + + +@hydra_task_config(args_cli.task, agent_cfg_entry_point) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + args = args_cli.__dict__ + + args["env"] = "isaaclab" + args["algo"] = args["algorithm"] + args["exp_name"] = "play" + + algo_args = agent_cfg + + algo_args["eval"]["use_eval"] = False + algo_args["render"]["use_render"] = True + algo_args["train"]["model_dir"] = args["dir"] + + env_args = {} + env_cfg.scene.num_envs = args["num_envs"] + env_args["task"] = args["task"] + env_args["config"] = env_cfg + env_args["video_settings"] = {} + env_args["video_settings"]["video"] = False + env_args["headless"] = args["headless"] + 
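+    # Reviewer note (sketch, not part of the original change): "debug" mirrors the --debug CLI flag
+    # ("run in debug mode for visualization"); env_args as a whole is forwarded unchanged to the
+    # HARL runner created below via RUNNER_REGISTRY[args["algo"]](args, algo_args, env_args).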
env_args["debug"] = args["debug"] + + # create runner + runner = RUNNER_REGISTRY[args["algo"]](args, algo_args, env_args) + + obs, _, _ = runner.env.reset() + + max_action_space = 0 + + for agent_id, obs_space in runner.env.action_space.items(): + if obs_space.shape[0] > max_action_space: + max_action_space = obs_space.shape[0] + + actions = torch.zeros((args["num_envs"], runner.num_agents, max_action_space), dtype=torch.float32, device="cuda:0") + rnn_states = torch.zeros( + ( + args["num_envs"], + runner.num_agents, + runner.recurrent_n, + runner.rnn_hidden_size, + ), + dtype=torch.float32, + device="cuda:0", + ) + masks = torch.ones( + (args["num_envs"], runner.num_agents, 1), + dtype=torch.float32, + ) + + while simulation_app.is_running(): + with torch.inference_mode(): + for agent_id, (_, agent_obs) in enumerate(obs.items()): + action, _, rnn_state = runner.actor[agent_id].get_actions( + agent_obs, rnn_states[:, agent_id, :], masks[:, agent_id, :], None, None + ) + action_space = action.shape[1] + actions[:, agent_id, :action_space] = action + rnn_states[:, agent_id, :] = rnn_state + + obs, _, _, dones, _, _ = runner.env.step(actions) + + dones_env = torch.all(dones, dim=1) + masks = torch.ones((args["num_envs"], runner.num_agents, 1), dtype=torch.float32, device="cuda:0") + masks[dones_env] = 0.0 + rnn_states[dones_env] = torch.zeros( + ((dones_env).sum(), runner.num_agents, runner.recurrent_n, runner.rnn_hidden_size), + dtype=torch.float32, + device="cuda:0", + ) + + if runner.env.unwrapped.sim._number_of_steps >= args["num_env_steps"]: + break + + runner.env.close() + + +if __name__ == "__main__": + main() + simulation_app.close() diff --git a/scripts/reinforcement_learning/harl/train.py b/scripts/reinforcement_learning/harl/train.py new file mode 100644 index 00000000000..ba4eb471de2 --- /dev/null +++ b/scripts/reinforcement_learning/harl/train.py @@ -0,0 +1,138 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Train an algorithm.""" + +import argparse +import sys +import time + +from isaaclab.app import AppLauncher + +parser = argparse.ArgumentParser(description="Train an RL agent with HARL.") +parser.add_argument("--video", action="store_true", help="Record videos during training.") +parser.add_argument("--video_length", type=int, default=500, help="Length of the recorded video (in steps).") +parser.add_argument("--video_interval", type=int, default=20000, help="Interval between video recordings (in steps).") +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument("--seed", type=int, default=1, help="Seed used for the environment") +parser.add_argument("--save_interval", type=int, default=None, help="How often to save the model") +parser.add_argument("--save_checkpoints", action="store_true", default=False, help="Whether or not to save checkpoints") +parser.add_argument("--checkpoint_interval", type=int, default=200, help="How often to save a model checkpoint") +parser.add_argument("--log_interval", type=int, default=None, help="How often to log outputs") +parser.add_argument("--exp_name", type=str, default="test", help="Name of the Experiment") +parser.add_argument("--num_env_steps", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument("--dir", type=str, default=None, help="folder with trained models") +parser.add_argument("--debug", action="store_true", help="whether to run in debug mode for visualization") +parser.add_argument( + "--adversarial_training_mode", + default="parallel", + choices=["parallel", "ladder", "leapfrog"], + help=( + "the mode type for adversarial training, note on ladder training with teams that are" + " composed of heterogeneous agents, the two teams must place the robots in the same order in their environment " + " for ladder to work" + ), +) +parser.add_argument( + "--adversarial_training_iterations", + default=50_000_000, + type=int, + help="the number of iterations to swap training for adversarial modes like ladder and leapfrog", +) + +parser.add_argument( + "--algorithm", + type=str, + default="happo", + choices=["happo", "hatrpo", "haa2c", "mappo", "mappo_unshare", "happo_adv"], + help="Algorithm name. 
Choose from: happo, hatrpo, haa2c, mappo, and mappo_unshare.", +) + +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli, hydra_args = parser.parse_known_args() +# always enable cameras to record video +if args_cli.video: + args_cli.enable_cameras = True + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +# launch omniverse app +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +import os + +from harl.runners import RUNNER_REGISTRY + +from isaaclab.envs import DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.hydra import hydra_task_config + +algorithm = args_cli.algorithm.lower() +agent_cfg_entry_point = f"harl_{algorithm}_cfg_entry_point" + + +@hydra_task_config(args_cli.task, agent_cfg_entry_point) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): + + args = args_cli.__dict__ + + args["env"] = "isaaclab" + + args["algo"] = args["algorithm"] + + algo_args = agent_cfg + + algo_args["eval"]["use_eval"] = False + algo_args["train"]["n_rollout_threads"] = args["num_envs"] + algo_args["train"]["num_env_steps"] = args["num_env_steps"] + algo_args["train"]["eval_interval"] = args["save_interval"] + algo_args["train"]["save_checkpoints"] = args["save_checkpoints"] + algo_args["train"]["checkpoint_interval"] = args["checkpoint_interval"] + algo_args["train"]["log_interval"] = args["log_interval"] + algo_args["train"]["model_dir"] = args["dir"] + algo_args["seed"]["specify_seed"] = True + algo_args["seed"]["seed"] = args["seed"] + algo_args["algo"]["adversarial_training_mode"] = args["adversarial_training_mode"] + algo_args["algo"]["adversarial_training_iterations"] = args["adversarial_training_iterations"] + + env_args = {} + env_cfg.scene.num_envs = args["num_envs"] + env_args["task"] = args["task"] + env_args["config"] = env_cfg + hms_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + env_args["video_settings"] = { + "video": bool(args["video"]), + "video_length": args["video_length"], + "video_interval": args["video_interval"], + "log_dir": os.path.join( + algo_args["logger"]["log_dir"], + "isaaclab", + args["task"], + args["algorithm"], + args["exp_name"], + "-".join(["seed-{:0>5}".format(agent_cfg["seed"]["seed"]), hms_time]), + "videos", + ), + } + + env_args["headless"] = args["headless"] + env_args["debug"] = args["debug"] + + # create runner + + runner = RUNNER_REGISTRY[args["algo"]](args, algo_args, env_args) + runner.run() + runner.close() + + +if __name__ == "__main__": + main() # type: ignore + simulation_app.close() diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 173c8257c73..3c1b2b58b2a 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -46,7 +46,8 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==3.0.1", "onnxscript>=0.5"], # linux aarch 64 requires manual onnxscript installation + "rsl-rl": ["rsl-rl-lib==3.0.1", "onnxscript>=0.5"], # linux aarch 64 requires manual onnxscript installation, + "harl": ["harl@git+https://github.com/DIRECTLab/HARL.git"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/__init__.py index 
6c7759c049c..5cfb90ed6ed 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/__init__.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause """ -Ant locomotion environment. +Anymal-C environment. """ import gymnasium as gym @@ -15,6 +15,16 @@ # Register Gym environments. ## +gym.register( + id="Isaac-Velocity-Flat-Anymal-C-Happo-Direct-v0", + entry_point=f"{__name__}.anymal_c_happo_env:AnymalCHappoEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.anymal_c_happo_env:AnymalCHappoEnvCfg", + "harl_happo_cfg_entry_point": f"{agents.__name__}:harl_happo_cfg.yaml", + }, +) + gym.register( id="Isaac-Velocity-Flat-Anymal-C-Direct-v0", entry_point=f"{__name__}.anymal_c_env:AnymalCEnv", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/anymal_c_happo_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/anymal_c_happo_env.py new file mode 100644 index 00000000000..5d9eb26c614 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/anymal_c_happo_env.py @@ -0,0 +1,419 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import copy +import torch + +import isaaclab.envs.mdp as mdp +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, ArticulationCfg +from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensor, ContactSensorCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR +from isaaclab.utils.math import quat_from_angle_axis + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_C_CFG # isort: skip + + +@configclass +class EventCfg: + """Configuration for randomization.""" + + physics_material_0 = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass_0 = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names="base"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + +def define_markers() -> VisualizationMarkers: + marker_cfg = VisualizationMarkersCfg( + prim_path="/Visuals/myMarkers", + markers={ + "sphere1": sim_utils.SphereCfg( + radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "sphere2": sim_utils.SphereCfg( + radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + "arrow1": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "arrow2": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 
0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + }, + ) + return VisualizationMarkers(marker_cfg) + + +@configclass +class AnymalCHappoEnvCfg(DirectMARLEnvCfg): + # env + episode_length_s = 20.0 + decimation = 4 + action_scale = 0.5 + action_space = 12 + action_spaces = {f"robot_{i}": 12 for i in range(1)} + # observation_space = 48 + observation_space = 48 + observation_spaces = {f"robot_{i}": 48 for i in range(1)} + state_space = 0 + state_spaces = {f"robot_{i}": 0 for i in range(1)} + possible_agents = ["robot_0"] + + # simulation + sim: SimulationCfg = SimulationCfg( + dt=1 / 200, + render_interval=decimation, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + ) + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=1, env_spacing=6.0, replicate_physics=True) + + # events + events: EventCfg = EventCfg() + + # robot + robot_0: ArticulationCfg = ANYMAL_C_CFG.replace(prim_path="/World/envs/env_.*/Robot_0") + contact_sensor_0: ContactSensorCfg = ContactSensorCfg( + prim_path="/World/envs/env_.*/Robot_0/.*", history_length=3, update_period=0.005, track_air_time=True + ) + robot_0.init_state.rot = (1.0, 0.0, 0.0, 0.0) + robot_0.init_state.pos = (0.0, 0.0, 0.5) + + # reward scales (override from flat config) + flat_orientation_reward_scale = 0.0 + + # reward scales + lin_vel_reward_scale = 1.0 + yaw_rate_reward_scale = 0.5 + z_vel_reward_scale = -2.0 + ang_vel_reward_scale = -0.05 + joint_torque_reward_scale = -2.5e-5 + joint_accel_reward_scale = -2.5e-7 + action_rate_reward_scale = -0.01 + feet_air_time_reward_scale = 0.5 + undesired_contact_reward_scale = -1.0 + flat_orientation_reward_scale = -5.0 + + +class AnymalCHappoEnv(DirectMARLEnv): + cfg: AnymalCHappoEnvCfg + + def __init__(self, cfg: AnymalCHappoEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + # Joint position command (deviation from default joint positions) + + self.actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + self.previous_actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + # X/Y linear velocity and yaw angular velocity commands + # self._commands = {agent : torch.zeros(self.num_envs, 3, device=self.device) for agent in self.cfg.possible_agents} + self._commands = torch.zeros(self.num_envs, 3, device=self.device) + + # Logging + self._episode_sums = { + key: torch.zeros(self.num_envs, dtype=torch.float, device=self.device) + for key in [ + "track_lin_vel_xy_exp", + "track_ang_vel_z_exp", + "lin_vel_z_l2", + "ang_vel_xy_l2", + "dof_torques_l2", + "dof_acc_l2", + "action_rate_l2", + "feet_air_time", + "undesired_contacts", + "flat_orientation_l2", + ] + } + + self.base_ids = {} + self.feet_ids = {} + self.undesired_body_contact_ids = {} + + for robot_id, contact_sensor in self.contact_sensors.items(): + _base_id, _ = contact_sensor.find_bodies("base") + 
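+            # Descriptive note (added for clarity): the regex lookups below resolve ANYmal-C link names;
+            # ".*FOOT" selects the foot links used for the feet-air-time reward and ".*THIGH" the thigh
+            # links penalised by the undesired-contact term in _get_rewards.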
_feet_ids, _ = contact_sensor.find_bodies(".*FOOT") + _undesired_contact_body_ids, _ = contact_sensor.find_bodies(".*THIGH") + self.base_ids[robot_id] = _base_id + self.feet_ids[robot_id] = _feet_ids + self.undesired_body_contact_ids[robot_id] = _undesired_contact_body_ids + + def _setup_scene(self): + self.num_robots = sum(1 for key in self.cfg.__dict__.keys() if "robot_" in key) + self.robots = {} + self.contact_sensors = {} + self.height_scanners = {} + self.my_visualizer = define_markers() + + for i in range(self.num_robots): + self.robots[f"robot_{i}"] = Articulation(self.cfg.__dict__["robot_" + str(i)]) + self.scene.articulations[f"robot_{i}"] = self.robots[f"robot_{i}"] + self.contact_sensors[f"robot_{i}"] = ContactSensor(self.cfg.__dict__["contact_sensor_" + str(i)]) + self.scene.sensors[f"robot_{i}"] = self.contact_sensors[f"robot_{i}"] + + self.cfg.terrain.num_envs = self.scene.cfg.num_envs + self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing + self._terrain = self.cfg.terrain.class_type(self.cfg.terrain) + # clone, filter, and replicate + self.scene.clone_environments(copy_from_source=False) + self.scene.filter_collisions(global_prim_paths=[self.cfg.terrain.prim_path]) + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: torch.Tensor): + # We need to process the actions for each scene independently + self.processed_actions = {} + for robot_id, robot in self.robots.items(): + self.actions[robot_id] = actions[robot_id].clone() + self.processed_actions[robot_id] = ( + self.cfg.action_scale * self.actions[robot_id] + robot.data.default_joint_pos + ) + + def _apply_action(self): + for robot_id, robot in self.robots.items(): + robot.set_joint_position_target(self.processed_actions[robot_id]) + + def _get_observations(self) -> dict: + self.previous_actions = copy.deepcopy(self.actions) + obs = {} + + for robot_id, robot in self.robots.items(): + obs[robot_id] = torch.cat( + [ + tensor + for tensor in ( + robot.data.root_lin_vel_b, + robot.data.root_ang_vel_b, + robot.data.projected_gravity_b, + self._commands, + robot.data.joint_pos - robot.data.default_joint_pos, + robot.data.joint_vel, + self.actions[robot_id], + ) + if tensor is not None + ], + dim=-1, + ) + + return obs + + def _draw_markers(self, command): + xy_commands = command.clone() + z_commands = xy_commands[:, 2].clone() + xy_commands[:, 2] = 0 + + marker_ids = torch.concat( + [ + 0 * torch.zeros(2 * self._commands.shape[0]), + 1 * torch.ones(self._commands.shape[0]), + 2 * torch.ones(self._commands.shape[0]), + 3 * torch.ones(self._commands.shape[0]), + ], + dim=0, + ) + + robot_pos = self.robots["robot_0"].data.root_pos_w.clone() + robot_yaw = self.robots["robot_0"].data.root_ang_vel_b[:, 2].clone() + + scale1 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale1[:, 0] = torch.abs(z_commands) + + scale2 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale2[:, 0] = torch.abs(robot_yaw) + + offset1 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset1[:, 1] = 0 + + offset2 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset2[:, 1] = 0 + + marker_scales = torch.concat( + [torch.ones((3 * self._commands.shape[0], 3), device=self.device), scale1, scale2], dim=0 + ) + + marker_locations = torch.concat( + [ + robot_pos, + robot_pos + xy_commands, + robot_pos + self.robots["robot_0"].data.root_lin_vel_b, + robot_pos + 
offset1, + robot_pos + offset2, + ], + dim=0, + ) + + _90 = (-3.14 / 2) * torch.ones(self._commands.shape[0]).to(self.device) + + marker_orientations = quat_from_angle_axis( + torch.concat( + [ + torch.zeros(3 * self._commands.shape[0]).to(self.device), + torch.sign(z_commands) * _90, + torch.sign(robot_yaw) * _90, + ], + dim=0, + ), + torch.tensor([0.0, 1.0, 0.0], device=self.device), + ) + + self.my_visualizer.visualize( + marker_locations, marker_orientations, scales=marker_scales, marker_indices=marker_ids + ) + + def _get_rewards(self) -> dict: + self._draw_markers(self._commands) + # linear velocity tracking + lin_vel_error = torch.sum( + torch.square(self._commands[:, :2] - self.robots["robot_0"].data.root_lin_vel_b[:, :2]), dim=1 + ) + lin_vel_error_mapped = torch.exp(-lin_vel_error / 0.25) + # yaw rate tracking + yaw_rate_error = torch.square(self._commands[:, 2] - self.robots["robot_0"].data.root_ang_vel_b[:, 2]) + yaw_rate_error_mapped = torch.exp(-yaw_rate_error / 0.25) + # z velocity tracking + z_vel_error = torch.square(self.robots["robot_0"].data.root_lin_vel_b[:, 2]) + # angular velocity x/y + ang_vel_error = torch.sum(torch.square(self.robots["robot_0"].data.root_ang_vel_b[:, :2]), dim=1) + # joint torques + joint_torques = torch.sum(torch.square(self.robots["robot_0"].data.applied_torque), dim=1) + # joint acceleration + joint_accel = torch.sum(torch.square(self.robots["robot_0"].data.joint_acc), dim=1) + # action rate + action_rate = torch.sum(torch.square(self.actions["robot_0"] - self.previous_actions["robot_0"]), dim=1) + # feet air time + first_contact = self.contact_sensors["robot_0"].compute_first_contact(self.step_dt)[:, self.feet_ids["robot_0"]] + last_air_time = self.contact_sensors["robot_0"].data.last_air_time[:, self.feet_ids["robot_0"]] + air_time = torch.sum((last_air_time - 0.5) * first_contact, dim=1) * ( + torch.norm(self._commands[:, :2], dim=1) > 0.1 + ) + # undesired contacts + net_contact_forces = self.contact_sensors["robot_0"].data.net_forces_w_history + is_contact = ( + torch.max(torch.norm(net_contact_forces[:, :, self.undesired_body_contact_ids["robot_0"]], dim=-1), dim=1)[ + 0 + ] + > 1.0 + ) + contacts = torch.sum(is_contact, dim=1) + # flat orientation + flat_orientation = torch.sum(torch.square(self.robots["robot_0"].data.projected_gravity_b[:, :2]), dim=1) + + rewards = { + "track_lin_vel_xy_exp": lin_vel_error_mapped * self.cfg.lin_vel_reward_scale * self.step_dt, + "track_ang_vel_z_exp": yaw_rate_error_mapped * self.cfg.yaw_rate_reward_scale * self.step_dt, + "lin_vel_z_l2": z_vel_error * self.cfg.z_vel_reward_scale * self.step_dt, + "ang_vel_xy_l2": ang_vel_error * self.cfg.ang_vel_reward_scale * self.step_dt, + "dof_torques_l2": joint_torques * self.cfg.joint_torque_reward_scale * self.step_dt, + "dof_acc_l2": joint_accel * self.cfg.joint_accel_reward_scale * self.step_dt, + "action_rate_l2": action_rate * self.cfg.action_rate_reward_scale * self.step_dt, + "feet_air_time": air_time * self.cfg.feet_air_time_reward_scale * self.step_dt, + "undesired_contacts": contacts * self.cfg.undesired_contact_reward_scale * self.step_dt, + "flat_orientation_l2": flat_orientation * self.cfg.flat_orientation_reward_scale * self.step_dt, + } + reward = torch.sum(torch.stack(list(rewards.values())), dim=0) + # Logging + for key, value in rewards.items(): + self._episode_sums[key] += value + return {"robot_0": reward} + + def _get_dones(self) -> tuple[dict, dict]: + time_out = self.episode_length_buf >= self.max_episode_length - 1 + net_contact_forces = 
self.contact_sensors["robot_0"].data.net_forces_w_history + died = torch.any( + torch.max(torch.norm(net_contact_forces[:, :, self.base_ids["robot_0"]], dim=-1), dim=1)[0] > 1.0, dim=1 + ) + return {"robot_0": died}, {"robot_0": time_out} + + def _reset_idx(self, env_ids: torch.Tensor | None): + if env_ids is None or len(env_ids) == self.num_envs: + env_ids = self.robots["robot_0"]._ALL_INDICES + self.robots["robot_0"].reset(env_ids) + super()._reset_idx(env_ids) + if len(env_ids) == self.num_envs: + # Spread out the resets to avoid spikes in training when many environments reset at a similar time + self.episode_length_buf[:] = torch.randint_like(self.episode_length_buf, high=int(self.max_episode_length)) + self.actions["robot_0"][env_ids] = 0.0 + self.previous_actions["robot_0"][env_ids] = 0.0 + # Sample new commands + self._commands[env_ids] = torch.zeros_like(self._commands[env_ids]).uniform_(-1.0, 1.0) + # Reset robot state + joint_pos = self.robots["robot_0"].data.default_joint_pos[env_ids] + joint_vel = self.robots["robot_0"].data.default_joint_vel[env_ids] + default_root_state = self.robots["robot_0"].data.default_root_state[env_ids] + default_root_state[:, :3] += self._terrain.env_origins[env_ids] + self.robots["robot_0"].write_root_pose_to_sim(default_root_state[:, :7], env_ids) + self.robots["robot_0"].write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + self.robots["robot_0"].write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + # Logging + extras = dict() + for key in self._episode_sums.keys(): + episodic_sum_avg = torch.mean(self._episode_sums[key][env_ids]) + extras["Episode_Reward/" + key] = episodic_sum_avg / self.max_episode_length_s + self._episode_sums[key][env_ids] = 0.0 + self.extras["log"] = dict() + self.extras["log"].update(extras) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/__init__.py new file mode 100644 index 00000000000..1a072ce5318 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Multi Agent Envs. +""" + +import gymnasium as gym + +from . import agents +from .anymal_c_multi_agent_bar_env import AnymalCMultiAgentBar, AnymalCMultiAgentBarEnvCfg +from .h1_anymal_push_env import HeterogeneousPushMultiAgentEnv, HeterogeneousPushMultiAgentEnvCfg + +## +# Register Gym environments. 
+## + +gym.register( + id="Isaac-Multi-Agent-Anymal-C-Bar-Direct-v0", + entry_point=AnymalCMultiAgentBar, + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": AnymalCMultiAgentBarEnvCfg, + "harl_happo_cfg_entry_point": f"{agents.__name__}:harl_happo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Multi-Agent-H1-Anymal-C-Push-Direct-v0", + entry_point=HeterogeneousPushMultiAgentEnv, + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": HeterogeneousPushMultiAgentEnvCfg, + "harl_happo_cfg_entry_point": f"{agents.__name__}:harl_happo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/harl_happo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/harl_happo_cfg.yaml new file mode 100644 index 00000000000..fd72a6f48c1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/agents/harl_happo_cfg.yaml @@ -0,0 +1,130 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# This is the configuration file for the HAPPO algorithm. 
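+# Note (added for clarity): several of the train/seed fields below (n_rollout_threads, num_env_steps,
+# eval_interval, model_dir, seed, ...) are overwritten at launch time by the CLI arguments parsed in
+# scripts/reinforcement_learning/harl/train.py, so this file mainly provides the defaults.
+# Example invocation using only flags defined by the train.py added in this change (the exact
+# launcher prefix depends on the local Isaac Lab setup):
+#   python scripts/reinforcement_learning/harl/train.py \
+#     --task Isaac-Multi-Agent-Anymal-C-Bar-Direct-v0 --algorithm happo \
+#     --num_envs 1024 --num_env_steps 10000000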
+seed: + # whether to use the specified seed + seed_specify: True + # seed + seed: 1 +device: + # whether to use CUDA + cuda: True + # whether to set CUDA deterministic + cuda_deterministic: True + # arg to torch.set_num_threads + torch_threads: 4 + # if you have multiple GPUs, you can specify which GPU to use + gpu_id: 0 +train: + # number of parallel environments for training data collection + n_rollout_threads: 20 + # number of total training steps + num_env_steps: 10000000 + # number of steps per environment per training data collection + episode_length: 200 + # logging interval, also the interval by which the average done reward is checked + log_interval: 5 + # evaluation interval + eval_interval: 25 + # whether to use ValueNorm + use_valuenorm: True + # whether to use linear learning rate decay + use_linear_lr_decay: False + # whether to consider the case of truncation when an episode is done + use_proper_time_limits: True + # if set, load models from this directory; otherwise, randomly initialise the models + model_dir: # + # whether or not to save the entire model, or just the state dict + save_entire_model: False +eval: + # whether to use evaluation + use_eval: True + # number of parallel environments for evaluation + n_eval_rollout_threads: 10 + # number of episodes per evaluation + eval_episodes: 20 +render: + # whether to use render + use_render: False + # number of episodes to render + render_episodes: 10 +model: + # network parameters + # hidden sizes for mlp module in the network + hidden_sizes: [128, 128] + # activation function, choose from sigmoid, tanh, relu, leaky_relu, selu + activation_func: relu + # whether to use feature normalization + use_feature_normalization: True + # initialization method for network parameters, choose from xavier_uniform_, orthogonal_, ... + initialization_method: orthogonal_ + # gain of the output layer of the network. 
+ gain: 0.01 + # recurrent parameters + # whether to use rnn policy (data is not chunked for training) + use_naive_recurrent_policy: False + # whether to use rnn policy (data is chunked for training) + use_recurrent_policy: False + # number of recurrent layers + recurrent_n: 1 + # length of data chunk; only useful when use_recurrent_policy is True; episode_length has to be a multiple of data_chunk_length + data_chunk_length: 10 + # optimizer parameters + # actor learning rate + lr: 0.0005 + # critic learning rate + critic_lr: 0.0005 + # eps in Adam + opti_eps: 0.00001 + # weight_decay in Adam + weight_decay: 0 + # parameters of diagonal Gaussian distribution + std_x_coef: 1 + # parameters of diagonal Gaussian distribution + std_y_coef: 0.5 +algo: + # ppo parameters + # number of epochs for actor update + ppo_epoch: 5 + # number of epochs for critic update + critic_epoch: 5 + # whether to use clipped value loss + use_clipped_value_loss: True + # clip parameter + clip_param: 0.2 + # number of mini-batches per epoch for actor update + actor_num_mini_batch: 1 + # number of mini-batches per epoch for critic update + critic_num_mini_batch: 1 + # coefficient for entropy term in actor loss + entropy_coef: 0.01 + # coefficient for value loss + value_loss_coef: 1 + # whether to clip gradient norm + use_max_grad_norm: True + # max gradient norm + max_grad_norm: 10.0 + # whether to use Generalized Advantage Estimation (GAE) + use_gae: True + # discount factor + gamma: 0.99 + # GAE lambda + gae_lambda: 0.95 + # whether to use huber loss + use_huber_loss: True + # whether to use policy active masks + use_policy_active_masks: True + # huber delta + huber_delta: 10.0 + # method of aggregating the probability of multi-dimensional actions, choose from prod, mean + action_aggregation: prod + # whether to share parameter among actors + share_param: False + # whether to use a fixed optimisation order + fixed_order: False +logger: + # logging directory + log_dir: "./results" diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/anymal_c_multi_agent_bar_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/anymal_c_multi_agent_bar_env.py new file mode 100644 index 00000000000..c7e5473bcdb --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/anymal_c_multi_agent_bar_env.py @@ -0,0 +1,460 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import copy +import torch + +import isaaclab.envs.mdp as mdp +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, ArticulationCfg, RigidObject, RigidObjectCfg +from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensor, ContactSensorCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR +from isaaclab.utils.math import quat_from_angle_axis + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_C_CFG # isort: skip + + +@configclass +class EventCfg: + """Configuration for randomization.""" + + physics_material_0 = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + physics_material_1 = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_1", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass_0 = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names="base"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + add_base_mass_1 = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_1", body_names="base"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + +def define_markers() -> VisualizationMarkers: + marker_cfg = VisualizationMarkersCfg( + prim_path="/Visuals/myMarkers", + markers={ + "sphere1": sim_utils.SphereCfg( + radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "sphere2": sim_utils.SphereCfg( + radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + "arrow1": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "arrow2": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + }, + ) + return VisualizationMarkers(marker_cfg) + + +@configclass +class AnymalCMultiAgentBarEnvCfg(DirectMARLEnvCfg): + # env + episode_length_s = 20.0 + decimation = 4 + action_scale = 0.5 + action_space = 12 + action_spaces = {f"robot_{i}": 12 for i in range(2)} + # observation_space = 48 + observation_space = 48 + observation_spaces = {f"robot_{i}": 48 for i in range(2)} + state_space = 0 + state_spaces = {f"robot_{i}": 0 for i in range(2)} + possible_agents = ["robot_0", "robot_1"] + + # simulation + sim: SimulationCfg = SimulationCfg( + dt=1 / 200, + render_interval=decimation, + 
physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + ) + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=1, env_spacing=6.0, replicate_physics=True) + + # events + events: EventCfg = EventCfg() + + # robot + robot_0: ArticulationCfg = ANYMAL_C_CFG.replace(prim_path="/World/envs/env_.*/Robot_0") + contact_sensor_0: ContactSensorCfg = ContactSensorCfg( + prim_path="/World/envs/env_.*/Robot_0/.*", history_length=3, update_period=0.005, track_air_time=True + ) + robot_0.init_state.rot = (1.0, 0.0, 0.0, 1) + robot_0.init_state.pos = (-1.0, 0.0, 0.5) + + robot_1: ArticulationCfg = ANYMAL_C_CFG.replace(prim_path="/World/envs/env_.*/Robot_1") + contact_sensor_1: ContactSensorCfg = ContactSensorCfg( + prim_path="/World/envs/env_.*/Robot_1/.*", history_length=3, update_period=0.005, track_air_time=True + ) + robot_1.init_state.rot = (1.0, 0.0, 0.0, 1) + robot_1.init_state.pos = (1.0, 0.0, 0.5) + + # rec prism + cfg_rec_prism = RigidObjectCfg( + prim_path="/World/envs/env_.*/Object", + spawn=sim_utils.CuboidCfg( + size=(5, 0.1, 0.1), + rigid_props=sim_utils.RigidBodyPropertiesCfg(), + mass_props=sim_utils.MassPropertiesCfg(mass=0.01), # changed from 1.0 to 0.5 + collision_props=sim_utils.CollisionPropertiesCfg(), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 0.0)), + ), + init_state=RigidObjectCfg.InitialStateCfg( + pos=(0.0, 0, 0.61), rot=(1.0, 0.0, 0.0, 0.0) + ), # started the bar lower + ) + + # reward scales + lin_vel_reward_scale = 1.0 + yaw_rate_reward_scale = 1.0 + + bar_z_min_pos = 0.6 + + anymal_min_z_pos = 0.1 + max_bar_roll_angle_rad = 1 + + +class AnymalCMultiAgentBar(DirectMARLEnv): + cfg: AnymalCMultiAgentBarEnvCfg + + def __init__(self, cfg: AnymalCMultiAgentBarEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + # Joint position command (deviation from default joint positions) + + self.actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + self.previous_actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + # X/Y linear velocity and yaw angular velocity commands + self._commands = torch.zeros(self.num_envs, 3, device=self.device) + + # Logging + self._episode_sums = { + key: torch.zeros(self.num_envs, dtype=torch.float, device=self.device) + for key in [ + "track_lin_vel_xy_exp", + "track_ang_vel_z_exp", + ] + } + + self.base_ids = {} + self.feet_ids = {} + self.undesired_body_contact_ids = {} + + for robot_id, contact_sensor in self.contact_sensors.items(): + _base_id, _ = contact_sensor.find_bodies("base") + _feet_ids, _ = contact_sensor.find_bodies(".*FOOT") + _undesired_contact_body_ids, _ = contact_sensor.find_bodies(".*THIGH") + self.base_ids[robot_id] = _base_id + self.feet_ids[robot_id] = _feet_ids + self.undesired_body_contact_ids[robot_id] = _undesired_contact_body_ids + + def _setup_scene(self): + self.num_robots = sum(1 for key in 
self.cfg.__dict__.keys() if "robot_" in key) + self.robots = {} + self.contact_sensors = {} + self.height_scanners = {} + self.object = RigidObject(self.cfg.cfg_rec_prism) + self.my_visualizer = define_markers() + self.scene.rigid_objects["object"] = self.object + + for i in range(self.num_robots): + self.robots[f"robot_{i}"] = Articulation(self.cfg.__dict__["robot_" + str(i)]) + self.scene.articulations[f"robot_{i}"] = self.robots[f"robot_{i}"] + self.contact_sensors[f"robot_{i}"] = ContactSensor(self.cfg.__dict__["contact_sensor_" + str(i)]) + self.scene.sensors[f"robot_{i}"] = self.contact_sensors[f"robot_{i}"] + + self.cfg.terrain.num_envs = self.scene.cfg.num_envs + self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing + self._terrain = self.cfg.terrain.class_type(self.cfg.terrain) + # clone, filter, and replicate + self.scene.clone_environments(copy_from_source=False) + self.scene.filter_collisions(global_prim_paths=[self.cfg.terrain.prim_path]) + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: torch.Tensor): + # We need to process the actions for each scene independently + self.processed_actions = copy.deepcopy(actions) + for robot_id, robot in self.robots.items(): + self.actions[robot_id] = actions[robot_id].clone() + self.processed_actions[robot_id] = ( + self.cfg.action_scale * self.actions[robot_id] + robot.data.default_joint_pos + ) + + def _apply_action(self): + for robot_id, robot in self.robots.items(): + robot.set_joint_position_target(self.processed_actions[robot_id]) + + def _get_observations(self) -> dict: + self.previous_actions = copy.deepcopy(self.actions) + + obs = {} + + for robot_id, robot in self.robots.items(): + obs[robot_id] = torch.cat( + [ + tensor + for tensor in ( + robot.data.root_com_lin_vel_b, + robot.data.root_com_ang_vel_b, + robot.data.projected_gravity_b, + self._commands, + robot.data.joint_pos - robot.data.default_joint_pos, + robot.data.joint_vel, + self.actions[robot_id], + ) + if tensor is not None + ], + dim=-1, + ) + # obs = torch.cat(obs, dim=0) + # observations = {"policy": obs} + return obs + + def get_y_euler_from_quat(self, quaternion): + w, x, y, z = quaternion[:, 0], quaternion[:, 1], quaternion[:, 2], quaternion[:, 3] + y_euler_angle = torch.arcsin(2 * (w * y - z * x)) + return y_euler_angle + + def _draw_markers(self, command): + xy_commands = command.clone() + z_commands = xy_commands[:, 2].clone() + xy_commands[:, 2] = 0 + + marker_ids = torch.concat( + [ + 0 * torch.zeros(2 * self._commands.shape[0]), + 1 * torch.ones(self._commands.shape[0]), + 2 * torch.ones(self._commands.shape[0]), + 3 * torch.ones(self._commands.shape[0]), + ], + dim=0, + ) + + bar_pos = self.object.data.body_com_pos_w.squeeze(1).clone() + bar_yaw = self.object.data.root_com_ang_vel_b[:, 2].clone() + + scale1 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale1[:, 0] = torch.abs(z_commands) + + scale2 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale2[:, 0] = torch.abs(bar_yaw) + + offset1 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset1[:, 1] = 0 + + offset2 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset2[:, 1] = 0 + + _90 = (-3.14 / 2) * torch.ones(self._commands.shape[0]).to(self.device) + + marker_orientations = quat_from_angle_axis( + torch.concat( + [ + torch.zeros(3 * self._commands.shape[0]).to(self.device), + 
torch.sign(z_commands) * _90, + torch.sign(bar_yaw) * _90, + ], + dim=0, + ), + torch.tensor([0.0, 1.0, 0.0], device=self.device), + ) + + marker_scales = torch.concat( + [torch.ones((3 * self._commands.shape[0], 3), device=self.device), scale1, scale2], dim=0 + ) + + marker_locations = torch.concat( + [ + bar_pos, + bar_pos + xy_commands, + bar_pos + self.object.data.root_com_lin_vel_b, + bar_pos + offset1, + bar_pos + offset2, + ], + dim=0, + ) + + self.my_visualizer.visualize( + marker_locations, marker_orientations, scales=marker_scales, marker_indices=marker_ids + ) + + def _get_rewards(self) -> dict: + bar_commands = torch.stack([-self._commands[:, 1], self._commands[:, 0], self._commands[:, 2]]).t() + + self._draw_markers(bar_commands) + + # xy linear velocity tracking + lin_vel_error = torch.sum(torch.square(bar_commands[:, :2] - self.object.data.root_com_lin_vel_b[:, :2]), dim=1) + lin_vel_error_mapped = torch.exp(-lin_vel_error) + + # yaw rate tracking + yaw_rate_error = torch.square(self._commands[:, 2] - self.object.data.root_com_ang_vel_b[:, 2]) + yaw_rate_error_mapped = torch.exp(-yaw_rate_error) + + rewards = { + "track_lin_vel_xy_exp": lin_vel_error_mapped * self.cfg.lin_vel_reward_scale * self.step_dt, + "track_ang_vel_z_exp": yaw_rate_error_mapped * self.cfg.yaw_rate_reward_scale * self.step_dt, + } + reward = torch.sum(torch.stack(list(rewards.values())), dim=0) + + # Logging + for key, value in rewards.items(): + self._episode_sums[key] += value + + return {"robot_0": reward, "robot_1": reward} + + def _get_anymal_fallen(self): + agent_dones = [] + + for _, robot in self.robots.items(): + died = robot.data.body_com_pos_w[:, 0, 2].view(-1) < self.cfg.anymal_min_z_pos + agent_dones.append(died) + + return torch.any(torch.stack(agent_dones), dim=0) + + def _get_bar_fallen(self): + bar_z_pos = self.object.data.body_com_pos_w[:, :, 2].view(-1) + bar_roll_angle = torch.abs(self.get_y_euler_from_quat(self.object.data.root_com_quat_w)) + + bar_angle_maxes = bar_roll_angle > self.cfg.max_bar_roll_angle_rad + bar_fallen = bar_z_pos < self.cfg.bar_z_min_pos + + return torch.logical_or(bar_angle_maxes, bar_fallen) + + def _get_timeouts(self): + return self.episode_length_buf >= self.max_episode_length - 1 + + def _get_dones(self) -> tuple[dict, dict]: + time_out = self._get_timeouts() + anymal_fallen = self._get_anymal_fallen() + bar_fallen = self._get_bar_fallen() + + dones = torch.logical_or(anymal_fallen, bar_fallen) + + return {key: time_out for key in self.robots.keys()}, {key: dones for key in self.robots.keys()} + + def _reset_idx(self, env_ids: torch.Tensor): + super()._reset_idx(env_ids) + + object_default_state = self.object.data.default_root_state.clone()[env_ids] + object_default_state[:, 0:3] = object_default_state[:, 0:3] + self.scene.env_origins[env_ids] + self.object.write_root_state_to_sim(object_default_state, env_ids) + self.object.reset(env_ids) + + for agent, _ in self.cfg.action_spaces.items(): + self.actions[agent][env_ids] = 0.0 + self.previous_actions[agent][env_ids] = 0.0 + + self._commands[env_ids] = torch.zeros_like(self._commands[env_ids]).uniform_(-1.0, 1.0) + + for _, robot in self.robots.items(): + if env_ids is None or len(env_ids) == self.num_envs: + env_ids = robot._ALL_INDICES + robot.reset(env_ids) + if len(env_ids) == self.num_envs: + # Spread out the resets to avoid spikes in training when many environments reset at a similar time + self.episode_length_buf[:] = torch.randint_like( + self.episode_length_buf, high=int(self.max_episode_length) + 
) + + # Reset robot state + joint_pos = robot.data.default_joint_pos[env_ids] + joint_vel = robot.data.default_joint_vel[env_ids] + default_root_state = robot.data.default_root_state[env_ids] + default_root_state[:, :3] += self._terrain.env_origins[env_ids] + robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + # Logging + extras = dict() + for key in self._episode_sums.keys(): + episodic_sum_avg = torch.mean(self._episode_sums[key][env_ids]) + extras["Episode_Reward_s/" + key] = episodic_sum_avg / self.max_episode_length_s + self._episode_sums[key][env_ids] = 0.0 + self.extras["log"] = dict() + self.extras["log"].update(extras) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/h1_anymal_push_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/h1_anymal_push_env.py new file mode 100644 index 00000000000..4628868467b --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/multi_agent_coordination/h1_anymal_push_env.py @@ -0,0 +1,688 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import copy +import torch + +import isaacsim.core.utils.torch as torch_utils +from isaacsim.core.utils.torch.rotations import compute_heading_and_up, compute_rot, quat_conjugate + +import isaaclab.envs.mdp as mdp +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, ArticulationCfg, RigidObject, RigidObjectCfg +from isaaclab.envs import DirectMARLEnv, DirectMARLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensor, ContactSensorCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR +from isaaclab.utils.math import quat_from_angle_axis + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_C_CFG # isort: skip +from isaaclab_assets.robots.unitree import H1_CFG # isort: skip + + +def normalize_angle(x): + return torch.atan2(torch.sin(x), torch.cos(x)) + + +@configclass +class EventCfg: + """Configuration for randomization.""" + + physics_material_0 = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + physics_material_1 = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_1", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass_0 = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_0", body_names="base"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + add_base_mass_1 = EventTerm( + 
func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot_1", body_names="pelvis"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + +@configclass +class HeterogeneousPushMultiAgentEnvCfg(DirectMARLEnvCfg): + # env + episode_length_s = 20.0 + decimation = 4 + anymal_action_scale = 0.5 + action_space = 12 + action_spaces = {"robot_0": 12, "robot_1": 19} + + observation_spaces = {"robot_0": 48, "robot_1": 72} + state_space = 0 + state_spaces = {f"robot_{i}": 0 for i in range(2)} + possible_agents = ["robot_0", "robot_1"] + + # simulation + sim: SimulationCfg = SimulationCfg( + dt=1 / 200, + render_interval=decimation, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + ) + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=1, env_spacing=6.0, replicate_physics=True) + + # events + events: EventCfg = EventCfg() + + # robot + robot_0: ArticulationCfg = ANYMAL_C_CFG.replace(prim_path="/World/envs/env_.*/Robot_0") + contact_sensor_0: ContactSensorCfg = ContactSensorCfg( + prim_path="/World/envs/env_.*/Robot_0/.*", history_length=3, update_period=0.005, track_air_time=True + ) + robot_0.init_state.rot = (1.0, 0.0, 0.0, 1.0) + robot_0.init_state.pos = (-1.0, 0.0, 0.5) + + robot_1: ArticulationCfg = H1_CFG.replace(prim_path="/World/envs/env_.*/Robot_1") + robot_1.init_state.rot = (1.0, 0.0, 0.0, 1.0) + robot_1.init_state.pos = (1.0, 0.0, 1.0) + + # rec prism + cfg_rec_prism = RigidObjectCfg( + prim_path="/World/envs/env_.*/Object", + spawn=sim_utils.CuboidCfg( + size=(3, 2, 2), + rigid_props=sim_utils.RigidBodyPropertiesCfg(), + mass_props=sim_utils.MassPropertiesCfg(mass=10), + collision_props=sim_utils.CollisionPropertiesCfg(), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 0.0)), + # visual_material=sim_utils.GlassMdlCfg(glass_color=(0.0, 1.0, 0.0), frosting_roughness=0.7), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, 2, 0.6), rot=(1.0, 0.0, 0.0, 0.0)), # started the bar lower + ) + + # reward scales (override from flat config) + flat_orientation_reward_scale = 0.0 + + # reward scales + lin_vel_reward_scale = 1.0 + yaw_rate_reward_scale = 0.5 + z_vel_reward_scale = -2.0 + ang_vel_reward_scale = -0.05 + joint_torque_reward_scale = -2.5e-5 + joint_accel_reward_scale = -2.5e-7 + action_rate_reward_scale = -0.01 + feet_air_time_reward_scale = 0.5 + undersired_contact_reward_scale = -1.0 + flat_orientation_reward_scale = -5.0 + flat_bar_roll_angle_reward_scale = -1.0 + angular_velocity_scale: float = 0.25 + dof_vel_scale: float = 0.1 + h1_action_scale = 1.0 + termination_height: float = 0.8 + anymal_min_z_pos = 0.3 + h1_min_z_pos = 0.8 + + joint_gears: list = [ + 50.0, # left_hip_yaw + 50.0, # right_hip_yaw + 50.0, # torso + 50.0, # left_hip_roll + 50.0, # right_hip_roll + 50.0, # left_shoulder_pitch + 50.0, # right_shoulder_pitch + 50.0, # left_hip_pitch + 50.0, # right_hip_pitch + 50.0, # left_shoulder_roll + 50.0, # right_shoulder_roll + 50.0, # left_knee + 50.0, # right_knee + 50.0, # left_shoulder_yaw + 
50.0, # right_shoulder_yaw + 50.0, # left_ankle + 50.0, # right_ankle + 50.0, # left_elbow + 50.0, # right_elbow + ] + + +def define_markers() -> VisualizationMarkers: + marker_cfg = VisualizationMarkersCfg( + prim_path="/Visuals/myMarkers", + markers={ + "sphere1": sim_utils.SphereCfg( + radius=0.15, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "sphere2": sim_utils.SphereCfg( + radius=0.15, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + "arrow1": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 0.0, 0.0)), + ), + "arrow2": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/arrow_x.usd", + scale=(0.1, 0.1, 1.0), + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 1.0)), + ), + }, + ) + return VisualizationMarkers(marker_cfg) + + +class HeterogeneousPushMultiAgentEnv(DirectMARLEnv): + cfg: HeterogeneousPushMultiAgentEnvCfg + + def __init__( + self, + cfg: HeterogeneousPushMultiAgentEnvCfg, + render_mode: str | None = None, + **kwargs, + ): + super().__init__(cfg, render_mode, **kwargs) + # Joint position command (deviation from default joint positions) + self.actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + self.previous_actions = { + agent: torch.zeros(self.num_envs, action_space, device=self.device) + for agent, action_space in self.cfg.action_spaces.items() + } + self.base_bodies = ["base", "pelvis"] + # X/Y linear velocity and yaw angular velocity commands + self._commands = torch.zeros(self.num_envs, 3, device=self.device) + self._joint_dof_idx, _ = self.robots["robot_1"].find_joints(".*") + + # Logging + self._episode_sums = { + key: torch.zeros(self.num_envs, dtype=torch.float32, device=self.device) + for key in [ + "track_lin_vel_xy_exp", + "track_ang_vel_z_exp", + ] + } + + self.base_ids = {} + self.feet_ids = {} + self.undesired_body_contact_ids = {} + + for idx, (robot_id, contact_sensor) in enumerate(self.contact_sensors.items()): + _base_id, _ = contact_sensor.find_bodies(self.base_bodies[idx]) + _feet_ids, _ = contact_sensor.find_bodies(".*FOOT") + _undesired_contact_body_ids, _ = contact_sensor.find_bodies(".*THIGH") + self.base_ids[robot_id] = _base_id + self.feet_ids[robot_id] = _feet_ids + self.undesired_body_contact_ids[robot_id] = _undesired_contact_body_ids + + self.potentials = torch.zeros(self.num_envs, dtype=torch.float32, device=self.sim.device) + self.prev_potentials = torch.zeros_like(self.potentials) + self.targets = torch.tensor([1000, 0, 0], dtype=torch.float32, device=self.sim.device).repeat( + (self.num_envs, 1) + ) + self.joint_gears = torch.tensor(self.cfg.joint_gears, dtype=torch.float32, device=self.sim.device) + self.targets = torch.tensor([1000, 0, 0], dtype=torch.float32, device=self.sim.device).repeat( + (self.num_envs, 1) + ) + self.targets += self.scene.env_origins + self.heading_vec = torch.tensor([1, 0, 0], dtype=torch.float32, device=self.sim.device).repeat( + (self.num_envs, 1) + ) + self.up_vec = torch.tensor([0, 0, 1], dtype=torch.float32, device=self.sim.device).repeat((self.num_envs, 1)) + self.start_rotation = torch.tensor([1, 0, 0, 0], device=self.sim.device, dtype=torch.float32) + self.inv_start_rot = quat_conjugate(self.start_rotation).repeat((self.num_envs, 1)) + self.basis_vec0 = 
self.heading_vec.clone() + self.basis_vec1 = self.up_vec.clone() + + def _setup_scene(self): + self.num_robots = sum(1 for key in self.cfg.__dict__.keys() if "robot_" in key) + self.robots = {} + self.contact_sensors = {} + self.height_scanners = {} + self.my_visualizer = define_markers() + self.object = RigidObject(self.cfg.cfg_rec_prism) + + self.scene.rigid_objects["object"] = self.object + + for i in range(self.num_robots): + robot_id = f"robot_{i}" + if robot_id in self.cfg.__dict__: + self.robots[f"robot_{i}"] = Articulation(self.cfg.__dict__["robot_" + str(i)]) + self.scene.articulations[f"robot_{i}"] = self.robots[f"robot_{i}"] + + contact_sensor_id = "contact_sensor_" + str(i) + + if contact_sensor_id in self.cfg.__dict__: + self.contact_sensors[f"robot_{i}"] = ContactSensor(self.cfg.__dict__["contact_sensor_" + str(i)]) + self.scene.sensors[f"robot_{i}"] = self.contact_sensors[f"robot_{i}"] + + self.cfg.terrain.num_envs = self.scene.cfg.num_envs + self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing + self._terrain = self.cfg.terrain.class_type(self.cfg.terrain) + # clone, filter, and replicate + self.scene.clone_environments(copy_from_source=False) + self.scene.filter_collisions(global_prim_paths=[self.cfg.terrain.prim_path]) + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: dict): + # We need to process the actions for each scene independently + self.processed_actions = copy.deepcopy(actions) + + robot_id = "robot_0" + robot_action_space = self.action_spaces[robot_id].shape[0] + self.actions[robot_id] = actions[robot_id][:, :robot_action_space].clone() + self.processed_actions[robot_id] = ( + self.cfg.anymal_action_scale * self.actions[robot_id] + self.robots[robot_id].data.default_joint_pos + ) + + robot_id = "robot_1" + self.actions[robot_id] = actions[robot_id].clone() + + def _get_anymal_fallen(self): + agent_dones = [] + robot = self.robots["robot_0"] + died = robot.data.body_com_pos_w[:, 0, 2].view(-1) < self.cfg.anymal_min_z_pos + agent_dones.append(died) + + robot = self.robots["robot_1"] + died = robot.data.body_com_pos_w[:, 0, 2].view(-1) < self.cfg.h1_min_z_pos + agent_dones.append(died) + + return torch.any(torch.stack(agent_dones), dim=0) + + def _apply_action(self): + + robot_id = "robot_0" + self.robots[robot_id].set_joint_position_target(self.processed_actions[robot_id]) + + robot_id = "robot_1" + forces = self.cfg.h1_action_scale * self.joint_gears * self.actions[robot_id] + self.robots[robot_id].set_joint_effort_target(forces, joint_ids=self._joint_dof_idx) + + def _compute_intermediate_values(self): + self.torso_position, self.torso_rotation = ( + self.robots["robot_1"].data.root_link_pos_w, + self.robots["robot_1"].data.root_link_quat_w, + ) + self.velocity, self.ang_velocity = ( + self.robots["robot_1"].data.root_com_lin_vel_w, + self.robots["robot_1"].data.root_com_ang_vel_w, + ) + self.dof_pos, self.dof_vel = self.robots["robot_1"].data.joint_pos, self.robots["robot_1"].data.joint_vel + + ( + self.up_proj, + self.heading_proj, + self.up_vec, + self.heading_vec, + self.vel_loc, + self.angvel_loc, + self.roll, + self.pitch, + self.yaw, + self.angle_to_target, + self.dof_pos_scaled, + self.prev_potentials, + self.potentials, + ) = compute_intermediate_values( + self.targets, + self.torso_position, + self.torso_rotation, + self.velocity, + self.ang_velocity, + self.dof_pos, + 
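+ # the H1's soft joint position limits (lower/upper bounds) are passed so that
+ # torch_utils.maths.unscale can map the joint positions into roughly [-1, 1],
+ # producing the dof_pos_scaled term used in robot_1's observations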
self.robots["robot_1"].data.soft_joint_pos_limits[0, :, 0], + self.robots["robot_1"].data.soft_joint_pos_limits[0, :, 1], + self.inv_start_rot, + self.basis_vec0, + self.basis_vec1, + self.potentials, + self.prev_potentials, + self.cfg.sim.dt, + ) + + def _get_observations(self) -> dict: + self.previous_actions = copy.deepcopy(self.actions) + + obs = {} + + robot_id = "robot_0" + robot = self.robots[robot_id] + # anymal_commands = torch.zeros_like(self._commands) + obs[robot_id] = torch.cat( + [ + tensor + for tensor in ( + robot.data.root_com_lin_vel_b, + robot.data.root_com_ang_vel_b, + robot.data.projected_gravity_b, + self._commands, + robot.data.joint_pos - robot.data.default_joint_pos, + robot.data.joint_vel, + None, + self.actions[robot_id], + ) + if tensor is not None + ], + dim=-1, + ) + + robot_id = "robot_1" + robot = self.robots[robot_id] + obs[robot_id] = torch.cat( + ( + self.torso_position[:, 2].view(-1, 1), + self.vel_loc, + self.angvel_loc * self.cfg.angular_velocity_scale, + normalize_angle(self.yaw).unsqueeze(-1), + normalize_angle(self.roll).unsqueeze(-1), + normalize_angle(self.angle_to_target).unsqueeze(-1), + self.up_proj.unsqueeze(-1), + self.heading_proj.unsqueeze(-1), + self.dof_pos_scaled, + self.dof_vel * self.cfg.dof_vel_scale, + self.actions[robot_id], + self._commands, + ), + dim=-1, + ) + return obs + + def get_y_euler_from_quat(self, quaternion): + w, x, y, z = quaternion[:, 0], quaternion[:, 1], quaternion[:, 2], quaternion[:, 3] + y_euler_angle = torch.arcsin(2 * (w * y - z * x)) + return y_euler_angle + + def _draw_markers(self, command): + xy_commands = command.clone() + z_commands = xy_commands[:, 2].clone() + xy_commands[:, 2] = 0 + + marker_ids = torch.concat( + [ + 0 * torch.zeros(2 * self._commands.shape[0]), + 1 * torch.ones(self._commands.shape[0]), + 2 * torch.ones(self._commands.shape[0]), + 3 * torch.ones(self._commands.shape[0]), + ], + dim=0, + ) + + bar_pos = self.object.data.body_com_pos_w.squeeze(1).clone() + bar_yaw = self.object.data.root_com_ang_vel_b[:, 2].clone() + + scale1 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale1[:, 0] = torch.abs(z_commands) + + scale2 = torch.ones((self._commands.shape[0], 3), device=self.device) + scale2[:, 0] = torch.abs(bar_yaw) + + offset1 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset1[:, 1] = 0 + + offset2 = torch.zeros((self._commands.shape[0], 3), device=self.device) + offset2[:, 1] = 0 + + _90 = (-3.14 / 2) * torch.ones(self._commands.shape[0]).to(self.device) + + marker_orientations = quat_from_angle_axis( + torch.concat( + [ + torch.zeros(3 * self._commands.shape[0]).to(self.device), + torch.sign(z_commands) * _90, + torch.sign(bar_yaw) * _90, + ], + dim=0, + ), + torch.tensor([0.0, 1.0, 0.0], device=self.device), + ) + + marker_scales = torch.concat( + [torch.ones((3 * self._commands.shape[0], 3), device=self.device), scale1, scale2], dim=0 + ) + + obj_vel = self.object.data.root_com_lin_vel_b.clone() + obj_vel[:, 2] = 0 + + offset = torch.tensor([0, 2, 0], device=self.device) + + marker_locations = torch.concat( + [ + bar_pos + offset, + bar_pos + xy_commands + offset, + bar_pos + obj_vel + offset, + bar_pos + offset1 + offset, + bar_pos + offset2 + offset, + ], + dim=0, + ) + + self.my_visualizer.visualize( + marker_locations, marker_orientations, scales=marker_scales, marker_indices=marker_ids + ) + + def _get_rewards(self) -> dict: + reward = {} + + bar_commands = torch.stack([-self._commands[:, 1], self._commands[:, 0], self._commands[:, 
2]]).t() + self._draw_markers(bar_commands) + obj_xy_vel = self.object.data.root_com_lin_vel_b[:, :2] + + # linear velocity tracking + lin_vel_error = torch.sum(torch.square(bar_commands[:, :2] - obj_xy_vel), dim=1) # changing this to the bar + lin_vel_error_mapped = torch.exp(-lin_vel_error) + + # angular velocity tracking + yaw_rate_error = torch.square(self._commands[:, 2] - self.object.data.root_com_ang_vel_b[:, 2]) + yaw_rate_error_mapped = torch.exp(-yaw_rate_error) + + rewards = { + "track_lin_vel_xy_exp": lin_vel_error_mapped * self.cfg.lin_vel_reward_scale * self.step_dt, + "track_ang_vel_z_exp": yaw_rate_error_mapped * self.cfg.yaw_rate_reward_scale * self.step_dt, + } + reward = torch.sum(torch.stack(list(rewards.values())), dim=0) + + for key, val in rewards.items(): + self._episode_sums[key] += val + + return {"robot_0": reward, "robot_1": reward} + + def _get_too_far_away(self): + anymal_pos = self.robots["robot_0"].data.body_com_pos_w[:, 0, :] + h1_pos = self.robots["robot_1"].data.body_com_pos_w[:, 0, :] + + box_pos = self.object.data.body_com_pos_w[:, 0, :] + + anymal_too_far = ( + torch.sqrt(torch.square(anymal_pos[:, 0] - box_pos[:, 0]) + torch.square(anymal_pos[:, 1] - box_pos[:, 1])) + > 3 + ) + h1_too_far = ( + torch.sqrt(torch.square(h1_pos[:, 0] - box_pos[:, 0]) + torch.square(h1_pos[:, 1] - box_pos[:, 1])) > 3 + ) + + return torch.logical_or(anymal_too_far, h1_too_far) + + def _get_dones(self) -> tuple[dict, dict]: + self._compute_intermediate_values() + time_out = self.episode_length_buf >= self.max_episode_length - 1 + h1_died = self.torso_position[:, 2] < self.cfg.termination_height + anymal_fallen = self._get_anymal_fallen() + too_far = self._get_too_far_away() + dones = torch.logical_or(h1_died, anymal_fallen) + dones = torch.logical_or(dones, too_far) + return {key: time_out for key in self.robots.keys()}, {key: dones for key in self.robots.keys()} + + def _reset_idx(self, env_ids: torch.Tensor | None): + super()._reset_idx(env_ids) + object_default_state = self.object.data.default_root_state.clone()[env_ids] + object_default_state[:, 0:3] = object_default_state[:, 0:3] + self.scene.env_origins[env_ids] + self.object.write_root_state_to_sim(object_default_state, env_ids) + self.object.reset(env_ids) + + if len(env_ids) == self.num_envs: + # Spread out the resets to avoid spikes in training when many environments reset at a similar time + self.episode_length_buf[:] = torch.randint_like(self.episode_length_buf, high=int(self.max_episode_length)) + + # Joint position command (deviation from default joint positions) + for agent, action_space in self.cfg.action_spaces.items(): + self.actions[agent][env_ids] = torch.zeros(env_ids.shape[0], action_space, device=self.device) + self.previous_actions[agent][env_ids] = torch.zeros(env_ids.shape[0], action_space, device=self.device) + + # X/Y linear velocity and yaw angular velocity commands + self._commands[env_ids] = torch.zeros_like(self._commands[env_ids]).uniform_(-1.0, 1.0) + self._commands[env_ids, 0] = torch.zeros_like(self._commands[env_ids, 0]).uniform_(0.5, 1.0) + + # reset idx for anymal # + robot = self.robots["robot_0"] + if env_ids is None or len(env_ids) == self.num_envs: + env_ids = robot._ALL_INDICES + robot.reset(env_ids) + + joint_pos = robot.data.default_joint_pos[env_ids] + joint_vel = robot.data.default_joint_vel[env_ids] + default_root_state = robot.data.default_root_state[env_ids] + default_root_state[:, :3] += self._terrain.env_origins[env_ids] + 
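+ # write the sampled default root pose/velocity and joint state back to the simulator for the
+ # ANYmal (robot_0); the H1 (robot_1) is reset with the same sequence below, after its goal
+ # potentials are re-initialized from the current default_root_state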
robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + + robot = self.robots["robot_1"] + if env_ids is None or len(env_ids) == self.num_envs: + env_ids = robot._ALL_INDICES + robot.reset(env_ids) + + # Reset robot state + to_target = self.targets[env_ids] - default_root_state[:, :3] + to_target[:, 2] = 0.0 + self.potentials[env_ids] = -torch.norm(to_target, p=2, dim=-1) / self.cfg.sim.dt + self._compute_intermediate_values() + joint_pos = robot.data.default_joint_pos[env_ids] + joint_vel = robot.data.default_joint_vel[env_ids] + default_root_state = robot.data.default_root_state[env_ids] + default_root_state[:, :3] += self._terrain.env_origins[env_ids] + robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids) + # Logging + extras = dict() + for key in self._episode_sums.keys(): + episodic_sum_avg = torch.mean(self._episode_sums[key][env_ids]) + extras["Episode_Reward/" + key] = episodic_sum_avg / self.max_episode_length_s + self._episode_sums[key][env_ids] = 0.0 + self.extras["log"] = dict() + self.extras["log"].update(extras) + extras = dict() + + +@torch.jit.script +def compute_intermediate_values( + targets: torch.Tensor, + torso_position: torch.Tensor, + torso_rotation: torch.Tensor, + velocity: torch.Tensor, + ang_velocity: torch.Tensor, + dof_pos: torch.Tensor, + dof_lower_limits: torch.Tensor, + dof_upper_limits: torch.Tensor, + inv_start_rot: torch.Tensor, + basis_vec0: torch.Tensor, + basis_vec1: torch.Tensor, + potentials: torch.Tensor, + prev_potentials: torch.Tensor, + dt: float, +): + to_target = targets - torso_position + to_target[:, 2] = 0.0 + torso_quat, up_proj, heading_proj, up_vec, heading_vec = compute_heading_and_up( + torso_rotation, inv_start_rot, to_target, basis_vec0, basis_vec1, 2 + ) + + vel_loc, angvel_loc, roll, pitch, yaw, angle_to_target = compute_rot( + torso_quat, velocity, ang_velocity, targets, torso_position + ) + + dof_pos_scaled = torch_utils.maths.unscale(dof_pos, dof_lower_limits, dof_upper_limits) + + to_target = targets - torso_position + to_target[:, 2] = 0.0 + prev_potentials[:] = potentials + potentials = -torch.norm(to_target, p=2, dim=-1) / dt + + return ( + up_proj, + heading_proj, + up_vec, + heading_vec, + vel_loc, + angvel_loc, + roll, + pitch, + yaw, + angle_to_target, + dof_pos_scaled, + prev_potentials, + potentials, + )
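+
+ # NOTE (illustrative, not used above): the `potentials` and `prev_potentials` returned here
+ # are the usual ingredients of a progress-style locomotion reward, e.g.
+ #     progress_reward = potentials - prev_potentials
+ # In this environment, `_get_rewards` only tracks the pushed object's commanded linear and
+ # angular velocity, so the potential terms are computed but left unused.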