diff --git a/.gitignore b/.gitignore index 9d6232dd..be6b8f02 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,29 @@ MUJOCO_LOG.TXT _METADATA checkpoint wandb/ +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +realsense-ros/ +.vscode/ +data_collector/data_collector/**/*.csv +data_collector/data_collector/**/*.jpg +data_collector/data_collector/**/*.jpeg +data_collector/data_collector/**/*.png + +.~lock.* + +examples/async_peg_insert_drq/checkpoints*/** +examples/async_peg_insert_drq/log*/** +examples/async_peg_insert_drq/demo_data*/**/** +examples/async_peg_insert_drq/experiments*/**/** diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..e3d46646 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,15 @@ +[submodule "fri"] + path = fri + url = git@github.com:lbr-stack/fri.git +[submodule "lbr_fri_idl"] + path = lbr_fri_idl + url = https://github.com/RROS-Lab/serl_lbr_fri_idl.git + branch=serl-kuka +[submodule "lbr_fri_ros2_stack"] + path = lbr_fri_ros2_stack + url = https://github.com/RROS-Lab/serl_lbr_fri_ros2_stack.git + branch=serl-kuka +[submodule "data_collector"] + path = data_collector + url = https://github.com/nikitasarawgi/expert_data_collector.git + branch = data-collector-only diff --git a/data_collector b/data_collector new file mode 160000 index 00000000..b511fd4e --- /dev/null +++ b/data_collector @@ -0,0 +1 @@ +Subproject commit b511fd4ef702111fd36de7fba38a192ced0e7f98 diff --git a/examples/async_peg_insert_drq/async_drq_randomized.py b/examples/async_peg_insert_drq/async_drq_randomized.py index 4fd76f08..10c84b1e 100644 --- a/examples/async_peg_insert_drq/async_drq_randomized.py +++ b/examples/async_peg_insert_drq/async_drq_randomized.py @@ -1,5 +1,18 @@ #!/usr/bin/env python3 - +import sys +import os +import getpass + +username = getpass.getuser() +env 
= "serl" +print("Ensure to change anaconda3 or miniconda3 and change your environment name") +print("Conda Environment name is: ", env) +sys.path.append( + "/home/" + username + "/anaconda3/envs/" + env + "/lib/python3.10/site-packages" +) +import rclpy +import rclpy.duration +from rclpy.executors import MultiThreadedExecutor import time from functools import partial import jax @@ -26,27 +39,47 @@ make_trainer_config, make_wandb_logger, ) +from kuka_server.robot_interface import RobotInterfaceNode from serl_launcher.data.data_store import MemoryEfficientReplayBufferDataStore from serl_launcher.wrappers.serl_obs_wrappers import SERLObsWrapper -from franka_env.envs.relative_env import RelativeFrame -from franka_env.envs.wrappers import ( - GripperCloseEnv, - SpacemouseIntervention, + +import os + +MAIN_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) +sys.path.append(MAIN_DIR) +from serl_robot_infra.kuka_env.envs.relative_env import RelativeFrame +from serl_robot_infra.kuka_env.envs.wrappers import ( Quat2EulerWrapper, ) -import franka_env +import serl_robot_infra.franka_env +import serl_robot_infra.kuka_env -FLAGS = flags.FLAGS +# sys.argv = sys.argv[:1] + +# # `app.run` calls `sys.exit` +# try: +# app.run(lambda argv: None) +# except: +# pass -flags.DEFINE_string("env", "FrankaEnv-Vision-v0", "Name of environment.") +FLAGS = flags.FLAGS +flags.DEFINE_string("env", "KukaPegInsert-Vision-v0", "Name of environment.") flags.DEFINE_string("agent", "drq", "Name of agent.") flags.DEFINE_string("exp_name", None, "Name of the experiment for wandb logging.") -flags.DEFINE_integer("max_traj_length", 100, "Maximum length of trajectory.") + +## TODO :: NISARA :: change max trajectory length to at least 300 (or see what the new data wants) +flags.DEFINE_integer("max_traj_length", 200, "Maximum length of trajectory.") + flags.DEFINE_integer("seed", 42, "Random seed.") flags.DEFINE_bool("save_model", False, "Whether to save model.") 
flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.") +flags.DEFINE_boolean( + "load_checkpoint", False, "Whether to start from previous checkpoint or not." +) +flags.DEFINE_string("load_checkpoint_path", None, "Checkpoint to start training from.") +flags.DEFINE_integer("batch_size", 256, "Batch size for training the policy.") flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.") flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.") @@ -56,6 +89,7 @@ flags.DEFINE_integer("log_period", 10, "Logging period.") flags.DEFINE_integer("eval_period", 2000, "Evaluation period.") +flags.DEFINE_integer("loaded_checkpoint_step", 1000, "Loaded Checkpoint Step.") # flag to indicate if this is a leaner or a actor flags.DEFINE_boolean("learner", False, "Is this a learner or a trainer.") @@ -94,6 +128,7 @@ def actor(agent: DrQAgent, data_store, env, sampling_rng): This is the actor loop, which runs when "--actor" is set to True. 
""" if FLAGS.eval_checkpoint_step: + print("Evaluating the policy") success_counter = 0 time_list = [] @@ -104,14 +139,17 @@ def actor(agent: DrQAgent, data_store, env, sampling_rng): ) agent = agent.replace(state=ckpt) - for episode in range(FLAGS.eval_n_trajs): + for episode in tqdm.tqdm(range(FLAGS.eval_n_trajs), "Evaluation Trajectory"): obs, _ = env.reset() done = False start_time = time.time() while not done: + sampling_rng, key = jax.random.split(sampling_rng) actions = agent.sample_actions( observations=jax.device_put(obs), - argmax=True, + # argmax=True, + seed=key, + # deterministic=False, ) actions = np.asarray(jax.device_get(actions)) @@ -151,10 +189,11 @@ def update_params(params): done = False # training loop + print("Entering Training Loop") timer = Timer() running_return = 0.0 - for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True): + for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="actor"): timer.tick("total") with timer.context("sample_actions"): @@ -168,6 +207,8 @@ def update_params(params): deterministic=False, ) actions = np.asarray(jax.device_get(actions)) + # nisara : Comment + # print("Actions in Training Loop: ", actions) # Step environment with timer.context("step_env"): @@ -215,8 +256,9 @@ def learner(rng, agent: DrQAgent, replay_buffer, demo_buffer): The learner loop, which runs when "--learner" is set to True. 
""" # set up wandb and logging + # wandb_logger = None wandb_logger = make_wandb_logger( - project="serl_dev", + project="serl_testing", description=FLAGS.exp_name or FLAGS.env, debug=FLAGS.debug, ) @@ -304,7 +346,7 @@ def stats_callback(type: str, payload: dict) -> dict: if FLAGS.checkpoint_period and update_steps % FLAGS.checkpoint_period == 0: assert FLAGS.checkpoint_path is not None checkpoints.save_checkpoint( - FLAGS.checkpoint_path, agent.state, step=update_steps, keep=100 + FLAGS.checkpoint_path, agent.state, step=update_steps, keep=200 ) update_steps += 1 @@ -314,28 +356,42 @@ def stats_callback(type: str, payload: dict) -> dict: def main(_): + rclpy.init(args=_) + print("ROS2 initialized.") + robot_interface_node = RobotInterfaceNode() + executor = MultiThreadedExecutor() + executor.add_node(robot_interface_node) + robot_interface_node.get_logger().info("Robot interface node started.") + # executor.spin_once() + assert FLAGS.batch_size % num_devices == 0 # seed rng = jax.random.PRNGKey(FLAGS.seed) - + print("Initializing environment") # create env and load dataset + print("Value of learner: ", FLAGS.learner) + env = gym.make( FLAGS.env, fake_env=FLAGS.learner, save_video=FLAGS.eval_checkpoint_step, + robot_interface_node=robot_interface_node, ) - env = GripperCloseEnv(env) - if FLAGS.actor: - env = SpacemouseIntervention(env) + print("Environment initialized") + # env = GripperCloseEnv(env) + # if FLAGS.actor: + # env = SpacemouseIntervention(env) env = RelativeFrame(env) env = Quat2EulerWrapper(env) env = SERLObsWrapper(env) env = ChunkingWrapper(env, obs_horizon=1, act_exec_horizon=None) env = RecordEpisodeStatistics(env) + print("Environment wrapped") image_keys = [key for key in env.observation_space.keys() if key != "state"] rng, sampling_rng = jax.random.split(rng) + print("Creating agent") agent: DrQAgent = make_drq_agent( seed=FLAGS.seed, sample_obs=env.observation_space.sample(), @@ -344,13 +400,26 @@ def main(_): encoder_type=FLAGS.encoder_type, 
) + if FLAGS.load_checkpoint: + print(f"Loading Checkpoint from Previous Run:{FLAGS.load_checkpoint_path}") + + ckpt = checkpoints.restore_checkpoint( + FLAGS.load_checkpoint_path, + agent.state, + step=FLAGS.loaded_checkpoint_step, + ) + agent = agent.replace(state=ckpt) + + print("Agent created") # replicate agent across devices # need the jnp.array to avoid a bug where device_put doesn't recognize primitives + print("Replicating agent") agent: DrQAgent = jax.device_put( jax.tree_map(jnp.array, agent), sharding.replicate() ) if FLAGS.learner: + print("Learner code executed") sampling_rng = jax.device_put(sampling_rng, device=sharding.replicate()) replay_buffer = MemoryEfficientReplayBufferDataStore( env.observation_space, @@ -358,30 +427,28 @@ def main(_): capacity=FLAGS.replay_buffer_capacity, image_keys=image_keys, ) + print("Replay buffer initialized") + print("Checkpoint Path: ", FLAGS.checkpoint_path) demo_buffer = MemoryEfficientReplayBufferDataStore( env.observation_space, env.action_space, capacity=10000, image_keys=image_keys, ) - import pickle as pkl + import pickle5 as pkl with open(FLAGS.demo_path, "rb") as f: - trajs = pkl.load(f) + trajs = pkl.load(f, fix_imports=True) for traj in trajs: demo_buffer.insert(traj) print(f"demo buffer size: {len(demo_buffer)}") # learner loop print_green("starting learner loop") - learner( - sampling_rng, - agent, - replay_buffer, - demo_buffer=demo_buffer, - ) + learner(sampling_rng, agent=agent, replay_buffer=replay_buffer, demo_buffer=demo_buffer) elif FLAGS.actor: + print("Initializing Actor Node") sampling_rng = jax.device_put(sampling_rng, sharding.replicate()) data_store = QueuedDataStore(2000) # the queue size on the actor @@ -392,6 +459,8 @@ def main(_): else: raise NotImplementedError("Must be either a learner or an actor") + rclpy.shutdown() + if __name__ == "__main__": app.run(main) diff --git a/examples/async_peg_insert_drq/rect_peg/SERL_rectangle v1.stl 
b/examples/async_peg_insert_drq/rect_peg/SERL_rectangle v1.stl new file mode 100644 index 00000000..be805c84 Binary files /dev/null and b/examples/async_peg_insert_drq/rect_peg/SERL_rectangle v1.stl differ diff --git a/examples/async_peg_insert_drq/run_actor.sh b/examples/async_peg_insert_drq/run_actor.sh index a251e756..6889c920 100644 --- a/examples/async_peg_insert_drq/run_actor.sh +++ b/examples/async_peg_insert_drq/run_actor.sh @@ -1,12 +1,19 @@ export XLA_PYTHON_CLIENT_PREALLOCATE=false && \ export XLA_PYTHON_CLIENT_MEM_FRACTION=.1 && \ -python async_drq_randomized.py "$@" \ - --actor \ +export XLA_PYTHON_CLIENT_ALLOCATOR=platform &&\ +python3 async_drq_randomized.py "$@" \ + --actor 1\ --render \ - --env FrankaPegInsert-Vision-v0 \ + --env KukaPegInsert-Vision-v0 \ --exp_name=serl_dev_drq_rlpd10demos_peg_insert_random_resnet \ --seed 0 \ - --random_steps 0 \ - --training_starts 200 \ + --random_steps 200 \ + --training_starts 300 \ --encoder_type resnet-pretrained \ - --demo_path peg_insert_20_demos_2023-12-25_16-13-25.pkl \ + --demo_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/rect_peg_skips_sdf_action6_surface_relaxed.pkl \ + --eval_checkpoint_step 16500 \ + --loaded_checkpoint_step 16500 \ + --eval_n_trajs 11 \ + --load_checkpoint_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/checkpoints/ \ + --checkpoint_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/checkpoints_new/ \ + --load_checkpoint 1 \ diff --git a/examples/async_peg_insert_drq/run_learner.sh b/examples/async_peg_insert_drq/run_learner.sh index c2823a19..953e4f0c 100644 --- a/examples/async_peg_insert_drq/run_learner.sh +++ b/examples/async_peg_insert_drq/run_learner.sh @@ -1,16 +1,21 @@ export XLA_PYTHON_CLIENT_PREALLOCATE=false && \ export XLA_PYTHON_CLIENT_MEM_FRACTION=.2 && \ -python async_drq_randomized.py "$@" \ - --learner \ - --env FrankaPegInsert-Vision-v0 \ +export XLA_PYTHON_CLIENT_ALLOCATOR=platform &&\ +python3 async_drq_randomized.py 
"$@" \ + --learner 1\ + --env KukaPegInsert-Vision-v0 \ --exp_name=serl_dev_drq_rlpd10demos_peg_insert_random_resnet_097 \ --seed 0 \ - --random_steps 1000 \ - --training_starts 200 \ + --random_steps 200 \ + --training_starts 300 \ --critic_actor_ratio 4 \ --batch_size 256 \ --eval_period 2000 \ --encoder_type resnet-pretrained \ - --demo_path peg_insert_20_demos_2023-12-25_16-13-25.pkl \ - --checkpoint_period 1000 \ - --checkpoint_path /home/undergrad/code/serl_dev/examples/async_peg_insert_drq/5x5_20degs_20demos_rand_peg_insert_097 + --demo_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/rect_peg_skips_sdf_action6_surface_relaxed.pkl \ + --checkpoint_period 300 \ + --loaded_checkpoint_step 1200 \ + --load_checkpoint_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/checkpoints/ \ + --checkpoint_path /home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/checkpoints_new/ \ + --load_checkpoint 1 \ + diff --git a/examples/async_sac_state_sim/tmux_launch.sh b/examples/async_sac_state_sim/tmux_launch.sh index 78ff94a8..b5fca4f4 100644 --- a/examples/async_sac_state_sim/tmux_launch.sh +++ b/examples/async_sac_state_sim/tmux_launch.sh @@ -1,7 +1,7 @@ #!/bin/bash EXAMPLE_DIR=${EXAMPLE_DIR:-"examples/async_sac_state_sim"} -CONDA_ENV=${CONDA_ENV:-"serl"} +CONDA_ENV=${CONDA_ENV:-"serl-docker"} cd $EXAMPLE_DIR echo "Running from $(pwd)" diff --git a/fri b/fri new file mode 160000 index 00000000..58119424 --- /dev/null +++ b/fri @@ -0,0 +1 @@ +Subproject commit 581194240a7e05bb2bc4d613ab59e450c3a2291b diff --git a/kuka_server/kuka_server/__init__.py b/kuka_server/kuka_server/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/kuka_server/kuka_server/data.txt b/kuka_server/kuka_server/data.txt new file mode 100644 index 00000000..956382df --- /dev/null +++ b/kuka_server/kuka_server/data.txt @@ -0,0 +1,55 @@ +difference: [np.float64(0.0019766595082607585) np.float64(0.0004628820030692024) + np.float64(0.028018749190857797) 
0.02504313037427308 0.003815908558840551 + 0.010258544890220822] + +Processed folder: OM_Feb8_SERL_2013-01-02_05-27-28 + +NOT INSERTED + + + + + +difference: [np.float64(0.001436364561131831) np.float64(0.0004622432071985996) + np.float64(0.006333442177054199) 0.017690793548203132 0.03313624476328898 + 0.007315268674972231] + +Processed folder: OM_Feb8_SERL_2013-01-02_05-24-36 + +INSERTED + + + + + +difference: [np.float64(0.0015521302069487763) np.float64(0.00042343143758170193) + np.float64(0.006008299968794095) 0.030791564054494902 0.00497890869768973 + 0.007003545883071727] + +Processed folder: OM_Feb8_SERL_2013-01-02_05-25-44 + +INSERTED + + + + + +difference: [np.float64(0.0029457311018545873) np.float64(0.0009184325443422001) + np.float64(0.028379779133595004) 0.021396361779027595 + 0.014063390831159997 0.002100298832244274] + +Processed folder: OM_Feb8_SERL_2013-01-02_05-24-46 + +NOT INSERTED + + + + + +difference: [np.float64(0.003487982539743917) np.float64(0.0008351758736137027) + np.float64(0.028138092827257696) 0.01969423339553389 0.004430696798094312 + 0.00665583903938094] + +Processed folder: OM_Feb8_SERL_2013-01-02_05-26-50 + +NOT INSERTED \ No newline at end of file diff --git a/kuka_server/kuka_server/robot_interface.py b/kuka_server/kuka_server/robot_interface.py new file mode 100644 index 00000000..b86c7d04 --- /dev/null +++ b/kuka_server/kuka_server/robot_interface.py @@ -0,0 +1,616 @@ +import time + +import rclpy +import rclpy.duration +from rclpy.node import Node +from rclpy.action import ActionClient +from rclpy.callback_groups import ReentrantCallbackGroup, MutuallyExclusiveCallbackGroup +from rclpy.executors import MultiThreadedExecutor +from rcl_interfaces.msg import ParameterValue +from rcl_interfaces.srv import GetParameters +import numpy as np +from geometry_msgs.msg import Pose, Point, Quaternion +from std_msgs.msg import String +import rclpy.task +from sensor_msgs.msg import JointState +from geometry_msgs.msg import WrenchStamped 
+from control_msgs.action import FollowJointTrajectory +from trajectory_msgs.msg import JointTrajectory, JointTrajectoryPoint +import os +import sys +import socket +from moveit_msgs.msg import ( + RobotState, + RobotTrajectory, + MoveItErrorCodes, + Constraints, + JointConstraint, +) +from moveit_msgs.srv import GetPositionIK, GetMotionPlan, GetPositionFK +from moveit_msgs.action import ExecuteTrajectory +from kuka_server.wait_for_message import wait_for_message +from moveit_configs_utils.launches import generate_move_group_launch + +from kuka_server.utils import quat_to_euler, convert_wrench_to_numpy, euler_to_quat +from lbr_fri_idl.msg import LBRState +import copy +import optas + + +class RobotInterfaceNode(Node): + timeout_sec_ = 5.0 + + move_group_name_ = "arm" + namespace_ = "lbr" + + joint_state_topic_ = "joint_states" + lbr_state_topic_ = "state" + plan_srv_name_ = "plan_kinematic_path" + ik_srv_name_ = "compute_ik" + fk_srv_name_ = "compute_fk" + execute_action_name_ = "execute_trajectory" + fri_execute_action_name_ = "/lbr/joint_trajectory_controller/follow_joint_trajectory" + WRENCH_TOPIC = "/lbr/force_torque_broadcaster/wrench" + robot_desc_topic_ = "robot_description" + port_id = 30200 + robot_ip = "192.168.10.122" + base_ = "link_0" ###Changed for compatibility with latest lbr stack + end_effector_ = "link_ee" + + def __init__(self) -> None: + super().__init__("robot_interface_node", namespace=self.namespace_) + + self.fk_client_callback = ReentrantCallbackGroup() + self.ik_client_callback = MutuallyExclusiveCallbackGroup() + self.plan_client_callback = MutuallyExclusiveCallbackGroup() + self.execute_client_callback = MutuallyExclusiveCallbackGroup() + + self.robot_state = {} + + self.wrench_sub = self.create_subscription( + WrenchStamped, self.WRENCH_TOPIC, self.wrench_callback, 10 + ) + + self.state_sub = self.create_subscription( + LBRState, self.lbr_state_topic_, self.state_callback, 100 + ) + + self._init_jacobian() + self.session_ok = True + + 
self.ik_client_ = self.create_client( + GetPositionIK, self.ik_srv_name_, callback_group=self.ik_client_callback + ) + if not self.ik_client_.wait_for_service(timeout_sec=self.timeout_sec_): + self.get_logger().error("IK service not available.") + exit(1) + + self.fk_client_ = self.create_client( + GetPositionFK, self.fk_srv_name_, callback_group=self.fk_client_callback + ) + if not self.fk_client_.wait_for_service(timeout_sec=self.timeout_sec_): + self.get_logger().error("FK service not available.") + exit(1) + + self.plan_client_ = self.create_client( + GetMotionPlan, self.plan_srv_name_, callback_group=self.plan_client_callback + ) + if not self.plan_client_.wait_for_service(timeout_sec=self.timeout_sec_): + self.get_logger().error("Plan service not available.") + exit(1) + + self.execute_client_ = ActionClient( + self, + ExecuteTrajectory, + self.execute_action_name_, + callback_group=self.execute_client_callback, + ) + if not self.execute_client_.wait_for_server(timeout_sec=self.timeout_sec_): + self.get_logger().error("Execute action not available.") + exit(1) + + self.fri_execute_client_ = ActionClient( + self, + FollowJointTrajectory, + self.fri_execute_action_name_, + callback_group=MutuallyExclusiveCallbackGroup(), + ) + if not self.fri_execute_client_.wait_for_server(timeout_sec=self.timeout_sec_): + self.get_logger().error("FRI Execute action not available.") + exit(1) + + def state_callback(self, state: LBRState): + self.session_state = state.session_state + external_torque = state.external_torque + # if(np.any(np.abs(external_torque)>10) or self.session_state!=4): + if self.session_state != 4: + self.get_logger().info( + f"Torque Exceeded or State Change to {self.session_state}" + ) + self.session_ok = False + + return + + def get_ik(self, target_pose: Pose) -> JointState | None: + request = GetPositionIK.Request() + request.ik_request.group_name = self.move_group_name_ + tf_prefix = self.get_namespace()[1:] + 
request.ik_request.pose_stamped.header.frame_id = f"{tf_prefix}/{self.base_}" + + request.ik_request.pose_stamped.header.stamp = self.get_clock().now().to_msg() + request.ik_request.pose_stamped.pose = target_pose + request.ik_request.avoid_collisions = True + + future = self.ik_client_.call_async(request) + + rclpy.spin_until_future_complete(self, future) + if future.result() is None: + self.get_logger().error("Failed to get IK solution") + return None + + response = future.result() + if response.error_code.val != MoveItErrorCodes.SUCCESS: + return None + + return response.solution.joint_state + + def wrench_callback(self, msg): + self.latest_wrench = convert_wrench_to_numpy(msg) + # self.get_logger().info(f"Received wrench data: {self.latest_wrench}") + return + + def _init_jacobian(self): + print("Initializing Jacobian") + + self._robot_description = self._retrieve_parameter( + "/lbr/robot_state_publisher/get_parameters", "robot_description" + ).string_value + self._robot = optas.RobotModel( + urdf_string=self._robot_description, time_derivs=[0, 1] + ) + + self.jacobian_func = self._robot.get_link_geometric_jacobian_function( + link=self.end_effector_, base_link=self.base_, numpy_output=True + ) + + return + + def get_fk(self) -> Pose | None: + current_joint_state = self.get_joint_state() + if current_joint_state is None: + self.get_logger().error("Failed to get joint state") + return None + + current_robot_state = RobotState() + current_robot_state.joint_state = current_joint_state + + request = GetPositionFK.Request() + request.header.frame_id = f"{self.namespace_}/{self.base_}" + request.header.stamp = self.get_clock().now().to_msg() + + request.fk_link_names.append(self.end_effector_) + request.robot_state = current_robot_state + + future = self.fk_client_.call_async(request) + + rclpy.spin_until_future_complete(self, future) + if future.result() is None: + self.get_logger().error("Failed to get FK solution") + return None + + response = future.result() + if 
response.error_code.val != MoveItErrorCodes.SUCCESS: + self.get_logger().error( + f"Failed to get FK solution: {response.error_code.val}" + ) + return None + + # print( + # "In get_fk, printing the current pose after fk: ", + # response.pose_stamped[0].pose, + # ) + return response.pose_stamped[0].pose, current_joint_state + + def get_fk_lbr(self, commanded: bool = False) -> Pose | None: + lbr_state_set, lbr_state = wait_for_message( + LBRState, self, self.lbr_state_topic_ + ) + joint_position = lbr_state.measured_joint_position.tolist() + if commanded: + joint_position = lbr_state.commanded_joint_position.tolist() + + joint_position[2], joint_position[3] = joint_position[3], joint_position[2] + + current_robot_state = RobotState() + current_robot_state.joint_state = self.get_joint_state() + current_robot_state.joint_state.position = joint_position + + request = GetPositionFK.Request() + + request.header.frame_id = f"{self.namespace_}/{self.base_}" + request.header.stamp = self.get_clock().now().to_msg() + + request.fk_link_names.append(self.end_effector_) + request.robot_state = current_robot_state + + future = self.fk_client_.call_async(request) + + rclpy.spin_until_future_complete(self, future) + if future.result() is None: + self.get_logger().error("Failed to get FK solution") + return None + + response = future.result() + if response.error_code.val != MoveItErrorCodes.SUCCESS: + self.get_logger().error( + f"Failed to get FK solution: {response.error_code.val}" + ) + return None + + return response.pose_stamped[0].pose + + def sum_of_square_diff( + self, joint_state_1: JointState, joint_state_2: JointState + ) -> float: + return np.sum( + np.square(np.subtract(joint_state_1.position, joint_state_2.position)) + ) + + def _retrieve_parameter(self, service: str, parameter_name: str) -> ParameterValue: + parameter_client = self.create_client(GetParameters, service) + while not parameter_client.wait_for_service(timeout_sec=1.0): + if not rclpy.ok(): + 
self.get_logger().error( + "Interrupted while waiting for the service. Exiting." + ) + return None + self.get_logger().info(f"Waiting for '{service}' service...") + request = GetParameters.Request(names=[parameter_name]) + future = parameter_client.call_async(request=request) + rclpy.spin_until_future_complete(self, future) + if future.result() is None: + self.get_logger().error(f"Failed to retrieve '{parameter_name}'.") + return None + return future.result().values[0] + + def get_current_state(self): + ##Return a dictionary with all the corresponding state variables + # Pose, Velocity, Force, Torque, Jacobian, Joint Angles, Joint Velocity, Gripper Pose + self.robot_state["pose"] = np.zeros((7,)) + self.robot_state["vel"] = np.zeros((6,)) + self.robot_state["force"] = np.zeros((3,)) + self.robot_state["torque"] = np.zeros((3,)) + self.robot_state["q"] = np.zeros((7,)) + self.robot_state["dq"] = np.zeros((7,)) + + ##TCP pose + current_ee_geom_pose, current_joint_state = self.get_fk() + self.robot_state["pose"] = np.array( + [ + current_ee_geom_pose.position.x, + current_ee_geom_pose.position.y, + current_ee_geom_pose.position.z, + current_ee_geom_pose.orientation.x, + current_ee_geom_pose.orientation.y, + current_ee_geom_pose.orientation.z, + current_ee_geom_pose.orientation.w, + ] + ) + + ##Getting Joint Angles + joint_angles = [ + current_joint_state.position[0], + current_joint_state.position[1], + current_joint_state.position[3], + current_joint_state.position[2], + current_joint_state.position[4], + current_joint_state.position[5], + current_joint_state.position[6], + ] + + self.robot_state["q"] = joint_angles + joint_velocity = [ + current_joint_state.velocity[0], + current_joint_state.velocity[1], + current_joint_state.velocity[3], + current_joint_state.velocity[2], + current_joint_state.velocity[4], + current_joint_state.velocity[5], + current_joint_state.velocity[6], + ] + + self.robot_state["dq"] = joint_velocity + + ##Getting Force and Wrench + 
current_wrench = copy.deepcopy(self.latest_wrench) + self.robot_state["force"] = current_wrench[:3] + self.robot_state["torque"] = current_wrench[3:] + self.robot_state["jacobian"] = self.jacobian_func(joint_angles) + self.robot_state["vel"] = np.dot(self.robot_state["jacobian"], joint_velocity) + return self.robot_state + + def is_socket_open(self, host, port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(1) # Timeout after 1 second + try: + sock.connect((host, port)) + return True # If the connection succeeds, the socket is open + except (socket.timeout, socket.error): + return False # If there’s an error, the socket is likely closed + finally: + sock.close() # Always close the socket after checking + + def move_to_pose(self, pose: np.ndarray, reset_pose: np.ndarray): + + if not self.session_ok: + print("ERROR in LBR FRI Restart the node") + input("Press Enter When Done Resetting") + input("Are you sure everything is good? Press Enter Again") + self.session_ok = True + + target_pose = Pose() + + target_pose.position.x = float(pose[0]) + target_pose.position.y = float(pose[1]) + target_pose.position.z = float(pose[2]) + + # target_pose_quaternion = euler_to_quat(pose[3:]) + target_pose.orientation.x = float(pose[3]) + target_pose.orientation.y = float(pose[4]) + target_pose.orientation.z = float(pose[5]) + target_pose.orientation.w = float(pose[6]) + + traj = self.get_motion_plan(target_pose=target_pose, linear=True, scaling_factor=0.2) + if traj: + client = self.get_motion_execute_client() + goal = ExecuteTrajectory.Goal() + goal.trajectory = traj + + future = client.send_goal_async(goal) + rclpy.spin_until_future_complete(self, future) + + goal_handle = future.result() + if not goal_handle.accepted: + self.get_logger().error("Failed to execute trajectory") + else: + self.get_logger().debug("Trajectory accepted") + + result_future = goal_handle.get_result_async() + + expect_duration = 
traj.joint_trajectory.points[-1].time_from_start + expect_time = time.time() + 2 * expect_duration.sec + while not result_future.done() and time.time() < expect_time: + time.sleep(0.01) + + self.get_logger().debug("Trajectory executed") + + return + + def move_to_joint_pos(self, joint_pos: np.ndarray): + + if not self.session_ok: + print("ERROR in LBR FRI Restart the node") + input("Press Enter When Done Resetting") + input("Are you sure everything is good? Press Enter Again") + self.session_ok = True + + + + goal_msg = FollowJointTrajectory.Goal() + trajectory_msg = JointTrajectory() + trajectory_msg.joint_names = ["A1", "A2", "A4", "A3", "A5", "A6", "A7"] + + point = JointTrajectoryPoint() + point.positions = [float(val) for val in joint_pos] + point.time_from_start.sec = 2 + trajectory_msg.points.append(point) + goal_msg.trajectory = trajectory_msg + self.fri_execute_client_.wait_for_server() + + send_goal_future = self.fri_execute_client_.send_goal_async( + goal_msg + ) + + rclpy.spin_until_future_complete(self, send_goal_future) + + goal_handle = send_goal_future.result() + + if not goal_handle.accepted: + print(f"Goal for init point was rejected") + return + + print(f"Goal for init point was accepted") + get_result_future = goal_handle.get_result_async() + rclpy.spin_until_future_complete(self, get_result_future) + result = get_result_future.result().result + time.sleep(1) + return + + def get_best_ik(self, target_pose: Pose, attempts: int = 100) -> JointState | None: + current_joint_state = self.get_joint_state() + if current_joint_state is None: + self.get_logger().error("Failed to get joint state") + return None + + best_cost = np.inf + best_joint_state = None + + for _ in range(attempts): + joint_state = self.get_ik(target_pose) + if joint_state is None: + continue + + cost = self.sum_of_square_diff(current_joint_state, joint_state) + if cost < best_cost: + best_cost = cost + best_joint_state = joint_state + + if not best_joint_state: + 
self.get_logger().error("Failed to get IK solution") + + return best_joint_state + + def get_joint_state(self) -> JointState: + current_joint_state_set, current_joint_state = wait_for_message( + JointState, self, self.joint_state_topic_ + ) + if not current_joint_state_set: + self.get_logger().error("Failed to get current joint state") + return None + + + return current_joint_state + + # https://github.com/moveit/moveit/issues/3583#issuecomment-2035540755 + def adjust_trajectory_timings(self, trajectory): + epsilon = 0.001 # Small time increment to adjust duplicate timings + for i in range(1, len(trajectory.joint_trajectory.points)): + if trajectory.joint_trajectory.points[i].time_from_start == trajectory.joint_trajectory.points[i - 1].time_from_start: + print("THIS IS TRUE HERE") + trajectory.joint_trajectory.points[i].time_from_start += rclpy.duration.Duration(seconds=epsilon) + return trajectory + + def get_motion_plan( + self, + target_pose: Pose = None, + linear: bool = False, + scaling_factor: float = 0.1, + attempts: int = 10, + target_joint_state_in: np.ndarray = None, + ) -> RobotTrajectory | None: + current_pose = self.get_fk()[0] + if current_pose is None: + self.get_logger().error("Failed to get current pose") + + # if check_same_pose(current_pose, target_pose): + # self.get_logger().info("Detected same pose as current pose, neglected") + # return RobotTrajectory() + + current_joint_state = self.get_joint_state() + if current_joint_state is None: + self.get_logger().error("Failed to get joint state") + return None + + current_robot_state = RobotState() + current_robot_state.joint_state.position = current_joint_state.position + current_robot_state.joint_state.name = current_joint_state.name + + if target_joint_state_in is not None: + target_joint_state = JointState() + target_joint_state.position = [float(val) for val in target_joint_state_in] + target_joint_state.name = current_joint_state.name + # print("Printing target joint state: ", 
target_joint_state) + + else: + target_joint_state = self.get_best_ik(target_pose) + if target_joint_state is None: + self.get_logger().error("Failed to get target joint state") + return None + + target_constraint = Constraints() + for i in range(len(target_joint_state.position)): + joint_constraint = JointConstraint() + joint_constraint.joint_name = target_joint_state.name[i] + joint_constraint.position = target_joint_state.position[i] + joint_constraint.tolerance_above = 0.001 + joint_constraint.tolerance_below = 0.001 + joint_constraint.weight = 1.0 + target_constraint.joint_constraints.append(joint_constraint) + + + + request = GetMotionPlan.Request() + request.motion_plan_request.group_name = self.move_group_name_ + request.motion_plan_request.start_state = current_robot_state + request.motion_plan_request.goal_constraints.append(target_constraint) + request.motion_plan_request.num_planning_attempts = 10 + request.motion_plan_request.allowed_planning_time = 5.0 + request.motion_plan_request.max_velocity_scaling_factor = scaling_factor + request.motion_plan_request.max_acceleration_scaling_factor = scaling_factor + + if linear: + request.motion_plan_request.pipeline_id = "pilz_industrial_motion_planner" + request.motion_plan_request.planner_id = "LIN" + else: + request.motion_plan_request.pipeline_id = "ompl" + request.motion_plan_request.planner_id = "APSConfigDefault" + + for i in range(attempts): + # print(f"Attempt {i} to generate a trajectory") + plan_future = self.plan_client_.call_async(request) + rclpy.spin_until_future_complete(self, plan_future) + + if plan_future.result() is None: + self.get_logger().error("Failed to get motion plan") + + response = plan_future.result() + if response.motion_plan_response.error_code.val != MoveItErrorCodes.SUCCESS: + self.get_logger().error( + f"Failed to get motion plan: {response.motion_plan_response.error_code.val}" + ) + else: + traj = response.motion_plan_response.trajectory + # # Adjust timings as mentioned 
above + # traj = self.adjust_trajectory_timings(traj) + return traj + + return None + + def get_motion_execute_client(self) -> ActionClient: + return self.execute_client_ + + def get_fri_motion_execute(self, traj: RobotTrajectory, short_wait=False) -> bool: + joint_trajectory_goal = FollowJointTrajectory.Goal() + joint_trajectory_goal.trajectory = traj.joint_trajectory + + goal_future = self.fri_execute_client_.send_goal_async(joint_trajectory_goal) + rclpy.spin_until_future_complete(self, goal_future) + goal_handle = goal_future.result() + if not goal_handle.accepted: + self.get_logger().error("Trajectory goal rejected") + return False + self.get_logger().debug("Trajectory goal accepted") + + result_future = goal_handle.get_result_async() + rclpy.spin_until_future_complete(self, result_future) + + execution_finished = False + expected_timout = time.time() + 30 + if short_wait: + expected_timout = time.time() + 2.0 + while not execution_finished and time.time() < expected_timout: + _, lbr_state = wait_for_message(LBRState, self, self.lbr_state_topic_) + if ( + np.max( + np.abs( + np.subtract( + lbr_state.measured_joint_position, + traj.joint_trajectory.points[-1].positions, + ) + ) + ) + < 0.0002 + ): + execution_finished = True + + time.sleep(0.01) + + self.get_logger().debug("Trajectory executed") + return True + + +def main(args=None): + rclpy.init(args=args) + + robot_interface_node = RobotInterfaceNode() + executor = MultiThreadedExecutor() + executor.add_node(robot_interface_node) + robot_interface_node.get_logger().info("Robot interface node started.") + + executor.spin() + + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/kuka_server/kuka_server/utils.py b/kuka_server/kuka_server/utils.py new file mode 100644 index 00000000..bf0a3c44 --- /dev/null +++ b/kuka_server/kuka_server/utils.py @@ -0,0 +1,54 @@ +from scipy.spatial.transform import Rotation as R +import numpy as np + + +def euler_to_quat(euler_angles_abc, degrees=False): + q = 
R.from_euler("ZYX", euler_angles_abc, degrees=degrees).as_quat() + + return q + + +def euler_to_rotmat(euler_angles_abc, degrees=False): + rot_mat = R.from_euler("ZYX", euler_angles_abc, degrees=degrees).as_matrix() + + return rot_mat + + +def rotmat_to_quat(rotmat): + + quat = R.from_matrix(rotmat).as_quat() + + return quat + + +def quat_to_rotmat(quat): + + rotmat = R.from_quat(quat).as_matrix() + + return rotmat + + +def quat_to_euler(quat, degrees=False): + + euler = R.from_quat(quat).as_euler("ZYX", degrees=degrees) + return euler + + +def convert_wrench_to_numpy(msg): + force = msg.wrench.force + torque = msg.wrench.torque + return np.array([force.x, force.y, force.z, torque.x, torque.y, torque.z]) + + +def xyzabc_to_se3(xyzabc, degrees=False): + + tranformation_mat = np.eye(4) + tranformation_mat[0, 3] = xyzabc[0] + tranformation_mat[1, 3] = xyzabc[1] + if len(xyzabc) == 6: + tranformation_mat[2, 3] = xyzabc[2] + tranformation_mat[:3, :3] = euler_to_rotmat(xyzabc[3:], degrees) + else: + tranformation_mat[:3, :3] = euler_to_rotmat(xyzabc[2:], degrees) + + return tranformation_mat diff --git a/kuka_server/kuka_server/wait_for_message.py b/kuka_server/kuka_server/wait_for_message.py new file mode 100644 index 00000000..226c5d86 --- /dev/null +++ b/kuka_server/kuka_server/wait_for_message.py @@ -0,0 +1,71 @@ +# Copyright 2022 Sony Group Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Union + +from rclpy.impl.implementation_singleton import rclpy_implementation as _rclpy +from rclpy.node import Node +from rclpy.qos import QoSProfile +from rclpy.signals import SignalHandlerGuardCondition +from rclpy.utilities import timeout_sec_to_nsec + + +def wait_for_message( + msg_type, + node: 'Node', + topic: str, + *, + qos_profile: Union[QoSProfile, int] = 1, + time_to_wait=-1 +): + """ + Wait for the next incoming message. + + :param msg_type: message type + :param node: node to initialize the subscription on + :param topic: topic name to wait for message + :param qos_profile: QoS profile to use for the subscription + :param time_to_wait: seconds to wait before returning + :returns: (True, msg) if a message was successfully received, (False, None) if message + could not be obtained or shutdown was triggered asynchronously on the context. + """ + context = node.context + wait_set = _rclpy.WaitSet(1, 1, 0, 0, 0, 0, context.handle) + wait_set.clear_entities() + + sub = node.create_subscription(msg_type, topic, lambda _: None, qos_profile=qos_profile) + try: + wait_set.add_subscription(sub.handle) + sigint_gc = SignalHandlerGuardCondition(context=context) + wait_set.add_guard_condition(sigint_gc.handle) + + timeout_nsec = timeout_sec_to_nsec(time_to_wait) + wait_set.wait(timeout_nsec) + + subs_ready = wait_set.get_ready_entities('subscription') + guards_ready = wait_set.get_ready_entities('guard_condition') + + if guards_ready: + if sigint_gc.handle.pointer in guards_ready: + return False, None + + if subs_ready: + if sub.handle.pointer in subs_ready: + msg_info = sub.handle.take_message(sub.msg_type, sub.raw) + if msg_info is not None: + return True, msg_info[0] + finally: + node.destroy_subscription(sub) + + return False, None \ No newline at end of file diff --git a/kuka_server/package.xml b/kuka_server/package.xml new file mode 100755 index 00000000..28ebaa99 --- /dev/null +++ b/kuka_server/package.xml @@ -0,0 +1,21 @@ + + + + 
kuka_server + 0.0.0 + TODO: Package description + root + TODO: License declaration + + serl_robot_infra + moveit + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/kuka_server/resource/kuka_server b/kuka_server/resource/kuka_server new file mode 100755 index 00000000..e69de29b diff --git a/kuka_server/setup.cfg b/kuka_server/setup.cfg new file mode 100755 index 00000000..a253a0f2 --- /dev/null +++ b/kuka_server/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/kuka_server +[install] +install_scripts=$base/lib/kuka_server diff --git a/kuka_server/setup.py b/kuka_server/setup.py new file mode 100755 index 00000000..3aa55290 --- /dev/null +++ b/kuka_server/setup.py @@ -0,0 +1,25 @@ +from setuptools import find_packages, setup + +package_name = 'kuka_server' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='root', + maintainer_email='root@todo.todo', + description='TODO: Package description', + license='TODO: License declaration', + tests_require=['pytest'], + entry_points={ + 'console_scripts': ['robot_server = kuka_server.robot_interface:main' + ], + }, +) diff --git a/kuka_server/test/test_copyright.py b/kuka_server/test/test_copyright.py new file mode 100755 index 00000000..97a39196 --- /dev/null +++ b/kuka_server/test/test_copyright.py @@ -0,0 +1,25 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/kuka_server/test/test_flake8.py b/kuka_server/test/test_flake8.py new file mode 100755 index 00000000..27ee1078 --- /dev/null +++ b/kuka_server/test/test_flake8.py @@ -0,0 +1,25 @@ +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/kuka_server/test/test_pep257.py b/kuka_server/test/test_pep257.py new file mode 100755 index 00000000..b234a384 --- /dev/null +++ b/kuka_server/test/test_pep257.py @@ -0,0 +1,23 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/lbr_fri_idl b/lbr_fri_idl new file mode 160000 index 00000000..7e32dc9d --- /dev/null +++ b/lbr_fri_idl @@ -0,0 +1 @@ +Subproject commit 7e32dc9d100e9ef7b675f79aaea1ad74f30cc426 diff --git a/lbr_fri_ros2_stack b/lbr_fri_ros2_stack new file mode 160000 index 00000000..8d550710 --- /dev/null +++ b/lbr_fri_ros2_stack @@ -0,0 +1 @@ +Subproject commit 8d550710146511d7aa54f3fcc38b981d4a3d369f diff --git a/serl_robot_infra/kuka_env/__init__.py b/serl_robot_infra/kuka_env/__init__.py new file mode 100644 index 00000000..4e90e1a5 --- /dev/null +++ b/serl_robot_infra/kuka_env/__init__.py @@ -0,0 +1,14 @@ +from gym.envs.registration import register +import numpy as np + +register( + id="KukaEnv-Vision-v0", + entry_point="kuka_env.envs:KukaEnv", + max_episode_steps=200, +) + +register( + id="KukaPegInsert-Vision-v0", + entry_point="kuka_env.envs.peg_env:KukaPegInsert", + max_episode_steps=200, +) diff --git a/serl_robot_infra/kuka_env/envs/kuka_env.py b/serl_robot_infra/kuka_env/envs/kuka_env.py new file mode 100644 index 00000000..3d85aa87 --- /dev/null +++ b/serl_robot_infra/kuka_env/envs/kuka_env.py @@ -0,0 +1,690 @@ +"""Gym Interface for Franka""" + +import sys +import time +import getpass + +username = getpass.getuser() +env = "serl" +print("Ensure to change anaconda3 or miniconda3 and change your environment name") +print("Conda Environment name is: ", env) +sys.path.append( + "/home/" + username + "/miniconda3/envs/" + env + "/lib/python3.10/site-packages" +) + +import numpy as np +import gym +import cv2 +import copy +from scipy.spatial.transform import Rotation +import requests +import queue +import threading +from datetime import datetime +from collections import OrderedDict +from typing import Dict +import logging +import os +import shutil + 
+sys.path.append("/home/rp/SERL/src/") +from serl_robot_infra.franka_env.camera.video_capture import VideoCapture +from serl_robot_infra.franka_env.camera.rs_capture import RSCapture +from serl_robot_infra.kuka_env.utils.rotations import euler_2_quat, quat_2_euler + + +# from kuka_server.kuka_server.robot_interface import RobotInterfaceNode + + +class ImageDisplayer(threading.Thread): + def __init__(self, queue): + threading.Thread.__init__(self) + self.queue = queue + self.daemon = True # make this a daemon thread + + def run(self): + while True: + img_array = self.queue.get() # retrieve an image from the queue + if img_array is None: # None is our signal to exit + break + + frame = np.concatenate( + [v for k, v in img_array.items() if "full" not in k], axis=0 + ) + + cv2.imshow("RealSense Cameras", frame) + cv2.waitKey(1) + + +############################################################################## + + +class DefaultEnvConfig: + """Default configuration for KukaEnv. Fill in the values below.""" + + ROBOT_IP: str = "192.168.10.122" + REALSENSE_CAMERAS: Dict = { + "wrist_1": "840412060409", + "wrist_2": "932122060300", + } + TARGET_POSE: np.ndarray = np.zeros((6,)) + REWARD_THRESHOLD: np.ndarray = np.zeros((6,)) + ACTION_SCALE = np.zeros((3,)) + RESET_POSE = np.zeros((6,)) + RANDOM_RESET = (False,) + RANDOM_XY_RANGE = (0.0,) + RANDOM_RZ_RANGE = (0.0,) + ABS_POSE_LIMIT_HIGH = np.zeros((6,)) + ABS_POSE_LIMIT_LOW = np.zeros((6,)) + COMPLIANCE_PARAM: Dict[str, float] = {} + PRECISION_PARAM: Dict[str, float] = {} + BINARY_GRIPPER_THREASHOLD: float = 0.5 + APPLY_GRIPPER_PENALTY: bool = True + GRIPPER_PENALTY: float = 0.1 + USE_GRIPPER: bool = False + + +############################################################################## + + +class KukaEnv(gym.Env): + def __init__( + self, + hz=10, + fake_env=False, + save_video=False, + config: DefaultEnvConfig = None, + max_episode_length=100, + robot_interface_node=None, + ): + print("Initializing KukaEnv") + 
self.robot_interface_node = robot_interface_node + self.action_scale = config.ACTION_SCALE + self._TARGET_POSE = config.TARGET_POSE + self._REWARD_THRESHOLD = config.REWARD_THRESHOLD + self.url = config.ROBOT_IP + self.config = config + self.max_episode_length = max_episode_length + self.max_episode_length = 200 + self.task_reward = 1.0 + + # convert last 3 elements from euler to quat, from size (6,) to (7,) + self.resetpos = np.concatenate( + [config.RESET_POSE[:3], euler_2_quat(config.RESET_POSE[3:])] + ) + + self.reset_joint_pos = config.RESET_JOINT_POSITION.copy() + + self.currpos = self.resetpos.copy() + self.currvel = np.zeros((6,)) + self.q = np.zeros((7,)) + self.dq = np.zeros((7,)) + self.currforce = np.zeros((3,)) + self.currtorque = np.zeros((3,)) + self.currjacobian = np.zeros((6, 7)) + + self.curr_gripper_pos = 0 + self.gripper_binary_state = 0 # 0 for open, 1 for closed + self.lastsent = time.time() + self.randomreset = config.RANDOM_RESET + self.random_xy_range = config.RANDOM_XY_RANGE + self.random_rz_range = config.RANDOM_RZ_RANGE + self.hz = hz + self.joint_reset_cycle = 200 # reset the robot joint every 200 cycles + + if save_video: + print("Saving videos!") + self.save_video = save_video + self.recording_frames = [] + + # nisara : Saving some important information here + if not fake_env: + timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + self.exp_folder = os.path.join('experiments', f"exp_{timestamp}") + os.makedirs(self.exp_folder, exist_ok=True) + + # Save the parameters (config.py) used for the experiment + config_path = '/home/rp/SERL/src/serl_robot_infra/kuka_env/envs/peg_env/config.py' + parameters_path = os.path.join(self.exp_folder, "parameters.txt") + shutil.copy2(config_path, parameters_path) + + # Initialize a reward over the whole episode + self.episode_max_reward = 0 + self.save_reward_stats = True + self.reward_stats_file = os.path.join(self.exp_folder, "reward_stats.txt") + + # Setting logging + self.logger = 
logging.getLogger() + self.logger.handlers.pop(0) + self.logger.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s') + file_handler = logging.FileHandler(os.path.join(self.exp_folder, 'reward_log.log')) + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(formatter) + self.logger.addHandler(file_handler) + self.logger.propagate = False + + # boundary box + self.xyz_bounding_box = gym.spaces.Box( + config.ABS_POSE_LIMIT_LOW[:3], + config.ABS_POSE_LIMIT_HIGH[:3], + dtype=np.float64, + ) + self.rpy_bounding_box = gym.spaces.Box( + config.ABS_POSE_LIMIT_LOW[3:], + config.ABS_POSE_LIMIT_HIGH[3:], + dtype=np.float64, + ) + # Action/Observation Space + # TODO :: change to 6 + self.action_space = gym.spaces.Box( + np.ones((6,), dtype=np.float32) * -1, + np.ones((6,), dtype=np.float32), + ) + + self.observation_space = gym.spaces.Dict( + { + "state": gym.spaces.Dict( + { + "tcp_pose": gym.spaces.Box( + -np.inf, np.inf, shape=(7,) + ), # xyz + quat + "tcp_vel": gym.spaces.Box(-np.inf, np.inf, shape=(6,)), + "tcp_force": gym.spaces.Box(-np.inf, np.inf, shape=(3,)), + "tcp_torque": gym.spaces.Box(-np.inf, np.inf, shape=(3,)), + } + ), + "images": gym.spaces.Dict( + { + "wrist_1": gym.spaces.Box( + 0, 255, shape=(128, 128, 3), dtype=np.uint8 + ), + "wrist_2": gym.spaces.Box( + 0, 255, shape=(128, 128, 3), dtype=np.uint8 + ), + } + ), + } + ) + self.cycle_count = 0 + + if fake_env: + print("returning from fake env") + return + print("Initializing Cameras") + self.cap = None + self.init_cameras(config.REALSENSE_CAMERAS) + self.img_queue = queue.Queue() + self.displayer = ImageDisplayer(self.img_queue) + self.displayer.start() + self.use_gripper = config.USE_GRIPPER + print("Initialized Kuka") + + # Initialization for the sdf based reward + self.mesh_file_path = config.MESH_FILE_PATH + self.num_sampled_points = 1000 + self.voxel_size = 0.001 + import trimesh + import pysdf + self.og_peg_mesh = 
trimesh.load(self.mesh_file_path) + # The given mesh file is not in meters, convert it + self.og_peg_mesh.apply_scale(0.001) + + self.target_transform = np.eye(4) + self.target_transform[:3, 3] = self._TARGET_POSE[:3] + self.target_transform[:3, :3] = Rotation.from_euler("ZYX", self._TARGET_POSE[3:]).as_matrix() + self.transformed_peg_mesh = self.og_peg_mesh.copy() + self.transformed_peg_mesh.apply_transform(self.target_transform) + + vertices = np.array(self.transformed_peg_mesh.vertices, dtype=np.float32) + faces = np.array(self.transformed_peg_mesh.faces, dtype=np.uint32) + + self.sdf = pysdf.SDF(vertices, faces, robust = True) + self.local_surface_points, _ = trimesh.sample.sample_surface_even(self.transformed_peg_mesh, self.num_sampled_points) + self.local_surface_points = np.array(self.local_surface_points) + self.transform_target_inverse = np.linalg.inv(self.target_transform) + + + + + + def clip_safety_box(self, pose: np.ndarray) -> np.ndarray: + """Clip the pose to be within the safety box.""" + pose[:3] = np.clip( + pose[:3], self.xyz_bounding_box.low, self.xyz_bounding_box.high + ) + euler = Rotation.from_quat(pose[3:]).as_euler("ZYX") + + # Clip first two euler angles separately due to discontinuity from pi to -pi + # sign = np.sign(euler[2]) + # euler[2] = sign * ( + # np.clip( + # np.abs(euler[2]), + # self.rpy_bounding_box.low[2], + # self.rpy_bounding_box.high[2], + # ) + # ) + + # nisara : Clipping the B and C euler angles separately because they are at inflection point + + # A + euler[0] = ( + np.clip( + euler[0], + self.rpy_bounding_box.low[0], + self.rpy_bounding_box.high[0], + ) + ) + # B - The max range is -10 to +10 degrees + euler[1] = ( + np.clip( + euler[1], + self.rpy_bounding_box.low[1], + self.rpy_bounding_box.high[1], + ) + ) + # C - The max range is -170 to +170 degrees + sign_C = np.sign(euler[2]) + euler[2] = sign_C * ( + np.clip( + np.abs(euler[2]), + self.rpy_bounding_box.high[2], + np.abs(np.deg2rad(180)), + ) + ) + + pose[3:] = 
Rotation.from_euler("ZYX", euler).as_quat() + + return pose + + def step(self, action: np.ndarray) -> tuple: + """standard gym step function.""" + # print("In step function") + + start_time = time.time() + self.logger.info(f"Action before clipping: {action}") + action = np.clip(action, self.action_space.low, self.action_space.high) + xyz_delta = action[:3] + self.logger.info(f"Action after clipping ==xyz_delta==: {xyz_delta}") + + self.nextpos = self.currpos.copy() + # # nisara : Comment + # print("Current position in step: ", self.nextpos) + self.nextpos[:3] = self.nextpos[:3] + xyz_delta * self.action_scale[0] + + # GET ORIENTATION FROM ACTION + self.nextpos[3:] = Rotation.from_matrix( + ( + np.matmul( + Rotation.from_quat(self.currpos[3:]).as_matrix(), + Rotation.from_euler( + "ZYX", action[3:6] * self.action_scale[1] + ).as_matrix(), + ) + ) + ).as_quat() + + ##Remove gripper action NOTE: Omey + if self.use_gripper: + gripper_action = action[6] * self.action_scale[2] + gripper_action_effective = self._send_gripper_command(gripper_action) + + self._send_pos_command(self.clip_safety_box(self.nextpos)) + + # ## GO TO TARGET POSE FOR DEBUGGING + # tp = self._TARGET_POSE.copy() + # self.nextpos[:3] = tp[:3] + # self.nextpos[3:] = euler_2_quat(tp[3:]) + # self._send_pos_command(self.clip_safety_box(self.nextpos)) + # ## END DEBUGGING + + self.curr_path_length += 1 + dt = time.time() - start_time + time.sleep(max(0, (1.0 / self.hz) - dt)) + + self._update_currpos() + ob = self._get_obs() + if self.use_gripper: + reward = self.compute_reward(ob, gripper_action_effective) + else: + reward = self.compute_reward(ob) + + done = self.curr_path_length >= self.max_episode_length or reward == self.task_reward + return ob, reward, done, False, {} + + def compute_reward(self, obs, gripper_action_effective=None) -> bool: + """We are using a sparse reward function.""" + current_pose = obs["state"]["tcp_pose"] + + # convert from quat to euler first + euler_angles_degrees = 
Rotation.from_quat(current_pose[3:]).as_euler("ZYX", degrees=True) + euler_angles = quat_2_euler(current_pose[3:]) + + # nisara: IMPORTANT: Convert the target pose to absolute values too + target_pose = self._TARGET_POSE.copy() + + current_pose = np.hstack([current_pose[:3], euler_angles]) + curr_pose_print = np.hstack([current_pose[:3], euler_angles_degrees]) + + give_reward = np.zeros(6, dtype=bool) + difference = np.zeros(6) + + difference[:3] = np.abs(current_pose[:3] - target_pose[:3]) + give_reward[:3] = difference[:3] <= self._REWARD_THRESHOLD[:3] + + # Calculation of B and C's delta is going to be different + # For B, the range is -5 to +5 degrees + # For C, the range is -175 to -180 and +175 to +180 degrees + + ## A + + difference[3] = np.abs(current_pose[3] - target_pose[3]) + give_reward[3] = difference[3] <= self._REWARD_THRESHOLD[3] + + ## B + difference[4] = np.abs(current_pose[4] - target_pose[4]) + if difference[4] <= self._REWARD_THRESHOLD[4]: + give_reward[4] = 1 + + ## C + difference[5] = np.pi - np.abs(current_pose[5]) + if difference[5] <= self._REWARD_THRESHOLD[5]: + give_reward[5] = 1 + + if np.all(give_reward): + print("RECEIVED COMPLETE REWARD 1.0!!!!") + self.logger.info("RECEIVED COMPLETE REWARD 1.0!!!!") + self.logger.info(f"Received reward 1.0 at \n\tcurrent pose: {curr_pose_print} and \n\tdiff: {difference}") + reward = self.task_reward + + else: + ## DO i add reward for 0.099 = z-value, reward = 0.98 + if current_pose[2] <= 0.099: + reward = 0.98 + self.logger.info("Received surface reward 0.98") + else: + # SDF BASED REWARD + tcp_transform = np.eye(4) + tcp_transform[:3, 3] = current_pose[:3] + tcp_transform[:3, :3] = Rotation.from_euler("ZYX", current_pose[3:]).as_matrix() + transform_target_current = np.linalg.inv(tcp_transform) @ self.target_transform + tcp_rot_matrix = transform_target_current[:3, :3] + tcp_t = transform_target_current[:3, 3] + global_surface_points = (tcp_rot_matrix @ self.local_surface_points.T).T + tcp_t + + # 
Calculate the signed distance field + sdf_values = np.array([self.sdf(point) for point in global_surface_points]) + + # Calculate the reward based on the sdf values + reward = np.sqrt(np.mean(np.square(sdf_values))) + reward = 1.0 - (reward / 0.1) # Normalize the reward + reward = np.clip(reward, -1.0, 1.0) # Ensure the reward is between -1 and 1 + self.logger.info(f"Goal not reached, the difference is \n\tcurrent pose: {curr_pose_print} and \n\tdiff (in rad): {difference} ") + + """ Commenting out the existing reward function to make place for sdf + else: + # if current_pose[2] < 0.10000: + # # Let's give more reward if z is even lower + # self.logger.info("Received surface reward 0.6") + # reward = 0.7 + # else: + # if current_pose[2] < 0.10300: + # self.logger.info("Received surface reward 0.4") + # reward = 0.4 + # else: + # reward = 0.0 + + ## Give reward based on euclidean distance + euclidean_distance = np.linalg.norm(np.abs(current_pose) - np.abs(target_pose)) + ## The range of th edistance is 0.06 and 0.018 in demo buffer so we follow the same thing here + reward = np.abs(1.0 - (euclidean_distance / 0.06)) + self.logger.info(f"Goal not reached, the difference is \n\tcurrent pose: {curr_pose_print} and \n\tdiff (in rad): {difference} ") + """ + self.logger.info(f"Reward: {reward}") + self.logger.info("\n") + + # if self.config.APPLY_GRIPPER_PENALTY and gripper_action_effective: + # reward -= self.config.GRIPPER_PENALTY + + self.episode_max_reward = max(self.episode_max_reward, reward) + return reward + + def crop_image(self, name, image) -> np.ndarray: + """Crop realsense images to be a square.""" + if name == "wrist_1": + return image[:, 80:650, :] + elif name == "wrist_2": + return image[:, 80:650, :] + else: + return ValueError(f"Camera {name} not recognized in cropping") + + def get_im(self) -> Dict[str, np.ndarray]: + """Get images from the realsense cameras.""" + images = {} + display_images = {} + for key, cap in self.cap.items(): + try: + rgb = 
cap.read() + cropped_rgb = self.crop_image(key, rgb) + cropped_rgb = np.array(cropped_rgb) + resized = cv2.resize( + cropped_rgb, self.observation_space["images"][key].shape[:2][::-1] + ) + images[key] = resized[..., ::-1] + display_images[key] = resized + display_images[key + "_full"] = cropped_rgb + except queue.Empty: + input( + f"{key} camera frozen. Check connect, then press enter to relaunch..." + ) + cap.close() + self.init_cameras(self.config.REALSENSE_CAMERAS) + return self.get_im() + + self.recording_frames.append( + np.concatenate([display_images[f"{k}_full"] for k in self.cap], axis=0) + ) + self.img_queue.put(display_images) + return images + + def interpolate_move(self, goal: np.ndarray, timeout: float): + """Move the robot to the goal position with linear interpolation.""" + steps = int(timeout * self.hz) + self._update_currpos() + path = np.linspace(self.currpos, goal, steps) + for p in path: + self._send_pos_command(p) + time.sleep(1 / self.hz) + self._update_currpos() + + def go_to_rest(self, joint_reset=False): + """ + The concrete steps to perform reset should be + implemented each subclass for the specific task. + Should override this method if custom reset procedure is needed. 
+ """ + # Change to precision mode for reset + # requests.post(self.url + "update_param", json=self.config.PRECISION_PARAM) + + # Perform Carteasian reset + if self.randomreset: # randomize reset position in xy plane + reset_pose = self.resetpos.copy() + reset_pose[:2] += np.random.uniform( + -self.random_xy_range, self.random_xy_range, (2,) + ) + euler_random = self._TARGET_POSE[3:].copy() + euler_random[-1] += np.random.uniform( + -self.random_rz_range, self.random_rz_range + ) + reset_pose[3:] = euler_2_quat(euler_random) + self._send_pos_command(reset_pose) + else: + reset_pose = self.resetpos.copy() + + # self._send_pos_command(reset_pose) + + # Reset to a specified joint position and not cartesian position to avoid singularity + reset_joint_pose = self.reset_joint_pos.copy() + self._send_reset_joint_command(reset_joint_pose) + + # Change to compliance mode + # requests.post(self.url + "update_param", json=self.config.COMPLIANCE_PARAM) + + def _send_reset_joint_command(self, joint_pos: np.ndarray): + """ + Internal function to send joint position command to the robot. + Make sure that the joint angles are in radians and not degrees. 
+ """ + pos = np.array(joint_pos).astype(np.float32) + self.robot_interface_node.move_to_joint_pos(pos) + + + + def reset(self, joint_reset=False, **kwargs): + # requests.post(self.url + "update_param", json=self.config.COMPLIANCE_PARAM) + + # nisara: Have a maximum episode reward also and log that somewhere to maintain the increasing trend if any + + if self.save_reward_stats: + with open(self.reward_stats_file, "a") as f: + f.write(f"{self.episode_max_reward}\n") + self.logger.info(f"Max reward for the episode: {self.episode_max_reward}") + self.episode_max_reward = 0 + + self.logger.info(f"\nResetting the environment\n") + + if self.save_video: + self.save_video_recording() + + self.cycle_count += 1 + if self.cycle_count % self.joint_reset_cycle == 0: + self.cycle_count = 0 + joint_reset = True + + self.go_to_rest(joint_reset=joint_reset) + self.curr_path_length = 0 + + self._update_currpos() + obs = self._get_obs() + + return obs, {} + + def save_video_recording(self): + try: + if len(self.recording_frames): + video_writer = cv2.VideoWriter( + f'./videos/{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.mp4', + cv2.VideoWriter_fourcc(*"mp4v"), + 10, + self.recording_frames[0].shape[:2][::-1], + ) + for frame in self.recording_frames: + video_writer.write(frame) + video_writer.release() + self.recording_frames.clear() + except Exception as e: + print(f"Failed to save video: {e}") + + def init_cameras(self, name_serial_dict=None): + """Init both wrist cameras.""" + if self.cap is not None: # close cameras if they are already open + self.close_cameras() + + self.cap = OrderedDict() + + for cam_name, cam_serial in name_serial_dict.items(): + cap = VideoCapture( + RSCapture(name=cam_name, serial_number=cam_serial, depth=False) + ) + self.cap[cam_name] = cap + + def close_cameras(self): + """Close both wrist cameras.""" + try: + for cap in self.cap.values(): + cap.close() + except Exception as e: + print(f"Failed to close cameras: {e}") + + def _recover(self): + 
"""Internal function to recover the robot from error state.""" + print("Implement a function to recover from error state") + return + + def _send_pos_command(self, pos: np.ndarray): + """Internal function to send position command to the robot.""" + + np.set_printoptions(precision=5, suppress=True) + + # # nisara : Comment + # curr_pos_euler = Rotation.from_quat(self.currpos[3:]).as_euler("ZYX", degrees=True) + # print_curr_pos = np.concatenate([self.currpos[:3] * 1000, curr_pos_euler]) + # print("Current pose in _send_pos_command: ", print_curr_pos) + + # nisara : Comment + arr_euler = Rotation.from_quat(pos[3:]).as_euler("ZYX", degrees=True) + print_arr_pos = np.concatenate([pos[:3], arr_euler]) + self.logger.info(f"Sending position command to \n\tmove to pos: {print_arr_pos}") + + arr = np.array(pos).astype(np.float32) + + self.robot_interface_node.move_to_pose(arr, self.resetpos) + print("Done moving the robot") + + def _send_gripper_command(self, pos: float, mode="binary"): + """Internal function to send gripper command to the robot.""" + if mode == "binary": + if ( + pos <= -self.config.BINARY_GRIPPER_THREASHOLD + and self.gripper_binary_state == 0 + ): # close gripper + requests.post(self.url + "close_gripper") + time.sleep(0.6) + self.gripper_binary_state = 1 + return True + elif ( + pos >= self.config.BINARY_GRIPPER_THREASHOLD + and self.gripper_binary_state == 1 + ): # open gripper + requests.post(self.url + "open_gripper") + time.sleep(0.6) + self.gripper_binary_state = 0 + return True + else: # do nothing to the gripper + return False + elif mode == "continuous": + raise NotImplementedError("Continuous gripper control is optional") + + def _update_currpos(self): + """ + Internal function to get the latest state of the robot and its gripper. 
+ """ + ps = self.robot_interface_node.get_current_state() + self.currpos[:] = np.array(ps["pose"], dtype=np.float32) + self.currvel[:] = np.array(ps["vel"], dtype=np.float32) + + self.currforce[:] = np.array(ps["force"], dtype=np.float32) + self.currtorque[:] = np.array(ps["torque"], dtype=np.float32) + self.currjacobian[:] = np.reshape( + np.array(ps["jacobian"], dtype=np.float32), (6, 7) + ) + + self.q[:] = np.array(ps["q"], dtype=np.float32) + self.dq[:] = np.array(ps["dq"], dtype=np.float32) + + if self.use_gripper: + self.curr_gripper_pos = np.array(ps["gripper_pos"]) + + def _get_obs(self) -> dict: + images = self.get_im() + state_observation = { + "tcp_pose": self.currpos, + "tcp_vel": self.currvel, + "tcp_force": self.currforce, + "tcp_torque": self.currtorque, + } + return copy.deepcopy(dict(images=images, state=state_observation)) + + +if __name__ == "__main__": + env = gym.make("KukaEnv") diff --git a/serl_robot_infra/kuka_env/envs/peg_env/__init__.py b/serl_robot_infra/kuka_env/envs/peg_env/__init__.py new file mode 100644 index 00000000..fdb1bb44 --- /dev/null +++ b/serl_robot_infra/kuka_env/envs/peg_env/__init__.py @@ -0,0 +1 @@ +from kuka_env.envs.peg_env.kuka_peg_insert import KukaPegInsert diff --git a/serl_robot_infra/kuka_env/envs/peg_env/config.py b/serl_robot_infra/kuka_env/envs/peg_env/config.py new file mode 100644 index 00000000..de19db07 --- /dev/null +++ b/serl_robot_infra/kuka_env/envs/peg_env/config.py @@ -0,0 +1,134 @@ +import numpy as np +from franka_env.envs.franka_env import DefaultEnvConfig + + +class PegEnvConfig(DefaultEnvConfig): + """Set the configuration for FrankaEnv.""" + + ROBOT_IP: str = "192.168.10.122" + REALSENSE_CAMERAS = { + "wrist_1": "932122060300", + "wrist_2": "023322060732", + } + MESH_FILE_PATH = "/home/rp/SERL/src/examples/async_peg_insert_drq/rect_peg/SERL_rectangle v1.stl" + TARGET_POSE = np.array( + [ + 0.77049, + -0.04725, + 0.09370, + -1.5875515, + 0.009599311, + -3.13583307 + ] + ) + # nisara - P2 + 
RESET_POSE = np.array(
+ [
+ 0.77644,
+ -0.04044,
+ 0.10319,
+ -1.5510741,
+ 0.009599311,
+ -3.13583307
+ ]
+ )
+ # RESET JOINT POSITION
+ # MAKE SURE IT IS: A1, A2, A4, A3, A5, A6, A7
+ # Radians
+ ## From the topic:
+ # RESET_JOINT_POSITION = np.array(
+ # [
+ # -0.010134220158701582,
+ # 1.2094114255907469,
+ # -0.6516944804629864,
+ # -0.14664185220364415,
+ # 0.15364255700526772,
+ # 1.2824023060585676,
+ # -1.6960934354739192,
+ # ]
+ # )
+ ## From the pendant:
+ RESET_JOINT_POSITION = np.array(
+ [
+ -0.01012291,
+ 1.209513,
+ -0.65170594,
+ -0.146608,
+ 0.1534144,
+ 1.2824679,
+ -1.696111
+ ]
+ )
+ REWARD_THRESHOLD: np.ndarray = np.array(
+ [0.0035, 0.0035, 0.001, 0.174533, 0.174533, 0.174533]
+ )
+ APPLY_GRIPPER_PENALTY = False
+ ACTION_SCALE = np.array([0.005, 0.01, 1]) # action_scale[1] defines the scaling in radians
+ RANDOM_RESET = False
+ RANDOM_XY_RANGE = 0.008 # only deviates 8mm from target pose
+ RANDOM_RZ_RANGE = np.pi / 6
+ USE_GRIPPER: bool = False
+ ABS_POSE_LIMIT_LOW = np.array(
+ [
+ TARGET_POSE[0] - RANDOM_XY_RANGE,
+ TARGET_POSE[1] - RANDOM_XY_RANGE,
+ TARGET_POSE[2],
+ TARGET_POSE[3] - RANDOM_RZ_RANGE,
+ # Least value of B is -10 degrees
+ -0.174533,
+ # Least value of C is -170 degrees
+ -2.96706
+ ]
+ )
+ ABS_POSE_LIMIT_HIGH = np.array(
+ [
+ TARGET_POSE[0] + RANDOM_XY_RANGE,
+ TARGET_POSE[1] + RANDOM_XY_RANGE,
+ TARGET_POSE[2] + 0.012,
+ TARGET_POSE[3] + RANDOM_RZ_RANGE,
+ # Max value of B is +10 degrees
+ 0.174533,
+ # Max value of C is +170 degrees
+ 2.96706,
+ ]
+ )
+ COMPLIANCE_PARAM = {
+ "translational_stiffness": 1000,
+ "translational_damping": 0.89,
+ "rotational_stiffness": 150,
+ "rotational_damping": 0.7,
+ "translational_Ki": 0,
+ "translational_clip_x": 0.003,
+ "translational_clip_y": 0.003,
+ "translational_clip_z": 0.01,
+ "translational_clip_neg_x": 0.003,
+ "translational_clip_neg_y": 0.003,
+ "translational_clip_neg_z": 0.01,
+ "rotational_clip_x": 0.02,
+ "rotational_clip_y": 0.02,
+ "rotational_clip_z": 0.02,
+ "rotational_clip_neg_x": 0.02, + "rotational_clip_neg_y": 0.02, + "rotational_clip_neg_z": 0.02, + "rotational_Ki": 0, + } + PRECISION_PARAM = { + "translational_stiffness": 3000, + "translational_damping": 0.89, + "rotational_stiffness": 300, + "rotational_damping": 0.9, + "translational_Ki": 0.1, + "translational_clip_x": 0.01, + "translational_clip_y": 0.01, + "translational_clip_z": 0.01, + "translational_clip_neg_x": 0.01, + "translational_clip_neg_y": 0.01, + "translational_clip_neg_z": 0.01, + "rotational_clip_x": 0.05, + "rotational_clip_y": 0.05, + "rotational_clip_z": 0.05, + "rotational_clip_neg_x": 0.05, + "rotational_clip_neg_y": 0.05, + "rotational_clip_neg_z": 0.05, + "rotational_Ki": 0.1, + } diff --git a/serl_robot_infra/kuka_env/envs/peg_env/kuka_peg_insert.py b/serl_robot_infra/kuka_env/envs/peg_env/kuka_peg_insert.py new file mode 100644 index 00000000..691e3935 --- /dev/null +++ b/serl_robot_infra/kuka_env/envs/peg_env/kuka_peg_insert.py @@ -0,0 +1,34 @@ +import numpy as np +import gym +import time +import requests +import copy + +from kuka_env.envs.kuka_env import KukaEnv +from franka_env.utils.rotations import euler_2_quat +from kuka_env.envs.peg_env.config import PegEnvConfig + + +class KukaPegInsert(KukaEnv): + def __init__(self, **kwargs): + super().__init__(**kwargs, config=PegEnvConfig) + + def go_to_rest(self, joint_reset=False): + """ + Move to the rest position defined in base class. + Add a small z offset before going to rest to avoid collision with object. 
+ """ + # self._send_gripper_command(-1) + self._update_currpos() + self._send_pos_command(self.currpos) + time.sleep(0.5) + + # Move up to clear the slot + self._update_currpos() + reset_pose = copy.deepcopy(self.currpos) + reset_pose[2] += 0.05 + self._send_pos_command(reset_pose) + + # execute the go_to_rest method from the parent class + time.sleep(1) + super().go_to_rest(joint_reset) diff --git a/serl_robot_infra/kuka_env/envs/relative_env.py b/serl_robot_infra/kuka_env/envs/relative_env.py new file mode 100644 index 00000000..bec6b120 --- /dev/null +++ b/serl_robot_infra/kuka_env/envs/relative_env.py @@ -0,0 +1,109 @@ +from scipy.spatial.transform import Rotation as R +import gym +import numpy as np +from gym import Env +from serl_robot_infra.franka_env.utils.transformations import ( + construct_adjoint_matrix, + construct_homogeneous_matrix, +) + + +class RelativeFrame(gym.Wrapper): + """ + This wrapper transforms the observation and action to be expressed in the end-effector frame. + Optionally, it can transform the tcp_pose into a relative frame defined as the reset pose. + + This wrapper is expected to be used on top of the base Franka environment, which has the following + observation space: + { + "state": spaces.Dict( + { + "tcp_pose": spaces.Box(-np.inf, np.inf, shape=(7,)), # xyz + quat + ...... + } + ), + ...... + }, and at least 6 DoF action space with (x, y, z, rx, ry, rz, ...). + By convention, the 7th dimension of the action space is used for the gripper. 
+ + """ + + def __init__(self, env: Env, include_relative_pose=True): + super().__init__(env) + self.adjoint_matrix = np.zeros((6, 6)) + + self.include_relative_pose = include_relative_pose + if self.include_relative_pose: + # Homogeneous transformation matrix from reset pose's relative frame to base frame + self.T_r_o_inv = np.zeros((4, 4)) + + def step(self, action: np.ndarray): + # action is assumed to be (x, y, z, rx, ry, rz, gripper) + # Transform action from end-effector frame to base frame + transformed_action = self.transform_action(action) + + obs, reward, done, truncated, info = self.env.step(transformed_action) + + # this is to convert the spacemouse intervention action + if "intervene_action" in info: + info["intervene_action"] = self.transform_action_inv( + info["intervene_action"] + ) + + # Update adjoint matrix + self.adjoint_matrix = construct_adjoint_matrix(obs["state"]["tcp_pose"]) + + # Transform observation to spatial frame + transformed_obs = self.transform_observation(obs) + return transformed_obs, reward, done, truncated, info + + def reset(self, **kwargs): + obs, info = self.env.reset(**kwargs) + + # Update adjoint matrix + self.adjoint_matrix = construct_adjoint_matrix(obs["state"]["tcp_pose"]) + if self.include_relative_pose: + # Update transformation matrix from the reset pose's relative frame to base frame + self.T_r_o_inv = np.linalg.inv( + construct_homogeneous_matrix(obs["state"]["tcp_pose"]) + ) + + # Transform observation to spatial frame + return self.transform_observation(obs), info + + def transform_observation(self, obs): + """ + Transform observations from spatial(base) frame into body(end-effector) frame + using the adjoint matrix + """ + adjoint_inv = np.linalg.inv(self.adjoint_matrix) + obs["state"]["tcp_vel"] = adjoint_inv @ obs["state"]["tcp_vel"] + + if self.include_relative_pose: + T_b_o = construct_homogeneous_matrix(obs["state"]["tcp_pose"]) + T_b_r = self.T_r_o_inv @ T_b_o + + # Reconstruct transformed tcp_pose vector 
+ p_b_r = T_b_r[:3, 3]
+ theta_b_r = R.from_matrix(T_b_r[:3, :3]).as_quat()
+ obs["state"]["tcp_pose"] = np.concatenate((p_b_r, theta_b_r))
+
+ return obs
+
+ def transform_action(self, action: np.ndarray):
+ """
+ Transform action from body(end-effector) frame into spatial(base) frame
+ using the adjoint matrix
+ """
+ action = np.array(action) # in case action is a jax read-only array
+ action[:6] = self.adjoint_matrix @ action[:6]
+ return action
+
+ def transform_action_inv(self, action: np.ndarray):
+ """
+ Transform action from spatial(base) frame into body(end-effector) frame
+ using the adjoint matrix.
+ """
+ action = np.array(action)
+ action[:6] = np.linalg.inv(self.adjoint_matrix) @ action[:6]
+ return action
diff --git a/serl_robot_infra/kuka_env/envs/wrappers.py b/serl_robot_infra/kuka_env/envs/wrappers.py
new file mode 100644
index 00000000..321b813f
--- /dev/null
+++ b/serl_robot_infra/kuka_env/envs/wrappers.py
@@ -0,0 +1,221 @@
+import time
+from gym import Env, spaces
+import gym
+import numpy as np
+from gym.spaces import Box
+import copy
+from franka_env.spacemouse.spacemouse_expert import SpaceMouseExpert
+from kuka_env.utils.rotations import quat_2_euler
+
+sigmoid = lambda x: 1 / (1 + np.exp(-x))
+
+
+class FWBWFrontCameraBinaryRewardClassifierWrapper(gym.Wrapper):
+ """
+ This wrapper uses the front camera images to compute the reward,
+ which is not part of the RL policy's observation space. This is used for the
+ forward backward reset-free bin picking task, where there are two classifiers,
+ one for classifying success + failure for the forward and one for the
+ backward task. Here we also use these two classifiers to decide which
+ task to transition into next at the end of the episode to maximize the
+ learning efficiency.
+ """ + + def __init__(self, env: Env, fw_reward_classifier_func, bw_reward_classifier_func): + # check if env.task_id exists + assert hasattr(env, "task_id"), "fwbw env must have task_idx attribute" + assert hasattr(env, "task_graph"), "fwbw env must have a task_graph method" + + super().__init__(env) + self.reward_classifier_funcs = [ + fw_reward_classifier_func, + bw_reward_classifier_func, + ] + + def task_graph(self, obs): + """ + predict the next task to transition into based on the current observation + if the current task is not successful, stay in the current task + else transition to the next task + """ + success = self.compute_reward(obs) + if success: + return (self.task_id + 1) % 2 + return self.task_id + + def compute_reward(self, obs): + reward = self.reward_classifier_funcs[self.task_id](obs).item() + return (sigmoid(reward) >= 0.5) * 1 + + def step(self, action): + obs, rew, done, truncated, info = self.env.step(action) + success = self.compute_reward(self.env.get_front_cam_obs()) + rew += success + done = done or success + return obs, rew, done, truncated, info + + +class FrontCameraBinaryRewardClassifierWrapper(gym.Wrapper): + """ + This wrapper uses the front camera images to compute the reward, + which is not part of the observation space + """ + + def __init__(self, env: Env, reward_classifier_func): + super().__init__(env) + self.reward_classifier_func = reward_classifier_func + + def compute_reward(self, obs): + if self.reward_classifier_func is not None: + logit = self.reward_classifier_func(obs).item() + return (sigmoid(logit) >= 0.5) * 1 + return 0 + + def step(self, action): + obs, rew, done, truncated, info = self.env.step(action) + success = self.compute_reward(self.env.get_front_cam_obs()) + rew += success + done = done or success + return obs, rew, done, truncated, info + + +class BinaryRewardClassifierWrapper(gym.Wrapper): + """ + Compute reward with custom binary reward classifier fn + """ + + def __init__(self, env: Env, 
reward_classifier_func): + super().__init__(env) + self.reward_classifier_func = reward_classifier_func + + def compute_reward(self, obs): + if self.reward_classifier_func is not None: + logit = self.reward_classifier_func(obs).item() + return (sigmoid(logit) >= 0.5) * 1 + return 0 + + def step(self, action): + obs, rew, done, truncated, info = self.env.step(action) + success = self.compute_reward(obs) + rew += success + done = done or success + return obs, rew, done, truncated, info + + +class ZOnlyWrapper(gym.ObservationWrapper): + """ + Removal of X and Y coordinates + """ + + def __init__(self, env: Env): + super().__init__(env) + self.observation_space["state"] = spaces.Box(-np.inf, np.inf, shape=(14,)) + + def observation(self, observation): + observation["state"] = np.concatenate( + ( + observation["state"][:4], + np.array(observation["state"][6])[..., None], + observation["state"][10:], + ), + axis=-1, + ) + return observation + + +class Quat2EulerWrapper(gym.ObservationWrapper): + """ + Convert the quaternion representation of the tcp pose to euler angles + """ + + def __init__(self, env: Env): + super().__init__(env) + # from xyz + quat to xyz + euler + self.observation_space["state"]["tcp_pose"] = spaces.Box( + -np.inf, np.inf, shape=(6,) + ) + + def observation(self, observation): + # convert tcp pose from quat to euler + tcp_pose = observation["state"]["tcp_pose"] + observation["state"]["tcp_pose"] = np.concatenate( + (tcp_pose[:3], quat_2_euler(tcp_pose[3:])) + ) + return observation + + +class GripperCloseEnv(gym.ActionWrapper): + """ + Use this wrapper to task that requires the gripper to be closed + """ + + def __init__(self, env): + super().__init__(env) + ub = self.env.action_space + assert ub.shape == (7,) + self.action_space = Box(ub.low[:6], ub.high[:6]) + + def action(self, action: np.ndarray) -> np.ndarray: + new_action = np.zeros((7,), dtype=np.float32) + new_action[:6] = action.copy() + return new_action + + def step(self, action): + 
new_action = self.action(action) + obs, rew, done, truncated, info = self.env.step(new_action) + if "intervene_action" in info: + info["intervene_action"] = info["intervene_action"][:6] + return obs, rew, done, truncated, info + + +class SpacemouseIntervention(gym.ActionWrapper): + def __init__(self, env): + super().__init__(env) + + self.gripper_enabled = True + if self.action_space.shape == (6,): + self.gripper_enabled = False + + self.expert = SpaceMouseExpert() + self.last_intervene = 0 + self.left, self.right = False, False + + def action(self, action: np.ndarray) -> np.ndarray: + """ + Input: + - action: policy action + Output: + - action: spacemouse action if nonezero; else, policy action + """ + expert_a, buttons = self.expert.get_action() + self.left, self.right = tuple(buttons) + + if np.linalg.norm(expert_a) > 0.001: + self.last_intervene = time.time() + + if self.gripper_enabled: + if self.left: # close gripper + gripper_action = np.random.uniform(-1, -0.9, size=(1,)) + self.last_intervene = time.time() + elif self.right: # open gripper + gripper_action = np.random.uniform(0.9, 1, size=(1,)) + self.last_intervene = time.time() + else: + gripper_action = np.zeros((1,)) + expert_a = np.concatenate((expert_a, gripper_action), axis=0) + + if time.time() - self.last_intervene < 0.5: + return expert_a, True + + return action, False + + def step(self, action): + + new_action, replaced = self.action(action) + + obs, rew, done, truncated, info = self.env.step(new_action) + if replaced: + info["intervene_action"] = new_action + info["left"] = self.left + info["right"] = self.right + return obs, rew, done, truncated, info diff --git a/serl_robot_infra/kuka_env/utils/rotations.py b/serl_robot_infra/kuka_env/utils/rotations.py new file mode 100644 index 00000000..a8487be6 --- /dev/null +++ b/serl_robot_infra/kuka_env/utils/rotations.py @@ -0,0 +1,11 @@ +from scipy.spatial.transform import Rotation as R + + +def quat_2_euler(quat, degrees=False): + """calculates and 
returns: yaw, pitch, roll from given quaternion""" + return R.from_quat(quat).as_euler("ZYX", degrees=degrees) + + +def euler_2_quat(xyz_as_abc): + q = R.from_euler("ZYX", xyz_as_abc).as_quat() + return q