import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np


class simpleBipedEnv(gym.Env):
    """Gymnasium environment for a 4-joint biped simulated in PyBullet.

    Actions are per-joint position *deltas* in [-1, 1] (scaled by
    ``DELTA_SCALE``); observations are joint positions and velocities plus
    the base position and base linear velocity (shape ``2*4 + 6 = 14``).
    """

    # left_hip, right_hip, left_knee, right_knee
    NUM_JOINTS = 4
    # Radians of target change per unit of action per step.
    DELTA_SCALE = 0.05
    # Motor force limit passed to the position controller.
    MAX_FORCE = 200.0
    # Episode terminates when the base height leaves this band (fallen/flying).
    MIN_HEIGHT = 0.2
    MAX_HEIGHT = 1.5

    def __init__(self, render=False, max_steps=10000):
        super().__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        # The search path must be registered BEFORE loading bundled assets
        # (plane.urdf ships with pybullet_data); the original set it after
        # the first loadURDF call.
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, -9.8)
        # Load the ground exactly once. The original reloaded plane.urdf on
        # every reset(), leaking one plane body per episode.
        self.plane_id = p.loadURDF("plane.urdf")
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])

        self.num_joints = self.NUM_JOINTS
        self.action_space = spaces.Box(
            low=-1, high=1, shape=(self.num_joints,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(2 * self.num_joints + 6,), dtype=np.float32
        )
        self.max_steps = max_steps
        self.current_step = 0
        # Zeros (not None) so _compute_reward is safe before the first step().
        self.last_action = np.zeros(self.num_joints, dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Reset the robot to its spawn pose and return (observation, info)."""
        super().reset(seed=seed)  # seeds Gymnasium's RNG; was ignored before
        p.resetBasePositionAndOrientation(self.robot_id, [0, 0, 1], [0, 0, 0, 1])
        # resetBasePositionAndOrientation does NOT clear velocities, and the
        # joints kept their last-episode state — zero both so episodes are
        # independent.
        p.resetBaseVelocity(self.robot_id, [0, 0, 0], [0, 0, 0])
        for joint_index in range(self.num_joints):
            p.resetJointState(self.robot_id, joint_index,
                              targetValue=0.0, targetVelocity=0.0)
        self.current_step = 0
        self.last_action = np.zeros(self.num_joints, dtype=np.float32)
        return self._get_obs(), {}

    def step(self, action):
        """Apply a per-joint position delta, advance one simulation step.

        Returns the Gymnasium 5-tuple (obs, reward, terminated, truncated, info).
        A scalar or length-1 action is broadcast to every joint.
        """
        if np.isscalar(action) or (isinstance(action, np.ndarray) and action.shape == (1,)):
            scalar = np.asarray(action, dtype=np.float32).reshape(-1)[0]
            action = np.full((self.num_joints,), scalar, dtype=np.float32)
        else:
            action = np.asarray(action, dtype=np.float32)
            if action.shape[0] != self.num_joints:
                raise ValueError(
                    f"Action shape {action.shape} does not match number of joints {self.num_joints}"
                )

        self.last_action = action

        # Current joint angles; the action moves the position targets
        # relative to them.
        joint_states = p.getJointStates(self.robot_id, range(self.num_joints))
        current_positions = np.array([state[0] for state in joint_states], dtype=np.float32)
        target_positions = current_positions + action * self.DELTA_SCALE

        p.setJointMotorControlArray(
            bodyUniqueId=self.robot_id,
            jointIndices=list(range(self.num_joints)),
            controlMode=p.POSITION_CONTROL,
            targetPositions=target_positions,
            forces=[self.MAX_FORCE] * self.num_joints,
        )
        p.stepSimulation()

        obs = self._get_obs()
        reward = self._compute_reward()
        terminated = self._check_termination()
        self.current_step += 1
        truncated = self.current_step >= self.max_steps
        return obs, reward, terminated, truncated, {}

    def _get_obs(self):
        """Observation: joint positions, joint velocities, base pos, base lin vel."""
        joint_states = p.getJointStates(self.robot_id, range(self.num_joints))
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        base_pos, _base_ori = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, _base_ang_vel = p.getBaseVelocity(self.robot_id)
        return np.array(
            joint_positions + joint_velocities + list(base_pos) + list(base_vel),
            dtype=np.float32,
        )

    def _check_termination(self):
        """True when the base height indicates the robot has fallen (or flown)."""
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < self.MIN_HEIGHT or base_pos[2] > self.MAX_HEIGHT

    def _compute_reward(self):
        """Forward-velocity reward plus alive bonus minus a small torque penalty."""
        base_vel, _ = p.getBaseVelocity(self.robot_id)

        forward_reward = base_vel[0]  # reward x velocity
        # Large negative bonus on the terminal (fallen) step, small positive
        # bonus otherwise.
        alive_bonus = 1 if not self._check_termination() else -500
        torque_penalty = 0.001 * np.sum(np.square(self.last_action))

        return forward_reward + alive_bonus - torque_penalty

    def render(self):
        """In GUI mode, keep the debug camera tracking the robot."""
        if self.render_mode and hasattr(self, 'robot_id'):
            base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
            p.resetDebugVisualizerCamera(
                cameraDistance=2.0,
                cameraYaw=45,
                cameraPitch=-30,
                cameraTargetPosition=base_pos,
            )
        # DIRECT mode: nothing to render.

    def close(self):
        """Disconnect from the PyBullet physics server."""
        p.disconnect()
def test():
    """Smoke-test the environment with a random policy in the GUI.

    Runs 10 episodes of random actions and prints the per-episode return.
    """
    env = simpleBipedEnv(render=True)
    for _ in range(10):  # 10 random-policy episodes
        obs = env.reset()[0]
        terminated = truncated = False
        total_reward = 0.0
        # The original looped on `terminated` only; a time-limit (truncated)
        # episode would never leave the loop. Honor both Gymnasium end flags.
        while not (terminated or truncated):
            action = env.action_space.sample()  # random action for testing
            obs, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
        print(f"Test episode finished. Total reward: {total_reward}")
    env.close()
- p.stepSimulation() - obs = self._get_obs() - reward = self._compute_reward() - done = self._check_termination() - return obs, reward, done, False, {} - - def _get_obs(self): - joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id))) - joint_positions = [state[0] for state in joint_states] - joint_velocities = [state[1] for state in joint_states] - # For simple_biped.urdf, only one joint: hip_joint - # So obs = [position, velocity], shape=(2,) - return np.array(joint_positions + joint_velocities, dtype=np.float32) - - def _compute_reward(self): - # reward forward movement, penalize falls - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - return base_pos[0] # reward forward x-motion - - def _check_termination(self): - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - return base_pos[2] < 0.5 # fallen - - def render(self): - if self.render_mode: - # Focus camera on robot's current position - if hasattr(self, 'robot_id'): - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - p.resetDebugVisualizerCamera( - cameraDistance=2.0, - cameraYaw=45, - cameraPitch=-30, - cameraTargetPosition=base_pos - ) - # Rendering handled by PyBullet GUI - else: - pass # No rendering in DIRECT mode - - def close(self): - p.disconnect() diff --git a/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 b/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 new file mode 100644 index 0000000..6d5134e Binary files /dev/null and b/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 differ diff --git a/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 b/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 new file mode 100644 index 0000000..2009ec2 Binary files /dev/null and b/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 differ diff --git a/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 
b/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 new file mode 100644 index 0000000..524c52e Binary files /dev/null and b/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 differ diff --git a/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 b/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 new file mode 100644 index 0000000..a6a66f7 Binary files /dev/null and b/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 differ diff --git a/models/humanoid_ppo.zip b/models/humanoid_ppo.zip index e631778..e3614c4 100644 Binary files a/models/humanoid_ppo.zip and b/models/humanoid_ppo.zip differ diff --git a/models/test.py b/models/test.py deleted file mode 100644 index 4d972ae..0000000 --- a/models/test.py +++ /dev/null @@ -1,21 +0,0 @@ -import sys -import os -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from stable_baselines3 import PPO -from envs.walker_env import WalkerEnv -from stable_baselines3.common.vec_env import DummyVecEnv - -env = DummyVecEnv([lambda: WalkerEnv(render=False)]) -model = PPO("MlpPolicy", env, verbose=1) -model.learn(total_timesteps=5000) -model.save("models/humanoid_ppo") - - -env = WalkerEnv(render=True) -obs, _ = env.reset() -done = False -while not done: - action, _ = model.predict(obs) - obs, reward, done, _, _ = env.step(action) - -input("Press Enter to exit and close the visualization window...") diff --git a/walkers/simple_biped.urdf b/walkers/simple_biped.urdf index 9753348..45435fa 100644 --- a/walkers/simple_biped.urdf +++ b/walkers/simple_biped.urdf @@ -1,25 +1,153 @@ - - + + + + + + + + + + + + + + + + + + + - - + + + + + + - + + + + + + + + + + + + + + + + + - - - - - - - + + + + + + + + + + + + + + + + + + + + + - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +