import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np


class simpleBipedEnv(gym.Env):
    """Gymnasium environment for a 4-joint biped simulated in PyBullet.

    Actions are per-joint position *deltas* in [-1, 1] (scaled by
    ``DELTA_SCALE``); observations are joint positions and velocities plus
    the base position and base linear velocity (shape ``2*4 + 6 = 14``).
    """

    # left_hip, right_hip, left_knee, right_knee
    NUM_JOINTS = 4
    # Radians of target change per unit of action per step.
    DELTA_SCALE = 0.05
    # Motor force limit passed to the position controller.
    MAX_FORCE = 200.0
    # Episode terminates when the base height leaves this band (fallen/flying).
    MIN_HEIGHT = 0.2
    MAX_HEIGHT = 1.5

    def __init__(self, render=False, max_steps=10000):
        super().__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        # The search path must be registered BEFORE loading bundled assets
        # (plane.urdf ships with pybullet_data); the original set it after
        # the first loadURDF call.
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, -9.8)
        # Load the ground exactly once. The original reloaded plane.urdf on
        # every reset(), leaking one plane body per episode.
        self.plane_id = p.loadURDF("plane.urdf")
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])

        self.num_joints = self.NUM_JOINTS
        self.action_space = spaces.Box(
            low=-1, high=1, shape=(self.num_joints,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(2 * self.num_joints + 6,), dtype=np.float32
        )
        self.max_steps = max_steps
        self.current_step = 0
        # Zeros (not None) so _compute_reward is safe before the first step().
        self.last_action = np.zeros(self.num_joints, dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Reset the robot to its spawn pose and return (observation, info)."""
        super().reset(seed=seed)  # seeds Gymnasium's RNG; was ignored before
        p.resetBasePositionAndOrientation(self.robot_id, [0, 0, 1], [0, 0, 0, 1])
        # resetBasePositionAndOrientation does NOT clear velocities, and the
        # joints kept their last-episode state — zero both so episodes are
        # independent.
        p.resetBaseVelocity(self.robot_id, [0, 0, 0], [0, 0, 0])
        for joint_index in range(self.num_joints):
            p.resetJointState(self.robot_id, joint_index,
                              targetValue=0.0, targetVelocity=0.0)
        self.current_step = 0
        self.last_action = np.zeros(self.num_joints, dtype=np.float32)
        return self._get_obs(), {}

    def step(self, action):
        """Apply a per-joint position delta, advance one simulation step.

        Returns the Gymnasium 5-tuple (obs, reward, terminated, truncated, info).
        A scalar or length-1 action is broadcast to every joint.
        """
        if np.isscalar(action) or (isinstance(action, np.ndarray) and action.shape == (1,)):
            scalar = np.asarray(action, dtype=np.float32).reshape(-1)[0]
            action = np.full((self.num_joints,), scalar, dtype=np.float32)
        else:
            action = np.asarray(action, dtype=np.float32)
            if action.shape[0] != self.num_joints:
                raise ValueError(
                    f"Action shape {action.shape} does not match number of joints {self.num_joints}"
                )

        self.last_action = action

        # Current joint angles; the action moves the position targets
        # relative to them.
        joint_states = p.getJointStates(self.robot_id, range(self.num_joints))
        current_positions = np.array([state[0] for state in joint_states], dtype=np.float32)
        target_positions = current_positions + action * self.DELTA_SCALE

        p.setJointMotorControlArray(
            bodyUniqueId=self.robot_id,
            jointIndices=list(range(self.num_joints)),
            controlMode=p.POSITION_CONTROL,
            targetPositions=target_positions,
            forces=[self.MAX_FORCE] * self.num_joints,
        )
        p.stepSimulation()

        obs = self._get_obs()
        reward = self._compute_reward()
        terminated = self._check_termination()
        self.current_step += 1
        truncated = self.current_step >= self.max_steps
        return obs, reward, terminated, truncated, {}

    def _get_obs(self):
        """Observation: joint positions, joint velocities, base pos, base lin vel."""
        joint_states = p.getJointStates(self.robot_id, range(self.num_joints))
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        base_pos, _base_ori = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, _base_ang_vel = p.getBaseVelocity(self.robot_id)
        return np.array(
            joint_positions + joint_velocities + list(base_pos) + list(base_vel),
            dtype=np.float32,
        )

    def _check_termination(self):
        """True when the base height indicates the robot has fallen (or flown)."""
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < self.MIN_HEIGHT or base_pos[2] > self.MAX_HEIGHT

    def _compute_reward(self):
        """Forward-velocity reward plus alive bonus minus a small torque penalty."""
        base_vel, _ = p.getBaseVelocity(self.robot_id)

        forward_reward = base_vel[0]  # reward x velocity
        # Large negative bonus on the terminal (fallen) step, small positive
        # bonus otherwise.
        alive_bonus = 1 if not self._check_termination() else -500
        torque_penalty = 0.001 * np.sum(np.square(self.last_action))

        return forward_reward + alive_bonus - torque_penalty

    def render(self):
        """In GUI mode, keep the debug camera tracking the robot."""
        if self.render_mode and hasattr(self, 'robot_id'):
            base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
            p.resetDebugVisualizerCamera(
                cameraDistance=2.0,
                cameraYaw=45,
                cameraPitch=-30,
                cameraTargetPosition=base_pos,
            )
        # DIRECT mode: nothing to render.

    def close(self):
        """Disconnect from the PyBullet physics server."""
        p.disconnect()
def test():
    """Smoke-test the environment with a random policy in the GUI.

    Runs 10 episodes of random actions and prints the per-episode return.
    """
    env = simpleBipedEnv(render=True)
    for _ in range(10):  # 10 random-policy episodes
        obs = env.reset()[0]
        terminated = truncated = False
        total_reward = 0.0
        # The original looped on `terminated` only; a time-limit (truncated)
        # episode would never leave the loop. Honor both Gymnasium end flags.
        while not (terminated or truncated):
            action = env.action_space.sample()  # random action for testing
            obs, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
        print(f"Test episode finished. Total reward: {total_reward}")
    env.close()
- p.stepSimulation() - obs = self._get_obs() - reward = self._compute_reward() - done = self._check_termination() - return obs, reward, done, False, {} - - def _get_obs(self): - joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id))) - joint_positions = [state[0] for state in joint_states] - joint_velocities = [state[1] for state in joint_states] - # For simple_biped.urdf, only one joint: hip_joint - # So obs = [position, velocity], shape=(2,) - return np.array(joint_positions + joint_velocities, dtype=np.float32) - - def _compute_reward(self): - # reward forward movement, penalize falls - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - return base_pos[0] # reward forward x-motion - - def _check_termination(self): - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - return base_pos[2] < 0.5 # fallen - - def render(self): - if self.render_mode: - # Focus camera on robot's current position - if hasattr(self, 'robot_id'): - base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] - p.resetDebugVisualizerCamera( - cameraDistance=2.0, - cameraYaw=45, - cameraPitch=-30, - cameraTargetPosition=base_pos - ) - # Rendering handled by PyBullet GUI - else: - pass # No rendering in DIRECT mode - - def close(self): - p.disconnect() diff --git a/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 b/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 new file mode 100644 index 0000000..6d5134e Binary files /dev/null and b/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 differ diff --git a/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 b/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 new file mode 100644 index 0000000..2009ec2 Binary files /dev/null and b/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 differ diff --git a/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 
b/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 new file mode 100644 index 0000000..524c52e Binary files /dev/null and b/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 differ diff --git a/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 b/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 new file mode 100644 index 0000000..a6a66f7 Binary files /dev/null and b/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 differ diff --git a/models/humanoid_ppo.zip b/models/humanoid_ppo.zip index e631778..e3614c4 100644 Binary files a/models/humanoid_ppo.zip and b/models/humanoid_ppo.zip differ diff --git a/models/test.py b/models/test.py deleted file mode 100644 index 4d972ae..0000000 --- a/models/test.py +++ /dev/null @@ -1,21 +0,0 @@ -import sys -import os -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from stable_baselines3 import PPO -from envs.walker_env import WalkerEnv -from stable_baselines3.common.vec_env import DummyVecEnv - -env = DummyVecEnv([lambda: WalkerEnv(render=False)]) -model = PPO("MlpPolicy", env, verbose=1) -model.learn(total_timesteps=5000) -model.save("models/humanoid_ppo") - - -env = WalkerEnv(render=True) -obs, _ = env.reset() -done = False -while not done: - action, _ = model.predict(obs) - obs, reward, done, _, _ = env.step(action) - -input("Press Enter to exit and close the visualization window...") diff --git a/walkers/simple_biped.urdf b/walkers/simple_biped.urdf index 9753348..45435fa 100644 --- a/walkers/simple_biped.urdf +++ b/walkers/simple_biped.urdf @@ -1,25 +1,153 @@ - - + + + + + + + + + + + + + + + + + + + - - + + + + + + - + + + + + + + + + + + + + + + + + - - - - - - - + + + + + + + + + + + + + + + + + + + + + - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +