# meditab.py
import numpy as np
import random
import time  # Used for measuring episode duration
import tkinter as tk  # GUI toolkit
from tkinter import messagebox

# Constants for the five available actions
ACTION_FORWARD = 0
ACTION_REVERSE = 1
ACTION_LEFT = 2
ACTION_RIGHT = 3
ACTION_WAIT = 4

# Define the environment
class WarehouseEnv:
    def __init__(self, grid):
        self.grid = grid
        self.rows, self.cols = grid.shape
        self.start_positions = {}
        self.destination_positions = {}
        self.obstacles = []
        self.find_positions()
        self.reset()

    def find_positions(self):
        """Identify the positions of the autobots, destinations, and obstacles."""
        for i in range(self.rows):
            for j in range(self.cols):
                cell = self.grid[i, j]
                if cell.startswith('A'):
                    self.start_positions[cell] = (i, j)
                elif cell.startswith('B'):
                    self.destination_positions[cell] = (i, j)
                elif cell == 'X':
                    self.obstacles.append((i, j))

    def reset(self):
        """Reset the environment to the initial state."""
        self.autobots = []
        for bot_name, start_pos in self.start_positions.items():
            dest_name = 'B' + bot_name[1:]  # Match 'A1' to 'B1', 'A2' to 'B2', etc.
            dest_pos = self.destination_positions.get(dest_name)
            if dest_pos is None:
                raise ValueError(f"Destination not found for bot {bot_name}")
            self.autobots.append({
                'name': bot_name,
                'row': start_pos[0],
                'col': start_pos[1],
                'direction': 'up',
                'destination_row': dest_pos[0],
                'destination_col': dest_pos[1],
                'reached': False  # Track whether the bot has reached its destination
            })
        return self.get_state()

    def get_state(self):
        """Get the current state representation.

        Note: this is a lossy hash of the joint (position, direction)
        configuration, folded into the Q-table's index range by the final
        modulo. Distinct configurations can collide, which limits what the
        tabular agent can distinguish.
        """
        state = 0
        for i, autobot in enumerate(self.autobots):
            direction_index = {'up': 0, 'down': 1, 'left': 2, 'right': 3}[autobot['direction']]
            state += (autobot['row'] * self.cols + autobot['col'] + direction_index * self.rows * self.cols) * (5 ** i)
        return state % (self.rows * self.cols * 4 ** len(self.autobots))

    def step(self, actions):
        """Take a step in the environment based on the actions taken by the autobots."""
        reward = 0
        for i, autobot in enumerate(self.autobots):
            if not autobot['reached']:  # Only move the autobot if it hasn't reached its destination
                self.execute_action(autobot, actions[i])
                if self.is_at_destination(autobot):
                    reward += 100  # Reward for reaching the destination
                    autobot['reached'] = True
        # Check termination *after* moving, so the episode ends on the step
        # in which the last bot arrives rather than one step later.
        done = all(autobot['reached'] for autobot in self.autobots)
        return self.get_state(), reward, done

    def execute_action(self, autobot, action):
        """Execute the given action for the autobot."""
        if action == ACTION_FORWARD:
            self.move_autobot(autobot, 'forward')
        elif action == ACTION_REVERSE:
            self.move_autobot(autobot, 'reverse')
        elif action == ACTION_LEFT:
            autobot['direction'] = self.turn('left', autobot['direction'])
        elif action == ACTION_RIGHT:
            autobot['direction'] = self.turn('right', autobot['direction'])
        elif action == ACTION_WAIT:
            pass  # No movement, just wait

    def move_autobot(self, autobot, move_type):
        """Move the autobot forward or in reverse along its current heading."""
        current_row, current_col = autobot['row'], autobot['col']
        new_row, new_col = self.get_new_position(current_row, current_col, autobot['direction'], move_type)
        if self.is_valid_move(new_row, new_col):
            autobot['row'], autobot['col'] = new_row, new_col

    def get_new_position(self, row, col, direction, move_type):
        """Calculate the new position from the current position, direction, and move type."""
        move_offsets = {
            'up': (-1, 0),
            'down': (1, 0),
            'left': (0, -1),
            'right': (0, 1),
        }
        drow, dcol = move_offsets[direction]
        if move_type != 'forward':  # Reverse negates the offset
            drow, dcol = -drow, -dcol
        return row + drow, col + dcol

    def is_valid_move(self, row, col):
        """Check that the move stays in bounds and does not enter an obstacle."""
        return 0 <= row < self.rows and 0 <= col < self.cols and self.grid[row, col] != 'X'

    def is_at_destination(self, autobot):
        """Check whether the autobot has reached its destination."""
        return autobot['row'] == autobot['destination_row'] and autobot['col'] == autobot['destination_col']

    def turn(self, turn_direction, current_direction):
        """Rotate the autobot's heading 90 degrees left or right."""
        directions = ['up', 'right', 'down', 'left']
        idx = directions.index(current_direction)
        return directions[(idx - 1) % 4] if turn_direction == 'left' else directions[(idx + 1) % 4]

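
# Quick illustration (not part of the original training flow): a minimal
# smoke test of WarehouseEnv on a 1x3 grid with a single bot. The helper
# name `_demo_env` and the tiny grid are assumptions added for clarity.
def _demo_env():
    tiny = np.array([['A1', ' ', 'B1']])
    env = WarehouseEnv(tiny)
    env.step([ACTION_RIGHT])    # Turn from the initial 'up' heading to 'right'
    env.step([ACTION_FORWARD])  # Move to (0, 1)
    _, reward, done = env.step([ACTION_FORWARD])  # Arrive at B1 at (0, 2)
    print(reward, done)  # Expected: 100 True
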
# Q-Learning Agent
class QLearningAgent:
    def __init__(self, state_space_size, action_space_size, learning_rate=0.1, discount_factor=0.95, exploration_rate=1.0, exploration_decay_rate=0.99):
        self.q_table = np.zeros((state_space_size, action_space_size))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay_rate = exploration_decay_rate

    def choose_action(self, state):
        """Choose an action for the given state using an epsilon-greedy strategy."""
        if random.uniform(0, 1) < self.exploration_rate:
            return random.randint(0, ACTION_WAIT)  # Explore: random action
        return np.argmax(self.q_table[state, :])  # Exploit: best known action

    def learn(self, state, actions, reward, next_state, done):
        """Update the Q-table for each bot's action using the shared reward.

        All bots share one Q-table and one scalar reward, so each chosen
        action receives the same temporal-difference update.
        """
        target = reward + (self.discount_factor * np.max(self.q_table[next_state, :]) if not done else 0)
        for action in actions:
            self.q_table[state, action] += self.learning_rate * (target - self.q_table[state, action])

    def decay_exploration_rate(self):
        """Decay the exploration rate after each episode."""
        self.exploration_rate *= self.exploration_decay_rate

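
# Illustration only: one temporal-difference update on a toy agent, to show
# the arithmetic behind `learn`. The names `_demo_q_update`, `toy_agent`,
# `s`, and `s_next` are hypothetical and not part of the original script.
def _demo_q_update():
    toy_agent = QLearningAgent(state_space_size=10, action_space_size=ACTION_WAIT + 1)
    s, s_next = 0, 1
    toy_agent.learn(s, [ACTION_FORWARD], reward=100, next_state=s_next, done=True)
    # On a terminal transition the target is just the reward (100); with
    # learning_rate=0.1 the entry moves from 0 toward 100 by a tenth of the error.
    print(toy_agent.q_table[s, ACTION_FORWARD])  # Expected: 10.0
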
# GUI Class
class WarehouseGUI:
    def __init__(self, master, env, agent):
        self.master = master
        self.env = env
        self.agent = agent
        self.master.title("Warehouse Navigation")

        # Create a canvas sized to the grid (40-pixel cells)
        self.cell_size = 40
        self.canvas = tk.Canvas(master, width=self.env.cols * self.cell_size,
                                height=self.env.rows * self.cell_size)
        self.canvas.pack()

        # Start button
        self.start_button = tk.Button(master, text="Start Training", command=self.start_training)
        self.start_button.pack()

        # Test button
        self.test_button = tk.Button(master, text="Test Agent", command=self.test_agent)
        self.test_button.pack()

        # Variable to track training status
        self.training = False

    def draw_grid(self):
        """Draw the warehouse grid on the canvas."""
        self.canvas.delete("all")  # Clear previous drawings
        cell_size = self.cell_size
        for i in range(self.env.rows):
            for j in range(self.env.cols):
                x1 = j * cell_size
                y1 = i * cell_size
                x2 = x1 + cell_size
                y2 = y1 + cell_size
                cell = self.env.grid[i, j]
                # Draw the cells based on their content
                if cell.startswith('A'):  # Autobot start cells
                    self.canvas.create_rectangle(x1, y1, x2, y2, fill="blue")
                    self.canvas.create_text((x1 + x2) / 2, (y1 + y2) / 2, text=cell, fill="white")
                elif cell.startswith('B'):  # Destinations
                    self.canvas.create_rectangle(x1, y1, x2, y2, fill="yellow")
                    self.canvas.create_text((x1 + x2) / 2, (y1 + y2) / 2, text=cell, fill="white")
                elif cell == 'X':  # Obstacles
                    self.canvas.create_rectangle(x1, y1, x2, y2, fill="red")
                else:  # Empty space
                    self.canvas.create_rectangle(x1, y1, x2, y2, fill="white")
        # Draw the autobots' current positions
        for autobot in self.env.autobots:
            self.canvas.create_text(autobot['col'] * cell_size + cell_size / 2,
                                    autobot['row'] * cell_size + cell_size / 2,
                                    text=autobot['name'], fill="black")

    def start_training(self):
        """Run the training loop and report the fastest episode."""
        self.training = True
        num_episodes = 10000  # Number of episodes to train
        max_steps = 1000  # Safety cap so a single bad episode cannot hang the GUI
        best_episode = None
        min_time = float('inf')
        for episode in range(num_episodes):
            start_time = time.time()
            state = self.env.reset()  # Reset the environment for each episode
            done = False
            total_reward = 0
            steps = 0
            while not done and steps < max_steps:
                actions = [self.agent.choose_action(state) for _ in range(len(self.env.autobots))]
                next_state, reward, done = self.env.step(actions)
                self.agent.learn(state, actions, reward, next_state, done)
                total_reward += reward
                state = next_state
                steps += 1
            episode_time = time.time() - start_time
            print(f"Episode {episode + 1}/{num_episodes} - Total Reward: {total_reward}, Time: {episode_time:.2f} seconds")
            # Track the fastest episode seen so far
            if episode_time < min_time:
                min_time = episode_time
                best_episode = episode
            self.agent.decay_exploration_rate()  # Decay exploration rate
            # Update the grid after each episode
            self.draw_grid()
            self.master.update()  # Keep the GUI responsive
        messagebox.showinfo("Training Complete", f"Best Episode: {best_episode + 1} with Time: {min_time:.2f} seconds")

    def test_agent(self):
        """Run the trained agent and visualize its moves using the learned Q-values."""
        state = self.env.reset()  # Reset environment to the initial state
        done = False
        total_reward = 0
        steps = 0
        max_steps = 200  # A greedy policy can cycle forever, so cap the run
        print("\nTesting Agent")
        while not done and steps < max_steps:
            actions = [self.agent.choose_action(state) for _ in range(len(self.env.autobots))]
            next_state, reward, done = self.env.step(actions)
            total_reward += reward
            self.draw_grid()  # Update grid display
            self.master.update()  # Update the GUI
            time.sleep(0.5)  # Delay so each movement is visible
            print(f"Actions Taken: {actions}, State: {state}, Next State: {next_state}, Reward: {reward}")
            state = next_state
            steps += 1
        print(f"Total Reward During Testing: {total_reward}")

# Main execution
if __name__ == "__main__":
    # Define a simple grid (modify as needed)
    grid = np.array([
        ['A1', ' ', 'X', ' ', 'X', 'B1'],
        [' ', 'X', ' ', ' ', 'X', ' '],
        [' ', ' ', ' ', ' ', ' ', ' '],
        [' ', 'X', ' ', 'X', ' ', 'B2'],
        ['A2', ' ', ' ', ' ', 'X', ' '],
        ['A3', ' ', 'X', ' ', 'X', 'B3'],
    ])
    env = WarehouseEnv(grid)
    agent = QLearningAgent(state_space_size=env.rows * env.cols * 4 ** len(env.start_positions),
                           action_space_size=ACTION_WAIT + 1)
    root = tk.Tk()
    app = WarehouseGUI(root, env, agent)
    root.mainloop()