-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
115 lines (85 loc) · 3.3 KB
/
main.py
File metadata and controls
115 lines (85 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import sys
import time
import pygame
from Agent import qLearningAgent
from Environment import Playground, BuildPlayground
# initializing colors
# Colours are 3-tuples handed to pygame drawing / text-rendering calls.
GREEN = (0, 150, 0)
WHITE = (255, 255, 255)
ORANGE = (255, 165, 0)  # not referenced anywhere in the visible part of this script
RED = (255, 0, 0)
BLACK = (0, 0, 0)

# Window size in pixels. show_info() draws a separator bar at y=600 and text at
# y=610 / y=655, so the bottom 100 px of the 700 px tall window hold the score
# panel. Presumably the 600x600 area above it is where env.render() draws the
# grid -- confirm in Environment.
display_height, display_width = 700, 600

# pygame must be initialised before the window, clock and fonts are created.
pygame.init()
pygame.display.set_caption('Grid environment')
# Note the argument order: set_mode takes (width, height).
gameDisplay = pygame.display.set_mode((display_width, display_height))
# Used by the training loop to cap the frame rate via clock.tick().
clock = pygame.time.Clock()

# Build the playground with 10 rows and 10 columns and wrap it in the
# environment object that is drawn onto gameDisplay.
game_matrix = BuildPlayground(rows=10, columns=10)
env = Playground(gameDisplay, game_matrix)

# agents are initialized
# Both agents share the same environment and are constructed with identical
# arguments. Presumably alpha is the learning rate and nA the number of
# available actions -- confirm in Agent.qLearningAgent.
police = qLearningAgent(env, alpha=0.1, nA=4)
thief = qLearningAgent(env, alpha=0.1, nA=4)
# displaying function
def show_info(money, burglar):
    """Draw the score panel at the bottom of the window.

    Paints a 5 px black separator bar at y=600 and renders two counter lines
    below it onto ``gameDisplay``. The display is not updated here; the caller
    is responsible for calling ``pygame.display.update()``.

    Args:
        money: Value shown after "Thief gets the money: ".
        burglar: Value shown after "Thief gets caught: ".
    """
    # This function runs once per rendered frame. Constructing a font object
    # every time is wasted work, so create it on first use and keep it on the
    # function object for all later calls.
    font = getattr(show_info, "_font", None)
    if font is None:
        font = show_info._font = pygame.font.SysFont(None, 40)

    # Separator between the playground area and the score panel.
    pygame.draw.rect(gameDisplay, BLACK, [0, 600, 600, 5])

    text1 = font.render("Thief gets the money: " + str(money), True, GREEN)
    text2 = font.render("Thief gets caught: " + str(burglar), True, RED)
    gameDisplay.blit(text1, (50, 610))
    gameDisplay.blit(text2, (50, 655))
# indicative rectangle to show money grabbed or thief caught
def draw_rect(color, x, y, width, height):
    """Outline one cell on the display and hold the frame for two seconds.

    Args:
        color: Colour of the outline.
        x: Horizontal cell coordinate; scaled by ``width`` to get pixels.
        y: Vertical cell coordinate; scaled by ``height`` to get pixels.
        width: Cell width in pixels.
        height: Cell height in pixels.
    """
    left = x * width
    top = y * height
    cell = [left, top, width, height]
    border = 10  # outline thickness passed as pygame's line-width argument

    pygame.draw.rect(gameDisplay, color, cell, border)
    pygame.display.update()

    # Pause so the highlighted cell stays visible before the next episode.
    time.sleep(2)
# Running totals over all episodes; shown in the on-screen score panel and in
# the periodic console summary.
total_thief_caught = 0
total_money_grabbed = 0

# epsilon is passed to greedyApproach() for action selection (presumably the
# exploration rate -- confirm in Agent). It is multiplied by eps_decay at the
# start of every episode and never drops below eps_min.
epsilon, eps_decay, eps_min = 1.0, 0.99, 0.05

# number of escapes in one run
numEscapes = 2000

# loop over escapes
for escape in range(1, numEscapes + 1):
    if escape % 100 == 0:
        # "\r" rewrites the same console line. There is no trailing newline,
        # so flush explicitly or the progress text may sit in the buffer.
        print("\rRounds {}/{}".format(escape, numEscapes), end="", flush=True)
    if escape % 500 == 0:
        print("\nMoney Grabbed: " + str(total_money_grabbed) + "\n" + "Thief Caught: " + str(total_thief_caught))

    epsilon = max(epsilon * eps_decay, eps_min)

    state = env.reset()
    action_thief = thief.greedyApproach(state['thief'], epsilon)
    action_police = police.greedyApproach(state['police'], epsilon)

    # render the playground
    env.render(escape)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                # sys.exit() is the supported way to end a program; quit() is
                # an interactive-shell helper injected by the site module.
                sys.exit()

        next_state, reward, done, info = env.step(action_thief, action_police)

        # learning calls for agent
        thief.learn(state['thief'], action_thief, reward['thief'], next_state['thief'])
        police.learn(state['police'], action_police, reward['police'], next_state['police'])

        # render the playground
        gameDisplay.fill(WHITE)
        env.render(escape)
        show_info(total_money_grabbed, total_thief_caught)

        # display updated
        pygame.display.update()
        clock.tick(1000)

        if done:
            # The two outcomes are checked independently, exactly as before;
            # each one bumps its counter and highlights the cell from info.
            if info['money_grabbed']:
                total_money_grabbed += 1
                draw_rect(GREEN, info['x'], info['y'], info['width'], info['height'])
            if info['thief_caught']:
                total_thief_caught += 1
                draw_rect(RED, info['x'], info['y'], info['width'], info['height'])
            break

        # Advance to the next step and pick the next pair of actions.
        state = next_state
        action_thief = thief.greedyApproach(state['thief'], epsilon)
        action_police = police.greedyApproach(state['police'], epsilon)

# Saving policy as pickle file
# NOTE(review): indentation was lost in the reviewed copy of this file; the
# save calls are placed after the whole training run -- confirm that matches
# the original nesting.
police.savePolicy()
thief.savePolicy()
police.save('_police')
thief.save('_thief')