-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathenv.py
More file actions
109 lines (87 loc) · 3.43 KB
/
env.py
File metadata and controls
109 lines (87 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random
class HospitalEnv:
    """Small episodic simulation of hospital resource management.

    The state is a flat dict with four entries: ``patients_waiting``,
    ``emergency_patients``, ``doctors_available`` and ``time_step``.
    An episode ends when no patient of either kind remains, or once
    ``max_steps`` steps have been taken.

    Actions:
        0 -> treat_normal_patient
        1 -> treat_emergency_patient
        2 -> wait
    """

    # Action codes understood by step().
    TREAT_NORMAL = 0
    TREAT_EMERGENCY = 1
    WAIT = 2

    # Reward shaping used by step().
    REWARD_EMERGENCY_TREATED = 20
    REWARD_NORMAL_TREATED = 10
    PENALTY_NO_EMERGENCY_TO_TREAT = 5
    PENALTY_WAIT = 5
    PENALTY_EMERGENCY_IGNORED = 20
    PENALTY_NO_DOCTOR = 10

    def __init__(self, initial_state=None, max_steps=10):
        """Create the environment and immediately reset it.

        Args:
            initial_state: Optional dict used as the starting state. When
                omitted, a random starting state is generated.
            max_steps: Number of steps after which an episode is reported
                as done even if patients remain. Defaults to 10.
        """
        self.initial_state_config = initial_state
        self.max_steps = max_steps
        self.state = {}
        self.reset(self.initial_state_config)

    def reset(self, initial_state=None):
        """Reset the environment and return a copy of the new state.

        Args:
            initial_state: Optional dict to start from. It is copied, so the
                caller's dict is never mutated by later steps. When ``None``,
                a random state is drawn: 1-5 waiting patients, 0-3 emergency
                patients, 1-3 doctors, and ``time_step`` 0.

        Returns:
            A copy of the freshly initialised state dict.
        """
        if initial_state is not None:
            self.state = initial_state.copy()
            # A supplied state may omit the step counter; without it step()
            # would fail with KeyError after already mutating the counts.
            self.state.setdefault("time_step", 0)
        else:
            self.state = {
                "patients_waiting": random.randint(1, 5),
                "emergency_patients": random.randint(0, 3),
                "doctors_available": random.randint(1, 3),
                "time_step": 0,
            }
        return self.get_state()

    def get_state(self):
        """Return a copy of the current state so callers cannot mutate it."""
        return self.state.copy()

    def step(self, action: int):
        """Apply ``action``, advance one time step, and compute the reward.

        Rewards (all based on the state *before* the action is applied):
            * action 1 with an emergency patient present: +20, one
              emergency patient is removed.
            * action 1 with exactly zero emergency patients: -5.
            * action 0 with a waiting patient present: +10, one waiting
              patient is removed.
            * action 2: -5.
            * any action other than 1 while an emergency patient is
              present: an additional -20.
            * action 0 or 1 with no doctor available: an additional -10.

        An action code outside 0-2 is not rejected; it changes no patient
        count and only the "emergency ignored" penalty can apply to it.

        Args:
            action: One of the action codes 0, 1 or 2.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``state`` is a copy
            of the updated state dict, ``reward`` is a float, ``done`` tells
            whether the episode has ended, and ``info`` is an empty dict.
        """
        # Snapshot the pre-action values; every reward rule refers to these.
        patients = self.state["patients_waiting"]
        emergencies = self.state["emergency_patients"]
        doctors = self.state["doctors_available"]
        is_treatment = action == self.TREAT_NORMAL or action == self.TREAT_EMERGENCY

        reward = 0.0

        # Outcome of the chosen action itself.
        if action == self.TREAT_EMERGENCY and emergencies > 0:
            reward += self.REWARD_EMERGENCY_TREATED
            self.state["emergency_patients"] -= 1
        elif action == self.TREAT_EMERGENCY and emergencies == 0:
            reward -= self.PENALTY_NO_EMERGENCY_TO_TREAT
        elif action == self.TREAT_NORMAL and patients > 0:
            reward += self.REWARD_NORMAL_TREATED
            self.state["patients_waiting"] -= 1
        elif action == self.WAIT:
            reward -= self.PENALTY_WAIT

        # Leaving an emergency untreated is penalised on top of the above.
        if emergencies > 0 and action != self.TREAT_EMERGENCY:
            reward -= self.PENALTY_EMERGENCY_IGNORED

        # A treatment attempt occupies a doctor, or is penalised if none is
        # free. The doctor is consumed even if there was nobody to treat.
        if is_treatment:
            if doctors > 0:
                self.state["doctors_available"] -= 1
            else:
                reward -= self.PENALTY_NO_DOCTOR

        # NOTE(review): this clamp keeps at least one doctor available after
        # every step, so the no-doctor penalty can only trigger when a
        # supplied initial state starts with zero doctors - confirm intended.
        self.state["doctors_available"] = max(1, self.state["doctors_available"])

        self.state["time_step"] += 1

        # Episode ends when everyone is treated or the step budget is spent.
        all_treated = (
            self.state["patients_waiting"] == 0
            and self.state["emergency_patients"] == 0
        )
        done = all_treated or self.state["time_step"] >= self.max_steps

        return self.get_state(), float(reward), done, {}
if __name__ == "__main__":
    # Smoke test: play one episode with uniformly random actions and print
    # every transition plus the accumulated reward.
    print("Initializing HospitalEnv...")
    env = HospitalEnv()
    observation = env.reset()
    print(f"Initial State: {observation}")

    # Human-readable labels for the three action codes.
    labels = {0: "Treat Normal", 1: "Treat Emergency", 2: "Wait"}

    episode_return = 0
    finished = False
    while not finished:
        # Pick one of the actions 0, 1 or 2 at random.
        choice = random.choice([0, 1, 2])
        print(f"\nTime Step: {observation['time_step']} | Taking Action: {choice} ({labels[choice]})")

        observation, step_reward, finished, _info = env.step(choice)
        episode_return += step_reward
        print(f"Reward: {step_reward} | State: {observation} | Done: {finished}")

    print(f"\nEpisode finished! Total Reward: {episode_return}")