MedFlow-AI/env.py at main · NilinR/MedFlow-AI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random

class HospitalEnv:
    """
    Toy hospital resource-management environment with a reset/step API.

    The state is a plain dict holding the integer counters
    ``patients_waiting``, ``emergency_patients``, ``doctors_available``
    and ``time_step``.

    Actions accepted by ``step``:
    0 -> treat_normal_patient
    1 -> treat_emergency_patient
    2 -> wait
    """

    # Named action ids so the transition rules read without magic numbers.
    TREAT_NORMAL = 0
    TREAT_EMERGENCY = 1
    WAIT = 2

    def __init__(self, initial_state=None, *, max_steps: int = 10):
        """
        Initialize the Hospital Resource Management Environment.

        Args:
            initial_state: Optional dict used as the starting state. It is
                copied, never mutated. When None, a random state is drawn.
            max_steps: Episode horizon; ``step`` reports ``done`` once
                ``time_step`` reaches this value. Defaults to 10.
        """
        self.initial_state_config = initial_state
        self.max_steps = max_steps
        self.state = {}
        self.reset(self.initial_state_config)

    def reset(self, initial_state=None) -> dict:
        """
        Resets the environment to an initial state.

        If no initial_state is provided, it generates a random reasonable
        state. Note that the state passed to the constructor is NOT reused
        automatically; pass it again to restart from it.

        Args:
            initial_state: Optional dict to start from (copied).

        Returns:
            A copy of the new state.
        """
        if initial_state is not None:
            self.state = initial_state.copy()
            # A caller-supplied state may omit the clock; without this
            # default, step() would fail with KeyError on "time_step".
            self.state.setdefault("time_step", 0)
        else:
            self.state = {
                "patients_waiting": random.randint(1, 5),
                "emergency_patients": random.randint(0, 3),
                "doctors_available": random.randint(1, 3),
                "time_step": 0,
            }
        return self.get_state()

    def get_state(self) -> dict:
        """
        Returns the current observation state.

        The returned dict is a copy, so callers cannot mutate the
        environment through it.
        """
        return self.state.copy()

    def step(self, action: int) -> tuple:
        """
        Applies chosen action, updates environment state, and computes reward.

        Actions:
        0 -> treat_normal_patient
        1 -> treat_emergency_patient
        2 -> wait

        Reward rules (all judged against the state at the start of the step):
        - treat emergency with an emergency waiting: +20, one emergency removed
        - treat emergency with none waiting: -5
        - treat normal with a patient waiting: +10, one patient removed
        - wait: -5
        - any action other than 1 while an emergency is waiting: extra -20
        - a treat action (0 or 1) uses one doctor; with no doctor available
          it costs an extra -10 instead

        An unrecognized action earns no treatment reward but still advances
        the clock and can incur the ignored-emergency penalty.

        Args:
            action: One of the action ids listed above.

        Returns:
            A tuple ``(state, reward, done, info)``: a copy of the updated
            state, the reward as a float, whether the episode has ended
            (both queues are empty or ``time_step >= max_steps``), and an
            empty info dict.
        """
        # Snapshot the counters so every rule sees the pre-step values.
        patients = self.state["patients_waiting"]
        emergencies = self.state["emergency_patients"]
        doctors = self.state["doctors_available"]

        reward = 0.0

        # Apply the treatment / wait outcome.
        if action == self.TREAT_EMERGENCY and emergencies > 0:
            reward += 20
            self.state["emergency_patients"] -= 1
        elif action == self.TREAT_EMERGENCY and emergencies == 0:
            reward -= 5
        elif action == self.TREAT_NORMAL and patients > 0:
            reward += 10
            self.state["patients_waiting"] -= 1
        elif action == self.WAIT:
            reward -= 5

        # Penalize if emergencies are ignored.
        if emergencies > 0 and action != self.TREAT_EMERGENCY:
            reward -= 20

        # Treating occupies a doctor; treating without one is penalized.
        if action in (self.TREAT_NORMAL, self.TREAT_EMERGENCY):
            if doctors > 0:
                self.state["doctors_available"] -= 1
            else:
                reward -= 10

        # The doctor count is floored at 1 after every step.
        self.state["doctors_available"] = max(1, self.state["doctors_available"])

        # Advance the clock.
        self.state["time_step"] += 1

        # The episode ends when both queues are empty or time has run out.
        queues_empty = (
            self.state["patients_waiting"] == 0
            and self.state["emergency_patients"] == 0
        )
        done = queues_empty or self.state["time_step"] >= self.max_steps

        return self.get_state(), float(reward), done, {}


if __name__ == "__main__":
    # Smoke test: play a single episode, picking a random action each step.
    # The id -> label table never changes, so it is built once up front.
    action_names = {0: "Treat Normal", 1: "Treat Emergency", 2: "Wait"}
    total_reward = 0

    print("Initializing HospitalEnv...")
    env = HospitalEnv()
    state = env.reset()
    print(f"Initial State: {state}")

    while True:
        # Sample one of the three action ids (0, 1, or 2).
        action = random.choice([0, 1, 2])

        print(f"\nTime Step: {state['time_step']} | Taking Action: {action} ({action_names[action]})")

        next_state, reward, done, info = env.step(action)
        total_reward += reward

        print(f"Reward: {reward} | State: {next_state} | Done: {done}")
        state = next_state

        # Stop as soon as the environment reports the episode is over.
        if done:
            break

    print(f"\nEpisode finished! Total Reward: {total_reward}")